/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001 Damien Vincent

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/****** compute bilinear interpolation on a picture ******************/
#ifndef __BILINEAR_INT_H__
#define __BILINEAR_INT_H__


/*
 * Lane masks for processing four 8-bit pixels packed in one 32-bit word.
 * Each mask selects the same bits in every byte lane, so that per-lane sums
 * of the masked (and shifted) values never carry into the neighbouring lane.
 */
#define MASK_LB  0x01010101U /* bit 0 of every byte      */
#define MASK_HB  0xfefefefeU /* bits 7..1 of every byte  */

#define MASK_LB2 0x03030303U /* bits 1..0 of every byte  */
#define MASK_HB2 0xfcfcfcfcU /* bits 7..2 of every byte  */

/*
 * Read four consecutive pixels into one word: pixel k goes to byte lane k
 * (bits 8k..8k+7).  Assembling the word byte by byte avoids the misaligned,
 * type-punned loads of a pointer cast and makes the lane layout independent
 * of the host byte order.
 */
static inline unsigned int bilinear_load4(const unsigned char *p)
{
  return (unsigned int) p[0]
       | ((unsigned int) p[1] << 8)
       | ((unsigned int) p[2] << 16)
       | ((unsigned int) p[3] << 24);
}

/* Write the four byte lanes of v to four consecutive pixels (lane k -> p[k]). */
static inline void bilinear_store4(unsigned char *p, unsigned int v)
{
  p[0] = (unsigned char) (v & 0xffU);
  p[1] = (unsigned char) ((v >> 8) & 0xffU);
  p[2] = (unsigned char) ((v >> 16) & 0xffU);
  p[3] = (unsigned char) ((v >> 24) & 0xffU);
}

/*
 * Return the right-hand neighbours of the four pixels held in cur (which was
 * loaded from p).  On the rightmost strip there is no pixel past the end of
 * the row, so the last pixel is replicated instead of being read.
 */
static inline unsigned int bilinear_right4(const unsigned char *p,
                                           unsigned int cur, int rightmost)
{
  if(rightmost)
    return (cur >> 8) | (cur & 0xff000000U);
  return bilinear_load4(p + 1);
}

/* Per-lane rounded average of two packed words: (a + b + 1) >> 1. */
static inline unsigned int bilinear_avg2(unsigned int a, unsigned int b)
{
  return ((a & MASK_HB) >> 1) + ((b & MASK_HB) >> 1) + ((a | b) & MASK_LB);
}

/*
 * Interpolate one strip of four columns over all rows.  All four pointers
 * address the first row of the strip; rows are `width` bytes apart.
 */
static inline void bilinear_half_strip(unsigned char *half1, unsigned char *half2,
                                       unsigned char *half3, const unsigned char *src,
                                       int width, int height, int rightmost)
{
  unsigned int cur, right;           /* current row and its right neighbours  */
  unsigned int above, above_right;   /* same for the previous row             */
  unsigned int pair_lo, pair_hi;     /* cur+right split into low 2 / high 6 bits */
  unsigned int above_lo, above_hi;
  unsigned int bottom;
  int row;

  cur = bilinear_load4(src);
  right = bilinear_right4(src, cur, rightmost);
  pair_lo = (cur & MASK_LB2) + (right & MASK_LB2);
  pair_hi = ((cur & MASK_HB2) >> 2) + ((right & MASK_HB2) >> 2);

  for(row = 0; row < height - 1; row++)
  {
    above = cur;
    above_right = right;
    above_lo = pair_lo;
    above_hi = pair_hi;

    src += width;
    cur = bilinear_load4(src);
    right = bilinear_right4(src, cur, rightmost);
    pair_lo = (cur & MASK_LB2) + (right & MASK_LB2);
    pair_hi = ((cur & MASK_HB2) >> 2) + ((right & MASK_HB2) >> 2);

    bilinear_store4(half1, bilinear_avg2(above, above_right));
    bilinear_store4(half2, bilinear_avg2(above, cur));
    /* (a + b + c + d + 2) >> 2 per lane: the rounding term is added to every
       lane, and the mask discards the bits the shift pulls in from the lane
       above. */
    bilinear_store4(half3, above_hi + pair_hi
                    + (((above_lo + pair_lo + (MASK_LB << 1)) >> 2) & MASK_LB2));

    half1 += width;
    half2 += width;
    half3 += width;
  }

  /* picture boundary (bottom): the last row acts as its own lower neighbour */
  bottom = bilinear_avg2(cur, right);
  bilinear_store4(half1, bottom);
  bilinear_store4(half2, cur);
  bilinear_store4(half3, bottom);
}

/*
 * Compute the three half-pel interpolated planes of a picture.
 *
 *   pict_half1  output: horizontal half-pel, (p[y][x] + p[y][x+1] + 1) >> 1
 *   pict_half2  output: vertical half-pel,   (p[y][x] + p[y+1][x] + 1) >> 1
 *   pict_half3  output: diagonal half-pel,
 *               (p[y][x] + p[y][x+1] + p[y+1][x] + p[y+1][x+1] + 2) >> 2
 *   pict        input picture, 8 bits per pixel, rows of mb_width*16 bytes
 *   mb_width    picture width in 16-pixel units
 *   mb_height   picture height in 16-pixel units
 *
 * Every buffer is accessed as (mb_width*16) * (mb_height*16) bytes.  Samples
 * past the right or bottom edge are replaced by the nearest edge pixel.
 * Nothing is read or written when either dimension is not positive.
 */
static inline void interpolation_half(unsigned char *pict_half1, unsigned char *pict_half2,
				      unsigned char *pict_half3, unsigned char *pict,
				      int mb_width, int mb_height)
{
  int width, height;
  int x;

  if(mb_width <= 0 || mb_height <= 0)
    return;

  width = (mb_width<<4);
  height = (mb_height<<4);

  /* Four columns at a time; only the last strip needs the right-edge rule. */
  for(x = 0; x < width; x += 4)
  {
    bilinear_half_strip(pict_half1 + x, pict_half2 + x, pict_half3 + x,
                        pict + x, width, height, x + 4 >= width);
  }
}




#endif
