libavcodec/dsputil.c

00001 /*
00002  * DSP utils
00003  * Copyright (c) 2000, 2001 Fabrice Bellard
00004  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
00005  *
00006  * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
00007  *
00008  * This file is part of Libav.
00009  *
00010  * Libav is free software; you can redistribute it and/or
00011  * modify it under the terms of the GNU Lesser General Public
00012  * License as published by the Free Software Foundation; either
00013  * version 2.1 of the License, or (at your option) any later version.
00014  *
00015  * Libav is distributed in the hope that it will be useful,
00016  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00017  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018  * Lesser General Public License for more details.
00019  *
00020  * You should have received a copy of the GNU Lesser General Public
00021  * License along with Libav; if not, write to the Free Software
00022  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00023  */
00024 
00030 #include "libavutil/imgutils.h"
00031 #include "avcodec.h"
00032 #include "dsputil.h"
00033 #include "simple_idct.h"
00034 #include "faandct.h"
00035 #include "faanidct.h"
00036 #include "mathops.h"
00037 #include "mpegvideo.h"
00038 #include "config.h"
00039 #include "ac3dec.h"
00040 #include "vorbis.h"
00041 #include "png.h"
00042 
00043 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
00044 uint32_t ff_squareTbl[512] = {0, };
00045 
00046 #define BIT_DEPTH 9
00047 #include "dsputil_template.c"
00048 #undef BIT_DEPTH
00049 
00050 #define BIT_DEPTH 10
00051 #include "dsputil_template.c"
00052 #undef BIT_DEPTH
00053 
00054 #define BIT_DEPTH 8
00055 #include "dsputil_template.c"
00056 
00057 // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f, depending on the CPU's native arithmetic word size
00058 #define pb_7f (~0UL/255 * 0x7f)
00059 #define pb_80 (~0UL/255 * 0x80)
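/* ~0UL/255 evaluates to 0x0101...01 for the native word size, so multiplying
 * by 0x7f (or 0x80) replicates that byte into every lane: on a 64-bit target
 * pb_7f is 0x7f7f7f7f7f7f7f7f.  These masks support word-at-a-time (SWAR)
 * byte arithmetic. */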
00060 
00061 const uint8_t ff_zigzag_direct[64] = {
00062     0,   1,  8, 16,  9,  2,  3, 10,
00063     17, 24, 32, 25, 18, 11,  4,  5,
00064     12, 19, 26, 33, 40, 48, 41, 34,
00065     27, 20, 13,  6,  7, 14, 21, 28,
00066     35, 42, 49, 56, 57, 50, 43, 36,
00067     29, 22, 15, 23, 30, 37, 44, 51,
00068     58, 59, 52, 45, 38, 31, 39, 46,
00069     53, 60, 61, 54, 47, 55, 62, 63
00070 };
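/* ff_zigzag_direct[i] gives the raster-order index (row*8 + column) of the
 * i-th coefficient in zig-zag scan order; e.g. scan position 2 maps to
 * index 8, i.e. row 1, column 0. */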
00071 
00072 /* Specific zigzag scan for 248 idct. NOTE that unlike the
00073    specification, we interleave the fields */
00074 const uint8_t ff_zigzag248_direct[64] = {
00075      0,  8,  1,  9, 16, 24,  2, 10,
00076     17, 25, 32, 40, 48, 56, 33, 41,
00077     18, 26,  3, 11,  4, 12, 19, 27,
00078     34, 42, 49, 57, 50, 58, 35, 43,
00079     20, 28,  5, 13,  6, 14, 21, 29,
00080     36, 44, 51, 59, 52, 60, 37, 45,
00081     22, 30,  7, 15, 23, 31, 38, 46,
00082     53, 61, 54, 62, 39, 47, 55, 63,
00083 };
00084 
00085 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
00086 DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];
00087 
00088 const uint8_t ff_alternate_horizontal_scan[64] = {
00089     0,  1,   2,  3,  8,  9, 16, 17,
00090     10, 11,  4,  5,  6,  7, 15, 14,
00091     13, 12, 19, 18, 24, 25, 32, 33,
00092     26, 27, 20, 21, 22, 23, 28, 29,
00093     30, 31, 34, 35, 40, 41, 48, 49,
00094     42, 43, 36, 37, 38, 39, 44, 45,
00095     46, 47, 50, 51, 56, 57, 58, 59,
00096     52, 53, 54, 55, 60, 61, 62, 63,
00097 };
00098 
00099 const uint8_t ff_alternate_vertical_scan[64] = {
00100     0,  8,  16, 24,  1,  9,  2, 10,
00101     17, 25, 32, 40, 48, 56, 57, 49,
00102     41, 33, 26, 18,  3, 11,  4, 12,
00103     19, 27, 34, 42, 50, 58, 35, 43,
00104     51, 59, 20, 28,  5, 13,  6, 14,
00105     21, 29, 36, 44, 52, 60, 37, 45,
00106     53, 61, 22, 30,  7, 15, 23, 31,
00107     38, 46, 54, 62, 39, 47, 55, 63,
00108 };
00109 
00110 /* Input permutation for the simple_idct_mmx */
00111 static const uint8_t simple_mmx_permutation[64]={
00112         0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
00113         0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
00114         0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
00115         0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
00116         0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
00117         0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
00118         0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
00119         0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
00120 };
00121 
00122 static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
00123 
00124 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
00125     int i;
00126     int end;
00127 
00128     st->scantable= src_scantable;
00129 
00130     for(i=0; i<64; i++){
00131         int j;
00132         j = src_scantable[i];
00133         st->permutated[i] = permutation[j];
00134 #if ARCH_PPC
00135         st->inverse[j] = i;
00136 #endif
00137     }
00138 
00139     end=-1;
00140     for(i=0; i<64; i++){
00141         int j;
00142         j = st->permutated[i];
00143         if(j>end) end=j;
00144         st->raster_end[i]= end;
00145     }
00146 }
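/* A minimal usage sketch (illustrative only, kept disabled): combining
 * ff_init_scantable_permutation() with ff_init_scantable() to build a
 * ScanTable for the identity (FF_NO_IDCT_PERM) permutation. */
#if 0
static void example_build_scantable(ScanTable *st)
{
    uint8_t perm[64];

    ff_init_scantable_permutation(perm, FF_NO_IDCT_PERM);
    ff_init_scantable(perm, st, ff_zigzag_direct);
    /* Here st->permutated[i] == ff_zigzag_direct[i], and st->raster_end[i]
     * holds the largest permuted index seen up to scan position i. */
}
#endif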
00147 
00148 void ff_init_scantable_permutation(uint8_t *idct_permutation,
00149                                    int idct_permutation_type)
00150 {
00151     int i;
00152 
00153     switch(idct_permutation_type){
00154     case FF_NO_IDCT_PERM:
00155         for(i=0; i<64; i++)
00156             idct_permutation[i]= i;
00157         break;
00158     case FF_LIBMPEG2_IDCT_PERM:
00159         for(i=0; i<64; i++)
00160             idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
00161         break;
00162     case FF_SIMPLE_IDCT_PERM:
00163         for(i=0; i<64; i++)
00164             idct_permutation[i]= simple_mmx_permutation[i];
00165         break;
00166     case FF_TRANSPOSE_IDCT_PERM:
00167         for(i=0; i<64; i++)
00168             idct_permutation[i]= ((i&7)<<3) | (i>>3);
00169         break;
00170     case FF_PARTTRANS_IDCT_PERM:
00171         for(i=0; i<64; i++)
00172             idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
00173         break;
00174     case FF_SSE2_IDCT_PERM:
00175         for(i=0; i<64; i++)
00176             idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
00177         break;
00178     default:
00179         av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
00180     }
00181 }
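/* Each permutation type rearranges the 8x8 raster index i = row*8 + column
 * into the input order expected by the corresponding IDCT.  For example,
 * FF_TRANSPOSE_IDCT_PERM swaps rows and columns: i = 10 (row 1, column 2)
 * maps to ((10&7)<<3) | (10>>3) = 17 (row 2, column 1). */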
00182 
00183 static int pix_sum_c(uint8_t * pix, int line_size)
00184 {
00185     int s, i, j;
00186 
00187     s = 0;
00188     for (i = 0; i < 16; i++) {
00189         for (j = 0; j < 16; j += 8) {
00190             s += pix[0];
00191             s += pix[1];
00192             s += pix[2];
00193             s += pix[3];
00194             s += pix[4];
00195             s += pix[5];
00196             s += pix[6];
00197             s += pix[7];
00198             pix += 8;
00199         }
00200         pix += line_size - 16;
00201     }
00202     return s;
00203 }
00204 
00205 static int pix_norm1_c(uint8_t * pix, int line_size)
00206 {
00207     int s, i, j;
00208     uint32_t *sq = ff_squareTbl + 256;
00209 
00210     s = 0;
00211     for (i = 0; i < 16; i++) {
00212         for (j = 0; j < 16; j += 8) {
00213 #if 0
00214             s += sq[pix[0]];
00215             s += sq[pix[1]];
00216             s += sq[pix[2]];
00217             s += sq[pix[3]];
00218             s += sq[pix[4]];
00219             s += sq[pix[5]];
00220             s += sq[pix[6]];
00221             s += sq[pix[7]];
00222 #else
00223 #if HAVE_FAST_64BIT
00224             register uint64_t x=*(uint64_t*)pix;
00225             s += sq[x&0xff];
00226             s += sq[(x>>8)&0xff];
00227             s += sq[(x>>16)&0xff];
00228             s += sq[(x>>24)&0xff];
00229             s += sq[(x>>32)&0xff];
00230             s += sq[(x>>40)&0xff];
00231             s += sq[(x>>48)&0xff];
00232             s += sq[(x>>56)&0xff];
00233 #else
00234             register uint32_t x=*(uint32_t*)pix;
00235             s += sq[x&0xff];
00236             s += sq[(x>>8)&0xff];
00237             s += sq[(x>>16)&0xff];
00238             s += sq[(x>>24)&0xff];
00239             x=*(uint32_t*)(pix+4);
00240             s += sq[x&0xff];
00241             s += sq[(x>>8)&0xff];
00242             s += sq[(x>>16)&0xff];
00243             s += sq[(x>>24)&0xff];
00244 #endif
00245 #endif
00246             pix += 8;
00247         }
00248         pix += line_size - 16;
00249     }
00250     return s;
00251 }
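/* sq points to the middle of ff_squareTbl, so sq[d] == d*d for d in
 * [-255, 255].  pix_norm1_c() only indexes it with unsigned byte values,
 * but the sse*_c() functions below rely on the negative half for pixel
 * differences. */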
00252 
00253 static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
00254     int i;
00255 
00256     for(i=0; i+8<=w; i+=8){
00257         dst[i+0]= av_bswap32(src[i+0]);
00258         dst[i+1]= av_bswap32(src[i+1]);
00259         dst[i+2]= av_bswap32(src[i+2]);
00260         dst[i+3]= av_bswap32(src[i+3]);
00261         dst[i+4]= av_bswap32(src[i+4]);
00262         dst[i+5]= av_bswap32(src[i+5]);
00263         dst[i+6]= av_bswap32(src[i+6]);
00264         dst[i+7]= av_bswap32(src[i+7]);
00265     }
00266     for(;i<w; i++){
00267         dst[i+0]= av_bswap32(src[i+0]);
00268     }
00269 }
00270 
00271 static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
00272 {
00273     while (len--)
00274         *dst++ = av_bswap16(*src++);
00275 }
00276 
00277 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00278 {
00279     int s, i;
00280     uint32_t *sq = ff_squareTbl + 256;
00281 
00282     s = 0;
00283     for (i = 0; i < h; i++) {
00284         s += sq[pix1[0] - pix2[0]];
00285         s += sq[pix1[1] - pix2[1]];
00286         s += sq[pix1[2] - pix2[2]];
00287         s += sq[pix1[3] - pix2[3]];
00288         pix1 += line_size;
00289         pix2 += line_size;
00290     }
00291     return s;
00292 }
00293 
00294 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00295 {
00296     int s, i;
00297     uint32_t *sq = ff_squareTbl + 256;
00298 
00299     s = 0;
00300     for (i = 0; i < h; i++) {
00301         s += sq[pix1[0] - pix2[0]];
00302         s += sq[pix1[1] - pix2[1]];
00303         s += sq[pix1[2] - pix2[2]];
00304         s += sq[pix1[3] - pix2[3]];
00305         s += sq[pix1[4] - pix2[4]];
00306         s += sq[pix1[5] - pix2[5]];
00307         s += sq[pix1[6] - pix2[6]];
00308         s += sq[pix1[7] - pix2[7]];
00309         pix1 += line_size;
00310         pix2 += line_size;
00311     }
00312     return s;
00313 }
00314 
00315 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
00316 {
00317     int s, i;
00318     uint32_t *sq = ff_squareTbl + 256;
00319 
00320     s = 0;
00321     for (i = 0; i < h; i++) {
00322         s += sq[pix1[ 0] - pix2[ 0]];
00323         s += sq[pix1[ 1] - pix2[ 1]];
00324         s += sq[pix1[ 2] - pix2[ 2]];
00325         s += sq[pix1[ 3] - pix2[ 3]];
00326         s += sq[pix1[ 4] - pix2[ 4]];
00327         s += sq[pix1[ 5] - pix2[ 5]];
00328         s += sq[pix1[ 6] - pix2[ 6]];
00329         s += sq[pix1[ 7] - pix2[ 7]];
00330         s += sq[pix1[ 8] - pix2[ 8]];
00331         s += sq[pix1[ 9] - pix2[ 9]];
00332         s += sq[pix1[10] - pix2[10]];
00333         s += sq[pix1[11] - pix2[11]];
00334         s += sq[pix1[12] - pix2[12]];
00335         s += sq[pix1[13] - pix2[13]];
00336         s += sq[pix1[14] - pix2[14]];
00337         s += sq[pix1[15] - pix2[15]];
00338 
00339         pix1 += line_size;
00340         pix2 += line_size;
00341     }
00342     return s;
00343 }
00344 
00345 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
00346                           const uint8_t *s2, int stride){
00347     int i;
00348 
00349     /* read the pixels */
00350     for(i=0;i<8;i++) {
00351         block[0] = s1[0] - s2[0];
00352         block[1] = s1[1] - s2[1];
00353         block[2] = s1[2] - s2[2];
00354         block[3] = s1[3] - s2[3];
00355         block[4] = s1[4] - s2[4];
00356         block[5] = s1[5] - s2[5];
00357         block[6] = s1[6] - s2[6];
00358         block[7] = s1[7] - s2[7];
00359         s1 += stride;
00360         s2 += stride;
00361         block += 8;
00362     }
00363 }
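/* diff_pixels_c() produces the 8x8 residual s1 - s2 as 16-bit DCTELEMs,
 * i.e. the block that the encoder subsequently feeds to the forward DCT. */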
00364 
00365 
00366 void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00367                              int line_size)
00368 {
00369     int i;
00370     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00371 
00372     /* read the pixels */
00373     for(i=0;i<8;i++) {
00374         pixels[0] = cm[block[0]];
00375         pixels[1] = cm[block[1]];
00376         pixels[2] = cm[block[2]];
00377         pixels[3] = cm[block[3]];
00378         pixels[4] = cm[block[4]];
00379         pixels[5] = cm[block[5]];
00380         pixels[6] = cm[block[6]];
00381         pixels[7] = cm[block[7]];
00382 
00383         pixels += line_size;
00384         block += 8;
00385     }
00386 }
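/* ff_cropTbl is a clipping table centred at MAX_NEG_CROP: cm[x] clamps x to
 * [0, 255] for inputs up to MAX_NEG_CROP outside that range, replacing two
 * compares per pixel with a single table lookup. */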
00387 
00388 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00389                                  int line_size)
00390 {
00391     int i;
00392     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00393 
00394     /* read the pixels */
00395     for(i=0;i<4;i++) {
00396         pixels[0] = cm[block[0]];
00397         pixels[1] = cm[block[1]];
00398         pixels[2] = cm[block[2]];
00399         pixels[3] = cm[block[3]];
00400 
00401         pixels += line_size;
00402         block += 8;
00403     }
00404 }
00405 
00406 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00407                                  int line_size)
00408 {
00409     int i;
00410     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00411 
00412     /* read the pixels */
00413     for(i=0;i<2;i++) {
00414         pixels[0] = cm[block[0]];
00415         pixels[1] = cm[block[1]];
00416 
00417         pixels += line_size;
00418         block += 8;
00419     }
00420 }
00421 
00422 void ff_put_signed_pixels_clamped_c(const DCTELEM *block,
00423                                     uint8_t *restrict pixels,
00424                                     int line_size)
00425 {
00426     int i, j;
00427 
00428     for (i = 0; i < 8; i++) {
00429         for (j = 0; j < 8; j++) {
00430             if (*block < -128)
00431                 *pixels = 0;
00432             else if (*block > 127)
00433                 *pixels = 255;
00434             else
00435                 *pixels = (uint8_t)(*block + 128);
00436             block++;
00437             pixels++;
00438         }
00439         pixels += (line_size - 8);
00440     }
00441 }
00442 
00443 void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00444                              int line_size)
00445 {
00446     int i;
00447     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00448 
00449     /* read the pixels */
00450     for(i=0;i<8;i++) {
00451         pixels[0] = cm[pixels[0] + block[0]];
00452         pixels[1] = cm[pixels[1] + block[1]];
00453         pixels[2] = cm[pixels[2] + block[2]];
00454         pixels[3] = cm[pixels[3] + block[3]];
00455         pixels[4] = cm[pixels[4] + block[4]];
00456         pixels[5] = cm[pixels[5] + block[5]];
00457         pixels[6] = cm[pixels[6] + block[6]];
00458         pixels[7] = cm[pixels[7] + block[7]];
00459         pixels += line_size;
00460         block += 8;
00461     }
00462 }
00463 
00464 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00465                           int line_size)
00466 {
00467     int i;
00468     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00469 
00470     /* read the pixels */
00471     for(i=0;i<4;i++) {
00472         pixels[0] = cm[pixels[0] + block[0]];
00473         pixels[1] = cm[pixels[1] + block[1]];
00474         pixels[2] = cm[pixels[2] + block[2]];
00475         pixels[3] = cm[pixels[3] + block[3]];
00476         pixels += line_size;
00477         block += 8;
00478     }
00479 }
00480 
00481 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00482                           int line_size)
00483 {
00484     int i;
00485     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00486 
00487     /* read the pixels */
00488     for(i=0;i<2;i++) {
00489         pixels[0] = cm[pixels[0] + block[0]];
00490         pixels[1] = cm[pixels[1] + block[1]];
00491         pixels += line_size;
00492         block += 8;
00493     }
00494 }
00495 
00496 static int sum_abs_dctelem_c(DCTELEM *block)
00497 {
00498     int sum=0, i;
00499     for(i=0; i<64; i++)
00500         sum+= FFABS(block[i]);
00501     return sum;
00502 }
00503 
00504 static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
00505 {
00506     int i;
00507 
00508     for (i = 0; i < h; i++) {
00509         memset(block, value, 16);
00510         block += line_size;
00511     }
00512 }
00513 
00514 static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
00515 {
00516     int i;
00517 
00518     for (i = 0; i < h; i++) {
00519         memset(block, value, 8);
00520         block += line_size;
00521     }
00522 }
00523 
00524 #define avg2(a,b) ((a+b+1)>>1)
00525 #define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
00526 
00527 static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
00528 {
00529     const int A=(16-x16)*(16-y16);
00530     const int B=(   x16)*(16-y16);
00531     const int C=(16-x16)*(   y16);
00532     const int D=(   x16)*(   y16);
00533     int i;
00534 
00535     for(i=0; i<h; i++)
00536     {
00537         dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
00538         dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
00539         dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
00540         dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
00541         dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
00542         dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
00543         dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
00544         dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
00545         dst+= stride;
00546         src+= stride;
00547     }
00548 }
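/* gmc1_c() performs bilinear interpolation at 1/16-pel precision: the four
 * weights A, B, C and D always sum to 16*16 = 256, so the >>8 (together with
 * 'rounder') renormalizes each output pixel. */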
00549 
00550 void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
00551                   int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
00552 {
00553     int y, vx, vy;
00554     const int s= 1<<shift;
00555 
00556     width--;
00557     height--;
00558 
00559     for(y=0; y<h; y++){
00560         int x;
00561 
00562         vx= ox;
00563         vy= oy;
00564         for(x=0; x<8; x++){ //XXX FIXME optimize
00565             int src_x, src_y, frac_x, frac_y, index;
00566 
00567             src_x= vx>>16;
00568             src_y= vy>>16;
00569             frac_x= src_x&(s-1);
00570             frac_y= src_y&(s-1);
00571             src_x>>=shift;
00572             src_y>>=shift;
00573 
00574             if((unsigned)src_x < width){
00575                 if((unsigned)src_y < height){
00576                     index= src_x + src_y*stride;
00577                     dst[y*stride + x]= (  (  src[index         ]*(s-frac_x)
00578                                            + src[index       +1]*   frac_x )*(s-frac_y)
00579                                         + (  src[index+stride  ]*(s-frac_x)
00580                                            + src[index+stride+1]*   frac_x )*   frac_y
00581                                         + r)>>(shift*2);
00582                 }else{
00583                     index= src_x + av_clip(src_y, 0, height)*stride;
00584                     dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
00585                                           + src[index       +1]*   frac_x )*s
00586                                         + r)>>(shift*2);
00587                 }
00588             }else{
00589                 if((unsigned)src_y < height){
00590                     index= av_clip(src_x, 0, width) + src_y*stride;
00591                     dst[y*stride + x]= (  (  src[index         ]*(s-frac_y)
00592                                            + src[index+stride  ]*   frac_y )*s
00593                                         + r)>>(shift*2);
00594                 }else{
00595                     index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
00596                     dst[y*stride + x]=    src[index         ];
00597                 }
00598             }
00599 
00600             vx+= dxx;
00601             vy+= dyx;
00602         }
00603         ox += dxy;
00604         oy += dyy;
00605     }
00606 }
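/* ff_gmc_c() implements global motion compensation with an affine motion
 * field: the sampling position (vx, vy) advances by (dxx, dyx) per output
 * column and by (dxy, dyy) per output row, each sample is bilinearly
 * interpolated with 1<<shift sub-pel steps, and positions outside the
 * source are clamped to the nearest valid row/column. */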
00607 
00608 static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
00609     switch(width){
00610     case 2: put_pixels2_8_c (dst, src, stride, height); break;
00611     case 4: put_pixels4_8_c (dst, src, stride, height); break;
00612     case 8: put_pixels8_8_c (dst, src, stride, height); break;
00613     case 16:put_pixels16_8_c(dst, src, stride, height); break;
00614     }
00615 }
00616 
00617 static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
00618     int i,j;
00619     for (i=0; i < height; i++) {
00620       for (j=0; j < width; j++) {
00621         dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
00622       }
00623       src += stride;
00624       dst += stride;
00625     }
00626 }
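/* Third-pel ("tpel") interpolation as used by SVQ3: 683 is a fixed-point
 * approximation of 2048/3 and 2731 of 32768/12, so e.g.
 * (683*(2*a + b + 1)) >> 11 computes (2*a + b)/3 with rounding. */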
00627 
00628 static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
00629     int i,j;
00630     for (i=0; i < height; i++) {
00631       for (j=0; j < width; j++) {
00632         dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
00633       }
00634       src += stride;
00635       dst += stride;
00636     }
00637 }
00638 
00639 static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
00640     int i,j;
00641     for (i=0; i < height; i++) {
00642       for (j=0; j < width; j++) {
00643         dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
00644       }
00645       src += stride;
00646       dst += stride;
00647     }
00648 }
00649 
00650 static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
00651     int i,j;
00652     for (i=0; i < height; i++) {
00653       for (j=0; j < width; j++) {
00654         dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
00655       }
00656       src += stride;
00657       dst += stride;
00658     }
00659 }
00660 
00661 static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
00662     int i,j;
00663     for (i=0; i < height; i++) {
00664       for (j=0; j < width; j++) {
00665         dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
00666       }
00667       src += stride;
00668       dst += stride;
00669     }
00670 }
00671 
00672 static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
00673     int i,j;
00674     for (i=0; i < height; i++) {
00675       for (j=0; j < width; j++) {
00676         dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
00677       }
00678       src += stride;
00679       dst += stride;
00680     }
00681 }
00682 
00683 static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
00684     int i,j;
00685     for (i=0; i < height; i++) {
00686       for (j=0; j < width; j++) {
00687         dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
00688       }
00689       src += stride;
00690       dst += stride;
00691     }
00692 }
00693 
00694 static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
00695     int i,j;
00696     for (i=0; i < height; i++) {
00697       for (j=0; j < width; j++) {
00698         dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
00699       }
00700       src += stride;
00701       dst += stride;
00702     }
00703 }
00704 
00705 static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
00706     switch(width){
00707     case 2: avg_pixels2_8_c (dst, src, stride, height); break;
00708     case 4: avg_pixels4_8_c (dst, src, stride, height); break;
00709     case 8: avg_pixels8_8_c (dst, src, stride, height); break;
00710     case 16:avg_pixels16_8_c(dst, src, stride, height); break;
00711     }
00712 }
00713 
00714 static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
00715     int i,j;
00716     for (i=0; i < height; i++) {
00717       for (j=0; j < width; j++) {
00718         dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
00719       }
00720       src += stride;
00721       dst += stride;
00722     }
00723 }
00724 
00725 static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
00726     int i,j;
00727     for (i=0; i < height; i++) {
00728       for (j=0; j < width; j++) {
00729         dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
00730       }
00731       src += stride;
00732       dst += stride;
00733     }
00734 }
00735 
00736 static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
00737     int i,j;
00738     for (i=0; i < height; i++) {
00739       for (j=0; j < width; j++) {
00740         dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
00741       }
00742       src += stride;
00743       dst += stride;
00744     }
00745 }
00746 
00747 static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
00748     int i,j;
00749     for (i=0; i < height; i++) {
00750       for (j=0; j < width; j++) {
00751         dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
00752       }
00753       src += stride;
00754       dst += stride;
00755     }
00756 }
00757 
00758 static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
00759     int i,j;
00760     for (i=0; i < height; i++) {
00761       for (j=0; j < width; j++) {
00762         dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
00763       }
00764       src += stride;
00765       dst += stride;
00766     }
00767 }
00768 
00769 static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
00770     int i,j;
00771     for (i=0; i < height; i++) {
00772       for (j=0; j < width; j++) {
00773         dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
00774       }
00775       src += stride;
00776       dst += stride;
00777     }
00778 }
00779 
00780 static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
00781     int i,j;
00782     for (i=0; i < height; i++) {
00783       for (j=0; j < width; j++) {
00784         dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
00785       }
00786       src += stride;
00787       dst += stride;
00788     }
00789 }
00790 
00791 static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
00792     int i,j;
00793     for (i=0; i < height; i++) {
00794       for (j=0; j < width; j++) {
00795         dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
00796       }
00797       src += stride;
00798       dst += stride;
00799     }
00800 }
00801 
00802 #define QPEL_MC(r, OPNAME, RND, OP) \
00803 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00804     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00805     int i;\
00806     for(i=0; i<h; i++)\
00807     {\
00808         OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
00809         OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
00810         OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
00811         OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
00812         OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
00813         OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
00814         OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
00815         OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
00816         dst+=dstStride;\
00817         src+=srcStride;\
00818     }\
00819 }\
00820 \
00821 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00822     const int w=8;\
00823     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00824     int i;\
00825     for(i=0; i<w; i++)\
00826     {\
00827         const int src0= src[0*srcStride];\
00828         const int src1= src[1*srcStride];\
00829         const int src2= src[2*srcStride];\
00830         const int src3= src[3*srcStride];\
00831         const int src4= src[4*srcStride];\
00832         const int src5= src[5*srcStride];\
00833         const int src6= src[6*srcStride];\
00834         const int src7= src[7*srcStride];\
00835         const int src8= src[8*srcStride];\
00836         OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
00837         OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
00838         OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
00839         OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
00840         OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
00841         OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
00842         OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
00843         OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
00844         dst++;\
00845         src++;\
00846     }\
00847 }\
00848 \
00849 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00850     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00851     int i;\
00852     \
00853     for(i=0; i<h; i++)\
00854     {\
00855         OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
00856         OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
00857         OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
00858         OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
00859         OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
00860         OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
00861         OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
00862         OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
00863         OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
00864         OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
00865         OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
00866         OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
00867         OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
00868         OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
00869         OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
00870         OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
00871         dst+=dstStride;\
00872         src+=srcStride;\
00873     }\
00874 }\
00875 \
00876 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00877     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00878     int i;\
00879     const int w=16;\
00880     for(i=0; i<w; i++)\
00881     {\
00882         const int src0= src[0*srcStride];\
00883         const int src1= src[1*srcStride];\
00884         const int src2= src[2*srcStride];\
00885         const int src3= src[3*srcStride];\
00886         const int src4= src[4*srcStride];\
00887         const int src5= src[5*srcStride];\
00888         const int src6= src[6*srcStride];\
00889         const int src7= src[7*srcStride];\
00890         const int src8= src[8*srcStride];\
00891         const int src9= src[9*srcStride];\
00892         const int src10= src[10*srcStride];\
00893         const int src11= src[11*srcStride];\
00894         const int src12= src[12*srcStride];\
00895         const int src13= src[13*srcStride];\
00896         const int src14= src[14*srcStride];\
00897         const int src15= src[15*srcStride];\
00898         const int src16= src[16*srcStride];\
00899         OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
00900         OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
00901         OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
00902         OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
00903         OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
00904         OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
00905         OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
00906         OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
00907         OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
00908         OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
00909         OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
00910         OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
00911         OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
00912         OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
00913         OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
00914         OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
00915         dst++;\
00916         src++;\
00917     }\
00918 }\
00919 \
00920 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
00921     uint8_t half[64];\
00922     put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00923     OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
00924 }\
00925 \
00926 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
00927     OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
00928 }\
00929 \
00930 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
00931     uint8_t half[64];\
00932     put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00933     OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
00934 }\
00935 \
00936 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
00937     uint8_t full[16*9];\
00938     uint8_t half[64];\
00939     copy_block9(full, src, 16, stride, 9);\
00940     put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00941     OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
00942 }\
00943 \
00944 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
00945     uint8_t full[16*9];\
00946     copy_block9(full, src, 16, stride, 9);\
00947     OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
00948 }\
00949 \
00950 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
00951     uint8_t full[16*9];\
00952     uint8_t half[64];\
00953     copy_block9(full, src, 16, stride, 9);\
00954     put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00955     OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
00956 }\
00957 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
00958     uint8_t full[16*9];\
00959     uint8_t halfH[72];\
00960     uint8_t halfV[64];\
00961     uint8_t halfHV[64];\
00962     copy_block9(full, src, 16, stride, 9);\
00963     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00964     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
00965     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00966     OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
00967 }\
00968 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
00969     uint8_t full[16*9];\
00970     uint8_t halfH[72];\
00971     uint8_t halfHV[64];\
00972     copy_block9(full, src, 16, stride, 9);\
00973     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00974     put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
00975     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00976     OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
00977 }\
00978 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
00979     uint8_t full[16*9];\
00980     uint8_t halfH[72];\
00981     uint8_t halfV[64];\
00982     uint8_t halfHV[64];\
00983     copy_block9(full, src, 16, stride, 9);\
00984     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00985     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
00986     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00987     OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
00988 }\
00989 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
00990     uint8_t full[16*9];\
00991     uint8_t halfH[72];\
00992     uint8_t halfHV[64];\
00993     copy_block9(full, src, 16, stride, 9);\
00994     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00995     put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
00996     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00997     OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
00998 }\
00999 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01000     uint8_t full[16*9];\
01001     uint8_t halfH[72];\
01002     uint8_t halfV[64];\
01003     uint8_t halfHV[64];\
01004     copy_block9(full, src, 16, stride, 9);\
01005     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01006     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01007     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01008     OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01009 }\
01010 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01011     uint8_t full[16*9];\
01012     uint8_t halfH[72];\
01013     uint8_t halfHV[64];\
01014     copy_block9(full, src, 16, stride, 9);\
01015     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01016     put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
01017     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01018     OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01019 }\
01020 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01021     uint8_t full[16*9];\
01022     uint8_t halfH[72];\
01023     uint8_t halfV[64];\
01024     uint8_t halfHV[64];\
01025     copy_block9(full, src, 16, stride, 9);\
01026     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
01027     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01028     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01029     OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01030 }\
01031 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01032     uint8_t full[16*9];\
01033     uint8_t halfH[72];\
01034     uint8_t halfHV[64];\
01035     copy_block9(full, src, 16, stride, 9);\
01036     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01037     put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01038     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01039     OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01040 }\
01041 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01042     uint8_t halfH[72];\
01043     uint8_t halfHV[64];\
01044     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01045     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01046     OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
01047 }\
01048 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01049     uint8_t halfH[72];\
01050     uint8_t halfHV[64];\
01051     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01052     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01053     OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01054 }\
01055 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01056     uint8_t full[16*9];\
01057     uint8_t halfH[72];\
01058     uint8_t halfV[64];\
01059     uint8_t halfHV[64];\
01060     copy_block9(full, src, 16, stride, 9);\
01061     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01062     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01063     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01064     OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
01065 }\
01066 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01067     uint8_t full[16*9];\
01068     uint8_t halfH[72];\
01069     copy_block9(full, src, 16, stride, 9);\
01070     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01071     put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
01072     OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01073 }\
01074 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01075     uint8_t full[16*9];\
01076     uint8_t halfH[72];\
01077     uint8_t halfV[64];\
01078     uint8_t halfHV[64];\
01079     copy_block9(full, src, 16, stride, 9);\
01080     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01081     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01082     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01083     OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
01084 }\
01085 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01086     uint8_t full[16*9];\
01087     uint8_t halfH[72];\
01088     copy_block9(full, src, 16, stride, 9);\
01089     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01090     put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01091     OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01092 }\
01093 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01094     uint8_t halfH[72];\
01095     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01096     OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01097 }\
01098 \
01099 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01100     uint8_t half[256];\
01101     put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01102     OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
01103 }\
01104 \
01105 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01106     OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
01107 }\
01108 \
01109 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01110     uint8_t half[256];\
01111     put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01112     OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
01113 }\
01114 \
01115 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
01116     uint8_t full[24*17];\
01117     uint8_t half[256];\
01118     copy_block17(full, src, 24, stride, 17);\
01119     put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01120     OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
01121 }\
01122 \
01123 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
01124     uint8_t full[24*17];\
01125     copy_block17(full, src, 24, stride, 17);\
01126     OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
01127 }\
01128 \
01129 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
01130     uint8_t full[24*17];\
01131     uint8_t half[256];\
01132     copy_block17(full, src, 24, stride, 17);\
01133     put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01134     OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
01135 }\
01136 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
01137     uint8_t full[24*17];\
01138     uint8_t halfH[272];\
01139     uint8_t halfV[256];\
01140     uint8_t halfHV[256];\
01141     copy_block17(full, src, 24, stride, 17);\
01142     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01143     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01144     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01145     OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01146 }\
01147 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01148     uint8_t full[24*17];\
01149     uint8_t halfH[272];\
01150     uint8_t halfHV[256];\
01151     copy_block17(full, src, 24, stride, 17);\
01152     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01153     put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01154     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01155     OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01156 }\
01157 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01158     uint8_t full[24*17];\
01159     uint8_t halfH[272];\
01160     uint8_t halfV[256];\
01161     uint8_t halfHV[256];\
01162     copy_block17(full, src, 24, stride, 17);\
01163     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01164     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01165     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01166     OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01167 }\
01168 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01169     uint8_t full[24*17];\
01170     uint8_t halfH[272];\
01171     uint8_t halfHV[256];\
01172     copy_block17(full, src, 24, stride, 17);\
01173     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01174     put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01175     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01176     OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01177 }\
01178 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01179     uint8_t full[24*17];\
01180     uint8_t halfH[272];\
01181     uint8_t halfV[256];\
01182     uint8_t halfHV[256];\
01183     copy_block17(full, src, 24, stride, 17);\
01184     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01185     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01186     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01187     OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01188 }\
01189 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01190     uint8_t full[24*17];\
01191     uint8_t halfH[272];\
01192     uint8_t halfHV[256];\
01193     copy_block17(full, src, 24, stride, 17);\
01194     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01195     put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01196     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01197     OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01198 }\
01199 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01200     uint8_t full[24*17];\
01201     uint8_t halfH[272];\
01202     uint8_t halfV[256];\
01203     uint8_t halfHV[256];\
01204     copy_block17(full, src, 24, stride, 17);\
01205     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
01206     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01207     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01208     OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01209 }\
01210 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01211     uint8_t full[24*17];\
01212     uint8_t halfH[272];\
01213     uint8_t halfHV[256];\
01214     copy_block17(full, src, 24, stride, 17);\
01215     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01216     put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01217     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01218     OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01219 }\
01220 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01221     uint8_t halfH[272];\
01222     uint8_t halfHV[256];\
01223     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01224     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01225     OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01226 }\
01227 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01228     uint8_t halfH[272];\
01229     uint8_t halfHV[256];\
01230     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01231     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01232     OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01233 }\
01234 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01235     uint8_t full[24*17];\
01236     uint8_t halfH[272];\
01237     uint8_t halfV[256];\
01238     uint8_t halfHV[256];\
01239     copy_block17(full, src, 24, stride, 17);\
01240     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01241     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01242     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01243     OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
01244 }\
01245 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01246     uint8_t full[24*17];\
01247     uint8_t halfH[272];\
01248     copy_block17(full, src, 24, stride, 17);\
01249     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01250     put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01251     OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01252 }\
01253 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01254     uint8_t full[24*17];\
01255     uint8_t halfH[272];\
01256     uint8_t halfV[256];\
01257     uint8_t halfHV[256];\
01258     copy_block17(full, src, 24, stride, 17);\
01259     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01260     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01261     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01262     OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
01263 }\
01264 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01265     uint8_t full[24*17];\
01266     uint8_t halfH[272];\
01267     copy_block17(full, src, 24, stride, 17);\
01268     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01269     put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01270     OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01271 }\
01272 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01273     uint8_t halfH[272];\
01274     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01275     OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01276 }
01277 
01278 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
01279 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
01280 #define op_put(a, b) a = cm[((b) + 16)>>5]
01281 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
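/* The qpel lowpass filter taps (-1, 3, -6, 20, 20, -6, 3, -1) sum to 32 per
 * output sample, so the op_* macros renormalize with ((b) + 16) >> 5 (or +15
 * for the no-rounding variants) before clamping through the crop table.
 * The mcXY suffix encodes the sub-pel position: X is the horizontal and Y
 * the vertical offset in quarter pixels (mc00 = integer position). */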
01282 
01283 QPEL_MC(0, put_       , _       , op_put)
01284 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
01285 QPEL_MC(0, avg_       , _       , op_avg)
01286 //QPEL_MC(1, avg_no_rnd , _       , op_avg)
01287 #undef op_avg
01288 #undef op_avg_no_rnd
01289 #undef op_put
01290 #undef op_put_no_rnd
01291 
01292 #define put_qpel8_mc00_c  ff_put_pixels8x8_c
01293 #define avg_qpel8_mc00_c  ff_avg_pixels8x8_c
01294 #define put_qpel16_mc00_c ff_put_pixels16x16_c
01295 #define avg_qpel16_mc00_c ff_avg_pixels16x16_c
01296 #define put_no_rnd_qpel8_mc00_c  ff_put_pixels8x8_c
01297 #define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
01298 
01299 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
01300     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01301     int i;
01302 
01303     for(i=0; i<h; i++){
01304         dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
01305         dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
01306         dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
01307         dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
01308         dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
01309         dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
01310         dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
01311         dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
01312         dst+=dstStride;
01313         src+=srcStride;
01314     }
01315 }
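/* WMV2 ("mspel") half-pel interpolation uses the 4-tap filter
 * (-1, 9, 9, -1)/16; the +8 provides rounding before the >>4. */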
01316 
01317 #if CONFIG_RV40_DECODER
01318 void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
01319     put_pixels16_xy2_8_c(dst, src, stride, 16);
01320 }
01321 void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
01322     avg_pixels16_xy2_8_c(dst, src, stride, 16);
01323 }
01324 void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
01325     put_pixels8_xy2_8_c(dst, src, stride, 8);
01326 }
01327 void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
01328     avg_pixels8_xy2_8_c(dst, src, stride, 8);
01329 }
01330 #endif /* CONFIG_RV40_DECODER */
01331 
01332 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
01333     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01334     int i;
01335 
01336     for(i=0; i<w; i++){
01337         const int src_1= src[ -srcStride];
01338         const int src0 = src[0          ];
01339         const int src1 = src[  srcStride];
01340         const int src2 = src[2*srcStride];
01341         const int src3 = src[3*srcStride];
01342         const int src4 = src[4*srcStride];
01343         const int src5 = src[5*srcStride];
01344         const int src6 = src[6*srcStride];
01345         const int src7 = src[7*srcStride];
01346         const int src8 = src[8*srcStride];
01347         const int src9 = src[9*srcStride];
01348         dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
01349         dst[1*dstStride]= cm[(9*(src1 + src2) - (src0  + src3) + 8)>>4];
01350         dst[2*dstStride]= cm[(9*(src2 + src3) - (src1  + src4) + 8)>>4];
01351         dst[3*dstStride]= cm[(9*(src3 + src4) - (src2  + src5) + 8)>>4];
01352         dst[4*dstStride]= cm[(9*(src4 + src5) - (src3  + src6) + 8)>>4];
01353         dst[5*dstStride]= cm[(9*(src5 + src6) - (src4  + src7) + 8)>>4];
01354         dst[6*dstStride]= cm[(9*(src6 + src7) - (src5  + src8) + 8)>>4];
01355         dst[7*dstStride]= cm[(9*(src7 + src8) - (src6  + src9) + 8)>>4];
01356         src++;
01357         dst++;
01358     }
01359 }
01360 
01361 static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
01362     uint8_t half[64];
01363     wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
01364     put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
01365 }
01366 
01367 static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
01368     wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
01369 }
01370 
01371 static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
01372     uint8_t half[64];
01373     wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
01374     put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
01375 }
01376 
01377 static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
01378     wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
01379 }
01380 
01381 static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
01382     uint8_t halfH[88];
01383     uint8_t halfV[64];
01384     uint8_t halfHV[64];
01385     wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
01386     wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
01387     wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
01388     put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
01389 }
01390 static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
01391     uint8_t halfH[88];
01392     uint8_t halfV[64];
01393     uint8_t halfHV[64];
01394     wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
01395     wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
01396     wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
01397     put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
01398 }
01399 static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
01400     uint8_t halfH[88];
01401     wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
01402     wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
01403 }
01404 
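/* H.263 Annex J deblocking: filters the two pixels on each side of a block
 * edge with a strength looked up from the quantizer.  d1 is a clipped
 * correction applied to the inner pair p1/p2 (the "&256" test clamps
 * overflow/underflow to 255/0 without a separate range check), d2 is a
 * weaker, d1-limited correction for the outer pair p0/p3. */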
01405 static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
01406     if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
01407     int x;
01408     const int strength= ff_h263_loop_filter_strength[qscale];
01409 
01410     for(x=0; x<8; x++){
01411         int d1, d2, ad1;
01412         int p0= src[x-2*stride];
01413         int p1= src[x-1*stride];
01414         int p2= src[x+0*stride];
01415         int p3= src[x+1*stride];
01416         int d = (p0 - p3 + 4*(p2 - p1)) / 8;
01417 
01418         if     (d<-2*strength) d1= 0;
01419         else if(d<-  strength) d1=-2*strength - d;
01420         else if(d<   strength) d1= d;
01421         else if(d< 2*strength) d1= 2*strength - d;
01422         else                   d1= 0;
01423 
01424         p1 += d1;
01425         p2 -= d1;
01426         if(p1&256) p1= ~(p1>>31);
01427         if(p2&256) p2= ~(p2>>31);
01428 
01429         src[x-1*stride] = p1;
01430         src[x+0*stride] = p2;
01431 
01432         ad1= FFABS(d1)>>1;
01433 
01434         d2= av_clip((p0-p3)/4, -ad1, ad1);
01435 
01436         src[x-2*stride] = p0 - d2;
01437         src[x+  stride] = p3 + d2;
01438     }
01439     }
01440 }
01441 
01442 static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
01443     if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
01444     int y;
01445     const int strength= ff_h263_loop_filter_strength[qscale];
01446 
01447     for(y=0; y<8; y++){
01448         int d1, d2, ad1;
01449         int p0= src[y*stride-2];
01450         int p1= src[y*stride-1];
01451         int p2= src[y*stride+0];
01452         int p3= src[y*stride+1];
01453         int d = (p0 - p3 + 4*(p2 - p1)) / 8;
01454 
01455         if     (d<-2*strength) d1= 0;
01456         else if(d<-  strength) d1=-2*strength - d;
01457         else if(d<   strength) d1= d;
01458         else if(d< 2*strength) d1= 2*strength - d;
01459         else                   d1= 0;
01460 
01461         p1 += d1;
01462         p2 -= d1;
01463         if(p1&256) p1= ~(p1>>31);
01464         if(p2&256) p2= ~(p2>>31);
01465 
01466         src[y*stride-1] = p1;
01467         src[y*stride+0] = p2;
01468 
01469         ad1= FFABS(d1)>>1;
01470 
01471         d2= av_clip((p0-p3)/4, -ad1, ad1);
01472 
01473         src[y*stride-2] = p0 - d2;
01474         src[y*stride+1] = p3 + d2;
01475     }
01476     }
01477 }
01478 
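/* H.261 in-loop filter: separable [1 2 1] smoothing over the 8x8 block,
 * normalized by 16 in the interior (vertical pass into temp[], then a
 * horizontal pass); border rows and columns receive only the
 * one-dimensional pass (normalized by 4). */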
01479 static void h261_loop_filter_c(uint8_t *src, int stride){
01480     int x,y,xy,yz;
01481     int temp[64];
01482 
01483     for(x=0; x<8; x++){
01484         temp[x      ] = 4*src[x           ];
01485         temp[x + 7*8] = 4*src[x + 7*stride];
01486     }
01487     for(y=1; y<7; y++){
01488         for(x=0; x<8; x++){
01489             xy = y * stride + x;
01490             yz = y * 8 + x;
01491             temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
01492         }
01493     }
01494 
01495     for(y=0; y<8; y++){
01496         src[  y*stride] = (temp[  y*8] + 2)>>2;
01497         src[7+y*stride] = (temp[7+y*8] + 2)>>2;
01498         for(x=1; x<7; x++){
01499             xy = y * stride + x;
01500             yz = y * 8 + x;
01501             src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
01502         }
01503     }
01504 }
01505 
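/* SAD (sum of absolute differences) comparison functions used by motion
 * estimation.  The _x2/_y2/_xy2 variants compare against the reference
 * shifted by half a pel horizontally, vertically or diagonally, using
 * simple 2- or 4-sample averages. */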
01506 static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01507 {
01508     int s, i;
01509 
01510     s = 0;
01511     for(i=0;i<h;i++) {
01512         s += abs(pix1[0] - pix2[0]);
01513         s += abs(pix1[1] - pix2[1]);
01514         s += abs(pix1[2] - pix2[2]);
01515         s += abs(pix1[3] - pix2[3]);
01516         s += abs(pix1[4] - pix2[4]);
01517         s += abs(pix1[5] - pix2[5]);
01518         s += abs(pix1[6] - pix2[6]);
01519         s += abs(pix1[7] - pix2[7]);
01520         s += abs(pix1[8] - pix2[8]);
01521         s += abs(pix1[9] - pix2[9]);
01522         s += abs(pix1[10] - pix2[10]);
01523         s += abs(pix1[11] - pix2[11]);
01524         s += abs(pix1[12] - pix2[12]);
01525         s += abs(pix1[13] - pix2[13]);
01526         s += abs(pix1[14] - pix2[14]);
01527         s += abs(pix1[15] - pix2[15]);
01528         pix1 += line_size;
01529         pix2 += line_size;
01530     }
01531     return s;
01532 }
01533 
01534 static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01535 {
01536     int s, i;
01537 
01538     s = 0;
01539     for(i=0;i<h;i++) {
01540         s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
01541         s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
01542         s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
01543         s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
01544         s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
01545         s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
01546         s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
01547         s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
01548         s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
01549         s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
01550         s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
01551         s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
01552         s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
01553         s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
01554         s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
01555         s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
01556         pix1 += line_size;
01557         pix2 += line_size;
01558     }
01559     return s;
01560 }
01561 
01562 static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01563 {
01564     int s, i;
01565     uint8_t *pix3 = pix2 + line_size;
01566 
01567     s = 0;
01568     for(i=0;i<h;i++) {
01569         s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
01570         s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
01571         s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
01572         s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
01573         s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
01574         s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
01575         s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
01576         s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
01577         s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
01578         s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
01579         s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
01580         s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
01581         s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
01582         s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
01583         s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
01584         s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
01585         pix1 += line_size;
01586         pix2 += line_size;
01587         pix3 += line_size;
01588     }
01589     return s;
01590 }
01591 
01592 static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01593 {
01594     int s, i;
01595     uint8_t *pix3 = pix2 + line_size;
01596 
01597     s = 0;
01598     for(i=0;i<h;i++) {
01599         s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
01600         s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
01601         s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
01602         s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
01603         s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
01604         s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
01605         s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
01606         s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
01607         s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
01608         s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
01609         s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
01610         s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
01611         s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
01612         s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
01613         s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
01614         s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
01615         pix1 += line_size;
01616         pix2 += line_size;
01617         pix3 += line_size;
01618     }
01619     return s;
01620 }
01621 
01622 static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01623 {
01624     int s, i;
01625 
01626     s = 0;
01627     for(i=0;i<h;i++) {
01628         s += abs(pix1[0] - pix2[0]);
01629         s += abs(pix1[1] - pix2[1]);
01630         s += abs(pix1[2] - pix2[2]);
01631         s += abs(pix1[3] - pix2[3]);
01632         s += abs(pix1[4] - pix2[4]);
01633         s += abs(pix1[5] - pix2[5]);
01634         s += abs(pix1[6] - pix2[6]);
01635         s += abs(pix1[7] - pix2[7]);
01636         pix1 += line_size;
01637         pix2 += line_size;
01638     }
01639     return s;
01640 }
01641 
01642 static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01643 {
01644     int s, i;
01645 
01646     s = 0;
01647     for(i=0;i<h;i++) {
01648         s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
01649         s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
01650         s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
01651         s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
01652         s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
01653         s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
01654         s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
01655         s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
01656         pix1 += line_size;
01657         pix2 += line_size;
01658     }
01659     return s;
01660 }
01661 
01662 static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01663 {
01664     int s, i;
01665     uint8_t *pix3 = pix2 + line_size;
01666 
01667     s = 0;
01668     for(i=0;i<h;i++) {
01669         s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
01670         s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
01671         s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
01672         s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
01673         s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
01674         s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
01675         s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
01676         s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
01677         pix1 += line_size;
01678         pix2 += line_size;
01679         pix3 += line_size;
01680     }
01681     return s;
01682 }
01683 
01684 static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01685 {
01686     int s, i;
01687     uint8_t *pix3 = pix2 + line_size;
01688 
01689     s = 0;
01690     for(i=0;i<h;i++) {
01691         s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
01692         s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
01693         s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
01694         s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
01695         s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
01696         s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
01697         s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
01698         s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
01699         pix1 += line_size;
01700         pix2 += line_size;
01701         pix3 += line_size;
01702     }
01703     return s;
01704 }
01705 
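/* "Noise preserving" SSE: the squared error (score1) plus the absolute
 * difference in local 2x2 gradient energy between the two blocks (score2),
 * weighted by avctx->nsse_weight (8 when no context is supplied). */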
01706 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
01707     MpegEncContext *c = v;
01708     int score1=0;
01709     int score2=0;
01710     int x,y;
01711 
01712     for(y=0; y<h; y++){
01713         for(x=0; x<16; x++){
01714             score1+= (s1[x  ] - s2[x ])*(s1[x  ] - s2[x ]);
01715         }
01716         if(y+1<h){
01717             for(x=0; x<15; x++){
01718                 score2+= FFABS(  s1[x  ] - s1[x  +stride]
01719                              - s1[x+1] + s1[x+1+stride])
01720                         -FFABS(  s2[x  ] - s2[x  +stride]
01721                              - s2[x+1] + s2[x+1+stride]);
01722             }
01723         }
01724         s1+= stride;
01725         s2+= stride;
01726     }
01727 
01728     if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
01729     else  return score1 + FFABS(score2)*8;
01730 }
01731 
01732 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
01733     MpegEncContext *c = v;
01734     int score1=0;
01735     int score2=0;
01736     int x,y;
01737 
01738     for(y=0; y<h; y++){
01739         for(x=0; x<8; x++){
01740             score1+= (s1[x  ] - s2[x ])*(s1[x  ] - s2[x ]);
01741         }
01742         if(y+1<h){
01743             for(x=0; x<7; x++){
01744                 score2+= FFABS(  s1[x  ] - s1[x  +stride]
01745                              - s1[x+1] + s1[x+1+stride])
01746                         -FFABS(  s2[x  ] - s2[x  +stride]
01747                              - s2[x+1] + s2[x+1+stride]);
01748             }
01749         }
01750         s1+= stride;
01751         s2+= stride;
01752     }
01753 
01754     if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
01755     else  return score1 + FFABS(score2)*8;
01756 }
01757 
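/* Helpers used by the encoder's quantizer noise shaping: try_8x8basis_c
 * estimates the weighted squared error that would result from adding
 * "basis" scaled by "scale" to the residual, add_8x8basis_c actually
 * applies that change to rem[]. */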
01758 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
01759     int i;
01760     unsigned int sum=0;
01761 
01762     for(i=0; i<8*8; i++){
01763         int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
01764         int w= weight[i];
01765         b>>= RECON_SHIFT;
01766         assert(-512<b && b<512);
01767 
01768         sum += (w*b)*(w*b)>>4;
01769     }
01770     return sum>>2;
01771 }
01772 
01773 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
01774     int i;
01775 
01776     for(i=0; i<8*8; i++){
01777         rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
01778     }
01779 }
01780 
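/* Permutes the coefficients of an 8x8 block according to "permutation";
 * only the positions reachable through scantable[0..last] are touched, the
 * remaining coefficients are assumed to be zero. */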
01789 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
01790 {
01791     int i;
01792     DCTELEM temp[64];
01793 
01794     if(last<=0) return;
01795     //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
01796 
01797     for(i=0; i<=last; i++){
01798         const int j= scantable[i];
01799         temp[j]= block[j];
01800         block[j]=0;
01801     }
01802 
01803     for(i=0; i<=last; i++){
01804         const int j= scantable[i];
01805         const int perm_j= permutation[j];
01806         block[perm_j]= temp[j];
01807     }
01808 }
01809 
01810 static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
01811     return 0;
01812 }
01813 
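/* Fills a table of 6 block comparison functions according to the FF_CMP_*
 * type.  A typical call from an encoder looks roughly like
 *     ff_set_cmp(&s->dsp, s->dsp.me_cmp, avctx->me_cmp);
 * (illustrative only; the actual call sites are in the motion estimation
 * code). */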
01814 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
01815     int i;
01816 
01817     memset(cmp, 0, sizeof(void*)*6);
01818 
01819     for(i=0; i<6; i++){
01820         switch(type&0xFF){
01821         case FF_CMP_SAD:
01822             cmp[i]= c->sad[i];
01823             break;
01824         case FF_CMP_SATD:
01825             cmp[i]= c->hadamard8_diff[i];
01826             break;
01827         case FF_CMP_SSE:
01828             cmp[i]= c->sse[i];
01829             break;
01830         case FF_CMP_DCT:
01831             cmp[i]= c->dct_sad[i];
01832             break;
01833         case FF_CMP_DCT264:
01834             cmp[i]= c->dct264_sad[i];
01835             break;
01836         case FF_CMP_DCTMAX:
01837             cmp[i]= c->dct_max[i];
01838             break;
01839         case FF_CMP_PSNR:
01840             cmp[i]= c->quant_psnr[i];
01841             break;
01842         case FF_CMP_BIT:
01843             cmp[i]= c->bit[i];
01844             break;
01845         case FF_CMP_RD:
01846             cmp[i]= c->rd[i];
01847             break;
01848         case FF_CMP_VSAD:
01849             cmp[i]= c->vsad[i];
01850             break;
01851         case FF_CMP_VSSE:
01852             cmp[i]= c->vsse[i];
01853             break;
01854         case FF_CMP_ZERO:
01855             cmp[i]= zero_cmp;
01856             break;
01857         case FF_CMP_NSSE:
01858             cmp[i]= c->nsse[i];
01859             break;
01860 #if CONFIG_DWT
01861         case FF_CMP_W53:
01862             cmp[i]= c->w53[i];
01863             break;
01864         case FF_CMP_W97:
01865             cmp[i]= c->w97[i];
01866             break;
01867 #endif
01868         default:
01869             av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
01870         }
01871     }
01872 }
01873 
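/* Byte-wise add/sub over whole machine words (SWAR): the pb_7f/pb_80 masks
 * add the low 7 bits of every byte normally and patch the top bit back in
 * with XOR, so carries never propagate between bytes. */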
01874 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
01875     long i;
01876     for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
01877         long a = *(long*)(src+i);
01878         long b = *(long*)(dst+i);
01879         *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
01880     }
01881     for(; i<w; i++)
01882         dst[i+0] += src[i+0];
01883 }
01884 
01885 static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
01886     long i;
01887     for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
01888         long a = *(long*)(src1+i);
01889         long b = *(long*)(src2+i);
01890         *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
01891     }
01892     for(; i<w; i++)
01893         dst[i] = src1[i]+src2[i];
01894 }
01895 
01896 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
01897     long i;
01898 #if !HAVE_FAST_UNALIGNED
01899     if((long)src2 & (sizeof(long)-1)){
01900         for(i=0; i+7<w; i+=8){
01901             dst[i+0] = src1[i+0]-src2[i+0];
01902             dst[i+1] = src1[i+1]-src2[i+1];
01903             dst[i+2] = src1[i+2]-src2[i+2];
01904             dst[i+3] = src1[i+3]-src2[i+3];
01905             dst[i+4] = src1[i+4]-src2[i+4];
01906             dst[i+5] = src1[i+5]-src2[i+5];
01907             dst[i+6] = src1[i+6]-src2[i+6];
01908             dst[i+7] = src1[i+7]-src2[i+7];
01909         }
01910     }else
01911 #endif
01912     for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
01913         long a = *(long*)(src1+i);
01914         long b = *(long*)(src2+i);
01915         *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
01916     }
01917     for(; i<w; i++)
01918         dst[i+0] = src1[i+0]-src2[i+0];
01919 }
01920 
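/* HuffYUV median prediction: the predictor is mid_pred(left, top,
 * left + top - topleft).  add_hfyu_median_prediction_c reconstructs samples
 * from residuals, sub_hfyu_median_prediction_c produces the residuals. */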
01921 static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
01922     int i;
01923     uint8_t l, lt;
01924 
01925     l= *left;
01926     lt= *left_top;
01927 
01928     for(i=0; i<w; i++){
01929         l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
01930         lt= src1[i];
01931         dst[i]= l;
01932     }
01933 
01934     *left= l;
01935     *left_top= lt;
01936 }
01937 
01938 static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
01939     int i;
01940     uint8_t l, lt;
01941 
01942     l= *left;
01943     lt= *left_top;
01944 
01945     for(i=0; i<w; i++){
01946         const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
01947         lt= src1[i];
01948         l= src2[i];
01949         dst[i]= l - pred;
01950     }
01951 
01952     *left= l;
01953     *left_top= lt;
01954 }
01955 
01956 static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
01957     int i;
01958 
01959     for(i=0; i<w-1; i++){
01960         acc+= src[i];
01961         dst[i]= acc;
01962         i++;
01963         acc+= src[i];
01964         dst[i]= acc;
01965     }
01966 
01967     for(; i<w; i++){
01968         acc+= src[i];
01969         dst[i]= acc;
01970     }
01971 
01972     return acc;
01973 }
01974 
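/* Byte offsets of the B, G, R, A components inside a packed 32-bit BGRA
 * pixel, depending on endianness; used by the per-channel left prediction
 * below. */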
01975 #if HAVE_BIGENDIAN
01976 #define B 3
01977 #define G 2
01978 #define R 1
01979 #define A 0
01980 #else
01981 #define B 0
01982 #define G 1
01983 #define R 2
01984 #define A 3
01985 #endif
01986 static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
01987     int i;
01988     int r,g,b,a;
01989     r= *red;
01990     g= *green;
01991     b= *blue;
01992     a= *alpha;
01993 
01994     for(i=0; i<w; i++){
01995         b+= src[4*i+B];
01996         g+= src[4*i+G];
01997         r+= src[4*i+R];
01998         a+= src[4*i+A];
01999 
02000         dst[4*i+B]= b;
02001         dst[4*i+G]= g;
02002         dst[4*i+R]= r;
02003         dst[4*i+A]= a;
02004     }
02005 
02006     *red= r;
02007     *green= g;
02008     *blue= b;
02009     *alpha= a;
02010 }
02011 #undef B
02012 #undef G
02013 #undef R
02014 #undef A
02015 
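/* 8x8 Hadamard transform building blocks.  hadamard8_diff8x8_c computes the
 * SATD of the difference between src and dst, hadamard8_intra8x8_c that of
 * the source block itself with the DC term removed ("-mean"). */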
02016 #define BUTTERFLY2(o1,o2,i1,i2) \
02017 o1= (i1)+(i2);\
02018 o2= (i1)-(i2);
02019 
02020 #define BUTTERFLY1(x,y) \
02021 {\
02022     int a,b;\
02023     a= x;\
02024     b= y;\
02025     x= a+b;\
02026     y= a-b;\
02027 }
02028 
02029 #define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
02030 
02031 static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
02032     int i;
02033     int temp[64];
02034     int sum=0;
02035 
02036     assert(h==8);
02037 
02038     for(i=0; i<8; i++){
02039         //FIXME try pointer walks
02040         BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
02041         BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
02042         BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
02043         BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
02044 
02045         BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
02046         BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
02047         BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
02048         BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
02049 
02050         BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
02051         BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
02052         BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
02053         BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
02054     }
02055 
02056     for(i=0; i<8; i++){
02057         BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
02058         BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
02059         BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
02060         BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
02061 
02062         BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
02063         BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
02064         BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
02065         BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
02066 
02067         sum +=
02068              BUTTERFLYA(temp[8*0+i], temp[8*4+i])
02069             +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
02070             +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
02071             +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
02072     }
02073     return sum;
02074 }
02075 
02076 static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
02077     int i;
02078     int temp[64];
02079     int sum=0;
02080 
02081     assert(h==8);
02082 
02083     for(i=0; i<8; i++){
02084         //FIXME try pointer walks
02085         BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
02086         BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
02087         BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
02088         BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
02089 
02090         BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
02091         BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
02092         BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
02093         BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
02094 
02095         BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
02096         BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
02097         BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
02098         BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
02099     }
02100 
02101     for(i=0; i<8; i++){
02102         BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
02103         BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
02104         BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
02105         BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
02106 
02107         BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
02108         BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
02109         BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
02110         BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
02111 
02112         sum +=
02113              BUTTERFLYA(temp[8*0+i], temp[8*4+i])
02114             +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
02115             +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
02116             +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
02117     }
02118 
02119     sum -= FFABS(temp[8*0] + temp[8*4]); // -mean
02120 
02121     return sum;
02122 }
02123 
02124 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
02125     MpegEncContext * const s= (MpegEncContext *)c;
02126     LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
02127 
02128     assert(h==8);
02129 
02130     s->dsp.diff_pixels(temp, src1, src2, stride);
02131     s->dsp.fdct(temp);
02132     return s->dsp.sum_abs_dctelem(temp);
02133 }
02134 
02135 #if CONFIG_GPL
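/* One-dimensional 8-point integer transform in the style of the H.264
 * high-profile 8x8 transform; dct264_sad8x8_c applies it to the rows and
 * then the columns of the residual and sums the absolute coefficients. */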
02136 #define DCT8_1D {\
02137     const int s07 = SRC(0) + SRC(7);\
02138     const int s16 = SRC(1) + SRC(6);\
02139     const int s25 = SRC(2) + SRC(5);\
02140     const int s34 = SRC(3) + SRC(4);\
02141     const int a0 = s07 + s34;\
02142     const int a1 = s16 + s25;\
02143     const int a2 = s07 - s34;\
02144     const int a3 = s16 - s25;\
02145     const int d07 = SRC(0) - SRC(7);\
02146     const int d16 = SRC(1) - SRC(6);\
02147     const int d25 = SRC(2) - SRC(5);\
02148     const int d34 = SRC(3) - SRC(4);\
02149     const int a4 = d16 + d25 + (d07 + (d07>>1));\
02150     const int a5 = d07 - d34 - (d25 + (d25>>1));\
02151     const int a6 = d07 + d34 - (d16 + (d16>>1));\
02152     const int a7 = d16 - d25 + (d34 + (d34>>1));\
02153     DST(0,  a0 + a1     ) ;\
02154     DST(1,  a4 + (a7>>2)) ;\
02155     DST(2,  a2 + (a3>>1)) ;\
02156     DST(3,  a5 + (a6>>2)) ;\
02157     DST(4,  a0 - a1     ) ;\
02158     DST(5,  a6 - (a5>>2)) ;\
02159     DST(6, (a2>>1) - a3 ) ;\
02160     DST(7, (a4>>2) - a7 ) ;\
02161 }
02162 
02163 static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
02164     MpegEncContext * const s= (MpegEncContext *)c;
02165     DCTELEM dct[8][8];
02166     int i;
02167     int sum=0;
02168 
02169     s->dsp.diff_pixels(dct[0], src1, src2, stride);
02170 
02171 #define SRC(x) dct[i][x]
02172 #define DST(x,v) dct[i][x]= v
02173     for( i = 0; i < 8; i++ )
02174         DCT8_1D
02175 #undef SRC
02176 #undef DST
02177 
02178 #define SRC(x) dct[x][i]
02179 #define DST(x,v) sum += FFABS(v)
02180     for( i = 0; i < 8; i++ )
02181         DCT8_1D
02182 #undef SRC
02183 #undef DST
02184     return sum;
02185 }
02186 #endif
02187 
02188 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
02189     MpegEncContext * const s= (MpegEncContext *)c;
02190     LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
02191     int sum=0, i;
02192 
02193     assert(h==8);
02194 
02195     s->dsp.diff_pixels(temp, src1, src2, stride);
02196     s->dsp.fdct(temp);
02197 
02198     for(i=0; i<64; i++)
02199         sum= FFMAX(sum, FFABS(temp[i]));
02200 
02201     return sum;
02202 }
02203 
02204 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
02205     MpegEncContext * const s= (MpegEncContext *)c;
02206     LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
02207     DCTELEM * const bak = temp+64;
02208     int sum=0, i;
02209 
02210     assert(h==8);
02211     s->mb_intra=0;
02212 
02213     s->dsp.diff_pixels(temp, src1, src2, stride);
02214 
02215     memcpy(bak, temp, 64*sizeof(DCTELEM));
02216 
02217     s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
02218     s->dct_unquantize_inter(s, temp, 0, s->qscale);
02219     ff_simple_idct_8(temp); //FIXME
02220 
02221     for(i=0; i<64; i++)
02222         sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
02223 
02224     return sum;
02225 }
02226 
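/* Rate-distortion metric: quantize the residual DCT, estimate the bit cost
 * from the VLC length tables, reconstruct, and return
 * SSE + ((bits * qscale^2 * 109 + 64) >> 7) as an approximate RD cost.
 * bit8x8_c below returns only the estimated bit cost. */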
02227 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
02228     MpegEncContext * const s= (MpegEncContext *)c;
02229     const uint8_t *scantable= s->intra_scantable.permutated;
02230     LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
02231     LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
02232     LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
02233     int i, last, run, bits, level, distortion, start_i;
02234     const int esc_length= s->ac_esc_length;
02235     uint8_t * length;
02236     uint8_t * last_length;
02237 
02238     assert(h==8);
02239 
02240     copy_block8(lsrc1, src1, 8, stride, 8);
02241     copy_block8(lsrc2, src2, 8, stride, 8);
02242 
02243     s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
02244 
02245     s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
02246 
02247     bits=0;
02248 
02249     if (s->mb_intra) {
02250         start_i = 1;
02251         length     = s->intra_ac_vlc_length;
02252         last_length= s->intra_ac_vlc_last_length;
02253         bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
02254     } else {
02255         start_i = 0;
02256         length     = s->inter_ac_vlc_length;
02257         last_length= s->inter_ac_vlc_last_length;
02258     }
02259 
02260     if(last>=start_i){
02261         run=0;
02262         for(i=start_i; i<last; i++){
02263             int j= scantable[i];
02264             level= temp[j];
02265 
02266             if(level){
02267                 level+=64;
02268                 if((level&(~127)) == 0){
02269                     bits+= length[UNI_AC_ENC_INDEX(run, level)];
02270                 }else
02271                     bits+= esc_length;
02272                 run=0;
02273             }else
02274                 run++;
02275         }
02276         i= scantable[last];
02277 
02278         level= temp[i] + 64;
02279 
02280         assert(level - 64);
02281 
02282         if((level&(~127)) == 0){
02283             bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
02284         }else
02285             bits+= esc_length;
02286 
02287     }
02288 
02289     if(last>=0){
02290         if(s->mb_intra)
02291             s->dct_unquantize_intra(s, temp, 0, s->qscale);
02292         else
02293             s->dct_unquantize_inter(s, temp, 0, s->qscale);
02294     }
02295 
02296     s->dsp.idct_add(lsrc2, 8, temp);
02297 
02298     distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
02299 
02300     return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
02301 }
02302 
02303 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
02304     MpegEncContext * const s= (MpegEncContext *)c;
02305     const uint8_t *scantable= s->intra_scantable.permutated;
02306     LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
02307     int i, last, run, bits, level, start_i;
02308     const int esc_length= s->ac_esc_length;
02309     uint8_t * length;
02310     uint8_t * last_length;
02311 
02312     assert(h==8);
02313 
02314     s->dsp.diff_pixels(temp, src1, src2, stride);
02315 
02316     s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
02317 
02318     bits=0;
02319 
02320     if (s->mb_intra) {
02321         start_i = 1;
02322         length     = s->intra_ac_vlc_length;
02323         last_length= s->intra_ac_vlc_last_length;
02324         bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
02325     } else {
02326         start_i = 0;
02327         length     = s->inter_ac_vlc_length;
02328         last_length= s->inter_ac_vlc_last_length;
02329     }
02330 
02331     if(last>=start_i){
02332         run=0;
02333         for(i=start_i; i<last; i++){
02334             int j= scantable[i];
02335             level= temp[j];
02336 
02337             if(level){
02338                 level+=64;
02339                 if((level&(~127)) == 0){
02340                     bits+= length[UNI_AC_ENC_INDEX(run, level)];
02341                 }else
02342                     bits+= esc_length;
02343                 run=0;
02344             }else
02345                 run++;
02346         }
02347         i= scantable[last];
02348 
02349         level= temp[i] + 64;
02350 
02351         assert(level - 64);
02352 
02353         if((level&(~127)) == 0){
02354             bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
02355         }else
02356             bits+= esc_length;
02357     }
02358 
02359     return bits;
02360 }
02361 
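/* Vertical activity metrics: each row is compared against the row above it,
 * either within the block itself (the *_intra variants) or within the
 * difference of the two blocks (vsad16_c / vsse16_c), using absolute or
 * squared differences. */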
02362 #define VSAD_INTRA(size) \
02363 static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
02364     int score=0;                                                                                            \
02365     int x,y;                                                                                                \
02366                                                                                                             \
02367     for(y=1; y<h; y++){                                                                                     \
02368         for(x=0; x<size; x+=4){                                                                             \
02369             score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride])                           \
02370                    +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]);                          \
02371         }                                                                                                   \
02372         s+= stride;                                                                                         \
02373     }                                                                                                       \
02374                                                                                                             \
02375     return score;                                                                                           \
02376 }
02377 VSAD_INTRA(8)
02378 VSAD_INTRA(16)
02379 
02380 static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
02381     int score=0;
02382     int x,y;
02383 
02384     for(y=1; y<h; y++){
02385         for(x=0; x<16; x++){
02386             score+= FFABS(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
02387         }
02388         s1+= stride;
02389         s2+= stride;
02390     }
02391 
02392     return score;
02393 }
02394 
02395 #define SQ(a) ((a)*(a))
02396 #define VSSE_INTRA(size) \
02397 static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
02398     int score=0;                                                                                            \
02399     int x,y;                                                                                                \
02400                                                                                                             \
02401     for(y=1; y<h; y++){                                                                                     \
02402         for(x=0; x<size; x+=4){                                                                               \
02403             score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])                                 \
02404                    +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);                                \
02405         }                                                                                                   \
02406         s+= stride;                                                                                         \
02407     }                                                                                                       \
02408                                                                                                             \
02409     return score;                                                                                           \
02410 }
02411 VSSE_INTRA(8)
02412 VSSE_INTRA(16)
02413 
02414 static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
02415     int score=0;
02416     int x,y;
02417 
02418     for(y=1; y<h; y++){
02419         for(x=0; x<16; x++){
02420             score+= SQ(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
02421         }
02422         s1+= stride;
02423         s2+= stride;
02424     }
02425 
02426     return score;
02427 }
02428 
02429 static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
02430                                int size){
02431     int score=0;
02432     int i;
02433     for(i=0; i<size; i++)
02434         score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
02435     return score;
02436 }
02437 
02438 WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
02439 WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
02440 WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
02441 #if CONFIG_GPL
02442 WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
02443 #endif
02444 WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
02445 WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
02446 WRAPPER8_16_SQ(rd8x8_c, rd16_c)
02447 WRAPPER8_16_SQ(bit8x8_c, bit16_c)
02448 
02449 static void vector_fmul_c(float *dst, const float *src0, const float *src1, int len){
02450     int i;
02451     for(i=0; i<len; i++)
02452         dst[i] = src0[i] * src1[i];
02453 }
02454 
02455 static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
02456     int i;
02457     src1 += len-1;
02458     for(i=0; i<len; i++)
02459         dst[i] = src0[i] * src1[-i];
02460 }
02461 
02462 static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
02463     int i;
02464     for(i=0; i<len; i++)
02465         dst[i] = src0[i] * src1[i] + src2[i];
02466 }
02467 
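/* Windowed overlap-add as used by MDCT-based audio codecs: combines the two
 * half-blocks src0 and src1 with the symmetric window win and writes 2*len
 * output samples around dst+len. */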
02468 static void vector_fmul_window_c(float *dst, const float *src0,
02469                                  const float *src1, const float *win, int len)
02470 {
02471     int i,j;
02472     dst += len;
02473     win += len;
02474     src0+= len;
02475     for(i=-len, j=len-1; i<0; i++, j--) {
02476         float s0 = src0[i];
02477         float s1 = src1[j];
02478         float wi = win[i];
02479         float wj = win[j];
02480         dst[i] = s0*wj - s1*wi;
02481         dst[j] = s0*wi + s1*wj;
02482     }
02483 }
02484 
02485 static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
02486                                  int len)
02487 {
02488     int i;
02489     for (i = 0; i < len; i++)
02490         dst[i] = src[i] * mul;
02491 }
02492 
02493 static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
02494                                  int len)
02495 {
02496     int i;
02497     for (i = 0; i < len; i++)
02498         dst[i] += src[i] * mul;
02499 }
02500 
02501 static void butterflies_float_c(float *restrict v1, float *restrict v2,
02502                                 int len)
02503 {
02504     int i;
02505     for (i = 0; i < len; i++) {
02506         float t = v1[i] - v2[i];
02507         v1[i] += v2[i];
02508         v2[i] = t;
02509     }
02510 }
02511 
02512 static void butterflies_float_interleave_c(float *dst, const float *src0,
02513                                            const float *src1, int len)
02514 {
02515     int i;
02516     for (i = 0; i < len; i++) {
02517         float f1 = src0[i];
02518         float f2 = src1[i];
02519         dst[2*i    ] = f1 + f2;
02520         dst[2*i + 1] = f1 - f2;
02521     }
02522 }
02523 
02524 static float scalarproduct_float_c(const float *v1, const float *v2, int len)
02525 {
02526     float p = 0.0;
02527     int i;
02528 
02529     for (i = 0; i < len; i++)
02530         p += v1[i] * v2[i];
02531 
02532     return p;
02533 }
02534 
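/* Float clipping.  When min < 0 < max, vector_clipf_c dispatches to
 * clipf_c_one, which compares the raw IEEE-754 bit patterns (with the sign
 * bit flipped for the upper bound) instead of doing float compares; the
 * trick is only valid for that sign combination, hence the guarded dispatch. */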
02535 static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
02536                    uint32_t maxi, uint32_t maxisign)
02537 {
02538 
02539     if(a > mini) return mini;
02540     else if((a^(1U<<31)) > maxisign) return maxi;
02541     else return a;
02542 }
02543 
02544 static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
02545     int i;
02546     uint32_t mini = *(uint32_t*)min;
02547     uint32_t maxi = *(uint32_t*)max;
02548     uint32_t maxisign = maxi ^ (1U<<31);
02549     uint32_t *dsti = (uint32_t*)dst;
02550     const uint32_t *srci = (const uint32_t*)src;
02551     for(i=0; i<len; i+=8) {
02552         dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
02553         dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
02554         dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
02555         dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
02556         dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
02557         dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
02558         dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
02559         dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
02560     }
02561 }
02562 static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
02563     int i;
02564     if(min < 0 && max > 0) {
02565         vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
02566     } else {
02567         for(i=0; i < len; i+=8) {
02568             dst[i    ] = av_clipf(src[i    ], min, max);
02569             dst[i + 1] = av_clipf(src[i + 1], min, max);
02570             dst[i + 2] = av_clipf(src[i + 2], min, max);
02571             dst[i + 3] = av_clipf(src[i + 3], min, max);
02572             dst[i + 4] = av_clipf(src[i + 4], min, max);
02573             dst[i + 5] = av_clipf(src[i + 5], min, max);
02574             dst[i + 6] = av_clipf(src[i + 6], min, max);
02575             dst[i + 7] = av_clipf(src[i + 7], min, max);
02576         }
02577     }
02578 }
02579 
02580 static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order, int shift)
02581 {
02582     int res = 0;
02583 
02584     while (order--)
02585         res += (*v1++ * *v2++) >> shift;
02586 
02587     return res;
02588 }
02589 
02590 static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
02591 {
02592     int res = 0;
02593     while (order--) {
02594         res   += *v1 * *v2++;
02595         *v1++ += mul * *v3++;
02596     }
02597     return res;
02598 }
02599 
02600 static void apply_window_int16_c(int16_t *output, const int16_t *input,
02601                                  const int16_t *window, unsigned int len)
02602 {
02603     int i;
02604     int len2 = len >> 1;
02605 
02606     for (i = 0; i < len2; i++) {
02607         int16_t w       = window[i];
02608         output[i]       = (MUL16(input[i],       w) + (1 << 14)) >> 15;
02609         output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15;
02610     }
02611 }
02612 
02613 static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
02614                                 int32_t max, unsigned int len)
02615 {
02616     do {
02617         *dst++ = av_clip(*src++, min, max);
02618         *dst++ = av_clip(*src++, min, max);
02619         *dst++ = av_clip(*src++, min, max);
02620         *dst++ = av_clip(*src++, min, max);
02621         *dst++ = av_clip(*src++, min, max);
02622         *dst++ = av_clip(*src++, min, max);
02623         *dst++ = av_clip(*src++, min, max);
02624         *dst++ = av_clip(*src++, min, max);
02625         len -= 8;
02626     } while (len > 0);
02627 }
02628 
02629 #define W0 2048
02630 #define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
02631 #define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
02632 #define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
02633 #define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
02634 #define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
02635 #define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
02636 #define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */
02637 
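/* WMV2 8-point IDCT, applied to rows and then columns by ff_wmv2_idct_c.
 * The row pass rounds with 8 fractional bits; the column pass keeps extra
 * precision in step 1 and normalizes with the final >>14. */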
02638 static void wmv2_idct_row(short * b)
02639 {
02640     int s1,s2;
02641     int a0,a1,a2,a3,a4,a5,a6,a7;
02642     /*step 1*/
02643     a1 = W1*b[1]+W7*b[7];
02644     a7 = W7*b[1]-W1*b[7];
02645     a5 = W5*b[5]+W3*b[3];
02646     a3 = W3*b[5]-W5*b[3];
02647     a2 = W2*b[2]+W6*b[6];
02648     a6 = W6*b[2]-W2*b[6];
02649     a0 = W0*b[0]+W0*b[4];
02650     a4 = W0*b[0]-W0*b[4];
02651     /*step 2*/
02652     s1 = (181*(a1-a5+a7-a3)+128)>>8;//1,3,5,7,
02653     s2 = (181*(a1-a5-a7+a3)+128)>>8;
02654     /*step 3*/
02655     b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
02656     b[1] = (a4+a6 +s1   + (1<<7))>>8;
02657     b[2] = (a4-a6 +s2   + (1<<7))>>8;
02658     b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
02659     b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
02660     b[5] = (a4-a6 -s2   + (1<<7))>>8;
02661     b[6] = (a4+a6 -s1   + (1<<7))>>8;
02662     b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
02663 }
02664 static void wmv2_idct_col(short * b)
02665 {
02666     int s1,s2;
02667     int a0,a1,a2,a3,a4,a5,a6,a7;
02668     /*step 1, with extended precision*/
02669     a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
02670     a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
02671     a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
02672     a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
02673     a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
02674     a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
02675     a0 = (W0*b[8*0]+W0*b[8*4]    )>>3;
02676     a4 = (W0*b[8*0]-W0*b[8*4]    )>>3;
02677     /*step 2*/
02678     s1 = (181*(a1-a5+a7-a3)+128)>>8;
02679     s2 = (181*(a1-a5-a7+a3)+128)>>8;
02680     /*step 3*/
02681     b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
02682     b[8*1] = (a4+a6 +s1   + (1<<13))>>14;
02683     b[8*2] = (a4-a6 +s2   + (1<<13))>>14;
02684     b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;
02685 
02686     b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
02687     b[8*5] = (a4-a6 -s2   + (1<<13))>>14;
02688     b[8*6] = (a4+a6 -s1   + (1<<13))>>14;
02689     b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
02690 }
02691 void ff_wmv2_idct_c(short * block){
02692     int i;
02693 
02694     for(i=0;i<64;i+=8){
02695         wmv2_idct_row(block+i);
02696     }
02697     for(i=0;i<8;i++){
02698         wmv2_idct_col(block+i);
02699     }
02700 }
02701 /* XXX: these functions should be removed ASAP when all IDCTs are
02702  converted */
02703 static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
02704 {
02705     ff_wmv2_idct_c(block);
02706     ff_put_pixels_clamped_c(block, dest, line_size);
02707 }
02708 static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
02709 {
02710     ff_wmv2_idct_c(block);
02711     ff_add_pixels_clamped_c(block, dest, line_size);
02712 }
02713 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
02714 {
02715     j_rev_dct (block);
02716     ff_put_pixels_clamped_c(block, dest, line_size);
02717 }
02718 static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
02719 {
02720     j_rev_dct (block);
02721     ff_add_pixels_clamped_c(block, dest, line_size);
02722 }
02723 
02724 static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
02725 {
02726     j_rev_dct4 (block);
02727     put_pixels_clamped4_c(block, dest, line_size);
02728 }
02729 static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
02730 {
02731     j_rev_dct4 (block);
02732     add_pixels_clamped4_c(block, dest, line_size);
02733 }
02734 
02735 static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
02736 {
02737     j_rev_dct2 (block);
02738     put_pixels_clamped2_c(block, dest, line_size);
02739 }
02740 static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
02741 {
02742     j_rev_dct2 (block);
02743     add_pixels_clamped2_c(block, dest, line_size);
02744 }
02745 
02746 static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
02747 {
02748     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
02749 
02750     dest[0] = cm[(block[0] + 4)>>3];
02751 }
02752 static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
02753 {
02754     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
02755 
02756     dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
02757 }
02758 
02759 static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
02760 
02761 /* init static data */
02762 av_cold void dsputil_static_init(void)
02763 {
02764     int i;
02765 
02766     for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
02767     for(i=0;i<MAX_NEG_CROP;i++) {
02768         ff_cropTbl[i] = 0;
02769         ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
02770     }
02771 
02772     for(i=0;i<512;i++) {
02773         ff_squareTbl[i] = (i - 256) * (i - 256);
02774     }
02775 
02776     for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
02777 }
02778 
02779 int ff_check_alignment(void){
02780     static int did_fail=0;
02781     LOCAL_ALIGNED_16(int, aligned, [4]);
02782 
02783     if((intptr_t)aligned & 15){
02784         if(!did_fail){
02785 #if HAVE_MMX || HAVE_ALTIVEC
02786             av_log(NULL, AV_LOG_ERROR,
02787                 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
02788                 "and may be very slow or crash. This is not a bug in libavcodec,\n"
02789                 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
02790                 "Do not report crashes to Libav developers.\n");
02791 #endif
02792             did_fail=1;
02793         }
02794         return -1;
02795     }
02796     return 0;
02797 }
02798 
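/* Runtime dispatch: choose the forward DCT (encoders only, based on
 * bits_per_raw_sample and dct_algo) and the inverse DCT (based on lowres,
 * bits_per_raw_sample and idct_algo), then fill the function pointer tables
 * with the C reference implementations so that architecture-specific init
 * code can override them later. */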
02799 av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
02800 {
02801     int i;
02802 
02803     ff_check_alignment();
02804 
02805 #if CONFIG_ENCODERS
02806     if (avctx->bits_per_raw_sample == 10) {
02807         c->fdct    = ff_jpeg_fdct_islow_10;
02808         c->fdct248 = ff_fdct248_islow_10;
02809     } else {
02810         if(avctx->dct_algo==FF_DCT_FASTINT) {
02811             c->fdct    = fdct_ifast;
02812             c->fdct248 = fdct_ifast248;
02813         }
02814         else if(avctx->dct_algo==FF_DCT_FAAN) {
02815             c->fdct    = ff_faandct;
02816             c->fdct248 = ff_faandct248;
02817         }
02818         else {
02819             c->fdct    = ff_jpeg_fdct_islow_8; //slow/accurate/default
02820             c->fdct248 = ff_fdct248_islow_8;
02821         }
02822     }
02823 #endif //CONFIG_ENCODERS
02824 
02825     if(avctx->lowres==1){
02826         c->idct_put= ff_jref_idct4_put;
02827         c->idct_add= ff_jref_idct4_add;
02828         c->idct    = j_rev_dct4;
02829         c->idct_permutation_type= FF_NO_IDCT_PERM;
02830     }else if(avctx->lowres==2){
02831         c->idct_put= ff_jref_idct2_put;
02832         c->idct_add= ff_jref_idct2_add;
02833         c->idct    = j_rev_dct2;
02834         c->idct_permutation_type= FF_NO_IDCT_PERM;
02835     }else if(avctx->lowres==3){
02836         c->idct_put= ff_jref_idct1_put;
02837         c->idct_add= ff_jref_idct1_add;
02838         c->idct    = j_rev_dct1;
02839         c->idct_permutation_type= FF_NO_IDCT_PERM;
02840     }else{
02841         if (avctx->bits_per_raw_sample == 10) {
02842             c->idct_put              = ff_simple_idct_put_10;
02843             c->idct_add              = ff_simple_idct_add_10;
02844             c->idct                  = ff_simple_idct_10;
02845             c->idct_permutation_type = FF_NO_IDCT_PERM;
02846         } else {
02847             if(avctx->idct_algo==FF_IDCT_INT){
02848                 c->idct_put= ff_jref_idct_put;
02849                 c->idct_add= ff_jref_idct_add;
02850                 c->idct    = j_rev_dct;
02851                 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
02852             }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
02853                     avctx->idct_algo==FF_IDCT_VP3){
02854                 c->idct_put= ff_vp3_idct_put_c;
02855                 c->idct_add= ff_vp3_idct_add_c;
02856                 c->idct    = ff_vp3_idct_c;
02857                 c->idct_permutation_type= FF_NO_IDCT_PERM;
02858             }else if(avctx->idct_algo==FF_IDCT_WMV2){
02859                 c->idct_put= ff_wmv2_idct_put_c;
02860                 c->idct_add= ff_wmv2_idct_add_c;
02861                 c->idct    = ff_wmv2_idct_c;
02862                 c->idct_permutation_type= FF_NO_IDCT_PERM;
02863             }else if(avctx->idct_algo==FF_IDCT_FAAN){
02864                 c->idct_put= ff_faanidct_put;
02865                 c->idct_add= ff_faanidct_add;
02866                 c->idct    = ff_faanidct;
02867                 c->idct_permutation_type= FF_NO_IDCT_PERM;
02868             }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
02869                 c->idct_put= ff_ea_idct_put_c;
02870                 c->idct_permutation_type= FF_NO_IDCT_PERM;
02871             }else{ //accurate/default
02872                 c->idct_put = ff_simple_idct_put_8;
02873                 c->idct_add = ff_simple_idct_add_8;
02874                 c->idct     = ff_simple_idct_8;
02875                 c->idct_permutation_type= FF_NO_IDCT_PERM;
02876             }
02877         }
02878     }
02879 
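    /* Scalar C reference implementations; the architecture-specific
     * initializers called at the end of this function may override them. */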
02880     c->diff_pixels = diff_pixels_c;
02881     c->put_pixels_clamped = ff_put_pixels_clamped_c;
02882     c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
02883     c->add_pixels_clamped = ff_add_pixels_clamped_c;
02884     c->sum_abs_dctelem = sum_abs_dctelem_c;
02885     c->gmc1 = gmc1_c;
02886     c->gmc = ff_gmc_c;
02887     c->pix_sum = pix_sum_c;
02888     c->pix_norm1 = pix_norm1_c;
02889 
02890     c->fill_block_tab[0] = fill_block16_c;
02891     c->fill_block_tab[1] = fill_block8_c;
02892 
02893     /* TODO: [0] = 16-pixel-wide, [1] = 8-pixel-wide variants */
02894     c->pix_abs[0][0] = pix_abs16_c;
02895     c->pix_abs[0][1] = pix_abs16_x2_c;
02896     c->pix_abs[0][2] = pix_abs16_y2_c;
02897     c->pix_abs[0][3] = pix_abs16_xy2_c;
02898     c->pix_abs[1][0] = pix_abs8_c;
02899     c->pix_abs[1][1] = pix_abs8_x2_c;
02900     c->pix_abs[1][2] = pix_abs8_y2_c;
02901     c->pix_abs[1][3] = pix_abs8_xy2_c;
02902 
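    /* Third-pel (1/3-pixel) motion compensation, used e.g. by SVQ3.
     * Table index is x + 4*y for fractional positions in thirds. */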
02903     c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
02904     c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
02905     c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
02906     c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
02907     c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
02908     c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
02909     c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
02910     c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
02911     c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
02912 
02913     c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
02914     c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
02915     c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
02916     c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
02917     c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
02918     c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
02919     c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
02920     c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
02921     c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
02922 
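    /* dspfunc() fills one 16-entry quarter-pel MC table; the index is
     * x + 4*y, where x and y are the fractional positions in quarters. */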
02923 #define dspfunc(PFX, IDX, NUM) \
02924     c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
02925     c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
02926     c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
02927     c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
02928     c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
02929     c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
02930     c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
02931     c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
02932     c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
02933     c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
02934     c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
02935     c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
02936     c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
02937     c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
02938     c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
02939     c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
02940 
02941     dspfunc(put_qpel, 0, 16);
02942     dspfunc(put_no_rnd_qpel, 0, 16);
02943 
02944     dspfunc(avg_qpel, 0, 16);
02945     /* dspfunc(avg_no_rnd_qpel, 0, 16); */
02946 
02947     dspfunc(put_qpel, 1, 8);
02948     dspfunc(put_no_rnd_qpel, 1, 8);
02949 
02950     dspfunc(avg_qpel, 1, 8);
02951     /* dspfunc(avg_no_rnd_qpel, 1, 8); */
02952 
02953 #undef dspfunc
02954 
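    /* Codec-specific DSP hooks, compiled in only when the corresponding
     * decoders are enabled. */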
02955 #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
02956     ff_mlp_init(c, avctx);
02957 #endif
02958 #if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
02959     ff_intrax8dsp_init(c,avctx);
02960 #endif
02961 
02962     c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
02963     c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
02964     c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
02965     c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
02966     c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
02967     c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
02968     c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
02969     c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
02970 
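    /* Block comparison functions used for motion estimation and mode
     * decision; index 0 covers 16-pixel-wide blocks, index 1 covers 8x8. */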
02971 #define SET_CMP_FUNC(name) \
02972     c->name[0]= name ## 16_c;\
02973     c->name[1]= name ## 8x8_c;
02974 
02975     SET_CMP_FUNC(hadamard8_diff)
02976     c->hadamard8_diff[4]= hadamard8_intra16_c;
02977     c->hadamard8_diff[5]= hadamard8_intra8x8_c;
02978     SET_CMP_FUNC(dct_sad)
02979     SET_CMP_FUNC(dct_max)
02980 #if CONFIG_GPL
02981     SET_CMP_FUNC(dct264_sad)
02982 #endif
02983     c->sad[0]= pix_abs16_c;
02984     c->sad[1]= pix_abs8_c;
02985     c->sse[0]= sse16_c;
02986     c->sse[1]= sse8_c;
02987     c->sse[2]= sse4_c;
02988     SET_CMP_FUNC(quant_psnr)
02989     SET_CMP_FUNC(rd)
02990     SET_CMP_FUNC(bit)
02991     c->vsad[0]= vsad16_c;
02992     c->vsad[4]= vsad_intra16_c;
02993     c->vsad[5]= vsad_intra8_c;
02994     c->vsse[0]= vsse16_c;
02995     c->vsse[4]= vsse_intra16_c;
02996     c->vsse[5]= vsse_intra8_c;
02997     c->nsse[0]= nsse16_c;
02998     c->nsse[1]= nsse8_c;
02999 #if CONFIG_DWT
03000     ff_dsputil_init_dwt(c);
03001 #endif
03002 
03003     c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
03004 
03005     c->add_bytes= add_bytes_c;
03006     c->add_bytes_l2= add_bytes_l2_c;
03007     c->diff_bytes= diff_bytes_c;
03008     c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
03009     c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
03010     c->add_hfyu_left_prediction  = add_hfyu_left_prediction_c;
03011     c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
03012     c->bswap_buf= bswap_buf;
03013     c->bswap16_buf = bswap16_buf;
03014 #if CONFIG_PNG_DECODER
03015     c->add_png_paeth_prediction= ff_add_png_paeth_prediction;
03016 #endif
03017 
03018     if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
03019         c->h263_h_loop_filter= h263_h_loop_filter_c;
03020         c->h263_v_loop_filter= h263_v_loop_filter_c;
03021     }
03022 
03023     if (CONFIG_VP3_DECODER) {
03024         c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
03025         c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
03026         c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
03027     }
03028 
03029     c->h261_loop_filter= h261_loop_filter_c;
03030 
03031     c->try_8x8basis= try_8x8basis_c;
03032     c->add_8x8basis= add_8x8basis_c;
03033 
03034 #if CONFIG_VORBIS_DECODER
03035     c->vorbis_inverse_coupling = vorbis_inverse_coupling;
03036 #endif
03037 #if CONFIG_AC3_DECODER
03038     c->ac3_downmix = ff_ac3_downmix_c;
03039 #endif
03040     c->vector_fmul = vector_fmul_c;
03041     c->vector_fmul_reverse = vector_fmul_reverse_c;
03042     c->vector_fmul_add = vector_fmul_add_c;
03043     c->vector_fmul_window = vector_fmul_window_c;
03044     c->vector_clipf = vector_clipf_c;
03045     c->scalarproduct_int16 = scalarproduct_int16_c;
03046     c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
03047     c->apply_window_int16 = apply_window_int16_c;
03048     c->vector_clip_int32 = vector_clip_int32_c;
03049     c->scalarproduct_float = scalarproduct_float_c;
03050     c->butterflies_float = butterflies_float_c;
03051     c->butterflies_float_interleave = butterflies_float_interleave_c;
03052     c->vector_fmul_scalar = vector_fmul_scalar_c;
03053     c->vector_fmac_scalar = vector_fmac_scalar_c;
03054 
03055     c->shrink[0]= av_image_copy_plane;
03056     c->shrink[1]= ff_shrink22;
03057     c->shrink[2]= ff_shrink44;
03058     c->shrink[3]= ff_shrink88;
03059 
03060     c->prefetch= just_return;
03061 
03062     memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
03063     memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
03064 
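    /* FUNC()/FUNCC() paste the bit depth onto a template function name,
     * e.g. FUNCC(put_pixels8, 8) expands to put_pixels8_8_c. */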
03065 #undef FUNC
03066 #undef FUNCC
03067 #define FUNC(f, depth) f ## _ ## depth
03068 #define FUNCC(f, depth) f ## _ ## depth ## _c
03069 
03070 #define dspfunc1(PFX, IDX, NUM, depth)\
03071     c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM        , depth);\
03072     c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
03073     c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
03074     c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
03075 
03076 #define dspfunc2(PFX, IDX, NUM, depth)\
03077     c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
03078     c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
03079     c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
03080     c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
03081     c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
03082     c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
03083     c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
03084     c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
03085     c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
03086     c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
03087     c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
03088     c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
03089     c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
03090     c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
03091     c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
03092     c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
03093 
03094 
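    /* BIT_DEPTH_FUNCS() installs every bit-depth-dependent function pointer
     * for the given sample depth; the dct argument (_16 or _32) selects the
     * width of the DCT coefficients. */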
03095 #define BIT_DEPTH_FUNCS(depth, dct)\
03096     c->get_pixels                    = FUNCC(get_pixels   ## dct   , depth);\
03097     c->draw_edges                    = FUNCC(draw_edges            , depth);\
03098     c->emulated_edge_mc              = FUNC (ff_emulated_edge_mc   , depth);\
03099     c->clear_block                   = FUNCC(clear_block  ## dct   , depth);\
03100     c->clear_blocks                  = FUNCC(clear_blocks ## dct   , depth);\
03101     c->add_pixels8                   = FUNCC(add_pixels8  ## dct   , depth);\
03102     c->add_pixels4                   = FUNCC(add_pixels4  ## dct   , depth);\
03103     c->put_no_rnd_pixels_l2[0]       = FUNCC(put_no_rnd_pixels16_l2, depth);\
03104     c->put_no_rnd_pixels_l2[1]       = FUNCC(put_no_rnd_pixels8_l2 , depth);\
03105 \
03106     c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8   , depth);\
03107     c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4   , depth);\
03108     c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2   , depth);\
03109     c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8   , depth);\
03110     c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4   , depth);\
03111     c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2   , depth);\
03112 \
03113     dspfunc1(put       , 0, 16, depth);\
03114     dspfunc1(put       , 1,  8, depth);\
03115     dspfunc1(put       , 2,  4, depth);\
03116     dspfunc1(put       , 3,  2, depth);\
03117     dspfunc1(put_no_rnd, 0, 16, depth);\
03118     dspfunc1(put_no_rnd, 1,  8, depth);\
03119     dspfunc1(avg       , 0, 16, depth);\
03120     dspfunc1(avg       , 1,  8, depth);\
03121     dspfunc1(avg       , 2,  4, depth);\
03122     dspfunc1(avg       , 3,  2, depth);\
03123     dspfunc1(avg_no_rnd, 0, 16, depth);\
03124     dspfunc1(avg_no_rnd, 1,  8, depth);\
03125 \
03126     dspfunc2(put_h264_qpel, 0, 16, depth);\
03127     dspfunc2(put_h264_qpel, 1,  8, depth);\
03128     dspfunc2(put_h264_qpel, 2,  4, depth);\
03129     dspfunc2(put_h264_qpel, 3,  2, depth);\
03130     dspfunc2(avg_h264_qpel, 0, 16, depth);\
03131     dspfunc2(avg_h264_qpel, 1,  8, depth);\
03132     dspfunc2(avg_h264_qpel, 2,  4, depth);
03133 
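    /* Pick the template instantiation matching the codec's bit depth;
     * unknown depths log a message and fall back to the 8-bit functions. */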
03134     switch (avctx->bits_per_raw_sample) {
03135     case 9:
03136         if (c->dct_bits == 32) {
03137             BIT_DEPTH_FUNCS(9, _32);
03138         } else {
03139             BIT_DEPTH_FUNCS(9, _16);
03140         }
03141         break;
03142     case 10:
03143         if (c->dct_bits == 32) {
03144             BIT_DEPTH_FUNCS(10, _32);
03145         } else {
03146             BIT_DEPTH_FUNCS(10, _16);
03147         }
03148         break;
03149     default:
03150         av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample);
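        /* fall through: use the 8-bit functions as a safe default */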
03151     case 8:
03152         BIT_DEPTH_FUNCS(8, _16);
03153         break;
03154     }
03155 
03156 
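    /* Let each enabled architecture override the C defaults with optimized
     * versions; the conditions are compile-time constants, so disabled
     * branches are discarded by dead-code elimination. */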
03157     if (HAVE_MMX)        dsputil_init_mmx   (c, avctx);
03158     if (ARCH_ARM)        dsputil_init_arm   (c, avctx);
03159     if (CONFIG_MLIB)     dsputil_init_mlib  (c, avctx);
03160     if (HAVE_VIS)        dsputil_init_vis   (c, avctx);
03161     if (ARCH_ALPHA)      dsputil_init_alpha (c, avctx);
03162     if (ARCH_PPC)        dsputil_init_ppc   (c, avctx);
03163     if (HAVE_MMI)        dsputil_init_mmi   (c, avctx);
03164     if (ARCH_SH4)        dsputil_init_sh4   (c, avctx);
03165     if (ARCH_BFIN)       dsputil_init_bfin  (c, avctx);
03166 
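    /* Any 2-tap qpel entries left unset above fall back to the
     * corresponding H.264 quarter-pel functions. */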
03167     for(i=0; i<64; i++){
03168         if(!c->put_2tap_qpel_pixels_tab[0][i])
03169             c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
03170         if(!c->avg_2tap_qpel_pixels_tab[0][i])
03171             c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
03172     }
03173 
03174     ff_init_scantable_permutation(c->idct_permutation,
03175                                   c->idct_permutation_type);
03176 }
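
/*
 * Illustrative usage sketch (not part of dsputil.c): a minimal example of
 * how a codec typically initializes and uses a DSPContext. The context and
 * function names (MyContext, my_decode_init) are hypothetical; only
 * dsputil_init() and the DSPContext function pointers come from this file.
 */
#if 0
typedef struct MyContext {
    DSPContext dsp;
} MyContext;

static av_cold int my_decode_init(AVCodecContext *avctx)
{
    MyContext *s = avctx->priv_data;

    dsputil_init(&s->dsp, avctx);   /* fills C or SIMD function pointers */

    /* later, e.g. when reconstructing a block:
     *     s->dsp.idct_put(dest, linesize, block);
     */
    return 0;
}
#endif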