00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00030 #include "libavutil/imgutils.h"
00031 #include "avcodec.h"
00032 #include "dsputil.h"
00033 #include "simple_idct.h"
00034 #include "faandct.h"
00035 #include "faanidct.h"
00036 #include "mathops.h"
00037 #include "mpegvideo.h"
00038 #include "config.h"
00039 #include "ac3dec.h"
00040 #include "vorbis.h"
00041 #include "png.h"
00042
/* Clipping lookup table, padded by MAX_NEG_CROP on both sides so indices in
   [-MAX_NEG_CROP, 255 + MAX_NEG_CROP] are safe; zeroed here — presumably
   populated during DSP init elsewhere (not visible in this chunk). */
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/* Square lookup table; accessed below as ff_squareTbl + 256 so signed
   differences in [-256, 255] index it directly.  Filled elsewhere. */
uint32_t ff_squareTbl[512] = {0, };
00045
/* Instantiate the bit-depth-templated pixel routines once per supported
   depth; BIT_DEPTH selects the variant inside dsputil_template.c. */
#define BIT_DEPTH 9
#include "dsputil_template.c"
#undef BIT_DEPTH

#define BIT_DEPTH 10
#include "dsputil_template.c"
#undef BIT_DEPTH

/* 8-bit variant last: BIT_DEPTH deliberately stays defined as 8 for the
   rest of this file (no #undef follows). */
#define BIT_DEPTH 8
#include "dsputil_template.c"
00056
00057
/* 0x7f / 0x80 replicated into every byte of an unsigned long
   (~0UL/255 == 0x0101... pattern) — SIMD-within-a-register byte masks. */
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
00060
/* Standard JPEG/MPEG zigzag scan for an 8x8 block:
   entry i is the raster-order position of the i-th scanned coefficient. */
const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
00071
00072
00073
/* Zigzag scan variant for the interlaced "2-4-8" DCT
   (NOTE(review): semantics inferred from the name — confirm vs. spec). */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
00084
00085
00086 DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];
00087
/* Alternate horizontal scan order (used instead of zigzag for some
   coding modes — NOTE(review): exact users not visible in this chunk). */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
00098
/* Alternate vertical scan order (MPEG-2-style alternate scan —
   NOTE(review): inferred from name; confirm vs. spec). */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
00109
00110
/* Coefficient permutation applied for the simple MMX IDCT
   (FF_SIMPLE_IDCT_PERM case in ff_init_scantable_permutation below). */
static const uint8_t simple_mmx_permutation[64]={
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
00121
00122 static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
00123
00124 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
00125 int i;
00126 int end;
00127
00128 st->scantable= src_scantable;
00129
00130 for(i=0; i<64; i++){
00131 int j;
00132 j = src_scantable[i];
00133 st->permutated[i] = permutation[j];
00134 #if ARCH_PPC
00135 st->inverse[j] = i;
00136 #endif
00137 }
00138
00139 end=-1;
00140 for(i=0; i<64; i++){
00141 int j;
00142 j = st->permutated[i];
00143 if(j>end) end=j;
00144 st->raster_end[i]= end;
00145 }
00146 }
00147
00148 void ff_init_scantable_permutation(uint8_t *idct_permutation,
00149 int idct_permutation_type)
00150 {
00151 int i;
00152
00153 switch(idct_permutation_type){
00154 case FF_NO_IDCT_PERM:
00155 for(i=0; i<64; i++)
00156 idct_permutation[i]= i;
00157 break;
00158 case FF_LIBMPEG2_IDCT_PERM:
00159 for(i=0; i<64; i++)
00160 idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
00161 break;
00162 case FF_SIMPLE_IDCT_PERM:
00163 for(i=0; i<64; i++)
00164 idct_permutation[i]= simple_mmx_permutation[i];
00165 break;
00166 case FF_TRANSPOSE_IDCT_PERM:
00167 for(i=0; i<64; i++)
00168 idct_permutation[i]= ((i&7)<<3) | (i>>3);
00169 break;
00170 case FF_PARTTRANS_IDCT_PERM:
00171 for(i=0; i<64; i++)
00172 idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
00173 break;
00174 case FF_SSE2_IDCT_PERM:
00175 for(i=0; i<64; i++)
00176 idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
00177 break;
00178 default:
00179 av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
00180 }
00181 }
00182
/* Sum of all 256 pixel values of a 16x16 block with row stride line_size. */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;
    }
    return total;
}
00204
/* Sum of squared pixel values over a 16x16 block, via the ff_squareTbl
 * lookup (offset by 256 so the same table serves signed differences in
 * the sse*_c functions below). */
static int pix_norm1_c(uint8_t * pix, int line_size)
{
    int s, i, j;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
#if 0
            /* Reference byte-at-a-time version, kept disabled. */
            s += sq[pix[0]];
            s += sq[pix[1]];
            s += sq[pix[2]];
            s += sq[pix[3]];
            s += sq[pix[4]];
            s += sq[pix[5]];
            s += sq[pix[6]];
            s += sq[pix[7]];
#else
            /* Load 8 pixels with one (or two) wide reads and extract bytes
             * by shifting.  NOTE(review): these casts type-pun the byte
             * buffer as uint32_t/uint64_t and assume sufficient alignment —
             * technically strict-aliasing/alignment UB, long-standing
             * convention in this codebase.  Byte extraction by shifting
             * assumes little-endian byte order is irrelevant here since
             * every byte is squared and summed regardless of position. */
#if HAVE_FAST_64BIT
            register uint64_t x=*(uint64_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            s += sq[(x>>32)&0xff];
            s += sq[(x>>40)&0xff];
            s += sq[(x>>48)&0xff];
            s += sq[(x>>56)&0xff];
#else
            register uint32_t x=*(uint32_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            x=*(uint32_t*)(pix+4);
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
#endif
#endif
            pix += 8;
        }
        pix += line_size - 16;   /* advance to the next row */
    }
    return s;
}
00252
/* Byte-swap w 32-bit words from src into dst (buffers may be identical).
 * Main loop processes 8 words per iteration, tail loop finishes up. */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int i = 0;

    while (i + 8 <= w) {
        int k;
        for (k = 0; k < 8; k++)
            dst[i + k] = av_bswap32(src[i + k]);
        i += 8;
    }
    while (i < w) {
        dst[i] = av_bswap32(src[i]);
        i++;
    }
}
00270
/* Byte-swap len 16-bit values from src into dst. */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    int i;
    for (i = 0; i < len; i++)
        dst[i] = av_bswap16(src[i]);
}
00276
00277 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00278 {
00279 int s, i;
00280 uint32_t *sq = ff_squareTbl + 256;
00281
00282 s = 0;
00283 for (i = 0; i < h; i++) {
00284 s += sq[pix1[0] - pix2[0]];
00285 s += sq[pix1[1] - pix2[1]];
00286 s += sq[pix1[2] - pix2[2]];
00287 s += sq[pix1[3] - pix2[3]];
00288 pix1 += line_size;
00289 pix2 += line_size;
00290 }
00291 return s;
00292 }
00293
00294 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00295 {
00296 int s, i;
00297 uint32_t *sq = ff_squareTbl + 256;
00298
00299 s = 0;
00300 for (i = 0; i < h; i++) {
00301 s += sq[pix1[0] - pix2[0]];
00302 s += sq[pix1[1] - pix2[1]];
00303 s += sq[pix1[2] - pix2[2]];
00304 s += sq[pix1[3] - pix2[3]];
00305 s += sq[pix1[4] - pix2[4]];
00306 s += sq[pix1[5] - pix2[5]];
00307 s += sq[pix1[6] - pix2[6]];
00308 s += sq[pix1[7] - pix2[7]];
00309 pix1 += line_size;
00310 pix2 += line_size;
00311 }
00312 return s;
00313 }
00314
00315 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
00316 {
00317 int s, i;
00318 uint32_t *sq = ff_squareTbl + 256;
00319
00320 s = 0;
00321 for (i = 0; i < h; i++) {
00322 s += sq[pix1[ 0] - pix2[ 0]];
00323 s += sq[pix1[ 1] - pix2[ 1]];
00324 s += sq[pix1[ 2] - pix2[ 2]];
00325 s += sq[pix1[ 3] - pix2[ 3]];
00326 s += sq[pix1[ 4] - pix2[ 4]];
00327 s += sq[pix1[ 5] - pix2[ 5]];
00328 s += sq[pix1[ 6] - pix2[ 6]];
00329 s += sq[pix1[ 7] - pix2[ 7]];
00330 s += sq[pix1[ 8] - pix2[ 8]];
00331 s += sq[pix1[ 9] - pix2[ 9]];
00332 s += sq[pix1[10] - pix2[10]];
00333 s += sq[pix1[11] - pix2[11]];
00334 s += sq[pix1[12] - pix2[12]];
00335 s += sq[pix1[13] - pix2[13]];
00336 s += sq[pix1[14] - pix2[14]];
00337 s += sq[pix1[15] - pix2[15]];
00338
00339 pix1 += line_size;
00340 pix2 += line_size;
00341 }
00342 return s;
00343 }
00344
00345 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
00346 const uint8_t *s2, int stride){
00347 int i;
00348
00349
00350 for(i=0;i<8;i++) {
00351 block[0] = s1[0] - s2[0];
00352 block[1] = s1[1] - s2[1];
00353 block[2] = s1[2] - s2[2];
00354 block[3] = s1[3] - s2[3];
00355 block[4] = s1[4] - s2[4];
00356 block[5] = s1[5] - s2[5];
00357 block[6] = s1[6] - s2[6];
00358 block[7] = s1[7] - s2[7];
00359 s1 += stride;
00360 s2 += stride;
00361 block += 8;
00362 }
00363 }
00364
00365
00366 void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00367 int line_size)
00368 {
00369 int i;
00370 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00371
00372
00373 for(i=0;i<8;i++) {
00374 pixels[0] = cm[block[0]];
00375 pixels[1] = cm[block[1]];
00376 pixels[2] = cm[block[2]];
00377 pixels[3] = cm[block[3]];
00378 pixels[4] = cm[block[4]];
00379 pixels[5] = cm[block[5]];
00380 pixels[6] = cm[block[6]];
00381 pixels[7] = cm[block[7]];
00382
00383 pixels += line_size;
00384 block += 8;
00385 }
00386 }
00387
00388 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00389 int line_size)
00390 {
00391 int i;
00392 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00393
00394
00395 for(i=0;i<4;i++) {
00396 pixels[0] = cm[block[0]];
00397 pixels[1] = cm[block[1]];
00398 pixels[2] = cm[block[2]];
00399 pixels[3] = cm[block[3]];
00400
00401 pixels += line_size;
00402 block += 8;
00403 }
00404 }
00405
00406 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00407 int line_size)
00408 {
00409 int i;
00410 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00411
00412
00413 for(i=0;i<2;i++) {
00414 pixels[0] = cm[block[0]];
00415 pixels[1] = cm[block[1]];
00416
00417 pixels += line_size;
00418 block += 8;
00419 }
00420 }
00421
00422 void ff_put_signed_pixels_clamped_c(const DCTELEM *block,
00423 uint8_t *restrict pixels,
00424 int line_size)
00425 {
00426 int i, j;
00427
00428 for (i = 0; i < 8; i++) {
00429 for (j = 0; j < 8; j++) {
00430 if (*block < -128)
00431 *pixels = 0;
00432 else if (*block > 127)
00433 *pixels = 255;
00434 else
00435 *pixels = (uint8_t)(*block + 128);
00436 block++;
00437 pixels++;
00438 }
00439 pixels += (line_size - 8);
00440 }
00441 }
00442
00443 void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00444 int line_size)
00445 {
00446 int i;
00447 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00448
00449
00450 for(i=0;i<8;i++) {
00451 pixels[0] = cm[pixels[0] + block[0]];
00452 pixels[1] = cm[pixels[1] + block[1]];
00453 pixels[2] = cm[pixels[2] + block[2]];
00454 pixels[3] = cm[pixels[3] + block[3]];
00455 pixels[4] = cm[pixels[4] + block[4]];
00456 pixels[5] = cm[pixels[5] + block[5]];
00457 pixels[6] = cm[pixels[6] + block[6]];
00458 pixels[7] = cm[pixels[7] + block[7]];
00459 pixels += line_size;
00460 block += 8;
00461 }
00462 }
00463
00464 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00465 int line_size)
00466 {
00467 int i;
00468 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00469
00470
00471 for(i=0;i<4;i++) {
00472 pixels[0] = cm[pixels[0] + block[0]];
00473 pixels[1] = cm[pixels[1] + block[1]];
00474 pixels[2] = cm[pixels[2] + block[2]];
00475 pixels[3] = cm[pixels[3] + block[3]];
00476 pixels += line_size;
00477 block += 8;
00478 }
00479 }
00480
00481 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00482 int line_size)
00483 {
00484 int i;
00485 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00486
00487
00488 for(i=0;i<2;i++) {
00489 pixels[0] = cm[pixels[0] + block[0]];
00490 pixels[1] = cm[pixels[1] + block[1]];
00491 pixels += line_size;
00492 block += 8;
00493 }
00494 }
00495
00496 static int sum_abs_dctelem_c(DCTELEM *block)
00497 {
00498 int sum=0, i;
00499 for(i=0; i<64; i++)
00500 sum+= FFABS(block[i]);
00501 return sum;
00502 }
00503
/* Fill h rows of 16 bytes each with the given value (row stride line_size). */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    while (h-- > 0) {
        memset(block, value, 16);
        block += line_size;
    }
}
00513
/* Fill h rows of 8 bytes each with the given value (row stride line_size). */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    while (h-- > 0) {
        memset(block, value, 8);
        block += line_size;
    }
}
00523
/* Rounded averages of 2 / 4 values.  Arguments and the whole expansion are
 * fully parenthesized so operands containing lower-precedence operators
 * (e.g. ?:, |) expand correctly — the previous definitions evaluated
 * `a+b+1` without parenthesizing a and b. */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
00526
/* Bilinear 1/16-pel motion compensation for an 8-pixel-wide block of
 * height h: the four neighbours are weighted by (16-x)(16-y), x(16-y),
 * (16-x)y and xy, then rounded and scaled down by 256. */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A = (16 - x16) * (16 - y16);
    const int B =       x16  * (16 - y16);
    const int C = (16 - x16) *       y16;
    const int D =       x16  *       y16;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            dst[col] = (A * src[col]            + B * src[col + 1] +
                        C * src[stride + col]   + D * src[stride + col + 1] +
                        rounder) >> 8;
        dst += stride;
        src += stride;
    }
}
00549
/* Global motion compensation for one 8-pixel-wide block of height h.
 * (ox,oy) is the 16.16 fixed-point source position of the first pixel;
 * dxx/dyx advance the position per destination column, dxy/dyy per
 * destination row; 'shift' is the sub-pel precision and r the rounder.
 * Samples falling outside the width x height source area are clamped to
 * the nearest edge, dropping the interpolation along the clamped axis. */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    /* turn width/height into the last valid coordinate for the clamps below */
    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){
            int src_x, src_y, frac_x, frac_y, index;

            /* split the 16.16 position into an integer pel coordinate and a
             * 'shift'-bit fraction */
            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            /* unsigned compare doubles as a >=0 check; '< width' (strict)
             * guarantees src_x+1 is still inside the source */
            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside: bilinear interpolation of 4 neighbours */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*(s-frac_y)
                                       + (  src[index+stride  ]*(s-frac_x)
                                          + src[index+stride+1]*   frac_x )*   frac_y
                                       + r)>>(shift*2);
                }else{
                    /* vertically outside: clamp y, interpolate only in x */
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*s
                                       + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* horizontally outside: clamp x, interpolate only in y */
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_y)
                                          + src[index+stride  ]*   frac_y )*s
                                       + r)>>(shift*2);
                }else{
                    /* outside in both directions: nearest edge pixel */
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= src[index         ];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
00607
/* Full-pel thirdpel copy: dispatch to the fixed-width 8-bit copy routine.
 * Unsupported widths are silently ignored, as before. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 16)
        put_pixels16_8_c(dst, src, stride, height);
    else if (width == 8)
        put_pixels8_8_c (dst, src, stride, height);
    else if (width == 4)
        put_pixels4_8_c (dst, src, stride, height);
    else if (width == 2)
        put_pixels2_8_c (dst, src, stride, height);
}
00616
/* Thirdpel MC, horizontal 1/3 phase: (683*(2*a + b + 1)) >> 11 with
 * a = src[x], b = src[x+1]. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows = height;
    while (rows-- > 0) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (683 * (2 * src[x] + src[x + 1] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
00627
/* Thirdpel MC, horizontal 2/3 phase: (683*(a + 2*b + 1)) >> 11. */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows = height;
    while (rows-- > 0) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (683 * (src[x] + 2 * src[x + 1] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
00638
/* Thirdpel MC, vertical 1/3 phase: (683*(2*a + c + 1)) >> 11 with
 * c = src[x+stride]. */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows = height;
    while (rows-- > 0) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (683 * (2 * src[x] + src[x + stride] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
00649
/* Thirdpel MC, (1/3,1/3) phase: (2731*(4a + 3b + 3c + 2d + 6)) >> 15,
 * where b/c/d are the right/below/below-right neighbours. */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows = height;
    while (rows-- > 0) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (4 * src[x] + 3 * src[x + 1] +
                              3 * src[x + stride] + 2 * src[x + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
00660
/* Thirdpel MC, (1/3,2/3) phase: (2731*(3a + 2b + 4c + 3d + 6)) >> 15. */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows = height;
    while (rows-- > 0) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (3 * src[x] + 2 * src[x + 1] +
                              4 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
00671
/* Thirdpel MC, vertical 2/3 phase: (683*(a + 2*c + 1)) >> 11. */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows = height;
    while (rows-- > 0) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (683 * (src[x] + 2 * src[x + stride] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
00682
/* Thirdpel MC, (2/3,1/3) phase: (2731*(3a + 4b + 2c + 3d + 6)) >> 15. */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows = height;
    while (rows-- > 0) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (3 * src[x] + 4 * src[x + 1] +
                              2 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
00693
/* Thirdpel MC, (2/3,2/3) phase: (2731*(2a + 3b + 3c + 4d + 6)) >> 15. */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows = height;
    while (rows-- > 0) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (2 * src[x] + 3 * src[x + 1] +
                              3 * src[x + stride] + 4 * src[x + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
00704
/* Full-pel thirdpel average: dispatch to the fixed-width 8-bit averaging
 * routine.  Unsupported widths are silently ignored, as before. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 16)
        avg_pixels16_8_c(dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_8_c (dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_8_c (dst, src, stride, height);
    else if (width == 2)
        avg_pixels2_8_c (dst, src, stride, height);
}
00713
/* Thirdpel MC (horizontal 1/3 phase), rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows = height;
    while (rows-- > 0) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683 * (2 * src[x] + src[x + 1] + 1)) >> 11) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
00724
/* Thirdpel MC (horizontal 2/3 phase), rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows = height;
    while (rows-- > 0) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683 * (src[x] + 2 * src[x + 1] + 1)) >> 11) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
00735
/* Thirdpel MC (vertical 1/3 phase), rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows = height;
    while (rows-- > 0) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683 * (2 * src[x] + src[x + stride] + 1)) >> 11) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
00746
/* Thirdpel MC ((1/3,1/3) phase), rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows = height;
    while (rows-- > 0) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731 * (4 * src[x] + 3 * src[x + 1] +
                                         3 * src[x + stride] + 2 * src[x + stride + 1] + 6)) >> 15) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
00757
/* Thirdpel MC ((1/3,2/3) phase), rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows = height;
    while (rows-- > 0) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731 * (3 * src[x] + 2 * src[x + 1] +
                                         4 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
00768
/* Thirdpel MC (vertical 2/3 phase), rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows = height;
    while (rows-- > 0) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683 * (src[x] + 2 * src[x + stride] + 1)) >> 11) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
00779
/* Thirdpel MC ((2/3,1/3) phase), rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows = height;
    while (rows-- > 0) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731 * (3 * src[x] + 4 * src[x + 1] +
                                         2 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
00790
/* Thirdpel MC ((2/3,2/3) phase), rounded-averaged into dst. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int rows = height;
    while (rows-- > 0) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731 * (2 * src[x] + 3 * src[x + 1] +
                                         3 * src[x + stride] + 4 * src[x + stride + 1] + 6)) >> 15) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
00801
00802 #define QPEL_MC(r, OPNAME, RND, OP) \
00803 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00804 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00805 int i;\
00806 for(i=0; i<h; i++)\
00807 {\
00808 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
00809 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
00810 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
00811 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
00812 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
00813 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
00814 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
00815 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
00816 dst+=dstStride;\
00817 src+=srcStride;\
00818 }\
00819 }\
00820 \
00821 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00822 const int w=8;\
00823 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00824 int i;\
00825 for(i=0; i<w; i++)\
00826 {\
00827 const int src0= src[0*srcStride];\
00828 const int src1= src[1*srcStride];\
00829 const int src2= src[2*srcStride];\
00830 const int src3= src[3*srcStride];\
00831 const int src4= src[4*srcStride];\
00832 const int src5= src[5*srcStride];\
00833 const int src6= src[6*srcStride];\
00834 const int src7= src[7*srcStride];\
00835 const int src8= src[8*srcStride];\
00836 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
00837 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
00838 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
00839 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
00840 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
00841 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
00842 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
00843 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
00844 dst++;\
00845 src++;\
00846 }\
00847 }\
00848 \
00849 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00850 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00851 int i;\
00852 \
00853 for(i=0; i<h; i++)\
00854 {\
00855 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
00856 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
00857 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
00858 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
00859 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
00860 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
00861 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
00862 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
00863 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
00864 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
00865 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
00866 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
00867 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
00868 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
00869 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
00870 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
00871 dst+=dstStride;\
00872 src+=srcStride;\
00873 }\
00874 }\
00875 \
00876 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00877 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00878 int i;\
00879 const int w=16;\
00880 for(i=0; i<w; i++)\
00881 {\
00882 const int src0= src[0*srcStride];\
00883 const int src1= src[1*srcStride];\
00884 const int src2= src[2*srcStride];\
00885 const int src3= src[3*srcStride];\
00886 const int src4= src[4*srcStride];\
00887 const int src5= src[5*srcStride];\
00888 const int src6= src[6*srcStride];\
00889 const int src7= src[7*srcStride];\
00890 const int src8= src[8*srcStride];\
00891 const int src9= src[9*srcStride];\
00892 const int src10= src[10*srcStride];\
00893 const int src11= src[11*srcStride];\
00894 const int src12= src[12*srcStride];\
00895 const int src13= src[13*srcStride];\
00896 const int src14= src[14*srcStride];\
00897 const int src15= src[15*srcStride];\
00898 const int src16= src[16*srcStride];\
00899 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
00900 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
00901 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
00902 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
00903 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
00904 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
00905 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
00906 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
00907 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
00908 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
00909 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
00910 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
00911 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
00912 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
00913 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
00914 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
00915 dst++;\
00916 src++;\
00917 }\
00918 }\
00919 \
00920 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
00921 uint8_t half[64];\
00922 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00923 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
00924 }\
00925 \
00926 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
00927 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
00928 }\
00929 \
00930 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
00931 uint8_t half[64];\
00932 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00933 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
00934 }\
00935 \
00936 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
00937 uint8_t full[16*9];\
00938 uint8_t half[64];\
00939 copy_block9(full, src, 16, stride, 9);\
00940 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00941 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
00942 }\
00943 \
00944 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
00945 uint8_t full[16*9];\
00946 copy_block9(full, src, 16, stride, 9);\
00947 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
00948 }\
00949 \
00950 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
00951 uint8_t full[16*9];\
00952 uint8_t half[64];\
00953 copy_block9(full, src, 16, stride, 9);\
00954 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00955 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
00956 }\
00957 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
00958 uint8_t full[16*9];\
00959 uint8_t halfH[72];\
00960 uint8_t halfV[64];\
00961 uint8_t halfHV[64];\
00962 copy_block9(full, src, 16, stride, 9);\
00963 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00964 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
00965 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00966 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
00967 }\
00968 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
00969 uint8_t full[16*9];\
00970 uint8_t halfH[72];\
00971 uint8_t halfHV[64];\
00972 copy_block9(full, src, 16, stride, 9);\
00973 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00974 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
00975 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00976 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
00977 }\
00978 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
00979 uint8_t full[16*9];\
00980 uint8_t halfH[72];\
00981 uint8_t halfV[64];\
00982 uint8_t halfHV[64];\
00983 copy_block9(full, src, 16, stride, 9);\
00984 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00985 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
00986 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00987 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
00988 }\
00989 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
00990 uint8_t full[16*9];\
00991 uint8_t halfH[72];\
00992 uint8_t halfHV[64];\
00993 copy_block9(full, src, 16, stride, 9);\
00994 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00995 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
00996 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00997 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
00998 }\
00999 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01000 uint8_t full[16*9];\
01001 uint8_t halfH[72];\
01002 uint8_t halfV[64];\
01003 uint8_t halfHV[64];\
01004 copy_block9(full, src, 16, stride, 9);\
01005 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01006 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01007 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01008 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01009 }\
01010 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01011 uint8_t full[16*9];\
01012 uint8_t halfH[72];\
01013 uint8_t halfHV[64];\
01014 copy_block9(full, src, 16, stride, 9);\
01015 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01016 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
01017 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01018 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01019 }\
01020 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01021 uint8_t full[16*9];\
01022 uint8_t halfH[72];\
01023 uint8_t halfV[64];\
01024 uint8_t halfHV[64];\
01025 copy_block9(full, src, 16, stride, 9);\
01026 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
01027 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01028 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01029 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01030 }\
01031 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01032 uint8_t full[16*9];\
01033 uint8_t halfH[72];\
01034 uint8_t halfHV[64];\
01035 copy_block9(full, src, 16, stride, 9);\
01036 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01037 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01038 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01039 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01040 }\
01041 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01042 uint8_t halfH[72];\
01043 uint8_t halfHV[64];\
01044 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01045 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01046 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
01047 }\
01048 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01049 uint8_t halfH[72];\
01050 uint8_t halfHV[64];\
01051 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01052 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01053 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01054 }\
01055 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01056 uint8_t full[16*9];\
01057 uint8_t halfH[72];\
01058 uint8_t halfV[64];\
01059 uint8_t halfHV[64];\
01060 copy_block9(full, src, 16, stride, 9);\
01061 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01062 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01063 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01064 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
01065 }\
01066 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01067 uint8_t full[16*9];\
01068 uint8_t halfH[72];\
01069 copy_block9(full, src, 16, stride, 9);\
01070 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01071 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
01072 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01073 }\
01074 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01075 uint8_t full[16*9];\
01076 uint8_t halfH[72];\
01077 uint8_t halfV[64];\
01078 uint8_t halfHV[64];\
01079 copy_block9(full, src, 16, stride, 9);\
01080 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01081 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01082 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01083 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
01084 }\
01085 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01086 uint8_t full[16*9];\
01087 uint8_t halfH[72];\
01088 copy_block9(full, src, 16, stride, 9);\
01089 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01090 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01091 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01092 }\
01093 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01094 uint8_t halfH[72];\
01095 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01096 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01097 }\
01098 \
01099 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01100 uint8_t half[256];\
01101 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01102 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
01103 }\
01104 \
01105 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01106 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
01107 }\
01108 \
01109 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01110 uint8_t half[256];\
01111 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01112 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
01113 }\
01114 \
01115 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
01116 uint8_t full[24*17];\
01117 uint8_t half[256];\
01118 copy_block17(full, src, 24, stride, 17);\
01119 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01120 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
01121 }\
01122 \
01123 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
01124 uint8_t full[24*17];\
01125 copy_block17(full, src, 24, stride, 17);\
01126 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
01127 }\
01128 \
01129 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
01130 uint8_t full[24*17];\
01131 uint8_t half[256];\
01132 copy_block17(full, src, 24, stride, 17);\
01133 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01134 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
01135 }\
01136 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
01137 uint8_t full[24*17];\
01138 uint8_t halfH[272];\
01139 uint8_t halfV[256];\
01140 uint8_t halfHV[256];\
01141 copy_block17(full, src, 24, stride, 17);\
01142 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01143 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01144 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01145 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01146 }\
01147 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01148 uint8_t full[24*17];\
01149 uint8_t halfH[272];\
01150 uint8_t halfHV[256];\
01151 copy_block17(full, src, 24, stride, 17);\
01152 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01153 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01154 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01155 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01156 }\
01157 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01158 uint8_t full[24*17];\
01159 uint8_t halfH[272];\
01160 uint8_t halfV[256];\
01161 uint8_t halfHV[256];\
01162 copy_block17(full, src, 24, stride, 17);\
01163 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01164 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01165 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01166 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01167 }\
01168 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01169 uint8_t full[24*17];\
01170 uint8_t halfH[272];\
01171 uint8_t halfHV[256];\
01172 copy_block17(full, src, 24, stride, 17);\
01173 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01174 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01175 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01176 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01177 }\
01178 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01179 uint8_t full[24*17];\
01180 uint8_t halfH[272];\
01181 uint8_t halfV[256];\
01182 uint8_t halfHV[256];\
01183 copy_block17(full, src, 24, stride, 17);\
01184 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01185 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01186 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01187 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01188 }\
01189 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01190 uint8_t full[24*17];\
01191 uint8_t halfH[272];\
01192 uint8_t halfHV[256];\
01193 copy_block17(full, src, 24, stride, 17);\
01194 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01195 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01196 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01197 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01198 }\
01199 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01200 uint8_t full[24*17];\
01201 uint8_t halfH[272];\
01202 uint8_t halfV[256];\
01203 uint8_t halfHV[256];\
01204 copy_block17(full, src, 24, stride, 17);\
01205 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
01206 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01207 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01208 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01209 }\
01210 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01211 uint8_t full[24*17];\
01212 uint8_t halfH[272];\
01213 uint8_t halfHV[256];\
01214 copy_block17(full, src, 24, stride, 17);\
01215 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01216 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01217 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01218 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01219 }\
01220 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01221 uint8_t halfH[272];\
01222 uint8_t halfHV[256];\
01223 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01224 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01225 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01226 }\
01227 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01228 uint8_t halfH[272];\
01229 uint8_t halfHV[256];\
01230 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01231 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01232 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01233 }\
01234 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01235 uint8_t full[24*17];\
01236 uint8_t halfH[272];\
01237 uint8_t halfV[256];\
01238 uint8_t halfHV[256];\
01239 copy_block17(full, src, 24, stride, 17);\
01240 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01241 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01242 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01243 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
01244 }\
01245 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01246 uint8_t full[24*17];\
01247 uint8_t halfH[272];\
01248 copy_block17(full, src, 24, stride, 17);\
01249 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01250 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01251 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01252 }\
01253 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01254 uint8_t full[24*17];\
01255 uint8_t halfH[272];\
01256 uint8_t halfV[256];\
01257 uint8_t halfHV[256];\
01258 copy_block17(full, src, 24, stride, 17);\
01259 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01260 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01261 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01262 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
01263 }\
01264 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01265 uint8_t full[24*17];\
01266 uint8_t halfH[272];\
01267 copy_block17(full, src, 24, stride, 17);\
01268 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01269 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01270 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01271 }\
01272 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01273 uint8_t halfH[272];\
01274 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01275 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01276 }
01277
/* Output operators for QPEL_MC: the filtered value b carries 5 fractional
 * bits, so (b + 16) >> 5 rounds to nearest and (b + 15) >> 5 is the
 * "no rounding" variant; cm is the 0..255 clipping table in scope at the
 * expansion site.  op_avg additionally averages with the existing dst pixel. */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

/* Instantiate the full set of qpel motion-compensation functions for the
 * put, put-no-rounding and averaging output operators. */
QPEL_MC(0, put_ , _ , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_ , _ , op_avg)

#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd

/* The mc00 (integer-pel) positions need no filtering: alias them to the
 * plain block copy / average helpers. */
#define put_qpel8_mc00_c ff_put_pixels8x8_c
#define avg_qpel8_mc00_c ff_avg_pixels8x8_c
#define put_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_8_c
01298
01299 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
01300 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01301 int i;
01302
01303 for(i=0; i<h; i++){
01304 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
01305 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
01306 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
01307 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
01308 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
01309 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
01310 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
01311 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
01312 dst+=dstStride;
01313 src+=srcStride;
01314 }
01315 }
01316
#if CONFIG_RV40_DECODER
/* RV40 (3,3) sub-pel wrappers: each simply forwards to the corresponding
 * diagonal half-pel (xy2) copy/average helper from the 8-bit pixel template. */
void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_xy2_8_c(dst, src, stride, 8);
}
void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
}
#endif
01331
01332 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
01333 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01334 int i;
01335
01336 for(i=0; i<w; i++){
01337 const int src_1= src[ -srcStride];
01338 const int src0 = src[0 ];
01339 const int src1 = src[ srcStride];
01340 const int src2 = src[2*srcStride];
01341 const int src3 = src[3*srcStride];
01342 const int src4 = src[4*srcStride];
01343 const int src5 = src[5*srcStride];
01344 const int src6 = src[6*srcStride];
01345 const int src7 = src[7*srcStride];
01346 const int src8 = src[8*srcStride];
01347 const int src9 = src[9*srcStride];
01348 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
01349 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
01350 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
01351 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
01352 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
01353 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
01354 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
01355 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
01356 src++;
01357 dst++;
01358 }
01359 }
01360
/* mspel position (1,0): blend the unfiltered source with the horizontally
 * low-pass filtered plane. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hbuf[64];

    wmv2_mspel8_h_lowpass(hbuf, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src, hbuf, stride, stride, 8, 8);
}
01366
/* mspel position (2,0): horizontal low-pass only, written straight to dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
01370
/* mspel position (3,0): blend the source shifted one pixel right with the
 * horizontally low-pass filtered plane. */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hbuf[64];

    wmv2_mspel8_h_lowpass(hbuf, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src + 1, hbuf, stride, stride, 8, 8);
}
01376
/* mspel position (0,2): vertical low-pass only, written straight to dst. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
01380
/* mspel position (1,2): blend the vertically filtered plane with the
 * horizontal-then-vertical filtered plane.  The horizontal pass starts one
 * row above src (11 rows) so the vertical pass has its top context. */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hbuf[88];
    uint8_t vbuf[64];
    uint8_t hvbuf[64];

    wmv2_mspel8_h_lowpass(hbuf, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(vbuf, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(hvbuf, hbuf + 8, 8, 8, 8);
    put_pixels8_l2_8(dst, vbuf, hvbuf, stride, 8, 8, 8);
}
/* mspel position (3,2): like mc12 but the vertical-only plane is taken one
 * pixel to the right (src + 1). */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hbuf[88];
    uint8_t vbuf[64];
    uint8_t hvbuf[64];

    wmv2_mspel8_h_lowpass(hbuf, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(vbuf, src + 1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(hvbuf, hbuf + 8, 8, 8, 8);
    put_pixels8_l2_8(dst, vbuf, hvbuf, stride, 8, 8, 8);
}
/* mspel position (2,2): horizontal low-pass (with one row of top context),
 * then vertical low-pass straight into dst. */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hbuf[88];

    wmv2_mspel8_h_lowpass(hbuf, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, hbuf + 8, stride, 8, 8);
}
01404
/* H.263 deblocking across a horizontal block edge: filters the two pixel
 * rows on each side of the edge (src points at the first row below it),
 * with filter strength looked up from the quantizer. */
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int x;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(x=0; x<8; x++){
            int d1, d2, ad1;
            int p0= src[x-2*stride];
            int p1= src[x-1*stride];
            int p2= src[x+0*stride];
            int p3= src[x+1*stride];
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;

            /* Dead-zone ramp: correction follows d up to +-strength, then
             * ramps back down to 0 at +-2*strength (no filtering of real
             * edges with large differences). */
            if (d<-2*strength) d1= 0;
            else if(d<- strength) d1=-2*strength - d;
            else if(d< strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else d1= 0;

            p1 += d1;
            p2 -= d1;
            /* Clamp to 0..255: bit 8 set means out of range; ~(p>>31) is 255
             * for positive overflow, 0 for negative.  NOTE(review): relies on
             * p1/p2 staying within [-256, 511] after the correction. */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[x-1*stride] = p1;
            src[x+0*stride] = p2;

            /* Secondary, weaker correction of the outer pixels, limited to
             * half the magnitude of the inner correction. */
            ad1= FFABS(d1)>>1;

            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[x-2*stride] = p0 - d2;
            src[x+ stride] = p3 + d2;
        }
    }
}
01441
/* H.263 deblocking across a vertical block edge: same algorithm as
 * h263_v_loop_filter_c but applied to the two pixel columns on each side
 * of the edge (src points at the first column right of it). */
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int y;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(y=0; y<8; y++){
            int d1, d2, ad1;
            int p0= src[y*stride-2];
            int p1= src[y*stride-1];
            int p2= src[y*stride+0];
            int p3= src[y*stride+1];
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;

            /* Dead-zone ramp: full correction up to +-strength, fading to 0
             * at +-2*strength so strong (real) edges are left alone. */
            if (d<-2*strength) d1= 0;
            else if(d<- strength) d1=-2*strength - d;
            else if(d< strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else d1= 0;

            p1 += d1;
            p2 -= d1;
            /* Branchless-style clamp to 0..255 via bit 8 test; see the
             * vertical filter for the range assumption. */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[y*stride-1] = p1;
            src[y*stride+0] = p2;

            /* Weaker correction for the outer pixels, bounded by |d1|/2. */
            ad1= FFABS(d1)>>1;

            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[y*stride-2] = p0 - d2;
            src[y*stride+1] = p3 + d2;
        }
    }
}
01478
/* H.261 in-loop deblocking of one 8x8 block: separable [1 2 1]/4 smoothing,
 * applied vertically into a scaled temporary and then horizontally back into
 * src.  Border rows and columns are passed through unfiltered. */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int tmp[64];
    int row, col;

    /* Vertical pass: tmp holds 4x the filtered value.  Top and bottom rows
     * are copied (scaled) without filtering. */
    for (col = 0; col < 8; col++) {
        tmp[col]         = 4 * src[col];
        tmp[col + 7 * 8] = 4 * src[col + 7 * stride];
    }
    for (row = 1; row < 7; row++) {
        for (col = 0; col < 8; col++) {
            const int s = row * stride + col;
            tmp[row * 8 + col] = src[s - stride] + 2 * src[s] + src[s + stride];
        }
    }

    /* Horizontal pass with rounding; left/right columns only undo the x4
     * scaling, interior columns apply [1 2 1] across tmp. */
    for (row = 0; row < 8; row++) {
        src[row * stride]     = (tmp[row * 8]     + 2) >> 2;
        src[row * stride + 7] = (tmp[row * 8 + 7] + 2) >> 2;
        for (col = 1; col < 7; col++) {
            const int t = row * 8 + col;
            src[row * stride + col] = (tmp[t - 1] + 2 * tmp[t] + tmp[t + 1] + 8) >> 4;
        }
    }
}
01505
/* Sum of absolute differences over a 16-wide block of height h.
 * v is an unused context pointer kept for the me_cmp_func signature. */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int i, j;

    for (i = 0; i < h; i++) {
        for (j = 0; j < 16; j++)
            sum += abs(pix1[j] - pix2[j]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
01533
/* SAD of a 16-wide block against the horizontal half-pel position of pix2
 * (rounded average of each pixel and its right neighbour). */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int i, j;

    for (i = 0; i < h; i++) {
        for (j = 0; j < 16; j++)
            sum += abs(pix1[j] - avg2(pix2[j], pix2[j + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
01561
/* SAD of a 16-wide block against the vertical half-pel position of pix2
 * (rounded average of each pixel and the one directly below it). */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sum = 0;
    int i, j;

    for (i = 0; i < h; i++) {
        for (j = 0; j < 16; j++)
            sum += abs(pix1[j] - avg2(pix2[j], below[j]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
01591
/* SAD of a 16-wide block against the diagonal half-pel position of pix2
 * (rounded average of the 2x2 neighbourhood). */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sum = 0;
    int i, j;

    for (i = 0; i < h; i++) {
        for (j = 0; j < 16; j++)
            sum += abs(pix1[j] - avg4(pix2[j], pix2[j + 1], below[j], below[j + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
01621
/* Sum of absolute differences over an 8-wide block of height h.
 * v is an unused context pointer kept for the me_cmp_func signature. */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int i, j;

    for (i = 0; i < h; i++) {
        for (j = 0; j < 8; j++)
            sum += abs(pix1[j] - pix2[j]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
01641
/* SAD of an 8-wide block against the horizontal half-pel position of pix2. */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int i, j;

    for (i = 0; i < h; i++) {
        for (j = 0; j < 8; j++)
            sum += abs(pix1[j] - avg2(pix2[j], pix2[j + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
01661
/* SAD of an 8-wide block against the vertical half-pel position of pix2. */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sum = 0;
    int i, j;

    for (i = 0; i < h; i++) {
        for (j = 0; j < 8; j++)
            sum += abs(pix1[j] - avg2(pix2[j], below[j]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
01683
/* SAD of an 8-wide block against the diagonal half-pel position of pix2. */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    uint8_t *below = pix2 + line_size;
    int sum = 0;
    int i, j;

    for (i = 0; i < h; i++) {
        for (j = 0; j < 8; j++)
            sum += abs(pix1[j] - avg4(pix2[j], pix2[j + 1], below[j], below[j + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
01705
01706 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
01707 MpegEncContext *c = v;
01708 int score1=0;
01709 int score2=0;
01710 int x,y;
01711
01712 for(y=0; y<h; y++){
01713 for(x=0; x<16; x++){
01714 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
01715 }
01716 if(y+1<h){
01717 for(x=0; x<15; x++){
01718 score2+= FFABS( s1[x ] - s1[x +stride]
01719 - s1[x+1] + s1[x+1+stride])
01720 -FFABS( s2[x ] - s2[x +stride]
01721 - s2[x+1] + s2[x+1+stride]);
01722 }
01723 }
01724 s1+= stride;
01725 s2+= stride;
01726 }
01727
01728 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
01729 else return score1 + FFABS(score2)*8;
01730 }
01731
01732 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
01733 MpegEncContext *c = v;
01734 int score1=0;
01735 int score2=0;
01736 int x,y;
01737
01738 for(y=0; y<h; y++){
01739 for(x=0; x<8; x++){
01740 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
01741 }
01742 if(y+1<h){
01743 for(x=0; x<7; x++){
01744 score2+= FFABS( s1[x ] - s1[x +stride]
01745 - s1[x+1] + s1[x+1+stride])
01746 -FFABS( s2[x ] - s2[x +stride]
01747 - s2[x+1] + s2[x+1+stride]);
01748 }
01749 }
01750 s1+= stride;
01751 s2+= stride;
01752 }
01753
01754 if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
01755 else return score1 + FFABS(score2)*8;
01756 }
01757
01758 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
01759 int i;
01760 unsigned int sum=0;
01761
01762 for(i=0; i<8*8; i++){
01763 int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
01764 int w= weight[i];
01765 b>>= RECON_SHIFT;
01766 assert(-512<b && b<512);
01767
01768 sum += (w*b)*(w*b)>>4;
01769 }
01770 return sum>>2;
01771 }
01772
01773 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
01774 int i;
01775
01776 for(i=0; i<8*8; i++){
01777 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
01778 }
01779 }
01780
01789 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
01790 {
01791 int i;
01792 DCTELEM temp[64];
01793
01794 if(last<=0) return;
01795
01796
01797 for(i=0; i<=last; i++){
01798 const int j= scantable[i];
01799 temp[j]= block[j];
01800 block[j]=0;
01801 }
01802
01803 for(i=0; i<=last; i++){
01804 const int j= scantable[i];
01805 const int perm_j= permutation[j];
01806 block[perm_j]= temp[j];
01807 }
01808 }
01809
/* Dummy comparison function: always returns a score of 0.
 * Installed for FF_CMP_ZERO in ff_set_cmp() below. */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}
01813
/* Fill cmp[0..5] with the comparison functions selected by 'type'.
 * Only the low byte of 'type' chooses the metric; one entry per block-size
 * index is copied from the corresponding DSPContext table.  Unknown types
 * leave the (zeroed) entries untouched after logging an error. */
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
    int i;

    memset(cmp, 0, sizeof(void*)*6);

    for(i=0; i<6; i++){
        switch(type&0xFF){
        case FF_CMP_SAD:
            cmp[i]= c->sad[i];
            break;
        case FF_CMP_SATD:
            cmp[i]= c->hadamard8_diff[i];
            break;
        case FF_CMP_SSE:
            cmp[i]= c->sse[i];
            break;
        case FF_CMP_DCT:
            cmp[i]= c->dct_sad[i];
            break;
        case FF_CMP_DCT264:
            cmp[i]= c->dct264_sad[i];
            break;
        case FF_CMP_DCTMAX:
            cmp[i]= c->dct_max[i];
            break;
        case FF_CMP_PSNR:
            cmp[i]= c->quant_psnr[i];
            break;
        case FF_CMP_BIT:
            cmp[i]= c->bit[i];
            break;
        case FF_CMP_RD:
            cmp[i]= c->rd[i];
            break;
        case FF_CMP_VSAD:
            cmp[i]= c->vsad[i];
            break;
        case FF_CMP_VSSE:
            cmp[i]= c->vsse[i];
            break;
        case FF_CMP_ZERO:
            cmp[i]= zero_cmp;
            break;
        case FF_CMP_NSSE:
            cmp[i]= c->nsse[i];
            break;
#if CONFIG_DWT
        case FF_CMP_W53:
            cmp[i]= c->w53[i];
            break;
        case FF_CMP_W97:
            cmp[i]= c->w97[i];
            break;
#endif
        default:
            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
        }
    }
}
01873
/* dst[i] += src[i] for i in [0, w), one machine word at a time.
 * The pb_7f/pb_80 masks implement SWAR byte addition: low 7 bits are added
 * normally while the top bit of each byte is fixed up via XOR so carries
 * never cross byte boundaries.  The tail (w not a multiple of
 * sizeof(long)) is handled bytewise.
 * NOTE(review): the word loop accesses the buffers through long* casts —
 * assumes the platform tolerates this aliasing/alignment, as the rest of
 * this file does. */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    long i;
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src+i);
        long b = *(long*)(dst+i);
        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
    }
    for(; i<w; i++)
        dst[i+0] += src[i+0];
}
01884
/* dst[i] = src1[i] + src2[i] for i in [0, w), using the same SWAR
 * per-byte word addition as add_bytes_c above; bytewise tail. */
static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    long i;
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src1+i);
        long b = *(long*)(src2+i);
        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
    }
    for(; i<w; i++)
        dst[i] = src1[i]+src2[i];
}
01895
/* dst[i] = src1[i] - src2[i] for i in [0, w).
 * Word-at-a-time SWAR subtraction (borrow kept inside each byte via the
 * pb_7f/pb_80 masks), with a bytewise tail.  On targets without fast
 * unaligned loads an unrolled bytewise path is used whenever src2 is not
 * word-aligned. */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    long i;
#if !HAVE_FAST_UNALIGNED
    if((long)src2 & (sizeof(long)-1)){
        for(i=0; i+7<w; i+=8){
            dst[i+0] = src1[i+0]-src2[i+0];
            dst[i+1] = src1[i+1]-src2[i+1];
            dst[i+2] = src1[i+2]-src2[i+2];
            dst[i+3] = src1[i+3]-src2[i+3];
            dst[i+4] = src1[i+4]-src2[i+4];
            dst[i+5] = src1[i+5]-src2[i+5];
            dst[i+6] = src1[i+6]-src2[i+6];
            dst[i+7] = src1[i+7]-src2[i+7];
        }
    }else
#endif
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src1+i);
        long b = *(long*)(src2+i);
        *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
    }
    for(; i<w; i++)
        dst[i+0] = src1[i+0]-src2[i+0];
}
01920
/* HuffYUV median-prediction decode: reconstruct w pixels by adding each
 * residual in 'diff' to the median of (left, top, left+top-topleft), where
 * the "top" row is src1.  Running left/top-left state is carried in/out
 * through *left and *left_top. */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
    int i;
    uint8_t left_val = *left;
    uint8_t topleft  = *left_top;

    for (i = 0; i < w; i++) {
        /* median predictor plus residual, wrapped to 8 bits by the store */
        left_val = mid_pred(left_val, src1[i], (left_val + src1[i] - topleft) & 0xFF) + diff[i];
        topleft  = src1[i];
        dst[i]   = left_val;
    }

    *left     = left_val;
    *left_top = topleft;
}
01937
/* HuffYUV median-prediction encode: emit residuals dst[i] = src2[i] minus
 * the median of (left, top, left+top-topleft), with src1 as the row above
 * and src2 as the current row.  State carried through *left / *left_top. */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
    int i;
    uint8_t left_val = *left;
    uint8_t topleft  = *left_top;

    for (i = 0; i < w; i++) {
        const int pred = mid_pred(left_val, src1[i], (left_val + src1[i] - topleft) & 0xFF);

        topleft  = src1[i];
        left_val = src2[i];
        dst[i]   = left_val - pred;  /* residual, wrapped to 8 bits */
    }

    *left     = left_val;
    *left_top = topleft;
}
01955
/* HuffYUV left-prediction decode: each output byte is the running sum of
 * all residuals so far, seeded with 'acc'.  Returns the final accumulator
 * so the caller can chain rows.  (The original unrolled this by two; a
 * single loop computes identical results.) */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
    int i;

    for (i = 0; i < w; i++) {
        acc   += src[i];
        dst[i] = acc;   /* truncated to 8 bits on store */
    }

    return acc;
}
01974
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/* Left-prediction decode for packed 32-bit BGRA pixels: each channel keeps
 * its own running sum across the row, seeded from and written back to the
 * caller's *red/*green/*blue/*alpha.  B/G/R/A byte offsets depend on
 * endianness (see the defines above). */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
    int i;
    int r = *red, g = *green, b = *blue, a = *alpha;

    for (i = 0; i < w; i++) {
        const uint8_t *in  = src + 4 * i;
        uint8_t       *out = dst + 4 * i;

        b += in[B];
        g += in[G];
        r += in[R];
        a += in[A];

        out[B] = b;   /* stores truncate each channel to 8 bits */
        out[G] = g;
        out[R] = r;
        out[A] = a;
    }

    *red   = r;
    *green = g;
    *blue  = b;
    *alpha = a;
}
#undef B
#undef G
#undef R
#undef A
02015
/* Elementary butterfly helpers for the Hadamard transforms below.
 * (Comments cannot appear inside the macro bodies because of the
 * backslash line continuations.) */

/* BUTTERFLY2: write sum and difference of i1,i2 into o1,o2. */
#define BUTTERFLY2(o1,o2,i1,i2) \
    o1= (i1)+(i2);\
    o2= (i1)-(i2);

/* BUTTERFLY1: in-place butterfly, x <- x+y and y <- x-y (old x). */
#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

/* BUTTERFLYA: |x+y| + |x-y| — the final, absolute-value stage. */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
02030
/* SATD: 8x8 Hadamard transform of the src-dst difference block, returning
 * the sum of absolute transformed coefficients.  First loop does the
 * horizontal (row) butterflies on the pixel differences; second loop does
 * the vertical (column) butterflies, accumulating |.| via BUTTERFLYA on
 * the last stage.  h must be 8. */
static int hadamard8_diff8x8_c( void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        /* 8-point Hadamard on row i of the difference block */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        /* 8-point Hadamard down column i, summing |coeff| at the end */
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
    return sum;
}
02075
/* Intra SATD: same 8x8 Hadamard as hadamard8_diff8x8_c but applied to the
 * source pixels directly (no reference block; 'dummy' is unused).  The
 * final subtraction removes the contribution of temp[0]+temp[32] — the DC
 * term of the transform — NOTE(review): presumably so pure brightness does
 * not dominate the score; confirm against callers. */
static int hadamard8_intra8x8_c( void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        /* 8-point Hadamard on row i of the source block */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        /* 8-point Hadamard down column i */
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    sum -= FFABS(temp[8*0] + temp[8*4]);

    return sum;
}
02123
/* DCT-domain SAD: forward-DCT the src1-src2 difference block and return
 * the sum of absolute coefficient values.  h must be 8. */
static int dct_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);
    return s->dsp.sum_abs_dctelem(temp);
}
02134
#if CONFIG_GPL
/* One 8-point H.264-style integer DCT pass over SRC(0..7), writing
 * DST(0..7).  SRC/DST are macros redefined by the caller so the same body
 * serves both the row pass (in-place) and the column pass (accumulate).
 * No comments inside the body — backslash continuations. */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}

/* H.264-DCT-domain SAD: transform the src1-src2 difference block with the
 * 2-D separable DCT8_1D (rows in place, then columns) and sum the absolute
 * values during the column pass. */
static int dct264_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DCTELEM dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
#endif
02187
02188 static int dct_max8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
02189 MpegEncContext * const s= (MpegEncContext *)c;
02190 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
02191 int sum=0, i;
02192
02193 assert(h==8);
02194
02195 s->dsp.diff_pixels(temp, src1, src2, stride);
02196 s->dsp.fdct(temp);
02197
02198 for(i=0; i<64; i++)
02199 sum= FFMAX(sum, FFABS(temp[i]));
02200
02201 return sum;
02202 }
02203
/* Quantisation-noise metric: run the difference block through the encoder's
 * quantise/de-quantise round trip and measure the squared error it
 * introduces, compared against a saved copy of the pre-quantisation
 * coefficients.  NOTE(review): relies on fast_dct_quantize performing the
 * forward transform internally, so 'bak' (raw pixel differences) and the
 * reconstructed 'temp' (after unquantize + IDCT) are in the same domain —
 * confirm against the quantizer implementation.  h must be 8. */
static int quant_psnr8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
    DCTELEM * const bak = temp+64;
    int sum=0, i;

    assert(h==8);
    s->mb_intra=0;  /* force inter-style quantisation */

    s->dsp.diff_pixels(temp, src1, src2, stride);

    memcpy(bak, temp, 64*sizeof(DCTELEM));

    s->block_last_index[0]= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
    ff_simple_idct_8(temp);

    for(i=0; i<64; i++)
        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);

    return sum;
}
02226
/* Rate-distortion metric for one 8x8 block: quantise the difference block,
 * estimate the bit cost from the VLC length tables (run/level coding with
 * escape fallback), reconstruct the block, and return
 * distortion + lambda-weighted bits.  h must be 8. */
static int rd8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
    int i, last, run, bits, level, distortion, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    /* work on local copies so the reconstruction below can be done in place */
    copy_block8(lsrc1, src1, 8, stride, 8);
    copy_block8(lsrc2, src2, 8, stride, 8);

    s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    /* intra blocks code the DC separately and start AC coding at index 1 */
    if (s->mb_intra) {
        start_i = 1;
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256];  /* DC cost, offset-biased index */
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* sum VLC lengths over (run, level) pairs in scan order; levels
         * outside [-64, 63] fall back to the escape-code length */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;  /* bias so table index is non-negative */
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        /* the final coefficient uses the dedicated "last" tables */
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;

    }

    if(last>=0){
        if(s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    /* reconstruct into the local reference copy and measure the SSE */
    s->dsp.idct_add(lsrc2, 8, temp);

    distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);

    /* 109/128 * qscale^2 approximates the rate-distortion lambda */
    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
}
02302
/* Bit-cost metric for one 8x8 block: quantise the difference block and
 * return only the estimated number of bits needed to code it (same VLC
 * run/level accounting as rd8x8_c, without the distortion term).
 * h must be 8. */
static int bit8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    int i, last, run, bits, level, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    /* intra blocks code the DC separately and start AC coding at index 1 */
    if (s->mb_intra) {
        start_i = 1;
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256];
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* sum VLC lengths over (run, level) pairs in scan order */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;  /* bias so table index is non-negative */
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        /* the final coefficient uses the dedicated "last" tables */
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
    }

    return bits;
}
02361
/* VSAD_INTRA(size): generates vsad_intra<size>_c — the sum of absolute
 * vertical gradients within a single block (no reference; 'dummy' unused).
 * Starts at row 1 because each row is compared with the one above it.
 * No comments inside the macro body — backslash continuations. */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0; \
    int x,y; \
 \
    for(y=1; y<h; y++){ \
        for(x=0; x<size; x+=4){ \
            score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride]) \
                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \
        } \
        s+= stride; \
    } \
 \
    return score; \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
02379
/* Vertical SAD between two 16-wide blocks: accumulates the absolute
 * difference of the vertical gradients of s1 and s2 (rows compared with
 * the row above, hence y starts at 1). */
static int vsad16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int total = 0;
    int x, y;

    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x++)
            total += FFABS(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]);

        s1 += stride;
        s2 += stride;
    }

    return total;
}
02394
/* SQ: square helper for the vertical-SSE metrics below. */
#define SQ(a) ((a)*(a))
/* VSSE_INTRA(size): generates vsse_intra<size>_c — the sum of squared
 * vertical gradients within a single block (no reference; 'dummy' unused).
 * No comments inside the macro body — backslash continuations. */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0; \
    int x,y; \
 \
    for(y=1; y<h; y++){ \
        for(x=0; x<size; x+=4){ \
            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride]) \
                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \
        } \
        s+= stride; \
    } \
 \
    return score; \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
02413
/* Vertical SSE between two 16-wide blocks: accumulates the squared
 * difference of the vertical gradients of s1 and s2. */
static int vsse16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int total = 0;
    int x, y;

    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x++)
            total += SQ(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]);

        s1 += stride;
        s2 += stride;
    }

    return total;
}
02428
/* Sum of squared differences between an int8 vector and an int16 vector. */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int i, acc = 0;

    for (i = 0; i < size; i++) {
        const int d = pix1[i] - pix2[i];
        acc += d * d;
    }
    return acc;
}
02437
/* Instantiate 16x16 variants of the 8x8 comparators above.
 * NOTE(review): WRAPPER8_16_SQ is defined elsewhere in this file —
 * presumably it evaluates the 8x8 metric over the four 8x8 quadrants and
 * sums the scores; confirm against its definition. */
WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#if CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
02448
/* Element-wise product of two float vectors: dst[i] = src0[i] * src1[i]. */
static void vector_fmul_c(float *dst, const float *src0, const float *src1, int len){
    const float *end = src0 + len;

    while (src0 < end)
        *dst++ = *src0++ * *src1++;
}
02454
/* dst[i] = src0[i] * src1[len-1-i]: the second operand is read back to
 * front. */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int i;

    for (i = 0; i < len; i++)
        dst[i] = src0[i] * src1[len - 1 - i];
}
02461
/* Multiply-add per element: dst[i] = src0[i] * src1[i] + src2[i]. */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
    const float *end = src2 + len;

    while (src2 < end)
        *dst++ = *src0++ * *src1++ + *src2++;
}
02467
/* Overlap-add windowing: produces 2*len outputs centred at dst+len, using
 * a window mirrored about its midpoint.  For each pair (i < 0, j >= 0)
 * measured from the centre:
 *   dst[i] = src0[i]*win[j] - src1[j]*win[i]
 *   dst[j] = src0[i]*win[i] + src1[j]*win[j]
 * Same pointer walk as the original: dst, win and src0 are advanced by len
 * so that i indexes the first half with negative offsets. */
static void vector_fmul_window_c(float *dst, const float *src0,
                                 const float *src1, const float *win, int len)
{
    int i, j;

    dst  += len;
    win  += len;
    src0 += len;

    for (i = -len, j = len - 1; i < 0; i++, j--) {
        const float a  = src0[i];
        const float b  = src1[j];
        const float wi = win[i];
        const float wj = win[j];

        dst[i] = a * wj - b * wi;
        dst[j] = a * wi + b * wj;
    }
}
02484
/* Scale a float vector by a scalar: dst[i] = src[i] * mul. */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    while (len-- > 0)
        *dst++ = *src++ * mul;
}
02492
/* Scalar multiply-accumulate into a float vector: dst[i] += src[i] * mul. */
static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    while (len-- > 0)
        *dst++ += *src++ * mul;
}
02500
/* In-place butterfly on two non-aliasing float vectors:
 * (v1[i], v2[i]) <- (v1[i]+v2[i], v1[i]-v2[i]). */
static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                int len)
{
    int i;

    for (i = 0; i < len; i++) {
        const float sum  = v1[i] + v2[i];
        const float diff = v1[i] - v2[i];

        v1[i] = sum;
        v2[i] = diff;
    }
}
02511
/* Butterfly with interleaved output: for each i, dst receives the pair
 * (src0[i]+src1[i], src0[i]-src1[i]) at positions 2i and 2i+1. */
static void butterflies_float_interleave_c(float *dst, const float *src0,
                                           const float *src1, int len)
{
    int i;

    for (i = 0; i < len; i++) {
        const float a = src0[i];
        const float b = src1[i];

        dst[2 * i]     = a + b;
        dst[2 * i + 1] = a - b;
    }
}
02523
/* Dot product of two float vectors, accumulated in source order. */
static float scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    const float *end = v1 + len;
    float acc = 0.0;

    while (v1 < end)
        acc += *v1++ * *v2++;

    return acc;
}
02534
/* Clamp one float, operating on its IEEE-754 bit pattern.
 * Caller guarantees min < 0 < max (see vector_clipf_c): then any value
 * whose bits exceed mini is a more-negative float (or any negative when a
 * is positive bits > mini never triggers for positives since mini has the
 * sign bit set), and flipping the sign bit lets positives be compared
 * against maxisign directly. */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    if (a > mini)
        return mini;                       /* below the negative bound */
    if ((a ^ (1U << 31)) > maxisign)
        return maxi;                       /* above the positive bound */
    return a;                              /* already inside [min, max] */
}
02543
/* Clamp 'len' floats into [*min, *max] using integer bit comparisons;
 * only valid when *min < 0 < *max (callers check).  len is processed in
 * groups of 8, matching the unrolled original.
 * NOTE(review): reads the float buffers through uint32_t* casts, as the
 * original did — same aliasing assumptions as the rest of this file. */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
    int i, k;
    const uint32_t mini     = *(uint32_t*)min;
    const uint32_t maxi     = *(uint32_t*)max;
    const uint32_t maxisign = maxi ^ (1U << 31);
    uint32_t *d             = (uint32_t*)dst;
    const uint32_t *s       = (const uint32_t*)src;

    for (i = 0; i < len; i += 8)
        for (k = 0; k < 8; k++)
            d[i + k] = clipf_c_one(s[i + k], mini, maxi, maxisign);
}
/* Clamp 'len' floats into [min, max].  When the bounds straddle zero the
 * faster bit-comparison path is used; otherwise a plain av_clipf loop,
 * eight elements per iteration (callers pass len as a multiple of 8). */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    int i;

    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
        return;
    }

    for (i = 0; i < len; i += 8) {
        int k;
        for (k = 0; k < 8; k++)
            dst[i + k] = av_clipf(src[i + k], min, max);
    }
}
02579
/* Dot product of two int16 vectors, with each partial product arithmetic-
 * shifted right by 'shift' before being accumulated. */
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order, int shift)
{
    int i;
    int acc = 0;

    for (i = 0; i < order; i++)
        acc += (v1[i] * v2[i]) >> shift;

    return acc;
}
02589
/* Simultaneously compute the dot product v1.v2 and update v1 in place with
 * v1[i] += mul * v3[i].  The product uses the pre-update v1 values. */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int i;
    int acc = 0;

    for (i = 0; i < order; i++) {
        acc   += v1[i] * v2[i];
        v1[i] += mul * v3[i];
    }
    return acc;
}
02599
/* Apply a symmetric int16 window: window[i] multiplies both input[i] and
 * its mirror input[len-1-i], with round-to-nearest on the Q15 product. */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    int i;
    const int half = len >> 1;

    for (i = 0; i < half; i++) {
        const int16_t w = window[i];
        const int j = len - i - 1;

        output[i] = (MUL16(input[i], w) + (1 << 14)) >> 15;
        output[j] = (MUL16(input[j], w) + (1 << 14)) >> 15;
    }
}
02612
/* Clamp each of 'len' int32 samples into [min, max], eight per iteration.
 * NOTE(review): 'len' is unsigned and decremented by 8, so callers must
 * pass a positive multiple of 8 — anything else wraps len and overruns the
 * buffers; confirm against the callers' contract. */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    do {
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        len -= 8;
    } while (len > 0);
}
02628
/* Fixed-point cosine constants for the WMV2 IDCT (scaled by 2048). */
#define W0 2048
#define W1 2841
#define W2 2676
#define W3 2408
#define W4 2048
#define W5 1609
#define W6 1108
#define W7 565

/* One horizontal pass of the WMV2 8-point IDCT over b[0..7], in place.
 * Partial sums are split into even (e*) and odd (o*) terms; outputs are
 * rounded by 1<<7 before the >>8 descale. */
static void wmv2_idct_row(short * b)
{
    int t1, t2;
    int e0, e2, e4, e6, o1, o3, o5, o7;

    o1 = W1*b[1] + W7*b[7];
    o7 = W7*b[1] - W1*b[7];
    o5 = W5*b[5] + W3*b[3];
    o3 = W3*b[5] - W5*b[3];
    e2 = W2*b[2] + W6*b[6];
    e6 = W6*b[2] - W2*b[6];
    e0 = W0*b[0] + W0*b[4];
    e4 = W0*b[0] - W0*b[4];

    /* 181/256 approximates 1/sqrt(2) */
    t1 = (181*(o1 - o5 + o7 - o3) + 128) >> 8;
    t2 = (181*(o1 - o5 - o7 + o3) + 128) >> 8;

    b[0] = (e0 + e2 + o1 + o5 + (1<<7)) >> 8;
    b[1] = (e4 + e6 + t1      + (1<<7)) >> 8;
    b[2] = (e4 - e6 + t2      + (1<<7)) >> 8;
    b[3] = (e0 - e2 + o7 + o3 + (1<<7)) >> 8;
    b[4] = (e0 - e2 - o7 - o3 + (1<<7)) >> 8;
    b[5] = (e4 - e6 - t2      + (1<<7)) >> 8;
    b[6] = (e4 + e6 - t1      + (1<<7)) >> 8;
    b[7] = (e0 + e2 - o1 - o5 + (1<<7)) >> 8;
}
02664 static void wmv2_idct_col(short * b)
02665 {
02666 int s1,s2;
02667 int a0,a1,a2,a3,a4,a5,a6,a7;
02668
02669 a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
02670 a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
02671 a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
02672 a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
02673 a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
02674 a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
02675 a0 = (W0*b[8*0]+W0*b[8*4] )>>3;
02676 a4 = (W0*b[8*0]-W0*b[8*4] )>>3;
02677
02678 s1 = (181*(a1-a5+a7-a3)+128)>>8;
02679 s2 = (181*(a1-a5-a7+a3)+128)>>8;
02680
02681 b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
02682 b[8*1] = (a4+a6 +s1 + (1<<13))>>14;
02683 b[8*2] = (a4-a6 +s2 + (1<<13))>>14;
02684 b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;
02685
02686 b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
02687 b[8*5] = (a4-a6 -s2 + (1<<13))>>14;
02688 b[8*6] = (a4+a6 -s1 + (1<<13))>>14;
02689 b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
02690 }
/* Full 2-D WMV2 IDCT on an 8x8 block: separable, 8 row passes followed by
 * 8 column passes, all in place. */
void ff_wmv2_idct_c(short * block){
    int i;

    for (i = 0; i < 8; i++)
        wmv2_idct_row(block + 8 * i);
    for (i = 0; i < 8; i++)
        wmv2_idct_col(block + i);
}
02701
02702
/* IDCT wrappers: run the named inverse transform in place, then either
 * store the clamped result into 'dest' (put) or add it to 'dest' (add). */
static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    ff_put_pixels_clamped_c(block, dest, line_size);
}
static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    ff_add_pixels_clamped_c(block, dest, line_size);
}
/* Reference (jrevdct) IDCT wrappers, same put/add pattern. */
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    ff_put_pixels_clamped_c(block, dest, line_size);
}
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    ff_add_pixels_clamped_c(block, dest, line_size);
}
02723
/* Low-resolution (lowres) IDCT wrappers: 4x4 and 2x2 reduced transforms
 * with matching reduced-size clamped put/add. */
static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    put_pixels_clamped4_c(block, dest, line_size);
}
static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    add_pixels_clamped4_c(block, dest, line_size);
}

static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    put_pixels_clamped2_c(block, dest, line_size);
}
static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    add_pixels_clamped2_c(block, dest, line_size);
}
02745
/* 1x1 "IDCT": only the DC coefficient survives at lowres 3.  The value is
 * descaled by >>3 with +4 rounding and clamped through ff_cropTbl. */
static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    dest[0] = cm[(block[0] + 4)>>3];
}
static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
}
02758
/* Intentional no-op with a (mem, stride, h) signature; NOTE(review):
 * presumably the default for optional hooks such as prefetch — confirm
 * where it is installed. */
static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
02760
02761
02762 av_cold void dsputil_static_init(void)
02763 {
02764 int i;
02765
02766 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
02767 for(i=0;i<MAX_NEG_CROP;i++) {
02768 ff_cropTbl[i] = 0;
02769 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
02770 }
02771
02772 for(i=0;i<512;i++) {
02773 ff_squareTbl[i] = (i - 256) * (i - 256);
02774 }
02775
02776 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
02777 }
02778
/* Sanity check that the compiler honours 16-byte alignment of stack
 * variables.  Returns 0 on success, -1 when a LOCAL_ALIGNED_16 buffer
 * turns out misaligned (logging the miscompilation warning only once,
 * and only on SIMD-capable builds). */
int ff_check_alignment(void){
    static int did_fail=0;
    LOCAL_ALIGNED_16(int, aligned, [4]);

    if((intptr_t)aligned & 15){
        if(!did_fail){
#if HAVE_MMX || HAVE_ALTIVEC
            av_log(NULL, AV_LOG_ERROR,
                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
                "and may be very slow or crash. This is not a bug in libavcodec,\n"
                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
                "Do not report crashes to Libav developers.\n");
#endif
            did_fail=1;
        }
        return -1;
    }
    return 0;
}
02798
02799 av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
02800 {
02801 int i;
02802
02803 ff_check_alignment();
02804
02805 #if CONFIG_ENCODERS
02806 if (avctx->bits_per_raw_sample == 10) {
02807 c->fdct = ff_jpeg_fdct_islow_10;
02808 c->fdct248 = ff_fdct248_islow_10;
02809 } else {
02810 if(avctx->dct_algo==FF_DCT_FASTINT) {
02811 c->fdct = fdct_ifast;
02812 c->fdct248 = fdct_ifast248;
02813 }
02814 else if(avctx->dct_algo==FF_DCT_FAAN) {
02815 c->fdct = ff_faandct;
02816 c->fdct248 = ff_faandct248;
02817 }
02818 else {
02819 c->fdct = ff_jpeg_fdct_islow_8;
02820 c->fdct248 = ff_fdct248_islow_8;
02821 }
02822 }
02823 #endif //CONFIG_ENCODERS
02824
02825 if(avctx->lowres==1){
02826 c->idct_put= ff_jref_idct4_put;
02827 c->idct_add= ff_jref_idct4_add;
02828 c->idct = j_rev_dct4;
02829 c->idct_permutation_type= FF_NO_IDCT_PERM;
02830 }else if(avctx->lowres==2){
02831 c->idct_put= ff_jref_idct2_put;
02832 c->idct_add= ff_jref_idct2_add;
02833 c->idct = j_rev_dct2;
02834 c->idct_permutation_type= FF_NO_IDCT_PERM;
02835 }else if(avctx->lowres==3){
02836 c->idct_put= ff_jref_idct1_put;
02837 c->idct_add= ff_jref_idct1_add;
02838 c->idct = j_rev_dct1;
02839 c->idct_permutation_type= FF_NO_IDCT_PERM;
02840 }else{
02841 if (avctx->bits_per_raw_sample == 10) {
02842 c->idct_put = ff_simple_idct_put_10;
02843 c->idct_add = ff_simple_idct_add_10;
02844 c->idct = ff_simple_idct_10;
02845 c->idct_permutation_type = FF_NO_IDCT_PERM;
02846 } else {
02847 if(avctx->idct_algo==FF_IDCT_INT){
02848 c->idct_put= ff_jref_idct_put;
02849 c->idct_add= ff_jref_idct_add;
02850 c->idct = j_rev_dct;
02851 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
02852 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
02853 avctx->idct_algo==FF_IDCT_VP3){
02854 c->idct_put= ff_vp3_idct_put_c;
02855 c->idct_add= ff_vp3_idct_add_c;
02856 c->idct = ff_vp3_idct_c;
02857 c->idct_permutation_type= FF_NO_IDCT_PERM;
02858 }else if(avctx->idct_algo==FF_IDCT_WMV2){
02859 c->idct_put= ff_wmv2_idct_put_c;
02860 c->idct_add= ff_wmv2_idct_add_c;
02861 c->idct = ff_wmv2_idct_c;
/*
 * Tail of dsputil_init(): finishes the IDCT-algorithm selection chain,
 * installs the scalar C implementations into every DSPContext function
 * table, wires the bit-depth-specific variants, lets architecture
 * backends override the C versions, and finally builds the IDCT
 * coefficient permutation.
 *
 * NOTE(review): the function signature and the start of the IDCT
 * selection `if/else` chain are above this excerpt.
 */
02862 c->idct_permutation_type= FF_NO_IDCT_PERM;
/* Floating-point FAAN IDCT (no coefficient permutation needed). */
02863 }else if(avctx->idct_algo==FF_IDCT_FAAN){
02864 c->idct_put= ff_faanidct_put;
02865 c->idct_add= ff_faanidct_add;
02866 c->idct = ff_faanidct;
02867 c->idct_permutation_type= FF_NO_IDCT_PERM;
/* Electronic Arts TGQ IDCT — only the put variant exists. */
02868 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
02869 c->idct_put= ff_ea_idct_put_c;
02870 c->idct_permutation_type= FF_NO_IDCT_PERM;
/* Default: the 8-bit simple IDCT. */
02871 }else{
02872 c->idct_put = ff_simple_idct_put_8;
02873 c->idct_add = ff_simple_idct_add_8;
02874 c->idct = ff_simple_idct_8;
02875 c->idct_permutation_type= FF_NO_IDCT_PERM;
02876 }
02877 }
02878 }
02879
/* Basic block/pixel helpers (scalar C versions). */
02880 c->diff_pixels = diff_pixels_c;
02881 c->put_pixels_clamped = ff_put_pixels_clamped_c;
02882 c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
02883 c->add_pixels_clamped = ff_add_pixels_clamped_c;
02884 c->sum_abs_dctelem = sum_abs_dctelem_c;
/* Global motion compensation (gmc1 = single-point variant). */
02885 c->gmc1 = gmc1_c;
02886 c->gmc = ff_gmc_c;
02887 c->pix_sum = pix_sum_c;
02888 c->pix_norm1 = pix_norm1_c;
02889
/* fill_block_tab[0] = 16-wide blocks, [1] = 8-wide blocks. */
02890 c->fill_block_tab[0] = fill_block16_c;
02891 c->fill_block_tab[1] = fill_block8_c;
02892
02893
/*
 * SAD tables: row [0] = 16x16, row [1] = 8x8; columns 1..3 are the
 * _x2/_y2/_xy2 (half-pel averaged) variants per the function names.
 */
02894 c->pix_abs[0][0] = pix_abs16_c;
02895 c->pix_abs[0][1] = pix_abs16_x2_c;
02896 c->pix_abs[0][2] = pix_abs16_y2_c;
02897 c->pix_abs[0][3] = pix_abs16_xy2_c;
02898 c->pix_abs[1][0] = pix_abs8_c;
02899 c->pix_abs[1][1] = pix_abs8_x2_c;
02900 c->pix_abs[1][2] = pix_abs8_y2_c;
02901 c->pix_abs[1][3] = pix_abs8_xy2_c;
02902
/*
 * Third-pel motion compensation. Table index = x + 4*y with x,y in
 * 0..2 (e.g. mc12 -> 1 + 4*2 = 9), so slots 3, 7 and 11..15 stay unset.
 */
02903 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
02904 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
02905 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
02906 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
02907 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
02908 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
02909 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
02910 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
02911 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
02912
02913 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
02914 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
02915 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
02916 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
02917 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
02918 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
02919 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
02920 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
02921 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
02922
/*
 * Fill a full 16-entry quarter-pel MC table: entry index = x + 4*y
 * with x,y in 0..3, named PFX<NUM>_mc<x><y>_c (e.g. [13] = mc13).
 */
02923 #define dspfunc(PFX, IDX, NUM) \
02924 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
02925 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
02926 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
02927 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
02928 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
02929 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
02930 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
02931 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
02932 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
02933 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
02934 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
02935 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
02936 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
02937 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
02938 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
02939 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
02940
/* MPEG-4-style qpel MC: index 0 = 16x16 tables, index 1 = 8x8 tables. */
02941 dspfunc(put_qpel, 0, 16);
02942 dspfunc(put_no_rnd_qpel, 0, 16);
02943
02944 dspfunc(avg_qpel, 0, 16);
02945
02946
02947 dspfunc(put_qpel, 1, 8);
02948 dspfunc(put_no_rnd_qpel, 1, 8);
02949
02950 dspfunc(avg_qpel, 1, 8);
02951
02952
02953 #undef dspfunc
02954
/* Decoder-specific DSP hooks, compiled in only when those decoders are. */
02955 #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
02956 ff_mlp_init(c, avctx);
02957 #endif
02958 #if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
02959 ff_intrax8dsp_init(c,avctx);
02960 #endif
02961
/* WMV2 "mspel" 8x8 MC table; slot 0 reuses the plain 8x8 copy. */
02962 c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
02963 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
02964 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
02965 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
02966 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
02967 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
02968 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
02969 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
02970
/* Comparison functions: slot [0] = 16x16 version, [1] = 8x8 version. */
02971 #define SET_CMP_FUNC(name) \
02972 c->name[0]= name ## 16_c;\
02973 c->name[1]= name ## 8x8_c;
02974
02975 SET_CMP_FUNC(hadamard8_diff)
/* Slots 4/5 hold the intra variants (cf. vsad/vsse below). */
02976 c->hadamard8_diff[4]= hadamard8_intra16_c;
02977 c->hadamard8_diff[5]= hadamard8_intra8x8_c;
02978 SET_CMP_FUNC(dct_sad)
02979 SET_CMP_FUNC(dct_max)
02980 #if CONFIG_GPL
02981 SET_CMP_FUNC(dct264_sad)
02982 #endif
02983 c->sad[0]= pix_abs16_c;
02984 c->sad[1]= pix_abs8_c;
02985 c->sse[0]= sse16_c;
02986 c->sse[1]= sse8_c;
02987 c->sse[2]= sse4_c;
02988 SET_CMP_FUNC(quant_psnr)
02989 SET_CMP_FUNC(rd)
02990 SET_CMP_FUNC(bit)
02991 c->vsad[0]= vsad16_c;
02992 c->vsad[4]= vsad_intra16_c;
02993 c->vsad[5]= vsad_intra8_c;
02994 c->vsse[0]= vsse16_c;
02995 c->vsse[4]= vsse_intra16_c;
02996 c->vsse[5]= vsse_intra8_c;
02997 c->nsse[0]= nsse16_c;
02998 c->nsse[1]= nsse8_c;
/* Discrete wavelet transform helpers (Snow/JPEG2000 builds). */
02999 #if CONFIG_DWT
03000 ff_dsputil_init_dwt(c);
03001 #endif
03002
03003 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
03004
/* Byte-wise helpers used by lossless codecs (e.g. HuffYUV = "hfyu"). */
03005 c->add_bytes= add_bytes_c;
03006 c->add_bytes_l2= add_bytes_l2_c;
03007 c->diff_bytes= diff_bytes_c;
03008 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
03009 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
03010 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
03011 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
03012 c->bswap_buf= bswap_buf;
03013 c->bswap16_buf = bswap16_buf;
03014 #if CONFIG_PNG_DECODER
03015 c->add_png_paeth_prediction= ff_add_png_paeth_prediction;
03016 #endif
03017
/* Loop filters, only wired when the matching codecs are built in. */
03018 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
03019 c->h263_h_loop_filter= h263_h_loop_filter_c;
03020 c->h263_v_loop_filter= h263_v_loop_filter_c;
03021 }
03022
03023 if (CONFIG_VP3_DECODER) {
03024 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
03025 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
03026 c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
03027 }
03028
03029 c->h261_loop_filter= h261_loop_filter_c;
03030
03031 c->try_8x8basis= try_8x8basis_c;
03032 c->add_8x8basis= add_8x8basis_c;
03033
/* Audio-decoder float kernels (hooked here so SIMD can override them). */
03034 #if CONFIG_VORBIS_DECODER
03035 c->vorbis_inverse_coupling = vorbis_inverse_coupling;
03036 #endif
03037 #if CONFIG_AC3_DECODER
03038 c->ac3_downmix = ff_ac3_downmix_c;
03039 #endif
03040 c->vector_fmul = vector_fmul_c;
03041 c->vector_fmul_reverse = vector_fmul_reverse_c;
03042 c->vector_fmul_add = vector_fmul_add_c;
03043 c->vector_fmul_window = vector_fmul_window_c;
03044 c->vector_clipf = vector_clipf_c;
03045 c->scalarproduct_int16 = scalarproduct_int16_c;
03046 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
03047 c->apply_window_int16 = apply_window_int16_c;
03048 c->vector_clip_int32 = vector_clip_int32_c;
03049 c->scalarproduct_float = scalarproduct_float_c;
03050 c->butterflies_float = butterflies_float_c;
03051 c->butterflies_float_interleave = butterflies_float_interleave_c;
03052 c->vector_fmul_scalar = vector_fmul_scalar_c;
03053 c->vector_fmac_scalar = vector_fmac_scalar_c;
03054
/* Plane shrinkers: [0] = 1:1 copy, then 2x/4x/8x per the ff_shrinkNN names. */
03055 c->shrink[0]= av_image_copy_plane;
03056 c->shrink[1]= ff_shrink22;
03057 c->shrink[2]= ff_shrink44;
03058 c->shrink[3]= ff_shrink88;
03059
/* Presumably a no-op stub (name "just_return"); arch inits may replace it. */
03060 c->prefetch= just_return;
03061
/* Clear the 2-tap qpel tables; unfilled slots fall back to H.264 qpel below. */
03062 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
03063 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
03064
/*
 * Name builders for the bit-depth templated functions from
 * dsputil_template.c: FUNC -> f_<depth>, FUNCC -> f_<depth>_c.
 */
03065 #undef FUNC
03066 #undef FUNCC
03067 #define FUNC(f, depth) f ## _ ## depth
03068 #define FUNCC(f, depth) f ## _ ## depth ## _c
03069
/* 4-entry half-pel table: [0] plain, [1] _x2, [2] _y2, [3] _xy2. */
03070 #define dspfunc1(PFX, IDX, NUM, depth)\
03071 c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM , depth);\
03072 c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
03073 c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
03074 c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
03075
/* Depth-aware version of dspfunc above: 16-entry qpel table, idx = x + 4*y. */
03076 #define dspfunc2(PFX, IDX, NUM, depth)\
03077 c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
03078 c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
03079 c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
03080 c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
03081 c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
03082 c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
03083 c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
03084 c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
03085 c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
03086 c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
03087 c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
03088 c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
03089 c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
03090 c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
03091 c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
03092 c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
03093
03094
/*
 * Install every bit-depth dependent function for the given depth.
 * `dct` selects the DCT coefficient width suffix (_16 or _32), applied
 * only to the functions that touch DCT blocks (get_pixels, clear_*,
 * add_pixels*).
 */
03095 #define BIT_DEPTH_FUNCS(depth, dct)\
03096 c->get_pixels = FUNCC(get_pixels ## dct , depth);\
03097 c->draw_edges = FUNCC(draw_edges , depth);\
03098 c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\
03099 c->clear_block = FUNCC(clear_block ## dct , depth);\
03100 c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
03101 c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
03102 c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
03103 c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\
03104 c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\
03105 \
03106 c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\
03107 c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\
03108 c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\
03109 c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\
03110 c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\
03111 c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\
03112 \
03113 dspfunc1(put , 0, 16, depth);\
03114 dspfunc1(put , 1, 8, depth);\
03115 dspfunc1(put , 2, 4, depth);\
03116 dspfunc1(put , 3, 2, depth);\
03117 dspfunc1(put_no_rnd, 0, 16, depth);\
03118 dspfunc1(put_no_rnd, 1, 8, depth);\
03119 dspfunc1(avg , 0, 16, depth);\
03120 dspfunc1(avg , 1, 8, depth);\
03121 dspfunc1(avg , 2, 4, depth);\
03122 dspfunc1(avg , 3, 2, depth);\
03123 dspfunc1(avg_no_rnd, 0, 16, depth);\
03124 dspfunc1(avg_no_rnd, 1, 8, depth);\
03125 \
03126 dspfunc2(put_h264_qpel, 0, 16, depth);\
03127 dspfunc2(put_h264_qpel, 1, 8, depth);\
03128 dspfunc2(put_h264_qpel, 2, 4, depth);\
03129 dspfunc2(put_h264_qpel, 3, 2, depth);\
03130 dspfunc2(avg_h264_qpel, 0, 16, depth);\
03131 dspfunc2(avg_h264_qpel, 1, 8, depth);\
03132 dspfunc2(avg_h264_qpel, 2, 4, depth);
03133
/* Pick the function set matching the stream's bit depth (9/10 for
 * high-bit-depth H.264; everything else uses the 8-bit set). */
03134 switch (avctx->bits_per_raw_sample) {
03135 case 9:
03136 if (c->dct_bits == 32) {
03137 BIT_DEPTH_FUNCS(9, _32);
03138 } else {
03139 BIT_DEPTH_FUNCS(9, _16);
03140 }
03141 break;
03142 case 10:
03143 if (c->dct_bits == 32) {
03144 BIT_DEPTH_FUNCS(10, _32);
03145 } else {
03146 BIT_DEPTH_FUNCS(10, _16);
03147 }
03148 break;
03149 default:
03150 av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample);
/* Intentional fallthrough: unsupported depths get the 8-bit functions. */
03151 case 8:
03152 BIT_DEPTH_FUNCS(8, _16);
03153 break;
03154 }
03155
03156
/* Architecture-specific initializers run last so they can override the
 * scalar C defaults installed above with optimized versions. */
03157 if (HAVE_MMX) dsputil_init_mmx (c, avctx);
03158 if (ARCH_ARM) dsputil_init_arm (c, avctx);
03159 if (CONFIG_MLIB) dsputil_init_mlib (c, avctx);
03160 if (HAVE_VIS) dsputil_init_vis (c, avctx);
03161 if (ARCH_ALPHA) dsputil_init_alpha (c, avctx);
03162 if (ARCH_PPC) dsputil_init_ppc (c, avctx);
03163 if (HAVE_MMI) dsputil_init_mmi (c, avctx);
03164 if (ARCH_SH4) dsputil_init_sh4 (c, avctx);
03165 if (ARCH_BFIN) dsputil_init_bfin (c, avctx);
03166
/* Any 2-tap qpel slot not filled by an arch backend falls back to the
 * corresponding H.264 qpel function (tables were zeroed above). */
03167 for(i=0; i<64; i++){
03168 if(!c->put_2tap_qpel_pixels_tab[0][i])
03169 c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
03170 if(!c->avg_2tap_qpel_pixels_tab[0][i])
03171 c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
03172 }
03173
/* Build the coefficient permutation matching the IDCT chosen above. */
03174 ff_init_scantable_permutation(c->idct_permutation,
03175 c->idct_permutation_type);
03176 }