00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076 #include "config.h"
00077 #include "libavutil/avutil.h"
00078 #include <inttypes.h>
00079 #include <stdio.h>
00080 #include <stdlib.h>
00081 #include <string.h>
00082
00083
00084
00085
00086
00087 #include "postprocess.h"
00088 #include "postprocess_internal.h"
00089 #include "libavutil/avstring.h"
00090
00091 unsigned postproc_version(void)
00092 {
00093 return LIBPOSTPROC_VERSION_INT;
00094 }
00095
00096 const char *postproc_configuration(void)
00097 {
00098 return LIBAV_CONFIGURATION;
00099 }
00100
00101 const char *postproc_license(void)
00102 {
00103 #define LICENSE_PREFIX "libpostproc license: "
00104 return LICENSE_PREFIX LIBAV_LICENSE + sizeof(LICENSE_PREFIX) - 1;
00105 }
00106
00107 #if HAVE_ALTIVEC_H
00108 #include <altivec.h>
00109 #endif
00110
/* Size in bytes of the scratch buffer the mode string is copied into while
   it is being parsed. */
#define GET_MODE_BUFFER_SIZE 500
/* Capacity of the per-filter array of unrecognised option strings
   (including its NULL terminator slot). */
#define OPTIONS_ARRAY_SIZE 10
/* Edge length, in pixels, of the blocks the C filters in this file walk. */
#define BLOCK_SIZE 8
/* Not referenced in the visible part of this file; presumably consumed by the
   included templates -- TODO confirm. */
#define TEMP_STRIDE 8
00115
00116
00117 #if ARCH_X86
00118 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
00119 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
00120 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
00121 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
00122 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
00123 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
00124 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
00125 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
00126 #endif
00127
00128 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
00129
00130
00131 static struct PPFilter filters[]=
00132 {
00133 {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
00134 {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
00135
00136
00137 {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER},
00138 {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER},
00139 {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK},
00140 {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK},
00141 {"dr", "dering", 1, 5, 6, DERING},
00142 {"al", "autolevels", 0, 1, 2, LEVEL_FIX},
00143 {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
00144 {"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
00145 {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
00146 {"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER},
00147 {"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER},
00148 {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
00149 {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
00150 {"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
00151 {NULL, NULL,0,0,0,0}
00152 };
00153
/**
 * Alias table used by the mode-string parser.
 *
 * Entries come in pairs: element 2*i is an alias name and element 2*i+1 is
 * the filter string it expands to.  A NULL in an alias slot ends the table.
 */
static const char *replaceTable[]=
{
    /* alias      expansion */
    "default",    "hb:a,vb:a,dr:a",
    "de",         "hb:a,vb:a,dr:a",
    "fast",       "h1:a,v1:a,dr:a",
    "fa",         "h1:a,v1:a,dr:a",
    "ac",         "ha:a:128:7,va:a,dr:a",
    NULL
};
00163
00164
00165 #if ARCH_X86
/**
 * Issue the x86 "prefetchnta" instruction for the address p.
 *
 * @param p address handed to the instruction (passed in a register)
 */
static inline void prefetchnta(void *p)
{
    __asm__ volatile("prefetchnta (%0)\n\t" : /* no outputs */ : "r" (p));
}
00172
/**
 * Issue the x86 "prefetcht0" instruction for the address p.
 *
 * @param p address handed to the instruction (passed in a register)
 */
static inline void prefetcht0(void *p)
{
    __asm__ volatile("prefetcht0 (%0)\n\t" : /* no outputs */ : "r" (p));
}
00179
/**
 * Issue the x86 "prefetcht1" instruction for the address p.
 *
 * @param p address handed to the instruction (passed in a register)
 */
static inline void prefetcht1(void *p)
{
    __asm__ volatile("prefetcht1 (%0)\n\t" : /* no outputs */ : "r" (p));
}
00186
/**
 * Issue the x86 "prefetcht2" instruction for the address p.
 *
 * @param p address handed to the instruction (passed in a register)
 */
static inline void prefetcht2(void *p)
{
    __asm__ volatile("prefetcht2 (%0)\n\t" : /* no outputs */ : "r" (p));
}
00193 #endif
00194
00195
00196
00197
00201 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
00202 {
00203 int numEq= 0;
00204 int y;
00205 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00206 const int dcThreshold= dcOffset*2 + 1;
00207
00208 for(y=0; y<BLOCK_SIZE; y++){
00209 if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
00210 if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
00211 if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
00212 if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
00213 if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
00214 if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
00215 if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
00216 src+= stride;
00217 }
00218 return numEq > c->ppMode.flatnessThreshold;
00219 }
00220
00224 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
00225 {
00226 int numEq= 0;
00227 int y;
00228 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00229 const int dcThreshold= dcOffset*2 + 1;
00230
00231 src+= stride*4;
00232 for(y=0; y<BLOCK_SIZE-1; y++){
00233 if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
00234 if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
00235 if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
00236 if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
00237 if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
00238 if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
00239 if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
00240 if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
00241 src+= stride;
00242 }
00243 return numEq > c->ppMode.flatnessThreshold;
00244 }
00245
/**
 * Sample-based range check across eight rows.
 *
 * One pixel pair is tested per row; the pair's columns rotate with the row
 * ((0,5), (2,7), (4,1), (6,3), then the same again).  The check fails as soon
 * as a pair's difference falls outside [-2*QP, 2*QP].
 *
 * @param src    first pixel of the first row
 * @param stride distance between successive rows
 * @param QP     quantiser used to scale the allowed difference
 * @return 1 if every sampled pair is within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const int colA[4]= { 0, 2, 4, 6 };
    static const int colB[4]= { 5, 7, 1, 3 };
    int row;

    for(row=0; row<8; row++){
        const uint8_t *line= src + row*stride;
        const int diff= line[colA[row & 3]] - line[colB[row & 3]];

        /* A negative sum wraps to a huge unsigned value, so one compare
           rejects both too-small and too-large differences. */
        if((unsigned)(diff + 2*QP) > 4*QP)
            return 0;
    }
    return 1;
}
00261
00262 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
00263 {
00264 int x;
00265 src+= stride*4;
00266 for(x=0; x<BLOCK_SIZE; x+=4){
00267 if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
00268 if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
00269 if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
00270 if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
00271 }
00272 return 1;
00273 }
00274
00275 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
00276 {
00277 if( isHorizDC_C(src, stride, c) ){
00278 if( isHorizMinMaxOk_C(src, stride, c->QP) )
00279 return 1;
00280 else
00281 return 0;
00282 }else{
00283 return 2;
00284 }
00285 }
00286
00287 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
00288 {
00289 if( isVertDC_C(src, stride, c) ){
00290 if( isVertMinMaxOk_C(src, stride, c->QP) )
00291 return 1;
00292 else
00293 return 0;
00294 }else{
00295 return 2;
00296 }
00297 }
00298
00299 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
00300 {
00301 int y;
00302 for(y=0; y<BLOCK_SIZE; y++){
00303 const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
00304
00305 if(FFABS(middleEnergy) < 8*c->QP){
00306 const int q=(dst[3] - dst[4])/2;
00307 const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
00308 const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
00309
00310 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00311 d= FFMAX(d, 0);
00312
00313 d= (5*d + 32) >> 6;
00314 d*= FFSIGN(-middleEnergy);
00315
00316 if(q>0)
00317 {
00318 d= d<0 ? 0 : d;
00319 d= d>q ? q : d;
00320 }
00321 else
00322 {
00323 d= d>0 ? 0 : d;
00324 d= d<q ? q : d;
00325 }
00326
00327 dst[3]-= d;
00328 dst[4]+= d;
00329 }
00330 dst+= stride;
00331 }
00332 }
00333
00338 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
00339 {
00340 int y;
00341 for(y=0; y<BLOCK_SIZE; y++){
00342 const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
00343 const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
00344
00345 int sums[10];
00346 sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
00347 sums[1] = sums[0] - first + dst[3];
00348 sums[2] = sums[1] - first + dst[4];
00349 sums[3] = sums[2] - first + dst[5];
00350 sums[4] = sums[3] - first + dst[6];
00351 sums[5] = sums[4] - dst[0] + dst[7];
00352 sums[6] = sums[5] - dst[1] + last;
00353 sums[7] = sums[6] - dst[2] + last;
00354 sums[8] = sums[7] - dst[3] + last;
00355 sums[9] = sums[8] - dst[4] + last;
00356
00357 dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
00358 dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
00359 dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
00360 dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
00361 dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
00362 dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
00363 dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
00364 dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
00365
00366 dst+= stride;
00367 }
00368 }
00369
00378 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
00379 {
00380 int y;
00381 static uint64_t *lut= NULL;
00382 if(lut==NULL)
00383 {
00384 int i;
00385 lut = av_malloc(256*8);
00386 for(i=0; i<256; i++)
00387 {
00388 int v= i < 128 ? 2*i : 2*(i-256);
00389
00390
00391
00392
00393
00394
00395
00396
00397 uint64_t a= (v/16) & 0xFF;
00398 uint64_t b= (v*3/16) & 0xFF;
00399 uint64_t c= (v*5/16) & 0xFF;
00400 uint64_t d= (7*v/16) & 0xFF;
00401 uint64_t A= (0x100 - a)&0xFF;
00402 uint64_t B= (0x100 - b)&0xFF;
00403 uint64_t C= (0x100 - c)&0xFF;
00404 uint64_t D= (0x100 - c)&0xFF;
00405
00406 lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
00407 (D<<24) | (C<<16) | (B<<8) | (A);
00408
00409 }
00410 }
00411
00412 for(y=0; y<BLOCK_SIZE; y++){
00413 int a= src[1] - src[2];
00414 int b= src[3] - src[4];
00415 int c= src[5] - src[6];
00416
00417 int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
00418
00419 if(d < QP){
00420 int v = d * FFSIGN(-b);
00421
00422 src[1] +=v/8;
00423 src[2] +=v/4;
00424 src[3] +=3*v/8;
00425 src[4] -=3*v/8;
00426 src[5] -=v/4;
00427 src[6] -=v/8;
00428 }
00429 src+=stride;
00430 }
00431 }
00432
00436 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
00437 int y;
00438 const int QP= c->QP;
00439 const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00440 const int dcThreshold= dcOffset*2 + 1;
00441
00442 src+= step*4;
00443 for(y=0; y<8; y++){
00444 int numEq= 0;
00445
00446 if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
00447 if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
00448 if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
00449 if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
00450 if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
00451 if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
00452 if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
00453 if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
00454 if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
00455 if(numEq > c->ppMode.flatnessThreshold){
00456 int min, max, x;
00457
00458 if(src[0] > src[step]){
00459 max= src[0];
00460 min= src[step];
00461 }else{
00462 max= src[step];
00463 min= src[0];
00464 }
00465 for(x=2; x<8; x+=2){
00466 if(src[x*step] > src[(x+1)*step]){
00467 if(src[x *step] > max) max= src[ x *step];
00468 if(src[(x+1)*step] < min) min= src[(x+1)*step];
00469 }else{
00470 if(src[(x+1)*step] > max) max= src[(x+1)*step];
00471 if(src[ x *step] < min) min= src[ x *step];
00472 }
00473 }
00474 if(max-min < 2*QP){
00475 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
00476 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
00477
00478 int sums[10];
00479 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
00480 sums[1] = sums[0] - first + src[3*step];
00481 sums[2] = sums[1] - first + src[4*step];
00482 sums[3] = sums[2] - first + src[5*step];
00483 sums[4] = sums[3] - first + src[6*step];
00484 sums[5] = sums[4] - src[0*step] + src[7*step];
00485 sums[6] = sums[5] - src[1*step] + last;
00486 sums[7] = sums[6] - src[2*step] + last;
00487 sums[8] = sums[7] - src[3*step] + last;
00488 sums[9] = sums[8] - src[4*step] + last;
00489
00490 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
00491 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
00492 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
00493 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
00494 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
00495 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
00496 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
00497 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
00498 }
00499 }else{
00500 const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
00501
00502 if(FFABS(middleEnergy) < 8*QP){
00503 const int q=(src[3*step] - src[4*step])/2;
00504 const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
00505 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
00506
00507 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00508 d= FFMAX(d, 0);
00509
00510 d= (5*d + 32) >> 6;
00511 d*= FFSIGN(-middleEnergy);
00512
00513 if(q>0){
00514 d= d<0 ? 0 : d;
00515 d= d>q ? q : d;
00516 }else{
00517 d= d>0 ? 0 : d;
00518 d= d<q ? q : d;
00519 }
00520
00521 src[3*step]-= d;
00522 src[4*step]+= d;
00523 }
00524 }
00525
00526 src += stride;
00527 }
00528
00529
00530
00531
00532
00533 }
00534
00535
00536
00537 #if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
00538 #define COMPILE_C
00539 #endif
00540
00541 #if HAVE_ALTIVEC
00542 #define COMPILE_ALTIVEC
00543 #endif //HAVE_ALTIVEC
00544
00545 #if ARCH_X86
00546
00547 #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
00548 #define COMPILE_MMX
00549 #endif
00550
00551 #if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
00552 #define COMPILE_MMX2
00553 #endif
00554
00555 #if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
00556 #define COMPILE_3DNOW
00557 #endif
00558 #endif
00559
00560 #undef HAVE_MMX
00561 #define HAVE_MMX 0
00562 #undef HAVE_MMX2
00563 #define HAVE_MMX2 0
00564 #undef HAVE_AMD3DNOW
00565 #define HAVE_AMD3DNOW 0
00566 #undef HAVE_ALTIVEC
00567 #define HAVE_ALTIVEC 0
00568
00569 #ifdef COMPILE_C
00570 #define RENAME(a) a ## _C
00571 #include "postprocess_template.c"
00572 #endif
00573
00574 #ifdef COMPILE_ALTIVEC
00575 #undef RENAME
00576 #undef HAVE_ALTIVEC
00577 #define HAVE_ALTIVEC 1
00578 #define RENAME(a) a ## _altivec
00579 #include "postprocess_altivec_template.c"
00580 #include "postprocess_template.c"
00581 #endif
00582
00583
00584 #ifdef COMPILE_MMX
00585 #undef RENAME
00586 #undef HAVE_MMX
00587 #define HAVE_MMX 1
00588 #define RENAME(a) a ## _MMX
00589 #include "postprocess_template.c"
00590 #endif
00591
00592
00593 #ifdef COMPILE_MMX2
00594 #undef RENAME
00595 #undef HAVE_MMX
00596 #undef HAVE_MMX2
00597 #define HAVE_MMX 1
00598 #define HAVE_MMX2 1
00599 #define RENAME(a) a ## _MMX2
00600 #include "postprocess_template.c"
00601 #endif
00602
00603
00604 #ifdef COMPILE_3DNOW
00605 #undef RENAME
00606 #undef HAVE_MMX
00607 #undef HAVE_MMX2
00608 #undef HAVE_AMD3DNOW
00609 #define HAVE_MMX 1
00610 #define HAVE_MMX2 0
00611 #define HAVE_AMD3DNOW 1
00612 #define RENAME(a) a ## _3DNow
00613 #include "postprocess_template.c"
00614 #endif
00615
00616
00617
00618 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
00619 const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
00620 {
00621 PPContext *c= (PPContext *)vc;
00622 PPMode *ppMode= (PPMode *)vm;
00623 c->ppMode= *ppMode;
00624
00625
00626
00627
00628 #if CONFIG_RUNTIME_CPUDETECT
00629 #if ARCH_X86
00630
00631 if(c->cpuCaps & PP_CPU_CAPS_MMX2)
00632 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00633 else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
00634 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00635 else if(c->cpuCaps & PP_CPU_CAPS_MMX)
00636 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00637 else
00638 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00639 #else
00640 #if HAVE_ALTIVEC
00641 if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
00642 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00643 else
00644 #endif
00645 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00646 #endif
00647 #else
00648 #if HAVE_MMX2
00649 postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00650 #elif HAVE_AMD3DNOW
00651 postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00652 #elif HAVE_MMX
00653 postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00654 #elif HAVE_ALTIVEC
00655 postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00656 #else
00657 postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00658 #endif
00659 #endif
00660 }
00661
00662
00663
00664
00665
00666
/**
 * Help text describing the filter-string syntax.
 *
 * The long names listed here must be the ones the parser's filter table
 * accepts (matching is case-sensitive): "ahdeblock", "avdeblock" and
 * "forcequant".
 */
const char pp_help[] =
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
00710
00711 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
00712 {
00713 char temp[GET_MODE_BUFFER_SIZE];
00714 char *p= temp;
00715 static const char filterDelimiters[] = ",/";
00716 static const char optionDelimiters[] = ":";
00717 struct PPMode *ppMode;
00718 char *filterToken;
00719
00720 ppMode= av_malloc(sizeof(PPMode));
00721
00722 ppMode->lumMode= 0;
00723 ppMode->chromMode= 0;
00724 ppMode->maxTmpNoise[0]= 700;
00725 ppMode->maxTmpNoise[1]= 1500;
00726 ppMode->maxTmpNoise[2]= 3000;
00727 ppMode->maxAllowedY= 234;
00728 ppMode->minAllowedY= 16;
00729 ppMode->baseDcDiff= 256/8;
00730 ppMode->flatnessThreshold= 56-16-1;
00731 ppMode->maxClippedThreshold= 0.01;
00732 ppMode->error=0;
00733
00734 memset(temp, 0, GET_MODE_BUFFER_SIZE);
00735 av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
00736
00737 av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
00738
00739 for(;;){
00740 char *filterName;
00741 int q= 1000000;
00742 int chrom=-1;
00743 int luma=-1;
00744 char *option;
00745 char *options[OPTIONS_ARRAY_SIZE];
00746 int i;
00747 int filterNameOk=0;
00748 int numOfUnknownOptions=0;
00749 int enable=1;
00750
00751 filterToken= strtok(p, filterDelimiters);
00752 if(filterToken == NULL) break;
00753 p+= strlen(filterToken) + 1;
00754 filterName= strtok(filterToken, optionDelimiters);
00755 av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
00756
00757 if(*filterName == '-'){
00758 enable=0;
00759 filterName++;
00760 }
00761
00762 for(;;){
00763 option= strtok(NULL, optionDelimiters);
00764 if(option == NULL) break;
00765
00766 av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
00767 if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
00768 else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
00769 else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
00770 else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
00771 else{
00772 options[numOfUnknownOptions] = option;
00773 numOfUnknownOptions++;
00774 }
00775 if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
00776 }
00777 options[numOfUnknownOptions] = NULL;
00778
00779
00780 for(i=0; replaceTable[2*i]!=NULL; i++){
00781 if(!strcmp(replaceTable[2*i], filterName)){
00782 int newlen= strlen(replaceTable[2*i + 1]);
00783 int plen;
00784 int spaceLeft;
00785
00786 if(p==NULL) p= temp, *p=0;
00787 else p--, *p=',';
00788
00789 plen= strlen(p);
00790 spaceLeft= p - temp + plen;
00791 if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE - 1){
00792 ppMode->error++;
00793 break;
00794 }
00795 memmove(p + newlen, p, plen+1);
00796 memcpy(p, replaceTable[2*i + 1], newlen);
00797 filterNameOk=1;
00798 }
00799 }
00800
00801 for(i=0; filters[i].shortName!=NULL; i++){
00802 if( !strcmp(filters[i].longName, filterName)
00803 || !strcmp(filters[i].shortName, filterName)){
00804 ppMode->lumMode &= ~filters[i].mask;
00805 ppMode->chromMode &= ~filters[i].mask;
00806
00807 filterNameOk=1;
00808 if(!enable) break;
00809
00810 if(q >= filters[i].minLumQuality && luma)
00811 ppMode->lumMode|= filters[i].mask;
00812 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
00813 if(q >= filters[i].minChromQuality)
00814 ppMode->chromMode|= filters[i].mask;
00815
00816 if(filters[i].mask == LEVEL_FIX){
00817 int o;
00818 ppMode->minAllowedY= 16;
00819 ppMode->maxAllowedY= 234;
00820 for(o=0; options[o]!=NULL; o++){
00821 if( !strcmp(options[o],"fullyrange")
00822 ||!strcmp(options[o],"f")){
00823 ppMode->minAllowedY= 0;
00824 ppMode->maxAllowedY= 255;
00825 numOfUnknownOptions--;
00826 }
00827 }
00828 }
00829 else if(filters[i].mask == TEMP_NOISE_FILTER)
00830 {
00831 int o;
00832 int numOfNoises=0;
00833
00834 for(o=0; options[o]!=NULL; o++){
00835 char *tail;
00836 ppMode->maxTmpNoise[numOfNoises]=
00837 strtol(options[o], &tail, 0);
00838 if(tail!=options[o]){
00839 numOfNoises++;
00840 numOfUnknownOptions--;
00841 if(numOfNoises >= 3) break;
00842 }
00843 }
00844 }
00845 else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
00846 || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
00847 int o;
00848
00849 for(o=0; options[o]!=NULL && o<2; o++){
00850 char *tail;
00851 int val= strtol(options[o], &tail, 0);
00852 if(tail==options[o]) break;
00853
00854 numOfUnknownOptions--;
00855 if(o==0) ppMode->baseDcDiff= val;
00856 else ppMode->flatnessThreshold= val;
00857 }
00858 }
00859 else if(filters[i].mask == FORCE_QUANT){
00860 int o;
00861 ppMode->forcedQuant= 15;
00862
00863 for(o=0; options[o]!=NULL && o<1; o++){
00864 char *tail;
00865 int val= strtol(options[o], &tail, 0);
00866 if(tail==options[o]) break;
00867
00868 numOfUnknownOptions--;
00869 ppMode->forcedQuant= val;
00870 }
00871 }
00872 }
00873 }
00874 if(!filterNameOk) ppMode->error++;
00875 ppMode->error += numOfUnknownOptions;
00876 }
00877
00878 av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
00879 if(ppMode->error){
00880 av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
00881 av_free(ppMode);
00882 return NULL;
00883 }
00884 return ppMode;
00885 }
00886
00887 void pp_free_mode(pp_mode *mode){
00888 av_free(mode);
00889 }
00890
/**
 * Replace a buffer with a fresh zero-filled one.
 *
 * The old contents are not preserved: the previous buffer is freed first and
 * a new one is allocated with av_mallocz().
 *
 * @param p         address of the buffer pointer; receives the new buffer
 *                  (or whatever av_mallocz() returns on failure)
 * @param alignment unused by this implementation
 * @param size      size in bytes of the new buffer
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh= av_mallocz(size);
    *p= fresh;
}
00895
00896 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
00897 int mbWidth = (width+15)>>4;
00898 int mbHeight= (height+15)>>4;
00899 int i;
00900
00901 c->stride= stride;
00902 c->qpStride= qpStride;
00903
00904 reallocAlign((void **)&c->tempDst, 8, stride*24);
00905 reallocAlign((void **)&c->tempSrc, 8, stride*24);
00906 reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
00907 reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
00908 for(i=0; i<256; i++)
00909 c->yHistogram[i]= width*height/64*15/256;
00910
00911 for(i=0; i<3; i++){
00912
00913 reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
00914 reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);
00915 }
00916
00917 reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
00918 reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00919 reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00920 reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
00921 }
00922
/**
 * Name callback stored in the AVClass of the context.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    static const char label[]= "postproc";

    (void)ptr;
    return label;
}
00926
00927 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
00928
00929 pp_context *pp_get_context(int width, int height, int cpuCaps){
00930 PPContext *c= av_malloc(sizeof(PPContext));
00931 int stride= FFALIGN(width, 16);
00932 int qpStride= (width+15)/16 + 2;
00933
00934 memset(c, 0, sizeof(PPContext));
00935 c->av_class = &av_codec_context_class;
00936 c->cpuCaps= cpuCaps;
00937 if(cpuCaps&PP_FORMAT){
00938 c->hChromaSubSample= cpuCaps&0x3;
00939 c->vChromaSubSample= (cpuCaps>>4)&0x3;
00940 }else{
00941 c->hChromaSubSample= 1;
00942 c->vChromaSubSample= 1;
00943 }
00944
00945 reallocBuffers(c, width, height, stride, qpStride);
00946
00947 c->frameNum=-1;
00948
00949 return c;
00950 }
00951
00952 void pp_free_context(void *vc){
00953 PPContext *c = (PPContext*)vc;
00954 int i;
00955
00956 for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
00957 for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
00958
00959 av_free(c->tempBlocks);
00960 av_free(c->yHistogram);
00961 av_free(c->tempDst);
00962 av_free(c->tempSrc);
00963 av_free(c->deintTemp);
00964 av_free(c->stdQPTable);
00965 av_free(c->nonBQPTable);
00966 av_free(c->forcedQPTable);
00967
00968 memset(c, 0, sizeof(PPContext));
00969
00970 av_free(c);
00971 }
00972
00973 void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
00974 uint8_t * dst[3], const int dstStride[3],
00975 int width, int height,
00976 const QP_STORE_T *QP_store, int QPStride,
00977 pp_mode *vm, void *vc, int pict_type)
00978 {
00979 int mbWidth = (width+15)>>4;
00980 int mbHeight= (height+15)>>4;
00981 PPMode *mode = (PPMode*)vm;
00982 PPContext *c = (PPContext*)vc;
00983 int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
00984 int absQPStride = FFABS(QPStride);
00985
00986
00987 if(c->stride < minStride || c->qpStride < absQPStride)
00988 reallocBuffers(c, width, height,
00989 FFMAX(minStride, c->stride),
00990 FFMAX(c->qpStride, absQPStride));
00991
00992 if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
00993 int i;
00994 QP_store= c->forcedQPTable;
00995 absQPStride = QPStride = 0;
00996 if(mode->lumMode & FORCE_QUANT)
00997 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
00998 else
00999 for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
01000 }
01001
01002 if(pict_type & PP_PICT_TYPE_QP2){
01003 int i;
01004 const int count= mbHeight * absQPStride;
01005 for(i=0; i<(count>>2); i++){
01006 ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
01007 }
01008 for(i<<=2; i<count; i++){
01009 c->stdQPTable[i] = QP_store[i]>>1;
01010 }
01011 QP_store= c->stdQPTable;
01012 QPStride= absQPStride;
01013 }
01014
01015 if(0){
01016 int x,y;
01017 for(y=0; y<mbHeight; y++){
01018 for(x=0; x<mbWidth; x++){
01019 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
01020 }
01021 av_log(c, AV_LOG_INFO, "\n");
01022 }
01023 av_log(c, AV_LOG_INFO, "\n");
01024 }
01025
01026 if((pict_type&7)!=3){
01027 if (QPStride >= 0){
01028 int i;
01029 const int count= mbHeight * QPStride;
01030 for(i=0; i<(count>>2); i++){
01031 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
01032 }
01033 for(i<<=2; i<count; i++){
01034 c->nonBQPTable[i] = QP_store[i] & 0x3F;
01035 }
01036 } else {
01037 int i,j;
01038 for(i=0; i<mbHeight; i++) {
01039 for(j=0; j<absQPStride; j++) {
01040 c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
01041 }
01042 }
01043 }
01044 }
01045
01046 av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
01047 mode->lumMode, mode->chromMode);
01048
01049 postProcess(src[0], srcStride[0], dst[0], dstStride[0],
01050 width, height, QP_store, QPStride, 0, mode, c);
01051
01052 width = (width )>>c->hChromaSubSample;
01053 height = (height)>>c->vChromaSubSample;
01054
01055 if(mode->chromMode){
01056 postProcess(src[1], srcStride[1], dst[1], dstStride[1],
01057 width, height, QP_store, QPStride, 1, mode, c);
01058 postProcess(src[2], srcStride[2], dst[2], dstStride[2],
01059 width, height, QP_store, QPStride, 2, mode, c);
01060 }
01061 else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
01062 linecpy(dst[1], src[1], height, srcStride[1]);
01063 linecpy(dst[2], src[2], height, srcStride[2]);
01064 }else{
01065 int y;
01066 for(y=0; y<height; y++){
01067 memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
01068 memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
01069 }
01070 }
01071 }