• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • Examples
  • File List
  • Globals

libpostproc/postprocess.c

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
00003  *
00004  * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
00005  *
00006  * This file is part of Libav.
00007  *
00008  * Libav is free software; you can redistribute it and/or modify
00009  * it under the terms of the GNU General Public License as published by
00010  * the Free Software Foundation; either version 2 of the License, or
00011  * (at your option) any later version.
00012  *
00013  * Libav is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016  * GNU General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU General Public License
00019  * along with Libav; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00021  */
00022 
00028 /*
00029                         C       MMX     MMX2    3DNow   AltiVec
00030 isVertDC                Ec      Ec                      Ec
00031 isVertMinMaxOk          Ec      Ec                      Ec
00032 doVertLowPass           E               e       e       Ec
00033 doVertDefFilter         Ec      Ec      e       e       Ec
00034 isHorizDC               Ec      Ec                      Ec
00035 isHorizMinMaxOk         a       E                       Ec
00036 doHorizLowPass          E               e       e       Ec
00037 doHorizDefFilter        Ec      Ec      e       e       Ec
00038 do_a_deblock            Ec      E       Ec      E
00039 deRing                  E               e       e*      Ecp
00040 Vertical RKAlgo1        E               a       a
00041 Horizontal RKAlgo1                      a       a
00042 Vertical X1#            a               E       E
00043 Horizontal X1#          a               E       E
00044 LinIpolDeinterlace      e               E       E*
00045 CubicIpolDeinterlace    a               e       e*
00046 LinBlendDeinterlace     e               E       E*
00047 MedianDeinterlace#      E       Ec      Ec
00048 TempDeNoiser#           E               e       e       Ec
00049 
00050 * I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
00051 # more or less self-invented filters, so the exactness is not too meaningful
00052 E = Exact implementation
00053 e = almost exact implementation (slightly different rounding,...)
00054 a = alternative / approximate impl
00055 c = checked against the other implementations (-vo md5)
00056 p = partially optimized, still some work to do
00057 */
00058 
00059 /*
00060 TODO:
00061 reduce the time wasted on the mem transfer
00062 unroll stuff if instructions depend too much on the prior one
00063 move YScale thing to the end instead of fixing QP
00064 write a faster and higher quality deblocking filter :)
00065 make the mainloop more flexible (variable number of blocks at once)
00066         (the if/else stuff per block is slowing things down)
00067 compare the quality & speed of all filters
00068 split this huge file
00069 optimize c versions
00070 try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
00071 ...
00072 */
00073 
00074 //Changelog: use git log
00075 
00076 #include "config.h"
00077 #include "libavutil/avutil.h"
00078 #include <inttypes.h>
00079 #include <stdio.h>
00080 #include <stdlib.h>
00081 #include <string.h>
00082 //#undef HAVE_MMX2
00083 //#define HAVE_AMD3DNOW
00084 //#undef HAVE_MMX
00085 //#undef ARCH_X86
00086 //#define DEBUG_BRIGHTNESS
00087 #include "postprocess.h"
00088 #include "postprocess_internal.h"
00089 #include "libavutil/avstring.h"
00090 
00091 unsigned postproc_version(void)
00092 {
00093     return LIBPOSTPROC_VERSION_INT;
00094 }
00095 
00096 const char *postproc_configuration(void)
00097 {
00098     return LIBAV_CONFIGURATION;
00099 }
00100 
00101 const char *postproc_license(void)
00102 {
00103 #define LICENSE_PREFIX "libpostproc license: "
00104     return LICENSE_PREFIX LIBAV_LICENSE + sizeof(LICENSE_PREFIX) - 1;
00105 }
00106 
00107 #if HAVE_ALTIVEC_H
00108 #include <altivec.h>
00109 #endif
00110 
/* Size in bytes of the scratch copy of the mode string that
 * pp_get_mode_by_name_and_quality() tokenizes. */
00111 #define GET_MODE_BUFFER_SIZE 500
/* Capacity of the per-filter array that collects option strings which are
 * not one of the generic options; the array is NULL terminated. */
00112 #define OPTIONS_ARRAY_SIZE 10
/* Number of rows/columns the block filters in this file iterate over. */
00113 #define BLOCK_SIZE 8
/* NOTE(review): not referenced in the visible part of this file; presumably
 * the row stride of a temporary block buffer used by the included
 * templates -- confirm. */
00114 #define TEMP_STRIDE 8
00115 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet
00116 
00117 #if ARCH_X86
/* 64-bit constant bit patterns: in the wNN values every 16-bit lane holds
 * 0x00NN, in the bNN values every byte holds 0xNN.
 * NOTE(review): presumably consumed by the inline assembly in the included
 * templates, and the leading 8 is presumably the requested alignment --
 * confirm against the DECLARE_ASM_CONST definition. */
00118 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
00119 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
00120 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
00121 DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
00122 DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
00123 DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
00124 DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
00125 DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
00126 #endif
00127 
/* NOTE(review): not referenced in the visible part of this file; presumably
 * the threshold used by the dering filter in the included templates --
 * confirm. */
00128 DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
00129 
00130 
/* Table of the filters that can be named in a mode string.
 *
 * pp_get_mode_by_name_and_quality() walks this table until it reaches the
 * entry whose shortName is NULL and matches the user's filter name against
 * both shortName and longName with strcmp(), i.e. case-sensitively.
 *
 * NOTE(review): the initializer order is presumably
 *   { shortName, longName, chromDefault, minLumQuality, minChromQuality, mask }
 * -- confirm against struct PPFilter in postprocess_internal.h. */
00131 static struct PPFilter filters[]=
00132 {
00133     {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
00134     {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
00135 /*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
00136     {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
00137     {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
00138     {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
00139     {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
00140     {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
00141     {"dr", "dering",                1, 5, 6, DERING},
00142     {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
00143     {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
00144     {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
00145     {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
00146     {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
00147     {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
00148     {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
00149     {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
00150     {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
00151     {NULL, NULL,0,0,0,0} //End Marker
00152 };
00153 
/* Aliases that expand to a list of filters.
 *
 * Entries come in pairs: replaceTable[2*i] is the alias as typed by the user
 * and replaceTable[2*i + 1] is the filter string spliced into the mode
 * string in its place. The list ends at the first NULL alias. */
00154 static const char *replaceTable[]=
00155 {
00156     "default",      "hb:a,vb:a,dr:a",
00157     "de",           "hb:a,vb:a,dr:a",
00158     "fast",         "h1:a,v1:a,dr:a",
00159     "fa",           "h1:a,v1:a,dr:a",
00160     "ac",           "ha:a:128:7,va:a,dr:a",
00161     NULL //End Marker
00162 };
00163 
00164 
00165 #if ARCH_X86
/* Thin wrappers, compiled only when ARCH_X86 is set, that each emit the x86
 * prefetch instruction of the same name with the address p as operand.
 * The asm statements are volatile and declare no outputs. */

/* Emit "prefetchnta (p)". */
00166 static inline void prefetchnta(void *p)
00167 {
00168     __asm__ volatile(   "prefetchnta (%0)\n\t"
00169         : : "r" (p)
00170     );
00171 }
00172 
/* Emit "prefetcht0 (p)". */
00173 static inline void prefetcht0(void *p)
00174 {
00175     __asm__ volatile(   "prefetcht0 (%0)\n\t"
00176         : : "r" (p)
00177     );
00178 }
00179 
/* Emit "prefetcht1 (p)". */
00180 static inline void prefetcht1(void *p)
00181 {
00182     __asm__ volatile(   "prefetcht1 (%0)\n\t"
00183         : : "r" (p)
00184     );
00185 }
00186 
/* Emit "prefetcht2 (p)". */
00187 static inline void prefetcht2(void *p)
00188 {
00189     __asm__ volatile(   "prefetcht2 (%0)\n\t"
00190         : : "r" (p)
00191     );
00192 }
00193 #endif
00194 
00195 /* The horizontal functions exist only in C because the MMX
00196  * code is faster with vertical filters and transposing. */
00197 
00201 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
00202 {
00203     int numEq= 0;
00204     int y;
00205     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00206     const int dcThreshold= dcOffset*2 + 1;
00207 
00208     for(y=0; y<BLOCK_SIZE; y++){
00209         if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
00210         if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
00211         if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
00212         if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
00213         if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
00214         if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
00215         if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
00216         src+= stride;
00217     }
00218     return numEq > c->ppMode.flatnessThreshold;
00219 }
00220 
00224 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
00225 {
00226     int numEq= 0;
00227     int y;
00228     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00229     const int dcThreshold= dcOffset*2 + 1;
00230 
00231     src+= stride*4; // src points to begin of the 8x8 Block
00232     for(y=0; y<BLOCK_SIZE-1; y++){
00233         if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
00234         if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
00235         if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
00236         if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
00237         if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
00238         if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
00239         if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
00240         if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
00241         src+= stride;
00242     }
00243     return numEq > c->ppMode.flatnessThreshold;
00244 }
00245 
/**
 * Sampled check that horizontal pixel differences stay within +-2*QP.
 *
 * On each of eight rows one pixel pair is tested; the pair tested cycles
 * through (0,5), (2,7), (4,1), (6,3) from row to row.
 *
 * @param src    first pixel of the first row
 * @param stride distance between consecutive rows
 * @param QP     quantizer; a pair passes when -2*QP <= a - b <= 2*QP
 * @return 1 if every sampled pair passes, 0 as soon as one fails
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const int pairs[4][2]= { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    const unsigned limit= 4*QP;
    int row;

    for(row=0; row<8; row++, src+= stride){
        const int *p= pairs[row & 3];
        /* a negative biased difference wraps to a huge unsigned value and fails too */
        if((unsigned)(src[p[0]] - src[p[1]] + 2*QP) > limit)
            return 0;
    }
    return 1;
}
00261 
00262 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
00263 {
00264     int x;
00265     src+= stride*4;
00266     for(x=0; x<BLOCK_SIZE; x+=4){
00267         if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
00268         if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
00269         if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
00270         if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
00271     }
00272     return 1;
00273 }
00274 
00275 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
00276 {
00277     if( isHorizDC_C(src, stride, c) ){
00278         if( isHorizMinMaxOk_C(src, stride, c->QP) )
00279             return 1;
00280         else
00281             return 0;
00282     }else{
00283         return 2;
00284     }
00285 }
00286 
00287 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
00288 {
00289     if( isVertDC_C(src, stride, c) ){
00290         if( isVertMinMaxOk_C(src, stride, c->QP) )
00291             return 1;
00292         else
00293             return 0;
00294     }else{
00295         return 2;
00296     }
00297 }
00298 
00299 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
00300 {
00301     int y;
00302     for(y=0; y<BLOCK_SIZE; y++){
00303         const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
00304 
00305         if(FFABS(middleEnergy) < 8*c->QP){
00306             const int q=(dst[3] - dst[4])/2;
00307             const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
00308             const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
00309 
00310             int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00311             d= FFMAX(d, 0);
00312 
00313             d= (5*d + 32) >> 6;
00314             d*= FFSIGN(-middleEnergy);
00315 
00316             if(q>0)
00317             {
00318                 d= d<0 ? 0 : d;
00319                 d= d>q ? q : d;
00320             }
00321             else
00322             {
00323                 d= d>0 ? 0 : d;
00324                 d= d<q ? q : d;
00325             }
00326 
00327             dst[3]-= d;
00328             dst[4]+= d;
00329         }
00330         dst+= stride;
00331     }
00332 }
00333 
00338 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
00339 {
00340     int y;
00341     for(y=0; y<BLOCK_SIZE; y++){
00342         const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
00343         const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
00344 
00345         int sums[10];
00346         sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
00347         sums[1] = sums[0] - first  + dst[3];
00348         sums[2] = sums[1] - first  + dst[4];
00349         sums[3] = sums[2] - first  + dst[5];
00350         sums[4] = sums[3] - first  + dst[6];
00351         sums[5] = sums[4] - dst[0] + dst[7];
00352         sums[6] = sums[5] - dst[1] + last;
00353         sums[7] = sums[6] - dst[2] + last;
00354         sums[8] = sums[7] - dst[3] + last;
00355         sums[9] = sums[8] - dst[4] + last;
00356 
00357         dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
00358         dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
00359         dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
00360         dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
00361         dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
00362         dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
00363         dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
00364         dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
00365 
00366         dst+= stride;
00367     }
00368 }
00369 
00378 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
00379 {
00380     int y;
00381     static uint64_t *lut= NULL;
00382     if(lut==NULL)
00383     {
00384         int i;
00385         lut = av_malloc(256*8);
00386         for(i=0; i<256; i++)
00387         {
00388             int v= i < 128 ? 2*i : 2*(i-256);
00389 /*
00390 //Simulate 112242211 9-Tap filter
00391             uint64_t a= (v/16)  & 0xFF;
00392             uint64_t b= (v/8)   & 0xFF;
00393             uint64_t c= (v/4)   & 0xFF;
00394             uint64_t d= (3*v/8) & 0xFF;
00395 */
00396 //Simulate piecewise linear interpolation
00397             uint64_t a= (v/16)   & 0xFF;
00398             uint64_t b= (v*3/16) & 0xFF;
00399             uint64_t c= (v*5/16) & 0xFF;
00400             uint64_t d= (7*v/16) & 0xFF;
00401             uint64_t A= (0x100 - a)&0xFF;
00402             uint64_t B= (0x100 - b)&0xFF;
00403             uint64_t C= (0x100 - c)&0xFF;
00404             uint64_t D= (0x100 - c)&0xFF;
00405 
00406             lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
00407                        (D<<24) | (C<<16) | (B<<8)  | (A);
00408             //lut[i] = (v<<32) | (v<<24);
00409         }
00410     }
00411 
00412     for(y=0; y<BLOCK_SIZE; y++){
00413         int a= src[1] - src[2];
00414         int b= src[3] - src[4];
00415         int c= src[5] - src[6];
00416 
00417         int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
00418 
00419         if(d < QP){
00420             int v = d * FFSIGN(-b);
00421 
00422             src[1] +=v/8;
00423             src[2] +=v/4;
00424             src[3] +=3*v/8;
00425             src[4] -=3*v/8;
00426             src[5] -=v/4;
00427             src[6] -=v/8;
00428         }
00429         src+=stride;
00430     }
00431 }
00432 
00436 static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
00437     int y;
00438     const int QP= c->QP;
00439     const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
00440     const int dcThreshold= dcOffset*2 + 1;
00441 //START_TIMER
00442     src+= step*4; // src points to begin of the 8x8 Block
00443     for(y=0; y<8; y++){
00444         int numEq= 0;
00445 
00446         if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
00447         if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
00448         if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
00449         if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
00450         if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
00451         if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
00452         if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
00453         if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
00454         if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
00455         if(numEq > c->ppMode.flatnessThreshold){
00456             int min, max, x;
00457 
00458             if(src[0] > src[step]){
00459                 max= src[0];
00460                 min= src[step];
00461             }else{
00462                 max= src[step];
00463                 min= src[0];
00464             }
00465             for(x=2; x<8; x+=2){
00466                 if(src[x*step] > src[(x+1)*step]){
00467                         if(src[x    *step] > max) max= src[ x   *step];
00468                         if(src[(x+1)*step] < min) min= src[(x+1)*step];
00469                 }else{
00470                         if(src[(x+1)*step] > max) max= src[(x+1)*step];
00471                         if(src[ x   *step] < min) min= src[ x   *step];
00472                 }
00473             }
00474             if(max-min < 2*QP){
00475                 const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
00476                 const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
00477 
00478                 int sums[10];
00479                 sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
00480                 sums[1] = sums[0] - first       + src[3*step];
00481                 sums[2] = sums[1] - first       + src[4*step];
00482                 sums[3] = sums[2] - first       + src[5*step];
00483                 sums[4] = sums[3] - first       + src[6*step];
00484                 sums[5] = sums[4] - src[0*step] + src[7*step];
00485                 sums[6] = sums[5] - src[1*step] + last;
00486                 sums[7] = sums[6] - src[2*step] + last;
00487                 sums[8] = sums[7] - src[3*step] + last;
00488                 sums[9] = sums[8] - src[4*step] + last;
00489 
00490                 src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
00491                 src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
00492                 src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
00493                 src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
00494                 src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
00495                 src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
00496                 src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
00497                 src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
00498             }
00499         }else{
00500             const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
00501 
00502             if(FFABS(middleEnergy) < 8*QP){
00503                 const int q=(src[3*step] - src[4*step])/2;
00504                 const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
00505                 const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
00506 
00507                 int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
00508                 d= FFMAX(d, 0);
00509 
00510                 d= (5*d + 32) >> 6;
00511                 d*= FFSIGN(-middleEnergy);
00512 
00513                 if(q>0){
00514                     d= d<0 ? 0 : d;
00515                     d= d>q ? q : d;
00516                 }else{
00517                     d= d>0 ? 0 : d;
00518                     d= d<q ? q : d;
00519                 }
00520 
00521                 src[3*step]-= d;
00522                 src[4*step]+= d;
00523             }
00524         }
00525 
00526         src += stride;
00527     }
00528 /*if(step==16){
00529     STOP_TIMER("step16")
00530 }else{
00531     STOP_TIMER("stepX")
00532 }*/
00533 }
00534 
00535 //Note: there are C, MMX, MMX2 and 3DNow versions; there is no combined 3DNow+MMX2 one
00536 //Plain C versions
00537 #if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
00538 #define COMPILE_C
00539 #endif
00540 
00541 #if HAVE_ALTIVEC
00542 #define COMPILE_ALTIVEC
00543 #endif //HAVE_ALTIVEC
00544 
00545 #if ARCH_X86
00546 
00547 #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
00548 #define COMPILE_MMX
00549 #endif
00550 
00551 #if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
00552 #define COMPILE_MMX2
00553 #endif
00554 
00555 #if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
00556 #define COMPILE_3DNOW
00557 #endif
00558 #endif /* ARCH_X86 */
00559 
00560 #undef HAVE_MMX
00561 #define HAVE_MMX 0
00562 #undef HAVE_MMX2
00563 #define HAVE_MMX2 0
00564 #undef HAVE_AMD3DNOW
00565 #define HAVE_AMD3DNOW 0
00566 #undef HAVE_ALTIVEC
00567 #define HAVE_ALTIVEC 0
00568 
00569 #ifdef COMPILE_C
00570 #define RENAME(a) a ## _C
00571 #include "postprocess_template.c"
00572 #endif
00573 
00574 #ifdef COMPILE_ALTIVEC
00575 #undef RENAME
00576 #undef HAVE_ALTIVEC
00577 #define HAVE_ALTIVEC 1
00578 #define RENAME(a) a ## _altivec
00579 #include "postprocess_altivec_template.c"
00580 #include "postprocess_template.c"
00581 #endif
00582 
00583 //MMX versions
00584 #ifdef COMPILE_MMX
00585 #undef RENAME
00586 #undef HAVE_MMX
00587 #define HAVE_MMX 1
00588 #define RENAME(a) a ## _MMX
00589 #include "postprocess_template.c"
00590 #endif
00591 
00592 //MMX2 versions
00593 #ifdef COMPILE_MMX2
00594 #undef RENAME
00595 #undef HAVE_MMX
00596 #undef HAVE_MMX2
00597 #define HAVE_MMX 1
00598 #define HAVE_MMX2 1
00599 #define RENAME(a) a ## _MMX2
00600 #include "postprocess_template.c"
00601 #endif
00602 
00603 //3DNOW versions
00604 #ifdef COMPILE_3DNOW
00605 #undef RENAME
00606 #undef HAVE_MMX
00607 #undef HAVE_MMX2
00608 #undef HAVE_AMD3DNOW
00609 #define HAVE_MMX 1
00610 #define HAVE_MMX2 0
00611 #define HAVE_AMD3DNOW 1
00612 #define RENAME(a) a ## _3DNow
00613 #include "postprocess_template.c"
00614 #endif
00615 
00616 // Note: the HAVE_xyz macros have been redefined by the template section above and no longer reflect the values from config.h, so use them with care below this line.
00617 
/**
 * Dispatch one post-processing call to a CPU-specific implementation.
 *
 * The opaque handles vm and vc are cast to PPMode and PPContext, and the
 * mode is copied by value into the context. All other arguments are
 * forwarded unchanged to one of the postProcess_<variant>() functions.
 *
 * With CONFIG_RUNTIME_CPUDETECT the variant is selected at run time from
 * c->cpuCaps; otherwise it is fixed at compile time by the HAVE_* macros as
 * they stand at this point of the file.
 */
00618 static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
00619         const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
00620 {
00621     PPContext *c= (PPContext *)vc;
00622     PPMode *ppMode= (PPMode *)vm;
00623     c->ppMode= *ppMode; //FIXME
00624 
00625     // Using ifs here as they are faster than function pointers although the
00626     // difference would not be measurable here but it is much better because
00627     // someone might exchange the CPU without restarting MPlayer ;)
00628 #if CONFIG_RUNTIME_CPUDETECT
00629 #if ARCH_X86
00630     // ordered per speed fastest first
00631     if(c->cpuCaps & PP_CPU_CAPS_MMX2)
00632         postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00633     else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
00634         postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00635     else if(c->cpuCaps & PP_CPU_CAPS_MMX)
00636         postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00637     else
00638         postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00639 #else
00640 #if HAVE_ALTIVEC
00641     if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
00642             postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00643     else
00644 #endif
00645             postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00646 #endif
00647 #else /* CONFIG_RUNTIME_CPUDETECT */
00648 #if   HAVE_MMX2
00649             postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00650 #elif HAVE_AMD3DNOW
00651             postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00652 #elif HAVE_MMX
00653             postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00654 #elif HAVE_ALTIVEC
00655             postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00656 #else
00657             postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
00658 #endif
00659 #endif /* !CONFIG_RUNTIME_CPUDETECT */
00660 }
00661 
00662 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
00663 //        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
00664 
/**
 * Help text for the -pp command line option.
 *
 * The filter names listed here must be spelled exactly as in the filters[]
 * and replaceTable[] tables of this file, because the parser compares names
 * case-sensitively with strcmp(). The long names of "ha", "va" and "fq"
 * are therefore "ahdeblock", "avdeblock" and "forcequant".
 */
const char pp_help[] =
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     ahdeblock       (2 threshold)           horizontal deblocking filter\n"
"va     avdeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forcequant      <quantizer>             force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
00710 
00711 pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
00712 {
00713     char temp[GET_MODE_BUFFER_SIZE];
00714     char *p= temp;
00715     static const char filterDelimiters[] = ",/";
00716     static const char optionDelimiters[] = ":";
00717     struct PPMode *ppMode;
00718     char *filterToken;
00719 
00720     ppMode= av_malloc(sizeof(PPMode));
00721 
00722     ppMode->lumMode= 0;
00723     ppMode->chromMode= 0;
00724     ppMode->maxTmpNoise[0]= 700;
00725     ppMode->maxTmpNoise[1]= 1500;
00726     ppMode->maxTmpNoise[2]= 3000;
00727     ppMode->maxAllowedY= 234;
00728     ppMode->minAllowedY= 16;
00729     ppMode->baseDcDiff= 256/8;
00730     ppMode->flatnessThreshold= 56-16-1;
00731     ppMode->maxClippedThreshold= 0.01;
00732     ppMode->error=0;
00733 
00734     memset(temp, 0, GET_MODE_BUFFER_SIZE);
00735     av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
00736 
00737     av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
00738 
00739     for(;;){
00740         char *filterName;
00741         int q= 1000000; //PP_QUALITY_MAX;
00742         int chrom=-1;
00743         int luma=-1;
00744         char *option;
00745         char *options[OPTIONS_ARRAY_SIZE];
00746         int i;
00747         int filterNameOk=0;
00748         int numOfUnknownOptions=0;
00749         int enable=1; //does the user want us to enabled or disabled the filter
00750 
00751         filterToken= strtok(p, filterDelimiters);
00752         if(filterToken == NULL) break;
00753         p+= strlen(filterToken) + 1; // p points to next filterToken
00754         filterName= strtok(filterToken, optionDelimiters);
00755         av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
00756 
00757         if(*filterName == '-'){
00758             enable=0;
00759             filterName++;
00760         }
00761 
00762         for(;;){ //for all options
00763             option= strtok(NULL, optionDelimiters);
00764             if(option == NULL) break;
00765 
00766             av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
00767             if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
00768             else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
00769             else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
00770             else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
00771             else{
00772                 options[numOfUnknownOptions] = option;
00773                 numOfUnknownOptions++;
00774             }
00775             if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
00776         }
00777         options[numOfUnknownOptions] = NULL;
00778 
00779         /* replace stuff from the replace Table */
00780         for(i=0; replaceTable[2*i]!=NULL; i++){
00781             if(!strcmp(replaceTable[2*i], filterName)){
00782                 int newlen= strlen(replaceTable[2*i + 1]);
00783                 int plen;
00784                 int spaceLeft;
00785 
00786                 if(p==NULL) p= temp, *p=0;      //last filter
00787                 else p--, *p=',';               //not last filter
00788 
00789                 plen= strlen(p);
00790                 spaceLeft= p - temp + plen;
00791                 if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE - 1){
00792                     ppMode->error++;
00793                     break;
00794                 }
00795                 memmove(p + newlen, p, plen+1);
00796                 memcpy(p, replaceTable[2*i + 1], newlen);
00797                 filterNameOk=1;
00798             }
00799         }
00800 
00801         for(i=0; filters[i].shortName!=NULL; i++){
00802             if(   !strcmp(filters[i].longName, filterName)
00803                || !strcmp(filters[i].shortName, filterName)){
00804                 ppMode->lumMode &= ~filters[i].mask;
00805                 ppMode->chromMode &= ~filters[i].mask;
00806 
00807                 filterNameOk=1;
00808                 if(!enable) break; // user wants to disable it
00809 
00810                 if(q >= filters[i].minLumQuality && luma)
00811                     ppMode->lumMode|= filters[i].mask;
00812                 if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
00813                     if(q >= filters[i].minChromQuality)
00814                             ppMode->chromMode|= filters[i].mask;
00815 
00816                 if(filters[i].mask == LEVEL_FIX){
00817                     int o;
00818                     ppMode->minAllowedY= 16;
00819                     ppMode->maxAllowedY= 234;
00820                     for(o=0; options[o]!=NULL; o++){
00821                         if(  !strcmp(options[o],"fullyrange")
00822                            ||!strcmp(options[o],"f")){
00823                             ppMode->minAllowedY= 0;
00824                             ppMode->maxAllowedY= 255;
00825                             numOfUnknownOptions--;
00826                         }
00827                     }
00828                 }
00829                 else if(filters[i].mask == TEMP_NOISE_FILTER)
00830                 {
00831                     int o;
00832                     int numOfNoises=0;
00833 
00834                     for(o=0; options[o]!=NULL; o++){
00835                         char *tail;
00836                         ppMode->maxTmpNoise[numOfNoises]=
00837                             strtol(options[o], &tail, 0);
00838                         if(tail!=options[o]){
00839                             numOfNoises++;
00840                             numOfUnknownOptions--;
00841                             if(numOfNoises >= 3) break;
00842                         }
00843                     }
00844                 }
00845                 else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
00846                      || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
00847                     int o;
00848 
00849                     for(o=0; options[o]!=NULL && o<2; o++){
00850                         char *tail;
00851                         int val= strtol(options[o], &tail, 0);
00852                         if(tail==options[o]) break;
00853 
00854                         numOfUnknownOptions--;
00855                         if(o==0) ppMode->baseDcDiff= val;
00856                         else ppMode->flatnessThreshold= val;
00857                     }
00858                 }
00859                 else if(filters[i].mask == FORCE_QUANT){
00860                     int o;
00861                     ppMode->forcedQuant= 15;
00862 
00863                     for(o=0; options[o]!=NULL && o<1; o++){
00864                         char *tail;
00865                         int val= strtol(options[o], &tail, 0);
00866                         if(tail==options[o]) break;
00867 
00868                         numOfUnknownOptions--;
00869                         ppMode->forcedQuant= val;
00870                     }
00871                 }
00872             }
00873         }
00874         if(!filterNameOk) ppMode->error++;
00875         ppMode->error += numOfUnknownOptions;
00876     }
00877 
00878     av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
00879     if(ppMode->error){
00880         av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
00881         av_free(ppMode);
00882         return NULL;
00883     }
00884     return ppMode;
00885 }
00886 
00887 void pp_free_mode(pp_mode *mode){
00888     av_free(mode);
00889 }
00890 
/**
 * Drop the buffer currently stored in *p and replace it with a new
 * av_mallocz() allocation.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment accepted from callers but not used by this function
 * @param size      number of bytes requested from av_mallocz()
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    /* The old buffer is released first; its contents are not carried over. */
    av_free(*p);

    fresh = av_mallocz(size);
    *p    = fresh;
}
00895 
00896 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
00897     int mbWidth = (width+15)>>4;
00898     int mbHeight= (height+15)>>4;
00899     int i;
00900 
00901     c->stride= stride;
00902     c->qpStride= qpStride;
00903 
00904     reallocAlign((void **)&c->tempDst, 8, stride*24);
00905     reallocAlign((void **)&c->tempSrc, 8, stride*24);
00906     reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
00907     reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
00908     for(i=0; i<256; i++)
00909             c->yHistogram[i]= width*height/64*15/256;
00910 
00911     for(i=0; i<3; i++){
00912         //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
00913         reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
00914         reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
00915     }
00916 
00917     reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
00918     reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00919     reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
00920     reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
00921 }
00922 
/**
 * Name callback used by av_codec_context_class.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    static const char name[] = "postproc";

    (void)ptr;
    return name;
}
00926 
00927 static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
00928 
00929 pp_context *pp_get_context(int width, int height, int cpuCaps){
00930     PPContext *c= av_malloc(sizeof(PPContext));
00931     int stride= FFALIGN(width, 16);  //assumed / will realloc if needed
00932     int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
00933 
00934     memset(c, 0, sizeof(PPContext));
00935     c->av_class = &av_codec_context_class;
00936     c->cpuCaps= cpuCaps;
00937     if(cpuCaps&PP_FORMAT){
00938         c->hChromaSubSample= cpuCaps&0x3;
00939         c->vChromaSubSample= (cpuCaps>>4)&0x3;
00940     }else{
00941         c->hChromaSubSample= 1;
00942         c->vChromaSubSample= 1;
00943     }
00944 
00945     reallocBuffers(c, width, height, stride, qpStride);
00946 
00947     c->frameNum=-1;
00948 
00949     return c;
00950 }
00951 
00952 void pp_free_context(void *vc){
00953     PPContext *c = (PPContext*)vc;
00954     int i;
00955 
00956     for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
00957     for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
00958 
00959     av_free(c->tempBlocks);
00960     av_free(c->yHistogram);
00961     av_free(c->tempDst);
00962     av_free(c->tempSrc);
00963     av_free(c->deintTemp);
00964     av_free(c->stdQPTable);
00965     av_free(c->nonBQPTable);
00966     av_free(c->forcedQPTable);
00967 
00968     memset(c, 0, sizeof(PPContext));
00969 
00970     av_free(c);
00971 }
00972 
00973 void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
00974                      uint8_t * dst[3], const int dstStride[3],
00975                      int width, int height,
00976                      const QP_STORE_T *QP_store,  int QPStride,
00977                      pp_mode *vm,  void *vc, int pict_type)
00978 {
00979     int mbWidth = (width+15)>>4;
00980     int mbHeight= (height+15)>>4;
00981     PPMode *mode = (PPMode*)vm;
00982     PPContext *c = (PPContext*)vc;
00983     int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
00984     int absQPStride = FFABS(QPStride);
00985 
00986     // c->stride and c->QPStride are always positive
00987     if(c->stride < minStride || c->qpStride < absQPStride)
00988         reallocBuffers(c, width, height,
00989                        FFMAX(minStride, c->stride),
00990                        FFMAX(c->qpStride, absQPStride));
00991 
00992     if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
00993         int i;
00994         QP_store= c->forcedQPTable;
00995         absQPStride = QPStride = 0;
00996         if(mode->lumMode & FORCE_QUANT)
00997             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
00998         else
00999             for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
01000     }
01001 
01002     if(pict_type & PP_PICT_TYPE_QP2){
01003         int i;
01004         const int count= mbHeight * absQPStride;
01005         for(i=0; i<(count>>2); i++){
01006             ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
01007         }
01008         for(i<<=2; i<count; i++){
01009             c->stdQPTable[i] = QP_store[i]>>1;
01010         }
01011         QP_store= c->stdQPTable;
01012         QPStride= absQPStride;
01013     }
01014 
01015     if(0){
01016         int x,y;
01017         for(y=0; y<mbHeight; y++){
01018             for(x=0; x<mbWidth; x++){
01019                 av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
01020             }
01021             av_log(c, AV_LOG_INFO, "\n");
01022         }
01023         av_log(c, AV_LOG_INFO, "\n");
01024     }
01025 
01026     if((pict_type&7)!=3){
01027         if (QPStride >= 0){
01028             int i;
01029             const int count= mbHeight * QPStride;
01030             for(i=0; i<(count>>2); i++){
01031                 ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
01032             }
01033             for(i<<=2; i<count; i++){
01034                 c->nonBQPTable[i] = QP_store[i] & 0x3F;
01035             }
01036         } else {
01037             int i,j;
01038             for(i=0; i<mbHeight; i++) {
01039                 for(j=0; j<absQPStride; j++) {
01040                     c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
01041                 }
01042             }
01043         }
01044     }
01045 
01046     av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
01047            mode->lumMode, mode->chromMode);
01048 
01049     postProcess(src[0], srcStride[0], dst[0], dstStride[0],
01050                 width, height, QP_store, QPStride, 0, mode, c);
01051 
01052     width  = (width )>>c->hChromaSubSample;
01053     height = (height)>>c->vChromaSubSample;
01054 
01055     if(mode->chromMode){
01056         postProcess(src[1], srcStride[1], dst[1], dstStride[1],
01057                     width, height, QP_store, QPStride, 1, mode, c);
01058         postProcess(src[2], srcStride[2], dst[2], dstStride[2],
01059                     width, height, QP_store, QPStride, 2, mode, c);
01060     }
01061     else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
01062         linecpy(dst[1], src[1], height, srcStride[1]);
01063         linecpy(dst[2], src[2], height, srcStride[2]);
01064     }else{
01065         int y;
01066         for(y=0; y<height; y++){
01067             memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
01068             memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
01069         }
01070     }
01071 }
Generated on Sun Apr 22 2012 21:54:09 for Libav by doxygen 1.7.1