• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

libavcodec/dct-test.c

Go to the documentation of this file.
00001 /*
00002  * (c) 2001 Fabrice Bellard
00003  *     2007 Marc Hoffman <marc.hoffman@analog.com>
00004  *
00005  * This file is part of FFmpeg.
00006  *
00007  * FFmpeg is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * FFmpeg is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with FFmpeg; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00028 #include <stdlib.h>
00029 #include <stdio.h>
00030 #include <string.h>
00031 #include <sys/time.h>
00032 #include <unistd.h>
00033 #include <math.h>
00034 
00035 #include "libavutil/cpu.h"
00036 #include "libavutil/common.h"
00037 #include "libavutil/lfg.h"
00038 
00039 #include "simple_idct.h"
00040 #include "aandcttab.h"
00041 #include "faandct.h"
00042 #include "faanidct.h"
00043 #include "x86/idct_xvid.h"
00044 #include "dctref.h"
00045 
00046 #undef printf
00047 
00048 void ff_mmx_idct(DCTELEM *data);
00049 void ff_mmxext_idct(DCTELEM *data);
00050 
00051 void odivx_idct_c(short *block);
00052 
00053 // BFIN
00054 void ff_bfin_idct(DCTELEM *block);
00055 void ff_bfin_fdct(DCTELEM *block);
00056 
00057 // ALTIVEC
00058 void fdct_altivec(DCTELEM *block);
00059 //void idct_altivec(DCTELEM *block);?? no routine
00060 
00061 // ARM
00062 void ff_j_rev_dct_arm(DCTELEM *data);
00063 void ff_simple_idct_arm(DCTELEM *data);
00064 void ff_simple_idct_armv5te(DCTELEM *data);
00065 void ff_simple_idct_armv6(DCTELEM *data);
00066 void ff_simple_idct_neon(DCTELEM *data);
00067 
00068 void ff_simple_idct_axp(DCTELEM *data);
00069 
00070 struct algo {
00071     const char *name;
00072     void (*func)(DCTELEM *block);
00073     enum formattag { NO_PERM, MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM,
00074                      SSE2_PERM, PARTTRANS_PERM, TRANSPOSE_PERM } format;
00075     int mm_support;
00076     int nonspec;
00077 };
00078 
00079 #ifndef FAAN_POSTSCALE
00080 #define FAAN_SCALE SCALE_PERM
00081 #else
00082 #define FAAN_SCALE NO_PERM
00083 #endif
00084 
00085 static int cpu_flags;
00086 
00087 static const struct algo fdct_tab[] = {
00088     { "REF-DBL",        ff_ref_fdct,           NO_PERM    },
00089     { "FAAN",           ff_faandct,            FAAN_SCALE },
00090     { "IJG-AAN-INT",    fdct_ifast,            SCALE_PERM },
00091     { "IJG-LLM-INT",    ff_jpeg_fdct_islow_8,  NO_PERM    },
00092 
00093 #if HAVE_MMX
00094     { "MMX",            ff_fdct_mmx,           NO_PERM,   AV_CPU_FLAG_MMX     },
00095     { "MMX2",           ff_fdct_mmx2,          NO_PERM,   AV_CPU_FLAG_MMX2    },
00096     { "SSE2",           ff_fdct_sse2,          NO_PERM,   AV_CPU_FLAG_SSE2    },
00097 #endif
00098 
00099 #if HAVE_ALTIVEC
00100     { "altivecfdct",    fdct_altivec,          NO_PERM,   AV_CPU_FLAG_ALTIVEC },
00101 #endif
00102 
00103 #if ARCH_BFIN
00104     { "BFINfdct",       ff_bfin_fdct,          NO_PERM  },
00105 #endif
00106 
00107     { 0 }
00108 };
00109 
#if HAVE_MMX
void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize,
                                DCTELEM *block, int16_t *qmat);

/**
 * Adapter that gives ff_prores_idct_put_10_sse2() the one-argument
 * signature stored in struct algo.
 *
 * The 64 values in dst are copied to a scratch block, every quantiser
 * entry is set to 4, and the result is written back over dst with a line
 * size of 16.
 *
 * The parameter is declared as DCTELEM * so that the function's type
 * matches the struct algo func member it is assigned to; the buffer is
 * reinterpreted as uint16_t only for the output argument of the wrapped
 * routine.
 *
 * @param dst coefficient block, overwritten with the transform output
 */
static void ff_prores_idct_put_10_sse2_wrap(DCTELEM *dst)
{
    /* NOTE(review): 16-byte alignment is requested on the assumption that
     * the SSE2 routine uses aligned memory operands - confirm against the
     * assembly. */
    DECLARE_ALIGNED(16, int16_t, qmat)[64];
    DECLARE_ALIGNED(16, int16_t, tmp)[64];
    int i;

    for (i = 0; i < 64; i++) {
        qmat[i] = 4;
        tmp[i]  = dst[i];
    }
    ff_prores_idct_put_10_sse2((uint16_t *) dst, 16, tmp, qmat);
}
#endif
00125 
00126 static const struct algo idct_tab[] = {
00127     { "FAANI",          ff_faanidct,           NO_PERM  },
00128     { "REF-DBL",        ff_ref_idct,           NO_PERM  },
00129     { "INT",            j_rev_dct,             MMX_PERM },
00130     { "SIMPLE-C",       ff_simple_idct_8,      NO_PERM  },
00131 
00132 #if HAVE_MMX
00133 #if CONFIG_GPL
00134     { "LIBMPEG2-MMX",   ff_mmx_idct,           MMX_PERM,  AV_CPU_FLAG_MMX,  1 },
00135     { "LIBMPEG2-MMX2",  ff_mmxext_idct,        MMX_PERM,  AV_CPU_FLAG_MMX2, 1 },
00136 #endif
00137     { "SIMPLE-MMX",     ff_simple_idct_mmx,  MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX },
00138     { "XVID-MMX",       ff_idct_xvid_mmx,      NO_PERM,   AV_CPU_FLAG_MMX,  1 },
00139     { "XVID-MMX2",      ff_idct_xvid_mmx2,     NO_PERM,   AV_CPU_FLAG_MMX2, 1 },
00140     { "XVID-SSE2",      ff_idct_xvid_sse2,     SSE2_PERM, AV_CPU_FLAG_SSE2, 1 },
00141 #if ARCH_X86_64
00142     { "PR-SSE2",        ff_prores_idct_put_10_sse2_wrap,     TRANSPOSE_PERM, AV_CPU_FLAG_SSE2, 1 },
00143 #endif
00144 #endif
00145 
00146 #if ARCH_BFIN
00147     { "BFINidct",       ff_bfin_idct,          NO_PERM  },
00148 #endif
00149 
00150 #if ARCH_ARM
00151     { "SIMPLE-ARM",     ff_simple_idct_arm,    NO_PERM  },
00152     { "INT-ARM",        ff_j_rev_dct_arm,      MMX_PERM },
00153 #endif
00154 #if HAVE_ARMV5TE
00155     { "SIMPLE-ARMV5TE", ff_simple_idct_armv5te,NO_PERM  },
00156 #endif
00157 #if HAVE_ARMV6
00158     { "SIMPLE-ARMV6",   ff_simple_idct_armv6,  MMX_PERM },
00159 #endif
00160 #if HAVE_NEON
00161     { "SIMPLE-NEON",    ff_simple_idct_neon,   PARTTRANS_PERM },
00162 #endif
00163 
00164 #if ARCH_ALPHA
00165     { "SIMPLE-ALPHA",   ff_simple_idct_axp,    NO_PERM },
00166 #endif
00167 
00168     { 0 }
00169 };
00170 
/* Right shift applied after multiplying by the rescaling factor that
 * dct_error() derives from ff_aanscales[]. */
#define AANSCALE_BITS 12

/**
 * Read the clock through gettimeofday().
 *
 * @return seconds and microseconds combined into one microsecond count
 */
static int64_t gettime(void)
{
    struct timeval now;
    int64_t usec;

    gettimeofday(&now, NULL);
    usec  = (int64_t)now.tv_sec * 1000000;
    usec += now.tv_usec;
    return usec;
}
00179 
/* Number of generated blocks pushed through each accuracy test. */
#define NB_ITS 20000
/* Number of transform calls made between two clock reads in the speed tests. */
#define NB_ITS_SPEED 50000

/* Destination indices for MMX_PERM; the contents are computed by
 * idct_mmx_init(). */
static short idct_mmx_perm[64];

/* Destination indices for MMX_SIMPLE_PERM: permute() stores source
 * coefficient i at position idct_simple_mmx_perm[i]. */
static short idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/* Replacement for the low three index bits under SSE2_PERM; the upper
 * three bits are kept unchanged by permute(). */
static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
00197 
00198 static void idct_mmx_init(void)
00199 {
00200     int i;
00201 
00202     /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
00203     for (i = 0; i < 64; i++) {
00204         idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
00205     }
00206 }
00207 
00208 DECLARE_ALIGNED(16, static DCTELEM, block)[64];
00209 DECLARE_ALIGNED(8,  static DCTELEM, block1)[64];
00210 
00211 static inline void mmx_emms(void)
00212 {
00213 #if HAVE_MMX
00214     if (cpu_flags & AV_CPU_FLAG_MMX)
00215         __asm__ volatile ("emms\n\t");
00216 #endif
00217 }
00218 
00219 static void init_block(DCTELEM block[64], int test, int is_idct, AVLFG *prng, int vals)
00220 {
00221     int i, j;
00222 
00223     memset(block, 0, 64 * sizeof(*block));
00224 
00225     switch (test) {
00226     case 0:
00227         for (i = 0; i < 64; i++)
00228             block[i] = (av_lfg_get(prng) % (2*vals)) -vals;
00229         if (is_idct) {
00230             ff_ref_fdct(block);
00231             for (i = 0; i < 64; i++)
00232                 block[i] >>= 3;
00233         }
00234         break;
00235     case 1:
00236         j = av_lfg_get(prng) % 10 + 1;
00237         for (i = 0; i < j; i++)
00238             block[av_lfg_get(prng) % 64] = av_lfg_get(prng) % (2*vals) -vals;
00239         break;
00240     case 2:
00241         block[ 0] = av_lfg_get(prng) % (16*vals) - (8*vals);
00242         block[63] = (block[0] & 1) ^ 1;
00243         break;
00244     }
00245 }
00246 
00247 static void permute(DCTELEM dst[64], const DCTELEM src[64], int perm)
00248 {
00249     int i;
00250 
00251     if (perm == MMX_PERM) {
00252         for (i = 0; i < 64; i++)
00253             dst[idct_mmx_perm[i]] = src[i];
00254     } else if (perm == MMX_SIMPLE_PERM) {
00255         for (i = 0; i < 64; i++)
00256             dst[idct_simple_mmx_perm[i]] = src[i];
00257     } else if (perm == SSE2_PERM) {
00258         for (i = 0; i < 64; i++)
00259             dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];
00260     } else if (perm == PARTTRANS_PERM) {
00261         for (i = 0; i < 64; i++)
00262             dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
00263     } else if (perm == TRANSPOSE_PERM) {
00264         for (i = 0; i < 64; i++)
00265             dst[(i>>3) | ((i<<3)&0x38)] = src[i];
00266     } else {
00267         for (i = 0; i < 64; i++)
00268             dst[i] = src[i];
00269     }
00270 }
00271 
00272 static int dct_error(const struct algo *dct, int test, int is_idct, int speed, const int bits)
00273 {
00274     void (*ref)(DCTELEM *block) = is_idct ? ff_ref_idct : ff_ref_fdct;
00275     int it, i, scale;
00276     int err_inf, v;
00277     int64_t err2, ti, ti1, it1, err_sum = 0;
00278     int64_t sysErr[64], sysErrMax = 0;
00279     int maxout = 0;
00280     int blockSumErrMax = 0, blockSumErr;
00281     AVLFG prng;
00282     const int vals=1<<bits;
00283     double omse, ome;
00284     int spec_err;
00285 
00286     av_lfg_init(&prng, 1);
00287 
00288     err_inf = 0;
00289     err2 = 0;
00290     for (i = 0; i < 64; i++)
00291         sysErr[i] = 0;
00292     for (it = 0; it < NB_ITS; it++) {
00293         init_block(block1, test, is_idct, &prng, vals);
00294         permute(block, block1, dct->format);
00295 
00296         dct->func(block);
00297         mmx_emms();
00298 
00299         if (dct->format == SCALE_PERM) {
00300             for (i = 0; i < 64; i++) {
00301                 scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
00302                 block[i] = (block[i] * scale) >> AANSCALE_BITS;
00303             }
00304         }
00305 
00306         ref(block1);
00307 
00308         blockSumErr = 0;
00309         for (i = 0; i < 64; i++) {
00310             int err = block[i] - block1[i];
00311             err_sum += err;
00312             v = abs(err);
00313             if (v > err_inf)
00314                 err_inf = v;
00315             err2 += v * v;
00316             sysErr[i] += block[i] - block1[i];
00317             blockSumErr += v;
00318             if (abs(block[i]) > maxout)
00319                 maxout = abs(block[i]);
00320         }
00321         if (blockSumErrMax < blockSumErr)
00322             blockSumErrMax = blockSumErr;
00323     }
00324     for (i = 0; i < 64; i++)
00325         sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i]));
00326 
00327     for (i = 0; i < 64; i++) {
00328         if (i % 8 == 0)
00329             printf("\n");
00330         printf("%7d ", (int) sysErr[i]);
00331     }
00332     printf("\n");
00333 
00334     omse = (double) err2 / NB_ITS / 64;
00335     ome  = (double) err_sum / NB_ITS / 64;
00336 
00337     spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
00338 
00339     printf("%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
00340            is_idct ? "IDCT" : "DCT", dct->name, err_inf,
00341            omse, ome, (double) sysErrMax / NB_ITS,
00342            maxout, blockSumErrMax);
00343 
00344     if (spec_err && !dct->nonspec)
00345         return 1;
00346 
00347     if (!speed)
00348         return 0;
00349 
00350     /* speed test */
00351 
00352     init_block(block, test, is_idct, &prng, vals);
00353     permute(block1, block, dct->format);
00354 
00355     ti = gettime();
00356     it1 = 0;
00357     do {
00358         for (it = 0; it < NB_ITS_SPEED; it++) {
00359             memcpy(block, block1, sizeof(block));
00360             dct->func(block);
00361         }
00362         it1 += NB_ITS_SPEED;
00363         ti1 = gettime() - ti;
00364     } while (ti1 < 1000000);
00365     mmx_emms();
00366 
00367     printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name,
00368            (double) it1 * 1000.0 / (double) ti1);
00369 
00370     return 0;
00371 }
00372 
/* 8x8 output pictures compared by idct248_error(): img_dest receives the
 * output of the implementation under test, img_dest1 that of idct248_ref(). */
DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];
00375 
00376 static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
00377 {
00378     static int init;
00379     static double c8[8][8];
00380     static double c4[4][4];
00381     double block1[64], block2[64], block3[64];
00382     double s, sum, v;
00383     int i, j, k;
00384 
00385     if (!init) {
00386         init = 1;
00387 
00388         for (i = 0; i < 8; i++) {
00389             sum = 0;
00390             for (j = 0; j < 8; j++) {
00391                 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
00392                 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
00393                 sum += c8[i][j] * c8[i][j];
00394             }
00395         }
00396 
00397         for (i = 0; i < 4; i++) {
00398             sum = 0;
00399             for (j = 0; j < 4; j++) {
00400                 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
00401                 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
00402                 sum += c4[i][j] * c4[i][j];
00403             }
00404         }
00405     }
00406 
00407     /* butterfly */
00408     s = 0.5 * sqrt(2.0);
00409     for (i = 0; i < 4; i++) {
00410         for (j = 0; j < 8; j++) {
00411             block1[8 * (2 * i) + j] =
00412                 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
00413             block1[8 * (2 * i + 1) + j] =
00414                 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
00415         }
00416     }
00417 
00418     /* idct8 on lines */
00419     for (i = 0; i < 8; i++) {
00420         for (j = 0; j < 8; j++) {
00421             sum = 0;
00422             for (k = 0; k < 8; k++)
00423                 sum += c8[k][j] * block1[8 * i + k];
00424             block2[8 * i + j] = sum;
00425         }
00426     }
00427 
00428     /* idct4 */
00429     for (i = 0; i < 8; i++) {
00430         for (j = 0; j < 4; j++) {
00431             /* top */
00432             sum = 0;
00433             for (k = 0; k < 4; k++)
00434                 sum += c4[k][j] * block2[8 * (2 * k) + i];
00435             block3[8 * (2 * j) + i] = sum;
00436 
00437             /* bottom */
00438             sum = 0;
00439             for (k = 0; k < 4; k++)
00440                 sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
00441             block3[8 * (2 * j + 1) + i] = sum;
00442         }
00443     }
00444 
00445     /* clamp and store the result */
00446     for (i = 0; i < 8; i++) {
00447         for (j = 0; j < 8; j++) {
00448             v = block3[8 * i + j];
00449             if      (v < 0)   v = 0;
00450             else if (v > 255) v = 255;
00451             dest[i * linesize + j] = (int) rint(v);
00452         }
00453     }
00454 }
00455 
00456 static void idct248_error(const char *name,
00457                           void (*idct248_put)(uint8_t *dest, int line_size,
00458                                               int16_t *block),
00459                           int speed)
00460 {
00461     int it, i, it1, ti, ti1, err_max, v;
00462     AVLFG prng;
00463 
00464     av_lfg_init(&prng, 1);
00465 
00466     /* just one test to see if code is correct (precision is less
00467        important here) */
00468     err_max = 0;
00469     for (it = 0; it < NB_ITS; it++) {
00470         /* XXX: use forward transform to generate values */
00471         for (i = 0; i < 64; i++)
00472             block1[i] = av_lfg_get(&prng) % 256 - 128;
00473         block1[0] += 1024;
00474 
00475         for (i = 0; i < 64; i++)
00476             block[i] = block1[i];
00477         idct248_ref(img_dest1, 8, block);
00478 
00479         for (i = 0; i < 64; i++)
00480             block[i] = block1[i];
00481         idct248_put(img_dest, 8, block);
00482 
00483         for (i = 0; i < 64; i++) {
00484             v = abs((int) img_dest[i] - (int) img_dest1[i]);
00485             if (v == 255)
00486                 printf("%d %d\n", img_dest[i], img_dest1[i]);
00487             if (v > err_max)
00488                 err_max = v;
00489         }
00490 #if 0
00491         printf("ref=\n");
00492         for(i=0;i<8;i++) {
00493             int j;
00494             for(j=0;j<8;j++) {
00495                 printf(" %3d", img_dest1[i*8+j]);
00496             }
00497             printf("\n");
00498         }
00499 
00500         printf("out=\n");
00501         for(i=0;i<8;i++) {
00502             int j;
00503             for(j=0;j<8;j++) {
00504                 printf(" %3d", img_dest[i*8+j]);
00505             }
00506             printf("\n");
00507         }
00508 #endif
00509     }
00510     printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);
00511 
00512     if (!speed)
00513         return;
00514 
00515     ti = gettime();
00516     it1 = 0;
00517     do {
00518         for (it = 0; it < NB_ITS_SPEED; it++) {
00519             for (i = 0; i < 64; i++)
00520                 block[i] = block1[i];
00521             idct248_put(img_dest, 8, block);
00522         }
00523         it1 += NB_ITS_SPEED;
00524         ti1 = gettime() - ti;
00525     } while (ti1 < 1000000);
00526     mmx_emms();
00527 
00528     printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
00529            (double) it1 * 1000.0 / (double) ti1);
00530 }
00531 
/** Print the command-line usage summary to standard output. */
static void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>] [<bits>]\n"
        "test-number 0 -> test with random matrixes\n"
        "            1 -> test with random sparse matrixes\n"
        "            2 -> do 3. test from mpeg4 std\n"
        "bits        Number of time domain bits to use, 8 is default\n"
        "-i          test IDCT implementations\n"
        "-4          test IDCT248 implementations\n"
        "-t          speed test\n";

    printf("%s", usage);
}
00543 
00544 int main(int argc, char **argv)
00545 {
00546     int test_idct = 0, test_248_dct = 0;
00547     int c, i;
00548     int test = 1;
00549     int speed = 0;
00550     int err = 0;
00551     int bits=8;
00552 
00553     cpu_flags = av_get_cpu_flags();
00554 
00555     ff_ref_dct_init();
00556     idct_mmx_init();
00557 
00558     for (;;) {
00559         c = getopt(argc, argv, "ih4t");
00560         if (c == -1)
00561             break;
00562         switch (c) {
00563         case 'i':
00564             test_idct = 1;
00565             break;
00566         case '4':
00567             test_248_dct = 1;
00568             break;
00569         case 't':
00570             speed = 1;
00571             break;
00572         default:
00573         case 'h':
00574             help();
00575             return 0;
00576         }
00577     }
00578 
00579     if (optind < argc)
00580         test = atoi(argv[optind]);
00581     if(optind+1 < argc) bits= atoi(argv[optind+1]);
00582 
00583     printf("ffmpeg DCT/IDCT test\n");
00584 
00585     if (test_248_dct) {
00586         idct248_error("SIMPLE-C", ff_simple_idct248_put, speed);
00587     } else {
00588         const struct algo *algos = test_idct ? idct_tab : fdct_tab;
00589         for (i = 0; algos[i].name; i++)
00590             if (!(~cpu_flags & algos[i].mm_support)) {
00591                 err |= dct_error(&algos[i], test, test_idct, speed, bits);
00592             }
00593     }
00594 
00595     return err;
00596 }
Generated on Fri Feb 1 2013 14:34:31 for FFmpeg by doxygen 1.7.1