• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

libavcodec/dsputil.c

Go to the documentation of this file.
00001 /*
00002  * DSP utils
00003  * Copyright (c) 2000, 2001 Fabrice Bellard
00004  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
00005  *
00006  * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
00007  *
00008  * This file is part of FFmpeg.
00009  *
00010  * FFmpeg is free software; you can redistribute it and/or
00011  * modify it under the terms of the GNU Lesser General Public
00012  * License as published by the Free Software Foundation; either
00013  * version 2.1 of the License, or (at your option) any later version.
00014  *
00015  * FFmpeg is distributed in the hope that it will be useful,
00016  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00017  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018  * Lesser General Public License for more details.
00019  *
00020  * You should have received a copy of the GNU Lesser General Public
00021  * License along with FFmpeg; if not, write to the Free Software
00022  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00023  */
00024 
00030 #include "libavutil/imgutils.h"
00031 #include "avcodec.h"
00032 #include "dsputil.h"
00033 #include "simple_idct.h"
00034 #include "faandct.h"
00035 #include "faanidct.h"
00036 #include "mathops.h"
00037 #include "mpegvideo.h"
00038 #include "config.h"
00039 #include "ac3dec.h"
00040 #include "vorbis.h"
00041 #include "diracdsp.h"
00042 
00043 uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
00044 uint32_t ff_squareTbl[512] = {0, };
00045 
00046 #define BIT_DEPTH 9
00047 #include "dsputil_template.c"
00048 #undef BIT_DEPTH
00049 
00050 #define BIT_DEPTH 10
00051 #include "dsputil_template.c"
00052 #undef BIT_DEPTH
00053 
00054 #define BIT_DEPTH 8
00055 #include "dsputil_template.c"
00056 
/* Byte-replicated masks as wide as unsigned long: every byte of pb_7f is 0x7f
 * and every byte of pb_80 is 0x80 (0x7f7f7f7f with a 32-bit long,
 * 0x7f7f7f7f7f7f7f7f with a 64-bit long).  ~0UL / 0xff puts 0x01 in each byte. */
#define pb_7f ((~0UL / 0xff) * 0x7f)
#define pb_80 ((~0UL / 0xff) * 0x80)
00060 
/* Zigzag scan order for an 8x8 block: entry i holds the raster index
 * (row * 8 + column) of the i-th coefficient in scan order. */
const uint8_t ff_zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,   /* scan positions  0.. 7 */
    17, 24, 32, 25, 18, 11,  4,  5,   /* scan positions  8..15 */
    12, 19, 26, 33, 40, 48, 41, 34,   /* scan positions 16..23 */
    27, 20, 13,  6,  7, 14, 21, 28,   /* scan positions 24..31 */
    35, 42, 49, 56, 57, 50, 43, 36,   /* scan positions 32..39 */
    29, 22, 15, 23, 30, 37, 44, 51,   /* scan positions 40..47 */
    58, 59, 52, 45, 38, 31, 39, 46,   /* scan positions 48..55 */
    53, 60, 61, 54, 47, 55, 62, 63    /* scan positions 56..63 */
};
00071 
/* Zigzag scan used with the 2-4-8 IDCT.  Unlike the specification, the two
 * fields are interleaved here.  Entry i is the raster index of the i-th
 * coefficient in scan order. */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,   /* scan positions  0.. 7 */
    17, 25, 32, 40, 48, 56, 33, 41,   /* scan positions  8..15 */
    18, 26,  3, 11,  4, 12, 19, 27,   /* scan positions 16..23 */
    34, 42, 49, 57, 50, 58, 35, 43,   /* scan positions 24..31 */
    20, 28,  5, 13,  6, 14, 21, 29,   /* scan positions 32..39 */
    36, 44, 51, 59, 52, 60, 37, 45,   /* scan positions 40..47 */
    22, 30,  7, 15, 23, 31, 38, 46,   /* scan positions 48..55 */
    53, 61, 54, 62, 39, 47, 55, 63,   /* scan positions 56..63 */
};
00084 
00085 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
00086 DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];
00087 
/* Alternate scan order favouring horizontal runs: entry i holds the raster
 * index (row * 8 + column) of the i-th coefficient in scan order. */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,   /* scan positions  0.. 7 */
    10, 11,  4,  5,  6,  7, 15, 14,   /* scan positions  8..15 */
    13, 12, 19, 18, 24, 25, 32, 33,   /* scan positions 16..23 */
    26, 27, 20, 21, 22, 23, 28, 29,   /* scan positions 24..31 */
    30, 31, 34, 35, 40, 41, 48, 49,   /* scan positions 32..39 */
    42, 43, 36, 37, 38, 39, 44, 45,   /* scan positions 40..47 */
    46, 47, 50, 51, 56, 57, 58, 59,   /* scan positions 48..55 */
    52, 53, 54, 55, 60, 61, 62, 63,   /* scan positions 56..63 */
};
00098 
/* Alternate scan order favouring vertical runs: entry i holds the raster
 * index (row * 8 + column) of the i-th coefficient in scan order. */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,   /* scan positions  0.. 7 */
    17, 25, 32, 40, 48, 56, 57, 49,   /* scan positions  8..15 */
    41, 33, 26, 18,  3, 11,  4, 12,   /* scan positions 16..23 */
    19, 27, 34, 42, 50, 58, 35, 43,   /* scan positions 24..31 */
    51, 59, 20, 28,  5, 13,  6, 14,   /* scan positions 32..39 */
    21, 29, 36, 44, 52, 60, 37, 45,   /* scan positions 40..47 */
    53, 61, 22, 30,  7, 15, 23, 31,   /* scan positions 48..55 */
    38, 46, 54, 62, 39, 47, 55, 63,   /* scan positions 56..63 */
};
00109 
/* Coefficient order expected on input by simple_idct_mmx: entry i is the
 * position that raster coefficient i is moved to. */
static const uint8_t simple_mmx_permutation[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,   /* i = 0x00..0x07 */
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,   /* i = 0x08..0x0F */
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,   /* i = 0x10..0x17 */
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,   /* i = 0x18..0x1F */
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,   /* i = 0x20..0x27 */
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,   /* i = 0x28..0x2F */
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,   /* i = 0x30..0x37 */
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,   /* i = 0x38..0x3F */
};
00121 
/* Column reordering applied within each row for FF_SSE2_IDCT_PERM:
 * entry c is the destination column of source column c. */
static const uint8_t idct_sse2_row_perm[8] = {
    0, 4, 1, 5,
    2, 6, 3, 7,
};
00123 
00124 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
00125     int i;
00126     int end;
00127 
00128     st->scantable= src_scantable;
00129 
00130     for(i=0; i<64; i++){
00131         int j;
00132         j = src_scantable[i];
00133         st->permutated[i] = permutation[j];
00134 #if ARCH_PPC
00135         st->inverse[j] = i;
00136 #endif
00137     }
00138 
00139     end=-1;
00140     for(i=0; i<64; i++){
00141         int j;
00142         j = st->permutated[i];
00143         if(j>end) end=j;
00144         st->raster_end[i]= end;
00145     }
00146 }
00147 
00148 void ff_init_scantable_permutation(uint8_t *idct_permutation,
00149                                    int idct_permutation_type)
00150 {
00151     int i;
00152 
00153     switch(idct_permutation_type){
00154     case FF_NO_IDCT_PERM:
00155         for(i=0; i<64; i++)
00156             idct_permutation[i]= i;
00157         break;
00158     case FF_LIBMPEG2_IDCT_PERM:
00159         for(i=0; i<64; i++)
00160             idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
00161         break;
00162     case FF_SIMPLE_IDCT_PERM:
00163         for(i=0; i<64; i++)
00164             idct_permutation[i]= simple_mmx_permutation[i];
00165         break;
00166     case FF_TRANSPOSE_IDCT_PERM:
00167         for(i=0; i<64; i++)
00168             idct_permutation[i]= ((i&7)<<3) | (i>>3);
00169         break;
00170     case FF_PARTTRANS_IDCT_PERM:
00171         for(i=0; i<64; i++)
00172             idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
00173         break;
00174     case FF_SSE2_IDCT_PERM:
00175         for(i=0; i<64; i++)
00176             idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
00177         break;
00178     default:
00179         av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
00180     }
00181 }
00182 
/**
 * Sum all samples of a 16x16 block.
 *
 * @param pix       top-left sample of the block
 * @param line_size distance in bytes between the starts of consecutive rows
 * @return sum of the 256 samples
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;
    }
    return total;
}
00204 
00205 static int pix_norm1_c(uint8_t * pix, int line_size)
00206 {
00207     int s, i, j;
00208     uint32_t *sq = ff_squareTbl + 256;
00209 
00210     s = 0;
00211     for (i = 0; i < 16; i++) {
00212         for (j = 0; j < 16; j += 8) {
00213 #if 0
00214             s += sq[pix[0]];
00215             s += sq[pix[1]];
00216             s += sq[pix[2]];
00217             s += sq[pix[3]];
00218             s += sq[pix[4]];
00219             s += sq[pix[5]];
00220             s += sq[pix[6]];
00221             s += sq[pix[7]];
00222 #else
00223 #if HAVE_FAST_64BIT
00224             register uint64_t x=*(uint64_t*)pix;
00225             s += sq[x&0xff];
00226             s += sq[(x>>8)&0xff];
00227             s += sq[(x>>16)&0xff];
00228             s += sq[(x>>24)&0xff];
00229             s += sq[(x>>32)&0xff];
00230             s += sq[(x>>40)&0xff];
00231             s += sq[(x>>48)&0xff];
00232             s += sq[(x>>56)&0xff];
00233 #else
00234             register uint32_t x=*(uint32_t*)pix;
00235             s += sq[x&0xff];
00236             s += sq[(x>>8)&0xff];
00237             s += sq[(x>>16)&0xff];
00238             s += sq[(x>>24)&0xff];
00239             x=*(uint32_t*)(pix+4);
00240             s += sq[x&0xff];
00241             s += sq[(x>>8)&0xff];
00242             s += sq[(x>>16)&0xff];
00243             s += sq[(x>>24)&0xff];
00244 #endif
00245 #endif
00246             pix += 8;
00247         }
00248         pix += line_size - 16;
00249     }
00250     return s;
00251 }
00252 
/**
 * Byte-swap an array of 32-bit words.
 *
 * @param dst destination array, w entries
 * @param src source array, w entries
 * @param w   number of words; nothing is done when w <= 0
 */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int n;

    for (n = 0; n < w; n++)
        dst[n] = av_bswap32(src[n]);
}
00270 
/**
 * Byte-swap an array of 16-bit words.
 *
 * The count is checked with an explicit upper bound, as in bswap_buf(), so a
 * negative len is a no-op; the earlier "while (len--)" form would keep
 * running past the buffers in that case.
 *
 * @param dst destination array, len entries
 * @param src source array, len entries
 * @param len number of words; nothing is done when len <= 0
 */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    int i;

    for (i = 0; i < len; i++)
        dst[i] = av_bswap16(src[i]);
}
00276 
00277 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00278 {
00279     int s, i;
00280     uint32_t *sq = ff_squareTbl + 256;
00281 
00282     s = 0;
00283     for (i = 0; i < h; i++) {
00284         s += sq[pix1[0] - pix2[0]];
00285         s += sq[pix1[1] - pix2[1]];
00286         s += sq[pix1[2] - pix2[2]];
00287         s += sq[pix1[3] - pix2[3]];
00288         pix1 += line_size;
00289         pix2 += line_size;
00290     }
00291     return s;
00292 }
00293 
00294 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00295 {
00296     int s, i;
00297     uint32_t *sq = ff_squareTbl + 256;
00298 
00299     s = 0;
00300     for (i = 0; i < h; i++) {
00301         s += sq[pix1[0] - pix2[0]];
00302         s += sq[pix1[1] - pix2[1]];
00303         s += sq[pix1[2] - pix2[2]];
00304         s += sq[pix1[3] - pix2[3]];
00305         s += sq[pix1[4] - pix2[4]];
00306         s += sq[pix1[5] - pix2[5]];
00307         s += sq[pix1[6] - pix2[6]];
00308         s += sq[pix1[7] - pix2[7]];
00309         pix1 += line_size;
00310         pix2 += line_size;
00311     }
00312     return s;
00313 }
00314 
00315 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
00316 {
00317     int s, i;
00318     uint32_t *sq = ff_squareTbl + 256;
00319 
00320     s = 0;
00321     for (i = 0; i < h; i++) {
00322         s += sq[pix1[ 0] - pix2[ 0]];
00323         s += sq[pix1[ 1] - pix2[ 1]];
00324         s += sq[pix1[ 2] - pix2[ 2]];
00325         s += sq[pix1[ 3] - pix2[ 3]];
00326         s += sq[pix1[ 4] - pix2[ 4]];
00327         s += sq[pix1[ 5] - pix2[ 5]];
00328         s += sq[pix1[ 6] - pix2[ 6]];
00329         s += sq[pix1[ 7] - pix2[ 7]];
00330         s += sq[pix1[ 8] - pix2[ 8]];
00331         s += sq[pix1[ 9] - pix2[ 9]];
00332         s += sq[pix1[10] - pix2[10]];
00333         s += sq[pix1[11] - pix2[11]];
00334         s += sq[pix1[12] - pix2[12]];
00335         s += sq[pix1[13] - pix2[13]];
00336         s += sq[pix1[14] - pix2[14]];
00337         s += sq[pix1[15] - pix2[15]];
00338 
00339         pix1 += line_size;
00340         pix2 += line_size;
00341     }
00342     return s;
00343 }
00344 
00345 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
00346                           const uint8_t *s2, int stride){
00347     int i;
00348 
00349     /* read the pixels */
00350     for(i=0;i<8;i++) {
00351         block[0] = s1[0] - s2[0];
00352         block[1] = s1[1] - s2[1];
00353         block[2] = s1[2] - s2[2];
00354         block[3] = s1[3] - s2[3];
00355         block[4] = s1[4] - s2[4];
00356         block[5] = s1[5] - s2[5];
00357         block[6] = s1[6] - s2[6];
00358         block[7] = s1[7] - s2[7];
00359         s1 += stride;
00360         s2 += stride;
00361         block += 8;
00362     }
00363 }
00364 
00365 
00366 void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00367                              int line_size)
00368 {
00369     int i;
00370 
00371     /* read the pixels */
00372     for(i=0;i<8;i++) {
00373         pixels[0] = av_clip_uint8(block[0]);
00374         pixels[1] = av_clip_uint8(block[1]);
00375         pixels[2] = av_clip_uint8(block[2]);
00376         pixels[3] = av_clip_uint8(block[3]);
00377         pixels[4] = av_clip_uint8(block[4]);
00378         pixels[5] = av_clip_uint8(block[5]);
00379         pixels[6] = av_clip_uint8(block[6]);
00380         pixels[7] = av_clip_uint8(block[7]);
00381 
00382         pixels += line_size;
00383         block += 8;
00384     }
00385 }
00386 
00387 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00388                                  int line_size)
00389 {
00390     int i;
00391 
00392     /* read the pixels */
00393     for(i=0;i<4;i++) {
00394         pixels[0] = av_clip_uint8(block[0]);
00395         pixels[1] = av_clip_uint8(block[1]);
00396         pixels[2] = av_clip_uint8(block[2]);
00397         pixels[3] = av_clip_uint8(block[3]);
00398 
00399         pixels += line_size;
00400         block += 8;
00401     }
00402 }
00403 
00404 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00405                                  int line_size)
00406 {
00407     int i;
00408 
00409     /* read the pixels */
00410     for(i=0;i<2;i++) {
00411         pixels[0] = av_clip_uint8(block[0]);
00412         pixels[1] = av_clip_uint8(block[1]);
00413 
00414         pixels += line_size;
00415         block += 8;
00416     }
00417 }
00418 
00419 void ff_put_signed_pixels_clamped_c(const DCTELEM *block,
00420                                     uint8_t *restrict pixels,
00421                                     int line_size)
00422 {
00423     int i, j;
00424 
00425     for (i = 0; i < 8; i++) {
00426         for (j = 0; j < 8; j++) {
00427             if (*block < -128)
00428                 *pixels = 0;
00429             else if (*block > 127)
00430                 *pixels = 255;
00431             else
00432                 *pixels = (uint8_t)(*block + 128);
00433             block++;
00434             pixels++;
00435         }
00436         pixels += (line_size - 8);
00437     }
00438 }
00439 
00440 void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00441                              int line_size)
00442 {
00443     int i;
00444 
00445     /* read the pixels */
00446     for(i=0;i<8;i++) {
00447         pixels[0] = av_clip_uint8(pixels[0] + block[0]);
00448         pixels[1] = av_clip_uint8(pixels[1] + block[1]);
00449         pixels[2] = av_clip_uint8(pixels[2] + block[2]);
00450         pixels[3] = av_clip_uint8(pixels[3] + block[3]);
00451         pixels[4] = av_clip_uint8(pixels[4] + block[4]);
00452         pixels[5] = av_clip_uint8(pixels[5] + block[5]);
00453         pixels[6] = av_clip_uint8(pixels[6] + block[6]);
00454         pixels[7] = av_clip_uint8(pixels[7] + block[7]);
00455         pixels += line_size;
00456         block += 8;
00457     }
00458 }
00459 
00460 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00461                           int line_size)
00462 {
00463     int i;
00464 
00465     /* read the pixels */
00466     for(i=0;i<4;i++) {
00467         pixels[0] = av_clip_uint8(pixels[0] + block[0]);
00468         pixels[1] = av_clip_uint8(pixels[1] + block[1]);
00469         pixels[2] = av_clip_uint8(pixels[2] + block[2]);
00470         pixels[3] = av_clip_uint8(pixels[3] + block[3]);
00471         pixels += line_size;
00472         block += 8;
00473     }
00474 }
00475 
00476 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00477                           int line_size)
00478 {
00479     int i;
00480 
00481     /* read the pixels */
00482     for(i=0;i<2;i++) {
00483         pixels[0] = av_clip_uint8(pixels[0] + block[0]);
00484         pixels[1] = av_clip_uint8(pixels[1] + block[1]);
00485         pixels += line_size;
00486         block += 8;
00487     }
00488 }
00489 
00490 static int sum_abs_dctelem_c(DCTELEM *block)
00491 {
00492     int sum=0, i;
00493     for(i=0; i<64; i++)
00494         sum+= FFABS(block[i]);
00495     return sum;
00496 }
00497 
/**
 * Set a 16-byte-wide block to a constant value.
 *
 * @param block     first byte of the block
 * @param value     byte to store
 * @param line_size distance in bytes between rows
 * @param h         number of rows; nothing is written when h <= 0
 */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    for (; h > 0; h--) {
        memset(block, value, 16);
        block += line_size;
    }
}
00507 
/**
 * Set an 8-byte-wide block to a constant value.
 *
 * @param block     first byte of the block
 * @param value     byte to store
 * @param line_size distance in bytes between rows
 * @param h         number of rows; nothing is written when h <= 0
 */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    for (; h > 0; h--) {
        memset(block, value, 8);
        block += line_size;
    }
}
00517 
/* Rounded integer averages of two and four values (round half up).
 * Every argument is parenthesized so that operands containing operators of
 * lower precedence than '+' (e.g. x|y, c ? a : b) are grouped as written. */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
00520 
/**
 * Bilinear interpolation of an 8-sample-wide block at a fixed 1/16-sample
 * offset.
 *
 * Each output is the weighted sum of a sample, its right neighbour, the
 * sample below and the one below-right; the four weights add up to 256.
 *
 * @param dst     destination, 8 samples per row
 * @param src     source; 9 samples of rows 0..h are read
 * @param stride  distance in bytes between rows, shared by dst and src
 * @param h       number of rows
 * @param x16     horizontal offset in 1/16 sample
 * @param y16     vertical offset in 1/16 sample
 * @param rounder value added before the final >> 8
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int w_tl = (16 - x16) * (16 - y16);   /* top-left     */
    const int w_tr =       x16  * (16 - y16);   /* top-right    */
    const int w_bl = (16 - x16) *       y16;    /* bottom-left  */
    const int w_br =       x16  *       y16;    /* bottom-right */
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++) {
            dst[col] = (w_tl * src[col]          + w_tr * src[col + 1] +
                        w_bl * src[stride + col] + w_br * src[stride + col + 1] +
                        rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}
00543 
/**
 * Affine motion compensation of an 8-sample-wide block with bilinear
 * interpolation and clamping at the picture edges.
 *
 * For output sample (x, y) the source position is
 * (ox + x*dxx + y*dxy, oy + x*dyx + y*dyy); after >> 16 the low `shift` bits
 * are the fractional part and the rest is the integer sample position.
 *
 * @param dst    destination, 8 samples per row
 * @param src    source picture
 * @param stride distance in bytes between rows, shared by dst and src
 * @param h      number of output rows
 * @param ox     horizontal source position of output sample (0,0)
 * @param oy     vertical source position of output sample (0,0)
 * @param dxx    change of the horizontal position per output column
 * @param dxy    change of the horizontal position per output row
 * @param dyx    change of the vertical position per output column
 * @param dyy    change of the vertical position per output row
 * @param shift  number of fractional bits
 * @param r      value added before the final >> (2 * shift)
 * @param width  source width; positions are clamped to 0..width-1
 * @param height source height; positions are clamped to 0..height-1
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    const int s        = 1 << shift;
    const int sub_mask = s - 1;
    const int max_x    = width  - 1;
    const int max_y    = height - 1;
    int y;

    for (y = 0; y < h; y++, ox += dxy, oy += dyy) {
        int vx = ox;
        int vy = oy;
        int x;

        for (x = 0; x < 8; x++, vx += dxx, vy += dyx) { //XXX FIXME optimize
            const int pos_x  = vx >> 16;
            const int pos_y  = vy >> 16;
            const int frac_x = pos_x & sub_mask;
            const int frac_y = pos_y & sub_mask;
            const int src_x  = pos_x >> shift;
            const int src_y  = pos_y >> shift;
            /* The unsigned compare rejects negative positions as well as
             * positions on or past the last column / row. */
            const int x_inside = (unsigned)src_x < max_x;
            const int y_inside = (unsigned)src_y < max_y;
            uint8_t *const out = &dst[y * stride + x];
            int index;

            if (x_inside && y_inside) {
                /* full bilinear blend of the 2x2 neighbourhood */
                index = src_x + src_y * stride;
                *out = (  (  src[index             ] * (s - frac_x)
                           + src[index          + 1] *      frac_x ) * (s - frac_y)
                        + (  src[index + stride    ] * (s - frac_x)
                           + src[index + stride + 1] *      frac_x ) *      frac_y
                        + r) >> (shift * 2);
            } else if (x_inside) {
                /* row clamped: blend horizontally only */
                index = src_x + av_clip(src_y, 0, max_y) * stride;
                *out = (  (  src[index    ] * (s - frac_x)
                           + src[index + 1] *      frac_x ) * s
                        + r) >> (shift * 2);
            } else if (y_inside) {
                /* column clamped: blend vertically only */
                index = av_clip(src_x, 0, max_x) + src_y * stride;
                *out = (  (  src[index         ] * (s - frac_y)
                           + src[index + stride] *      frac_y ) * s
                        + r) >> (shift * 2);
            } else {
                /* both clamped: copy the nearest edge sample */
                index = av_clip(src_x, 0, max_x) + av_clip(src_y, 0, max_y) * stride;
                *out = src[index];
            }
        }
    }
}
00601 
/* Zero-offset case: plain block copy, dispatched on width to the 8-bit
 * put_pixelsN helpers.  Widths other than 2, 4, 8 and 16 do nothing. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        put_pixels2_8_c (dst, src, stride, height);
    else if (width == 4)
        put_pixels4_8_c (dst, src, stride, height);
    else if (width == 8)
        put_pixels8_8_c (dst, src, stride, height);
    else if (width == 16)
        put_pixels16_8_c(dst, src, stride, height);
}
00610 
/* Horizontal blend (2*cur + right + 1) / 3; the division by 3 is done as
 * a multiply by 683 followed by >> 11. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int cur   = src[col];
            const int right = src[col + 1];

            dst[col] = (683 * (2 * cur + right + 1)) >> 11;
        }
    }
}
00621 
/* Horizontal blend (cur + 2*right + 1) / 3; the division by 3 is done as
 * a multiply by 683 followed by >> 11. */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int cur   = src[col];
            const int right = src[col + 1];

            dst[col] = (683 * (cur + 2 * right + 1)) >> 11;
        }
    }
}
00632 
/* Vertical blend (2*cur + below + 1) / 3; the division by 3 is done as
 * a multiply by 683 followed by >> 11. */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int cur   = src[col];
            const int below = src[col + stride];

            dst[col] = (683 * (2 * cur + below + 1)) >> 11;
        }
    }
}
00643 
/* 2x2 blend with weights 4/3/3/2 (cur, right, below, diagonal) out of 12;
 * the division by 12 is done as a multiply by 2731 followed by >> 15. */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int cur   = src[col];
            const int right = src[col + 1];
            const int below = src[col + stride];
            const int diag  = src[col + stride + 1];

            dst[col] = (2731 * (4 * cur + 3 * right + 3 * below + 2 * diag + 6)) >> 15;
        }
    }
}
00654 
/* 2x2 blend with weights 3/2/4/3 (cur, right, below, diagonal) out of 12;
 * the division by 12 is done as a multiply by 2731 followed by >> 15. */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int cur   = src[col];
            const int right = src[col + 1];
            const int below = src[col + stride];
            const int diag  = src[col + stride + 1];

            dst[col] = (2731 * (3 * cur + 2 * right + 4 * below + 3 * diag + 6)) >> 15;
        }
    }
}
00665 
/* Vertical blend (cur + 2*below + 1) / 3; the division by 3 is done as
 * a multiply by 683 followed by >> 11. */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int cur   = src[col];
            const int below = src[col + stride];

            dst[col] = (683 * (cur + 2 * below + 1)) >> 11;
        }
    }
}
00676 
/* 2x2 blend with weights 3/4/2/3 (cur, right, below, diagonal) out of 12;
 * the division by 12 is done as a multiply by 2731 followed by >> 15. */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int cur   = src[col];
            const int right = src[col + 1];
            const int below = src[col + stride];
            const int diag  = src[col + stride + 1];

            dst[col] = (2731 * (3 * cur + 4 * right + 2 * below + 3 * diag + 6)) >> 15;
        }
    }
}
00687 
/* 2x2 blend with weights 2/3/3/4 (cur, right, below, diagonal) out of 12;
 * the division by 12 is done as a multiply by 2731 followed by >> 15. */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int cur   = src[col];
            const int right = src[col + 1];
            const int below = src[col + stride];
            const int diag  = src[col + stride + 1];

            dst[col] = (2731 * (2 * cur + 3 * right + 3 * below + 4 * diag + 6)) >> 15;
        }
    }
}
00698 
/* Zero-offset case: average the source into dst, dispatched on width to the
 * 8-bit avg_pixelsN helpers.  Widths other than 2, 4, 8 and 16 do nothing. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        avg_pixels2_8_c (dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_8_c (dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_8_c (dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_8_c(dst, src, stride, height);
}
00707 
/* Horizontal blend (2*cur + right + 1) / 3 (as * 683 >> 11), then averaged
 * with the existing dst sample, rounding up. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int pred = (683 * (2 * src[col] + src[col + 1] + 1)) >> 11;

            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
00718 
/* Horizontal blend (cur + 2*right + 1) / 3 (as * 683 >> 11), then averaged
 * with the existing dst sample, rounding up. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int pred = (683 * (src[col] + 2 * src[col + 1] + 1)) >> 11;

            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
00729 
/* Vertical blend (2*cur + below + 1) / 3 (as * 683 >> 11), then averaged
 * with the existing dst sample, rounding up. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int pred = (683 * (2 * src[col] + src[col + stride] + 1)) >> 11;

            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
00740 
/* 2x2 blend with weights 4/3/3/2 (cur, right, below, diagonal) out of 12
 * (as * 2731 >> 15), then averaged with the existing dst sample, rounding up. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int pred = (2731 * (4 * src[col] + 3 * src[col + 1] +
                                      3 * src[col + stride] + 2 * src[col + stride + 1] + 6)) >> 15;

            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
00751 
/* 2x2 blend with weights 3/2/4/3 (cur, right, below, diagonal) out of 12
 * (as * 2731 >> 15), then averaged with the existing dst sample, rounding up. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int pred = (2731 * (3 * src[col] + 2 * src[col + 1] +
                                      4 * src[col + stride] + 3 * src[col + stride + 1] + 6)) >> 15;

            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
00762 
/* Vertical blend (cur + 2*below + 1) / 3 (as * 683 >> 11), then averaged
 * with the existing dst sample, rounding up. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int pred = (683 * (src[col] + 2 * src[col + stride] + 1)) >> 11;

            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
00773 
/* 2x2 blend with weights 3/4/2/3 (cur, right, below, diagonal) out of 12
 * (as * 2731 >> 15), then averaged with the existing dst sample, rounding up. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int pred = (2731 * (3 * src[col] + 4 * src[col + 1] +
                                      2 * src[col + stride] + 3 * src[col + stride + 1] + 6)) >> 15;

            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
00784 
/* 2x2 blend with weights 2/3/3/4 (cur, right, below, diagonal) out of 12
 * (as * 2731 >> 15), then averaged with the existing dst sample, rounding up. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++, src += stride, dst += stride) {
        for (col = 0; col < width; col++) {
            const int pred = (2731 * (2 * src[col] + 3 * src[col + 1] +
                                      3 * src[col + stride] + 4 * src[col + stride + 1] + 6)) >> 15;

            dst[col] = (dst[col] + pred + 1) >> 1;
        }
    }
}
00795 
00796 #define QPEL_MC(r, OPNAME, RND, OP) \
00797 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00798     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00799     int i;\
00800     for(i=0; i<h; i++)\
00801     {\
00802         OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
00803         OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
00804         OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
00805         OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
00806         OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
00807         OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
00808         OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
00809         OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
00810         dst+=dstStride;\
00811         src+=srcStride;\
00812     }\
00813 }\
00814 \
00815 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00816     const int w=8;\
00817     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00818     int i;\
00819     for(i=0; i<w; i++)\
00820     {\
00821         const int src0= src[0*srcStride];\
00822         const int src1= src[1*srcStride];\
00823         const int src2= src[2*srcStride];\
00824         const int src3= src[3*srcStride];\
00825         const int src4= src[4*srcStride];\
00826         const int src5= src[5*srcStride];\
00827         const int src6= src[6*srcStride];\
00828         const int src7= src[7*srcStride];\
00829         const int src8= src[8*srcStride];\
00830         OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
00831         OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
00832         OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
00833         OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
00834         OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
00835         OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
00836         OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
00837         OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
00838         dst++;\
00839         src++;\
00840     }\
00841 }\
00842 \
00843 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00844     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00845     int i;\
00846     \
00847     for(i=0; i<h; i++)\
00848     {\
00849         OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
00850         OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
00851         OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
00852         OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
00853         OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
00854         OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
00855         OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
00856         OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
00857         OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
00858         OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
00859         OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
00860         OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
00861         OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
00862         OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
00863         OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
00864         OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
00865         dst+=dstStride;\
00866         src+=srcStride;\
00867     }\
00868 }\
00869 \
00870 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00871     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00872     int i;\
00873     const int w=16;\
00874     for(i=0; i<w; i++)\
00875     {\
00876         const int src0= src[0*srcStride];\
00877         const int src1= src[1*srcStride];\
00878         const int src2= src[2*srcStride];\
00879         const int src3= src[3*srcStride];\
00880         const int src4= src[4*srcStride];\
00881         const int src5= src[5*srcStride];\
00882         const int src6= src[6*srcStride];\
00883         const int src7= src[7*srcStride];\
00884         const int src8= src[8*srcStride];\
00885         const int src9= src[9*srcStride];\
00886         const int src10= src[10*srcStride];\
00887         const int src11= src[11*srcStride];\
00888         const int src12= src[12*srcStride];\
00889         const int src13= src[13*srcStride];\
00890         const int src14= src[14*srcStride];\
00891         const int src15= src[15*srcStride];\
00892         const int src16= src[16*srcStride];\
00893         OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
00894         OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
00895         OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
00896         OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
00897         OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
00898         OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
00899         OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
00900         OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
00901         OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
00902         OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
00903         OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
00904         OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
00905         OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
00906         OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
00907         OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
00908         OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
00909         dst++;\
00910         src++;\
00911     }\
00912 }\
00913 \
00914 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
00915     uint8_t half[64];\
00916     put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00917     OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
00918 }\
00919 \
00920 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
00921     OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
00922 }\
00923 \
00924 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
00925     uint8_t half[64];\
00926     put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00927     OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
00928 }\
00929 \
00930 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
00931     uint8_t full[16*9];\
00932     uint8_t half[64];\
00933     copy_block9(full, src, 16, stride, 9);\
00934     put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00935     OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
00936 }\
00937 \
00938 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
00939     uint8_t full[16*9];\
00940     copy_block9(full, src, 16, stride, 9);\
00941     OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
00942 }\
00943 \
00944 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
00945     uint8_t full[16*9];\
00946     uint8_t half[64];\
00947     copy_block9(full, src, 16, stride, 9);\
00948     put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00949     OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
00950 }\
00951 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
00952     uint8_t full[16*9];\
00953     uint8_t halfH[72];\
00954     uint8_t halfV[64];\
00955     uint8_t halfHV[64];\
00956     copy_block9(full, src, 16, stride, 9);\
00957     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00958     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
00959     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00960     OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
00961 }\
00962 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
00963     uint8_t full[16*9];\
00964     uint8_t halfH[72];\
00965     uint8_t halfHV[64];\
00966     copy_block9(full, src, 16, stride, 9);\
00967     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00968     put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
00969     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00970     OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
00971 }\
00972 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
00973     uint8_t full[16*9];\
00974     uint8_t halfH[72];\
00975     uint8_t halfV[64];\
00976     uint8_t halfHV[64];\
00977     copy_block9(full, src, 16, stride, 9);\
00978     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00979     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
00980     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00981     OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
00982 }\
00983 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
00984     uint8_t full[16*9];\
00985     uint8_t halfH[72];\
00986     uint8_t halfHV[64];\
00987     copy_block9(full, src, 16, stride, 9);\
00988     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00989     put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
00990     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00991     OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
00992 }\
00993 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
00994     uint8_t full[16*9];\
00995     uint8_t halfH[72];\
00996     uint8_t halfV[64];\
00997     uint8_t halfHV[64];\
00998     copy_block9(full, src, 16, stride, 9);\
00999     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01000     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01001     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01002     OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01003 }\
01004 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01005     uint8_t full[16*9];\
01006     uint8_t halfH[72];\
01007     uint8_t halfHV[64];\
01008     copy_block9(full, src, 16, stride, 9);\
01009     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01010     put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
01011     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01012     OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01013 }\
01014 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01015     uint8_t full[16*9];\
01016     uint8_t halfH[72];\
01017     uint8_t halfV[64];\
01018     uint8_t halfHV[64];\
01019     copy_block9(full, src, 16, stride, 9);\
01020     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
01021     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01022     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01023     OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01024 }\
01025 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01026     uint8_t full[16*9];\
01027     uint8_t halfH[72];\
01028     uint8_t halfHV[64];\
01029     copy_block9(full, src, 16, stride, 9);\
01030     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01031     put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01032     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01033     OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01034 }\
01035 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01036     uint8_t halfH[72];\
01037     uint8_t halfHV[64];\
01038     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01039     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01040     OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
01041 }\
01042 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01043     uint8_t halfH[72];\
01044     uint8_t halfHV[64];\
01045     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01046     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01047     OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01048 }\
01049 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01050     uint8_t full[16*9];\
01051     uint8_t halfH[72];\
01052     uint8_t halfV[64];\
01053     uint8_t halfHV[64];\
01054     copy_block9(full, src, 16, stride, 9);\
01055     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01056     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01057     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01058     OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
01059 }\
01060 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01061     uint8_t full[16*9];\
01062     uint8_t halfH[72];\
01063     copy_block9(full, src, 16, stride, 9);\
01064     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01065     put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
01066     OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01067 }\
01068 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01069     uint8_t full[16*9];\
01070     uint8_t halfH[72];\
01071     uint8_t halfV[64];\
01072     uint8_t halfHV[64];\
01073     copy_block9(full, src, 16, stride, 9);\
01074     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01075     put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01076     put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01077     OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
01078 }\
01079 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01080     uint8_t full[16*9];\
01081     uint8_t halfH[72];\
01082     copy_block9(full, src, 16, stride, 9);\
01083     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01084     put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01085     OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01086 }\
01087 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01088     uint8_t halfH[72];\
01089     put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01090     OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01091 }\
01092 \
01093 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01094     uint8_t half[256];\
01095     put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01096     OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
01097 }\
01098 \
01099 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01100     OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
01101 }\
01102 \
01103 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01104     uint8_t half[256];\
01105     put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01106     OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
01107 }\
01108 \
01109 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
01110     uint8_t full[24*17];\
01111     uint8_t half[256];\
01112     copy_block17(full, src, 24, stride, 17);\
01113     put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01114     OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
01115 }\
01116 \
01117 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
01118     uint8_t full[24*17];\
01119     copy_block17(full, src, 24, stride, 17);\
01120     OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
01121 }\
01122 \
01123 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
01124     uint8_t full[24*17];\
01125     uint8_t half[256];\
01126     copy_block17(full, src, 24, stride, 17);\
01127     put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01128     OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
01129 }\
01130 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
01131     uint8_t full[24*17];\
01132     uint8_t halfH[272];\
01133     uint8_t halfV[256];\
01134     uint8_t halfHV[256];\
01135     copy_block17(full, src, 24, stride, 17);\
01136     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01137     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01138     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01139     OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01140 }\
01141 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01142     uint8_t full[24*17];\
01143     uint8_t halfH[272];\
01144     uint8_t halfHV[256];\
01145     copy_block17(full, src, 24, stride, 17);\
01146     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01147     put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01148     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01149     OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01150 }\
01151 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01152     uint8_t full[24*17];\
01153     uint8_t halfH[272];\
01154     uint8_t halfV[256];\
01155     uint8_t halfHV[256];\
01156     copy_block17(full, src, 24, stride, 17);\
01157     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01158     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01159     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01160     OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01161 }\
01162 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01163     uint8_t full[24*17];\
01164     uint8_t halfH[272];\
01165     uint8_t halfHV[256];\
01166     copy_block17(full, src, 24, stride, 17);\
01167     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01168     put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01169     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01170     OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01171 }\
01172 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01173     uint8_t full[24*17];\
01174     uint8_t halfH[272];\
01175     uint8_t halfV[256];\
01176     uint8_t halfHV[256];\
01177     copy_block17(full, src, 24, stride, 17);\
01178     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01179     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01180     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01181     OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01182 }\
01183 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01184     uint8_t full[24*17];\
01185     uint8_t halfH[272];\
01186     uint8_t halfHV[256];\
01187     copy_block17(full, src, 24, stride, 17);\
01188     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01189     put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01190     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01191     OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01192 }\
01193 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01194     uint8_t full[24*17];\
01195     uint8_t halfH[272];\
01196     uint8_t halfV[256];\
01197     uint8_t halfHV[256];\
01198     copy_block17(full, src, 24, stride, 17);\
01199     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
01200     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01201     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01202     OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01203 }\
01204 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01205     uint8_t full[24*17];\
01206     uint8_t halfH[272];\
01207     uint8_t halfHV[256];\
01208     copy_block17(full, src, 24, stride, 17);\
01209     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01210     put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01211     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01212     OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01213 }\
01214 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01215     uint8_t halfH[272];\
01216     uint8_t halfHV[256];\
01217     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01218     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01219     OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01220 }\
01221 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01222     uint8_t halfH[272];\
01223     uint8_t halfHV[256];\
01224     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01225     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01226     OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01227 }\
01228 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01229     uint8_t full[24*17];\
01230     uint8_t halfH[272];\
01231     uint8_t halfV[256];\
01232     uint8_t halfHV[256];\
01233     copy_block17(full, src, 24, stride, 17);\
01234     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01235     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01236     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01237     OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
01238 }\
01239 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01240     uint8_t full[24*17];\
01241     uint8_t halfH[272];\
01242     copy_block17(full, src, 24, stride, 17);\
01243     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01244     put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01245     OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01246 }\
01247 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01248     uint8_t full[24*17];\
01249     uint8_t halfH[272];\
01250     uint8_t halfV[256];\
01251     uint8_t halfHV[256];\
01252     copy_block17(full, src, 24, stride, 17);\
01253     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01254     put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01255     put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01256     OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
01257 }\
01258 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01259     uint8_t full[24*17];\
01260     uint8_t halfH[272];\
01261     copy_block17(full, src, 24, stride, 17);\
01262     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01263     put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01264     OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01265 }\
01266 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01267     uint8_t halfH[272];\
01268     put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01269     OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01270 }
01271 
01272 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
01273 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
01274 #define op_put(a, b) a = cm[((b) + 16)>>5]
01275 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
01276 
01277 QPEL_MC(0, put_       , _       , op_put)
01278 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
01279 QPEL_MC(0, avg_       , _       , op_avg)
01280 //QPEL_MC(1, avg_no_rnd , _       , op_avg)
01281 #undef op_avg
01282 #undef op_avg_no_rnd
01283 #undef op_put
01284 #undef op_put_no_rnd
01285 
/*
 * The mc00 entries are not produced by QPEL_MC; they are mapped straight onto
 * existing whole-block pixel routines.
 */
#define put_qpel8_mc00_c          ff_put_pixels8x8_c
#define put_no_rnd_qpel8_mc00_c   ff_put_pixels8x8_c
#define avg_qpel8_mc00_c          ff_avg_pixels8x8_c

#define put_qpel16_mc00_c         ff_put_pixels16x16_c
/* NOTE(review): this alias carries an extra _8 suffix unlike its siblings --
 * confirm that ff_put_pixels16x16_8_c is the intended target. */
#define put_no_rnd_qpel16_mc00_c  ff_put_pixels16x16_8_c
#define avg_qpel16_mc00_c         ff_avg_pixels16x16_c
01292 
01293 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
01294     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01295     int i;
01296 
01297     for(i=0; i<h; i++){
01298         dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
01299         dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
01300         dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
01301         dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
01302         dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
01303         dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
01304         dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
01305         dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
01306         dst+=dstStride;
01307         src+=srcStride;
01308     }
01309 }
01310 
#if CONFIG_RV40_DECODER
/*
 * RV40 mc33 entry points: each simply forwards to the matching xy2 pixel
 * routine with the block height fixed to the block width.
 */

/** 16x16 put variant; forwards to put_pixels16_xy2_8_c with h = 16. */
void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels16_xy2_8_c(dst, src, stride, 16);
}

/** 16x16 avg variant; forwards to avg_pixels16_xy2_8_c with h = 16. */
void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
}

/** 8x8 put variant; forwards to put_pixels8_xy2_8_c with h = 8. */
void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels8_xy2_8_c(dst, src, stride, 8);
}

/** 8x8 avg variant; forwards to avg_pixels8_xy2_8_c with h = 8. */
void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
}
#endif /* CONFIG_RV40_DECODER */
01325 
#if CONFIG_DIRAC_DECODER
/*
 * DIRAC_MC(OPNAME) emits nine functions named
 *     ff_<OPNAME>_dirac_pixels{8,16,32}_c
 *     ff_<OPNAME>_dirac_pixels{8,16,32}_l2_c
 *     ff_<OPNAME>_dirac_pixels{8,16,32}_l4_c
 *
 * All share the signature (dst, src[5], stride, h).  The plain versions pass
 * src[0] to <OPNAME>_pixelsN_8_c, the _l2 versions pass src[0..1] to
 * <OPNAME>_pixelsN_l2_8 and the _l4 versions pass src[0..3] to
 * <OPNAME>_pixelsN_l4_8, always using the one stride for every operand.
 * The 32-wide versions are two 16-wide calls, the second at a +16 offset.
 */
#define DIRAC_MC(OPNAME) \
void ff_ ## OPNAME ## _dirac_pixels8_c(uint8_t *dst, const uint8_t *src[5], int stride, int h) \
{ \
    OPNAME ## _pixels8_8_c(dst, src[0], stride, h); \
} \
void ff_ ## OPNAME ## _dirac_pixels16_c(uint8_t *dst, const uint8_t *src[5], int stride, int h) \
{ \
    OPNAME ## _pixels16_8_c(dst, src[0], stride, h); \
} \
void ff_ ## OPNAME ## _dirac_pixels32_c(uint8_t *dst, const uint8_t *src[5], int stride, int h) \
{ \
    OPNAME ## _pixels16_8_c(dst,      src[0],      stride, h); \
    OPNAME ## _pixels16_8_c(dst + 16, src[0] + 16, stride, h); \
} \
void ff_ ## OPNAME ## _dirac_pixels8_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h) \
{ \
    OPNAME ## _pixels8_l2_8(dst, src[0], src[1], stride, stride, stride, h); \
} \
void ff_ ## OPNAME ## _dirac_pixels16_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h) \
{ \
    OPNAME ## _pixels16_l2_8(dst, src[0], src[1], stride, stride, stride, h); \
} \
void ff_ ## OPNAME ## _dirac_pixels32_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h) \
{ \
    OPNAME ## _pixels16_l2_8(dst,      src[0],      src[1],      stride, stride, stride, h); \
    OPNAME ## _pixels16_l2_8(dst + 16, src[0] + 16, src[1] + 16, stride, stride, stride, h); \
} \
void ff_ ## OPNAME ## _dirac_pixels8_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h) \
{ \
    OPNAME ## _pixels8_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h); \
} \
void ff_ ## OPNAME ## _dirac_pixels16_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h) \
{ \
    OPNAME ## _pixels16_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h); \
} \
void ff_ ## OPNAME ## _dirac_pixels32_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h) \
{ \
    OPNAME ## _pixels16_l4_8(dst,      src[0],      src[1],      src[2],      src[3],      stride, stride, stride, stride, stride, h); \
    OPNAME ## _pixels16_l4_8(dst + 16, src[0] + 16, src[1] + 16, src[2] + 16, src[3] + 16, stride, stride, stride, stride, stride, h); \
}

/* Instantiate the put and avg families. */
DIRAC_MC(put)
DIRAC_MC(avg)
#endif
01370 
01371 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
01372     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01373     int i;
01374 
01375     for(i=0; i<w; i++){
01376         const int src_1= src[ -srcStride];
01377         const int src0 = src[0          ];
01378         const int src1 = src[  srcStride];
01379         const int src2 = src[2*srcStride];
01380         const int src3 = src[3*srcStride];
01381         const int src4 = src[4*srcStride];
01382         const int src5 = src[5*srcStride];
01383         const int src6 = src[6*srcStride];
01384         const int src7 = src[7*srcStride];
01385         const int src8 = src[8*srcStride];
01386         const int src9 = src[9*srcStride];
01387         dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
01388         dst[1*dstStride]= cm[(9*(src1 + src2) - (src0  + src3) + 8)>>4];
01389         dst[2*dstStride]= cm[(9*(src2 + src3) - (src1  + src4) + 8)>>4];
01390         dst[3*dstStride]= cm[(9*(src3 + src4) - (src2  + src5) + 8)>>4];
01391         dst[4*dstStride]= cm[(9*(src4 + src5) - (src3  + src6) + 8)>>4];
01392         dst[5*dstStride]= cm[(9*(src5 + src6) - (src4  + src7) + 8)>>4];
01393         dst[6*dstStride]= cm[(9*(src6 + src7) - (src5  + src8) + 8)>>4];
01394         dst[7*dstStride]= cm[(9*(src7 + src8) - (src6  + src9) + 8)>>4];
01395         src++;
01396         dst++;
01397     }
01398 }
01399 
/**
 * mspel 8x8, position mc10: horizontally filter src into a packed 8x8
 * scratch block, then hand src and that block to put_pixels8_l2_8.
 */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t hfilt[8 * 8];

    wmv2_mspel8_h_lowpass(hfilt, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src, hfilt, stride, stride, 8, 8);
}
01405 
/**
 * mspel 8x8, position mc20: the horizontal filter output is written directly
 * to dst, 8 rows, with source and destination sharing the same stride.
 */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
01409 
/**
 * mspel 8x8, position mc30: like mc10, except that the unfiltered operand
 * passed to put_pixels8_l2_8 is src shifted one sample to the right.
 */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t hfilt[8 * 8];

    wmv2_mspel8_h_lowpass(hfilt, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src + 1, hfilt, stride, stride, 8, 8);
}
01415 
/**
 * mspel 8x8, position mc02: the vertical filter output is written directly
 * to dst, 8 columns, with source and destination sharing the same stride.
 */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride)
{
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
01419 
/**
 * mspel 8x8, position mc12.
 *
 * Builds two packed 8x8 intermediates and passes both to put_pixels8_l2_8:
 *  - vfilt: src filtered vertically;
 *  - hvfilt: src filtered horizontally, then vertically.
 *
 * The horizontal pass covers 11 rows starting one row above src, because the
 * vertical filter needs one row before and two rows after its 8-row window;
 * the second pass therefore starts at the second row (offset 8) of hrows.
 */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t hrows[8 * 11];
    uint8_t vfilt[8 * 8];
    uint8_t hvfilt[8 * 8];

    wmv2_mspel8_h_lowpass(hrows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(vfilt, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(hvfilt, hrows + 8, 8, 8, 8);
    put_pixels8_l2_8(dst, vfilt, hvfilt, stride, 8, 8, 8);
}
/**
 * mspel 8x8, position mc32.
 *
 * Same scheme as mc12, except that the purely vertical intermediate is taken
 * from src shifted one sample to the right.  The horizontal pass covers 11
 * rows starting one row above src; the following vertical pass starts at the
 * second of those rows (offset 8).
 */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t hrows[8 * 11];
    uint8_t vfilt[8 * 8];
    uint8_t hvfilt[8 * 8];

    wmv2_mspel8_h_lowpass(hrows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(vfilt, src + 1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(hvfilt, hrows + 8, 8, 8, 8);
    put_pixels8_l2_8(dst, vfilt, hvfilt, stride, 8, 8, 8);
}
/**
 * mspel 8x8, position mc22: horizontal filter over 11 rows (starting one row
 * above src) into a packed scratch buffer, then vertical filter from the
 * second scratch row straight into dst.
 */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t hrows[8 * 11];

    wmv2_mspel8_h_lowpass(hrows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, hrows + 8, stride, 8, 8);
}
01443 
01444 static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
01445     if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
01446     int x;
01447     const int strength= ff_h263_loop_filter_strength[qscale];
01448 
01449     for(x=0; x<8; x++){
01450         int d1, d2, ad1;
01451         int p0= src[x-2*stride];
01452         int p1= src[x-1*stride];
01453         int p2= src[x+0*stride];
01454         int p3= src[x+1*stride];
01455         int d = (p0 - p3 + 4*(p2 - p1)) / 8;
01456 
01457         if     (d<-2*strength) d1= 0;
01458         else if(d<-  strength) d1=-2*strength - d;
01459         else if(d<   strength) d1= d;
01460         else if(d< 2*strength) d1= 2*strength - d;
01461         else                   d1= 0;
01462 
01463         p1 += d1;
01464         p2 -= d1;
01465         if(p1&256) p1= ~(p1>>31);
01466         if(p2&256) p2= ~(p2>>31);
01467 
01468         src[x-1*stride] = p1;
01469         src[x+0*stride] = p2;
01470 
01471         ad1= FFABS(d1)>>1;
01472 
01473         d2= av_clip((p0-p3)/4, -ad1, ad1);
01474 
01475         src[x-2*stride] = p0 - d2;
01476         src[x+  stride] = p3 + d2;
01477     }
01478     }
01479 }
01480 
01481 static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
01482     if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
01483     int y;
01484     const int strength= ff_h263_loop_filter_strength[qscale];
01485 
01486     for(y=0; y<8; y++){
01487         int d1, d2, ad1;
01488         int p0= src[y*stride-2];
01489         int p1= src[y*stride-1];
01490         int p2= src[y*stride+0];
01491         int p3= src[y*stride+1];
01492         int d = (p0 - p3 + 4*(p2 - p1)) / 8;
01493 
01494         if     (d<-2*strength) d1= 0;
01495         else if(d<-  strength) d1=-2*strength - d;
01496         else if(d<   strength) d1= d;
01497         else if(d< 2*strength) d1= 2*strength - d;
01498         else                   d1= 0;
01499 
01500         p1 += d1;
01501         p2 -= d1;
01502         if(p1&256) p1= ~(p1>>31);
01503         if(p2&256) p2= ~(p2>>31);
01504 
01505         src[y*stride-1] = p1;
01506         src[y*stride+0] = p2;
01507 
01508         ad1= FFABS(d1)>>1;
01509 
01510         d2= av_clip((p0-p3)/4, -ad1, ad1);
01511 
01512         src[y*stride-2] = p0 - d2;
01513         src[y*stride+1] = p3 + d2;
01514     }
01515     }
01516 }
01517 
/**
 * In-place separable (1,2,1) smoothing of an 8x8 block.
 *
 * Lines 0 and 7 are not filtered vertically and columns 0 and 7 are not
 * filtered horizontally; those positions only get the matching scale and
 * rounding so that every output pixel is normalised the same way.
 *
 * @param src    top-left pixel of the 8x8 block, modified in place
 * @param stride distance in bytes between two lines
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int vert[8][8];   /* vertically filtered block, scaled by 4 */
    int line, col;

    /* Vertical pass: reads src only, so every output uses unfiltered input. */
    for (line = 0; line < 8; line++) {
        const uint8_t *in = src + line*stride;
        const int border  = (line == 0 || line == 7);

        for (col = 0; col < 8; col++) {
            if (border)
                vert[line][col] = 4*in[col];
            else
                vert[line][col] = in[col - stride] + 2*in[col] + in[col + stride];
        }
    }

    /* Horizontal pass: reads the scratch block and writes back into src. */
    for (line = 0; line < 8; line++) {
        uint8_t   *out = src + line*stride;
        const int *v   = vert[line];

        out[0] = (v[0] + 2) >> 2;
        out[7] = (v[7] + 2) >> 2;
        for (col = 1; col < 7; col++)
            out[col] = (v[col-1] + 2*v[col] + v[col+1] + 8) >> 4;
    }
}
01544 
/**
 * Sum of absolute differences between two 16-pixel-wide areas.
 *
 * @param v         unused
 * @param pix1      first area
 * @param pix2      second area
 * @param line_size distance in bytes between two lines (both areas)
 * @param h         number of lines to compare
 * @return the accumulated SAD
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int line, col;

    for (line = 0; line < h; line++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
01572 
/**
 * SAD of a 16-pixel-wide area against a reference in which each pixel is
 * combined with its right neighbour through avg2().
 *
 * Reads 17 pixels per line from pix2.
 *
 * @param v         unused
 * @param pix1      area to compare
 * @param pix2      reference area
 * @param line_size distance in bytes between two lines (both areas)
 * @param h         number of lines to compare
 * @return the accumulated SAD
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int line, col;

    for (line = 0; line < h; line++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], pix2[col+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
01600 
/**
 * SAD of a 16-pixel-wide area against a reference in which each pixel is
 * combined with the pixel one line below through avg2().
 *
 * Reads h+1 lines from pix2.
 *
 * @param v         unused
 * @param pix1      area to compare
 * @param pix2      reference area
 * @param line_size distance in bytes between two lines (both areas)
 * @param h         number of lines to compare
 * @return the accumulated SAD
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int line, col;

    for (line = 0; line < h; line++) {
        const uint8_t *below = pix2 + line_size;

        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
01630 
/**
 * SAD of a 16-pixel-wide area against a reference in which each pixel is
 * combined with its right, lower and lower-right neighbours through avg4().
 *
 * Reads 17 pixels per line and h+1 lines from pix2.
 *
 * @param v         unused
 * @param pix1      area to compare
 * @param pix2      reference area
 * @param line_size distance in bytes between two lines (both areas)
 * @param h         number of lines to compare
 * @return the accumulated SAD
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int line, col;

    for (line = 0; line < h; line++) {
        const uint8_t *below = pix2 + line_size;

        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg4(pix2[col], pix2[col+1], below[col], below[col+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
01660 
/**
 * Sum of absolute differences between two 8-pixel-wide areas.
 *
 * @param v         unused
 * @param pix1      first area
 * @param pix2      second area
 * @param line_size distance in bytes between two lines (both areas)
 * @param h         number of lines to compare
 * @return the accumulated SAD
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int line, col;

    for (line = 0; line < h; line++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
01680 
/**
 * SAD of an 8-pixel-wide area against a reference in which each pixel is
 * combined with its right neighbour through avg2().
 *
 * Reads 9 pixels per line from pix2.
 *
 * @param v         unused
 * @param pix1      area to compare
 * @param pix2      reference area
 * @param line_size distance in bytes between two lines (both areas)
 * @param h         number of lines to compare
 * @return the accumulated SAD
 */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int line, col;

    for (line = 0; line < h; line++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg2(pix2[col], pix2[col+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
01700 
/**
 * SAD of an 8-pixel-wide area against a reference in which each pixel is
 * combined with the pixel one line below through avg2().
 *
 * Reads h+1 lines from pix2.
 *
 * @param v         unused
 * @param pix1      area to compare
 * @param pix2      reference area
 * @param line_size distance in bytes between two lines (both areas)
 * @param h         number of lines to compare
 * @return the accumulated SAD
 */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int line, col;

    for (line = 0; line < h; line++) {
        const uint8_t *below = pix2 + line_size;

        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
01722 
/**
 * SAD of an 8-pixel-wide area against a reference in which each pixel is
 * combined with its right, lower and lower-right neighbours through avg4().
 *
 * Reads 9 pixels per line and h+1 lines from pix2.
 *
 * @param v         unused
 * @param pix1      area to compare
 * @param pix2      reference area
 * @param line_size distance in bytes between two lines (both areas)
 * @param h         number of lines to compare
 * @return the accumulated SAD
 */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sad = 0;
    int line, col;

    for (line = 0; line < h; line++) {
        const uint8_t *below = pix2 + line_size;

        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg4(pix2[col], pix2[col+1], below[col], below[col+1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
01744 
01745 static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
01746     MpegEncContext *c = v;
01747     int score1=0;
01748     int score2=0;
01749     int x,y;
01750 
01751     for(y=0; y<h; y++){
01752         for(x=0; x<16; x++){
01753             score1+= (s1[x  ] - s2[x ])*(s1[x  ] - s2[x ]);
01754         }
01755         if(y+1<h){
01756             for(x=0; x<15; x++){
01757                 score2+= FFABS(  s1[x  ] - s1[x  +stride]
01758                              - s1[x+1] + s1[x+1+stride])
01759                         -FFABS(  s2[x  ] - s2[x  +stride]
01760                              - s2[x+1] + s2[x+1+stride]);
01761             }
01762         }
01763         s1+= stride;
01764         s2+= stride;
01765     }
01766 
01767     if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
01768     else  return score1 + FFABS(score2)*8;
01769 }
01770 
01771 static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
01772     MpegEncContext *c = v;
01773     int score1=0;
01774     int score2=0;
01775     int x,y;
01776 
01777     for(y=0; y<h; y++){
01778         for(x=0; x<8; x++){
01779             score1+= (s1[x  ] - s2[x ])*(s1[x  ] - s2[x ]);
01780         }
01781         if(y+1<h){
01782             for(x=0; x<7; x++){
01783                 score2+= FFABS(  s1[x  ] - s1[x  +stride]
01784                              - s1[x+1] + s1[x+1+stride])
01785                         -FFABS(  s2[x  ] - s2[x  +stride]
01786                              - s2[x+1] + s2[x+1+stride]);
01787             }
01788         }
01789         s1+= stride;
01790         s2+= stride;
01791     }
01792 
01793     if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
01794     else  return score1 + FFABS(score2)*8;
01795 }
01796 
01797 static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
01798     int i;
01799     unsigned int sum=0;
01800 
01801     for(i=0; i<8*8; i++){
01802         int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
01803         int w= weight[i];
01804         b>>= RECON_SHIFT;
01805         assert(-512<b && b<512);
01806 
01807         sum += (w*b)*(w*b)>>4;
01808     }
01809     return sum>>2;
01810 }
01811 
01812 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
01813     int i;
01814 
01815     for(i=0; i<8*8; i++){
01816         rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
01817     }
01818 }
01819 
01828 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
01829 {
01830     int i;
01831     DCTELEM temp[64];
01832 
01833     if(last<=0) return;
01834     //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
01835 
01836     for(i=0; i<=last; i++){
01837         const int j= scantable[i];
01838         temp[j]= block[j];
01839         block[j]=0;
01840     }
01841 
01842     for(i=0; i<=last; i++){
01843         const int j= scantable[i];
01844         const int perm_j= permutation[j];
01845         block[perm_j]= temp[j];
01846     }
01847 }
01848 
/**
 * Comparison function that ignores all of its arguments and reports a
 * score of 0.
 */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    const int score = 0;

    return score;
}
01852 
01853 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
01854     int i;
01855 
01856     memset(cmp, 0, sizeof(void*)*6);
01857 
01858     for(i=0; i<6; i++){
01859         switch(type&0xFF){
01860         case FF_CMP_SAD:
01861             cmp[i]= c->sad[i];
01862             break;
01863         case FF_CMP_SATD:
01864             cmp[i]= c->hadamard8_diff[i];
01865             break;
01866         case FF_CMP_SSE:
01867             cmp[i]= c->sse[i];
01868             break;
01869         case FF_CMP_DCT:
01870             cmp[i]= c->dct_sad[i];
01871             break;
01872         case FF_CMP_DCT264:
01873             cmp[i]= c->dct264_sad[i];
01874             break;
01875         case FF_CMP_DCTMAX:
01876             cmp[i]= c->dct_max[i];
01877             break;
01878         case FF_CMP_PSNR:
01879             cmp[i]= c->quant_psnr[i];
01880             break;
01881         case FF_CMP_BIT:
01882             cmp[i]= c->bit[i];
01883             break;
01884         case FF_CMP_RD:
01885             cmp[i]= c->rd[i];
01886             break;
01887         case FF_CMP_VSAD:
01888             cmp[i]= c->vsad[i];
01889             break;
01890         case FF_CMP_VSSE:
01891             cmp[i]= c->vsse[i];
01892             break;
01893         case FF_CMP_ZERO:
01894             cmp[i]= zero_cmp;
01895             break;
01896         case FF_CMP_NSSE:
01897             cmp[i]= c->nsse[i];
01898             break;
01899 #if CONFIG_DWT
01900         case FF_CMP_W53:
01901             cmp[i]= c->w53[i];
01902             break;
01903         case FF_CMP_W97:
01904             cmp[i]= c->w97[i];
01905             break;
01906 #endif
01907         default:
01908             av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
01909         }
01910     }
01911 }
01912 
01913 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
01914     long i;
01915     for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
01916         long a = *(long*)(src+i);
01917         long b = *(long*)(dst+i);
01918         *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
01919     }
01920     for(; i<w; i++)
01921         dst[i+0] += src[i+0];
01922 }
01923 
01924 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
01925     long i;
01926 #if !HAVE_FAST_UNALIGNED
01927     if((long)src2 & (sizeof(long)-1)){
01928         for(i=0; i+7<w; i+=8){
01929             dst[i+0] = src1[i+0]-src2[i+0];
01930             dst[i+1] = src1[i+1]-src2[i+1];
01931             dst[i+2] = src1[i+2]-src2[i+2];
01932             dst[i+3] = src1[i+3]-src2[i+3];
01933             dst[i+4] = src1[i+4]-src2[i+4];
01934             dst[i+5] = src1[i+5]-src2[i+5];
01935             dst[i+6] = src1[i+6]-src2[i+6];
01936             dst[i+7] = src1[i+7]-src2[i+7];
01937         }
01938     }else
01939 #endif
01940     for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
01941         long a = *(long*)(src1+i);
01942         long b = *(long*)(src2+i);
01943         *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
01944     }
01945     for(; i<w; i++)
01946         dst[i+0] = src1[i+0]-src2[i+0];
01947 }
01948 
/**
 * Reconstruct a line from median-prediction residuals.
 *
 * For each position the predictor is mid_pred() of the left value, the
 * value from src1 and (left + src1 - left_top) & 0xFF; the residual diff[i]
 * is added to it modulo 256.
 *
 * @param dst      reconstructed output line
 * @param src1     line supplying the second predictor input (presumably the
 *                 line above; confirm against callers)
 * @param diff     residuals
 * @param w        number of bytes
 * @param left     in: initial left value; out: last reconstructed value
 * @param left_top in: initial left_top value; out: last value read from src1
 */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
    uint8_t cur      = *left;
    uint8_t top_left = *left_top;
    int i;

    for (i = 0; i < w; i++) {
        const int top      = src1[i];
        const int gradient = (cur + top - top_left) & 0xFF;

        /* Assigning to uint8_t wraps the sum to 8 bits. */
        cur      = mid_pred(cur, top, gradient) + diff[i];
        top_left = top;
        dst[i]   = cur;
    }

    *left     = cur;
    *left_top = top_left;
}
01965 
/**
 * Compute median-prediction residuals for a line.
 *
 * For each position the predictor is mid_pred() of the left value, the
 * value from src1 and (left + src1 - left_top) & 0xFF; the residual written
 * to dst is src2[i] minus that predictor, modulo 256.
 *
 * @param dst      output residuals
 * @param src1     line supplying the second predictor input (presumably the
 *                 line above; confirm against callers)
 * @param src2     line being encoded
 * @param w        number of bytes
 * @param left     in: initial left value; out: last value read from src2
 * @param left_top in: initial left_top value; out: last value read from src1
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
    uint8_t cur      = *left;
    uint8_t top_left = *left_top;
    int i;

    for (i = 0; i < w; i++) {
        const int top  = src1[i];
        const int pred = mid_pred(cur, top, (cur + top - top_left) & 0xFF);

        top_left = top;
        cur      = src2[i];
        dst[i]   = cur - pred;
    }

    *left     = cur;
    *left_top = top_left;
}
01983 
/**
 * Running sum (left prediction): dst[i] receives the low 8 bits of
 * acc + src[0] + ... + src[i].
 *
 * @param dst output bytes
 * @param src residuals to accumulate
 * @param w   number of bytes
 * @param acc initial accumulator value
 * @return the final accumulator; it is not reduced to 8 bits
 */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
    const uint8_t *in = src;
    uint8_t *out      = dst;
    int remaining;

    for (remaining = w; remaining > 0; remaining--) {
        acc   += *in++;
        *out++ = acc;
    }

    return acc;
}
02002 
/**
 * Left prediction on 4-byte pixels: each of the four components keeps its
 * own running sum, and the low 8 bits of each sum are stored in dst.
 *
 * The byte offset of each component within a pixel depends on
 * HAVE_BIGENDIAN (B,G,R,A at offsets 0..3 when it is 0, 3..0 otherwise).
 *
 * @param dst   output pixels (4 bytes each)
 * @param src   residual pixels (4 bytes each)
 * @param w     number of pixels
 * @param red   in: initial red accumulator; out: final accumulator
 * @param green likewise for green
 * @param blue  likewise for blue
 * @param alpha likewise for alpha
 */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
#if HAVE_BIGENDIAN
    enum { OFF_A, OFF_R, OFF_G, OFF_B };
#else
    enum { OFF_B, OFF_G, OFF_R, OFF_A };
#endif
    int acc_r = *red;
    int acc_g = *green;
    int acc_b = *blue;
    int acc_a = *alpha;
    int px;

    for (px = 0; px < w; px++) {
        const uint8_t *in = src + 4*px;
        uint8_t *out      = dst + 4*px;

        acc_b += in[OFF_B];
        acc_g += in[OFF_G];
        acc_r += in[OFF_R];
        acc_a += in[OFF_A];

        out[OFF_B] = acc_b;
        out[OFF_G] = acc_g;
        out[OFF_R] = acc_r;
        out[OFF_A] = acc_a;
    }

    *red   = acc_r;
    *green = acc_g;
    *blue  = acc_b;
    *alpha = acc_a;
}
02043 
/* o1 = i1 + i2 and o2 = i1 - i2. Expands to two statements, so it must not
 * be used as the unbraced body of an if/for. */
#define BUTTERFLY2(o1,o2,i1,i2) \
    o1 = (i1) + (i2);           \
    o2 = (i1) - (i2);

/* In-place butterfly: (x, y) becomes (x + y, x - y). Both inputs are read
 * into temporaries before either is overwritten. */
#define BUTTERFLY1(x,y)             \
{                                   \
    int bfly_p, bfly_q;             \
    bfly_p = x;                     \
    bfly_q = y;                     \
    x = bfly_p + bfly_q;            \
    y = bfly_p - bfly_q;            \
}

/* |x + y| + |x - y|: the magnitudes of a final butterfly stage, without
 * storing its outputs. */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
02058 
/**
 * Sum of the absolute values of the 8x8 Hadamard transform of (src - dst).
 *
 * Each line goes through three butterfly stages (pair distance 1, 2, 4).
 * Each column then goes through two stages, and the third is folded into
 * the absolute-value accumulation.
 *
 * @param s      unused
 * @param dst    first block
 * @param src    second block
 * @param stride distance in bytes between two lines (both blocks)
 * @param h      must be 8
 * @return the accumulated sum
 */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int temp[64];
    int sum=0;
    int line, column, k, step;

    assert(h==8);

    /* Horizontal transform of each difference line. */
    for (line = 0; line < 8; line++) {
        //FIXME try pointer walks
        int *t             = temp + 8*line;
        const uint8_t *cur = src + stride*line;
        const uint8_t *ref = dst + stride*line;

        /* Stage 1 (distance 1) reads the pixels directly. */
        for (k = 0; k < 8; k += 2) {
            BUTTERFLY2(t[k], t[k+1], cur[k]-ref[k], cur[k+1]-ref[k+1]);
        }
        /* Stages 2 and 3 (distance 2 and 4) work in place. */
        for (step = 2; step < 8; step <<= 1) {
            for (k = 0; k < 8; k++) {
                if (!(k & step)) {
                    BUTTERFLY1(t[k], t[k+step]);
                }
            }
        }
    }

    /* Vertical transform of each column. */
    for (column = 0; column < 8; column++) {
        int *t = temp + column;

        for (step = 1; step < 4; step <<= 1) {
            for (k = 0; k < 8; k++) {
                if (!(k & step)) {
                    BUTTERFLY1(t[8*k], t[8*(k+step)]);
                }
            }
        }
        /* Last stage (distance 4): only the magnitudes are needed. */
        for (k = 0; k < 4; k++)
            sum += BUTTERFLYA(t[8*k], t[8*(k+4)]);
    }
    return sum;
}
02103 
/**
 * Sum of the absolute values of the 8x8 Hadamard transform of src, with the
 * magnitude of the DC term (the "mean") removed.
 *
 * Each line goes through three butterfly stages (pair distance 1, 2, 4).
 * Each column then goes through two stages, and the third is folded into
 * the absolute-value accumulation.
 *
 * @param s      unused
 * @param src    block to analyse
 * @param dummy  unused
 * @param stride distance in bytes between two lines
 * @param h      must be 8
 * @return the accumulated sum minus |DC|
 */
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int temp[64];
    int sum=0;
    int line, column, k, step;

    assert(h==8);

    /* Horizontal transform of each line. */
    for (line = 0; line < 8; line++) {
        //FIXME try pointer walks
        int *t             = temp + 8*line;
        const uint8_t *cur = src + stride*line;

        /* Stage 1 (distance 1) reads the pixels directly. */
        for (k = 0; k < 8; k += 2) {
            BUTTERFLY2(t[k], t[k+1], cur[k], cur[k+1]);
        }
        /* Stages 2 and 3 (distance 2 and 4) work in place. */
        for (step = 2; step < 8; step <<= 1) {
            for (k = 0; k < 8; k++) {
                if (!(k & step)) {
                    BUTTERFLY1(t[k], t[k+step]);
                }
            }
        }
    }

    /* Vertical transform of each column. */
    for (column = 0; column < 8; column++) {
        int *t = temp + column;

        for (step = 1; step < 4; step <<= 1) {
            for (k = 0; k < 8; k++) {
                if (!(k & step)) {
                    BUTTERFLY1(t[8*k], t[8*(k+step)]);
                }
            }
        }
        /* Last stage (distance 4): only the magnitudes are needed. */
        for (k = 0; k < 4; k++)
            sum += BUTTERFLYA(t[8*k], t[8*(k+4)]);
    }

    /* temp[0] + temp[32] is the DC output of the folded last stage. */
    sum -= FFABS(temp[8*0] + temp[8*4]); // -mean

    return sum;
}
02151 
02152 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
02153     MpegEncContext * const s= (MpegEncContext *)c;
02154     LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
02155 
02156     assert(h==8);
02157 
02158     s->dsp.diff_pixels(temp, src1, src2, stride);
02159     s->dsp.fdct(temp);
02160     return s->dsp.sum_abs_dctelem(temp);
02161 }
02162 
#if CONFIG_GPL
/* One 8-point integer transform. The caller defines SRC(x) to read input x
 * and DST(x,v) to consume output x. All inputs are read into locals before
 * the first DST, so SRC and DST may refer to the same storage. */
#define DCT8_1D {\
    const int p07 = SRC(0) + SRC(7);\
    const int p16 = SRC(1) + SRC(6);\
    const int p25 = SRC(2) + SRC(5);\
    const int p34 = SRC(3) + SRC(4);\
    const int m07 = SRC(0) - SRC(7);\
    const int m16 = SRC(1) - SRC(6);\
    const int m25 = SRC(2) - SRC(5);\
    const int m34 = SRC(3) - SRC(4);\
    const int e0 = p07 + p34;\
    const int e1 = p16 + p25;\
    const int e2 = p07 - p34;\
    const int e3 = p16 - p25;\
    const int o0 = m16 + m25 + (m07 + (m07>>1));\
    const int o1 = m07 - m34 - (m25 + (m25>>1));\
    const int o2 = m07 + m34 - (m16 + (m16>>1));\
    const int o3 = m16 - m25 + (m34 + (m34>>1));\
    DST(0,  e0 + e1     ) ;\
    DST(1,  o0 + (o3>>2)) ;\
    DST(2,  e2 + (e3>>1)) ;\
    DST(3,  o1 + (o2>>2)) ;\
    DST(4,  e0 - e1     ) ;\
    DST(5,  o2 - (o1>>2)) ;\
    DST(6, (e2>>1) - e3 ) ;\
    DST(7, (o0>>2) - o3 ) ;\
}

/**
 * Sum of absolute coefficients of the DCT8_1D transform applied to the
 * difference of two 8x8 blocks, first along each line and then along each
 * column.
 *
 * @param c      MpegEncContext providing dsp.diff_pixels
 * @param src1   first block
 * @param src2   second block
 * @param stride line size passed to diff_pixels
 * @param h      unused
 * @return the accumulated sum
 */
static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DCTELEM dct[8][8];
    int sum=0;
    int n;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

    /* First pass: transform each line in place. */
#define SRC(x) dct[n][x]
#define DST(x,v) dct[n][x]= v
    for (n = 0; n < 8; n++) {
        DCT8_1D
    }
#undef SRC
#undef DST

    /* Second pass: transform each column, accumulating magnitudes only. */
#define SRC(x) dct[x][n]
#define DST(x,v) sum += FFABS(v)
    for (n = 0; n < 8; n++) {
        DCT8_1D
    }
#undef SRC
#undef DST
    return sum;
}
#endif
02215 
02216 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
02217     MpegEncContext * const s= (MpegEncContext *)c;
02218     LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
02219     int sum=0, i;
02220 
02221     assert(h==8);
02222 
02223     s->dsp.diff_pixels(temp, src1, src2, stride);
02224     s->dsp.fdct(temp);
02225 
02226     for(i=0; i<64; i++)
02227         sum= FFMAX(sum, FFABS(temp[i]));
02228 
02229     return sum;
02230 }
02231 
02232 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
02233     MpegEncContext * const s= (MpegEncContext *)c;
02234     LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
02235     DCTELEM * const bak = temp+64;
02236     int sum=0, i;
02237 
02238     assert(h==8);
02239     s->mb_intra=0;
02240 
02241     s->dsp.diff_pixels(temp, src1, src2, stride);
02242 
02243     memcpy(bak, temp, 64*sizeof(DCTELEM));
02244 
02245     s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
02246     s->dct_unquantize_inter(s, temp, 0, s->qscale);
02247     ff_simple_idct_8(temp); //FIXME
02248 
02249     for(i=0; i<64; i++)
02250         sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
02251 
02252     return sum;
02253 }
02254 
02255 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
02256     MpegEncContext * const s= (MpegEncContext *)c;
02257     const uint8_t *scantable= s->intra_scantable.permutated;
02258     LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
02259     LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
02260     LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
02261     int i, last, run, bits, level, distortion, start_i;
02262     const int esc_length= s->ac_esc_length;
02263     uint8_t * length;
02264     uint8_t * last_length;
02265 
02266     assert(h==8);
02267 
02268     copy_block8(lsrc1, src1, 8, stride, 8);
02269     copy_block8(lsrc2, src2, 8, stride, 8);
02270 
02271     s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
02272 
02273     s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
02274 
02275     bits=0;
02276 
02277     if (s->mb_intra) {
02278         start_i = 1;
02279         length     = s->intra_ac_vlc_length;
02280         last_length= s->intra_ac_vlc_last_length;
02281         bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
02282     } else {
02283         start_i = 0;
02284         length     = s->inter_ac_vlc_length;
02285         last_length= s->inter_ac_vlc_last_length;
02286     }
02287 
02288     if(last>=start_i){
02289         run=0;
02290         for(i=start_i; i<last; i++){
02291             int j= scantable[i];
02292             level= temp[j];
02293 
02294             if(level){
02295                 level+=64;
02296                 if((level&(~127)) == 0){
02297                     bits+= length[UNI_AC_ENC_INDEX(run, level)];
02298                 }else
02299                     bits+= esc_length;
02300                 run=0;
02301             }else
02302                 run++;
02303         }
02304         i= scantable[last];
02305 
02306         level= temp[i] + 64;
02307 
02308         assert(level - 64);
02309 
02310         if((level&(~127)) == 0){
02311             bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
02312         }else
02313             bits+= esc_length;
02314 
02315     }
02316 
02317     if(last>=0){
02318         if(s->mb_intra)
02319             s->dct_unquantize_intra(s, temp, 0, s->qscale);
02320         else
02321             s->dct_unquantize_inter(s, temp, 0, s->qscale);
02322     }
02323 
02324     s->dsp.idct_add(lsrc2, 8, temp);
02325 
02326     distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
02327 
02328     return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
02329 }
02330 
02331 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
02332     MpegEncContext * const s= (MpegEncContext *)c;
02333     const uint8_t *scantable= s->intra_scantable.permutated;
02334     LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
02335     int i, last, run, bits, level, start_i;
02336     const int esc_length= s->ac_esc_length;
02337     uint8_t * length;
02338     uint8_t * last_length;
02339 
02340     assert(h==8);
02341 
02342     s->dsp.diff_pixels(temp, src1, src2, stride);
02343 
02344     s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
02345 
02346     bits=0;
02347 
02348     if (s->mb_intra) {
02349         start_i = 1;
02350         length     = s->intra_ac_vlc_length;
02351         last_length= s->intra_ac_vlc_last_length;
02352         bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
02353     } else {
02354         start_i = 0;
02355         length     = s->inter_ac_vlc_length;
02356         last_length= s->inter_ac_vlc_last_length;
02357     }
02358 
02359     if(last>=start_i){
02360         run=0;
02361         for(i=start_i; i<last; i++){
02362             int j= scantable[i];
02363             level= temp[j];
02364 
02365             if(level){
02366                 level+=64;
02367                 if((level&(~127)) == 0){
02368                     bits+= length[UNI_AC_ENC_INDEX(run, level)];
02369                 }else
02370                     bits+= esc_length;
02371                 run=0;
02372             }else
02373                 run++;
02374         }
02375         i= scantable[last];
02376 
02377         level= temp[i] + 64;
02378 
02379         assert(level - 64);
02380 
02381         if((level&(~127)) == 0){
02382             bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
02383         }else
02384             bits+= esc_length;
02385     }
02386 
02387     return bits;
02388 }
02389 
/* Generates vsad_intra<size>_c(): the sum of absolute differences between
 * each line and the line below it, over h-1 line pairs and <size> columns.
 * Columns are processed four at a time. */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int total = 0;                                                                                          \
    int line, col;                                                                                          \
                                                                                                            \
    for (line = 1; line < h; line++) {                                                                      \
        const uint8_t *below = s + stride;                                                                  \
                                                                                                            \
        for (col = 0; col < size; col += 4) {                                                               \
            total += FFABS(s[col  ] - below[col  ]);                                                        \
            total += FFABS(s[col+1] - below[col+1]);                                                        \
            total += FFABS(s[col+2] - below[col+2]);                                                        \
            total += FFABS(s[col+3] - below[col+3]);                                                        \
        }                                                                                                   \
        s += stride;                                                                                        \
    }                                                                                                       \
                                                                                                            \
    return total;                                                                                           \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
02407 
/**
 * Vertical SAD of the difference picture (s1 - s2) over a 16-pixel-wide block.
 *
 * For every pair of adjacent rows (y, y+1) with y in 0..h-2, the absolute
 * change of (s1 - s2) between the two rows is accumulated for 16 columns.
 * The context pointer is not used.
 *
 * @return the accumulated score
 */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++, s1 += stride, s2 += stride) {
        for (col = 0; col < 16; col++) {
            const int delta = s1[col] - s2[col] - s1[col + stride] + s2[col + stride];
            total += FFABS(delta);
        }
    }

    return total;
}
02422 
/** Square of an expression; note that the argument is evaluated twice. */
#define SQ(a) ((a)*(a))
/**
 * Generate vsse_intra<size>_c(): the sum of squared differences between each
 * pixel and the pixel one row (stride bytes) below it, taken over the first
 * `size` columns of rows 0..h-2. The context pointer and the second picture
 * pointer are not used.
 */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int total = 0;                                                      \
    int row, col;                                                       \
                                                                        \
    for (row = 1; row < h; row++, s += stride) {                        \
        for (col = 0; col < size; col++) {                              \
            const int delta = s[col] - s[col + stride];                 \
            total += delta * delta;                                     \
        }                                                               \
    }                                                                   \
                                                                        \
    return total;                                                       \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
02441 
/**
 * Vertical SSE of the difference picture (s1 - s2) over a 16-pixel-wide block.
 *
 * For every pair of adjacent rows (y, y+1) with y in 0..h-2, the squared
 * change of (s1 - s2) between the two rows is accumulated for 16 columns.
 * The context pointer is not used.
 *
 * @return the accumulated score
 */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int total = 0;
    int row, col;

    for (row = 1; row < h; row++, s1 += stride, s2 += stride) {
        const uint8_t *below1 = s1 + stride;
        const uint8_t *below2 = s2 + stride;

        for (col = 0; col < 16; col++)
            total += SQ(s1[col] - s2[col] - below1[col] + below2[col]);
    }

    return total;
}
02456 
/**
 * Sum of squared differences between an int8_t vector and an int16_t vector.
 *
 * @param pix1 first vector, `size` elements
 * @param pix2 second vector, `size` elements
 * @param size number of elements to compare; nothing is read when size <= 0
 * @return sum over k of (pix1[k] - pix2[k])^2, accumulated in an int
 */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int total = 0;
    int k = 0;

    while (k < size) {
        const int diff = pix1[k] - pix2[k];
        total += diff * diff;
        k++;
    }
    return total;
}
02465 
/*
 * Instantiate WRAPPER8_16_SQ (a macro defined outside this excerpt) once per
 * comparison function. The first argument names an 8x8 routine, the second
 * the 16-pixel entry point that dsputil_init() installs into the DSPContext.
 * NOTE(review): presumably each expansion builds the 16-wide function out of
 * calls to the 8x8 one -- confirm against the macro definition.
 */
WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#if CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
02476 
/**
 * Element-wise product of two float vectors: dst[k] = src0[k] * src1[k]
 * for k in 0..len-1.
 */
static void vector_fmul_c(float *dst, const float *src0, const float *src1, int len){
    int k = 0;

    while (k < len) {
        dst[k] = src0[k] * src1[k];
        k++;
    }
}
02482 
/**
 * Multiply src0 by src1 read back to front:
 * dst[k] = src0[k] * src1[len - 1 - k] for k in 0..len-1.
 */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int k;

    for (k = 0; k < len; k++) {
        const int mirror = len - 1 - k;
        dst[k] = src0[k] * src1[mirror];
    }
}
02489 
/**
 * Element-wise multiply-add: dst[k] = src0[k] * src1[k] + src2[k]
 * for k in 0..len-1.
 */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
    int k = 0;

    while (k < len) {
        dst[k] = src0[k] * src1[k] + src2[k];
        k++;
    }
}
02495 
/**
 * Windowed overlap of two half-blocks.
 *
 * dst and win each hold 2*len floats, src0 and src1 hold len floats.
 * For k in 0..len-1, with m = 2*len - 1 - k (the mirrored position):
 *   dst[k] = src0[k] * win[m] - src1[len-1-k] * win[k]
 *   dst[m] = src0[k] * win[k] + src1[len-1-k] * win[m]
 * All four inputs of a step are loaded before either output is stored.
 */
static void vector_fmul_window_c(float *dst, const float *src0,
                                 const float *src1, const float *win, int len)
{
    int k;

    for (k = 0; k < len; k++) {
        const int m    = 2 * len - 1 - k;
        const float s0 = src0[k];
        const float s1 = src1[len - 1 - k];
        const float wi = win[k];
        const float wj = win[m];

        dst[k] = s0*wj - s1*wi;
        dst[m] = s0*wi + s1*wj;
    }
}
02512 
/**
 * Scale a float vector by a constant: dst[k] = src[k] * mul
 * for k in 0..len-1.
 */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int k = 0;

    while (k < len) {
        dst[k] = src[k] * mul;
        k++;
    }
}
02520 
/**
 * Multiply-accumulate with a constant: dst[k] += src[k] * mul
 * for k in 0..len-1.
 */
static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int k = 0;

    while (k < len) {
        dst[k] += src[k] * mul;
        k++;
    }
}
02528 
/**
 * In-place butterfly on two non-overlapping float vectors: for each k in
 * 0..len-1, v1[k] becomes the sum and v2[k] the difference (v1 - v2) of the
 * original pair.
 */
static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                int len)
{
    int k;

    for (k = 0; k < len; k++) {
        const float diff = v1[k] - v2[k];

        v1[k] += v2[k];
        v2[k]  = diff;
    }
}
02539 
/**
 * Butterfly with interleaved output: for each k in 0..len-1,
 * dst[2k] = src0[k] + src1[k] and dst[2k+1] = src0[k] - src1[k].
 * Both inputs of a step are loaded before the pair is stored.
 */
static void butterflies_float_interleave_c(float *dst, const float *src0,
                                           const float *src1, int len)
{
    float *out = dst;
    int k;

    for (k = 0; k < len; k++, out += 2) {
        const float a = src0[k];
        const float b = src1[k];

        out[0] = a + b;
        out[1] = a - b;
    }
}
02551 
/**
 * Dot product of two float vectors, accumulated in single precision in
 * index order.
 *
 * @return sum over k in 0..len-1 of v1[k] * v2[k]; 0 when len <= 0
 */
static float scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    float acc = 0.0;
    int k = 0;

    while (k < len) {
        acc += v1[k] * v2[k];
        k++;
    }

    return acc;
}
02562 
/**
 * Clip one float, given as its raw 32-bit pattern, to [min, max] using only
 * unsigned integer compares. Used for the case min < 0 < max.
 *
 * @param a        bit pattern of the value to clip
 * @param mini     bit pattern of the (negative) lower bound
 * @param maxi     bit pattern of the (positive) upper bound
 * @param maxisign maxi with the top (sign) bit flipped
 * @return mini if a compares above mini as an unsigned integer, maxi if a
 *         with its top bit flipped compares above maxisign, otherwise a
 *
 * NOTE(review): the compare trick relies on the IEEE-754 binary32 layout
 * (sign bit on top, magnitude ordered like an integer).
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                   uint32_t maxi, uint32_t maxisign)
{
    if (a > mini)
        return mini;
    if ((a ^ (1U<<31)) > maxisign)
        return maxi;
    return a;
}

/**
 * Clip a float vector to [*min, *max] for bounds of opposite sign.
 *
 * The float bit patterns are moved to and from uint32_t with memcpy() rather
 * than by dereferencing a casted pointer, so the function no longer depends
 * on the compiler tolerating strict-aliasing violations.
 *
 * As before, elements are handled in groups of eight: when len is not a
 * multiple of 8 the last group still covers eight elements, so len is
 * expected to be a multiple of 8.
 *
 * @param dst destination vector
 * @param src source vector
 * @param min pointer to the lower bound (expected < 0)
 * @param max pointer to the upper bound (expected > 0)
 * @param len number of elements
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
    uint32_t mini, maxi, maxisign;
    int i, k;

    memcpy(&mini, min, sizeof(mini));
    memcpy(&maxi, max, sizeof(maxi));
    maxisign = maxi ^ (1U<<31);

    for (i = 0; i < len; i += 8) {
        for (k = 0; k < 8; k++) {
            uint32_t bits;

            memcpy(&bits, &src[i + k], sizeof(bits));
            bits = clipf_c_one(bits, mini, maxi, maxisign);
            memcpy(&dst[i + k], &bits, sizeof(bits));
        }
    }
}
/**
 * Clip every element of a float vector to [min, max].
 *
 * When the bounds straddle zero (min < 0 and max > 0) the work is handed to
 * vector_clipf_c_opposite_sign(); otherwise av_clipf() is applied per
 * element. Elements are handled in groups of eight, so when len is not a
 * multiple of 8 the last group still covers eight elements.
 */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    int base, lane;

    if(min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
        return;
    }

    for(base = 0; base < len; base += 8) {
        for(lane = 0; lane < 8; lane++)
            dst[base + lane] = av_clipf(src[base + lane], min, max);
    }
}
02607 
/**
 * Dot product of two int16_t vectors in which every individual product is
 * shifted right by `shift` before being added to the sum.
 *
 * @param order number of elements; the loop runs until the counter hits zero
 * @param shift right shift applied to each product
 * @return the accumulated sum
 */
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order, int shift)
{
    int acc = 0;
    int k;

    for (k = 0; order; order--, k++)
        acc += (v1[k] * v2[k]) >> shift;

    return acc;
}
02617 
/**
 * Compute the dot product of v1 and v2 while updating v1 in place.
 *
 * For each element the product v1[k] * v2[k] is added to the result using
 * the value of v1[k] from before the update; afterwards mul * v3[k] is added
 * to v1[k] (stored back as int16_t).
 *
 * @param order number of elements; the loop runs until the counter hits zero
 * @return the dot product of the original v1 with v2
 */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int acc = 0;
    int k;

    for (k = 0; order; order--, k++) {
        acc   += v1[k] * v2[k];
        v1[k] += mul * v3[k];
    }
    return acc;
}
02627 
/**
 * Apply a symmetric window to int16_t samples.
 *
 * Only the first len/2 window coefficients are read; coefficient k is applied
 * both to sample k and to its mirror, sample len-1-k. Each product is rounded
 * by adding 1 << 14 and shifting right by 15.
 */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    const int half = len >> 1;
    int head;

    for (head = 0; head < half; head++) {
        const unsigned int tail = len - head - 1;
        const int16_t coeff     = window[head];

        output[head] = (MUL16(input[head], coeff) + (1 << 14)) >> 15;
        output[tail] = (MUL16(input[tail], coeff) + (1 << 14)) >> 15;
    }
}
02640 
/**
 * Clip int32_t samples to [min, max] with av_clip(), eight at a time.
 *
 * The body runs before the length is tested, so at least eight elements are
 * always processed, and the unsigned counter is decreased by 8 per pass;
 * len is therefore expected to be a non-zero multiple of 8.
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    do {
        int lane;

        for (lane = 0; lane < 8; lane++)
            dst[lane] = av_clip(src[lane], min, max);
        dst += 8;
        src += 8;
        len -= 8;
    } while (len > 0);
}
02656 
#define W0 2048
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */

/**
 * Row pass of the WMV2 inverse DCT: transforms the eight coefficients at
 * b[0..7] in place. Every output is rounded by adding 1 << 7 and shifting
 * right by 8.
 */
static void wmv2_idct_row(short * b)
{
    const int rnd = 1 << 7;
    const int x0 = b[0], x1 = b[1], x2 = b[2], x3 = b[3];
    const int x4 = b[4], x5 = b[5], x6 = b[6], x7 = b[7];

    /* step 1: weight the inputs pairwise */
    const int a1 = W1 * x1 + W7 * x7;
    const int a7 = W7 * x1 - W1 * x7;
    const int a5 = W5 * x5 + W3 * x3;
    const int a3 = W3 * x5 - W5 * x3;
    const int a2 = W2 * x2 + W6 * x6;
    const int a6 = W6 * x2 - W2 * x6;
    const int a0 = W0 * x0 + W0 * x4;
    const int a4 = W0 * x0 - W0 * x4;

    /* step 2: combine the odd terms (1, 3, 5, 7), scaled by 181/256 */
    const int s1 = (181 * (a1 - a5 + a7 - a3) + 128) >> 8;
    const int s2 = (181 * (a1 - a5 - a7 + a3) + 128) >> 8;

    /* step 3: final butterflies, outputs 7..4 mirror outputs 0..3 */
    b[0] = (a0 + a2 + a1 + a5 + rnd) >> 8;
    b[1] = (a4 + a6 + s1      + rnd) >> 8;
    b[2] = (a4 - a6 + s2      + rnd) >> 8;
    b[3] = (a0 - a2 + a7 + a3 + rnd) >> 8;
    b[4] = (a0 - a2 - a7 - a3 + rnd) >> 8;
    b[5] = (a4 - a6 - s2      + rnd) >> 8;
    b[6] = (a4 + a6 - s1      + rnd) >> 8;
    b[7] = (a0 + a2 - a1 - a5 + rnd) >> 8;
}
02692 static void wmv2_idct_col(short * b)
02693 {
02694     int s1,s2;
02695     int a0,a1,a2,a3,a4,a5,a6,a7;
02696     /*step 1, with extended precision*/
02697     a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
02698     a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
02699     a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
02700     a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
02701     a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
02702     a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
02703     a0 = (W0*b[8*0]+W0*b[8*4]    )>>3;
02704     a4 = (W0*b[8*0]-W0*b[8*4]    )>>3;
02705     /*step 2*/
02706     s1 = (181*(a1-a5+a7-a3)+128)>>8;
02707     s2 = (181*(a1-a5-a7+a3)+128)>>8;
02708     /*step 3*/
02709     b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
02710     b[8*1] = (a4+a6 +s1   + (1<<13))>>14;
02711     b[8*2] = (a4-a6 +s2   + (1<<13))>>14;
02712     b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;
02713 
02714     b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
02715     b[8*5] = (a4-a6 -s2   + (1<<13))>>14;
02716     b[8*6] = (a4+a6 -s1   + (1<<13))>>14;
02717     b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
02718 }
/**
 * WMV2 inverse DCT of one 8x8 block (64 coefficients stored row by row),
 * done in place: all eight rows are transformed first, then all eight
 * columns.
 */
void ff_wmv2_idct_c(short * block){
    int row, col;

    for(row = 0; row < 8; row++)
        wmv2_idct_row(block + 8 * row);

    for(col = 0; col < 8; col++)
        wmv2_idct_col(block + col);
}
02729 /* XXX: those functions should be suppressed ASAP when all IDCTs are
02730  converted */
02731 static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
02732 {
02733     ff_wmv2_idct_c(block);
02734     ff_put_pixels_clamped_c(block, dest, line_size);
02735 }
02736 static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
02737 {
02738     ff_wmv2_idct_c(block);
02739     ff_add_pixels_clamped_c(block, dest, line_size);
02740 }
02741 static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
02742 {
02743     j_rev_dct (block);
02744     ff_put_pixels_clamped_c(block, dest, line_size);
02745 }
02746 static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
02747 {
02748     j_rev_dct (block);
02749     ff_add_pixels_clamped_c(block, dest, line_size);
02750 }
02751 
02752 static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
02753 {
02754     j_rev_dct4 (block);
02755     put_pixels_clamped4_c(block, dest, line_size);
02756 }
02757 static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
02758 {
02759     j_rev_dct4 (block);
02760     add_pixels_clamped4_c(block, dest, line_size);
02761 }
02762 
02763 static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
02764 {
02765     j_rev_dct2 (block);
02766     put_pixels_clamped2_c(block, dest, line_size);
02767 }
02768 static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
02769 {
02770     j_rev_dct2 (block);
02771     add_pixels_clamped2_c(block, dest, line_size);
02772 }
02773 
02774 static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
02775 {
02776     dest[0] = av_clip_uint8((block[0] + 4)>>3);
02777 }
02778 static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
02779 {
02780     dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3));
02781 }
02782 
02783 static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
02784 
02785 /* init static data */
02786 av_cold void dsputil_static_init(void)
02787 {
02788     int i;
02789 
02790     for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
02791     for(i=0;i<MAX_NEG_CROP;i++) {
02792         ff_cropTbl[i] = 0;
02793         ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
02794     }
02795 
02796     for(i=0;i<512;i++) {
02797         ff_squareTbl[i] = (i - 256) * (i - 256);
02798     }
02799 
02800     for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
02801 }
02802 
02803 int ff_check_alignment(void){
02804     static int did_fail=0;
02805     LOCAL_ALIGNED_16(int, aligned, [4]);
02806 
02807     if((intptr_t)aligned & 15){
02808         if(!did_fail){
02809 #if HAVE_MMX || HAVE_ALTIVEC
02810             av_log(NULL, AV_LOG_ERROR,
02811                 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
02812                 "and may be very slow or crash. This is not a bug in libavcodec,\n"
02813                 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
02814                 "Do not report crashes to FFmpeg developers.\n");
02815 #endif
02816             did_fail=1;
02817         }
02818         return -1;
02819     }
02820     return 0;
02821 }
02822 
02823 av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
02824 {
02825     int i;
02826 
02827     ff_check_alignment();
02828 
02829 #if CONFIG_ENCODERS
02830     if (avctx->bits_per_raw_sample == 10) {
02831         c->fdct    = ff_jpeg_fdct_islow_10;
02832         c->fdct248 = ff_fdct248_islow_10;
02833     } else {
02834         if(avctx->dct_algo==FF_DCT_FASTINT) {
02835             c->fdct    = fdct_ifast;
02836             c->fdct248 = fdct_ifast248;
02837         }
02838         else if(avctx->dct_algo==FF_DCT_FAAN) {
02839             c->fdct    = ff_faandct;
02840             c->fdct248 = ff_faandct248;
02841         }
02842         else {
02843             c->fdct    = ff_jpeg_fdct_islow_8; //slow/accurate/default
02844             c->fdct248 = ff_fdct248_islow_8;
02845         }
02846     }
02847 #endif //CONFIG_ENCODERS
02848 
02849     if(avctx->lowres==1){
02850         c->idct_put= ff_jref_idct4_put;
02851         c->idct_add= ff_jref_idct4_add;
02852         c->idct    = j_rev_dct4;
02853         c->idct_permutation_type= FF_NO_IDCT_PERM;
02854     }else if(avctx->lowres==2){
02855         c->idct_put= ff_jref_idct2_put;
02856         c->idct_add= ff_jref_idct2_add;
02857         c->idct    = j_rev_dct2;
02858         c->idct_permutation_type= FF_NO_IDCT_PERM;
02859     }else if(avctx->lowres==3){
02860         c->idct_put= ff_jref_idct1_put;
02861         c->idct_add= ff_jref_idct1_add;
02862         c->idct    = j_rev_dct1;
02863         c->idct_permutation_type= FF_NO_IDCT_PERM;
02864     }else{
02865         if (avctx->bits_per_raw_sample == 10) {
02866             c->idct_put              = ff_simple_idct_put_10;
02867             c->idct_add              = ff_simple_idct_add_10;
02868             c->idct                  = ff_simple_idct_10;
02869             c->idct_permutation_type = FF_NO_IDCT_PERM;
02870         } else {
02871         if(avctx->idct_algo==FF_IDCT_INT){
02872             c->idct_put= ff_jref_idct_put;
02873             c->idct_add= ff_jref_idct_add;
02874             c->idct    = j_rev_dct;
02875             c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
02876         }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
02877                 avctx->idct_algo==FF_IDCT_VP3){
02878             c->idct_put= ff_vp3_idct_put_c;
02879             c->idct_add= ff_vp3_idct_add_c;
02880             c->idct    = ff_vp3_idct_c;
02881             c->idct_permutation_type= FF_NO_IDCT_PERM;
02882         }else if(avctx->idct_algo==FF_IDCT_WMV2){
02883             c->idct_put= ff_wmv2_idct_put_c;
02884             c->idct_add= ff_wmv2_idct_add_c;
02885             c->idct    = ff_wmv2_idct_c;
02886             c->idct_permutation_type= FF_NO_IDCT_PERM;
02887         }else if(avctx->idct_algo==FF_IDCT_FAAN){
02888             c->idct_put= ff_faanidct_put;
02889             c->idct_add= ff_faanidct_add;
02890             c->idct    = ff_faanidct;
02891             c->idct_permutation_type= FF_NO_IDCT_PERM;
02892         }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
02893             c->idct_put= ff_ea_idct_put_c;
02894             c->idct_permutation_type= FF_NO_IDCT_PERM;
02895         }else{ //accurate/default
02896             c->idct_put = ff_simple_idct_put_8;
02897             c->idct_add = ff_simple_idct_add_8;
02898             c->idct     = ff_simple_idct_8;
02899             c->idct_permutation_type= FF_NO_IDCT_PERM;
02900         }
02901         }
02902     }
02903 
02904     c->diff_pixels = diff_pixels_c;
02905     c->put_pixels_clamped = ff_put_pixels_clamped_c;
02906     c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
02907     c->add_pixels_clamped = ff_add_pixels_clamped_c;
02908     c->sum_abs_dctelem = sum_abs_dctelem_c;
02909     c->gmc1 = gmc1_c;
02910     c->gmc = ff_gmc_c;
02911     c->pix_sum = pix_sum_c;
02912     c->pix_norm1 = pix_norm1_c;
02913 
02914     c->fill_block_tab[0] = fill_block16_c;
02915     c->fill_block_tab[1] = fill_block8_c;
02916 
02917     /* TODO [0] 16  [1] 8 */
02918     c->pix_abs[0][0] = pix_abs16_c;
02919     c->pix_abs[0][1] = pix_abs16_x2_c;
02920     c->pix_abs[0][2] = pix_abs16_y2_c;
02921     c->pix_abs[0][3] = pix_abs16_xy2_c;
02922     c->pix_abs[1][0] = pix_abs8_c;
02923     c->pix_abs[1][1] = pix_abs8_x2_c;
02924     c->pix_abs[1][2] = pix_abs8_y2_c;
02925     c->pix_abs[1][3] = pix_abs8_xy2_c;
02926 
02927     c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
02928     c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
02929     c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
02930     c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
02931     c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
02932     c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
02933     c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
02934     c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
02935     c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
02936 
02937     c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
02938     c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
02939     c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
02940     c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
02941     c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
02942     c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
02943     c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
02944     c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
02945     c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
02946 
02947 #define dspfunc(PFX, IDX, NUM) \
02948     c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
02949     c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
02950     c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
02951     c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
02952     c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
02953     c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
02954     c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
02955     c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
02956     c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
02957     c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
02958     c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
02959     c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
02960     c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
02961     c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
02962     c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
02963     c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
02964 
02965     dspfunc(put_qpel, 0, 16);
02966     dspfunc(put_no_rnd_qpel, 0, 16);
02967 
02968     dspfunc(avg_qpel, 0, 16);
02969     /* dspfunc(avg_no_rnd_qpel, 0, 16); */
02970 
02971     dspfunc(put_qpel, 1, 8);
02972     dspfunc(put_no_rnd_qpel, 1, 8);
02973 
02974     dspfunc(avg_qpel, 1, 8);
02975     /* dspfunc(avg_no_rnd_qpel, 1, 8); */
02976 
02977 #undef dspfunc
02978 
02979 #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
02980     ff_mlp_init(c, avctx);
02981 #endif
02982 #if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
02983     ff_intrax8dsp_init(c,avctx);
02984 #endif
02985 
02986     c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
02987     c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
02988     c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
02989     c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
02990     c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
02991     c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
02992     c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
02993     c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
02994 
02995 #define SET_CMP_FUNC(name) \
02996     c->name[0]= name ## 16_c;\
02997     c->name[1]= name ## 8x8_c;
02998 
02999     SET_CMP_FUNC(hadamard8_diff)
03000     c->hadamard8_diff[4]= hadamard8_intra16_c;
03001     c->hadamard8_diff[5]= hadamard8_intra8x8_c;
03002     SET_CMP_FUNC(dct_sad)
03003     SET_CMP_FUNC(dct_max)
03004 #if CONFIG_GPL
03005     SET_CMP_FUNC(dct264_sad)
03006 #endif
03007     c->sad[0]= pix_abs16_c;
03008     c->sad[1]= pix_abs8_c;
03009     c->sse[0]= sse16_c;
03010     c->sse[1]= sse8_c;
03011     c->sse[2]= sse4_c;
03012     SET_CMP_FUNC(quant_psnr)
03013     SET_CMP_FUNC(rd)
03014     SET_CMP_FUNC(bit)
03015     c->vsad[0]= vsad16_c;
03016     c->vsad[4]= vsad_intra16_c;
03017     c->vsad[5]= vsad_intra8_c;
03018     c->vsse[0]= vsse16_c;
03019     c->vsse[4]= vsse_intra16_c;
03020     c->vsse[5]= vsse_intra8_c;
03021     c->nsse[0]= nsse16_c;
03022     c->nsse[1]= nsse8_c;
03023 #if CONFIG_DWT
03024     ff_dsputil_init_dwt(c);
03025 #endif
03026 
03027     c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
03028 
03029     c->add_bytes= add_bytes_c;
03030     c->diff_bytes= diff_bytes_c;
03031     c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
03032     c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
03033     c->add_hfyu_left_prediction  = add_hfyu_left_prediction_c;
03034     c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
03035     c->bswap_buf= bswap_buf;
03036     c->bswap16_buf = bswap16_buf;
03037 
03038     if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
03039         c->h263_h_loop_filter= h263_h_loop_filter_c;
03040         c->h263_v_loop_filter= h263_v_loop_filter_c;
03041     }
03042 
03043     if (CONFIG_VP3_DECODER) {
03044         c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
03045         c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
03046         c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
03047     }
03048 
03049     c->h261_loop_filter= h261_loop_filter_c;
03050 
03051     c->try_8x8basis= try_8x8basis_c;
03052     c->add_8x8basis= add_8x8basis_c;
03053 
03054 #if CONFIG_VORBIS_DECODER
03055     c->vorbis_inverse_coupling = vorbis_inverse_coupling;
03056 #endif
03057 #if CONFIG_AC3_DECODER
03058     c->ac3_downmix = ff_ac3_downmix_c;
03059 #endif
03060     c->vector_fmul = vector_fmul_c;
03061     c->vector_fmul_reverse = vector_fmul_reverse_c;
03062     c->vector_fmul_add = vector_fmul_add_c;
03063     c->vector_fmul_window = vector_fmul_window_c;
03064     c->vector_clipf = vector_clipf_c;
03065     c->scalarproduct_int16 = scalarproduct_int16_c;
03066     c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
03067     c->apply_window_int16 = apply_window_int16_c;
03068     c->vector_clip_int32 = vector_clip_int32_c;
03069     c->scalarproduct_float = scalarproduct_float_c;
03070     c->butterflies_float = butterflies_float_c;
03071     c->butterflies_float_interleave = butterflies_float_interleave_c;
03072     c->vector_fmul_scalar = vector_fmul_scalar_c;
03073     c->vector_fmac_scalar = vector_fmac_scalar_c;
03074 
03075     c->shrink[0]= av_image_copy_plane;
03076     c->shrink[1]= ff_shrink22;
03077     c->shrink[2]= ff_shrink44;
03078     c->shrink[3]= ff_shrink88;
03079 
03080     c->prefetch= just_return;
03081 
03082     memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
03083     memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
03084 
03085 #undef FUNC
03086 #undef FUNCC
03087 #define FUNC(f, depth) f ## _ ## depth
03088 #define FUNCC(f, depth) f ## _ ## depth ## _c
03089 
03090 #define dspfunc1(PFX, IDX, NUM, depth)\
03091     c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM        , depth);\
03092     c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
03093     c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
03094     c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
03095 
03096 #define dspfunc2(PFX, IDX, NUM, depth)\
03097     c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
03098     c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
03099     c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
03100     c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
03101     c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
03102     c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
03103     c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
03104     c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
03105     c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
03106     c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
03107     c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
03108     c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
03109     c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
03110     c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
03111     c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
03112     c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
03113 
03114 
03115 #define BIT_DEPTH_FUNCS(depth, dct)\
03116     c->get_pixels                    = FUNCC(get_pixels   ## dct   , depth);\
03117     c->draw_edges                    = FUNCC(draw_edges            , depth);\
03118     c->emulated_edge_mc              = FUNC (ff_emulated_edge_mc   , depth);\
03119     c->clear_block                   = FUNCC(clear_block  ## dct   , depth);\
03120     c->clear_blocks                  = FUNCC(clear_blocks ## dct   , depth);\
03121     c->add_pixels8                   = FUNCC(add_pixels8  ## dct   , depth);\
03122     c->add_pixels4                   = FUNCC(add_pixels4  ## dct   , depth);\
03123     c->put_no_rnd_pixels_l2[0]       = FUNCC(put_no_rnd_pixels16_l2, depth);\
03124     c->put_no_rnd_pixels_l2[1]       = FUNCC(put_no_rnd_pixels8_l2 , depth);\
03125 \
03126     c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8   , depth);\
03127     c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4   , depth);\
03128     c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2   , depth);\
03129     c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8   , depth);\
03130     c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4   , depth);\
03131     c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2   , depth);\
03132 \
03133     dspfunc1(put       , 0, 16, depth);\
03134     dspfunc1(put       , 1,  8, depth);\
03135     dspfunc1(put       , 2,  4, depth);\
03136     dspfunc1(put       , 3,  2, depth);\
03137     dspfunc1(put_no_rnd, 0, 16, depth);\
03138     dspfunc1(put_no_rnd, 1,  8, depth);\
03139     dspfunc1(avg       , 0, 16, depth);\
03140     dspfunc1(avg       , 1,  8, depth);\
03141     dspfunc1(avg       , 2,  4, depth);\
03142     dspfunc1(avg       , 3,  2, depth);\
03143     dspfunc1(avg_no_rnd, 0, 16, depth);\
03144     dspfunc1(avg_no_rnd, 1,  8, depth);\
03145 \
03146     dspfunc2(put_h264_qpel, 0, 16, depth);\
03147     dspfunc2(put_h264_qpel, 1,  8, depth);\
03148     dspfunc2(put_h264_qpel, 2,  4, depth);\
03149     dspfunc2(put_h264_qpel, 3,  2, depth);\
03150     dspfunc2(avg_h264_qpel, 0, 16, depth);\
03151     dspfunc2(avg_h264_qpel, 1,  8, depth);\
03152     dspfunc2(avg_h264_qpel, 2,  4, depth);
03153 
03154     switch (avctx->bits_per_raw_sample) {
03155     case 9:
03156         if (c->dct_bits == 32) {
03157             BIT_DEPTH_FUNCS(9, _32);
03158         } else {
03159             BIT_DEPTH_FUNCS(9, _16);
03160         }
03161         break;
03162     case 10:
03163         if (c->dct_bits == 32) {
03164             BIT_DEPTH_FUNCS(10, _32);
03165         } else {
03166             BIT_DEPTH_FUNCS(10, _16);
03167         }
03168         break;
03169     default:
03170         av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample);
03171     case 8:
03172         BIT_DEPTH_FUNCS(8, _16);
03173         break;
03174     }
03175 
03176 
03177     if (HAVE_MMX)        dsputil_init_mmx   (c, avctx);
03178     if (ARCH_ARM)        dsputil_init_arm   (c, avctx);
03179     if (CONFIG_MLIB)     dsputil_init_mlib  (c, avctx);
03180     if (HAVE_VIS)        dsputil_init_vis   (c, avctx);
03181     if (ARCH_ALPHA)      dsputil_init_alpha (c, avctx);
03182     if (ARCH_PPC)        dsputil_init_ppc   (c, avctx);
03183     if (HAVE_MMI)        dsputil_init_mmi   (c, avctx);
03184     if (ARCH_SH4)        dsputil_init_sh4   (c, avctx);
03185     if (ARCH_BFIN)       dsputil_init_bfin  (c, avctx);
03186 
03187     for(i=0; i<64; i++){
03188         if(!c->put_2tap_qpel_pixels_tab[0][i])
03189             c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
03190         if(!c->avg_2tap_qpel_pixels_tab[0][i])
03191             c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
03192     }
03193 
03194     ff_init_scantable_permutation(c->idct_permutation,
03195                                   c->idct_permutation_type);
03196 }
Generated on Fri Feb 1 2013 14:34:32 for FFmpeg by doxygen 1.7.1