00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00030 #include "libavutil/imgutils.h"
00031 #include "avcodec.h"
00032 #include "dsputil.h"
00033 #include "simple_idct.h"
00034 #include "faandct.h"
00035 #include "faanidct.h"
00036 #include "mathops.h"
00037 #include "mpegvideo.h"
00038 #include "config.h"
00039 #include "ac3dec.h"
00040 #include "vorbis.h"
00041 #include "png.h"
00042
/* Clipping lookup table padded on both sides so indices in
 * [-MAX_NEG_CROP, 255 + MAX_NEG_CROP) are safe; zero here,
 * presumably filled by the init code elsewhere in this file — verify. */
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/* Square table, indexed through (ff_squareTbl + 256) with values in
 * [-256, 255]; zero here, presumably filled at init time — verify. */
uint32_t ff_squareTbl[512] = {0, };


/* ~0UL/255 is 0x0101...01 (one byte set per octet of unsigned long),
 * so these broadcast 0x7f / 0x80 into every byte of an unsigned long. */
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
00049
/* Scan tables: each maps a scan position (0..63) to a raster index inside
 * an 8x8 block (see ff_init_scantable, which permutes and inverts them). */

/* Classic diagonal zigzag scan order. */
const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};


/* Zigzag variant interleaving row pairs (2-4-8 style scan). */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};

/* Inverse zigzag table; not written by any code visible in this chunk —
 * presumably filled during initialization elsewhere (TODO confirm). */
DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];

/* Alternate (horizontal-first) scan order. */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};

/* Alternate (vertical-first) scan order. */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};

/* Coefficient permutation used by the simple MMX IDCT (raster index ->
 * permuted index); passed to ff_init_scantable as `permutation`. */
static const uint8_t simple_mmx_permutation[64]={
        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/* Row permutation for the SSE2 IDCT. */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
00112
00113 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
00114 int i;
00115 int end;
00116
00117 st->scantable= src_scantable;
00118
00119 for(i=0; i<64; i++){
00120 int j;
00121 j = src_scantable[i];
00122 st->permutated[i] = permutation[j];
00123 #if ARCH_PPC
00124 st->inverse[j] = i;
00125 #endif
00126 }
00127
00128 end=-1;
00129 for(i=0; i<64; i++){
00130 int j;
00131 j = st->permutated[i];
00132 if(j>end) end=j;
00133 st->raster_end[i]= end;
00134 }
00135 }
00136
/* Sum of all 256 pixels of a 16x16 block whose rows are line_size apart. */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;
    }
    return total;
}
00158
00159 static int pix_norm1_c(uint8_t * pix, int line_size)
00160 {
00161 int s, i, j;
00162 uint32_t *sq = ff_squareTbl + 256;
00163
00164 s = 0;
00165 for (i = 0; i < 16; i++) {
00166 for (j = 0; j < 16; j += 8) {
00167 #if 0
00168 s += sq[pix[0]];
00169 s += sq[pix[1]];
00170 s += sq[pix[2]];
00171 s += sq[pix[3]];
00172 s += sq[pix[4]];
00173 s += sq[pix[5]];
00174 s += sq[pix[6]];
00175 s += sq[pix[7]];
00176 #else
00177 #if LONG_MAX > 2147483647
00178 register uint64_t x=*(uint64_t*)pix;
00179 s += sq[x&0xff];
00180 s += sq[(x>>8)&0xff];
00181 s += sq[(x>>16)&0xff];
00182 s += sq[(x>>24)&0xff];
00183 s += sq[(x>>32)&0xff];
00184 s += sq[(x>>40)&0xff];
00185 s += sq[(x>>48)&0xff];
00186 s += sq[(x>>56)&0xff];
00187 #else
00188 register uint32_t x=*(uint32_t*)pix;
00189 s += sq[x&0xff];
00190 s += sq[(x>>8)&0xff];
00191 s += sq[(x>>16)&0xff];
00192 s += sq[(x>>24)&0xff];
00193 x=*(uint32_t*)(pix+4);
00194 s += sq[x&0xff];
00195 s += sq[(x>>8)&0xff];
00196 s += sq[(x>>16)&0xff];
00197 s += sq[(x>>24)&0xff];
00198 #endif
00199 #endif
00200 pix += 8;
00201 }
00202 pix += line_size - 16;
00203 }
00204 return s;
00205 }
00206
/* Byte-swap w 32-bit words from src into dst (dst may alias src element-wise). */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int n;

    for (n = 0; n < w; n++)
        dst[n] = av_bswap32(src[n]);
}
00224
00225 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00226 {
00227 int s, i;
00228 uint32_t *sq = ff_squareTbl + 256;
00229
00230 s = 0;
00231 for (i = 0; i < h; i++) {
00232 s += sq[pix1[0] - pix2[0]];
00233 s += sq[pix1[1] - pix2[1]];
00234 s += sq[pix1[2] - pix2[2]];
00235 s += sq[pix1[3] - pix2[3]];
00236 pix1 += line_size;
00237 pix2 += line_size;
00238 }
00239 return s;
00240 }
00241
00242 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00243 {
00244 int s, i;
00245 uint32_t *sq = ff_squareTbl + 256;
00246
00247 s = 0;
00248 for (i = 0; i < h; i++) {
00249 s += sq[pix1[0] - pix2[0]];
00250 s += sq[pix1[1] - pix2[1]];
00251 s += sq[pix1[2] - pix2[2]];
00252 s += sq[pix1[3] - pix2[3]];
00253 s += sq[pix1[4] - pix2[4]];
00254 s += sq[pix1[5] - pix2[5]];
00255 s += sq[pix1[6] - pix2[6]];
00256 s += sq[pix1[7] - pix2[7]];
00257 pix1 += line_size;
00258 pix2 += line_size;
00259 }
00260 return s;
00261 }
00262
00263 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
00264 {
00265 int s, i;
00266 uint32_t *sq = ff_squareTbl + 256;
00267
00268 s = 0;
00269 for (i = 0; i < h; i++) {
00270 s += sq[pix1[ 0] - pix2[ 0]];
00271 s += sq[pix1[ 1] - pix2[ 1]];
00272 s += sq[pix1[ 2] - pix2[ 2]];
00273 s += sq[pix1[ 3] - pix2[ 3]];
00274 s += sq[pix1[ 4] - pix2[ 4]];
00275 s += sq[pix1[ 5] - pix2[ 5]];
00276 s += sq[pix1[ 6] - pix2[ 6]];
00277 s += sq[pix1[ 7] - pix2[ 7]];
00278 s += sq[pix1[ 8] - pix2[ 8]];
00279 s += sq[pix1[ 9] - pix2[ 9]];
00280 s += sq[pix1[10] - pix2[10]];
00281 s += sq[pix1[11] - pix2[11]];
00282 s += sq[pix1[12] - pix2[12]];
00283 s += sq[pix1[13] - pix2[13]];
00284 s += sq[pix1[14] - pix2[14]];
00285 s += sq[pix1[15] - pix2[15]];
00286
00287 pix1 += line_size;
00288 pix2 += line_size;
00289 }
00290 return s;
00291 }
00292
00293
00294
/**
 * Replicate the border pixels of a width x height image (rows `wrap` apart)
 * w pixels outwards in every direction, corners included.  The caller must
 * guarantee that the padding area around buf is writable.
 */
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *last_line = buf + (height - 1) * wrap;
    uint8_t *row;
    int i;

    /* top and bottom bands: duplicate the first/last row */
    for (i = 1; i <= w; i++) {
        memcpy(buf - i * wrap, buf, width);
        memcpy(last_line + i * wrap, last_line, width);
    }

    /* left and right bands: replicate each row's first/last pixel */
    row = buf;
    for (i = 0; i < height; i++) {
        memset(row - w, row[0], w);
        memset(row + width, row[width - 1], w);
        row += wrap;
    }

    /* four corner squares: replicate the corner pixels */
    for (i = 1; i <= w; i++) {
        memset(buf - i * wrap - w,           buf[0],               w);
        memset(buf - i * wrap + width,       buf[width - 1],       w);
        memset(last_line + i * wrap - w,     last_line[0],         w);
        memset(last_line + i * wrap + width, last_line[width - 1], w);
    }
}
00321
/**
 * Copy a block_w x block_h block from src into buf, emulating edge
 * replication for the parts of the block that fall outside the w x h
 * picture: the in-picture region is copied, then the top/bottom rows and
 * left/right columns are replicated outwards.
 *
 * @param buf       destination buffer (stride == linesize)
 * @param src       source, points at position (src_x, src_y) of the picture
 * @param linesize  stride of both buf and src, in bytes
 * @param block_w   width of the block to produce
 * @param block_h   height of the block to produce
 * @param src_x     x position of the block within the picture
 * @param src_y     y position of the block within the picture
 * @param w         picture width
 * @param h         picture height
 */
void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int x, y;
    int start_y, start_x, end_y, end_x;

    /* Clamp the source position so at least one picture row/column overlaps
     * the block; the pointer is moved together with the coordinate. */
    if(src_y>= h){
        src+= (h-1-src_y)*linesize;
        src_y=h-1;
    }else if(src_y<=-block_h){
        src+= (1-block_h-src_y)*linesize;
        src_y=1-block_h;
    }
    if(src_x>= w){
        src+= (w-1-src_x);
        src_x=w-1;
    }else if(src_x<=-block_w){
        src+= (1-block_w-src_x);
        src_x=1-block_w;
    }

    /* Portion of the block that lies inside the picture. */
    start_y= FFMAX(0, -src_y);
    start_x= FFMAX(0, -src_x);
    end_y= FFMIN(block_h, h-src_y);
    end_x= FFMIN(block_w, w-src_x);
    assert(start_y < end_y && block_h);
    assert(start_x < end_x && block_w);

    /* From here on, w is reused as the width of the copied region. */
    w = end_x - start_x;
    src += start_y*linesize + start_x;
    buf += start_x;

    /* Rows above the picture: replicate the first available source row. */
    for(y=0; y<start_y; y++){
        memcpy(buf, src, w);
        buf += linesize;
    }

    /* Rows inside the picture: straight copy. */
    for(; y<end_y; y++){
        memcpy(buf, src, w);
        src += linesize;
        buf += linesize;
    }

    /* Rows below the picture: replicate the last copied source row
     * (src now points one line past it, hence the step back). */
    src -= linesize;
    for(; y<block_h; y++){
        memcpy(buf, src, w);
        buf += linesize;
    }

    /* Second pass: replicate the left and right columns in every row. */
    buf -= block_h * linesize + start_x;
    while (block_h--){
        /* left of the copied region */
        for(x=0; x<start_x; x++){
            buf[x] = buf[start_x];
        }

        /* right of the copied region */
        for(x=end_x; x<block_w; x++){
            buf[x] = buf[end_x - 1];
        }
        buf += linesize;
    }
}
00399
00400 static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
00401 {
00402 int i;
00403
00404
00405 for(i=0;i<8;i++) {
00406 block[0] = pixels[0];
00407 block[1] = pixels[1];
00408 block[2] = pixels[2];
00409 block[3] = pixels[3];
00410 block[4] = pixels[4];
00411 block[5] = pixels[5];
00412 block[6] = pixels[6];
00413 block[7] = pixels[7];
00414 pixels += line_size;
00415 block += 8;
00416 }
00417 }
00418
00419 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
00420 const uint8_t *s2, int stride){
00421 int i;
00422
00423
00424 for(i=0;i<8;i++) {
00425 block[0] = s1[0] - s2[0];
00426 block[1] = s1[1] - s2[1];
00427 block[2] = s1[2] - s2[2];
00428 block[3] = s1[3] - s2[3];
00429 block[4] = s1[4] - s2[4];
00430 block[5] = s1[5] - s2[5];
00431 block[6] = s1[6] - s2[6];
00432 block[7] = s1[7] - s2[7];
00433 s1 += stride;
00434 s2 += stride;
00435 block += 8;
00436 }
00437 }
00438
00439
00440 void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00441 int line_size)
00442 {
00443 int i;
00444 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00445
00446
00447 for(i=0;i<8;i++) {
00448 pixels[0] = cm[block[0]];
00449 pixels[1] = cm[block[1]];
00450 pixels[2] = cm[block[2]];
00451 pixels[3] = cm[block[3]];
00452 pixels[4] = cm[block[4]];
00453 pixels[5] = cm[block[5]];
00454 pixels[6] = cm[block[6]];
00455 pixels[7] = cm[block[7]];
00456
00457 pixels += line_size;
00458 block += 8;
00459 }
00460 }
00461
00462 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00463 int line_size)
00464 {
00465 int i;
00466 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00467
00468
00469 for(i=0;i<4;i++) {
00470 pixels[0] = cm[block[0]];
00471 pixels[1] = cm[block[1]];
00472 pixels[2] = cm[block[2]];
00473 pixels[3] = cm[block[3]];
00474
00475 pixels += line_size;
00476 block += 8;
00477 }
00478 }
00479
00480 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00481 int line_size)
00482 {
00483 int i;
00484 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00485
00486
00487 for(i=0;i<2;i++) {
00488 pixels[0] = cm[block[0]];
00489 pixels[1] = cm[block[1]];
00490
00491 pixels += line_size;
00492 block += 8;
00493 }
00494 }
00495
00496 void ff_put_signed_pixels_clamped_c(const DCTELEM *block,
00497 uint8_t *restrict pixels,
00498 int line_size)
00499 {
00500 int i, j;
00501
00502 for (i = 0; i < 8; i++) {
00503 for (j = 0; j < 8; j++) {
00504 if (*block < -128)
00505 *pixels = 0;
00506 else if (*block > 127)
00507 *pixels = 255;
00508 else
00509 *pixels = (uint8_t)(*block + 128);
00510 block++;
00511 pixels++;
00512 }
00513 pixels += (line_size - 8);
00514 }
00515 }
00516
00517 static void put_pixels_nonclamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00518 int line_size)
00519 {
00520 int i;
00521
00522
00523 for(i=0;i<8;i++) {
00524 pixels[0] = block[0];
00525 pixels[1] = block[1];
00526 pixels[2] = block[2];
00527 pixels[3] = block[3];
00528 pixels[4] = block[4];
00529 pixels[5] = block[5];
00530 pixels[6] = block[6];
00531 pixels[7] = block[7];
00532
00533 pixels += line_size;
00534 block += 8;
00535 }
00536 }
00537
00538 void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00539 int line_size)
00540 {
00541 int i;
00542 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00543
00544
00545 for(i=0;i<8;i++) {
00546 pixels[0] = cm[pixels[0] + block[0]];
00547 pixels[1] = cm[pixels[1] + block[1]];
00548 pixels[2] = cm[pixels[2] + block[2]];
00549 pixels[3] = cm[pixels[3] + block[3]];
00550 pixels[4] = cm[pixels[4] + block[4]];
00551 pixels[5] = cm[pixels[5] + block[5]];
00552 pixels[6] = cm[pixels[6] + block[6]];
00553 pixels[7] = cm[pixels[7] + block[7]];
00554 pixels += line_size;
00555 block += 8;
00556 }
00557 }
00558
00559 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00560 int line_size)
00561 {
00562 int i;
00563 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00564
00565
00566 for(i=0;i<4;i++) {
00567 pixels[0] = cm[pixels[0] + block[0]];
00568 pixels[1] = cm[pixels[1] + block[1]];
00569 pixels[2] = cm[pixels[2] + block[2]];
00570 pixels[3] = cm[pixels[3] + block[3]];
00571 pixels += line_size;
00572 block += 8;
00573 }
00574 }
00575
00576 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00577 int line_size)
00578 {
00579 int i;
00580 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00581
00582
00583 for(i=0;i<2;i++) {
00584 pixels[0] = cm[pixels[0] + block[0]];
00585 pixels[1] = cm[pixels[1] + block[1]];
00586 pixels += line_size;
00587 block += 8;
00588 }
00589 }
00590
00591 static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
00592 {
00593 int i;
00594 for(i=0;i<8;i++) {
00595 pixels[0] += block[0];
00596 pixels[1] += block[1];
00597 pixels[2] += block[2];
00598 pixels[3] += block[3];
00599 pixels[4] += block[4];
00600 pixels[5] += block[5];
00601 pixels[6] += block[6];
00602 pixels[7] += block[7];
00603 pixels += line_size;
00604 block += 8;
00605 }
00606 }
00607
00608 static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
00609 {
00610 int i;
00611 for(i=0;i<4;i++) {
00612 pixels[0] += block[0];
00613 pixels[1] += block[1];
00614 pixels[2] += block[2];
00615 pixels[3] += block[3];
00616 pixels += line_size;
00617 block += 4;
00618 }
00619 }
00620
00621 static int sum_abs_dctelem_c(DCTELEM *block)
00622 {
00623 int sum=0, i;
00624 for(i=0; i<64; i++)
00625 sum+= FFABS(block[i]);
00626 return sum;
00627 }
00628
/* Fill h rows of 16 bytes with `value`; rows are line_size apart. */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    while (h-- > 0) {
        memset(block, value, 16);
        block += line_size;
    }
}
00638
/* Fill h rows of 8 bytes with `value`; rows are line_size apart. */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    while (h-- > 0) {
        memset(block, value, 8);
        block += line_size;
    }
}
00648
/* Upscale an 8x8 block to 16x16: each source pixel becomes a 2x2 square.
 * src[i]*0x0101 duplicates the byte into both halves of a uint16_t, so two
 * horizontal pixels are written per store; each source row fills two
 * destination rows (linesize is in bytes).  Note: dst must be suitably
 * aligned for the uint16_t accesses. */
static void scale_block_c(const uint8_t src[64], uint8_t *dst, int linesize)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        uint16_t *out0 = (uint16_t *)(dst + (2 * row)     * linesize);
        uint16_t *out1 = (uint16_t *)(dst + (2 * row + 1) * linesize);
        for (col = 0; col < 8; col++)
            out0[col] = out1[col] = src[row * 8 + col] * 0x0101;
    }
}
00664
00665 #if 0
00666
00667 #define PIXOP2(OPNAME, OP) \
00668 static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00669 {\
00670 int i;\
00671 for(i=0; i<h; i++){\
00672 OP(*((uint64_t*)block), AV_RN64(pixels));\
00673 pixels+=line_size;\
00674 block +=line_size;\
00675 }\
00676 }\
00677 \
00678 static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00679 {\
00680 int i;\
00681 for(i=0; i<h; i++){\
00682 const uint64_t a= AV_RN64(pixels );\
00683 const uint64_t b= AV_RN64(pixels+1);\
00684 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
00685 pixels+=line_size;\
00686 block +=line_size;\
00687 }\
00688 }\
00689 \
00690 static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00691 {\
00692 int i;\
00693 for(i=0; i<h; i++){\
00694 const uint64_t a= AV_RN64(pixels );\
00695 const uint64_t b= AV_RN64(pixels+1);\
00696 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
00697 pixels+=line_size;\
00698 block +=line_size;\
00699 }\
00700 }\
00701 \
00702 static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00703 {\
00704 int i;\
00705 for(i=0; i<h; i++){\
00706 const uint64_t a= AV_RN64(pixels );\
00707 const uint64_t b= AV_RN64(pixels+line_size);\
00708 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
00709 pixels+=line_size;\
00710 block +=line_size;\
00711 }\
00712 }\
00713 \
00714 static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00715 {\
00716 int i;\
00717 for(i=0; i<h; i++){\
00718 const uint64_t a= AV_RN64(pixels );\
00719 const uint64_t b= AV_RN64(pixels+line_size);\
00720 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
00721 pixels+=line_size;\
00722 block +=line_size;\
00723 }\
00724 }\
00725 \
00726 static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00727 {\
00728 int i;\
00729 const uint64_t a= AV_RN64(pixels );\
00730 const uint64_t b= AV_RN64(pixels+1);\
00731 uint64_t l0= (a&0x0303030303030303ULL)\
00732 + (b&0x0303030303030303ULL)\
00733 + 0x0202020202020202ULL;\
00734 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00735 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00736 uint64_t l1,h1;\
00737 \
00738 pixels+=line_size;\
00739 for(i=0; i<h; i+=2){\
00740 uint64_t a= AV_RN64(pixels );\
00741 uint64_t b= AV_RN64(pixels+1);\
00742 l1= (a&0x0303030303030303ULL)\
00743 + (b&0x0303030303030303ULL);\
00744 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00745 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00746 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
00747 pixels+=line_size;\
00748 block +=line_size;\
00749 a= AV_RN64(pixels );\
00750 b= AV_RN64(pixels+1);\
00751 l0= (a&0x0303030303030303ULL)\
00752 + (b&0x0303030303030303ULL)\
00753 + 0x0202020202020202ULL;\
00754 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00755 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00756 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
00757 pixels+=line_size;\
00758 block +=line_size;\
00759 }\
00760 }\
00761 \
00762 static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
00763 {\
00764 int i;\
00765 const uint64_t a= AV_RN64(pixels );\
00766 const uint64_t b= AV_RN64(pixels+1);\
00767 uint64_t l0= (a&0x0303030303030303ULL)\
00768 + (b&0x0303030303030303ULL)\
00769 + 0x0101010101010101ULL;\
00770 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00771 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00772 uint64_t l1,h1;\
00773 \
00774 pixels+=line_size;\
00775 for(i=0; i<h; i+=2){\
00776 uint64_t a= AV_RN64(pixels );\
00777 uint64_t b= AV_RN64(pixels+1);\
00778 l1= (a&0x0303030303030303ULL)\
00779 + (b&0x0303030303030303ULL);\
00780 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00781 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00782 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
00783 pixels+=line_size;\
00784 block +=line_size;\
00785 a= AV_RN64(pixels );\
00786 b= AV_RN64(pixels+1);\
00787 l0= (a&0x0303030303030303ULL)\
00788 + (b&0x0303030303030303ULL)\
00789 + 0x0101010101010101ULL;\
00790 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
00791 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
00792 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
00793 pixels+=line_size;\
00794 block +=line_size;\
00795 }\
00796 }\
00797 \
00798 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\
00799 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
00800 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
00801 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
00802 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
00803 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
00804 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
00805
00806 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
00807 #else // 64 bit variant
00808
00809 #define PIXOP2(OPNAME, OP) \
00810 static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00811 int i;\
00812 for(i=0; i<h; i++){\
00813 OP(*((uint16_t*)(block )), AV_RN16(pixels ));\
00814 pixels+=line_size;\
00815 block +=line_size;\
00816 }\
00817 }\
00818 static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00819 int i;\
00820 for(i=0; i<h; i++){\
00821 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\
00822 pixels+=line_size;\
00823 block +=line_size;\
00824 }\
00825 }\
00826 static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00827 int i;\
00828 for(i=0; i<h; i++){\
00829 OP(*((uint32_t*)(block )), AV_RN32(pixels ));\
00830 OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
00831 pixels+=line_size;\
00832 block +=line_size;\
00833 }\
00834 }\
00835 static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00836 OPNAME ## _pixels8_c(block, pixels, line_size, h);\
00837 }\
00838 \
00839 static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00840 int src_stride1, int src_stride2, int h){\
00841 int i;\
00842 for(i=0; i<h; i++){\
00843 uint32_t a,b;\
00844 a= AV_RN32(&src1[i*src_stride1 ]);\
00845 b= AV_RN32(&src2[i*src_stride2 ]);\
00846 OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\
00847 a= AV_RN32(&src1[i*src_stride1+4]);\
00848 b= AV_RN32(&src2[i*src_stride2+4]);\
00849 OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
00850 }\
00851 }\
00852 \
00853 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00854 int src_stride1, int src_stride2, int h){\
00855 int i;\
00856 for(i=0; i<h; i++){\
00857 uint32_t a,b;\
00858 a= AV_RN32(&src1[i*src_stride1 ]);\
00859 b= AV_RN32(&src2[i*src_stride2 ]);\
00860 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
00861 a= AV_RN32(&src1[i*src_stride1+4]);\
00862 b= AV_RN32(&src2[i*src_stride2+4]);\
00863 OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
00864 }\
00865 }\
00866 \
00867 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00868 int src_stride1, int src_stride2, int h){\
00869 int i;\
00870 for(i=0; i<h; i++){\
00871 uint32_t a,b;\
00872 a= AV_RN32(&src1[i*src_stride1 ]);\
00873 b= AV_RN32(&src2[i*src_stride2 ]);\
00874 OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
00875 }\
00876 }\
00877 \
00878 static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00879 int src_stride1, int src_stride2, int h){\
00880 int i;\
00881 for(i=0; i<h; i++){\
00882 uint32_t a,b;\
00883 a= AV_RN16(&src1[i*src_stride1 ]);\
00884 b= AV_RN16(&src2[i*src_stride2 ]);\
00885 OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
00886 }\
00887 }\
00888 \
00889 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00890 int src_stride1, int src_stride2, int h){\
00891 OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
00892 OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
00893 }\
00894 \
00895 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
00896 int src_stride1, int src_stride2, int h){\
00897 OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
00898 OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
00899 }\
00900 \
00901 static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00902 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00903 }\
00904 \
00905 static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00906 OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00907 }\
00908 \
00909 static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00910 OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00911 }\
00912 \
00913 static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00914 OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00915 }\
00916 \
00917 static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
00918 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00919 int i;\
00920 for(i=0; i<h; i++){\
00921 uint32_t a, b, c, d, l0, l1, h0, h1;\
00922 a= AV_RN32(&src1[i*src_stride1]);\
00923 b= AV_RN32(&src2[i*src_stride2]);\
00924 c= AV_RN32(&src3[i*src_stride3]);\
00925 d= AV_RN32(&src4[i*src_stride4]);\
00926 l0= (a&0x03030303UL)\
00927 + (b&0x03030303UL)\
00928 + 0x02020202UL;\
00929 h0= ((a&0xFCFCFCFCUL)>>2)\
00930 + ((b&0xFCFCFCFCUL)>>2);\
00931 l1= (c&0x03030303UL)\
00932 + (d&0x03030303UL);\
00933 h1= ((c&0xFCFCFCFCUL)>>2)\
00934 + ((d&0xFCFCFCFCUL)>>2);\
00935 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
00936 a= AV_RN32(&src1[i*src_stride1+4]);\
00937 b= AV_RN32(&src2[i*src_stride2+4]);\
00938 c= AV_RN32(&src3[i*src_stride3+4]);\
00939 d= AV_RN32(&src4[i*src_stride4+4]);\
00940 l0= (a&0x03030303UL)\
00941 + (b&0x03030303UL)\
00942 + 0x02020202UL;\
00943 h0= ((a&0xFCFCFCFCUL)>>2)\
00944 + ((b&0xFCFCFCFCUL)>>2);\
00945 l1= (c&0x03030303UL)\
00946 + (d&0x03030303UL);\
00947 h1= ((c&0xFCFCFCFCUL)>>2)\
00948 + ((d&0xFCFCFCFCUL)>>2);\
00949 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
00950 }\
00951 }\
00952 \
00953 static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00954 OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00955 }\
00956 \
00957 static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00958 OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00959 }\
00960 \
00961 static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00962 OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
00963 }\
00964 \
00965 static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
00966 OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
00967 }\
00968 \
00969 static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
00970 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00971 int i;\
00972 for(i=0; i<h; i++){\
00973 uint32_t a, b, c, d, l0, l1, h0, h1;\
00974 a= AV_RN32(&src1[i*src_stride1]);\
00975 b= AV_RN32(&src2[i*src_stride2]);\
00976 c= AV_RN32(&src3[i*src_stride3]);\
00977 d= AV_RN32(&src4[i*src_stride4]);\
00978 l0= (a&0x03030303UL)\
00979 + (b&0x03030303UL)\
00980 + 0x01010101UL;\
00981 h0= ((a&0xFCFCFCFCUL)>>2)\
00982 + ((b&0xFCFCFCFCUL)>>2);\
00983 l1= (c&0x03030303UL)\
00984 + (d&0x03030303UL);\
00985 h1= ((c&0xFCFCFCFCUL)>>2)\
00986 + ((d&0xFCFCFCFCUL)>>2);\
00987 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
00988 a= AV_RN32(&src1[i*src_stride1+4]);\
00989 b= AV_RN32(&src2[i*src_stride2+4]);\
00990 c= AV_RN32(&src3[i*src_stride3+4]);\
00991 d= AV_RN32(&src4[i*src_stride4+4]);\
00992 l0= (a&0x03030303UL)\
00993 + (b&0x03030303UL)\
00994 + 0x01010101UL;\
00995 h0= ((a&0xFCFCFCFCUL)>>2)\
00996 + ((b&0xFCFCFCFCUL)>>2);\
00997 l1= (c&0x03030303UL)\
00998 + (d&0x03030303UL);\
00999 h1= ((c&0xFCFCFCFCUL)>>2)\
01000 + ((d&0xFCFCFCFCUL)>>2);\
01001 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01002 }\
01003 }\
01004 static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
01005 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
01006 OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
01007 OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
01008 }\
01009 static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
01010 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
01011 OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
01012 OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
01013 }\
01014 \
01015 static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
01016 {\
01017 int i, a0, b0, a1, b1;\
01018 a0= pixels[0];\
01019 b0= pixels[1] + 2;\
01020 a0 += b0;\
01021 b0 += pixels[2];\
01022 \
01023 pixels+=line_size;\
01024 for(i=0; i<h; i+=2){\
01025 a1= pixels[0];\
01026 b1= pixels[1];\
01027 a1 += b1;\
01028 b1 += pixels[2];\
01029 \
01030 block[0]= (a1+a0)>>2; \
01031 block[1]= (b1+b0)>>2;\
01032 \
01033 pixels+=line_size;\
01034 block +=line_size;\
01035 \
01036 a0= pixels[0];\
01037 b0= pixels[1] + 2;\
01038 a0 += b0;\
01039 b0 += pixels[2];\
01040 \
01041 block[0]= (a1+a0)>>2;\
01042 block[1]= (b1+b0)>>2;\
01043 pixels+=line_size;\
01044 block +=line_size;\
01045 }\
01046 }\
01047 \
01048 static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
01049 {\
01050 int i;\
01051 const uint32_t a= AV_RN32(pixels );\
01052 const uint32_t b= AV_RN32(pixels+1);\
01053 uint32_t l0= (a&0x03030303UL)\
01054 + (b&0x03030303UL)\
01055 + 0x02020202UL;\
01056 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
01057 + ((b&0xFCFCFCFCUL)>>2);\
01058 uint32_t l1,h1;\
01059 \
01060 pixels+=line_size;\
01061 for(i=0; i<h; i+=2){\
01062 uint32_t a= AV_RN32(pixels );\
01063 uint32_t b= AV_RN32(pixels+1);\
01064 l1= (a&0x03030303UL)\
01065 + (b&0x03030303UL);\
01066 h1= ((a&0xFCFCFCFCUL)>>2)\
01067 + ((b&0xFCFCFCFCUL)>>2);\
01068 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01069 pixels+=line_size;\
01070 block +=line_size;\
01071 a= AV_RN32(pixels );\
01072 b= AV_RN32(pixels+1);\
01073 l0= (a&0x03030303UL)\
01074 + (b&0x03030303UL)\
01075 + 0x02020202UL;\
01076 h0= ((a&0xFCFCFCFCUL)>>2)\
01077 + ((b&0xFCFCFCFCUL)>>2);\
01078 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01079 pixels+=line_size;\
01080 block +=line_size;\
01081 }\
01082 }\
01083 \
01084 static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
01085 {\
01086 int j;\
01087 for(j=0; j<2; j++){\
01088 int i;\
01089 const uint32_t a= AV_RN32(pixels );\
01090 const uint32_t b= AV_RN32(pixels+1);\
01091 uint32_t l0= (a&0x03030303UL)\
01092 + (b&0x03030303UL)\
01093 + 0x02020202UL;\
01094 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
01095 + ((b&0xFCFCFCFCUL)>>2);\
01096 uint32_t l1,h1;\
01097 \
01098 pixels+=line_size;\
01099 for(i=0; i<h; i+=2){\
01100 uint32_t a= AV_RN32(pixels );\
01101 uint32_t b= AV_RN32(pixels+1);\
01102 l1= (a&0x03030303UL)\
01103 + (b&0x03030303UL);\
01104 h1= ((a&0xFCFCFCFCUL)>>2)\
01105 + ((b&0xFCFCFCFCUL)>>2);\
01106 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01107 pixels+=line_size;\
01108 block +=line_size;\
01109 a= AV_RN32(pixels );\
01110 b= AV_RN32(pixels+1);\
01111 l0= (a&0x03030303UL)\
01112 + (b&0x03030303UL)\
01113 + 0x02020202UL;\
01114 h0= ((a&0xFCFCFCFCUL)>>2)\
01115 + ((b&0xFCFCFCFCUL)>>2);\
01116 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01117 pixels+=line_size;\
01118 block +=line_size;\
01119 }\
01120 pixels+=4-line_size*(h+1);\
01121 block +=4-line_size*h;\
01122 }\
01123 }\
01124 \
01125 static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
01126 {\
01127 int j;\
01128 for(j=0; j<2; j++){\
01129 int i;\
01130 const uint32_t a= AV_RN32(pixels );\
01131 const uint32_t b= AV_RN32(pixels+1);\
01132 uint32_t l0= (a&0x03030303UL)\
01133 + (b&0x03030303UL)\
01134 + 0x01010101UL;\
01135 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
01136 + ((b&0xFCFCFCFCUL)>>2);\
01137 uint32_t l1,h1;\
01138 \
01139 pixels+=line_size;\
01140 for(i=0; i<h; i+=2){\
01141 uint32_t a= AV_RN32(pixels );\
01142 uint32_t b= AV_RN32(pixels+1);\
01143 l1= (a&0x03030303UL)\
01144 + (b&0x03030303UL);\
01145 h1= ((a&0xFCFCFCFCUL)>>2)\
01146 + ((b&0xFCFCFCFCUL)>>2);\
01147 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01148 pixels+=line_size;\
01149 block +=line_size;\
01150 a= AV_RN32(pixels );\
01151 b= AV_RN32(pixels+1);\
01152 l0= (a&0x03030303UL)\
01153 + (b&0x03030303UL)\
01154 + 0x01010101UL;\
01155 h0= ((a&0xFCFCFCFCUL)>>2)\
01156 + ((b&0xFCFCFCFCUL)>>2);\
01157 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
01158 pixels+=line_size;\
01159 block +=line_size;\
01160 }\
01161 pixels+=4-line_size*(h+1);\
01162 block +=4-line_size*h;\
01163 }\
01164 }\
01165 \
01166 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\
01167 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
01168 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
01169 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
01170 av_unused CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\
01171 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
01172 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
01173 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
01174
01175 #define op_avg(a, b) a = rnd_avg32(a, b)
01176 #endif
01177 #define op_put(a, b) a = b
01178
01179 PIXOP2(avg, op_avg)
01180 PIXOP2(put, op_put)
01181 #undef op_avg
01182 #undef op_put
01183
01184 #define put_no_rnd_pixels8_c put_pixels8_c
01185 #define put_no_rnd_pixels16_c put_pixels16_c
01186
01187 #define avg2(a,b) ((a+b+1)>>1)
01188 #define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
01189
/* Convenience wrapper: average two 16-pixel-wide sources into dst using one
 * common stride for dst and both inputs (no-rounding variant). */
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h)
{
    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
}
01193
/* Convenience wrapper: average two 8-pixel-wide sources into dst using one
 * common stride for dst and both inputs (no-rounding variant). */
static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h)
{
    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
}
01197
/* One-point global motion compensation: bilinear interpolation of an
 * 8-pixel-wide, h-row block at the 1/16-pel position (x16, y16).
 * The four weights A..D sum to 256; rounder is added before the >>8. */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A = (16 - x16) * (16 - y16);
    const int B = (     x16) * (16 - y16);
    const int C = (16 - x16) * (     y16);
    const int D = (     x16) * (     y16);
    int y, x;

    for (y = 0; y < h; y++, dst += stride, src += stride)
        for (x = 0; x < 8; x++)
            dst[x] = (A * src[x]          + B * src[x + 1]          +
                      C * src[stride + x] + D * src[stride + x + 1] +
                      rounder) >> 8;
}
01220
/**
 * Global motion compensation (C reference).
 *
 * Produces an 8-pixel-wide, h-row block in dst by sampling src along an
 * affine motion field: the sample position advances by (dxx, dyx) per output
 * column and by (dxy, dyy) per output row, starting from (ox, oy).
 * Positions are fixed point: vx>>16 is a position in 1/s-pel units
 * (s = 1<<shift), whose low `shift` bits are the bilinear fraction.
 * r is the rounding constant added before the final >>(shift*2);
 * width/height are the valid source dimensions used for edge clamping.
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    /* convert sizes to the last valid column/row index for clamping below */
    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){
            int src_x, src_y, frac_x, frac_y, index;

            src_x= vx>>16;
            src_y= vy>>16;
            /* fractional part in 1/s units, extracted before the integer shift */
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            /* the unsigned compares also reject negative coordinates */
            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside the picture: 2-D bilinear interpolation */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*(s-frac_y)
                                       + (  src[index+stride  ]*(s-frac_x)
                                          + src[index+stride+1]*   frac_x )*   frac_y
                                       + r)>>(shift*2);
                }else{
                    /* vertically outside: clamp the row, interpolate in x only */
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*s
                                       + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* horizontally outside: clamp the column, interpolate in y only */
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_y)
                                          + src[index+stride  ]*   frac_y )*s
                                       + r)>>(shift*2);
                }else{
                    /* outside in both directions: replicate the nearest edge pixel */
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]=    src[index         ];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        /* per-row update of the start position (the affine dxy/dyy terms) */
        ox += dxy;
        oy += dyy;
    }
}
01278
/* Thirdpel copy at the integer position: dispatch to the plain pixel copy
 * for the supported block widths; other widths are silently ignored. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 16)
        put_pixels16_c(dst, src, stride, height);
    else if (width == 8)
        put_pixels8_c (dst, src, stride, height);
    else if (width == 4)
        put_pixels4_c (dst, src, stride, height);
    else if (width == 2)
        put_pixels2_c (dst, src, stride, height);
}
01287
/* Thirdpel interpolation, horizontal: out = (683*(2*p0 + p1 + 1)) >> 11,
 * a fixed-point approximation of (2*p0 + p1)/3 (683/2048 ~= 1/3). */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (683 * (2 * src[x] + src[x + 1] + 1)) >> 11;
}
01298
/* Thirdpel interpolation, horizontal: out = (683*(p0 + 2*p1 + 1)) >> 11,
 * a fixed-point approximation of (p0 + 2*p1)/3 (683/2048 ~= 1/3). */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (683 * (src[x] + 2 * src[x + 1] + 1)) >> 11;
}
01309
/* Thirdpel interpolation, vertical: out = (683*(2*p0 + p_below + 1)) >> 11,
 * a fixed-point approximation of (2*p0 + p_below)/3 (683/2048 ~= 1/3). */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (683 * (2 * src[x] + src[x + stride] + 1)) >> 11;
}
01320
/* Thirdpel 2-D interpolation with bilinear-style weights 4/3/3/2 on the
 * top-left/top-right/bottom-left/bottom-right samples; 2731/32768 ~= 1/12. */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (4 * src[x] + 3 * src[x + 1] +
                              3 * src[x + stride] + 2 * src[x + stride + 1] + 6)) >> 15;
}
01331
/* Thirdpel 2-D interpolation with weights 3/2/4/3 on the
 * top-left/top-right/bottom-left/bottom-right samples; 2731/32768 ~= 1/12. */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (3 * src[x] + 2 * src[x + 1] +
                              4 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15;
}
01342
/* Thirdpel interpolation, vertical: out = (683*(p0 + 2*p_below + 1)) >> 11,
 * a fixed-point approximation of (p0 + 2*p_below)/3 (683/2048 ~= 1/3). */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (683 * (src[x] + 2 * src[x + stride] + 1)) >> 11;
}
01353
/* Thirdpel 2-D interpolation with weights 3/4/2/3 on the
 * top-left/top-right/bottom-left/bottom-right samples; 2731/32768 ~= 1/12. */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (3 * src[x] + 4 * src[x + 1] +
                              2 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15;
}
01364
/* Thirdpel 2-D interpolation with weights 2/3/3/4 on the
 * top-left/top-right/bottom-left/bottom-right samples; 2731/32768 ~= 1/12. */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (2 * src[x] + 3 * src[x + 1] +
                              3 * src[x + stride] + 4 * src[x + stride + 1] + 6)) >> 15;
}
01375
/* Thirdpel averaging at the integer position: dispatch to the plain pixel
 * averaging for the supported block widths; other widths are silently ignored. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 16)
        avg_pixels16_c(dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_c (dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_c (dst, src, stride, height);
    else if (width == 2)
        avg_pixels2_c (dst, src, stride, height);
}
01384
/* Thirdpel horizontal interpolation (weights 2/1, 683/2048 ~= 1/3),
 * then averaged with the existing dst value, rounding up. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683 * (2 * src[x] + src[x + 1] + 1)) >> 11) + 1) >> 1;
}
01395
/* Thirdpel horizontal interpolation (weights 1/2, 683/2048 ~= 1/3),
 * then averaged with the existing dst value, rounding up. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683 * (src[x] + 2 * src[x + 1] + 1)) >> 11) + 1) >> 1;
}
01406
/* Thirdpel vertical interpolation (weights 2/1, 683/2048 ~= 1/3),
 * then averaged with the existing dst value, rounding up. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683 * (2 * src[x] + src[x + stride] + 1)) >> 11) + 1) >> 1;
}
01417
/* Thirdpel 2-D interpolation (weights 4/3/3/2, 2731/32768 ~= 1/12),
 * then averaged with the existing dst value, rounding up. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731 * (4 * src[x] + 3 * src[x + 1] +
                                         3 * src[x + stride] + 2 * src[x + stride + 1] + 6)) >> 15) + 1) >> 1;
}
01428
/* Thirdpel 2-D interpolation (weights 3/2/4/3, 2731/32768 ~= 1/12),
 * then averaged with the existing dst value, rounding up. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731 * (3 * src[x] + 2 * src[x + 1] +
                                         4 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15) + 1) >> 1;
}
01439
/* Thirdpel vertical interpolation (weights 1/2, 683/2048 ~= 1/3),
 * then averaged with the existing dst value, rounding up. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683 * (src[x] + 2 * src[x + stride] + 1)) >> 11) + 1) >> 1;
}
01450
/* Thirdpel 2-D interpolation (weights 3/4/2/3, 2731/32768 ~= 1/12),
 * then averaged with the existing dst value, rounding up. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731 * (3 * src[x] + 4 * src[x + 1] +
                                         2 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15) + 1) >> 1;
}
01461
/* Thirdpel 2-D interpolation (weights 2/3/3/4, 2731/32768 ~= 1/12),
 * then averaged with the existing dst value, rounding up. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y, x;
    for (y = 0; y < height; y++, src += stride, dst += stride)
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731 * (2 * src[x] + 3 * src[x + 1] +
                                         3 * src[x + stride] + 4 * src[x + stride + 1] + 6)) >> 15) + 1) >> 1;
}
01472 #if 0
01473 #define TPEL_WIDTH(width)\
01474 static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
01475 void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
01476 static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
01477 void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
01478 static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
01479 void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
01480 static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
01481 void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
01482 static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
01483 void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
01484 static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
01485 void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
01486 static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
01487 void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
01488 static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
01489 void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
01490 static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
01491 void put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
01492 #endif
01493
/**
 * H264_CHROMA_MC(OPNAME, OP) expands into the 2-, 4- and 8-pixel-wide H.264
 * chroma motion-compensation functions OPNAME ## h264_chroma_mc{2,4,8}_c.
 * (x, y) is the fractional sample position, asserted to be 0..7 each; the
 * bilinear weights A, B, C, D sum to 64.  When D == 0 the interpolation
 * degenerates to a one-dimensional one (B == 0 or C == 0), which the
 * else-branch handles with the single weight E = B + C and a step of either
 * 1 (horizontal) or stride (vertical).  OP is the store operation (put or
 * avg) supplied by the instantiation site; it performs the final
 * rounding/shift.
 */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=( x)*(8-y);\
    const int C=(8-x)*( y);\
    const int D=( x)*( y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            OP(dst[4], (A*src[4] + E*src[step+4]));\
            OP(dst[5], (A*src[5] + E*src[step+5]));\
            OP(dst[6], (A*src[6] + E*src[step+6]));\
            OP(dst[7], (A*src[7] + E*src[step+7]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}
01594
01595 #define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
01596 #define op_put(a, b) a = (((b) + 32)>>6)
01597
01598 H264_CHROMA_MC(put_ , op_put)
01599 H264_CHROMA_MC(avg_ , op_avg)
01600 #undef op_avg
01601 #undef op_put
01602
01603 #define QPEL_MC(r, OPNAME, RND, OP) \
01604 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
01605 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
01606 int i;\
01607 for(i=0; i<h; i++)\
01608 {\
01609 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
01610 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
01611 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
01612 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
01613 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
01614 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
01615 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
01616 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
01617 dst+=dstStride;\
01618 src+=srcStride;\
01619 }\
01620 }\
01621 \
01622 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
01623 const int w=8;\
01624 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
01625 int i;\
01626 for(i=0; i<w; i++)\
01627 {\
01628 const int src0= src[0*srcStride];\
01629 const int src1= src[1*srcStride];\
01630 const int src2= src[2*srcStride];\
01631 const int src3= src[3*srcStride];\
01632 const int src4= src[4*srcStride];\
01633 const int src5= src[5*srcStride];\
01634 const int src6= src[6*srcStride];\
01635 const int src7= src[7*srcStride];\
01636 const int src8= src[8*srcStride];\
01637 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
01638 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
01639 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
01640 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
01641 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
01642 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
01643 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
01644 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
01645 dst++;\
01646 src++;\
01647 }\
01648 }\
01649 \
01650 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
01651 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
01652 int i;\
01653 \
01654 for(i=0; i<h; i++)\
01655 {\
01656 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
01657 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
01658 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
01659 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
01660 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
01661 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
01662 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
01663 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
01664 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
01665 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
01666 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
01667 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
01668 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
01669 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
01670 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
01671 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
01672 dst+=dstStride;\
01673 src+=srcStride;\
01674 }\
01675 }\
01676 \
01677 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
01678 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
01679 int i;\
01680 const int w=16;\
01681 for(i=0; i<w; i++)\
01682 {\
01683 const int src0= src[0*srcStride];\
01684 const int src1= src[1*srcStride];\
01685 const int src2= src[2*srcStride];\
01686 const int src3= src[3*srcStride];\
01687 const int src4= src[4*srcStride];\
01688 const int src5= src[5*srcStride];\
01689 const int src6= src[6*srcStride];\
01690 const int src7= src[7*srcStride];\
01691 const int src8= src[8*srcStride];\
01692 const int src9= src[9*srcStride];\
01693 const int src10= src[10*srcStride];\
01694 const int src11= src[11*srcStride];\
01695 const int src12= src[12*srcStride];\
01696 const int src13= src[13*srcStride];\
01697 const int src14= src[14*srcStride];\
01698 const int src15= src[15*srcStride];\
01699 const int src16= src[16*srcStride];\
01700 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
01701 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
01702 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
01703 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
01704 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
01705 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
01706 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
01707 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
01708 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
01709 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
01710 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
01711 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
01712 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
01713 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
01714 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
01715 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
01716 dst++;\
01717 src++;\
01718 }\
01719 }\
01720 \
01721 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01722 uint8_t half[64];\
01723 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
01724 OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
01725 }\
01726 \
01727 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01728 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
01729 }\
01730 \
01731 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01732 uint8_t half[64];\
01733 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
01734 OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
01735 }\
01736 \
01737 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
01738 uint8_t full[16*9];\
01739 uint8_t half[64];\
01740 copy_block9(full, src, 16, stride, 9);\
01741 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
01742 OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
01743 }\
01744 \
01745 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
01746 uint8_t full[16*9];\
01747 copy_block9(full, src, 16, stride, 9);\
01748 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
01749 }\
01750 \
01751 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
01752 uint8_t full[16*9];\
01753 uint8_t half[64];\
01754 copy_block9(full, src, 16, stride, 9);\
01755 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
01756 OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
01757 }\
01758 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
01759 uint8_t full[16*9];\
01760 uint8_t halfH[72];\
01761 uint8_t halfV[64];\
01762 uint8_t halfHV[64];\
01763 copy_block9(full, src, 16, stride, 9);\
01764 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01765 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01766 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01767 OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01768 }\
01769 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01770 uint8_t full[16*9];\
01771 uint8_t halfH[72];\
01772 uint8_t halfHV[64];\
01773 copy_block9(full, src, 16, stride, 9);\
01774 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01775 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
01776 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01777 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
01778 }\
01779 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01780 uint8_t full[16*9];\
01781 uint8_t halfH[72];\
01782 uint8_t halfV[64];\
01783 uint8_t halfHV[64];\
01784 copy_block9(full, src, 16, stride, 9);\
01785 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01786 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01787 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01788 OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01789 }\
01790 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01791 uint8_t full[16*9];\
01792 uint8_t halfH[72];\
01793 uint8_t halfHV[64];\
01794 copy_block9(full, src, 16, stride, 9);\
01795 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01796 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
01797 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01798 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
01799 }\
01800 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01801 uint8_t full[16*9];\
01802 uint8_t halfH[72];\
01803 uint8_t halfV[64];\
01804 uint8_t halfHV[64];\
01805 copy_block9(full, src, 16, stride, 9);\
01806 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01807 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01808 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01809 OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01810 }\
01811 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01812 uint8_t full[16*9];\
01813 uint8_t halfH[72];\
01814 uint8_t halfHV[64];\
01815 copy_block9(full, src, 16, stride, 9);\
01816 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01817 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
01818 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01819 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01820 }\
01821 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01822 uint8_t full[16*9];\
01823 uint8_t halfH[72];\
01824 uint8_t halfV[64];\
01825 uint8_t halfHV[64];\
01826 copy_block9(full, src, 16, stride, 9);\
01827 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
01828 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01829 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01830 OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01831 }\
01832 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01833 uint8_t full[16*9];\
01834 uint8_t halfH[72];\
01835 uint8_t halfHV[64];\
01836 copy_block9(full, src, 16, stride, 9);\
01837 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01838 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
01839 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01840 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01841 }\
01842 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01843 uint8_t halfH[72];\
01844 uint8_t halfHV[64];\
01845 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01846 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01847 OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
01848 }\
01849 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01850 uint8_t halfH[72];\
01851 uint8_t halfHV[64];\
01852 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01853 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01854 OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01855 }\
01856 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01857 uint8_t full[16*9];\
01858 uint8_t halfH[72];\
01859 uint8_t halfV[64];\
01860 uint8_t halfHV[64];\
01861 copy_block9(full, src, 16, stride, 9);\
01862 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01863 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01864 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01865 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
01866 }\
01867 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01868 uint8_t full[16*9];\
01869 uint8_t halfH[72];\
01870 copy_block9(full, src, 16, stride, 9);\
01871 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01872 put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
01873 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01874 }\
01875 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01876 uint8_t full[16*9];\
01877 uint8_t halfH[72];\
01878 uint8_t halfV[64];\
01879 uint8_t halfHV[64];\
01880 copy_block9(full, src, 16, stride, 9);\
01881 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01882 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01883 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01884 OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
01885 }\
01886 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01887 uint8_t full[16*9];\
01888 uint8_t halfH[72];\
01889 copy_block9(full, src, 16, stride, 9);\
01890 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01891 put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
01892 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01893 }\
01894 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01895 uint8_t halfH[72];\
01896 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01897 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01898 }\
01899 \
01900 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01901 uint8_t half[256];\
01902 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01903 OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
01904 }\
01905 \
01906 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01907 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
01908 }\
01909 \
01910 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01911 uint8_t half[256];\
01912 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01913 OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
01914 }\
01915 \
01916 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
01917 uint8_t full[24*17];\
01918 uint8_t half[256];\
01919 copy_block17(full, src, 24, stride, 17);\
01920 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01921 OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
01922 }\
01923 \
01924 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
01925 uint8_t full[24*17];\
01926 copy_block17(full, src, 24, stride, 17);\
01927 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
01928 }\
01929 \
01930 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
01931 uint8_t full[24*17];\
01932 uint8_t half[256];\
01933 copy_block17(full, src, 24, stride, 17);\
01934 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01935 OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
01936 }\
01937 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
01938 uint8_t full[24*17];\
01939 uint8_t halfH[272];\
01940 uint8_t halfV[256];\
01941 uint8_t halfHV[256];\
01942 copy_block17(full, src, 24, stride, 17);\
01943 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01944 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01945 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01946 OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01947 }\
01948 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01949 uint8_t full[24*17];\
01950 uint8_t halfH[272];\
01951 uint8_t halfHV[256];\
01952 copy_block17(full, src, 24, stride, 17);\
01953 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01954 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
01955 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01956 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
01957 }\
01958 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01959 uint8_t full[24*17];\
01960 uint8_t halfH[272];\
01961 uint8_t halfV[256];\
01962 uint8_t halfHV[256];\
01963 copy_block17(full, src, 24, stride, 17);\
01964 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01965 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01966 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01967 OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01968 }\
01969 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01970 uint8_t full[24*17];\
01971 uint8_t halfH[272];\
01972 uint8_t halfHV[256];\
01973 copy_block17(full, src, 24, stride, 17);\
01974 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01975 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
01976 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01977 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
01978 }\
01979 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01980 uint8_t full[24*17];\
01981 uint8_t halfH[272];\
01982 uint8_t halfV[256];\
01983 uint8_t halfHV[256];\
01984 copy_block17(full, src, 24, stride, 17);\
01985 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01986 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01987 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01988 OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01989 }\
01990 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01991 uint8_t full[24*17];\
01992 uint8_t halfH[272];\
01993 uint8_t halfHV[256];\
01994 copy_block17(full, src, 24, stride, 17);\
01995 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01996 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
01997 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01998 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01999 }\
02000 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
02001 uint8_t full[24*17];\
02002 uint8_t halfH[272];\
02003 uint8_t halfV[256];\
02004 uint8_t halfHV[256];\
02005 copy_block17(full, src, 24, stride, 17);\
02006 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
02007 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
02008 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02009 OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
02010 }\
02011 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
02012 uint8_t full[24*17];\
02013 uint8_t halfH[272];\
02014 uint8_t halfHV[256];\
02015 copy_block17(full, src, 24, stride, 17);\
02016 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02017 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
02018 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02019 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
02020 }\
02021 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
02022 uint8_t halfH[272];\
02023 uint8_t halfHV[256];\
02024 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
02025 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02026 OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
02027 }\
02028 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
02029 uint8_t halfH[272];\
02030 uint8_t halfHV[256];\
02031 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
02032 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02033 OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
02034 }\
02035 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
02036 uint8_t full[24*17];\
02037 uint8_t halfH[272];\
02038 uint8_t halfV[256];\
02039 uint8_t halfHV[256];\
02040 copy_block17(full, src, 24, stride, 17);\
02041 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02042 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
02043 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02044 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
02045 }\
02046 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
02047 uint8_t full[24*17];\
02048 uint8_t halfH[272];\
02049 copy_block17(full, src, 24, stride, 17);\
02050 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02051 put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
02052 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
02053 }\
02054 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
02055 uint8_t full[24*17];\
02056 uint8_t halfH[272];\
02057 uint8_t halfV[256];\
02058 uint8_t halfHV[256];\
02059 copy_block17(full, src, 24, stride, 17);\
02060 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02061 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
02062 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
02063 OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
02064 }\
02065 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
02066 uint8_t full[24*17];\
02067 uint8_t halfH[272];\
02068 copy_block17(full, src, 24, stride, 17);\
02069 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
02070 put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
02071 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
02072 }\
02073 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
02074 uint8_t halfH[272];\
02075 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
02076 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
02077 }
02078
/* Pixel-store operators used to instantiate the MPEG-4 qpel code above.
 * The filter sum passed as (b) is scaled by 32; "+16>>5" divides with
 * rounding, "+15>>5" without (the no_rnd variants), and cm[] is the
 * crop table that clips the result to 0..255.  op_avg additionally
 * averages (with rounding) against the pixel already in dst. */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

/* Instantiate the full qpel MC function set three times:
 * rounded put, non-rounded put and rounded avg. */
QPEL_MC(0, put_ , _ , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_ , _ , op_avg)

#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd

/* The (0,0) qpel position is a plain block copy/average, so the mc00
 * entries are simply aliased to the generic pixel copy helpers. */
#define put_qpel8_mc00_c ff_put_pixels8x8_c
#define avg_qpel8_mc00_c ff_avg_pixels8x8_c
#define put_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
02099
#if 1
/* H264_LOWPASS(OPNAME, OP, OP2) generates the H.264 half-sample
 * interpolation primitives for block widths 2, 4, 8 and 16, using the
 * 6-tap filter (1,-5,20,20,-5,1):
 *   *_h_lowpass  - horizontal pass, result stored via OP (sum scaled by 32)
 *   *_v_lowpass  - vertical pass, result stored via OP
 *   *_hv_lowpass - horizontal pass into the int16_t tmp[] buffer (kept at
 *                  full precision), then a vertical pass over tmp stored
 *                  via OP2 (sum scaled by 1024)
 * OP/OP2 are the rounding+clipping store macros (op_put/op_avg etc.)
 * defined at the instantiation site below.  The hv variants read
 * 2 rows/columns before and 3 after the block, hence the h+5 row loop
 * and the "tmp -= tmpStride*(h+5-2)" rewind to the block's first row.
 * The 16-wide versions are composed from four 8x8 calls. */
#define H264_LOWPASS(OPNAME, OP, OP2) \
static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        dst++;\
        src++;\
    }\
}\
\
static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=2;\
    const int w=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        dst++;\
        tmp++;\
    }\
}\
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=4;\
    const int w=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        const int src7= src[7 *srcStride];\
        const int src8= src[8 *srcStride];\
        const int src9= src[9 *srcStride];\
        const int src10=src[10*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=8;\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        const int tmp7= tmp[7 *tmpStride];\
        const int tmp8= tmp[8 *tmpStride];\
        const int tmp9= tmp[9 *tmpStride];\
        const int tmp10=tmp[10*tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
        OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
        OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
        OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
}\
02364
/* H264_MC(OPNAME, SIZE) generates the 16 quarter-pel motion-compensation
 * entry points mcXY_c for one block size, where X,Y in 0..3 are the
 * quarter-pel offsets in x and y.  Half-pel positions come straight from
 * the *_h/_v/_hv lowpass filters generated by H264_LOWPASS; quarter-pel
 * positions are built by averaging two neighbouring half/full-pel planes
 * with pixels*_l2.  For vertical filtering the source is first copied
 * into full[] with 2 rows of context above and 3 below (SIZE+5 rows),
 * full_mid pointing at the block's own first row.  The hv positions use
 * an int16_t tmp[] buffer to keep full precision between the two filter
 * passes.  "src + stride" / "full + 1" select which neighbouring
 * half-pel plane participates in the average. */
#define H264_MC(OPNAME, SIZE) \
static av_unused void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
02501
/* Store operators for the H.264 filters: OP variants take a 6-tap sum
 * scaled by 32 (+16>>5 rounds), OP2 variants take the two-pass hv sum
 * scaled by 1024 (+512>>10 rounds); cm[] clips to 0..255 and op*_avg
 * additionally averages with the existing dst pixel, rounding up. */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)

#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
#define op2_put(a, b) a = cm[((b) + 512)>>10]

/* Instantiate the lowpass primitives and the per-size qpel MC entry
 * points for both put and avg (the 2x2 size exists only as put). */
H264_LOWPASS(put_ , op_put, op2_put)
H264_LOWPASS(avg_ , op_avg, op2_avg)
H264_MC(put_, 2)
H264_MC(put_, 4)
H264_MC(put_, 8)
H264_MC(put_, 16)
H264_MC(avg_, 4)
H264_MC(avg_, 8)
H264_MC(avg_, 16)

#undef op_avg
#undef op_put
#undef op2_avg
#undef op2_put
#endif

/* The (0,0) position needs no filtering: alias mc00 to plain copy/avg. */
#define put_h264_qpel8_mc00_c ff_put_pixels8x8_c
#define avg_h264_qpel8_mc00_c ff_avg_pixels8x8_c
#define put_h264_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_h264_qpel16_mc00_c ff_avg_pixels16x16_c
02528
02529 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
02530 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
02531 int i;
02532
02533 for(i=0; i<h; i++){
02534 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
02535 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
02536 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
02537 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
02538 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
02539 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
02540 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
02541 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
02542 dst+=dstStride;
02543 src+=srcStride;
02544 }
02545 }
02546
/* Fixed-size wrappers around the generic copy/average helpers, matching the
 * 3-argument (dst, src, stride) signature used for qpel MC function tables. */
void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels8_c(dst, src, stride, 8);
}
void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels8_c(dst, src, stride, 8);
}
void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels16_c(dst, src, stride, 16);
}
void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels16_c(dst, src, stride, 16);
}
02559
#if CONFIG_RV40_DECODER
/* RV40 mc33 (x=3, y=3 sub-pel position) wrappers: this position is served by
 * the plain diagonal half-pel average rather than RV40's own filters. */
static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels16_xy2_c(dst, src, stride, 16);
}
static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels16_xy2_c(dst, src, stride, 16);
}
static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_xy2_c(dst, src, stride, 8);
}
static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels8_xy2_c(dst, src, stride, 8);
}
#endif
02574
02575 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
02576 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
02577 int i;
02578
02579 for(i=0; i<w; i++){
02580 const int src_1= src[ -srcStride];
02581 const int src0 = src[0 ];
02582 const int src1 = src[ srcStride];
02583 const int src2 = src[2*srcStride];
02584 const int src3 = src[3*srcStride];
02585 const int src4 = src[4*srcStride];
02586 const int src5 = src[5*srcStride];
02587 const int src6 = src[6*srcStride];
02588 const int src7 = src[7*srcStride];
02589 const int src8 = src[8*srcStride];
02590 const int src9 = src[9*srcStride];
02591 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
02592 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
02593 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
02594 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
02595 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
02596 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
02597 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
02598 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
02599 src++;
02600 dst++;
02601 }
02602 }
02603
/* WMV2 half-pel MC positions (mcXY: X = horizontal, Y = vertical phase).
 * Positions on a half-pel grid are built from the h/v lowpass filters plus,
 * for quarter-phase positions, an average with the nearest full-pel data. */

/* (1,0): average of the source and the horizontally filtered block. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
}

/* (2,0): pure horizontal half-pel filter. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}

/* (3,0): average of the right neighbour and the horizontally filtered block. */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
}

/* (0,2): pure vertical half-pel filter. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}

/* (1,2): average of the vertically filtered block and the HV-filtered block.
 * halfH holds 11 filtered rows (one above, two below) so the vertical pass
 * over it has the context it needs; halfH+8 skips the extra top row. */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* (3,2): as (1,2) but the vertical-only term starts one pixel to the right. */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* (2,2): horizontal then vertical half-pel filter (no averaging). */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
02647
/**
 * H.263 in-loop deblocking, vertical-edge variant: filters the 8 pixel
 * columns crossing a horizontal block boundary, touching rows -2..+1
 * relative to src. Filter strength is looked up from the quantizer.
 */
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
    int x;
    const int strength= ff_h263_loop_filter_strength[qscale];

    for(x=0; x<8; x++){
        int d1, d2, ad1;
        int p0= src[x-2*stride];
        int p1= src[x-1*stride];
        int p2= src[x+0*stride];
        int p3= src[x+1*stride];
        int d = (p0 - p3 + 4*(p2 - p1)) / 8;

        /* piecewise-linear ramp: full correction for |d| < strength,
         * tapering back to zero as |d| approaches 2*strength */
        if     (d<-2*strength) d1= 0;
        else if(d<-  strength) d1=-2*strength - d;
        else if(d<   strength) d1= d;
        else if(d< 2*strength) d1= 2*strength - d;
        else                   d1= 0;

        p1 += d1;
        p2 -= d1;
        /* clamp to 0..255: bit 8 flags out-of-range values and ~(p>>31) is
         * 0 for negatives and 255 for overflows (assumes p in [-256,511]) */
        if(p1&256) p1= ~(p1>>31);
        if(p2&256) p2= ~(p2>>31);

        src[x-1*stride] = p1;
        src[x+0*stride] = p2;

        /* secondary, half-magnitude correction of the outer pixels */
        ad1= FFABS(d1)>>1;

        d2= av_clip((p0-p3)/4, -ad1, ad1);

        src[x-2*stride] = p0 - d2;
        src[x+ stride] = p3 + d2;
    }
    }
}
02684
/**
 * H.263 in-loop deblocking, horizontal-edge variant: filters the 8 pixel
 * rows crossing a vertical block boundary, touching columns -2..+1 relative
 * to src. Same math as h263_v_loop_filter_c with x/y roles swapped.
 */
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
    int y;
    const int strength= ff_h263_loop_filter_strength[qscale];

    for(y=0; y<8; y++){
        int d1, d2, ad1;
        int p0= src[y*stride-2];
        int p1= src[y*stride-1];
        int p2= src[y*stride+0];
        int p3= src[y*stride+1];
        int d = (p0 - p3 + 4*(p2 - p1)) / 8;

        /* piecewise-linear ramp response, zero outside |d| < 2*strength */
        if     (d<-2*strength) d1= 0;
        else if(d<-  strength) d1=-2*strength - d;
        else if(d<   strength) d1= d;
        else if(d< 2*strength) d1= 2*strength - d;
        else                   d1= 0;

        p1 += d1;
        p2 -= d1;
        /* clamp to 0..255 via the bit-8 trick (assumes p in [-256,511]) */
        if(p1&256) p1= ~(p1>>31);
        if(p2&256) p2= ~(p2>>31);

        src[y*stride-1] = p1;
        src[y*stride+0] = p2;

        /* half-magnitude correction of the outer pixels */
        ad1= FFABS(d1)>>1;

        d2= av_clip((p0-p3)/4, -ad1, ad1);

        src[y*stride-2] = p0 - d2;
        src[y*stride+1] = p3 + d2;
    }
    }
}
02721
/**
 * H.261 in-loop filter: separable (1,2,1)/4 smoothing of an 8x8 block,
 * applied in place. Edge rows/columns are passed through (only scaled and
 * rounded), so no pixels outside the block are read.
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int temp[64];
    int x, y;

    /* vertical (1,2,1) pass into temp, scaled by 4 overall;
     * top and bottom rows are only scaled */
    for (x = 0; x < 8; x++) {
        temp[x]         = 4 * src[x];
        temp[x + 7 * 8] = 4 * src[x + 7 * stride];
        for (y = 1; y < 7; y++)
            temp[y * 8 + x] = src[(y - 1) * stride + x]
                            + 2 * src[y * stride + x]
                            + src[(y + 1) * stride + x];
    }

    /* horizontal (1,2,1) pass back into src, with rounding;
     * left and right columns get only the /4 normalization */
    for (y = 0; y < 8; y++) {
        src[y * stride]     = (temp[y * 8]     + 2) >> 2;
        src[y * stride + 7] = (temp[y * 8 + 7] + 2) >> 2;
        for (x = 1; x < 7; x++)
            src[y * stride + x] = (temp[y * 8 + x - 1] + 2 * temp[y * 8 + x]
                                 + temp[y * 8 + x + 1] + 8) >> 4;
    }
}
02748
/**
 * Sum of absolute differences over a 16-wide, h-tall block.
 * `v` (context) is unused; line_size is the stride of both inputs.
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int x, y;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 16; x++)
            sum += abs(pix1[x] - pix2[x]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
02776
/**
 * SAD between pix1 and the horizontal half-pel interpolation of pix2
 * (average of each pixel and its right neighbour), 16-wide, h rows.
 * Reads pix2[16] of each row.
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int x, y;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 16; x++)
            sum += abs(pix1[x] - avg2(pix2[x], pix2[x + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
02804
/**
 * SAD between pix1 and the vertical half-pel interpolation of pix2
 * (average of each pixel and the one below), 16-wide, h rows.
 * Reads one row below the h-row window of pix2.
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int x, y;
    uint8_t *below = pix2 + line_size;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 16; x++)
            sum += abs(pix1[x] - avg2(pix2[x], below[x]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
02834
/**
 * SAD between pix1 and the diagonal half-pel interpolation of pix2
 * (4-tap average of the 2x2 neighbourhood), 16-wide, h rows.
 * Reads one extra column and one extra row of pix2.
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int x, y;
    uint8_t *below = pix2 + line_size;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 16; x++)
            sum += abs(pix1[x] - avg4(pix2[x], pix2[x + 1], below[x], below[x + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
02864
/**
 * Sum of absolute differences over an 8-wide, h-tall block.
 * `v` (context) is unused; line_size is the stride of both inputs.
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int x, y;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 8; x++)
            sum += abs(pix1[x] - pix2[x]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
02884
/**
 * SAD between pix1 and the horizontal half-pel interpolation of pix2,
 * 8-wide, h rows. Reads pix2[8] of each row.
 */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int x, y;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 8; x++)
            sum += abs(pix1[x] - avg2(pix2[x], pix2[x + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
02904
/**
 * SAD between pix1 and the vertical half-pel interpolation of pix2,
 * 8-wide, h rows. Reads one row below the window of pix2.
 */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int x, y;
    uint8_t *below = pix2 + line_size;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 8; x++)
            sum += abs(pix1[x] - avg2(pix2[x], below[x]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
02926
/**
 * SAD between pix1 and the diagonal half-pel interpolation of pix2,
 * 8-wide, h rows. Reads one extra column and one extra row of pix2.
 */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int sum = 0;
    int x, y;
    uint8_t *below = pix2 + line_size;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 8; x++)
            sum += abs(pix1[x] - avg4(pix2[x], pix2[x + 1], below[x], below[x + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
02948
/**
 * Noise-preserving SSE, 16-wide: plain squared error (score1) plus a
 * weighted penalty for the mismatch in local 2x2 gradient structure
 * between the two blocks (score2).
 */
static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        /* squared-error term */
        for(x=0; x<16; x++){
            score1+= (s1[x  ] - s2[x  ])*(s1[x  ] - s2[x  ]);
        }
        /* 2x2 "texture" mismatch term; needs the next row, so skipped on
         * the last iteration */
        if(y+1<h){
            for(x=0; x<15; x++){
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
                               - s1[x+1] + s1[x+1+stride])
                        -FFABS(  s2[x  ] - s2[x  +stride]
                               - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    /* v may be NULL (no encoder context); fall back to weight 8 */
    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;
}
02974
/**
 * Noise-preserving SSE, 8-wide variant of nsse16_c: squared error plus a
 * weighted penalty for local 2x2 gradient-structure mismatch.
 */
static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        /* squared-error term */
        for(x=0; x<8; x++){
            score1+= (s1[x  ] - s2[x  ])*(s1[x  ] - s2[x  ]);
        }
        /* 2x2 "texture" mismatch term; skipped on the last row */
        if(y+1<h){
            for(x=0; x<7; x++){
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
                               - s1[x+1] + s1[x+1+stride])
                        -FFABS(  s2[x  ] - s2[x  +stride]
                               - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    /* v may be NULL (no encoder context); fall back to weight 8 */
    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;
}
03000
/**
 * Estimate the weighted squared error that would remain after adding
 * `scale` times a basis function to the residual `rem`.
 * Fixed point: basis values carry BASIS_SHIFT fractional bits and rem
 * carries RECON_SHIFT (constants defined elsewhere in dsputil).
 */
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
    int i;
    unsigned int sum=0;

    for(i=0; i<8*8; i++){
        /* rounded scale-down of basis*scale to rem's fixed-point format */
        int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
        int w= weight[i];
        b>>= RECON_SHIFT;
        assert(-512<b && b<512);

        sum += (w*b)*(w*b)>>4;
    }
    return sum>>2;
}
03015
/**
 * Add `scale` times a basis function to the residual `rem`, with the same
 * fixed-point rounding as try_8x8basis_c (BASIS_SHIFT/RECON_SHIFT defined
 * elsewhere in dsputil).
 */
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
    int i;

    for(i=0; i<8*8; i++){
        rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
    }
}
03023
03032 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
03033 {
03034 int i;
03035 DCTELEM temp[64];
03036
03037 if(last<=0) return;
03038
03039
03040 for(i=0; i<=last; i++){
03041 const int j= scantable[i];
03042 temp[j]= block[j];
03043 block[j]=0;
03044 }
03045
03046 for(i=0; i<=last; i++){
03047 const int j= scantable[i];
03048 const int perm_j= permutation[j];
03049 block[perm_j]= temp[j];
03050 }
03051 }
03052
/* FF_CMP_ZERO metric: always reports a perfect match (score 0). */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}
03056
/**
 * Fill cmp[0..5] with the comparison functions selected by `type`
 * (one slot per block-size class, mirroring the DSPContext arrays).
 * Only the low byte of type is used; an unknown value logs an error and
 * leaves the slots NULL from the initial memset.
 */
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
    int i;

    memset(cmp, 0, sizeof(void*)*6);

    for(i=0; i<6; i++){
        switch(type&0xFF){
        case FF_CMP_SAD:
            cmp[i]= c->sad[i];
            break;
        case FF_CMP_SATD:
            cmp[i]= c->hadamard8_diff[i];
            break;
        case FF_CMP_SSE:
            cmp[i]= c->sse[i];
            break;
        case FF_CMP_DCT:
            cmp[i]= c->dct_sad[i];
            break;
        case FF_CMP_DCT264:
            cmp[i]= c->dct264_sad[i];
            break;
        case FF_CMP_DCTMAX:
            cmp[i]= c->dct_max[i];
            break;
        case FF_CMP_PSNR:
            cmp[i]= c->quant_psnr[i];
            break;
        case FF_CMP_BIT:
            cmp[i]= c->bit[i];
            break;
        case FF_CMP_RD:
            cmp[i]= c->rd[i];
            break;
        case FF_CMP_VSAD:
            cmp[i]= c->vsad[i];
            break;
        case FF_CMP_VSSE:
            cmp[i]= c->vsse[i];
            break;
        case FF_CMP_ZERO:
            cmp[i]= zero_cmp;
            break;
        case FF_CMP_NSSE:
            cmp[i]= c->nsse[i];
            break;
#if CONFIG_DWT
        case FF_CMP_W53:
            cmp[i]= c->w53[i];
            break;
        case FF_CMP_W97:
            cmp[i]= c->w97[i];
            break;
#endif
        default:
            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
        }
    }
}
03116
/* Zero one 8x8 block of DCT coefficients. */
static void clear_block_c(DCTELEM *block)
{
    memset(block, 0, sizeof(DCTELEM)*64);
}

/* Zero six consecutive 8x8 coefficient blocks (one macroblock's worth —
 * presumably 4 luma + 2 chroma for 4:2:0; confirm against callers). */
static void clear_blocks_c(DCTELEM *blocks)
{
    memset(blocks, 0, sizeof(DCTELEM)*6*64);
}
03129
03130 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
03131 long i;
03132 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
03133 long a = *(long*)(src+i);
03134 long b = *(long*)(dst+i);
03135 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
03136 }
03137 for(; i<w; i++)
03138 dst[i+0] += src[i+0];
03139 }
03140
03141 static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
03142 long i;
03143 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
03144 long a = *(long*)(src1+i);
03145 long b = *(long*)(src2+i);
03146 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
03147 }
03148 for(; i<w; i++)
03149 dst[i] = src1[i]+src2[i];
03150 }
03151
03152 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
03153 long i;
03154 #if !HAVE_FAST_UNALIGNED
03155 if((long)src2 & (sizeof(long)-1)){
03156 for(i=0; i+7<w; i+=8){
03157 dst[i+0] = src1[i+0]-src2[i+0];
03158 dst[i+1] = src1[i+1]-src2[i+1];
03159 dst[i+2] = src1[i+2]-src2[i+2];
03160 dst[i+3] = src1[i+3]-src2[i+3];
03161 dst[i+4] = src1[i+4]-src2[i+4];
03162 dst[i+5] = src1[i+5]-src2[i+5];
03163 dst[i+6] = src1[i+6]-src2[i+6];
03164 dst[i+7] = src1[i+7]-src2[i+7];
03165 }
03166 }else
03167 #endif
03168 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
03169 long a = *(long*)(src1+i);
03170 long b = *(long*)(src2+i);
03171 *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
03172 }
03173 for(; i<w; i++)
03174 dst[i+0] = src1[i+0]-src2[i+0];
03175 }
03176
/**
 * HuffYUV median-prediction decode: reconstruct dst from the residual
 * `diff`, predicting each pixel as mid_pred(left, top, left + top - topleft)
 * where src1 is the previous (top) row. *left and *left_top carry the
 * running state across calls and are updated on return.
 */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l= *left;
    lt= *left_top;

    for(i=0; i<w; i++){
        /* &0xFF keeps the gradient predictor in byte range before the median */
        l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
        lt= src1[i];
        dst[i]= l;
    }

    *left= l;
    *left_top= lt;
}
03193
/**
 * HuffYUV median-prediction encode: the inverse of
 * add_hfyu_median_prediction_c. Writes the residual src2 - prediction into
 * dst, with src1 as the previous (top) row and src2 the current row.
 * *left and *left_top carry state across calls and are updated on return.
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l= *left;
    lt= *left_top;

    for(i=0; i<w; i++){
        const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
        lt= src1[i];
        l= src2[i];
        dst[i]= l - pred;
    }

    *left= l;
    *left_top= lt;
}
03211
/**
 * HuffYUV left-prediction decode: running byte-wise prefix sum of the
 * residuals. dst[i] = (acc + src[0..i]) mod 256; returns the final
 * accumulator (not masked to 8 bits, matching the original contract).
 */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
    int i;

    for (i = 0; i < w; i++) {
        acc   += src[i];
        dst[i] = acc;   /* store truncates to 8 bits */
    }

    return acc;
}
03230
/* Byte offsets of the four channels inside a packed 32-bit BGRA pixel,
 * chosen for the host endianness. */
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/**
 * HuffYUV left-prediction decode for packed 32-bit BGRA: per-channel running
 * prefix sum of the residuals (stores truncate to 8 bits). The four state
 * pointers carry the running sums across calls and are updated on return.
 */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
    int x;
    int r = *red, g = *green, b = *blue, a = *alpha;

    for (x = 0; x < w; x++, src += 4, dst += 4) {
        b += src[B]; dst[B] = b;
        g += src[G]; dst[G] = g;
        r += src[R]; dst[R] = r;
        a += src[A]; dst[A] = a;
    }

    *red   = r;
    *green = g;
    *blue  = b;
    *alpha = a;
}
#undef B
#undef G
#undef R
#undef A
03271
/* Hadamard butterfly writing sum/difference of i1,i2 into o1,o2. */
#define BUTTERFLY2(o1,o2,i1,i2) \
    o1= (i1)+(i2);\
    o2= (i1)-(i2);

/* In-place Hadamard butterfly: (x,y) <- (x+y, x-y). */
#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

/* Final butterfly stage folded into the accumulation: |x+y| + |x-y|. */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
03286
/**
 * 8x8 SATD: sum of absolute values of the 2-D Hadamard transform of the
 * difference between src and dst (both stride-spaced 8x8 blocks).
 * h must be 8 (asserted, not handled).
 * Change vs. previous revision: the dead "#if 0" max-tracking debug code
 * (which also declared a static after statements) has been removed.
 */
static int hadamard8_diff8x8_c( void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    /* horizontal pass: 8-point Hadamard transform of each difference row */
    for(i=0; i<8; i++){
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    /* vertical pass over the columns; the last butterfly stage is folded
     * into the |.|-accumulation via BUTTERFLYA */
    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    return sum;
}
03338
/**
 * Intra 8x8 SATD: sum of absolute 2-D Hadamard coefficients of the source
 * block itself (dummy is unused), with the DC term subtracted at the end so
 * the metric ignores the block's mean level. h must be 8 (asserted).
 */
static int hadamard8_intra8x8_c( void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    /* horizontal pass: 8-point Hadamard transform of each source row */
    for(i=0; i<8; i++){
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    /* vertical pass; last butterfly stage folded into the accumulation */
    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    /* temp[0]+temp[32] is the DC coefficient at this point; remove its
     * contribution from the total */
    sum -= FFABS(temp[8*0] + temp[8*4]);

    return sum;
}
03386
/**
 * DCT-domain SAD: forward-transform the 8x8 difference of src1/src2 and
 * return the sum of absolute coefficient values. h must be 8 (asserted).
 */
static int dct_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);
    return s->dsp.sum_abs_dctelem(temp);
}
03397
03398 #if CONFIG_GPL
/* One 8-point H.264-style integer DCT pass. SRC(i)/DST(i,v) are macros
 * (re)defined at each expansion site to select row or column access, so the
 * same body serves both directions of the 2-D transform. */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}
03425
/**
 * H.264-transform SAD: run the 8x8 difference block through DCT8_1D on rows
 * and then on columns, summing |coefficient| in the column pass via the
 * DST macro redefinition.
 */
static int dct264_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DCTELEM dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

/* row pass: in-place transform of each row */
#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

/* column pass: coefficients are not stored, just accumulated as |v| */
#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
03449 #endif
03450
/**
 * DCT-domain max metric: forward-transform the 8x8 difference and return
 * the largest absolute coefficient. h must be 8 (asserted).
 */
static int dct_max8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    int sum=0, i;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);

    for(i=0; i<64; i++)
        sum= FFMAX(sum, FFABS(temp[i]));

    return sum;
}
03466
/**
 * Quantization-noise metric: forward-transform the difference block, run it
 * through quantize/dequantize/IDCT, and return the squared error between the
 * reconstruction and the original coefficients. h must be 8 (asserted).
 * Forces inter mode (s->mb_intra=0) so the inter quantizer path is measured.
 */
static int quant_psnr8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
    DCTELEM * const bak = temp+64;  /* pristine copy for the comparison */
    int sum=0, i;

    assert(h==8);
    s->mb_intra=0;

    s->dsp.diff_pixels(temp, src1, src2, stride);

    memcpy(bak, temp, 64*sizeof(DCTELEM));

    /* round-trip through the encoder's quantizer (i receives the overflow
     * flag, reused below as a loop index) */
    s->block_last_index[0]= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
    ff_simple_idct(temp);

    for(i=0; i<64; i++)
        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);

    return sum;
}
03489
/**
 * Rate-distortion metric for an 8x8 block: quantizes the difference,
 * counts the VLC bits needed to code the coefficients, reconstructs the
 * block, and returns distortion + lambda-weighted rate.
 * h must be 8 (asserted).
 */
static int rd8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
    int i, last, run, bits, level, distortion, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    /* work on local stride-8 copies so lsrc2 can be overwritten below */
    copy_block8(lsrc1, src1, 8, stride, 8);
    copy_block8(lsrc2, src2, 8, stride, 8);

    s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    /* pick the VLC tables for the current coding mode; intra codes the DC
     * coefficient separately */
    if (s->mb_intra) {
        start_i = 1;
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256];
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* run-level bit counting in scan order; the final coefficient uses
         * the "last" VLC table */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;  /* bias to index the signed-level VLC table */
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;

    }

    /* reconstruct: dequantize and add back onto the local prediction */
    if(last>=0){
        if(s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    s->dsp.idct_add(lsrc2, 8, temp);

    distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);

    /* rate term scaled by qscale^2 (lambda); 109/128 with rounding */
    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
}
03565
/**
 * Rate-only metric for an 8x8 block: quantizes the difference and returns
 * the number of VLC bits needed to code the coefficients (same counting
 * logic as rd8x8_c, without the reconstruction/distortion part).
 * h must be 8 (asserted).
 */
static int bit8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    int i, last, run, bits, level, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    /* pick VLC tables; intra codes DC separately */
    if (s->mb_intra) {
        start_i = 1;
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256];
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* run-level bit counting in scan order; final coefficient uses the
         * "last" VLC table */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;  /* bias to index the signed-level VLC table */
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
    }

    return bits;
}
03624
/* Intra vertical-activity SAD template: total absolute difference between
 * each row and the row above it, over the first `size` columns of a single
 * block. The second operand (dummy) is unused. */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0;                                                                          \
    int x,y;                                                                              \
                                                                                          \
    for(y=1; y<h; y++){                                                                   \
        for(x=0; x<size; x+=4){                                                           \
            score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride])         \
                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]);        \
        }                                                                                 \
        s+= stride;                                                                       \
    }                                                                                     \
                                                                                          \
    return score;                                                                         \
}
/* Instantiate for the two block widths used by the cmp tables. */
VSAD_INTRA(8)
VSAD_INTRA(16)
03642
static int vsad16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    /* Vertical SAD of the difference signal: measures how much the
     * residual (s1 - s2) changes from one row to the next over a
     * 16-pixel-wide area.  Row 0 has no predecessor, hence y starts at 1. */
    int sum = 0;
    int row, col;

    for (row = 1; row < h; row++) {
        for (col = 0; col < 16; col++) {
            int d = s1[col] - s2[col] - s1[col + stride] + s2[col + stride];
            sum += d >= 0 ? d : -d;
        }
        s1 += stride;
        s2 += stride;
    }

    return sum;
}
03657
/* Square of a single value; argument is evaluated twice, so only pass
 * side-effect-free expressions. */
#define SQ(a) ((a)*(a))

/**
 * Generate vsse_intra8_c / vsse_intra16_c: sum of squared differences
 * between vertically adjacent pixels within a single plane ('size'
 * wide, h rows).  Squared-error counterpart of VSAD_INTRA above; the
 * second pixel pointer is unused.
 */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0; \
    int x,y; \
\
    for(y=1; y<h; y++){ \
        for(x=0; x<size; x+=4){ \
            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride]) \
                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \
        } \
        s+= stride; \
    } \
\
    return score; \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
03676
static int vsse16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    /* Vertical SSE of the difference signal: squared-error counterpart
     * of vsad16_c over a 16-pixel-wide area. */
    int sum = 0;
    int row, col;

    for (row = 1; row < h; row++) {
        for (col = 0; col < 16; col++) {
            int d = s1[col] - s2[col] - s1[col + stride] + s2[col + stride];
            sum += d * d;
        }
        s1 += stride;
        s2 += stride;
    }

    return sum;
}
03691
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    /* Sum of squared differences between an int8 vector and an int16
     * vector of the same length. */
    int acc = 0;
    int k;

    for (k = 0; k < size; k++) {
        int d = pix1[k] - pix2[k];
        acc += d * d;
    }
    return acc;
}
03700
/* Instantiate 16x16 comparison functions from the 8x8 kernels: each
 * *16_c variant applies the 8x8 function to the four 8x8 quadrants and
 * sums the results (WRAPPER8_16_SQ is defined earlier in this file). */
WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#if CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
03711
static void vector_fmul_c(float *dst, const float *src0, const float *src1, int len){
    /* Element-wise product: dst[k] = src0[k] * src1[k]. */
    int k = 0;
    while (k < len) {
        dst[k] = src0[k] * src1[k];
        k++;
    }
}
03717
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    /* dst[k] = src0[k] * src1[len-1-k]: multiply src0 by src1 read backwards. */
    const float *rev = src1 + len - 1;
    int k;

    for (k = 0; k < len; k++)
        dst[k] = src0[k] * rev[-k];
}
03724
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
    /* Fused multiply-add: dst[k] = src0[k] * src1[k] + src2[k]. */
    int k = 0;
    while (k < len) {
        dst[k] = src0[k] * src1[k] + src2[k];
        ++k;
    }
}
03730
static void vector_fmul_window_c(float *dst, const float *src0,
                                 const float *src1, const float *win, int len)
{
    /* Overlap-add windowing of two half-frames into one 2*len output.
     * For each k in [0,len): with i = k-len and j = len-1-k (mirrored
     * indices around the centre),
     *   dst[i] = src0[i]*win[j] - src1[j]*win[i]
     *   dst[j] = src0[i]*win[i] + src1[j]*win[j]
     * after dst, win and src0 have been advanced by len. */
    int k;

    dst  += len;
    win  += len;
    src0 += len;

    for (k = 0; k < len; k++) {
        int i = k - len;
        int j = len - 1 - k;
        float a  = src0[i];
        float b  = src1[j];
        float wi = win[i];
        float wj = win[j];

        dst[i] = a * wj - b * wi;
        dst[j] = a * wi + b * wj;
    }
}
03747
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    /* Scale a float vector by a scalar: dst[k] = src[k] * mul. */
    int k = 0;
    while (k < len) {
        dst[k] = src[k] * mul;
        ++k;
    }
}
03755
static void vector_fmul_sv_scalar_2_c(float *dst, const float *src,
                                      const float **sv, float mul, int len)
{
    /* Multiply src by a scalar and by per-pair 2-element vectors:
     * one pointer from sv is consumed for every two outputs. */
    int k;
    for (k = 0; k < len; k += 2) {
        const float *v = *sv++;
        dst[k]     = src[k]     * v[0] * mul;
        dst[k + 1] = src[k + 1] * v[1] * mul;
    }
}
03765
static void vector_fmul_sv_scalar_4_c(float *dst, const float *src,
                                      const float **sv, float mul, int len)
{
    /* Multiply src by a scalar and by per-quad 4-element vectors:
     * one pointer from sv is consumed for every four outputs. */
    int k;
    for (k = 0; k < len; k += 4) {
        const float *v = *sv++;
        dst[k]     = src[k]     * v[0] * mul;
        dst[k + 1] = src[k + 1] * v[1] * mul;
        dst[k + 2] = src[k + 2] * v[2] * mul;
        dst[k + 3] = src[k + 3] * v[3] * mul;
    }
}
03777
static void sv_fmul_scalar_2_c(float *dst, const float **sv, float mul,
                               int len)
{
    /* Scale a sequence of 2-element vectors (one sv entry per output
     * pair) by a scalar. */
    int k;
    for (k = 0; k < len; k += 2) {
        const float *v = *sv++;
        dst[k]     = v[0] * mul;
        dst[k + 1] = v[1] * mul;
    }
}
03787
static void sv_fmul_scalar_4_c(float *dst, const float **sv, float mul,
                               int len)
{
    /* Scale a sequence of 4-element vectors (one sv entry per output
     * quad) by a scalar. */
    int k;
    for (k = 0; k < len; k += 4) {
        const float *v = *sv++;
        dst[k]     = v[0] * mul;
        dst[k + 1] = v[1] * mul;
        dst[k + 2] = v[2] * mul;
        dst[k + 3] = v[3] * mul;
    }
}
03799
static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                int len)
{
    /* In-place butterfly: (v1[k], v2[k]) <- (v1[k]+v2[k], v1[k]-v2[k]).
     * The restrict qualifiers promise the two vectors do not overlap. */
    int k;
    for (k = 0; k < len; k++) {
        float sum  = v1[k] + v2[k];
        float diff = v1[k] - v2[k];
        v1[k] = sum;
        v2[k] = diff;
    }
}
03810
static float scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    /* Dot product of two float vectors, accumulated left to right. */
    float acc = 0.0;

    while (len-- > 0)
        acc += *v1++ * *v2++;

    return acc;
}
03821
/**
 * Clip one float, given as its IEEE-754 single-precision bit pattern,
 * against precomputed bounds.  Relies on the fact that for negative
 * floats a larger bit pattern means a smaller value, while for positive
 * floats (after flipping the sign bit) unsigned comparison matches
 * numeric order.  Assumes *min < 0 < *max at the caller.
 *
 * @param a        bit pattern of the input value
 * @param mini     bit pattern of the (negative) lower bound
 * @param maxi     bit pattern of the (positive) upper bound
 * @param maxisign maxi with the sign bit flipped
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                        uint32_t maxi, uint32_t maxisign)
{
    if (a > mini)
        return mini;                /* more negative than the lower bound */
    else if ((a ^ (1U << 31)) > maxisign)
        return maxi;                /* larger than the upper bound */
    else
        return a;
}

/**
 * Clip len floats (len must be a multiple of 8) into [*min, *max],
 * where *min < 0 < *max, using unsigned integer compares on the bit
 * patterns instead of float compares.
 * NOTE(review): the float->uint32_t pointer casts violate strict
 * aliasing; kept as-is since this is an established fast path here —
 * a memcpy-based reinterpretation would be the conforming alternative.
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
    int i;
    uint32_t mini = *(uint32_t*)min;
    uint32_t maxi = *(uint32_t*)max;
    /* was "maxi ^ (1<<31)": left-shifting 1 into the sign bit of a
     * signed int is undefined behavior in C; 1U<<31 is well defined
     * and yields the same bit pattern. */
    uint32_t maxisign = maxi ^ (1U << 31);
    uint32_t *dsti = (uint32_t*)dst;
    const uint32_t *srci = (const uint32_t*)src;
    for(i=0; i<len; i+=8) {
        dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
        dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
        dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
        dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
        dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
        dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
        dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
        dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
    }
}
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    /* Clamp len floats (len must be a multiple of 8) into [min, max].
     * When the bounds straddle zero, use the bit-pattern fast path. */
    int i;

    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
        return;
    }

    for (i = 0; i < len; i += 8) {
        int k;
        for (k = 0; k < 8; k++)
            dst[i + k] = av_clipf(src[i + k], min, max);
    }
}
03866
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order, int shift)
{
    /* Dot product of two int16 vectors; each product is right-shifted
     * by 'shift' before being accumulated (not the final sum). */
    int acc = 0;
    int k;

    for (k = 0; k < order; k++)
        acc += (v1[k] * v2[k]) >> shift;

    return acc;
}
03876
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    /* Return the dot product of v1 and v2 while simultaneously updating
     * v1 in place: v1[k] += mul * v3[k].  The dot product uses the
     * values of v1 *before* the update. */
    int acc = 0;
    int k;

    for (k = 0; k < order; k++) {
        acc   += v1[k] * v2[k];
        v1[k] += mul * v3[k];
    }
    return acc;
}
03886
/* Fixed-point weights for the WMV2 inverse DCT below.
 * NOTE(review): these appear to be round(2048*sqrt(2)*cos(k*pi/16))
 * for k = 1..7 with W0 = W4 = 2048 — confirm against the WMV2 spec. */
#define W0 2048
#define W1 2841
#define W2 2676
#define W3 2408
#define W4 2048
#define W5 1609
#define W6 1108
#define W7 565
03895
/**
 * In-place inverse DCT on one 8-element row of a WMV2 block.
 * Classic even/odd butterfly decomposition with the fixed-point
 * weights W0..W7; each output is rounded (+1<<7) and scaled down
 * by 8 bits.
 */
static void wmv2_idct_row(short * b)
{
    int s1,s2;
    int a0,a1,a2,a3,a4,a5,a6,a7;

    /* odd part */
    a1 = W1*b[1]+W7*b[7];
    a7 = W7*b[1]-W1*b[7];
    a5 = W5*b[5]+W3*b[3];
    a3 = W3*b[5]-W5*b[3];
    /* even part */
    a2 = W2*b[2]+W6*b[6];
    a6 = W6*b[2]-W2*b[6];
    a0 = W0*b[0]+W0*b[4];
    a4 = W0*b[0]-W0*b[4];

    /* 181/256 ~= 1/sqrt(2): combine the odd terms for outputs 1,2,5,6 */
    s1 = (181*(a1-a5+a7-a3)+128)>>8;
    s2 = (181*(a1-a5-a7+a3)+128)>>8;

    /* final butterflies with rounding */
    b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
    b[1] = (a4+a6 +s1   + (1<<7))>>8;
    b[2] = (a4-a6 +s2   + (1<<7))>>8;
    b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
    b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
    b[5] = (a4-a6 -s2   + (1<<7))>>8;
    b[6] = (a4+a6 -s1   + (1<<7))>>8;
    b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
}
/**
 * In-place inverse DCT on one column (stride 8) of a WMV2 block.
 * Same butterfly structure as wmv2_idct_row, but the intermediate
 * products are pre-scaled down by 3 bits (with rounding) and the
 * final outputs by 14 bits, completing the row+column normalization.
 */
static void wmv2_idct_col(short * b)
{
    int s1,s2;
    int a0,a1,a2,a3,a4,a5,a6,a7;

    /* odd part (pre-scaled by >>3 with +4 rounding) */
    a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
    a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
    a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
    a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
    /* even part */
    a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
    a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
    a0 = (W0*b[8*0]+W0*b[8*4]    )>>3;
    a4 = (W0*b[8*0]-W0*b[8*4]    )>>3;

    /* 181/256 ~= 1/sqrt(2) */
    s1 = (181*(a1-a5+a7-a3)+128)>>8;
    s2 = (181*(a1-a5-a7+a3)+128)>>8;

    /* final butterflies with rounding */
    b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
    b[8*1] = (a4+a6 +s1   + (1<<13))>>14;
    b[8*2] = (a4-a6 +s2   + (1<<13))>>14;
    b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;

    b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
    b[8*5] = (a4-a6 -s2   + (1<<13))>>14;
    b[8*6] = (a4+a6 -s1   + (1<<13))>>14;
    b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
}
void ff_wmv2_idct_c(short * block){
    /* Full 8x8 WMV2 inverse DCT: transform all rows, then all columns,
     * in place. */
    int n;

    for (n = 0; n < 64; n += 8)
        wmv2_idct_row(block + n);

    for (n = 0; n < 8; n++)
        wmv2_idct_col(block + n);
}
03959
03960
/* WMV2 IDCT then store: transform the block in place and write the
 * result to dest, clamped to [0,255]. */
static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    ff_put_pixels_clamped_c(block, dest, line_size);
}
/* WMV2 IDCT then add: transform the block in place and add the result
 * to dest with clamping. */
static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    ff_add_pixels_clamped_c(block, dest, line_size);
}
/* Reference (libjpeg-style) 8x8 IDCT then store/add. */
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    ff_put_pixels_clamped_c(block, dest, line_size);
}
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    ff_add_pixels_clamped_c(block, dest, line_size);
}

/* 4x4 reduced-resolution (lowres=1) variants. */
static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    put_pixels_clamped4_c(block, dest, line_size);
}
static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    add_pixels_clamped4_c(block, dest, line_size);
}

/* 2x2 reduced-resolution (lowres=2) variants. */
static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    put_pixels_clamped2_c(block, dest, line_size);
}
static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    add_pixels_clamped2_c(block, dest, line_size);
}

/* 1x1 (lowres=3) variants: only the DC coefficient survives, so the
 * "IDCT" is just a rounded scale of block[0] clamped via ff_cropTbl. */
static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    dest[0] = cm[(block[0] + 4)>>3];
}
static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
}
04016
/* Do-nothing function installed as the default prefetch hook. */
static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
04018
04019
04020 av_cold void dsputil_static_init(void)
04021 {
04022 int i;
04023
04024 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
04025 for(i=0;i<MAX_NEG_CROP;i++) {
04026 ff_cropTbl[i] = 0;
04027 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
04028 }
04029
04030 for(i=0;i<512;i++) {
04031 ff_squareTbl[i] = (i - 256) * (i - 256);
04032 }
04033
04034 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
04035 }
04036
/**
 * Verify that the compiler honors 16-byte alignment of stack variables,
 * which the SIMD code paths rely on.  Logs a warning only once.
 * @return 0 if the stack is correctly aligned, -1 otherwise.
 */
int ff_check_alignment(void){
    static int did_fail=0;
    DECLARE_ALIGNED(16, int, aligned);

    if((intptr_t)&aligned & 15){
        if(!did_fail){
#if HAVE_MMX || HAVE_ALTIVEC
            av_log(NULL, AV_LOG_ERROR,
                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
                "and may be very slow or crash. This is not a bug in libavcodec,\n"
                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
                "Do not report crashes to FFmpeg developers.\n");
#endif
            did_fail=1;
        }
        return -1;
    }
    return 0;
}
04056
/**
 * Fill the DSPContext function-pointer table with the C reference
 * implementations selected by the codec options in avctx, then let the
 * per-architecture initializers override individual entries with
 * optimized versions, and finally build the IDCT coefficient
 * permutation table matching the chosen IDCT.
 */
av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
{
    int i;

    ff_check_alignment();

#if CONFIG_ENCODERS
    /* forward DCT selection (encoders only) */
    if(avctx->dct_algo==FF_DCT_FASTINT) {
        c->fdct = fdct_ifast;
        c->fdct248 = fdct_ifast248;
    }
    else if(avctx->dct_algo==FF_DCT_FAAN) {
        c->fdct = ff_faandct;
        c->fdct248 = ff_faandct248;
    }
    else {
        c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
        c->fdct248 = ff_fdct248_islow;
    }
#endif //CONFIG_ENCODERS

    /* inverse DCT selection: reduced-size variants for lowres decoding,
     * otherwise per the requested idct_algo */
    if(avctx->lowres==1){
        if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO || !CONFIG_H264_DECODER){
            c->idct_put= ff_jref_idct4_put;
            c->idct_add= ff_jref_idct4_add;
        }else{
            c->idct_put= ff_h264_lowres_idct_put_c;
            c->idct_add= ff_h264_lowres_idct_add_c;
        }
        c->idct    = j_rev_dct4;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else if(avctx->lowres==2){
        c->idct_put= ff_jref_idct2_put;
        c->idct_add= ff_jref_idct2_add;
        c->idct    = j_rev_dct2;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else if(avctx->lowres==3){
        c->idct_put= ff_jref_idct1_put;
        c->idct_add= ff_jref_idct1_add;
        c->idct    = j_rev_dct1;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else{
        if(avctx->idct_algo==FF_IDCT_INT){
            c->idct_put= ff_jref_idct_put;
            c->idct_add= ff_jref_idct_add;
            c->idct    = j_rev_dct;
            c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
        }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
                avctx->idct_algo==FF_IDCT_VP3){
            c->idct_put= ff_vp3_idct_put_c;
            c->idct_add= ff_vp3_idct_add_c;
            c->idct    = ff_vp3_idct_c;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else if(avctx->idct_algo==FF_IDCT_WMV2){
            c->idct_put= ff_wmv2_idct_put_c;
            c->idct_add= ff_wmv2_idct_add_c;
            c->idct    = ff_wmv2_idct_c;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else if(avctx->idct_algo==FF_IDCT_FAAN){
            c->idct_put= ff_faanidct_put;
            c->idct_add= ff_faanidct_add;
            c->idct    = ff_faanidct;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
            c->idct_put= ff_ea_idct_put_c;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else if(CONFIG_BINK_DECODER && avctx->idct_algo==FF_IDCT_BINK) {
            c->idct     = ff_bink_idct_c;
            c->idct_add = ff_bink_idct_add_c;
            c->idct_put = ff_bink_idct_put_c;
            c->idct_permutation_type = FF_NO_IDCT_PERM;
        }else{ //accurate/default
            c->idct_put= ff_simple_idct_put;
            c->idct_add= ff_simple_idct_add;
            c->idct    = ff_simple_idct;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }
    }

    /* pixel access / block helpers */
    c->get_pixels = get_pixels_c;
    c->diff_pixels = diff_pixels_c;
    c->put_pixels_clamped = ff_put_pixels_clamped_c;
    c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
    c->put_pixels_nonclamped = put_pixels_nonclamped_c;
    c->add_pixels_clamped = ff_add_pixels_clamped_c;
    c->add_pixels8 = add_pixels8_c;
    c->add_pixels4 = add_pixels4_c;
    c->sum_abs_dctelem = sum_abs_dctelem_c;
    c->emulated_edge_mc = ff_emulated_edge_mc;
    c->gmc1 = gmc1_c;
    c->gmc = ff_gmc_c;
    c->clear_block = clear_block_c;
    c->clear_blocks = clear_blocks_c;
    c->pix_sum = pix_sum_c;
    c->pix_norm1 = pix_norm1_c;

    c->fill_block_tab[0] = fill_block16_c;
    c->fill_block_tab[1] = fill_block8_c;
    c->scale_block = scale_block_c;

    /* SAD with optional half-pel interpolation: [size][hpel mode] */
    c->pix_abs[0][0] = pix_abs16_c;
    c->pix_abs[0][1] = pix_abs16_x2_c;
    c->pix_abs[0][2] = pix_abs16_y2_c;
    c->pix_abs[0][3] = pix_abs16_xy2_c;
    c->pix_abs[1][0] = pix_abs8_c;
    c->pix_abs[1][1] = pix_abs8_x2_c;
    c->pix_abs[1][2] = pix_abs8_y2_c;
    c->pix_abs[1][3] = pix_abs8_xy2_c;

    /* half-pel put/avg tables for 16/8/4/2 pixel widths */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \
    c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \
    c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \
    c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c

    dspfunc(put, 0, 16);
    dspfunc(put_no_rnd, 0, 16);
    dspfunc(put, 1, 8);
    dspfunc(put_no_rnd, 1, 8);
    dspfunc(put, 2, 4);
    dspfunc(put, 3, 2);

    dspfunc(avg, 0, 16);
    dspfunc(avg_no_rnd, 0, 16);
    dspfunc(avg, 1, 8);
    dspfunc(avg_no_rnd, 1, 8);
    dspfunc(avg, 2, 4);
    dspfunc(avg, 3, 2);
#undef dspfunc

    c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
    c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;

    /* third-pel motion compensation (SVQ3) */
    c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
    c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
    c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
    c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
    c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
    c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
    c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
    c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
    c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;

    c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
    c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
    c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
    c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
    c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
    c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
    c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
    c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
    c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;

    /* quarter-pel motion compensation: 16 sub-pixel positions each */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c

    dspfunc(put_qpel, 0, 16);
    dspfunc(put_no_rnd_qpel, 0, 16);

    dspfunc(avg_qpel, 0, 16);


    dspfunc(put_qpel, 1, 8);
    dspfunc(put_no_rnd_qpel, 1, 8);

    dspfunc(avg_qpel, 1, 8);


    dspfunc(put_h264_qpel, 0, 16);
    dspfunc(put_h264_qpel, 1, 8);
    dspfunc(put_h264_qpel, 2, 4);
    dspfunc(put_h264_qpel, 3, 2);
    dspfunc(avg_h264_qpel, 0, 16);
    dspfunc(avg_h264_qpel, 1, 8);
    dspfunc(avg_h264_qpel, 2, 4);

#undef dspfunc
    c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
    c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
    c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
    c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
    c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;

    c->draw_edges = draw_edges_c;

    /* codec-specific sub-initializers */
#if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
    ff_mlp_init(c, avctx);
#endif
#if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
    ff_intrax8dsp_init(c,avctx);
#endif
#if CONFIG_RV30_DECODER
    ff_rv30dsp_init(c,avctx);
#endif
#if CONFIG_RV40_DECODER
    ff_rv40dsp_init(c,avctx);
    c->put_rv40_qpel_pixels_tab[0][15] = put_rv40_qpel16_mc33_c;
    c->avg_rv40_qpel_pixels_tab[0][15] = avg_rv40_qpel16_mc33_c;
    c->put_rv40_qpel_pixels_tab[1][15] = put_rv40_qpel8_mc33_c;
    c->avg_rv40_qpel_pixels_tab[1][15] = avg_rv40_qpel8_mc33_c;
#endif

    c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
    c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
    c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
    c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
    c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
    c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;

    /* comparison metrics: [0] = 16x16, [1] = 8x8 variants */
#define SET_CMP_FUNC(name) \
    c->name[0]= name ## 16_c;\
    c->name[1]= name ## 8x8_c;

    SET_CMP_FUNC(hadamard8_diff)
    c->hadamard8_diff[4]= hadamard8_intra16_c;
    c->hadamard8_diff[5]= hadamard8_intra8x8_c;
    SET_CMP_FUNC(dct_sad)
    SET_CMP_FUNC(dct_max)
#if CONFIG_GPL
    SET_CMP_FUNC(dct264_sad)
#endif
    c->sad[0]= pix_abs16_c;
    c->sad[1]= pix_abs8_c;
    c->sse[0]= sse16_c;
    c->sse[1]= sse8_c;
    c->sse[2]= sse4_c;
    SET_CMP_FUNC(quant_psnr)
    SET_CMP_FUNC(rd)
    SET_CMP_FUNC(bit)
    c->vsad[0]= vsad16_c;
    c->vsad[4]= vsad_intra16_c;
    c->vsad[5]= vsad_intra8_c;
    c->vsse[0]= vsse16_c;
    c->vsse[4]= vsse_intra16_c;
    c->vsse[5]= vsse_intra8_c;
    c->nsse[0]= nsse16_c;
    c->nsse[1]= nsse8_c;
#if CONFIG_DWT
    ff_dsputil_init_dwt(c);
#endif

    c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;

    /* lossless / huffyuv helpers */
    c->add_bytes= add_bytes_c;
    c->add_bytes_l2= add_bytes_l2_c;
    c->diff_bytes= diff_bytes_c;
    c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
    c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
    c->add_hfyu_left_prediction  = add_hfyu_left_prediction_c;
    c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
    c->bswap_buf= bswap_buf;
#if CONFIG_PNG_DECODER
    c->add_png_paeth_prediction= ff_add_png_paeth_prediction;
#endif

    if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        c->h263_h_loop_filter= h263_h_loop_filter_c;
        c->h263_v_loop_filter= h263_v_loop_filter_c;
    }

    if (CONFIG_VP3_DECODER) {
        c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
        c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
        c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
    }

    c->h261_loop_filter= h261_loop_filter_c;

    c->try_8x8basis= try_8x8basis_c;
    c->add_8x8basis= add_8x8basis_c;

    /* audio float/int vector primitives */
#if CONFIG_VORBIS_DECODER
    c->vorbis_inverse_coupling = vorbis_inverse_coupling;
#endif
#if CONFIG_AC3_DECODER
    c->ac3_downmix = ff_ac3_downmix_c;
#endif
    c->vector_fmul = vector_fmul_c;
    c->vector_fmul_reverse = vector_fmul_reverse_c;
    c->vector_fmul_add = vector_fmul_add_c;
    c->vector_fmul_window = vector_fmul_window_c;
    c->vector_clipf = vector_clipf_c;
    c->scalarproduct_int16 = scalarproduct_int16_c;
    c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
    c->scalarproduct_float = scalarproduct_float_c;
    c->butterflies_float = butterflies_float_c;
    c->vector_fmul_scalar = vector_fmul_scalar_c;

    c->vector_fmul_sv_scalar[0] = vector_fmul_sv_scalar_2_c;
    c->vector_fmul_sv_scalar[1] = vector_fmul_sv_scalar_4_c;

    c->sv_fmul_scalar[0] = sv_fmul_scalar_2_c;
    c->sv_fmul_scalar[1] = sv_fmul_scalar_4_c;

    c->shrink[0]= av_image_copy_plane;
    c->shrink[1]= ff_shrink22;
    c->shrink[2]= ff_shrink44;
    c->shrink[3]= ff_shrink88;

    c->prefetch= just_return;

    memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
    memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));

    /* let architecture-specific code override the C implementations */
    if (HAVE_MMX)        dsputil_init_mmx   (c, avctx);
    if (ARCH_ARM)        dsputil_init_arm   (c, avctx);
    if (CONFIG_MLIB)     dsputil_init_mlib  (c, avctx);
    if (HAVE_VIS)        dsputil_init_vis   (c, avctx);
    if (ARCH_ALPHA)      dsputil_init_alpha (c, avctx);
    if (ARCH_PPC)        dsputil_init_ppc   (c, avctx);
    if (HAVE_MMI)        dsputil_init_mmi   (c, avctx);
    if (ARCH_SH4)        dsputil_init_sh4   (c, avctx);
    if (ARCH_BFIN)       dsputil_init_bfin  (c, avctx);

    /* fill any 2-tap entries the arch code left empty with the H.264
     * quarter-pel equivalents */
    for(i=0; i<64; i++){
        if(!c->put_2tap_qpel_pixels_tab[0][i])
            c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
        if(!c->avg_2tap_qpel_pixels_tab[0][i])
            c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
    }

    /* the full-pel RV30/RV40 entries reuse the H.264 full-pel functions */
    c->put_rv30_tpel_pixels_tab[0][0] = c->put_h264_qpel_pixels_tab[0][0];
    c->put_rv30_tpel_pixels_tab[1][0] = c->put_h264_qpel_pixels_tab[1][0];
    c->avg_rv30_tpel_pixels_tab[0][0] = c->avg_h264_qpel_pixels_tab[0][0];
    c->avg_rv30_tpel_pixels_tab[1][0] = c->avg_h264_qpel_pixels_tab[1][0];

    c->put_rv40_qpel_pixels_tab[0][0] = c->put_h264_qpel_pixels_tab[0][0];
    c->put_rv40_qpel_pixels_tab[1][0] = c->put_h264_qpel_pixels_tab[1][0];
    c->avg_rv40_qpel_pixels_tab[0][0] = c->avg_h264_qpel_pixels_tab[0][0];
    c->avg_rv40_qpel_pixels_tab[1][0] = c->avg_h264_qpel_pixels_tab[1][0];

    /* build the coefficient permutation matching the selected IDCT */
    switch(c->idct_permutation_type){
    case FF_NO_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= i;
        break;
    case FF_LIBMPEG2_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
        break;
    case FF_SIMPLE_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= simple_mmx_permutation[i];
        break;
    case FF_TRANSPOSE_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
        break;
    case FF_PARTTRANS_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
        break;
    case FF_SSE2_IDCT_PERM:
        for(i=0; i<64; i++)
            c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
    }
}
04436