#define VP9_SYNCCODE 0x498342

#define MAX_SEGMENT 8

unsigned coef[4][2][2][6][6][3];
unsigned eob[4][2][2][6][6][2];

{ 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
{ 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
{ 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
{ 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
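/*
 * Aside (editor's sketch, not part of vp9.c): bwh_tab[] above stores each
 * block size twice, first in 4x4-block units and then in 8x8-block units
 * clamped to a minimum of 1. Assuming the BS_64x64..BS_4x4 ordering used by
 * this file, pixel dimensions can be recovered like this:
 */
static int block_width_px_demo(const uint8_t bwh_tab[2][13][2], int bs)
{
    return bwh_tab[0][bs][0] * 4;   /* 4x4-unit width times 4 pixels */
}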
s->cols = (w + 7) >> 3;
s->rows = (h + 7) >> 3;

#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
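/*
 * Aside (editor's sketch): the assign() macro above slices one flat
 * allocation into several typed arrays by walking a byte pointer "p".
 * The same pattern in isolation, with illustrative array names that are
 * not from vp9.c:
 */
#include <stdint.h>
#include <stdlib.h>

static void carve_allocation_demo(int sb_cols)
{
    size_t total = sb_cols * (16 * sizeof(uint8_t) + 8 * sizeof(int16_t));
    uint8_t *base = malloc(total), *p = base;
    uint8_t *skip_ctx;
    int16_t *mv_vals;

    if (!base)
        return;
    skip_ctx = (uint8_t *) p; p += sb_cols * 16 * sizeof(*skip_ctx);
    mv_vals  = (int16_t *) p; p += sb_cols *  8 * sizeof(*mv_vals);
    (void) skip_ctx; (void) mv_vals;
    /* ... use skip_ctx and mv_vals; one free() releases everything ... */
    free(base);
}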
return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
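/*
 * Aside (editor's note): the line above is the body of
 * inv_recenter_nonneg(v, m); small v values alternate below and above the
 * reference m, and anything beyond 2*m passes through unchanged. For m = 5:
 *   v:      0  1  2  3  4  5  6  7  8  9  10 11 12
 *   result: 5  4  6  3  7  2  8  1  9  0  10 11 12
 * A spot-check of that table:
 */
#include <assert.h>

static void inv_recenter_nonneg_demo(void)
{
    assert(inv_recenter_nonneg(0,  5) == 5);
    assert(inv_recenter_nonneg(1,  5) == 4);
    assert(inv_recenter_nonneg(9,  5) == 0);
    assert(inv_recenter_nonneg(11, 5) == 11);
}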
static const int inv_map_table[254] = {
      7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
    189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
     10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
     25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
     40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
     55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
     70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
     86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
    101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
    116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
    131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
    146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
    161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
    177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
    192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
    207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
    222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
    237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
    252, 253,
};
int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
for (i = 0; i < 4; i++)
for (i = 0; i < 2; i++)
for (i = 0; i < 7; i++)
for (i = 0; i < 3; i++)
       "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
for (i = 0; i < 8; i++) {
int qyac, qydc, quvac, quvdc, lflvl, sh;

qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
qyac = av_clip_uintp2(qyac, 8);

av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
for (j = 1; j < 4; j++) {

for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
max = FFMAX(0, max - 1);
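/*
 * Aside (editor's sketch): the two lines above derive the upper bound on
 * log2(tile columns): the largest shift that still leaves at least four
 * superblock columns per tile. Standalone version; e.g. sb_cols = 16
 * yields 2 (at most four tile columns of four superblocks each):
 */
static int max_log2_tile_cols_demo(int sb_cols)
{
    int max;
    for (max = 0; (sb_cols >> max) >= 4; max++) ;
    return max > 0 ? max - 1 : 0;   /* same as FFMAX(0, max - 1) */
}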
if (size2 > size - (data2 - data)) {

for (i = 0; i < 2; i++)

for (i = 0; i < 2; i++)
    for (j = 0; j < 2; j++)

for (i = 0; i < 2; i++)
    for (j = 0; j < 3; j++)

for (i = 0; i < 4; i++) {
    for (j = 0; j < 2; j++)
        for (k = 0; k < 2; k++)
            for (l = 0; l < 6; l++)
                for (m = 0; m < 6; m++) {
                    if (m >= 3 && l == 0)
                    for (n = 0; n < 3; n++) {

    for (j = 0; j < 2; j++)
        for (k = 0; k < 2; k++)
            for (l = 0; l < 6; l++)
                for (m = 0; m < 6; m++) {

for (i = 0; i < 3; i++)

for (i = 0; i < 7; i++)
    for (j = 0; j < 3; j++)

for (i = 0; i < 4; i++)
    for (j = 0; j < 2; j++)

for (i = 0; i < 4; i++)

for (i = 0; i < 5; i++)

for (i = 0; i < 5; i++) {

for (i = 0; i < 5; i++)

for (i = 0; i < 4; i++)
    for (j = 0; j < 9; j++)

for (i = 0; i < 4; i++)
    for (j = 0; j < 4; j++)
        for (k = 0; k < 3; k++)

for (i = 0; i < 3; i++)

for (i = 0; i < 2; i++) {
    for (j = 0; j < 10; j++)

    for (j = 0; j < 10; j++)

for (i = 0; i < 2; i++) {
    for (j = 0; j < 2; j++)
        for (k = 0; k < 3; k++)

    for (j = 0; j < 3; j++)

for (i = 0; i < 2; i++) {

return (data2 - data) + size2;
static void find_ref_mvs(VP9Context *s, VP56mv *pmv,
                         int ref, int z, int idx, int sb)
{
    static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
        [BS_64x64] = {{  3, -1 }, { -1,  3 }, {  4, -1 }, { -1,  4 },
                      { -1, -1 }, {  0, -1 }, { -1,  0 }, {  6, -1 }},
        [BS_64x32] = {{  0, -1 }, { -1,  0 }, {  4, -1 }, { -1,  2 },
                      { -1, -1 }, {  0, -3 }, { -3,  0 }, {  2, -1 }},
        [BS_32x64] = {{ -1,  0 }, {  0, -1 }, { -1,  4 }, {  2, -1 },
                      { -1, -1 }, { -3,  0 }, {  0, -3 }, { -1,  2 }},
        [BS_32x32] = {{  1, -1 }, { -1,  1 }, {  2, -1 }, { -1,  2 },
                      { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 }},
        [BS_32x16] = {{  0, -1 }, { -1,  0 }, {  2, -1 }, { -1, -1 },
                      { -1,  1 }, {  0, -3 }, { -3,  0 }, { -3, -3 }},
        [BS_16x32] = {{ -1,  0 }, {  0, -1 }, { -1,  2 }, { -1, -1 },
                      {  1, -1 }, { -3,  0 }, {  0, -3 }, { -3, -3 }},
        [BS_16x16] = {{  0, -1 }, { -1,  0 }, {  1, -1 }, { -1,  1 },
                      { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 }},
        [BS_16x8]  = {{  0, -1 }, { -1,  0 }, {  1, -1 }, { -1, -1 },
                      {  0, -2 }, { -2,  0 }, { -2, -1 }, { -1, -2 }},
        [BS_8x16]  = {{ -1,  0 }, {  0, -1 }, { -1,  1 }, { -1, -1 },
                      { -2,  0 }, {  0, -2 }, { -1, -2 }, { -2, -1 }},
        [BS_8x8]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                      { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
        [BS_8x4]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                      { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
        [BS_4x8]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                      { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
        [BS_4x4]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                      { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
    };
    int row = s->row, col = s->col, row7 = s->row7;
    const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
#define INVALID_MV 0x80008000U
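/*
 * Aside (editor's sketch): INVALID_MV packs the pattern 0x8000 into both
 * 16-bit MV components so a whole VP56mv can be loaded with AV_RN32A() and
 * compared as a single 32-bit integer; the decoder relies on clamped MVs
 * never taking exactly this bit pattern. The same test with plain memcpy
 * and an illustrative struct:
 */
#include <stdint.h>
#include <string.h>

typedef struct { int16_t x, y; } demo_mv;

static int mv_is_valid_demo(const demo_mv *mv)
{
    uint32_t bits;
    memcpy(&bits, mv, sizeof(bits));   /* unaligned-safe 32-bit load */
    return bits != 0x80008000U;        /* symmetric, so endian-agnostic */
}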
#define RETURN_DIRECT_MV(mv) \
        uint32_t m = AV_RN32A(&mv); \
    } else if (mem == INVALID_MV) { \
    } else if (m != mem) { \

    if (sb == 2 || sb == 1) {
    } else if (sb == 3) {

#define RETURN_MV(mv) \
        clamp_mv(&tmp, &mv, s); \
        m = AV_RN32A(&tmp); \
    } else if (mem == INVALID_MV) { \
    } else if (m != mem) { \
        uint32_t m = AV_RN32A(&mv); \
        clamp_mv(pmv, &mv, s); \
    } else if (mem == INVALID_MV) { \
    } else if (m != mem) { \
        clamp_mv(pmv, &mv, s); \
    if (mv->ref[0] == ref) {
    } else if (mv->ref[1] == ref) {

    if (mv->ref[0] == ref) {
    } else if (mv->ref[1] == ref) {

    for (; i < 8; i++) {
        int c = p[i][0] + col, r = p[i][1] + row;

        if (mv->ref[0] == ref) {
        } else if (mv->ref[1] == ref) {

    if (mv->ref[0] == ref) {
    } else if (mv->ref[1] == ref) {
#define RETURN_SCALE_MV(mv, scale) \
        VP56mv mv_temp = { -mv.x, -mv.y }; \
        RETURN_MV(mv_temp); \

    for (i = 0; i < 8; i++) {
        int c = p[i][0] + col, r = p[i][1] + row;

        if (mv->ref[0] != ref && mv->ref[0] >= 0) {
        if (mv->ref[1] != ref && mv->ref[1] >= 0 &&

    if (mv->ref[0] != ref && mv->ref[0] >= 0) {
    if (mv->ref[1] != ref && mv->ref[1] >= 0 &&

#undef RETURN_SCALE_MV
    for (n = 0, m = 0; m < c; m++) {
    n = (n << 3) | (bit << 1);
    return sign ? -(n + 1) : (n + 1);
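/*
 * Aside (editor's note): the decoded MV-component magnitude n is biased by
 * one before the sign applies, so even n = 0 produces a nonzero component
 * (an exactly-zero component is signalled separately by the MV joint).
 * E.g. sign = 1, n = 3 decodes to -4:
 */
static int apply_mv_sign_demo(int sign, int n)
{
    return sign ? -(n + 1) : (n + 1);   /* same mapping as the line above */
}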
                 mode == NEWMV ? -1 : sb);
    if ((mode == NEWMV || sb == -1) &&
    if (mode == NEWMV) {

                 mode == NEWMV ? -1 : sb);
    if ((mode == NEWMV || sb == -1) &&
    if (mode == NEWMV) {
    int v16 = v * 0x0101;
    uint32_t v32 = v * 0x01010101;
    uint64_t v64 = v * 0x0101010101010101ULL;
    uint32_t v32 = v * 0x01010101;
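/*
 * Aside (editor's sketch): multiplying a byte by 0x01...01 replicates it
 * into every byte of a wider word, which is what the v16/v32/v64 lines
 * above set up: one wide store then splats a context value across a run of
 * bytes. In isolation:
 */
#include <stdint.h>
#include <string.h>

static void splat8_demo(uint8_t *dst, uint8_t v)
{
    uint64_t v64 = v * 0x0101010101010101ULL;  /* v copied into all 8 bytes */
    memcpy(dst, &v64, 8);                      /* single 64-bit write */
}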
    0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
    0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf

    int row = s->row, col = s->col, row7 = s->row7;
    enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
    int vref, filter_id;
    for (y = 0; y < h4; y++) {
        int idx_base = (y + row) * 8 * s->sb_cols + col;
        for (x = 0; x < w4; x++)
            pred = FFMIN(pred, refsegmap[idx_base + x]);

               &refsegmap[idx_base], w4);
    if (have_a && have_l) {
    } else if (have_l) {

    l[0] = a[1] = b->mode[1];
    l[0] = a[1] = b->mode[1] = b->mode[0];

    l[1] = a[1] = b->mode[3];
    l[1] = a[1] = b->mode[3] = b->mode[2];

    l[1] = a[1] = b->mode[3] = b->mode[1];

    } else if (b->intra) {
    static const uint8_t size_group[10] = {
        3, 3, 3, 3, 2, 2, 2, 1, 1, 1
    };
    int sz = size_group[b->bs];
    static const uint8_t inter_mode_ctx_lut[14][14] = {
        { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
        { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
        { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
        { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
        { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
        { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
        { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
        { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
        { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
        { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
        { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
        { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
        { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
        { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
    };
    } else if (have_l) {

    if (refl == refa && refa == s->varcompref[1]) {

    c = (refa == refl) ? 3 : 1;
    c = (refl == refa) ? 4 : 2;

    } else if (have_l) {

    } else if (have_l) {

    b->ref[0] = 1 + bit;

    static const uint8_t off[10] = {
        3, 0, 0, 1, 0, 0, 0, 0, 0, 0
    };
#if HAVE_FAST_64BIT
#define SPLAT_CTX(var, val, n) \
    switch (n) { \
    case 1:  var = val;                                    break; \
    case 2:  AV_WN16A(&var, val *             0x0101);     break; \
    case 4:  AV_WN32A(&var, val *         0x01010101);     break; \
    case 8:  AV_WN64A(&var, val * 0x0101010101010101ULL);  break; \
    case 16: { \
        uint64_t v64 = val * 0x0101010101010101ULL; \
        AV_WN64A(              &var,     v64); \
        AV_WN64A(&((uint8_t *) &var)[8], v64); \
        break; \
    } \
    }
#else
#define SPLAT_CTX(var, val, n) \
    switch (n) { \
    case 1:  var = val;                            break; \
    case 2:  AV_WN16A(&var, val *     0x0101);     break; \
    case 4:  AV_WN32A(&var, val * 0x01010101);     break; \
    case 8: { \
        uint32_t v32 = val * 0x01010101; \
        AV_WN32A(              &var,     v32); \
        AV_WN32A(&((uint8_t *) &var)[4], v32); \
        break; \
    } \
    case 16: { \
        uint32_t v32 = val * 0x01010101; \
        AV_WN32A(              &var,      v32); \
        AV_WN32A(&((uint8_t *) &var)[4],  v32); \
        AV_WN32A(&((uint8_t *) &var)[8],  v32); \
        AV_WN32A(&((uint8_t *) &var)[12], v32); \
        break; \
    } \
    }
#endif
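/*
 * Aside (editor's sketch): the two SPLAT_CTX definitions above are the same
 * operation, compiled differently for targets with and without fast 64-bit
 * stores. A portable version, with memcpy standing in for the AV_WNxxA
 * aligned-store macros:
 */
#include <stdint.h>
#include <string.h>

static void splat_ctx_demo(uint8_t *var, uint8_t val, int n)
{
    switch (n) {
    case 1: *var = val; break;
    case 2: { uint16_t v16 = val * 0x0101;      memcpy(var, &v16, 2); break; }
    case 4: { uint32_t v32 = val * 0x01010101U; memcpy(var, &v32, 4); break; }
    default: memset(var, val, n); break;   /* n == 8 or 16 */
    }
}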
#define SET_CTXS(dir, off, n) \
    do { \
        SPLAT_CTX(s->dir##_skip_ctx[off],      b->skip,          n); \
        SPLAT_CTX(s->dir##_txfm_ctx[off],      b->tx,            n); \
        SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
        if (!s->keyframe && !s->intraonly) { \
            SPLAT_CTX(s->dir##_intra_ctx[off], b->intra,   n); \
            SPLAT_CTX(s->dir##_comp_ctx[off],  b->comp,    n); \
            SPLAT_CTX(s->dir##_mode_ctx[off],  b->mode[3], n); \
            if (!b->intra) { \
                SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
                if (s->filtermode == FILTER_SWITCHABLE) { \
                    SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
                } \
            } \
        } \
    } while (0)
    switch (w4) {
    case 1: SET_CTXS(above, col, 1); break;
    case 2: SET_CTXS(above, col, 2); break;
    case 4: SET_CTXS(above, col, 4); break;
    case 8: SET_CTXS(above, col, 8); break;
    }
    switch (h4) {
    case 1: SET_CTXS(left, row7, 1); break;
    case 2: SET_CTXS(left, row7, 2); break;
    case 4: SET_CTXS(left, row7, 4); break;
    case 8: SET_CTXS(left, row7, 8); break;
    }
    for (n = 0; n < w4 * 2; n++) {
    for (n = 0; n < h4 * 2; n++) {

    for (y = 0; y < h4; y++) {
        int x, o = (row + y) * s->sb_cols * 8 + col;

        for (x = 0; x < w4; x++) {
        } else if (b->comp) {
            for (x = 0; x < w4; x++) {
                mv[x].ref[0] = b->ref[0];
                mv[x].ref[1] = b->ref[1];

            for (x = 0; x < w4; x++) {
                mv[x].ref[0] = b->ref[0];
static av_always_inline int decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef,
                                                    int n_coeffs, int is_tx32x32,
                                                    unsigned (*cnt)[6][3],
                                                    unsigned (*eob)[6][2], uint8_t (*p)[6][11],
                                                    int nnz, const int16_t *scan,
                                                    const int16_t (*nb)[2],
                                                    const int16_t *band_counts,
                                                    const int16_t *qmul)
{
    int i = 0, band = 0, band_left = band_counts[band];

    cnt[band][nnz][0]++;
        band_left = band_counts[++band];
    nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
    if (++i == n_coeffs)
    cnt[band][nnz][1]++;
    cnt[band][nnz][2]++;
    cache[rc] = val = 2;

            band_left = band_counts[++band];
        nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
    } while (++i < n_coeffs);
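/*
 * Aside (editor's note): between coefficients, the "nnz" context for the
 * next token is a rounded average of the two already-decoded neighbours
 * given by the scan-neighbour table nb[]; cache[] holds clamped token
 * magnitudes (0, 1 or 2), so the result is again in 0..2:
 */
#include <stdint.h>

static int next_nnz_ctx_demo(const uint8_t *cache, const int16_t (*nb)[2], int i)
{
    return (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
}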
static int decode_coeffs_b(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
                           unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
                           uint8_t (*p)[6][11], int nnz, const int16_t *scan,
                           const int16_t (*nb)[2], const int16_t *band_counts,
                           const int16_t *qmul)
{
    return decode_coeffs_b_generic(c, coef, n_coeffs, 0, cnt, eob, p,
                                   nnz, scan, nb, band_counts, qmul);
}

static int decode_coeffs_b32(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
                             unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
                             uint8_t (*p)[6][11], int nnz, const int16_t *scan,
                             const int16_t (*nb)[2], const int16_t *band_counts,
                             const int16_t *qmul)
{
    return decode_coeffs_b_generic(c, coef, n_coeffs, 1, cnt, eob, p,
                                   nnz, scan, nb, band_counts, qmul);
}
    int row = s->row, col = s->col;
    int end_x = FFMIN(2 * (s->cols - col), w4);
    int end_y = FFMIN(2 * (s->rows - row), h4);
    int n, pl, x, y, res;
    const int16_t * const *yscans = vp9_scans[tx];

    static const int16_t band_counts[4][8] = {
        { 1, 2, 3, 4,  3,   16 - 13 },
        { 1, 2, 3, 4, 11,   64 - 21 },
        { 1, 2, 3, 4, 11,  256 - 21 },
        { 1, 2, 3, 4, 11, 1024 - 21 },
    };
    const int16_t *y_band_counts  = band_counts[b->tx];
    const int16_t *uv_band_counts = band_counts[b->uvtx];
#define MERGE(la, end, step, rd) \
    for (n = 0; n < end; n += step) \
        la[n] = !!rd(&la[n])
#define MERGE_CTX(step, rd) \
    do { \
        MERGE(l, end_y, step, rd); \
        MERGE(a, end_x, step, rd); \
    } while (0)

#define DECODE_Y_COEF_LOOP(step, mode_index, v) \
    for (n = 0, y = 0; y < end_y; y += step) { \
        for (x = 0; x < end_x; x += step, n += step * step) { \
            enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
            res = decode_coeffs_b##v(&s->c, s->block + 16 * n, 16 * step * step, \
                                     c, e, p, a[x] + l[y], yscans[txtp], \
                                     ynbs[txtp], y_band_counts, qmul[0]); \
            a[x] = l[y] = !!res; \
            AV_WN16A(&s->eob[n], res); \
#define SPLAT(la, end, step, cond) \
    if (step == 2) { \
        for (n = 1; n < end; n += step) \
            la[n] = la[n - 1]; \
    } else if (step == 4) { \
        if (cond) { \
            for (n = 0; n < end; n += step) \
                AV_WN32A(&la[n], la[n] * 0x01010101); \
        } else { \
            for (n = 0; n < end; n += step) \
                memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
        } \
    } else /* step == 8 */ { \
        if (cond) { \
            if (HAVE_FAST_64BIT) { \
                for (n = 0; n < end; n += step) \
                    AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
            } else { \
                for (n = 0; n < end; n += step) { \
                    uint32_t v32 = la[n] * 0x01010101; \
                    AV_WN32A(&la[n],     v32); \
                    AV_WN32A(&la[n + 4], v32); \
                } \
            } \
        } else { \
            for (n = 0; n < end; n += step) \
                memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
        } \
    }
#define SPLAT_CTX(step) \
    do { \
        SPLAT(a, end_x, step, end_x == w4); \
        SPLAT(l, end_y, step, end_y == h4); \
    } while (0)
#define DECODE_UV_COEF_LOOP(step) \
    for (n = 0, y = 0; y < end_y; y += step) { \
        for (x = 0; x < end_x; x += step, n += step * step) { \
            res = decode_coeffs_b(&s->c, s->uvblock[pl] + 16 * n, \
                                  16 * step * step, c, e, p, a[x] + l[y], \
                                  uvscan, uvnb, uv_band_counts, qmul[1]); \
            a[x] = l[y] = !!res; \
            AV_WN16A(&s->uveob[pl][n], res); \
            s->uveob[pl][n] = res; \

    for (pl = 0; pl < 2; pl++) {

                               1024, c, e, p, a[0] + l[0],
                               uvscan, uvnb, uv_band_counts, qmul[1]);
        a[0] = l[0] = !!res;
static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
                                             uint8_t *dst_edge, ptrdiff_t stride_edge,
                                             uint8_t *dst_inner, ptrdiff_t stride_inner,
                                             uint8_t *l, int col, int x, int w,
                                             int row, int y, enum TxfmMode tx, int p)
{
    int have_top = row > 0 || y > 0;
    int have_right = x < w - 1;
    static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {

    static const struct {

        [DC_PRED]         = { .needs_top = 1, .needs_left = 1 },

        [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
        [HOR_DOWN_PRED]   = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },

        [HOR_UP_PRED]     = { .needs_left = 1, .invert_left = 1 },
        [TM_VP8_PRED]     = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },

    mode = mode_conv[mode][have_left][have_top];
    if (edges[mode].needs_top) {
        int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !p) - x) * 4;
        int n_px_need_tr = 0;

        if (tx == TX_4X4 && edges[mode].needs_topright && have_right)

        top = !(row & 7) && !y ?
              y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
        topleft = !(row & 7) && !y ?
                  y == 0 || x == 0 ? &dst_edge[-stride_edge] :
                  &dst_inner[-stride_inner];

        if ((!edges[mode].needs_topleft || (have_left && top == topleft)) &&
            (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
            n_px_need + n_px_need_tr <= n_px_have) {

            if (n_px_need <= n_px_have) {
                memcpy(*a, top, n_px_need);

                memcpy(*a, top, n_px_have);
                memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
                       n_px_need - n_px_have);

            memset(*a, 127, n_px_need);
        if (edges[mode].needs_topleft) {
            if (have_left && have_top) {
                (*a)[-1] = topleft[-1];

                (*a)[-1] = have_top ? 129 : 127;
        if (tx == TX_4X4 && edges[mode].needs_topright) {
            if (have_top && have_right &&
                n_px_need + n_px_need_tr <= n_px_have) {
                memcpy(&(*a)[4], &top[4], 4);

                memset(&(*a)[4], (*a)[3], 4);

    if (edges[mode].needs_left) {
        int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !p) - y) * 4;
        uint8_t *dst = x == 0 ? dst_edge : dst_inner;
        ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;

        if (edges[mode].invert_left) {
            if (n_px_need <= n_px_have) {
                for (i = 0; i < n_px_need; i++)
                    l[i] = dst[i * stride - 1];

                for (i = 0; i < n_px_have; i++)
                    l[i] = dst[i * stride - 1];
                memset(&l[n_px_have], l[n_px_have - 1], n_px_need - n_px_have);

            if (n_px_need <= n_px_have) {
                for (i = 0; i < n_px_need; i++)
                    l[n_px_need - 1 - i] = dst[i * stride - 1];

                for (i = 0; i < n_px_have; i++)
                    l[n_px_need - 1 - i] = dst[i * stride - 1];
                memset(l, l[n_px_need - n_px_have], n_px_need - n_px_have);

        memset(l, 129, 4 << tx);
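/*
 * Aside (editor's note): when a prediction edge is unavailable,
 * check_intra_mode() substitutes constants around the mid-level 128: 127
 * for a missing top row, 129 for a missing left column, and
 * "have_top ? 129 : 127" for the corner, as the memsets above show.
 * The fallback rule in isolation:
 */
#include <stdint.h>
#include <string.h>

static void pad_missing_edges_demo(uint8_t *top, uint8_t *left, int n,
                                   int have_top, int have_left)
{
    if (!have_top)
        memset(top, 127, n);
    if (!have_left)
        memset(left, 129, n);
}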
    int row = s->row, col = s->col;
    int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
    int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
    int end_x = FFMIN(2 * (s->cols - col), w4);
    int end_y = FFMIN(2 * (s->rows - row), h4);
    int uvstep1d = 1 << b->uvtx, p;

    for (n = 0, y = 0; y < end_y; y += step1d) {
        uint8_t *ptr = dst, *ptr_r = dst_r;
        for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
             ptr_r += 4 * step1d, n += step) {

                             col, x, w4, row, y, b->tx, 0);

    step = 1 << (b->uvtx * 2);
    for (p = 0; p < 2; p++) {
        dst = s->dst[1 + p];
        for (n = 0, y = 0; y < end_y; y += uvstep1d) {
            uint8_t *ptr = dst, *ptr_r = dst_r;
            for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
                 ptr_r += 4 * uvstep1d, n += step) {

                                 col, x, w4, row, y, b->uvtx, p + 1);
static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
                                         uint8_t *dst, ptrdiff_t dst_stride,
                                         const uint8_t *ref, ptrdiff_t ref_stride,
                                         ThreadFrame *ref_frame,
                                         ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
                                         int bw, int bh, int w, int h)
{
    int mx = mv->x, my = mv->y, th;

    ref += y * ref_stride + x;

    th = (y + bh + 4 * !!my + 7) >> 6;

    if (x < !!mx * 3 || y < !!my * 3 ||
        x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
                                 ref - !!my * 3 * ref_stride - !!mx * 3,
                                 bw + !!mx * 7, bh + !!my * 7,
                                 x - !!mx * 3, y - !!my * 3, w, h);

    mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
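/*
 * Aside (editor's note): the bounds test in mc_luma_dir() asks whether the
 * 8-tap subpel filter would read outside the reference: a fractional mx/my
 * needs 3 extra pixels before and 4 after the block in that direction
 * (hence the "+ 7" emulated-edge dimensions above). Standalone form:
 */
static int needs_edge_emulation_demo(int x, int y, int mx, int my,
                                     int bw, int bh, int w, int h)
{
    return x < !!mx * 3 || y < !!my * 3 ||
           x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh;
}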
static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
                                           uint8_t *dst_u, uint8_t *dst_v,
                                           ptrdiff_t dst_stride,
                                           const uint8_t *ref_u, ptrdiff_t src_stride_u,
                                           const uint8_t *ref_v, ptrdiff_t src_stride_v,
                                           ThreadFrame *ref_frame,
                                           ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
                                           int bw, int bh, int w, int h)
{
    int mx = mv->x, my = mv->y, th;

    ref_u += y * src_stride_u + x;
    ref_v += y * src_stride_v + x;

    th = (y + bh + 4 * !!my + 7) >> 5;

    if (x < !!mx * 3 || y < !!my * 3 ||
        x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
            ref_u - !!my * 3 * src_stride_u - !!mx * 3,
            bw + !!mx * 7, bh + !!my * 7,
            x - !!mx * 3, y - !!my * 3, w, h);
        mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);

            ref_v - !!my * 3 * src_stride_v - !!mx * 3,
            bw + !!mx * 7, bh + !!my * 7,
            x - !!mx * 3, y - !!my * 3, w, h);
        mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
    } else {
        mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
        mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
    { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
    { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },

    int row = s->row, col = s->col;

                    row << 3, col << 3, &b->mv[0][0], 8, 4, w1, h1);
                s->dst[0] + 4 * ls_y, ls_y,
                    (row << 3) + 4, col << 3, &b->mv[2][0], 8, 4, w1, h1);

                ref2->data[0], ref2->linesize[0], tref2,
                    row << 3, col << 3, &b->mv[0][1], 8, 4, w2, h2);
                s->dst[0] + 4 * ls_y, ls_y,
                ref2->data[0], ref2->linesize[0], tref2,
                    (row << 3) + 4, col << 3, &b->mv[2][1], 8, 4, w2, h2);

                    row << 3, col << 3, &b->mv[0][0], 4, 8, w1, h1);
                    row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w1, h1);

                ref2->data[0], ref2->linesize[0], tref2,
                    row << 3, col << 3, &b->mv[0][1], 4, 8, w2, h2);
                ref2->data[0], ref2->linesize[0], tref2,
                    row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w2, h2);

                    row << 3, col << 3, &b->mv[0][0], 4, 4, w1, h1);
                    row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w1, h1);
                s->dst[0] + 4 * ls_y, ls_y,
                    (row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w1, h1);
                s->dst[0] + 4 * ls_y + 4, ls_y,
                    (row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w1, h1);

                ref2->data[0], ref2->linesize[0], tref2,
                    row << 3, col << 3, &b->mv[0][1], 4, 4, w2, h2);
                ref2->data[0], ref2->linesize[0], tref2,
                    row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w2, h2);
                s->dst[0] + 4 * ls_y, ls_y,
                ref2->data[0], ref2->linesize[0], tref2,
                    (row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w2, h2);
                s->dst[0] + 4 * ls_y + 4, ls_y,
                ref2->data[0], ref2->linesize[0], tref2,
                    (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w2, h2);

        int bwl = bwlog_tab[0][b->bs];

                    row << 3, col << 3, &b->mv[0][0], bw, bh, w1, h1);

                ref2->data[0], ref2->linesize[0], tref2,
                    row << 3, col << 3, &b->mv[0][1], bw, bh, w2, h2);

        int bwl = bwlog_tab[1][b->bs];

                  s->dst[1], s->dst[2], ls_uv,
                      row << 2, col << 2, &mvuv, bw, bh, w1, h1);

                  s->dst[1], s->dst[2], ls_uv,
                  ref2->data[1], ref2->linesize[1],
                  ref2->data[2], ref2->linesize[2], tref2,
                      row << 2, col << 2, &mvuv, bw, bh, w2, h2);
    int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
    int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
    int end_x = FFMIN(2 * (s->cols - col), w4);
    int end_y = FFMIN(2 * (s->rows - row), h4);
    int uvstep1d = 1 << b->uvtx, p;

    for (n = 0, y = 0; y < end_y; y += step1d) {
        for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {

    step = 1 << (b->uvtx * 2);
    for (p = 0; p < 2; p++) {
        dst = s->dst[p + 1];
        for (n = 0, y = 0; y < end_y; y += uvstep1d) {
            for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
static av_always_inline void mask_edges(struct VP9Filter *lflvl, int is_uv,
                                        int row_and_7, int col_and_7,
                                        int w, int h, int col_end, int row_end,
                                        enum TxfmMode tx, int skip_inter)
{
    if (tx == TX_4X4 && is_uv) {

    if (tx == TX_4X4 && !skip_inter) {
        int t = 1 << col_and_7, m_col = (t << w) - t, y;
        int m_col_odd = (t << (w - 1)) - t;

            int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;

            for (y = row_and_7; y < h + row_and_7; y++) {
                int col_mask_id = 2 - !(y & 7);

                lflvl->mask[is_uv][0][y][1] |= m_row_8;
                lflvl->mask[is_uv][0][y][2] |= m_row_4;

                if ((col_end & 1) && (y & 1)) {
                    lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
                    lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;

            int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;

            for (y = row_and_7; y < h + row_and_7; y++) {
                int col_mask_id = 2 - !(y & 3);

                lflvl->mask[is_uv][0][y][1] |= m_row_8;
                lflvl->mask[is_uv][0][y][2] |= m_row_4;
                lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
                lflvl->mask[is_uv][0][y][3] |= m_col;
                lflvl->mask[is_uv][1][y][3] |= m_col;
    } else {
        int y, t = 1 << col_and_7, m_col = (t << w) - t;

            int mask_id = (tx == TX_8X8);
            int l2 = tx + is_uv - 1, step1d = 1 << l2;
            static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
            int m_row = m_col & masks[l2];

            if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
                int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
                int m_row_8 = m_row - m_row_16;

                for (y = row_and_7; y < h + row_and_7; y++) {
                    lflvl->mask[is_uv][0][y][0] |= m_row_16;
                    lflvl->mask[is_uv][0][y][1] |= m_row_8;

                for (y = row_and_7; y < h + row_and_7; y++)
                    lflvl->mask[is_uv][0][y][mask_id] |= m_row;

            if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
                for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
                    lflvl->mask[is_uv][1][y][0] |= m_col;
                if (y - row_and_7 == h - 1)
                    lflvl->mask[is_uv][1][y][1] |= m_col;

                for (y = row_and_7; y < h + row_and_7; y += step1d)
                    lflvl->mask[is_uv][1][y][mask_id] |= m_col;

        } else if (tx != TX_4X4) {
            mask_id = (tx == TX_8X8) || (is_uv && h == 1);
            lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
            mask_id = (tx == TX_8X8) || (is_uv && w == 1);
            for (y = row_and_7; y < h + row_and_7; y++)
                lflvl->mask[is_uv][0][y][mask_id] |= t;
        } else {
            int t8 = t & 0x01, t4 = t - t8;

            for (y = row_and_7; y < h + row_and_7; y++) {
                lflvl->mask[is_uv][0][y][2] |= t4;
                lflvl->mask[is_uv][0][y][1] |= t8;

            lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;

            int t8 = t & 0x11, t4 = t - t8;

            for (y = row_and_7; y < h + row_and_7; y++) {
                lflvl->mask[is_uv][0][y][2] |= t4;
                lflvl->mask[is_uv][0][y][1] |= t8;

            lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
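/*
 * Aside (editor's note): in mask_edges(), each mask word holds one bit per
 * 8-pixel column of the superblock; "t = 1 << col_and_7" is the block's
 * first column and "(t << w) - t" turns on w consecutive bits from there:
 */
static unsigned column_mask_demo(int col_and_7, int w)
{
    unsigned t = 1u << col_and_7;
    return (t << w) - t;   /* e.g. col_and_7 = 2, w = 3 -> 0b0011100 */
}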
static void decode_b(AVCodecContext *ctx, int row, int col,
                     struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
                     enum BlockLevel bl, enum BlockPartition bp)
{
    s->min_mv.x = -(128 + col * 64);
    s->min_mv.y = -(128 + row * 64);

    b->uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx));

#define SPLAT_ZERO_CTX(v, n) \
    switch (n) { \
    case 1:  v = 0;          break; \
    case 2:  AV_ZERO16(&v);  break; \
    case 4:  AV_ZERO32(&v);  break; \
    case 8:  AV_ZERO64(&v);  break; \
    case 16: AV_ZERO128(&v); break; \
    }
#define SPLAT_ZERO_YUV(dir, var, off, n) \
    do { \
        SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
        SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
        SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
    } while (0)

    s->block      += w4 * h4 * 64;
    s->uvblock[0] += w4 * h4 * 16;
    s->uvblock[1] += w4 * h4 * 16;
    s->eob        += 4 * w4 * h4;
    s->uveob[0]   += w4 * h4;
    s->uveob[1]   += w4 * h4;

    emu[0] = (col + w4) * 8 > f->linesize[0] ||
             (row + h4) > s->rows;
    emu[1] = (col + w4) * 4 > f->linesize[1] ||
             (row + h4) > s->rows;

    s->dst[0] = f->data[0] + yoff;
    s->dst[1] = f->data[1] + uvoff;
    s->dst[2] = f->data[2] + uvoff;

    for (n = 0; o < w; n++) {
                                 s->tmp_y + o, 64, h, 0, 0);

    for (n = 1; o < w; n++) {
                                 s->tmp_uv[0] + o, 32, h, 0, 0);
                                 s->tmp_uv[1] + o, 32, h, 0, 0);

    mask_edges(lflvl, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
    mask_edges(lflvl, 1, row7, col7, x_end, y_end,
               b->uvtx, skip_inter);

    limit >>= (sharp + 3) >> 2;
    limit   = FFMIN(limit, 9 - sharp);
    limit   = FFMAX(limit, 1);

    s->block      += w4 * h4 * 64;
    s->uvblock[0] += w4 * h4 * 16;
    s->uvblock[1] += w4 * h4 * 16;
    s->eob        += 4 * w4 * h4;
    s->uveob[0]   += w4 * h4;
    s->uveob[1]   += w4 * h4;
static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
                      ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    ptrdiff_t hbs = 4 >> bl;

        decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
    } else if (col + hbs < s->cols) {
        if (row + hbs < s->rows) {
                decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);

                decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 4 * uv_stride;
                decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);

                decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
                decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);

                decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
                          yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 4 * uv_stride;
                decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb(ctx, row + hbs, col + hbs, lflvl,
                          yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);

            decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
                      yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);

            decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
    } else if (row + hbs < s->rows) {
            decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 4 * uv_stride;
            decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);

            decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);

        decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
                          ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    ptrdiff_t hbs = 4 >> bl;

        decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
    } else if (s->b->bl == bl) {
        decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 4 * uv_stride;
            decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);

            decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);

        if (col + hbs < s->cols) {
            if (row + hbs < s->rows) {
                              uvoff + 4 * hbs, bl + 1);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 4 * uv_stride;
                decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                              yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);

                decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
        } else if (row + hbs < s->rows) {
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 4 * uv_stride;
            decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
                          int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
{
    for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
        uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
        unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
        unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
        unsigned hm = hm1 | hm2 | hm13 | hm23;

        for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
                int L = *l, H = L >> 4;

                if (hmask1[0] & x) {
                    if (hmask2[0] & x) {
                } else if (hm2 & x) {
                        [0](ptr, ls_y, E, I, H);
                        [0](ptr, ls_y, E, I, H);
            } else if (hm2 & x) {
                int L = l[8], H = L >> 4;
                    [0](ptr + 8 * ls_y, ls_y, E, I, H);
                int L = *l, H = L >> 4;
            } else if (hm23 & x) {
                int L = l[8], H = L >> 4;

    dst = f->data[0] + yoff;
    for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
        uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
        unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];

        for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
                int L = *l, H = L >> 4;

                if (vmask[0] & (x << 1)) {
                } else if (vm & (x << 1)) {
                        [!!(vmask[1] & (x << 1))]
                        [1](ptr, ls_y, E, I, H);
                        [1](ptr, ls_y, E, I, H);
            } else if (vm & (x << 1)) {
                int L = l[1], H = L >> 4;
                    [1](ptr + 8, ls_y, E, I, H);
                int L = *l, H = L >> 4;
                if (vm3 & (x << 1)) {
            } else if (vm3 & (x << 1)) {
                int L = l[1], H = L >> 4;

    for (p = 0; p < 2; p++) {
        dst = f->data[1 + p] + uvoff;
        for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
            uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
            unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
            unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;

            for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
                    int L = *l, H = L >> 4;

                    if (hmask1[0] & x) {
                        if (hmask2[0] & x) {
                    } else if (hm2 & x) {
                            [0](ptr, ls_uv, E, I, H);
                            [0](ptr, ls_uv, E, I, H);
                } else if (hm2 & x) {
                    int L = l[16], H = L >> 4;
                        [0](ptr + 8 * ls_uv, ls_uv, E, I, H);

        dst = f->data[1 + p] + uvoff;
        for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
            uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
            unsigned vm = vmask[0] | vmask[1] | vmask[2];

            for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
                    int L = *l, H = L >> 4;

                    if (vmask[0] & (x << 2)) {
                    } else if (vm & (x << 2)) {
                            [!!(vmask[1] & (x << 2))]
                            [1](ptr, ls_uv, E, I, H);
                            [1](ptr, ls_uv, E, I, H);
                } else if (vm & (x << 2)) {
                    int L = l[2], H = L >> 4;
                        [1](ptr + 8, ls_uv, E, I, H);
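/*
 * Aside (editor's note): the filter loops above advance with
 * "for (x = 1; hm & ~(x - 1); x <<= 1)": x is the current column bit and
 * "hm & ~(x - 1)" checks whether any edge bits at or beyond that column
 * remain, so iteration stops as soon as the mask is exhausted:
 */
static void walk_edge_mask_demo(unsigned hm)
{
    unsigned x;
    for (x = 1; hm & ~(x - 1); x <<= 1)
        if (hm & x) {
            /* filter the edge in this 8-pixel column */
        }
}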
    int sb_start = ( idx      * n) >> log2_n;
    int sb_end   = ((idx + 1) * n) >> log2_n;
    *start = FFMIN(sb_start, n) << 3;
    *end   = FFMIN(sb_end,   n) << 3;
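/*
 * Aside (editor's note): set_tile_offset() splits n superblocks into 2^log2_n
 * tiles with the fixed-point division (idx * n) >> log2_n and converts to
 * 8x8-block units with << 3; the FFMIN() clamps are defensive. For n = 19
 * and log2_n = 2, tile starts fall at superblocks 0, 4, 9 and 14:
 */
static void tile_bounds_demo(int idx, int log2_n, int n, int *start, int *end)
{
    int sb_start = ( idx      * n) >> log2_n;
    int sb_end   = ((idx + 1) * n) >> log2_n;
    *start = sb_start << 3;
    *end   = sb_end   << 3;
}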
static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
                                        int max_count, int update_factor)
{
    unsigned ct = ct0 + ct1, p2, p1;

    p2 = ((ct0 << 8) + (ct >> 1)) / ct;
    p2 = av_clip(p2, 1, 255);
    ct = FFMIN(ct, max_count);
    update_factor = FASTDIV(update_factor * ct, max_count);

    *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
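/*
 * Aside (editor's sketch): adapt_prob() blends the old probability toward
 * the frequency observed in the just-decoded frame, weighting by how much
 * data was seen (ct, saturated at max_count). Worked example: *p = 128,
 * ct0 = 30, ct1 = 10, max_count = 20, update_factor = 128 gives p2 = 192,
 * a full weight of 128, and 128 + (((192 - 128) * 128 + 128) >> 8) = 160.
 * Standalone version with the clip and FASTDIV spelled out:
 */
#include <stdint.h>

static uint8_t adapt_prob_demo(uint8_t p1, unsigned ct0, unsigned ct1,
                               int max_count, int update_factor)
{
    unsigned ct = ct0 + ct1, p2;

    if (!ct)
        return p1;                              /* nothing observed */
    p2 = ((ct0 << 8) + (ct >> 1)) / ct;         /* rounded 256 * ct0 / ct */
    p2 = p2 < 1 ? 1 : p2 > 255 ? 255 : p2;      /* av_clip(p2, 1, 255) */
    ct = ct < (unsigned) max_count ? ct : (unsigned) max_count;
    update_factor = update_factor * (int) ct / max_count;
    return p1 + ((((int) p2 - p1) * update_factor + 128) >> 8);
}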
    for (i = 0; i < 4; i++)
        for (j = 0; j < 2; j++)
            for (k = 0; k < 2; k++)
                for (l = 0; l < 6; l++)
                    for (m = 0; m < 6; m++) {
                        if (l == 0 && m >= 3)

                        adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);

    for (i = 0; i < 3; i++)

    for (i = 0; i < 4; i++)

    for (i = 0; i < 5; i++)

    for (i = 0; i < 5; i++)

    for (i = 0; i < 5; i++) {

        adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
        adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);

    for (i = 0; i < 4; i++)
        for (j = 0; j < 4; j++) {

            adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
            adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);

    for (i = 0; i < 2; i++) {

        adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);

    for (i = 0; i < 4; i++) {

        adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);

    for (i = 0; i < 7; i++) {

        adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
        adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);

    adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
    adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);

    for (i = 0; i < 2; i++) {
        unsigned *c, (*c2)[2], sum;

        sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];

        adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);

        adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);

        adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);

        for (j = 0; j < 10; j++)
            adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);

        for (j = 0; j < 2; j++) {

            adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
            adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);

            adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
            adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);

    for (i = 0; i < 4; i++) {

        sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];

    for (i = 0; i < 10; i++) {

        sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
    for (i = 0; i < 2; i++) {
    for (i = 0; i < 8; i++) {

    int res, tile_row, tile_col, i, ref, row, col;
    ptrdiff_t yoff, uvoff, ls_y, ls_uv;

    } else if (res == 0) {

    for (i = 0; i < 8; i++) {

           "Failed to allocate block buffers\n");

    for (i = 0; i < 4; i++) {
        for (j = 0; j < 2; j++)
            for (k = 0; k < 2; k++)
                for (l = 0; l < 6; l++)
                    for (m = 0; m < 6; m++)

    if (tile_size > size) {

         row += 8, yoff += ls_y * 64, uvoff += ls_uv * 32) {
        ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;

            memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));

            for (col = s->tiling.tile_col_start;
                 col < s->tiling.tile_col_end;
                 col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {

                memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));

            memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));

        if (row + 8 < s->rows) {
                f->data[0] + yoff + 63 * ls_y,
                f->data[1] + uvoff + 31 * ls_uv,
                f->data[2] + uvoff + 31 * ls_uv,

        lflvl_ptr = s->lflvl;
        for (col = 0; col < s->cols;
             col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {

    } while (s->pass++ == 1);
    for (i = 0; i < 8; i++) {

    for (i = 0; i < 2; i++)
    for (i = 0; i < 8; i++)

    for (i = 0; i < 2; i++) {
    for (i = 0; i < 8; i++) {

        (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {

    for (i = 0; i < 2; i++) {
        if (ssrc->frames[i].tf.f->data[0]) {
    for (i = 0; i < 8; i++) {
        if (ssrc->next_refs[i].f->data[0]) {
    if (ssrc->segmentation.enabled) {