00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
#include <errno.h>

#include "avcodec.h"
#include "aactab.h"
#include "psymodel.h"
00030
00031
00032
00033
00034
00035
00036
00037
/**
 * Constants of the 3GPP-style threshold computation.
 *
 * The two spreading factors are multiplied by the Bark distance between
 * neighbouring bands and used as a negative power-of-ten exponent when the
 * spread_hi / spread_low tables are built in psy_3gpp_init().
 *
 * RPEMIN and RPELEV bound the threshold in psy_3gpp_analyze(): the threshold
 * is limited from above by RPELEV times the previous frame's thr_quiet and
 * from below by RPEMIN times its own unrestricted value.
 */
#define PSY_3GPP_SPREAD_HI   1.5f  // spreading factor for ascending threshold spreading (15 dB/Bark)
#define PSY_3GPP_SPREAD_LOW  3.0f  // spreading factor for descending threshold spreading (30 dB/Bark)

#define PSY_3GPP_RPEMIN      0.01f // lower bound factor applied to the current threshold
#define PSY_3GPP_RPELEV      2.0f  // factor applied to the previous frame's thr_quiet
00047
00048
/**
 * Sizes used by the LAME-derived window decision (psy_lame_window()).
 */
#define PSY_LAME_FIR_LEN        21   // length of the high-pass FIR run over the look-ahead samples
#define AAC_BLOCK_SIZE_LONG     1024 // number of samples filtered per call (one long block)
#define AAC_BLOCK_SIZE_SHORT    128  // short block size
#define AAC_NUM_BLOCKS_SHORT    8    // number of short blocks per frame
#define PSY_LAME_NUM_SUBBLOCKS  3    // number of sub-blocks each short block is divided into
00054
00055
/**
 * Psychoacoustic data of a single band.
 */
typedef struct AacPsyBand {
    float energy;    ///< sum of squared coefficients of the band
    float ffac;      ///< not accessed by the code in this file (presumably a form factor -- TODO confirm)
    float thr;       ///< masking threshold after spreading, ATH floor and previous-frame limiting
    float min_snr;   ///< not accessed by the code in this file (presumably a minimal SNR -- TODO confirm)
    float thr_quiet; ///< threshold after the ATH floor, before limiting against the previous frame
} AacPsyBand;
00069
00073 typedef struct AacPsyChannel{
00074 AacPsyBand band[128];
00075 AacPsyBand prev_band[128];
00076
00077 float win_energy;
00078 float iir_state[2];
00079 uint8_t next_grouping;
00080 enum WindowSequence next_window_seq;
00081
00082 float attack_threshold;
00083 float prev_energy_subshort[AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS];
00084 int prev_attack;
00085 }AacPsyChannel;
00086
/**
 * Per-band tables precomputed once in psy_3gpp_init() for one window size.
 */
typedef struct AacPsyCoeffs {
    float ath       [64]; ///< lowest ath() value inside the band, relative to ath(3410 Hz)
    float barks     [64]; ///< mean of the Bark value at the band's last line and at the previous band's last line
    float spread_low[64]; ///< attenuation for spreading a threshold towards the lower neighbour
    float spread_hi [64]; ///< attenuation for spreading a threshold towards the higher neighbour
} AacPsyCoeffs;
00096
00100 typedef struct AacPsyContext{
00101 AacPsyCoeffs psy_coef[2];
00102 AacPsyChannel *ch;
00103 }AacPsyContext;
00104
/**
 * One row of the LAME-derived preset tables.
 */
typedef struct {
    /**
     * Lookup key of the row. In psy_abr_map it is compared against the
     * bitrate in kbps per channel; psy_vbr_map is indexed by the requested
     * quality, which equals this value.
     */
    int   quality;

    float st_lrm; ///< value installed as the per-channel attack threshold
} PsyLamePreset;
00115
00119 static const PsyLamePreset psy_abr_map[] = {
00120
00121
00122 { 8, 6.60},
00123 { 16, 6.60},
00124 { 24, 6.60},
00125 { 32, 6.60},
00126 { 40, 6.60},
00127 { 48, 6.60},
00128 { 56, 6.60},
00129 { 64, 6.40},
00130 { 80, 6.00},
00131 { 96, 5.60},
00132 {112, 5.20},
00133 {128, 5.20},
00134 {160, 5.20}
00135 };
00136
00140 static const PsyLamePreset psy_vbr_map[] = {
00141
00142 { 0, 4.20},
00143 { 1, 4.20},
00144 { 2, 4.20},
00145 { 3, 4.20},
00146 { 4, 4.20},
00147 { 5, 4.20},
00148 { 6, 4.20},
00149 { 7, 4.20},
00150 { 8, 4.20},
00151 { 9, 4.20},
00152 {10, 4.20}
00153 };
00154
/**
 * Coefficients of the high-pass FIR used by psy_lame_window().
 *
 * Only one half of the kernel is stored: the filter applies each entry to a
 * pair of samples placed on either side of the centre tap, and the centre tap
 * itself has an implicit weight of 1. Every entry is pre-multiplied by 2.
 */
static const float psy_fir_coeffs[] = {
    -8.65163e-18 * 2,
    -0.00851586  * 2,
    -6.74764e-18 * 2,
     0.0209036   * 2,
    -3.36639e-17 * 2,
    -0.0438162   * 2,
    -1.54175e-17 * 2,
     0.0931738   * 2,
    -5.52212e-17 * 2,
    -0.313819    * 2
};
00163
00167 static float lame_calc_attack_threshold(int bitrate)
00168 {
00169
00170 int lower_range = 12, upper_range = 12;
00171 int lower_range_kbps = psy_abr_map[12].quality;
00172 int upper_range_kbps = psy_abr_map[12].quality;
00173 int i;
00174
00175
00176
00177
00178 for (i = 1; i < 13; i++) {
00179 if (FFMAX(bitrate, psy_abr_map[i].quality) != bitrate) {
00180 upper_range = i;
00181 upper_range_kbps = psy_abr_map[i ].quality;
00182 lower_range = i - 1;
00183 lower_range_kbps = psy_abr_map[i - 1].quality;
00184 break;
00185 }
00186 }
00187
00188
00189 if ((upper_range_kbps - bitrate) > (bitrate - lower_range_kbps))
00190 return psy_abr_map[lower_range].st_lrm;
00191 return psy_abr_map[upper_range].st_lrm;
00192 }
00193
00197 static void lame_window_init(AacPsyContext *ctx, AVCodecContext *avctx) {
00198 int i, j;
00199
00200 for (i = 0; i < avctx->channels; i++) {
00201 AacPsyChannel *pch = &ctx->ch[i];
00202
00203 if (avctx->flags & CODEC_FLAG_QSCALE)
00204 pch->attack_threshold = psy_vbr_map[avctx->global_quality / FF_QP2LAMBDA].st_lrm;
00205 else
00206 pch->attack_threshold = lame_calc_attack_threshold(avctx->bit_rate / avctx->channels / 1000);
00207
00208 for (j = 0; j < AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS; j++)
00209 pch->prev_energy_subshort[j] = 10.0f;
00210 }
00211 }
00212
00216 static av_cold float calc_bark(float f)
00217 {
00218 return 13.3f * atanf(0.00076f * f) + 3.5f * atanf((f / 7500.0f) * (f / 7500.0f));
00219 }
00220
/** Value passed as the `add` argument of ath() everywhere in this file. */
#define ATH_ADD 4
00222
00226 static av_cold float ath(float f, float add)
00227 {
00228 f /= 1000.0f;
00229 return 3.64 * pow(f, -0.8)
00230 - 6.8 * exp(-0.6 * (f - 3.4) * (f - 3.4))
00231 + 6.0 * exp(-0.15 * (f - 8.7) * (f - 8.7))
00232 + (0.6 + 0.04 * add) * 0.001 * f * f * f * f;
00233 }
00234
00235 static av_cold int psy_3gpp_init(FFPsyContext *ctx) {
00236 AacPsyContext *pctx;
00237 float bark;
00238 int i, j, g, start;
00239 float prev, minscale, minath;
00240
00241 ctx->model_priv_data = av_mallocz(sizeof(AacPsyContext));
00242 pctx = (AacPsyContext*) ctx->model_priv_data;
00243
00244 minath = ath(3410, ATH_ADD);
00245 for (j = 0; j < 2; j++) {
00246 AacPsyCoeffs *coeffs = &pctx->psy_coef[j];
00247 float line_to_frequency = ctx->avctx->sample_rate / (j ? 256.f : 2048.0f);
00248 i = 0;
00249 prev = 0.0;
00250 for (g = 0; g < ctx->num_bands[j]; g++) {
00251 i += ctx->bands[j][g];
00252 bark = calc_bark((i-1) * line_to_frequency);
00253 coeffs->barks[g] = (bark + prev) / 2.0;
00254 prev = bark;
00255 }
00256 for (g = 0; g < ctx->num_bands[j] - 1; g++) {
00257 coeffs->spread_low[g] = pow(10.0, -(coeffs->barks[g+1] - coeffs->barks[g]) * PSY_3GPP_SPREAD_LOW);
00258 coeffs->spread_hi [g] = pow(10.0, -(coeffs->barks[g+1] - coeffs->barks[g]) * PSY_3GPP_SPREAD_HI);
00259 }
00260 start = 0;
00261 for (g = 0; g < ctx->num_bands[j]; g++) {
00262 minscale = ath(start * line_to_frequency, ATH_ADD);
00263 for (i = 1; i < ctx->bands[j][g]; i++)
00264 minscale = FFMIN(minscale, ath((start + i) * line_to_frequency, ATH_ADD));
00265 coeffs->ath[g] = minscale - minath;
00266 start += ctx->bands[j][g];
00267 }
00268 }
00269
00270 pctx->ch = av_mallocz(sizeof(AacPsyChannel) * ctx->avctx->channels);
00271
00272 lame_window_init(pctx, ctx->avctx);
00273
00274 return 0;
00275 }
00276
/**
 * Run one sample through a first-order IIR filter.
 *
 * @param in    input sample
 * @param state filter memory, updated in place: state[0] holds the previous
 *              input, state[1] the previous output
 * @return 0.7548 * (in - previous input) + 0.5095 * previous output
 */
static float iir_filter(int in, float state[2])
{
    const float out = 0.7548f * (in - state[0]) + 0.5095f * state[1];

    state[0] = in;
    state[1] = out;
    return out;
}
00289
/**
 * Grouping bit patterns, selected by attack position.
 *
 * Index 0 is used when no attack was found; the other indices correspond to
 * the position of the detected attack. When groups are built, a cleared bit i
 * starts a new group at window i and a set bit attaches window i to the
 * current group.
 */
static const uint8_t window_grouping[9] = {
    0xB6,
    0x6C, 0xD8, 0xB2, 0x66,
    0xC6, 0x96, 0x36, 0x36
};
00296
00301 static FFPsyWindowInfo psy_3gpp_window(FFPsyContext *ctx,
00302 const int16_t *audio, const int16_t *la,
00303 int channel, int prev_type)
00304 {
00305 int i, j;
00306 int br = ctx->avctx->bit_rate / ctx->avctx->channels;
00307 int attack_ratio = br <= 16000 ? 18 : 10;
00308 AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
00309 AacPsyChannel *pch = &pctx->ch[channel];
00310 uint8_t grouping = 0;
00311 int next_type = pch->next_window_seq;
00312 FFPsyWindowInfo wi;
00313
00314 memset(&wi, 0, sizeof(wi));
00315 if (la) {
00316 float s[8], v;
00317 int switch_to_eight = 0;
00318 float sum = 0.0, sum2 = 0.0;
00319 int attack_n = 0;
00320 int stay_short = 0;
00321 for (i = 0; i < 8; i++) {
00322 for (j = 0; j < 128; j++) {
00323 v = iir_filter(la[(i*128+j)*ctx->avctx->channels], pch->iir_state);
00324 sum += v*v;
00325 }
00326 s[i] = sum;
00327 sum2 += sum;
00328 }
00329 for (i = 0; i < 8; i++) {
00330 if (s[i] > pch->win_energy * attack_ratio) {
00331 attack_n = i + 1;
00332 switch_to_eight = 1;
00333 break;
00334 }
00335 }
00336 pch->win_energy = pch->win_energy*7/8 + sum2/64;
00337
00338 wi.window_type[1] = prev_type;
00339 switch (prev_type) {
00340 case ONLY_LONG_SEQUENCE:
00341 wi.window_type[0] = switch_to_eight ? LONG_START_SEQUENCE : ONLY_LONG_SEQUENCE;
00342 next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : ONLY_LONG_SEQUENCE;
00343 break;
00344 case LONG_START_SEQUENCE:
00345 wi.window_type[0] = EIGHT_SHORT_SEQUENCE;
00346 grouping = pch->next_grouping;
00347 next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : LONG_STOP_SEQUENCE;
00348 break;
00349 case LONG_STOP_SEQUENCE:
00350 wi.window_type[0] = switch_to_eight ? LONG_START_SEQUENCE : ONLY_LONG_SEQUENCE;
00351 next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : ONLY_LONG_SEQUENCE;
00352 break;
00353 case EIGHT_SHORT_SEQUENCE:
00354 stay_short = next_type == EIGHT_SHORT_SEQUENCE || switch_to_eight;
00355 wi.window_type[0] = stay_short ? EIGHT_SHORT_SEQUENCE : LONG_STOP_SEQUENCE;
00356 grouping = next_type == EIGHT_SHORT_SEQUENCE ? pch->next_grouping : 0;
00357 next_type = switch_to_eight ? EIGHT_SHORT_SEQUENCE : LONG_STOP_SEQUENCE;
00358 break;
00359 }
00360
00361 pch->next_grouping = window_grouping[attack_n];
00362 pch->next_window_seq = next_type;
00363 } else {
00364 for (i = 0; i < 3; i++)
00365 wi.window_type[i] = prev_type;
00366 grouping = (prev_type == EIGHT_SHORT_SEQUENCE) ? window_grouping[0] : 0;
00367 }
00368
00369 wi.window_shape = 1;
00370 if (wi.window_type[0] != EIGHT_SHORT_SEQUENCE) {
00371 wi.num_windows = 1;
00372 wi.grouping[0] = 1;
00373 } else {
00374 int lastgrp = 0;
00375 wi.num_windows = 8;
00376 for (i = 0; i < 8; i++) {
00377 if (!((grouping >> i) & 1))
00378 lastgrp = i;
00379 wi.grouping[lastgrp]++;
00380 }
00381 }
00382
00383 return wi;
00384 }
00385
00389 static void psy_3gpp_analyze(FFPsyContext *ctx, int channel,
00390 const float *coefs, const FFPsyWindowInfo *wi)
00391 {
00392 AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
00393 AacPsyChannel *pch = &pctx->ch[channel];
00394 int start = 0;
00395 int i, w, g;
00396 const int num_bands = ctx->num_bands[wi->num_windows == 8];
00397 const uint8_t* band_sizes = ctx->bands[wi->num_windows == 8];
00398 AacPsyCoeffs *coeffs = &pctx->psy_coef[wi->num_windows == 8];
00399
00400
00401 for (w = 0; w < wi->num_windows*16; w += 16) {
00402 for (g = 0; g < num_bands; g++) {
00403 AacPsyBand *band = &pch->band[w+g];
00404 band->energy = 0.0f;
00405 for (i = 0; i < band_sizes[g]; i++)
00406 band->energy += coefs[start+i] * coefs[start+i];
00407 band->thr = band->energy * 0.001258925f;
00408 start += band_sizes[g];
00409
00410 ctx->psy_bands[channel*PSY_MAX_BANDS+w+g].energy = band->energy;
00411 }
00412 }
00413
00414 for (w = 0; w < wi->num_windows*16; w += 16) {
00415 AacPsyBand *band = &pch->band[w];
00416 for (g = 1; g < num_bands; g++)
00417 band[g].thr = FFMAX(band[g].thr, band[g-1].thr * coeffs->spread_hi [g]);
00418 for (g = num_bands - 2; g >= 0; g--)
00419 band[g].thr = FFMAX(band[g].thr, band[g+1].thr * coeffs->spread_low[g]);
00420 for (g = 0; g < num_bands; g++) {
00421 band[g].thr_quiet = band[g].thr = FFMAX(band[g].thr, coeffs->ath[g]);
00422 if (!(wi->window_type[0] == LONG_STOP_SEQUENCE || (wi->window_type[1] == LONG_START_SEQUENCE && !w)))
00423 band[g].thr = FFMAX(PSY_3GPP_RPEMIN*band[g].thr, FFMIN(band[g].thr,
00424 PSY_3GPP_RPELEV*pch->prev_band[w+g].thr_quiet));
00425
00426 ctx->psy_bands[channel*PSY_MAX_BANDS+w+g].threshold = band[g].thr;
00427 }
00428 }
00429 memcpy(pch->prev_band, pch->band, sizeof(pch->band));
00430 }
00431
00432 static av_cold void psy_3gpp_end(FFPsyContext *apc)
00433 {
00434 AacPsyContext *pctx = (AacPsyContext*) apc->model_priv_data;
00435 av_freep(&pctx->ch);
00436 av_freep(&apc->model_priv_data);
00437 }
00438
00439 static void lame_apply_block_type(AacPsyChannel *ctx, FFPsyWindowInfo *wi, int uselongblock)
00440 {
00441 int blocktype = ONLY_LONG_SEQUENCE;
00442 if (uselongblock) {
00443 if (ctx->next_window_seq == EIGHT_SHORT_SEQUENCE)
00444 blocktype = LONG_STOP_SEQUENCE;
00445 } else {
00446 blocktype = EIGHT_SHORT_SEQUENCE;
00447 if (ctx->next_window_seq == ONLY_LONG_SEQUENCE)
00448 ctx->next_window_seq = LONG_START_SEQUENCE;
00449 if (ctx->next_window_seq == LONG_STOP_SEQUENCE)
00450 ctx->next_window_seq = EIGHT_SHORT_SEQUENCE;
00451 }
00452
00453 wi->window_type[0] = ctx->next_window_seq;
00454 ctx->next_window_seq = blocktype;
00455 }
00456
00457 static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx,
00458 const int16_t *audio, const int16_t *la,
00459 int channel, int prev_type)
00460 {
00461 AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
00462 AacPsyChannel *pch = &pctx->ch[channel];
00463 int grouping = 0;
00464 int uselongblock = 1;
00465 int attacks[AAC_NUM_BLOCKS_SHORT + 1] = { 0 };
00466 int i;
00467 FFPsyWindowInfo wi;
00468
00469 memset(&wi, 0, sizeof(wi));
00470 if (la) {
00471 float hpfsmpl[AAC_BLOCK_SIZE_LONG];
00472 float const *pf = hpfsmpl;
00473 float attack_intensity[(AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS];
00474 float energy_subshort[(AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS];
00475 float energy_short[AAC_NUM_BLOCKS_SHORT + 1] = { 0 };
00476 int chans = ctx->avctx->channels;
00477 const int16_t *firbuf = la + (AAC_BLOCK_SIZE_SHORT/4 - PSY_LAME_FIR_LEN) * chans;
00478 int j, att_sum = 0;
00479
00480
00481 for (i = 0; i < AAC_BLOCK_SIZE_LONG; i++) {
00482 float sum1, sum2;
00483 sum1 = firbuf[(i + ((PSY_LAME_FIR_LEN - 1) / 2)) * chans];
00484 sum2 = 0.0;
00485 for (j = 0; j < ((PSY_LAME_FIR_LEN - 1) / 2) - 1; j += 2) {
00486 sum1 += psy_fir_coeffs[j] * (firbuf[(i + j) * chans] + firbuf[(i + PSY_LAME_FIR_LEN - j) * chans]);
00487 sum2 += psy_fir_coeffs[j + 1] * (firbuf[(i + j + 1) * chans] + firbuf[(i + PSY_LAME_FIR_LEN - j - 1) * chans]);
00488 }
00489 hpfsmpl[i] = sum1 + sum2;
00490 }
00491
00492
00493 for (i = 0; i < PSY_LAME_NUM_SUBBLOCKS; i++) {
00494 energy_subshort[i] = pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 1) * PSY_LAME_NUM_SUBBLOCKS)];
00495 assert(pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 2) * PSY_LAME_NUM_SUBBLOCKS + 1)] > 0);
00496 attack_intensity[i] = energy_subshort[i] / pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 2) * PSY_LAME_NUM_SUBBLOCKS + 1)];
00497 energy_short[0] += energy_subshort[i];
00498 }
00499
00500 for (i = 0; i < AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS; i++) {
00501 float const *const pfe = pf + AAC_BLOCK_SIZE_LONG / (AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS);
00502 float p = 1.0f;
00503 for (; pf < pfe; pf++)
00504 if (p < fabsf(*pf))
00505 p = fabsf(*pf);
00506 pch->prev_energy_subshort[i] = energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS] = p;
00507 energy_short[1 + i / PSY_LAME_NUM_SUBBLOCKS] += p;
00508
00509
00510
00511
00512
00513
00514
00515 if (p > energy_subshort[i + 1])
00516 p = p / energy_subshort[i + 1];
00517 else if (energy_subshort[i + 1] > p * 10.0f)
00518 p = energy_subshort[i + 1] / (p * 10.0f);
00519 else
00520 p = 0.0;
00521 attack_intensity[i + PSY_LAME_NUM_SUBBLOCKS] = p;
00522 }
00523
00524
00525 for (i = 0; i < (AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS; i++)
00526 if (!attacks[i / PSY_LAME_NUM_SUBBLOCKS])
00527 if (attack_intensity[i] > pch->attack_threshold)
00528 attacks[i / PSY_LAME_NUM_SUBBLOCKS] = (i % PSY_LAME_NUM_SUBBLOCKS) + 1;
00529
00530
00531
00532
00533
00534 for (i = 1; i < AAC_NUM_BLOCKS_SHORT + 1; i++) {
00535 float const u = energy_short[i - 1];
00536 float const v = energy_short[i];
00537 float const m = FFMAX(u, v);
00538 if (m < 40000) {
00539 if (u < 1.7f * v && v < 1.7f * u) {
00540 if (i == 1 && attacks[0] < attacks[i])
00541 attacks[0] = 0;
00542 attacks[i] = 0;
00543 }
00544 }
00545 att_sum += attacks[i];
00546 }
00547
00548 if (attacks[0] <= pch->prev_attack)
00549 attacks[0] = 0;
00550
00551 att_sum += attacks[0];
00552
00553 if (pch->prev_attack == 3 || att_sum) {
00554 uselongblock = 0;
00555
00556 if (attacks[1] && attacks[0])
00557 attacks[1] = 0;
00558 if (attacks[2] && attacks[1])
00559 attacks[2] = 0;
00560 if (attacks[3] && attacks[2])
00561 attacks[3] = 0;
00562 if (attacks[4] && attacks[3])
00563 attacks[4] = 0;
00564 if (attacks[5] && attacks[4])
00565 attacks[5] = 0;
00566 if (attacks[6] && attacks[5])
00567 attacks[6] = 0;
00568 if (attacks[7] && attacks[6])
00569 attacks[7] = 0;
00570 if (attacks[8] && attacks[7])
00571 attacks[8] = 0;
00572 }
00573 } else {
00574
00575 uselongblock = !(prev_type == EIGHT_SHORT_SEQUENCE);
00576 }
00577
00578 lame_apply_block_type(pch, &wi, uselongblock);
00579
00580 wi.window_type[1] = prev_type;
00581 if (wi.window_type[0] != EIGHT_SHORT_SEQUENCE) {
00582 wi.num_windows = 1;
00583 wi.grouping[0] = 1;
00584 if (wi.window_type[0] == LONG_START_SEQUENCE)
00585 wi.window_shape = 0;
00586 else
00587 wi.window_shape = 1;
00588 } else {
00589 int lastgrp = 0;
00590
00591 wi.num_windows = 8;
00592 wi.window_shape = 0;
00593 for (i = 0; i < 8; i++) {
00594 if (!((pch->next_grouping >> i) & 1))
00595 lastgrp = i;
00596 wi.grouping[lastgrp]++;
00597 }
00598 }
00599
00600
00601
00602
00603
00604
00605
00606 for (i = 0; i < 9; i++) {
00607 if (attacks[i]) {
00608 grouping = i;
00609 break;
00610 }
00611 }
00612 pch->next_grouping = window_grouping[grouping];
00613
00614 pch->prev_attack = attacks[8];
00615
00616 return wi;
00617 }
00618
00619 const FFPsyModel ff_aac_psy_model =
00620 {
00621 .name = "3GPP TS 26.403-inspired model",
00622 .init = psy_3gpp_init,
00623 .window = psy_lame_window,
00624 .analyze = psy_3gpp_analyze,
00625 .end = psy_3gpp_end,
00626 };