00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00028 #include <math.h>
00029 #include "avcodec.h"
00030 #include "get_bits.h"
00031 #include "put_bits.h"
00032 #include "wmavoice_data.h"
00033 #include "celp_math.h"
00034 #include "celp_filters.h"
00035 #include "acelp_vectors.h"
00036 #include "acelp_filters.h"
00037 #include "lsp.h"
00038 #include "libavutil/lzo.h"
00039 #include "avfft.h"
00040 #include "fft.h"
00041
/* Upper bound on blocks per frame (largest n_blocks in frame_descs[]). */
00042 #define MAX_BLOCKS 8
/* Upper bound on the number of LSPs; the decoder selects 10 or 16 at init. */
00043 #define MAX_LSPS 16
/* Head room (in floats) reserved in front of the synthesis filter output
 * in synth_filter_out_buf; equal to MAX_LSPS. */
00044 #define MAX_LSPS_ALIGN16 16
00045
/* NOTE(review): presumably the maximum number of frames per superframe;
 * in the visible code it is only used to size MAX_SFRAMESIZE. */
00046 #define MAX_FRAMES 3
/* Number of samples in one frame; block sizes are MAX_FRAMESIZE / n_blocks. */
00047 #define MAX_FRAMESIZE 160
/* Capacity of the signal history buffers; init rejects sample rates whose
 * history_nsamples (max_pitch_val + 8) would exceed this. */
00048 #define MAX_SIGNAL_HISTORY 416
/* Samples in MAX_FRAMES frames. */
00049 #define MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES)
/* Size in bytes of WMAVoiceContext.sframe_cache (before input padding). */
00051 #define SFRAME_CACHE_MAXSIZE 256
00052
/* Table lookup width, in bits, used when building frame_type_vlc. */
00053 #define VLC_NBITS 6
00054
00055
/**
 * VLC used to read the frame type index; built once by decode_vbmtree()
 * and mapped through WMAVoiceContext.vbm_tree to an entry of frame_descs[].
 */
00058 static VLC frame_type_vlc;
00059
/**
 * Adaptive codebook (ACB) types, as stored in frame_type_desc.acb_type.
 */
00063 enum {
/* No adaptive codebook: synth_block() takes the hardcoded-codebook path. */
00064 ACB_TYPE_NONE = 0,
/* One pitch value per frame; synth_block_fcb_acb() derives a per-sample
 * pitch from last_pitch_val and pitch_diff_sh16 and interpolates with
 * wmavoice_ipol1_coeffs. */
00065 ACB_TYPE_ASYMMETRIC = 1,
00066
00067
00068
00069
/* One pitch value per block, in quarter-sample units (block_pitch_sh2);
 * fractional lags are interpolated with wmavoice_ipol2_coeffs. */
00070 ACB_TYPE_HAMMING = 2
00071
00072
00073 };
00074
/**
 * Fixed codebook (FCB) types, as stored in frame_type_desc.fcb_type.
 */
00078 enum {
/* Excitation taken from wmavoice_std_codebook at a pRNG-chosen offset and
 * scaled by silence_gain; the post-filter skips denoising for this type. */
00079 FCB_TYPE_SILENCE = 0,
00080
00081
/* Excitation taken from wmavoice_std_codebook with offset and gain read
 * from the bitstream. */
00082 FCB_TYPE_HARDCODED = 1,
00083
/* Pulses decoded by aw_pulse_set1() and aw_pulse_set2(). */
00084 FCB_TYPE_AW_PULSES = 2,
00085
/* Five interleaved pulse tracks read directly from the bitstream, the
 * first dbl_pulses of which carry a second pulse. */
00086 FCB_TYPE_EXC_PULSES = 3,
00087
00088
00089 };
00090
/**
 * Description of the 17 frame types. The index into this table is obtained
 * by reading a frame_type_vlc code and mapping it through vbm_tree[].
 */
00094 static const struct frame_type_desc {
/* Number of blocks the frame is split into (1, 2, 4 or 8). */
00095 uint8_t n_blocks;
00096
/* log2(n_blocks); used to derive per-block bit widths and gain weights. */
00097 uint8_t log_n_blocks;
/* Adaptive codebook type (ACB_TYPE_*). */
00098 uint8_t acb_type;
/* Fixed codebook type (FCB_TYPE_*). */
00099 uint8_t fcb_type;
/* Number of pulse tracks (out of 5) that carry a second pulse when
 * fcb_type is FCB_TYPE_EXC_PULSES. */
00100 uint8_t dbl_pulses;
00101
00102
/* NOTE(review): presumably the coded frame size in bits; it is not read by
 * any code visible in this part of the file. */
00103 uint16_t frame_size;
00104
00105 } frame_descs[17] = {
00106 { 1, 0, ACB_TYPE_NONE, FCB_TYPE_SILENCE, 0, 0 },
00107 { 2, 1, ACB_TYPE_NONE, FCB_TYPE_HARDCODED, 0, 28 },
00108 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES, 0, 46 },
00109 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 80 },
00110 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 104 },
00111 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0, 108 },
00112 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 132 },
00113 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 168 },
00114 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 64 },
00115 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 80 },
00116 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 104 },
00117 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 108 },
00118 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 132 },
00119 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 168 },
00120 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 176 },
00121 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 208 },
00122 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 256 }
00123 };
00124
/**
 * WMA Voice decoding context. Fields in the first part are set up once by
 * wmavoice_decode_init(); the remainder is running decoder state.
 */
00128 typedef struct {
/* Bit reader; in init it is pointed at extradata to parse the VBM tree. */
00135 GetBitContext gb;
00136
00137
00138
/* Maps a frame_type_vlc code index to a frame_descs[] index; entries left
 * at -1 (0xff) are unused codes. Filled by decode_vbmtree(). */
00139 int8_t vbm_tree[25];
00140
/* 3 + ceil(log2(block_align)); NOTE(review): presumably the width of the
 * spillover-size field in the packet header - confirm against the parser. */
00141 int spillover_bitsize;
00142
00143
/* max_pitch_val + 8; must not exceed MAX_SIGNAL_HISTORY. */
00144 int history_nsamples;
00145
00146
00147
/* Bit 0 of the extradata flags; when set, the post-filter transforms
 * (rdft/irdft/dct/dst) and the sin/cos tables are initialised. */
00148 int do_apf;
00149
/* Extradata flags bits 2-5 (valid range 0..11); selects the row of
 * wmavoice_denoise_power_table used by calc_input_response(). */
00150 int denoise_strength;
00151
/* Extradata flags bit 6; enables the tilt correction step at the end of
 * calc_input_response(). */
00152 int denoise_tilt_corr;
00153
/* Extradata flags bits 7-10; values above 8 enable the order-2 transfer
 * function applied at the end of postfilter(). */
00154 int dc_level;
00155
00156
/* Number of LSPs: 16 if extradata flag 0x1000 is set, else 10. */
00157 int lsps;
/* Extradata flag 0x2000; passed as q_mode to select the interpolation
 * coefficient table in dequant_lsp10r()/dequant_lsp16r() - TODO confirm
 * at the call site, which is outside this part of the file. */
00158 int lsp_q_mode;
/* Extradata flag 0x4000; its use is outside this part of the file. */
00159 int lsp_def_mode;
00160
/* 24 (10 LSPs) or 34 (16 LSPs); equals the bits read by
 * dequant_lsp10i()/dequant_lsp16i(). */
00161 int frame_lsp_bitsize;
00162
/* 48 (10 LSPs) or 60 (16 LSPs); equals the bits read by
 * dequant_lsp10r()/dequant_lsp16r(). */
00163 int sframe_lsp_bitsize;
00164
00165
/* Smallest and largest pitch lag, derived from the sample rate in init. */
00166 int min_pitch_val;
00167 int max_pitch_val;
/* ceil(log2(max_pitch_val - min_pitch_val)): width of the per-frame pitch
 * field read in synth_frame(). */
00168 int pitch_nbits;
00169
/* ceil(log2(block_pitch_range)). */
00170 int block_pitch_nbits;
00171
/* Derived from block_conv_table[] in init (see the formula there). */
00172 int block_pitch_range;
/* 1 + ceil(log2(block_delta_pitch_hrange)). */
00173 int block_delta_pitch_nbits;
00174
00175
00176
/* (pitch_range >> 3) rounded down to a multiple of 16. */
00177 int block_delta_pitch_hrange;
00178
/* Break points derived from the pitch range in init; NOTE(review):
 * presumably used to convert coded block pitch values - the consumer is
 * outside this part of the file. */
00179 uint16_t block_conv_table[4];
00180
00181
/* NOTE(review): the next five fields (spillover_nbits ... sframe_cache_size)
 * are not referenced in this part of the file; they appear to hold
 * packet/superframe parsing state - confirm against the packet parser. */
00190 int spillover_nbits;
00191
00192
00193
00194 int has_residual_lsps;
00195
00196
00197
00198
00199 int skip_bits_next;
00200
00201
00202
00203 uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + FF_INPUT_BUFFER_PADDING_SIZE];
00206 int sframe_cache_size;
00207
00208
00209
00210
/* NOTE(review): bit writer, presumably used to fill sframe_cache - confirm. */
00211 PutBitContext pb;
00212
/* LSPs of the previous frame; initialised in init to pi*(n+1)/(lsps+1). */
00221 double prev_lsps[MAX_LSPS];
00222
/* Pitch of the previous frame (initialised to 40) and its ACB type. */
00223 int last_pitch_val;
00224 int last_acb_type;
/* Per-sample pitch increment in 16.16 fixed point, used by the
 * ACB_TYPE_ASYMMETRIC branch of synth_block_fcb_acb(). */
00225 int pitch_diff_sh16;
00226
/* Gain applied to the hardcoded codebook for FCB_TYPE_SILENCE blocks. */
00227 float silence_gain;
00228
/* Set by aw_parse_coords() when the start-offset index used the 2-bit
 * extension; aw_pulse_set1() then reads 10 instead of 12 bits for block 0. */
00229 int aw_idx_is_ext;
00230
/* 16 or 24, chosen by aw_parse_coords() from the two block pitches. */
00231 int aw_pulse_range;
00232
00233
00234
00235
00236
/* Per-block pulse count computed by aw_parse_coords(); a value <= 0 makes
 * aw_pulse_set1() use its two-pulse fallback. */
00237 int aw_n_pulses[2];
00238
00239
/* Per-block offset of the first pulse window, relative to block start. */
00240 int aw_first_pulse_off[2];
00241
/* Written by aw_pulse_set2() for block N, read back for block 1 as the
 * pulse offset. */
00242 int aw_next_pulse_off_cache;
00243
00244
00245
00246
00247
/* Frame counter; one of the inputs of pRNG() for silence frames. */
00248 int frame_cntr;
00249
/* History of clipped FCB gain codebook values, used as gain predictor
 * input in synth_block_fcb_acb(); zeroed by synth_block_hardcoded(). */
00250 float gain_pred_err[6];
/* NOTE(review): excitation/synthesis history carried between frames; the
 * code maintaining them is outside this part of the file. */
00251 float excitation_history[MAX_SIGNAL_HISTORY];
00255 float synth_history[MAX_LSPS];
00256
/* Forward/inverse real DFT contexts of size 2^7, post-filter only. */
00263 RDFTContext rdft, irdft;
00264
/* DCT-I and DST-I contexts of size 2^6, post-filter only. */
00265 DCTContext dct, dst;
00266
/* Lookup tables built in init from a 256-point sine window and indexed
 * with 255 + clipped phase in calc_input_response(). */
00267 float sin[511], cos[511];
00268
/* Gain memory of adaptive_gain_control(). */
00269 float postfilter_agc;
00270
/* Filter memory for the order-2 transfer function in postfilter(). */
00271 float dcf_mem[2];
/* NOTE(review): presumably backs the zero_exc_pf argument of postfilter(),
 * with history in front - confirm at the call site. */
00272 float zero_exc_pf[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE];
/* Tail of the denoise filter output that spills into following blocks,
 * plus the number of valid samples in it (see wiener_denoise()). */
00275 float denoise_filter_cache[MAX_FRAMESIZE];
00276 int denoise_filter_cache_size;
/* 128-float scratch buffers for wiener_denoise()/calc_input_response(). */
00277 DECLARE_ALIGNED(16, float, tilted_lpcs_pf)[0x80];
00279 DECLARE_ALIGNED(16, float, denoise_coeffs_pf)[0x80];
/* Post-filter synthesis output, preceded by MAX_LSPS_ALIGN16 floats that
 * hold the filter history. */
00281 DECLARE_ALIGNED(16, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16];
00284
00287 } WMAVoiceContext;
00288
00298 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
00299 {
00300 static const uint8_t bits[] = {
00301 2, 2, 2, 4, 4, 4,
00302 6, 6, 6, 8, 8, 8,
00303 10, 10, 10, 12, 12, 12,
00304 14, 14, 14, 14
00305 };
00306 static const uint16_t codes[] = {
00307 0x0000, 0x0001, 0x0002,
00308 0x000c, 0x000d, 0x000e,
00309 0x003c, 0x003d, 0x003e,
00310 0x00fc, 0x00fd, 0x00fe,
00311 0x03fc, 0x03fd, 0x03fe,
00312 0x0ffc, 0x0ffd, 0x0ffe,
00313 0x3ffc, 0x3ffd, 0x3ffe, 0x3fff
00314 };
00315 int cntr[8], n, res;
00316
00317 memset(vbm_tree, 0xff, sizeof(vbm_tree));
00318 memset(cntr, 0, sizeof(cntr));
00319 for (n = 0; n < 17; n++) {
00320 res = get_bits(gb, 3);
00321 if (cntr[res] > 3)
00322 return -1;
00323 vbm_tree[res * 3 + cntr[res]++] = n;
00324 }
00325 INIT_VLC_STATIC(&frame_type_vlc, VLC_NBITS, sizeof(bits),
00326 bits, 1, 1, codes, 2, 2, 132);
00327 return 0;
00328 }
00329
00333 static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
00334 {
00335 int n, flags, pitch_range, lsp16_flag;
00336 WMAVoiceContext *s = ctx->priv_data;
00337
00346 if (ctx->extradata_size != 46) {
00347 av_log(ctx, AV_LOG_ERROR,
00348 "Invalid extradata size %d (should be 46)\n",
00349 ctx->extradata_size);
00350 return -1;
00351 }
00352 flags = AV_RL32(ctx->extradata + 18);
00353 s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
00354 s->do_apf = flags & 0x1;
00355 if (s->do_apf) {
00356 ff_rdft_init(&s->rdft, 7, DFT_R2C);
00357 ff_rdft_init(&s->irdft, 7, IDFT_C2R);
00358 ff_dct_init(&s->dct, 6, DCT_I);
00359 ff_dct_init(&s->dst, 6, DST_I);
00360
00361 ff_sine_window_init(s->cos, 256);
00362 memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
00363 for (n = 0; n < 255; n++) {
00364 s->sin[n] = -s->sin[510 - n];
00365 s->cos[510 - n] = s->cos[n];
00366 }
00367 }
00368 s->denoise_strength = (flags >> 2) & 0xF;
00369 if (s->denoise_strength >= 12) {
00370 av_log(ctx, AV_LOG_ERROR,
00371 "Invalid denoise filter strength %d (max=11)\n",
00372 s->denoise_strength);
00373 return -1;
00374 }
00375 s->denoise_tilt_corr = !!(flags & 0x40);
00376 s->dc_level = (flags >> 7) & 0xF;
00377 s->lsp_q_mode = !!(flags & 0x2000);
00378 s->lsp_def_mode = !!(flags & 0x4000);
00379 lsp16_flag = flags & 0x1000;
00380 if (lsp16_flag) {
00381 s->lsps = 16;
00382 s->frame_lsp_bitsize = 34;
00383 s->sframe_lsp_bitsize = 60;
00384 } else {
00385 s->lsps = 10;
00386 s->frame_lsp_bitsize = 24;
00387 s->sframe_lsp_bitsize = 48;
00388 }
00389 for (n = 0; n < s->lsps; n++)
00390 s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
00391
00392 init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
00393 if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
00394 av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
00395 return -1;
00396 }
00397
00398 s->min_pitch_val = ((ctx->sample_rate << 8) / 400 + 50) >> 8;
00399 s->max_pitch_val = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
00400 pitch_range = s->max_pitch_val - s->min_pitch_val;
00401 s->pitch_nbits = av_ceil_log2(pitch_range);
00402 s->last_pitch_val = 40;
00403 s->last_acb_type = ACB_TYPE_NONE;
00404 s->history_nsamples = s->max_pitch_val + 8;
00405
00406 if (s->min_pitch_val < 1 || s->history_nsamples > MAX_SIGNAL_HISTORY) {
00407 int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
00408 max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
00409
00410 av_log(ctx, AV_LOG_ERROR,
00411 "Unsupported samplerate %d (min=%d, max=%d)\n",
00412 ctx->sample_rate, min_sr, max_sr);
00413
00414 return -1;
00415 }
00416
00417 s->block_conv_table[0] = s->min_pitch_val;
00418 s->block_conv_table[1] = (pitch_range * 25) >> 6;
00419 s->block_conv_table[2] = (pitch_range * 44) >> 6;
00420 s->block_conv_table[3] = s->max_pitch_val - 1;
00421 s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
00422 s->block_delta_pitch_nbits = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
00423 s->block_pitch_range = s->block_conv_table[2] +
00424 s->block_conv_table[3] + 1 +
00425 2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
00426 s->block_pitch_nbits = av_ceil_log2(s->block_pitch_range);
00427
00428 ctx->sample_fmt = AV_SAMPLE_FMT_FLT;
00429
00430 return 0;
00431 }
00432
/**
 * Adaptive gain control: scale the post-filter output so that its level
 * follows that of the unfiltered synthesis signal.
 *
 * The target gain is the ratio of the summed absolute values of
 * speech_synth and in; it is approached sample by sample through a
 * one-pole smoother with coefficient alpha.
 *
 * @param out          output buffer (size samples)
 * @param in           post-filter output to be scaled (size samples)
 * @param speech_synth reference synthesis signal (size samples)
 * @param size         number of samples
 * @param alpha        smoothing coefficient of the gain filter
 * @param gain_mem     in/out: gain filter memory, carried across calls
 */
static void adaptive_gain_control(float *out, const float *in,
                                  const float *speech_synth,
                                  int size, float alpha, float *gain_mem)
{
    int i;
    float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
    float mem = *gain_mem;

    for (i = 0; i < size; i++) {
        speech_energy     += fabsf(speech_synth[i]);
        postfilter_energy += fabsf(in[i]);
    }
    /* A silent post-filter output would make the ratio Inf/NaN, which would
     * then stick in gain_mem for all following calls; since the output is
     * zero anyway, use a zero target gain in that case. */
    gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :
                        (1.0 - alpha) * speech_energy / postfilter_energy;

    for (i = 0; i < size; i++) {
        mem = alpha * mem + gain_scale_factor;
        out[i] = in[i] * mem;
    }

    *gain_mem = mem;
}
00475
00494 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
00495 const float *in, float *out, int size)
00496 {
00497 int n;
00498 float optimal_gain = 0, dot;
00499 const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
00500 *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
00501 *best_hist_ptr;
00502
00503
00504 do {
00505 dot = ff_dot_productf(in, ptr, size);
00506 if (dot > optimal_gain) {
00507 optimal_gain = dot;
00508 best_hist_ptr = ptr;
00509 }
00510 } while (--ptr >= end);
00511
00512 if (optimal_gain <= 0)
00513 return -1;
00514 dot = ff_dot_productf(best_hist_ptr, best_hist_ptr, size);
00515 if (dot <= 0)
00516 return -1;
00517
00518 if (optimal_gain <= dot) {
00519 dot = dot / (dot + 0.6 * optimal_gain);
00520 } else
00521 dot = 0.625;
00522
00523
00524 for (n = 0; n < size; n++)
00525 out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
00526
00527 return 0;
00528 }
00529
/**
 * Compute the tilt factor of a set of LPCs, treating them as the filter
 * [1, lpcs[0], ..., lpcs[n_lpcs - 1]]: the ratio of its lag-1
 * autocorrelation to its lag-0 autocorrelation.
 *
 * @param lpcs   LPC coefficients (without the leading 1)
 * @param n_lpcs number of coefficients
 * @return lag-1 / lag-0 autocorrelation
 */
static float tilt_factor(const float *lpcs, int n_lpcs)
{
    const float lag0 = 1.0 + ff_dot_productf(lpcs, lpcs, n_lpcs);
    const float lag1 = lpcs[0] + ff_dot_productf(lpcs, &lpcs[1], n_lpcs - 1);

    return lag1 / lag0;
}
00549
/**
 * Derive the denoise filter coefficients from the (tilted) LPCs.
 *
 * @param s         decoder context (transform contexts, sin/cos tables,
 *                  denoise settings)
 * @param lpcs      128-float buffer holding the LPC filter on input; it is
 *                  used as scratch space and destroyed
 * @param fcb_type  fixed codebook type of the block; selects the gain factor
 * @param coeffs    128-float output buffer; on return the first remainder
 *                  entries hold the normalised filter, the rest is zero
 * @param remainder number of filter taps to keep
 */
00553 static void calc_input_response(WMAVoiceContext *s, float *lpcs,
00554 int fcb_type, float *coeffs, int remainder)
00555 {
00556 float last_coeff, min = 15.0, max = -15.0;
00557 float irange, angle_mul, gain_mul, range, sq;
00558 int n, idx;
00559
00560
/* Power spectrum of the LPC filter: forward RDFT, then log10 of the squared
 * magnitude per bin while tracking the minimum and maximum. In the RDFT
 * output, [0] and [1] are handled as two real-only bins (stored to lpcs[0]
 * and, via last_coeff, lpcs[64]); the rest are (re, im) pairs. */
00561 ff_rdft_calc(&s->rdft, lpcs);
00562 #define log_range(var, assign) do { \
00563 float tmp = log10f(assign); var = tmp; \
00564 max = FFMAX(max, tmp); min = FFMIN(min, tmp); \
00565 } while (0)
00566 log_range(last_coeff, lpcs[1] * lpcs[1]);
00567 for (n = 1; n < 64; n++)
00568 log_range(lpcs[n], lpcs[n * 2] * lpcs[n * 2] +
00569 lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
00570 log_range(lpcs[0], lpcs[0] * lpcs[0]);
00571 #undef log_range
00572 range = max - min;
00573 lpcs[64] = last_coeff;
00574
00575
00576
00577
00578
00579
/* Map each bin's distance below the maximum onto a row of the denoise power
 * table: lpcs[n] becomes a scaled power value (input of the transforms
 * below) and coeffs[n] a per-bin magnitude from wmavoice_energy_table. */
00580 irange = 64.0 / range;
00581 gain_mul = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
00582 (5.0 / 14.7));
00583 angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
00584 for (n = 0; n <= 64; n++) {
00585 float pwr;
00586
00587 idx = FFMAX(0, lrint((max - lpcs[n]) * irange) - 1);
00588 pwr = wmavoice_denoise_power_table[s->denoise_strength][idx];
00589 lpcs[n] = angle_mul * pwr;
00590
00591
/* 70.570526123 is approximately 1 / log10(1.0331663), the base used for
 * extrapolating past the end of the energy table below. */
00592 idx = (pwr * gain_mul - 0.0295) * 70.570526123;
00593 if (idx > 127) {
00594 coeffs[n] = wmavoice_energy_table[127] *
00595 powf(1.0331663, idx - 127);
00596 } else
00597 coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
00598 }
00599
00600
00601
00602
00603
/* DCT-I followed by DST-I, both in place on lpcs[]; the result is used
 * below as per-bin phase values.
 * NOTE(review): presumably this pair implements a Hilbert transform of the
 * gain curve (minimum-phase construction) - confirm. */
00604 ff_dct_calc(&s->dct, lpcs);
00605 ff_dct_calc(&s->dst, lpcs);
00606
00607
/* Turn magnitude (coeffs[n]) and phase (from lpcs[]) into the packed RDFT
 * layout, using the sin/cos tables with the phase clipped to [-255, 255].
 * The loop handles two bins per iteration with alternating sign of
 * lpcs[64], and runs downwards so coeffs[n] is read before coeffs[2n] and
 * coeffs[2n + 1] overwrite it. */
00608 idx = 255 + av_clip(lpcs[64], -255, 255);
00609 coeffs[0] = coeffs[0] * s->cos[idx];
00610 idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
00611 last_coeff = coeffs[64] * s->cos[idx];
00612 for (n = 63;; n--) {
00613 idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00614 coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00615 coeffs[n * 2] = coeffs[n] * s->cos[idx];
00616
00617 if (!--n) break;
00618
00619 idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00620 coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00621 coeffs[n * 2] = coeffs[n] * s->cos[idx];
00622 }
00623 coeffs[1] = last_coeff;
00624
00625
/* Back to the time domain. */
00626 ff_rdft_calc(&s->irdft, coeffs);
00627
00628
/* Truncate to remainder taps, optionally apply tilt correction, then scale
 * by (1/64) / sqrt(energy of the kept taps). */
00629 memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128 - remainder));
00630 if (s->denoise_tilt_corr) {
00631 float tilt_mem = 0;
00632
00633 coeffs[remainder - 1] = 0;
00634 ff_tilt_compensation(&tilt_mem,
00635 -1.8 * tilt_factor(coeffs, remainder - 1),
00636 coeffs, remainder);
00637 }
00638 sq = (1.0 / 64.0) * sqrtf(1 / ff_dot_productf(coeffs, coeffs, remainder));
00639 for (n = 0; n < remainder; n++)
00640 coeffs[n] *= sq;
00641 }
00642
00669 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
00670 float *synth_pf, int size,
00671 const float *lpcs)
00672 {
00673 int remainder, lim, n;
00674
00675 if (fcb_type != FCB_TYPE_SILENCE) {
00676 float *tilted_lpcs = s->tilted_lpcs_pf,
00677 *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
00678
00679 tilted_lpcs[0] = 1.0;
00680 memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
00681 memset(&tilted_lpcs[s->lsps + 1], 0,
00682 sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
00683 ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
00684 tilted_lpcs, s->lsps + 2);
00685
00686
00687
00688
00689
00690 remainder = FFMIN(127 - size, size - 1);
00691 calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
00692
00693
00694
00695 memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
00696 ff_rdft_calc(&s->rdft, synth_pf);
00697 ff_rdft_calc(&s->rdft, coeffs);
00698 synth_pf[0] *= coeffs[0];
00699 synth_pf[1] *= coeffs[1];
00700 for (n = 1; n < 64; n++) {
00701 float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
00702 synth_pf[n * 2] = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
00703 synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
00704 }
00705 ff_rdft_calc(&s->irdft, synth_pf);
00706 }
00707
00708
00709 if (s->denoise_filter_cache_size) {
00710 lim = FFMIN(s->denoise_filter_cache_size, size);
00711 for (n = 0; n < lim; n++)
00712 synth_pf[n] += s->denoise_filter_cache[n];
00713 s->denoise_filter_cache_size -= lim;
00714 memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
00715 sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
00716 }
00717
00718
00719 if (fcb_type != FCB_TYPE_SILENCE) {
00720 lim = FFMIN(remainder, s->denoise_filter_cache_size);
00721 for (n = 0; n < lim; n++)
00722 s->denoise_filter_cache[n] += synth_pf[size + n];
00723 if (lim < remainder) {
00724 memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
00725 sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
00726 s->denoise_filter_cache_size = remainder;
00727 }
00728 }
00729 }
00730
00751 static void postfilter(WMAVoiceContext *s, const float *synth,
00752 float *samples, int size,
00753 const float *lpcs, float *zero_exc_pf,
00754 int fcb_type, int pitch)
00755 {
00756 float synth_filter_in_buf[MAX_FRAMESIZE / 2],
00757 *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
00758 *synth_filter_in = zero_exc_pf;
00759
00760 assert(size <= MAX_FRAMESIZE / 2);
00761
00762
00763 ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
00764
00765 if (fcb_type >= FCB_TYPE_AW_PULSES &&
00766 !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
00767 synth_filter_in = synth_filter_in_buf;
00768
00769
00770 ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
00771 synth_filter_in, size, s->lsps);
00772 memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
00773 sizeof(synth_pf[0]) * s->lsps);
00774
00775 wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
00776
00777 adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
00778 &s->postfilter_agc);
00779
00780 if (s->dc_level > 8) {
00781
00782
00783
00784 ff_acelp_apply_order_2_transfer_function(samples, samples,
00785 (const float[2]) { -1.99997, 1.0 },
00786 (const float[2]) { -1.9330735188, 0.93589198496 },
00787 0.93980580475, s->dcf_mem, size);
00788 }
00789 }
/**
 * Dequantize LSPs from a multi-stage vector codebook.
 *
 * For every stage, one codebook vector (selected by values[stage]) is
 * scaled, offset and accumulated into lsps[]. The per-stage codebooks are
 * stored back to back in table, each holding sizes[stage] vectors of num
 * bytes.
 *
 * @param lsps     output, num entries; overwritten
 * @param num      number of LSPs per vector
 * @param values   selected vector index per stage
 * @param sizes    number of vectors in each stage's codebook
 * @param n_stages number of stages
 * @param table    concatenated codebooks
 * @param mul_q    per-stage multiplier
 * @param base_q   per-stage offset
 */
static void dequant_lsps(double *lsps, int num,
                         const uint16_t *values,
                         const uint16_t *sizes,
                         int n_stages, const uint8_t *table,
                         const double *mul_q,
                         const double *base_q)
{
    int stage, i;

    for (i = 0; i < num; i++)
        lsps[i] = 0.0;

    for (stage = 0; stage < n_stages; stage++) {
        const uint8_t *vec  = table + values[stage] * num;
        const double offset = base_q[stage];
        const double scale  = mul_q[stage];

        for (i = 0; i < num; i++)
            lsps[i] += offset + scale * vec[i];

        /* skip to the next stage's codebook */
        table += sizes[stage] * num;
    }
}
00825
00837 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
00838 {
00839 static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
00840 static const double mul_lsf[4] = {
00841 5.2187144800e-3, 1.4626986422e-3,
00842 9.6179549166e-4, 1.1325736225e-3
00843 };
00844 static const double base_lsf[4] = {
00845 M_PI * -2.15522e-1, M_PI * -6.1646e-2,
00846 M_PI * -3.3486e-2, M_PI * -5.7408e-2
00847 };
00848 uint16_t v[4];
00849
00850 v[0] = get_bits(gb, 8);
00851 v[1] = get_bits(gb, 6);
00852 v[2] = get_bits(gb, 5);
00853 v[3] = get_bits(gb, 5);
00854
00855 dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
00856 mul_lsf, base_lsf);
00857 }
00858
00863 static void dequant_lsp10r(GetBitContext *gb,
00864 double *i_lsps, const double *old,
00865 double *a1, double *a2, int q_mode)
00866 {
00867 static const uint16_t vec_sizes[3] = { 128, 64, 64 };
00868 static const double mul_lsf[3] = {
00869 2.5807601174e-3, 1.2354460219e-3, 1.1763821673e-3
00870 };
00871 static const double base_lsf[3] = {
00872 M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
00873 };
00874 const float (*ipol_tab)[2][10] = q_mode ?
00875 wmavoice_lsp10_intercoeff_b : wmavoice_lsp10_intercoeff_a;
00876 uint16_t interpol, v[3];
00877 int n;
00878
00879 dequant_lsp10i(gb, i_lsps);
00880
00881 interpol = get_bits(gb, 5);
00882 v[0] = get_bits(gb, 7);
00883 v[1] = get_bits(gb, 6);
00884 v[2] = get_bits(gb, 6);
00885
00886 for (n = 0; n < 10; n++) {
00887 double delta = old[n] - i_lsps[n];
00888 a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00889 a1[10 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00890 }
00891
00892 dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
00893 mul_lsf, base_lsf);
00894 }
00895
00899 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
00900 {
00901 static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
00902 static const double mul_lsf[5] = {
00903 3.3439586280e-3, 6.9908173703e-4,
00904 3.3216608306e-3, 1.0334960326e-3,
00905 3.1899104283e-3
00906 };
00907 static const double base_lsf[5] = {
00908 M_PI * -1.27576e-1, M_PI * -2.4292e-2,
00909 M_PI * -1.28094e-1, M_PI * -3.2128e-2,
00910 M_PI * -1.29816e-1
00911 };
00912 uint16_t v[5];
00913
00914 v[0] = get_bits(gb, 8);
00915 v[1] = get_bits(gb, 6);
00916 v[2] = get_bits(gb, 7);
00917 v[3] = get_bits(gb, 6);
00918 v[4] = get_bits(gb, 7);
00919
00920 dequant_lsps( lsps, 5, v, vec_sizes, 2,
00921 wmavoice_dq_lsp16i1, mul_lsf, base_lsf);
00922 dequant_lsps(&lsps[5], 5, &v[2], &vec_sizes[2], 2,
00923 wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
00924 dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
00925 wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
00926 }
00927
00932 static void dequant_lsp16r(GetBitContext *gb,
00933 double *i_lsps, const double *old,
00934 double *a1, double *a2, int q_mode)
00935 {
00936 static const uint16_t vec_sizes[3] = { 128, 128, 128 };
00937 static const double mul_lsf[3] = {
00938 1.2232979501e-3, 1.4062241527e-3, 1.6114744851e-3
00939 };
00940 static const double base_lsf[3] = {
00941 M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
00942 };
00943 const float (*ipol_tab)[2][16] = q_mode ?
00944 wmavoice_lsp16_intercoeff_b : wmavoice_lsp16_intercoeff_a;
00945 uint16_t interpol, v[3];
00946 int n;
00947
00948 dequant_lsp16i(gb, i_lsps);
00949
00950 interpol = get_bits(gb, 5);
00951 v[0] = get_bits(gb, 7);
00952 v[1] = get_bits(gb, 7);
00953 v[2] = get_bits(gb, 7);
00954
00955 for (n = 0; n < 16; n++) {
00956 double delta = old[n] - i_lsps[n];
00957 a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00958 a1[16 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00959 }
00960
00961 dequant_lsps( a2, 10, v, vec_sizes, 1,
00962 wmavoice_dq_lsp16r1, mul_lsf, base_lsf);
00963 dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
00964 wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
00965 dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
00966 wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
00967 }
00968
/**
 * Parse the start offset of the AW pulse windows for a frame with two
 * blocks of MAX_FRAMESIZE / 2 samples and derive, per block, the pulse
 * count and the offset of the first pulse window.
 *
 * Results are stored in s->aw_idx_is_ext, s->aw_pulse_range,
 * s->aw_n_pulses[] and s->aw_first_pulse_off[].
 *
 * @param s     decoder context
 * @param gb    bit reader
 * @param pitch pitch of the two blocks (pitch[0], pitch[1]); both are used
 *              as divisors and loop steps, so they must be positive
 */
00982 static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb,
00983 const int *pitch)
00984 {
/* Start offset (in samples) for each of the 94 possible index values. */
00985 static const int16_t start_offset[94] = {
00986 -11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9, 11,
00987 13, 15, 18, 17, 19, 20, 21, 22, 23, 24, 25, 26,
00988 27, 28, 29, 30, 31, 32, 33, 35, 37, 39, 41, 43,
00989 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,
00990 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91,
00991 93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, 115,
00992 117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
00993 141, 143, 145, 147, 149, 151, 153, 155, 157, 159
00994 };
00995 int bits, offset;
00996
00997
/* The index is 6 bits; values 54..63 are extended by 2 more bits, giving
 * 54 + (bits - 54) * 4 + extra, i.e. the range 54..93. */
00998 s->aw_idx_is_ext = 0;
00999 if ((bits = get_bits(gb, 6)) >= 54) {
01000 s->aw_idx_is_ext = 1;
01001 bits += (bits - 54) * 3 + get_bits(gb, 2);
01002 }
01003
01004
01005
/* Window width depends on the smaller pitch; then bring the start offset
 * into the frame and count how many pitch-spaced pulses fall in each block. */
01006 s->aw_pulse_range = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
01007 for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
01008 s->aw_n_pulses[0] = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
01009 s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
01010 offset += s->aw_n_pulses[0] * pitch[0];
01011 s->aw_n_pulses[1] = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
01012 s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;
01013
01014
01015
01016
/* If the coded start lies in the first block, move the window offsets back
 * by whole pitch periods as long as the window's end stays above 0. */
01017 if (start_offset[bits] < MAX_FRAMESIZE / 2) {
01018 while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
01019 s->aw_first_pulse_off[1] -= pitch[1];
01020 if (start_offset[bits] < 0)
01021 while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
01022 s->aw_first_pulse_off[0] -= pitch[0];
01023 }
01024 }
01025
/**
 * Apply the second set of AW pulses: a single pulse placed at the aidx-th
 * position of the block that is not covered by a pulse window.
 *
 * @param s         decoder context (AW window state from aw_parse_coords())
 * @param gb        bit reader
 * @param block_idx block index within the frame (0 or 1)
 * @param fcb       fixed codebook pulse list; one pulse is appended unless
 *                  the function returns early because no position is free
 */
01033 static void aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb,
01034 int block_idx, AMRFixed *fcb)
01035 {
/* One bit per sample position (bit 15 of each word = lowest position);
 * use_mask[0..4] cover the block's 80 samples, with two guard words on
 * either side so window writes just outside the block stay in bounds. */
01036 uint16_t use_mask_mem[9];
01037 uint16_t *use_mask = use_mask_mem + 2;
01038
01039
01040
01041
01042
01043
01044
01045 int pulse_off = s->aw_first_pulse_off[block_idx],
01046 pulse_start, n, idx, range, aidx, start_off = 0;
01047
01048
/* Advance the first window by pitch periods until it reaches the block. */
01049 if (s->aw_n_pulses[block_idx] > 0)
01050 while (pulse_off + s->aw_pulse_range < 1)
01051 pulse_off += fcb->pitch_lag;
01052
01053
/* Choose the index range; for block 1 the pulse offset is taken from the
 * value cached at the end of the previous call. */
01054 if (s->aw_n_pulses[0] > 0) {
01055 if (block_idx == 0) {
01056 range = 32;
01057 } else {
01058 range = 8;
01059 if (s->aw_n_pulses[block_idx] > 0)
01060 pulse_off = s->aw_next_pulse_off_cache;
01061 }
01062 } else
01063 range = 16;
01064 pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
01065
01066
01067
01068
/* Mark all 80 positions as free (guard words as used), then clear the
 * aw_pulse_range positions starting at every pitch-spaced window offset. */
01069 memset(&use_mask[-2], 0, 2 * sizeof(use_mask[0]));
01070 memset( use_mask, -1, 5 * sizeof(use_mask[0]));
01071 memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
01072 if (s->aw_n_pulses[block_idx] > 0)
01073 for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
01074 int excl_range = s->aw_pulse_range;
01075 uint16_t *use_mask_ptr = &use_mask[idx >> 4];
01076 int first_sh = 16 - (idx & 15);
01077 *use_mask_ptr++ &= 0xFFFF << first_sh;
01078 excl_range -= first_sh;
01079 if (excl_range >= 16) {
01080 *use_mask_ptr++ = 0;
01081 *use_mask_ptr &= 0xFFFF >> (excl_range - 16);
01082 } else
01083 *use_mask_ptr &= 0xFFFF >> excl_range;
01084 }
01085
01086
/* Read the index and walk forward from pulse_start, counting free
 * positions (each one is marked used as it is counted) until aidx + 1 have
 * been seen. Once the walk runs past the block end, it continues from the
 * lowest remaining free position; if none is left, no pulse is added. */
01087 aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
01088 for (n = 0; n <= aidx; pulse_start++) {
01089 for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
01090 if (idx >= MAX_FRAMESIZE / 2) {
01091 if (use_mask[0]) idx = 0x0F;
01092 else if (use_mask[1]) idx = 0x1F;
01093 else if (use_mask[2]) idx = 0x2F;
01094 else if (use_mask[3]) idx = 0x3F;
01095 else if (use_mask[4]) idx = 0x4F;
01096 else return;
01097 idx -= av_log2_16bit(use_mask[idx >> 4]);
01098 }
01099 if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
01100 use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
01101 n++;
01102 start_off = idx;
01103 }
01104 }
01105
/* Append the pulse; its sign is one bit (set = negative). */
01106 fcb->x[fcb->n] = start_off;
01107 fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
01108 fcb->n++;
01109
01110
/* Remember where the pitch-periodic continuation of this pulse lands in
 * the next block. */
01111 n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
01112 s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
01113 }
01114
/**
 * Apply the first set of AW pulses.
 *
 * Reads one value of 12 bits (10 bits for block 0 when the start offset
 * index was extended). If the block has pulse windows, the value is split
 * into 3 (window width 24) or 4 (width 16) fields, each holding a sign bit
 * and a position inside the first window. Otherwise the value encodes two
 * pulses with a fixed distance, which are excluded from pitch repetition
 * via no_repeat_mask.
 *
 * @param s         decoder context (AW window state from aw_parse_coords())
 * @param gb        bit reader
 * @param block_idx block index within the frame (0 or 1)
 * @param fcb       fixed codebook pulse list to append to
 */
01122 static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb,
01123 int block_idx, AMRFixed *fcb)
01124 {
01125 int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
01126 float v;
01127
01128 if (s->aw_n_pulses[block_idx] > 0) {
01129 int n, v_mask, i_mask, sh, n_pulses;
01130
/* Field layout: sh bits per pulse, v_mask = sign bit, i_mask = position. */
01131 if (s->aw_pulse_range == 24) {
01132 n_pulses = 3;
01133 v_mask = 8;
01134 i_mask = 7;
01135 sh = 4;
01136 } else {
01137 n_pulses = 4;
01138 v_mask = 4;
01139 i_mask = 3;
01140 sh = 3;
01141 }
01142
/* Fields are consumed from the low bits upward, for pulse n_pulses-1 down
 * to 0. Positions are interleaved (index * n_pulses + n), moved into the
 * block by whole pitch periods, and dropped if beyond the block end. */
01143 for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
01144 fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
01145 fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
01146 s->aw_first_pulse_off[block_idx];
01147 while (fcb->x[fcb->n] < 0)
01148 fcb->x[fcb->n] += fcb->pitch_lag;
01149 if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)
01150 fcb->n++;
01151 }
01152 } else {
/* No window in this block: bits 1-8 select position and distance
 * (1, 3, 5 or 7) of a pulse pair, bit 9 the sign of the first pulse and
 * bit 0 whether the second pulse has the opposite sign. */
01153 int num2 = (val & 0x1FF) >> 1, delta, idx;
01154
01155 if (num2 < 1 * 79) { delta = 1; idx = num2 + 1; }
01156 else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
01157 else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
01158 else { delta = 7; idx = num2 + 1 - 3 * 75; }
01159 v = (val & 0x200) ? -1.0 : 1.0;
01160
01161 fcb->no_repeat_mask |= 3 << fcb->n;
01162 fcb->x[fcb->n] = idx - delta;
01163 fcb->y[fcb->n] = v;
01164 fcb->x[fcb->n + 1] = idx;
01165 fcb->y[fcb->n + 1] = (val & 1) ? -v : v;
01166 fcb->n += 2;
01167 }
01168 }
01169
/**
 * Deterministic pseudo-random start index into the hardcoded codebook for
 * silence blocks.
 *
 * @param frame_cntr current frame counter
 * @param block_num  block index within the frame
 * @param block_size block size in samples
 * @return a value in [0, 1000 - block_size)
 */
01183 static int pRNG(int frame_cntr, int block_num, int block_size)
01184 {
01185
01186
01187
01188
01189
01190
01191
01192
01193
01194
/* One row per value of y (0..8); column 0 is a multiplier for x, column 1
 * a factor for the high-word multiplication UMULH(x, ...).
 * NOTE(review): this looks like a table of fixed-point reciprocals that
 * replaces a division by a y-dependent constant - confirm against the
 * reference description before changing any value. */
01195 static const unsigned int div_tbl[9][2] = {
01196 { 8332, 3 * 715827883U },
01197 { 4545, 0 * 390451573U },
01198 { 3124, 11 * 268435456U },
01199 { 2380, 15 * 204522253U },
01200 { 1922, 23 * 165191050U },
01201 { 1612, 23 * 138547333U },
01202 { 1388, 27 * 119304648U },
01203 { 1219, 16 * 104755300U },
01204 { 1086, 39 * 93368855U }
01205 };
/* Seed: block number and frame counter combined, reduced by 0xFFFF once. */
01206 unsigned int z, y, x = MUL16(block_num, 1877) + frame_cntr;
01207 if (x >= 0xFFFF) x -= 0xFFFF;
01208
/* 477218589 is approximately 2^32 / 9, so assuming MULH returns the high
 * 32 bits of the product, y is x modulo 9 and selects the table row. */
01209 y = x - 9 * MULH(477218589, x);
01210 z = (uint16_t) (x * div_tbl[y][0] + UMULH(x, div_tbl[y][1]));
01211
/* Keep the start index small enough that block_size entries can be read
 * from a 1000-entry table. */
01212 return z % (1000 - block_size);
01213 }
01214
01219 static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb,
01220 int block_idx, int size,
01221 const struct frame_type_desc *frame_desc,
01222 float *excitation)
01223 {
01224 float gain;
01225 int n, r_idx;
01226
01227 assert(size <= MAX_FRAMESIZE);
01228
01229
01230 if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01231 r_idx = pRNG(s->frame_cntr, block_idx, size);
01232 gain = s->silence_gain;
01233 } else {
01234 r_idx = get_bits(gb, 8);
01235 gain = wmavoice_gain_universal[get_bits(gb, 6)];
01236 }
01237
01238
01239 memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
01240
01241
01242 for (n = 0; n < size; n++)
01243 excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
01244 }
01245
/**
 * Generate the excitation of one block from the fixed codebook (pulses)
 * and the adaptive codebook (pitch-delayed excitation history).
 *
 * @param s               decoder context
 * @param gb              bit reader
 * @param block_idx       block index within the frame
 * @param size            block size (<= MAX_FRAMESIZE / 2)
 * @param block_pitch_sh2 block pitch in quarter-sample units
 * @param frame_desc      frame type descriptor
 * @param excitation      in/out: output for this block; previous
 *                        excitation must be readable at negative indexes
 */
01250 static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,
01251 int block_idx, int size,
01252 int block_pitch_sh2,
01253 const struct frame_type_desc *frame_desc,
01254 float *excitation)
01255 {
/* Weights applied to the gain prediction error history. */
01256 static const float gain_coeff[6] = {
01257 0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
01258 };
01259 float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
01260 int n, idx, gain_weight;
01261 AMRFixed fcb;
01262
01263 assert(size <= MAX_FRAMESIZE / 2);
01264 memset(pulses, 0, sizeof(*pulses) * size);
01265
01266 fcb.pitch_lag = block_pitch_sh2 >> 2;
01267 fcb.pitch_fac = 1.0;
01268 fcb.no_repeat_mask = 0;
01269 fcb.n = 0;
01270
01271
01272
/* Collect the fixed codebook pulses: either the two AW pulse sets, or five
 * tracks with one sign bit and one or two positions each. */
01273 if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01274 aw_pulse_set1(s, gb, block_idx, &fcb);
01275 aw_pulse_set2(s, gb, block_idx, &fcb);
01276 } else {
01277 int offset_nbits = 5 - frame_desc->log_n_blocks;
01278
01279 fcb.no_repeat_mask = -1;
01280
01281
/* Track n uses positions n, n + 5, n + 10, ...; a second pulse on the
 * same track has its sign flipped when it lies after the first one. */
01282 for (n = 0; n < 5; n++) {
01283 float sign;
01284 int pos1, pos2;
01285
01286 sign = get_bits1(gb) ? 1.0 : -1.0;
01287 pos1 = get_bits(gb, offset_nbits);
01288 fcb.x[fcb.n] = n + 5 * pos1;
01289 fcb.y[fcb.n++] = sign;
01290 if (n < frame_desc->dbl_pulses) {
01291 pos2 = get_bits(gb, offset_nbits);
01292 fcb.x[fcb.n] = n + 5 * pos2;
01293 fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
01294 }
01295 }
01296 }
01297 ff_set_fixed_vector(pulses, &fcb, 1.0, size);
01298
01299
01300
/* One 7-bit index selects both gains. The FCB gain is predicted in the
 * log domain from the error history; the new error is the codebook value
 * clipped to [ln(0.05), ln(5)]. */
01301 idx = get_bits(gb, 7);
01302 fcb_gain = expf(ff_dot_productf(s->gain_pred_err, gain_coeff, 6) -
01303 5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
01304 acb_gain = wmavoice_gain_codebook_acb[idx];
01305 pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
01306 -2.9957322736 ,
01307 1.6094379124 );
01308
/* Shift the history and insert the new error 4, 2 or 1 times for frames
 * with 2, 4 or 8 blocks. */
01309 gain_weight = 8 >> frame_desc->log_n_blocks;
01310 memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
01311 sizeof(*s->gain_pred_err) * (6 - gain_weight));
01312 for (n = 0; n < gain_weight; n++)
01313 s->gain_pred_err[n] = pred_err;
01314
01315
/* Adaptive codebook contribution. */
01316 if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
/* The pitch changes linearly over the frame (16.16 fixed point). The block
 * is processed in runs of len samples over which the integer pitch and the
 * interpolation filter index stay constant. */
01317 int len;
01318 for (n = 0; n < size; n += len) {
01319 int next_idx_sh16;
01320 int abs_idx = block_idx * size + n;
01321 int pitch_sh16 = (s->last_pitch_val << 16) +
01322 s->pitch_diff_sh16 * abs_idx;
01323 int pitch = (pitch_sh16 + 0x6FFF) >> 16;
01324 int idx_sh16 = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
01325 idx = idx_sh16 >> 16;
01326 if (s->pitch_diff_sh16) {
01327 if (s->pitch_diff_sh16 > 0) {
01328 next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
01329 } else
01330 next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
01331 len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
01332 1, size - n);
01333 } else
01334 len = size;
01335
01336 ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
01337 wmavoice_ipol1_coeffs, 17,
01338 idx, 9, len);
01339 }
01340 } else {
/* Constant pitch for the block: interpolate for fractional lags, plain
 * (possibly overlapping) copy from the history for integer lags. */
01341 int block_pitch = block_pitch_sh2 >> 2;
01342 idx = block_pitch_sh2 & 3;
01343 if (idx) {
01344 ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
01345 wmavoice_ipol2_coeffs, 4,
01346 idx, 8, size);
01347 } else
01348 av_memcpy_backptr((uint8_t *) excitation, sizeof(float) * block_pitch,
01349 sizeof(float) * size);
01350 }
01351
01352
/* excitation = acb_gain * adaptive part + fcb_gain * pulses */
01353 ff_weighted_vector_sumf(excitation, excitation, pulses,
01354 acb_gain, fcb_gain, size);
01355 }
01356
01373 static void synth_block(WMAVoiceContext *s, GetBitContext *gb,
01374 int block_idx, int size,
01375 int block_pitch_sh2,
01376 const double *lsps, const double *prev_lsps,
01377 const struct frame_type_desc *frame_desc,
01378 float *excitation, float *synth)
01379 {
01380 double i_lsps[MAX_LSPS];
01381 float lpcs[MAX_LSPS];
01382 float fac;
01383 int n;
01384
01385 if (frame_desc->acb_type == ACB_TYPE_NONE)
01386 synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
01387 else
01388 synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
01389 frame_desc, excitation);
01390
01391
01392 fac = (block_idx + 0.5) / frame_desc->n_blocks;
01393 for (n = 0; n < s->lsps; n++)
01394 i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
01395 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01396
01397
01398 ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
01399 }
01400
01416 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
01417 float *samples,
01418 const double *lsps, const double *prev_lsps,
01419 float *excitation, float *synth)
01420 {
01421 WMAVoiceContext *s = ctx->priv_data;
01422 int n, n_blocks_x2, log_n_blocks_x2, cur_pitch_val;
01423 int pitch[MAX_BLOCKS], last_block_pitch;
01424
01425
01426 int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)],
01427 block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
01428
01429 if (bd_idx < 0) {
01430 av_log(ctx, AV_LOG_ERROR,
01431 "Invalid frame type VLC code, skipping\n");
01432 return -1;
01433 }
01434
01435
01436 if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
01437
01438
01439
01440
01441 n_blocks_x2 = frame_descs[bd_idx].n_blocks << 1;
01442 log_n_blocks_x2 = frame_descs[bd_idx].log_n_blocks + 1;
01443 cur_pitch_val = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
01444 cur_pitch_val = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
01445 if (s->last_acb_type == ACB_TYPE_NONE ||
01446 20 * abs(cur_pitch_val - s->last_pitch_val) >
01447 (cur_pitch_val + s->last_pitch_val))
01448 s->last_pitch_val = cur_pitch_val;
01449
01450
01451 for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01452 int fac = n * 2 + 1;
01453
01454 pitch[n] = (MUL16(fac, cur_pitch_val) +
01455 MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
01456 frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
01457 }
01458
01459
01460 s->pitch_diff_sh16 =
01461 ((cur_pitch_val - s->last_pitch_val) << 16) / MAX_FRAMESIZE;
01462 }
01463
01464
01465 switch (frame_descs[bd_idx].fcb_type) {
01466 case FCB_TYPE_SILENCE:
01467 s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
01468 break;
01469 case FCB_TYPE_AW_PULSES:
01470 aw_parse_coords(s, gb, pitch);
01471 break;
01472 }
01473
01474 for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01475 int bl_pitch_sh2;
01476
01477
01478 switch (frame_descs[bd_idx].acb_type) {
01479 case ACB_TYPE_HAMMING: {
01480
01481
01482
01483
01484
01485 int block_pitch,
01486 t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
01487 t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
01488 t3 = s->block_conv_table[3] - s->block_conv_table[2] + 1;
01489
01490 if (n == 0) {
01491 block_pitch = get_bits(gb, s->block_pitch_nbits);
01492 } else
01493 block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
01494 get_bits(gb, s->block_delta_pitch_nbits);
01495
01496 last_block_pitch = av_clip(block_pitch,
01497 s->block_delta_pitch_hrange,
01498 s->block_pitch_range -
01499 s->block_delta_pitch_hrange);
01500
01501
01502 if (block_pitch < t1) {
01503 bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
01504 } else {
01505 block_pitch -= t1;
01506 if (block_pitch < t2) {
01507 bl_pitch_sh2 =
01508 (s->block_conv_table[1] << 2) + (block_pitch << 1);
01509 } else {
01510 block_pitch -= t2;
01511 if (block_pitch < t3) {
01512 bl_pitch_sh2 =
01513 (s->block_conv_table[2] + block_pitch) << 2;
01514 } else
01515 bl_pitch_sh2 = s->block_conv_table[3] << 2;
01516 }
01517 }
01518 pitch[n] = bl_pitch_sh2 >> 2;
01519 break;
01520 }
01521
01522 case ACB_TYPE_ASYMMETRIC: {
01523 bl_pitch_sh2 = pitch[n] << 2;
01524 break;
01525 }
01526
01527 default:
01528 bl_pitch_sh2 = 0;
01529 break;
01530 }
01531
01532 synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
01533 lsps, prev_lsps, &frame_descs[bd_idx],
01534 &excitation[n * block_nsamples],
01535 &synth[n * block_nsamples]);
01536 }
01537
01538
01539
01540 if (s->do_apf) {
01541 double i_lsps[MAX_LSPS];
01542 float lpcs[MAX_LSPS];
01543
01544 for (n = 0; n < s->lsps; n++)
01545 i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
01546 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01547 postfilter(s, synth, samples, 80, lpcs,
01548 &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
01549 frame_descs[bd_idx].fcb_type, pitch[0]);
01550
01551 for (n = 0; n < s->lsps; n++)
01552 i_lsps[n] = cos(lsps[n]);
01553 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01554 postfilter(s, &synth[80], &samples[80], 80, lpcs,
01555 &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
01556 frame_descs[bd_idx].fcb_type, pitch[0]);
01557 } else
01558 memcpy(samples, synth, 160 * sizeof(synth[0]));
01559
01560
01561 s->frame_cntr++;
01562 if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF;
01563 s->last_acb_type = frame_descs[bd_idx].acb_type;
01564 switch (frame_descs[bd_idx].acb_type) {
01565 case ACB_TYPE_NONE:
01566 s->last_pitch_val = 0;
01567 break;
01568 case ACB_TYPE_ASYMMETRIC:
01569 s->last_pitch_val = cur_pitch_val;
01570 break;
01571 case ACB_TYPE_HAMMING:
01572 s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
01573 break;
01574 }
01575
01576 return 0;
01577 }
01578
01591 static void stabilize_lsps(double *lsps, int num)
01592 {
01593 int n, m, l;
01594
01595
01596
01597
01598 lsps[0] = FFMAX(lsps[0], 0.0015 * M_PI);
01599 for (n = 1; n < num; n++)
01600 lsps[n] = FFMAX(lsps[n], lsps[n - 1] + 0.0125 * M_PI);
01601 lsps[num - 1] = FFMIN(lsps[num - 1], 0.9985 * M_PI);
01602
01603
01604
01605 for (n = 1; n < num; n++) {
01606 if (lsps[n] < lsps[n - 1]) {
01607 for (m = 1; m < num; m++) {
01608 double tmp = lsps[m];
01609 for (l = m - 1; l >= 0; l--) {
01610 if (lsps[l] <= tmp) break;
01611 lsps[l + 1] = lsps[l];
01612 }
01613 lsps[l + 1] = tmp;
01614 }
01615 break;
01616 }
01617 }
01618 }
01619
01629 static int check_bits_for_superframe(GetBitContext *orig_gb,
01630 WMAVoiceContext *s)
01631 {
01632 GetBitContext s_gb, *gb = &s_gb;
01633 int n, need_bits, bd_idx;
01634 const struct frame_type_desc *frame_desc;
01635
01636
01637 init_get_bits(gb, orig_gb->buffer, orig_gb->size_in_bits);
01638 skip_bits_long(gb, get_bits_count(orig_gb));
01639 assert(get_bits_left(gb) == get_bits_left(orig_gb));
01640
01641
01642 if (get_bits_left(gb) < 14)
01643 return 1;
01644 if (!get_bits1(gb))
01645 return -1;
01646 if (get_bits1(gb)) skip_bits(gb, 12);
01647 if (s->has_residual_lsps) {
01648 if (get_bits_left(gb) < s->sframe_lsp_bitsize)
01649 return 1;
01650 skip_bits_long(gb, s->sframe_lsp_bitsize);
01651 }
01652
01653
01654 for (n = 0; n < MAX_FRAMES; n++) {
01655 int aw_idx_is_ext = 0;
01656
01657 if (!s->has_residual_lsps) {
01658 if (get_bits_left(gb) < s->frame_lsp_bitsize) return 1;
01659 skip_bits_long(gb, s->frame_lsp_bitsize);
01660 }
01661 bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)];
01662 if (bd_idx < 0)
01663 return -1;
01664 frame_desc = &frame_descs[bd_idx];
01665 if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01666 if (get_bits_left(gb) < s->pitch_nbits)
01667 return 1;
01668 skip_bits_long(gb, s->pitch_nbits);
01669 }
01670 if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01671 skip_bits(gb, 8);
01672 } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01673 int tmp = get_bits(gb, 6);
01674 if (tmp >= 0x36) {
01675 skip_bits(gb, 2);
01676 aw_idx_is_ext = 1;
01677 }
01678 }
01679
01680
01681 if (frame_desc->acb_type == ACB_TYPE_HAMMING) {
01682 need_bits = s->block_pitch_nbits +
01683 (frame_desc->n_blocks - 1) * s->block_delta_pitch_nbits;
01684 } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01685 need_bits = 2 * !aw_idx_is_ext;
01686 } else
01687 need_bits = 0;
01688 need_bits += frame_desc->frame_size;
01689 if (get_bits_left(gb) < need_bits)
01690 return 1;
01691 skip_bits_long(gb, need_bits);
01692 }
01693
01694 return 0;
01695 }
01696
01717 static int synth_superframe(AVCodecContext *ctx,
01718 float *samples, int *data_size)
01719 {
01720 WMAVoiceContext *s = ctx->priv_data;
01721 GetBitContext *gb = &s->gb, s_gb;
01722 int n, res, n_samples = 480;
01723 double lsps[MAX_FRAMES][MAX_LSPS];
01724 const double *mean_lsf = s->lsps == 16 ?
01725 wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];
01726 float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
01727 float synth[MAX_LSPS + MAX_SFRAMESIZE];
01728
01729 memcpy(synth, s->synth_history,
01730 s->lsps * sizeof(*synth));
01731 memcpy(excitation, s->excitation_history,
01732 s->history_nsamples * sizeof(*excitation));
01733
01734 if (s->sframe_cache_size > 0) {
01735 gb = &s_gb;
01736 init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
01737 s->sframe_cache_size = 0;
01738 }
01739
01740 if ((res = check_bits_for_superframe(gb, s)) == 1) return 1;
01741
01742
01743
01744
01745
01746 if (!get_bits1(gb)) {
01747 av_log_missing_feature(ctx, "WMAPro-in-WMAVoice support", 1);
01748 return -1;
01749 }
01750
01751
01752 if (get_bits1(gb)) {
01753 if ((n_samples = get_bits(gb, 12)) > 480) {
01754 av_log(ctx, AV_LOG_ERROR,
01755 "Superframe encodes >480 samples (%d), not allowed\n",
01756 n_samples);
01757 return -1;
01758 }
01759 }
01760
01761 if (s->has_residual_lsps) {
01762 double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
01763
01764 for (n = 0; n < s->lsps; n++)
01765 prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
01766
01767 if (s->lsps == 10) {
01768 dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01769 } else
01770 dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01771
01772 for (n = 0; n < s->lsps; n++) {
01773 lsps[0][n] = mean_lsf[n] + (a1[n] - a2[n * 2]);
01774 lsps[1][n] = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
01775 lsps[2][n] += mean_lsf[n];
01776 }
01777 for (n = 0; n < 3; n++)
01778 stabilize_lsps(lsps[n], s->lsps);
01779 }
01780
01781
01782 for (n = 0; n < 3; n++) {
01783 if (!s->has_residual_lsps) {
01784 int m;
01785
01786 if (s->lsps == 10) {
01787 dequant_lsp10i(gb, lsps[n]);
01788 } else
01789 dequant_lsp16i(gb, lsps[n]);
01790
01791 for (m = 0; m < s->lsps; m++)
01792 lsps[n][m] += mean_lsf[m];
01793 stabilize_lsps(lsps[n], s->lsps);
01794 }
01795
01796 if ((res = synth_frame(ctx, gb, n,
01797 &samples[n * MAX_FRAMESIZE],
01798 lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
01799 &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
01800 &synth[s->lsps + n * MAX_FRAMESIZE])))
01801 return res;
01802 }
01803
01804
01805
01806
01807 if (get_bits1(gb)) {
01808 res = get_bits(gb, 4);
01809 skip_bits(gb, 10 * (res + 1));
01810 }
01811
01812
01813 *data_size = n_samples * sizeof(float);
01814
01815
01816 memcpy(s->prev_lsps, lsps[2],
01817 s->lsps * sizeof(*s->prev_lsps));
01818 memcpy(s->synth_history, &synth[MAX_SFRAMESIZE],
01819 s->lsps * sizeof(*synth));
01820 memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
01821 s->history_nsamples * sizeof(*excitation));
01822 if (s->do_apf)
01823 memmove(s->zero_exc_pf, &s->zero_exc_pf[MAX_SFRAMESIZE],
01824 s->history_nsamples * sizeof(*s->zero_exc_pf));
01825
01826 return 0;
01827 }
01828
01836 static int parse_packet_header(WMAVoiceContext *s)
01837 {
01838 GetBitContext *gb = &s->gb;
01839 unsigned int res;
01840
01841 if (get_bits_left(gb) < 11)
01842 return 1;
01843 skip_bits(gb, 4);
01844 s->has_residual_lsps = get_bits1(gb);
01845 do {
01846 res = get_bits(gb, 6);
01847
01848 if (get_bits_left(gb) < 6 * (res == 0x3F) + s->spillover_bitsize)
01849 return 1;
01850 } while (res == 0x3F);
01851 s->spillover_nbits = get_bits(gb, s->spillover_bitsize);
01852
01853 return 0;
01854 }
01855
01871 static void copy_bits(PutBitContext *pb,
01872 const uint8_t *data, int size,
01873 GetBitContext *gb, int nbits)
01874 {
01875 int rmn_bytes, rmn_bits;
01876
01877 rmn_bits = rmn_bytes = get_bits_left(gb);
01878 if (rmn_bits < nbits)
01879 return;
01880 rmn_bits &= 7; rmn_bytes >>= 3;
01881 if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
01882 put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
01883 ff_copy_bits(pb, data + size - rmn_bytes,
01884 FFMIN(nbits - rmn_bits, rmn_bytes << 3));
01885 }
01886
01898 static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
01899 int *data_size, AVPacket *avpkt)
01900 {
01901 WMAVoiceContext *s = ctx->priv_data;
01902 GetBitContext *gb = &s->gb;
01903 int size, res, pos;
01904
01905 if (*data_size < 480 * sizeof(float)) {
01906 av_log(ctx, AV_LOG_ERROR,
01907 "Output buffer too small (%d given - %zu needed)\n",
01908 *data_size, 480 * sizeof(float));
01909 return -1;
01910 }
01911 *data_size = 0;
01912
01913
01914
01915
01916
01917
01918 for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
01919 if (!size)
01920 return 0;
01921 init_get_bits(&s->gb, avpkt->data, size << 3);
01922
01923
01924
01925
01926 if (size == ctx->block_align) {
01927 if ((res = parse_packet_header(s)) < 0)
01928 return res;
01929
01930
01931
01932
01933 if (s->spillover_nbits > 0) {
01934 if (s->sframe_cache_size > 0) {
01935 int cnt = get_bits_count(gb);
01936 copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);
01937 flush_put_bits(&s->pb);
01938 s->sframe_cache_size += s->spillover_nbits;
01939 if ((res = synth_superframe(ctx, data, data_size)) == 0 &&
01940 *data_size > 0) {
01941 cnt += s->spillover_nbits;
01942 s->skip_bits_next = cnt & 7;
01943 return cnt >> 3;
01944 } else
01945 skip_bits_long (gb, s->spillover_nbits - cnt +
01946 get_bits_count(gb));
01947 } else
01948 skip_bits_long(gb, s->spillover_nbits);
01949 }
01950 } else if (s->skip_bits_next)
01951 skip_bits(gb, s->skip_bits_next);
01952
01953
01954 s->sframe_cache_size = 0;
01955 s->skip_bits_next = 0;
01956 pos = get_bits_left(gb);
01957 if ((res = synth_superframe(ctx, data, data_size)) < 0) {
01958 return res;
01959 } else if (*data_size > 0) {
01960 int cnt = get_bits_count(gb);
01961 s->skip_bits_next = cnt & 7;
01962 return cnt >> 3;
01963 } else if ((s->sframe_cache_size = pos) > 0) {
01964
01965 init_get_bits(gb, avpkt->data, size << 3);
01966 skip_bits_long(gb, (size << 3) - pos);
01967 assert(get_bits_left(gb) == pos);
01968
01969
01970 init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
01971 copy_bits(&s->pb, avpkt->data, size, gb, s->sframe_cache_size);
01972
01973
01974 }
01975
01976 return size;
01977 }
01978
01979 static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
01980 {
01981 WMAVoiceContext *s = ctx->priv_data;
01982
01983 if (s->do_apf) {
01984 ff_rdft_end(&s->rdft);
01985 ff_rdft_end(&s->irdft);
01986 ff_dct_end(&s->dct);
01987 ff_dct_end(&s->dst);
01988 }
01989
01990 return 0;
01991 }
01992
01993 static av_cold void wmavoice_flush(AVCodecContext *ctx)
01994 {
01995 WMAVoiceContext *s = ctx->priv_data;
01996 int n;
01997
01998 s->postfilter_agc = 0;
01999 s->sframe_cache_size = 0;
02000 s->skip_bits_next = 0;
02001 for (n = 0; n < s->lsps; n++)
02002 s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
02003 memset(s->excitation_history, 0,
02004 sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
02005 memset(s->synth_history, 0,
02006 sizeof(*s->synth_history) * MAX_LSPS);
02007 memset(s->gain_pred_err, 0,
02008 sizeof(s->gain_pred_err));
02009
02010 if (s->do_apf) {
02011 memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
02012 sizeof(*s->synth_filter_out_buf) * s->lsps);
02013 memset(s->dcf_mem, 0,
02014 sizeof(*s->dcf_mem) * 2);
02015 memset(s->zero_exc_pf, 0,
02016 sizeof(*s->zero_exc_pf) * s->history_nsamples);
02017 memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
02018 }
02019 }
02020
02021 AVCodec ff_wmavoice_decoder = {
02022 "wmavoice",
02023 AVMEDIA_TYPE_AUDIO,
02024 CODEC_ID_WMAVOICE,
02025 sizeof(WMAVoiceContext),
02026 wmavoice_decode_init,
02027 NULL,
02028 wmavoice_decode_end,
02029 wmavoice_decode_packet,
02030 CODEC_CAP_SUBFRAMES,
02031 .flush = wmavoice_flush,
02032 .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
02033 };