1 /*
2 * QCELP decoder
3 * Copyright (c) 2007 Reynaldo H. Verdejo Pinochet
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file
24 * QCELP decoder
25 * @author Reynaldo H. Verdejo Pinochet
26 * @remark FFmpeg merging spearheaded by Kenan Gillet
27 * @remark Development mentored by Benjamin Larson
28 */
29
30 #include <stddef.h>
31
42
43 #undef NDEBUG
44 #include <assert.h>
45
47 I_F_Q = -1,
/**< insufficient frame quality */
54
59
63 float predictor_lspf[10];
/**< LSP predictor for RATE_OCTAVE and I_F_Q */
64 float pitch_synthesis_filter_mem[303];
65 float pitch_pre_filter_mem[303];
66 float rnd_fir_filter_mem[180];
67 float formant_mem[170];
75
76 /* postfilter */
77 float postfilter_synth_mem[10];
81
82 /**
83 * Initialize the speech codec according to the specification.
84 *
85 * TIA/EIA/IS-733 2.4.9
86 */
88 {
90 int i;
91
95
96 for (i = 0; i < 10; i++)
98
99 return 0;
100 }
101
102 /**
103 * Decode the 10 quantized LSP frequencies from the LSPV/LSP
104 * transmission codes of any bitrate and check for badly received packets.
105 *
106 * @param q the context
107 * @param lspf line spectral pair frequencies
108 *
109 * @return 0 on success, -1 if the packet is badly received
110 *
111 * TIA/EIA/IS-733 2.4.3.2.6.2-2, 2.4.8.7.3
112 */
114 {
// NOTE(review): several source lines of this function are absent from this
// excerpt; the comments below describe only the statements that are visible.
115 int i;
116 float tmp_lspf, smooth, erasure_coeff;
117 const float *predictors;
118
123
126
127 for (i = 0; i < 10; i++) {
133 }
135 } else {
137
139
142
// Each frequency is a blend of the evenly spaced value (i + 1) / 11 and
// predictors[i], weighted by (1 - erasure_coeff) and erasure_coeff.
143 for (i = 0; i < 10; i++) {
145 lspf[i] = (i + 1) * (1 - erasure_coeff) / 11 +
146 erasure_coeff * predictors[i];
147 }
148 smooth = 0.125;
149 }
150
151 // Check the stability of the LSP frequencies.
153 for (i = 1; i < 10; i++)
155
157 for (i = 9; i > 0; i--)
159
160 // Low-pass filter the LSP frequencies.
162 } else {
164
165 tmp_lspf = 0.;
166 for (i = 0; i < 5; i++) {
169 }
170
171 // Check for badly received packets.
// First threshold set: fail with -1 when the last frequency lies outside the
// open interval (0.70, 0.97), or when two frequencies two indices apart
// differ by less than 0.08.
// NOTE(review): the condition selecting between this threshold set and the
// one in the else branch is not visible in this excerpt.
173 if (lspf[9] <= .70 || lspf[9] >= .97)
174 return -1;
175 for (i = 3; i < 10; i++)
176 if (fabs(lspf[i] - lspf[i - 2]) < .08)
177 return -1;
178 } else {
// Second threshold set: last frequency must lie inside (0.66, 0.985) and
// frequencies four indices apart must differ by at least 0.0931.
179 if (lspf[9] <= .66 || lspf[9] >= .985)
180 return -1;
181 for (i = 4; i < 10; i++)
182 if (fabs(lspf[i] - lspf[i - 4]) < .0931)
183 return -1;
184 }
185 }
// No check rejected the packet.
186 return 0;
187 }
188
189 /**
190 * Convert codebook transmission codes to GAIN and INDEX.
191 *
192 * @param q the context
193 * @param gain array holding the decoded gain
194 *
195 * TIA/EIA/IS-733 2.4.6.2
196 */
198 {
// NOTE(review): several source lines of this function (including the switch
// and if headers) are absent from this excerpt; comments cover only what is
// visible.
199 int i, subframes_count, g1[16];
200 float slope;
201
// Number of codebook gains handled below: 16 for RATE_FULL, 4 for RATE_HALF,
// 5 for every other case reaching this switch.
204 case RATE_FULL: subframes_count = 16;
break;
205 case RATE_HALF: subframes_count = 4;
break;
206 default: subframes_count = 5;
207 }
208 for (i = 0; i < subframes_count; i++) {
// Adds to g1[i] the integer mean of the three previous g1 values minus 6;
// av_clip presumably clamps that term to [0, 32] -- confirm against libavutil.
// NOTE(review): the guard keeping i - 3 within bounds is not visible here.
211 g1[i] += av_clip((g1[i - 1] + g1[i - 2] + g1[i - 3]) / 3 - 6, 0, 32);
212 }
213
215
217 gain[i] = -gain[i];
219 }
220 }
221
225
227 // Provide smoothing of the unvoiced excitation energy.
// Expands gain[0..4] into gain[0..7] in place. The assignments run from the
// highest index down so that every source element is read before it is
// overwritten; gain[0] is left unchanged.
228 gain[7] = gain[4];
229 gain[6] = 0.4 * gain[3] + 0.6 * gain[4];
230 gain[5] = gain[3];
231 gain[4] = 0.8 * gain[2] + 0.2 * gain[3];
232 gain[3] = 0.2 * gain[1] + 0.8 * gain[2];
233 gain[2] = gain[1];
234 gain[1] = 0.6 * gain[0] + 0.4 * gain[1];
235 }
240 subframes_count = 8;
241 } else {
243
// Lowers g1[0] by 0, 1, 2 or 6 depending on the switch value (the switch
// expression is not visible in this excerpt), then floors it at zero.
246 case 1 : break;
247 case 2 : g1[0] -= 1; break;
248 case 3 : g1[0] -= 2; break;
249 default: g1[0] -= 6;
250 }
251 if (g1[0] < 0)
252 g1[0] = 0;
253 subframes_count = 4;
254 }
255 // This interpolation is done to produce smoother background noise.
257 for (i = 1; i <= subframes_count; i++)
259
263 }
264 }
265
266 /**
267 * If the received packet is Rate 1/4 a further sanity check is made of the
268 * codebook gain.
269 *
270 * @param cbgain the unpacked cbgain array
271 * @return -1 if the sanity check fails, 0 otherwise
272 *
273 * TIA/EIA/IS-733 2.4.8.7.3
274 */
276 {
// diff is the step between adjacent cbgain entries; prev_diff starts at 0.
// NOTE(review): no assignment to prev_diff inside the loop is visible in this
// excerpt (source line 285 is absent) -- confirm against the full file.
277 int i,
diff, prev_diff = 0;
278
// Examine the four adjacent pairs cbgain[i - 1], cbgain[i] for i = 1..4.
279 for (i = 1; i < 5; i++) {
280 diff = cbgain[i] - cbgain[i-1];
// Reject when a single step exceeds 10 in magnitude, or when the step
// differs from prev_diff by more than 12.
281 if (
FFABS(diff) > 10)
282 return -1;
283 else if (
FFABS(diff - prev_diff) > 12)
284 return -1;
286 }
// All steps were within limits.
287 return 0;
288 }
289
290 /**
291 * Compute the scaled codebook vector Cdn from INDEX and GAIN
292 * for all rates.
293 *
294 * The specification lacks some information here.
295 *
296 * TIA/EIA/IS-733 has an omission on the codebook index determination
297 * formula for RATE_FULL and RATE_HALF frames at section 2.4.8.1.1. It says
298 * you have to subtract the decoded index parameter from the given scaled
299 * codebook vector index 'n' to get the desired circular codebook index, but
300 * it does not mention that you have to clamp 'n' to [0-9] in order to get
301 * RI-compliant results.
302 *
303 * The reason for this mistake seems to be the fact they forgot to mention you
304 * have to do these calculations per codebook subframe and adjust given
305 * equation values accordingly.
306 *
307 * @param q the context
308 * @param gain array holding the 4 pitch subframe gain values
309 * @param cdn_vector array for the generated scaled codebook vector
310 */
312 float *cdn_vector)
313 {
314 int i, j, k;
315 uint16_t cbseed, cindex;
316 float *rnd, tmp_gain, fir_filter_value;
317
320 for (i = 0; i < 16; i++) {
323 for (j = 0; j < 10; j++)
325 }
326 break;
328 for (i = 0; i < 4; i++) {
331 for (j = 0; j < 40; j++)
333 }
334 break;
336 cbseed = (0x0003 & q->
frame.
lspv[4]) << 14 |
342 for (i = 0; i < 8; i++) {
344 for (k = 0; k < 20; k++) {
345 cbseed = 521 * cbseed + 259;
346 *rnd = (int16_t) cbseed;
347
348 // FIR filter
349 fir_filter_value = 0.0;
350 for (j = 0; j < 10; j++)
352 (rnd[-j] + rnd[-20+j]);
353
355 *cdn_vector++ = tmp_gain * fir_filter_value;
356 rnd++;
357 }
358 }
360 20 * sizeof(float));
361 break;
364 for (i = 0; i < 8; i++) {
366 for (j = 0; j < 20; j++) {
367 cbseed = 521 * cbseed + 259;
368 *cdn_vector++ = tmp_gain * (int16_t) cbseed;
369 }
370 }
371 break;
373 cbseed = -44; // random codebook index
374 for (i = 0; i < 4; i++) {
376 for (j = 0; j < 40; j++)
378 }
379 break;
381 memset(cdn_vector, 0, 160 * sizeof(float));
382 break;
383 }
384 }
385
386 /**
387 * Apply generic gain control.
388 *
389 * @param v_out output vector
390 * @param v_in gain-controlled vector
391 * @param v_ref vector to control gain of
392 *
393 * TIA/EIA/IS-733 2.4.8.3, 2.4.8.6
394 */
396 {
397 int i;
398
399 for (i = 0; i < 160; i += 40) {
402 }
403 }
404
405 /**
406 * Apply filter in pitch-subframe steps.
407 *
408 * @param memory buffer for the previous state of the filter
409 * - must be able to contain 303 elements
410 * - the 143 first elements are from the previous state
411 * - the next 160 are for output
412 * @param v_in input filter vector
413 * @param gain per-subframe gain array, each element is between 0.0 and 2.0
414 * @param lag per-subframe lag array, each element is
415 * - between 16 and 143 if its corresponding pfrac is 0,
416 * - between 16 and 139 otherwise
417 * @param pfrac per-subframe boolean array, 1 if the lag is fractional, 0
418 * otherwise
419 *
420 * @return filter output vector
421 */
423 const float gain[4],
const uint8_t *lag,
425 {
// NOTE(review): part of the parameter list and the fractional-lag loop body
// are absent from this excerpt; comments cover only the visible statements.
426 int i, j;
427 float *v_lag, *v_out;
428 const float *v_len;
429
430 v_out = memory + 143; // Output vector starts at memory[143].
431
// Process four consecutive subframes of 40 samples each.
432 for (i = 0; i < 4; i++) {
433 if (gain[i]) {
// v_lag points lag[i] samples behind the first output sample of subframe i.
434 v_lag = memory + 143 + 40 * i - lag[i];
// v_len marks the end of this subframe's 40 input samples.
435 for (v_len = v_in + 40; v_in < v_len; v_in++) {
436 if (pfrac[i]) { // If it is a fractional lag...
// *v_out is zeroed here and presumably accumulated over four taps; the
// loop body (source line 438) is not visible -- confirm.
437 for (j = 0, *v_out = 0.; j < 4; j++)
439 } else
440 *v_out = *v_lag;
441
// Output sample = input sample + gain * delayed sample held in *v_out.
442 *v_out = *v_in + gain[i] * *v_out;
443
444 v_lag++;
445 v_out++;
446 }
447 } else {
// Zero gain: the subframe passes through unchanged.
448 memcpy(v_out, v_in, 40 * sizeof(float));
449 v_in += 40;
450 v_out += 40;
451 }
452 }
453
// Move memory[160..302] down to memory[0..142] so the most recent 143 samples
// serve as history for the next call. The regions overlap, hence memmove.
454 memmove(memory, memory + 160, 143 * sizeof(float));
// memory[143..302] is untouched by the move above and still holds the output.
455 return memory + 143;
456 }
457
458 /**
459 * Apply pitch synthesis filter and pitch prefilter to the scaled codebook vector.
460 * TIA/EIA/IS-733 2.4.5.2, 2.4.8.7.2
461 *
462 * @param q the context
463 * @param cdn_vector the scaled codebook vector
464 */
466 {
467 int i;
468 const float *v_synthesis_filtered, *v_pre_filtered;
469
472
474 // Compute gain & lag for the whole frame.
475 for (i = 0; i < 4; i++) {
477
479 }
480 } else {
481 float max_pitch_gain;
482
486 else
487 max_pitch_gain = 0.0;
488 } else {
490 max_pitch_gain = 1.0;
491 }
492 for (i = 0; i < 4; i++)
494
496 }
497
498 // pitch synthesis filter
502
503 // pitch prefilter update
504 for (i = 0; i < 4; i++)
506
508 v_synthesis_filtered,
511
513 } else {
518 }
519 }
520
521 /**
522 * Reconstruct LPC coefficients from the line spectral pair frequencies
523 * and perform bandwidth expansion.
524 *
525 * @param lspf line spectral pair frequencies
526 * @param lpc linear predictive coding coefficients
527 *
528 * @note: bandwidth_expansion_coeff could be precalculated into a table
529 * but it seems to be slower on x86
530 *
531 * TIA/EIA/IS-733 2.4.3.3.5
532 */
533 static void lspf2lpc(
const float *lspf,
float *lpc)
534 {
// NOTE(review): the declaration/update of bandwidth_expansion_coeff and the
// step that derives lpc[] from lsp[] are absent from this excerpt.
535 double lsp[10];
537 int i;
538
// Map each of the 10 LSP frequencies to lsp[i] = cos(pi * lspf[i]).
539 for (i = 0; i < 10; i++)
540 lsp[i] = cos(
M_PI * lspf[i]);
541
543
// Bandwidth expansion: scale every LPC coefficient in place.
544 for (i = 0; i < 10; i++) {
545 lpc[i] *= bandwidth_expansion_coeff;
547 }
548 }
549
550 /**
551 * Interpolate LSP frequencies and compute LPC coefficients
552 * for a given bitrate & pitch subframe.
553 *
554 * TIA/EIA/IS-733 2.4.3.3.4, 2.4.8.7.2
555 *
556 * @param q the context
557 * @param curr_lspf LSP frequencies vector of the current frame
558 * @param lpc float vector for the resulting LPC
559 * @param subframe_num frame number in decoded stream
560 */
562 float *lpc, const int subframe_num)
563 {
564 float interpolated_lspf[10];
566
568 weight = 0.25 * (subframe_num + 1);
570 weight = 0.625;
571 else
572 weight = 1.0;
573
574 if (weight != 1.0) {
576 weight, 1.0 - weight, 10);
583 }
584
586 {
587 switch (buf_size) {
593 }
594
596 }
597
598 /**
599 * Determine the bitrate from the frame size and/or the first byte of the frame.
600 *
601 * @param avctx the AV codec context
602 * @param buf_size length of the buffer
603 * @param buf the buffer
604 *
605 * @return the bitrate on success,
606 * I_F_Q if the bitrate cannot be satisfactorily determined
607 *
608 * TIA/EIA/IS-733 2.4.8.7.1
609 */
611 const int buf_size,
613 {
615
617 if (bitrate > **buf) {
621 "Claimed bitrate and buffer size mismatch.\n");
623 }
625 } else if (bitrate < **buf) {
627 "Buffer is too small for the claimed bitrate.\n");
629 }
630 (*buf)++;
633 "Bitrate byte is missing, guessing the bitrate from packet size.\n");
634 } else
636
638 // FIXME: Remove this warning when tested with samples.
640 }
641 return bitrate;
642 }
643
645 const char *message)
646 {
649 }
650
652 {
// Lookup tables of successive powers, rounded to six decimals:
// pow_0_775[n] == 0.775^(n + 1) and pow_0_625[n] == 0.625^(n + 1).
653 static const float pow_0_775[10] = {
654 0.775000, 0.600625, 0.465484, 0.360750, 0.279582,
655 0.216676, 0.167924, 0.130141, 0.100859, 0.078166
656 }, pow_0_625[10] = {
657 0.625000, 0.390625, 0.244141, 0.152588, 0.095367,
658 0.059605, 0.037253, 0.023283, 0.014552, 0.009095
659 };
660 float lpc_s[10], lpc_p[10], pole_out[170], zero_out[160];
662
// Build two weighted copies of the LPC coefficients: lpc_s scaled by
// 0.625^(n + 1) and lpc_p scaled by 0.775^(n + 1).
// NOTE(review): the declaration of n is not visible in this excerpt.
663 for (n = 0; n < 10; n++) {
664 lpc_s[
n] = lpc[
n] * pow_0_625[
n];
665 lpc_p[
n] = lpc[
n] * pow_0_775[
n];
666 }
667
// NOTE(review): the filtering calls that use lpc_s, lpc_p, pole_out and
// zero_out are absent from this excerpt; only a trailing argument remains.
673
675
679 160),
681 }
682
684 int *got_frame_ptr,
AVPacket *avpkt)
685 {
687 int buf_size = avpkt->
size;
690 float *outbuffer;
692 float quantized_lspf[10], lpc[10];
693 float gain[16];
694 float *formant_mem;
695
696 /* get output buffer */
700 outbuffer = (
float *)frame->
data[0];
701
704 goto erasure;
705 }
706
710 goto erasure;
711 }
712
718
720
722
723 for (; bitmaps < bitmaps_end; bitmaps++)
725
726 // Check for erasures/blanks on rates 1, 1/4 and 1/8.
729 goto erasure;
730 }
734 goto erasure;
735 }
736
738 for (i = 0; i < 4; i++) {
741 goto erasure;
742 }
743 }
744 }
745 }
746
749
752 goto erasure;
753 }
754
756
758 erasure:
765 } else
767
769 for (i = 0; i < 4; i++) {
772 formant_mem += 40;
773 }
774
775 // postfilter, as per TIA/EIA/IS-733 2.4.8.6
777
779
782
783 *got_frame_ptr = 1;
784
785 return buf_size;
786 }
787
797 };