1 /*
2 * The simplest mpeg audio layer 2 encoder
3 * Copyright (c) 2000, 2001 Fabrice Bellard
4 *
5 * This file is part of FFmpeg.
6 *
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 /**
23 * @file
24 * The simplest mpeg audio layer 2 encoder.
25 */
26
28
33
34 #define FRAC_BITS 15 /* fractional bits for sb_samples and dct */
35 #define WFRAC_BITS 14 /* fractional bits for window */
36
41
42 /* Currently these constants cannot be changed (the quantization stage
43 would need to be modified). */
/* Fixed-point multiply: both operands are widened to int64_t before the
   multiplication, and the product is then shifted right by FRAC_BITS. */
44 #define MUL(a,b) (((int64_t)(a) * (int64_t)(b)) >> FRAC_BITS)
45
/* NOTE(review): presumably the per-channel length of samples_buf; the
   declaration that uses it is not visible here -- confirm. */
46 #define SAMPLES_BUF_SIZE 4096
47
51 int lsf;
/* 1 if mpeg2 low bitrate selected */
54 int frame_size;
/* frame size, in bits, without padding */
55 /* padding computation */
61 /* code to group 3 scale factors */
63 int sblimit;
/* number of used subbands */
68 #if USE_FLOATS
69 float scale_factor_inv_table[64];
70 #else
73 #endif
76
78 {
85
89 }
94
95 /* encoding freq */
99 break;
102 break;
103 }
104 }
108 }
110
111 /* encoding bitrate & frequency */
114 break;
115 }
120 }
124 }
125 s->bitrate_index =
i;
126
127 /* compute total header size & pad bit */
128
130 s->frame_size = ((
int)
a) * 8;
131
132 /* frame fractional size to compute padding */
134 s->frame_frac_incr = (
int)((
a -
floor(
a)) * 65536.0);
135
136 /* select the right allocation table */
138
139 /* number of used subbands */
142
143 ff_dlog(avctx,
"%d kb/s, %d Hz, frame_size=%d bits, table=%d, padincr=%x\n",
145
146 for(
i=0;
i<
s->nb_channels;
i++)
147 s->samples_offset[
i] = 0;
148
150 int v;
152 #if WFRAC_BITS != 16
154 #endif
155 s->filter_bank[
i] = v;
157 v = -v;
159 s->filter_bank[512 -
i] = v;
160 }
161
163 v = (
int)(
exp2((3 -
i) / 3.0) * (1 << 20));
164 if (v <= 0)
165 v = 1;
166 s->scale_factor_table[
i] = v;
167 #if USE_FLOATS
168 s->scale_factor_inv_table[
i] =
exp2(-(3 -
i) / 3.0) / (float)(1 << 20);
169 #else
170 #define P 15
171 s->scale_factor_shift[
i] = 21 -
P - (
i / 3);
172 s->scale_factor_mult[
i] = (1 <<
P) *
exp2((
i % 3) / 3.0);
173 #endif
174 }
177 if (v <= -3)
178 v = 0;
179 else if (v < 0)
180 v = 1;
181 else if (v == 0)
182 v = 2;
183 else if (v < 3)
184 v = 3;
185 else
186 v = 4;
187 s->scale_diff_table[
i] = v;
188 }
189
192 if (v < 0)
193 v = -v;
194 else
195 v = v * 3;
196 s->total_quant_bits[
i] = 12 * v;
197 }
198
199 return 0;
200 }
201
202 /* 32-point fixed-point IDCT (integer arithmetic via MUL), without the 1/sqrt(2) scaling of coefficient zero */
204 {
208
209 for(j=31;j>=3;j-=2)
tab[j] +=
tab[j - 2];
210
213 do {
214 t[0] += t[-4];
215 t[1] += t[1 - 4];
216 t -= 4;
218
221 do {
222 t[0] += t[-8];
223 t[1] += t[1-8];
224 t[2] += t[2-8];
225 t[3] += t[3-8];
226 t -= 8;
228
231 do {
232 t[ 3] = -t[ 3];
233 t[ 6] = -t[ 6];
234
235 t[11] = -t[11];
236 t[12] = -t[12];
237 t[13] = -t[13];
238 t[15] = -t[15];
239 t += 16;
241
242
245 do {
246 int x1, x2, x3, x4;
247
249 x4 = t[0] - x3;
250 x3 = t[0] + x3;
251
253 x1 =
MUL((t[8] - x2), xp[0]);
254 x2 =
MUL((t[8] + x2), xp[1]);
255
256 t[ 0] = x3 + x1;
257 t[ 8] = x4 - x2;
258 t[16] = x4 + x2;
259 t[24] = x3 - x1;
260 t++;
262
263 xp += 2;
266 do {
267 xr =
MUL(t[28],xp[0]);
268 t[28] = (t[0] - xr);
269 t[0] = (t[0] + xr);
270
271 xr =
MUL(t[4],xp[1]);
272 t[ 4] = (t[24] - xr);
273 t[24] = (t[24] + xr);
274
275 xr =
MUL(t[20],xp[2]);
276 t[20] = (t[8] - xr);
277 t[ 8] = (t[8] + xr);
278
279 xr =
MUL(t[12],xp[3]);
280 t[12] = (t[16] - xr);
281 t[16] = (t[16] + xr);
282 t++;
284 xp += 4;
285
286 for (
i = 0;
i < 4;
i++) {
290
294
298
302
303 xp += 2;
304 }
305
308 do {
309 xr =
MUL(
t1[0], *xp);
311 t[0] = (t[0] + xr);
312 t -= 2;
314 xp++;
316
319 }
320 }
321
/* Shift amount derived from the two fractional-bit constants:
   WFRAC_BITS + 15 - FRAC_BITS, i.e. 14 with WFRAC_BITS == 14 and
   FRAC_BITS == 15.
   NOTE(review): presumably applied to the windowed filter sums; the line
   that uses it is not visible here -- confirm. */
322 #define WSHIFT (WFRAC_BITS + 15 - FRAC_BITS)
323
325 {
326 short *p, *q;
329 int tmp1[32];
331
332 offset =
s->samples_offset[ch];
333 out = &
s->sb_samples[ch][0][0][0];
334 for(j=0;j<36;j++) {
335 /* 32 samples at once */
339 }
340
341 /* filter */
342 p =
s->samples_buf[ch] +
offset;
344 /* maxsum = 23169 */
346 sum = p[0*64] * q[0*64];
347 sum += p[1*64] * q[1*64];
348 sum += p[2*64] * q[2*64];
349 sum += p[3*64] * q[3*64];
350 sum += p[4*64] * q[4*64];
351 sum += p[5*64] * q[5*64];
352 sum += p[6*64] * q[6*64];
353 sum += p[7*64] * q[7*64];
355 p++;
356 q++;
357 }
361
363
364 /* advance of 32 samples */
367 /* handle the wrap around */
370 s->samples_buf[ch], (512 - 32) * 2);
372 }
373 }
374 s->samples_offset[ch] =
offset;
375 }
376
378 unsigned char scale_code[
SBLIMIT],
379 unsigned char scale_factors[
SBLIMIT][3],
380 int sb_samples[3][12][
SBLIMIT],
381 int sblimit)
382 {
383 int *p, vmax, v, n,
i, j, k,
code;
385 unsigned char *sf = &scale_factors[0][0];
386
387 for(j=0;j<sblimit;j++) {
389 /* find the max absolute value */
390 p = &sb_samples[
i][0][j];
392 for(k=1;k<12;k++) {
395 if (v > vmax)
396 vmax = v;
397 }
398 /* compute the scale factor index using log 2 computations */
399 if (vmax > 1) {
401 /* n is the position of the MSB of vmax. now
402 use at most 2 compares to find the index */
403 index = (21 - n) * 3 - 3;
405 while (vmax <= s->scale_factor_table[
index+1])
407 } else {
408 index = 0;
/* very unlikely case of overflow */
409 }
410 } else {
411 index = 62;
/* value 63 is not allowed */
412 }
413
416 /* store the scale factor */
419 }
420
421 /* compute the transmission factor : look if the scale factors
422 are close enough to each other */
423 d1 =
s->scale_diff_table[sf[0] - sf[1] + 64];
424 d2 =
s->scale_diff_table[sf[1] - sf[2] + 64];
425
426 /* handle the 25 cases */
427 switch(d1 * 5 + d2) {
428 case 0*5+0:
429 case 0*5+4:
430 case 3*5+4:
431 case 4*5+0:
432 case 4*5+4:
434 break;
435 case 0*5+1:
436 case 0*5+2:
437 case 4*5+1:
438 case 4*5+2:
440 sf[2] = sf[1];
441 break;
442 case 0*5+3:
443 case 4*5+3:
445 sf[1] = sf[2];
446 break;
447 case 1*5+0:
448 case 1*5+4:
449 case 2*5+4:
451 sf[1] = sf[0];
452 break;
453 case 1*5+1:
454 case 1*5+2:
455 case 2*5+0:
456 case 2*5+1:
457 case 2*5+2:
459 sf[1] = sf[2] = sf[0];
460 break;
461 case 2*5+3:
462 case 3*5+3:
464 sf[0] = sf[1] = sf[2];
465 break;
466 case 3*5+0:
467 case 3*5+1:
468 case 3*5+2:
470 sf[0] = sf[2] = sf[1];
471 break;
472 case 1*5+3:
474 if (sf[0] > sf[2])
475 sf[0] = sf[2];
476 sf[1] = sf[2] = sf[0];
477 break;
478 default:
480 code = 0;
/* kill warning */
481 }
482
484 sf[0], sf[1], sf[2], d1, d2,
code);
485 scale_code[j] =
code;
486 sf += 3;
487 }
488 }
489
490 /* The most important function: the psychoacoustic model. This encoder
491 has basically none, so this is the worst you can do, but it is also
492 the simplest. */
494 {
496
497 for(
i=0;
i<
s->sblimit;
i++) {
499 }
500 }
501
502
/* Subband allocation status values.
   NOTE(review): presumably stored in subband_status[ch][sb] by the bit
   allocation code, alongside SB_NOMORE (which that code assigns once a
   subband can no longer grow) -- confirm against the full definition list. */
503 #define SB_NOTALLOCATED 0
504 #define SB_ALLOCATED 1
506
507 /* Try to maximize the SMR (signal-to-mask ratio) while using no more bits
508 than the frame size allows. I tried to make the code simpler, faster and
509 smaller than other encoders :-) */
513 int *padding)
514 {
515 int i, ch,
b, max_smr, max_ch, max_sb, current_frame_size, max_frame_size;
516 int incr;
519 const unsigned char *alloc;
520
521 memcpy(smr, smr1,
s->nb_channels *
sizeof(
short) *
SBLIMIT);
524
525 /* compute frame size and padding */
526 max_frame_size =
s->frame_size;
527 s->frame_frac +=
s->frame_frac_incr;
528 if (
s->frame_frac >= 65536) {
529 s->frame_frac -= 65536;
531 max_frame_size += 8;
532 } else {
534 }
535
536 /* compute the header + bit alloc size */
537 current_frame_size = 32;
538 alloc =
s->alloc_table;
539 for(
i=0;
i<
s->sblimit;
i++) {
540 incr = alloc[0];
541 current_frame_size += incr *
s->nb_channels;
542 alloc += 1 << incr;
543 }
544 for(;;) {
545 /* look for the subband with the largest signal to mask ratio */
546 max_sb = -1;
547 max_ch = -1;
548 max_smr = INT_MIN;
549 for(ch=0;ch<
s->nb_channels;ch++) {
550 for(
i=0;
i<
s->sblimit;
i++) {
551 if (smr[ch][
i] > max_smr && subband_status[ch][
i] !=
SB_NOMORE) {
552 max_smr = smr[ch][
i];
554 max_ch = ch;
555 }
556 }
557 }
558 if (max_sb < 0)
559 break;
560 ff_dlog(
NULL,
"current=%d max=%d max_sb=%d max_ch=%d alloc=%d\n",
561 current_frame_size, max_frame_size, max_sb, max_ch,
563
564 /* find alloc table entry (XXX: not optimal, should use
565 pointer table) */
566 alloc =
s->alloc_table;
567 for(
i=0;
i<max_sb;
i++) {
568 alloc += 1 << alloc[0];
569 }
570
572 /* nothing was coded for this band: add the necessary bits */
574 incr +=
s->total_quant_bits[alloc[1]];
575 } else {
576 /* increments bit allocation */
578 incr =
s->total_quant_bits[alloc[
b + 1]] -
579 s->total_quant_bits[alloc[
b]];
580 }
581
582 if (current_frame_size + incr <= max_frame_size) {
583 /* can increase size */
585 current_frame_size += incr;
586 /* decrease smr by the resolution we added */
587 smr[max_ch][max_sb] = smr1[max_ch][max_sb] -
quant_snr[alloc[
b]];
588 /* max allocation size reached ? */
589 if (
b == ((1 << alloc[0]) - 1))
590 subband_status[max_ch][max_sb] =
SB_NOMORE;
591 else
593 } else {
594 /* cannot increase the size of this subband */
595 subband_status[max_ch][max_sb] =
SB_NOMORE;
596 }
597 }
598 *padding = max_frame_size - current_frame_size;
600 }
601
602 /*
603 * Output the MPEG audio layer 2 frame. Note how the code is small
604 * compared to other encoders :-)
605 */
608 int padding)
609 {
610 int i, j, k, l, bit_alloc_bits,
b, ch;
611 unsigned char *sf;
612 int q[3];
614
615 /* header */
616
618 put_bits(p, 1, 1 -
s->lsf);
/* 1 = MPEG-1 ID, 0 = MPEG-2 lsf ID */
620 put_bits(p, 1, 1);
/* no error protection */
623 put_bits(p, 1,
s->do_padding);
/* use padding */
624 put_bits(p, 1, 0);
/* private_bit */
627 put_bits(p, 1, 0);
/* no copyright */
629 put_bits(p, 2, 0);
/* no emphasis */
630
631 /* bit allocation */
632 j = 0;
633 for(
i=0;
i<
s->sblimit;
i++) {
634 bit_alloc_bits =
s->alloc_table[j];
635 for(ch=0;ch<
s->nb_channels;ch++) {
637 }
638 j += 1 << bit_alloc_bits;
639 }
640
641 /* scale codes */
642 for(
i=0;
i<
s->sblimit;
i++) {
643 for(ch=0;ch<
s->nb_channels;ch++) {
646 }
647 }
648
649 /* scale factors */
650 for(
i=0;
i<
s->sblimit;
i++) {
651 for(ch=0;ch<
s->nb_channels;ch++) {
653 sf = &
s->scale_factors[ch][
i][0];
654 switch(
s->scale_code[ch][
i]) {
655 case 0:
659 break;
660 case 3:
661 case 1:
664 break;
665 case 2:
667 break;
668 }
669 }
670 }
671 }
672
673 /* quantization & write sub band samples */
674
675 for(k=0;k<3;k++) {
676 for(l=0;l<12;l+=3) {
677 j = 0;
678 for(
i=0;
i<
s->sblimit;
i++) {
679 bit_alloc_bits =
s->alloc_table[j];
680 for(ch=0;ch<
s->nb_channels;ch++) {
684 /* we encode 3 sub band samples of the same sub band at a time */
685 qindex =
s->alloc_table[j+
b];
687 for(m=0;m<3;m++) {
688 sample =
s->sb_samples[ch][k][l + m][
i];
689 /* divide by scale factor */
690 #if USE_FLOATS
691 {
693 a = (float)
sample *
s->scale_factor_inv_table[
s->scale_factors[ch][
i][k]];
694 q[m] = (
int)((
a + 1.0) * steps * 0.5);
695 }
696 #else
697 {
699 e =
s->scale_factors[ch][
i][k];
700 shift =
s->scale_factor_shift[e];
701 mult =
s->scale_factor_mult[e];
702
703 /* normalize to P bits */
706 else
712 q[m] = (
q1 * (unsigned)steps) >> (
P + 1);
713 }
714 #endif
715 if (q[m] >= steps)
716 q[m] = steps - 1;
718 }
721 /* group the 3 values to save bits */
723 q[0] + steps * (q[1] + steps * q[2]));
724 } else {
728 }
729 }
730 }
731 /* next subband in alloc table */
732 j += 1 << bit_alloc_bits;
733 }
734 }
735 }
736
737 /* padding */
738 for(
i=0;
i<padding;
i++)
740 }
741
744 {
746 const int16_t *
samples = (
const int16_t *)
frame->data[0];
750
753 }
754
755 for(
i=0;
i<
s->nb_channels;
i++) {
757 s->sb_samples[
i],
s->sblimit);
758 }
759 for(
i=0;
i<
s->nb_channels;
i++) {
761 }
763
766
768
770
771 /* flush */
774
777
778 *got_packet_ptr = 1;
779 return 0;
780 }
781
783 { "b", "0" },
785 };
786