Go to the documentation of this file. 1 /*
2 * Copyright (c) 2020
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
26
28 {
29 switch (dt)
30 {
34 return sizeof(uint8_t);
35 default:
37 return 1;
38 }
39 }
40
42 {
45 int linesize[4] = { 0 };
46 void **dst_data =
NULL;
47 void *middle_data =
NULL;
48 uint8_t *planar_data[4] = { 0 };
52
54 if (bytewidth < 0) {
56 }
57 /* scale == 1 and mean == 0 and dt == UINT8: passthrough */
60 /* (scale == 255 or scale == 0) and mean == 0 and dt == FLOAT: normalization */
64 else {
68 }
69
74 if (!middle_data) {
76 goto err;
77 }
78 dst_data = &middle_data;
80 }
81
87 src_fmt,
92 if (!sws_ctx) {
94 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
98 goto err;
99 }
100 sws_scale(sws_ctx, (
const uint8_t *[4]){(
const uint8_t *)
output->data, 0, 0, 0},
102 (uint8_t * const*)dst_data, linesize);
104 // convert data from planar to packed
113 if (!sws_ctx) {
114 av_log(log_ctx,
AV_LOG_ERROR,
"Impossible to create scale context for the conversion "
115 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
119 goto err;
120 }
122 planar_data[0] = (uint8_t *)middle_data + plane_size;
123 planar_data[1] = (uint8_t *)middle_data + plane_size * 2;
124 planar_data[2] = (uint8_t *)middle_data;
126 planar_data[0] = (uint8_t *)middle_data + plane_size;
127 planar_data[1] = (uint8_t *)middle_data;
128 planar_data[2] = (uint8_t *)middle_data + plane_size * 2;
129 }
130 sws_scale(sws_ctx, (
const uint8_t *
const *)planar_data,
131 (
const int [4]){
frame->
width *
sizeof(uint8_t),
136 }
137 break;
142 break;
157 if (!sws_ctx) {
158 av_log(log_ctx,
AV_LOG_ERROR,
"Impossible to create scale context for the conversion "
159 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
163 goto err;
164 }
165 sws_scale(sws_ctx, (
const uint8_t *[4]){(
const uint8_t *)
output->data, 0, 0, 0},
169 break;
170 default:
173 goto err;
174 }
175
176 err:
179 }
180
182 {
185 int linesize[4] = { 0 };
186 void **src_data =
NULL;
187 void *middle_data =
NULL;
188 uint8_t *planar_data[4] = { 0 };
193 if (bytewidth < 0) {
195 }
196 /* scale == 1 and mean == 0 and dt == UINT8: passthrough */
199 /* (scale == 255 or scale == 0) and mean == 0 and dt == FLOAT: normalization */
203 else {
205 "scale: %f, mean: %f\n",
input->scale,
input->mean);
207 }
208
213 if (!middle_data) {
215 goto err;
216 }
217 src_data = &middle_data;
219 }
220
224 // convert data from planar to packed
233 if (!sws_ctx) {
234 av_log(log_ctx,
AV_LOG_ERROR,
"Impossible to create scale context for the conversion "
235 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
239 goto err;
240 }
242 planar_data[0] = (uint8_t *)middle_data + plane_size;
243 planar_data[1] = (uint8_t *)middle_data + plane_size * 2;
244 planar_data[2] = (uint8_t *)middle_data;
246 planar_data[0] = (uint8_t *)middle_data + plane_size;
247 planar_data[1] = (uint8_t *)middle_data;
248 planar_data[2] = (uint8_t *)middle_data + plane_size * 2;
249 }
252 (const int [4]){frame->width * sizeof(uint8_t),
253 frame->width * sizeof(uint8_t),
254 frame->width * sizeof(uint8_t), 0});
256 }
262 dst_fmt,
264 if (!sws_ctx) {
265 av_log(log_ctx,
AV_LOG_ERROR,
"Impossible to create scale context for the conversion "
266 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
270 goto err;
271 }
272 sws_scale(sws_ctx, (
const uint8_t **)src_data,
274 (uint8_t * const [4]){input->data, 0, 0, 0},
275 (const int [4]){frame->width * 3 * dst_datatype_size, 0, 0, 0});
277 break;
282 break;
295 dst_fmt,
297 if (!sws_ctx) {
298 av_log(log_ctx,
AV_LOG_ERROR,
"Impossible to create scale context for the conversion "
299 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
303 goto err;
304 }
307 (uint8_t * const [4]){input->data, 0, 0, 0},
308 (const int [4]){frame->width * dst_datatype_size, 0, 0, 0});
310 break;
311 default:
314 goto err;
315 }
316 err:
319 }
320
322 {
324 switch (
data->order) {
329 default:
330 av_assert0(!
"unsupported data pixel format.\n");
332 }
333 }
334
337 }
338
340 {
342 int offsetx[4], offsety[4];
343 uint8_t *bbox_data[4];
345 int linesizes[4];
349 int width_idx, height_idx;
354
355 /* (scale != 1 and scale != 0) or mean != 0 */
359 "scale: %f, mean: %f\n",
input->scale,
input->mean);
361 }
362
364 av_log(log_ctx,
AV_LOG_ERROR,
"dnn_classify input data doesn't support layout: NCHW\n");
366 }
367
370
373
378
381 input->dims[width_idx],
382 input->dims[height_idx], fmt,
384 if (!sws_ctx) {
386 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
389 input->dims[width_idx],
390 input->dims[height_idx]);
392 }
393
396 av_log(log_ctx,
AV_LOG_ERROR,
"unable to get linesizes with av_image_fill_linesizes");
399 }
400
403 offsetx[0] = offsetx[3] =
left;
404
406 offsety[0] = offsety[3] = top;
407
410
413 (uint8_t *const [4]){input->data, 0, 0, 0}, linesizes);
414
416
418 }
419
421 {
423 int linesizes[4];
424 int ret = 0, width_idx, height_idx;
426
427 /* (scale != 1 and scale != 0) or mean != 0 */
431 "scale: %f, mean: %f\n",
input->scale,
input->mean);
433 }
434
436 av_log(log_ctx,
AV_LOG_ERROR,
"dnn_detect input data doesn't support layout: NCHW\n");
438 }
439
442
444 input->dims[width_idx],
445 input->dims[height_idx], fmt,
447 if (!sws_ctx) {
448 av_log(log_ctx,
AV_LOG_ERROR,
"Impossible to create scale context for the conversion "
449 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
452 input->dims[height_idx]);
454 }
455
458 av_log(log_ctx,
AV_LOG_ERROR,
"unable to get linesizes with av_image_fill_linesizes");
461 }
462
464 (uint8_t *const [4]){input->data, 0, 0, 0}, linesizes);
465
468 }
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen | new state
---------+-----------------------------------------------
    0    | 256 - state_transition_table[256 - old_state];
    1    |       state_transition_table[      old_state];

state_transition_table = { 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0 };

FIXME

Range Coding of integers:
-------------------------
FIXME

Neighboring Blocks:
===================
left and top are set to the respective blocks unless they are outside of the image, in which case they are set to the Null block.

top-left is set to the top left block unless it is outside of the image, in which case it is set to the left block.

If this block has no larger parent block, or it is at the left side of its parent block and the top right block is not outside of the image, then the top right block is used for top-right; else the top-left block is used.

Null block:
y, cb, cr are 128
level, ref, mx and my are 0

Motion Vector 
Prediction:
=========================
1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames

scaled_mv = (mv * (256 * (current_reference + 1) / (mv.reference + 1)) + 128) >> 8

2. the median of the scaled left