FFmpeg: libavfilter/dnn/dnn_io_proc.c Source File

FFmpeg

[フレーム]

dnn_io_proc.c

Go to the documentation of this file.

1 /*

3 *

4 * This file is part of FFmpeg.

5 *

6 * FFmpeg is free software; you can redistribute it and/or

7 * modify it under the terms of the GNU Lesser General Public

8 * License as published by the Free Software Foundation; either

9 * version 2.1 of the License, or (at your option) any later version.

10 *

11 * FFmpeg is distributed in the hope that it will be useful,

12 * but WITHOUT ANY WARRANTY; without even the implied warranty of

13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

14 * Lesser General Public License for more details.

15 *

16 * You should have received a copy of the GNU Lesser General Public

17 * License along with FFmpeg; if not, write to the Free Software

18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

19 */

21 #include "dnn_io_proc.h"

22 #include "libavutil/imgutils.h"

23 #include "libswscale/swscale.h"

24 #include "libavutil/avassert.h"

25 #include "libavutil/detection_bbox.h"

27 static int get_datatype_size(DNNDataType dt)

28 {

29 switch (dt)

30 {

31 case DNN_FLOAT:

32 return sizeof(float);

33 case DNN_UINT8:

34 return sizeof(uint8_t);

35 default:

36 av_assert0(!"not supported yet.");

37 return 1;

38 }

39 }

41 int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)

42 {

43 struct SwsContext *sws_ctx;

44 int ret = 0;

45 int linesize[4] = { 0 };

46 void **dst_data = NULL;

47 void *middle_data = NULL;

48 uint8_t *planar_data[4] = { 0 };

49 int plane_size = frame->width * frame->height * sizeof(uint8_t);

50 enum AVPixelFormat src_fmt = AV_PIX_FMT_NONE;

51 int src_datatype_size = get_datatype_size(output->dt);

53 int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);

54 if (bytewidth < 0) {

55 return AVERROR(EINVAL);

56 }

57 /* scale == 1 and mean == 0 and dt == UINT8: passthrough */

58 if (fabsf(output->scale - 1) < 1e-6f && fabsf(output->mean) < 1e-6 && output->dt == DNN_UINT8)

59 src_fmt = AV_PIX_FMT_GRAY8;

60 /* (scale == 255 or scale == 0) and mean == 0 and dt == FLOAT: normalization */

61 else if ((fabsf(output->scale - 255) < 1e-6f || fabsf(output->scale) < 1e-6f) &&

62 fabsf(output->mean) < 1e-6 && output->dt == DNN_FLOAT)

63 src_fmt = AV_PIX_FMT_GRAYF32;

64 else {

65 av_log(log_ctx, AV_LOG_ERROR, "dnn_process output data doesn't type: UINT8 "

66 "scale: %f, mean: %f\n", output->scale, output->mean);

67 return AVERROR(ENOSYS);

68 }

70 dst_data = (void **)frame->data;

71 linesize[0] = frame->linesize[0];

72 if (output->layout == DL_NCHW) {

73 middle_data = av_malloc(plane_size * output->dims[1]);

74 if (!middle_data) {

75 ret = AVERROR(ENOMEM);

76 goto err;

77 }

78 dst_data = &middle_data;

79 linesize[0] = frame->width * 3;

80 }

82 switch (frame->format) {

83 case AV_PIX_FMT_RGB24:

84 case AV_PIX_FMT_BGR24:

85 sws_ctx = sws_getContext(frame->width * 3,

86 frame->height,

87 src_fmt,

88 frame->width * 3,

89 frame->height,

90 AV_PIX_FMT_GRAY8,

91 0, NULL, NULL, NULL);

92 if (!sws_ctx) {

93 av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "

94 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",

95 av_get_pix_fmt_name(src_fmt), frame->width * 3, frame->height,

96 av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width * 3, frame->height);

97 ret = AVERROR(EINVAL);

98 goto err;

99 }

100 sws_scale(sws_ctx, (const uint8_t *[4]){(const uint8_t *)output->data, 0, 0, 0},

101 (const int[4]){frame->width * 3 * src_datatype_size, 0, 0, 0}, 0, frame->height,

102 (uint8_t * const*)dst_data, linesize);

103 sws_freeContext(sws_ctx);

104 // convert data from planar to packed

105 if (output->layout == DL_NCHW) {

106 sws_ctx = sws_getContext(frame->width,

107 frame->height,

108 AV_PIX_FMT_GBRP,

109 frame->width,

110 frame->height,

111 frame->format,

112 0, NULL, NULL, NULL);

113 if (!sws_ctx) {

114 av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "

115 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",

116 av_get_pix_fmt_name(AV_PIX_FMT_GBRP), frame->width, frame->height,

117 av_get_pix_fmt_name(frame->format),frame->width, frame->height);

118 ret = AVERROR(EINVAL);

119 goto err;

120 }

121 if (frame->format == AV_PIX_FMT_RGB24) {

122 planar_data[0] = (uint8_t *)middle_data + plane_size;

123 planar_data[1] = (uint8_t *)middle_data + plane_size * 2;

124 planar_data[2] = (uint8_t *)middle_data;

125 } else if (frame->format == AV_PIX_FMT_BGR24) {

126 planar_data[0] = (uint8_t *)middle_data + plane_size;

127 planar_data[1] = (uint8_t *)middle_data;

128 planar_data[2] = (uint8_t *)middle_data + plane_size * 2;

129 }

130 sws_scale(sws_ctx, (const uint8_t * const *)planar_data,

131 (const int [4]){frame->width * sizeof(uint8_t),

132 frame->width * sizeof(uint8_t),

133 frame->width * sizeof(uint8_t), 0},

134 0, frame->height, frame->data, frame->linesize);

135 sws_freeContext(sws_ctx);

136 }

137 break;

138 case AV_PIX_FMT_GRAYF32:

139 av_image_copy_plane(frame->data[0], frame->linesize[0],

140 output->data, bytewidth,

141 bytewidth, frame->height);

142 break;

143 case AV_PIX_FMT_YUV420P:

144 case AV_PIX_FMT_YUV422P:

145 case AV_PIX_FMT_YUV444P:

146 case AV_PIX_FMT_YUV410P:

147 case AV_PIX_FMT_YUV411P:

148 case AV_PIX_FMT_GRAY8:

149 case AV_PIX_FMT_NV12:

150 sws_ctx = sws_getContext(frame->width,

151 frame->height,

152 AV_PIX_FMT_GRAYF32,

153 frame->width,

154 frame->height,

155 AV_PIX_FMT_GRAY8,

156 0, NULL, NULL, NULL);

157 if (!sws_ctx) {

158 av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "

159 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",

160 av_get_pix_fmt_name(src_fmt), frame->width, frame->height,

161 av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width, frame->height);

162 ret = AVERROR(EINVAL);

163 goto err;

164 }

165 sws_scale(sws_ctx, (const uint8_t *[4]){(const uint8_t *)output->data, 0, 0, 0},

166 (const int[4]){frame->width * src_datatype_size, 0, 0, 0}, 0, frame->height,

167 (uint8_t * const*)frame->data, frame->linesize);

168 sws_freeContext(sws_ctx);

169 break;

170 default:

171 avpriv_report_missing_feature(log_ctx, "%s", av_get_pix_fmt_name(frame->format));

172 ret = AVERROR(ENOSYS);

173 goto err;

174 }

175

176 err:

177 av_free(middle_data);

178 return ret;

179 }

180

181 int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)

182 {

183 struct SwsContext *sws_ctx;

184 int ret = 0;

185 int linesize[4] = { 0 };

186 void **src_data = NULL;

187 void *middle_data = NULL;

188 uint8_t *planar_data[4] = { 0 };

189 int plane_size = frame->width * frame->height * sizeof(uint8_t);

190 enum AVPixelFormat dst_fmt = AV_PIX_FMT_NONE;

191 int dst_datatype_size = get_datatype_size(input->dt);

192 int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);

193 if (bytewidth < 0) {

194 return AVERROR(EINVAL);

195 }

196 /* scale == 1 and mean == 0 and dt == UINT8: passthrough */

197 if (fabsf(input->scale - 1) < 1e-6f && fabsf(input->mean) < 1e-6 && input->dt == DNN_UINT8)

198 dst_fmt = AV_PIX_FMT_GRAY8;

199 /* (scale == 255 or scale == 0) and mean == 0 and dt == FLOAT: normalization */

200 else if ((fabsf(input->scale - 255) < 1e-6f || fabsf(input->scale) < 1e-6f) &&

201 fabsf(input->mean) < 1e-6 && input->dt == DNN_FLOAT)

202 dst_fmt = AV_PIX_FMT_GRAYF32;

203 else {

204 av_log(log_ctx, AV_LOG_ERROR, "dnn_process input data doesn't support type: UINT8 "

205 "scale: %f, mean: %f\n", input->scale, input->mean);

206 return AVERROR(ENOSYS);

207 }

208

209 src_data = (void **)frame->data;

210 linesize[0] = frame->linesize[0];

211 if (input->layout == DL_NCHW) {

212 middle_data = av_malloc(plane_size * input->dims[1]);

213 if (!middle_data) {

214 ret = AVERROR(ENOMEM);

215 goto err;

216 }

217 src_data = &middle_data;

218 linesize[0] = frame->width * 3;

219 }

220

221 switch (frame->format) {

222 case AV_PIX_FMT_RGB24:

223 case AV_PIX_FMT_BGR24:

224 // convert data from planar to packed

225 if (input->layout == DL_NCHW) {

226 sws_ctx = sws_getContext(frame->width,

227 frame->height,

228 frame->format,

229 frame->width,

230 frame->height,

231 AV_PIX_FMT_GBRP,

232 0, NULL, NULL, NULL);

233 if (!sws_ctx) {

234 av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "

235 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",

236 av_get_pix_fmt_name(frame->format), frame->width, frame->height,

237 av_get_pix_fmt_name(AV_PIX_FMT_GBRP),frame->width, frame->height);

238 ret = AVERROR(EINVAL);

239 goto err;

240 }

241 if (frame->format == AV_PIX_FMT_RGB24) {

242 planar_data[0] = (uint8_t *)middle_data + plane_size;

243 planar_data[1] = (uint8_t *)middle_data + plane_size * 2;

244 planar_data[2] = (uint8_t *)middle_data;

245 } else if (frame->format == AV_PIX_FMT_BGR24) {

246 planar_data[0] = (uint8_t *)middle_data + plane_size;

247 planar_data[1] = (uint8_t *)middle_data;

248 planar_data[2] = (uint8_t *)middle_data + plane_size * 2;

249 }

250 sws_scale(sws_ctx, (const uint8_t * const *)frame->data,

251 frame->linesize, 0, frame->height, planar_data,

252 (const int [4]){frame->width * sizeof(uint8_t),

253 frame->width * sizeof(uint8_t),

254 frame->width * sizeof(uint8_t), 0});

255 sws_freeContext(sws_ctx);

256 }

257 sws_ctx = sws_getContext(frame->width * 3,

258 frame->height,

259 AV_PIX_FMT_GRAY8,

260 frame->width * 3,

261 frame->height,

262 dst_fmt,

263 0, NULL, NULL, NULL);

264 if (!sws_ctx) {

265 av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "

266 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",

267 av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width * 3, frame->height,

268 av_get_pix_fmt_name(dst_fmt),frame->width * 3, frame->height);

269 ret = AVERROR(EINVAL);

270 goto err;

271 }

272 sws_scale(sws_ctx, (const uint8_t **)src_data,

273 linesize, 0, frame->height,

274 (uint8_t * const [4]){input->data, 0, 0, 0},

275 (const int [4]){frame->width * 3 * dst_datatype_size, 0, 0, 0});

276 sws_freeContext(sws_ctx);

277 break;

278 case AV_PIX_FMT_GRAYF32:

279 av_image_copy_plane(input->data, bytewidth,

280 frame->data[0], frame->linesize[0],

281 bytewidth, frame->height);

282 break;

283 case AV_PIX_FMT_YUV420P:

284 case AV_PIX_FMT_YUV422P:

285 case AV_PIX_FMT_YUV444P:

286 case AV_PIX_FMT_YUV410P:

287 case AV_PIX_FMT_YUV411P:

288 case AV_PIX_FMT_GRAY8:

289 case AV_PIX_FMT_NV12:

290 sws_ctx = sws_getContext(frame->width,

291 frame->height,

292 AV_PIX_FMT_GRAY8,

293 frame->width,

294 frame->height,

295 dst_fmt,

296 0, NULL, NULL, NULL);

297 if (!sws_ctx) {

298 av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "

299 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",

300 av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width, frame->height,

301 av_get_pix_fmt_name(dst_fmt),frame->width, frame->height);

302 ret = AVERROR(EINVAL);

303 goto err;

304 }

305 sws_scale(sws_ctx, (const uint8_t **)frame->data,

306 frame->linesize, 0, frame->height,

307 (uint8_t * const [4]){input->data, 0, 0, 0},

308 (const int [4]){frame->width * dst_datatype_size, 0, 0, 0});

309 sws_freeContext(sws_ctx);

310 break;

311 default:

312 avpriv_report_missing_feature(log_ctx, "%s", av_get_pix_fmt_name(frame->format));

313 ret = AVERROR(ENOSYS);

314 goto err;

315 }

316 err:

317 av_free(middle_data);

318 return ret;

319 }

320

321 static enum AVPixelFormat get_pixel_format(DNNData *data)

322 {

323 if (data->dt == DNN_UINT8) {

324 switch (data->order) {

325 case DCO_BGR:

326 return AV_PIX_FMT_BGR24;

327 case DCO_RGB:

328 return AV_PIX_FMT_RGB24;

329 default:

330 av_assert0(!"unsupported data pixel format.\n");

331 return AV_PIX_FMT_BGR24;

332 }

333 }

334

335 av_assert0(!"unsupported data type.\n");

336 return AV_PIX_FMT_BGR24;

337 }

338

339 int ff_frame_to_dnn_classify(AVFrame *frame, DNNData *input, uint32_t bbox_index, void *log_ctx)

340 {

341 const AVPixFmtDescriptor *desc;

342 int offsetx[4], offsety[4];

343 uint8_t *bbox_data[4];

344 struct SwsContext *sws_ctx;

345 int linesizes[4];

346 int ret = 0;

347 enum AVPixelFormat fmt;

348 int left, top, width, height;

349 int width_idx, height_idx;

350 const AVDetectionBBoxHeader *header;

351 const AVDetectionBBox *bbox;

352 AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES);

353 av_assert0(sd);

354

355 /* (scale != 1 and scale != 0) or mean != 0 */

356 if ((fabsf(input->scale - 1) > 1e-6f && fabsf(input->scale) > 1e-6f) ||

357 fabsf(input->mean) > 1e-6f) {

358 av_log(log_ctx, AV_LOG_ERROR, "dnn_classify input data doesn't support "

359 "scale: %f, mean: %f\n", input->scale, input->mean);

360 return AVERROR(ENOSYS);

361 }

362

363 if (input->layout == DL_NCHW) {

364 av_log(log_ctx, AV_LOG_ERROR, "dnn_classify input data doesn't support layout: NCHW\n");

365 return AVERROR(ENOSYS);

366 }

367

368 width_idx = dnn_get_width_idx_by_layout(input->layout);

369 height_idx = dnn_get_height_idx_by_layout(input->layout);

370

371 header = (const AVDetectionBBoxHeader *)sd->data;

372 bbox = av_get_detection_bbox(header, bbox_index);

373

374 left = bbox->x;

375 width = bbox->w;

376 top = bbox->y;

377 height = bbox->h;

378

379 fmt = get_pixel_format(input);

380 sws_ctx = sws_getContext(width, height, frame->format,

381 input->dims[width_idx],

382 input->dims[height_idx], fmt,

383 SWS_FAST_BILINEAR, NULL, NULL, NULL);

384 if (!sws_ctx) {

385 av_log(log_ctx, AV_LOG_ERROR, "Failed to create scale context for the conversion "

386 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",

387 av_get_pix_fmt_name(frame->format), width, height,

388 av_get_pix_fmt_name(fmt),

389 input->dims[width_idx],

390 input->dims[height_idx]);

391 return AVERROR(EINVAL);

392 }

393

394 ret = av_image_fill_linesizes(linesizes, fmt, input->dims[width_idx]);

395 if (ret < 0) {

396 av_log(log_ctx, AV_LOG_ERROR, "unable to get linesizes with av_image_fill_linesizes");

397 sws_freeContext(sws_ctx);

398 return ret;

399 }

400

401 desc = av_pix_fmt_desc_get(frame->format);

402 offsetx[1] = offsetx[2] = AV_CEIL_RSHIFT(left, desc->log2_chroma_w);

403 offsetx[0] = offsetx[3] = left;

404

405 offsety[1] = offsety[2] = AV_CEIL_RSHIFT(top, desc->log2_chroma_h);

406 offsety[0] = offsety[3] = top;

407

408 for (int k = 0; frame->data[k]; k++)

409 bbox_data[k] = frame->data[k] + offsety[k] * frame->linesize[k] + offsetx[k];

410

411 sws_scale(sws_ctx, (const uint8_t *const *)&bbox_data, frame->linesize,

412 0, height,

413 (uint8_t *const [4]){input->data, 0, 0, 0}, linesizes);

414

415 sws_freeContext(sws_ctx);

416

417 return ret;

418 }

419

420 int ff_frame_to_dnn_detect(AVFrame *frame, DNNData *input, void *log_ctx)

421 {

422 struct SwsContext *sws_ctx;

423 int linesizes[4];

424 int ret = 0, width_idx, height_idx;

425 enum AVPixelFormat fmt = get_pixel_format(input);

426

427 /* (scale != 1 and scale != 0) or mean != 0 */

428 if ((fabsf(input->scale - 1) > 1e-6f && fabsf(input->scale) > 1e-6f) ||

429 fabsf(input->mean) > 1e-6f) {

430 av_log(log_ctx, AV_LOG_ERROR, "dnn_detect input data doesn't support "

431 "scale: %f, mean: %f\n", input->scale, input->mean);

432 return AVERROR(ENOSYS);

433 }

434

435 if (input->layout == DL_NCHW) {

436 av_log(log_ctx, AV_LOG_ERROR, "dnn_detect input data doesn't support layout: NCHW\n");

437 return AVERROR(ENOSYS);

438 }

439

440 width_idx = dnn_get_width_idx_by_layout(input->layout);

441 height_idx = dnn_get_height_idx_by_layout(input->layout);

442

443 sws_ctx = sws_getContext(frame->width, frame->height, frame->format,

444 input->dims[width_idx],

445 input->dims[height_idx], fmt,

446 SWS_FAST_BILINEAR, NULL, NULL, NULL);

447 if (!sws_ctx) {

448 av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "

449 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",

450 av_get_pix_fmt_name(frame->format), frame->width, frame->height,

451 av_get_pix_fmt_name(fmt), input->dims[width_idx],

452 input->dims[height_idx]);

453 return AVERROR(EINVAL);

454 }

455

456 ret = av_image_fill_linesizes(linesizes, fmt, input->dims[width_idx]);

457 if (ret < 0) {

458 av_log(log_ctx, AV_LOG_ERROR, "unable to get linesizes with av_image_fill_linesizes");

459 sws_freeContext(sws_ctx);

460 return ret;

461 }

462

463 sws_scale(sws_ctx, (const uint8_t *const *)frame->data, frame->linesize, 0, frame->height,

464 (uint8_t *const [4]){input->data, 0, 0, 0}, linesizes);

465

466 sws_freeContext(sws_ctx);

467 return ret;

468 }

AVPixelFormat

Pixel format.

Definition: pixfmt.h:71

AVERROR

Filter the word "frame" indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions

av_frame_get_side_data

AVFrameSideData * av_frame_get_side_data(const AVFrame *frame, enum AVFrameSideDataType type)

Definition: frame.c:838

av_pix_fmt_desc_get

const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)

Definition: pixdesc.c:2962

output

filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output

Definition: filter_design.txt:225

AVFrame

This structure describes decoded (raw) audio or video data.

Definition: frame.h:344

AVFrame::width

int width

Definition: frame.h:416

data

const char data[16]

Definition: mxf.c:148

AV_PIX_FMT_BGR24

@ AV_PIX_FMT_BGR24

packed RGB 8:8:8, 24bpp, BGRBGR...

Definition: pixfmt.h:76

sws_scale

int attribute_align_arg sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[], const int srcStride[], int srcSliceY, int srcSliceH, uint8_t *const dst[], const int dstStride[])

swscale wrapper, so we don't need to export the SwsContext.

Definition: swscale.c:1205

dnn_io_proc.h

AVDetectionBBox::y

int y

Definition: detection_bbox.h:32

AVFrame::data

uint8_t * data[AV_NUM_DATA_POINTERS]

pointer to the picture/channel planes.

Definition: frame.h:365

av_malloc

#define av_malloc(s)

Definition: tableprint_vlc.h:30

av_image_copy_plane

void av_image_copy_plane(uint8_t *dst, int dst_linesize, const uint8_t *src, int src_linesize, int bytewidth, int height)

Copy image plane from src to dst.

Definition: imgutils.c:374

SWS_FAST_BILINEAR

#define SWS_FAST_BILINEAR

Definition: swscale.h:65

dnn_get_width_idx_by_layout

static int dnn_get_width_idx_by_layout(DNNLayout layout)

Definition: dnn_interface.h:137

get_pixel_format

static enum AVPixelFormat get_pixel_format(DNNData *data)

Definition: dnn_io_proc.c:321

fabsf

static __device__ float fabsf(float a)

Definition: cuda_runtime.h:181

av_get_detection_bbox

static av_always_inline AVDetectionBBox * av_get_detection_bbox(const AVDetectionBBoxHeader *header, unsigned int idx)

Definition: detection_bbox.h:84

avassert.h

AV_LOG_ERROR

#define AV_LOG_ERROR

Something went wrong and cannot losslessly be recovered.

Definition: log.h:180

float

Definition: af_crystalizer.c:121

width

#define width

av_image_fill_linesizes

int av_image_fill_linesizes(int linesizes[4], enum AVPixelFormat pix_fmt, int width)

Fill plane linesizes for an image with pixel format pix_fmt and width width.

Definition: imgutils.c:89

AV_CEIL_RSHIFT

#define AV_CEIL_RSHIFT(a, b)

Definition: common.h:58

av_assert0

#define av_assert0(cond)

assert() equivalent, that is always enabled.

Definition: avassert.h:40

DNNData

Definition: dnn_interface.h:65

DL_NCHW

@ DL_NCHW

Definition: dnn_interface.h:61

AV_PIX_FMT_YUV420P

@ AV_PIX_FMT_YUV420P

planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)

Definition: pixfmt.h:73

AV_PIX_FMT_GRAYF32

#define AV_PIX_FMT_GRAYF32

Definition: pixfmt.h:511

frame

static AVFrame * frame

Definition: demux_decode.c:54

if(ret)

Definition: filter_design.txt:179

ff_proc_from_frame_to_dnn

int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)

Definition: dnn_io_proc.c:181

ff_frame_to_dnn_detect

int ff_frame_to_dnn_detect(AVFrame *frame, DNNData *input, void *log_ctx)

Definition: dnn_io_proc.c:420

NULL

#define NULL

Definition: coverity.c:32

AVDetectionBBoxHeader

Definition: detection_bbox.h:56

AV_PIX_FMT_GRAY8

@ AV_PIX_FMT_GRAY8

Y , 8bpp.

Definition: pixfmt.h:81

Definition: af_crystalizer.c:121

AV_PIX_FMT_RGB24

@ AV_PIX_FMT_RGB24

packed RGB 8:8:8, 24bpp, RGBRGB...

Definition: pixfmt.h:75

AVDetectionBBox::w

int w

Definition: detection_bbox.h:33

sws_getContext

struct SwsContext * sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat, int dstW, int dstH, enum AVPixelFormat dstFormat, int flags, SwsFilter *srcFilter, SwsFilter *dstFilter, const double *param)

Allocate and return an SwsContext.

Definition: utils.c:2094

avpriv_report_missing_feature

void avpriv_report_missing_feature(void *avc, const char *msg,...) av_printf_format(2

Log a generic warning message about a missing feature.

AVFrameSideData::data

uint8_t * data

Definition: frame.h:252

DNNDataType

Definition: dnn_interface.h:37

AVFrame::format

int format

format of the frame, -1 if unknown or unset Values correspond to enum AVPixelFormat for video frames,...

Definition: frame.h:431

get_datatype_size

static int get_datatype_size(DNNDataType dt)

Definition: dnn_io_proc.c:27

header

static const uint8_t header[24]

Definition: sdr2.c:68

height

#define height

DNN_FLOAT

@ DNN_FLOAT

Definition: dnn_interface.h:37

input

and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input

Definition: filter_design.txt:172

av_image_get_linesize

int av_image_get_linesize(enum AVPixelFormat pix_fmt, int width, int plane)

Compute the size of an image line with format pix_fmt and width width for the plane plane.

Definition: imgutils.c:76

ret

Definition: filter_design.txt:187

AV_PIX_FMT_NV12

@ AV_PIX_FMT_NV12

planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...

Definition: pixfmt.h:96

AVDetectionBBox::h

int h

Definition: detection_bbox.h:34

DNN_UINT8

@ DNN_UINT8

Definition: dnn_interface.h:37

left

Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled left

Definition: snow.txt:386

AVFrame::height

int height

Definition: frame.h:416

sws_freeContext

void sws_freeContext(struct SwsContext *swsContext)

Free the swscaler context swsContext.

Definition: utils.c:2425

AV_PIX_FMT_NONE

@ AV_PIX_FMT_NONE

Definition: pixfmt.h:72

AVDetectionBBox::x