FFmpeg: libavfilter/dnn/dnn_io_proc.c Source File

FFmpeg

[フレーム]

dnn_io_proc.c

Go to the documentation of this file.

1 /*

3 *

4 * This file is part of FFmpeg.

5 *

6 * FFmpeg is free software; you can redistribute it and/or

7 * modify it under the terms of the GNU Lesser General Public

8 * License as published by the Free Software Foundation; either

9 * version 2.1 of the License, or (at your option) any later version.

10 *

11 * FFmpeg is distributed in the hope that it will be useful,

12 * but WITHOUT ANY WARRANTY; without even the implied warranty of

13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

14 * Lesser General Public License for more details.

15 *

16 * You should have received a copy of the GNU Lesser General Public

17 * License along with FFmpeg; if not, write to the Free Software

18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

19 */

21 #include "dnn_io_proc.h"

22 #include "libavutil/imgutils.h"

23 #include "libavutil/mem.h"

24 #include "libswscale/swscale.h"

25 #include "libavutil/avassert.h"

26 #include "libavutil/detection_bbox.h"

28 static int get_datatype_size(DNNDataType dt)

29 {

30 switch (dt)

31 {

32 case DNN_FLOAT:

33 return sizeof(float);

34 case DNN_UINT8:

35 return sizeof(uint8_t);

36 default:

37 av_assert0(!"not supported yet.");

38 return 1;

39 }

40 }

42 int ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)

43 {

44 struct SwsContext *sws_ctx;

45 int ret = 0;

46 int linesize[4] = { 0 };

47 void **dst_data = NULL;

48 void *middle_data = NULL;

49 uint8_t *planar_data[4] = { 0 };

50 int plane_size = frame->width * frame->height * sizeof(uint8_t);

51 enum AVPixelFormat src_fmt = AV_PIX_FMT_NONE;

52 int src_datatype_size = get_datatype_size(output->dt);

54 int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);

55 if (bytewidth < 0) {

56 return AVERROR(EINVAL);

57 }

58 /* scale == 1 and mean == 0 and dt == UINT8: passthrough */

59 if (fabsf(output->scale - 1) < 1e-6f && fabsf(output->mean) < 1e-6 && output->dt == DNN_UINT8)

60 src_fmt = AV_PIX_FMT_GRAY8;

61 /* (scale == 255 or scale == 0) and mean == 0 and dt == FLOAT: normalization */

62 else if ((fabsf(output->scale - 255) < 1e-6f || fabsf(output->scale) < 1e-6f) &&

63 fabsf(output->mean) < 1e-6 && output->dt == DNN_FLOAT)

64 src_fmt = AV_PIX_FMT_GRAYF32;

65 else {

66 av_log(log_ctx, AV_LOG_ERROR, "dnn_process output data doesn't type: UINT8 "

67 "scale: %f, mean: %f\n", output->scale, output->mean);

68 return AVERROR(ENOSYS);

69 }

71 dst_data = (void **)frame->data;

72 linesize[0] = frame->linesize[0];

73 if (output->layout == DL_NCHW) {

74 middle_data = av_malloc(plane_size * output->dims[1]);

75 if (!middle_data) {

76 ret = AVERROR(ENOMEM);

77 goto err;

78 }

79 dst_data = &middle_data;

80 linesize[0] = frame->width * 3;

81 }

83 switch (frame->format) {

84 case AV_PIX_FMT_RGB24:

85 case AV_PIX_FMT_BGR24:

86 sws_ctx = sws_getContext(frame->width * 3,

87 frame->height,

88 src_fmt,

89 frame->width * 3,

90 frame->height,

91 AV_PIX_FMT_GRAY8,

92 0, NULL, NULL, NULL);

93 if (!sws_ctx) {

94 av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "

95 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",

96 av_get_pix_fmt_name(src_fmt), frame->width * 3, frame->height,

97 av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width * 3, frame->height);

98 ret = AVERROR(EINVAL);

99 goto err;

100 }

101 sws_scale(sws_ctx, (const uint8_t *[4]){(const uint8_t *)output->data, 0, 0, 0},

102 (const int[4]){frame->width * 3 * src_datatype_size, 0, 0, 0}, 0, frame->height,

103 (uint8_t * const*)dst_data, linesize);

104 sws_freeContext(sws_ctx);

105 // convert data from planar to packed

106 if (output->layout == DL_NCHW) {

107 sws_ctx = sws_getContext(frame->width,

108 frame->height,

109 AV_PIX_FMT_GBRP,

110 frame->width,

111 frame->height,

112 frame->format,

113 0, NULL, NULL, NULL);

114 if (!sws_ctx) {

115 av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "

116 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",

117 av_get_pix_fmt_name(AV_PIX_FMT_GBRP), frame->width, frame->height,

118 av_get_pix_fmt_name(frame->format),frame->width, frame->height);

119 ret = AVERROR(EINVAL);

120 goto err;

121 }

122 if (frame->format == AV_PIX_FMT_RGB24) {

123 planar_data[0] = (uint8_t *)middle_data + plane_size;

124 planar_data[1] = (uint8_t *)middle_data + plane_size * 2;

125 planar_data[2] = (uint8_t *)middle_data;

126 } else if (frame->format == AV_PIX_FMT_BGR24) {

127 planar_data[0] = (uint8_t *)middle_data + plane_size;

128 planar_data[1] = (uint8_t *)middle_data;

129 planar_data[2] = (uint8_t *)middle_data + plane_size * 2;

130 }

131 sws_scale(sws_ctx, (const uint8_t * const *)planar_data,

132 (const int [4]){frame->width * sizeof(uint8_t),

133 frame->width * sizeof(uint8_t),

134 frame->width * sizeof(uint8_t), 0},

135 0, frame->height, frame->data, frame->linesize);

136 sws_freeContext(sws_ctx);

137 }

138 break;

139 case AV_PIX_FMT_GRAYF32:

140 av_image_copy_plane(frame->data[0], frame->linesize[0],

141 output->data, bytewidth,

142 bytewidth, frame->height);

143 break;

144 case AV_PIX_FMT_YUV420P:

145 case AV_PIX_FMT_YUV422P:

146 case AV_PIX_FMT_YUV444P:

147 case AV_PIX_FMT_YUV410P:

148 case AV_PIX_FMT_YUV411P:

149 case AV_PIX_FMT_GRAY8:

150 case AV_PIX_FMT_NV12:

151 sws_ctx = sws_getContext(frame->width,

152 frame->height,

153 AV_PIX_FMT_GRAYF32,

154 frame->width,

155 frame->height,

156 AV_PIX_FMT_GRAY8,

157 0, NULL, NULL, NULL);

158 if (!sws_ctx) {

159 av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "

160 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",

161 av_get_pix_fmt_name(src_fmt), frame->width, frame->height,

162 av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width, frame->height);

163 ret = AVERROR(EINVAL);

164 goto err;

165 }

166 sws_scale(sws_ctx, (const uint8_t *[4]){(const uint8_t *)output->data, 0, 0, 0},

167 (const int[4]){frame->width * src_datatype_size, 0, 0, 0}, 0, frame->height,

168 (uint8_t * const*)frame->data, frame->linesize);

169 sws_freeContext(sws_ctx);

170 break;

171 default:

172 avpriv_report_missing_feature(log_ctx, "%s", av_get_pix_fmt_name(frame->format));

173 ret = AVERROR(ENOSYS);

174 goto err;

175 }

176

177 err:

178 av_free(middle_data);

179 return ret;

180 }

181

182 int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)

183 {

184 struct SwsContext *sws_ctx;

185 int ret = 0;

186 int linesize[4] = { 0 };

187 void **src_data = NULL;

188 void *middle_data = NULL;

189 uint8_t *planar_data[4] = { 0 };

190 int plane_size = frame->width * frame->height * sizeof(uint8_t);

191 enum AVPixelFormat dst_fmt = AV_PIX_FMT_NONE;

192 int dst_datatype_size = get_datatype_size(input->dt);

193 int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);

194 if (bytewidth < 0) {

195 return AVERROR(EINVAL);

196 }

197 /* scale == 1 and mean == 0 and dt == UINT8: passthrough */

198 if (fabsf(input->scale - 1) < 1e-6f && fabsf(input->mean) < 1e-6 && input->dt == DNN_UINT8)

199 dst_fmt = AV_PIX_FMT_GRAY8;

200 /* (scale == 255 or scale == 0) and mean == 0 and dt == FLOAT: normalization */

201 else if ((fabsf(input->scale - 255) < 1e-6f || fabsf(input->scale) < 1e-6f) &&

202 fabsf(input->mean) < 1e-6 && input->dt == DNN_FLOAT)

203 dst_fmt = AV_PIX_FMT_GRAYF32;

204 else {

205 av_log(log_ctx, AV_LOG_ERROR, "dnn_process input data doesn't support type: UINT8 "

206 "scale: %f, mean: %f\n", input->scale, input->mean);

207 return AVERROR(ENOSYS);

208 }

209

210 src_data = (void **)frame->data;

211 linesize[0] = frame->linesize[0];

212 if (input->layout == DL_NCHW) {

213 middle_data = av_malloc(plane_size * input->dims[1]);

214 if (!middle_data) {

215 ret = AVERROR(ENOMEM);

216 goto err;

217 }

218 src_data = &middle_data;

219 linesize[0] = frame->width * 3;

220 }

221

222 switch (frame->format) {

223 case AV_PIX_FMT_RGB24:

224 case AV_PIX_FMT_BGR24:

225 // convert data from planar to packed

226 if (input->layout == DL_NCHW) {

227 sws_ctx = sws_getContext(frame->width,

228 frame->height,

229 frame->format,

230 frame->width,

231 frame->height,

232 AV_PIX_FMT_GBRP,

233 0, NULL, NULL, NULL);

234 if (!sws_ctx) {

235 av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "

236 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",

237 av_get_pix_fmt_name(frame->format), frame->width, frame->height,

238 av_get_pix_fmt_name(AV_PIX_FMT_GBRP),frame->width, frame->height);

239 ret = AVERROR(EINVAL);

240 goto err;

241 }

242 if (frame->format == AV_PIX_FMT_RGB24) {

243 planar_data[0] = (uint8_t *)middle_data + plane_size;

244 planar_data[1] = (uint8_t *)middle_data + plane_size * 2;

245 planar_data[2] = (uint8_t *)middle_data;

246 } else if (frame->format == AV_PIX_FMT_BGR24) {

247 planar_data[0] = (uint8_t *)middle_data + plane_size;

248 planar_data[1] = (uint8_t *)middle_data;

249 planar_data[2] = (uint8_t *)middle_data + plane_size * 2;

250 }

251 sws_scale(sws_ctx, (const uint8_t * const *)frame->data,

252 frame->linesize, 0, frame->height, planar_data,

253 (const int [4]){frame->width * sizeof(uint8_t),

254 frame->width * sizeof(uint8_t),

255 frame->width * sizeof(uint8_t), 0});

256 sws_freeContext(sws_ctx);

257 }

258 sws_ctx = sws_getContext(frame->width * 3,

259 frame->height,

260 AV_PIX_FMT_GRAY8,

261 frame->width * 3,

262 frame->height,

263 dst_fmt,

264 0, NULL, NULL, NULL);

265 if (!sws_ctx) {

266 av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "

267 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",

268 av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width * 3, frame->height,

269 av_get_pix_fmt_name(dst_fmt),frame->width * 3, frame->height);

270 ret = AVERROR(EINVAL);

271 goto err;

272 }

273 sws_scale(sws_ctx, (const uint8_t **)src_data,

274 linesize, 0, frame->height,

275 (uint8_t * const [4]){input->data, 0, 0, 0},

276 (const int [4]){frame->width * 3 * dst_datatype_size, 0, 0, 0});

277 sws_freeContext(sws_ctx);

278 break;

279 case AV_PIX_FMT_GRAYF32:

280 av_image_copy_plane(input->data, bytewidth,

281 frame->data[0], frame->linesize[0],

282 bytewidth, frame->height);

283 break;

284 case AV_PIX_FMT_YUV420P:

285 case AV_PIX_FMT_YUV422P:

286 case AV_PIX_FMT_YUV444P:

287 case AV_PIX_FMT_YUV410P:

288 case AV_PIX_FMT_YUV411P:

289 case AV_PIX_FMT_GRAY8:

290 case AV_PIX_FMT_NV12:

291 sws_ctx = sws_getContext(frame->width,

292 frame->height,

293 AV_PIX_FMT_GRAY8,

294 frame->width,

295 frame->height,

296 dst_fmt,

297 0, NULL, NULL, NULL);

298 if (!sws_ctx) {

299 av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "

300 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",

301 av_get_pix_fmt_name(AV_PIX_FMT_GRAY8), frame->width, frame->height,

302 av_get_pix_fmt_name(dst_fmt),frame->width, frame->height);

303 ret = AVERROR(EINVAL);

304 goto err;

305 }

306 sws_scale(sws_ctx, (const uint8_t **)frame->data,

307 frame->linesize, 0, frame->height,

308 (uint8_t * const [4]){input->data, 0, 0, 0},

309 (const int [4]){frame->width * dst_datatype_size, 0, 0, 0});

310 sws_freeContext(sws_ctx);

311 break;

312 default:

313 avpriv_report_missing_feature(log_ctx, "%s", av_get_pix_fmt_name(frame->format));

314 ret = AVERROR(ENOSYS);

315 goto err;

316 }

317 err:

318 av_free(middle_data);

319 return ret;

320 }

321

322 static enum AVPixelFormat get_pixel_format(DNNData *data)

323 {

324 if (data->dt == DNN_UINT8) {

325 switch (data->order) {

326 case DCO_BGR:

327 return AV_PIX_FMT_BGR24;

328 case DCO_RGB:

329 return AV_PIX_FMT_RGB24;

330 default:

331 av_assert0(!"unsupported data pixel format.\n");

332 return AV_PIX_FMT_BGR24;

333 }

334 }

335

336 av_assert0(!"unsupported data type.\n");

337 return AV_PIX_FMT_BGR24;

338 }

339

340 int ff_frame_to_dnn_classify(AVFrame *frame, DNNData *input, uint32_t bbox_index, void *log_ctx)

341 {

342 const AVPixFmtDescriptor *desc;

343 int offsetx[4], offsety[4];

344 uint8_t *bbox_data[4];

345 struct SwsContext *sws_ctx;

346 int linesizes[4];

347 int ret = 0;

348 enum AVPixelFormat fmt;

349 int left, top, width, height;

350 int width_idx, height_idx;

351 const AVDetectionBBoxHeader *header;

352 const AVDetectionBBox *bbox;

353 AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES);

354 int max_step[4] = { 0 };

355 av_assert0(sd);

356

357 /* (scale != 1 and scale != 0) or mean != 0 */

358 if ((fabsf(input->scale - 1) > 1e-6f && fabsf(input->scale) > 1e-6f) ||

359 fabsf(input->mean) > 1e-6f) {

360 av_log(log_ctx, AV_LOG_ERROR, "dnn_classify input data doesn't support "

361 "scale: %f, mean: %f\n", input->scale, input->mean);

362 return AVERROR(ENOSYS);

363 }

364

365 if (input->layout == DL_NCHW) {

366 av_log(log_ctx, AV_LOG_ERROR, "dnn_classify input data doesn't support layout: NCHW\n");

367 return AVERROR(ENOSYS);

368 }

369

370 width_idx = dnn_get_width_idx_by_layout(input->layout);

371 height_idx = dnn_get_height_idx_by_layout(input->layout);

372

373 header = (const AVDetectionBBoxHeader *)sd->data;

374 bbox = av_get_detection_bbox(header, bbox_index);

375

376 left = bbox->x;

377 width = bbox->w;

378 top = bbox->y;

379 height = bbox->h;

380

381 fmt = get_pixel_format(input);

382 sws_ctx = sws_getContext(width, height, frame->format,

383 input->dims[width_idx],

384 input->dims[height_idx], fmt,

385 SWS_FAST_BILINEAR, NULL, NULL, NULL);

386 if (!sws_ctx) {

387 av_log(log_ctx, AV_LOG_ERROR, "Failed to create scale context for the conversion "

388 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",

389 av_get_pix_fmt_name(frame->format), width, height,

390 av_get_pix_fmt_name(fmt),

391 input->dims[width_idx],

392 input->dims[height_idx]);

393 return AVERROR(EINVAL);

394 }

395

396 ret = av_image_fill_linesizes(linesizes, fmt, input->dims[width_idx]);

397 if (ret < 0) {

398 av_log(log_ctx, AV_LOG_ERROR, "unable to get linesizes with av_image_fill_linesizes");

399 sws_freeContext(sws_ctx);

400 return ret;

401 }

402

403 desc = av_pix_fmt_desc_get(frame->format);

404 offsetx[1] = offsetx[2] = AV_CEIL_RSHIFT(left, desc->log2_chroma_w);

405 offsetx[0] = offsetx[3] = left;

406

407 offsety[1] = offsety[2] = AV_CEIL_RSHIFT(top, desc->log2_chroma_h);

408 offsety[0] = offsety[3] = top;

409

410 av_image_fill_max_pixsteps(max_step, NULL, desc);

411 for (int k = 0; frame->data[k]; k++)

412 bbox_data[k] = frame->data[k] + offsety[k] * frame->linesize[k] + offsetx[k] * max_step[k];

413

414 sws_scale(sws_ctx, (const uint8_t *const *)&bbox_data, frame->linesize,

415 0, height,

416 (uint8_t *const [4]){input->data, 0, 0, 0}, linesizes);

417

418 sws_freeContext(sws_ctx);

419

420 return ret;

421 }

422

423 int ff_frame_to_dnn_detect(AVFrame *frame, DNNData *input, void *log_ctx)

424 {

425 struct SwsContext *sws_ctx;

426 int linesizes[4];

427 int ret = 0, width_idx, height_idx;

428 enum AVPixelFormat fmt = get_pixel_format(input);

429

430 /* (scale != 1 and scale != 0) or mean != 0 */

431 if ((fabsf(input->scale - 1) > 1e-6f && fabsf(input->scale) > 1e-6f) ||

432 fabsf(input->mean) > 1e-6f) {

433 av_log(log_ctx, AV_LOG_ERROR, "dnn_detect input data doesn't support "

434 "scale: %f, mean: %f\n", input->scale, input->mean);

435 return AVERROR(ENOSYS);

436 }

437

438 if (input->layout == DL_NCHW) {

439 av_log(log_ctx, AV_LOG_ERROR, "dnn_detect input data doesn't support layout: NCHW\n");

440 return AVERROR(ENOSYS);

441 }

442

443 width_idx = dnn_get_width_idx_by_layout(input->layout);

444 height_idx = dnn_get_height_idx_by_layout(input->layout);

445

446 sws_ctx = sws_getContext(frame->width, frame->height, frame->format,

447 input->dims[width_idx],

448 input->dims[height_idx], fmt,

449 SWS_FAST_BILINEAR, NULL, NULL, NULL);

450 if (!sws_ctx) {

451 av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "

452 "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",

453 av_get_pix_fmt_name(frame->format), frame->width, frame->height,

454 av_get_pix_fmt_name(fmt), input->dims[width_idx],

455 input->dims[height_idx]);

456 return AVERROR(EINVAL);

457 }

458

459 ret = av_image_fill_linesizes(linesizes, fmt, input->dims[width_idx]);

460 if (ret < 0) {

461 av_log(log_ctx, AV_LOG_ERROR, "unable to get linesizes with av_image_fill_linesizes");

462 sws_freeContext(sws_ctx);

463 return ret;

464 }

465

466 sws_scale(sws_ctx, (const uint8_t *const *)frame->data, frame->linesize, 0, frame->height,

467 (uint8_t *const [4]){input->data, 0, 0, 0}, linesizes);

468

469 sws_freeContext(sws_ctx);

470 return ret;

471 }

AVPixelFormat

Pixel format.

Definition: pixfmt.h:71

AVERROR

Filter the word "frame" indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions

av_frame_get_side_data

AVFrameSideData * av_frame_get_side_data(const AVFrame *frame, enum AVFrameSideDataType type)

Definition: frame.c:659

av_pix_fmt_desc_get

const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)

Definition: pixdesc.c:3447

output

filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output

Definition: filter_design.txt:226

sws_freeContext

void sws_freeContext(SwsContext *swsContext)

Free the swscaler context swsContext.

Definition: utils.c:2244

AVFrame

This structure describes decoded (raw) audio or video data.

Definition: frame.h:427

data

const char data[16]

Definition: mxf.c:149

AV_PIX_FMT_BGR24

@ AV_PIX_FMT_BGR24

packed RGB 8:8:8, 24bpp, BGRBGR...

Definition: pixfmt.h:76

dnn_io_proc.h

AVDetectionBBox::y

int y

Definition: detection_bbox.h:32

av_malloc

#define av_malloc(s)

Definition: tableprint_vlc.h:31

av_image_copy_plane

void av_image_copy_plane(uint8_t *dst, int dst_linesize, const uint8_t *src, int src_linesize, int bytewidth, int height)

Copy image plane from src to dst.

Definition: imgutils.c:374

SWS_FAST_BILINEAR

@ SWS_FAST_BILINEAR

Scaler selection options.

Definition: swscale.h:98

dnn_get_width_idx_by_layout

static int dnn_get_width_idx_by_layout(DNNLayout layout)

Definition: dnn_interface.h:197

get_pixel_format

static enum AVPixelFormat get_pixel_format(DNNData *data)

Definition: dnn_io_proc.c:322

fabsf

static __device__ float fabsf(float a)

Definition: cuda_runtime.h:181

av_get_detection_bbox

static av_always_inline AVDetectionBBox * av_get_detection_bbox(const AVDetectionBBoxHeader *header, unsigned int idx)

Definition: detection_bbox.h:84

avassert.h

AV_LOG_ERROR

#define AV_LOG_ERROR

Something went wrong and cannot losslessly be recovered.

Definition: log.h:210

float

Definition: af_crystalizer.c:122

av_image_fill_linesizes

int av_image_fill_linesizes(int linesizes[4], enum AVPixelFormat pix_fmt, int width)

Fill plane linesizes for an image with pixel format pix_fmt and width width.

Definition: imgutils.c:89

AV_CEIL_RSHIFT

#define AV_CEIL_RSHIFT(a, b)

Definition: common.h:60

av_assert0

#define av_assert0(cond)

assert() equivalent, that is always enabled.

Definition: avassert.h:41

DNNData

Definition: dnn_interface.h:69

DL_NCHW

@ DL_NCHW

Definition: dnn_interface.h:65

AV_PIX_FMT_YUV420P

@ AV_PIX_FMT_YUV420P

planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)

Definition: pixfmt.h:73

AV_PIX_FMT_GRAYF32

#define AV_PIX_FMT_GRAYF32

Definition: pixfmt.h:582

if(ret)

Definition: filter_design.txt:179

ff_proc_from_frame_to_dnn

int ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)

Definition: dnn_io_proc.c:182

ff_frame_to_dnn_detect

int ff_frame_to_dnn_detect(AVFrame *frame, DNNData *input, void *log_ctx)

Definition: dnn_io_proc.c:423

NULL

#define NULL

Definition: coverity.c:32

AVDetectionBBoxHeader

Definition: detection_bbox.h:56

AV_PIX_FMT_GRAY8

@ AV_PIX_FMT_GRAY8

Y , 8bpp.

Definition: pixfmt.h:81

Definition: af_crystalizer.c:122

AV_PIX_FMT_RGB24

@ AV_PIX_FMT_RGB24

packed RGB 8:8:8, 24bpp, RGBRGB...

Definition: pixfmt.h:75

height

#define height

Definition: dsp.h:89

AVDetectionBBox::w

int w

Definition: detection_bbox.h:33

avpriv_report_missing_feature

void avpriv_report_missing_feature(void *avc, const char *msg,...) av_printf_format(2

Log a generic warning message about a missing feature.

AVFrameSideData::data

uint8_t * data

Definition: frame.h:284

DNNDataType

Definition: dnn_interface.h:41

get_datatype_size

static int get_datatype_size(DNNDataType dt)

Definition: dnn_io_proc.c:28

header

static const uint8_t header[24]

Definition: sdr2.c:68

DNN_FLOAT

@ DNN_FLOAT

Definition: dnn_interface.h:41

input

and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input

Definition: filter_design.txt:172

av_image_get_linesize

int av_image_get_linesize(enum AVPixelFormat pix_fmt, int width, int plane)

Compute the size of an image line with format pix_fmt and width width for the plane plane.

Definition: imgutils.c:76

ret

Definition: filter_design.txt:187

AV_PIX_FMT_NV12

@ AV_PIX_FMT_NV12

planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...

Definition: pixfmt.h:96

frame

these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame

Definition: filter_design.txt:265

AVDetectionBBox::h

int h

Definition: detection_bbox.h:34

DNN_UINT8

@ DNN_UINT8

Definition: dnn_interface.h:41

sws_getContext

SwsContext * sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat, int dstW, int dstH, enum AVPixelFormat dstFormat, int flags, SwsFilter *srcFilter, SwsFilter *dstFilter, const double *param)

Allocate and return an SwsContext.

Definition: utils.c:1913

left

Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... ..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled left

Definition: snow.txt:386

sws_scale

int attribute_align_arg sws_scale(SwsContext *sws, const uint8_t *const srcSlice[], const int srcStride[], int srcSliceY, int srcSliceH, uint8_t *const dst[], const int dstStride[])

swscale wrapper, so we don't need to export the SwsContext.

Definition: swscale.c:1502

AV_PIX_FMT_NONE

@ AV_PIX_FMT_NONE

Definition: pixfmt.h:72

AVDetectionBBox::x

int x

Distance in pixels from the left/top edge of the frame, together with width and height,...

Definition: detection_bbox.h:31

DCO_RGB

@ DCO_RGB

Definition: dnn_interface.h:46