FFmpeg: libavfilter/vf_dnn_detect.c Source File

75 { "model_type", "DNN detection model type", OFFSET2(model_type), AV_OPT_TYPE_INT, { .i64 = DDMT_SSD }, INT_MIN, INT_MAX, FLAGS, .unit = "model_type" },

76 { "ssd", "output shape [1, 1, N, 7]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_SSD }, 0, 0, FLAGS, .unit = "model_type" },

77 { "yolo", "output shape [1, N*Cx*Cy*DetectionBox]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_YOLOV1V2 }, 0, 0, FLAGS, .unit = "model_type" },

78 { "yolov3", "outputs shape [1, N*D, Cx, Cy]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_YOLOV3 }, 0, 0, FLAGS, .unit = "model_type" },

79 { "yolov4", "outputs shape [1, N*D, Cx, Cy]", 0, AV_OPT_TYPE_CONST, { .i64 = DDMT_YOLOV4 }, 0, 0, FLAGS, .unit = "model_type" },

80 { "cell_w", "cell width", OFFSET2(cell_w), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INTMAX_MAX, FLAGS },

81 { "cell_h", "cell height", OFFSET2(cell_h), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INTMAX_MAX, FLAGS },

82 { "nb_classes", "The number of class", OFFSET2(nb_classes), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INTMAX_MAX, FLAGS },

83 { "anchors", "anchors, splited by '&'", OFFSET2(anchors_str), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },

84 { NULL }

85 };

/* Instantiates the option class for this filter from the option table above.
 * NOTE(review): presumably this is what defines `dnn_detect_class`, which the
 * AVFilter definition references through .priv_class - the macro body is not
 * visible in this file, confirm against its definition. */
87 AVFILTER_DEFINE_CLASS(dnn_detect);

89 static inline float sigmoid(float x) {

90 return 1.f / (1.f + exp(-x));

91 }

/**
 * Identity activation: hands the raw value back unchanged.
 * Has the same signature as sigmoid() so either can be selected through a
 * function pointer.
 */
static inline float linear(float x)
{
    float passthrough = x;

    return passthrough;
}

/**
 * Pick the class with the highest score out of a strided score vector.
 *
 * @param nb_classes  number of scores to inspect
 * @param cell_size   distance, in floats, between two consecutive class scores
 * @param label_data  pointer to the score of class 0
 * @return index of the first class whose score is strictly greater than every
 *         earlier one and greater than 0; 0 when no score is positive
 */
static int dnn_detect_get_label_id(int nb_classes, int cell_size, float *label_data)
{
    int best_class = 0;
    float best_score = 0;
    int cls = 0;

    while (cls < nb_classes) {
        const float score = label_data[cls * cell_size];

        /* strict comparison: on ties the earliest class wins */
        if (score > best_score) {
            best_score = score;
            best_class = cls;
        }
        cls++;
    }

    return best_class;
}

109

/**
 * Parse an '&'-separated list of numbers into a newly allocated float array.
 *
 * The string is tokenized with av_strtok(), so it is modified in place.
 *
 * @param anchors_str  writable, NUL-terminated string, e.g. "10&13&16&30"
 * @param anchors      receives the allocated array on success; the caller
 *                     owns it and releases it with av_freep()
 * @return number of values stored in *anchors, or 0 on failure (allocation
 *         failure or fewer tokens than separators imply), in which case
 *         *anchors is left untouched
 */
static int dnn_detect_parse_anchors(char *anchors_str, float **anchors)
{
    char *saveptr = NULL, *token;
    float *anchors_buf;
    int nb_anchor = 1; /* N separators delimit N + 1 values */

    for (const char *p = anchors_str; *p != '\0'; p++) {
        if (*p == '&')
            nb_anchor++;
    }

    anchors_buf = av_mallocz(nb_anchor * sizeof(**anchors));
    if (!anchors_buf) {
        return 0;
    }

    for (int i = 0; i < nb_anchor; i++) {
        token = av_strtok(anchors_str, "&", &saveptr);
        if (!token) {
            /* empty fields ("1&&2") yield fewer tokens than counted */
            av_freep(&anchors_buf);
            return 0;
        }
        anchors_buf[i] = strtof(token, NULL);
        /* subsequent calls continue from saveptr */
        anchors_str = NULL;
    }

    *anchors = anchors_buf;
    return nb_anchor;
}

137

138 /* Calculate Intersection Over Union */

139 static float dnn_detect_IOU(AVDetectionBBox *bbox1, AVDetectionBBox *bbox2)

140 {

141 float overlapping_width = FFMIN(bbox1->x + bbox1->w, bbox2->x + bbox2->w) - FFMAX(bbox1->x, bbox2->x);

142 float overlapping_height = FFMIN(bbox1->y + bbox1->h, bbox2->y + bbox2->h) - FFMAX(bbox1->y, bbox2->y);

143 float intersection_area =

144 (overlapping_width < 0 || overlapping_height < 0) ? 0 : overlapping_height * overlapping_width;

145 float union_area = bbox1->w * bbox1->h + bbox2->w * bbox2->h - intersection_area;

146 return intersection_area / union_area;

147 }

148

149 static int dnn_detect_parse_yolo_output(AVFrame *frame, DNNData *output, int output_index,

150 AVFilterContext *filter_ctx)

151 {

152 DnnDetectContext *ctx = filter_ctx->priv;

153 float conf_threshold = ctx->confidence;

154 int detection_boxes, box_size;

155 int cell_w = 0, cell_h = 0, scale_w = 0, scale_h = 0;

156 int nb_classes = ctx->nb_classes;

157 float *output_data = output[output_index].data;

158 float *anchors = ctx->anchors;

159 AVDetectionBBox *bbox;

160 float (*post_process_raw_data)(float x) = linear;

161 int is_NHWC = 0;

162

163 if (ctx->model_type == DDMT_YOLOV1V2) {

164 cell_w = ctx->cell_w;

165 cell_h = ctx->cell_h;

166 scale_w = cell_w;

167 scale_h = cell_h;

168 } else {

169 if (output[output_index].dims[2] != output[output_index].dims[3] &&

170 output[output_index].dims[2] == output[output_index].dims[1]) {

171 is_NHWC = 1;

172 cell_w = output[output_index].dims[2];

173 cell_h = output[output_index].dims[1];

174 } else {

175 cell_w = output[output_index].dims[3];

176 cell_h = output[output_index].dims[2];

177 }

178 scale_w = ctx->scale_width;

179 scale_h = ctx->scale_height;

180 }

181 box_size = nb_classes + 5;

182

183 switch (ctx->model_type) {

184 case DDMT_YOLOV1V2:

185 case DDMT_YOLOV3:

186 post_process_raw_data = linear;

187 break;

188 case DDMT_YOLOV4:

189 post_process_raw_data = sigmoid;

190 break;

191 }

192

193 if (!cell_h || !cell_w) {

194 av_log(filter_ctx, AV_LOG_ERROR, "cell_w and cell_h are detected\n");

195 return AVERROR(EINVAL);

196 }

197

198 if (!nb_classes) {

199 av_log(filter_ctx, AV_LOG_ERROR, "nb_classes is not set\n");

200 return AVERROR(EINVAL);

201 }

202

203 if (!anchors) {

204 av_log(filter_ctx, AV_LOG_ERROR, "anchors is not set\n");

205 return AVERROR(EINVAL);

206 }

207

208 if (output[output_index].dims[1] * output[output_index].dims[2] *

209 output[output_index].dims[3] % (box_size * cell_w * cell_h)) {

210 av_log(filter_ctx, AV_LOG_ERROR, "wrong cell_w, cell_h or nb_classes\n");

211 return AVERROR(EINVAL);

212 }

213 detection_boxes = output[output_index].dims[1] *

214 output[output_index].dims[2] *

215 output[output_index].dims[3] / box_size / cell_w / cell_h;

216

217 anchors = anchors + (detection_boxes * output_index * 2);

218 /**

219 * find all candidate bbox

220 * yolo output can be reshaped to [B, N*D, Cx, Cy]

221 * Detection box 'D' has format [`x`, `y`, `h`, `w`, `box_score`, `class_no_1`, ...,]

222 **/

223 for (int box_id = 0; box_id < detection_boxes; box_id++) {

224 for (int cx = 0; cx < cell_w; cx++)

225 for (int cy = 0; cy < cell_h; cy++) {

226 float x, y, w, h, conf;

227 float *detection_boxes_data;

228 int label_id;

229

230 if (is_NHWC) {

231 detection_boxes_data = output_data +

232 ((cy * cell_w + cx) * detection_boxes + box_id) * box_size;

233 conf = post_process_raw_data(detection_boxes_data[4]);

234 } else {

235 detection_boxes_data = output_data + box_id * box_size * cell_w * cell_h;

236 conf = post_process_raw_data(

237 detection_boxes_data[cy * cell_w + cx + 4 * cell_w * cell_h]);

238 }

239

240 if (is_NHWC) {

241 x = post_process_raw_data(detection_boxes_data[0]);

242 y = post_process_raw_data(detection_boxes_data[1]);

243 w = detection_boxes_data[2];

244 h = detection_boxes_data[3];

245 label_id = dnn_detect_get_label_id(ctx->nb_classes, 1, detection_boxes_data + 5);

246 conf = conf * post_process_raw_data(detection_boxes_data[label_id + 5]);

247 } else {

248 x = post_process_raw_data(detection_boxes_data[cy * cell_w + cx]);

249 y = post_process_raw_data(detection_boxes_data[cy * cell_w + cx + cell_w * cell_h]);

250 w = detection_boxes_data[cy * cell_w + cx + 2 * cell_w * cell_h];

251 h = detection_boxes_data[cy * cell_w + cx + 3 * cell_w * cell_h];

252 label_id = dnn_detect_get_label_id(ctx->nb_classes, cell_w * cell_h,

253 detection_boxes_data + cy * cell_w + cx + 5 * cell_w * cell_h);

254 conf = conf * post_process_raw_data(

255 detection_boxes_data[cy * cell_w + cx + (label_id + 5) * cell_w * cell_h]);

256 }

257 if (conf < conf_threshold) {

258 continue;

259 }

260

261 bbox = av_mallocz(sizeof(*bbox));

262 if (!bbox)

263 return AVERROR(ENOMEM);

264

265 bbox->w = exp(w) * anchors[box_id * 2] * frame->width / scale_w;

266 bbox->h = exp(h) * anchors[box_id * 2 + 1] * frame->height / scale_h;

267 bbox->x = (cx + x) / cell_w * frame->width - bbox->w / 2;

268 bbox->y = (cy + y) / cell_h * frame->height - bbox->h / 2;

269 bbox->detect_confidence = av_make_q((int)(conf * 10000), 10000);

270 if (ctx->labels && label_id < ctx->label_count) {

271 av_strlcpy(bbox->detect_label, ctx->labels[label_id], sizeof(bbox->detect_label));

272 } else {

273 snprintf(bbox->detect_label, sizeof(bbox->detect_label), "%d", label_id);

274 }

275

276 if (av_fifo_write(ctx->bboxes_fifo, &bbox, 1) < 0) {

277 av_freep(&bbox);

278 return AVERROR(ENOMEM);

279 }

280 bbox = NULL;

281 }

282 }

283 return 0;

284 }

285

286 static int dnn_detect_fill_side_data(AVFrame *frame, AVFilterContext *filter_ctx)

287 {

288 DnnDetectContext *ctx = filter_ctx->priv;

289 float conf_threshold = ctx->confidence;

290 AVDetectionBBox *bbox;

291 int nb_bboxes = 0;

292 AVDetectionBBoxHeader *header;

293 if (av_fifo_can_read(ctx->bboxes_fifo) == 0) {

294 av_log(filter_ctx, AV_LOG_VERBOSE, "nothing detected in this frame.\n");

295 return 0;

296 }

297

298 /* remove overlap bboxes */

299 for (int i = 0; i < av_fifo_can_read(ctx->bboxes_fifo); i++){

300 av_fifo_peek(ctx->bboxes_fifo, &bbox, 1, i);

301 for (int j = 0; j < av_fifo_can_read(ctx->bboxes_fifo); j++) {

302 AVDetectionBBox *overlap_bbox;

303 av_fifo_peek(ctx->bboxes_fifo, &overlap_bbox, 1, j);

304 if (!strcmp(bbox->detect_label, overlap_bbox->detect_label) &&

305 av_cmp_q(bbox->detect_confidence, overlap_bbox->detect_confidence) < 0 &&

306 dnn_detect_IOU(bbox, overlap_bbox) >= conf_threshold) {

307 bbox->classify_count = -1; // bad result

308 nb_bboxes++;

309 break;

310 }

311 }

312 }

313 nb_bboxes = av_fifo_can_read(ctx->bboxes_fifo) - nb_bboxes;

314 header = av_detection_bbox_create_side_data(frame, nb_bboxes);

315 if (!header) {

316 av_log(filter_ctx, AV_LOG_ERROR, "failed to create side data with %d bounding boxes\n", nb_bboxes);

317 return -1;

318 }

319 av_strlcpy(header->source, ctx->dnnctx.model_filename, sizeof(header->source));

320

321 while(av_fifo_can_read(ctx->bboxes_fifo)) {

322 AVDetectionBBox *candidate_bbox;

323 av_fifo_read(ctx->bboxes_fifo, &candidate_bbox, 1);

324

325 if (nb_bboxes > 0 && candidate_bbox->classify_count != -1) {

326 bbox = av_get_detection_bbox(header, header->nb_bboxes - nb_bboxes);

327 memcpy(bbox, candidate_bbox, sizeof(*bbox));

328 nb_bboxes--;

329 }

330 av_freep(&candidate_bbox);

331 }

332 return 0;

333 }

334

335 static int dnn_detect_post_proc_yolo(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx)

336 {

337 int ret = 0;

338 ret = dnn_detect_parse_yolo_output(frame, output, 0, filter_ctx);

339 if (ret < 0)

340 return ret;

341 ret = dnn_detect_fill_side_data(frame, filter_ctx);

342 if (ret < 0)

343 return ret;

344 return 0;

345 }

346

347 static int dnn_detect_post_proc_yolov3(AVFrame *frame, DNNData *output,

348 AVFilterContext *filter_ctx, int nb_outputs)

349 {

350 int ret = 0;

351 for (int i = 0; i < nb_outputs; i++) {

352 ret = dnn_detect_parse_yolo_output(frame, output, i, filter_ctx);

353 if (ret < 0)

354 return ret;

355 }

356 ret = dnn_detect_fill_side_data(frame, filter_ctx);

357 if (ret < 0)

358 return ret;

359 return 0;

360 }

361

362 static int dnn_detect_post_proc_ssd(AVFrame *frame, DNNData *output, int nb_outputs,

363 AVFilterContext *filter_ctx)

364 {

365 DnnDetectContext *ctx = filter_ctx->priv;

366 float conf_threshold = ctx->confidence;

367 int proposal_count = 0;

368 int detect_size = 0;

369 float *detections = NULL, *labels = NULL;

370 int nb_bboxes = 0;

371 AVDetectionBBoxHeader *header;

372 AVDetectionBBox *bbox;

373 int scale_w = ctx->scale_width;

374 int scale_h = ctx->scale_height;

375

376 if (nb_outputs == 1 && output->dims[3] == 7) {

377 proposal_count = output->dims[2];

378 detect_size = output->dims[3];

379 detections = output->data;

380 } else if (nb_outputs == 2 && output[0].dims[3] == 5) {

381 proposal_count = output[0].dims[2];

382 detect_size = output[0].dims[3];

383 detections = output[0].data;

384 labels = output[1].data;

385 } else if (nb_outputs == 2 && output[1].dims[3] == 5) {

386 proposal_count = output[1].dims[2];

387 detect_size = output[1].dims[3];

388 detections = output[1].data;

389 labels = output[0].data;

390 } else {

391 av_log(filter_ctx, AV_LOG_ERROR, "Model output shape doesn't match ssd requirement.\n");

392 return AVERROR(EINVAL);

393 }

394

395 if (proposal_count == 0)

396 return 0;

397

398 for (int i = 0; i < proposal_count; ++i) {

399 float conf;

400 if (nb_outputs == 1)

401 conf = detections[i * detect_size + 2];

402 else

403 conf = detections[i * detect_size + 4];

404 if (conf < conf_threshold) {

405 continue;

406 }

407 nb_bboxes++;

408 }

409

410 if (nb_bboxes == 0) {

411 av_log(filter_ctx, AV_LOG_VERBOSE, "nothing detected in this frame.\n");

412 return 0;

413 }

414

415 header = av_detection_bbox_create_side_data(frame, nb_bboxes);

416 if (!header) {

417 av_log(filter_ctx, AV_LOG_ERROR, "failed to create side data with %d bounding boxes\n", nb_bboxes);

418 return -1;

419 }

420

421 av_strlcpy(header->source, ctx->dnnctx.model_filename, sizeof(header->source));

422

423 for (int i = 0; i < proposal_count; ++i) {

424 int av_unused image_id = (int)detections[i * detect_size + 0];

425 int label_id;

426 float conf, x0, y0, x1, y1;

427

428 if (nb_outputs == 1) {

429 label_id = (int)detections[i * detect_size + 1];

430 conf = detections[i * detect_size + 2];

431 x0 = detections[i * detect_size + 3];

432 y0 = detections[i * detect_size + 4];

433 x1 = detections[i * detect_size + 5];

434 y1 = detections[i * detect_size + 6];

435 } else {

436 label_id = (int)labels[i];

437 x0 = detections[i * detect_size] / scale_w;

438 y0 = detections[i * detect_size + 1] / scale_h;

439 x1 = detections[i * detect_size + 2] / scale_w;

440 y1 = detections[i * detect_size + 3] / scale_h;

441 conf = detections[i * detect_size + 4];

442 }

443

444 if (conf < conf_threshold) {

445 continue;

446 }

447

448 bbox = av_get_detection_bbox(header, header->nb_bboxes - nb_bboxes);

449 bbox->x = (int)(x0 * frame->width);

450 bbox->w = (int)(x1 * frame->width) - bbox->x;

451 bbox->y = (int)(y0 * frame->height);

452 bbox->h = (int)(y1 * frame->height) - bbox->y;

453

454 bbox->detect_confidence = av_make_q((int)(conf * 10000), 10000);

455 bbox->classify_count = 0;

456

457 if (ctx->labels && label_id < ctx->label_count) {

458 av_strlcpy(bbox->detect_label, ctx->labels[label_id], sizeof(bbox->detect_label));

459 } else {

460 snprintf(bbox->detect_label, sizeof(bbox->detect_label), "%d", label_id);

461 }

462

463 nb_bboxes--;

464 if (nb_bboxes == 0) {

465 break;

466 }

467 }

468 return 0;

469 }

470

471 static int dnn_detect_post_proc_ov(AVFrame *frame, DNNData *output, int nb_outputs,

472 AVFilterContext *filter_ctx)

473 {

474 AVFrameSideData *sd;

475 DnnDetectContext *ctx = filter_ctx->priv;

476 int ret = 0;

477

478 sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES);

479 if (sd) {

480 av_log(filter_ctx, AV_LOG_ERROR, "already have bounding boxes in side data.\n");

481 return -1;

482 }

483

484 switch (ctx->model_type) {

485 case DDMT_SSD:

486 ret = dnn_detect_post_proc_ssd(frame, output, nb_outputs, filter_ctx);

487 if (ret < 0)

488 return ret;

489 break;

490 case DDMT_YOLOV1V2:

491 ret = dnn_detect_post_proc_yolo(frame, output, filter_ctx);

492 if (ret < 0)

493 return ret;

494 break;

495 case DDMT_YOLOV3:

496 case DDMT_YOLOV4:

497 ret = dnn_detect_post_proc_yolov3(frame, output, filter_ctx, nb_outputs);

498 if (ret < 0)

499 return ret;

500 break;

501 }

502 return 0;

503 }

504

505 static int dnn_detect_post_proc_tf(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx)

506 {

507 DnnDetectContext *ctx = filter_ctx->priv;

508 int proposal_count;

509 float conf_threshold = ctx->confidence;

510 float *conf, *position, *label_id, x0, y0, x1, y1;

511 int nb_bboxes = 0;

512 AVFrameSideData *sd;

513 AVDetectionBBox *bbox;

514 AVDetectionBBoxHeader *header;

515

516 proposal_count = *(float *)(output[0].data);

517 conf = output[1].data;

518 position = output[3].data;

519 label_id = output[2].data;

520

521 sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DETECTION_BBOXES);

522 if (sd) {

523 av_log(filter_ctx, AV_LOG_ERROR, "already have dnn bounding boxes in side data.\n");

524 return -1;

525 }

526

527 for (int i = 0; i < proposal_count; ++i) {

528 if (conf[i] < conf_threshold)

529 continue;

530 nb_bboxes++;

531 }

532

533 if (nb_bboxes == 0) {

534 av_log(filter_ctx, AV_LOG_VERBOSE, "nothing detected in this frame.\n");

535 return 0;

536 }

537

538 header = av_detection_bbox_create_side_data(frame, nb_bboxes);

539 if (!header) {

540 av_log(filter_ctx, AV_LOG_ERROR, "failed to create side data with %d bounding boxes\n", nb_bboxes);

541 return -1;

542 }

543

544 av_strlcpy(header->source, ctx->dnnctx.model_filename, sizeof(header->source));

545

546 for (int i = 0; i < proposal_count; ++i) {

547 y0 = position[i * 4];

548 x0 = position[i * 4 + 1];

549 y1 = position[i * 4 + 2];

550 x1 = position[i * 4 + 3];

551

552 bbox = av_get_detection_bbox(header, i);

553

554 if (conf[i] < conf_threshold) {

555 continue;

556 }

557

558 bbox->x = (int)(x0 * frame->width);

559 bbox->w = (int)(x1 * frame->width) - bbox->x;

560 bbox->y = (int)(y0 * frame->height);

561 bbox->h = (int)(y1 * frame->height) - bbox->y;

562

563 bbox->detect_confidence = av_make_q((int)(conf[i] * 10000), 10000);

564 bbox->classify_count = 0;

565

566 if (ctx->labels && label_id[i] < ctx->label_count) {

567 av_strlcpy(bbox->detect_label, ctx->labels[(int)label_id[i]], sizeof(bbox->detect_label));

568 } else {

569 snprintf(bbox->detect_label, sizeof(bbox->detect_label), "%d", (int)label_id[i]);

570 }

571

572 nb_bboxes--;

573 if (nb_bboxes == 0) {

574 break;

575 }

576 }

577 return 0;

578 }

579

580 static int dnn_detect_post_proc(AVFrame *frame, DNNData *output, uint32_t nb, AVFilterContext *filter_ctx)

581 {

582 DnnDetectContext *ctx = filter_ctx->priv;

583 DnnContext *dnn_ctx = &ctx->dnnctx;

584 switch (dnn_ctx->backend_type) {

585 case DNN_OV:

586 return dnn_detect_post_proc_ov(frame, output, nb, filter_ctx);

587 case DNN_TF:

588 return dnn_detect_post_proc_tf(frame, output, filter_ctx);

589 default:

590 avpriv_report_missing_feature(filter_ctx, "Current dnn backend does not support detect filter\n");

591 return AVERROR(EINVAL);

592 }

593 }

594

595 static void free_detect_labels(DnnDetectContext *ctx)

596 {

597 for (int i = 0; i < ctx->label_count; i++) {

598 av_freep(&ctx->labels[i]);

599 }

600 ctx->label_count = 0;

601 av_freep(&ctx->labels);

602 }

603

604 static int read_detect_label_file(AVFilterContext *context)

605 {

606 int line_len;

607 FILE *file;

608 DnnDetectContext *ctx = context->priv;

609

610 file = avpriv_fopen_utf8(ctx->labels_filename, "r");

611 if (!file){

612 av_log(context, AV_LOG_ERROR, "failed to open file %s\n", ctx->labels_filename);

613 return AVERROR(EINVAL);

614 }

615

616 while (!feof(file)) {

617 char *label;

618 char buf[256];

619 if (!fgets(buf, 256, file)) {

620 break;

621 }

622

623 line_len = strlen(buf);

624 while (line_len) {

625 int i = line_len - 1;

626 if (buf[i] == '\n' || buf[i] == '\r' || buf[i] == ' ') {

627 buf[i] = '0円';

628 line_len--;

629 } else {

630 break;

631 }

632 }

633

634 if (line_len == 0) // empty line

635 continue;

636

637 if (line_len >= AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE) {

638 av_log(context, AV_LOG_ERROR, "label %s too long\n", buf);

639 fclose(file);

640 return AVERROR(EINVAL);

641 }

642

643 label = av_strdup(buf);

644 if (!label) {

645 av_log(context, AV_LOG_ERROR, "failed to allocate memory for label %s\n", buf);

646 fclose(file);

647 return AVERROR(ENOMEM);

648 }

649

650 if (av_dynarray_add_nofree(&ctx->labels, &ctx->label_count, label) < 0) {

651 av_log(context, AV_LOG_ERROR, "failed to do av_dynarray_add\n");

652 fclose(file);

653 av_freep(&label);

654 return AVERROR(ENOMEM);

655 }

656 }

657

658 fclose(file);

659 return 0;

660 }

661

662 static int check_output_nb(DnnDetectContext *ctx, DNNBackendType backend_type, int output_nb)

663 {

664 switch(backend_type) {

665 case DNN_TF:

666 if (output_nb != 4) {

667 av_log(ctx, AV_LOG_ERROR, "Only support tensorflow detect model with 4 outputs, \

668 but get %d instead\n", output_nb);

669 return AVERROR(EINVAL);

670 }

671 return 0;

672 case DNN_OV:

673 return 0;

674 default:

675 avpriv_report_missing_feature(ctx, "Dnn detect filter does not support current backend\n");

676 return AVERROR(EINVAL);

677 }

678 return 0;

679 }

680

681 static av_cold int dnn_detect_init(AVFilterContext *context)

682 {

683 DnnDetectContext *ctx = context->priv;

684 DnnContext *dnn_ctx = &ctx->dnnctx;

685 int ret;

686

687 ret = ff_dnn_init(&ctx->dnnctx, DFT_ANALYTICS_DETECT, context);

688 if (ret < 0)

689 return ret;

690 ret = check_output_nb(ctx, dnn_ctx->backend_type, dnn_ctx->nb_outputs);

691 if (ret < 0)

692 return ret;

693 ctx->bboxes_fifo = av_fifo_alloc2(1, sizeof(AVDetectionBBox *), AV_FIFO_FLAG_AUTO_GROW);

694 if (!ctx->bboxes_fifo)

695 return AVERROR(ENOMEM);

696 ff_dnn_set_detect_post_proc(&ctx->dnnctx, dnn_detect_post_proc);

697

698 if (ctx->labels_filename) {

699 return read_detect_label_file(context);

700 }

701 if (ctx->anchors_str) {

702 ret = dnn_detect_parse_anchors(ctx->anchors_str, &ctx->anchors);

703 if (!ctx->anchors) {

704 av_log(context, AV_LOG_ERROR, "failed to parse anchors_str\n");

705 return AVERROR(EINVAL);

706 }

707 ctx->nb_anchor = ret;

708 }

709 return 0;

710 }

711

712 static const enum AVPixelFormat pix_fmts[] = {

713 AV_PIX_FMT_RGB24, AV_PIX_FMT_BGR24,

714 AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAYF32,

715 AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P,

716 AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P,

717 AV_PIX_FMT_NV12,

718 AV_PIX_FMT_NONE

719 };

720

721 static int dnn_detect_flush_frame(AVFilterLink *outlink, int64_t pts, int64_t *out_pts)

722 {

723 DnnDetectContext *ctx = outlink->src->priv;

724 int ret;

725 DNNAsyncStatusType async_state;

726

727 ret = ff_dnn_flush(&ctx->dnnctx);

728 if (ret != 0) {

729 return -1;

730 }

731

732 do {

733 AVFrame *in_frame = NULL;

734 AVFrame *out_frame = NULL;

735 async_state = ff_dnn_get_result(&ctx->dnnctx, &in_frame, &out_frame);

736 if (async_state == DAST_SUCCESS) {

737 ret = ff_filter_frame(outlink, in_frame);

738 if (ret < 0)

739 return ret;

740 if (out_pts)

741 *out_pts = in_frame->pts + pts;

742 }

743 av_usleep(5000);

744 } while (async_state >= DAST_NOT_READY);

745

746 return 0;

747 }

748

749 static int dnn_detect_activate(AVFilterContext *filter_ctx)

750 {

751 AVFilterLink *inlink = filter_ctx->inputs[0];

752 AVFilterLink *outlink = filter_ctx->outputs[0];

753 DnnDetectContext *ctx = filter_ctx->priv;

754 AVFrame *in = NULL;

755 int64_t pts;

756 int ret, status;

757 int got_frame = 0;

758 int async_state;

759

760 FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);

761

762 do {

763 // drain all input frames

764 ret = ff_inlink_consume_frame(inlink, &in);

765 if (ret < 0)

766 return ret;

767 if (ret > 0) {

768 if (ff_dnn_execute_model(&ctx->dnnctx, in, NULL) != 0) {

769 return AVERROR(EIO);

770 }

771 }

772 } while (ret > 0);

773

774 // drain all processed frames

775 do {

776 AVFrame *in_frame = NULL;

777 AVFrame *out_frame = NULL;

778 async_state = ff_dnn_get_result(&ctx->dnnctx, &in_frame, &out_frame);

779 if (async_state == DAST_SUCCESS) {

780 ret = ff_filter_frame(outlink, in_frame);

781 if (ret < 0)

782 return ret;

783 got_frame = 1;

784 }

785 } while (async_state == DAST_SUCCESS);

786

787 // if frame got, schedule to next filter

788 if (got_frame)

789 return 0;

790

791 if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {

792 if (status == AVERROR_EOF) {

793 int64_t out_pts = pts;

794 ret = dnn_detect_flush_frame(outlink, pts, &out_pts);

795 ff_outlink_set_status(outlink, status, out_pts);

796 return ret;

797 }

798 }

799

800 FF_FILTER_FORWARD_WANTED(outlink, inlink);

801

802 return 0;

803 }

804

805 static av_cold void dnn_detect_uninit(AVFilterContext *context)

806 {

807 DnnDetectContext *ctx = context->priv;

808 AVDetectionBBox *bbox;

809 ff_dnn_uninit(&ctx->dnnctx);

810 while(av_fifo_can_read(ctx->bboxes_fifo)) {

811 av_fifo_read(ctx->bboxes_fifo, &bbox, 1);

812 av_freep(&bbox);

813 }

814 av_fifo_freep2(&ctx->bboxes_fifo);

815 av_freep(&ctx->anchors);

816 free_detect_labels(ctx);

817 }

818

819 static int config_input(AVFilterLink *inlink)

820 {

821 AVFilterContext *context = inlink->dst;

822 DnnDetectContext *ctx = context->priv;

823 DNNData model_input;

824 int ret, width_idx, height_idx;

825

826 ret = ff_dnn_get_input(&ctx->dnnctx, &model_input);

827 if (ret != 0) {

828 av_log(ctx, AV_LOG_ERROR, "could not get input from the model\n");

829 return ret;

830 }

831 width_idx = dnn_get_width_idx_by_layout(model_input.layout);

832 height_idx = dnn_get_height_idx_by_layout(model_input.layout);

833 ctx->scale_width = model_input.dims[width_idx] == -1 ? inlink->w :

834 model_input.dims[width_idx];

835 ctx->scale_height = model_input.dims[height_idx] == -1 ? inlink->h :

836 model_input.dims[height_idx];

837

838 return 0;

839 }

840

841 static const AVFilterPad dnn_detect_inputs[] = {

842 {

843 .name = "default",

844 .type = AVMEDIA_TYPE_VIDEO,

845 .config_props = config_input,

846 },

847 };

848

849 const AVFilter ff_vf_dnn_detect = {

850 .name = "dnn_detect",

851 .description = NULL_IF_CONFIG_SMALL("Apply DNN detect filter to the input."),

852 .priv_size = sizeof(DnnDetectContext),

853 .init = dnn_detect_init,

854 .uninit = dnn_detect_uninit,

855 FILTER_INPUTS(dnn_detect_inputs),

856 FILTER_OUTPUTS(ff_video_default_filterpad),

857 FILTER_PIXFMTS_ARRAY(pix_fmts),

858 .priv_class = &dnn_detect_class,

859 .activate = dnn_detect_activate,

860 };

pix_fmts

static enum AVPixelFormat pix_fmts[]

Definition: vf_dnn_detect.c:712

DnnDetectContext::nb_classes

int nb_classes

Definition: vf_dnn_detect.c:52

AVPixelFormat

Pixel format.

Definition: pixfmt.h:71

dnn_detect_parse_yolo_output

static int dnn_detect_parse_yolo_output(AVFrame *frame, DNNData *output, int output_index, AVFilterContext *filter_ctx)

Definition: vf_dnn_detect.c:149

AVERROR

Filter the word "frame" indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions

opt.h

av_frame_get_side_data

AVFrameSideData * av_frame_get_side_data(const AVFrame *frame, enum AVFrameSideDataType type)

Definition: frame.c:838

ff_filter_frame

int ff_filter_frame(AVFilterLink *link, AVFrame *frame)

Send a frame of data to the next filter.

Definition: avfilter.c:1018

AVERROR_EOF

#define AVERROR_EOF

End of file.

Definition: error.h:57

FILTER_PIXFMTS_ARRAY

#define FILTER_PIXFMTS_ARRAY(array)

Definition: internal.h:162

int64_t

long long int64_t

Definition: coverity.c:34

output

filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output

Definition: filter_design.txt:225

inlink

The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink

Definition: filter_design.txt:212

av_unused

#define av_unused

Definition: attributes.h:131

av_fifo_peek

int av_fifo_peek(const AVFifo *f, void *buf, size_t nb_elems, size_t offset)

Read data from a FIFO without modifying FIFO state.

Definition: fifo.c:255

AVFrame

This structure describes decoded (raw) audio or video data.

Definition: frame.h:344

AVFrame::pts

int64_t pts

Presentation timestamp in time_base units (time when frame should be shown to user).

Definition: frame.h:456

AVFILTER_DEFINE_CLASS

AVFILTER_DEFINE_CLASS(dnn_detect)

AVFrame::width

int width

Definition: frame.h:416

uint8_t w

Definition: llviddspenc.c:38

read_detect_label_file

static int read_detect_label_file(AVFilterContext *context)

Definition: vf_dnn_detect.c:604

AVOption

AVOption.

Definition: opt.h:346

data

const char data[16]

Definition: mxf.c:148

dnn_detect_post_proc_ssd

static int dnn_detect_post_proc_ssd(AVFrame *frame, DNNData *output, int nb_outputs, AVFilterContext *filter_ctx)

Definition: vf_dnn_detect.c:362

dnn_detect_init

static av_cold int dnn_detect_init(AVFilterContext *context)

Definition: vf_dnn_detect.c:681

output_data

static int output_data(MLPDecodeContext *m, unsigned int substr, AVFrame *frame, int *got_frame_ptr)

Write the audio data into the output buffer.

Definition: mlpdec.c:1108

AV_LOG_VERBOSE

#define AV_LOG_VERBOSE

Detailed information.

Definition: log.h:196

dnn_detect_inputs

static const AVFilterPad dnn_detect_inputs[]

Definition: vf_dnn_detect.c:841

AV_PIX_FMT_BGR24

@ AV_PIX_FMT_BGR24

packed RGB 8:8:8, 24bpp, BGRBGR...

Definition: pixfmt.h:76

FFMAX

#define FFMAX(a, b)

Definition: macros.h:47

AVFilter::name

const char * name

Filter name.

Definition: avfilter.h:170

dnn_filter_common.h

AVDetectionBBox::y

int y

Definition: detection_bbox.h:32

video.h

AVFilterLink

A link between two filters.

Definition: avfilter.h:542

FF_FILTER_FORWARD_STATUS_BACK

#define FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink)

Forward the status on an output link to an input link.

Definition: filters.h:199

ff_inlink_consume_frame

int ff_inlink_consume_frame(AVFilterLink *link, AVFrame **rframe)

Take a frame from the link's FIFO and update the link's stats.

Definition: avfilter.c:1445

fifo.h

AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE

#define AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE

Definition: detection_bbox.h:36

dnn_get_width_idx_by_layout

static int dnn_get_width_idx_by_layout(DNNLayout layout)

Definition: dnn_interface.h:137

AVDetectionBBox::detect_label

char detect_label[AV_DETECTION_BBOX_LABEL_NAME_MAX_SIZE]

Detect result with confidence.

Definition: detection_bbox.h:41

AVFilterContext::priv

void * priv

private data for use by the filter

Definition: avfilter.h:422

av_fifo_write

int av_fifo_write(AVFifo *f, const void *buf, size_t nb_elems)

Write data into a FIFO.

Definition: fifo.c:188

DnnContext

Definition: dnn_filter_common.h:29

dnn_detect_IOU

static float dnn_detect_IOU(AVDetectionBBox *bbox1, AVDetectionBBox *bbox2)

Definition: vf_dnn_detect.c:139

filter_ctx

static FilteringContext * filter_ctx

Definition: transcode.c:51

dnn_detect_uninit

static av_cold void dnn_detect_uninit(AVFilterContext *context)

Definition: vf_dnn_detect.c:805

DnnDetectContext

Definition: vf_dnn_detect.c:42

pts

static int64_t pts

Definition: transcode_aac.c:643

DnnDetectContext::model_type

DNNDetectionModelType model_type

Definition: vf_dnn_detect.c:49

AVFilterPad

A filter pad used for either input or output.

Definition: internal.h:33

av_get_detection_bbox

static av_always_inline AVDetectionBBox * av_get_detection_bbox(const AVDetectionBBoxHeader *header, unsigned int idx)

Definition: detection_bbox.h:84

DnnDetectContext::scale_height

int scale_height

Definition: vf_dnn_detect.c:55

dnn_detect_post_proc_ov

static int dnn_detect_post_proc_ov(AVFrame *frame, DNNData *output, int nb_outputs, AVFilterContext *filter_ctx)

Definition: vf_dnn_detect.c:471

DNN_TF

@ DNN_TF

Definition: dnn_interface.h:35

AV_LOG_ERROR

#define AV_LOG_ERROR

Something went wrong and cannot losslessly be recovered.

Definition: log.h:180

av_cold

#define av_cold

Definition: attributes.h:90

av_fifo_read

int av_fifo_read(AVFifo *f, void *buf, size_t nb_elems)

Read data from a FIFO.

Definition: fifo.c:240

ff_video_default_filterpad

const AVFilterPad ff_video_default_filterpad[1]

An AVFilterPad array whose only entry has name "default" and is of type AVMEDIA_TYPE_VIDEO.

Definition: video.c:37

DnnDetectContext::bboxes_fifo

AVFifo * bboxes_fifo

Definition: vf_dnn_detect.c:53

float

Definition: af_crystalizer.c:121

ff_outlink_set_status

static void ff_outlink_set_status(AVFilterLink *link, int status, int64_t pts)

Set the status field of a link from the source filter.

Definition: filters.h:189

ff_dnn_set_detect_post_proc

int ff_dnn_set_detect_post_proc(DnnContext *ctx, DetectPostProc post_proc)

Definition: dnn_filter_common.c:109

av_strtok

char * av_strtok(char *s, const char *delim, char **saveptr)

Split the string into several tokens which can be accessed by successive calls to av_strtok().

Definition: avstring.c:178

free_detect_labels

static void free_detect_labels(DnnDetectContext *ctx)

Definition: vf_dnn_detect.c:595

DNNData

Definition: dnn_interface.h:65

dnn_detect_post_proc_yolov3

static int dnn_detect_post_proc_yolov3(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx, int nb_outputs)

Definition: vf_dnn_detect.c:347

filters.h

ff_dnn_get_result

DNNAsyncStatusType ff_dnn_get_result(DnnContext *ctx, AVFrame **in_frame, AVFrame **out_frame)

Definition: dnn_filter_common.c:161

ctx

AVFormatContext * ctx

Definition: movenc.c:48

config_input

static int config_input(AVFilterLink *inlink)

Definition: vf_dnn_detect.c:819

linear

static float linear(float x)

Definition: vf_dnn_detect.c:93

ff_vf_dnn_detect

const AVFilter ff_vf_dnn_detect

Definition: vf_dnn_detect.c:849

DnnDetectContext::scale_width

int scale_width

Definition: vf_dnn_detect.c:54

AV_PIX_FMT_YUV420P

@ AV_PIX_FMT_YUV420P

planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)

Definition: pixfmt.h:73

av_usleep

int av_usleep(unsigned usec)

Sleep for a period of time.

Definition: time.c:84

AV_PIX_FMT_GRAYF32

#define AV_PIX_FMT_GRAYF32

Definition: pixfmt.h:511

FILTER_INPUTS

#define FILTER_INPUTS(array)

Definition: internal.h:182

file_open.h

ff_dnn_get_input

int ff_dnn_get_input(DnnContext *ctx, DNNData *input)

Definition: dnn_filter_common.c:121

frame

static AVFrame * frame

Definition: demux_decode.c:54

DNN_OV

@ DNN_OV

Definition: dnn_interface.h:35

context

it's the only field you need to keep assuming you have a context. There is some magic you don't need to care about around this just let it vf default minimum maximum flags name is the option keep it simple and lowercase description are in without and describe what they for example set the foo of the bar offset is the offset of the field in your context

Definition: writing_filters.txt:91

AVClass

Describe the class of an AVClass context structure.

Definition: log.h:66

NULL

#define NULL

Definition: coverity.c:32

AVDetectionBBoxHeader

Definition: detection_bbox.h:56

DnnDetectContext::dnnctx

DnnContext dnnctx

Definition: vf_dnn_detect.c:44

DnnDetectContext::cell_h

int cell_h

Definition: vf_dnn_detect.c:51

dnn_detect_activate

static int dnn_detect_activate(AVFilterContext *filter_ctx)

Definition: vf_dnn_detect.c:749

DnnDetectContext::labels_filename

char * labels_filename

Definition: vf_dnn_detect.c:46

av_fifo_can_read

size_t av_fifo_can_read(const AVFifo *f)

Definition: fifo.c:87

DnnDetectContext::labels

char ** labels

Definition: vf_dnn_detect.c:47

DDMT_YOLOV3

@ DDMT_YOLOV3

Definition: vf_dnn_detect.c:38

time.h

AV_PIX_FMT_GRAY8

@ AV_PIX_FMT_GRAY8

Y, 8bpp.

Definition: pixfmt.h:81

exp

int8_t exp

Definition: eval.c:74

ff_dnn_flush

int ff_dnn_flush(DnnContext *ctx)

Definition: dnn_filter_common.c:166

ff_inlink_acknowledge_status

int ff_inlink_acknowledge_status(AVFilterLink *link, int *rstatus, int64_t *rpts)

Test and acknowledge the change of status on the link.

Definition: avfilter.c:1392

FLAGS

#define FLAGS

Definition: vf_dnn_detect.c:63

DDMT_YOLOV4

@ DDMT_YOLOV4

Definition: vf_dnn_detect.c:39

av_detection_bbox_create_side_data

AVDetectionBBoxHeader * av_detection_bbox_create_side_data(AVFrame *frame, uint32_t nb_bboxes)

Allocates memory for AVDetectionBBoxHeader, plus an array of nb_bboxes AVDetectionBBox, as side data of the given frame.

Definition: detection_bbox.c:51

DNN_COMMON_OPTIONS

#define DNN_COMMON_OPTIONS

Definition: dnn_filter_common.h:43

DnnContext::backend_type

DNNBackendType backend_type

Definition: dnn_filter_common.h:31

init

int(* init)(AVBSFContext *ctx)

Definition: dts2pts.c:365

AV_PIX_FMT_RGB24

@ AV_PIX_FMT_RGB24

packed RGB 8:8:8, 24bpp, RGBRGB...

Definition: pixfmt.h:75

AVFifo

Definition: fifo.c:35

NULL_IF_CONFIG_SMALL

#define NULL_IF_CONFIG_SMALL(x)

Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.

Definition: internal.h:106

DnnDetectContext::label_count

int label_count

Definition: vf_dnn_detect.c:48

DAST_SUCCESS

@ DAST_SUCCESS

Definition: dnn_interface.h:49

AVDetectionBBox::w

int w

Definition: detection_bbox.h:33

DNNBackendType

Definition: dnn_interface.h:35

DnnContext::nb_outputs

uint32_t nb_outputs

Definition: dnn_filter_common.h:38

av_make_q

static AVRational av_make_q(int num, int den)

Create an AVRational.

Definition: rational.h:71

avpriv_report_missing_feature

void avpriv_report_missing_feature(void *avc, const char *msg, ...) av_printf_format(2, 3)

Log a generic warning message about a missing feature.

AVFilterLink::src

AVFilterContext * src

source filter

Definition: avfilter.h:543

sigmoid

static float sigmoid(float x)

Definition: vf_dnn_detect.c:89

header

static const uint8_t header[24]

Definition: sdr2.c:68

DNNData::layout

DNNLayout layout

Definition: dnn_interface.h:71

AVDetectionBBox::classify_count

uint32_t classify_count

Definition: detection_bbox.h:51

DDMT_YOLOV1V2

@ DDMT_YOLOV1V2

Definition: vf_dnn_detect.c:37

DnnDetectContext::anchors_str

char * anchors_str

Definition: vf_dnn_detect.c:56

FF_FILTER_FORWARD_WANTED

FF_FILTER_FORWARD_WANTED(outlink, inlink)

dnn_detect_options

static const AVOption dnn_detect_options[]

Definition: vf_dnn_detect.c:64

internal.h

AV_OPT_TYPE_FLOAT

@ AV_OPT_TYPE_FLOAT

Definition: opt.h:238

dnn_detect_flush_frame

static int dnn_detect_flush_frame(AVFilterLink *outlink, int64_t pts, int64_t *out_pts)

Definition: vf_dnn_detect.c:721

dnn_detect_post_proc

static int dnn_detect_post_proc(AVFrame *frame, DNNData *output, uint32_t nb, AVFilterContext *filter_ctx)

Definition: vf_dnn_detect.c:580

uninit

static void uninit(AVBSFContext *ctx)

Definition: pcm_rechunk.c:68

#define i(width, name, range_min, range_max)

Definition: cbs_h2645.c:255

DFT_ANALYTICS_DETECT

@ DFT_ANALYTICS_DETECT

Definition: dnn_interface.h:55

check_output_nb

static int check_output_nb(DnnDetectContext *ctx, DNNBackendType backend_type, int output_nb)

Definition: vf_dnn_detect.c:662

FFMIN

#define FFMIN(a, b)

Definition: macros.h:49

av_mallocz

void * av_mallocz(size_t size)

Allocate a memory block with alignment suitable for all memory accesses (including vectors if available on the CPU) and zero all the bytes of the block.

Definition: mem.c:254

AVFilterPad::name

const char * name

Pad name.

Definition: internal.h:39

avpriv_fopen_utf8

FILE * avpriv_fopen_utf8(const char *path, const char *mode)

Open a file using a UTF-8 filename.

Definition: file_open.c:159

dnn_detect_post_proc_yolo

static int dnn_detect_post_proc_yolo(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx)

Definition: vf_dnn_detect.c:335

DnnDetectContext::confidence

float confidence

Definition: vf_dnn_detect.c:45

av_cmp_q

static int av_cmp_q(AVRational a, AVRational b)

Compare two rationals.

Definition: rational.h:89

AVFilter

Filter definition.

Definition: avfilter.h:166

ret

Definition: filter_design.txt:187

AV_PIX_FMT_NV12

@ AV_PIX_FMT_NV12

planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V)

Definition: pixfmt.h:96

AVDetectionBBox::h

int h

Definition: detection_bbox.h:34

av_fifo_alloc2

AVFifo * av_fifo_alloc2(size_t nb_elems, size_t elem_size, unsigned int flags)

Allocate and initialize an AVFifo with a given element size.

Definition: fifo.c:47

AVDetectionBBox::detect_confidence

AVRational detect_confidence

Definition: detection_bbox.h:42

av_dynarray_add_nofree

int av_dynarray_add_nofree(void *tab_ptr, int *nb_ptr, void *elem)

Add an element to a dynamic array.

Definition: mem.c:313

AVFrame::height

int height

Definition: frame.h:416

DDMT_SSD

@ DDMT_SSD

Definition: vf_dnn_detect.c:36

status

ov_status_e status

Definition: dnn_backend_openvino.c:120

DNNDetectionModelType

Definition: vf_dnn_detect.c:35

AV_PIX_FMT_NONE

@ AV_PIX_FMT_NONE

Definition: pixfmt.h:72

AV_OPT_TYPE_INT

@ AV_OPT_TYPE_INT

Definition: opt.h:235

AVDetectionBBox::x

int x

Distance in pixels from the left/top edge of the frame, together with width and height, defining the bounding box.

Definition: detection_bbox.h:31

DnnDetectContext::nb_anchor

int nb_anchor

Definition: vf_dnn_detect.c:58

AV_PIX_FMT_YUV444P

@ AV_PIX_FMT_YUV444P

planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)

Definition: pixfmt.h:78

AVFilterContext

An instance of a filter.

Definition: avfilter.h:407

DNNData::dims

int dims[4]

Definition: dnn_interface.h:67

av_strdup

char * av_strdup(const char *s)

Duplicate a string.

Definition: mem.c:270

AVMEDIA_TYPE_VIDEO

@ AVMEDIA_TYPE_VIDEO

Definition: avutil.h:201

AV_PIX_FMT_YUV422P

@ AV_PIX_FMT_YUV422P

planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)

Definition: pixfmt.h:77

OFFSET

#define OFFSET(x)

Definition: vf_dnn_detect.c:61

dnn_detect_fill_side_data

static int dnn_detect_fill_side_data(AVFrame *frame, AVFilterContext *filter_ctx)

Definition: vf_dnn_detect.c:286

dnn_get_height_idx_by_layout

static int dnn_get_height_idx_by_layout(DNNLayout layout)

Definition: dnn_interface.h:142

AVFrameSideData

Structure to hold side data for an AVFrame.

Definition: frame.h:250

DnnDetectContext::cell_w

int cell_w

Definition: vf_dnn_detect.c:50

DnnDetectContext::anchors

float * anchors

Definition: vf_dnn_detect.c:57

FILTER_OUTPUTS

#define FILTER_OUTPUTS(array)

Definition: internal.h:183

ff_dnn_init

int ff_dnn_init(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx)

Definition: dnn_filter_common.c:54

dnn_detect_get_label_id

static int dnn_detect_get_label_id(int nb_classes, int cell_size, float *label_data)

Definition: vf_dnn_detect.c:97

av_freep

#define av_freep(p)

Definition: tableprint_vlc.h:34

AV_PIX_FMT_YUV411P

@ AV_PIX_FMT_YUV411P

planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)

Definition: pixfmt.h:80

AV_PIX_FMT_YUV410P

@ AV_PIX_FMT_YUV410P

planar YUV 4:1:0, 9bpp, (1 Cr & Cb sample per 4x4 Y samples)

Definition: pixfmt.h:79

av_strlcpy

size_t av_strlcpy(char *dst, const char *src, size_t size)

Copy the string src to dst, but no more than size - 1 bytes, and null-terminate dst.

Definition: avstring.c:85

av_log

#define av_log(a,...)

Definition: tableprint_vlc.h:27

av_fifo_freep2

void av_fifo_freep2(AVFifo **f)

Free an AVFifo and reset pointer to NULL.

Definition: fifo.c:286

ff_dnn_uninit

void ff_dnn_uninit(DnnContext *ctx)

Definition: dnn_filter_common.c:171

AVDetectionBBox

Definition: detection_bbox.h:26

Definition: vp9dsp_template.c:2038

dnn_detect_post_proc_tf

static int dnn_detect_post_proc_tf(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx)

Definition: vf_dnn_detect.c:505

ff_dnn_execute_model

int ff_dnn_execute_model(DnnContext *ctx, AVFrame *in_frame, AVFrame *out_frame)

Definition: dnn_filter_common.c:134

avstring.h

AV_OPT_TYPE_STRING

@ AV_OPT_TYPE_STRING

Definition: opt.h:239

dnn_detect_parse_anchors

static int dnn_detect_parse_anchors(char *anchors_str, float **anchors)

Definition: vf_dnn_detect.c:110

DAST_NOT_READY

@ DAST_NOT_READY

Definition: dnn_interface.h:48

int

Definition: ffmpeg_filter.c:409

DNNAsyncStatusType

Definition: dnn_interface.h:45

AV_OPT_TYPE_CONST

@ AV_OPT_TYPE_CONST

Definition: opt.h:244

snprintf

#define snprintf

Definition: snprintf.h:34

OFFSET2

#define OFFSET2(x)

Definition: vf_dnn_detect.c:62

detection_bbox.h

AV_FIFO_FLAG_AUTO_GROW

#define AV_FIFO_FLAG_AUTO_GROW

Automatically resize the FIFO on writes, so that the data fits.

Definition: fifo.h:67

AV_FRAME_DATA_DETECTION_BBOXES

@ AV_FRAME_DATA_DETECTION_BBOXES

Bounding boxes for object detection and classification, as described by AVDetectionBBoxHeader.

Definition: frame.h:194

Generated on Thu Sep 26 2024 23:15:36 for FFmpeg by doxygen 1.8.17