FFmpeg: libavfilter/vf_deshake_opencl.c Source File

FFmpeg

[フレーム]

libavfilter

vf_deshake_opencl.c

Go to the documentation of this file.

1 /*

2 * This file is part of FFmpeg.

3 *

4 * FFmpeg is free software; you can redistribute it and/or

5 * modify it under the terms of the GNU Lesser General Public

6 * License as published by the Free Software Foundation; either

7 * version 2.1 of the License, or (at your option) any later version.

8 *

9 * FFmpeg is distributed in the hope that it will be useful,

10 * but WITHOUT ANY WARRANTY; without even the implied warranty of

11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

12 * Lesser General Public License for more details.

13 *

14 * You should have received a copy of the GNU Lesser General Public

15 * License along with FFmpeg; if not, write to the Free Software

16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

17 *

21 * Third party copyrights are property of their respective owners.

22 *

23 * Redistribution and use in source and binary forms, with or without modification,

24 * are permitted provided that the following conditions are met:

25 *

26 * * Redistribution's of source code must retain the above copyright notice,

27 * this list of conditions and the following disclaimer.

28 *

29 * * Redistribution's in binary form must reproduce the above copyright notice,

30 * this list of conditions and the following disclaimer in the documentation

31 * and/or other materials provided with the distribution.

32 *

33 * * The name of the copyright holders may not be used to endorse or promote products

34 * derived from this software without specific prior written permission.

35 *

36 * This software is provided by the copyright holders and contributors "as is" and

37 * any express or implied warranties, including, but not limited to, the implied

38 * warranties of merchantability and fitness for a particular purpose are disclaimed.

39 * In no event shall the Intel Corporation or contributors be liable for any direct,

40 * indirect, incidental, special, exemplary, or consequential damages

41 * (including, but not limited to, procurement of substitute goods or services;

42 * loss of use, data, or profits; or business interruption) however caused

43 * and on any theory of liability, whether in contract, strict liability,

44 * or tort (including negligence or otherwise) arising in any way out of

45 * the use of this software, even if advised of the possibility of such damage.

46 */

48 #include <float.h>

49 #include "libavutil/lfg.h"

50 #include "libavutil/opt.h"

51 #include "libavutil/mem.h"

52 #include "libavutil/fifo.h"

53 #include "libavutil/common.h"

54 #include "libavutil/avassert.h"

55 #include "libavutil/pixdesc.h"

56 #include "libavutil/pixfmt.h"

57 #include "avfilter.h"

58 #include "framequeue.h"

59 #include "filters.h"

60 #include "transform.h"

61 #include "opencl.h"

62 #include "opencl_source.h"

63 #include "video.h"

65 /*

66 This filter matches feature points between frames (dealing with outliers) and then

67 uses the matches to estimate an affine transform between frames. This transform is

68 decomposed into various values (translation, scale, rotation) and the values are

69 summed relative to the start of the video to obtain on absolute camera position

70 for each frame. This "camera path" is then smoothed via a gaussian filter, resulting

71 in a new path that is turned back into an affine transform and applied to each

72 frame to render it.

74 High-level overview:

76 All of the work to extract motion data from frames occurs in queue_frame. Motion data

77 is buffered in a smoothing window, so queue_frame simply computes the absolute camera

78 positions and places them in ringbuffers.

80 filter_frame is responsible for looking at the absolute camera positions currently

81 in the ringbuffers, applying the gaussian filter, and then transforming the frames.

82 */

84 // Number of bits for BRIEF descriptors

85 #define BREIFN 512

86 // Size of the patch from which a BRIEF descriptor is extracted

87 // This is the size used in OpenCV

88 #define BRIEF_PATCH_SIZE 31

89 #define BRIEF_PATCH_SIZE_HALF (BRIEF_PATCH_SIZE / 2)

91 #define MATCHES_CONTIG_SIZE 2000

93 #define ROUNDED_UP_DIV(a, b) ((a + (b - 1)) / b)

95 typedef struct PointPair {

96 // Previous frame

97 cl_float2 p1;

98 // Current frame

99 cl_float2 p2;

100 } PointPair;

101

102 typedef struct MotionVector {

103 PointPair p;

104 // Used to mark vectors as potential outliers

105 cl_int should_consider;

106 } MotionVector;

107

108 // Denotes the indices for the different types of motion in the ringbuffers array

109 enum RingbufferIndices {

110 RingbufX,

111 RingbufY,

112 RingbufRot,

113 RingbufScaleX,

114 RingbufScaleY,

115

116 // Should always be last

117 RingbufCount

118 };

119

120 // Struct that holds data for drawing point match debug data

121 typedef struct DebugMatches {

122 MotionVector *matches;

123 // The points used to calculate the affine transform for a frame

124 MotionVector model_matches[3];

125

126 int num_matches;

127 // For cases where we couldn't calculate a model

128 int num_model_matches;

129 } DebugMatches;

130

131 // Groups together the ringbuffers that store absolute distortion / position values

132 // for each frame

133 typedef struct AbsoluteFrameMotion {

134 // Array with the various ringbuffers, indexed via the RingbufferIndices enum

135 AVFifo *ringbuffers[RingbufCount];

136

137 // Offset to get to the current frame being processed

138 // (not in bytes)

139 int curr_frame_offset;

140 // Keeps track of where the start and end of contiguous motion data is (to

141 // deal with cases where no motion data is found between two frames)

142 int data_start_offset;

143 int data_end_offset;

144

145 AVFifo *debug_matches;

146 } AbsoluteFrameMotion;

147

148 // Takes care of freeing the arrays within the DebugMatches inside of the

149 // debug_matches ringbuffer and then freeing the buffer itself.

150 static void free_debug_matches(AbsoluteFrameMotion *afm) {

151 DebugMatches dm;

152

153 if (!afm->debug_matches) {

154 return;

155 }

156

157 while (av_fifo_read(afm->debug_matches, &dm, 1) >= 0)

158 av_freep(&dm.matches);

159

160 av_fifo_freep2(&afm->debug_matches);

161 }

162

163 // Stores the translation, scale, rotation, and skew deltas between two frames

164 typedef struct FrameDelta {

165 cl_float2 translation;

166 float rotation;

167 cl_float2 scale;

168 cl_float2 skew;

169 } FrameDelta;

170

171 typedef struct SimilarityMatrix {

172 // The 2x3 similarity matrix

173 double matrix[6];

174 } SimilarityMatrix;

175

176 typedef struct CropInfo {

177 // The top left corner of the bounding box for the crop

178 cl_float2 top_left;

179 // The bottom right corner of the bounding box for the crop

180 cl_float2 bottom_right;

181 } CropInfo;

182

183 // Returned from function that determines start and end values for iteration

184 // around the current frame in a ringbuffer

185 typedef struct IterIndices {

186 int start;

187 int end;

188 } IterIndices;

189

190 typedef struct DeshakeOpenCLContext {

191 OpenCLFilterContext ocf;

192 // Whether or not the above `OpenCLFilterContext` has been initialized

193 int initialized;

194

195 // These variables are used in the activate callback

196 int64_t duration;

197 int eof;

198

199 // State for random number generation

200 AVLFG alfg;

201

202 // FIFO frame queue used to buffer future frames for processing

203 FFFrameQueue fq;

204 // Ringbuffers for frame positions

205 AbsoluteFrameMotion abs_motion;

206

207 // The number of frames' motion to consider before and after the frame we are

208 // smoothing

209 int smooth_window;

210 // The number of the frame we are currently processing

211 int curr_frame;

212

213 // Stores a 1d array of normalised gaussian kernel values for convolution

214 float *gauss_kernel;

215

216 // Buffer for error values used in RANSAC code

217 float *ransac_err;

218

219 // Information regarding how to crop the smoothed luminance (or RGB) planes

220 CropInfo crop_y;

221 // Information regarding how to crop the smoothed chroma planes

222 CropInfo crop_uv;

223

224 // Whether or not we are processing YUV input (as oppposed to RGB)

225 int is_yuv;

226 // The underlying format of the hardware surfaces

227 int sw_format;

228

229 // Buffer to copy `matches` into for the CPU to work with

230 MotionVector *matches_host;

231 MotionVector *matches_contig_host;

232

233 MotionVector *inliers;

234

235 cl_command_queue command_queue;

236 cl_kernel kernel_grayscale;

237 cl_kernel kernel_harris_response;

238 cl_kernel kernel_refine_features;

239 cl_kernel kernel_brief_descriptors;

240 cl_kernel kernel_match_descriptors;

241 cl_kernel kernel_transform;

242 cl_kernel kernel_crop_upscale;

243

244 // Stores a frame converted to grayscale

245 cl_mem grayscale;

246 // Stores the harris response for a frame (measure of "cornerness" for each pixel)

247 cl_mem harris_buf;

248

249 // Detected features after non-maximum suppression and sub-pixel refinement

250 cl_mem refined_features;

251 // Saved from the previous frame

252 cl_mem prev_refined_features;

253

254 // BRIEF sampling pattern that is randomly initialized

255 cl_mem brief_pattern;

256 // Feature point descriptors for the current frame

257 cl_mem descriptors;

258 // Feature point descriptors for the previous frame

259 cl_mem prev_descriptors;

260 // Vectors between points in current and previous frame

261 cl_mem matches;

262 cl_mem matches_contig;

263 // Holds the matrix to transform luminance (or RGB) with

264 cl_mem transform_y;

265 // Holds the matrix to transform chroma with

266 cl_mem transform_uv;

267

268 // Configurable options

269

270 int tripod_mode;

271 int debug_on;

272 int should_crop;

273

274 // Whether or not feature points should be refined at a sub-pixel level

275 cl_int refine_features;

276 // If the user sets a value other than the default, 0, this percentage is

277 // translated into a sigma value ranging from 0.5 to 40.0

278 float smooth_percent;

279 // This number is multiplied by the video frame rate to determine the size

280 // of the smooth window

281 float smooth_window_multiplier;

282

283 // Debug stuff

284

285 cl_kernel kernel_draw_debug_info;

286 cl_mem debug_matches;

287 cl_mem debug_model_matches;

288

289 // These store the total time spent executing the different kernels in nanoseconds

290 unsigned long long grayscale_time;

291 unsigned long long harris_response_time;

292 unsigned long long refine_features_time;

293 unsigned long long brief_descriptors_time;

294 unsigned long long match_descriptors_time;

295 unsigned long long transform_time;

296 unsigned long long crop_upscale_time;

297

298 // Time spent copying matched features from the device to the host

299 unsigned long long read_buf_time;

300 } DeshakeOpenCLContext;

301

302 // Returns a random uniformly-distributed number in [low, high]

303 static int rand_in(int low, int high, AVLFG *alfg) {

304 return (av_lfg_get(alfg) % (high - low)) + low;

305 }

306

307 // Returns the average execution time for an event given the total time and the

308 // number of frames processed.

309 static double averaged_event_time_ms(unsigned long long total_time, int num_frames) {

310 return (double)total_time / (double)num_frames / 1000000.0;

311 }

312

313 // The following code is loosely ported from OpenCV

314

315 // Estimates affine transform from 3 point pairs

316 // model is a 2x3 matrix:

317 // a b c

318 // d e f

319 static void run_estimate_kernel(const MotionVector *point_pairs, double *model)

320 {

321 // src points

322 double x1 = point_pairs[0].p.p1.s[0];

323 double y1 = point_pairs[0].p.p1.s[1];

324 double x2 = point_pairs[1].p.p1.s[0];

325 double y2 = point_pairs[1].p.p1.s[1];

326 double x3 = point_pairs[2].p.p1.s[0];

327 double y3 = point_pairs[2].p.p1.s[1];

328

329 // dest points

330 double X1 = point_pairs[0].p.p2.s[0];

331 double Y1 = point_pairs[0].p.p2.s[1];

332 double X2 = point_pairs[1].p.p2.s[0];

333 double Y2 = point_pairs[1].p.p2.s[1];

334 double X3 = point_pairs[2].p.p2.s[0];

335 double Y3 = point_pairs[2].p.p2.s[1];

336

337 double d = 1.0 / ( x1*(y2-y3) + x2*(y3-y1) + x3*(y1-y2) );

338

339 model[0] = d * ( X1*(y2-y3) + X2*(y3-y1) + X3*(y1-y2) );

340 model[1] = d * ( X1*(x3-x2) + X2*(x1-x3) + X3*(x2-x1) );

341 model[2] = d * ( X1*(x2*y3 - x3*y2) + X2*(x3*y1 - x1*y3) + X3*(x1*y2 - x2*y1) );

342

343 model[3] = d * ( Y1*(y2-y3) + Y2*(y3-y1) + Y3*(y1-y2) );

344 model[4] = d * ( Y1*(x3-x2) + Y2*(x1-x3) + Y3*(x2-x1) );

345 model[5] = d * ( Y1*(x2*y3 - x3*y2) + Y2*(x3*y1 - x1*y3) + Y3*(x1*y2 - x2*y1) );

346 }

347

348 // Checks that the 3 points in the given array are not collinear

349 static int points_not_collinear(const cl_float2 **points)

350 {

351 int j, k, i = 2;

352

353 for (j = 0; j < i; j++) {

354 double dx1 = points[j]->s[0] - points[i]->s[0];

355 double dy1 = points[j]->s[1] - points[i]->s[1];

356

357 for (k = 0; k < j; k++) {

358 double dx2 = points[k]->s[0] - points[i]->s[0];

359 double dy2 = points[k]->s[1] - points[i]->s[1];

360

361 // Assuming a 3840 x 2160 video with a point at (0, 0) and one at

362 // (3839, 2159), this prevents a third point from being within roughly

363 // 0.5 of a pixel of the line connecting the two on both axes

364 if (fabs(dx2*dy1 - dy2*dx1) <= 1.0) {

365 return 0;

366 }

367 }

368 }

369

370 return 1;

371 }

372

373 // Checks a subset of 3 point pairs to make sure that the points are not collinear

374 // and not too close to each other

375 static int check_subset(const MotionVector *pairs_subset)

376 {

377 const cl_float2 *prev_points[] = {

378 &pairs_subset[0].p.p1,

379 &pairs_subset[1].p.p1,

380 &pairs_subset[2].p.p1

381 };

382

383 const cl_float2 *curr_points[] = {

384 &pairs_subset[0].p.p2,

385 &pairs_subset[1].p.p2,

386 &pairs_subset[2].p.p2

387 };

388

389 return points_not_collinear(prev_points) && points_not_collinear(curr_points);

390 }

391

392 // Selects a random subset of 3 points from point_pairs and places them in pairs_subset

393 static int get_subset(

394 AVLFG *alfg,

395 const MotionVector *point_pairs,

396 const int num_point_pairs,

397 MotionVector *pairs_subset,

398 int max_attempts

399 ) {

400 int idx[3];

401 int i = 0, j, iters = 0;

402

403 for (; iters < max_attempts; iters++) {

404 for (i = 0; i < 3 && iters < max_attempts;) {

405 int idx_i = 0;

406

407 for (;;) {

408 idx_i = idx[i] = rand_in(0, num_point_pairs, alfg);

409

410 for (j = 0; j < i; j++) {

411 if (idx_i == idx[j]) {

412 break;

413 }

414 }

415

416 if (j == i) {

417 break;

418 }

419 }

420

421 pairs_subset[i] = point_pairs[idx[i]];

422 i++;

423 }

424

425 if (i == 3 && !check_subset(pairs_subset)) {

426 continue;

427 }

428 break;

429 }

430

431 return i == 3 && iters < max_attempts;

432 }

433

434 // Computes the error for each of the given points based on the given model.

435 static void compute_error(

436 const MotionVector *point_pairs,

437 const int num_point_pairs,

438 const double *model,

439 float *err

440 ) {

441 double F0 = model[0], F1 = model[1], F2 = model[2];

442 double F3 = model[3], F4 = model[4], F5 = model[5];

443

444 for (int i = 0; i < num_point_pairs; i++) {

445 const cl_float2 *f = &point_pairs[i].p.p1;

446 const cl_float2 *t = &point_pairs[i].p.p2;

447

448 double a = F0*f->s[0] + F1*f->s[1] + F2 - t->s[0];

449 double b = F3*f->s[0] + F4*f->s[1] + F5 - t->s[1];

450

451 err[i] = a*a + b*b;

452 }

453 }

454

455 // Determines which of the given point matches are inliers for the given model

456 // based on the specified threshold.

457 //

458 // err must be an array of num_point_pairs length

459 static int find_inliers(

460 MotionVector *point_pairs,

461 const int num_point_pairs,

462 const double *model,

463 float *err,

464 double thresh

465 ) {

466 float t = (float)(thresh * thresh);

467 int i, n = num_point_pairs, num_inliers = 0;

468

469 compute_error(point_pairs, num_point_pairs, model, err);

470

471 for (i = 0; i < n; i++) {

472 if (err[i] <= t) {

473 // This is an inlier

474 point_pairs[i].should_consider = 1;

475 num_inliers += 1;

476 } else {

477 point_pairs[i].should_consider = 0;

478 }

479 }

480

481 return num_inliers;

482 }

483

484 // Determines the number of iterations required to achieve the desired confidence level.

485 //

486 // The equation used to determine the number of iterations to do is:

487 // 1 - confidence = (1 - inlier_probability^num_points)^num_iters

488 //

489 // Solving for num_iters:

490 //

491 // num_iters = log(1 - confidence) / log(1 - inlier_probability^num_points)

492 //

493 // A more in-depth explanation can be found at https://en.wikipedia.org/wiki/Random_sample_consensus

494 // under the 'Parameters' heading

495 static int ransac_update_num_iters(double confidence, double num_outliers, int max_iters)

496 {

497 double num, denom;

498

499 confidence = av_clipd(confidence, 0.0, 1.0);

500 num_outliers = av_clipd(num_outliers, 0.0, 1.0);

501

502 // avoid inf's & nan's

503 num = FFMAX(1.0 - confidence, DBL_MIN);

504 denom = 1.0 - pow(1.0 - num_outliers, 3);

505 if (denom < DBL_MIN) {

506 return 0;

507 }

508

509 num = log(num);

510 denom = log(denom);

511

512 return denom >= 0 || -num >= max_iters * (-denom) ? max_iters : (int)round(num / denom);

513 }

514

515 // Estimates an affine transform between the given pairs of points using RANdom

516 // SAmple Consensus

517 static int estimate_affine_2d(

518 DeshakeOpenCLContext *deshake_ctx,

519 MotionVector *point_pairs,

520 DebugMatches *debug_matches,

521 const int num_point_pairs,

522 double *model_out,

523 const double threshold,

524 const int max_iters,

525 const double confidence

526 ) {

527 int result = 0;

528 double best_model[6], model[6];

529 MotionVector pairs_subset[3], best_pairs[3];

530

531 int iter, niters = FFMAX(max_iters, 1);

532 int good_count, max_good_count = 0;

533

534 // We need at least 3 points to build a model from

535 if (num_point_pairs < 3) {

536 return 0;

537 } else if (num_point_pairs == 3) {

538 // There are only 3 points, so RANSAC doesn't apply here

539 run_estimate_kernel(point_pairs, model_out);

540

541 for (int i = 0; i < 3; ++i) {

542 point_pairs[i].should_consider = 1;

543 }

544

545 return 1;

546 }

547

548 for (iter = 0; iter < niters; ++iter) {

549 int found = get_subset(&deshake_ctx->alfg, point_pairs, num_point_pairs, pairs_subset, 10000);

550

551 if (!found) {

552 if (iter == 0) {

553 return 0;

554 }

555

556 break;

557 }

558

559 run_estimate_kernel(pairs_subset, model);

560 good_count = find_inliers(point_pairs, num_point_pairs, model, deshake_ctx->ransac_err, threshold);

561

562 if (good_count > FFMAX(max_good_count, 2)) {

563 for (int mi = 0; mi < 6; ++mi) {

564 best_model[mi] = model[mi];

565 }

566

567 for (int pi = 0; pi < 3; pi++) {

568 best_pairs[pi] = pairs_subset[pi];

569 }

570

571 max_good_count = good_count;

572 niters = ransac_update_num_iters(

573 confidence,

574 (double)(num_point_pairs - good_count) / num_point_pairs,

575 niters

576 );

577 }

578 }

579

580 if (max_good_count > 0) {

581 for (int mi = 0; mi < 6; ++mi) {

582 model_out[mi] = best_model[mi];

583 }

584

585 for (int pi = 0; pi < 3; ++pi) {

586 debug_matches->model_matches[pi] = best_pairs[pi];

587 }

588 debug_matches->num_model_matches = 3;

589

590 // Find the inliers again for the best model for debugging

591 find_inliers(point_pairs, num_point_pairs, best_model, deshake_ctx->ransac_err, threshold);

592 result = 1;

593 }

594

595 return result;

596 }

597

598 // "Wiggles" the first point in best_pairs around a tiny bit in order to decrease the

599 // total error

600 static void optimize_model(

601 DeshakeOpenCLContext *deshake_ctx,

602 MotionVector *best_pairs,

603 MotionVector *inliers,

604 const int num_inliers,

605 float best_err,

606 double *model_out

607 ) {

608 float move_x_val = 0.01;

609 float move_y_val = 0.01;

610 int move_x = 1;

611 float old_move_x_val = 0;

612 double model[6];

613 int last_changed = 0;

614

615 for (int iters = 0; iters < 200; iters++) {

616 float total_err = 0;

617

618 if (move_x) {

619 best_pairs[0].p.p2.s[0] += move_x_val;

620 } else {

621 best_pairs[0].p.p2.s[0] += move_y_val;

622 }

623

624 run_estimate_kernel(best_pairs, model);

625 compute_error(inliers, num_inliers, model, deshake_ctx->ransac_err);

626

627 for (int j = 0; j < num_inliers; j++) {

628 total_err += deshake_ctx->ransac_err[j];

629 }

630

631 if (total_err < best_err) {

632 for (int mi = 0; mi < 6; ++mi) {

633 model_out[mi] = model[mi];

634 }

635

636 best_err = total_err;

637 last_changed = iters;

638 } else {

639 // Undo the change

640 if (move_x) {

641 best_pairs[0].p.p2.s[0] -= move_x_val;

642 } else {

643 best_pairs[0].p.p2.s[0] -= move_y_val;

644 }

645

646 if (iters - last_changed > 4) {

647 // We've already improved the model as much as we can

648 break;

649 }

650

651 old_move_x_val = move_x_val;

652

653 if (move_x) {

654 move_x_val *= -1;

655 } else {

656 move_y_val *= -1;

657 }

658

659 if (old_move_x_val < 0) {

660 move_x = 0;

661 } else {

662 move_x = 1;

663 }

664 }

665 }

666 }

667

668 // Uses a process similar to that of RANSAC to find a transform that minimizes

669 // the total error for a set of point matches determined to be inliers

670 //

671 // (Pick random subsets, compute model, find total error, iterate until error

672 // is minimized.)

673 static int minimize_error(

674 DeshakeOpenCLContext *deshake_ctx,

675 MotionVector *inliers,

676 DebugMatches *debug_matches,

677 const int num_inliers,

678 double *model_out,

679 const int max_iters

680 ) {

681 int result = 0;

682 float best_err = FLT_MAX;

683 double best_model[6], model[6];

684 MotionVector pairs_subset[3], best_pairs[3];

685

686 for (int i = 0; i < max_iters; i++) {

687 float total_err = 0;

688 int found = get_subset(&deshake_ctx->alfg, inliers, num_inliers, pairs_subset, 10000);

689

690 if (!found) {

691 if (i == 0) {

692 return 0;

693 }

694

695 break;

696 }

697

698 run_estimate_kernel(pairs_subset, model);

699 compute_error(inliers, num_inliers, model, deshake_ctx->ransac_err);

700

701 for (int j = 0; j < num_inliers; j++) {

702 total_err += deshake_ctx->ransac_err[j];

703 }

704

705 if (i == 0 || total_err < best_err) {

706 for (int mi = 0; mi < 6; ++mi) {

707 best_model[mi] = model[mi];

708 }

709

710 for (int pi = 0; pi < 3; pi++) {

711 best_pairs[pi] = pairs_subset[pi];

712 }

713

714 best_err = total_err;

715 }

716 }

717

718 for (int mi = 0; mi < 6; ++mi) {

719 model_out[mi] = best_model[mi];

720 }

721

722 for (int pi = 0; pi < 3; ++pi) {

723 debug_matches->model_matches[pi] = best_pairs[pi];

724 }

725 debug_matches->num_model_matches = 3;

726 result = 1;

727

728 optimize_model(deshake_ctx, best_pairs, inliers, num_inliers, best_err, model_out);

729 return result;

730 }

731

732 // End code from OpenCV

733

734 // Decomposes a similarity matrix into translation, rotation, scale, and skew

735 //

736 // See http://frederic-wang.fr/decomposition-of-2d-transform-matrices.html

737 static FrameDelta decompose_transform(double *model)

738 {

739 FrameDelta ret;

740

741 double a = model[0];

742 double c = model[1];

743 double e = model[2];

744 double b = model[3];

745 double d = model[4];

746 double f = model[5];

747 double delta = a * d - b * c;

748

749 memset(&ret, 0, sizeof(ret));

750

751 ret.translation.s[0] = e;

752 ret.translation.s[1] = f;

753

754 // This is the QR method

755 if (a != 0 || b != 0) {

756 double r = hypot(a, b);

757

758 ret.rotation = FFSIGN(b) * acos(a / r);

759 ret.scale.s[0] = r;

760 ret.scale.s[1] = delta / r;

761 ret.skew.s[0] = atan((a * c + b * d) / (r * r));

762 ret.skew.s[1] = 0;

763 } else if (c != 0 || d != 0) {

764 double s = sqrt(c * c + d * d);

765

766 ret.rotation = M_PI / 2 - FFSIGN(d) * acos(-c / s);

767 ret.scale.s[0] = delta / s;

768 ret.scale.s[1] = s;

769 ret.skew.s[0] = 0;

770 ret.skew.s[1] = atan((a * c + b * d) / (s * s));

771 } // otherwise there is only translation

772

773 return ret;

774 }

775

776 // Move valid vectors from the 2d buffer into a 1d buffer where they are contiguous

777 static int make_vectors_contig(

778 DeshakeOpenCLContext *deshake_ctx,

779 int size_y,

780 int size_x

781 ) {

782 int num_vectors = 0;

783

784 for (int i = 0; i < size_y; ++i) {

785 for (int j = 0; j < size_x; ++j) {

786 MotionVector v = deshake_ctx->matches_host[j + i * size_x];

787

788 if (v.should_consider) {

789 deshake_ctx->matches_contig_host[num_vectors] = v;

790 ++num_vectors;

791 }

792

793 // Make sure we do not exceed the amount of space we allocated for these vectors

794 if (num_vectors == MATCHES_CONTIG_SIZE - 1) {

795 return num_vectors;

796 }

797 }

798 }

799 return num_vectors;

800 }

801

802 // Returns the gaussian kernel value for the given x coordinate and sigma value

803 static float gaussian_for(int x, float sigma) {

804 return 1.0f / expf(((float)x * (float)x) / (2.0f * sigma * sigma));

805 }

806

807 // Makes a normalized gaussian kernel of the given length for the given sigma

808 // and places it in gauss_kernel

809 static void make_gauss_kernel(float *gauss_kernel, float length, float sigma)

810 {

811 float gauss_sum = 0;

812 int window_half = length / 2;

813

814 for (int i = 0; i < length; ++i) {

815 float val = gaussian_for(i - window_half, sigma);

816

817 gauss_sum += val;

818 gauss_kernel[i] = val;

819 }

820

821 // Normalize the gaussian values

822 for (int i = 0; i < length; ++i) {

823 gauss_kernel[i] /= gauss_sum;

824 }

825 }

826

827 // Returns indices to start and end iteration at in order to iterate over a window

828 // of length size centered at the current frame in a ringbuffer

829 //

830 // Always returns numbers that result in a window of length size, even if that

831 // means specifying negative indices or indices past the end of the values in the

832 // ringbuffers. Make sure you clip indices appropriately within your loop.

833 static IterIndices start_end_for(DeshakeOpenCLContext *deshake_ctx, int length) {

834 IterIndices indices;

835

836 indices.start = deshake_ctx->abs_motion.curr_frame_offset - (length / 2);

837 indices.end = deshake_ctx->abs_motion.curr_frame_offset + (length / 2) + (length % 2);

838

839 return indices;

840 }

841

842 // Sets val to the value in the given ringbuffer at the given offset, taking care of

843 // clipping the offset into the appropriate range

844 static void ringbuf_float_at(

845 DeshakeOpenCLContext *deshake_ctx,

846 AVFifo *values,

847 float *val,

848 int offset

849 ) {

850 int clip_start, clip_end, offset_clipped;

851 if (deshake_ctx->abs_motion.data_end_offset != -1) {

852 clip_end = deshake_ctx->abs_motion.data_end_offset;

853 } else {

854 // This expression represents the last valid index in the buffer,

855 // which we use repeatedly at the end of the video.

856 clip_end = deshake_ctx->smooth_window - av_fifo_can_write(values) - 1;

857 }

858

859 if (deshake_ctx->abs_motion.data_start_offset != -1) {

860 clip_start = deshake_ctx->abs_motion.data_start_offset;

861 } else {

862 // Negative indices will occur at the start of the video, and we want

863 // them to be clipped to 0 in order to repeatedly use the position of

864 // the first frame.

865 clip_start = 0;

866 }

867

868 offset_clipped = av_clip(

869 offset,

870 clip_start,

871 clip_end

872 );

873

874 av_fifo_peek(values, val, 1, offset_clipped);

875 }

876

877 // Returns smoothed current frame value of the given buffer of floats based on the

878 // given Gaussian kernel and its length (also the window length, centered around the

879 // current frame) and the "maximum value" of the motion.

880 //

881 // This "maximum value" should be the width / height of the image in the case of

882 // translation and an empirically chosen constant for rotation / scale.

883 //

884 // The sigma chosen to generate the final gaussian kernel with used to smooth the

885 // camera path is either hardcoded (set by user, deshake_ctx->smooth_percent) or

886 // adaptively chosen.

887 static float smooth(

888 DeshakeOpenCLContext *deshake_ctx,

889 float *gauss_kernel,

890 int length,

891 float max_val,

892 AVFifo *values

893 ) {

894 float new_large_s = 0, new_small_s = 0, new_best = 0, old, diff_between,

895 percent_of_max, inverted_percent;

896 IterIndices indices = start_end_for(deshake_ctx, length);

897 float large_sigma = 40.0f;

898 float small_sigma = 2.0f;

899 float best_sigma;

900

901 if (deshake_ctx->smooth_percent) {

902 best_sigma = (large_sigma - 0.5f) * deshake_ctx->smooth_percent + 0.5f;

903 } else {

904 // Strategy to adaptively smooth trajectory:

905 //

906 // 1. Smooth path with large and small sigma values

907 // 2. Take the absolute value of the difference between them

908 // 3. Get a percentage by putting the difference over the "max value"

909 // 4, Invert the percentage

910 // 5. Calculate a new sigma value weighted towards the larger sigma value

911 // 6. Determine final smoothed trajectory value using that sigma

912

913 make_gauss_kernel(gauss_kernel, length, large_sigma);

914 for (int i = indices.start, j = 0; i < indices.end; ++i, ++j) {

915 ringbuf_float_at(deshake_ctx, values, &old, i);

916 new_large_s += old * gauss_kernel[j];

917 }

918

919 make_gauss_kernel(gauss_kernel, length, small_sigma);

920 for (int i = indices.start, j = 0; i < indices.end; ++i, ++j) {

921 ringbuf_float_at(deshake_ctx, values, &old, i);

922 new_small_s += old * gauss_kernel[j];

923 }

924

925 diff_between = fabsf(new_large_s - new_small_s);

926 percent_of_max = diff_between / max_val;

927 inverted_percent = 1 - percent_of_max;

928 best_sigma = large_sigma * powf(inverted_percent, 40);

929 }

930

931 make_gauss_kernel(gauss_kernel, length, best_sigma);

932 for (int i = indices.start, j = 0; i < indices.end; ++i, ++j) {

933 ringbuf_float_at(deshake_ctx, values, &old, i);

934 new_best += old * gauss_kernel[j];

935 }

936

937 return new_best;

938 }

939

940 // Returns the position of the given point after the transform is applied

941 static cl_float2 transformed_point(float x, float y, float *transform) {

942 cl_float2 ret;

943

944 ret.s[0] = x * transform[0] + y * transform[1] + transform[2];

945 ret.s[1] = x * transform[3] + y * transform[4] + transform[5];

946

947 return ret;

948 }

949

950 // Creates an affine transform that scales from the center of a frame

951 static void transform_center_scale(

952 float x_shift,

953 float y_shift,

954 float angle,

955 float scale_x,

956 float scale_y,

957 float center_w,

958 float center_h,

959 float *matrix

960 ) {

961 cl_float2 center_s;

962 float center_s_w, center_s_h;

963

964 ff_get_matrix(

965 0,

966 0,

967 0,

968 scale_x,

969 scale_y,

970 matrix

971 );

972

973 center_s = transformed_point(center_w, center_h, matrix);

974 center_s_w = center_w - center_s.s[0];

975 center_s_h = center_h - center_s.s[1];

976

977 ff_get_matrix(

978 x_shift + center_s_w,

979 y_shift + center_s_h,

980 angle,

981 scale_x,

982 scale_y,

983 matrix

984 );

985 }

986

987 // Determines the crop necessary to eliminate black borders from a smoothed frame

988 // and updates target crop accordingly

989 static void update_needed_crop(

990 CropInfo* crop,

991 float *transform,

992 float frame_width,

993 float frame_height

994 ) {

995 float new_width, new_height, adjusted_width, adjusted_height, adjusted_x, adjusted_y;

996

997 cl_float2 top_left = transformed_point(0, 0, transform);

998 cl_float2 top_right = transformed_point(frame_width, 0, transform);

999 cl_float2 bottom_left = transformed_point(0, frame_height, transform);

1000 cl_float2 bottom_right = transformed_point(frame_width, frame_height, transform);

1001 float ar_h = frame_height / frame_width;

1002 float ar_w = frame_width / frame_height;

1003

1004 if (crop->bottom_right.s[0] == 0) {

1005 // The crop hasn't been set to the original size of the plane

1006 crop->bottom_right.s[0] = frame_width;

1007 crop->bottom_right.s[1] = frame_height;

1008 }

1009

1010 crop->top_left.s[0] = FFMAX3(

1011 crop->top_left.s[0],

1012 top_left.s[0],

1013 bottom_left.s[0]

1014 );

1015

1016 crop->top_left.s[1] = FFMAX3(

1017 crop->top_left.s[1],

1018 top_left.s[1],

1019 top_right.s[1]

1020 );

1021

1022 crop->bottom_right.s[0] = FFMIN3(

1023 crop->bottom_right.s[0],

1024 bottom_right.s[0],

1025 top_right.s[0]

1026 );

1027

1028 crop->bottom_right.s[1] = FFMIN3(

1029 crop->bottom_right.s[1],

1030 bottom_right.s[1],

1031 bottom_left.s[1]

1032 );

1033

1034 // Make sure our potentially new bounding box has the same aspect ratio

1035 new_height = crop->bottom_right.s[1] - crop->top_left.s[1];

1036 new_width = crop->bottom_right.s[0] - crop->top_left.s[0];

1037

1038 adjusted_width = new_height * ar_w;

1039 adjusted_x = crop->bottom_right.s[0] - adjusted_width;

1040

1041 if (adjusted_x >= crop->top_left.s[0]) {

1042 crop->top_left.s[0] = adjusted_x;

1043 } else {

1044 adjusted_height = new_width * ar_h;

1045 adjusted_y = crop->bottom_right.s[1] - adjusted_height;

1046 crop->top_left.s[1] = adjusted_y;

1047 }

1048 }

1049

1050 static av_cold void deshake_opencl_uninit(AVFilterContext *avctx)

1051 {

1052 DeshakeOpenCLContext *ctx = avctx->priv;

1053 cl_int cle;

1054

1055 for (int i = 0; i < RingbufCount; i++)

1056 av_fifo_freep2(&ctx->abs_motion.ringbuffers[i]);

1057

1058 if (ctx->debug_on)

1059 free_debug_matches(&ctx->abs_motion);

1060

1061 if (ctx->gauss_kernel)

1062 av_freep(&ctx->gauss_kernel);

1063

1064 if (ctx->ransac_err)

1065 av_freep(&ctx->ransac_err);

1066

1067 if (ctx->matches_host)

1068 av_freep(&ctx->matches_host);

1069

1070 if (ctx->matches_contig_host)

1071 av_freep(&ctx->matches_contig_host);

1072

1073 if (ctx->inliers)

1074 av_freep(&ctx->inliers);

1075

1076 ff_framequeue_free(&ctx->fq);

1077

1078 CL_RELEASE_KERNEL(ctx->kernel_grayscale);

1079 CL_RELEASE_KERNEL(ctx->kernel_harris_response);

1080 CL_RELEASE_KERNEL(ctx->kernel_refine_features);

1081 CL_RELEASE_KERNEL(ctx->kernel_brief_descriptors);

1082 CL_RELEASE_KERNEL(ctx->kernel_match_descriptors);

1083 CL_RELEASE_KERNEL(ctx->kernel_crop_upscale);

1084 if (ctx->debug_on)

1085 CL_RELEASE_KERNEL(ctx->kernel_draw_debug_info);

1086

1087 CL_RELEASE_QUEUE(ctx->command_queue);

1088

1089 if (!ctx->is_yuv)

1090 CL_RELEASE_MEMORY(ctx->grayscale);

1091 CL_RELEASE_MEMORY(ctx->harris_buf);

1092 CL_RELEASE_MEMORY(ctx->refined_features);

1093 CL_RELEASE_MEMORY(ctx->prev_refined_features);

1094 CL_RELEASE_MEMORY(ctx->brief_pattern);

1095 CL_RELEASE_MEMORY(ctx->descriptors);

1096 CL_RELEASE_MEMORY(ctx->prev_descriptors);

1097 CL_RELEASE_MEMORY(ctx->matches);

1098 CL_RELEASE_MEMORY(ctx->matches_contig);

1099 CL_RELEASE_MEMORY(ctx->transform_y);

1100 CL_RELEASE_MEMORY(ctx->transform_uv);

1101 if (ctx->debug_on) {

1102 CL_RELEASE_MEMORY(ctx->debug_matches);

1103 CL_RELEASE_MEMORY(ctx->debug_model_matches);

1104 }

1105

1106 ff_opencl_filter_uninit(avctx);

1107 }

1108

1109 static int deshake_opencl_init(AVFilterContext *avctx)

1110 {

1111 DeshakeOpenCLContext *ctx = avctx->priv;

1112 AVFilterLink *outlink = avctx->outputs[0];

1113 AVFilterLink *inlink = avctx->inputs[0];

1114 FilterLink *inl = ff_filter_link(inlink);

1115 // Pointer to the host-side pattern buffer to be initialized and then copied

1116 // to the GPU

1117 PointPair *pattern_host = NULL;

1118 cl_int cle;

1119 int err;

1120 cl_ulong8 zeroed_ulong8;

1121 FFFrameQueueGlobal fqg;

1122 cl_image_format grayscale_format;

1123 cl_image_desc grayscale_desc;

1124 cl_command_queue_properties queue_props;

1125

1126 const enum AVPixelFormat disallowed_formats[14] = {

1127 AV_PIX_FMT_GBRP,

1128 AV_PIX_FMT_GBRP9BE,

1129 AV_PIX_FMT_GBRP9LE,

1130 AV_PIX_FMT_GBRP10BE,

1131 AV_PIX_FMT_GBRP10LE,

1132 AV_PIX_FMT_GBRP16BE,

1133 AV_PIX_FMT_GBRP16LE,

1134 AV_PIX_FMT_GBRAP,

1135 AV_PIX_FMT_GBRAP16BE,

1136 AV_PIX_FMT_GBRAP16LE,

1137 AV_PIX_FMT_GBRAP12BE,

1138 AV_PIX_FMT_GBRAP12LE,

1139 AV_PIX_FMT_GBRAP10BE,

1140 AV_PIX_FMT_GBRAP10LE

1141 };

1142

1143 // Number of elements for an array

1144 const int image_grid_32 = ROUNDED_UP_DIV(outlink->h, 32) * ROUNDED_UP_DIV(outlink->w, 32);

1145

1146 const int descriptor_buf_size = image_grid_32 * (BREIFN / 8);

1147 const int features_buf_size = image_grid_32 * sizeof(cl_float2);

1148

1149 const AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)inl->hw_frames_ctx->data;

1150 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hw_frames_ctx->sw_format);

1151

1152 av_assert0(hw_frames_ctx);

1153 av_assert0(desc);

1154

1155 ff_framequeue_global_init(&fqg);

1156 ff_framequeue_init(&ctx->fq, &fqg);

1157 ctx->eof = 0;

1158 ctx->smooth_window = (int)(av_q2d(inl->frame_rate) * ctx->smooth_window_multiplier);

1159 ctx->curr_frame = 0;

1160

1161 memset(&zeroed_ulong8, 0, sizeof(cl_ulong8));

1162

1163 ctx->gauss_kernel = av_malloc_array(ctx->smooth_window, sizeof(float));

1164 if (!ctx->gauss_kernel) {

1165 err = AVERROR(ENOMEM);

1166 goto fail;

1167 }

1168

1169 ctx->ransac_err = av_malloc_array(MATCHES_CONTIG_SIZE, sizeof(float));

1170 if (!ctx->ransac_err) {

1171 err = AVERROR(ENOMEM);

1172 goto fail;

1173 }

1174

1175 for (int i = 0; i < RingbufCount; i++) {

1176 ctx->abs_motion.ringbuffers[i] = av_fifo_alloc2(ctx->smooth_window,

1177 sizeof(float), 0);

1178

1179 if (!ctx->abs_motion.ringbuffers[i]) {

1180 err = AVERROR(ENOMEM);

1181 goto fail;

1182 }

1183 }

1184

1185 if (ctx->debug_on) {

1186 ctx->abs_motion.debug_matches = av_fifo_alloc2(

1187 ctx->smooth_window / 2,

1188 sizeof(DebugMatches), 0

1189 );

1190

1191 if (!ctx->abs_motion.debug_matches) {

1192 err = AVERROR(ENOMEM);

1193 goto fail;

1194 }

1195 }

1196

1197 ctx->abs_motion.curr_frame_offset = 0;

1198 ctx->abs_motion.data_start_offset = -1;

1199 ctx->abs_motion.data_end_offset = -1;

1200

1201 pattern_host = av_malloc_array(BREIFN, sizeof(PointPair));

1202 if (!pattern_host) {

1203 err = AVERROR(ENOMEM);

1204 goto fail;

1205 }

1206

1207 ctx->matches_host = av_malloc_array(image_grid_32, sizeof(MotionVector));

1208 if (!ctx->matches_host) {

1209 err = AVERROR(ENOMEM);

1210 goto fail;

1211 }

1212

1213 ctx->matches_contig_host = av_malloc_array(MATCHES_CONTIG_SIZE, sizeof(MotionVector));

1214 if (!ctx->matches_contig_host) {

1215 err = AVERROR(ENOMEM);

1216 goto fail;

1217 }

1218

1219 ctx->inliers = av_malloc_array(MATCHES_CONTIG_SIZE, sizeof(MotionVector));

1220 if (!ctx->inliers) {

1221 err = AVERROR(ENOMEM);

1222 goto fail;

1223 }

1224

1225 // Initializing the patch pattern for building BREIF descriptors with

1226 av_lfg_init(&ctx->alfg, 234342424);

1227 for (int i = 0; i < BREIFN; ++i) {

1228 PointPair pair;

1229

1230 for (int j = 0; j < 2; ++j) {

1231 pair.p1.s[j] = rand_in(-BRIEF_PATCH_SIZE_HALF, BRIEF_PATCH_SIZE_HALF + 1, &ctx->alfg);

1232 pair.p2.s[j] = rand_in(-BRIEF_PATCH_SIZE_HALF, BRIEF_PATCH_SIZE_HALF + 1, &ctx->alfg);

1233 }

1234

1235 pattern_host[i] = pair;

1236 }

1237

1238 for (int i = 0; i < 14; i++) {

1239 if (ctx->sw_format == disallowed_formats[i]) {

1240 av_log(avctx, AV_LOG_ERROR, "unsupported format in deshake_opencl.\n");

1241 err = AVERROR(ENOSYS);

1242 goto fail;

1243 }

1244 }

1245

1246 if (desc->flags & AV_PIX_FMT_FLAG_RGB) {

1247 ctx->is_yuv = 0;

1248 } else {

1249 ctx->is_yuv = 1;

1250 }

1251 ctx->sw_format = hw_frames_ctx->sw_format;

1252

1253 err = ff_opencl_filter_load_program(avctx, &ff_source_deshake_cl, 1);

1254 if (err < 0)

1255 goto fail;

1256

1257 if (ctx->debug_on) {

1258 queue_props = CL_QUEUE_PROFILING_ENABLE;

1259 } else {

1260 queue_props = 0;

1261 }

1262 ctx->command_queue = clCreateCommandQueue(

1263 ctx->ocf.hwctx->context,

1264 ctx->ocf.hwctx->device_id,

1265 queue_props,

1266 &cle

1267 );

1268 CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL command queue %d.\n", cle);

1269

1270 CL_CREATE_KERNEL(ctx, grayscale);

1271 CL_CREATE_KERNEL(ctx, harris_response);

1272 CL_CREATE_KERNEL(ctx, refine_features);

1273 CL_CREATE_KERNEL(ctx, brief_descriptors);

1274 CL_CREATE_KERNEL(ctx, match_descriptors);

1275 CL_CREATE_KERNEL(ctx, transform);

1276 CL_CREATE_KERNEL(ctx, crop_upscale);

1277 if (ctx->debug_on)

1278 CL_CREATE_KERNEL(ctx, draw_debug_info);

1279

1280 if (!ctx->is_yuv) {

1281 grayscale_format.image_channel_order = CL_R;

1282 grayscale_format.image_channel_data_type = CL_FLOAT;

1283

1284 grayscale_desc = (cl_image_desc) {

1285 .image_type = CL_MEM_OBJECT_IMAGE2D,

1286 .image_width = outlink->w,

1287 .image_height = outlink->h,

1288 .image_depth = 0,

1289 .image_array_size = 0,

1290 .image_row_pitch = 0,

1291 .image_slice_pitch = 0,

1292 .num_mip_levels = 0,

1293 .num_samples = 0,

1294 .buffer = NULL,

1295 };

1296

1297 ctx->grayscale = clCreateImage(

1298 ctx->ocf.hwctx->context,

1299 0,

1300 &grayscale_format,

1301 &grayscale_desc,

1302 NULL,

1303 &cle

1304 );

1305 CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create grayscale image: %d.\n", cle);

1306 }

1307

1308 CL_CREATE_BUFFER(ctx, harris_buf, outlink->h * outlink->w * sizeof(float));

1309 CL_CREATE_BUFFER(ctx, refined_features, features_buf_size);

1310 CL_CREATE_BUFFER(ctx, prev_refined_features, features_buf_size);

1311 CL_CREATE_BUFFER_FLAGS(

1312 ctx,

1313 brief_pattern,

1314 CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,

1315 BREIFN * sizeof(PointPair),

1316 pattern_host

1317 );

1318 CL_CREATE_BUFFER(ctx, descriptors, descriptor_buf_size);

1319 CL_CREATE_BUFFER(ctx, prev_descriptors, descriptor_buf_size);

1320 CL_CREATE_BUFFER(ctx, matches, image_grid_32 * sizeof(MotionVector));

1321 CL_CREATE_BUFFER(ctx, matches_contig, MATCHES_CONTIG_SIZE * sizeof(MotionVector));

1322 CL_CREATE_BUFFER(ctx, transform_y, 9 * sizeof(float));

1323 CL_CREATE_BUFFER(ctx, transform_uv, 9 * sizeof(float));

1324 if (ctx->debug_on) {

1325 CL_CREATE_BUFFER(ctx, debug_matches, MATCHES_CONTIG_SIZE * sizeof(MotionVector));

1326 CL_CREATE_BUFFER(ctx, debug_model_matches, 3 * sizeof(MotionVector));

1327 }

1328

1329 ctx->initialized = 1;

1330 av_freep(&pattern_host);

1331

1332 return 0;

1333

1334 fail:

1335 av_freep(&pattern_host);

1336 return err;

1337 }

1338

1339 // Logs debug information about the transform data

1340 static void transform_debug(AVFilterContext *avctx, float *new_vals, float *old_vals, int curr_frame) {

1341 av_log(avctx, AV_LOG_VERBOSE,

1342 "Frame %d:\n"

1343 "\tframe moved from: %f x, %f y\n"

1344 "\t to: %f x, %f y\n"

1345 "\t rotated from: %f degrees\n"

1346 "\t to: %f degrees\n"

1347 "\t scaled from: %f x, %f y\n"

1348 "\t to: %f x, %f y\n"

1349 "\n"

1350 "\tframe moved by: %f x, %f y\n"

1351 "\t rotated by: %f degrees\n"

1352 "\t scaled by: %f x, %f y\n",

1353 curr_frame,

1354 old_vals[RingbufX], old_vals[RingbufY],

1355 new_vals[RingbufX], new_vals[RingbufY],

1356 old_vals[RingbufRot] * (180.0 / M_PI),

1357 new_vals[RingbufRot] * (180.0 / M_PI),

1358 old_vals[RingbufScaleX], old_vals[RingbufScaleY],

1359 new_vals[RingbufScaleX], new_vals[RingbufScaleY],

1360 old_vals[RingbufX] - new_vals[RingbufX], old_vals[RingbufY] - new_vals[RingbufY],

1361 old_vals[RingbufRot] * (180.0 / M_PI) - new_vals[RingbufRot] * (180.0 / M_PI),

1362 new_vals[RingbufScaleX] / old_vals[RingbufScaleX], new_vals[RingbufScaleY] / old_vals[RingbufScaleY]

1363 );

1364 }

1365

1366 // Uses the buffered motion information to determine a transform that smooths the

1367 // given frame and applies it

1368 static int filter_frame(AVFilterLink *link, AVFrame *input_frame)

1369 {

1370 AVFilterContext *avctx = link->dst;

1371 AVFilterLink *outlink = avctx->outputs[0];

1372 FilterLink *outl = ff_filter_link(outlink);

1373 DeshakeOpenCLContext *deshake_ctx = avctx->priv;

1374 AVFrame *cropped_frame = NULL, *transformed_frame = NULL;

1375 int err;

1376 cl_int cle;

1377 float new_vals[RingbufCount];

1378 float old_vals[RingbufCount];

1379 // Luma (in the case of YUV) transform, or just the transform in the case of RGB

1380 float transform_y[9];

1381 // Chroma transform

1382 float transform_uv[9];

1383 // Luma crop transform (or RGB)

1384 float transform_crop_y[9];

1385 // Chroma crop transform

1386 float transform_crop_uv[9];

1387 float transform_debug_rgb[9];

1388 size_t global_work[2];

1389 int64_t duration;

1390 cl_mem src, transformed, dst;

1391 cl_mem transforms[AV_VIDEO_MAX_PLANES];

1392 CropInfo crops[AV_VIDEO_MAX_PLANES];

1393 cl_event transform_event, crop_upscale_event;

1394 DebugMatches debug_matches;

1395 cl_int num_model_matches;

1396

1397 const float center_w = (float)input_frame->width / 2;

1398 const float center_h = (float)input_frame->height / 2;

1399

1400 const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(deshake_ctx->sw_format);

1401 const int chroma_width = AV_CEIL_RSHIFT(input_frame->width, desc->log2_chroma_w);

1402 const int chroma_height = AV_CEIL_RSHIFT(input_frame->height, desc->log2_chroma_h);

1403

1404 const float center_w_chroma = (float)chroma_width / 2;

1405 const float center_h_chroma = (float)chroma_height / 2;

1406

1407 const float luma_w_over_chroma_w = ((float)input_frame->width / (float)chroma_width);

1408 const float luma_h_over_chroma_h = ((float)input_frame->height / (float)chroma_height);

1409

1410 if (deshake_ctx->debug_on) {

1411 av_fifo_read(

1412 deshake_ctx->abs_motion.debug_matches,

1413 &debug_matches, 1);

1414 }

1415

1416 if (input_frame->duration) {

1417 duration = input_frame->duration;

1418 } else {

1419 duration = av_rescale_q(1, av_inv_q(outl->frame_rate), outlink->time_base);

1420 }

1421 deshake_ctx->duration = input_frame->pts + duration;

1422

1423 // Get the absolute transform data for this frame

1424 for (int i = 0; i < RingbufCount; i++) {

1425 av_fifo_peek(deshake_ctx->abs_motion.ringbuffers[i],

1426 &old_vals[i], 1,

1427 deshake_ctx->abs_motion.curr_frame_offset);

1428 }

1429

1430 if (deshake_ctx->tripod_mode) {

1431 // If tripod mode is turned on we simply undo all motion relative to the

1432 // first frame

1433

1434 new_vals[RingbufX] = 0.0f;

1435 new_vals[RingbufY] = 0.0f;

1436 new_vals[RingbufRot] = 0.0f;

1437 new_vals[RingbufScaleX] = 1.0f;

1438 new_vals[RingbufScaleY] = 1.0f;

1439 } else {

1440 // Tripod mode is off and we need to smooth a moving camera

1441

1442 new_vals[RingbufX] = smooth(

1443 deshake_ctx,

1444 deshake_ctx->gauss_kernel,

1445 deshake_ctx->smooth_window,

1446 input_frame->width,

1447 deshake_ctx->abs_motion.ringbuffers[RingbufX]

1448 );

1449 new_vals[RingbufY] = smooth(

1450 deshake_ctx,

1451 deshake_ctx->gauss_kernel,

1452 deshake_ctx->smooth_window,

1453 input_frame->height,

1454 deshake_ctx->abs_motion.ringbuffers[RingbufY]

1455 );

1456 new_vals[RingbufRot] = smooth(

1457 deshake_ctx,

1458 deshake_ctx->gauss_kernel,

1459 deshake_ctx->smooth_window,

1460 M_PI / 4,

1461 deshake_ctx->abs_motion.ringbuffers[RingbufRot]

1462 );

1463 new_vals[RingbufScaleX] = smooth(

1464 deshake_ctx,

1465 deshake_ctx->gauss_kernel,

1466 deshake_ctx->smooth_window,

1467 2.0f,

1468 deshake_ctx->abs_motion.ringbuffers[RingbufScaleX]

1469 );

1470 new_vals[RingbufScaleY] = smooth(

1471 deshake_ctx,

1472 deshake_ctx->gauss_kernel,

1473 deshake_ctx->smooth_window,

1474 2.0f,

1475 deshake_ctx->abs_motion.ringbuffers[RingbufScaleY]

1476 );

1477 }

1478

1479 transform_center_scale(

1480 old_vals[RingbufX] - new_vals[RingbufX],

1481 old_vals[RingbufY] - new_vals[RingbufY],

1482 old_vals[RingbufRot] - new_vals[RingbufRot],

1483 new_vals[RingbufScaleX] / old_vals[RingbufScaleX],

1484 new_vals[RingbufScaleY] / old_vals[RingbufScaleY],

1485 center_w,

1486 center_h,

1487 transform_y

1488 );

1489

1490 transform_center_scale(

1491 (old_vals[RingbufX] - new_vals[RingbufX]) / luma_w_over_chroma_w,

1492 (old_vals[RingbufY] - new_vals[RingbufY]) / luma_h_over_chroma_h,

1493 old_vals[RingbufRot] - new_vals[RingbufRot],

1494 new_vals[RingbufScaleX] / old_vals[RingbufScaleX],

1495 new_vals[RingbufScaleY] / old_vals[RingbufScaleY],

1496 center_w_chroma,

1497 center_h_chroma,

1498 transform_uv

1499 );

1500

1501 CL_BLOCKING_WRITE_BUFFER(deshake_ctx->command_queue, deshake_ctx->transform_y, 9 * sizeof(float), transform_y, NULL);

1502 CL_BLOCKING_WRITE_BUFFER(deshake_ctx->command_queue, deshake_ctx->transform_uv, 9 * sizeof(float), transform_uv, NULL);

1503

1504 if (deshake_ctx->debug_on)

1505 transform_debug(avctx, new_vals, old_vals, deshake_ctx->curr_frame);

1506

1507 cropped_frame = ff_get_video_buffer(outlink, outlink->w, outlink->h);

1508 if (!cropped_frame) {

1509 err = AVERROR(ENOMEM);

1510 goto fail;

1511 }

1512

1513 transformed_frame = ff_get_video_buffer(outlink, outlink->w, outlink->h);

1514 if (!transformed_frame) {

1515 err = AVERROR(ENOMEM);

1516 goto fail;

1517 }

1518

1519 transforms[0] = deshake_ctx->transform_y;

1520 transforms[1] = transforms[2] = deshake_ctx->transform_uv;

1521

1522 for (int p = 0; p < AV_VIDEO_MAX_PLANES; p++) {

1523 // Transform all of the planes appropriately

1524 src = (cl_mem)input_frame->data[p];

1525 transformed = (cl_mem)transformed_frame->data[p];

1526

1527 if (!transformed)

1528 break;

1529

1530 err = ff_opencl_filter_work_size_from_image(avctx, global_work, input_frame, p, 0);

1531 if (err < 0)

1532 goto fail;

1533

1534 CL_RUN_KERNEL_WITH_ARGS(

1535 deshake_ctx->command_queue,

1536 deshake_ctx->kernel_transform,

1537 global_work,

1538 NULL,

1539 &transform_event,

1540 { sizeof(cl_mem), &src },

1541 { sizeof(cl_mem), &transformed },

1542 { sizeof(cl_mem), &transforms[p] },

1543 );

1544 }

1545

1546 if (deshake_ctx->debug_on && !deshake_ctx->is_yuv && debug_matches.num_matches > 0) {

1547 CL_BLOCKING_WRITE_BUFFER(

1548 deshake_ctx->command_queue,

1549 deshake_ctx->debug_matches,

1550 debug_matches.num_matches * sizeof(MotionVector),

1551 debug_matches.matches,

1552 NULL

1553 );

1554

1555 CL_BLOCKING_WRITE_BUFFER(

1556 deshake_ctx->command_queue,

1557 deshake_ctx->debug_model_matches,

1558 debug_matches.num_model_matches * sizeof(MotionVector),

1559 debug_matches.model_matches,

1560 NULL

1561 );

1562

1563 num_model_matches = debug_matches.num_model_matches;

1564

1565 // Invert the transform

1566 transform_center_scale(

1567 new_vals[RingbufX] - old_vals[RingbufX],

1568 new_vals[RingbufY] - old_vals[RingbufY],

1569 new_vals[RingbufRot] - old_vals[RingbufRot],

1570 old_vals[RingbufScaleX] / new_vals[RingbufScaleX],

1571 old_vals[RingbufScaleY] / new_vals[RingbufScaleY],

1572 center_w,

1573 center_h,

1574 transform_debug_rgb

1575 );

1576

1577 CL_BLOCKING_WRITE_BUFFER(deshake_ctx->command_queue, deshake_ctx->transform_y, 9 * sizeof(float), transform_debug_rgb, NULL);

1578

1579 transformed = (cl_mem)transformed_frame->data[0];

1580 CL_RUN_KERNEL_WITH_ARGS(

1581 deshake_ctx->command_queue,

1582 deshake_ctx->kernel_draw_debug_info,

1583 (size_t[]){ debug_matches.num_matches },

1584 NULL,

1585 NULL,

1586 { sizeof(cl_mem), &transformed },

1587 { sizeof(cl_mem), &deshake_ctx->debug_matches },

1588 { sizeof(cl_mem), &deshake_ctx->debug_model_matches },

1589 { sizeof(cl_int), &num_model_matches },

1590 { sizeof(cl_mem), &deshake_ctx->transform_y }

1591 );

1592 }

1593

1594 if (deshake_ctx->should_crop) {

1595 // Generate transforms for cropping

1596 transform_center_scale(

1597 (old_vals[RingbufX] - new_vals[RingbufX]) / 5,

1598 (old_vals[RingbufY] - new_vals[RingbufY]) / 5,

1599 (old_vals[RingbufRot] - new_vals[RingbufRot]) / 5,

1600 new_vals[RingbufScaleX] / old_vals[RingbufScaleX],

1601 new_vals[RingbufScaleY] / old_vals[RingbufScaleY],

1602 center_w,

1603 center_h,

1604 transform_crop_y

1605 );

1606 update_needed_crop(&deshake_ctx->crop_y, transform_crop_y, input_frame->width, input_frame->height);

1607

1608 transform_center_scale(

1609 (old_vals[RingbufX] - new_vals[RingbufX]) / (5 * luma_w_over_chroma_w),

1610 (old_vals[RingbufY] - new_vals[RingbufY]) / (5 * luma_h_over_chroma_h),

1611 (old_vals[RingbufRot] - new_vals[RingbufRot]) / 5,

1612 new_vals[RingbufScaleX] / old_vals[RingbufScaleX],

1613 new_vals[RingbufScaleY] / old_vals[RingbufScaleY],

1614 center_w_chroma,

1615 center_h_chroma,

1616 transform_crop_uv

1617 );

1618 update_needed_crop(&deshake_ctx->crop_uv, transform_crop_uv, chroma_width, chroma_height);

1619

1620 crops[0] = deshake_ctx->crop_y;

1621 crops[1] = crops[2] = deshake_ctx->crop_uv;

1622

1623 for (int p = 0; p < AV_VIDEO_MAX_PLANES; p++) {

1624 // Crop all of the planes appropriately

1625 dst = (cl_mem)cropped_frame->data[p];

1626 transformed = (cl_mem)transformed_frame->data[p];

1627

1628 if (!dst)

1629 break;

1630

1631 err = ff_opencl_filter_work_size_from_image(avctx, global_work, input_frame, p, 0);

1632 if (err < 0)

1633 goto fail;

1634

1635 CL_RUN_KERNEL_WITH_ARGS(

1636 deshake_ctx->command_queue,

1637 deshake_ctx->kernel_crop_upscale,

1638 global_work,

1639 NULL,

1640 &crop_upscale_event,

1641 { sizeof(cl_mem), &transformed },

1642 { sizeof(cl_mem), &dst },

1643 { sizeof(cl_float2), &crops[p].top_left },

1644 { sizeof(cl_float2), &crops[p].bottom_right },

1645 );

1646 }

1647 }

1648

1649 if (deshake_ctx->curr_frame < deshake_ctx->smooth_window / 2) {

1650 // This means we are somewhere at the start of the video. We need to

1651 // increment the current frame offset until it reaches the center of

1652 // the ringbuffers (as the current frame will be located there for

1653 // the rest of the video).

1654 //

1655 // The end of the video is taken care of by draining motion data

1656 // one-by-one out of the buffer, causing the (at that point fixed)

1657 // offset to move towards later frames' data.

1658 ++deshake_ctx->abs_motion.curr_frame_offset;

1659 }

1660

1661 if (deshake_ctx->abs_motion.data_end_offset != -1) {

1662 // Keep the end offset in sync with the frame it's supposed to be

1663 // positioned at

1664 --deshake_ctx->abs_motion.data_end_offset;

1665

1666 if (deshake_ctx->abs_motion.data_end_offset == deshake_ctx->abs_motion.curr_frame_offset - 1) {

1667 // The end offset would be the start of the new video sequence; flip to

1668 // start offset

1669 deshake_ctx->abs_motion.data_end_offset = -1;

1670 deshake_ctx->abs_motion.data_start_offset = deshake_ctx->abs_motion.curr_frame_offset;

1671 }

1672 } else if (deshake_ctx->abs_motion.data_start_offset != -1) {

1673 // Keep the start offset in sync with the frame it's supposed to be

1674 // positioned at

1675 --deshake_ctx->abs_motion.data_start_offset;

1676 }

1677

1678 if (deshake_ctx->debug_on) {

1679 deshake_ctx->transform_time += ff_opencl_get_event_time(transform_event);

1680 if (deshake_ctx->should_crop) {

1681 deshake_ctx->crop_upscale_time += ff_opencl_get_event_time(crop_upscale_event);

1682 }

1683 }

1684

1685 ++deshake_ctx->curr_frame;

1686

1687 if (deshake_ctx->debug_on)

1688 av_freep(&debug_matches.matches);

1689

1690 if (deshake_ctx->should_crop) {

1691 err = av_frame_copy_props(cropped_frame, input_frame);

1692 if (err < 0)

1693 goto fail;

1694

1695 av_frame_free(&transformed_frame);

1696 av_frame_free(&input_frame);

1697 return ff_filter_frame(outlink, cropped_frame);

1698

1699 } else {

1700 err = av_frame_copy_props(transformed_frame, input_frame);

1701 if (err < 0)

1702 goto fail;

1703

1704 av_frame_free(&cropped_frame);

1705 av_frame_free(&input_frame);

1706 return ff_filter_frame(outlink, transformed_frame);

1707 }

1708

1709 fail:

1710 clFinish(deshake_ctx->command_queue);

1711

1712 if (deshake_ctx->debug_on)

1713 if (debug_matches.matches)

1714 av_freep(&debug_matches.matches);

1715

1716 av_frame_free(&input_frame);

1717 av_frame_free(&transformed_frame);

1718 av_frame_free(&cropped_frame);

1719 return err;

1720 }

1721

1722 // Add the given frame to the frame queue to eventually be processed.

1723 //

1724 // Also determines the motion from the previous frame and updates the stored

1725 // motion information accordingly.

1726 static int queue_frame(AVFilterLink *link, AVFrame *input_frame)

1727 {

1728 AVFilterContext *avctx = link->dst;

1729 DeshakeOpenCLContext *deshake_ctx = avctx->priv;

1730 int err;

1731 int num_vectors;

1732 int num_inliers = 0;

1733 cl_int cle;

1734 FrameDelta relative;

1735 SimilarityMatrix model;

1736 size_t global_work[2];

1737 size_t harris_global_work[2];

1738 size_t grid_32_global_work[2];

1739 int grid_32_h, grid_32_w;

1740 size_t local_work[2];

1741 cl_mem src, temp;

1742 float prev_vals[5];

1743 float new_vals[5];

1744 cl_event grayscale_event, harris_response_event, refine_features_event,

1745 brief_event, match_descriptors_event, read_buf_event;

1746 DebugMatches debug_matches;

1747

1748 num_vectors = 0;

1749

1750 local_work[0] = 8;

1751 local_work[1] = 8;

1752

1753 err = ff_opencl_filter_work_size_from_image(avctx, global_work, input_frame, 0, 0);

1754 if (err < 0)

1755 goto fail;

1756

1757 err = ff_opencl_filter_work_size_from_image(avctx, harris_global_work, input_frame, 0, 8);

1758 if (err < 0)

1759 goto fail;

1760

1761 err = ff_opencl_filter_work_size_from_image(avctx, grid_32_global_work, input_frame, 0, 32);

1762 if (err < 0)

1763 goto fail;

1764

1765 // We want a single work-item for each 32x32 block of pixels in the input frame

1766 grid_32_global_work[0] /= 32;

1767 grid_32_global_work[1] /= 32;

1768

1769 grid_32_h = ROUNDED_UP_DIV(input_frame->height, 32);

1770 grid_32_w = ROUNDED_UP_DIV(input_frame->width, 32);

1771

1772 if (deshake_ctx->is_yuv) {

1773 deshake_ctx->grayscale = (cl_mem)input_frame->data[0];

1774 } else {

1775 src = (cl_mem)input_frame->data[0];

1776

1777 CL_RUN_KERNEL_WITH_ARGS(

1778 deshake_ctx->command_queue,

1779 deshake_ctx->kernel_grayscale,

1780 global_work,

1781 NULL,

1782 &grayscale_event,

1783 { sizeof(cl_mem), &src },

1784 { sizeof(cl_mem), &deshake_ctx->grayscale }

1785 );

1786 }

1787

1788 CL_RUN_KERNEL_WITH_ARGS(

1789 deshake_ctx->command_queue,

1790 deshake_ctx->kernel_harris_response,

1791 harris_global_work,

1792 local_work,

1793 &harris_response_event,

1794 { sizeof(cl_mem), &deshake_ctx->grayscale },

1795 { sizeof(cl_mem), &deshake_ctx->harris_buf }

1796 );

1797

1798 CL_RUN_KERNEL_WITH_ARGS(

1799 deshake_ctx->command_queue,

1800 deshake_ctx->kernel_refine_features,

1801 grid_32_global_work,

1802 NULL,

1803 &refine_features_event,

1804 { sizeof(cl_mem), &deshake_ctx->grayscale },

1805 { sizeof(cl_mem), &deshake_ctx->harris_buf },

1806 { sizeof(cl_mem), &deshake_ctx->refined_features },

1807 { sizeof(cl_int), &deshake_ctx->refine_features }

1808 );

1809

1810 CL_RUN_KERNEL_WITH_ARGS(

1811 deshake_ctx->command_queue,

1812 deshake_ctx->kernel_brief_descriptors,

1813 grid_32_global_work,

1814 NULL,

1815 &brief_event,

1816 { sizeof(cl_mem), &deshake_ctx->grayscale },

1817 { sizeof(cl_mem), &deshake_ctx->refined_features },

1818 { sizeof(cl_mem), &deshake_ctx->descriptors },

1819 { sizeof(cl_mem), &deshake_ctx->brief_pattern}

1820 );

1821

1822 if (!av_fifo_can_read(deshake_ctx->abs_motion.ringbuffers[RingbufX])) {

1823 // This is the first frame we've been given to queue, meaning there is

1824 // no previous frame to match descriptors to

1825

1826 goto no_motion_data;

1827 }

1828

1829 CL_RUN_KERNEL_WITH_ARGS(

1830 deshake_ctx->command_queue,

1831 deshake_ctx->kernel_match_descriptors,

1832 grid_32_global_work,

1833 NULL,

1834 &match_descriptors_event,

1835 { sizeof(cl_mem), &deshake_ctx->prev_refined_features },

1836 { sizeof(cl_mem), &deshake_ctx->refined_features },

1837 { sizeof(cl_mem), &deshake_ctx->descriptors },

1838 { sizeof(cl_mem), &deshake_ctx->prev_descriptors },

1839 { sizeof(cl_mem), &deshake_ctx->matches }

1840 );

1841

1842 cle = clEnqueueReadBuffer(

1843 deshake_ctx->command_queue,

1844 deshake_ctx->matches,

1845 CL_TRUE,

1846 0,

1847 grid_32_h * grid_32_w * sizeof(MotionVector),

1848 deshake_ctx->matches_host,

1849 0,

1850 NULL,

1851 &read_buf_event

1852 );

1853 CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to read matches to host: %d.\n", cle);

1854

1855 num_vectors = make_vectors_contig(deshake_ctx, grid_32_h, grid_32_w);

1856

1857 if (num_vectors < 10) {

1858 // Not enough matches to get reliable motion data for this frame

1859 //

1860 // From this point on all data is relative to this frame rather than the

1861 // original frame. We have to make sure that we don't mix values that were

1862 // relative to the original frame with the new values relative to this

1863 // frame when doing the gaussian smoothing. We keep track of where the old

1864 // values end using this data_end_offset field in order to accomplish

1865 // that goal.

1866 //

1867 // If no motion data is present for multiple frames in a short window of

1868 // time, we leave the end where it was to avoid mixing 0s in with the

1869 // old data (and just treat them all as part of the new values)

1870 if (deshake_ctx->abs_motion.data_end_offset == -1) {

1871 deshake_ctx->abs_motion.data_end_offset =

1872 av_fifo_can_read(deshake_ctx->abs_motion.ringbuffers[RingbufX]) - 1;

1873 }

1874

1875 goto no_motion_data;

1876 }

1877

1878 if (!estimate_affine_2d(

1879 deshake_ctx,

1880 deshake_ctx->matches_contig_host,

1881 &debug_matches,

1882 num_vectors,

1883 model.matrix,

1884 10.0,

1885 3000,

1886 0.999999999999

1887 )) {

1888 goto no_motion_data;

1889 }

1890

1891 for (int i = 0; i < num_vectors; i++) {

1892 if (deshake_ctx->matches_contig_host[i].should_consider) {

1893 deshake_ctx->inliers[num_inliers] = deshake_ctx->matches_contig_host[i];

1894 num_inliers++;

1895 }

1896 }

1897

1898 if (!minimize_error(

1899 deshake_ctx,

1900 deshake_ctx->inliers,

1901 &debug_matches,

1902 num_inliers,

1903 model.matrix,

1904 400

1905 )) {

1906 goto no_motion_data;

1907 }

1908

1909

1910 relative = decompose_transform(model.matrix);

1911

1912 // Get the absolute transform data for the previous frame

1913 for (int i = 0; i < RingbufCount; i++) {

1914 av_fifo_peek(

1915 deshake_ctx->abs_motion.ringbuffers[i],

1916 &prev_vals[i], 1,

1917 av_fifo_can_read(deshake_ctx->abs_motion.ringbuffers[i]) - 1);

1918 }

1919

1920 new_vals[RingbufX] = prev_vals[RingbufX] + relative.translation.s[0];

1921 new_vals[RingbufY] = prev_vals[RingbufY] + relative.translation.s[1];

1922 new_vals[RingbufRot] = prev_vals[RingbufRot] + relative.rotation;

1923 new_vals[RingbufScaleX] = prev_vals[RingbufScaleX] / relative.scale.s[0];

1924 new_vals[RingbufScaleY] = prev_vals[RingbufScaleY] / relative.scale.s[1];

1925

1926 if (deshake_ctx->debug_on) {

1927 if (!deshake_ctx->is_yuv) {

1928 deshake_ctx->grayscale_time += ff_opencl_get_event_time(grayscale_event);

1929 }

1930 deshake_ctx->harris_response_time += ff_opencl_get_event_time(harris_response_event);

1931 deshake_ctx->refine_features_time += ff_opencl_get_event_time(refine_features_event);

1932 deshake_ctx->brief_descriptors_time += ff_opencl_get_event_time(brief_event);

1933 deshake_ctx->match_descriptors_time += ff_opencl_get_event_time(match_descriptors_event);

1934 deshake_ctx->read_buf_time += ff_opencl_get_event_time(read_buf_event);

1935 }

1936

1937 goto end;

1938

1939 no_motion_data:

1940 new_vals[RingbufX] = 0.0f;

1941 new_vals[RingbufY] = 0.0f;

1942 new_vals[RingbufRot] = 0.0f;

1943 new_vals[RingbufScaleX] = 1.0f;

1944 new_vals[RingbufScaleY] = 1.0f;

1945

1946 for (int i = 0; i < num_vectors; i++) {

1947 deshake_ctx->matches_contig_host[i].should_consider = 0;

1948 }

1949 debug_matches.num_model_matches = 0;

1950

1951 if (deshake_ctx->debug_on) {

1952 av_log(avctx, AV_LOG_VERBOSE,

1953 "\n[ALERT] No motion data found in queue_frame, motion reset to 0\n\n"

1954 );

1955 }

1956

1957 goto end;

1958

1959 end:

1960 // Swap the descriptor buffers (we don't need the previous frame's descriptors

1961 // again so we will use that space for the next frame's descriptors)

1962 temp = deshake_ctx->prev_descriptors;

1963 deshake_ctx->prev_descriptors = deshake_ctx->descriptors;

1964 deshake_ctx->descriptors = temp;

1965

1966 // Same for the refined features

1967 temp = deshake_ctx->prev_refined_features;

1968 deshake_ctx->prev_refined_features = deshake_ctx->refined_features;

1969 deshake_ctx->refined_features = temp;

1970

1971 if (deshake_ctx->debug_on) {

1972 if (num_vectors == 0) {

1973 debug_matches.matches = NULL;

1974 } else {

1975 debug_matches.matches = av_malloc_array(num_vectors, sizeof(MotionVector));

1976

1977 if (!debug_matches.matches) {

1978 err = AVERROR(ENOMEM);

1979 goto fail;

1980 }

1981 }

1982

1983 for (int i = 0; i < num_vectors; i++) {

1984 debug_matches.matches[i] = deshake_ctx->matches_contig_host[i];

1985 }

1986 debug_matches.num_matches = num_vectors;

1987

1988 av_fifo_write(

1989 deshake_ctx->abs_motion.debug_matches,

1990 &debug_matches, 1);

1991 }

1992

1993 for (int i = 0; i < RingbufCount; i++) {

1994 av_fifo_write(deshake_ctx->abs_motion.ringbuffers[i], &new_vals[i], 1);

1995 }

1996

1997 return ff_framequeue_add(&deshake_ctx->fq, input_frame);

1998

1999 fail:

2000 clFinish(deshake_ctx->command_queue);

2001 av_frame_free(&input_frame);

2002 return err;

2003 }

2004

2005 static int activate(AVFilterContext *ctx)

2006 {

2007 AVFilterLink *inlink = ctx->inputs[0];

2008 AVFilterLink *outlink = ctx->outputs[0];

2009 DeshakeOpenCLContext *deshake_ctx = ctx->priv;

2010 AVFrame *frame = NULL;

2011 int ret, status;

2012 int64_t pts;

2013

2014 FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);

2015

2016 if (!deshake_ctx->eof) {

2017 ret = ff_inlink_consume_frame(inlink, &frame);

2018 if (ret < 0)

2019 return ret;

2020 if (ret > 0) {

2021 if (!frame->hw_frames_ctx)

2022 return AVERROR(EINVAL);

2023

2024 if (!deshake_ctx->initialized) {

2025 ret = deshake_opencl_init(ctx);

2026 if (ret < 0)

2027 return ret;

2028 }

2029

2030 // If there is no more space in the ringbuffers, remove the oldest

2031 // values to make room for the new ones

2032 if (!av_fifo_can_write(deshake_ctx->abs_motion.ringbuffers[RingbufX])) {

2033 for (int i = 0; i < RingbufCount; i++) {

2034 av_fifo_drain2(deshake_ctx->abs_motion.ringbuffers[i], 1);

2035 }

2036 }

2037 ret = queue_frame(inlink, frame);

2038 if (ret < 0)

2039 return ret;

2040 if (ret >= 0) {

2041 // See if we have enough buffered frames to process one

2042 //

2043 // "enough" is half the smooth window of queued frames into the future

2044 if (ff_framequeue_queued_frames(&deshake_ctx->fq) >= deshake_ctx->smooth_window / 2) {

2045 return filter_frame(inlink, ff_framequeue_take(&deshake_ctx->fq));

2046 }

2047 }

2048 }

2049 }

2050

2051 if (!deshake_ctx->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {

2052 if (status == AVERROR_EOF) {

2053 deshake_ctx->eof = 1;

2054 }

2055 }

2056

2057 if (deshake_ctx->eof) {

2058 // Finish processing the rest of the frames in the queue.

2059 while(ff_framequeue_queued_frames(&deshake_ctx->fq) != 0) {

2060 for (int i = 0; i < RingbufCount; i++) {

2061 av_fifo_drain2(deshake_ctx->abs_motion.ringbuffers[i], 1);

2062 }

2063

2064 ret = filter_frame(inlink, ff_framequeue_take(&deshake_ctx->fq));

2065 if (ret < 0) {

2066 return ret;

2067 }

2068 }

2069

2070 if (deshake_ctx->debug_on) {

2071 av_log(ctx, AV_LOG_VERBOSE,

2072 "Average kernel execution times:\n"

2073 "\t grayscale: %0.3f ms\n"

2074 "\t harris_response: %0.3f ms\n"

2075 "\t refine_features: %0.3f ms\n"

2076 "\tbrief_descriptors: %0.3f ms\n"

2077 "\tmatch_descriptors: %0.3f ms\n"

2078 "\t transform: %0.3f ms\n"

2079 "\t crop_upscale: %0.3f ms\n"

2080 "Average buffer read times:\n"

2081 "\t features buf: %0.3f ms\n",

2082 averaged_event_time_ms(deshake_ctx->grayscale_time, deshake_ctx->curr_frame),

2083 averaged_event_time_ms(deshake_ctx->harris_response_time, deshake_ctx->curr_frame),

2084 averaged_event_time_ms(deshake_ctx->refine_features_time, deshake_ctx->curr_frame),

2085 averaged_event_time_ms(deshake_ctx->brief_descriptors_time, deshake_ctx->curr_frame),

2086 averaged_event_time_ms(deshake_ctx->match_descriptors_time, deshake_ctx->curr_frame),

2087 averaged_event_time_ms(deshake_ctx->transform_time, deshake_ctx->curr_frame),

2088 averaged_event_time_ms(deshake_ctx->crop_upscale_time, deshake_ctx->curr_frame),

2089 averaged_event_time_ms(deshake_ctx->read_buf_time, deshake_ctx->curr_frame)

2090 );

2091 }

2092

2093 ff_outlink_set_status(outlink, AVERROR_EOF, deshake_ctx->duration);

2094 return 0;

2095 }

2096

2097 if (!deshake_ctx->eof) {

2098 FF_FILTER_FORWARD_WANTED(outlink, inlink);

2099 }

2100

2101 return FFERROR_NOT_READY;

2102 }

2103

2104 static const AVFilterPad deshake_opencl_inputs[] = {

2105 {

2106 .name = "default",

2107 .type = AVMEDIA_TYPE_VIDEO,

2108 .config_props = &ff_opencl_filter_config_input,

2109 },

2110 };

2111

2112 static const AVFilterPad deshake_opencl_outputs[] = {

2113 {

2114 .name = "default",

2115 .type = AVMEDIA_TYPE_VIDEO,

2116 .config_props = &ff_opencl_filter_config_output,

2117 },

2118 };

2119

2120 #define OFFSET(x) offsetof(DeshakeOpenCLContext, x)

2121 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM

2122

2123 static const AVOption deshake_opencl_options[] = {

2124 {

2125 "tripod", "simulates a tripod by preventing any camera movement whatsoever "

2126 "from the original frame",

2127 OFFSET(tripod_mode), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS

2128 },

2129 {

2130 "debug", "turn on additional debugging information",

2131 OFFSET(debug_on), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS

2132 },

2133 {

2134 "adaptive_crop", "attempt to subtly crop borders to reduce mirrored content",

2135 OFFSET(should_crop), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS

2136 },

2137 {

2138 "refine_features", "refine feature point locations at a sub-pixel level",

2139 OFFSET(refine_features), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS

2140 },

2141 {

2142 "smooth_strength", "smoothing strength (0 attempts to adaptively determine optimal strength)",

2143 OFFSET(smooth_percent), AV_OPT_TYPE_FLOAT, {.dbl = 0.0f}, 0.0f, 1.0f, FLAGS

2144 },

2145 {

2146 "smooth_window_multiplier", "multiplier for number of frames to buffer for motion data",

2147 OFFSET(smooth_window_multiplier), AV_OPT_TYPE_FLOAT, {.dbl = 2.0}, 0.1, 10.0, FLAGS

2148 },

2149 { NULL }

2150 };

2151

2152 AVFILTER_DEFINE_CLASS(deshake_opencl);

2153

2154 const FFFilter ff_vf_deshake_opencl = {

2155 .p.name = "deshake_opencl",

2156 .p.description = NULL_IF_CONFIG_SMALL("Feature-point based video stabilization filter"),

2157 .p.priv_class = &deshake_opencl_class,

2158 .p.flags = AVFILTER_FLAG_HWDEVICE,

2159 .priv_size = sizeof(DeshakeOpenCLContext),

2160 .init = &ff_opencl_filter_init,

2161 .uninit = &deshake_opencl_uninit,

2162 .activate = activate,

2163 FILTER_INPUTS(deshake_opencl_inputs),

2164 FILTER_OUTPUTS(deshake_opencl_outputs),

2165 FILTER_SINGLE_PIXFMT(AV_PIX_FMT_OPENCL),

2166 .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,

2167 };

deshake_opencl_inputs

static const AVFilterPad deshake_opencl_inputs[]

Definition: vf_deshake_opencl.c:2104

AVFILTER_DEFINE_CLASS

AVFILTER_DEFINE_CLASS(deshake_opencl)

MATCHES_CONTIG_SIZE

#define MATCHES_CONTIG_SIZE

Definition: vf_deshake_opencl.c:91

ff_get_video_buffer

AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)

Request a picture buffer with a specific set of permissions.

Definition: video.c:117

CL_RUN_KERNEL_WITH_ARGS

#define CL_RUN_KERNEL_WITH_ARGS(queue, kernel, global_work_size, local_work_size, event,...)

Uses the above macro to enqueue the given kernel and then additionally runs it to completion via clFi...

Definition: opencl.h:180

av_fifo_drain2

void av_fifo_drain2(AVFifo *f, size_t size)

Discard the specified amount of data from an AVFifo.

Definition: fifo.c:266

#define F2(l, r, i)

Definition: cast5.c:44

AVPixelFormat

Pixel format.

Definition: pixfmt.h:71

av_fifo_can_write

size_t av_fifo_can_write(const AVFifo *f)

Definition: fifo.c:94

free_debug_matches

static void free_debug_matches(AbsoluteFrameMotion *afm)

Definition: vf_deshake_opencl.c:150

FrameDelta::translation

cl_float2 translation

Definition: vf_deshake_opencl.c:165

av_clip

#define av_clip

Definition: common.h:100

IterIndices

Definition: vf_deshake_opencl.c:185

const char * r

Definition: vf_curves.c:127

AVERROR

Filter the word "frame" indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions

opt.h

DeshakeOpenCLContext::brief_pattern

cl_mem brief_pattern

Definition: vf_deshake_opencl.c:255

deshake_opencl_init

static int deshake_opencl_init(AVFilterContext *avctx)

Definition: vf_deshake_opencl.c:1109

estimate_affine_2d

static int estimate_affine_2d(DeshakeOpenCLContext *deshake_ctx, MotionVector *point_pairs, DebugMatches *debug_matches, const int num_point_pairs, double *model_out, const double threshold, const int max_iters, const double confidence)

Definition: vf_deshake_opencl.c:517

minimize_error

static int minimize_error(DeshakeOpenCLContext *deshake_ctx, MotionVector *inliers, DebugMatches *debug_matches, const int num_inliers, double *model_out, const int max_iters)

Definition: vf_deshake_opencl.c:673

OFFSET

#define OFFSET(x)

Definition: vf_deshake_opencl.c:2120

av_lfg_init

av_cold void av_lfg_init(AVLFG *c, unsigned int seed)

Definition: lfg.c:32

AV_PIX_FMT_GBRP16BE

@ AV_PIX_FMT_GBRP16BE

planar GBR 4:4:4 48bpp, big-endian

Definition: pixfmt.h:171

DeshakeOpenCLContext::refine_features_time

unsigned long long refine_features_time

Definition: vf_deshake_opencl.c:292

ff_filter_frame

int ff_filter_frame(AVFilterLink *link, AVFrame *frame)

Send a frame of data to the next filter.

Definition: avfilter.c:1067

AbsoluteFrameMotion

Definition: vf_deshake_opencl.c:133

AVFrame::duration

int64_t duration

Duration of the frame, in the same units as pts.

Definition: frame.h:775

AV_PIX_FMT_GBRP10BE

@ AV_PIX_FMT_GBRP10BE

planar GBR 4:4:4 30bpp, big-endian

Definition: pixfmt.h:169

av_pix_fmt_desc_get

const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)

Definition: pixdesc.c:3456

AVERROR_EOF

#define AVERROR_EOF

End of file.

Definition: error.h:57

AVBufferRef::data

uint8_t * data

The data buffer.

Definition: buffer.h:90

FFERROR_NOT_READY

return FFERROR_NOT_READY

Definition: filter_design.txt:204

DeshakeOpenCLContext::kernel_draw_debug_info

cl_kernel kernel_draw_debug_info

Definition: vf_deshake_opencl.c:285

PointPair::p2

cl_float2 p2

Definition: vf_deshake_opencl.c:99

matrix

Definition: vc1dsp.c:43

DeshakeOpenCLContext::transform_uv

cl_mem transform_uv

Definition: vf_deshake_opencl.c:266

int64_t

long long int64_t

Definition: coverity.c:34

inlink

The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink

Definition: filter_design.txt:212

AV_VIDEO_MAX_PLANES

#define AV_VIDEO_MAX_PLANES

Maximum number of planes in any pixel format.

Definition: pixfmt.h:40

normalize.log

log

Definition: normalize.py:21

av_frame_free

void av_frame_free(AVFrame **frame)

Free the frame and any dynamically allocated objects in it, e.g.

Definition: frame.c:64

DeshakeOpenCLContext::should_crop

int should_crop

Definition: vf_deshake_opencl.c:272

av_fifo_peek

int av_fifo_peek(const AVFifo *f, void *buf, size_t nb_elems, size_t offset)

Read data from a FIFO without modifying FIFO state.

Definition: fifo.c:255

FILTER_INPUTS

#define FILTER_INPUTS(array)

Definition: filters.h:263

AVFrame

This structure describes decoded (raw) audio or video data.

Definition: frame.h:427

pixdesc.h

AVFrame::pts

int64_t pts

Presentation timestamp in time_base units (time when frame should be shown to user).

Definition: frame.h:529

AVFrame::width

int width

Definition: frame.h:499

ff_source_deshake_cl

const char * ff_source_deshake_cl

DeshakeOpenCLContext::prev_descriptors

cl_mem prev_descriptors

Definition: vf_deshake_opencl.c:259

opencl.h

AVOption

AVOption.

Definition: opt.h:429

#define b

Definition: input.c:42

RingbufX

@ RingbufX

Definition: vf_deshake_opencl.c:110

RingbufRot

@ RingbufRot

Definition: vf_deshake_opencl.c:112

DeshakeOpenCLContext::abs_motion

AbsoluteFrameMotion abs_motion

Definition: vf_deshake_opencl.c:205

expf

#define expf(x)

Definition: libm.h:285

high

int high

Definition: dovi_rpuenc.c:39

transforms

static const struct @162 transforms[18]

AbsoluteFrameMotion::curr_frame_offset

int curr_frame_offset

Definition: vf_deshake_opencl.c:139

AV_LOG_VERBOSE

#define AV_LOG_VERBOSE

Detailed information.

Definition: log.h:226

float.h

ff_opencl_filter_load_program

int ff_opencl_filter_load_program(AVFilterContext *avctx, const char **program_source_array, int nb_strings)

Load a new OpenCL program from strings in memory.

Definition: opencl.c:159

update_needed_crop

static void update_needed_crop(CropInfo *crop, float *transform, float frame_width, float frame_height)

Definition: vf_deshake_opencl.c:989

ff_framequeue_init

void ff_framequeue_init(FFFrameQueue *fq, FFFrameQueueGlobal *fqg)

Init a frame queue and attach it to a global structure.

Definition: framequeue.c:50

FFMAX

#define FFMAX(a, b)

Definition: macros.h:47

AVFilter::name

const char * name

Filter name.

Definition: avfilter.h:220

points_not_collinear

static int points_not_collinear(const cl_float2 **points)

Definition: vf_deshake_opencl.c:349

video.h

AVFilterLink

A link between two filters.

Definition: avfilter.h:395

FF_FILTER_FORWARD_STATUS_BACK

#define FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink)

Forward the status on an output link to an input link.

Definition: filters.h:638

CL_CREATE_KERNEL

#define CL_CREATE_KERNEL(ctx, kernel_name)

Create a kernel with the given name.

Definition: opencl.h:93

CL_CREATE_BUFFER

#define CL_CREATE_BUFFER(ctx, buffer_name, size)

Create a buffer with the given information.

Definition: opencl.h:237

AVFrame::data

uint8_t * data[AV_NUM_DATA_POINTERS]

pointer to the picture/channel planes.

Definition: frame.h:448

FilterLink

Link properties exposed to filter code, but not external callers.

Definition: filters.h:128

BRIEF_PATCH_SIZE_HALF

#define BRIEF_PATCH_SIZE_HALF

Definition: vf_deshake_opencl.c:89

DeshakeOpenCLContext::harris_buf

cl_mem harris_buf

Definition: vf_deshake_opencl.c:247

DeshakeOpenCLContext::smooth_window_multiplier

float smooth_window_multiplier

Definition: vf_deshake_opencl.c:281

AV_PIX_FMT_GBRAP12LE

@ AV_PIX_FMT_GBRAP12LE

planar GBR 4:4:4:4 48bpp, little-endian

Definition: pixfmt.h:311

ff_inlink_consume_frame

int ff_inlink_consume_frame(AVFilterLink *link, AVFrame **rframe)

Take a frame from the link's FIFO and update the link's stats.

Definition: avfilter.c:1517

ff_opencl_filter_work_size_from_image

int ff_opencl_filter_work_size_from_image(AVFilterContext *avctx, size_t *work_size, AVFrame *frame, int plane, int block_alignment)

Find the work size needed needed for a given plane of an image.

Definition: opencl.c:266

fifo.h

DeshakeOpenCLContext::debug_matches

cl_mem debug_matches

Definition: vf_deshake_opencl.c:286

AbsoluteFrameMotion::data_end_offset

int data_end_offset

Definition: vf_deshake_opencl.c:143

AV_PIX_FMT_GBRAP

@ AV_PIX_FMT_GBRAP

planar GBRA 4:4:4:4 32bpp

Definition: pixfmt.h:212

AVFilterContext::priv

void * priv

private data for use by the filter

Definition: avfilter.h:289

IterIndices::start

int start

Definition: vf_deshake_opencl.c:186

fail

#define fail()

Definition: checkasm.h:207

av_fifo_write

int av_fifo_write(AVFifo *f, const void *buf, size_t nb_elems)

Write data into a FIFO.

Definition: fifo.c:188

FFSIGN

#define FFSIGN(a)

Definition: common.h:75

DeshakeOpenCLContext::curr_frame

int curr_frame

Definition: vf_deshake_opencl.c:211

DeshakeOpenCLContext::kernel_transform

cl_kernel kernel_transform

Definition: vf_deshake_opencl.c:241

AbsoluteFrameMotion::ringbuffers

AVFifo * ringbuffers[RingbufCount]

Definition: vf_deshake_opencl.c:135

RingbufScaleY

@ RingbufScaleY

Definition: vf_deshake_opencl.c:114

IterIndices::end

int end

Definition: vf_deshake_opencl.c:187

val

static double val(void *priv, double ch)

Definition: aeval.c:77

deshake_opencl_outputs

static const AVFilterPad deshake_opencl_outputs[]

Definition: vf_deshake_opencl.c:2112

pts

static int64_t pts

Definition: transcode_aac.c:644

fabsf

static __device__ float fabsf(float a)

Definition: cuda_runtime.h:181

DeshakeOpenCLContext::kernel_crop_upscale

cl_kernel kernel_crop_upscale

Definition: vf_deshake_opencl.c:242

DeshakeOpenCLContext::initialized

int initialized

Definition: vf_deshake_opencl.c:193

DeshakeOpenCLContext::smooth_window

int smooth_window

Definition: vf_deshake_opencl.c:209

FrameDelta::skew

cl_float2 skew

Definition: vf_deshake_opencl.c:168

RingbufScaleX

@ RingbufScaleX

Definition: vf_deshake_opencl.c:113

ff_opencl_filter_config_output

int ff_opencl_filter_config_output(AVFilterLink *outlink)

Create a suitable hardware frames context for the output.

Definition: opencl.c:83

AVFilterPad

A filter pad used for either input or output.

Definition: filters.h:39

DeshakeOpenCLContext::eof

int eof

Definition: vf_deshake_opencl.c:197

avassert.h

DeshakeOpenCLContext::read_buf_time

unsigned long long read_buf_time

Definition: vf_deshake_opencl.c:299

AV_LOG_ERROR

#define AV_LOG_ERROR

Something went wrong and cannot losslessly be recovered.

Definition: log.h:210

av_cold

#define av_cold

Definition: attributes.h:106

av_fifo_read

int av_fifo_read(AVFifo *f, void *buf, size_t nb_elems)

Read data from a FIFO.

Definition: fifo.c:240

DeshakeOpenCLContext::debug_model_matches

cl_mem debug_model_matches

Definition: vf_deshake_opencl.c:287

FFFilter

Definition: filters.h:266

DebugMatches::num_model_matches

int num_model_matches

Definition: vf_deshake_opencl.c:128

duration

int64_t duration

Definition: movenc.c:65

float

Definition: af_crystalizer.c:122

AV_PIX_FMT_GBRAP16BE

@ AV_PIX_FMT_GBRAP16BE

planar GBRA 4:4:4:4 64bpp, big-endian

Definition: pixfmt.h:213

ff_outlink_set_status

static void ff_outlink_set_status(AVFilterLink *link, int status, int64_t pts)

Set the status field of a link from the source filter.

Definition: filters.h:628

#define s(width, name)

Definition: cbs_vp9.c:198

AV_PIX_FMT_GBRP16LE

@ AV_PIX_FMT_GBRP16LE

planar GBR 4:4:4 48bpp, little-endian

Definition: pixfmt.h:172

RingbufY

@ RingbufY

Definition: vf_deshake_opencl.c:111

av_lfg_get

static unsigned int av_lfg_get(AVLFG *c)

Get the next random unsigned 32-bit number using an ALFG.

Definition: lfg.h:53

AV_CEIL_RSHIFT

#define AV_CEIL_RSHIFT(a, b)

Definition: common.h:60

DeshakeOpenCLContext::smooth_percent

float smooth_percent

Definition: vf_deshake_opencl.c:278

FFFrameQueueGlobal

Structure to hold global options and statistics for frame queues.

Definition: framequeue.h:44

#define mi

Definition: vf_colormatrix.c:106

DeshakeOpenCLContext::grayscale_time

unsigned long long grayscale_time

Definition: vf_deshake_opencl.c:290

DeshakeOpenCLContext::kernel_brief_descriptors

cl_kernel kernel_brief_descriptors

Definition: vf_deshake_opencl.c:239

transform

static const int8_t transform[32][32]

Definition: dsp.c:27

av_q2d

static double av_q2d(AVRational a)

Convert an AVRational to a double.

Definition: rational.h:104

lfg.h

deshake_opencl_uninit

static av_cold void deshake_opencl_uninit(AVFilterContext *avctx)

Definition: vf_deshake_opencl.c:1050

av_assert0

#define av_assert0(cond)

assert() equivalent, that is always enabled.

Definition: avassert.h:41

filters.h

DeshakeOpenCLContext::kernel_match_descriptors

cl_kernel kernel_match_descriptors

Definition: vf_deshake_opencl.c:240

ctx

AVFormatContext * ctx

Definition: movenc.c:49

av_rescale_q

int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq)

Rescale a 64-bit integer by 2 rational numbers.

Definition: mathematics.c:142

AV_PIX_FMT_GBRP10LE

@ AV_PIX_FMT_GBRP10LE

planar GBR 4:4:4 30bpp, little-endian

Definition: pixfmt.h:170

FILTER_OUTPUTS

#define FILTER_OUTPUTS(array)

Definition: filters.h:264

link

Definition: filter_design.txt:23

CL_RELEASE_KERNEL

#define CL_RELEASE_KERNEL(k)

release an OpenCL Kernel

Definition: opencl.h:101

ff_framequeue_take

AVFrame * ff_framequeue_take(FFFrameQueue *fq)

Take the first frame in the queue.

Definition: framequeue.c:104

RingbufferIndices

Definition: vf_deshake_opencl.c:109

rand_in

static int rand_in(int low, int high, AVLFG *alfg)

Definition: vf_deshake_opencl.c:303

averaged_event_time_ms

static double averaged_event_time_ms(unsigned long long total_time, int num_frames)

Definition: vf_deshake_opencl.c:309

AbsoluteFrameMotion::debug_matches

AVFifo * debug_matches

Definition: vf_deshake_opencl.c:145

filter_frame

static int filter_frame(AVFilterLink *link, AVFrame *input_frame)

Definition: vf_deshake_opencl.c:1368

result

and forward the result(frame or status change) to the corresponding input. If nothing is possible

AV_PIX_FMT_GBRAP12BE

@ AV_PIX_FMT_GBRAP12BE

planar GBR 4:4:4:4 48bpp, big-endian

Definition: pixfmt.h:310

fabs

static __device__ float fabs(float a)

Definition: cuda_runtime.h:182

NULL

#define NULL

Definition: coverity.c:32

DeshakeOpenCLContext::alfg

AVLFG alfg

Definition: vf_deshake_opencl.c:200

av_frame_copy_props

int av_frame_copy_props(AVFrame *dst, const AVFrame *src)

Copy only "metadata" fields from src to dst.

Definition: frame.c:599

transform_center_scale

static void transform_center_scale(float x_shift, float y_shift, float angle, float scale_x, float scale_y, float center_w, float center_h, float *matrix)

Definition: vf_deshake_opencl.c:951

ff_framequeue_add

int ff_framequeue_add(FFFrameQueue *fq, AVFrame *frame)

Add a frame.

Definition: framequeue.c:67

ff_framequeue_free

void ff_framequeue_free(FFFrameQueue *fq)

Free the queue and all queued frames.

Definition: framequeue.c:57

DeshakeOpenCLContext::ransac_err

float * ransac_err

Definition: vf_deshake_opencl.c:217

DeshakeOpenCLContext::kernel_harris_response

cl_kernel kernel_harris_response

Definition: vf_deshake_opencl.c:237

framequeue.h

AVFilterContext::inputs

AVFilterLink ** inputs

array of pointers to input links

Definition: avfilter.h:282

start_end_for

static IterIndices start_end_for(DeshakeOpenCLContext *deshake_ctx, int length)

Definition: vf_deshake_opencl.c:833

av_fifo_can_read

size_t av_fifo_can_read(const AVFifo *f)

Definition: fifo.c:87

double

Definition: af_crystalizer.c:132

AV_PIX_FMT_OPENCL

@ AV_PIX_FMT_OPENCL

Hardware surfaces for OpenCL.

Definition: pixfmt.h:358

DeshakeOpenCLContext::matches_contig

cl_mem matches_contig

Definition: vf_deshake_opencl.c:262

DebugMatches::model_matches

MotionVector model_matches[3]

Definition: vf_deshake_opencl.c:124

DeshakeOpenCLContext::duration

int64_t duration

Definition: vf_deshake_opencl.c:196

DeshakeOpenCLContext

Definition: vf_deshake_opencl.c:190

ff_inlink_acknowledge_status

int ff_inlink_acknowledge_status(AVFilterLink *link, int *rstatus, int64_t *rpts)

Test and acknowledge the change of status on the link.

Definition: avfilter.c:1464

DeshakeOpenCLContext::crop_uv

CropInfo crop_uv

Definition: vf_deshake_opencl.c:222

Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c

Definition: undefined.txt:32

transformed_point

static cl_float2 transformed_point(float x, float y, float *transform)

Definition: vf_deshake_opencl.c:941

DeshakeOpenCLContext::harris_response_time

unsigned long long harris_response_time

Definition: vf_deshake_opencl.c:291

make_gauss_kernel

static void make_gauss_kernel(float *gauss_kernel, float length, float sigma)

Definition: vf_deshake_opencl.c:809

ff_filter_link

static FilterLink * ff_filter_link(AVFilterLink *link)

Definition: filters.h:198

FF_FILTER_FLAG_HWFRAME_AWARE

#define FF_FILTER_FLAG_HWFRAME_AWARE

The filter is aware of hardware frames, and any hardware frame context should not be automatically pr...

Definition: filters.h:207

FFFrameQueue

Queue of AVFrame pointers.

Definition: framequeue.h:60

DeshakeOpenCLContext::crop_upscale_time

unsigned long long crop_upscale_time

Definition: vf_deshake_opencl.c:296

relative

static IPT relative(const CmsCtx *ctx, IPT ipt)

Definition: cms.c:544

AVLFG

Context structure for the Lagged Fibonacci PRNG.

Definition: lfg.h:33

Definition: af_crystalizer.c:122

#define F3(l, r, i)

Definition: cast5.c:37

init

int(* init)(AVBSFContext *ctx)

Definition: dts2pts.c:550

AVFifo

Definition: fifo.c:35

DeshakeOpenCLContext::matches_contig_host

MotionVector * matches_contig_host

Definition: vf_deshake_opencl.c:231

NULL_IF_CONFIG_SMALL

#define NULL_IF_CONFIG_SMALL(x)

Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.

Definition: internal.h:94

powf

#define powf(x, y)

Definition: libm.h:52

CL_RELEASE_MEMORY

#define CL_RELEASE_MEMORY(m)

release an OpenCL Memory Object

Definition: opencl.h:114

AV_PIX_FMT_FLAG_RGB

#define AV_PIX_FMT_FLAG_RGB

The pixel format contains RGB-like data (as opposed to YUV/grayscale).

Definition: pixdesc.h:136

dst

uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst

Definition: dsp.h:87

AVFILTER_FLAG_HWDEVICE

#define AVFILTER_FLAG_HWDEVICE

The filter can create hardware frames using AVFilterContext.hw_device_ctx.

Definition: avfilter.h:188

hypot

static av_const double hypot(double x, double y)

Definition: libm.h:368

CropInfo::bottom_right

cl_float2 bottom_right

Definition: vf_deshake_opencl.c:180

AV_PIX_FMT_GBRP9BE

@ AV_PIX_FMT_GBRP9BE

planar GBR 4:4:4 27bpp, big-endian

Definition: pixfmt.h:167

get_subset

static int get_subset(AVLFG *alfg, const MotionVector *point_pairs, const int num_point_pairs, MotionVector *pairs_subset, int max_attempts)

Definition: vf_deshake_opencl.c:393

ROUNDED_UP_DIV

#define ROUNDED_UP_DIV(a, b)

Definition: vf_deshake_opencl.c:93

AV_PIX_FMT_GBRP9LE

@ AV_PIX_FMT_GBRP9LE

planar GBR 4:4:4 27bpp, little-endian

Definition: pixfmt.h:168

FLAGS

#define FLAGS

Definition: vf_deshake_opencl.c:2121

MotionVector::should_consider

cl_int should_consider

Definition: vf_deshake_opencl.c:105

The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a

Definition: undefined.txt:41

AV_PIX_FMT_GBRAP10LE

@ AV_PIX_FMT_GBRAP10LE

planar GBR 4:4:4:4 40bpp, little-endian

Definition: pixfmt.h:314

offset

it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset

Definition: writing_filters.txt:86

FF_FILTER_FORWARD_WANTED

FF_FILTER_FORWARD_WANTED(outlink, inlink)

MotionVector

Definition: agm.c:40

opencl_source.h

M_PI

#define M_PI

Definition: mathematics.h:67

FilterLink::hw_frames_ctx

AVBufferRef * hw_frames_ctx

For hwaccel pixel formats, this should be a reference to the AVHWFramesContext describing the frames.

Definition: filters.h:195

ff_opencl_filter_config_input

int ff_opencl_filter_config_input(AVFilterLink *inlink)

Check that the input link contains a suitable hardware frames context and extract the device from it.

Definition: opencl.c:46

AV_OPT_TYPE_FLOAT

@ AV_OPT_TYPE_FLOAT

Underlying C type is float.

Definition: opt.h:271

DeshakeOpenCLContext::inliers

MotionVector * inliers

Definition: vf_deshake_opencl.c:233

FrameDelta::rotation

float rotation

Definition: vf_deshake_opencl.c:166

uninit

static void uninit(AVBSFContext *ctx)

Definition: pcm_rechunk.c:68

ff_get_matrix

void ff_get_matrix(float x_shift, float y_shift, float angle, float scale_x, float scale_y, float *matrix)

Get an affine transformation matrix from given translation, rotation, and zoom factors.

Definition: transform.c:106

#define i(width, name, range_min, range_max)

Definition: cbs_h2645.c:256

queue_frame

static int queue_frame(AVFilterLink *link, AVFrame *input_frame)

Definition: vf_deshake_opencl.c:1726

round

static av_always_inline av_const double round(double x)

Definition: libm.h:446

DeshakeOpenCLContext::transform_y

cl_mem transform_y

Definition: vf_deshake_opencl.c:264

FFMIN3

#define FFMIN3(a, b, c)

Definition: macros.h:50

transform_debug

static void transform_debug(AVFilterContext *avctx, float *new_vals, float *old_vals, int curr_frame)

Definition: vf_deshake_opencl.c:1340

AVFilterLink::w

int w

agreed upon image width

Definition: avfilter.h:407

av_malloc_array

#define av_malloc_array(a, b)

Definition: tableprint_vlc.h:32

common.h

make_vectors_contig

static int make_vectors_contig(DeshakeOpenCLContext *deshake_ctx, int size_y, int size_x)

Definition: vf_deshake_opencl.c:777

delta

float delta

Definition: vorbis_enc_data.h:430

activate

static int activate(AVFilterContext *ctx)

Definition: vf_deshake_opencl.c:2005

DeshakeOpenCLContext::is_yuv

int is_yuv

Definition: vf_deshake_opencl.c:225

DeshakeOpenCLContext::fq

FFFrameQueue fq

Definition: vf_deshake_opencl.c:203

av_inv_q

static av_always_inline AVRational av_inv_q(AVRational q)

Invert a rational.

Definition: rational.h:159

DeshakeOpenCLContext::grayscale

cl_mem grayscale

Definition: vf_deshake_opencl.c:245

AVFilterPad::name

const char * name

Pad name.

Definition: filters.h:45

DeshakeOpenCLContext::transform_time

unsigned long long transform_time

Definition: vf_deshake_opencl.c:295

RingbufCount

@ RingbufCount

Definition: vf_deshake_opencl.c:117

AVHWFramesContext

This struct describes a set or pool of "hardware" frames (i.e.

Definition: hwcontext.h:118

DeshakeOpenCLContext::ocf

OpenCLFilterContext ocf

Definition: vf_deshake_opencl.c:191

AV_PIX_FMT_GBRAP16LE

@ AV_PIX_FMT_GBRAP16LE

planar GBRA 4:4:4:4 64bpp, little-endian

Definition: pixfmt.h:214

ff_opencl_filter_init

int ff_opencl_filter_init(AVFilterContext *avctx)

Initialise an OpenCL filter context.

Definition: opencl.c:135

CL_BLOCKING_WRITE_BUFFER

#define CL_BLOCKING_WRITE_BUFFER(queue, buffer, size, host_ptr, event)

Perform a blocking write to a buffer.

Definition: opencl.h:214

ret

Definition: filter_design.txt:187

FrameDelta

Definition: vf_deshake_opencl.c:164

DeshakeOpenCLContext::matches_host

MotionVector * matches_host

Definition: vf_deshake_opencl.c:230

compute_error

static void compute_error(const MotionVector *point_pairs, const int num_point_pairs, const double *model, float *err)

Definition: vf_deshake_opencl.c:435

ransac_update_num_iters

static int ransac_update_num_iters(double confidence, double num_outliers, int max_iters)

Definition: vf_deshake_opencl.c:495

pixfmt.h

frame

these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame

Definition: filter_design.txt:265

DeshakeOpenCLContext::prev_refined_features

cl_mem prev_refined_features

Definition: vf_deshake_opencl.c:252

FrameDelta::scale

cl_float2 scale

Definition: vf_deshake_opencl.c:167

av_fifo_alloc2

AVFifo * av_fifo_alloc2(size_t nb_elems, size_t elem_size, unsigned int flags)

Allocate and initialize an AVFifo with a given element size.

Definition: fifo.c:47

AbsoluteFrameMotion::data_start_offset

int data_start_offset

Definition: vf_deshake_opencl.c:142

DeshakeOpenCLContext::refined_features

cl_mem refined_features

Definition: vf_deshake_opencl.c:250

transform.h

check_subset

static int check_subset(const MotionVector *pairs_subset)

Definition: vf_deshake_opencl.c:375

ff_framequeue_queued_frames

static size_t ff_framequeue_queued_frames(const FFFrameQueue *fq)

Get the number of queued frames.

Definition: framequeue.h:158

run_estimate_kernel

static void run_estimate_kernel(const MotionVector *point_pairs, double *model)

Definition: vf_deshake_opencl.c:319

CropInfo

Definition: vf_deshake_opencl.c:176

AVFrame::height

int height

Definition: frame.h:499

status

ov_status_e status

Definition: dnn_backend_openvino.c:100

DebugMatches::matches

MotionVector * matches

Definition: vf_deshake_opencl.c:122

DeshakeOpenCLContext::tripod_mode

int tripod_mode

Definition: vf_deshake_opencl.c:270

AVFilterLink::h

int h

agreed upon image height

Definition: avfilter.h:408

DeshakeOpenCLContext::match_descriptors_time

unsigned long long match_descriptors_time

Definition: vf_deshake_opencl.c:294

ff_framequeue_global_init

void ff_framequeue_global_init(FFFrameQueueGlobal *fqg)

Init a global structure.

Definition: framequeue.c:31

avfilter.h

gaussian_for

static float gaussian_for(int x, float sigma)

Definition: vf_deshake_opencl.c:803

values

Definition: filter_design.txt:264

temp

else temp

Definition: vf_mcdeint.c:271

OpenCLFilterContext

Definition: opencl.h:36

ff_opencl_filter_uninit

void ff_opencl_filter_uninit(AVFilterContext *avctx)

Uninitialise an OpenCL filter context.

Definition: opencl.c:144

DeshakeOpenCLContext::debug_on

int debug_on

Definition: vf_deshake_opencl.c:271

AVFilterLink::time_base

AVRational time_base

Define the time base used by the PTS of the frames/samples which will pass through this link.

Definition: avfilter.h:431

DebugMatches::num_matches

int num_matches

Definition: vf_deshake_opencl.c:126

Windows::Graphics::DirectX::Direct3D11::p

IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p

Definition: vsrc_gfxcapture_winrt.hpp:53

CL_RELEASE_QUEUE

#define CL_RELEASE_QUEUE(q)

release an OpenCL Command Queue

Definition: opencl.h:127

DeshakeOpenCLContext::kernel_refine_features

cl_kernel kernel_refine_features

Definition: vf_deshake_opencl.c:238

AVFilterContext

An instance of a filter.

Definition: avfilter.h:274

AV_PIX_FMT_GBRP

@ AV_PIX_FMT_GBRP

planar GBR 4:4:4 24bpp

Definition: pixfmt.h:165

desc

const char * desc

Definition: libsvtav1.c:78

find_inliers

static int find_inliers(MotionVector *point_pairs, const int num_point_pairs, const double *model, float *err, double thresh)

Definition: vf_deshake_opencl.c:459

AVMEDIA_TYPE_VIDEO

@ AVMEDIA_TYPE_VIDEO

Definition: avutil.h:200

FFFilter::p

AVFilter p

The public AVFilter.

Definition: filters.h:270

deshake_opencl_options

static const AVOption deshake_opencl_options[]

Definition: vf_deshake_opencl.c:2123

mem.h

PointPair::p1

cl_float2 p1

Definition: vf_deshake_opencl.c:97

AVPixFmtDescriptor

Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...

Definition: pixdesc.h:69

ff_opencl_get_event_time

cl_ulong ff_opencl_get_event_time(cl_event event)

Gets the command start and end times for the given event and returns the difference (the time that th...

Definition: opencl.c:342

smooth

static float smooth(DeshakeOpenCLContext *deshake_ctx, float *gauss_kernel, int length, float max_val, AVFifo *values)

Definition: vf_deshake_opencl.c:887

DeshakeOpenCLContext::kernel_grayscale

cl_kernel kernel_grayscale

Definition: vf_deshake_opencl.c:236

AV_OPT_TYPE_BOOL

@ AV_OPT_TYPE_BOOL

Underlying C type is int.

Definition: opt.h:327

av_freep

#define av_freep(p)

Definition: tableprint_vlc.h:35

FFMAX3

#define FFMAX3(a, b, c)

Definition: macros.h:48

DebugMatches

Definition: vf_deshake_opencl.c:121

ff_vf_deshake_opencl

const FFFilter ff_vf_deshake_opencl

Definition: vf_deshake_opencl.c:2154

CL_CREATE_BUFFER_FLAGS

#define CL_CREATE_BUFFER_FLAGS(ctx, buffer_name, flags, size, host_ptr)

Create a buffer with the given information.

Definition: opencl.h:197

av_log

#define av_log(a,...)

Definition: tableprint_vlc.h:27

CL_FAIL_ON_ERROR

#define CL_FAIL_ON_ERROR(errcode,...)

A helper macro to handle OpenCL errors.

Definition: opencl.h:74

av_fifo_freep2

void av_fifo_freep2(AVFifo **f)

Free an AVFifo and reset pointer to NULL.

Definition: fifo.c:286

AV_PIX_FMT_GBRAP10BE

@ AV_PIX_FMT_GBRAP10BE

planar GBR 4:4:4:4 40bpp, big-endian

Definition: pixfmt.h:313

FilterLink::frame_rate

AVRational frame_rate

Frame rate of the stream on the link, or 1/0 if unknown or variable.

Definition: filters.h:187

DeshakeOpenCLContext::descriptors

cl_mem descriptors

Definition: vf_deshake_opencl.c:257

DeshakeOpenCLContext::matches

cl_mem matches

Definition: vf_deshake_opencl.c:261

MotionVector::p

PointPair p

Definition: vf_deshake_opencl.c:103

DeshakeOpenCLContext::brief_descriptors_time

unsigned long long brief_descriptors_time

Definition: vf_deshake_opencl.c:293

DeshakeOpenCLContext::command_queue

cl_command_queue command_queue

Definition: vf_deshake_opencl.c:235

PointPair

Definition: vf_deshake_opencl.c:95

FILTER_SINGLE_PIXFMT

#define FILTER_SINGLE_PIXFMT(pix_fmt_)

Definition: filters.h:253

BREIFN

#define BREIFN

Definition: vf_deshake_opencl.c:85

DeshakeOpenCLContext::sw_format

int sw_format

Definition: vf_deshake_opencl.c:227

DeshakeOpenCLContext::gauss_kernel

float * gauss_kernel

Definition: vf_deshake_opencl.c:214

DeshakeOpenCLContext::refine_features

cl_int refine_features

Definition: vf_deshake_opencl.c:275

SimilarityMatrix

Definition: vf_deshake_opencl.c:171

ringbuf_float_at

static void ringbuf_float_at(DeshakeOpenCLContext *deshake_ctx, AVFifo *values, float *val, int offset)

Definition: vf_deshake_opencl.c:844

optimize_model

static void optimize_model(DeshakeOpenCLContext *deshake_ctx, MotionVector *best_pairs, MotionVector *inliers, const int num_inliers, float best_err, double *model_out)

Definition: vf_deshake_opencl.c:600

src

#define src

Definition: vp8dsp.c:248

DeshakeOpenCLContext::crop_y

CropInfo crop_y

Definition: vf_deshake_opencl.c:220

av_clipd

Definition: af_crystalizer.c:132

CropInfo::top_left

cl_float2 top_left

Definition: vf_deshake_opencl.c:178