1 /*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 *
18 * Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
19 * Copyright (C) 2009, Willow Garage Inc., all rights reserved.
20 * Copyright (C) 2013, OpenCV Foundation, all rights reserved.
21 * Third party copyrights are property of their respective owners.
22 *
23 * Redistribution and use in source and binary forms, with or without modification,
24 * are permitted provided that the following conditions are met:
25 *
26 * * Redistribution's of source code must retain the above copyright notice,
27 * this list of conditions and the following disclaimer.
28 *
29 * * Redistribution's in binary form must reproduce the above copyright notice,
30 * this list of conditions and the following disclaimer in the documentation
31 * and/or other materials provided with the distribution.
32 *
33 * * The name of the copyright holders may not be used to endorse or promote products
34 * derived from this software without specific prior written permission.
35 *
36 * This software is provided by the copyright holders and contributors "as is" and
37 * any express or implied warranties, including, but not limited to, the implied
38 * warranties of merchantability and fitness for a particular purpose are disclaimed.
39 * In no event shall the Intel Corporation or contributors be liable for any direct,
40 * indirect, incidental, special, exemplary, or consequential damages
41 * (including, but not limited to, procurement of substitute goods or services;
42 * loss of use, data, or profits; or business interruption) however caused
43 * and on any theory of liability, whether in contract, strict liability,
44 * or tort (including negligence or otherwise) arising in any way out of
45 * the use of this software, even if advised of the possibility of such damage.
46 */
47
66
67 /*
68 This filter matches feature points between frames (dealing with outliers) and then
69 uses the matches to estimate an affine transform between frames. This transform is
70 decomposed into various values (translation, scale, rotation) and the values are
71 summed relative to the start of the video to obtain an absolute camera position
72 for each frame. This "camera path" is then smoothed via a gaussian filter, resulting
73 in a new path that is turned back into an affine transform and applied to each
74 frame to render it.
75
76 High-level overview:
77
78 All of the work to extract motion data from frames occurs in queue_frame. Motion data
79 is buffered in a smoothing window, so queue_frame simply computes the absolute camera
80 positions and places them in ringbuffers.
81
82 filter_frame is responsible for looking at the absolute camera positions currently
83 in the ringbuffers, applying the gaussian filter, and then transforming the frames.
84 */
85
86 // Number of bits for BRIEF descriptors
88 // Size of the patch from which a BRIEF descriptor is extracted
89 // This is the size used in OpenCV
90 #define BRIEF_PATCH_SIZE 31
91 #define BRIEF_PATCH_SIZE_HALF (BRIEF_PATCH_SIZE / 2)
92
93 #define MATCHES_CONTIG_SIZE 2000
94
// Integer ceiling division: number of size-b chunks needed to hold a items.
// Both arguments are fully parenthesized so that expression arguments
// (e.g. ROUNDED_UP_DIV(x + 1, n * 2)) expand correctly.
#define ROUNDED_UP_DIV(a, b) (((a) + ((b) - 1)) / (b))
96
98 // Previous frame
100 // Current frame
103
106 // Used to mark vectors as potential outliers
109
110 // Denotes the indices for the different types of motion in the ringbuffers array
117
118 // Should always be last
120 };
121
122 // Struct that holds data for drawing point match debug data
125 // The points used to calculate the affine transform for a frame
127
129 // For cases where we couldn't calculate a model
132
133 // Groups together the ringbuffers that store absolute distortion / position values
134 // for each frame
136 // Array with the various ringbuffers, indexed via the RingbufferIndices enum
138
139 // Offset to get to the current frame being processed
140 // (not in bytes)
142 // Keeps track of where the start and end of contiguous motion data is (to
143 // deal with cases where no motion data is found between two frames)
146
149
150 // Takes care of freeing the arrays within the DebugMatches inside of the
151 // debug_matches ringbuffer and then freeing the buffer itself.
154
156 return;
157 }
158
161
163 }
164
165 // Stores the translation, scale, rotation, and skew deltas between two frames
172
174 // The 2x3 similarity matrix
177
179 // The top left corner of the bounding box for the crop
181 // The bottom right corner of the bounding box for the crop
184
185 // Returned from function that determines start and end values for iteration
186 // around the current frame in a ringbuffer
191
194 // Whether or not the above `OpenCLFilterContext` has been initialized
196
197 // These variables are used in the activate callback
200
201 // State for random number generation
203
204 // FIFO frame queue used to buffer future frames for processing
206 // Ringbuffers for frame positions
208
209 // The number of frames' motion to consider before and after the frame we are
210 // smoothing
212 // The number of the frame we are currently processing
214
215 // Stores a 1d array of normalised gaussian kernel values for convolution
217
218 // Buffer for error values used in RANSAC code
220
221 // Information regarding how to crop the smoothed luminance (or RGB) planes
223 // Information regarding how to crop the smoothed chroma planes
225
226 // Whether or not we are processing YUV input (as opposed to RGB)
228 // The underlying format of the hardware surfaces
230
231 // Buffer to copy `matches` into for the CPU to work with
234
236
245
246 // Stores a frame converted to grayscale
248 // Stores the harris response for a frame (measure of "cornerness" for each pixel)
250
251 // Detected features after non-maximum suppression and sub-pixel refinement
253 // Saved from the previous frame
255
256 // BRIEF sampling pattern that is randomly initialized
258 // Feature point descriptors for the current frame
260 // Feature point descriptors for the previous frame
262 // Vectors between points in current and previous frame
265 // Holds the matrix to transform luminance (or RGB) with
267 // Holds the matrix to transform chroma with
269
270 // Configurable options
271
275
276 // Whether or not feature points should be refined at a sub-pixel level
278 // If the user sets a value other than the default, 0, this percentage is
279 // translated into a sigma value ranging from 0.5 to 40.0
281 // This number is multiplied by the video frame rate to determine the size
282 // of the smooth window
284
285 // Debug stuff
286
290
291 // These store the total time spent executing the different kernels in nanoseconds
299
300 // Time spent copying matched features from the device to the host
303
304 // Returns a random uniformly-distributed number in [low, high]
306 return (
av_lfg_get(alfg) % (high - low)) + low;
307 }
308
// Returns the average execution time in milliseconds for an event, given the
// total time spent (in nanoseconds) and the number of frames processed.
static double averaged_event_time_ms(unsigned long long total_time, int num_frames) {
    // ns -> ms conversion: divide by 1e6
    return (double)total_time / (double)num_frames / 1000000.0;
}
314
315 // The following code is loosely ported from OpenCV
316
317 // Estimates affine transform from 3 point pairs
318 // model is a 2x3 matrix:
319 // a b c
320 // d e f
322 {
323 // src points
324 double x1 = point_pairs[0].
p.
p1.s[0];
325 double y1 = point_pairs[0].
p.
p1.s[1];
326 double x2 = point_pairs[1].
p.
p1.s[0];
327 double y2 = point_pairs[1].
p.
p1.s[1];
328 double x3 = point_pairs[2].
p.
p1.s[0];
329 double y3 = point_pairs[2].
p.
p1.s[1];
330
331 // dest points
332 double X1 = point_pairs[0].
p.
p2.s[0];
333 double Y1 = point_pairs[0].
p.
p2.s[1];
334 double X2 = point_pairs[1].
p.
p2.s[0];
335 double Y2 = point_pairs[1].
p.
p2.s[1];
336 double X3 = point_pairs[2].
p.
p2.s[0];
337 double Y3 = point_pairs[2].
p.
p2.s[1];
338
339 double d = 1.0 / ( x1*(y2-y3) + x2*(y3-y1) + x3*(y1-y2) );
340
341 model[0] =
d * ( X1*(y2-y3) + X2*(y3-y1) + X3*(y1-y2) );
342 model[1] =
d * ( X1*(x3-x2) + X2*(x1-x3) + X3*(x2-x1) );
343 model[2] =
d * ( X1*(x2*y3 - x3*y2) + X2*(x3*y1 - x1*y3) + X3*(x1*y2 - x2*y1) );
344
345 model[3] =
d * ( Y1*(y2-y3) + Y2*(y3-y1) + Y3*(y1-y2) );
346 model[4] =
d * ( Y1*(x3-x2) + Y2*(x1-x3) + Y3*(x2-x1) );
347 model[5] =
d * ( Y1*(x2*y3 - x3*y2) + Y2*(x3*y1 - x1*y3) + Y3*(x1*y2 - x2*y1) );
348 }
349
350 // Checks that the 3 points in the given array are not collinear
352 {
354
355 for (j = 0; j <
i; j++) {
356 double dx1 = points[j]->s[0] - points[
i]->s[0];
357 double dy1 = points[j]->s[1] - points[
i]->s[1];
358
359 for (k = 0; k < j; k++) {
360 double dx2 = points[k]->s[0] - points[
i]->s[0];
361 double dy2 = points[k]->s[1] - points[
i]->s[1];
362
363 // Assuming a 3840 x 2160 video with a point at (0, 0) and one at
364 // (3839, 2159), this prevents a third point from being within roughly
365 // 0.5 of a pixel of the line connecting the two on both axes
366 if (
fabs(dx2*dy1 - dy2*dx1) <= 1.0) {
367 return 0;
368 }
369 }
370 }
371
372 return 1;
373 }
374
375 // Checks a subset of 3 point pairs to make sure that the points are not collinear
376 // and not too close to each other
378 {
379 const cl_float2 *prev_points[] = {
380 &pairs_subset[0].
p.
p1,
381 &pairs_subset[1].
p.
p1,
382 &pairs_subset[2].
p.
p1
383 };
384
385 const cl_float2 *curr_points[] = {
386 &pairs_subset[0].
p.
p2,
387 &pairs_subset[1].
p.
p2,
388 &pairs_subset[2].
p.
p2
389 };
390
392 }
393
394 // Selects a random subset of 3 points from point_pairs and places them in pairs_subset
398 const int num_point_pairs,
400 int max_attempts
401 ) {
402 int idx[3];
403 int i = 0, j, iters = 0;
404
405 for (; iters < max_attempts; iters++) {
406 for (
i = 0;
i < 3 && iters < max_attempts;) {
407 int idx_i = 0;
408
409 for (;;) {
410 idx_i = idx[
i] =
rand_in(0, num_point_pairs, alfg);
411
412 for (j = 0; j <
i; j++) {
413 if (idx_i == idx[j]) {
414 break;
415 }
416 }
417
419 break;
420 }
421 }
422
423 pairs_subset[
i] = point_pairs[idx[
i]];
425 }
426
428 continue;
429 }
430 break;
431 }
432
433 return i == 3 && iters < max_attempts;
434 }
435
436 // Computes the error for each of the given points based on the given model.
439 const int num_point_pairs,
440 const double *model,
441 float *err
442 ) {
443 double F0 = model[0],
F1 = model[1],
F2 = model[2];
444 double F3 = model[3], F4 = model[4], F5 = model[5];
445
446 for (
int i = 0;
i < num_point_pairs;
i++) {
447 const cl_float2 *
f = &point_pairs[
i].
p.
p1;
448 const cl_float2 *t = &point_pairs[
i].
p.
p2;
449
450 double a = F0*
f->s[0] +
F1*
f->s[1] +
F2 - t->s[0];
451 double b =
F3*
f->s[0] + F4*
f->s[1] + F5 - t->s[1];
452
454 }
455 }
456
457 // Determines which of the given point matches are inliers for the given model
458 // based on the specified threshold.
459 //
460 // err must be an array of num_point_pairs length
463 const int num_point_pairs,
464 const double *model,
465 float *err,
466 double thresh
467 ) {
468 float t = (
float)(thresh * thresh);
469 int i, n = num_point_pairs, num_inliers = 0;
470
472
473 for (
i = 0;
i < n;
i++) {
475 // This is an inlier
477 num_inliers += 1;
478 } else {
480 }
481 }
482
483 return num_inliers;
484 }
485
// Determines the number of iterations required to achieve the desired confidence level.
//
// The equation used to determine the number of iterations to do is:
// 1 - confidence = (1 - inlier_probability^num_points)^num_iters
//
// Solving for num_iters:
//
// num_iters = log(1 - confidence) / log(1 - inlier_probability^num_points)
//
// A more in-depth explanation can be found at https://en.wikipedia.org/wiki/Random_sample_consensus
// under the 'Parameters' heading
static int ransac_update_num_iters(double confidence, double num_outliers, int max_iters)
{
    double num, denom;

    confidence = av_clipd(confidence, 0.0, 1.0);
    num_outliers = av_clipd(num_outliers, 0.0, 1.0);

    // avoid inf's & nan's
    num = FFMAX(1.0 - confidence, DBL_MIN);
    // probability that a sample of 3 points is all inliers
    denom = 1.0 - pow(1.0 - num_outliers, 3);
    if (denom < DBL_MIN) {
        return 0;
    }

    num = log(num);
    denom = log(denom);

    // Clamp to max_iters when the estimate would exceed it (or is non-finite)
    return denom >= 0 || -num >= max_iters * (-denom) ? max_iters : (int)round(num / denom);
}
516
517 // Estimates an affine transform between the given pairs of points using RANdom
518 // SAmple Consensus
523 const int num_point_pairs,
524 double *model_out,
525 const double threshold,
526 const int max_iters,
527 const double confidence
528 ) {
530 double best_model[6], model[6];
532
533 int iter, niters =
FFMAX(max_iters, 1);
534 int good_count, max_good_count = 0;
535
536 // We need at least 3 points to build a model from
537 if (num_point_pairs < 3) {
538 return 0;
539 } else if (num_point_pairs == 3) {
540 // There are only 3 points, so RANSAC doesn't apply here
542
543 for (
int i = 0;
i < 3; ++
i) {
545 }
546
547 return 1;
548 }
549
550 for (iter = 0; iter < niters; ++iter) {
551 int found =
get_subset(&deshake_ctx->
alfg, point_pairs, num_point_pairs, pairs_subset, 10000);
552
553 if (!found) {
554 if (iter == 0) {
555 return 0;
556 }
557
558 break;
559 }
560
563
564 if (good_count >
FFMAX(max_good_count, 2)) {
565 for (
int mi = 0;
mi < 6; ++
mi) {
566 best_model[
mi] = model[
mi];
567 }
568
569 for (int pi = 0; pi < 3; pi++) {
570 best_pairs[pi] = pairs_subset[pi];
571 }
572
573 max_good_count = good_count;
575 confidence,
576 (double)(num_point_pairs - good_count) / num_point_pairs,
577 niters
578 );
579 }
580 }
581
582 if (max_good_count > 0) {
583 for (
int mi = 0;
mi < 6; ++
mi) {
584 model_out[
mi] = best_model[
mi];
585 }
586
587 for (int pi = 0; pi < 3; ++pi) {
589 }
591
592 // Find the inliers again for the best model for debugging
595 }
596
598 }
599
600 // "Wiggles" the first point in best_pairs around a tiny bit in order to decrease the
601 // total error
606 const int num_inliers,
607 float best_err,
608 double *model_out
609 ) {
610 float move_x_val = 0.01;
611 float move_y_val = 0.01;
612 int move_x = 1;
613 float old_move_x_val = 0;
614 double model[6];
615 int last_changed = 0;
616
617 for (int iters = 0; iters < 200; iters++) {
618 float total_err = 0;
619
620 if (move_x) {
621 best_pairs[0].
p.
p2.s[0] += move_x_val;
622 } else {
623 best_pairs[0].
p.
p2.s[0] += move_y_val;
624 }
625
628
629 for (int j = 0; j < num_inliers; j++) {
631 }
632
633 if (total_err < best_err) {
634 for (
int mi = 0;
mi < 6; ++
mi) {
635 model_out[
mi] = model[
mi];
636 }
637
638 best_err = total_err;
639 last_changed = iters;
640 } else {
641 // Undo the change
642 if (move_x) {
643 best_pairs[0].
p.
p2.s[0] -= move_x_val;
644 } else {
645 best_pairs[0].
p.
p2.s[0] -= move_y_val;
646 }
647
648 if (iters - last_changed > 4) {
649 // We've already improved the model as much as we can
650 break;
651 }
652
653 old_move_x_val = move_x_val;
654
655 if (move_x) {
656 move_x_val *= -1;
657 } else {
658 move_y_val *= -1;
659 }
660
661 if (old_move_x_val < 0) {
662 move_x = 0;
663 } else {
664 move_x = 1;
665 }
666 }
667 }
668 }
669
670 // Uses a process similar to that of RANSAC to find a transform that minimizes
671 // the total error for a set of point matches determined to be inliers
672 //
673 // (Pick random subsets, compute model, find total error, iterate until error
674 // is minimized.)
679 const int num_inliers,
680 double *model_out,
681 const int max_iters
682 ) {
684 float best_err = FLT_MAX;
685 double best_model[6], model[6];
687
688 for (
int i = 0;
i < max_iters;
i++) {
689 float total_err = 0;
690 int found =
get_subset(&deshake_ctx->
alfg, inliers, num_inliers, pairs_subset, 10000);
691
692 if (!found) {
694 return 0;
695 }
696
697 break;
698 }
699
702
703 for (int j = 0; j < num_inliers; j++) {
705 }
706
707 if (total_err < best_err) {
708 for (
int mi = 0;
mi < 6; ++
mi) {
709 best_model[
mi] = model[
mi];
710 }
711
712 for (int pi = 0; pi < 3; pi++) {
713 best_pairs[pi] = pairs_subset[pi];
714 }
715
716 best_err = total_err;
717 }
718 }
719
720 for (
int mi = 0;
mi < 6; ++
mi) {
721 model_out[
mi] = best_model[
mi];
722 }
723
724 for (int pi = 0; pi < 3; ++pi) {
726 }
729
730 optimize_model(deshake_ctx, best_pairs, inliers, num_inliers, best_err, model_out);
732 }
733
734 // End code from OpenCV
735
736 // Decomposes a similarity matrix into translation, rotation, scale, and skew
737 //
738 // See http://frederic-wang.fr/decomposition-of-2d-transform-matrices.html
740 {
742
745 double e = model[2];
750
751 memset(&
ret, 0,
sizeof(
ret));
752
753 ret.translation.s[0] = e;
754 ret.translation.s[1] =
f;
755
756 // This is the QR method
757 if (
a != 0 ||
b != 0) {
759
763 ret.skew.s[0] = atan((
a *
c +
b *
d) / (
r *
r));
765 }
else if (
c != 0 ||
d != 0) {
766 double s = sqrt(
c *
c +
d *
d);
767
772 ret.skew.s[1] = atan((
a *
c +
b *
d) / (
s *
s));
773 } // otherwise there is only translation
774
776 }
777
778 // Move valid vectors from the 2d buffer into a 1d buffer where they are contiguous
781 int size_y,
782 int size_x
783 ) {
784 int num_vectors = 0;
785
786 for (
int i = 0;
i < size_y; ++
i) {
787 for (int j = 0; j < size_x; ++j) {
789
792 ++num_vectors;
793 }
794
795 // Make sure we do not exceed the amount of space we allocated for these vectors
797 return num_vectors;
798 }
799 }
800 }
801 return num_vectors;
802 }
803
804 // Returns the gaussian kernel value for the given x coordinate and sigma value
806 return 1.0f /
expf(((
float)x * (
float)x) / (2.0
f * sigma * sigma));
807 }
808
// Makes a normalized gaussian kernel of the given length for the given sigma
// and places it in gauss_kernel. The kernel is centered on the middle element
// and its values sum to 1.
static void make_gauss_kernel(float *gauss_kernel, float length, float sigma)
{
    float gauss_sum = 0;
    int window_half = length / 2;

    for (int i = 0; i < length; ++i) {
        float val = gaussian_for(i - window_half, sigma);

        gauss_sum += val;
        gauss_kernel[i] = val;
    }

    // Normalize the gaussian values
    for (int i = 0; i < length; ++i) {
        gauss_kernel[i] /= gauss_sum;
    }
}
828
829 // Returns indices to start and end iteration at in order to iterate over a window
830 // of length size centered at the current frame in a ringbuffer
831 //
832 // Always returns numbers that result in a window of length size, even if that
833 // means specifying negative indices or indices past the end of the values in the
834 // ringbuffers. Make sure you clip indices appropriately within your loop.
837
840
841 return indices;
842 }
843
844 // Sets val to the value in the given ringbuffer at the given offset, taking care of
845 // clipping the offset into the appropriate range
851 ) {
852 int clip_start, clip_end, offset_clipped;
855 } else {
856 // This expression represents the last valid index in the buffer,
857 // which we use repeatedly at the end of the video.
859 }
860
863 } else {
864 // Negative indices will occur at the start of the video, and we want
865 // them to be clipped to 0 in order to repeatedly use the position of
866 // the first frame.
867 clip_start = 0;
868 }
869
872 clip_start,
873 clip_end
874 );
875
877 }
878
879 // Returns smoothed current frame value of the given buffer of floats based on the
880 // given Gaussian kernel and its length (also the window length, centered around the
881 // current frame) and the "maximum value" of the motion.
882 //
883 // This "maximum value" should be the width / height of the image in the case of
884 // translation and an empirically chosen constant for rotation / scale.
885 //
886 // The sigma chosen to generate the final gaussian kernel with used to smooth the
887 // camera path is either hardcoded (set by user, deshake_ctx->smooth_percent) or
888 // adaptively chosen.
891 float *gauss_kernel,
892 int length,
893 float max_val,
895 ) {
896 float new_large_s = 0, new_small_s = 0, new_best = 0, old, diff_between,
897 percent_of_max, inverted_percent;
899 float large_sigma = 40.0f;
900 float small_sigma = 2.0f;
901 float best_sigma;
902
904 best_sigma = (large_sigma - 0.5f) * deshake_ctx->
smooth_percent + 0.5f;
905 } else {
906 // Strategy to adaptively smooth trajectory:
907 //
908 // 1. Smooth path with large and small sigma values
909 // 2. Take the absolute value of the difference between them
910 // 3. Get a percentage by putting the difference over the "max value"
911 // 4, Invert the percentage
912 // 5. Calculate a new sigma value weighted towards the larger sigma value
913 // 6. Determine final smoothed trajectory value using that sigma
914
916 for (
int i = indices.
start, j = 0;
i < indices.
end; ++
i, ++j) {
918 new_large_s += old * gauss_kernel[j];
919 }
920
922 for (
int i = indices.
start, j = 0;
i < indices.
end; ++
i, ++j) {
924 new_small_s += old * gauss_kernel[j];
925 }
926
927 diff_between =
fabsf(new_large_s - new_small_s);
928 percent_of_max = diff_between / max_val;
929 inverted_percent = 1 - percent_of_max;
930 best_sigma = large_sigma *
powf(inverted_percent, 40);
931 }
932
934 for (
int i = indices.
start, j = 0;
i < indices.
end; ++
i, ++j) {
936 new_best += old * gauss_kernel[j];
937 }
938
939 return new_best;
940 }
941
942 // Returns the position of the given point after the transform is applied
945
948
950 }
951
952 // Creates an affine transform that scales from the center of a frame
954 float x_shift,
955 float y_shift,
956 float angle,
957 float scale_x,
958 float scale_y,
959 float center_w,
960 float center_h,
962 ) {
963 cl_float2 center_s;
964 float center_s_w, center_s_h;
965
967 0,
968 0,
969 0,
970 scale_x,
971 scale_y,
973 );
974
976 center_s_w = center_w - center_s.s[0];
977 center_s_h = center_h - center_s.s[1];
978
980 x_shift + center_s_w,
981 y_shift + center_s_h,
982 angle,
983 scale_x,
984 scale_y,
986 );
987 }
988
989 // Determines the crop necessary to eliminate black borders from a smoothed frame
990 // and updates target crop accordingly
994 float frame_width,
995 float frame_height
996 ) {
997 float new_width, new_height, adjusted_width, adjusted_height, adjusted_x, adjusted_y;
998
1003 float ar_h = frame_height / frame_width;
1004 float ar_w = frame_width / frame_height;
1005
1007 // The crop hasn't been set to the original size of the plane
1010 }
1011
1014 top_left.s[0],
1015 bottom_left.s[0]
1016 );
1017
1020 top_left.s[1],
1021 top_right.s[1]
1022 );
1023
1026 bottom_right.s[0],
1027 top_right.s[0]
1028 );
1029
1032 bottom_right.s[1],
1033 bottom_left.s[1]
1034 );
1035
1036 // Make sure our potentially new bounding box has the same aspect ratio
1039
1040 adjusted_width = new_height * ar_w;
1042
1043 if (adjusted_x >= crop->
top_left.s[0]) {
1045 } else {
1046 adjusted_height = new_width * ar_h;
1047 adjusted_y = crop->
bottom_right.s[1] - adjusted_height;
1049 }
1050 }
1051
1053 {
1055 cl_int cle;
1056
1059
1062
1063 if (
ctx->gauss_kernel)
1065
1066 if (
ctx->ransac_err)
1068
1069 if (
ctx->matches_host)
1071
1072 if (
ctx->matches_contig_host)
1074
1077
1079
1088
1090
1103 if (
ctx->debug_on) {
1106 }
1107
1109 }
1110
1112 {
1116 // Pointer to the host-side pattern buffer to be initialized and then copied
1117 // to the GPU
1119 cl_int cle;
1120 int err;
1121 cl_ulong8 zeroed_ulong8;
1123 cl_image_format grayscale_format;
1124 cl_image_desc grayscale_desc;
1125 cl_command_queue_properties queue_props;
1126
1142 };
1143
1144 // Number of elements for an array
1146
1147 const int descriptor_buf_size = image_grid_32 * (
BREIFN / 8);
1148 const int features_buf_size = image_grid_32 * sizeof(cl_float2);
1149
1152
1155
1160 ctx->curr_frame = 0;
1161
1162 memset(&zeroed_ulong8, 0, sizeof(cl_ulong8));
1163
1165 if (!
ctx->gauss_kernel) {
1168 }
1169
1171 if (!
ctx->ransac_err) {
1174 }
1175
1178 sizeof(float), 0);
1179
1180 if (!
ctx->abs_motion.ringbuffers[
i]) {
1183 }
1184 }
1185
1186 if (
ctx->debug_on) {
1188 ctx->smooth_window / 2,
1190 );
1191
1192 if (!
ctx->abs_motion.debug_matches) {
1195 }
1196 }
1197
1198 ctx->abs_motion.curr_frame_offset = 0;
1199 ctx->abs_motion.data_start_offset = -1;
1200 ctx->abs_motion.data_end_offset = -1;
1201
1203 if (!pattern_host) {
1206 }
1207
1209 if (!
ctx->matches_host) {
1212 }
1213
1215 if (!
ctx->matches_contig_host) {
1218 }
1219
1221 if (!
ctx->inliers) {
1224 }
1225
1226 // Initializing the patch pattern for building BRIEF descriptors with
1230
1231 for (int j = 0; j < 2; ++j) {
1234 }
1235
1236 pattern_host[
i] = pair;
1237 }
1238
1239 for (
int i = 0;
i < 14;
i++) {
1240 if (
ctx->sw_format == disallowed_formats[
i]) {
1244 }
1245 }
1246
1249 } else {
1251 }
1252 ctx->sw_format = hw_frames_ctx->sw_format;
1253
1255 if (err < 0)
1257
1258 if (
ctx->debug_on) {
1259 queue_props = CL_QUEUE_PROFILING_ENABLE;
1260 } else {
1261 queue_props = 0;
1262 }
1263 ctx->command_queue = clCreateCommandQueue(
1264 ctx->ocf.hwctx->context,
1265 ctx->ocf.hwctx->device_id,
1266 queue_props,
1267 &cle
1268 );
1270
1280
1282 grayscale_format.image_channel_order = CL_R;
1283 grayscale_format.image_channel_data_type = CL_FLOAT;
1284
1285 grayscale_desc = (cl_image_desc) {
1286 .image_type = CL_MEM_OBJECT_IMAGE2D,
1287 .image_width = outlink->
w,
1288 .image_height = outlink->
h,
1289 .image_depth = 0,
1290 .image_array_size = 0,
1291 .image_row_pitch = 0,
1292 .image_slice_pitch = 0,
1293 .num_mip_levels = 0,
1294 .num_samples = 0,
1296 };
1297
1298 ctx->grayscale = clCreateImage(
1299 ctx->ocf.hwctx->context,
1300 0,
1301 &grayscale_format,
1302 &grayscale_desc,
1304 &cle
1305 );
1307 }
1308
1314 brief_pattern,
1315 CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
1317 pattern_host
1318 );
1325 if (
ctx->debug_on) {
1328 }
1329
1330 ctx->initialized = 1;
1332
1333 return 0;
1334
1337 return err;
1338 }
1339
1340 // Logs debug information about the transform data
1343 "Frame %d:\n"
1344 "\tframe moved from: %f x, %f y\n"
1345 "\t to: %f x, %f y\n"
1346 "\t rotated from: %f degrees\n"
1347 "\t to: %f degrees\n"
1348 "\t scaled from: %f x, %f y\n"
1349 "\t to: %f x, %f y\n"
1350 "\n"
1351 "\tframe moved by: %f x, %f y\n"
1352 "\t rotated by: %f degrees\n"
1353 "\t scaled by: %f x, %f y\n",
1354 curr_frame,
1364 );
1365 }
1366
1367 // Uses the buffered motion information to determine a transform that smooths the
1368 // given frame and applies it
1370 {
1375 int err;
1376 cl_int cle;
1379 // Luma (in the case of YUV) transform, or just the transform in the case of RGB
1380 float transform_y[9];
1381 // Chroma transform
1382 float transform_uv[9];
1383 // Luma crop transform (or RGB)
1384 float transform_crop_y[9];
1385 // Chroma crop transform
1386 float transform_crop_uv[9];
1387 float transform_debug_rgb[9];
1388 size_t global_work[2];
1390 cl_mem
src, transformed, dst;
1393 cl_event transform_event, crop_upscale_event;
1395 cl_int num_model_matches;
1396
1397 const float center_w = (
float)input_frame->
width / 2;
1398 const float center_h = (
float)input_frame->
height / 2;
1399
1403
1404 const float center_w_chroma = (
float)chroma_width / 2;
1405 const float center_h_chroma = (
float)chroma_height / 2;
1406
1407 const float luma_w_over_chroma_w = ((
float)input_frame->
width / (
float)chroma_width);
1408 const float luma_h_over_chroma_h = ((
float)input_frame->
height / (
float)chroma_height);
1409
1413 &debug_matches, 1);
1414 }
1415
1416 #if FF_API_PKT_DURATION
1420 } else
1422 #endif
1425 } else {
1427 }
1429
1430 // Get the absolute transform data for this frame
1435 }
1436
1438 // If tripod mode is turned on we simply undo all motion relative to the
1439 // first frame
1440
1446 } else {
1447 // Tripod mode is off and we need to smooth a moving camera
1448
1450 deshake_ctx,
1455 );
1457 deshake_ctx,
1462 );
1464 deshake_ctx,
1469 );
1471 deshake_ctx,
1474 2.0f,
1476 );
1478 deshake_ctx,
1481 2.0f,
1483 );
1484 }
1485
1492 center_w,
1493 center_h,
1494 transform_y
1495 );
1496
1503 center_w_chroma,
1504 center_h_chroma,
1505 transform_uv
1506 );
1507
1510
1513
1515 if (!cropped_frame) {
1518 }
1519
1521 if (!transformed_frame) {
1524 }
1525
1528
1529 for (
int p = 0; p <
FF_ARRAY_ELEMS(transformed_frame->data); p++) {
1530 // Transform all of the planes appropriately
1531 src = (cl_mem)input_frame->
data[p];
1532 transformed = (cl_mem)transformed_frame->data[p];
1533
1534 if (!transformed)
1535 break;
1536
1538 if (err < 0)
1540
1544 global_work,
1546 &transform_event,
1547 { sizeof(cl_mem), &src },
1548 { sizeof(cl_mem), &transformed },
1549 { sizeof(cl_mem), &transforms[p] },
1550 );
1551 }
1552
1560 );
1561
1568 );
1569
1571
1572 // Invert the transform
1579 center_w,
1580 center_h,
1581 transform_debug_rgb
1582 );
1583
1585
1586 transformed = (cl_mem)transformed_frame->data[0];
1593 { sizeof(cl_mem), &transformed },
1596 { sizeof(cl_int), &num_model_matches },
1598 );
1599 }
1600
1602 // Generate transforms for cropping
1609 center_w,
1610 center_h,
1611 transform_crop_y
1612 );
1614
1621 center_w_chroma,
1622 center_h_chroma,
1623 transform_crop_uv
1624 );
1626
1627 crops[0] = deshake_ctx->
crop_y;
1628 crops[1] = crops[2] = deshake_ctx->
crop_uv;
1629
1631 // Crop all of the planes appropriately
1632 dst = (cl_mem)cropped_frame->
data[p];
1633 transformed = (cl_mem)transformed_frame->data[p];
1634
1635 if (!dst)
1636 break;
1637
1639 if (err < 0)
1641
1645 global_work,
1647 &crop_upscale_event,
1648 { sizeof(cl_mem), &transformed },
1649 { sizeof(cl_mem), &dst },
1650 { sizeof(cl_float2), &crops[p].top_left },
1651 { sizeof(cl_float2), &crops[p].bottom_right },
1652 );
1653 }
1654 }
1655
1657 // This means we are somewhere at the start of the video. We need to
1658 // increment the current frame offset until it reaches the center of
1659 // the ringbuffers (as the current frame will be located there for
1660 // the rest of the video).
1661 //
1662 // The end of the video is taken care of by draining motion data
1663 // one-by-one out of the buffer, causing the (at that point fixed)
1664 // offset to move towards later frames' data.
1666 }
1667
1669 // Keep the end offset in sync with the frame it's supposed to be
1670 // positioned at
1672
1674 // The end offset would be the start of the new video sequence; flip to
1675 // start offset
1678 }
1680 // Keep the start offset in sync with the frame it's supposed to be
1681 // positioned at
1683 }
1684
1689 }
1690 }
1691
1693
1696
1699 if (err < 0)
1701
1705
1706 } else {
1708 if (err < 0)
1710
1714 }
1715
1718
1722
1726 return err;
1727 }
1728
1729 // Add the given frame to the frame queue to eventually be processed.
1730 //
1731 // Also determines the motion from the previous frame and updates the stored
1732 // motion information accordingly.
1734 {
1737 int err;
1738 int num_vectors;
1739 int num_inliers = 0;
1740 cl_int cle;
1743 size_t global_work[2];
1744 size_t harris_global_work[2];
1745 size_t grid_32_global_work[2];
1746 int grid_32_h, grid_32_w;
1747 size_t local_work[2];
1749 float prev_vals[5];
1750 float new_vals[5];
1751 cl_event grayscale_event, harris_response_event, refine_features_event,
1752 brief_event, match_descriptors_event, read_buf_event;
1754
1755 num_vectors = 0;
1756
1757 local_work[0] = 8;
1758 local_work[1] = 8;
1759
1761 if (err < 0)
1763
1765 if (err < 0)
1767
1769 if (err < 0)
1771
1772 // We want a single work-item for each 32x32 block of pixels in the input frame
1773 grid_32_global_work[0] /= 32;
1774 grid_32_global_work[1] /= 32;
1775
1778
1779 if (deshake_ctx->
is_yuv) {
1781 } else {
1782 src = (cl_mem)input_frame->
data[0];
1783
1787 global_work,
1789 &grayscale_event,
1790 {
sizeof(cl_mem), &
src },
1791 {
sizeof(cl_mem), &deshake_ctx->
grayscale }
1792 );
1793 }
1794
1796 deshake_ctx->command_queue,
1797 deshake_ctx->kernel_harris_response,
1798 harris_global_work,
1799 local_work,
1800 &harris_response_event,
1801 { sizeof(cl_mem), &deshake_ctx->grayscale },
1802 { sizeof(cl_mem), &deshake_ctx->harris_buf }
1803 );
1804
1806 deshake_ctx->command_queue,
1807 deshake_ctx->kernel_refine_features,
1808 grid_32_global_work,
1810 &refine_features_event,
1811 { sizeof(cl_mem), &deshake_ctx->grayscale },
1812 { sizeof(cl_mem), &deshake_ctx->harris_buf },
1813 { sizeof(cl_mem), &deshake_ctx->refined_features },
1814 { sizeof(cl_int), &deshake_ctx->refine_features }
1815 );
1816
1818 deshake_ctx->command_queue,
1819 deshake_ctx->kernel_brief_descriptors,
1820 grid_32_global_work,
1822 &brief_event,
1823 { sizeof(cl_mem), &deshake_ctx->grayscale },
1824 { sizeof(cl_mem), &deshake_ctx->refined_features },
1825 { sizeof(cl_mem), &deshake_ctx->descriptors },
1826 { sizeof(cl_mem), &deshake_ctx->brief_pattern}
1827 );
1828
1830 // This is the first frame we've been given to queue, meaning there is
1831 // no previous frame to match descriptors to
1832
1833 goto no_motion_data;
1834 }
1835
1837 deshake_ctx->command_queue,
1838 deshake_ctx->kernel_match_descriptors,
1839 grid_32_global_work,
1841 &match_descriptors_event,
1842 { sizeof(cl_mem), &deshake_ctx->prev_refined_features },
1843 { sizeof(cl_mem), &deshake_ctx->refined_features },
1844 { sizeof(cl_mem), &deshake_ctx->descriptors },
1845 { sizeof(cl_mem), &deshake_ctx->prev_descriptors },
1846 { sizeof(cl_mem), &deshake_ctx->matches }
1847 );
1848
1849 cle = clEnqueueReadBuffer(
1850 deshake_ctx->command_queue,
1851 deshake_ctx->matches,
1852 CL_TRUE,
1853 0,
1855 deshake_ctx->matches_host,
1856 0,
1858 &read_buf_event
1859 );
1861
1863
1864 if (num_vectors < 10) {
1865 // Not enough matches to get reliable motion data for this frame
1866 //
1867 // From this point on all data is relative to this frame rather than the
1868 // original frame. We have to make sure that we don't mix values that were
1869 // relative to the original frame with the new values relative to this
1870 // frame when doing the gaussian smoothing. We keep track of where the old
1871 // values end using this data_end_offset field in order to accomplish
1872 // that goal.
1873 //
1874 // If no motion data is present for multiple frames in a short window of
1875 // time, we leave the end where it was to avoid mixing 0s in with the
1876 // old data (and just treat them all as part of the new values)
1877 if (deshake_ctx->abs_motion.data_end_offset == -1) {
1878 deshake_ctx->abs_motion.data_end_offset =
1880 }
1881
1882 goto no_motion_data;
1883 }
1884
1886 deshake_ctx,
1887 deshake_ctx->matches_contig_host,
1888 &debug_matches,
1889 num_vectors,
1890 model.matrix,
1891 10.0,
1892 3000,
1893 0.999999999999
1894 )) {
1895 goto no_motion_data;
1896 }
1897
1898 for (
int i = 0;
i < num_vectors;
i++) {
1899 if (deshake_ctx->matches_contig_host[
i].should_consider) {
1900 deshake_ctx->inliers[num_inliers] = deshake_ctx->matches_contig_host[
i];
1901 num_inliers++;
1902 }
1903 }
1904
1906 deshake_ctx,
1907 deshake_ctx->inliers,
1908 &debug_matches,
1909 num_inliers,
1910 model.matrix,
1911 400
1912 )) {
1913 goto no_motion_data;
1914 }
1915
1916
1918
1919 // Get the absolute transform data for the previous frame
1922 deshake_ctx->abs_motion.ringbuffers[
i],
1925 }
1926
1932
1933 if (deshake_ctx->debug_on) {
1934 if (!deshake_ctx->is_yuv) {
1936 }
1942 }
1943
1944 goto end;
1945
1946 no_motion_data:
1952
1953 for (
int i = 0;
i < num_vectors;
i++) {
1954 deshake_ctx->matches_contig_host[
i].should_consider = 0;
1955 }
1956 debug_matches.num_model_matches = 0;
1957
1958 if (deshake_ctx->debug_on) {
1960 "\n[ALERT] No motion data found in queue_frame, motion reset to 0\n\n"
1961 );
1962 }
1963
1964 goto end;
1965
1966 end:
1967 // Swap the descriptor buffers (we don't need the previous frame's descriptors
1968 // again so we will use that space for the next frame's descriptors)
1969 temp = deshake_ctx->prev_descriptors;
1970 deshake_ctx->prev_descriptors = deshake_ctx->descriptors;
1971 deshake_ctx->descriptors =
temp;
1972
1973 // Same for the refined features
1974 temp = deshake_ctx->prev_refined_features;
1975 deshake_ctx->prev_refined_features = deshake_ctx->refined_features;
1976 deshake_ctx->refined_features =
temp;
1977
1978 if (deshake_ctx->debug_on) {
1979 if (num_vectors == 0) {
1980 debug_matches.matches =
NULL;
1981 } else {
1983
1984 if (!debug_matches.matches) {
1987 }
1988 }
1989
1990 for (
int i = 0;
i < num_vectors;
i++) {
1991 debug_matches.matches[
i] = deshake_ctx->matches_contig_host[
i];
1992 }
1993 debug_matches.num_matches = num_vectors;
1994
1996 deshake_ctx->abs_motion.debug_matches,
1997 &debug_matches, 1);
1998 }
1999
2001 av_fifo_write(deshake_ctx->abs_motion.ringbuffers[
i], &new_vals[
i], 1);
2002 }
2003
2005
2007 clFinish(deshake_ctx->command_queue);
2009 return err;
2010 }
2011
2013 {
2020
2022
2023 if (!deshake_ctx->
eof) {
2028 if (!
frame->hw_frames_ctx)
2030
2035 }
2036
2037 // If there is no more space in the ringbuffers, remove the oldest
2038 // values to make room for the new ones
2042 }
2043 }
2048 // See if we have enough buffered frames to process one
2049 //
2050 // "enough" is half the smooth window of queued frames into the future
2053 }
2054 }
2055 }
2056 }
2057
2060 deshake_ctx->
eof = 1;
2061 }
2062 }
2063
2064 if (deshake_ctx->
eof) {
2065 // Finish processing the rest of the frames in the queue.
2069 }
2070
2074 }
2075 }
2076
2079 "Average kernel execution times:\n"
2080 "\t grayscale: %0.3f ms\n"
2081 "\t harris_response: %0.3f ms\n"
2082 "\t refine_features: %0.3f ms\n"
2083 "\tbrief_descriptors: %0.3f ms\n"
2084 "\tmatch_descriptors: %0.3f ms\n"
2085 "\t transform: %0.3f ms\n"
2086 "\t crop_upscale: %0.3f ms\n"
2087 "Average buffer read times:\n"
2088 "\t features buf: %0.3f ms\n",
2097 );
2098 }
2099
2101 return 0;
2102 }
2103
2104 if (!deshake_ctx->
eof) {
2106 }
2107
2109 }
2110
2112 {
2116 },
2117 };
2118
2120 {
2124 },
2125 };
2126
2127 #define OFFSET(x) offsetof(DeshakeOpenCLContext, x)
2128 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
2129
2131 {
2132 "tripod", "simulates a tripod by preventing any camera movement whatsoever "
2133 "from the original frame",
2135 },
2136 {
2137 "debug", "turn on additional debugging information",
2139 },
2140 {
2141 "adaptive_crop", "attempt to subtly crop borders to reduce mirrored content",
2143 },
2144 {
2145 "refine_features", "refine feature point locations at a sub-pixel level",
2147 },
2148 {
2149 "smooth_strength", "smoothing strength (0 attempts to adaptively determine optimal strength)",
2151 },
2152 {
2153 "smooth_window_multiplier", "multiplier for number of frames to buffer for motion data",
2155 },
2157 };
2158
2160
2162 .
name =
"deshake_opencl",
2165 .priv_class = &deshake_opencl_class,
2173 };