Go to the documentation of this file. 1 /*
2 * Copyright (c) 2023 Zhao Zhili <zhilizhao@tencent.com>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <VideoToolbox/VideoToolbox.h>
22
27
31
34
40
48
50 {
54
55 ret = VTPixelTransferSessionCreate(kCFAllocatorDefault, &
s->transfer);
59 }
60
/*
 * STRING_OPTION(var_name, func_name, default_value)
 *
 * Resolves one textual option into its numeric field.
 *
 * If s->var_name_string is set, it is passed to av_<func_name>_from_name();
 * a negative result logs "Invalid <var_name>." on avctx at AV_LOG_ERROR and
 * makes the ENCLOSING function return AVERROR(EINVAL). Otherwise the result
 * is stored in s->var_name. If the string is unset, s->var_name is set to
 * default_value.
 *
 * The expansion refers to `s` and `avctx` by name, so both must be in scope
 * at the point of use, and the caller must be a function returning int-like
 * error codes because of the embedded return statement.
 */
61 #define STRING_OPTION(var_name, func_name, default_value) \
62 do { \
63 if (s->var_name##_string) { \
64 int var = av_##func_name##_from_name(s->var_name##_string); \
65 if (var < 0) { \
66 av_log(avctx, AV_LOG_ERROR, "Invalid %s.\n", #var_name); \
67 return AVERROR(EINVAL); \
68 } \
69 s->var_name = var; \
70 } else { \
71 s->var_name = default_value; \
72 } \
73 } while (0)
74
78
83 "Doesn't support converting to colour primaries %s\n",
84 s->colour_primaries_string);
86 }
87 VTSessionSetProperty(
s->transfer, kVTPixelTransferPropertyKey_DestinationColorPrimaries,
value);
88 }
89
94 "Doesn't support converting to trc %s\n",
95 s->colour_transfer_string);
97 }
98 VTSessionSetProperty(
s->transfer, kVTPixelTransferPropertyKey_DestinationTransferFunction,
value);
99 }
100
105 "Doesn't support converting to colorspace %s\n",
106 s->colour_matrix_string);
108 }
109 VTSessionSetProperty(
s->transfer, kVTPixelTransferPropertyKey_DestinationYCbCrMatrix,
value);
110 }
111
112 VTSessionSetProperty(
s->transfer, kVTPixelTransferPropertyKey_ScalingMode, kVTScalingMode_CropSourceToCleanAperture);
113
114 return 0;
115 }
116
118 {
120
122 VTPixelTransferSessionInvalidate(
s->transfer);
123 CFRelease(
s->transfer);
125 }
126 }
127
129 {
134 CVPixelBufferRef
src;
135 CVPixelBufferRef
dst;
136
138 int top;
141 CFNumberRef crop_width_num;
142 CFNumberRef crop_height_num;
143 CFNumberRef crop_offset_left_num;
144 CFNumberRef crop_offset_top_num;
145 const void *clean_aperture_keys[4];
146 const void *source_clean_aperture_values[4];
147 CFDictionaryRef source_clean_aperture;
148
153 }
154
158
162 out->crop_bottom = 0;
166 }
167
168 av_reduce(&
out->sample_aspect_ratio.num, &
out->sample_aspect_ratio.den,
171 INT_MAX);
173 out->color_primaries =
s->colour_primaries;
175 out->color_trc =
s->colour_transfer;
177 out->colorspace =
s->colour_matrix;
178
181 // The crop offsets are relative to the center of the frame.
182 // The crop width and crop height are relative to the center of the crop rect, not the top left as normal.
185 crop_width_num = CFNumberCreate(kCFAllocatorDefault, kCFNumberIntType, &
width);
186 crop_height_num = CFNumberCreate(kCFAllocatorDefault, kCFNumberIntType, &
height);
187 crop_offset_left_num = CFNumberCreate(kCFAllocatorDefault, kCFNumberIntType, &
left);
188 crop_offset_top_num = CFNumberCreate(kCFAllocatorDefault, kCFNumberIntType, &top);
189
190 clean_aperture_keys[0] = kCVImageBufferCleanApertureWidthKey;
191 clean_aperture_keys[1] = kCVImageBufferCleanApertureHeightKey;
192 clean_aperture_keys[2] = kCVImageBufferCleanApertureHorizontalOffsetKey;
193 clean_aperture_keys[3] = kCVImageBufferCleanApertureVerticalOffsetKey;
194
195 source_clean_aperture_values[0] = crop_width_num;
196 source_clean_aperture_values[1] = crop_height_num;
197 source_clean_aperture_values[2] = crop_offset_left_num;
198 source_clean_aperture_values[3] = crop_offset_top_num;
199
200 source_clean_aperture = CFDictionaryCreate(kCFAllocatorDefault,
201 clean_aperture_keys,
202 source_clean_aperture_values,
203 4,
204 &kCFTypeDictionaryKeyCallBacks,
205 &kCFTypeDictionaryValueCallBacks);
206
207 CFRelease(crop_width_num);
208 CFRelease(crop_height_num);
209 CFRelease(crop_offset_left_num);
210 CFRelease(crop_offset_top_num);
211
212 src = (CVPixelBufferRef)in->
data[3];
213 dst = (CVPixelBufferRef)
out->data[3];
214 CVBufferSetAttachment(
src, kCVImageBufferCleanApertureKey,
215 source_clean_aperture, kCVAttachmentMode_ShouldPropagate);
216 ret = VTPixelTransferSessionTransferImage(
s->transfer,
src,
dst);
217 CFRelease(source_clean_aperture);
222 }
223
225
227
232 }
233
235 {
236 int err;
244
248 if (err < 0)
249 return err;
250
253
254 outlink->
w =
s->output_width;
255 outlink->
h =
s->output_height;
256
257 if (
inlink->sample_aspect_ratio.num) {
260 } else {
262 }
263
265
271 hw_frame_ctx_out->
width = outlink->
w;
272 hw_frame_ctx_out->
height = outlink->
h;
274
276 if (err < 0)
277 return err;
278
280 if (err < 0) {
282 "Failed to init videotoolbox frame context, %s\n",
284 return err;
285 }
286
287 return 0;
288 }
289
/* Byte offset of member x within ScaleVtContext, computed with offsetof(). */
290 #define OFFSET(x) offsetof(ScaleVtContext, x)
/*
 * Bitwise OR of AV_OPT_FLAG_FILTERING_PARAM and AV_OPT_FLAG_VIDEO_PARAM.
 * NOTE(review): presumably consumed by the option table entries that follow;
 * those lines are truncated in this view, so confirm against the full file.
 */
291 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
293 { "w", "Output video width",
295 { "h", "Output video height",
297 { "color_matrix", "Output colour matrix coefficient set",
299 { "color_primaries", "Output colour primaries",
301 { "color_transfer", "Output colour transfer characteristics",
304 };
305
307
309 {
313 },
314 };
315
317 {
321 },
322 };
323
325 .
p.
name =
"scale_vt",
327 .p.priv_class = &scale_vt_class,
336 };
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:
=========================
1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames
   scaled_mv = (mv * (256 * (current_reference + 1) / (mv.reference + 1)) + 128) >> 8
2. the median of the scaled left