1 /*
2 * Copyright (C) 2015 Pedro Arthur <bygrandao@gmail.com>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
23
25 {
/*
 * Resets the line-pointer tables of the slice `s` and clears its
 * should_free_lines flag.
 *
 * NOTE(review): this listing is incomplete. The function signature, the
 * declaration of `i`, and the statements originally at lines 31-32 are
 * absent, so anything done there (for instance releasing the line buffers
 * themselves) cannot be confirmed from what is visible here.
 */
/* First pass: only planes 0 and 1 are walked. */
27 for (
i = 0;
i < 2; ++
i) {
28 int n =
s->plane[
i].available_lines;
29 int j;
30 for (j = 0; j < n; ++j) {
/* Clear the entry n slots after j, i.e. in the second group of n pointers. */
33 s->plane[
i].line[j+n] =
NULL;
34 }
35 }
36
/*
 * Second pass: zero the whole pointer table of all four planes. The table
 * is treated as 3 * available_lines entries when s->is_ring is set and as
 * available_lines entries otherwise.
 */
37 for (
i = 0;
i < 4; ++
i)
38 memset(
s->plane[
i].line, 0,
sizeof(uint8_t*) *
s->plane[
i].available_lines * (
s->is_ring ? 3 : 1));
/* The slice no longer holds lines that need to be freed. */
39 s->should_free_lines = 0;
40 }
41
42 /*
43 Slice lines contain extra bytes for vectorized code; thus @size
44 is the allocated memory size and @width is the number of pixels.
45 */
47 {
/*
 * Sets up the line pointers of planes 0 and 1 and of a second plane `ii`
 * for each of them, marks the slice as owning its lines, and returns 0.
 *
 * NOTE(review): this listing is incomplete. The signature, the declaration
 * of `i`, the definition of `ii`, the statement that obtains
 * s->plane[i].line[j], the body of the NULL check, and at least one opening
 * brace (the closing braces at original lines 72-74 outnumber the visible
 * opening ones) are all absent. `size` is used below but its declaration is
 * not visible; presumably it is a parameter -- TODO confirm.
 */
/* presumably the partner plane for plane 0 is 3 and for plane 1 is 2, via ii = idx[i] -- TODO confirm */
49 int idx[2] = {3, 2};
50
/* Flag the slice as owning its line buffers. */
51 s->should_free_lines = 1;
53
54 for (
i = 0;
i < 2; ++
i) {
55 int n =
s->plane[
i].available_lines;
56 int j;
58
60 for (j = 0; j < n; ++j) {
61 // chroma plane line U and V are expected to be contiguous in memory
62 // by mmx vertical scaler code
/* NOTE(review): the handling of a NULL line (original lines 65-66) is missing; the body is empty as shown. */
64 if (!
s->plane[
i].line[j]) {
67 }
/* Plane ii's line j points into the same buffer as plane i's line j, size + 16 elements further on. */
68 s->plane[ii].line[j] =
s->plane[
i].line[j] +
size + 16;
/*
 * Duplicate both pointers n entries further on, so line[j+n] aliases line[j].
 * NOTE(review): original line 69 is missing; it may have made this conditional.
 */
70 s->plane[
i].line[j+n] =
s->plane[
i].line[j];
71 s->plane[ii].line[j+n] =
s->plane[ii].line[j];
72 }
73 }
74 }
75
76 return 0;
77 }
78
80 {
/*
 * Initialises the per-plane bookkeeping of the slice `s`: stores the chroma
 * sub-sampling factors, clears should_free_lines, and for each of the four
 * planes records the number of available lines, the tmp pointer, and a zero
 * sliceY/sliceH. Returns 0 on the visible path.
 *
 * The names used here (lumLines, chrLines, h_sub_sample, v_sub_sample, ring)
 * line up with the argument lists of the alloc_slice() calls later in this
 * file; presumably this is that function -- TODO confirm.
 *
 * NOTE(review): this listing is incomplete. The signature, the declaration
 * of `i`, and the statements originally at lines 89-90, 95 and 97 are absent
 * (line 95 presumably fills s->plane[i].line using `n`; line 97 presumably
 * is the error return of the NULL check).
 */
/* Line count per plane: planes 0 and 3 use lumLines, planes 1 and 2 use chrLines. */
82 int size[4] = { lumLines,
83 chrLines,
84 chrLines,
85 lumLines };
86
87 s->h_chr_sub_sample = h_sub_sample;
88 s->v_chr_sub_sample = v_sub_sample;
91 s->should_free_lines = 0;
92
93 for (
i = 0;
i < 4; ++
i) {
/* Number of pointer entries: three per line when ring is non-zero, one otherwise. */
94 int n =
size[
i] * ( ring == 0 ? 1 : 3);
/* NOTE(review): the statement this check originally guarded (line 97) is missing from the listing. */
96 if (!
s->plane[
i].line)
98
/* With ring set, tmp points 2 * size[i] entries into the line table; otherwise it is NULL. */
99 s->plane[
i].tmp = ring ?
s->plane[
i].line +
size[
i] * 2 :
NULL;
100 s->plane[
i].available_lines =
size[
i];
/* The slice starts out empty: origin 0, height 0. */
101 s->plane[
i].sliceY = 0;
102 s->plane[
i].sliceH = 0;
103 }
104 return 0;
105 }
106
108 {
/*
 * Tests s->should_free_lines and iterates over the four planes of `s`.
 *
 * NOTE(review): this listing is incomplete. The signature, the declaration
 * of `i`, the statement guarded by the should_free_lines check (original
 * line 112), the loop body (lines 114-115) and the opening brace matching
 * one of the closing braces below are absent, so what is released here
 * cannot be confirmed from the visible lines.
 */
111 if (
s->should_free_lines)
113 for (
i = 0;
i < 4; ++
i) {
116 }
117 }
118 }
119
121 {
/*
 * Advances the window of each plane of `s` by one full set of lines once
 * the requested position is at least two sets ahead of the plane's sliceY.
 * `lum` drives planes 0 and 3, `chr` drives planes 1 and 2. Returns 0.
 *
 * NOTE(review): this listing is incomplete. The signature, the declaration
 * of `i` and the statement opening the first block (original line 123,
 * presumably a test on `lum` mirroring the `if (chr)` below -- TODO
 * confirm) are absent.
 */
/* Step of 3 visits planes 0 and 3 only. */
124 for (
i = 0;
i < 4;
i+=3) {
125 int n =
s->plane[
i].available_lines;
/* Distance from the start of the plane's current window to the requested line. */
126 int l =
lum -
s->plane[
i].sliceY;
127
/* At least 2n lines ahead: move the window start forward by n and shrink its height by n. */
128 if (l >= n * 2) {
129 s->plane[
i].sliceY += n;
130 s->plane[
i].sliceH -= n;
131 }
132 }
133 }
/* Same adjustment for planes 1 and 2, only when chr is non-zero. */
134 if (chr) {
135 for (
i = 1;
i < 3; ++
i) {
136 int n =
s->plane[
i].available_lines;
137 int l = chr -
s->plane[
i].sliceY;
138
139 if (l >= n * 2) {
140 s->plane[
i].sliceY += n;
141 s->plane[
i].sliceH -= n;
142 }
143 }
144 }
145 return 0;
146 }
147
149 {
/*
 * Points the line table of each plane of `s` at rows of the caller's
 * buffers `src` (row pitch `stride`), covering luma rows lumY..lumY+lumH
 * and chroma rows chrY..chrY+chrH, and updates sliceY/sliceH to match.
 * Returns 0.
 *
 * NOTE(review): this listing is incomplete. The signature, the declaration
 * of `i`, the per-plane loop header (original line 164; the closing brace
 * at line 184 ends it) and the body of the first `for (j ...)` loop (line
 * 175) are absent.
 */
151
/* First row per plane: planes 0 and 3 use the luma range, planes 1 and 2 the chroma range. */
152 const int start[4] = {lumY,
153 chrY,
154 chrY,
155 lumY};
156
/* One past the last row per plane. */
157 const int end[4] = {lumY +lumH,
158 chrY + chrH,
159 chrY + chrH,
160 lumY + lumH};
161
163
/* Row 0 of this call's input: src[i] itself when `relative` is set, else src[i] advanced by start[i] rows. */
165 uint8_t *
const src_i =
src[
i] + (relative ? 0 : start[
i]) *
stride[
i];
166 int j;
167 int first =
s->plane[
i].sliceY;
168 int n =
s->plane[
i].available_lines;
/* Rows supplied by this call, and rows from the current window start to the end of this call. */
169 int lines = end[
i] - start[
i];
170 int tot_lines = end[
i] -
first;
171
/* The new rows start at or after the current window start and the combined span still fits in n lines: extend the window. */
172 if (start[
i] >=
first && n >= tot_lines) {
173 s->plane[
i].sliceH =
FFMAX(tot_lines,
s->plane[
i].sliceH);
/* NOTE(review): the body of this loop (original line 175) is missing from the listing. */
174 for (j = 0; j < lines; j+= 1)
176 } else {
/* Otherwise restart the window at start[i], keeping at most n rows. */
177 s->plane[
i].sliceY = start[
i];
178 lines = lines > n ? n : lines;
179 s->plane[
i].sliceH = lines;
180 for (j = 0; j < lines; j+= 1)
181 s->plane[
i].line[j] = src_i + j *
stride[
i];
182 }
183
184 }
185
186 return 0;
187 }
188
190 {
/*
 * Fills every line of all four planes of `s` with a constant whose element
 * width and value depend on `bpc`:
 *   bpc == 16: (n >> 1) + 1 elements of int32_t, each 1 << 18
 *   bpc == 32: (n >> 2) + 1 elements of int64_t, each 1LL << 34
 *   otherwise: n + 1 elements of int16_t, each 1 << 14
 *
 * NOTE(review): the function signature is absent from this listing; `s`,
 * `n` and `bpc` are presumably its parameters -- TODO confirm their types.
 */
191 int i, j, k,
size, end;
192
193 for (
i = 0;
i < 4; ++
i) {
/* Only the first available_lines entries of the plane's line table are written. */
194 size =
s->plane[
i].available_lines;
195 for (j = 0; j <
size; ++j) {
196 if (bpc == 16) {
197 end = (n>>1) + 1;
198 for (k = 0; k < end; ++k)
199 ((
int32_t*)(
s->plane[
i].line[j]))[k] = 1<<18;
200 } else if (bpc == 32) {
201 end = (n>>2) + 1;
202 for (k = 0; k < end; ++k)
203 ((
int64_t*)(
s->plane[
i].line[j]))[k] = 1LL<<34;
204 } else {
205 end = n + 1;
206 for (k = 0; k < end; ++k)
207 ((int16_t*)(
s->plane[
i].line[j]))[k] = 1<<14;
208 }
209 }
210 }
211 }
212
213 /*
214 Calculates the minimum ring buffer size: it should be able to store vFilterSize
215 lines plus n more, where n is the max difference between adjacent slices which
216 each output a line.
217 The n lines are needed only when there are not enough src lines to output a single
218 dst line; these lines must then be buffered so they can be processed on the next call to scale.
219 */
221 {
/*
 * Computes, over all destination luma rows, the largest number of luma and
 * chroma source lines that must be held at once, and writes the results to
 * *out_lum_size and *out_chr_size. Both start at the respective vertical
 * filter size and only grow.
 *
 * NOTE(review): this listing is incomplete. The signature and the statement
 * originally at line 223 are absent; `dstH` is used below but not defined
 * in the visible lines (presumably taken from `c` -- TODO confirm).
 */
222 int lumY;
224 int chrDstH =
c->chrDstH;
225 int *lumFilterPos =
c->vLumFilterPos;
226 int *chrFilterPos =
c->vChrFilterPos;
227 int lumFilterSize =
c->vLumFilterSize;
228 int chrFilterSize =
c->vChrFilterSize;
229 int chrSubSample =
c->chrSrcVSubSample;
230
/* Lower bound: one full filter footprint for each of luma and chroma. */
231 *out_lum_size = lumFilterSize;
232 *out_chr_size = chrFilterSize;
233
234 for (lumY = 0; lumY < dstH; lumY++) {
/* Chroma row matching this luma row; the multiply is done in 64 bits. */
235 int chrY = (
int64_t)lumY * chrDstH / dstH;
/* Last source line touched by either filter, with the chroma line scaled up by the chroma shift. */
236 int nextSlice =
FFMAX(lumFilterPos[lumY] + lumFilterSize - 1,
237 ((chrFilterPos[chrY] + chrFilterSize - 1)
238 << chrSubSample));
239
/* Round down to a multiple of 1 << chrSubSample. */
240 nextSlice >>= chrSubSample;
241 nextSlice <<= chrSubSample;
/* Grow each size to cover the span from the row's first filter line up to nextSlice. */
242 (*out_lum_size) =
FFMAX((*out_lum_size), nextSlice - lumFilterPos[lumY]);
243 (*out_chr_size) =
FFMAX((*out_chr_size), (nextSlice >> chrSubSample) - chrFilterPos[chrY]);
244 }
245 }
246
247
248
250 {
/*
 * Sets the slice and descriptor counts on the context `c` and calls
 * alloc_slice() for every slice: slice 0 with the source dimensions, the
 * intermediate slices and the horizontal-scaler output with
 * lumBufSize/chrBufSize, and the vertical-scaler output with the
 * destination dimensions. Returns 0 on the visible success path and `res`
 * on the other one.
 *
 * NOTE(review): this listing is heavily incomplete. The signature, the
 * declarations of `i`, `index` and `num_vdesc`, the assignments to
 * lumBufSize/chrBufSize, the checks on `res`, every descriptor
 * initialisation call and the cleanup label before the final return are
 * absent (gaps in the embedded numbering: 251-252, 255, 268-272, 275,
 * 286-290, 294-300, 304, 307-309, 313-315, 317, 320, 322, 324, 329-331,
 * 335-336, 338, 344-345, 349, 354-356, 362, 364-365, 368, 372-373, 376,
 * 378-379, 384-385). The comments below cover only what is visible.
 */
253 int num_ydesc;
254 int num_cdesc;
/* An input conversion is needed when any of the corresponding context fields is set. */
256 int need_lum_conv =
c->lumToYV12 ||
c->readLumPlanar ||
c->alpToYV12 ||
c->readAlpPlanar;
257 int need_chr_conv =
c->chrToYV12 ||
c->readChrPlanar;
258 int need_gamma =
c->is_internal_gamma;
259 int srcIdx, dstIdx;
/* dstW 16-bit samples plus 66 bytes; FFALIGN presumably rounds this up to a multiple of 16 -- TODO confirm. */
260 int dst_stride =
FFALIGN(
c->dstW *
sizeof(int16_t) + 66, 16);
261
/* Palette source: c->pal_yuv when usePal() accepts the source format, else the rgb2yuv table. */
262 uint32_t * pal =
usePal(
c->srcFormat) ?
c->pal_yuv : (uint32_t*)
c->input_rgb2yuv_table;
263 int res = 0;
264
265 int lumBufSize;
266 int chrBufSize;
267
271
/* NOTE(review): the conditions guarding the next two shifts (original lines 272 and 275) are missing from the listing. */
273 dst_stride <<= 1;
274
276 dst_stride <<= 2;
277
/* Two descriptors for a plane group when it needs an input conversion, one otherwise. */
278 num_ydesc = need_lum_conv ? 2 : 1;
279 num_cdesc = need_chr_conv ? 2 : 1;
280
281 c->numSlice =
FFMAX(num_ydesc, num_cdesc) + 2;
/* Gamma handling adds two descriptors in total and shifts both descIndex entries by one. */
282 c->numDesc = num_ydesc + num_cdesc + num_vdesc + (need_gamma ? 2 : 0);
283 c->descIndex[0] = num_ydesc + (need_gamma ? 1 : 0);
284 c->descIndex[1] = num_ydesc + num_cdesc + (need_gamma ? 1 : 0);
285
291 c->input_opaque =
c->h2f_tables;
292 }
293
301 }
302
/* Slice 0 is sized from the source height and chroma height; the last argument is 0. */
303 res =
alloc_slice(&
c->slice[0],
c->srcFormat,
c->srcH,
c->chrSrcH,
c->chrSrcHSubSample,
c->chrSrcVSubSample, 0);
/* Intermediate slices 1 .. numSlice-3 use the computed buffer sizes. */
305 for (
i = 1;
i <
c->numSlice-2; ++
i) {
306 res =
alloc_slice(&
c->slice[
i],
c->srcFormat, lumBufSize, chrBufSize,
c->chrSrcHSubSample,
c->chrSrcVSubSample, 0);
310 }
311 // horizontal scaler output
/* This is the only alloc_slice() call here whose last argument is 1. */
312 res =
alloc_slice(&
c->slice[
i],
c->srcFormat, lumBufSize, chrBufSize,
c->chrDstHSubSample,
c->chrDstVSubSample, 1);
316
318
319 // vertical scaler output
321 res =
alloc_slice(&
c->slice[
i],
c->dstFormat,
c->dstH,
c->chrDstH,
c->chrDstHSubSample,
c->chrDstVSubSample, 0);
323
325 srcIdx = 0;
326 dstIdx = 1;
327
328 if (need_gamma) {
332 }
333
334 if (need_lum_conv) {
337 c->desc[
index].alpha =
c->needAlpha;
/* The stage that follows reads from the slice this stage wrote to. */
339 srcIdx = dstIdx;
340 }
341
342
343 dstIdx =
FFMAX(num_ydesc, num_cdesc);
346 c->desc[
index].alpha =
c->needAlpha;
347
348
350 {
/* The slice indices are reset to 0 and 1 for this second group of stages. */
351 srcIdx = 0;
352 dstIdx = 1;
353 if (need_chr_conv) {
357 srcIdx = dstIdx;
358 }
359
360 dstIdx =
FFMAX(num_ydesc, num_cdesc);
/* NOTE(review): both branches of this if/else (original lines 362 and 364-365) are missing from the listing. */
361 if (
c->needs_hcscale)
363 else
366 }
367
369 {
/* The last two slices serve as source and destination here. */
370 srcIdx =
c->numSlice - 2;
371 dstIdx =
c->numSlice - 1;
374 }
375
377 if (need_gamma) {
380 }
381
382 return 0;
383
/* NOTE(review): the label and cleanup that precede this return (original lines 384-385) are missing from the listing. */
386 return res;
387 }
388
390 {
/*
 * Iterates over the c->numDesc descriptors and then over the c->numSlice
 * slices of the context `c`, and returns 0.
 *
 * NOTE(review): this listing is incomplete. The signature, the declaration
 * of `i`, the statements that open the two blocks closed at original lines
 * 396 and 402, and the bodies of both loops (lines 394-395 and 400-401) are
 * absent, so what is released per descriptor and per slice cannot be
 * confirmed from the visible lines.
 */
393 for (
i = 0;
i <
c->numDesc; ++
i)
396 }
397
399 for (
i = 0;
i <
c->numSlice; ++
i)
402 }
404 return 0;
405 }