00001 /* 00002 * ARM optimized DSP utils 00003 * Copyright (c) 2001 Lionel Ulmer 00004 * 00005 * This file is part of FFmpeg. 00006 * 00007 * FFmpeg is free software; you can redistribute it and/or 00008 * modify it under the terms of the GNU Lesser General Public 00009 * License as published by the Free Software Foundation; either 00010 * version 2.1 of the License, or (at your option) any later version. 00011 * 00012 * FFmpeg is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00015 * Lesser General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public 00018 * License along with FFmpeg; if not, write to the Free Software 00019 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00020 */ 00021 00022 #include "libavcodec/dsputil.h" 00023 #if HAVE_IPP 00024 #include <ipp.h> 00025 #endif 00026 00027 void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx); 00028 void ff_float_init_arm_vfp(DSPContext* c, AVCodecContext *avctx); 00029 void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx); 00030 00031 void j_rev_dct_ARM(DCTELEM *data); 00032 void simple_idct_ARM(DCTELEM *data); 00033 00034 void simple_idct_armv5te(DCTELEM *data); 00035 void simple_idct_put_armv5te(uint8_t *dest, int line_size, DCTELEM *data); 00036 void simple_idct_add_armv5te(uint8_t *dest, int line_size, DCTELEM *data); 00037 00038 void ff_simple_idct_armv6(DCTELEM *data); 00039 void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, DCTELEM *data); 00040 void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, DCTELEM *data); 00041 00042 void ff_simple_idct_neon(DCTELEM *data); 00043 void ff_simple_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data); 00044 void ff_simple_idct_add_neon(uint8_t *dest, int line_size, DCTELEM *data); 00045 00046 /* XXX: local hack */ 00047 static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size); 00048 static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size); 00049 00050 void put_pixels8_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); 00051 void put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); 00052 void put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); 00053 void put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); 00054 00055 void put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); 00056 void put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); 00057 void put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); 00058 00059 void put_pixels16_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h); 00060 00061 void ff_prefetch_arm(void *mem, int stride, int h); 00062 00063 CALL_2X_PIXELS(put_pixels16_x2_arm , put_pixels8_x2_arm , 8) 00064 CALL_2X_PIXELS(put_pixels16_y2_arm , put_pixels8_y2_arm , 8) 00065 CALL_2X_PIXELS(put_pixels16_xy2_arm, put_pixels8_xy2_arm, 8) 00066 CALL_2X_PIXELS(put_no_rnd_pixels16_x2_arm , put_no_rnd_pixels8_x2_arm , 8) 00067 CALL_2X_PIXELS(put_no_rnd_pixels16_y2_arm , put_no_rnd_pixels8_y2_arm , 8) 00068 CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_arm, put_no_rnd_pixels8_xy2_arm, 8) 00069 00070 void ff_add_pixels_clamped_ARM(short *block, unsigned char *dest, 00071 int line_size); 00072 00073 /* XXX: those functions should be suppressed ASAP when all IDCTs are 00074 converted */ 00075 static void j_rev_dct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block) 00076 { 00077 j_rev_dct_ARM (block); 00078 ff_put_pixels_clamped(block, dest, line_size); 00079 } 00080 static void j_rev_dct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block) 00081 { 00082 j_rev_dct_ARM (block); 00083 ff_add_pixels_clamped(block, dest, line_size); 00084 } 00085 static void simple_idct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block) 00086 { 00087 simple_idct_ARM (block); 00088 ff_put_pixels_clamped(block, dest, line_size); 00089 } 00090 static void simple_idct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block) 00091 { 00092 simple_idct_ARM (block); 00093 ff_add_pixels_clamped(block, dest, line_size); 00094 } 00095 00096 #if HAVE_IPP 00097 static void simple_idct_ipp(DCTELEM *block) 00098 { 00099 ippiDCT8x8Inv_Video_16s_C1I(block); 00100 } 00101 static void simple_idct_ipp_put(uint8_t *dest, int line_size, DCTELEM *block) 00102 { 00103 ippiDCT8x8Inv_Video_16s8u_C1R(block, dest, line_size); 00104 } 00105 00106 void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size); 00107 00108 static void simple_idct_ipp_add(uint8_t *dest, int line_size, DCTELEM *block) 00109 { 00110 ippiDCT8x8Inv_Video_16s_C1I(block); 00111 #if HAVE_IWMMXT 00112 add_pixels_clamped_iwmmxt(block, dest, line_size); 00113 #else 00114 ff_add_pixels_clamped_ARM(block, dest, line_size); 00115 #endif 00116 } 00117 #endif 00118 00119 int mm_support(void) 00120 { 00121 return HAVE_IWMMXT * FF_MM_IWMMXT; 00122 } 00123 00124 void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx) 00125 { 00126 int idct_algo= avctx->idct_algo; 00127 00128 ff_put_pixels_clamped = c->put_pixels_clamped; 00129 ff_add_pixels_clamped = c->add_pixels_clamped; 00130 00131 if (avctx->lowres == 0) { 00132 if(idct_algo == FF_IDCT_AUTO){ 00133 #if HAVE_IPP 00134 idct_algo = FF_IDCT_IPP; 00135 #elif HAVE_NEON 00136 idct_algo = FF_IDCT_SIMPLENEON; 00137 #elif HAVE_ARMV6 00138 idct_algo = FF_IDCT_SIMPLEARMV6; 00139 #elif HAVE_ARMV5TE 00140 idct_algo = FF_IDCT_SIMPLEARMV5TE; 00141 #else 00142 idct_algo = FF_IDCT_ARM; 00143 #endif 00144 } 00145 00146 if(idct_algo==FF_IDCT_ARM){ 00147 c->idct_put= j_rev_dct_ARM_put; 00148 c->idct_add= j_rev_dct_ARM_add; 00149 c->idct = j_rev_dct_ARM; 00150 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; 00151 } else if (idct_algo==FF_IDCT_SIMPLEARM){ 00152 c->idct_put= simple_idct_ARM_put; 00153 c->idct_add= simple_idct_ARM_add; 00154 c->idct = simple_idct_ARM; 00155 c->idct_permutation_type= FF_NO_IDCT_PERM; 00156 #if HAVE_ARMV6 00157 } else if (idct_algo==FF_IDCT_SIMPLEARMV6){ 00158 c->idct_put= ff_simple_idct_put_armv6; 00159 c->idct_add= ff_simple_idct_add_armv6; 00160 c->idct = ff_simple_idct_armv6; 00161 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; 00162 #endif 00163 #if HAVE_ARMV5TE 00164 } else if (idct_algo==FF_IDCT_SIMPLEARMV5TE){ 00165 c->idct_put= simple_idct_put_armv5te; 00166 c->idct_add= simple_idct_add_armv5te; 00167 c->idct = simple_idct_armv5te; 00168 c->idct_permutation_type = FF_NO_IDCT_PERM; 00169 #endif 00170 #if HAVE_IPP 00171 } else if (idct_algo==FF_IDCT_IPP){ 00172 c->idct_put= simple_idct_ipp_put; 00173 c->idct_add= simple_idct_ipp_add; 00174 c->idct = simple_idct_ipp; 00175 c->idct_permutation_type= FF_NO_IDCT_PERM; 00176 #endif 00177 #if HAVE_NEON 00178 } else if (idct_algo==FF_IDCT_SIMPLENEON){ 00179 c->idct_put= ff_simple_idct_put_neon; 00180 c->idct_add= ff_simple_idct_add_neon; 00181 c->idct = ff_simple_idct_neon; 00182 c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM; 00183 #endif 00184 } 00185 } 00186 00187 c->put_pixels_tab[0][0] = put_pixels16_arm; 00188 c->put_pixels_tab[0][1] = put_pixels16_x2_arm; 00189 c->put_pixels_tab[0][2] = put_pixels16_y2_arm; 00190 c->put_pixels_tab[0][3] = put_pixels16_xy2_arm; 00191 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_arm; 00192 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_arm; 00193 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_arm; 00194 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_arm; 00195 c->put_pixels_tab[1][0] = put_pixels8_arm; 00196 c->put_pixels_tab[1][1] = put_pixels8_x2_arm; 00197 c->put_pixels_tab[1][2] = put_pixels8_y2_arm; 00198 c->put_pixels_tab[1][3] = put_pixels8_xy2_arm; 00199 c->put_no_rnd_pixels_tab[1][0] = put_pixels8_arm; 00200 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_arm; 00201 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_arm; 00202 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_arm; 00203 00204 #if HAVE_ARMV5TE 00205 c->prefetch = ff_prefetch_arm; 00206 #endif 00207 00208 #if HAVE_IWMMXT 00209 dsputil_init_iwmmxt(c, avctx); 00210 #endif 00211 #if HAVE_ARMVFP 00212 ff_float_init_arm_vfp(c, avctx); 00213 #endif 00214 #if HAVE_NEON 00215 ff_dsputil_init_neon(c, avctx); 00216 #endif 00217 }