00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include <stdio.h>
00022 #include <stdlib.h>
00023 #include <string.h>
00024
00025 #ifdef __SSE__
00026 #include <mmintrin.h>
00027 #include <xmmintrin.h>
00028 #endif
00029
00030 #include <flgrCoreDispatch.h>
00031
00032 #include "flgrMeasureBaseSSE.h"
00033
00034
/* Vector geometry helpers: M64_SIZE is the size in bytes of one __m64 value,
   and each M64_VEC_SIZE_<type> is the number of elements of that type that
   fit in one __m64. All of them have type size_t (they come from sizeof). */
00035 #define M64_SIZE sizeof(__m64)
00036 #define M64_VEC_SIZE_UINT8 (M64_SIZE/sizeof(fgUINT8))
00037 #define M64_VEC_SIZE_INT16 (M64_SIZE/sizeof(fgINT16))
00038 #define M64_VEC_SIZE_UINT16 (M64_SIZE/sizeof(fgUINT16))
00039 #define M64_VEC_SIZE_UINT32 (M64_SIZE/sizeof(fgUINT32))
00040
00041
00042
00043 void flgr1d_measure_min_max_spp1_SSE_fgUINT8(FLGR_Data1D *dat, FLGR_Vector *mini, FLGR_Vector *maxi) {
00044 #ifdef __SSE__
00045 fgUINT8 *psrc = (fgUINT8*) dat->array;
00046 fgUINT8 array_min[M64_VEC_SIZE_UINT8];
00047 fgUINT8 array_max[M64_VEC_SIZE_UINT8];
00048 fgUINT8 _mini, _maxi;
00049 __m64 rmax,rmin,vec;
00050 int i, length = dat->length;
00051 int length_vecnorm = (length/M64_VEC_SIZE_UINT8)*M64_VEC_SIZE_UINT8;
00052
00053
00054
00055 _mm_empty();
00056
00057 rmin = *((__m64*) psrc);
00058 rmax = *((__m64*) psrc);
00059
00060 for(i=M64_VEC_SIZE_UINT8 ; i<length_vecnorm ; i+=M64_VEC_SIZE_UINT8) {
00061 vec = *((__m64*) (psrc+i));
00062 rmin = _mm_min_pu8(rmin,vec);
00063 rmax = _mm_max_pu8(rmax,vec);
00064 }
00065
00066 *((__m64*) array_min) = rmin;
00067 *((__m64*) array_max) = rmax;
00068
00069 _mm_empty();
00070
00071 _mini = array_min[0];
00072 _maxi = array_max[0];
00073
00074 for(i=1 ; i<M64_VEC_SIZE_UINT8 ; i++) {
00075 _mini = FLGR_MIN(_mini,array_min[i]);
00076 _maxi = FLGR_MAX(_maxi,array_max[i]);
00077 }
00078
00079 for(i=length_vecnorm ; i<length ; i++) {
00080 _mini = FLGR_MIN(psrc[i],_mini);
00081 _maxi = FLGR_MAX(psrc[i],_maxi);
00082 }
00083
00084 *((fgUINT8*) mini->array) = _mini;
00085 *((fgUINT8*) maxi->array) = _maxi;
00086 #endif
00087 }
00088
00089
00090 void flgr1d_measure_min_max_spp1_SSE_fgINT16(FLGR_Data1D *dat, FLGR_Vector *mini, FLGR_Vector *maxi) {
00091 #ifdef __SSE__
00092 fgINT16 *psrc = (fgINT16*) dat->array;
00093 fgINT16 array_min[M64_VEC_SIZE_INT16];
00094 fgINT16 array_max[M64_VEC_SIZE_INT16];
00095 fgINT16 _mini, _maxi;
00096 __m64 rmax,rmin,vec;
00097 int i, length = dat->length;
00098 int length_vecnorm = (length/M64_VEC_SIZE_INT16)*M64_VEC_SIZE_INT16;
00099
00100
00101
00102 _mm_empty();
00103
00104 rmin = *((__m64*) psrc);
00105 rmax = *((__m64*) psrc);
00106
00107 for(i=M64_VEC_SIZE_INT16 ; i<length_vecnorm ; i+=M64_VEC_SIZE_INT16) {
00108 vec = *((__m64*) (psrc+i));
00109 rmin = _mm_min_pi16(rmin,vec);
00110 rmax = _mm_max_pi16(rmax,vec);
00111 }
00112
00113 *((__m64*) array_min) = rmin;
00114 *((__m64*) array_max) = rmax;
00115
00116 _mm_empty();
00117
00118 _mini = array_min[0];
00119 _maxi = array_max[0];
00120
00121 for(i=1 ; i<M64_VEC_SIZE_INT16 ; i++) {
00122 _mini = FLGR_MIN(_mini,array_min[i]);
00123 _maxi = FLGR_MAX(_maxi,array_max[i]);
00124 }
00125
00126 for(i=length_vecnorm ; i<length ; i++) {
00127 _mini = FLGR_MIN(psrc[i],_mini);
00128 _maxi = FLGR_MAX(psrc[i],_maxi);
00129 }
00130
00131 *((fgINT16*) mini->array) = _mini;
00132 *((fgINT16*) maxi->array) = _maxi;
00133 #endif
00134 }
00135
00136
00137 void flgr1d_measure_volume_u32_spp1_SSE_fgUINT8(FLGR_Data1D *dat, FLGR_Vector *volume) {
00138 #ifdef __SSE__
00139 fgUINT32 array_vol[M64_VEC_SIZE_UINT32];
00140 fgUINT32 _volume;
00141 fgUINT8 *psrc = (fgUINT8*) dat->array;
00142 int i, length = dat->length;
00143 int length_vecnorm = (length/M64_VEC_SIZE_UINT8)*M64_VEC_SIZE_UINT8;
00144
00145 __m64 rvol;
00146 __m64 v1, v2;
00147 __m64 v3;
00148 __m64 vec_zero;
00149
00150
00151
00152 _mm_empty();
00153
00154 v2 = *((__m64*) (psrc));
00155 vec_zero = _mm_xor_si64(v2,v2);
00156
00157 rvol = vec_zero;
00158
00159 for(i=0 ; i<length_vecnorm ; i+=M64_VEC_SIZE_UINT8) {
00160 v2 = *((__m64*) (psrc+i));
00161
00162 v1 = _mm_unpackhi_pi8(v2,vec_zero);
00163 v2 = _mm_unpacklo_pi8(v2,vec_zero);
00164
00165 v3 = _mm_unpackhi_pi16(v1,vec_zero);
00166 v1 = _mm_unpacklo_pi16(v1,vec_zero);
00167 v1 = _mm_add_pi32(v1,v3);
00168
00169 v3 = _mm_unpackhi_pi16(v2,vec_zero);
00170 v2 = _mm_unpacklo_pi16(v2,vec_zero);
00171 v2 = _mm_add_pi32(v2,v3);
00172
00173 v1 = _mm_add_pi32(v1,v2);
00174
00175 rvol = _mm_add_pi32(v1,rvol);
00176 }
00177
00178 *((__m64*) array_vol) = rvol;
00179
00180 _mm_empty();
00181
00182 _volume = array_vol[0] + array_vol[1];
00183
00184 for(i=length_vecnorm ; i<length ; i++) {
00185 _volume += (fgUINT32) psrc[i];
00186 }
00187
00188 *((fgUINT32*) volume->array) = _volume;
00189 #endif
00190 }
00191
00192
00193
00194 void flgr1d_measure_volume_u32_spp1_SSE_fgUINT16(FLGR_Data1D *dat, FLGR_Vector *volume) {
00195 #ifdef __SSE__
00196 fgUINT32 array_vol[2];
00197 fgUINT32 _volume=0;
00198 fgUINT16 *psrc = (fgUINT16*) dat->array;
00199 __m64 *vec_psrc = (__m64*) dat->array;
00200 __m64 rvol;
00201 __m64 v1, v2;
00202 __m64 vec_zero;
00203 int i, nbvector = dat->length>>2;
00204
00205
00206
00207 _mm_empty();
00208
00209 v2 = *((__m64*) (psrc));
00210 vec_zero = _mm_xor_si64(v2,v2);
00211
00212 rvol = vec_zero;
00213
00214 for(i=0 ; i<nbvector ; i++) {
00215 v2 = vec_psrc[i];
00216 v1 = _mm_unpackhi_pi16(v2,vec_zero);
00217 v2 = _mm_unpacklo_pi16(v2,vec_zero);
00218
00219 v1 = _mm_add_pi32(v1,v2);
00220
00221 rvol = _mm_add_pi32(v1,rvol);
00222 }
00223
00224 *((__m64*) array_vol) = rvol;
00225
00226 _mm_empty();
00227
00228 _volume = array_vol[0] + array_vol[1];
00229
00230 for(i=nbvector<<2 ; i<dat->length ; i++) {
00231 _volume += (fgUINT32) psrc[i];
00232 }
00233
00234 *((fgUINT32*) volume->array) = _volume;
00235 #endif
00236 }
00237
00238
00239
00240
00241 void flgr1d_measure_volume_u32_spp1_SSE_fgINT16(FLGR_Data1D *dat, FLGR_Vector *volume) {
00242 #ifdef __SSE__
00243 fgUINT32 array_vol[2];
00244 fgUINT32 _volume=0;
00245 fgINT16 *psrc = (fgINT16*) dat->array;
00246 __m64 *vec_psrc = (__m64*) dat->array;
00247 __m64 rvol;
00248 __m64 v1, v2;
00249 __m64 vec_zero;
00250 int i, nbvector = dat->length>>2;
00251
00252
00253
00254 _mm_empty();
00255
00256 v2 = *((__m64*) (psrc));
00257 vec_zero = _mm_xor_si64(v2,v2);
00258
00259 rvol = vec_zero;
00260
00261 for(i=0 ; i<nbvector ; i++) {
00262 v2 = vec_psrc[i];
00263 v1 = _mm_unpackhi_pi16(v2,vec_zero);
00264 v2 = _mm_unpacklo_pi16(v2,vec_zero);
00265
00266 v1 = _mm_add_pi32(v1,v2);
00267
00268 rvol = _mm_add_pi32(v1,rvol);
00269 }
00270
00271 *((__m64*) array_vol) = rvol;
00272
00273 _mm_empty();
00274
00275 _volume = array_vol[0] + array_vol[1];
00276
00277 for(i=nbvector<<2 ; i<dat->length ; i++) {
00278 _volume += (fgUINT32) psrc[i];
00279 }
00280
00281 *((fgUINT32*) volume->array) = _volume;
00282 #endif
00283 }
00284
00285
00286 void flgr1d_measure_volume_s32_spp1_SSE_fgINT16(FLGR_Data1D *dat, FLGR_Vector *volume) {
00287 #ifdef __SSE__
00288 fgINT32 array_vol[2];
00289 fgINT32 _volume=0;
00290 fgINT16 *psrc = (fgINT16*) dat->array;
00291 __m64 *vec_psrc = (__m64*) dat->array;
00292 __m64 rvol;
00293 __m64 v1, v2;
00294 int i, nbvector = dat->length>>2;
00295
00296
00297
00298 _mm_empty();
00299
00300 v2 = *((__m64*) (psrc));
00301 rvol = _mm_xor_si64(v2,v2);
00302
00303 for(i=0 ; i<nbvector ; i++) {
00304 v2 = vec_psrc[i];
00305
00306 v1 = _mm_unpackhi_pi16(v2,v2);
00307 v1 = _mm_srai_pi32( v1, 16 );
00308
00309 v2 = _mm_unpacklo_pi16(v2,v2);
00310 v2 = _mm_srai_pi32( v2, 16 );
00311
00312 v1 = _mm_add_pi32(v1,v2);
00313
00314 rvol = _mm_add_pi32(v1,rvol);
00315 }
00316
00317 *((__m64*) array_vol) = rvol;
00318
00319 _mm_empty();
00320
00321 _volume = array_vol[0] + array_vol[1];
00322
00323 for(i=nbvector<<2 ; i<dat->length ; i++) {
00324 _volume += (fgINT32) psrc[i];
00325 }
00326
00327 *((fgINT32*) volume->array) = _volume;
00328 #endif
00329 }
00330
00331
00332