00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifdef __SSE2__
00022 #include <emmintrin.h>
00023 #endif
00024
00025 #if defined(__SSE__) && defined(__MMX__)
00026 #include <xmmintrin.h>
00027 #include <mmintrin.h>
00028 #endif
00029
00030 #include "flgrCoreData.h"
00031 #include "flgrCoreMalloc.h"
00032 #include "flgrCoreDispatch.h"
00033
00034
00035 int flgr1d_compare_eq_fast_fgUINT8(FLGR_Data1D *dat1, FLGR_Data1D *dat2) {
00036 #ifdef __SSE2__
00037 int i, nbvector = dat1->length/16;
00038 fgUINT8 array_cmp[16] __attribute__ ((aligned (16)));
00039 fgUINT8 *psrc1 = (fgUINT8*) dat1->array;
00040 fgUINT8 *psrc2 = (fgUINT8*) dat2->array;
00041 __m128i *vec_psrc1 = (__m128i*) dat1->array;
00042 __m128i *vec_psrc2 = (__m128i*) dat2->array;
00043 __m128i vectest,vectmp;
00044 fgUINT8 cmp;
00045
00046
00047
00048 i=dat1->length;
00049 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00050 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00051 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00052 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00053 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00054 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00055 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00056 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00057
00058 vectest = _mm_cmpeq_epi8(vec_psrc1[0], vec_psrc2[0]);
00059 for(i=1 ; i<=nbvector ; i++) {
00060 vectmp = _mm_cmpeq_epi8(vec_psrc1[i], vec_psrc2[i]);
00061 vectest = _mm_and_si128(vectest,vectmp);
00062 }
00063
00064 *((__m128i*) array_cmp) = vectest;
00065
00066 cmp = (array_cmp[0] != 0);
00067
00068 for(i=1 ; i<16 ; i++) {
00069 cmp &= (array_cmp[i] != 0);
00070 }
00071
00072 if(cmp != 0)
00073 return FLGR_TRUE;
00074 else
00075 return FLGR_FALSE;
00076
00077 #else
00078 #if defined(__MMX__) && (__SSE__)
00079 int i, nbvector = dat1->length/8;
00080 fgUINT8 array_cmp[8] __attribute__ ((aligned (16)));
00081 fgUINT8 *psrc1 = (fgUINT8*) dat1->array;
00082 fgUINT8 *psrc2 = (fgUINT8*) dat2->array;
00083 __m64 *vec_psrc1 = (__m64*) dat1->array;
00084 __m64 *vec_psrc2 = (__m64*) dat2->array;
00085 __m64 vectest,vectmp;
00086 fgUINT8 cmp;
00087
00088
00089
00090 i=dat1->length;
00091 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00092 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00093 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00094 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00095
00096 vectest = _mm_cmpeq_pi8(vec_psrc1[0], vec_psrc2[0]);
00097 for(i=1 ; i<=nbvector ; i++) {
00098 vectmp = _mm_cmpeq_pi8(vec_psrc1[i], vec_psrc2[i]);
00099 vectest = _mm_and_si64(vectest,vectmp);
00100 }
00101
00102 *((__m64*) array_cmp) = vectest;
00103
00104 cmp = (array_cmp[0] != 0);
00105
00106 for(i=1 ; i<8 ; i++) {
00107 cmp &= (array_cmp[i] != 0);
00108 }
00109
00110 if(cmp != 0)
00111 return FLGR_TRUE;
00112 else
00113 return FLGR_FALSE;
00114
00115 #else
00116
00117 POST_ERROR("SSE2 not activated\n");
00118 return FLGR_RET_NOT_IMPLEMENTED;
00119 #endif
00120 #endif
00121 }
00122
00123
00124 int flgr1d_compare_eq_fast_fgUINT16(FLGR_Data1D *dat1, FLGR_Data1D *dat2) {
00125 #ifdef __SSE2__
00126 int i, nbvector = dat1->length/8;
00127 fgUINT16 array_cmp[8] __attribute__ ((aligned (16)));
00128 fgUINT16 *psrc1 = (fgUINT16*) dat1->array;
00129 fgUINT16 *psrc2 = (fgUINT16*) dat2->array;
00130 __m128i *vec_psrc1 = (__m128i*) dat1->array;
00131 __m128i *vec_psrc2 = (__m128i*) dat2->array;
00132 __m128i vectest,vectmp;
00133 fgUINT16 cmp;
00134
00135
00136
00137 i=dat1->length;
00138 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00139 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00140 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00141 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00142
00143 vectest = _mm_cmpeq_epi16(vec_psrc1[0], vec_psrc2[0]);
00144 for(i=1 ; i<=nbvector ; i++) {
00145 vectmp = _mm_cmpeq_epi16(vec_psrc1[i], vec_psrc2[i]);
00146 vectest = _mm_and_si128(vectest,vectmp);
00147 }
00148
00149 *((__m128i*) array_cmp) = vectest;
00150
00151 cmp = (array_cmp[0] != 0);
00152
00153 for(i=1 ; i<8 ; i++) {
00154 cmp &= (array_cmp[i] != 0);
00155 }
00156
00157 if(cmp != 0)
00158 return FLGR_TRUE;
00159 else
00160 return FLGR_FALSE;
00161
00162 #else
00163 #if defined(__MMX__) && (__SSE__)
00164 int i, nbvector = dat1->length/4;
00165 fgUINT16 array_cmp[8] __attribute__ ((aligned (16)));
00166 fgUINT16 *psrc1 = (fgUINT16*) dat1->array;
00167 fgUINT16 *psrc2 = (fgUINT16*) dat2->array;
00168 __m64 *vec_psrc1 = (__m64*) dat1->array;
00169 __m64 *vec_psrc2 = (__m64*) dat2->array;
00170 __m64 vectest,vectmp;
00171 fgUINT16 cmp;
00172
00173
00174
00175 i=dat1->length;
00176 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00177 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00178
00179 vectest = _mm_cmpeq_pi16(vec_psrc1[0], vec_psrc2[0]);
00180 for(i=1 ; i<=nbvector ; i++) {
00181 vectmp = _mm_cmpeq_pi16(vec_psrc1[i], vec_psrc2[i]);
00182 vectest = _mm_and_si64(vectest,vectmp);
00183 }
00184
00185 *((__m64*) array_cmp) = vectest;
00186
00187 cmp = (array_cmp[0] != 0);
00188
00189 for(i=1 ; i<4 ; i++) {
00190 cmp &= (array_cmp[i] != 0);
00191 }
00192
00193 if(cmp != 0)
00194 return FLGR_TRUE;
00195 else
00196 return FLGR_FALSE;
00197
00198 #else
00199
00200 POST_ERROR("SSE/SSE2 not activated\n");
00201 return FLGR_RET_NOT_IMPLEMENTED;
00202 #endif
00203 #endif
00204 }
00205
00206
00207 int flgr1d_compare_eq_fast_fgUINT32(FLGR_Data1D *dat1, FLGR_Data1D *dat2) {
00208 #ifdef __SSE2__
00209 int i, nbvector = dat1->length/4;
00210 fgUINT32 array_cmp[4] __attribute__ ((aligned (16)));
00211 fgUINT32 *psrc1 = (fgUINT32*) dat1->array;
00212 fgUINT32 *psrc2 = (fgUINT32*) dat2->array;
00213 __m128i *vec_psrc1 = (__m128i*) dat1->array;
00214 __m128i *vec_psrc2 = (__m128i*) dat2->array;
00215 __m128i vectest,vectmp;
00216 fgUINT32 cmp;
00217
00218
00219
00220 i=dat1->length;
00221 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00222 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00223
00224 vectest = _mm_cmpeq_epi32(vec_psrc1[0], vec_psrc2[0]);
00225 for(i=1 ; i<=nbvector ; i++) {
00226 vectmp = _mm_cmpeq_epi32(vec_psrc1[i], vec_psrc2[i]);
00227 vectest = _mm_and_si128(vectest,vectmp);
00228 }
00229
00230 *((__m128i*) array_cmp) = vectest;
00231
00232 cmp = (array_cmp[0] != 0);
00233
00234 for(i=1 ; i<4 ; i++) {
00235 cmp &= (array_cmp[i] != 0);
00236 }
00237
00238 if(cmp != 0)
00239 return FLGR_TRUE;
00240 else
00241 return FLGR_FALSE;
00242
00243 #else
00244 #if defined(__MMX__) && (__SSE__)
00245 int i, nbvector = dat1->length/2;
00246 fgUINT32 array_cmp[8] __attribute__ ((aligned (16)));
00247 fgUINT32 *psrc1 = (fgUINT32*) dat1->array;
00248 fgUINT32 *psrc2 = (fgUINT32*) dat2->array;
00249 __m64 *vec_psrc1 = (__m64*) dat1->array;
00250 __m64 *vec_psrc2 = (__m64*) dat2->array;
00251 __m64 vectest,vectmp;
00252 fgUINT32 cmp;
00253
00254
00255
00256 i=dat1->length;
00257 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00258
00259 vectest = _mm_cmpeq_pi32(vec_psrc1[0], vec_psrc2[0]);
00260 for(i=1 ; i<=nbvector ; i++) {
00261 vectmp = _mm_cmpeq_pi32(vec_psrc1[i], vec_psrc2[i]);
00262 vectest = _mm_and_si64(vectest,vectmp);
00263 }
00264
00265 *((__m64*) array_cmp) = vectest;
00266
00267 cmp = (array_cmp[0] != 0);
00268
00269 for(i=1 ; i<2; i++) {
00270 cmp &= (array_cmp[i] != 0);
00271 }
00272
00273 if(cmp != 0)
00274 return FLGR_TRUE;
00275 else
00276 return FLGR_FALSE;
00277
00278 #else
00279
00280 POST_ERROR("SSE/SSE2 not activated\n");
00281 return FLGR_RET_NOT_IMPLEMENTED;
00282 #endif
00283 #endif
00284 }
00285
00286
00287
00288 int flgr1d_compare_eq_fast_fgINT8(FLGR_Data1D *dat1, FLGR_Data1D *dat2) {
00289 #ifdef __SSE2__
00290 int i, nbvector = dat1->length/16;
00291 fgINT8 array_cmp[16] __attribute__ ((aligned (16)));
00292 fgINT8 *psrc1 = (fgINT8*) dat1->array;
00293 fgINT8 *psrc2 = (fgINT8*) dat2->array;
00294 __m128i *vec_psrc1 = (__m128i*) dat1->array;
00295 __m128i *vec_psrc2 = (__m128i*) dat2->array;
00296 __m128i vectest,vectmp;
00297 fgINT8 cmp;
00298
00299
00300
00301 i=dat1->length;
00302 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00303 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00304 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00305 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00306 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00307 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00308 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00309 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00310
00311 vectest = _mm_cmpeq_epi8(vec_psrc1[0], vec_psrc2[0]);
00312 for(i=1 ; i<=nbvector ; i++) {
00313 vectmp = _mm_cmpeq_epi8(vec_psrc1[i], vec_psrc2[i]);
00314 vectest = _mm_and_si128(vectest,vectmp);
00315 }
00316
00317 *((__m128i*) array_cmp) = vectest;
00318
00319 cmp = (array_cmp[0] != 0);
00320
00321 for(i=1 ; i<16 ; i++) {
00322 cmp &= (array_cmp[i] != 0);
00323 }
00324
00325 if(cmp != 0)
00326 return FLGR_TRUE;
00327 else
00328 return FLGR_FALSE;
00329
00330 #else
00331 #if defined(__MMX__) && (__SSE__)
00332 int i, nbvector = dat1->length/8;
00333 fgINT8 array_cmp[8] __attribute__ ((aligned (16)));
00334 fgINT8 *psrc1 = (fgINT8*) dat1->array;
00335 fgINT8 *psrc2 = (fgINT8*) dat2->array;
00336 __m64 *vec_psrc1 = (__m64*) dat1->array;
00337 __m64 *vec_psrc2 = (__m64*) dat2->array;
00338 __m64 vectest,vectmp;
00339 fgINT8 cmp;
00340
00341
00342
00343 i=dat1->length;
00344 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00345 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00346 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00347 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00348
00349 vectest = _mm_cmpeq_pi8(vec_psrc1[0], vec_psrc2[0]);
00350 for(i=1 ; i<=nbvector ; i++) {
00351 vectmp = _mm_cmpeq_pi8(vec_psrc1[i], vec_psrc2[i]);
00352 vectest = _mm_and_si64(vectest,vectmp);
00353 }
00354
00355 *((__m64*) array_cmp) = vectest;
00356
00357 cmp = (array_cmp[0] != 0);
00358
00359 for(i=1 ; i<8 ; i++) {
00360 cmp &= (array_cmp[i] != 0);
00361 }
00362
00363 if(cmp != 0)
00364 return FLGR_TRUE;
00365 else
00366 return FLGR_FALSE;
00367
00368 #else
00369
00370 POST_ERROR("SSE2 not activated\n");
00371 return FLGR_RET_NOT_IMPLEMENTED;
00372 #endif
00373 #endif
00374 }
00375
00376
00377 int flgr1d_compare_eq_fast_fgINT16(FLGR_Data1D *dat1, FLGR_Data1D *dat2) {
00378 #ifdef __SSE2__
00379 int i, nbvector = dat1->length/8;
00380 fgINT16 array_cmp[8] __attribute__ ((aligned (16)));
00381 fgINT16 *psrc1 = (fgINT16*) dat1->array;
00382 fgINT16 *psrc2 = (fgINT16*) dat2->array;
00383 __m128i *vec_psrc1 = (__m128i*) dat1->array;
00384 __m128i *vec_psrc2 = (__m128i*) dat2->array;
00385 __m128i vectest,vectmp;
00386 fgINT16 cmp;
00387
00388
00389
00390 i=dat1->length;
00391 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00392 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00393 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00394 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00395
00396 vectest = _mm_cmpeq_epi16(vec_psrc1[0], vec_psrc2[0]);
00397 for(i=1 ; i<=nbvector ; i++) {
00398 vectmp = _mm_cmpeq_epi16(vec_psrc1[i], vec_psrc2[i]);
00399 vectest = _mm_and_si128(vectest,vectmp);
00400 }
00401
00402 *((__m128i*) array_cmp) = vectest;
00403
00404 cmp = (array_cmp[0] != 0);
00405
00406 for(i=1 ; i<8 ; i++) {
00407 cmp &= (array_cmp[i] != 0);
00408 }
00409
00410 if(cmp != 0)
00411 return FLGR_TRUE;
00412 else
00413 return FLGR_FALSE;
00414
00415 #else
00416 #if defined(__MMX__) && (__SSE__)
00417 int i, nbvector = dat1->length/4;
00418 fgINT16 array_cmp[8] __attribute__ ((aligned (16)));
00419 fgINT16 *psrc1 = (fgINT16*) dat1->array;
00420 fgINT16 *psrc2 = (fgINT16*) dat2->array;
00421 __m64 *vec_psrc1 = (__m64*) dat1->array;
00422 __m64 *vec_psrc2 = (__m64*) dat2->array;
00423 __m64 vectest,vectmp;
00424 fgINT16 cmp;
00425
00426
00427
00428 i=dat1->length;
00429 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00430 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00431
00432 vectest = _mm_cmpeq_pi16(vec_psrc1[0], vec_psrc2[0]);
00433 for(i=1 ; i<=nbvector ; i++) {
00434 vectmp = _mm_cmpeq_pi16(vec_psrc1[i], vec_psrc2[i]);
00435 vectest = _mm_and_si64(vectest,vectmp);
00436 }
00437
00438 *((__m64*) array_cmp) = vectest;
00439
00440 cmp = (array_cmp[0] != 0);
00441
00442 for(i=1 ; i<4 ; i++) {
00443 cmp &= (array_cmp[i] != 0);
00444 }
00445
00446 if(cmp != 0)
00447 return FLGR_TRUE;
00448 else
00449 return FLGR_FALSE;
00450 #else
00451
00452 POST_ERROR("SSE/SSE2 not activated\n");
00453 return FLGR_RET_NOT_IMPLEMENTED;
00454 #endif
00455 #endif
00456 }
00457
00458
00459 int flgr1d_compare_eq_fast_fgINT32(FLGR_Data1D *dat1, FLGR_Data1D *dat2) {
00460 #ifdef __SSE2__
00461 int i, nbvector = dat1->length/4;
00462 fgINT32 array_cmp[4] __attribute__ ((aligned (16)));
00463 fgINT32 *psrc1 = (fgINT32*) dat1->array;
00464 fgINT32 *psrc2 = (fgINT32*) dat2->array;
00465 __m128i *vec_psrc1 = (__m128i*) dat1->array;
00466 __m128i *vec_psrc2 = (__m128i*) dat2->array;
00467 __m128i vectest,vectmp;
00468 fgINT32 cmp;
00469
00470
00471
00472 i=dat1->length;
00473 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00474 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00475
00476 vectest = _mm_cmpeq_epi32(vec_psrc1[0], vec_psrc2[0]);
00477 for(i=1 ; i<=nbvector ; i++) {
00478 vectmp = _mm_cmpeq_epi32(vec_psrc1[i], vec_psrc2[i]);
00479 vectest = _mm_and_si128(vectest,vectmp);
00480 }
00481
00482 *((__m128i*) array_cmp) = vectest;
00483
00484 cmp = (array_cmp[0] != 0);
00485
00486 for(i=1 ; i<4 ; i++) {
00487 cmp &= (array_cmp[i] != 0);
00488 }
00489
00490 if(cmp != 0)
00491 return FLGR_TRUE;
00492 else
00493 return FLGR_FALSE;
00494
00495 #else
00496 #if defined(__MMX__) && (__SSE__)
00497 int i, nbvector = dat1->length/2;
00498 fgINT32 array_cmp[8] __attribute__ ((aligned (16)));
00499 fgINT32 *psrc1 = (fgINT32*) dat1->array;
00500 fgINT32 *psrc2 = (fgINT32*) dat2->array;
00501 __m64 *vec_psrc1 = (__m64*) dat1->array;
00502 __m64 *vec_psrc2 = (__m64*) dat2->array;
00503 __m64 vectest,vectmp;
00504 fgINT32 cmp;
00505
00506
00507
00508 i=dat1->length;
00509 psrc1[i]=0;psrc2[i++]=0;psrc1[i]=0;psrc2[i++]=0;
00510
00511 vectest = _mm_cmpeq_pi32(vec_psrc1[0], vec_psrc2[0]);
00512 for(i=1 ; i<=nbvector ; i++) {
00513 vectmp = _mm_cmpeq_pi32(vec_psrc1[i], vec_psrc2[i]);
00514 vectest = _mm_and_si64(vectest,vectmp);
00515 }
00516
00517 *((__m64*) array_cmp) = vectest;
00518
00519 cmp = (array_cmp[0] != 0);
00520
00521 for(i=1 ; i<2; i++) {
00522 cmp &= (array_cmp[i] != 0);
00523 }
00524
00525 if(cmp != 0)
00526 return FLGR_TRUE;
00527 else
00528 return FLGR_FALSE;
00529
00530 #else
00531
00532 POST_ERROR("SSE/SSE2 not activated\n");
00533 return FLGR_RET_NOT_IMPLEMENTED;
00534 #endif
00535 #endif
00536 }
00537
00538
00539