00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include <stdio.h>
00022 #include <stdlib.h>
00023 #include <string.h>
00024
00025 #ifdef __SSE2__
00026 #include <emmintrin.h>
00027 #include <xmmintrin.h>
00028 #endif
00029
00030 #include "flgrCoreDispatch.h"
00031 #include "flgrCoreReplaceFast.h"
00032
00033
00034 void flgr1d_replace_I_LE_I_I_I_fast_fgUINT8(FLGR_Data1D *datout,
00035 FLGR_Data1D *dattest1, FLGR_Data1D *dattest2,
00036 FLGR_Data1D *dattrue, FLGR_Data1D *datfalse) {
00037 #ifdef __SSE2__
00038 int j;
00039 fgUINT8 *rowptr_out,*rowptr_t1,*rowptr_t2;
00040 fgUINT8 *rowptr_true, *rowptr_false;
00041
00042 __m128i ptst1, ptst2, ptrue, pfalse;
00043 __m128i mask,one;
00044
00045
00046
00047 one = _mm_set1_epi8((char) 0xFF);
00048
00049 rowptr_out = (fgUINT8 *) datout->array;
00050 rowptr_t1 = (fgUINT8 *) dattest1->array;
00051 rowptr_t2 = (fgUINT8 *) dattest2->array;
00052 rowptr_true = (fgUINT8 *) dattrue->array;
00053 rowptr_false = (fgUINT8 *) datfalse->array;
00054
00055
00056 for(j=0 ; j<datout->length ; j+=16) {
00057 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
00058 ptst2 = _mm_load_si128((__m128i*) rowptr_t2);
00059 ptrue = _mm_load_si128((__m128i*) rowptr_true);
00060 pfalse = _mm_load_si128((__m128i*) rowptr_false);
00061
00062 ptst1 = _mm_max_epu8(ptst1,ptst2);
00063 mask = _mm_cmpeq_epi8(ptst1,ptst2);
00064 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
00065
00066 mask = _mm_andnot_si128(mask,one);
00067 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
00068
00069 rowptr_false+=16;
00070 rowptr_true+=16;
00071 rowptr_t2+=16;
00072 rowptr_t1+=16;
00073 rowptr_out+=16;
00074 }
00075 #endif
00076 }
00077 void flgr1d_replace_I_EQ_I_I_I_fast_fgUINT8(FLGR_Data1D *datout,
00078 FLGR_Data1D *dattest1, FLGR_Data1D *dattest2,
00079 FLGR_Data1D *dattrue, FLGR_Data1D *datfalse) {
00080 #ifdef __SSE2__
00081 int j;
00082 fgUINT8 *rowptr_out,*rowptr_t1,*rowptr_t2;
00083 fgUINT8 *rowptr_true, *rowptr_false;
00084
00085 __m128i ptst1, ptst2, ptrue, pfalse;
00086 __m128i mask,one;
00087
00088
00089
00090 one = _mm_set1_epi8((char) 0xFF);
00091
00092 rowptr_out = (fgUINT8 *) datout->array;
00093 rowptr_t1 = (fgUINT8 *) dattest1->array;
00094 rowptr_t2 = (fgUINT8 *) dattest2->array;
00095 rowptr_true = (fgUINT8 *) dattrue->array;
00096 rowptr_false = (fgUINT8 *) datfalse->array;
00097
00098 for(j=0 ; j<datout->length ; j+=16) {
00099 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
00100 ptst2 = _mm_load_si128((__m128i*) rowptr_t2);
00101 ptrue = _mm_load_si128((__m128i*) rowptr_true);
00102 pfalse = _mm_load_si128((__m128i*) rowptr_false);
00103
00104
00105 mask = _mm_cmpeq_epi8(ptst1,ptst2);
00106 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
00107
00108 mask = _mm_andnot_si128(mask,one);
00109 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
00110
00111 rowptr_false+=16;
00112 rowptr_true+=16;
00113 rowptr_t2+=16;
00114 rowptr_t1+=16;
00115 rowptr_out+=16;
00116 }
00117 #endif
00118 }
00119 void flgr1d_replace_I_NE_I_I_I_fast_fgUINT8(FLGR_Data1D *datout,
00120 FLGR_Data1D *dattest1, FLGR_Data1D *dattest2,
00121 FLGR_Data1D *dattrue, FLGR_Data1D *datfalse) {
00122 #ifdef __SSE2__
00123 int j;
00124 fgUINT8 *rowptr_out,*rowptr_t1,*rowptr_t2;
00125 fgUINT8 *rowptr_true, *rowptr_false;
00126
00127 __m128i ptst1, ptst2, ptrue, pfalse;
00128 __m128i mask,one;
00129
00130
00131
00132 one = _mm_set1_epi8((char) 0xFF);
00133
00134 rowptr_out = (fgUINT8 *) datout->array;
00135 rowptr_t1 = (fgUINT8 *) dattest1->array;
00136 rowptr_t2 = (fgUINT8 *) dattest2->array;
00137 rowptr_true = (fgUINT8 *) dattrue->array;
00138 rowptr_false = (fgUINT8 *) datfalse->array;
00139
00140
00141 for(j=0 ; j<datout->length ; j+=16) {
00142 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
00143 ptst2 = _mm_load_si128((__m128i*) rowptr_t2);
00144 ptrue = _mm_load_si128((__m128i*) rowptr_true);
00145 pfalse = _mm_load_si128((__m128i*) rowptr_false);
00146
00147 mask = _mm_cmpeq_epi8(ptst1,ptst2);
00148 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
00149
00150 mask = _mm_andnot_si128(mask,one);
00151 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
00152
00153
00154 rowptr_false+=16;
00155 rowptr_true+=16;
00156 rowptr_t2+=16;
00157 rowptr_t1+=16;
00158 rowptr_out+=16;
00159 }
00160 #endif
00161 }
00162 void flgr1d_replace_I_GE_I_I_I_fast_fgUINT8(FLGR_Data1D *datout,
00163 FLGR_Data1D *dattest1, FLGR_Data1D *dattest2,
00164 FLGR_Data1D *dattrue, FLGR_Data1D *datfalse) {
00165 #ifdef __SSE2__
00166 int j;
00167 fgUINT8 *rowptr_out,*rowptr_t1,*rowptr_t2;
00168 fgUINT8 *rowptr_true, *rowptr_false;
00169
00170 __m128i ptst1, ptst2, ptrue, pfalse;
00171 __m128i mask,one;
00172
00173
00174
00175 one = _mm_set1_epi8((char) 0xFF);
00176
00177 rowptr_out = (fgUINT8 *) datout->array;
00178 rowptr_t1 = (fgUINT8 *) dattest1->array;
00179 rowptr_t2 = (fgUINT8 *) dattest2->array;
00180 rowptr_true = (fgUINT8 *) dattrue->array;
00181 rowptr_false = (fgUINT8 *) datfalse->array;
00182
00183
00184 for(j=0 ; j<datout->length ; j+=16) {
00185 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
00186 ptst2 = _mm_load_si128((__m128i*) rowptr_t2);
00187 ptrue = _mm_load_si128((__m128i*) rowptr_true);
00188 pfalse = _mm_load_si128((__m128i*) rowptr_false);
00189
00190 ptst1 = _mm_min_epu8(ptst1,ptst2);
00191 mask = _mm_cmpeq_epi8(ptst1,ptst2);
00192 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
00193
00194 mask = _mm_andnot_si128(mask,one);
00195 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
00196
00197
00198 rowptr_false+=16;
00199 rowptr_true+=16;
00200 rowptr_t2+=16;
00201 rowptr_t1+=16;
00202 rowptr_out+=16;
00203 }
00204 #endif
00205 }
00206
00207
00208
00209
00210
00211
00212
00213
00214 void flgr1d_replace_I_LE_I_I_C_fast_fgUINT8(FLGR_Data1D *datout,
00215 FLGR_Data1D *dattest1, FLGR_Data1D *dattest2,
00216 FLGR_Data1D *dattrue, fgUINT8 vfalse) {
00217 #ifdef __SSE2__
00218 int j;
00219 fgUINT8 *rowptr_out,*rowptr_t1,*rowptr_t2;
00220 fgUINT8 *rowptr_true;
00221
00222 __m128i ptst1, ptst2, ptrue, pfalse;
00223 __m128i mask,one;
00224
00225
00226
00227 one = _mm_set1_epi8((char) 0xFF);
00228 pfalse = _mm_set1_epi8((char) vfalse);
00229
00230 rowptr_out = (fgUINT8 *) datout->array;
00231 rowptr_t1 = (fgUINT8 *) dattest1->array;
00232 rowptr_t2 = (fgUINT8 *) dattest2->array;
00233 rowptr_true = (fgUINT8 *) dattrue->array;
00234
00235
00236 for(j=0 ; j<datout->length ; j+=16) {
00237 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
00238 ptst2 = _mm_load_si128((__m128i*) rowptr_t2);
00239 ptrue = _mm_load_si128((__m128i*) rowptr_true);
00240
00241 ptst1 = _mm_max_epu8(ptst1,ptst2);
00242 mask = _mm_cmpeq_epi8(ptst1,ptst2);
00243 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
00244
00245 mask = _mm_andnot_si128(mask,one);
00246 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
00247
00248
00249 rowptr_true+=16;
00250 rowptr_t2+=16;
00251 rowptr_t1+=16;
00252 rowptr_out+=16;
00253 }
00254 #endif
00255 }
00256 void flgr1d_replace_I_EQ_I_I_C_fast_fgUINT8(FLGR_Data1D *datout,
00257 FLGR_Data1D *dattest1, FLGR_Data1D *dattest2,
00258 FLGR_Data1D *dattrue, fgUINT8 vfalse) {
00259 #ifdef __SSE2__
00260 int j;
00261 fgUINT8 *rowptr_out,*rowptr_t1,*rowptr_t2;
00262 fgUINT8 *rowptr_true;
00263
00264 __m128i ptst1, ptst2, ptrue, pfalse;
00265 __m128i mask,one;
00266
00267
00268
00269 one = _mm_set1_epi8((char) 0xFF);
00270 pfalse = _mm_set1_epi8((char) vfalse);
00271
00272 rowptr_out = (fgUINT8 *) datout->array;
00273 rowptr_t1 = (fgUINT8 *) dattest1->array;
00274 rowptr_t2 = (fgUINT8 *) dattest2->array;
00275 rowptr_true = (fgUINT8 *) dattrue->array;
00276
00277
00278 for(j=0 ; j<datout->length ; j+=16) {
00279 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
00280 ptst2 = _mm_load_si128((__m128i*) rowptr_t2);
00281 ptrue = _mm_load_si128((__m128i*) rowptr_true);
00282
00283 mask = _mm_cmpeq_epi8(ptst1,ptst2);
00284 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
00285
00286 mask = _mm_andnot_si128(mask,one);
00287 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
00288
00289
00290 rowptr_true+=16;
00291 rowptr_t2+=16;
00292 rowptr_t1+=16;
00293 rowptr_out+=16;
00294 }
00295 #endif
00296 }
00297 void flgr1d_replace_I_NE_I_I_C_fast_fgUINT8(FLGR_Data1D *datout,
00298 FLGR_Data1D *dattest1, FLGR_Data1D *dattest2,
00299 FLGR_Data1D *dattrue, fgUINT8 vfalse) {
00300 #ifdef __SSE2__
00301 int j;
00302 fgUINT8 *rowptr_out,*rowptr_t1,*rowptr_t2;
00303 fgUINT8 *rowptr_true;
00304
00305 __m128i ptst1, ptst2, ptrue, pfalse;
00306 __m128i mask,one;
00307
00308
00309
00310 one = _mm_set1_epi8((char) 0xFF);
00311 pfalse = _mm_set1_epi8((char) vfalse);
00312
00313 rowptr_out = (fgUINT8 *) datout->array;
00314 rowptr_t1 = (fgUINT8 *) dattest1->array;
00315 rowptr_t2 = (fgUINT8 *) dattest2->array;
00316 rowptr_true = (fgUINT8 *) dattrue->array;
00317
00318
00319 for(j=0 ; j<datout->length ; j+=16) {
00320 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
00321 ptst2 = _mm_load_si128((__m128i*) rowptr_t2);
00322 ptrue = _mm_load_si128((__m128i*) rowptr_true);
00323
00324 mask = _mm_cmpeq_epi8(ptst1,ptst2);
00325 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
00326
00327 mask = _mm_andnot_si128(mask,one);
00328 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
00329
00330
00331 rowptr_true+=16;
00332 rowptr_t2+=16;
00333 rowptr_t1+=16;
00334 rowptr_out+=16;
00335 }
00336 #endif
00337 }
00338 void flgr1d_replace_I_GE_I_I_C_fast_fgUINT8(FLGR_Data1D *datout,
00339 FLGR_Data1D *dattest1, FLGR_Data1D *dattest2,
00340 FLGR_Data1D *dattrue, fgUINT8 vfalse) {
00341 #ifdef __SSE2__
00342 int j;
00343 fgUINT8 *rowptr_out,*rowptr_t1,*rowptr_t2;
00344 fgUINT8 *rowptr_true;
00345
00346 __m128i ptst1, ptst2, ptrue, pfalse;
00347 __m128i mask,one;
00348
00349
00350
00351 one = _mm_set1_epi8((char) 0xFF);
00352 pfalse = _mm_set1_epi8((char) vfalse);
00353
00354 rowptr_out = (fgUINT8 *) datout->array;
00355 rowptr_t1 = (fgUINT8 *) dattest1->array;
00356 rowptr_t2 = (fgUINT8 *) dattest2->array;
00357 rowptr_true = (fgUINT8 *) dattrue->array;
00358
00359
00360 for(j=0 ; j<datout->length ; j+=16) {
00361 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
00362 ptst2 = _mm_load_si128((__m128i*) rowptr_t2);
00363 ptrue = _mm_load_si128((__m128i*) rowptr_true);
00364
00365 ptst1 = _mm_min_epu8(ptst1,ptst2);
00366 mask = _mm_cmpeq_epi8(ptst1,ptst2);
00367 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
00368
00369 mask = _mm_andnot_si128(mask,one);
00370 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
00371
00372
00373 rowptr_true+=16;
00374 rowptr_t2+=16;
00375 rowptr_t1+=16;
00376 rowptr_out+=16;
00377 }
00378 #endif
00379 }
00380
00381
00382
00383
00384
00385
00386
00387
00388
00389
00390 void flgr1d_replace_I_LE_I_C_I_fast_fgUINT8(FLGR_Data1D *datout,
00391 FLGR_Data1D *dattest1, FLGR_Data1D *dattest2,
00392 fgUINT8 vtrue, FLGR_Data1D *datfalse) {
00393 #ifdef __SSE2__
00394 int j;
00395 fgUINT8 *rowptr_out,*rowptr_t1,*rowptr_t2;
00396 fgUINT8 *rowptr_false;
00397
00398 __m128i ptst1, ptst2, ptrue, pfalse;
00399 __m128i mask,one;
00400
00401
00402
00403 one = _mm_set1_epi8((char) 0xFF);
00404 ptrue = _mm_set1_epi8((char) vtrue);
00405
00406 rowptr_out = (fgUINT8 *) datout->array;
00407 rowptr_t1 = (fgUINT8 *) dattest1->array;
00408 rowptr_t2 = (fgUINT8 *) dattest2->array;
00409 rowptr_false = (fgUINT8 *) datfalse->array;
00410
00411
00412 for(j=0 ; j<datout->length ; j+=16) {
00413 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
00414 ptst2 = _mm_load_si128((__m128i*) rowptr_t2);
00415 pfalse = _mm_load_si128((__m128i*) rowptr_false);
00416
00417 ptst1 = _mm_max_epu8(ptst1,ptst2);
00418 mask = _mm_cmpeq_epi8(ptst1,ptst2);
00419 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
00420
00421 mask = _mm_andnot_si128(mask,one);
00422 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
00423
00424
00425 rowptr_false+=16;
00426 rowptr_t2+=16;
00427 rowptr_t1+=16;
00428 rowptr_out+=16;
00429 }
00430 #endif
00431 }
00432 void flgr1d_replace_I_EQ_I_C_I_fast_fgUINT8(FLGR_Data1D *datout,
00433 FLGR_Data1D *dattest1, FLGR_Data1D *dattest2,
00434 fgUINT8 vtrue, FLGR_Data1D *datfalse) {
00435 #ifdef __SSE2__
00436 int j;
00437 fgUINT8 *rowptr_out,*rowptr_t1,*rowptr_t2;
00438 fgUINT8 *rowptr_false;
00439
00440 __m128i ptst1, ptst2, ptrue, pfalse;
00441 __m128i mask,one;
00442
00443
00444
00445 one = _mm_set1_epi8((char) 0xFF);
00446 ptrue = _mm_set1_epi8((char) vtrue);
00447
00448 rowptr_out = (fgUINT8 *) datout->array;
00449 rowptr_t1 = (fgUINT8 *) dattest1->array;
00450 rowptr_t2 = (fgUINT8 *) dattest2->array;
00451 rowptr_false = (fgUINT8 *) datfalse->array;
00452
00453
00454 for(j=0 ; j<datout->length ; j+=16) {
00455 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
00456 ptst2 = _mm_load_si128((__m128i*) rowptr_t2);
00457 pfalse = _mm_load_si128((__m128i*) rowptr_false);
00458
00459 mask = _mm_cmpeq_epi8(ptst1,ptst2);
00460 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
00461
00462 mask = _mm_andnot_si128(mask,one);
00463 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
00464
00465
00466 rowptr_false+=16;
00467 rowptr_t2+=16;
00468 rowptr_t1+=16;
00469 rowptr_out+=16;
00470 }
00471 #endif
00472 }
00473 void flgr1d_replace_I_NE_I_C_I_fast_fgUINT8(FLGR_Data1D *datout,
00474 FLGR_Data1D *dattest1, FLGR_Data1D *dattest2,
00475 fgUINT8 vtrue, FLGR_Data1D *datfalse) {
00476 #ifdef __SSE2__
00477 int j;
00478 fgUINT8 *rowptr_out,*rowptr_t1,*rowptr_t2;
00479 fgUINT8 *rowptr_false;
00480
00481 __m128i ptst1, ptst2, ptrue, pfalse;
00482 __m128i mask,one;
00483
00484
00485
00486 one = _mm_set1_epi8((char) 0xFF);
00487 ptrue = _mm_set1_epi8((char) vtrue);
00488
00489 rowptr_out = (fgUINT8 *) datout->array;
00490 rowptr_t1 = (fgUINT8 *) dattest1->array;
00491 rowptr_t2 = (fgUINT8 *) dattest2->array;
00492 rowptr_false = (fgUINT8 *) datfalse->array;
00493
00494
00495 for(j=0 ; j<datout->length ; j+=16) {
00496 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
00497 ptst2 = _mm_load_si128((__m128i*) rowptr_t2);
00498 pfalse = _mm_load_si128((__m128i*) rowptr_false);
00499
00500 mask = _mm_cmpeq_epi8(ptst1,ptst2);
00501 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
00502
00503 mask = _mm_andnot_si128(mask,one);
00504 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
00505
00506
00507 rowptr_false+=16;
00508 rowptr_t2+=16;
00509 rowptr_t1+=16;
00510 rowptr_out+=16;
00511 }
00512 #endif
00513 }
00514 void flgr1d_replace_I_GE_I_C_I_fast_fgUINT8(FLGR_Data1D *datout,
00515 FLGR_Data1D *dattest1, FLGR_Data1D *dattest2,
00516 fgUINT8 vtrue, FLGR_Data1D *datfalse) {
00517 #ifdef __SSE2__
00518 int j;
00519 fgUINT8 *rowptr_out,*rowptr_t1,*rowptr_t2;
00520 fgUINT8 *rowptr_false;
00521
00522 __m128i ptst1, ptst2, ptrue, pfalse;
00523 __m128i mask,one;
00524
00525
00526
00527 one = _mm_set1_epi8((char) 0xFF);
00528 ptrue = _mm_set1_epi8((char) vtrue);
00529
00530 rowptr_out = (fgUINT8 *) datout->array;
00531 rowptr_t1 = (fgUINT8 *) dattest1->array;
00532 rowptr_t2 = (fgUINT8 *) dattest2->array;
00533 rowptr_false = (fgUINT8 *) datfalse->array;
00534
00535
00536 for(j=0 ; j<datout->length ; j+=16) {
00537 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
00538 ptst2 = _mm_load_si128((__m128i*) rowptr_t2);
00539 pfalse = _mm_load_si128((__m128i*) rowptr_false);
00540
00541 ptst1 = _mm_min_epu8(ptst1,ptst2);
00542 mask = _mm_cmpeq_epi8(ptst1,ptst2);
00543 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
00544
00545 mask = _mm_andnot_si128(mask,one);
00546 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
00547
00548
00549 rowptr_false+=16;
00550 rowptr_t2+=16;
00551 rowptr_t1+=16;
00552 rowptr_out+=16;
00553 }
00554 #endif
00555 }
00556
00557
00558
00559
00560
00561
00562
00563
00564
00565 void flgr1d_replace_I_LE_I_C_C_fast_fgUINT8(FLGR_Data1D *datout,
00566 FLGR_Data1D *dattest1, FLGR_Data1D *dattest2,
00567 fgUINT8 vtrue, fgUINT8 vfalse) {
00568 #ifdef __SSE2__
00569 int j;
00570 fgUINT8 *rowptr_out,*rowptr_t1,*rowptr_t2;
00571
00572 __m128i ptst1, ptst2, ptrue, pfalse;
00573 __m128i mask,one;
00574
00575
00576
00577 one = _mm_set1_epi8((char) 0xFF);
00578 ptrue = _mm_set1_epi8((char) vtrue);
00579 pfalse = _mm_set1_epi8((char) vfalse);
00580
00581 rowptr_out = (fgUINT8 *) datout->array;
00582 rowptr_t1 = (fgUINT8 *) dattest1->array;
00583 rowptr_t2 = (fgUINT8 *) dattest2->array;
00584
00585
00586 for(j=0 ; j<datout->length ; j+=16) {
00587 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
00588 ptst2 = _mm_load_si128((__m128i*) rowptr_t2);
00589
00590 ptst1 = _mm_max_epu8(ptst1,ptst2);
00591 mask = _mm_cmpeq_epi8(ptst1,ptst2);
00592 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
00593
00594 mask = _mm_andnot_si128(mask,one);
00595 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
00596
00597
00598 rowptr_t2+=16;
00599 rowptr_t1+=16;
00600 rowptr_out+=16;
00601 }
00602 #endif
00603 }
00604 void flgr1d_replace_I_EQ_I_C_C_fast_fgUINT8(FLGR_Data1D *datout,
00605 FLGR_Data1D *dattest1, FLGR_Data1D *dattest2,
00606 fgUINT8 vtrue, fgUINT8 vfalse) {
00607 #ifdef __SSE2__
00608 int j;
00609 fgUINT8 *rowptr_out,*rowptr_t1,*rowptr_t2;
00610
00611 __m128i ptst1, ptst2, ptrue, pfalse;
00612 __m128i mask,one;
00613
00614
00615
00616 one = _mm_set1_epi8((char) 0xFF);
00617 ptrue = _mm_set1_epi8((char) vtrue);
00618 pfalse = _mm_set1_epi8((char) vfalse);
00619
00620 rowptr_out = (fgUINT8 *) datout->array;
00621 rowptr_t1 = (fgUINT8 *) dattest1->array;
00622 rowptr_t2 = (fgUINT8 *) dattest2->array;
00623
00624
00625 for(j=0 ; j<datout->length ; j+=16) {
00626 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
00627 ptst2 = _mm_load_si128((__m128i*) rowptr_t2);
00628
00629 mask = _mm_cmpeq_epi8(ptst1,ptst2);
00630 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
00631
00632 mask = _mm_andnot_si128(mask,one);
00633 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
00634
00635
00636 rowptr_t2+=16;
00637 rowptr_t1+=16;
00638 rowptr_out+=16;
00639 }
00640 #endif
00641 }
00642 void flgr1d_replace_I_NE_I_C_C_fast_fgUINT8(FLGR_Data1D *datout,
00643 FLGR_Data1D *dattest1, FLGR_Data1D *dattest2,
00644 fgUINT8 vtrue, fgUINT8 vfalse) {
00645 #ifdef __SSE2__
00646 int j;
00647 fgUINT8 *rowptr_out,*rowptr_t1,*rowptr_t2;
00648
00649 __m128i ptst1, ptst2, ptrue, pfalse;
00650 __m128i mask,one;
00651
00652
00653
00654 one = _mm_set1_epi8((char) 0xFF);
00655 ptrue = _mm_set1_epi8((char) vtrue);
00656 pfalse = _mm_set1_epi8((char) vfalse);
00657
00658 rowptr_out = (fgUINT8 *) datout->array;
00659 rowptr_t1 = (fgUINT8 *) dattest1->array;
00660 rowptr_t2 = (fgUINT8 *) dattest2->array;
00661
00662
00663 for(j=0 ; j<datout->length ; j+=16) {
00664 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
00665 ptst2 = _mm_load_si128((__m128i*) rowptr_t2);
00666
00667 mask = _mm_cmpeq_epi8(ptst1,ptst2);
00668 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
00669
00670 mask = _mm_andnot_si128(mask,one);
00671 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
00672
00673
00674 rowptr_t2+=16;
00675 rowptr_t1+=16;
00676 rowptr_out+=16;
00677 }
00678 #endif
00679 }
00680 void flgr1d_replace_I_GE_I_C_C_fast_fgUINT8(FLGR_Data1D *datout,
00681 FLGR_Data1D *dattest1, FLGR_Data1D *dattest2,
00682 fgUINT8 vtrue, fgUINT8 vfalse) {
00683 #ifdef __SSE2__
00684 int j;
00685 fgUINT8 *rowptr_out,*rowptr_t1,*rowptr_t2;
00686
00687 __m128i ptst1, ptst2, ptrue, pfalse;
00688 __m128i mask,one;
00689
00690
00691
00692 one = _mm_set1_epi8((char) 0xFF);
00693 ptrue = _mm_set1_epi8((char) vtrue);
00694 pfalse = _mm_set1_epi8((char) vfalse);
00695
00696 rowptr_out = (fgUINT8 *) datout->array;
00697 rowptr_t1 = (fgUINT8 *) dattest1->array;
00698 rowptr_t2 = (fgUINT8 *) dattest2->array;
00699
00700
00701 for(j=0 ; j<datout->length ; j+=16) {
00702 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
00703 ptst2 = _mm_load_si128((__m128i*) rowptr_t2);
00704
00705 ptst1 = _mm_min_epu8(ptst1,ptst2);
00706 mask = _mm_cmpeq_epi8(ptst1,ptst2);
00707 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
00708
00709 mask = _mm_andnot_si128(mask,one);
00710 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
00711
00712
00713 rowptr_t2+=16;
00714 rowptr_t1+=16;
00715 rowptr_out+=16;
00716 }
00717 #endif
00718 }
00719
00720
00721
00722
00723
00724
00725
00726
00727
00728
00729
00730 void flgr1d_replace_I_LE_C_I_I_fast_fgUINT8(FLGR_Data1D *datout,
00731 FLGR_Data1D *dattest1,fgUINT8 vtest2,
00732 FLGR_Data1D *dattrue, FLGR_Data1D *datfalse) {
00733 #ifdef __SSE2__
00734 int j;
00735 fgUINT8 *rowptr_out,*rowptr_t1;
00736 fgUINT8 *rowptr_true, *rowptr_false;
00737
00738 __m128i ptst1, ptst2, ptrue, pfalse;
00739 __m128i mask,one;
00740
00741
00742
00743 one = _mm_set1_epi8((char) 0xFF);
00744 ptst2 = _mm_set1_epi8((char) vtest2);
00745
00746 rowptr_out = (fgUINT8 *) datout->array;
00747 rowptr_t1 = (fgUINT8 *) dattest1->array;
00748 rowptr_true = (fgUINT8 *) dattrue->array;
00749 rowptr_false = (fgUINT8 *) datfalse->array;
00750
00751
00752 for(j=0 ; j<datout->length ; j+=16) {
00753 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
00754 ptrue = _mm_load_si128((__m128i*) rowptr_true);
00755 pfalse = _mm_load_si128((__m128i*) rowptr_false);
00756
00757 ptst1 = _mm_max_epu8(ptst1,ptst2);
00758 mask = _mm_cmpeq_epi8(ptst1,ptst2);
00759 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
00760
00761 mask = _mm_andnot_si128(mask,one);
00762 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
00763
00764
00765 rowptr_false+=16;
00766 rowptr_true+=16;
00767 rowptr_t1+=16;
00768 rowptr_out+=16;
00769 }
00770 #endif
00771 }
00772 void flgr1d_replace_I_EQ_C_I_I_fast_fgUINT8(FLGR_Data1D *datout,
00773 FLGR_Data1D *dattest1,fgUINT8 vtest2,
00774 FLGR_Data1D *dattrue, FLGR_Data1D *datfalse) {
00775 #ifdef __SSE2__
00776 int j;
00777 fgUINT8 *rowptr_out,*rowptr_t1;
00778 fgUINT8 *rowptr_true, *rowptr_false;
00779
00780 __m128i ptst1, ptst2, ptrue, pfalse;
00781 __m128i mask,one;
00782
00783
00784
00785 one = _mm_set1_epi8((char) 0xFF);
00786 ptst2 = _mm_set1_epi8((char) vtest2);
00787
00788 rowptr_out = (fgUINT8 *) datout->array;
00789 rowptr_t1 = (fgUINT8 *) dattest1->array;
00790 rowptr_true = (fgUINT8 *) dattrue->array;
00791 rowptr_false = (fgUINT8 *) datfalse->array;
00792
00793
00794 for(j=0 ; j<datout->length ; j+=16) {
00795 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
00796 ptrue = _mm_load_si128((__m128i*) rowptr_true);
00797 pfalse = _mm_load_si128((__m128i*) rowptr_false);
00798
00799 mask = _mm_cmpeq_epi8(ptst1,ptst2);
00800 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
00801
00802 mask = _mm_andnot_si128(mask,one);
00803 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
00804
00805
00806 rowptr_false+=16;
00807 rowptr_true+=16;
00808 rowptr_t1+=16;
00809 rowptr_out+=16;
00810 }
00811 #endif
00812 }
00813 void flgr1d_replace_I_NE_C_I_I_fast_fgUINT8(FLGR_Data1D *datout,
00814 FLGR_Data1D *dattest1,fgUINT8 vtest2,
00815 FLGR_Data1D *dattrue, FLGR_Data1D *datfalse) {
00816 #ifdef __SSE2__
00817 int j;
00818 fgUINT8 *rowptr_out,*rowptr_t1;
00819 fgUINT8 *rowptr_true, *rowptr_false;
00820
00821 __m128i ptst1, ptst2, ptrue, pfalse;
00822 __m128i mask,one;
00823
00824
00825
00826 one = _mm_set1_epi8((char) 0xFF);
00827 ptst2 = _mm_set1_epi8((char) vtest2);
00828
00829 rowptr_out = (fgUINT8 *) datout->array;
00830 rowptr_t1 = (fgUINT8 *) dattest1->array;
00831 rowptr_true = (fgUINT8 *) dattrue->array;
00832 rowptr_false = (fgUINT8 *) datfalse->array;
00833
00834
00835 for(j=0 ; j<datout->length ; j+=16) {
00836 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
00837 ptrue = _mm_load_si128((__m128i*) rowptr_true);
00838 pfalse = _mm_load_si128((__m128i*) rowptr_false);
00839
00840 mask = _mm_cmpeq_epi8(ptst1,ptst2);
00841 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
00842
00843 mask = _mm_andnot_si128(mask,one);
00844 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
00845
00846
00847 rowptr_false+=16;
00848 rowptr_true+=16;
00849 rowptr_t1+=16;
00850 rowptr_out+=16;
00851 }
00852 #endif
00853 }
00854 void flgr1d_replace_I_GE_C_I_I_fast_fgUINT8(FLGR_Data1D *datout,
00855 FLGR_Data1D *dattest1,fgUINT8 vtest2,
00856 FLGR_Data1D *dattrue, FLGR_Data1D *datfalse) {
00857 #ifdef __SSE2__
00858 int j;
00859 fgUINT8 *rowptr_out,*rowptr_t1;
00860 fgUINT8 *rowptr_true, *rowptr_false;
00861
00862 __m128i ptst1, ptst2, ptrue, pfalse;
00863 __m128i mask,one;
00864
00865
00866
00867 one = _mm_set1_epi8((char) 0xFF);
00868 ptst2 = _mm_set1_epi8((char) vtest2);
00869
00870 rowptr_out = (fgUINT8 *) datout->array;
00871 rowptr_t1 = (fgUINT8 *) dattest1->array;
00872 rowptr_true = (fgUINT8 *) dattrue->array;
00873 rowptr_false = (fgUINT8 *) datfalse->array;
00874
00875
00876 for(j=0 ; j<datout->length ; j+=16) {
00877 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
00878 ptrue = _mm_load_si128((__m128i*) rowptr_true);
00879 pfalse = _mm_load_si128((__m128i*) rowptr_false);
00880
00881 ptst1 = _mm_min_epu8(ptst1,ptst2);
00882 mask = _mm_cmpeq_epi8(ptst1,ptst2);
00883 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
00884
00885 mask = _mm_andnot_si128(mask,one);
00886 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
00887
00888
00889 rowptr_false+=16;
00890 rowptr_true+=16;
00891 rowptr_t1+=16;
00892 rowptr_out+=16;
00893 }
00894 #endif
00895 }
00896
00897
00898
00899
00900
00901
00902
00903
00904 void flgr1d_replace_I_LE_C_I_C_fast_fgUINT8(FLGR_Data1D *datout,
00905 FLGR_Data1D *dattest1,fgUINT8 vtest2,
00906 FLGR_Data1D *dattrue, fgUINT8 vfalse) {
00907 #ifdef __SSE2__
00908 int j;
00909 fgUINT8 *rowptr_out,*rowptr_t1;
00910 fgUINT8 *rowptr_true;
00911
00912 __m128i ptst1, ptst2, ptrue, pfalse;
00913 __m128i mask,one;
00914
00915
00916
00917 one = _mm_set1_epi8((char) 0xFF);
00918
00919 ptst2 = _mm_set1_epi8((char) vtest2);
00920 pfalse = _mm_set1_epi8((char) vfalse);
00921
00922 rowptr_out = (fgUINT8 *) datout->array;
00923 rowptr_t1 = (fgUINT8 *) dattest1->array;
00924 rowptr_true = (fgUINT8 *) dattrue->array;
00925
00926
00927 for(j=0 ; j<datout->length ; j+=16) {
00928 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
00929 ptrue = _mm_load_si128((__m128i*) rowptr_true);
00930
00931 ptst1 = _mm_max_epu8(ptst1,ptst2);
00932 mask = _mm_cmpeq_epi8(ptst1,ptst2);
00933 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
00934
00935 mask = _mm_andnot_si128(mask,one);
00936 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
00937
00938
00939 rowptr_true+=16;
00940 rowptr_t1+=16;
00941 rowptr_out+=16;
00942 }
00943 #endif
00944 }
00945 void flgr1d_replace_I_EQ_C_I_C_fast_fgUINT8(FLGR_Data1D *datout,
00946 FLGR_Data1D *dattest1,fgUINT8 vtest2,
00947 FLGR_Data1D *dattrue, fgUINT8 vfalse) {
00948 #ifdef __SSE2__
00949 int j;
00950 fgUINT8 *rowptr_out,*rowptr_t1;
00951 fgUINT8 *rowptr_true;
00952
00953 __m128i ptst1, ptst2, ptrue, pfalse;
00954 __m128i mask,one;
00955
00956
00957
00958 one = _mm_set1_epi8((char) 0xFF);
00959
00960 ptst2 = _mm_set1_epi8((char) vtest2);
00961 pfalse = _mm_set1_epi8((char) vfalse);
00962
00963 rowptr_out = (fgUINT8 *) datout->array;
00964 rowptr_t1 = (fgUINT8 *) dattest1->array;
00965 rowptr_true = (fgUINT8 *) dattrue->array;
00966
00967
00968 for(j=0 ; j<datout->length ; j+=16) {
00969 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
00970 ptrue = _mm_load_si128((__m128i*) rowptr_true);
00971
00972 mask = _mm_cmpeq_epi8(ptst1,ptst2);
00973 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
00974
00975 mask = _mm_andnot_si128(mask,one);
00976 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
00977
00978
00979 rowptr_true+=16;
00980 rowptr_t1+=16;
00981 rowptr_out+=16;
00982 }
00983 #endif
00984 }
00985 void flgr1d_replace_I_NE_C_I_C_fast_fgUINT8(FLGR_Data1D *datout,
00986 FLGR_Data1D *dattest1,fgUINT8 vtest2,
00987 FLGR_Data1D *dattrue, fgUINT8 vfalse) {
00988 #ifdef __SSE2__
00989 int j;
00990 fgUINT8 *rowptr_out,*rowptr_t1;
00991 fgUINT8 *rowptr_true;
00992
00993 __m128i ptst1, ptst2, ptrue, pfalse;
00994 __m128i mask,one;
00995
00996
00997
00998 one = _mm_set1_epi8((char) 0xFF);
00999
01000 ptst2 = _mm_set1_epi8((char) vtest2);
01001 pfalse = _mm_set1_epi8((char) vfalse);
01002
01003 rowptr_out = (fgUINT8 *) datout->array;
01004 rowptr_t1 = (fgUINT8 *) dattest1->array;
01005 rowptr_true = (fgUINT8 *) dattrue->array;
01006
01007
01008 for(j=0 ; j<datout->length ; j+=16) {
01009 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
01010 ptrue = _mm_load_si128((__m128i*) rowptr_true);
01011
01012 mask = _mm_cmpeq_epi8(ptst1,ptst2);
01013 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
01014
01015 mask = _mm_andnot_si128(mask,one);
01016 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
01017
01018
01019 rowptr_true+=16;
01020 rowptr_t1+=16;
01021 rowptr_out+=16;
01022 }
01023 #endif
01024 }
01025 void flgr1d_replace_I_GE_C_I_C_fast_fgUINT8(FLGR_Data1D *datout,
01026 FLGR_Data1D *dattest1,fgUINT8 vtest2,
01027 FLGR_Data1D *dattrue, fgUINT8 vfalse) {
01028 #ifdef __SSE2__
01029 int j;
01030 fgUINT8 *rowptr_out,*rowptr_t1;
01031 fgUINT8 *rowptr_true;
01032
01033 __m128i ptst1, ptst2, ptrue, pfalse;
01034 __m128i mask,one;
01035
01036
01037
01038 one = _mm_set1_epi8((char) 0xFF);
01039
01040 ptst2 = _mm_set1_epi8((char) vtest2);
01041 pfalse = _mm_set1_epi8((char) vfalse);
01042
01043 rowptr_out = (fgUINT8 *) datout->array;
01044 rowptr_t1 = (fgUINT8 *) dattest1->array;
01045 rowptr_true = (fgUINT8 *) dattrue->array;
01046
01047
01048 for(j=0 ; j<datout->length ; j+=16) {
01049 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
01050 ptrue = _mm_load_si128((__m128i*) rowptr_true);
01051
01052 ptst1 = _mm_min_epu8(ptst1,ptst2);
01053 mask = _mm_cmpeq_epi8(ptst1,ptst2);
01054 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
01055
01056 mask = _mm_andnot_si128(mask,one);
01057 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
01058
01059
01060 rowptr_true+=16;
01061 rowptr_t1+=16;
01062 rowptr_out+=16;
01063 }
01064 #endif
01065 }
01066
01067
01068
01069
01070
01071
01072
01073
01074
01075
01076 void flgr1d_replace_I_LE_C_C_I_fast_fgUINT8(FLGR_Data1D *datout,
01077 FLGR_Data1D *dattest1,fgUINT8 vtest2,
01078 fgUINT8 vtrue, FLGR_Data1D *datfalse) {
01079 #ifdef __SSE2__
01080 int j;
01081 fgUINT8 *rowptr_out,*rowptr_t1;
01082 fgUINT8 *rowptr_false;
01083
01084 __m128i ptst1, ptst2, ptrue, pfalse;
01085 __m128i mask,one;
01086
01087
01088
01089 one = _mm_set1_epi8((char) 0xFF);
01090
01091 ptst2 = _mm_set1_epi8((char) vtest2);
01092 ptrue = _mm_set1_epi8((char) vtrue);
01093
01094 rowptr_out = (fgUINT8 *) datout->array;
01095 rowptr_t1 = (fgUINT8 *) dattest1->array;
01096 rowptr_false = (fgUINT8 *) datfalse->array;
01097
01098
01099 for(j=0 ; j<datout->length ; j+=16) {
01100 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
01101 pfalse = _mm_load_si128((__m128i*) rowptr_false);
01102
01103 ptst1 = _mm_max_epu8(ptst1,ptst2);
01104 mask = _mm_cmpeq_epi8(ptst1,ptst2);
01105 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
01106
01107 mask = _mm_andnot_si128(mask,one);
01108 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
01109
01110
01111 rowptr_false+=16;
01112 rowptr_t1+=16;
01113 rowptr_out+=16;
01114 }
01115 #endif
01116 }
01117 void flgr1d_replace_I_EQ_C_C_I_fast_fgUINT8(FLGR_Data1D *datout,
01118 FLGR_Data1D *dattest1,fgUINT8 vtest2,
01119 fgUINT8 vtrue, FLGR_Data1D *datfalse) {
01120 #ifdef __SSE2__
01121 int j;
01122 fgUINT8 *rowptr_out,*rowptr_t1;
01123 fgUINT8 *rowptr_false;
01124
01125 __m128i ptst1, ptst2, ptrue, pfalse;
01126 __m128i mask,one;
01127
01128
01129
01130 one = _mm_set1_epi8((char) 0xFF);
01131
01132 ptst2 = _mm_set1_epi8((char) vtest2);
01133 ptrue = _mm_set1_epi8((char) vtrue);
01134
01135 rowptr_out = (fgUINT8 *) datout->array;
01136 rowptr_t1 = (fgUINT8 *) dattest1->array;
01137 rowptr_false = (fgUINT8 *) datfalse->array;
01138
01139
01140 for(j=0 ; j<datout->length ; j+=16) {
01141 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
01142 pfalse = _mm_load_si128((__m128i*) rowptr_false);
01143
01144 mask = _mm_cmpeq_epi8(ptst1,ptst2);
01145 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
01146
01147 mask = _mm_andnot_si128(mask,one);
01148 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
01149
01150
01151 rowptr_false+=16;
01152 rowptr_t1+=16;
01153 rowptr_out+=16;
01154 }
01155 #endif
01156 }
01157 void flgr1d_replace_I_NE_C_C_I_fast_fgUINT8(FLGR_Data1D *datout,
01158 FLGR_Data1D *dattest1,fgUINT8 vtest2,
01159 fgUINT8 vtrue, FLGR_Data1D *datfalse) {
01160 #ifdef __SSE2__
01161 int j;
01162 fgUINT8 *rowptr_out,*rowptr_t1;
01163 fgUINT8 *rowptr_false;
01164
01165 __m128i ptst1, ptst2, ptrue, pfalse;
01166 __m128i mask,one;
01167
01168
01169
01170 one = _mm_set1_epi8((char) 0xFF);
01171
01172 ptst2 = _mm_set1_epi8((char) vtest2);
01173 ptrue = _mm_set1_epi8((char) vtrue);
01174
01175 rowptr_out = (fgUINT8 *) datout->array;
01176 rowptr_t1 = (fgUINT8 *) dattest1->array;
01177 rowptr_false = (fgUINT8 *) datfalse->array;
01178
01179
01180 for(j=0 ; j<datout->length ; j+=16) {
01181 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
01182 pfalse = _mm_load_si128((__m128i*) rowptr_false);
01183
01184 mask = _mm_cmpeq_epi8(ptst1,ptst2);
01185 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
01186
01187 mask = _mm_andnot_si128(mask,one);
01188 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
01189
01190
01191 rowptr_false+=16;
01192 rowptr_t1+=16;
01193 rowptr_out+=16;
01194 }
01195 #endif
01196 }
01197 void flgr1d_replace_I_GE_C_C_I_fast_fgUINT8(FLGR_Data1D *datout,
01198 FLGR_Data1D *dattest1,fgUINT8 vtest2,
01199 fgUINT8 vtrue, FLGR_Data1D *datfalse) {
01200 #ifdef __SSE2__
01201 int j;
01202 fgUINT8 *rowptr_out,*rowptr_t1;
01203 fgUINT8 *rowptr_false;
01204
01205 __m128i ptst1, ptst2, ptrue, pfalse;
01206 __m128i mask,one;
01207
01208
01209
01210 one = _mm_set1_epi8((char) 0xFF);
01211
01212 ptst2 = _mm_set1_epi8((char) vtest2);
01213 ptrue = _mm_set1_epi8((char) vtrue);
01214
01215 rowptr_out = (fgUINT8 *) datout->array;
01216 rowptr_t1 = (fgUINT8 *) dattest1->array;
01217 rowptr_false = (fgUINT8 *) datfalse->array;
01218
01219
01220 for(j=0 ; j<datout->length ; j+=16) {
01221 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
01222 pfalse = _mm_load_si128((__m128i*) rowptr_false);
01223
01224 ptst1 = _mm_min_epu8(ptst1,ptst2);
01225 mask = _mm_cmpeq_epi8(ptst1,ptst2);
01226 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
01227
01228 mask = _mm_andnot_si128(mask,one);
01229 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
01230
01231
01232 rowptr_false+=16;
01233 rowptr_t1+=16;
01234 rowptr_out+=16;
01235 }
01236 #endif
01237 }
01238
01239
01240
01241
01242
01243
01244
01245
01246
01247 void flgr1d_replace_I_LE_C_C_C_fast_fgUINT8(FLGR_Data1D *datout,
01248 FLGR_Data1D *dattest1,fgUINT8 vtest2,
01249 fgUINT8 vtrue, fgUINT8 vfalse) {
01250 #ifdef __SSE2__
01251 int j;
01252 fgUINT8 *rowptr_out,*rowptr_t1;
01253
01254 __m128i ptst1, ptst2, ptrue, pfalse;
01255 __m128i mask,one;
01256
01257
01258
01259 one = _mm_set1_epi8((char) 0xFF);
01260
01261 ptst2 = _mm_set1_epi8((char) vtest2);
01262 ptrue = _mm_set1_epi8((char) vtrue);
01263 pfalse = _mm_set1_epi8((char) vfalse);
01264
01265 rowptr_out = (fgUINT8 *) datout->array;
01266 rowptr_t1 = (fgUINT8 *) dattest1->array;
01267
01268
01269 for(j=0 ; j<datout->length ; j+=16) {
01270 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
01271
01272 ptst1 = _mm_max_epu8(ptst1,ptst2);
01273 mask = _mm_cmpeq_epi8(ptst1,ptst2);
01274 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
01275
01276 mask = _mm_andnot_si128(mask,one);
01277 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
01278
01279
01280 rowptr_t1+=16;
01281 rowptr_out+=16;
01282 }
01283 #endif
01284 }
01285 void flgr1d_replace_I_EQ_C_C_C_fast_fgUINT8(FLGR_Data1D *datout,
01286 FLGR_Data1D *dattest1,fgUINT8 vtest2,
01287 fgUINT8 vtrue, fgUINT8 vfalse) {
01288 #ifdef __SSE2__
01289 int j;
01290 fgUINT8 *rowptr_out,*rowptr_t1;
01291
01292 __m128i ptst1, ptst2, ptrue, pfalse;
01293 __m128i mask,one;
01294
01295
01296
01297 one = _mm_set1_epi8((char) 0xFF);
01298
01299 ptst2 = _mm_set1_epi8((char) vtest2);
01300 ptrue = _mm_set1_epi8((char) vtrue);
01301 pfalse = _mm_set1_epi8((char) vfalse);
01302
01303 rowptr_out = (fgUINT8 *) datout->array;
01304 rowptr_t1 = (fgUINT8 *) dattest1->array;
01305
01306
01307 for(j=0 ; j<datout->length ; j+=16) {
01308 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
01309
01310 mask = _mm_cmpeq_epi8(ptst1,ptst2);
01311 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
01312
01313 mask = _mm_andnot_si128(mask,one);
01314 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
01315
01316
01317 rowptr_t1+=16;
01318 rowptr_out+=16;
01319 }
01320 #endif
01321 }
01322 void flgr1d_replace_I_NE_C_C_C_fast_fgUINT8(FLGR_Data1D *datout,
01323 FLGR_Data1D *dattest1,fgUINT8 vtest2,
01324 fgUINT8 vtrue, fgUINT8 vfalse) {
01325 #ifdef __SSE2__
01326 int j;
01327 fgUINT8 *rowptr_out,*rowptr_t1;
01328
01329 __m128i ptst1, ptst2, ptrue, pfalse;
01330 __m128i mask,one;
01331
01332
01333
01334 one = _mm_set1_epi8((char) 0xFF);
01335
01336 ptst2 = _mm_set1_epi8((char) vtest2);
01337 ptrue = _mm_set1_epi8((char) vtrue);
01338 pfalse = _mm_set1_epi8((char) vfalse);
01339
01340 rowptr_out = (fgUINT8 *) datout->array;
01341 rowptr_t1 = (fgUINT8 *) dattest1->array;
01342
01343
01344 for(j=0 ; j<datout->length ; j+=16) {
01345 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
01346
01347 mask = _mm_cmpeq_epi8(ptst1,ptst2);
01348 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
01349
01350 mask = _mm_andnot_si128(mask,one);
01351 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
01352
01353
01354 rowptr_t1+=16;
01355 rowptr_out+=16;
01356 }
01357 #endif
01358 }
01359 void flgr1d_replace_I_GE_C_C_C_fast_fgUINT8(FLGR_Data1D *datout,
01360 FLGR_Data1D *dattest1,fgUINT8 vtest2,
01361 fgUINT8 vtrue, fgUINT8 vfalse) {
01362 #ifdef __SSE2__
01363 int j;
01364 fgUINT8 *rowptr_out,*rowptr_t1;
01365
01366 __m128i ptst1, ptst2, ptrue, pfalse;
01367 __m128i mask,one;
01368
01369
01370
01371 one = _mm_set1_epi8((char) 0xFF);
01372
01373 ptst2 = _mm_set1_epi8((char) vtest2);
01374 ptrue = _mm_set1_epi8((char) vtrue);
01375 pfalse = _mm_set1_epi8((char) vfalse);
01376
01377 rowptr_out = (fgUINT8 *) datout->array;
01378 rowptr_t1 = (fgUINT8 *) dattest1->array;
01379
01380
01381 for(j=0 ; j<datout->length ; j+=16) {
01382 ptst1 = _mm_load_si128((__m128i*) rowptr_t1);
01383
01384 ptst1 = _mm_min_epu8(ptst1,ptst2);
01385 mask = _mm_cmpeq_epi8(ptst1,ptst2);
01386 _mm_maskmoveu_si128(ptrue,mask, (char*) rowptr_out);
01387
01388 mask = _mm_andnot_si128(mask,one);
01389 _mm_maskmoveu_si128(pfalse,mask, (char*) rowptr_out);
01390
01391
01392 rowptr_t1+=16;
01393 rowptr_out+=16;
01394 }
01395 #endif
01396 }
01397
01398
01399
01400
01401
01402
01403
01404
01405
01406
01407