00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include <stdio.h>
00023 #include <stdlib.h>
00024 #include <string.h>
00025 #include <flgrCoreMalloc.h>
00026 #include <flgrCoreDispatch.h>
00027
00028 #ifdef __SSE2__
00029 #include <emmintrin.h>
00030 #else
00031 #if defined(__SSE__) && defined(__MMX__)
00032 #include <mmintrin.h>
00033 #include <xmmintrin.h>
00034 #endif
00035 #endif
00036
00037 #include <flgrCoreTranspose.h>
00038 #include "flgrMorphoBaseSegmentFast.h"
00039
00040
00041 void flgr2d_erode_segment_vertical_fast_fgUINT8(FLGR_Data2D *imgdest, FLGR_Data2D *imgsrc, int sesize_y) {
00042 #ifdef __SSE2__
00043 int i,j,k,m=0,n=0,p=0;
00044 int se2=sesize_y/2;
00045 int kpad;
00046 int rowsize_y=imgdest->size_y;
00047 int finalPass=rowsize_y%sesize_y;
00048 int nbbloc = rowsize_y/sesize_y;
00049 fgUINT8 **rowin;
00050 fgUINT8 **rowout;
00051 __m128i *bufg;
00052 __m128i *bufh;
00053 __m128i tmpg,tmph;
00054
00055
00056
00057 bufg = (__m128i *) flgr_malloc_align(sizeof(__m128i)*(rowsize_y+16),16);
00058 bufh = (__m128i *) flgr_malloc_align(sizeof(__m128i)*(rowsize_y+16),16);
00059
00060 rowin = (fgUINT8**) malloc(sizeof(fgUINT8*)*(imgsrc->size_y+16));
00061 rowout = (fgUINT8**) malloc(sizeof(fgUINT8*)*(rowsize_y+16));
00062
00063 for(i=0 ; i<(rowsize_y+16) ; i++) {
00064 rowin[i] = (fgUINT8*) imgsrc->array[i];
00065 rowout[i] = (fgUINT8*) imgdest->array[i];
00066 }
00067
00068
00069 for(j=0 ; j<imgdest->size_x ; j+=16) {
00070
00071 m=0;
00072 n=nbbloc*sesize_y-1;
00073
00074 for(k=0;k<nbbloc;k++) {
00075
00076 tmpg = bufg[m]=*((__m128i*) (rowin[m]+j));
00077 tmph = bufh[n]=*((__m128i*) (rowin[n]+j));
00078 m++;n--;
00079
00080 for(i=1;i<sesize_y;i++,m++,n--) {
00081 bufg[m] = tmpg = _mm_min_epu8(*((__m128i*) (rowin[m]+j)),tmpg);
00082 bufh[n] = tmph = _mm_min_epu8(*((__m128i*) (rowin[n]+j)),tmph);
00083 }
00084
00085 }
00086
00087 n=rowsize_y-1;
00088 tmpg = bufg[m] = *((__m128i*) (rowin[m]+j));
00089 tmph = bufh[n] = *((__m128i*) (rowin[n]+j));
00090 m++;n--;
00091
00092 for(k=1 ; k<finalPass ; k++,m++,n--) {
00093 bufg[m] = tmpg = _mm_min_epu8(*((__m128i*) (rowin[m]+j)),tmpg);
00094 bufh[n] = tmph = _mm_min_epu8(*((__m128i*) (rowin[n]+j)),tmph);
00095 }
00096
00097 n=se2;p=0;m=0;
00098 for( i=0; i<se2 ; i++) {
00099 *((__m128i*) (rowout[m++]+j)) = bufg[n++];
00100 }
00101 for( i=se2; i<rowsize_y-se2 ;i++) {
00102 *((__m128i*) (rowout[m]+j)) = _mm_min_epu8(bufg[n], bufh[p]);
00103 n++;p++;m++;
00104 }
00105 n--;
00106 kpad = finalPass;
00107 kpad = (kpad==0) ? 0 : sesize_y-kpad;
00108 for( i=0 ; (i<kpad) && (m<rowsize_y) ; i++) {
00109 *((__m128i*) (rowout[m]+j)) = _mm_min_epu8(bufg[n], bufh[p]);
00110 m++;p++;
00111 }
00112 for( i=m ; i<rowsize_y ; i++) *((__m128i*) (rowout[i]+j)) = bufh[p++];
00113
00114 }
00115
00116 flgr_free_align(bufg);
00117 flgr_free_align(bufh);
00118 free(rowin);
00119 free(rowout);
00120
00121 return;
00122
00123
00124 #else
00125 #if defined(__SSE__) && defined(__MMX__)
00126 int i,j,k,m=0,n=0,p=0;
00127 int kpad;
00128 int se2=sesize_y/2;
00129 int rowsize_y=imgdest->size_y;
00130 int finalPass=rowsize_y%sesize_y;
00131 int nbbloc = rowsize_y/sesize_y;
00132 fgUINT8 **rowin;
00133 fgUINT8 **rowout;
00134 __m64 *bufg;
00135 __m64 *bufh;
00136 __m64 tmpg,tmph;
00137
00138
00139
00140 bufg = (__m64 *) flgr_malloc_align(sizeof(__m64)*(rowsize_y+16),16);
00141 bufh = (__m64 *) flgr_malloc_align(sizeof(__m64)*(rowsize_y+16),16);
00142
00143 rowin = (fgUINT8**) malloc(sizeof(fgUINT8*)*(imgsrc->size_y+16));
00144 rowout = (fgUINT8**) malloc(sizeof(fgUINT8*)*(rowsize_y+16));
00145
00146 for(i=0 ; i<(rowsize_y+16) ; i++) {
00147 rowin[i] = (fgUINT8*) imgsrc->array[i];
00148 rowout[i] = (fgUINT8*) imgdest->array[i];
00149 }
00150
00151 _mm_empty();
00152
00153
00154 for(j=0 ; j<imgdest->size_x ; j+=8) {
00155
00156
00157 m=0;
00158 n=nbbloc*sesize_y-1;
00159
00160 for(k=0;k<nbbloc;k++) {
00161
00162 tmpg = bufg[m]=*((__m64*) (rowin[m]+j));
00163 tmph = bufh[n]=*((__m64*) (rowin[n]+j));
00164 m++;n--;
00165
00166 for(i=1;i<sesize_y;i++,m++,n--) {
00167 bufg[m] = tmpg = _mm_min_pu8(*((__m64*) (rowin[m]+j)),tmpg);
00168 bufh[n] = tmph = _mm_min_pu8(*((__m64*) (rowin[n]+j)),tmph);
00169 }
00170
00171 }
00172
00173 n=rowsize_y-1;
00174 tmpg = bufg[m] = *((__m64*) (rowin[m]+j));
00175 tmph = bufh[n] = *((__m64*) (rowin[n]+j));
00176 m++;n--;
00177
00178 for(k=1 ; k<finalPass ; k++,m++,n--) {
00179 bufg[m] = tmpg = _mm_min_pu8(*((__m64*) (rowin[m]+j)),tmpg);
00180 bufh[n] = tmph = _mm_min_pu8(*((__m64*) (rowin[n]+j)),tmph);
00181 }
00182
00183 n=se2;p=0;m=0;
00184 for( i=0; i<se2 ; i++) {
00185 *((__m64*) (rowout[m++]+j)) = bufg[n++];
00186 }
00187 for( i=se2; i<rowsize_y-se2 ;i++) {
00188 *((__m64*) (rowout[m]+j)) = _mm_min_pu8(bufg[n], bufh[p]);
00189 n++;p++;m++;
00190 }
00191 n--;
00192 kpad = finalPass;
00193 kpad = (kpad==0) ? 0 : sesize_y-kpad;
00194 for( i=0 ; (i<kpad) && (m<rowsize_y) ; i++) {
00195 *((__m64*) (rowout[m]+j)) = _mm_min_pu8(bufg[n], bufh[p]);
00196 m++;p++;
00197 }
00198 for( i=m ; i<rowsize_y ; i++) *((__m64*) (rowout[i]+j)) = bufh[p++];
00199
00200 }
00201
00202 flgr_free_align(bufg);
00203 flgr_free_align(bufh);
00204 free(rowin);
00205 free(rowout);
00206
00207 _mm_empty();
00208 return;
00209
00210 #else
00211 POST_ERROR("MMX/SSE/SSE2 Instruction not supported!\n");
00212 #endif
00213 #endif
00214 }
00215
00216
00217
00218 void flgr2d_dilate_segment_vertical_fast_fgUINT8(FLGR_Data2D *imgdest, FLGR_Data2D *imgsrc, int sesize_y) {
00219 #ifdef __SSE2__
00220 int i,j,k,m=0,n=0,p=0;
00221 int se2=sesize_y/2;
00222 int kpad;
00223 int rowsize_y=imgdest->size_y;
00224 int finalPass=rowsize_y%sesize_y;
00225 int nbbloc = rowsize_y/sesize_y;
00226 fgUINT8 **rowin;
00227 fgUINT8 **rowout;
00228 __m128i *bufg;
00229 __m128i *bufh;
00230 __m128i tmpg,tmph;
00231
00232
00233
00234 bufg = (__m128i *) flgr_malloc_align(sizeof(__m128i)*(rowsize_y+16),16);
00235 bufh = (__m128i *) flgr_malloc_align(sizeof(__m128i)*(rowsize_y+16),16);
00236
00237 rowin = (fgUINT8**) malloc(sizeof(fgUINT8*)*(imgsrc->size_y+16));
00238 rowout = (fgUINT8**) malloc(sizeof(fgUINT8*)*(rowsize_y+16));
00239
00240 for(i=0 ; i<(rowsize_y+16) ; i++) {
00241 rowin[i] = (fgUINT8*) imgsrc->array[i];
00242 rowout[i] = (fgUINT8*) imgdest->array[i];
00243 }
00244
00245
00246
00247 for(j=0 ; j<imgdest->size_x ; j+=16) {
00248
00249 m=0;
00250 n=nbbloc*sesize_y-1;
00251
00252 for(k=0;k<nbbloc;k++) {
00253
00254 tmpg = bufg[m]=*((__m128i*) (rowin[m]+j));
00255 tmph = bufh[n]=*((__m128i*) (rowin[n]+j));
00256 m++;n--;
00257
00258 for(i=1;i<sesize_y;i++,m++,n--) {
00259 bufg[m] = tmpg = _mm_max_epu8(*((__m128i*) (rowin[m]+j)),tmpg);
00260 bufh[n] = tmph = _mm_max_epu8(*((__m128i*) (rowin[n]+j)),tmph);
00261 }
00262
00263 }
00264
00265 n=rowsize_y-1;
00266 tmpg = bufg[m] = *((__m128i*) (rowin[m]+j));
00267 tmph = bufh[n] = *((__m128i*) (rowin[n]+j));
00268 m++;n--;
00269
00270 for(k=1 ; k<finalPass ; k++,m++,n--) {
00271 bufg[m] = tmpg = _mm_max_epu8(*((__m128i*) (rowin[m]+j)),tmpg);
00272 bufh[n] = tmph = _mm_max_epu8(*((__m128i*) (rowin[n]+j)),tmph);
00273 }
00274
00275 n=se2;p=0;m=0;
00276 for( i=0; i<se2 ; i++) {
00277 *((__m128i*) (rowout[m++]+j)) = bufg[n++];
00278 }
00279 for( i=se2; i<rowsize_y-se2 ;i++) {
00280 *((__m128i*) (rowout[m]+j)) = _mm_max_epu8(bufg[n], bufh[p]);
00281 n++;p++;m++;
00282 }
00283 n--;
00284 kpad = finalPass;
00285 kpad = (kpad==0) ? 0 : sesize_y-kpad;
00286 for( i=0 ; (i<kpad) && (m<rowsize_y) ; i++) {
00287 *((__m128i*) (rowout[m]+j)) = _mm_max_epu8(bufg[n], bufh[p]);
00288 m++;p++;
00289 }
00290 for( i=m ; i<rowsize_y ; i++) *((__m128i*) (rowout[i]+j)) = bufh[p++];
00291
00292 }
00293
00294 flgr_free_align(bufg);
00295 flgr_free_align(bufh);
00296 free(rowin);
00297 free(rowout);
00298
00299 return;
00300
00301 #else
00302 #if defined(__SSE__) && defined(__MMX__)
00303 int i,j,k,m=0,n=0,p=0;
00304 int kpad;
00305 int se2=sesize_y/2;
00306 int rowsize_y=imgdest->size_y;
00307 int finalPass=rowsize_y%sesize_y;
00308 int nbbloc = rowsize_y/sesize_y;
00309 fgUINT8 **rowin;
00310 fgUINT8 **rowout;
00311 __m64 *bufg;
00312 __m64 *bufh;
00313 __m64 tmpg,tmph;
00314
00315
00316
00317 bufg = (__m64 *) flgr_malloc_align(sizeof(__m64)*(rowsize_y+16),16);
00318 bufh = (__m64 *) flgr_malloc_align(sizeof(__m64)*(rowsize_y+16),16);
00319
00320 rowin = (fgUINT8**) malloc(sizeof(fgUINT8*)*(imgsrc->size_y+16));
00321 rowout = (fgUINT8**) malloc(sizeof(fgUINT8*)*(rowsize_y+16));
00322
00323 for(i=0 ; i<(rowsize_y+16) ; i++) {
00324 rowin[i] = (fgUINT8*) imgsrc->array[i];
00325 rowout[i] = (fgUINT8*) imgdest->array[i];
00326 }
00327
00328 _mm_empty();
00329
00330
00331 for(j=0 ; j<imgdest->size_x ; j+=8) {
00332
00333
00334 m=0;
00335 n=nbbloc*sesize_y-1;
00336
00337 for(k=0;k<nbbloc;k++) {
00338
00339 tmpg = bufg[m]=*((__m64*) (rowin[m]+j));
00340 tmph = bufh[n]=*((__m64*) (rowin[n]+j));
00341 m++;n--;
00342
00343 for(i=1;i<sesize_y;i++,m++,n--) {
00344 bufg[m] = tmpg = _mm_max_pu8(*((__m64*) (rowin[m]+j)),tmpg);
00345 bufh[n] = tmph = _mm_max_pu8(*((__m64*) (rowin[n]+j)),tmph);
00346 }
00347
00348 }
00349
00350 n=rowsize_y-1;
00351 tmpg = bufg[m] = *((__m64*) (rowin[m]+j));
00352 tmph = bufh[n] = *((__m64*) (rowin[n]+j));
00353 m++;n--;
00354
00355 for(k=1 ; k<finalPass ; k++,m++,n--) {
00356 bufg[m] = tmpg = _mm_max_pu8(*((__m64*) (rowin[m]+j)),tmpg);
00357 bufh[n] = tmph = _mm_max_pu8(*((__m64*) (rowin[n]+j)),tmph);
00358 }
00359
00360 n=se2;p=0;m=0;
00361 for( i=0; i<se2 ; i++) {
00362 *((__m64*) (rowout[m++]+j)) = bufg[n++];
00363 }
00364 for( i=se2; i<rowsize_y-se2 ;i++) {
00365 *((__m64*) (rowout[m]+j)) = _mm_max_pu8(bufg[n], bufh[p]);
00366 n++;p++;m++;
00367 }
00368 n--;
00369 kpad = finalPass;
00370 kpad = (kpad==0) ? 0 : sesize_y-kpad;
00371 for( i=0 ; (i<kpad) && (m<rowsize_y) ; i++) {
00372 *((__m64*) (rowout[m]+j)) = _mm_max_pu8(bufg[n], bufh[p]);
00373 m++;p++;
00374 }
00375 for( i=m ; i<rowsize_y ; i++) *((__m64*) (rowout[i]+j)) = bufh[p++];
00376
00377 }
00378
00379 flgr_free_align(bufg);
00380 flgr_free_align(bufh);
00381 free(rowin);
00382 free(rowout);
00383
00384 _mm_empty();
00385 return ;
00386
00387 #else
00388 POST_ERROR("MMX/SSE/SSE2 Instruction not supported!\n");
00389 #endif
00390 #endif
00391 }
00392
00393
00394
00395 void flgr2d_erode_segment_horizontal_fast_fgUINT8(FLGR_Data2D *imgdest, FLGR_Data2D *imgsrc, int sesize_x) {
00396 FLGR_Data2D *tmpdest,*tmpsrc;
00397
00398
00399
00400 tmpdest = flgr2d_create_pixmap(imgdest->size_x,imgdest->size_y,imgsrc->spp,imgdest->type);
00401 tmpsrc = flgr2d_create_pixmap(imgdest->size_x,imgdest->size_y,imgsrc->spp,imgdest->type);
00402
00403 flgr2d_transpose_fgUINT8(tmpsrc,imgsrc);
00404 flgr2d_erode_segment_vertical_fast_fgUINT8(tmpdest,tmpsrc,sesize_x);
00405 flgr2d_transpose_fgUINT8(imgdest,tmpdest);
00406
00407 flgr2d_destroy(tmpdest);
00408 flgr2d_destroy(tmpsrc);
00409
00410 return ;
00411 }
00412
00413
00414 void flgr2d_dilate_segment_horizontal_fast_fgUINT8(FLGR_Data2D *imgdest, FLGR_Data2D *imgsrc, int sesize_x) {
00415 FLGR_Data2D *tmpdest,*tmpsrc;
00416
00417
00418
00419 tmpdest = flgr2d_create_pixmap(imgdest->size_x,imgdest->size_y,imgsrc->spp,imgdest->type);
00420 tmpsrc = flgr2d_create_pixmap(imgdest->size_x,imgdest->size_y,imgsrc->spp,imgdest->type);
00421
00422 flgr2d_transpose_fgUINT8(tmpsrc,imgsrc);
00423 flgr2d_dilate_segment_vertical_fast_fgUINT8(tmpdest,tmpsrc,sesize_x);
00424 flgr2d_transpose_fgUINT8(imgdest,tmpdest);
00425
00426 flgr2d_destroy(tmpdest);
00427 flgr2d_destroy(tmpsrc);
00428
00429 return ;
00430
00431 }