image - CUDA Median filter not working properly -



image - CUDA Median filter not working properly -

i took upon myself larn cuda, , tried implement simple median filter image processing. came with, can't seem results images come out of it. instance, output image relatively noise free, saturation of image seems higher, , when tried image of teddy bear wikipedia, nose gets greenish reason. became frustrated think of new ideas, if can see problem in code, gratefull. thanks!

this kernel function:

__global__ void median_filter(int *input, int *output, int image_w, int image_h){ __shared__ float window[block_w*block_h][9]; int x, y, tid; int i, j, imin, temp; x = blockidx.x*blockdim.x + threadidx.x; y = blockidx.y*blockdim.y + threadidx.y; tid = threadidx.y*blockdim.y + threadidx.x; if(x>=image_w && y>=image_h) return; /* setting 3x3 window elements median */ if(y==0 && x==0) window[tid][0] = input[y*image_w+x]; else if(y==0 && x!=0) window[tid][0] = input[y*image_w+x-1]; else if(y!=0 && x==0) window[tid][0] = input[(y-1)*image_w+x]; else window[tid][0] = input[(y-1)*image_w+x-1]; window[tid][1] = (y==0)?input[y*image_w+x]:input[(y-1)*image_w+x]; if(y==0 && x==image_w-1) window[tid][2] = input[y*image_w+x]; else if(y!=0 && x==image_w-1) window[tid][2] = input[(y-1)*image_w+x]; else if(y==0 && x!=image_w-1) window[tid][2] = input[(y-1)*image_w+x+1]; else window[tid][2] = input[(y-1)*image_w+x+1]; window[tid][3] = (x==0)?input[y*image_w+x]:input[y*image_w+x-1]; window[tid][4] = input[y*image_w+x]; window[tid][5] = (x==image_w-1)?input[y*image_w+x]:input[y*image_w+x+1]; if(y==image_h-1 && x==0) window[tid][6] = input[y*image_w+x]; else if(y!=image_h-1 && x==0) window[tid][6] = input[(y+1)*image_w+x]; else if(y==image_h-1 && x!=0) window[tid][6] = input[y*image_w+x-1]; else window[tid][6] = input[(y+1)*image_w+x-1]; window[tid][7] = (y==image_h-1)?input[y*image_w+x]:input[(y+1)*image_w+x]; if(y==image_h-1 && x==image_w-1) window[tid][8] = input[y*image_w+x]; else if(y!=image_h-1 && x==image_w-1) window[tid][8] = input[(y+1)*image_w+x]; else if(y==image_h-1 && x!=image_w-1) window[tid][8] = input[y*image_w+x+1]; else window[tid][8] = input[(y+1)*image_w+x+1]; __syncthreads(); /* sorting window find median */ for(j=0; j<8; j++){ imin = j; for(i=j+1; i<9; i++){ if(window[tid][i] < window[tid][imin]){ imin = i; } } if(imin != j){ temp = window[tid][imin]; window[tid][imin] = window[tid][j]; window[tid][j] = temp; } __syncthreads(); } output[y*image_w + x] = window[tid][4]; }

and main function:

int main(){ /*loading picture*/ char picture[50] = "before.bmp"; file *image = fopen(picture, "rb"); if(image == null) { printf("load image error!\n"); system("pause"); exit(1); } bitmapfileheader bmpfheader; bitmapinfoheader bmpiheader; fread(&bmpfheader, sizeof(bitmapfileheader), 1, image); fread(&bmpiheader, sizeof(bitmapinfoheader), 1, image); int imgwidth = bmpiheader.biwidth; int imgheight = bmpiheader.biheight; int img_size = imgwidth * imgheight * sizeof(int); int * imgeredchannel_x = (int *)malloc(img_size); int * imgegreenchannel_x = (int *)malloc(img_size); int * imgebluechannel_x = (int *)malloc(img_size); int * deviceinputred; int * deviceinputgreen; int * deviceinputblue; int * deviceoutputrd; int * deviceoutputgreen; int * deviceoutputblue; for(int = imgheight-1; i>=0; i--) { for(int j = 0; j<imgwidth; j++) { fread(&(imgegreenchannel_x[i * (imgwidth) + j]), sizeof(unsigned char), 1, image); fread(&(imgebluechannel_x[i * (imgwidth) + j]), sizeof(unsigned char), 1, image); fread(&(imgeredchannel_x[i * (imgwidth) + j]), sizeof(unsigned char), 1, image); } } cudamalloc((void **) &deviceinputred, sizeof(int) * imgheight * imgwidth); cudamalloc((void **) &deviceinputblue, sizeof(int) * imgheight * imgwidth); cudamalloc((void **) &deviceinputgreen, sizeof(int) * imgheight * imgwidth); cudamalloc((void **) &deviceoutputrd, sizeof(int) * imgheight * imgwidth); cudamalloc((void **) &deviceoutputblue, sizeof(int) * imgheight * imgwidth); cudamalloc((void **) &deviceoutputgreen, sizeof(int) * imgheight * imgwidth); int dima = imgwidth*imgheight; int numthreadsperblock = 256; int numblocks = dima / numthreadsperblock; int sharedmemsize = numthreadsperblock*sizeof(int); dim3 dimgrid(numblocks); dim3 dimblock(numthreadsperblock); cudamemcpy(deviceinputred,imgeredchannel_x,sizeof(int) * imgheight * imgwidth,cudamemcpyhosttodevice); checkcudaerror("memcpy h-d r"); cudamemcpy(deviceinputgreen,imgegreenchannel_x,sizeof(int) * imgheight * imgwidth,cudamemcpyhosttodevice); checkcudaerror("memcpy h-d g"); cudamemcpy(deviceinputblue,imgebluechannel_x,sizeof(int) * imgheight * imgwidth,cudamemcpyhosttodevice); checkcudaerror("memcpy h-d b"); median_filter<<< dimgrid , dimblock, sharedmemsize>>> (deviceinputred, deviceoutputrd, imgheight, imgwidth); checkcudaerror("kernel invocation r"); median_filter<<< dimgrid , dimblock, sharedmemsize>>> (deviceinputgreen, deviceoutputgreen, imgheight, imgwidth); checkcudaerror("kernel invocation g"); median_filter<<< dimgrid , dimblock, sharedmemsize>>> (deviceinputblue, deviceoutputblue, imgheight, imgwidth); checkcudaerror("kernel invocation b"); cudamemcpy(imgeredchannel_x, deviceoutputrd, imgheight * imgwidth * sizeof(int), cudamemcpydevicetohost); checkcudaerror("memcpy d-h r"); cudamemcpy(imgegreenchannel_x, deviceoutputgreen, imgheight * imgwidth * sizeof(int), cudamemcpydevicetohost); checkcudaerror("memcpy d-h g"); cudamemcpy(imgebluechannel_x, deviceoutputblue, imgheight * imgwidth * sizeof(int), cudamemcpydevicetohost); checkcudaerror("memcpy d-h b"); cudafree(deviceinputred); cudafree(deviceoutputrd); cudafree(deviceinputgreen); cudafree(deviceoutputgreen); cudafree(deviceinputblue); cudafree(deviceoutputblue); /*saving new picture*/ fclose(image); char title[50]="after"; strcat(title, ".bmp"); remove(title); image = fopen(title,"wb"); fwrite(&bmpfheader, sizeof(bitmapfileheader), 1, image); fwrite(&bmpiheader, sizeof(bitmapinfoheader), 1, image); for(int = imgheight-1; i>=0; i--) { for(int j = 0; j<imgwidth; j++) { int b = imgebluechannel_x[i * (imgwidth) + j]; int g = imgegreenchannel_x[i * (imgwidth) + j]; int r = imgeredchannel_x[i * (imgwidth) + j]; if(b>255)b=255; if(g>255)g=255; if(r>255)r=255; fwrite(&g, sizeof(unsigned char), 1, image); fwrite(&b, sizeof(unsigned char), 1, image); fwrite(&r, sizeof(unsigned char), 1, image); } } printf("success!\n"); fclose(image); system("pause"); homecoming 0; }

the nose getting greenish means have overflows in code, that's unusual because median filter should never generate overflows. certainly have messed code there, kernel doesn't create much sense lots of work you're doing.

in non linear filters suggest seek implementing min or max filters first see if work. here's working code max filter cuvi library cuda. median kernel should no different this:

__global__ void median_8u_c3( unsigned char* out, unsigned int width, unsigned int widthstep, unsigned int height){ int xindex = blockidx.x*block_size + threadidx.x; int yindex = blockidx.y*block_size + threadidx.y; int tid = yindex * widthstep + (3*xindex); if(xindex>=width|| yindex>=height) return; int limitx = anchorx + fheight - 1; int limity = anchory + fwidth - 1; unsigned char max_r = 0 , max_g = 0, max_b = 0; // instead of max filter code in loops below, can have median code for(cuvi32s i=anchorx ; i<= limitx; i++) for(cuvi32s j=anchory ; j<= limity; j++) { max_r = (tex2d(tex8,3*(xindex + i) , yindex + j) > max_r) ? tex2d(tex8,3*(xindex + i) , yindex + j) : max_r; max_g = (tex2d(tex8,3*(xindex + i)+1, yindex + j) > max_g) ? tex2d(tex8,3*(xindex + i)+1, yindex + j) : max_g; max_b = (tex2d(tex8,3*(xindex + i)+2, yindex + j) > max_b) ? tex2d(tex8,3*(xindex + i)+2, yindex + j) : max_b; } out[tid] = max_r; out[tid + 1] = max_g; out[tid + 2] = max_b; }

note: i'm using input textures.

image filter cuda median

Comments

Popular posts from this blog

javascript - mongodb won't find my schema method in nested container -

Hibernate criteria by a list of natural ids -

ios - Lagging ScrollView with UIWebview inside -