Calculating skin tone using JCuda does not give the right percentage











up vote
-1
down vote

favorite












I'm calculating the skin tone of an image in Java.




  1. Convert each pixel of the image to YCbCr.

  2. Check whether the pixel's Cb and Cr values fall within a specific range; if they do, it is a skin color.

  3. Calculate the percentage by dividing the number of skin-color pixels by the total number of pixels.


It works fine in the CPU code, but when I convert it to GPU code, the pixel percentage does not come out right.



The confusing part for me was sending the pixel data to the GPU and reading its r, g, b values on the GPU.



So I followed the JCuda Pixel Invert example to send the pixel data. The difference is that the example sends the pixel data as an int array, whereas I'm sending it as a byte array.



Here is the code.



import static jcuda.driver.JCudaDriver.cuCtxCreate;
import static jcuda.driver.JCudaDriver.cuCtxSynchronize;
import static jcuda.driver.JCudaDriver.cuDeviceGet;
import static jcuda.driver.JCudaDriver.cuInit;
import static jcuda.driver.JCudaDriver.cuLaunchKernel;
import static jcuda.driver.JCudaDriver.cuMemAlloc;
import static jcuda.driver.JCudaDriver.cuMemFree;
import static jcuda.driver.JCudaDriver.cuMemcpyDtoH;
import static jcuda.driver.JCudaDriver.cuMemcpyHtoD;

import java.awt.image.BufferedImage;
import java.awt.image.DataBuffer;
import java.awt.image.DataBufferByte;
import java.awt.image.Raster;
import java.io.File;
import java.io.IOException;

import javax.imageio.ImageIO;

import ij.IJ;
import jcuda.Pointer;
import jcuda.Sizeof;
import jcuda.driver.CUcontext;
import jcuda.driver.CUdevice;
import jcuda.driver.CUdeviceptr;
import jcuda.driver.CUfunction;
import jcuda.driver.JCudaDriver;
import jcuda.nvrtc.JNvrtc;

public class SkinTone {

public static void CalculateSKintoneGPU(File file) throws IOException {
BufferedImage bufferedImage = ImageIO.read(file);
if (bufferedImage == null || bufferedImage.getData() == null)
return;
Raster raster = bufferedImage.getData();

DataBuffer dataBuffer = raster.getDataBuffer();
DataBufferByte dataBufferInt = (DataBufferByte)dataBuffer;
byte pixels = dataBufferInt.getData();

int totalPixels = raster.getHeight() * raster.getWidth();

CUfunction kernelFunction = initlize();

int output = execute(kernelFunction, pixels, raster.getWidth(), raster.getHeight());
// Flushing memory
raster = null;
bufferedImage.flush();
bufferedImage = null;

long skintoneThreshold = Math.round(output[0] / (double) totalPixels * 100.0);

System.err.println("Skintone Using GPU: " + output[0]);
System.err.println("Total Pixel Of GPU: " + totalPixels);
System.err.println("SKinTone Percentage Using GPU: " + skintoneThreshold + "%");
}

static int execute(CUfunction kernelFunction, byte pixels, int w, int h) {
// Allocate memory on the device, and copy the host data to the device
int size = w * h * Sizeof.BYTE;
CUdeviceptr pointer = new CUdeviceptr();
cuMemAlloc(pointer, size);
cuMemcpyHtoD(pointer, Pointer.to(pixels), size);

int numElements = 9;
int s = 0;
// Allocate device output memory
CUdeviceptr deviceOutput = new CUdeviceptr();
cuMemAlloc(deviceOutput, numElements * Sizeof.INT);

// Set up the kernel parameters: A pointer to an array
// of pointers which point to the actual values.
Pointer kernelParameters = Pointer.to(Pointer.to(pointer), Pointer.to(new int { w }),
Pointer.to(new int { h }), Pointer.to(deviceOutput));

// Call the kernel function
int blockSize = 16;
int gridSize = (Math.max(w, h) + blockSize - 1) / blockSize;
cuLaunchKernel(kernelFunction, gridSize, gridSize, 1, // Grid dimension
blockSize, blockSize, 1, // Block dimension
0, null, // Shared memory size and stream
kernelParameters, null // Kernel- and extra parameters
);
cuCtxSynchronize();

// Allocate host output memory and copy the device output
// to the host.
int hostOutput = new int[numElements];
cuMemcpyDtoH(Pointer.to(hostOutput), deviceOutput, numElements * Sizeof.INT);

// Clean up.
cuMemFree(deviceOutput);
cuMemFree(pointer);

return hostOutput;
}

public static CUfunction initlize() {

// Enable exceptions and omit all subsequent error checks
JCudaDriver.setExceptionsEnabled(true);
JNvrtc.setExceptionsEnabled(true);

// Initialize the driver and create a context for the first device.
cuInit(0);
CUdevice device = new CUdevice();
cuDeviceGet(device, 0);
CUcontext context = new CUcontext();
cuCtxCreate(context, 0, device);

// Obtain the CUDA source code from the CUDA file
String cuFileName = "Skintone.cu";
String sourceCode = CudaUtils.readResourceAsString(cuFileName);
if (sourceCode == null) {
IJ.showMessage("Error", "Could not read the kernel source code");
}

// Create the kernel function
return CudaUtils.createFunction(sourceCode, "skintone");
}

public static void CalculateSKintoneCPU(File file) throws IOException {
BufferedImage bufferedImage = ImageIO.read(file);
if (bufferedImage == null || bufferedImage.getData() == null)
return;
Raster raster = bufferedImage.getData();
float rgb = new float[4];
int totalPixels = raster.getHeight() * raster.getWidth();

int skinTonePixels = 0;

for (int x = 0; x < raster.getWidth(); x++) {
for (int y = 0; y < raster.getHeight(); y++) {
raster.getPixel(x, y, rgb);
if (skintone(rgb)) {
skinTonePixels++;
}
}
}

// Flushing memory
raster = null;
rgb = null;
bufferedImage.flush();
bufferedImage = null;

long skintoneThreshold = Math.round(skinTonePixels / (double) totalPixels * 100.0);

System.err.println("Skintone Using CPU: " + skinTonePixels);
System.err.println("Total Pixel Of CPU: " + totalPixels);
System.err.println("SKinTone Percentage Using CPU: " + skintoneThreshold + "%");
}

private static boolean skintone(float rgb) {
float yCbCr = (float) convertRGBtoYUV(rgb);
if ((yCbCr[1] >= 80 && yCbCr[1] <= 120) && (yCbCr[2] >= 133 && yCbCr[2] <= 173)) {
return true;
}
return false;
}

private static float convertRGBtoYUV(float rgb) {
final float yCbCr = new float[3];
float r = rgb[0];
float g = rgb[1];
float b = rgb[2];

yCbCr[0] = 16 + (0.299f * r) + (0.587f * g) + (0.144f * b);
yCbCr[1] = 128 + (-0.169f * r) - (0.331f * g) + (0.5f * b);
yCbCr[2] = 128 + (0.5f * r) - (0.419f * g) - (0.081f * b);

return yCbCr;
}

public static void main(String args) throws IOException {
File file = new File("C:\Users\Aqeel\git\jcuda-imagej-example\src\test\resources\lena512color.png");
CalculateSKintoneCPU(file);
CalculateSKintoneGPU(file);
}


}



Kernel file



    extern "C"
/*
 * Counts skin-tone pixels. Each thread handles one pixel of a w x h image
 * stored row by row in `data`, reading red, green and blue from the .x, .y
 * and .z components. A pixel is counted when Cb is in [80, 120] and Cr is in
 * [133, 173]. The count is accumulated in output[0], which this kernel only
 * increments and never resets.
 */
__global__ void skintone(uchar4* data, int w, int h, int* output)
{
    const int col = threadIdx.x + blockIdx.x * blockDim.x;
    const int row = threadIdx.y + blockIdx.y * blockDim.y;

    // Threads that fall outside the image have nothing to do.
    if (col >= w || row >= h)
    {
        return;
    }

    const uchar4 px = data[row * w + col];
    const float r = px.x;
    const float g = px.y;
    const float b = px.z;

    const float cb = 128 + (-0.169f * r) - (0.331f * g) + (0.5f * b);
    const float cr = 128 + (0.5f * r) - (0.419f * g) - (0.081f * b);

    const bool cbInRange = (cb >= 80 && cb <= 120);
    const bool crInRange = (cr >= 133 && cr <= 173);

    // atomicAdd keeps concurrent increments from different threads from being lost.
    if (cbInRange && crInRange)
    {
        atomicAdd(&output[0], 1);
    }
}


Complete example source. The machine needs an NVIDIA card, CUDA Toolkit v9 and graphics drivers.










share|improve this question




















  • 1




    What's your sample? What did you expect to come out? What did come out? What debugging did you do - where in the code do you think that the problem is? Looks like the problem could be in the C code, better to tag it with C than with Java then.
    – Erwin Bolwidt
    Nov 21 at 21:25










  • I add the code with the question. Thanks for suggesting to tag with the C code.
    – Aqeel Haider
    Nov 21 at 21:26










  • My output with a sample image is: Skintone Using CPU: 75417 Total Pixel Of CPU: 262144 SKinTone Percentage Using CPU: 29% Skintone Using GPU: 29269 Total Pixel Of GPU: 262144 SKinTone Percentage Using GPU: 11% They should be same.
    – Aqeel Haider
    Nov 21 at 21:28















up vote
-1
down vote

favorite












I'm calculating the skin tone of an image in Java.




  1. Convert each pixel of the image to YCbCr.

  2. Check whether the pixel's Cb and Cr values fall within a specific range; if they do, it is a skin color.

  3. Calculate the percentage by dividing the number of skin-color pixels by the total number of pixels.


It works fine in the CPU code, but when I convert it to GPU code, the pixel percentage does not come out right.



The confusing part for me was sending the pixel data to the GPU and reading its r, g, b values on the GPU.



So I followed the JCuda Pixel Invert example to send the pixel data. The difference is that the example sends the pixel data as an int array, whereas I'm sending it as a byte array.



Here is the code.



import static jcuda.driver.JCudaDriver.cuCtxCreate;
import static jcuda.driver.JCudaDriver.cuCtxSynchronize;
import static jcuda.driver.JCudaDriver.cuDeviceGet;
import static jcuda.driver.JCudaDriver.cuInit;
import static jcuda.driver.JCudaDriver.cuLaunchKernel;
import static jcuda.driver.JCudaDriver.cuMemAlloc;
import static jcuda.driver.JCudaDriver.cuMemFree;
import static jcuda.driver.JCudaDriver.cuMemcpyDtoH;
import static jcuda.driver.JCudaDriver.cuMemcpyHtoD;

import java.awt.image.BufferedImage;
import java.awt.image.DataBuffer;
import java.awt.image.DataBufferByte;
import java.awt.image.Raster;
import java.io.File;
import java.io.IOException;

import javax.imageio.ImageIO;

import ij.IJ;
import jcuda.Pointer;
import jcuda.Sizeof;
import jcuda.driver.CUcontext;
import jcuda.driver.CUdevice;
import jcuda.driver.CUdeviceptr;
import jcuda.driver.CUfunction;
import jcuda.driver.JCudaDriver;
import jcuda.nvrtc.JNvrtc;

public class SkinTone {

public static void CalculateSKintoneGPU(File file) throws IOException {
BufferedImage bufferedImage = ImageIO.read(file);
if (bufferedImage == null || bufferedImage.getData() == null)
return;
Raster raster = bufferedImage.getData();

DataBuffer dataBuffer = raster.getDataBuffer();
DataBufferByte dataBufferInt = (DataBufferByte)dataBuffer;
byte pixels = dataBufferInt.getData();

int totalPixels = raster.getHeight() * raster.getWidth();

CUfunction kernelFunction = initlize();

int output = execute(kernelFunction, pixels, raster.getWidth(), raster.getHeight());
// Flushing memory
raster = null;
bufferedImage.flush();
bufferedImage = null;

long skintoneThreshold = Math.round(output[0] / (double) totalPixels * 100.0);

System.err.println("Skintone Using GPU: " + output[0]);
System.err.println("Total Pixel Of GPU: " + totalPixels);
System.err.println("SKinTone Percentage Using GPU: " + skintoneThreshold + "%");
}

static int execute(CUfunction kernelFunction, byte pixels, int w, int h) {
// Allocate memory on the device, and copy the host data to the device
int size = w * h * Sizeof.BYTE;
CUdeviceptr pointer = new CUdeviceptr();
cuMemAlloc(pointer, size);
cuMemcpyHtoD(pointer, Pointer.to(pixels), size);

int numElements = 9;
int s = 0;
// Allocate device output memory
CUdeviceptr deviceOutput = new CUdeviceptr();
cuMemAlloc(deviceOutput, numElements * Sizeof.INT);

// Set up the kernel parameters: A pointer to an array
// of pointers which point to the actual values.
Pointer kernelParameters = Pointer.to(Pointer.to(pointer), Pointer.to(new int { w }),
Pointer.to(new int { h }), Pointer.to(deviceOutput));

// Call the kernel function
int blockSize = 16;
int gridSize = (Math.max(w, h) + blockSize - 1) / blockSize;
cuLaunchKernel(kernelFunction, gridSize, gridSize, 1, // Grid dimension
blockSize, blockSize, 1, // Block dimension
0, null, // Shared memory size and stream
kernelParameters, null // Kernel- and extra parameters
);
cuCtxSynchronize();

// Allocate host output memory and copy the device output
// to the host.
int hostOutput = new int[numElements];
cuMemcpyDtoH(Pointer.to(hostOutput), deviceOutput, numElements * Sizeof.INT);

// Clean up.
cuMemFree(deviceOutput);
cuMemFree(pointer);

return hostOutput;
}

public static CUfunction initlize() {

// Enable exceptions and omit all subsequent error checks
JCudaDriver.setExceptionsEnabled(true);
JNvrtc.setExceptionsEnabled(true);

// Initialize the driver and create a context for the first device.
cuInit(0);
CUdevice device = new CUdevice();
cuDeviceGet(device, 0);
CUcontext context = new CUcontext();
cuCtxCreate(context, 0, device);

// Obtain the CUDA source code from the CUDA file
String cuFileName = "Skintone.cu";
String sourceCode = CudaUtils.readResourceAsString(cuFileName);
if (sourceCode == null) {
IJ.showMessage("Error", "Could not read the kernel source code");
}

// Create the kernel function
return CudaUtils.createFunction(sourceCode, "skintone");
}

public static void CalculateSKintoneCPU(File file) throws IOException {
BufferedImage bufferedImage = ImageIO.read(file);
if (bufferedImage == null || bufferedImage.getData() == null)
return;
Raster raster = bufferedImage.getData();
float rgb = new float[4];
int totalPixels = raster.getHeight() * raster.getWidth();

int skinTonePixels = 0;

for (int x = 0; x < raster.getWidth(); x++) {
for (int y = 0; y < raster.getHeight(); y++) {
raster.getPixel(x, y, rgb);
if (skintone(rgb)) {
skinTonePixels++;
}
}
}

// Flushing memory
raster = null;
rgb = null;
bufferedImage.flush();
bufferedImage = null;

long skintoneThreshold = Math.round(skinTonePixels / (double) totalPixels * 100.0);

System.err.println("Skintone Using CPU: " + skinTonePixels);
System.err.println("Total Pixel Of CPU: " + totalPixels);
System.err.println("SKinTone Percentage Using CPU: " + skintoneThreshold + "%");
}

private static boolean skintone(float rgb) {
float yCbCr = (float) convertRGBtoYUV(rgb);
if ((yCbCr[1] >= 80 && yCbCr[1] <= 120) && (yCbCr[2] >= 133 && yCbCr[2] <= 173)) {
return true;
}
return false;
}

private static float convertRGBtoYUV(float rgb) {
final float yCbCr = new float[3];
float r = rgb[0];
float g = rgb[1];
float b = rgb[2];

yCbCr[0] = 16 + (0.299f * r) + (0.587f * g) + (0.144f * b);
yCbCr[1] = 128 + (-0.169f * r) - (0.331f * g) + (0.5f * b);
yCbCr[2] = 128 + (0.5f * r) - (0.419f * g) - (0.081f * b);

return yCbCr;
}

public static void main(String args) throws IOException {
File file = new File("C:\Users\Aqeel\git\jcuda-imagej-example\src\test\resources\lena512color.png");
CalculateSKintoneCPU(file);
CalculateSKintoneGPU(file);
}


}



Kernel file



    extern "C"
/*
 * Counts skin-tone pixels. Each thread handles one pixel of a w x h image
 * stored row by row in `data`, reading red, green and blue from the .x, .y
 * and .z components. A pixel is counted when Cb is in [80, 120] and Cr is in
 * [133, 173]. The count is accumulated in output[0], which this kernel only
 * increments and never resets.
 */
__global__ void skintone(uchar4* data, int w, int h, int* output)
{
    const int col = threadIdx.x + blockIdx.x * blockDim.x;
    const int row = threadIdx.y + blockIdx.y * blockDim.y;

    // Threads that fall outside the image have nothing to do.
    if (col >= w || row >= h)
    {
        return;
    }

    const uchar4 px = data[row * w + col];
    const float r = px.x;
    const float g = px.y;
    const float b = px.z;

    const float cb = 128 + (-0.169f * r) - (0.331f * g) + (0.5f * b);
    const float cr = 128 + (0.5f * r) - (0.419f * g) - (0.081f * b);

    const bool cbInRange = (cb >= 80 && cb <= 120);
    const bool crInRange = (cr >= 133 && cr <= 173);

    // atomicAdd keeps concurrent increments from different threads from being lost.
    if (cbInRange && crInRange)
    {
        atomicAdd(&output[0], 1);
    }
}


Complete example source. The machine needs an NVIDIA card, CUDA Toolkit v9 and graphics drivers.










share|improve this question




















  • 1




    What's your sample? What did you expect to come out? What did come out? What debugging did you do - where in the code do you think that the problem is? Looks like the problem could be in the C code, better to tag it with C than with Java then.
    – Erwin Bolwidt
    Nov 21 at 21:25










  • I add the code with the question. Thanks for suggesting to tag with the C code.
    – Aqeel Haider
    Nov 21 at 21:26










  • My output with a sample image is: Skintone Using CPU: 75417 Total Pixel Of CPU: 262144 SKinTone Percentage Using CPU: 29% Skintone Using GPU: 29269 Total Pixel Of GPU: 262144 SKinTone Percentage Using GPU: 11% They should be same.
    – Aqeel Haider
    Nov 21 at 21:28













up vote
-1
down vote

favorite









up vote
-1
down vote

favorite











I'm calculating the skin tone of an image in Java.




  1. Convert each pixel of the image to YCbCr.

  2. Check whether the pixel's Cb and Cr values fall within a specific range; if they do, it is a skin color.

  3. Calculate the percentage by dividing the number of skin-color pixels by the total number of pixels.


It works fine in the CPU code, but when I convert it to GPU code, the pixel percentage does not come out right.



The confusing part for me was sending the pixel data to the GPU and reading its r, g, b values on the GPU.



So I followed the JCuda Pixel Invert example to send the pixel data. The difference is that the example sends the pixel data as an int array, whereas I'm sending it as a byte array.



Here is the code.



import static jcuda.driver.JCudaDriver.cuCtxCreate;
import static jcuda.driver.JCudaDriver.cuCtxSynchronize;
import static jcuda.driver.JCudaDriver.cuDeviceGet;
import static jcuda.driver.JCudaDriver.cuInit;
import static jcuda.driver.JCudaDriver.cuLaunchKernel;
import static jcuda.driver.JCudaDriver.cuMemAlloc;
import static jcuda.driver.JCudaDriver.cuMemFree;
import static jcuda.driver.JCudaDriver.cuMemcpyDtoH;
import static jcuda.driver.JCudaDriver.cuMemcpyHtoD;

import java.awt.image.BufferedImage;
import java.awt.image.DataBuffer;
import java.awt.image.DataBufferByte;
import java.awt.image.Raster;
import java.io.File;
import java.io.IOException;

import javax.imageio.ImageIO;

import ij.IJ;
import jcuda.Pointer;
import jcuda.Sizeof;
import jcuda.driver.CUcontext;
import jcuda.driver.CUdevice;
import jcuda.driver.CUdeviceptr;
import jcuda.driver.CUfunction;
import jcuda.driver.JCudaDriver;
import jcuda.nvrtc.JNvrtc;

public class SkinTone {

public static void CalculateSKintoneGPU(File file) throws IOException {
BufferedImage bufferedImage = ImageIO.read(file);
if (bufferedImage == null || bufferedImage.getData() == null)
return;
Raster raster = bufferedImage.getData();

DataBuffer dataBuffer = raster.getDataBuffer();
DataBufferByte dataBufferInt = (DataBufferByte)dataBuffer;
byte pixels = dataBufferInt.getData();

int totalPixels = raster.getHeight() * raster.getWidth();

CUfunction kernelFunction = initlize();

int output = execute(kernelFunction, pixels, raster.getWidth(), raster.getHeight());
// Flushing memory
raster = null;
bufferedImage.flush();
bufferedImage = null;

long skintoneThreshold = Math.round(output[0] / (double) totalPixels * 100.0);

System.err.println("Skintone Using GPU: " + output[0]);
System.err.println("Total Pixel Of GPU: " + totalPixels);
System.err.println("SKinTone Percentage Using GPU: " + skintoneThreshold + "%");
}

static int execute(CUfunction kernelFunction, byte pixels, int w, int h) {
// Allocate memory on the device, and copy the host data to the device
int size = w * h * Sizeof.BYTE;
CUdeviceptr pointer = new CUdeviceptr();
cuMemAlloc(pointer, size);
cuMemcpyHtoD(pointer, Pointer.to(pixels), size);

int numElements = 9;
int s = 0;
// Allocate device output memory
CUdeviceptr deviceOutput = new CUdeviceptr();
cuMemAlloc(deviceOutput, numElements * Sizeof.INT);

// Set up the kernel parameters: A pointer to an array
// of pointers which point to the actual values.
Pointer kernelParameters = Pointer.to(Pointer.to(pointer), Pointer.to(new int { w }),
Pointer.to(new int { h }), Pointer.to(deviceOutput));

// Call the kernel function
int blockSize = 16;
int gridSize = (Math.max(w, h) + blockSize - 1) / blockSize;
cuLaunchKernel(kernelFunction, gridSize, gridSize, 1, // Grid dimension
blockSize, blockSize, 1, // Block dimension
0, null, // Shared memory size and stream
kernelParameters, null // Kernel- and extra parameters
);
cuCtxSynchronize();

// Allocate host output memory and copy the device output
// to the host.
int hostOutput = new int[numElements];
cuMemcpyDtoH(Pointer.to(hostOutput), deviceOutput, numElements * Sizeof.INT);

// Clean up.
cuMemFree(deviceOutput);
cuMemFree(pointer);

return hostOutput;
}

public static CUfunction initlize() {

// Enable exceptions and omit all subsequent error checks
JCudaDriver.setExceptionsEnabled(true);
JNvrtc.setExceptionsEnabled(true);

// Initialize the driver and create a context for the first device.
cuInit(0);
CUdevice device = new CUdevice();
cuDeviceGet(device, 0);
CUcontext context = new CUcontext();
cuCtxCreate(context, 0, device);

// Obtain the CUDA source code from the CUDA file
String cuFileName = "Skintone.cu";
String sourceCode = CudaUtils.readResourceAsString(cuFileName);
if (sourceCode == null) {
IJ.showMessage("Error", "Could not read the kernel source code");
}

// Create the kernel function
return CudaUtils.createFunction(sourceCode, "skintone");
}

public static void CalculateSKintoneCPU(File file) throws IOException {
BufferedImage bufferedImage = ImageIO.read(file);
if (bufferedImage == null || bufferedImage.getData() == null)
return;
Raster raster = bufferedImage.getData();
float rgb = new float[4];
int totalPixels = raster.getHeight() * raster.getWidth();

int skinTonePixels = 0;

for (int x = 0; x < raster.getWidth(); x++) {
for (int y = 0; y < raster.getHeight(); y++) {
raster.getPixel(x, y, rgb);
if (skintone(rgb)) {
skinTonePixels++;
}
}
}

// Flushing memory
raster = null;
rgb = null;
bufferedImage.flush();
bufferedImage = null;

long skintoneThreshold = Math.round(skinTonePixels / (double) totalPixels * 100.0);

System.err.println("Skintone Using CPU: " + skinTonePixels);
System.err.println("Total Pixel Of CPU: " + totalPixels);
System.err.println("SKinTone Percentage Using CPU: " + skintoneThreshold + "%");
}

private static boolean skintone(float rgb) {
float yCbCr = (float) convertRGBtoYUV(rgb);
if ((yCbCr[1] >= 80 && yCbCr[1] <= 120) && (yCbCr[2] >= 133 && yCbCr[2] <= 173)) {
return true;
}
return false;
}

private static float convertRGBtoYUV(float rgb) {
final float yCbCr = new float[3];
float r = rgb[0];
float g = rgb[1];
float b = rgb[2];

yCbCr[0] = 16 + (0.299f * r) + (0.587f * g) + (0.144f * b);
yCbCr[1] = 128 + (-0.169f * r) - (0.331f * g) + (0.5f * b);
yCbCr[2] = 128 + (0.5f * r) - (0.419f * g) - (0.081f * b);

return yCbCr;
}

public static void main(String args) throws IOException {
File file = new File("C:\Users\Aqeel\git\jcuda-imagej-example\src\test\resources\lena512color.png");
CalculateSKintoneCPU(file);
CalculateSKintoneGPU(file);
}


}



Kernel file



    extern "C"
/*
 * Counts skin-tone pixels. Each thread handles one pixel of a w x h image
 * stored row by row in `data`, reading red, green and blue from the .x, .y
 * and .z components. A pixel is counted when Cb is in [80, 120] and Cr is in
 * [133, 173]. The count is accumulated in output[0], which this kernel only
 * increments and never resets.
 */
__global__ void skintone(uchar4* data, int w, int h, int* output)
{
    const int col = threadIdx.x + blockIdx.x * blockDim.x;
    const int row = threadIdx.y + blockIdx.y * blockDim.y;

    // Threads that fall outside the image have nothing to do.
    if (col >= w || row >= h)
    {
        return;
    }

    const uchar4 px = data[row * w + col];
    const float r = px.x;
    const float g = px.y;
    const float b = px.z;

    const float cb = 128 + (-0.169f * r) - (0.331f * g) + (0.5f * b);
    const float cr = 128 + (0.5f * r) - (0.419f * g) - (0.081f * b);

    const bool cbInRange = (cb >= 80 && cb <= 120);
    const bool crInRange = (cr >= 133 && cr <= 173);

    // atomicAdd keeps concurrent increments from different threads from being lost.
    if (cbInRange && crInRange)
    {
        atomicAdd(&output[0], 1);
    }
}


Complete example source. The machine needs an NVIDIA card, CUDA Toolkit v9 and graphics drivers.










share|improve this question















I'm calculating the skin tone of an image in Java.




  1. Convert each pixel of the image to YCbCr.

  2. Check whether the pixel's Cb and Cr values fall within a specific range; if they do, it is a skin color.

  3. Calculate the percentage by dividing the number of skin-color pixels by the total number of pixels.


It works fine in the CPU code, but when I convert it to GPU code, the pixel percentage does not come out right.



The confusing part for me was sending the pixel data to the GPU and reading its r, g, b values on the GPU.



So I followed the JCuda Pixel Invert example to send the pixel data. The difference is that the example sends the pixel data as an int array, whereas I'm sending it as a byte array.



Here is the code.



import static jcuda.driver.JCudaDriver.cuCtxCreate;
import static jcuda.driver.JCudaDriver.cuCtxSynchronize;
import static jcuda.driver.JCudaDriver.cuDeviceGet;
import static jcuda.driver.JCudaDriver.cuInit;
import static jcuda.driver.JCudaDriver.cuLaunchKernel;
import static jcuda.driver.JCudaDriver.cuMemAlloc;
import static jcuda.driver.JCudaDriver.cuMemFree;
import static jcuda.driver.JCudaDriver.cuMemcpyDtoH;
import static jcuda.driver.JCudaDriver.cuMemcpyHtoD;

import java.awt.image.BufferedImage;
import java.awt.image.DataBuffer;
import java.awt.image.DataBufferByte;
import java.awt.image.Raster;
import java.io.File;
import java.io.IOException;

import javax.imageio.ImageIO;

import ij.IJ;
import jcuda.Pointer;
import jcuda.Sizeof;
import jcuda.driver.CUcontext;
import jcuda.driver.CUdevice;
import jcuda.driver.CUdeviceptr;
import jcuda.driver.CUfunction;
import jcuda.driver.JCudaDriver;
import jcuda.nvrtc.JNvrtc;

public class SkinTone {

public static void CalculateSKintoneGPU(File file) throws IOException {
BufferedImage bufferedImage = ImageIO.read(file);
if (bufferedImage == null || bufferedImage.getData() == null)
return;
Raster raster = bufferedImage.getData();

DataBuffer dataBuffer = raster.getDataBuffer();
DataBufferByte dataBufferInt = (DataBufferByte)dataBuffer;
byte pixels = dataBufferInt.getData();

int totalPixels = raster.getHeight() * raster.getWidth();

CUfunction kernelFunction = initlize();

int output = execute(kernelFunction, pixels, raster.getWidth(), raster.getHeight());
// Flushing memory
raster = null;
bufferedImage.flush();
bufferedImage = null;

long skintoneThreshold = Math.round(output[0] / (double) totalPixels * 100.0);

System.err.println("Skintone Using GPU: " + output[0]);
System.err.println("Total Pixel Of GPU: " + totalPixels);
System.err.println("SKinTone Percentage Using GPU: " + skintoneThreshold + "%");
}

static int execute(CUfunction kernelFunction, byte pixels, int w, int h) {
// Allocate memory on the device, and copy the host data to the device
int size = w * h * Sizeof.BYTE;
CUdeviceptr pointer = new CUdeviceptr();
cuMemAlloc(pointer, size);
cuMemcpyHtoD(pointer, Pointer.to(pixels), size);

int numElements = 9;
int s = 0;
// Allocate device output memory
CUdeviceptr deviceOutput = new CUdeviceptr();
cuMemAlloc(deviceOutput, numElements * Sizeof.INT);

// Set up the kernel parameters: A pointer to an array
// of pointers which point to the actual values.
Pointer kernelParameters = Pointer.to(Pointer.to(pointer), Pointer.to(new int { w }),
Pointer.to(new int { h }), Pointer.to(deviceOutput));

// Call the kernel function
int blockSize = 16;
int gridSize = (Math.max(w, h) + blockSize - 1) / blockSize;
cuLaunchKernel(kernelFunction, gridSize, gridSize, 1, // Grid dimension
blockSize, blockSize, 1, // Block dimension
0, null, // Shared memory size and stream
kernelParameters, null // Kernel- and extra parameters
);
cuCtxSynchronize();

// Allocate host output memory and copy the device output
// to the host.
int hostOutput = new int[numElements];
cuMemcpyDtoH(Pointer.to(hostOutput), deviceOutput, numElements * Sizeof.INT);

// Clean up.
cuMemFree(deviceOutput);
cuMemFree(pointer);

return hostOutput;
}

public static CUfunction initlize() {

// Enable exceptions and omit all subsequent error checks
JCudaDriver.setExceptionsEnabled(true);
JNvrtc.setExceptionsEnabled(true);

// Initialize the driver and create a context for the first device.
cuInit(0);
CUdevice device = new CUdevice();
cuDeviceGet(device, 0);
CUcontext context = new CUcontext();
cuCtxCreate(context, 0, device);

// Obtain the CUDA source code from the CUDA file
String cuFileName = "Skintone.cu";
String sourceCode = CudaUtils.readResourceAsString(cuFileName);
if (sourceCode == null) {
IJ.showMessage("Error", "Could not read the kernel source code");
}

// Create the kernel function
return CudaUtils.createFunction(sourceCode, "skintone");
}

public static void CalculateSKintoneCPU(File file) throws IOException {
BufferedImage bufferedImage = ImageIO.read(file);
if (bufferedImage == null || bufferedImage.getData() == null)
return;
Raster raster = bufferedImage.getData();
float rgb = new float[4];
int totalPixels = raster.getHeight() * raster.getWidth();

int skinTonePixels = 0;

for (int x = 0; x < raster.getWidth(); x++) {
for (int y = 0; y < raster.getHeight(); y++) {
raster.getPixel(x, y, rgb);
if (skintone(rgb)) {
skinTonePixels++;
}
}
}

// Flushing memory
raster = null;
rgb = null;
bufferedImage.flush();
bufferedImage = null;

long skintoneThreshold = Math.round(skinTonePixels / (double) totalPixels * 100.0);

System.err.println("Skintone Using CPU: " + skinTonePixels);
System.err.println("Total Pixel Of CPU: " + totalPixels);
System.err.println("SKinTone Percentage Using CPU: " + skintoneThreshold + "%");
}

private static boolean skintone(float rgb) {
float yCbCr = (float) convertRGBtoYUV(rgb);
if ((yCbCr[1] >= 80 && yCbCr[1] <= 120) && (yCbCr[2] >= 133 && yCbCr[2] <= 173)) {
return true;
}
return false;
}

private static float convertRGBtoYUV(float rgb) {
final float yCbCr = new float[3];
float r = rgb[0];
float g = rgb[1];
float b = rgb[2];

yCbCr[0] = 16 + (0.299f * r) + (0.587f * g) + (0.144f * b);
yCbCr[1] = 128 + (-0.169f * r) - (0.331f * g) + (0.5f * b);
yCbCr[2] = 128 + (0.5f * r) - (0.419f * g) - (0.081f * b);

return yCbCr;
}

public static void main(String args) throws IOException {
File file = new File("C:\Users\Aqeel\git\jcuda-imagej-example\src\test\resources\lena512color.png");
CalculateSKintoneCPU(file);
CalculateSKintoneGPU(file);
}


}



Kernel file



    extern "C"
/*
 * Counts skin-tone pixels. Each thread handles one pixel of a w x h image
 * stored row by row in `data`, reading red, green and blue from the .x, .y
 * and .z components. A pixel is counted when Cb is in [80, 120] and Cr is in
 * [133, 173]. The count is accumulated in output[0], which this kernel only
 * increments and never resets.
 */
__global__ void skintone(uchar4* data, int w, int h, int* output)
{
    const int col = threadIdx.x + blockIdx.x * blockDim.x;
    const int row = threadIdx.y + blockIdx.y * blockDim.y;

    // Threads that fall outside the image have nothing to do.
    if (col >= w || row >= h)
    {
        return;
    }

    const uchar4 px = data[row * w + col];
    const float r = px.x;
    const float g = px.y;
    const float b = px.z;

    const float cb = 128 + (-0.169f * r) - (0.331f * g) + (0.5f * b);
    const float cr = 128 + (0.5f * r) - (0.419f * g) - (0.081f * b);

    const bool cbInRange = (cb >= 80 && cb <= 120);
    const bool crInRange = (cr >= 133 && cr <= 173);

    // atomicAdd keeps concurrent increments from different threads from being lost.
    if (cbInRange && crInRange)
    {
        atomicAdd(&output[0], 1);
    }
}


Complete example source. The machine needs an NVIDIA card, CUDA Toolkit v9 and graphics drivers.







java c cuda gpu jcuda






share|improve this question















share|improve this question













share|improve this question




share|improve this question








edited Nov 22 at 10:15

























asked Nov 21 at 21:19









Aqeel Haider

196313




196313








  • 1




    What's your sample? What did you expect to come out? What did come out? What debugging did you do - where in the code do you think that the problem is? Looks like the problem could be in the C code, better to tag it with C than with Java then.
    – Erwin Bolwidt
    Nov 21 at 21:25










  • I add the code with the question. Thanks for suggesting to tag with the C code.
    – Aqeel Haider
    Nov 21 at 21:26










  • My output with a sample image is: Skintone Using CPU: 75417 Total Pixel Of CPU: 262144 SKinTone Percentage Using CPU: 29% Skintone Using GPU: 29269 Total Pixel Of GPU: 262144 SKinTone Percentage Using GPU: 11% They should be same.
    – Aqeel Haider
    Nov 21 at 21:28














  • 1




    What's your sample? What did you expect to come out? What did come out? What debugging did you do - where in the code do you think that the problem is? Looks like the problem could be in the C code, better to tag it with C than with Java then.
    – Erwin Bolwidt
    Nov 21 at 21:25










  • I add the code with the question. Thanks for suggesting to tag with the C code.
    – Aqeel Haider
    Nov 21 at 21:26










  • My output with a sample image is: Skintone Using CPU: 75417 Total Pixel Of CPU: 262144 SKinTone Percentage Using CPU: 29% Skintone Using GPU: 29269 Total Pixel Of GPU: 262144 SKinTone Percentage Using GPU: 11% They should be same.
    – Aqeel Haider
    Nov 21 at 21:28








1




1




What's your sample? What did you expect to come out? What did come out? What debugging did you do - where in the code do you think that the problem is? Looks like the problem could be in the C code, better to tag it with C than with Java then.
– Erwin Bolwidt
Nov 21 at 21:25




What's your sample? What did you expect to come out? What did come out? What debugging did you do - where in the code do you think that the problem is? Looks like the problem could be in the C code, better to tag it with C than with Java then.
– Erwin Bolwidt
Nov 21 at 21:25












I add the code with the question. Thanks for suggesting to tag with the C code.
– Aqeel Haider
Nov 21 at 21:26




I add the code with the question. Thanks for suggesting to tag with the C code.
– Aqeel Haider
Nov 21 at 21:26












My output with a sample image is: Skintone Using CPU: 75417 Total Pixel Of CPU: 262144 SKinTone Percentage Using CPU: 29% Skintone Using GPU: 29269 Total Pixel Of GPU: 262144 SKinTone Percentage Using GPU: 11% They should be same.
– Aqeel Haider
Nov 21 at 21:28




My output with a sample image is: Skintone Using CPU: 75417 Total Pixel Of CPU: 262144 SKinTone Percentage Using CPU: 29% Skintone Using GPU: 29269 Total Pixel Of GPU: 262144 SKinTone Percentage Using GPU: 11% They should be same.
– Aqeel Haider
Nov 21 at 21:28












1 Answer
1






active

oldest

votes

















up vote
0
down vote



accepted










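I solved the problem by trial and error. In the kernel I swapped the positions of r and b, and I also had to send the pixel data as an int array instead of a byte array in Java; with both changes the problem was resolved. (The swap is needed because each pixel is now a packed ARGB int, whose bytes on a little-endian machine are laid out in memory as B, G, R, A, so the kernel's uchar4 sees blue in .x and red in .z.)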



extern "C"
/*
 * Counts skin-tone pixels. Each thread handles one pixel of a w x h image
 * stored row by row in `data`, reading blue, green and red from the .x, .y
 * and .z components. A pixel is counted when Cb is in [80, 120] and Cr is in
 * [133, 173]. The count is accumulated in output[0], which this kernel only
 * increments and never resets.
 */
__global__ void skintone(uchar4* data, int w, int h, int* output)
{
    const int col = threadIdx.x + blockIdx.x * blockDim.x;
    const int row = threadIdx.y + blockIdx.y * blockDim.y;

    // Threads that fall outside the image have nothing to do.
    if (col >= w || row >= h)
    {
        return;
    }

    const uchar4 px = data[row * w + col];
    const float b = (float)px.x;
    const float g = (float)px.y;
    const float r = (float)px.z;

    const float cb = 128 + (-0.169f * r) - (0.331f * g) + (0.5f * b);
    const float cr = 128 + (0.5f * r) - (0.419f * g) - (0.081f * b);

    const bool cbInRange = (cb >= 80 && cb <= 120);
    const bool crInRange = (cr >= 133 && cr <= 173);

    // atomicAdd keeps concurrent increments from different threads from being lost.
    if (cbInRange && crInRange)
    {
        atomicAdd(&output[0], 1);
    }
}


Java Code Changes.



/**
 * Computes the share of skin-tone pixels in the classpath resource {@code images.jpg}
 * on the GPU and prints the raw count, the total pixel count and the rounded
 * percentage to stderr.
 *
 * <p>The pixels are grabbed into an {@code int} array (one packed pixel per element)
 * and handed to {@code execute} together with the kernel function returned by
 * {@code initlize}; element 0 of the result is used as the skin-tone pixel count.
 *
 * @throws IOException if the resource is missing, the image cannot be read, or the
 *     pixel grab fails or is interrupted
 */
public static void calculateSkintoneGPU() throws IOException {
    java.net.URL resource = SkinTone.class.getClassLoader().getResource("images.jpg");
    if (resource == null) {
        throw new IOException("Resource not found on classpath: images.jpg");
    }

    BufferedImage img = ImageIO.read(resource);
    if (img == null) {
        // ImageIO.read returns null when no registered reader can decode the input.
        return;
    }

    int width = img.getWidth();
    int height = img.getHeight();
    int totalPixels = width * height;

    // Must be int[]: PixelGrabber fills one packed pixel per array element.
    int[] pixels = new int[totalPixels];
    PixelGrabber pg = new PixelGrabber(img, 0, 0, width, height, pixels, 0, width);
    try {
        if (!pg.grabPixels()) {
            throw new IOException("Failed to grab pixels from images.jpg");
        }
    } catch (InterruptedException e) {
        // Restore the interrupt flag and fail rather than process a partly filled array.
        Thread.currentThread().interrupt();
        throw new IOException("Interrupted while grabbing pixels from images.jpg", e);
    }

    CUfunction kernelFunction = initlize();

    // Assumes execute returns int[] (the result is indexed below) - confirm against its declaration.
    int[] output = execute(kernelFunction, pixels, width, height);

    // Pixel data has already been copied out, so the image's resources can be released.
    img.flush();

    long skintoneThreshold = Math.round(output[0] / (double) totalPixels * 100.0);

    System.err.println("Skintone Using GPU: " + output[0]);
    System.err.println("Total Pixel Of GPU: " + totalPixels);
    System.err.println("SKinTone Percentage Using GPU: " + skintoneThreshold + "%");
}





share|improve this answer





















    Your Answer






    // Chain: "editor" (via ifUsing) -> "externalEditor" -> "snippets" -> snippets.init().
    StackExchange.ifUsing("editor", function onEditorInUse() {
        var initSnippets = function () {
            StackExchange.snippets.init();
        };
        var loadSnippets = function () {
            StackExchange.using("snippets", initSnippets);
        };
        StackExchange.using("externalEditor", loadSnippets);
    }, "code-snippets");

    // Page bootstrap for the answer editor: sets up the tag renderer, then creates the
    // editor once "externalEditor" (and "snippets", when enabled) has been loaded.
    StackExchange.ready(function() {
        var channelOptions = {
            tags: "".split(" "),
            id: "1"
        };
        initTagRenderer("".split(" "), "".split(" "), channelOptions);

        StackExchange.using("externalEditor", function() {
            // Have to fire editor after snippets, if snippets enabled
            if (StackExchange.settings.snippets.snippetsEnabled) {
                StackExchange.using("snippets", function() {
                    createEditor();
                });
            }
            else {
                createEditor();
            }
        });

        // Configures the answer editor. The two HTML strings keep their angle brackets
        // as \u003c / \u003e escapes and their inner double quotes backslash-escaped;
        // without those backslashes the literals do not parse.
        function createEditor() {
            StackExchange.prepareEditor({
                heartbeatType: 'answer',
                convertImagesToLinks: true,
                noModals: true,
                showLowRepImageUploadWarning: true,
                reputationToPostImages: 10,
                bindNavPrevention: true,
                postfix: "",
                imageUploader: {
                    brandingHtml: "Powered by \u003ca class=\"icon-imgur-white\" href=\"https://imgur.com/\"\u003e\u003c/a\u003e",
                    contentPolicyHtml: "User contributions licensed under \u003ca href=\"https://creativecommons.org/licenses/by-sa/3.0/\"\u003ecc by-sa 3.0 with attribution required\u003c/a\u003e \u003ca href=\"https://stackoverflow.com/legal/content-policy\"\u003e(content policy)\u003c/a\u003e",
                    allowUrls: true
                },
                onDemand: true,
                discardSelector: ".discard-answer",
                immediatelyShowMarkdownHelp: true
            });
        }
    });














     

    draft saved


    draft discarded


















    // When the page is ready, initialise the post-login hook for the new-answer form.
    StackExchange.ready(function () {
        var encodedReturnUrl = 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53420617%2fcalculate-skintone-using-jcuda-is-not-giving-the-right-percentage%23new-answer';
        StackExchange.openid.initPostLogin('.new-post-login', encodedReturnUrl, 'question_page');
    });

    Post as a guest















    Required, but never shown

























    1 Answer
    1






    active

    oldest

    votes








    1 Answer
    1






    active

    oldest

    votes









    active

    oldest

    votes






    active

    oldest

    votes








    up vote
    0
    down vote



    accepted










    I solved the problem by trial and error. In the kernel I swapped the positions of r and b, which resolved the problem; I also had to send the pixel data from Java as an int array instead of a byte array.



    // Counts the pixels whose chroma falls inside the skin-tone window.
    // Each thread handles one pixel of a w-by-h image indexed row-major in `data`
    // and atomically increments output[0] when Cb is in [80, 120] and Cr is in [133, 173].
    // The x/y/z members of each uchar4 are read as blue/green/red respectively.
    extern "C"
    __global__ void skintone(uchar4* data, int w, int h, int* output)
    {
        const int col = blockIdx.x * blockDim.x + threadIdx.x;
        const int row = blockIdx.y * blockDim.y + threadIdx.y;

        // Threads that land outside the image have nothing to do.
        if (col >= w || row >= h)
        {
            return;
        }

        const uchar4 px = data[row * w + col];
        const float blue  = (float)px.x;
        const float green = (float)px.y;
        const float red   = (float)px.z;

        // RGB -> chroma (Cb, Cr); no luma term is needed for the range test.
        const float cb = 128 + (-0.169f * red) - (0.331f * green) + (0.5f * blue);
        const float cr = 128 + (0.5f * red) - (0.419f * green) - (0.081f * blue);

        const bool cbInRange = (cb >= 80 && cb <= 120);
        const bool crInRange = (cr >= 133 && cr <= 173);
        if (cbInRange && crInRange)
        {
            atomicAdd(&output[0], 1);
        }
    }


    Java Code Changes.



    /**
     * Computes the share of skin-tone pixels in the classpath resource {@code images.jpg}
     * on the GPU and prints the raw count, the total pixel count and the rounded
     * percentage to stderr.
     *
     * <p>The pixels are grabbed into an {@code int} array (one packed pixel per element)
     * and handed to {@code execute} together with the kernel function returned by
     * {@code initlize}; element 0 of the result is used as the skin-tone pixel count.
     *
     * @throws IOException if the resource is missing, the image cannot be read, or the
     *     pixel grab fails or is interrupted
     */
    public static void calculateSkintoneGPU() throws IOException {
        java.net.URL resource = SkinTone.class.getClassLoader().getResource("images.jpg");
        if (resource == null) {
            throw new IOException("Resource not found on classpath: images.jpg");
        }

        BufferedImage img = ImageIO.read(resource);
        if (img == null) {
            // ImageIO.read returns null when no registered reader can decode the input.
            return;
        }

        int width = img.getWidth();
        int height = img.getHeight();
        int totalPixels = width * height;

        // Must be int[]: PixelGrabber fills one packed pixel per array element.
        int[] pixels = new int[totalPixels];
        PixelGrabber pg = new PixelGrabber(img, 0, 0, width, height, pixels, 0, width);
        try {
            if (!pg.grabPixels()) {
                throw new IOException("Failed to grab pixels from images.jpg");
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag and fail rather than process a partly filled array.
            Thread.currentThread().interrupt();
            throw new IOException("Interrupted while grabbing pixels from images.jpg", e);
        }

        CUfunction kernelFunction = initlize();

        // Assumes execute returns int[] (the result is indexed below) - confirm against its declaration.
        int[] output = execute(kernelFunction, pixels, width, height);

        // Pixel data has already been copied out, so the image's resources can be released.
        img.flush();

        long skintoneThreshold = Math.round(output[0] / (double) totalPixels * 100.0);

        System.err.println("Skintone Using GPU: " + output[0]);
        System.err.println("Total Pixel Of GPU: " + totalPixels);
        System.err.println("SKinTone Percentage Using GPU: " + skintoneThreshold + "%");
    }





    share|improve this answer

























      up vote
      0
      down vote



      accepted










      I solved the problem by trial and error. In the kernel I swapped the positions of r and b, which resolved the problem; I also had to send the pixel data from Java as an int array instead of a byte array.



      // Counts the pixels whose chroma falls inside the skin-tone window.
      // Each thread handles one pixel of a w-by-h image indexed row-major in `data`
      // and atomically increments output[0] when Cb is in [80, 120] and Cr is in [133, 173].
      // The x/y/z members of each uchar4 are read as blue/green/red respectively.
      extern "C"
      __global__ void skintone(uchar4* data, int w, int h, int* output)
      {
          const int col = blockIdx.x * blockDim.x + threadIdx.x;
          const int row = blockIdx.y * blockDim.y + threadIdx.y;

          // Threads that land outside the image have nothing to do.
          if (col >= w || row >= h)
          {
              return;
          }

          const uchar4 px = data[row * w + col];
          const float blue  = (float)px.x;
          const float green = (float)px.y;
          const float red   = (float)px.z;

          // RGB -> chroma (Cb, Cr); no luma term is needed for the range test.
          const float cb = 128 + (-0.169f * red) - (0.331f * green) + (0.5f * blue);
          const float cr = 128 + (0.5f * red) - (0.419f * green) - (0.081f * blue);

          const bool cbInRange = (cb >= 80 && cb <= 120);
          const bool crInRange = (cr >= 133 && cr <= 173);
          if (cbInRange && crInRange)
          {
              atomicAdd(&output[0], 1);
          }
      }


      Java Code Changes.



      /**
       * Computes the share of skin-tone pixels in the classpath resource {@code images.jpg}
       * on the GPU and prints the raw count, the total pixel count and the rounded
       * percentage to stderr.
       *
       * <p>The pixels are grabbed into an {@code int} array (one packed pixel per element)
       * and handed to {@code execute} together with the kernel function returned by
       * {@code initlize}; element 0 of the result is used as the skin-tone pixel count.
       *
       * @throws IOException if the resource is missing, the image cannot be read, or the
       *     pixel grab fails or is interrupted
       */
      public static void calculateSkintoneGPU() throws IOException {
          java.net.URL resource = SkinTone.class.getClassLoader().getResource("images.jpg");
          if (resource == null) {
              throw new IOException("Resource not found on classpath: images.jpg");
          }

          BufferedImage img = ImageIO.read(resource);
          if (img == null) {
              // ImageIO.read returns null when no registered reader can decode the input.
              return;
          }

          int width = img.getWidth();
          int height = img.getHeight();
          int totalPixels = width * height;

          // Must be int[]: PixelGrabber fills one packed pixel per array element.
          int[] pixels = new int[totalPixels];
          PixelGrabber pg = new PixelGrabber(img, 0, 0, width, height, pixels, 0, width);
          try {
              if (!pg.grabPixels()) {
                  throw new IOException("Failed to grab pixels from images.jpg");
              }
          } catch (InterruptedException e) {
              // Restore the interrupt flag and fail rather than process a partly filled array.
              Thread.currentThread().interrupt();
              throw new IOException("Interrupted while grabbing pixels from images.jpg", e);
          }

          CUfunction kernelFunction = initlize();

          // Assumes execute returns int[] (the result is indexed below) - confirm against its declaration.
          int[] output = execute(kernelFunction, pixels, width, height);

          // Pixel data has already been copied out, so the image's resources can be released.
          img.flush();

          long skintoneThreshold = Math.round(output[0] / (double) totalPixels * 100.0);

          System.err.println("Skintone Using GPU: " + output[0]);
          System.err.println("Total Pixel Of GPU: " + totalPixels);
          System.err.println("SKinTone Percentage Using GPU: " + skintoneThreshold + "%");
      }





      share|improve this answer























        up vote
        0
        down vote



        accepted







        up vote
        0
        down vote



        accepted






        I solved the problem by trial and error. In the kernel I swapped the positions of r and b, which resolved the problem; I also had to send the pixel data from Java as an int array instead of a byte array.



        // Counts the pixels whose chroma falls inside the skin-tone window.
        // Each thread handles one pixel of a w-by-h image indexed row-major in `data`
        // and atomically increments output[0] when Cb is in [80, 120] and Cr is in [133, 173].
        // The x/y/z members of each uchar4 are read as blue/green/red respectively.
        extern "C"
        __global__ void skintone(uchar4* data, int w, int h, int* output)
        {
            const int col = blockIdx.x * blockDim.x + threadIdx.x;
            const int row = blockIdx.y * blockDim.y + threadIdx.y;

            // Threads that land outside the image have nothing to do.
            if (col >= w || row >= h)
            {
                return;
            }

            const uchar4 px = data[row * w + col];
            const float blue  = (float)px.x;
            const float green = (float)px.y;
            const float red   = (float)px.z;

            // RGB -> chroma (Cb, Cr); no luma term is needed for the range test.
            const float cb = 128 + (-0.169f * red) - (0.331f * green) + (0.5f * blue);
            const float cr = 128 + (0.5f * red) - (0.419f * green) - (0.081f * blue);

            const bool cbInRange = (cb >= 80 && cb <= 120);
            const bool crInRange = (cr >= 133 && cr <= 173);
            if (cbInRange && crInRange)
            {
                atomicAdd(&output[0], 1);
            }
        }


        Java Code Changes.



        /**
         * Computes the share of skin-tone pixels in the classpath resource {@code images.jpg}
         * on the GPU and prints the raw count, the total pixel count and the rounded
         * percentage to stderr.
         *
         * <p>The pixels are grabbed into an {@code int} array (one packed pixel per element)
         * and handed to {@code execute} together with the kernel function returned by
         * {@code initlize}; element 0 of the result is used as the skin-tone pixel count.
         *
         * @throws IOException if the resource is missing, the image cannot be read, or the
         *     pixel grab fails or is interrupted
         */
        public static void calculateSkintoneGPU() throws IOException {
            java.net.URL resource = SkinTone.class.getClassLoader().getResource("images.jpg");
            if (resource == null) {
                throw new IOException("Resource not found on classpath: images.jpg");
            }

            BufferedImage img = ImageIO.read(resource);
            if (img == null) {
                // ImageIO.read returns null when no registered reader can decode the input.
                return;
            }

            int width = img.getWidth();
            int height = img.getHeight();
            int totalPixels = width * height;

            // Must be int[]: PixelGrabber fills one packed pixel per array element.
            int[] pixels = new int[totalPixels];
            PixelGrabber pg = new PixelGrabber(img, 0, 0, width, height, pixels, 0, width);
            try {
                if (!pg.grabPixels()) {
                    throw new IOException("Failed to grab pixels from images.jpg");
                }
            } catch (InterruptedException e) {
                // Restore the interrupt flag and fail rather than process a partly filled array.
                Thread.currentThread().interrupt();
                throw new IOException("Interrupted while grabbing pixels from images.jpg", e);
            }

            CUfunction kernelFunction = initlize();

            // Assumes execute returns int[] (the result is indexed below) - confirm against its declaration.
            int[] output = execute(kernelFunction, pixels, width, height);

            // Pixel data has already been copied out, so the image's resources can be released.
            img.flush();

            long skintoneThreshold = Math.round(output[0] / (double) totalPixels * 100.0);

            System.err.println("Skintone Using GPU: " + output[0]);
            System.err.println("Total Pixel Of GPU: " + totalPixels);
            System.err.println("SKinTone Percentage Using GPU: " + skintoneThreshold + "%");
        }





        share|improve this answer












        I solved the problem by trial and error. In the kernel I swapped the positions of r and b, which resolved the problem; I also had to send the pixel data from Java as an int array instead of a byte array.



        // Counts the pixels whose chroma falls inside the skin-tone window.
        // Each thread handles one pixel of a w-by-h image indexed row-major in `data`
        // and atomically increments output[0] when Cb is in [80, 120] and Cr is in [133, 173].
        // The x/y/z members of each uchar4 are read as blue/green/red respectively.
        extern "C"
        __global__ void skintone(uchar4* data, int w, int h, int* output)
        {
            const int col = blockIdx.x * blockDim.x + threadIdx.x;
            const int row = blockIdx.y * blockDim.y + threadIdx.y;

            // Threads that land outside the image have nothing to do.
            if (col >= w || row >= h)
            {
                return;
            }

            const uchar4 px = data[row * w + col];
            const float blue  = (float)px.x;
            const float green = (float)px.y;
            const float red   = (float)px.z;

            // RGB -> chroma (Cb, Cr); no luma term is needed for the range test.
            const float cb = 128 + (-0.169f * red) - (0.331f * green) + (0.5f * blue);
            const float cr = 128 + (0.5f * red) - (0.419f * green) - (0.081f * blue);

            const bool cbInRange = (cb >= 80 && cb <= 120);
            const bool crInRange = (cr >= 133 && cr <= 173);
            if (cbInRange && crInRange)
            {
                atomicAdd(&output[0], 1);
            }
        }


        Java Code Changes.



        /**
         * Computes the share of skin-tone pixels in the classpath resource {@code images.jpg}
         * on the GPU and prints the raw count, the total pixel count and the rounded
         * percentage to stderr.
         *
         * <p>The pixels are grabbed into an {@code int} array (one packed pixel per element)
         * and handed to {@code execute} together with the kernel function returned by
         * {@code initlize}; element 0 of the result is used as the skin-tone pixel count.
         *
         * @throws IOException if the resource is missing, the image cannot be read, or the
         *     pixel grab fails or is interrupted
         */
        public static void calculateSkintoneGPU() throws IOException {
            java.net.URL resource = SkinTone.class.getClassLoader().getResource("images.jpg");
            if (resource == null) {
                throw new IOException("Resource not found on classpath: images.jpg");
            }

            BufferedImage img = ImageIO.read(resource);
            if (img == null) {
                // ImageIO.read returns null when no registered reader can decode the input.
                return;
            }

            int width = img.getWidth();
            int height = img.getHeight();
            int totalPixels = width * height;

            // Must be int[]: PixelGrabber fills one packed pixel per array element.
            int[] pixels = new int[totalPixels];
            PixelGrabber pg = new PixelGrabber(img, 0, 0, width, height, pixels, 0, width);
            try {
                if (!pg.grabPixels()) {
                    throw new IOException("Failed to grab pixels from images.jpg");
                }
            } catch (InterruptedException e) {
                // Restore the interrupt flag and fail rather than process a partly filled array.
                Thread.currentThread().interrupt();
                throw new IOException("Interrupted while grabbing pixels from images.jpg", e);
            }

            CUfunction kernelFunction = initlize();

            // Assumes execute returns int[] (the result is indexed below) - confirm against its declaration.
            int[] output = execute(kernelFunction, pixels, width, height);

            // Pixel data has already been copied out, so the image's resources can be released.
            img.flush();

            long skintoneThreshold = Math.round(output[0] / (double) totalPixels * 100.0);

            System.err.println("Skintone Using GPU: " + output[0]);
            System.err.println("Total Pixel Of GPU: " + totalPixels);
            System.err.println("SKinTone Percentage Using GPU: " + skintoneThreshold + "%");
        }






        share|improve this answer












        share|improve this answer



        share|improve this answer










        answered Nov 22 at 17:16









        Aqeel Haider

        196313




        196313






























             

            draft saved


            draft discarded



















































             


            draft saved


            draft discarded














            // When the page is ready, initialise the post-login hook for the new-answer form.
            StackExchange.ready(function () {
                var encodedReturnUrl = 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53420617%2fcalculate-skintone-using-jcuda-is-not-giving-the-right-percentage%23new-answer';
                StackExchange.openid.initPostLogin('.new-post-login', encodedReturnUrl, 'question_page');
            });

            Post as a guest















            Required, but never shown





















































            Required, but never shown














            Required, but never shown












            Required, but never shown







            Required, but never shown

































            Required, but never shown














            Required, but never shown












            Required, but never shown







            Required, but never shown







            Popular posts from this blog

            How to ignore python UserWarning in pytest?

            What visual should I use to simply compare current year value vs last year in Power BI desktop

            Script to remove string up to first number