summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Michael Bien <[email protected]> 2010-04-16 00:12:28 +0200
committer: Michael Bien <[email protected]> 2010-04-16 00:12:28 +0200
commit: 33d868102305697fd429039aa08f143db843d23a (patch)
tree: dda8dc620ff353c90e21910a799c21922ae061f5
parent: 2f6ce2419ec465bb40e076746367ae7073f83f45 (diff)
Fixed buffer-offset bug in CLKernel.getCompileWorkGroupSize, optimized buffer size on 32-bit systems, and enabled the test.
-rw-r--r-- src/com/jogamp/opencl/CLKernel.java 15
-rw-r--r-- test/com/jogamp/opencl/CLProgramTest.java 6
2 files changed, 15 insertions, 6 deletions
diff --git a/src/com/jogamp/opencl/CLKernel.java b/src/com/jogamp/opencl/CLKernel.java
index 37d355c3..f3a8684f 100644
--- a/src/com/jogamp/opencl/CLKernel.java
+++ b/src/com/jogamp/opencl/CLKernel.java
@@ -2,7 +2,6 @@ package com.jogamp.opencl;
import com.jogamp.opencl.util.CLUtil;
import com.jogamp.common.nio.Buffers;
-import com.jogamp.common.os.Platform;
import com.jogamp.common.nio.Int64Buffer;
import java.nio.Buffer;
import java.nio.ByteBuffer;
@@ -10,6 +9,7 @@ import java.nio.ByteOrder;
import static com.jogamp.opencl.CLException.*;
import static com.jogamp.opencl.CL.*;
+import static com.jogamp.common.os.Platform.*;
/**
* High level abstraction for an OpenCL Kernel.
@@ -37,7 +37,7 @@ public class CLKernel extends CLObject implements CLResource, Cloneable {
CLKernel(CLProgram program, long id) {
super(program.getContext(), id);
this.program = program;
- this.buffer = Buffers.newDirectByteBuffer(8*3);
+ this.buffer = Buffers.newDirectByteBuffer((is32Bit()?4:8)*3);
Int64Buffer size = Int64Buffer.allocateDirect(1);
@@ -112,7 +112,7 @@ public class CLKernel extends CLObject implements CLResource, Cloneable {
// }
public CLKernel setArg(int argumentIndex, CLMemory<?> value) {
- setArgument(argumentIndex, Platform.is32Bit()?4:8, wrap(value.ID));
+ setArgument(argumentIndex, is32Bit()?4:8, wrap(value.ID));
return this;
}
@@ -242,11 +242,16 @@ public class CLKernel extends CLObject implements CLResource, Cloneable {
* The returned array has always three elements.
*/
public long[] getCompileWorkGroupSize(CLDevice device) {
- int ret = cl.clGetKernelWorkGroupInfo(ID, device.ID, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, 8*3, buffer, null);
+ int ret = cl.clGetKernelWorkGroupInfo(ID, device.ID, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, (is32Bit()?4:8)*3, buffer, null);
if(ret != CL_SUCCESS) {
throw newException(ret, "error while asking for CL_KERNEL_COMPILE_WORK_GROUP_SIZE of "+this+" on "+device);
}
- return new long[] { buffer.getLong(0), buffer.getLong(1), buffer.getLong(2) };
+
+ if(is32Bit()) {
+ return new long[] { buffer.getInt(0), buffer.getInt(4), buffer.getInt(8) };
+ }else {
+ return new long[] { buffer.getLong(0), buffer.getLong(8), buffer.getLong(16) };
+ }
}
private long getWorkGroupInfo(CLDevice device, int flag) {
diff --git a/test/com/jogamp/opencl/CLProgramTest.java b/test/com/jogamp/opencl/CLProgramTest.java
index 18ae1e85..e6ce9207 100644
--- a/test/com/jogamp/opencl/CLProgramTest.java
+++ b/test/com/jogamp/opencl/CLProgramTest.java
@@ -239,9 +239,13 @@ public class CLProgramTest {
long[] wgs = kernel.getCompileWorkGroupSize(context.getDevices()[0]);
- // TODO test on other hardware and compare results
out.println("compile workgroup size: " + wgs[0]+" "+wgs[1]+" "+wgs[2]);
+ assertEquals(512, wgs[0]);
+ assertEquals(512, wgs[1]);
+ assertEquals(512, wgs[2]);
+
+
}finally{
context.release();
}