From 7a26673570fd77839672d472b04dbd509395de5b Mon Sep 17 00:00:00 2001 From: Wade Walker Date: Sun, 17 Apr 2016 19:30:53 -0500 Subject: Add new kernel workgroup info functions to CLKernel Added workgroup info functions for OpenCL 1.1, since that's the version the CL Java objects currently wrap. Also added a test that shows how to query values from version 1.2 and later. --- src/com/jogamp/opencl/CLKernel.java | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'src/com/jogamp/opencl') diff --git a/src/com/jogamp/opencl/CLKernel.java b/src/com/jogamp/opencl/CLKernel.java index c3031ae9..4eeb5af9 100644 --- a/src/com/jogamp/opencl/CLKernel.java +++ b/src/com/jogamp/opencl/CLKernel.java @@ -315,6 +315,7 @@ public class CLKernel extends CLObjectResource implements Cloneable { * qualifier and whose size is specified with clSetKernelArg. * If the local memory size, for any pointer argument to the kernel declared with * the __local address qualifier, is not specified, its size is assumed to be 0. + * @version 1.0 */ public long getLocalMemorySize(final CLDevice device) { return getWorkGroupInfo(device, CL_KERNEL_LOCAL_MEM_SIZE); @@ -326,6 +327,7 @@ public class CLKernel extends CLObjectResource implements Cloneable { * that can be used to execute a kernel on a specific device given by device. * The OpenCL implementation uses the resource requirements of the kernel * (register usage etc.) to determine what this work-group size should be. + * @version 1.0 */ public long getWorkGroupSize(final CLDevice device) { return getWorkGroupInfo(device, CL_KERNEL_WORK_GROUP_SIZE); @@ -335,6 +337,7 @@ public class CLKernel extends CLObjectResource implements Cloneable { * Returns the work-group size specified by the __attribute__((reqd_work_group_size(X, Y, Z))) qualifier in kernel sources. * If the work-group size is not specified using the above attribute qualifier new long[]{(0, 0, 0)} is returned. * The returned array has always three elements. 
+ * @version 1.0 */ public long[] getCompileWorkGroupSize(final CLDevice device) { final int ret = binding.clGetKernelWorkGroupInfo(ID, device.ID, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, (is32Bit()?4:8)*3, buffer, null); @@ -349,6 +352,25 @@ public class CLKernel extends CLObjectResource implements Cloneable { } } + /** + * Returns the preferred multiple of workgroup size to use for kernel launch. This is only a performance hint; enqueueing + * with other sizes will still work, unless the size is more than the maximum allowed. + * @version 1.1 + */ + public long getPreferredWorkGroupSizeMultiple(final CLDevice device) { + return getWorkGroupInfo(device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE); + } + + /** + * Returns the number of bytes of private memory used by each work item in the kernel. + * This includes private memory declared with the __private qualifier, as + * well as other private memory used by the implementation. + * @version 1.1 + */ + public long getPrivateMemSize(final CLDevice device) { + return getWorkGroupInfo(device, CL_KERNEL_PRIVATE_MEM_SIZE); + } + private long getWorkGroupInfo(final CLDevice device, final int flag) { final int ret = binding.clGetKernelWorkGroupInfo(ID, device.ID, flag, 8, buffer, null); if(ret != CL_SUCCESS) { -- cgit v1.2.3 From cf5340c0bfc1914073ea1f4fc3ccad83f50dc57d Mon Sep 17 00:00:00 2001 From: Emily Leiviskä Date: Wed, 16 Nov 2016 16:55:30 +0100 Subject: Changes CLMemory's getNIOSize() and getNIOCapacity() to use buffer.limit() instead of capacity() in order to respect the user's desired buffer size in memory operations. 
--- src/com/jogamp/opencl/CLMemory.java | 4 +-- test/com/jogamp/opencl/CLBufferTest.java | 48 +++++++++++++++++++++++++++++++- 2 files changed, 49 insertions(+), 3 deletions(-) (limited to 'src/com/jogamp/opencl') diff --git a/src/com/jogamp/opencl/CLMemory.java b/src/com/jogamp/opencl/CLMemory.java index 6a28d0a5..00f51b6b 100644 --- a/src/com/jogamp/opencl/CLMemory.java +++ b/src/com/jogamp/opencl/CLMemory.java @@ -145,7 +145,7 @@ public abstract class CLMemory extends CLObjectResource { if(buffer == null) { return 0; } - return buffer.capacity(); + return buffer.limit(); } /** @@ -155,7 +155,7 @@ public abstract class CLMemory extends CLObjectResource { if(buffer == null) { return 0; } - return getElementSize() * buffer.capacity(); + return getElementSize() * buffer.limit(); } /** diff --git a/test/com/jogamp/opencl/CLBufferTest.java b/test/com/jogamp/opencl/CLBufferTest.java index d5995903..de71dfe1 100644 --- a/test/com/jogamp/opencl/CLBufferTest.java +++ b/test/com/jogamp/opencl/CLBufferTest.java @@ -30,7 +30,6 @@ package com.jogamp.opencl; import com.jogamp.opencl.CLMemory.Mem; import com.jogamp.opencl.CLMemory.Map; -import com.jogamp.opencl.test.util.MiscUtils; import com.jogamp.opencl.test.util.UITestCase; import com.jogamp.common.nio.Buffers; import com.jogamp.common.util.Bitstream; @@ -38,6 +37,7 @@ import com.jogamp.common.util.Bitstream; import java.io.IOException; import java.nio.Buffer; import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.nio.DoubleBuffer; import java.nio.FloatBuffer; import java.nio.IntBuffer; @@ -64,6 +64,52 @@ import static com.jogamp.opencl.CLVersion.*; @FixMethodOrder(MethodSorters.NAME_ASCENDING) public class CLBufferTest extends UITestCase { + @Test + public void cloneWithLimitedBufferTest() { + final int elements = NUM_ELEMENTS; + final int padding = 312; // Arbitrary number + final CLContext context = CLContext.create(); + + final IntBuffer hostBuffer = ByteBuffer.allocateDirect((elements + 
padding)*SIZEOF_INT).asIntBuffer(); + hostBuffer.limit(elements); + + final CLBuffer deviceBuffer = context.createBuffer(elements*SIZEOF_INT).cloneWith(hostBuffer); + assertEquals(elements, deviceBuffer.getCLCapacity()); + assertEquals(elements*SIZEOF_INT, deviceBuffer.getNIOSize()); + assertEquals(elements, deviceBuffer.getNIOCapacity()); + + context.release(); + } + + @Test + public void copyLimitedSlicedBuffersTest() { + final int size = 4200*SIZEOF_INT; // Arbitrary number that is a multiple of SIZEOF_INT; + final int padding = 307; // Totally arbitrary number > 0 + final CLContext context = CLContext.create(); + final CLCommandQueue queue = context.getDevices()[0].createCommandQueue(); + + // Make a buffer that is offset relative to the originally allocated position and has a limit that is + // not equal to the capacity to test whether all these attributes are correctly handled. + ByteBuffer hostBuffer = ByteBuffer.allocateDirect(size + padding); + hostBuffer.position(padding/2); // Offset the original buffer + hostBuffer = hostBuffer.slice(); // Slice it to have a new buffer that starts at the offset + hostBuffer.limit(size); + hostBuffer.order(ByteOrder.nativeOrder()); // Necessary for comparisons to work later on. 
+ fillBuffer(hostBuffer, 12345); + + final CLBuffer bufferA = context.createBuffer(size).cloneWith(hostBuffer); + final CLBuffer bufferB = context.createByteBuffer(size); + + queue.putWriteBuffer(bufferA, false) + .putCopyBuffer(bufferA, bufferB, bufferA.getNIOSize()) + .putReadBuffer(bufferB, true).finish(); + + hostBuffer.rewind(); + bufferB.buffer.rewind(); + checkIfEqual(hostBuffer, bufferB.buffer, size/SIZEOF_INT); + context.release(); + } + @Test public void createBufferTest() { -- cgit v1.2.3 From 01f69625995299262c11ae6bcbf345119c7b892f Mon Sep 17 00:00:00 2001 From: Emily Leiviskä Date: Wed, 16 Nov 2016 17:10:00 +0100 Subject: Changing CLBuffer#create to respect the limit instead of capacity on the direct buffer that the CLBuffer is being created for as this more closely represents the user's intention about the buffer size. --- src/com/jogamp/opencl/CLBuffer.java | 2 +- test/com/jogamp/opencl/CLBufferTest.java | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'src/com/jogamp/opencl') diff --git a/src/com/jogamp/opencl/CLBuffer.java b/src/com/jogamp/opencl/CLBuffer.java index 065de079..81e036fc 100644 --- a/src/com/jogamp/opencl/CLBuffer.java +++ b/src/com/jogamp/opencl/CLBuffer.java @@ -82,7 +82,7 @@ public class CLBuffer extends CLMemory { final CL binding = context.getPlatform().getCLBinding(); final int[] result = new int[1]; - final int size = Buffers.sizeOfBufferElem(directBuffer) * directBuffer.capacity(); + final int size = Buffers.sizeOfBufferElem(directBuffer) * directBuffer.limit(); final long id = binding.clCreateBuffer(context.ID, flags, size, host_ptr, result, 0); CLException.checkForError(result[0], "can not create cl buffer"); diff --git a/test/com/jogamp/opencl/CLBufferTest.java b/test/com/jogamp/opencl/CLBufferTest.java index de71dfe1..0c6e1d11 100644 --- a/test/com/jogamp/opencl/CLBufferTest.java +++ b/test/com/jogamp/opencl/CLBufferTest.java @@ -64,6 +64,26 @@ import static 
com.jogamp.opencl.CLVersion.*; @FixMethodOrder(MethodSorters.NAME_ASCENDING) public class CLBufferTest extends UITestCase { + @Test + public void createBufferFromLimitedBuffer() { + final int elements = NUM_ELEMENTS; + final int padding = 19*SIZEOF_INT*2; // Totally arbitrary number > 0 divisible by 2*SIZEOF_INT + final CLContext context = CLContext.create(); + + // Make a buffer that is offset relative to the originally allocated position and has a + // limit that is + // not equal to the capacity to test whether all these attributes are correctly handled. + ByteBuffer byteBuffer = ByteBuffer.allocateDirect(elements*SIZEOF_INT + padding); + byteBuffer.position(padding / 2); // Offset the original buffer + IntBuffer intBuffer = byteBuffer.slice().order(ByteOrder.nativeOrder()).asIntBuffer(); // Slice it to have a new buffer that starts at the offset + intBuffer.limit(elements); + + final CLBuffer deviceBuffer = context.createBuffer(intBuffer); + assertEquals(elements, deviceBuffer.getCLCapacity()); + assertEquals(elements * SIZEOF_INT, deviceBuffer.getNIOSize()); + assertEquals(elements, deviceBuffer.getNIOCapacity()); + } + @Test public void cloneWithLimitedBufferTest() { final int elements = NUM_ELEMENTS; -- cgit v1.2.3