diff options
author | Sven Gothel <[email protected]> | 2023-07-12 01:49:31 +0200 |
---|---|---|
committer | Sven Gothel <[email protected]> | 2023-07-12 01:49:31 +0200 |
commit | 791eb1b2ae3001f04d59a61f634161e21d96ef6d (patch) | |
tree | 24be59b1a6bcbbec4e09c972d461ae2fe69078b9 | |
parent | dc1424050e47cd239ad26c4f12fb3a0e4289d682 (diff) | |
parent | 2ad07a7e3e8cf1fc29fe7cbb3256c4c2cda27d02 (diff) |
Merge remote-tracking branch 'wwalker/master' (tag: v2.5.0)
-rw-r--r-- | src/com/jogamp/opencl/CLBuffer.java | 2 | ||||
-rw-r--r-- | src/com/jogamp/opencl/CLKernel.java | 22 | ||||
-rw-r--r-- | src/com/jogamp/opencl/CLMemory.java | 4 | ||||
-rw-r--r-- | test/com/jogamp/opencl/CLBufferTest.java | 68 | ||||
-rw-r--r-- | test/com/jogamp/opencl/CLProgramTest.java | 30 |
5 files changed, 122 insertions, 4 deletions
diff --git a/src/com/jogamp/opencl/CLBuffer.java b/src/com/jogamp/opencl/CLBuffer.java index 065de079..81e036fc 100644 --- a/src/com/jogamp/opencl/CLBuffer.java +++ b/src/com/jogamp/opencl/CLBuffer.java @@ -82,7 +82,7 @@ public class CLBuffer<B extends Buffer> extends CLMemory<B> { final CL binding = context.getPlatform().getCLBinding(); final int[] result = new int[1]; - final int size = Buffers.sizeOfBufferElem(directBuffer) * directBuffer.capacity(); + final int size = Buffers.sizeOfBufferElem(directBuffer) * directBuffer.limit(); final long id = binding.clCreateBuffer(context.ID, flags, size, host_ptr, result, 0); CLException.checkForError(result[0], "can not create cl buffer"); diff --git a/src/com/jogamp/opencl/CLKernel.java b/src/com/jogamp/opencl/CLKernel.java index c3031ae9..4eeb5af9 100644 --- a/src/com/jogamp/opencl/CLKernel.java +++ b/src/com/jogamp/opencl/CLKernel.java @@ -315,6 +315,7 @@ public class CLKernel extends CLObjectResource implements Cloneable { * qualifier and whose size is specified with clSetKernelArg. * If the local memory size, for any pointer argument to the kernel declared with * the <code>__local</code> address qualifier, is not specified, its size is assumed to be 0. + * @version 1.0 */ public long getLocalMemorySize(final CLDevice device) { return getWorkGroupInfo(device, CL_KERNEL_LOCAL_MEM_SIZE); @@ -326,6 +327,7 @@ public class CLKernel extends CLObjectResource implements Cloneable { * that can be used to execute a kernel on a specific device given by device. * The OpenCL implementation uses the resource requirements of the kernel * (register usage etc.) to determine what this work-group size should be. 
+ * @version 1.0 */ public long getWorkGroupSize(final CLDevice device) { return getWorkGroupInfo(device, CL_KERNEL_WORK_GROUP_SIZE); @@ -335,6 +337,7 @@ public class CLKernel extends CLObjectResource implements Cloneable { * Returns the work-group size specified by the <code>__attribute__((reqd_work_group_size(X, Y, Z)))</code> qualifier in kernel sources. * If the work-group size is not specified using the above attribute qualifier <code>new long[]{(0, 0, 0)}</code> is returned. * The returned array has always three elements. + * @version 1.0 */ public long[] getCompileWorkGroupSize(final CLDevice device) { final int ret = binding.clGetKernelWorkGroupInfo(ID, device.ID, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, (is32Bit()?4:8)*3, buffer, null); @@ -349,6 +352,25 @@ public class CLKernel extends CLObjectResource implements Cloneable { } } + /** + * Returns the preferred multiple of workgroup size to use for kernel launch. This is only a performance hint; enqueueing + * with other sizes will still work, unless the size is more than the maximum allowed. + * @version 1.1 + */ + public long getPreferredWorkGroupSizeMultiple(final CLDevice device) { + return getWorkGroupInfo(device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE); + } + + /** + * Returns the number of bytes of private memory used by each work item in the kernel. + * This includes private memory declared with the <code>__private</code> qualifier, as + * well as other private memory used by the implementation. 
+ * @version 1.1 + */ + public long getPrivateMemSize(final CLDevice device) { + return getWorkGroupInfo(device, CL_KERNEL_PRIVATE_MEM_SIZE); + } + private long getWorkGroupInfo(final CLDevice device, final int flag) { final int ret = binding.clGetKernelWorkGroupInfo(ID, device.ID, flag, 8, buffer, null); if(ret != CL_SUCCESS) { diff --git a/src/com/jogamp/opencl/CLMemory.java b/src/com/jogamp/opencl/CLMemory.java index 6a28d0a5..00f51b6b 100644 --- a/src/com/jogamp/opencl/CLMemory.java +++ b/src/com/jogamp/opencl/CLMemory.java @@ -145,7 +145,7 @@ public abstract class CLMemory <B extends Buffer> extends CLObjectResource { if(buffer == null) { return 0; } - return buffer.capacity(); + return buffer.limit(); } /** @@ -155,7 +155,7 @@ public abstract class CLMemory <B extends Buffer> extends CLObjectResource { if(buffer == null) { return 0; } - return getElementSize() * buffer.capacity(); + return getElementSize() * buffer.limit(); } /** diff --git a/test/com/jogamp/opencl/CLBufferTest.java b/test/com/jogamp/opencl/CLBufferTest.java index d5995903..0c6e1d11 100644 --- a/test/com/jogamp/opencl/CLBufferTest.java +++ b/test/com/jogamp/opencl/CLBufferTest.java @@ -30,7 +30,6 @@ package com.jogamp.opencl; import com.jogamp.opencl.CLMemory.Mem; import com.jogamp.opencl.CLMemory.Map; -import com.jogamp.opencl.test.util.MiscUtils; import com.jogamp.opencl.test.util.UITestCase; import com.jogamp.common.nio.Buffers; import com.jogamp.common.util.Bitstream; @@ -38,6 +37,7 @@ import com.jogamp.common.util.Bitstream; import java.io.IOException; import java.nio.Buffer; import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.nio.DoubleBuffer; import java.nio.FloatBuffer; import java.nio.IntBuffer; @@ -65,6 +65,72 @@ import static com.jogamp.opencl.CLVersion.*; public class CLBufferTest extends UITestCase { @Test + public void createBufferFromLimitedBuffer() { + final int elements = NUM_ELEMENTS; + final int padding = 19*SIZEOF_INT*2; // Totally arbitrary number > 0 
divisible by 2*SIZEOF_INT + final CLContext context = CLContext.create(); + + // Make a buffer that is offset relative to the originally allocated position and has a + // limit that is + // not equal to the capacity to test whether all these attributes are correctly handled. + ByteBuffer byteBuffer = ByteBuffer.allocateDirect(elements*SIZEOF_INT + padding); + byteBuffer.position(padding / 2); // Offset the original buffer + IntBuffer intBuffer = byteBuffer.slice().order(ByteOrder.nativeOrder()).asIntBuffer(); // Slice it to have a new buffer that starts at the offset + intBuffer.limit(elements); + + final CLBuffer<IntBuffer> deviceBuffer = context.createBuffer(intBuffer); + assertEquals(elements, deviceBuffer.getCLCapacity()); + assertEquals(elements * SIZEOF_INT, deviceBuffer.getNIOSize()); + assertEquals(elements, deviceBuffer.getNIOCapacity()); + } + + @Test + public void cloneWithLimitedBufferTest() { + final int elements = NUM_ELEMENTS; + final int padding = 312; // Arbitrary number + final CLContext context = CLContext.create(); + + final IntBuffer hostBuffer = ByteBuffer.allocateDirect((elements + padding)*SIZEOF_INT).asIntBuffer(); + hostBuffer.limit(elements); + + final CLBuffer<?> deviceBuffer = context.createBuffer(elements*SIZEOF_INT).cloneWith(hostBuffer); + assertEquals(elements, deviceBuffer.getCLCapacity()); + assertEquals(elements*SIZEOF_INT, deviceBuffer.getNIOSize()); + assertEquals(elements, deviceBuffer.getNIOCapacity()); + + context.release(); + } + + @Test + public void copyLimitedSlicedBuffersTest() { + final int size = 4200*SIZEOF_INT; // Arbitrary number that is a multiple of SIZEOF_INT; + final int padding = 307; // Totally arbitrary number > 0 + final CLContext context = CLContext.create(); + final CLCommandQueue queue = context.getDevices()[0].createCommandQueue(); + + // Make a buffer that is offset relative to the originally allocated position and has a limit that is + // not equal to the capacity to test whether all these attributes 
are correctly handled. + ByteBuffer hostBuffer = ByteBuffer.allocateDirect(size + padding); + hostBuffer.position(padding/2); // Offset the original buffer + hostBuffer = hostBuffer.slice(); // Slice it to have a new buffer that starts at the offset + hostBuffer.limit(size); + hostBuffer.order(ByteOrder.nativeOrder()); // Necessary for comparisons to work later on. + fillBuffer(hostBuffer, 12345); + + final CLBuffer<ByteBuffer> bufferA = context.createBuffer(size).cloneWith(hostBuffer); + final CLBuffer<ByteBuffer> bufferB = context.createByteBuffer(size); + + queue.putWriteBuffer(bufferA, false) + .putCopyBuffer(bufferA, bufferB, bufferA.getNIOSize()) + .putReadBuffer(bufferB, true).finish(); + + hostBuffer.rewind(); + bufferB.buffer.rewind(); + checkIfEqual(hostBuffer, bufferB.buffer, size/SIZEOF_INT); + context.release(); + } + + @Test public void createBufferTest() { out.println(" - - - highLevelTest; create buffer test - - - "); diff --git a/test/com/jogamp/opencl/CLProgramTest.java b/test/com/jogamp/opencl/CLProgramTest.java index ae09d2cb..8310ad06 100644 --- a/test/com/jogamp/opencl/CLProgramTest.java +++ b/test/com/jogamp/opencl/CLProgramTest.java @@ -31,6 +31,7 @@ package com.jogamp.opencl; import com.jogamp.opencl.test.util.UITestCase; import com.jogamp.opencl.util.CLBuildConfiguration; import com.jogamp.opencl.util.CLProgramConfiguration; +import com.jogamp.common.nio.Buffers; import com.jogamp.opencl.CLProgram.Status; import com.jogamp.opencl.util.CLBuildListener; import com.jogamp.opencl.llb.CL; @@ -41,6 +42,7 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; +import java.nio.ByteBuffer; import java.util.Map; import java.util.concurrent.CountDownLatch; @@ -52,7 +54,11 @@ import org.junit.runners.MethodSorters; import static org.junit.Assert.*; import static java.lang.System.*; +import static com.jogamp.common.os.Platform.is32Bit; +import static 
com.jogamp.opencl.CLException.newException; import static com.jogamp.opencl.CLProgram.CompilerOptions.*; +import static com.jogamp.opencl.llb.CL12.CL_KERNEL_GLOBAL_WORK_SIZE; +import static com.jogamp.opencl.llb.CL.CL_SUCCESS; /** * @@ -370,6 +376,30 @@ public class CLProgramTest extends UITestCase { } + /** + * Test of getting new kernel work group information, including those from OpenCL versions newer than 1.1. + */ + @Test + public void test22KerneWorkGrouplInfo() { + final CLContext context = CLContext.create(); + + try{ + final CLProgram program = context.createProgram(test20KernelSource).build(); + assertTrue(program.isExecutable()); + + final CLKernel kernel = program.createCLKernel("foo"); + assertNotNull(kernel); + + final long pwgsm = kernel.getPreferredWorkGroupSizeMultiple(context.getDevices()[0]); + out.println("preferred workgroup size multiple: " + pwgsm); + + final long pms = kernel.getPrivateMemSize(context.getDevices()[0]); + out.println("private mem size: " + pms); + }finally{ + context.release(); + } + } + // @Test public void test60Load() throws IOException, ClassNotFoundException, InterruptedException { for(int i = 0; i < 100; i++) { |