summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Sven Gothel <[email protected]> 2023-07-12 01:49:31 +0200
committer Sven Gothel <[email protected]> 2023-07-12 01:49:31 +0200
commit 791eb1b2ae3001f04d59a61f634161e21d96ef6d (patch)
tree 24be59b1a6bcbbec4e09c972d461ae2fe69078b9
parent dc1424050e47cd239ad26c4f12fb3a0e4289d682 (diff)
parent 2ad07a7e3e8cf1fc29fe7cbb3256c4c2cda27d02 (diff)
Merge remote-tracking branch 'wwalker/master' v2.5.0
-rw-r--r-- src/com/jogamp/opencl/CLBuffer.java 2
-rw-r--r-- src/com/jogamp/opencl/CLKernel.java 22
-rw-r--r-- src/com/jogamp/opencl/CLMemory.java 4
-rw-r--r-- test/com/jogamp/opencl/CLBufferTest.java 68
-rw-r--r-- test/com/jogamp/opencl/CLProgramTest.java 30
5 files changed, 122 insertions, 4 deletions
diff --git a/src/com/jogamp/opencl/CLBuffer.java b/src/com/jogamp/opencl/CLBuffer.java
index 065de079..81e036fc 100644
--- a/src/com/jogamp/opencl/CLBuffer.java
+++ b/src/com/jogamp/opencl/CLBuffer.java
@@ -82,7 +82,7 @@ public class CLBuffer<B extends Buffer> extends CLMemory<B> {
final CL binding = context.getPlatform().getCLBinding();
final int[] result = new int[1];
- final int size = Buffers.sizeOfBufferElem(directBuffer) * directBuffer.capacity();
+ final int size = Buffers.sizeOfBufferElem(directBuffer) * directBuffer.limit();
final long id = binding.clCreateBuffer(context.ID, flags, size, host_ptr, result, 0);
CLException.checkForError(result[0], "can not create cl buffer");
diff --git a/src/com/jogamp/opencl/CLKernel.java b/src/com/jogamp/opencl/CLKernel.java
index c3031ae9..4eeb5af9 100644
--- a/src/com/jogamp/opencl/CLKernel.java
+++ b/src/com/jogamp/opencl/CLKernel.java
@@ -315,6 +315,7 @@ public class CLKernel extends CLObjectResource implements Cloneable {
* qualifier and whose size is specified with clSetKernelArg.
* If the local memory size, for any pointer argument to the kernel declared with
* the <code>__local</code> address qualifier, is not specified, its size is assumed to be 0.
+ * @version 1.0
*/
public long getLocalMemorySize(final CLDevice device) {
return getWorkGroupInfo(device, CL_KERNEL_LOCAL_MEM_SIZE);
@@ -326,6 +327,7 @@ public class CLKernel extends CLObjectResource implements Cloneable {
* that can be used to execute a kernel on a specific device given by device.
* The OpenCL implementation uses the resource requirements of the kernel
* (register usage etc.) to determine what this work-group size should be.
+ * @version 1.0
*/
public long getWorkGroupSize(final CLDevice device) {
return getWorkGroupInfo(device, CL_KERNEL_WORK_GROUP_SIZE);
@@ -335,6 +337,7 @@ public class CLKernel extends CLObjectResource implements Cloneable {
* Returns the work-group size specified by the <code>__attribute__((reqd_work_group_size(X, Y, Z)))</code> qualifier in kernel sources.
* If the work-group size is not specified using the above attribute qualifier <code>new long[]{(0, 0, 0)}</code> is returned.
* The returned array has always three elements.
+ * @version 1.0
*/
public long[] getCompileWorkGroupSize(final CLDevice device) {
final int ret = binding.clGetKernelWorkGroupInfo(ID, device.ID, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, (is32Bit()?4:8)*3, buffer, null);
@@ -349,6 +352,25 @@ public class CLKernel extends CLObjectResource implements Cloneable {
}
}
+ /**
+ * Returns the preferred multiple of workgroup size to use for kernel launch. This is only a performance hint; enqueueing
+ * with other sizes will still work, unless the size is more than the maximum allowed.
+ * @version 1.1
+ * @param device the device whose work-group information is queried for this kernel
+ * @return the value obtained from the <code>CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE</code> work-group info query
+ */
+ public long getPreferredWorkGroupSizeMultiple(final CLDevice device) {
+ return getWorkGroupInfo(device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE);
+ }
+
+ /**
+ * Returns the number of bytes of private memory used by each work item in the kernel.
+ * This includes private memory declared with the <code>__private</code> qualifier, as
+ * well as other private memory used by the implementation.
+ * @version 1.1
+ * @param device the device whose work-group information is queried for this kernel
+ * @return the value obtained from the <code>CL_KERNEL_PRIVATE_MEM_SIZE</code> work-group info query
+ */
+ public long getPrivateMemSize(final CLDevice device) {
+ return getWorkGroupInfo(device, CL_KERNEL_PRIVATE_MEM_SIZE);
+ }
+
private long getWorkGroupInfo(final CLDevice device, final int flag) {
final int ret = binding.clGetKernelWorkGroupInfo(ID, device.ID, flag, 8, buffer, null);
if(ret != CL_SUCCESS) {
diff --git a/src/com/jogamp/opencl/CLMemory.java b/src/com/jogamp/opencl/CLMemory.java
index 6a28d0a5..00f51b6b 100644
--- a/src/com/jogamp/opencl/CLMemory.java
+++ b/src/com/jogamp/opencl/CLMemory.java
@@ -145,7 +145,7 @@ public abstract class CLMemory <B extends Buffer> extends CLObjectResource {
if(buffer == null) {
return 0;
}
- return buffer.capacity();
+ return buffer.limit();
}
/**
@@ -155,7 +155,7 @@ public abstract class CLMemory <B extends Buffer> extends CLObjectResource {
if(buffer == null) {
return 0;
}
- return getElementSize() * buffer.capacity();
+ return getElementSize() * buffer.limit();
}
/**
diff --git a/test/com/jogamp/opencl/CLBufferTest.java b/test/com/jogamp/opencl/CLBufferTest.java
index d5995903..0c6e1d11 100644
--- a/test/com/jogamp/opencl/CLBufferTest.java
+++ b/test/com/jogamp/opencl/CLBufferTest.java
@@ -30,7 +30,6 @@ package com.jogamp.opencl;
import com.jogamp.opencl.CLMemory.Mem;
import com.jogamp.opencl.CLMemory.Map;
-import com.jogamp.opencl.test.util.MiscUtils;
import com.jogamp.opencl.test.util.UITestCase;
import com.jogamp.common.nio.Buffers;
import com.jogamp.common.util.Bitstream;
@@ -38,6 +37,7 @@ import com.jogamp.common.util.Bitstream;
import java.io.IOException;
import java.nio.Buffer;
import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
import java.nio.DoubleBuffer;
import java.nio.FloatBuffer;
import java.nio.IntBuffer;
@@ -65,6 +65,72 @@ import static com.jogamp.opencl.CLVersion.*;
public class CLBufferTest extends UITestCase {
+    /**
+     * Verifies that a {@link CLBuffer} created from a sliced NIO buffer whose limit is smaller
+     * than its capacity reports its sizes based on the limit: the CL capacity and NIO capacity
+     * must equal the element count, and the NIO size must equal the element count in bytes.
+     */
     @Test
+    public void createBufferFromLimitedBuffer() {
+        final int elements = NUM_ELEMENTS;
+        final int padding = 19*SIZEOF_INT*2; // Totally arbitrary number > 0 divisible by 2*SIZEOF_INT
+        final CLContext context = CLContext.create();
+
+        try {
+            // Make a buffer that is offset relative to the originally allocated position and has a
+            // limit that is not equal to the capacity to test whether all these attributes are
+            // correctly handled.
+            ByteBuffer byteBuffer = ByteBuffer.allocateDirect(elements*SIZEOF_INT + padding);
+            byteBuffer.position(padding / 2); // Offset the original buffer
+            IntBuffer intBuffer = byteBuffer.slice().order(ByteOrder.nativeOrder()).asIntBuffer(); // Slice it to have a new buffer that starts at the offset
+            intBuffer.limit(elements);
+
+            final CLBuffer<IntBuffer> deviceBuffer = context.createBuffer(intBuffer);
+            assertEquals(elements, deviceBuffer.getCLCapacity());
+            assertEquals(elements * SIZEOF_INT, deviceBuffer.getNIOSize());
+            assertEquals(elements, deviceBuffer.getNIOCapacity());
+        } finally {
+            // The context was previously never released by this test; release it even when an
+            // assertion fails so it does not leak into the tests that run afterwards.
+            context.release();
+        }
+    }
+
+    /**
+     * Verifies that {@code cloneWith} applied to a host buffer whose limit is smaller than its
+     * capacity yields a {@link CLBuffer} whose CL capacity, NIO size and NIO capacity are all
+     * derived from the limit rather than from the padded capacity.
+     */
+    @Test
+    public void cloneWithLimitedBufferTest() {
+        final int elements = NUM_ELEMENTS;
+        final int padding = 312; // Arbitrary number
+        final CLContext context = CLContext.create();
+
+        try {
+            // Allocate more ints than needed, then cut the visible range down to 'elements'.
+            final IntBuffer hostBuffer = ByteBuffer.allocateDirect((elements + padding)*SIZEOF_INT).asIntBuffer();
+            hostBuffer.limit(elements);
+
+            final CLBuffer<?> deviceBuffer = context.createBuffer(elements*SIZEOF_INT).cloneWith(hostBuffer);
+            assertEquals(elements, deviceBuffer.getCLCapacity());
+            assertEquals(elements*SIZEOF_INT, deviceBuffer.getNIOSize());
+            assertEquals(elements, deviceBuffer.getNIOCapacity());
+        } finally {
+            // Release in finally so a failing assertion does not skip the release.
+            context.release();
+        }
+    }
+
+    /**
+     * Writes a sliced host buffer with a reduced limit to the device, copies it into a second
+     * device buffer, reads that second buffer back and checks that its content matches the
+     * host buffer.
+     */
+    @Test
+    public void copyLimitedSlicedBuffersTest() {
+        final int size = 4200*SIZEOF_INT; // Arbitrary number that is a multiple of SIZEOF_INT
+        final int padding = 307; // Totally arbitrary number > 0
+        final CLContext context = CLContext.create();
+
+        try {
+            final CLCommandQueue queue = context.getDevices()[0].createCommandQueue();
+
+            // Make a buffer that is offset relative to the originally allocated position and has a limit that is
+            // not equal to the capacity to test whether all these attributes are correctly handled.
+            ByteBuffer hostBuffer = ByteBuffer.allocateDirect(size + padding);
+            hostBuffer.position(padding/2); // Offset the original buffer
+            hostBuffer = hostBuffer.slice(); // Slice it to have a new buffer that starts at the offset
+            hostBuffer.limit(size);
+            hostBuffer.order(ByteOrder.nativeOrder()); // Necessary for comparisons to work later on.
+            fillBuffer(hostBuffer, 12345);
+
+            final CLBuffer<ByteBuffer> bufferA = context.createBuffer(size).cloneWith(hostBuffer);
+            final CLBuffer<ByteBuffer> bufferB = context.createByteBuffer(size);
+
+            queue.putWriteBuffer(bufferA, false)
+                 .putCopyBuffer(bufferA, bufferB, bufferA.getNIOSize())
+                 .putReadBuffer(bufferB, true).finish();
+
+            // Reset both positions before comparing element by element.
+            hostBuffer.rewind();
+            bufferB.buffer.rewind();
+            checkIfEqual(hostBuffer, bufferB.buffer, size/SIZEOF_INT);
+        } finally {
+            // Release in finally so a failing comparison does not skip the release.
+            context.release();
+        }
+    }
+
+ @Test
public void createBufferTest() {
out.println(" - - - highLevelTest; create buffer test - - - ");
diff --git a/test/com/jogamp/opencl/CLProgramTest.java b/test/com/jogamp/opencl/CLProgramTest.java
index ae09d2cb..8310ad06 100644
--- a/test/com/jogamp/opencl/CLProgramTest.java
+++ b/test/com/jogamp/opencl/CLProgramTest.java
@@ -31,6 +31,7 @@ package com.jogamp.opencl;
import com.jogamp.opencl.test.util.UITestCase;
import com.jogamp.opencl.util.CLBuildConfiguration;
import com.jogamp.opencl.util.CLProgramConfiguration;
+import com.jogamp.common.nio.Buffers;
import com.jogamp.opencl.CLProgram.Status;
import com.jogamp.opencl.util.CLBuildListener;
import com.jogamp.opencl.llb.CL;
@@ -41,6 +42,7 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
+import java.nio.ByteBuffer;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
@@ -52,7 +54,11 @@ import org.junit.runners.MethodSorters;
import static org.junit.Assert.*;
import static java.lang.System.*;
+import static com.jogamp.common.os.Platform.is32Bit;
+import static com.jogamp.opencl.CLException.newException;
import static com.jogamp.opencl.CLProgram.CompilerOptions.*;
+import static com.jogamp.opencl.llb.CL12.CL_KERNEL_GLOBAL_WORK_SIZE;
+import static com.jogamp.opencl.llb.CL.CL_SUCCESS;
/**
*
@@ -370,6 +376,30 @@ public class CLProgramTest extends UITestCase {
}
+ /**
+ * Tests the kernel work-group info queries {@link CLKernel#getPreferredWorkGroupSizeMultiple(CLDevice)}
+ * and {@link CLKernel#getPrivateMemSize(CLDevice)}, both tagged <code>@version 1.1</code> in CLKernel,
+ * against the first device of a newly created context.
+ * The returned values are only printed, not asserted; the assertions cover that the program
+ * is executable after the build and that the kernel "foo" could be created.
+ */
+ @Test
+ public void test22KerneWorkGrouplInfo() {
+ final CLContext context = CLContext.create();
+
+ try{
+ final CLProgram program = context.createProgram(test20KernelSource).build();
+ assertTrue(program.isExecutable());
+
+ final CLKernel kernel = program.createCLKernel("foo");
+ assertNotNull(kernel);
+
+ final long pwgsm = kernel.getPreferredWorkGroupSizeMultiple(context.getDevices()[0]);
+ out.println("preferred workgroup size multiple: " + pwgsm);
+
+ final long pms = kernel.getPrivateMemSize(context.getDevices()[0]);
+ out.println("private mem size: " + pms);
+ }finally{
+ context.release();
+ }
+ }
+
// @Test
public void test60Load() throws IOException, ClassNotFoundException, InterruptedException {
for(int i = 0; i < 100; i++) {