diff options
Diffstat (limited to 'test/com/jogamp/opencl')
-rw-r--r-- | test/com/jogamp/opencl/CLBufferTest.java | 94 | ||||
-rw-r--r-- | test/com/jogamp/opencl/CLCommandQueueTest.java | 6 | ||||
-rw-r--r-- | test/com/jogamp/opencl/HighLevelBindingTest.java | 31 | ||||
-rw-r--r-- | test/com/jogamp/opencl/LowLevelBindingTest.java | 33 |
4 files changed, 115 insertions, 49 deletions
diff --git a/test/com/jogamp/opencl/CLBufferTest.java b/test/com/jogamp/opencl/CLBufferTest.java index 3ac1fffe..50820d4e 100644 --- a/test/com/jogamp/opencl/CLBufferTest.java +++ b/test/com/jogamp/opencl/CLBufferTest.java @@ -33,6 +33,7 @@ import com.jogamp.opencl.CLMemory.Map; import com.jogamp.opencl.test.util.MiscUtils; import com.jogamp.opencl.test.util.UITestCase; import com.jogamp.common.nio.Buffers; +import com.jogamp.common.util.Bitstream; import java.io.IOException; import java.nio.Buffer; @@ -256,7 +257,7 @@ public class CLBufferTest extends UITestCase { } @Test - public void subBufferTest() { + public void subBufferTest01ByteBuffer() { out.println(" - - - subBufferTest - - - "); if(MiscUtils.isOpenCLUnavailable()) @@ -271,48 +272,77 @@ public class CLBufferTest extends UITestCase { CLContext context = CLContext.create(platform); try{ final int subelements = 5; + final long lMaxAlignment = context.getMaxMemBaseAddrAlign(); + final int iMaxAlignment = Bitstream.uint32LongToInt(lMaxAlignment); + System.err.println("XXX: maxAlignment "+lMaxAlignment+", 0x"+Long.toHexString(lMaxAlignment)+", (int)"+iMaxAlignment+", (int)0x"+Integer.toHexString(iMaxAlignment)); + if( -1 == iMaxAlignment ) { + throw new RuntimeException("Cannot handle MaxMemBaseAddrAlign > MAX_INT, has 0x"+Long.toHexString(lMaxAlignment)); + } // device only - { - CLBuffer<?> buffer = context.createBuffer(64); + CLBuffer<?> buffer = context.createBuffer(iMaxAlignment+subelements); - assertFalse(buffer.isSubBuffer()); - assertNotNull(buffer.getSubBuffers()); - assertTrue(buffer.getSubBuffers().isEmpty()); + assertFalse(buffer.isSubBuffer()); + assertNotNull(buffer.getSubBuffers()); + assertTrue(buffer.getSubBuffers().isEmpty()); - CLSubBuffer<?> subBuffer = buffer.createSubBuffer(10, subelements); + CLSubBuffer<?> subBuffer = buffer.createSubBuffer(iMaxAlignment, subelements); - assertTrue(subBuffer.isSubBuffer()); - assertEquals(subelements, subBuffer.getCLSize()); - assertEquals(10, subBuffer.getOffset()); - assertEquals(10, subBuffer.getCLOffset()); - assertEquals(buffer, subBuffer.getParent()); - assertEquals(1, buffer.getSubBuffers().size()); + assertTrue(subBuffer.isSubBuffer()); + assertEquals(subelements, subBuffer.getCLSize()); + assertEquals(iMaxAlignment, subBuffer.getOffset()); + assertEquals(iMaxAlignment, subBuffer.getCLOffset()); + assertEquals(buffer, subBuffer.getParent()); + assertEquals(1, buffer.getSubBuffers().size()); - subBuffer.release(); - assertEquals(0, buffer.getSubBuffers().size()); - } + subBuffer.release(); + assertEquals(0, buffer.getSubBuffers().size()); + }finally{ + context.release(); + } - // device + direct buffer - { - CLBuffer<FloatBuffer> buffer = context.createFloatBuffer(64); - assertFalse(buffer.isSubBuffer()); - assertNotNull(buffer.getSubBuffers()); - assertTrue(buffer.getSubBuffers().isEmpty()); + } - CLSubBuffer<FloatBuffer> subBuffer = buffer.createSubBuffer(10, subelements); + @Test + public void subBufferTest02FloatBuffer() { - assertTrue(subBuffer.isSubBuffer()); - assertEquals(subelements, subBuffer.getBuffer().capacity()); - assertEquals(10, subBuffer.getOffset()); - assertEquals(40, subBuffer.getCLOffset()); - assertEquals(buffer, subBuffer.getParent()); - assertEquals(1, buffer.getSubBuffers().size()); + out.println(" - - - subBufferTest - - - "); - assertEquals(subBuffer.getCLCapacity(), subBuffer.getBuffer().capacity()); + CLPlatform platform = CLPlatform.getDefault(version(CL_1_1)); + if(platform == null) { + out.println("aborting subBufferTest"); + return; + } - subBuffer.release(); - assertEquals(0, buffer.getSubBuffers().size()); + CLContext context = CLContext.create(platform); + try{ + final int subelements = 5; + final long lMaxAlignment = context.getMaxMemBaseAddrAlign(); + final int iMaxAlignment = Bitstream.uint32LongToInt(lMaxAlignment); + System.err.println("XXX: maxAlignment "+lMaxAlignment+", 0x"+Long.toHexString(lMaxAlignment)+", (int)"+iMaxAlignment+", (int)0x"+Integer.toHexString(iMaxAlignment)); + if( -1 == iMaxAlignment ) { + throw new RuntimeException("Cannot handle MaxMemBaseAddrAlign > MAX_INT, has 0x"+Long.toHexString(lMaxAlignment)); } + // FIXME: See Bug 979: Offset/Alignment via offset calculation per element-count is faulty! + final int floatsPerAlignment = iMaxAlignment / Buffers.SIZEOF_FLOAT; + // device + direct buffer + CLBuffer<FloatBuffer> buffer = context.createFloatBuffer(floatsPerAlignment+subelements); + assertFalse(buffer.isSubBuffer()); + assertNotNull(buffer.getSubBuffers()); + assertTrue(buffer.getSubBuffers().isEmpty()); + + CLSubBuffer<FloatBuffer> subBuffer = buffer.createSubBuffer(floatsPerAlignment, subelements); + + assertTrue(subBuffer.isSubBuffer()); + assertEquals(subelements, subBuffer.getBuffer().capacity()); + assertEquals(floatsPerAlignment, subBuffer.getOffset()); + assertEquals(iMaxAlignment, subBuffer.getCLOffset()); + assertEquals(buffer, subBuffer.getParent()); + assertEquals(1, buffer.getSubBuffers().size()); + + assertEquals(subBuffer.getCLCapacity(), subBuffer.getBuffer().capacity()); + + subBuffer.release(); + assertEquals(0, buffer.getSubBuffers().size()); }finally{ context.release(); diff --git a/test/com/jogamp/opencl/CLCommandQueueTest.java b/test/com/jogamp/opencl/CLCommandQueueTest.java index 76260289..491eab53 100644 --- a/test/com/jogamp/opencl/CLCommandQueueTest.java +++ b/test/com/jogamp/opencl/CLCommandQueueTest.java @@ -487,7 +487,9 @@ public class CLCommandQueueTest extends UITestCase { @Override public void run() { - int groupSize = queue2.getDevice().getMaxWorkItemSizes()[0]; + int maxWorkItemSize = queue2.getDevice().getMaxWorkItemSizes()[0]; + int kernelWorkGroupSize = (int)vectorAddKernel2.getWorkGroupSize( queue2.getDevice() ); + int localWorkSize = Math.min( maxWorkItemSize, kernelWorkGroupSize ); fillBuffer(clBufferA2.buffer, 12345); fillBuffer(clBufferB2.buffer, 67890); @@ -501,7 +503,7 @@ public class CLCommandQueueTest extends UITestCase { // System.out.println("D kernels"); CLEventList events2 = new CLEventList(2); - queue2.put1DRangeKernel(vectorAddKernel2, 0, elements, groupSize, events2); + queue2.put1DRangeKernel(vectorAddKernel2, 0, elements, localWorkSize, events2); queue2.putReadBuffer(clBufferD, false, events2); barrier.waitFor(queue2, events2); diff --git a/test/com/jogamp/opencl/HighLevelBindingTest.java b/test/com/jogamp/opencl/HighLevelBindingTest.java index 6a003435..a0a67595 100644 --- a/test/com/jogamp/opencl/HighLevelBindingTest.java +++ b/test/com/jogamp/opencl/HighLevelBindingTest.java @@ -167,6 +167,7 @@ public class HighLevelBindingTest extends UITestCase { // out.println(" C version: "+device.getCVersion()); //CL 1.1 out.println(" driver version: "+device.getDriverVersion()); out.println(" type: "+device.getType()); + out.println(" mem base addr align: "+device.getMemBaseAddrAlign()); out.println(" global mem: "+device.getGlobalMemSize()/(1024*1024)+" MB"); out.println(" max alloc mem: "+device.getMaxMemAllocSize()/(1024*1024)+" MB"); out.println(" max param size: "+device.getMaxParameterSize()+" byte"); @@ -323,13 +324,29 @@ public class HighLevelBindingTest extends UITestCase { int elementCount = 11444777; // Length of float arrays to process (odd # for illustration) int localWorkSize = device.getMaxWorkItemSizes()[0]; // set and log Global and Local work size dimensions - int globalWorkSize = roundUp(localWorkSize, elementCount); // rounded up to the nearest multiple of the LocalWorkSize - - out.println("allocateing buffers of size: "+globalWorkSize); - - ByteBuffer srcA = newDirectByteBuffer(globalWorkSize*SIZEOF_INT); - ByteBuffer srcB = newDirectByteBuffer(globalWorkSize*SIZEOF_INT); - ByteBuffer dest = newDirectByteBuffer(globalWorkSize*SIZEOF_INT); + int globalWorkSize = 0; + + ByteBuffer srcA = null; + ByteBuffer srcB = null; + ByteBuffer dest = null; + boolean allocated = false; + int divisor = 1; + while( !allocated ) { + try { + // round up to the nearest multiple of the LocalWorkSize + globalWorkSize = roundUp(localWorkSize, elementCount); + out.println("allocating three buffers of size: "+globalWorkSize); + srcA = newDirectByteBuffer(globalWorkSize*SIZEOF_INT); + srcB = newDirectByteBuffer(globalWorkSize*SIZEOF_INT); + dest = newDirectByteBuffer(globalWorkSize*SIZEOF_INT); + allocated = true; + } + catch( OutOfMemoryError oome ) { + ++divisor; + elementCount /= divisor; + out.println("not enough direct buffer memory; retrying with smaller buffers"); + } + } fillBuffer(srcA, 23456); fillBuffer(srcB, 46987); diff --git a/test/com/jogamp/opencl/LowLevelBindingTest.java b/test/com/jogamp/opencl/LowLevelBindingTest.java index 5381cab0..90027e13 100644 --- a/test/com/jogamp/opencl/LowLevelBindingTest.java +++ b/test/com/jogamp/opencl/LowLevelBindingTest.java @@ -280,13 +280,30 @@ public class LowLevelBindingTest extends UITestCase { checkError("on clCreateCommandQueue", intBuffer.get(0)); int localWorkSize = Math.min(128, maxWGS); // set and log Global and Local work size dimensions - int globalWorkSize = roundUp(localWorkSize, ELEMENT_COUNT); // rounded up to the nearest multiple of the LocalWorkSize - - out.println("allocateing buffers of size: "+globalWorkSize); - - ByteBuffer srcA = newDirectByteBuffer(globalWorkSize*SIZEOF_INT); - ByteBuffer srcB = newDirectByteBuffer(globalWorkSize*SIZEOF_INT); - ByteBuffer dest = newDirectByteBuffer(globalWorkSize*SIZEOF_INT); + int elementCount = ELEMENT_COUNT; + int globalWorkSize = 0; + + ByteBuffer srcA = null; + ByteBuffer srcB = null; + ByteBuffer dest = null; + boolean allocated = false; + int divisor = 1; + while( !allocated ) { + try { + // round up to the nearest multiple of the LocalWorkSize + globalWorkSize = roundUp(localWorkSize, elementCount); + out.println("allocating three buffers of size: "+globalWorkSize); + srcA = newDirectByteBuffer(globalWorkSize*SIZEOF_INT); + srcB = newDirectByteBuffer(globalWorkSize*SIZEOF_INT); + dest = newDirectByteBuffer(globalWorkSize*SIZEOF_INT); + allocated = true; + } + catch( OutOfMemoryError oome ) { + ++divisor; + elementCount /= divisor; + out.println("not enough direct buffer memory; retrying with smaller buffers"); + } + } // Allocate the OpenCL buffer memory objects for source and result on the device GMEM long devSrcA = cl.clCreateBuffer(context, CL.CL_MEM_READ_ONLY, srcA.capacity(), null, intBuffer); @@ -374,7 +391,7 @@ public class LowLevelBindingTest extends UITestCase { ret = cl.clSetKernelArg(kernel, 0, is32Bit()?SIZEOF_INT:SIZEOF_LONG, wrap(devSrcA)); checkError("on clSetKernelArg0", ret); ret = cl.clSetKernelArg(kernel, 1, is32Bit()?SIZEOF_INT:SIZEOF_LONG, wrap(devSrcB)); checkError("on clSetKernelArg1", ret); ret = cl.clSetKernelArg(kernel, 2, is32Bit()?SIZEOF_INT:SIZEOF_LONG, wrap(devDst)); checkError("on clSetKernelArg2", ret); - ret = cl.clSetKernelArg(kernel, 3, SIZEOF_INT, wrap(ELEMENT_COUNT)); checkError("on clSetKernelArg3", ret); + ret = cl.clSetKernelArg(kernel, 3, SIZEOF_INT, wrap(elementCount)); checkError("on clSetKernelArg3", ret); out.println("used device memory: "+ (srcA.capacity()+srcB.capacity()+dest.capacity())/1000000 +"MB"); |