diff options
-rw-r--r-- | src/com/jogamp/opencl/CLBuffer.java | 2 | ||||
-rw-r--r-- | src/com/jogamp/opencl/CLCommandQueue.java | 80 | ||||
-rw-r--r-- | src/com/jogamp/opencl/CLContext.java | 8 | ||||
-rw-r--r-- | src/com/jogamp/opencl/CLDevice.java | 18 | ||||
-rw-r--r-- | src/com/jogamp/opencl/CLEventList.java | 42 | ||||
-rw-r--r-- | src/com/jogamp/opencl/CLMemory.java | 21 | ||||
-rw-r--r-- | src/com/jogamp/opencl/CLPlatform.java | 105 | ||||
-rw-r--r-- | src/com/jogamp/opencl/CLProgram.java | 7 | ||||
-rw-r--r-- | src/com/jogamp/opencl/CLProgramBuilder.java | 77 | ||||
-rw-r--r-- | src/com/jogamp/opencl/CLSubBuffer.java | 3 | ||||
-rw-r--r-- | src/com/jogamp/opencl/util/CLBuildConfiguration.java | 8 | ||||
-rw-r--r-- | src/com/jogamp/opencl/util/CLDeviceFilters.java | 102 | ||||
-rw-r--r-- | src/com/jogamp/opencl/util/CLPlatformFilters.java | 40 | ||||
-rw-r--r-- | test/com/jogamp/opencl/CLBufferTest.java | 8 | ||||
-rw-r--r-- | test/com/jogamp/opencl/CLCommandQueueTest.java | 81 | ||||
-rw-r--r-- | test/com/jogamp/opencl/testkernels.cl | 23 |
16 files changed, 482 insertions, 143 deletions
diff --git a/src/com/jogamp/opencl/CLBuffer.java b/src/com/jogamp/opencl/CLBuffer.java index fed7db11..57fba461 100644 --- a/src/com/jogamp/opencl/CLBuffer.java +++ b/src/com/jogamp/opencl/CLBuffer.java @@ -82,7 +82,7 @@ public class CLBuffer<B extends Buffer> extends CLMemory<B> { if(isHostPointerFlag(flags)) { host_ptr = directBuffer; } - int size = sizeOfBufferElem(directBuffer) * directBuffer.capacity(); + int size = Buffers.sizeOfBufferElem(directBuffer) * directBuffer.capacity(); long id = cl.clCreateBuffer(context.ID, flags, size, host_ptr, result, 0); checkForError(result[0], "can not create cl buffer"); diff --git a/src/com/jogamp/opencl/CLCommandQueue.java b/src/com/jogamp/opencl/CLCommandQueue.java index 30757bad..6ceed82d 100644 --- a/src/com/jogamp/opencl/CLCommandQueue.java +++ b/src/com/jogamp/opencl/CLCommandQueue.java @@ -31,6 +31,7 @@ package com.jogamp.opencl; import com.jogamp.common.nio.CachedBufferFactory; import com.jogamp.opencl.gl.CLGLI; import com.jogamp.common.nio.PointerBuffer; +import java.nio.Buffer; import java.nio.ByteBuffer; import java.nio.IntBuffer; import java.util.ArrayList; @@ -118,7 +119,7 @@ public class CLCommandQueue extends CLObject implements CLResource { PointerBuffer conditionIDs = null; int conditions = 0; if(condition != null) { - conditionIDs = condition.IDs; + conditionIDs = condition.IDsView; conditions = condition.size; } @@ -162,7 +163,7 @@ public class CLCommandQueue extends CLObject implements CLResource { PointerBuffer conditionIDs = null; int conditions = 0; if(condition != null) { - conditionIDs = condition.IDs; + conditionIDs = condition.IDsView; conditions = condition.size; } @@ -211,7 +212,7 @@ public class CLCommandQueue extends CLObject implements CLResource { PointerBuffer conditionIDs = null; int conditions = 0; if(condition != null) { - conditionIDs = condition.IDs; + conditionIDs = condition.IDsView; conditions = condition.size; } @@ -281,7 +282,7 @@ public class CLCommandQueue extends CLObject implements CLResource { PointerBuffer conditionIDs = null; int conditions = 0; if(condition != null) { - conditionIDs = condition.IDs; + conditionIDs = condition.IDsView; conditions = condition.size; } @@ -326,7 +327,7 @@ public class CLCommandQueue extends CLObject implements CLResource { int originX, int originY, int hostX, int hostY, int rangeX, int rangeY, long rowPitch, long slicePitch, long hostRowPitch, long hostSlicePitch, boolean blockingRead, CLEventList condition, CLEventList events) { - // spec: if 2d: origin/hostpos=0, ragne=1 + // spec: if 2d: origin/hostpos=0, range=1 putReadBufferRect( readBuffer, originX, originY, 0, hostX, hostY, 0, rangeX, rangeY, 1, @@ -359,7 +360,7 @@ public class CLCommandQueue extends CLObject implements CLResource { PointerBuffer conditionIDs = null; int conditions = 0; if(condition != null) { - conditionIDs = condition.IDs; + conditionIDs = condition.IDsView; conditions = condition.size; } @@ -440,7 +441,7 @@ public class CLCommandQueue extends CLObject implements CLResource { PointerBuffer conditionIDs = null; int conditions = 0; if(condition != null) { - conditionIDs = condition.IDs; + conditionIDs = condition.IDsView; conditions = condition.size; } @@ -508,7 +509,7 @@ public class CLCommandQueue extends CLObject implements CLResource { PointerBuffer conditionIDs = null; int conditions = 0; if(condition != null) { - conditionIDs = condition.IDs; + conditionIDs = condition.IDsView; conditions = condition.size; } @@ -570,7 +571,7 @@ public class CLCommandQueue extends CLObject implements CLResource { PointerBuffer conditionIDs = null; int conditions = 0; if(condition != null) { - conditionIDs = condition.IDs; + conditionIDs = condition.IDsView; conditions = condition.size; } @@ -631,7 +632,7 @@ public class CLCommandQueue extends CLObject implements CLResource { PointerBuffer conditionIDs = null; int conditions = 0; if(condition != null) { - conditionIDs = condition.IDs; + conditionIDs = condition.IDsView; conditions = condition.size; } @@ -693,7 +694,7 @@ public class CLCommandQueue extends CLObject implements CLResource { PointerBuffer conditionIDs = null; int conditions = 0; if(condition != null) { - conditionIDs = condition.IDs; + conditionIDs = condition.IDsView; conditions = condition.size; } @@ -757,7 +758,7 @@ public class CLCommandQueue extends CLObject implements CLResource { PointerBuffer conditionIDs = null; int conditions = 0; if(condition != null) { - conditionIDs = condition.IDs; + conditionIDs = condition.IDsView; conditions = condition.size; } @@ -818,7 +819,7 @@ public class CLCommandQueue extends CLObject implements CLResource { /** * Calls {@native clEnqueueCopyImage}. */ - public CLCommandQueue putCopyImage(CLImage3d<?> srcImage, CLImage3d<?> dstImage, + public CLCommandQueue putCopyImage(CLImage<?> srcImage, CLImage<?> dstImage, int srcOriginX, int srcOriginY, int srcOriginZ, int dstOriginX, int dstOriginY, int dstOriginZ, int rangeX, int rangeY, int rangeZ, CLEventList condition, CLEventList events) { @@ -826,7 +827,7 @@ public class CLCommandQueue extends CLObject implements CLResource { PointerBuffer conditionIDs = null; int conditions = 0; if(condition != null) { - conditionIDs = condition.IDs; + conditionIDs = condition.IDsView; conditions = condition.size; } @@ -890,7 +891,7 @@ public class CLCommandQueue extends CLObject implements CLResource { PointerBuffer conditionIDs = null; int conditions = 0; if(condition != null) { - conditionIDs = condition.IDs; + conditionIDs = condition.IDsView; conditions = condition.size; } @@ -957,7 +958,7 @@ public class CLCommandQueue extends CLObject implements CLResource { PointerBuffer conditionIDs = null; int conditions = 0; if(condition != null) { - conditionIDs = condition.IDs; + conditionIDs = condition.IDsView; conditions = condition.size; } @@ -1021,7 +1022,7 @@ public class CLCommandQueue extends CLObject implements CLResource { PointerBuffer conditionIDs = null; int conditions = 0; if(condition != null) { - conditionIDs = condition.IDs; + conditionIDs = condition.IDsView; conditions = condition.size; } @@ -1088,7 +1089,7 @@ public class CLCommandQueue extends CLObject implements CLResource { PointerBuffer conditionIDs = null; int conditions = 0; if(condition != null) { - conditionIDs = condition.IDs; + conditionIDs = condition.IDsView; conditions = condition.size; } @@ -1146,7 +1147,7 @@ public class CLCommandQueue extends CLObject implements CLResource { PointerBuffer conditionIDs = null; int conditions = 0; if(condition != null) { - conditionIDs = condition.IDs; + conditionIDs = condition.IDsView; conditions = condition.size; } @@ -1206,7 +1207,7 @@ public class CLCommandQueue extends CLObject implements CLResource { PointerBuffer conditionIDs = null; int conditions = 0; if(condition != null) { - conditionIDs = condition.IDs; + conditionIDs = condition.IDsView; conditions = condition.size; } @@ -1272,7 +1273,7 @@ public class CLCommandQueue extends CLObject implements CLResource { PointerBuffer conditionIDs = null; int conditions = 0; if(condition != null) { - conditionIDs = condition.IDs; + conditionIDs = condition.IDsView; conditions = condition.size; } @@ -1297,30 +1298,30 @@ public class CLCommandQueue extends CLObject implements CLResource { /** * Calls {@native clEnqueueUnmapMemObject}. */ - public CLCommandQueue putUnmapMemory(CLMemory<?> memory) { - return putUnmapMemory(memory, null, null); + public CLCommandQueue putUnmapMemory(CLMemory<?> memory, Buffer mapped) { + return putUnmapMemory(memory, mapped, null, null); } /** * Calls {@native clEnqueueUnmapMemObject}. */ - public CLCommandQueue putUnmapMemory(CLMemory<?> memory, CLEventList events) { - return putUnmapMemory(memory, null, events); + public CLCommandQueue putUnmapMemory(CLMemory<?> memory, Buffer mapped, CLEventList events) { + return putUnmapMemory(memory, mapped, null, events); } /** * Calls {@native clEnqueueUnmapMemObject}. */ - public CLCommandQueue putUnmapMemory(CLMemory<?> memory, CLEventList condition, CLEventList events) { + public CLCommandQueue putUnmapMemory(CLMemory<?> memory, Buffer mapped, CLEventList condition, CLEventList events) { PointerBuffer conditionIDs = null; int conditions = 0; if(condition != null) { - conditionIDs = condition.IDs; + conditionIDs = condition.IDsView; conditions = condition.size; } - int ret = cl.clEnqueueUnmapMemObject(ID, memory.ID, memory.getBuffer(), + int ret = cl.clEnqueueUnmapMemObject(ID, memory.ID, mapped, conditions, conditionIDs, events==null ? null : events.IDs); if(ret != CL_SUCCESS) { throw newException(ret, "can not unmap " + memory + toStr(condition, events)); @@ -1348,11 +1349,11 @@ public class CLCommandQueue extends CLObject implements CLResource { * Calls {@native clWaitForEvents} if blockingWait equals true otherwise {@native clEnqueueWaitForEvents}. */ public CLCommandQueue putWaitForEvent(CLEventList list, int index, boolean blockingWait) { - int marker = list.IDs.position()-1; - list.IDs.position(index); - int ret = blockingWait ? cl.clWaitForEvents(1, list.IDs) - : cl.clEnqueueWaitForEvents(ID, 1, list.IDs); - list.IDs.position(marker); + + PointerBuffer ids = PointerBuffer.wrap(list.IDs.getBuffer().duplicate()).position(index); + + int ret = blockingWait ? cl.clWaitForEvents(1, ids) + : cl.clEnqueueWaitForEvents(ID, 1, ids); if(ret != CL_SUCCESS) { throw newException(ret, "can not "+ (blockingWait?"blocking": "") +" wait for event #" + index+ " in "+list); } @@ -1363,9 +1364,8 @@ public class CLCommandQueue extends CLObject implements CLResource { * Calls {@native clWaitForEvents} if blockingWait equals true otherwise {@native clEnqueueWaitForEvents}. */ public CLCommandQueue putWaitForEvents(CLEventList list, boolean blockingWait) { - list.IDs.rewind(); - int ret = blockingWait ? cl.clWaitForEvents(list.size, list.IDs) - : cl.clEnqueueWaitForEvents(ID, list.size, list.IDs); + int ret = blockingWait ? cl.clWaitForEvents(list.size, list.IDsView) + : cl.clEnqueueWaitForEvents(ID, list.size, list.IDsView); if(ret != CL_SUCCESS) { throw newException(ret, "can not "+ (blockingWait?"blocking": "") +" wait for events " + list); } @@ -1410,7 +1410,7 @@ public class CLCommandQueue extends CLObject implements CLResource { PointerBuffer conditionIDs = null; int conditions = 0; if(condition != null) { - conditionIDs = condition.IDs; + conditionIDs = condition.IDsView; conditions = condition.size; } @@ -1537,7 +1537,7 @@ public class CLCommandQueue extends CLObject implements CLResource { PointerBuffer conditionIDs = null; int conditions = 0; if(condition != null) { - conditionIDs = condition.IDs; + conditionIDs = condition.IDsView; conditions = condition.size; } @@ -1588,7 +1588,7 @@ public class CLCommandQueue extends CLObject implements CLResource { PointerBuffer conditionIDs = null; int conditions = 0; if(condition != null) { - conditionIDs = condition.IDs; + conditionIDs = condition.IDsView; conditions = condition.size; } @@ -1635,7 +1635,7 @@ public class CLCommandQueue extends CLObject implements CLResource { PointerBuffer conditionIDs = null; int conditions = 0; if(condition != null) { - conditionIDs = condition.IDs; + conditionIDs = condition.IDsView; conditions = condition.size; } diff --git a/src/com/jogamp/opencl/CLContext.java b/src/com/jogamp/opencl/CLContext.java index 81eb5f37..c8a847a2 100644 --- a/src/com/jogamp/opencl/CLContext.java +++ b/src/com/jogamp/opencl/CLContext.java @@ -402,15 +402,15 @@ public class CLContext extends CLObject implements CLResource { /** * Creates a CLImage3d with the specified format, dimension and flags. */ - public final CLImage3d<?> createImage3d(int width, int height, CLImageFormat format, Mem... flags) { - return createImage3d(null, width, height, 0, format, flags); + public final CLImage3d<?> createImage3d(int width, int height, int depth, CLImageFormat format, Mem... flags) { + return createImage3d(null, width, height, depth, format, flags); } /** * Creates a CLImage3d with the specified format, dimension and flags. */ - public final CLImage3d<?> createImage3d(int width, int height, int depth, int rowPitch, CLImageFormat format, Mem... flags) { - return createImage3d(null, width, height, rowPitch, format, flags); + public final CLImage3d<?> createImage3d(int width, int height, int depth, int rowPitch, int slicePitch, CLImageFormat format, Mem... flags) { + return createImage3d(null, width, height, depth, rowPitch, slicePitch, format, flags); } /** diff --git a/src/com/jogamp/opencl/CLDevice.java b/src/com/jogamp/opencl/CLDevice.java index fd1d93f3..1e9a94dd 100644 --- a/src/com/jogamp/opencl/CLDevice.java +++ b/src/com/jogamp/opencl/CLDevice.java @@ -33,6 +33,7 @@ import com.jogamp.common.nio.PointerBuffer; import com.jogamp.common.os.Platform; import java.nio.Buffer; import java.nio.ByteBuffer; +import java.nio.ByteOrder; import java.util.ArrayList; import java.util.Collections; import java.util.EnumSet; @@ -637,12 +638,12 @@ public final class CLDevice extends CLObject { } /** - * Returns {@link #isExtensionAvailable}("cl_khr_gl_sharing") || {@link #isExtensionAvailable}("cl_apple_gl_sharing"). + * Returns {@link #isExtensionAvailable}("cl_khr_gl_sharing") || {@link #isExtensionAvailable}("cl_APPLE_gl_sharing"). * @see #getExtensions() */ - @CLProperty("cl_khr_gl_sharing | cl_apple_gl_sharing") + @CLProperty("cl_khr_gl_sharing | cl_APPLE_gl_sharing") public boolean isGLMemorySharingSupported() { - return isExtensionAvailable("cl_khr_gl_sharing") || isExtensionAvailable("cl_apple_gl_sharing"); + return isExtensionAvailable("cl_khr_gl_sharing") || isExtensionAvailable("cl_APPLE_gl_sharing"); } /** @@ -652,6 +653,17 @@ public final class CLDevice extends CLObject { public boolean isExtensionAvailable(String extension) { return getExtensions().contains(extension); } + + /** + * Returns {@link ByteOrder#LITTLE_ENDIAN} or {@link ByteOrder#BIG_ENDIAN}. + */ + public ByteOrder getByteOrder() { + if(isLittleEndian()) { + return ByteOrder.LITTLE_ENDIAN; + }else{ + return ByteOrder.BIG_ENDIAN; + } + } /** * Returns all device extension names as unmodifiable Set. diff --git a/src/com/jogamp/opencl/CLEventList.java b/src/com/jogamp/opencl/CLEventList.java index 03a6f838..e2294b45 100644 --- a/src/com/jogamp/opencl/CLEventList.java +++ b/src/com/jogamp/opencl/CLEventList.java @@ -29,6 +29,7 @@ package com.jogamp.opencl; import com.jogamp.common.AutoCloseable; +import com.jogamp.common.nio.CachedBufferFactory; import com.jogamp.common.nio.PointerBuffer; import java.util.Iterator; @@ -40,23 +41,54 @@ public final class CLEventList implements CLResource, AutoCloseable, Iterable<CL private final CLEvent[] events; + /** + * stores event ids for fast access. + */ final PointerBuffer IDs; + + /** + * Points always to the first element of the id buffer. + */ + final PointerBuffer IDsView; + int size; - + public CLEventList(int capacity) { - this.events = new CLEvent[capacity]; - this.IDs = PointerBuffer.allocateDirect(capacity); + this(null, capacity); } public CLEventList(CLEvent... events) { + this(null, events); + } + + public CLEventList(CachedBufferFactory factory, int capacity) { + this.events = new CLEvent[capacity]; + this.IDs = initIDBuffer(factory, capacity); + this.IDsView = PointerBuffer.wrap(IDs.getBuffer().duplicate()); + } + + public CLEventList(CachedBufferFactory factory, CLEvent... events) { this.events = events; - this.IDs = PointerBuffer.allocateDirect(events.length); + this.IDs = initIDBuffer(factory, events.length); + this.IDsView = PointerBuffer.wrap(IDs.getBuffer().duplicate()); + for (CLEvent event : events) { + if(event == null) { + throw new IllegalArgumentException("event list containes null element."); + } IDs.put(event.ID); } IDs.rewind(); size = events.length; } + + private PointerBuffer initIDBuffer(CachedBufferFactory factory, int size) { + if(factory == null) { + return PointerBuffer.allocateDirect(size); + }else{ + return PointerBuffer.wrap(factory.newDirectByteBuffer(size*PointerBuffer.elementSize())); + } + } void createEvent(CLContext context) { @@ -86,7 +118,7 @@ public final class CLEventList implements CLResource, AutoCloseable, Iterable<CL public final void close() throws Exception { release(); } - + public CLEvent getEvent(int index) { if(index >= size) throw new IndexOutOfBoundsException("list contains "+size+" events, can not return event with index "+index); diff --git a/src/com/jogamp/opencl/CLMemory.java b/src/com/jogamp/opencl/CLMemory.java index 5b0422ca..b78e6024 100644 --- a/src/com/jogamp/opencl/CLMemory.java +++ b/src/com/jogamp/opencl/CLMemory.java @@ -33,11 +33,7 @@ import com.jogamp.common.nio.Buffers; import com.jogamp.common.nio.PointerBuffer; import com.jogamp.opencl.impl.CLMemObjectDestructorCallback; import java.nio.Buffer; -import java.nio.ByteBuffer; -import java.nio.DoubleBuffer; -import java.nio.FloatBuffer; import java.nio.IntBuffer; -import java.nio.ShortBuffer; import java.util.ArrayList; import java.util.EnumSet; import java.util.List; @@ -73,7 +69,7 @@ public abstract class CLMemory <B extends Buffer> extends CLObject implements CL } private void initElementSizes() { - this.elementSize = (buffer==null) ? 1 : sizeOfBufferElem(buffer); + this.elementSize = (buffer==null) ? 1 : Buffers.sizeOfBufferElem(buffer); this.clCapacity = (int) (size / elementSize); } @@ -85,21 +81,6 @@ public abstract class CLMemory <B extends Buffer> extends CLObject implements CL || (flags & CL_MEM_USE_HOST_PTR) != 0; } - static int sizeOfBufferElem(Buffer buffer) { - if (buffer instanceof ByteBuffer) { - return Buffers.SIZEOF_BYTE; - } else if (buffer instanceof IntBuffer) { - return Buffers.SIZEOF_INT; - } else if (buffer instanceof ShortBuffer) { - return Buffers.SIZEOF_SHORT; - } else if (buffer instanceof FloatBuffer) { - return Buffers.SIZEOF_FLOAT; - } else if (buffer instanceof DoubleBuffer) { - return Buffers.SIZEOF_DOUBLE; - } - throw new RuntimeException("Unexpected buffer type " + buffer.getClass().getName()); - } - protected static long getSizeImpl(CL cl, long id) { PointerBuffer pb = PointerBuffer.allocateDirect(1); int ret = cl.clGetMemObjectInfo(id, CL_MEM_SIZE, pb.elementSize(), pb.getBuffer(), null); diff --git a/src/com/jogamp/opencl/CLPlatform.java b/src/com/jogamp/opencl/CLPlatform.java index 218efed3..f5c94aed 100644 --- a/src/com/jogamp/opencl/CLPlatform.java +++ b/src/com/jogamp/opencl/CLPlatform.java @@ -234,20 +234,7 @@ public final class CLPlatform { for (int i = 0; i < platformId.capacity(); i++) { CLPlatform platform = new CLPlatform(platformId.get(i)); - if(filter == null) { - platforms.add(platform); - }else{ - boolean accepted = true; - for (Filter<CLPlatform> f : filter) { - if(!f.accept(platform)) { - accepted = false; - break; - } - } - if(accepted) { - platforms.add(platform); - } - } + addIfAccepted(platform, platforms, filter); } return platforms.toArray(new CLPlatform[platforms.size()]); @@ -285,38 +272,81 @@ public final class CLPlatform { public CLDevice[] listCLDevices(CLDevice.Type... types) { initialize(); - IntBuffer ib = Buffers.newDirectIntBuffer(1); - List<CLDevice> list = new ArrayList<CLDevice>(); for(int t = 0; t < types.length; t++) { CLDevice.Type type = types[t]; - //find all devices - int ret = cl.clGetDeviceIDs(ID, type.TYPE, 0, null, ib); + PointerBuffer deviceIDs = getDeviceIDs(type.TYPE); - // return an empty array rather than throwing an exception - if(ret == CL.CL_DEVICE_NOT_FOUND || ib.get(0) == 0) { - continue; + //add device to list + for (int n = 0; n < deviceIDs.capacity(); n++) { + list.add(new CLDevice(cl, this, deviceIDs.get(n))); } + } - checkForError(ret, "error while enumerating devices"); + return list.toArray(new CLDevice[list.size()]); - PointerBuffer deviceIDs = PointerBuffer.allocateDirect(ib.get(0)); - ret = cl.clGetDeviceIDs(ID, type.TYPE, deviceIDs.capacity(), deviceIDs, null); - checkForError(ret, "error while enumerating devices"); + } - //add device to list - for (int n = 0; n < deviceIDs.capacity(); n++) - list.add(new CLDevice(cl, this, deviceIDs.get(n))); - } + /** + * Lists all physical devices available on this platform matching the given {@link Filter}. + */ + public CLDevice[] listCLDevices(Filter<CLDevice>... filters) { + initialize(); - CLDevice[] devices = new CLDevice[list.size()]; - for (int i = 0; i < list.size(); i++) { - devices[i] = list.get(i); + List<CLDevice> list = new ArrayList<CLDevice>(); + + PointerBuffer deviceIDs = getDeviceIDs(CL_DEVICE_TYPE_ALL); + + //add device to list + for (int n = 0; n < deviceIDs.capacity(); n++) { + CLDevice device = new CLDevice(cl, this, deviceIDs.get(n)); + addIfAccepted(device, list, filters); } - return devices; + return list.toArray(new CLDevice[list.size()]); + + } + + private PointerBuffer getDeviceIDs(long type) { + + IntBuffer ib = Buffers.newDirectIntBuffer(1); + + //find all devices + int ret = cl.clGetDeviceIDs(ID, type, 0, null, ib); + + PointerBuffer deviceIDs = null; + + // return null rather than throwing an exception + if(ret == CL.CL_DEVICE_NOT_FOUND || ib.get(0) == 0) { + deviceIDs = PointerBuffer.allocate(0); + }else{ + deviceIDs = PointerBuffer.allocateDirect(ib.get(0)); + + checkForError(ret, "error while enumerating devices"); + ret = cl.clGetDeviceIDs(ID, type, deviceIDs.capacity(), deviceIDs, null); + checkForError(ret, "error while enumerating devices"); + } + + return deviceIDs; + } + + private static <I> void addIfAccepted(I item, List<I> list, Filter<I>[] filters) { + if(filters == null) { + list.add(item); + }else{ + boolean accepted = true; + for (Filter<I> filter : filters) { + if(!filter.accept(item)) { + accepted = false; + break; + } + } + if(accepted) { + list.add(item); + } + } } static CLDevice findMaxFlopsDevice(CLDevice[] devices) { @@ -372,6 +402,15 @@ public final class CLPlatform { } /** + * Returns the device with maximal FLOPS and the specified type from this platform. + * The device speed is estimated by calculating the product of + * MAX_COMPUTE_UNITS and MAX_CLOCK_FREQUENCY. + */ + public CLDevice getMaxFlopsDevice(Filter<CLDevice>... filter) { + return findMaxFlopsDevice(listCLDevices(filter)); + } + + /** * Returns the platform name. */ @CLProperty("CL_PLATFORM_NAME") diff --git a/src/com/jogamp/opencl/CLProgram.java b/src/com/jogamp/opencl/CLProgram.java index fd19cd8f..56804533 100644 --- a/src/com/jogamp/opencl/CLProgram.java +++ b/src/com/jogamp/opencl/CLProgram.java @@ -584,7 +584,12 @@ public class CLProgram extends CLObject implements CLResource { * each call of this method calls into Open */ public String getSource() { - return getProgramInfoString(CL_PROGRAM_SOURCE); + // some drivers return IVE codes if the program haven't been built from source. + try{ + return getProgramInfoString(CL_PROGRAM_SOURCE); + }catch(CLException.CLInvalidValueException ingore) { + return ""; + } } /** diff --git a/src/com/jogamp/opencl/CLProgramBuilder.java b/src/com/jogamp/opencl/CLProgramBuilder.java index ece9ba36..389adce8 100644 --- a/src/com/jogamp/opencl/CLProgramBuilder.java +++ b/src/com/jogamp/opencl/CLProgramBuilder.java @@ -48,6 +48,7 @@ import java.util.Set; /** * CLProgramBuilder is a helper for building programs with more complex configurations or * building multiple programs with similar configurations. + * CLProgramBuilder is used to create {@link CLProgramConfiguration}s and {@link CLBuildConfiguration}s. * @see CLProgram#prepare() * @see #createConfiguration() * @see #createConfiguration(com.jogamp.opencl.CLProgram) @@ -113,13 +114,13 @@ public final class CLProgramBuilder implements CLProgramConfiguration, Serializa * The CLProgram is initialized and ready to be build after this method call. * This method prefers program initialization from binaries if this fails or if * no binaries have been found, it will try to load the program from sources. If - * This also fails an appropriate exception will be thrown. + * this also fails an appropriate exception will be thrown. * @param ois The ObjectInputStream for reading the object. * @param context The context used for program initialization. */ public static CLProgramConfiguration loadConfiguration(ObjectInputStream ois, CLContext context) throws IOException, ClassNotFoundException { CLProgramBuilder config = (CLProgramBuilder) ois.readObject(); - if(config.binariesMap.size() > 0 && config.binariesMap.values().iterator().next().length > 0) { + if(allBinariesAvailable(config)) { try{ config.program = context.createProgram(config.binariesMap); }catch(CLException.CLInvalidBinaryException ex) { @@ -136,6 +137,15 @@ public final class CLProgramBuilder implements CLProgramConfiguration, Serializa } return config; } + + private static boolean allBinariesAvailable(CLProgramBuilder config) { + for (Map.Entry<CLDevice, byte[]> entry : config.binariesMap.entrySet()) { + if(Arrays.equals(NO_BINARIES, entry.getValue())) { + return false; + } + } + return config.binariesMap.size() > 0; + } @Override public void save(ObjectOutputStream oos) throws IOException { @@ -195,14 +205,16 @@ public final class CLProgramBuilder implements CLProgramConfiguration, Serializa @Override public CLProgramBuilder forDevice(CLDevice device) { - binariesMap.put(device, NO_BINARIES); + if(!binariesMap.containsKey(device)) { + binariesMap.put(device, NO_BINARIES); + } return this; } @Override public CLProgramBuilder forDevices(CLDevice... devices) { for (CLDevice device : devices) { - binariesMap.put(device, NO_BINARIES); + forDevice(device); } return this; } @@ -260,20 +272,43 @@ public final class CLProgramBuilder implements CLProgramConfiguration, Serializa optionSet.clear(); return this; } + + private int indexOf(CLDevice device, CLDevice[] devices) { + for (int i = 0; i < devices.length; i++) { + if(device.equals(devices[i])) { + return i; + } + } + return -1; + } // format: { platform_suffix, num_binaries, (device.ID, length, binaries)+ } private void writeObject(ObjectOutputStream out) throws IOException { out.defaultWriteObject(); - Set<CLDevice> devices = binariesMap.keySet(); - String suffix = devices.iterator().next().getPlatform().getICDSuffix(); - out.writeUTF(suffix); - - out.writeInt(binariesMap.size()); + CLDevice[] deviceList = null; + String suffix = null; + + if(!binariesMap.isEmpty()) { + CLPlatform platform = binariesMap.keySet().iterator().next().getPlatform(); + deviceList = platform.listCLDevices(); - for (CLDevice device : devices) { - byte[] binaries = binariesMap.get(device); - out.writeLong(device.ID); + suffix = platform.getICDSuffix(); + } + + out.writeUTF(suffix); // null if we have no binaries or no devices specified + out.writeInt(binariesMap.size()); // may be 0 + + for (Map.Entry<CLDevice, byte[]> entry : binariesMap.entrySet()) { + CLDevice device = entry.getKey(); + byte[] binaries = entry.getValue(); + + // we use the device index as identifier since there is currently no other way + // to distinguish identical devices via CL. + // it should be persistent between runs but may change on driver/hardware update. In this situations we would + // have to build from source anyway (build failures). + int index = indexOf(device, deviceList); + out.writeInt(index); out.writeInt(binaries.length); out.write(binaries); } @@ -290,18 +325,26 @@ public final class CLProgramBuilder implements CLProgramConfiguration, Serializa break; } } - + this.binariesMap = new LinkedHashMap<CLDevice, byte[]>(); + + CLDevice[] devices = null; + if(platform != null) { + devices = platform.listCLDevices(); + } + int mapSize = in.readInt(); for (int i = 0; i < mapSize; i++) { - long deviceID = in.readLong(); + int index = in.readInt(); int length = in.readInt(); byte[] binaries = new byte[length]; in.readFully(binaries); - - CLDevice device = new CLDevice(CLPlatform.getLowLevelCLInterface(), platform, deviceID); - binariesMap.put(device, binaries); + + // we ignore binaries we can't map to devices + if(devices != null && index >= 0 && index < devices.length) { + binariesMap.put(devices[index], binaries); + } } } diff --git a/src/com/jogamp/opencl/CLSubBuffer.java b/src/com/jogamp/opencl/CLSubBuffer.java index 89a74713..d8831783 100644 --- a/src/com/jogamp/opencl/CLSubBuffer.java +++ b/src/com/jogamp/opencl/CLSubBuffer.java @@ -28,6 +28,7 @@ package com.jogamp.opencl; +import com.jogamp.common.nio.Buffers; import com.jogamp.opencl.CLMemory.Mem; import java.nio.Buffer; @@ -72,7 +73,7 @@ public class CLSubBuffer<B extends Buffer> extends CLBuffer<B> { * Returns the offset of this sub buffer to its parent in buffer elements. */ public int getOffset() { - int elemSize = buffer==null ? 1 : sizeOfBufferElem(buffer); + int elemSize = buffer==null ? 1 : Buffers.sizeOfBufferElem(buffer); return offset/elemSize; } diff --git a/src/com/jogamp/opencl/util/CLBuildConfiguration.java b/src/com/jogamp/opencl/util/CLBuildConfiguration.java index f70f088e..89f59110 100644 --- a/src/com/jogamp/opencl/util/CLBuildConfiguration.java +++ b/src/com/jogamp/opencl/util/CLBuildConfiguration.java @@ -36,6 +36,14 @@ import java.util.Map; /** * Configuration representing everything needed to build an OpenCL program. + * <p> + * If you use {@link #save(java.io.ObjectOutputStream)} to persist build configurations between + * JVM sessions it is highly recommended to call {@link #forDevice(com.jogamp.opencl.CLDevice) } + * or {@link #forDevices(com.jogamp.opencl.CLDevice[]) } before building the program. + * Driver updates or HW changes can make exact device-to-binary mapping hard, the + * builder will drop all unmappable binaries silently. Setting the devices explicitly will + * force automatic rebuilds from source in this situation. + * </p> * @author Michael Bien * @see com.jogamp.opencl.CLProgramBuilder#createConfiguration() * @see com.jogamp.opencl.CLProgramBuilder#loadConfiguration(java.io.ObjectInputStream) diff --git a/src/com/jogamp/opencl/util/CLDeviceFilters.java b/src/com/jogamp/opencl/util/CLDeviceFilters.java new file mode 100644 index 00000000..a2ba0475 --- /dev/null +++ b/src/com/jogamp/opencl/util/CLDeviceFilters.java @@ -0,0 +1,102 @@ +/* + * Copyright 2011 JogAmp Community. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, are + * permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, this list + * of conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY JogAmp Community ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JogAmp Community OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * The views and conclusions contained in the software and documentation are those of the + * authors and should not be interpreted as representing official policies, either expressed + * or implied, of JogAmp Community. + */ + +package com.jogamp.opencl.util; + +import com.jogamp.opencl.CLCommandQueue.Mode; +import com.jogamp.opencl.CLDevice; +import java.nio.ByteOrder; +import java.util.Arrays; + +/** + * Pre-defined filters. + * @author Michael Bien + * @see com.jogamp.opencl.CLPlatform#listCLDevices(com.jogamp.opencl.util.Filter[]) + * @see com.jogamp.opencl.CLPlatform#getMaxFlopsDevice(com.jogamp.opencl.util.Filter[]) + */ +public class CLDeviceFilters { + + /** + * Accepts all devices of the given type. + */ + public static Filter<CLDevice> type(final CLDevice.Type type) { + return new Filter<CLDevice>() { + public boolean accept(CLDevice item) { + if(type.equals(CLDevice.Type.ALL)) { + return true; + } + return item.getType().equals(type); + } + }; + } + + /** + * Accepts all devices of the given {@link ByteOrder}. + */ + public static Filter<CLDevice> byteOrder(final ByteOrder order) { + return new Filter<CLDevice>() { + public boolean accept(CLDevice item) { + return item.getByteOrder().equals(order); + } + }; + } + + /** + * Accepts all devices which support OpenGL-OpenCL interoparability. + */ + public static Filter<CLDevice> glSharing() { + return new Filter<CLDevice>() { + public boolean accept(CLDevice item) { + return item.isGLMemorySharingSupported(); + } + }; + } + + /** + * Accepts all devices supporting the given extensions. + */ + public static Filter<CLDevice> extension(final String... extensions) { + return new Filter<CLDevice>() { + public boolean accept(CLDevice item) { + return item.getExtensions().containsAll(Arrays.asList(extensions)); + } + }; + } + + /** + * Accepts all devices supporting the specified command queue modes. + */ + public static Filter<CLDevice> queueMode(final Mode... modes) { + return new Filter<CLDevice>() { + public boolean accept(CLDevice item) { + return item.getQueueProperties().containsAll(Arrays.asList(modes)); + } + }; + } + +} diff --git a/src/com/jogamp/opencl/util/CLPlatformFilters.java b/src/com/jogamp/opencl/util/CLPlatformFilters.java index 3d23a45c..dab7448f 100644 --- a/src/com/jogamp/opencl/util/CLPlatformFilters.java +++ b/src/com/jogamp/opencl/util/CLPlatformFilters.java @@ -28,10 +28,12 @@ package com.jogamp.opencl.util; +import com.jogamp.opencl.CLCommandQueue.Mode; import com.jogamp.opencl.CLDevice; import com.jogamp.opencl.CLPlatform; import com.jogamp.opencl.CLVersion; import java.util.Arrays; +import java.util.List; /** * Pre-defined filters. @@ -62,15 +64,49 @@ public class CLPlatformFilters { } }; } + + /** + * Accepts all platforms containing at least one devices of which supports OpenGL-OpenCL interoparability. + */ + public static Filter<CLPlatform> glSharing() { + return new Filter<CLPlatform>() { + public boolean accept(CLPlatform item) { + CLDevice[] devices = item.listCLDevices(); + for (CLDevice device : devices) { + if(device.isGLMemorySharingSupported()) { + return true; + } + } + return false; + } + }; + } /** - * Accepts all platforms containing devices of the given extensions. + * Accepts all platforms supporting the given extensions. */ - public static Filter<CLPlatform> extensions(final String... extensions) { + public static Filter<CLPlatform> extension(final String... extensions) { return new Filter<CLPlatform>() { public boolean accept(CLPlatform item) { return item.getExtensions().containsAll(Arrays.asList(extensions)); } }; } + + /** + * Accepts all platforms containing at least one devices supporting the specified command queue modes. + */ + public static Filter<CLPlatform> queueMode(final Mode... modes) { + return new Filter<CLPlatform>() { + public boolean accept(CLPlatform item) { + List<Mode> modesList = Arrays.asList(modes); + for (CLDevice device : item.listCLDevices()) { + if(device.getQueueProperties().containsAll(modesList)) { + return true; + } + } + return false; + } + }; + } } diff --git a/test/com/jogamp/opencl/CLBufferTest.java b/test/com/jogamp/opencl/CLBufferTest.java index 0e4a4a65..1b718277 100644 --- a/test/com/jogamp/opencl/CLBufferTest.java +++ b/test/com/jogamp/opencl/CLBufferTest.java @@ -158,10 +158,10 @@ public class CLBufferTest { ByteBuffer mappedBufferA = queue.putMapBuffer(clBufferA, Map.READ_WRITE, true); assertEquals(sizeInBytes, mappedBufferA.capacity()); - fillBuffer(mappedBufferA, 12345); // write to A + fillBuffer(mappedBufferA, 12345); // write to A - queue.putUnmapMemory(clBufferA) // unmap A - .putCopyBuffer(clBufferA, clBufferB); // copy A -> B + queue.putUnmapMemory(clBufferA, mappedBufferA)// unmap A + .putCopyBuffer(clBufferA, clBufferB); // copy A -> B // map B for read operations ByteBuffer mappedBufferB = queue.putMapBuffer(clBufferB, Map.READ, true); @@ -171,7 +171,7 @@ public class CLBufferTest { checkIfEqual(mappedBufferA, mappedBufferB, elements); // A == B ? out.println("results are valid"); - queue.putUnmapMemory(clBufferB); // unmap B + queue.putUnmapMemory(clBufferB, mappedBufferB); // unmap B context.release(); diff --git a/test/com/jogamp/opencl/CLCommandQueueTest.java b/test/com/jogamp/opencl/CLCommandQueueTest.java index e40d07e4..c8c028af 100644 --- a/test/com/jogamp/opencl/CLCommandQueueTest.java +++ b/test/com/jogamp/opencl/CLCommandQueueTest.java @@ -35,8 +35,11 @@ import java.util.concurrent.CountDownLatch; import com.jogamp.opencl.util.MultiQueueBarrier; import com.jogamp.opencl.CLCommandQueue.Mode; import com.jogamp.opencl.CLMemory.Mem; +import com.jogamp.opencl.util.CLDeviceFilters; +import com.jogamp.opencl.util.CLPlatformFilters; import java.io.IOException; import java.nio.ByteBuffer; +import java.nio.IntBuffer; import java.util.EnumSet; import java.util.concurrent.TimeUnit; import org.junit.Test; @@ -47,6 +50,7 @@ import static com.jogamp.opencl.TestUtils.*; import static com.jogamp.opencl.CLEvent.*; import static com.jogamp.opencl.CLVersion.*; import static com.jogamp.common.nio.Buffers.*; +import static com.jogamp.opencl.CLCommandQueue.Mode.*; /** * @@ -62,8 +66,8 @@ public class CLCommandQueueTest { //CLCommandQueueEnums EnumSet<Mode> queueMode = Mode.valuesOf(CL.CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL.CL_QUEUE_PROFILING_ENABLE); - assertTrue(queueMode.contains(Mode.OUT_OF_ORDER_MODE)); - assertTrue(queueMode.contains(Mode.PROFILING_MODE)); + assertTrue(queueMode.contains(OUT_OF_ORDER_MODE)); + assertTrue(queueMode.contains(PROFILING_MODE)); assertNotNull(Mode.valuesOf(0)); assertEquals(0, Mode.valuesOf(0).size()); @@ -137,11 +141,15 @@ public class CLCommandQueueTest { queue.put1DRangeKernel(vectorAddKernel, 0, elements, groupSize, events); assertEquals(2, events.size()); - queue.putWaitForEvent(events, 0, false) + queue.putWaitForEvent(events, 0, true) .putWaitForEvent(events, 1, true); - queue.putReadBuffer(clBufferC, false) - .putReadBuffer(clBufferD, true); + events.release(); + + queue.putReadBuffer(clBufferC, false, events) + .putReadBuffer(clBufferD, false, events); + + queue.putWaitForEvents(events, true); events.release(); @@ -151,6 +159,67 @@ public class CLCommandQueueTest { context.release(); } } + + @Test + public void eventConditionsTest() throws IOException { + + out.println(" - - - event conditions test - - - "); + + CLPlatform platform = CLPlatform.getDefault(CLPlatformFilters.queueMode(OUT_OF_ORDER_MODE)); + + CLDevice device = null; + // we can still test this with in-order queues + if(platform == null) { + device = CLPlatform.getDefault().getMaxFlopsDevice(); + }else{ + device = platform.getMaxFlopsDevice(CLDeviceFilters.queueMode(OUT_OF_ORDER_MODE)); + } + + CLContext context = CLContext.create(device); + + try{ + + CLProgram program = context.createProgram(getClass().getResourceAsStream("testkernels.cl")).build(); + + CLBuffer<IntBuffer> buffer = context.createBuffer(newDirectIntBuffer(new int[]{ 1,1,1, 1,1,1, 1,1,1 })); + + int elements = buffer.getNIOCapacity(); + + CLCommandQueue queue; + if(device.getQueueProperties().contains(OUT_OF_ORDER_MODE)) { + queue = device.createCommandQueue(OUT_OF_ORDER_MODE); + }else{ + queue = device.createCommandQueue(); + } + + CLEventList writeEvent = new CLEventList(1); + CLEventList kernelEvents = new CLEventList(2); + + // (1+1)*2 = 4; conditions enforce propper order + CLKernel addKernel = program.createCLKernel("add").putArg(buffer).putArg(1).putArg(elements); + CLKernel mulKernel = program.createCLKernel("mul").putArg(buffer).putArg(2).putArg(elements); + + queue.putWriteBuffer(buffer, false, writeEvent); + + queue.put1DRangeKernel(addKernel, 0, elements, 1, writeEvent, kernelEvents); + queue.put1DRangeKernel(mulKernel, 0, elements, 1, writeEvent, kernelEvents); + + queue.putReadBuffer(buffer, false, kernelEvents, null); + + queue.finish(); + + writeEvent.release(); + kernelEvents.release(); + + for (int i = 0; i < elements; i++) { + assertEquals(4, buffer.getBuffer().get(i)); + } + + }finally{ + context.release(); + } + + } @Test public void profilingEventsTest() throws IOException { @@ -174,7 +243,7 @@ public class CLCommandQueueTest { CLProgram program = context.createProgram(getClass().getResourceAsStream("testkernels.cl")).build(); CLKernel vectorAddKernel = program.createCLKernel("VectorAddGM").setArg(3, elements); - CLCommandQueue queue = device.createCommandQueue(Mode.PROFILING_MODE); + CLCommandQueue queue = device.createCommandQueue(PROFILING_MODE); out.println(queue); diff --git a/test/com/jogamp/opencl/testkernels.cl b/test/com/jogamp/opencl/testkernels.cl index ec7e8bf6..2b8c097d 100644 --- a/test/com/jogamp/opencl/testkernels.cl +++ b/test/com/jogamp/opencl/testkernels.cl @@ -1,22 +1,33 @@ - // OpenCL Kernel Function for element by element vector addition kernel void VectorAddGM(global const int* a, global const int* b, global int* c, int iNumElements) { - // get index into global data array int iGID = get_global_id(0); - // bound check (equivalent to the limit on a 'for' loop for standard/serial C code if (iGID >= iNumElements) { return; } - // add the vector elements c[iGID] = a[iGID] + b[iGID]; } kernel void Test(global const int* a, global const int* b, global int* c, int iNumElements) { - // get index into global data array int iGID = get_global_id(0); - // bound check (equivalent to the limit on a 'for' loop for standard/serial C code if (iGID >= iNumElements) { return; } c[iGID] = iGID; } + + kernel void add(global int* a, int value, int iNumElements) { + int iGID = get_global_id(0); + if (iGID >= iNumElements) { + return; + } + a[iGID] += value; + } + + kernel void mul(global int* a, int value, int iNumElements) { + + int iGID = get_global_id(0); + if (iGID >= iNumElements) { + return; + } + a[iGID] *= value; + } |