diff options
Diffstat (limited to 'src/com')
-rw-r--r-- | src/com/jogamp/opencl/demos/fractal/Mandelbrot.cl | 6 | ||||
-rw-r--r-- | src/com/jogamp/opencl/demos/fractal/MultiDeviceFractal.java | 85 |
2 files changed, 68 insertions, 23 deletions
diff --git a/src/com/jogamp/opencl/demos/fractal/Mandelbrot.cl b/src/com/jogamp/opencl/demos/fractal/Mandelbrot.cl index 640c775..2fc959c 100644 --- a/src/com/jogamp/opencl/demos/fractal/Mandelbrot.cl +++ b/src/com/jogamp/opencl/demos/fractal/Mandelbrot.cl @@ -1,5 +1,9 @@ #ifdef DOUBLE_FP - #pragma OPENCL EXTENSION cl_khr_fp64 : enable + #ifdef AMD_FP + #pragma OPENCL EXTENSION cl_amd_fp64 : enable + #else + #pragma OPENCL EXTENSION cl_khr_fp64 : enable + #endif typedef double varfloat; #else typedef float varfloat; diff --git a/src/com/jogamp/opencl/demos/fractal/MultiDeviceFractal.java b/src/com/jogamp/opencl/demos/fractal/MultiDeviceFractal.java index 9420c6d..1b7f19d 100644 --- a/src/com/jogamp/opencl/demos/fractal/MultiDeviceFractal.java +++ b/src/com/jogamp/opencl/demos/fractal/MultiDeviceFractal.java @@ -12,6 +12,7 @@ import com.jogamp.opencl.CLKernel; import com.jogamp.opencl.CLPlatform; import com.jogamp.opencl.CLProgram; import com.jogamp.opencl.CLProgram.CompilerOptions; +import com.jogamp.opencl.util.CLProgramConfiguration; import com.jogamp.opengl.util.awt.TextRenderer; import java.awt.Color; import java.awt.Dimension; @@ -72,7 +73,7 @@ public class MultiDeviceFractal implements GLEventListener { private CLGLContext clContext; private CLCommandQueue[] queues; private CLKernel[] kernels; - private CLProgram program; + private CLProgram[] programs; private CLEventList probes; private CLGLBuffer<?>[] pboBuffers; private CLBuffer<IntBuffer>[] colorMap; @@ -166,8 +167,31 @@ public class MultiDeviceFractal implements GLEventListener { } - // load and build program - program = clContext.createProgram(getClass().getResourceAsStream("Mandelbrot.cl")); + // check if we have 64bit FP support on all devices + // if yes we can use only one program for all devices + one kernel per device. + // if not we will have to create (at least) one program for 32 and one for 64bit devices. 
+ // since there are different vendor extensions for double FP we use one program per device. + // (OpenCL spec is not very clear about this use case) + boolean all64bit = true; + for (CLDevice device : devices) { + if(!isDoubleFPAvailable(device)) { + all64bit = false; + break; + } + } + + // load program(s) + if(all64bit) { + programs = new CLProgram[] { + clContext.createProgram(getClass().getResourceAsStream("Mandelbrot.cl")) + }; + }else{ + programs = new CLProgram[slices]; + for (int i = 0; i < slices; i++) { + programs[i] = clContext.createProgram(getClass().getResourceAsStream("Mandelbrot.cl")); + } + } + buildProgram(); } catch (IOException ex) { @@ -258,33 +282,42 @@ public class MultiDeviceFractal implements GLEventListener { * workaround: The driver keeps using the old binaries for some reason. * to solve this we simply create a new program and release the old. * however rebuilding programs should be possible -> remove when drivers are fixed. + * (again: the spec is not very clear about this kind of usage) */ - if(program != null && rebuild) { - String source = program.getSource(); - program.release(); - program = clContext.createProgram(source); + if(programs[0] != null && rebuild) { + for(int i = 0; i < programs.length; i++) { + String source = programs[i].getSource(); + programs[i].release(); + programs[i] = clContext.createProgram(source); + } } // disable 64bit floating point math if not available - if(doublePrecision) { - for (CLDevice device : program.getCLDevices()) { - if(!device.isDoubleFPAvailable()) { - doublePrecision = false; - break; + for(int i = 0; i < programs.length; i++) { + CLDevice device = queues[i].getDevice(); + + CLProgramConfiguration configure = programs[i].prepare(); + if(doublePrecision && isDoubleFPAvailable(device)) { + //cl_khr_fp64 + configure.withDefine("DOUBLE_FP"); + + //amd's version of double precision floating point math + if(!device.isDoubleFPAvailable() && device.isExtensionAvailable("cl_amd_fp64")) { 
configure.withDefine("AMD_FP"); } } - } + if(programs.length > 1) { + configure.forDevice(device); + } + System.out.println(configure); + configure.withOption(CompilerOptions.FAST_RELAXED_MATH).build(); + } - if(doublePrecision) { - program.build(CompilerOptions.FAST_RELAXED_MATH, "-D DOUBLE_FP"); - }else{ - program.build(CompilerOptions.FAST_RELAXED_MATH); - } rebuild = false; for (int i = 0; i < kernels.length; i++) { // init kernel with constants - kernels[i] = program.createCLKernel("mandelbrot"); + kernels[i] = programs[min(i, programs.length - 1)].createCLKernel("mandelbrot"); } } @@ -292,7 +325,7 @@ public class MultiDeviceFractal implements GLEventListener { // init kernels with constants private void setKernelConstants() { for (int i = 0; i < slices; i++) { - kernels[i].setForce32BitArgs(!doublePrecision) + kernels[i].setForce32BitArgs(!doublePrecision || !isDoubleFPAvailable(queues[i].getDevice())) .setArg(6, pboBuffers[i]) .setArg(7, colorMap[i]) .setArg(8, colorMap[i].getBuffer().capacity()) @@ -374,14 +407,17 @@ public class MultiDeviceFractal implements GLEventListener { //draw info text textRenderer.beginRendering(width, height, false); - textRenderer.draw("precision: "+ (doublePrecision?"64bit":"32bit"), 10, height-15); + textRenderer.draw("device/time/precision", 10, height-15); for (int i = 0; i < slices; i++) { CLDevice device = queues[i].getDevice(); + boolean doubleFP = doublePrecision && isDoubleFPAvailable(device); CLEvent event = probes.getEvent(i); long start = event.getProfilingInfo(START); long end = event.getProfilingInfo(END); - textRenderer.draw(device.getType().toString()+i +" "+(int)((end-start)/1000000.0f)+"ms", 10, height-(20+16*(slices-i))); + textRenderer.draw(device.getType().toString()+i +" " + + (int)((end-start)/1000000.0f)+"ms @" + + (doubleFP?"64bit":"32bit"), 10, height-(20+16*(slices-i))); } textRenderer.endRendering(); @@ -474,6 +510,11 @@ public class MultiDeviceFractal implements GLEventListener { 
canvas.addKeyListener(keyAdapter); } + + private boolean isDoubleFPAvailable(CLDevice device) { + return device.isDoubleFPAvailable() || device.isExtensionAvailable("cl_amd_fp64"); + } + public void dispose(GLAutoDrawable drawable) { } |