forked from mirror/go-ethereum
parent
8b865fa9bf
commit
ec6a548ee3
@ -0,0 +1,26 @@ |
|||||||
|
/* |
||||||
|
Package cl provides a binding to the OpenCL api. It's mostly a low-level |
||||||
|
wrapper that avoids adding functionality while still making the interface |
||||||
|
a little more friendly and easy to use. |
||||||
|
|
||||||
|
Resource life-cycle management: |
||||||
|
|
||||||
|
For any CL object that gets created (buffer, queue, kernel, etc..) you should |
||||||
|
call object.Release() when finished with it to free the CL resources. This |
||||||
|
explicitely calls the related clXXXRelease method for the type. However, |
||||||
|
as a fallback there is a finalizer set for every resource item that takes |
||||||
|
care of it (eventually) if Release isn't called. In this way you can have |
||||||
|
better control over the life cycle of resources while having a fall back |
||||||
|
to avoid leaks. This is similar to how file handles and such are handled |
||||||
|
in the Go standard packages. |
||||||
|
*/ |
||||||
|
package cl |
||||||
|
|
||||||
|
// #include "headers/1.2/opencl.h"
|
||||||
|
// #cgo CFLAGS: -Iheaders/1.2
|
||||||
|
// #cgo darwin LDFLAGS: -framework OpenCL
|
||||||
|
// #cgo linux LDFLAGS: -lOpenCL
|
||||||
|
import "C" |
||||||
|
import "errors" |
||||||
|
|
||||||
|
var ErrUnsupported = errors.New("cl: unsupported") |
@ -0,0 +1,254 @@ |
|||||||
|
package cl |
||||||
|
|
||||||
|
import ( |
||||||
|
"math/rand" |
||||||
|
"reflect" |
||||||
|
"strings" |
||||||
|
"testing" |
||||||
|
) |
||||||
|
|
||||||
|
var kernelSource = ` |
||||||
|
__kernel void square( |
||||||
|
__global float* input, |
||||||
|
__global float* output, |
||||||
|
const unsigned int count) |
||||||
|
{ |
||||||
|
int i = get_global_id(0); |
||||||
|
if(i < count) |
||||||
|
output[i] = input[i] * input[i]; |
||||||
|
} |
||||||
|
` |
||||||
|
|
||||||
|
func getObjectStrings(object interface{}) map[string]string { |
||||||
|
v := reflect.ValueOf(object) |
||||||
|
t := reflect.TypeOf(object) |
||||||
|
|
||||||
|
strs := make(map[string]string) |
||||||
|
|
||||||
|
numMethods := t.NumMethod() |
||||||
|
for i := 0; i < numMethods; i++ { |
||||||
|
method := t.Method(i) |
||||||
|
if method.Type.NumIn() == 1 && method.Type.NumOut() == 1 && method.Type.Out(0).Kind() == reflect.String { |
||||||
|
// this is a string-returning method with (presumably) only a pointer receiver parameter
|
||||||
|
// call it
|
||||||
|
outs := v.Method(i).Call([]reflect.Value{}) |
||||||
|
// put the result in our map
|
||||||
|
strs[method.Name] = (outs[0].Interface()).(string) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return strs |
||||||
|
} |
||||||
|
|
||||||
|
func TestPlatformStringsContainNoNULs(t *testing.T) { |
||||||
|
platforms, err := GetPlatforms() |
||||||
|
if err != nil { |
||||||
|
t.Fatalf("Failed to get platforms: %+v", err) |
||||||
|
} |
||||||
|
|
||||||
|
for _, p := range platforms { |
||||||
|
for key, value := range getObjectStrings(p) { |
||||||
|
if strings.Contains(value, "\x00") { |
||||||
|
t.Fatalf("platform string %q = %+q contains NUL", key, value) |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func TestDeviceStringsContainNoNULs(t *testing.T) { |
||||||
|
platforms, err := GetPlatforms() |
||||||
|
if err != nil { |
||||||
|
t.Fatalf("Failed to get platforms: %+v", err) |
||||||
|
} |
||||||
|
|
||||||
|
for _, p := range platforms { |
||||||
|
devs, err := p.GetDevices(DeviceTypeAll) |
||||||
|
if err != nil { |
||||||
|
t.Fatalf("Failed to get devices for platform %q: %+v", p.Name(), err) |
||||||
|
} |
||||||
|
|
||||||
|
for _, d := range devs { |
||||||
|
for key, value := range getObjectStrings(d) { |
||||||
|
if strings.Contains(value, "\x00") { |
||||||
|
t.Fatalf("device string %q = %+q contains NUL", key, value) |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func TestHello(t *testing.T) { |
||||||
|
var data [1024]float32 |
||||||
|
for i := 0; i < len(data); i++ { |
||||||
|
data[i] = rand.Float32() |
||||||
|
} |
||||||
|
|
||||||
|
platforms, err := GetPlatforms() |
||||||
|
if err != nil { |
||||||
|
t.Fatalf("Failed to get platforms: %+v", err) |
||||||
|
} |
||||||
|
for i, p := range platforms { |
||||||
|
t.Logf("Platform %d:", i) |
||||||
|
t.Logf(" Name: %s", p.Name()) |
||||||
|
t.Logf(" Vendor: %s", p.Vendor()) |
||||||
|
t.Logf(" Profile: %s", p.Profile()) |
||||||
|
t.Logf(" Version: %s", p.Version()) |
||||||
|
t.Logf(" Extensions: %s", p.Extensions()) |
||||||
|
} |
||||||
|
platform := platforms[0] |
||||||
|
|
||||||
|
devices, err := platform.GetDevices(DeviceTypeAll) |
||||||
|
if err != nil { |
||||||
|
t.Fatalf("Failed to get devices: %+v", err) |
||||||
|
} |
||||||
|
if len(devices) == 0 { |
||||||
|
t.Fatalf("GetDevices returned no devices") |
||||||
|
} |
||||||
|
deviceIndex := -1 |
||||||
|
for i, d := range devices { |
||||||
|
if deviceIndex < 0 && d.Type() == DeviceTypeGPU { |
||||||
|
deviceIndex = i |
||||||
|
} |
||||||
|
t.Logf("Device %d (%s): %s", i, d.Type(), d.Name()) |
||||||
|
t.Logf(" Address Bits: %d", d.AddressBits()) |
||||||
|
t.Logf(" Available: %+v", d.Available()) |
||||||
|
// t.Logf(" Built-In Kernels: %s", d.BuiltInKernels())
|
||||||
|
t.Logf(" Compiler Available: %+v", d.CompilerAvailable()) |
||||||
|
t.Logf(" Double FP Config: %s", d.DoubleFPConfig()) |
||||||
|
t.Logf(" Driver Version: %s", d.DriverVersion()) |
||||||
|
t.Logf(" Error Correction Supported: %+v", d.ErrorCorrectionSupport()) |
||||||
|
t.Logf(" Execution Capabilities: %s", d.ExecutionCapabilities()) |
||||||
|
t.Logf(" Extensions: %s", d.Extensions()) |
||||||
|
t.Logf(" Global Memory Cache Type: %s", d.GlobalMemCacheType()) |
||||||
|
t.Logf(" Global Memory Cacheline Size: %d KB", d.GlobalMemCachelineSize()/1024) |
||||||
|
t.Logf(" Global Memory Size: %d MB", d.GlobalMemSize()/(1024*1024)) |
||||||
|
t.Logf(" Half FP Config: %s", d.HalfFPConfig()) |
||||||
|
t.Logf(" Host Unified Memory: %+v", d.HostUnifiedMemory()) |
||||||
|
t.Logf(" Image Support: %+v", d.ImageSupport()) |
||||||
|
t.Logf(" Image2D Max Dimensions: %d x %d", d.Image2DMaxWidth(), d.Image2DMaxHeight()) |
||||||
|
t.Logf(" Image3D Max Dimenionns: %d x %d x %d", d.Image3DMaxWidth(), d.Image3DMaxHeight(), d.Image3DMaxDepth()) |
||||||
|
// t.Logf(" Image Max Buffer Size: %d", d.ImageMaxBufferSize())
|
||||||
|
// t.Logf(" Image Max Array Size: %d", d.ImageMaxArraySize())
|
||||||
|
// t.Logf(" Linker Available: %+v", d.LinkerAvailable())
|
||||||
|
t.Logf(" Little Endian: %+v", d.EndianLittle()) |
||||||
|
t.Logf(" Local Mem Size Size: %d KB", d.LocalMemSize()/1024) |
||||||
|
t.Logf(" Local Mem Type: %s", d.LocalMemType()) |
||||||
|
t.Logf(" Max Clock Frequency: %d", d.MaxClockFrequency()) |
||||||
|
t.Logf(" Max Compute Units: %d", d.MaxComputeUnits()) |
||||||
|
t.Logf(" Max Constant Args: %d", d.MaxConstantArgs()) |
||||||
|
t.Logf(" Max Constant Buffer Size: %d KB", d.MaxConstantBufferSize()/1024) |
||||||
|
t.Logf(" Max Mem Alloc Size: %d KB", d.MaxMemAllocSize()/1024) |
||||||
|
t.Logf(" Max Parameter Size: %d", d.MaxParameterSize()) |
||||||
|
t.Logf(" Max Read-Image Args: %d", d.MaxReadImageArgs()) |
||||||
|
t.Logf(" Max Samplers: %d", d.MaxSamplers()) |
||||||
|
t.Logf(" Max Work Group Size: %d", d.MaxWorkGroupSize()) |
||||||
|
t.Logf(" Max Work Item Dimensions: %d", d.MaxWorkItemDimensions()) |
||||||
|
t.Logf(" Max Work Item Sizes: %d", d.MaxWorkItemSizes()) |
||||||
|
t.Logf(" Max Write-Image Args: %d", d.MaxWriteImageArgs()) |
||||||
|
t.Logf(" Memory Base Address Alignment: %d", d.MemBaseAddrAlign()) |
||||||
|
t.Logf(" Native Vector Width Char: %d", d.NativeVectorWidthChar()) |
||||||
|
t.Logf(" Native Vector Width Short: %d", d.NativeVectorWidthShort()) |
||||||
|
t.Logf(" Native Vector Width Int: %d", d.NativeVectorWidthInt()) |
||||||
|
t.Logf(" Native Vector Width Long: %d", d.NativeVectorWidthLong()) |
||||||
|
t.Logf(" Native Vector Width Float: %d", d.NativeVectorWidthFloat()) |
||||||
|
t.Logf(" Native Vector Width Double: %d", d.NativeVectorWidthDouble()) |
||||||
|
t.Logf(" Native Vector Width Half: %d", d.NativeVectorWidthHalf()) |
||||||
|
t.Logf(" OpenCL C Version: %s", d.OpenCLCVersion()) |
||||||
|
// t.Logf(" Parent Device: %+v", d.ParentDevice())
|
||||||
|
t.Logf(" Profile: %s", d.Profile()) |
||||||
|
t.Logf(" Profiling Timer Resolution: %d", d.ProfilingTimerResolution()) |
||||||
|
t.Logf(" Vendor: %s", d.Vendor()) |
||||||
|
t.Logf(" Version: %s", d.Version()) |
||||||
|
} |
||||||
|
if deviceIndex < 0 { |
||||||
|
deviceIndex = 0 |
||||||
|
} |
||||||
|
device := devices[deviceIndex] |
||||||
|
t.Logf("Using device %d", deviceIndex) |
||||||
|
context, err := CreateContext([]*Device{device}) |
||||||
|
if err != nil { |
||||||
|
t.Fatalf("CreateContext failed: %+v", err) |
||||||
|
} |
||||||
|
// imageFormats, err := context.GetSupportedImageFormats(0, MemObjectTypeImage2D)
|
||||||
|
// if err != nil {
|
||||||
|
// t.Fatalf("GetSupportedImageFormats failed: %+v", err)
|
||||||
|
// }
|
||||||
|
// t.Logf("Supported image formats: %+v", imageFormats)
|
||||||
|
queue, err := context.CreateCommandQueue(device, 0) |
||||||
|
if err != nil { |
||||||
|
t.Fatalf("CreateCommandQueue failed: %+v", err) |
||||||
|
} |
||||||
|
program, err := context.CreateProgramWithSource([]string{kernelSource}) |
||||||
|
if err != nil { |
||||||
|
t.Fatalf("CreateProgramWithSource failed: %+v", err) |
||||||
|
} |
||||||
|
if err := program.BuildProgram(nil, ""); err != nil { |
||||||
|
t.Fatalf("BuildProgram failed: %+v", err) |
||||||
|
} |
||||||
|
kernel, err := program.CreateKernel("square") |
||||||
|
if err != nil { |
||||||
|
t.Fatalf("CreateKernel failed: %+v", err) |
||||||
|
} |
||||||
|
for i := 0; i < 3; i++ { |
||||||
|
name, err := kernel.ArgName(i) |
||||||
|
if err == ErrUnsupported { |
||||||
|
break |
||||||
|
} else if err != nil { |
||||||
|
t.Errorf("GetKernelArgInfo for name failed: %+v", err) |
||||||
|
break |
||||||
|
} else { |
||||||
|
t.Logf("Kernel arg %d: %s", i, name) |
||||||
|
} |
||||||
|
} |
||||||
|
input, err := context.CreateEmptyBuffer(MemReadOnly, 4*len(data)) |
||||||
|
if err != nil { |
||||||
|
t.Fatalf("CreateBuffer failed for input: %+v", err) |
||||||
|
} |
||||||
|
output, err := context.CreateEmptyBuffer(MemReadOnly, 4*len(data)) |
||||||
|
if err != nil { |
||||||
|
t.Fatalf("CreateBuffer failed for output: %+v", err) |
||||||
|
} |
||||||
|
if _, err := queue.EnqueueWriteBufferFloat32(input, true, 0, data[:], nil); err != nil { |
||||||
|
t.Fatalf("EnqueueWriteBufferFloat32 failed: %+v", err) |
||||||
|
} |
||||||
|
if err := kernel.SetArgs(input, output, uint32(len(data))); err != nil { |
||||||
|
t.Fatalf("SetKernelArgs failed: %+v", err) |
||||||
|
} |
||||||
|
|
||||||
|
local, err := kernel.WorkGroupSize(device) |
||||||
|
if err != nil { |
||||||
|
t.Fatalf("WorkGroupSize failed: %+v", err) |
||||||
|
} |
||||||
|
t.Logf("Work group size: %d", local) |
||||||
|
size, _ := kernel.PreferredWorkGroupSizeMultiple(nil) |
||||||
|
t.Logf("Preferred Work Group Size Multiple: %d", size) |
||||||
|
|
||||||
|
global := len(data) |
||||||
|
d := len(data) % local |
||||||
|
if d != 0 { |
||||||
|
global += local - d |
||||||
|
} |
||||||
|
if _, err := queue.EnqueueNDRangeKernel(kernel, nil, []int{global}, []int{local}, nil); err != nil { |
||||||
|
t.Fatalf("EnqueueNDRangeKernel failed: %+v", err) |
||||||
|
} |
||||||
|
|
||||||
|
if err := queue.Finish(); err != nil { |
||||||
|
t.Fatalf("Finish failed: %+v", err) |
||||||
|
} |
||||||
|
|
||||||
|
results := make([]float32, len(data)) |
||||||
|
if _, err := queue.EnqueueReadBufferFloat32(output, true, 0, results, nil); err != nil { |
||||||
|
t.Fatalf("EnqueueReadBufferFloat32 failed: %+v", err) |
||||||
|
} |
||||||
|
|
||||||
|
correct := 0 |
||||||
|
for i, v := range data { |
||||||
|
if results[i] == v*v { |
||||||
|
correct++ |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
if correct != len(data) { |
||||||
|
t.Fatalf("%d/%d correct values", correct, len(data)) |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,161 @@ |
|||||||
|
package cl |
||||||
|
|
||||||
|
// #include <stdlib.h>
|
||||||
|
// #ifdef __APPLE__
|
||||||
|
// #include "OpenCL/opencl.h"
|
||||||
|
// #else
|
||||||
|
// #include "cl.h"
|
||||||
|
// #endif
|
||||||
|
import "C" |
||||||
|
|
||||||
|
import ( |
||||||
|
"runtime" |
||||||
|
"unsafe" |
||||||
|
) |
||||||
|
|
||||||
|
const maxImageFormats = 256 |
||||||
|
|
||||||
|
type Context struct { |
||||||
|
clContext C.cl_context |
||||||
|
devices []*Device |
||||||
|
} |
||||||
|
|
||||||
|
type MemObject struct { |
||||||
|
clMem C.cl_mem |
||||||
|
size int |
||||||
|
} |
||||||
|
|
||||||
|
func releaseContext(c *Context) { |
||||||
|
if c.clContext != nil { |
||||||
|
C.clReleaseContext(c.clContext) |
||||||
|
c.clContext = nil |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func releaseMemObject(b *MemObject) { |
||||||
|
if b.clMem != nil { |
||||||
|
C.clReleaseMemObject(b.clMem) |
||||||
|
b.clMem = nil |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func newMemObject(mo C.cl_mem, size int) *MemObject { |
||||||
|
memObject := &MemObject{clMem: mo, size: size} |
||||||
|
runtime.SetFinalizer(memObject, releaseMemObject) |
||||||
|
return memObject |
||||||
|
} |
||||||
|
|
||||||
|
func (b *MemObject) Release() { |
||||||
|
releaseMemObject(b) |
||||||
|
} |
||||||
|
|
||||||
|
// TODO: properties
|
||||||
|
func CreateContext(devices []*Device) (*Context, error) { |
||||||
|
deviceIds := buildDeviceIdList(devices) |
||||||
|
var err C.cl_int |
||||||
|
clContext := C.clCreateContext(nil, C.cl_uint(len(devices)), &deviceIds[0], nil, nil, &err) |
||||||
|
if err != C.CL_SUCCESS { |
||||||
|
return nil, toError(err) |
||||||
|
} |
||||||
|
if clContext == nil { |
||||||
|
return nil, ErrUnknown |
||||||
|
} |
||||||
|
context := &Context{clContext: clContext, devices: devices} |
||||||
|
runtime.SetFinalizer(context, releaseContext) |
||||||
|
return context, nil |
||||||
|
} |
||||||
|
|
||||||
|
func (ctx *Context) GetSupportedImageFormats(flags MemFlag, imageType MemObjectType) ([]ImageFormat, error) { |
||||||
|
var formats [maxImageFormats]C.cl_image_format |
||||||
|
var nFormats C.cl_uint |
||||||
|
if err := C.clGetSupportedImageFormats(ctx.clContext, C.cl_mem_flags(flags), C.cl_mem_object_type(imageType), maxImageFormats, &formats[0], &nFormats); err != C.CL_SUCCESS { |
||||||
|
return nil, toError(err) |
||||||
|
} |
||||||
|
fmts := make([]ImageFormat, nFormats) |
||||||
|
for i, f := range formats[:nFormats] { |
||||||
|
fmts[i] = ImageFormat{ |
||||||
|
ChannelOrder: ChannelOrder(f.image_channel_order), |
||||||
|
ChannelDataType: ChannelDataType(f.image_channel_data_type), |
||||||
|
} |
||||||
|
} |
||||||
|
return fmts, nil |
||||||
|
} |
||||||
|
|
||||||
|
func (ctx *Context) CreateCommandQueue(device *Device, properties CommandQueueProperty) (*CommandQueue, error) { |
||||||
|
var err C.cl_int |
||||||
|
clQueue := C.clCreateCommandQueue(ctx.clContext, device.id, C.cl_command_queue_properties(properties), &err) |
||||||
|
if err != C.CL_SUCCESS { |
||||||
|
return nil, toError(err) |
||||||
|
} |
||||||
|
if clQueue == nil { |
||||||
|
return nil, ErrUnknown |
||||||
|
} |
||||||
|
commandQueue := &CommandQueue{clQueue: clQueue, device: device} |
||||||
|
runtime.SetFinalizer(commandQueue, releaseCommandQueue) |
||||||
|
return commandQueue, nil |
||||||
|
} |
||||||
|
|
||||||
|
func (ctx *Context) CreateProgramWithSource(sources []string) (*Program, error) { |
||||||
|
cSources := make([]*C.char, len(sources)) |
||||||
|
for i, s := range sources { |
||||||
|
cs := C.CString(s) |
||||||
|
cSources[i] = cs |
||||||
|
defer C.free(unsafe.Pointer(cs)) |
||||||
|
} |
||||||
|
var err C.cl_int |
||||||
|
clProgram := C.clCreateProgramWithSource(ctx.clContext, C.cl_uint(len(sources)), &cSources[0], nil, &err) |
||||||
|
if err != C.CL_SUCCESS { |
||||||
|
return nil, toError(err) |
||||||
|
} |
||||||
|
if clProgram == nil { |
||||||
|
return nil, ErrUnknown |
||||||
|
} |
||||||
|
program := &Program{clProgram: clProgram, devices: ctx.devices} |
||||||
|
runtime.SetFinalizer(program, releaseProgram) |
||||||
|
return program, nil |
||||||
|
} |
||||||
|
|
||||||
|
func (ctx *Context) CreateBufferUnsafe(flags MemFlag, size int, dataPtr unsafe.Pointer) (*MemObject, error) { |
||||||
|
var err C.cl_int |
||||||
|
clBuffer := C.clCreateBuffer(ctx.clContext, C.cl_mem_flags(flags), C.size_t(size), dataPtr, &err) |
||||||
|
if err != C.CL_SUCCESS { |
||||||
|
return nil, toError(err) |
||||||
|
} |
||||||
|
if clBuffer == nil { |
||||||
|
return nil, ErrUnknown |
||||||
|
} |
||||||
|
return newMemObject(clBuffer, size), nil |
||||||
|
} |
||||||
|
|
||||||
|
func (ctx *Context) CreateEmptyBuffer(flags MemFlag, size int) (*MemObject, error) { |
||||||
|
return ctx.CreateBufferUnsafe(flags, size, nil) |
||||||
|
} |
||||||
|
|
||||||
|
func (ctx *Context) CreateEmptyBufferFloat32(flags MemFlag, size int) (*MemObject, error) { |
||||||
|
return ctx.CreateBufferUnsafe(flags, 4*size, nil) |
||||||
|
} |
||||||
|
|
||||||
|
func (ctx *Context) CreateBuffer(flags MemFlag, data []byte) (*MemObject, error) { |
||||||
|
return ctx.CreateBufferUnsafe(flags, len(data), unsafe.Pointer(&data[0])) |
||||||
|
} |
||||||
|
|
||||||
|
//float64
|
||||||
|
func (ctx *Context) CreateBufferFloat32(flags MemFlag, data []float32) (*MemObject, error) { |
||||||
|
return ctx.CreateBufferUnsafe(flags, 4*len(data), unsafe.Pointer(&data[0])) |
||||||
|
} |
||||||
|
|
||||||
|
func (ctx *Context) CreateUserEvent() (*Event, error) { |
||||||
|
var err C.cl_int |
||||||
|
clEvent := C.clCreateUserEvent(ctx.clContext, &err) |
||||||
|
if err != C.CL_SUCCESS { |
||||||
|
return nil, toError(err) |
||||||
|
} |
||||||
|
return newEvent(clEvent), nil |
||||||
|
} |
||||||
|
|
||||||
|
func (ctx *Context) Release() { |
||||||
|
releaseContext(ctx) |
||||||
|
} |
||||||
|
|
||||||
|
// http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clCreateSubBuffer.html
|
||||||
|
// func (memObject *MemObject) CreateSubBuffer(flags MemFlag, bufferCreateType BufferCreateType, )
|
@ -0,0 +1,510 @@ |
|||||||
|
package cl |
||||||
|
|
||||||
|
// #ifdef __APPLE__
|
||||||
|
// #include "OpenCL/opencl.h"
|
||||||
|
// #else
|
||||||
|
// #include "cl.h"
|
||||||
|
// #include "cl_ext.h"
|
||||||
|
// #endif
|
||||||
|
import "C" |
||||||
|
|
||||||
|
import ( |
||||||
|
"strings" |
||||||
|
"unsafe" |
||||||
|
) |
||||||
|
|
||||||
|
const maxDeviceCount = 64 |
||||||
|
|
||||||
|
type DeviceType uint |
||||||
|
|
||||||
|
const ( |
||||||
|
DeviceTypeCPU DeviceType = C.CL_DEVICE_TYPE_CPU |
||||||
|
DeviceTypeGPU DeviceType = C.CL_DEVICE_TYPE_GPU |
||||||
|
DeviceTypeAccelerator DeviceType = C.CL_DEVICE_TYPE_ACCELERATOR |
||||||
|
DeviceTypeDefault DeviceType = C.CL_DEVICE_TYPE_DEFAULT |
||||||
|
DeviceTypeAll DeviceType = C.CL_DEVICE_TYPE_ALL |
||||||
|
) |
||||||
|
|
||||||
|
type FPConfig int |
||||||
|
|
||||||
|
const ( |
||||||
|
FPConfigDenorm FPConfig = C.CL_FP_DENORM // denorms are supported
|
||||||
|
FPConfigInfNaN FPConfig = C.CL_FP_INF_NAN // INF and NaNs are supported
|
||||||
|
FPConfigRoundToNearest FPConfig = C.CL_FP_ROUND_TO_NEAREST // round to nearest even rounding mode supported
|
||||||
|
FPConfigRoundToZero FPConfig = C.CL_FP_ROUND_TO_ZERO // round to zero rounding mode supported
|
||||||
|
FPConfigRoundToInf FPConfig = C.CL_FP_ROUND_TO_INF // round to positive and negative infinity rounding modes supported
|
||||||
|
FPConfigFMA FPConfig = C.CL_FP_FMA // IEEE754-2008 fused multiply-add is supported
|
||||||
|
FPConfigSoftFloat FPConfig = C.CL_FP_SOFT_FLOAT // Basic floating-point operations (such as addition, subtraction, multiplication) are implemented in software
|
||||||
|
) |
||||||
|
|
||||||
|
var fpConfigNameMap = map[FPConfig]string{ |
||||||
|
FPConfigDenorm: "Denorm", |
||||||
|
FPConfigInfNaN: "InfNaN", |
||||||
|
FPConfigRoundToNearest: "RoundToNearest", |
||||||
|
FPConfigRoundToZero: "RoundToZero", |
||||||
|
FPConfigRoundToInf: "RoundToInf", |
||||||
|
FPConfigFMA: "FMA", |
||||||
|
FPConfigSoftFloat: "SoftFloat", |
||||||
|
} |
||||||
|
|
||||||
|
func (c FPConfig) String() string { |
||||||
|
var parts []string |
||||||
|
for bit, name := range fpConfigNameMap { |
||||||
|
if c&bit != 0 { |
||||||
|
parts = append(parts, name) |
||||||
|
} |
||||||
|
} |
||||||
|
if parts == nil { |
||||||
|
return "" |
||||||
|
} |
||||||
|
return strings.Join(parts, "|") |
||||||
|
} |
||||||
|
|
||||||
|
func (dt DeviceType) String() string { |
||||||
|
var parts []string |
||||||
|
if dt&DeviceTypeCPU != 0 { |
||||||
|
parts = append(parts, "CPU") |
||||||
|
} |
||||||
|
if dt&DeviceTypeGPU != 0 { |
||||||
|
parts = append(parts, "GPU") |
||||||
|
} |
||||||
|
if dt&DeviceTypeAccelerator != 0 { |
||||||
|
parts = append(parts, "Accelerator") |
||||||
|
} |
||||||
|
if dt&DeviceTypeDefault != 0 { |
||||||
|
parts = append(parts, "Default") |
||||||
|
} |
||||||
|
if parts == nil { |
||||||
|
parts = append(parts, "None") |
||||||
|
} |
||||||
|
return strings.Join(parts, "|") |
||||||
|
} |
||||||
|
|
||||||
|
type Device struct { |
||||||
|
id C.cl_device_id |
||||||
|
} |
||||||
|
|
||||||
|
func buildDeviceIdList(devices []*Device) []C.cl_device_id { |
||||||
|
deviceIds := make([]C.cl_device_id, len(devices)) |
||||||
|
for i, d := range devices { |
||||||
|
deviceIds[i] = d.id |
||||||
|
} |
||||||
|
return deviceIds |
||||||
|
} |
||||||
|
|
||||||
|
// Obtain the list of devices available on a platform. 'platform' refers
|
||||||
|
// to the platform returned by GetPlatforms or can be nil. If platform
|
||||||
|
// is nil, the behavior is implementation-defined.
|
||||||
|
func GetDevices(platform *Platform, deviceType DeviceType) ([]*Device, error) { |
||||||
|
var deviceIds [maxDeviceCount]C.cl_device_id |
||||||
|
var numDevices C.cl_uint |
||||||
|
var platformId C.cl_platform_id |
||||||
|
if platform != nil { |
||||||
|
platformId = platform.id |
||||||
|
} |
||||||
|
if err := C.clGetDeviceIDs(platformId, C.cl_device_type(deviceType), C.cl_uint(maxDeviceCount), &deviceIds[0], &numDevices); err != C.CL_SUCCESS { |
||||||
|
return nil, toError(err) |
||||||
|
} |
||||||
|
if numDevices > maxDeviceCount { |
||||||
|
numDevices = maxDeviceCount |
||||||
|
} |
||||||
|
devices := make([]*Device, numDevices) |
||||||
|
for i := 0; i < int(numDevices); i++ { |
||||||
|
devices[i] = &Device{id: deviceIds[i]} |
||||||
|
} |
||||||
|
return devices, nil |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) nullableId() C.cl_device_id { |
||||||
|
if d == nil { |
||||||
|
return nil |
||||||
|
} |
||||||
|
return d.id |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) GetInfoString(param C.cl_device_info, panicOnError bool) (string, error) { |
||||||
|
var strC [1024]C.char |
||||||
|
var strN C.size_t |
||||||
|
if err := C.clGetDeviceInfo(d.id, param, 1024, unsafe.Pointer(&strC), &strN); err != C.CL_SUCCESS { |
||||||
|
if panicOnError { |
||||||
|
panic("Should never fail") |
||||||
|
} |
||||||
|
return "", toError(err) |
||||||
|
} |
||||||
|
|
||||||
|
// OpenCL strings are NUL-terminated, and the terminator is included in strN
|
||||||
|
// Go strings aren't NUL-terminated, so subtract 1 from the length
|
||||||
|
return C.GoStringN((*C.char)(unsafe.Pointer(&strC)), C.int(strN-1)), nil |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) getInfoUint(param C.cl_device_info, panicOnError bool) (uint, error) { |
||||||
|
var val C.cl_uint |
||||||
|
if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS { |
||||||
|
if panicOnError { |
||||||
|
panic("Should never fail") |
||||||
|
} |
||||||
|
return 0, toError(err) |
||||||
|
} |
||||||
|
return uint(val), nil |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) getInfoSize(param C.cl_device_info, panicOnError bool) (int, error) { |
||||||
|
var val C.size_t |
||||||
|
if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS { |
||||||
|
if panicOnError { |
||||||
|
panic("Should never fail") |
||||||
|
} |
||||||
|
return 0, toError(err) |
||||||
|
} |
||||||
|
return int(val), nil |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) getInfoUlong(param C.cl_device_info, panicOnError bool) (int64, error) { |
||||||
|
var val C.cl_ulong |
||||||
|
if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS { |
||||||
|
if panicOnError { |
||||||
|
panic("Should never fail") |
||||||
|
} |
||||||
|
return 0, toError(err) |
||||||
|
} |
||||||
|
return int64(val), nil |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) getInfoBool(param C.cl_device_info, panicOnError bool) (bool, error) { |
||||||
|
var val C.cl_bool |
||||||
|
if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS { |
||||||
|
if panicOnError { |
||||||
|
panic("Should never fail") |
||||||
|
} |
||||||
|
return false, toError(err) |
||||||
|
} |
||||||
|
return val == C.CL_TRUE, nil |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) Name() string { |
||||||
|
str, _ := d.GetInfoString(C.CL_DEVICE_NAME, true) |
||||||
|
return str |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) Vendor() string { |
||||||
|
str, _ := d.GetInfoString(C.CL_DEVICE_VENDOR, true) |
||||||
|
return str |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) Extensions() string { |
||||||
|
str, _ := d.GetInfoString(C.CL_DEVICE_EXTENSIONS, true) |
||||||
|
return str |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) OpenCLCVersion() string { |
||||||
|
str, _ := d.GetInfoString(C.CL_DEVICE_OPENCL_C_VERSION, true) |
||||||
|
return str |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) Profile() string { |
||||||
|
str, _ := d.GetInfoString(C.CL_DEVICE_PROFILE, true) |
||||||
|
return str |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) Version() string { |
||||||
|
str, _ := d.GetInfoString(C.CL_DEVICE_VERSION, true) |
||||||
|
return str |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) DriverVersion() string { |
||||||
|
str, _ := d.GetInfoString(C.CL_DRIVER_VERSION, true) |
||||||
|
return str |
||||||
|
} |
||||||
|
|
||||||
|
// The default compute device address space size specified as an
|
||||||
|
// unsigned integer value in bits. Currently supported values are 32 or 64 bits.
|
||||||
|
func (d *Device) AddressBits() int { |
||||||
|
val, _ := d.getInfoUint(C.CL_DEVICE_ADDRESS_BITS, true) |
||||||
|
return int(val) |
||||||
|
} |
||||||
|
|
||||||
|
// Size of global memory cache line in bytes.
|
||||||
|
func (d *Device) GlobalMemCachelineSize() int { |
||||||
|
val, _ := d.getInfoUint(C.CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, true) |
||||||
|
return int(val) |
||||||
|
} |
||||||
|
|
||||||
|
// Maximum configured clock frequency of the device in MHz.
|
||||||
|
func (d *Device) MaxClockFrequency() int { |
||||||
|
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_CLOCK_FREQUENCY, true) |
||||||
|
return int(val) |
||||||
|
} |
||||||
|
|
||||||
|
// The number of parallel compute units on the OpenCL device.
|
||||||
|
// A work-group executes on a single compute unit. The minimum value is 1.
|
||||||
|
func (d *Device) MaxComputeUnits() int { |
||||||
|
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_COMPUTE_UNITS, true) |
||||||
|
return int(val) |
||||||
|
} |
||||||
|
|
||||||
|
// Max number of arguments declared with the __constant qualifier in a kernel.
|
||||||
|
// The minimum value is 8 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
|
||||||
|
func (d *Device) MaxConstantArgs() int { |
||||||
|
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_CONSTANT_ARGS, true) |
||||||
|
return int(val) |
||||||
|
} |
||||||
|
|
||||||
|
// Max number of simultaneous image objects that can be read by a kernel.
|
||||||
|
// The minimum value is 128 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
|
||||||
|
func (d *Device) MaxReadImageArgs() int { |
||||||
|
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_READ_IMAGE_ARGS, true) |
||||||
|
return int(val) |
||||||
|
} |
||||||
|
|
||||||
|
// Maximum number of samplers that can be used in a kernel. The minimum
|
||||||
|
// value is 16 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE. (Also see sampler_t.)
|
||||||
|
func (d *Device) MaxSamplers() int { |
||||||
|
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_SAMPLERS, true) |
||||||
|
return int(val) |
||||||
|
} |
||||||
|
|
||||||
|
// Maximum dimensions that specify the global and local work-item IDs used
|
||||||
|
// by the data parallel execution model. (Refer to clEnqueueNDRangeKernel).
|
||||||
|
// The minimum value is 3 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
|
||||||
|
func (d *Device) MaxWorkItemDimensions() int { |
||||||
|
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, true) |
||||||
|
return int(val) |
||||||
|
} |
||||||
|
|
||||||
|
// Max number of simultaneous image objects that can be written to by a
|
||||||
|
// kernel. The minimum value is 8 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
|
||||||
|
func (d *Device) MaxWriteImageArgs() int { |
||||||
|
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_WRITE_IMAGE_ARGS, true) |
||||||
|
return int(val) |
||||||
|
} |
||||||
|
|
||||||
|
// The minimum value is the size (in bits) of the largest OpenCL built-in
|
||||||
|
// data type supported by the device (long16 in FULL profile, long16 or
|
||||||
|
// int16 in EMBEDDED profile) for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
|
||||||
|
func (d *Device) MemBaseAddrAlign() int { |
||||||
|
val, _ := d.getInfoUint(C.CL_DEVICE_MEM_BASE_ADDR_ALIGN, true) |
||||||
|
return int(val) |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) NativeVectorWidthChar() int { |
||||||
|
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, true) |
||||||
|
return int(val) |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) NativeVectorWidthShort() int { |
||||||
|
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, true) |
||||||
|
return int(val) |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) NativeVectorWidthInt() int { |
||||||
|
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, true) |
||||||
|
return int(val) |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) NativeVectorWidthLong() int { |
||||||
|
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, true) |
||||||
|
return int(val) |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) NativeVectorWidthFloat() int { |
||||||
|
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, true) |
||||||
|
return int(val) |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) NativeVectorWidthDouble() int { |
||||||
|
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, true) |
||||||
|
return int(val) |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) NativeVectorWidthHalf() int { |
||||||
|
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, true) |
||||||
|
return int(val) |
||||||
|
} |
||||||
|
|
||||||
|
// Max height of 2D image in pixels. The minimum value is 8192
|
||||||
|
// if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
|
||||||
|
func (d *Device) Image2DMaxHeight() int { |
||||||
|
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE2D_MAX_HEIGHT, true) |
||||||
|
return int(val) |
||||||
|
} |
||||||
|
|
||||||
|
// Max width of 2D image or 1D image not created from a buffer object in
|
||||||
|
// pixels. The minimum value is 8192 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
|
||||||
|
func (d *Device) Image2DMaxWidth() int { |
||||||
|
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE2D_MAX_WIDTH, true) |
||||||
|
return int(val) |
||||||
|
} |
||||||
|
|
||||||
|
// Max depth of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
|
||||||
|
func (d *Device) Image3DMaxDepth() int { |
||||||
|
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE3D_MAX_DEPTH, true) |
||||||
|
return int(val) |
||||||
|
} |
||||||
|
|
||||||
|
// Max height of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
|
||||||
|
func (d *Device) Image3DMaxHeight() int { |
||||||
|
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE3D_MAX_HEIGHT, true) |
||||||
|
return int(val) |
||||||
|
} |
||||||
|
|
||||||
|
// Max width of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
|
||||||
|
func (d *Device) Image3DMaxWidth() int { |
||||||
|
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE3D_MAX_WIDTH, true) |
||||||
|
return int(val) |
||||||
|
} |
||||||
|
|
||||||
|
// Max size in bytes of the arguments that can be passed to a kernel. The
|
||||||
|
// minimum value is 1024 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
|
||||||
|
// For this minimum value, only a maximum of 128 arguments can be passed to a kernel.
|
||||||
|
func (d *Device) MaxParameterSize() int { |
||||||
|
val, _ := d.getInfoSize(C.CL_DEVICE_MAX_PARAMETER_SIZE, true) |
||||||
|
return int(val) |
||||||
|
} |
||||||
|
|
||||||
|
// Maximum number of work-items in a work-group executing a kernel on a
|
||||||
|
// single compute unit, using the data parallel execution model. (Refer
|
||||||
|
// to clEnqueueNDRangeKernel). The minimum value is 1.
|
||||||
|
func (d *Device) MaxWorkGroupSize() int { |
||||||
|
val, _ := d.getInfoSize(C.CL_DEVICE_MAX_WORK_GROUP_SIZE, true) |
||||||
|
return int(val) |
||||||
|
} |
||||||
|
|
||||||
|
// Describes the resolution of device timer. This is measured in nanoseconds.
|
||||||
|
func (d *Device) ProfilingTimerResolution() int { |
||||||
|
val, _ := d.getInfoSize(C.CL_DEVICE_PROFILING_TIMER_RESOLUTION, true) |
||||||
|
return int(val) |
||||||
|
} |
||||||
|
|
||||||
|
// Size of local memory arena in bytes. The minimum value is 32 KB for
|
||||||
|
// devices that are not of type CL_DEVICE_TYPE_CUSTOM.
|
||||||
|
func (d *Device) LocalMemSize() int64 { |
||||||
|
val, _ := d.getInfoUlong(C.CL_DEVICE_LOCAL_MEM_SIZE, true) |
||||||
|
return val |
||||||
|
} |
||||||
|
|
||||||
|
// Max size in bytes of a constant buffer allocation. The minimum value is
|
||||||
|
// 64 KB for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
|
||||||
|
func (d *Device) MaxConstantBufferSize() int64 { |
||||||
|
val, _ := d.getInfoUlong(C.CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, true) |
||||||
|
return val |
||||||
|
} |
||||||
|
|
||||||
|
// Max size of memory object allocation in bytes. The minimum value is max
|
||||||
|
// (1/4th of CL_DEVICE_GLOBAL_MEM_SIZE, 128*1024*1024) for devices that are
|
||||||
|
// not of type CL_DEVICE_TYPE_CUSTOM.
|
||||||
|
func (d *Device) MaxMemAllocSize() int64 { |
||||||
|
val, _ := d.getInfoUlong(C.CL_DEVICE_MAX_MEM_ALLOC_SIZE, true) |
||||||
|
return val |
||||||
|
} |
||||||
|
|
||||||
|
// Size of global device memory in bytes.
|
||||||
|
func (d *Device) GlobalMemSize() int64 { |
||||||
|
val, _ := d.getInfoUlong(C.CL_DEVICE_GLOBAL_MEM_SIZE, true) |
||||||
|
return val |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) Available() bool { |
||||||
|
val, _ := d.getInfoBool(C.CL_DEVICE_AVAILABLE, true) |
||||||
|
return val |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) CompilerAvailable() bool { |
||||||
|
val, _ := d.getInfoBool(C.CL_DEVICE_COMPILER_AVAILABLE, true) |
||||||
|
return val |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) EndianLittle() bool { |
||||||
|
val, _ := d.getInfoBool(C.CL_DEVICE_ENDIAN_LITTLE, true) |
||||||
|
return val |
||||||
|
} |
||||||
|
|
||||||
|
// Is CL_TRUE if the device implements error correction for all
|
||||||
|
// accesses to compute device memory (global and constant). Is
|
||||||
|
// CL_FALSE if the device does not implement such error correction.
|
||||||
|
func (d *Device) ErrorCorrectionSupport() bool { |
||||||
|
val, _ := d.getInfoBool(C.CL_DEVICE_ERROR_CORRECTION_SUPPORT, true) |
||||||
|
return val |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) HostUnifiedMemory() bool { |
||||||
|
val, _ := d.getInfoBool(C.CL_DEVICE_HOST_UNIFIED_MEMORY, true) |
||||||
|
return val |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) ImageSupport() bool { |
||||||
|
val, _ := d.getInfoBool(C.CL_DEVICE_IMAGE_SUPPORT, true) |
||||||
|
return val |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) Type() DeviceType { |
||||||
|
var deviceType C.cl_device_type |
||||||
|
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_TYPE, C.size_t(unsafe.Sizeof(deviceType)), unsafe.Pointer(&deviceType), nil); err != C.CL_SUCCESS { |
||||||
|
panic("Failed to get device type") |
||||||
|
} |
||||||
|
return DeviceType(deviceType) |
||||||
|
} |
||||||
|
|
||||||
|
// Describes double precision floating-point capability of the OpenCL device
|
||||||
|
func (d *Device) DoubleFPConfig() FPConfig { |
||||||
|
var fpConfig C.cl_device_fp_config |
||||||
|
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_DOUBLE_FP_CONFIG, C.size_t(unsafe.Sizeof(fpConfig)), unsafe.Pointer(&fpConfig), nil); err != C.CL_SUCCESS { |
||||||
|
panic("Failed to get double FP config") |
||||||
|
} |
||||||
|
return FPConfig(fpConfig) |
||||||
|
} |
||||||
|
|
||||||
|
// Describes the OPTIONAL half precision floating-point capability of the OpenCL device
|
||||||
|
func (d *Device) HalfFPConfig() FPConfig { |
||||||
|
var fpConfig C.cl_device_fp_config |
||||||
|
err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_HALF_FP_CONFIG, C.size_t(unsafe.Sizeof(fpConfig)), unsafe.Pointer(&fpConfig), nil) |
||||||
|
if err != C.CL_SUCCESS { |
||||||
|
return FPConfig(0) |
||||||
|
} |
||||||
|
return FPConfig(fpConfig) |
||||||
|
} |
||||||
|
|
||||||
|
// Type of local memory supported. This can be set to CL_LOCAL implying dedicated
|
||||||
|
// local memory storage such as SRAM, or CL_GLOBAL. For custom devices, CL_NONE
|
||||||
|
// can also be returned indicating no local memory support.
|
||||||
|
func (d *Device) LocalMemType() LocalMemType { |
||||||
|
var memType C.cl_device_local_mem_type |
||||||
|
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_LOCAL_MEM_TYPE, C.size_t(unsafe.Sizeof(memType)), unsafe.Pointer(&memType), nil); err != C.CL_SUCCESS { |
||||||
|
return LocalMemType(C.CL_NONE) |
||||||
|
} |
||||||
|
return LocalMemType(memType) |
||||||
|
} |
||||||
|
|
||||||
|
// Describes the execution capabilities of the device. The mandated minimum capability is CL_EXEC_KERNEL.
|
||||||
|
func (d *Device) ExecutionCapabilities() ExecCapability { |
||||||
|
var execCap C.cl_device_exec_capabilities |
||||||
|
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_EXECUTION_CAPABILITIES, C.size_t(unsafe.Sizeof(execCap)), unsafe.Pointer(&execCap), nil); err != C.CL_SUCCESS { |
||||||
|
panic("Failed to get execution capabilities") |
||||||
|
} |
||||||
|
return ExecCapability(execCap) |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) GlobalMemCacheType() MemCacheType { |
||||||
|
var memType C.cl_device_mem_cache_type |
||||||
|
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, C.size_t(unsafe.Sizeof(memType)), unsafe.Pointer(&memType), nil); err != C.CL_SUCCESS { |
||||||
|
return MemCacheType(C.CL_NONE) |
||||||
|
} |
||||||
|
return MemCacheType(memType) |
||||||
|
} |
||||||
|
|
||||||
|
// Maximum number of work-items that can be specified in each dimension of the work-group to clEnqueueNDRangeKernel.
|
||||||
|
//
|
||||||
|
// Returns n size_t entries, where n is the value returned by the query for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS.
|
||||||
|
//
|
||||||
|
// The minimum value is (1, 1, 1) for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
|
||||||
|
func (d *Device) MaxWorkItemSizes() []int { |
||||||
|
dims := d.MaxWorkItemDimensions() |
||||||
|
sizes := make([]C.size_t, dims) |
||||||
|
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_MAX_WORK_ITEM_SIZES, C.size_t(int(unsafe.Sizeof(sizes[0]))*dims), unsafe.Pointer(&sizes[0]), nil); err != C.CL_SUCCESS { |
||||||
|
panic("Failed to get max work item sizes") |
||||||
|
} |
||||||
|
intSizes := make([]int, dims) |
||||||
|
for i, s := range sizes { |
||||||
|
intSizes[i] = int(s) |
||||||
|
} |
||||||
|
return intSizes |
||||||
|
} |
@ -0,0 +1,51 @@ |
|||||||
|
// +build cl12
|
||||||
|
|
||||||
|
package cl |
||||||
|
|
||||||
|
// #ifdef __APPLE__
|
||||||
|
// #include "OpenCL/opencl.h"
|
||||||
|
// #else
|
||||||
|
// #include "cl.h"
|
||||||
|
// #endif
|
||||||
|
import "C" |
||||||
|
import "unsafe" |
||||||
|
|
||||||
|
const FPConfigCorrectlyRoundedDivideSqrt FPConfig = C.CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT |
||||||
|
|
||||||
|
func init() { |
||||||
|
fpConfigNameMap[FPConfigCorrectlyRoundedDivideSqrt] = "CorrectlyRoundedDivideSqrt" |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) BuiltInKernels() string { |
||||||
|
str, _ := d.getInfoString(C.CL_DEVICE_BUILT_IN_KERNELS, true) |
||||||
|
return str |
||||||
|
} |
||||||
|
|
||||||
|
// Is CL_FALSE if the implementation does not have a linker available. Is CL_TRUE if the linker is available. This can be CL_FALSE for the embedded platform profile only. This must be CL_TRUE if CL_DEVICE_COMPILER_AVAILABLE is CL_TRUE
|
||||||
|
func (d *Device) LinkerAvailable() bool { |
||||||
|
val, _ := d.getInfoBool(C.CL_DEVICE_LINKER_AVAILABLE, true) |
||||||
|
return val |
||||||
|
} |
||||||
|
|
||||||
|
func (d *Device) ParentDevice() *Device { |
||||||
|
var deviceId C.cl_device_id |
||||||
|
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_PARENT_DEVICE, C.size_t(unsafe.Sizeof(deviceId)), unsafe.Pointer(&deviceId), nil); err != C.CL_SUCCESS { |
||||||
|
panic("ParentDevice failed") |
||||||
|
} |
||||||
|
if deviceId == nil { |
||||||
|
return nil |
||||||
|
} |
||||||
|
return &Device{id: deviceId} |
||||||
|
} |
||||||
|
|
||||||
|
// Max number of pixels for a 1D image created from a buffer object. The minimum value is 65536 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
|
||||||
|
func (d *Device) ImageMaxBufferSize() int { |
||||||
|
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, true) |
||||||
|
return int(val) |
||||||
|
} |
||||||
|
|
||||||
|
// Max number of images in a 1D or 2D image array. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
|
||||||
|
func (d *Device) ImageMaxArraySize() int { |
||||||
|
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, true) |
||||||
|
return int(val) |
||||||
|
} |
1210
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl.h
generated
vendored
1210
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl.h
generated
vendored
File diff suppressed because it is too large
Load Diff
315
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_ext.h
generated
vendored
315
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_ext.h
generated
vendored
@ -0,0 +1,315 @@ |
|||||||
|
/*******************************************************************************
|
||||||
|
* Copyright (c) 2008-2013 The Khronos Group Inc. |
||||||
|
* |
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a |
||||||
|
* copy of this software and/or associated documentation files (the |
||||||
|
* "Materials"), to deal in the Materials without restriction, including |
||||||
|
* without limitation the rights to use, copy, modify, merge, publish, |
||||||
|
* distribute, sublicense, and/or sell copies of the Materials, and to |
||||||
|
* permit persons to whom the Materials are furnished to do so, subject to |
||||||
|
* the following conditions: |
||||||
|
* |
||||||
|
* The above copyright notice and this permission notice shall be included |
||||||
|
* in all copies or substantial portions of the Materials. |
||||||
|
* |
||||||
|
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||||
|
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. |
||||||
|
******************************************************************************/ |
||||||
|
|
||||||
|
/* $Revision: 11928 $ on $Date: 2010-07-13 09:04:56 -0700 (Tue, 13 Jul 2010) $ */ |
||||||
|
|
||||||
|
/* cl_ext.h contains OpenCL extensions which don't have external */ |
||||||
|
/* (OpenGL, D3D) dependencies. */ |
||||||
|
|
||||||
|
#ifndef __CL_EXT_H |
||||||
|
#define __CL_EXT_H |
||||||
|
|
||||||
|
#ifdef __cplusplus |
||||||
|
extern "C" { |
||||||
|
#endif |
||||||
|
|
||||||
|
#ifdef __APPLE__ |
||||||
|
#include <AvailabilityMacros.h> |
||||||
|
#endif |
||||||
|
|
||||||
|
#include <cl.h> |
||||||
|
|
||||||
|
/* cl_khr_fp16 extension - no extension #define since it has no functions */ |
||||||
|
#define CL_DEVICE_HALF_FP_CONFIG 0x1033 |
||||||
|
|
||||||
|
/* Memory object destruction
|
||||||
|
* |
||||||
|
* Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR |
||||||
|
* |
||||||
|
* Registers a user callback function that will be called when the memory object is deleted and its resources
|
||||||
|
* freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback
|
||||||
|
* stack associated with memobj. The registered user callback functions are called in the reverse order in
|
||||||
|
* which they were registered. The user callback functions are called and then the memory object is deleted
|
||||||
|
* and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be
|
||||||
|
* notified when the memory referenced by host_ptr, specified when the memory object is created and used as
|
||||||
|
* the storage bits for the memory object, can be reused or freed. |
||||||
|
* |
||||||
|
* The application may not call CL api's with the cl_mem object passed to the pfn_notify. |
||||||
|
* |
||||||
|
* Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) |
||||||
|
* before using. |
||||||
|
*/ |
||||||
|
#define cl_APPLE_SetMemObjectDestructor 1 |
||||||
|
cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem /* memobj */,
|
||||||
|
void (* /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
|
||||||
|
void * /*user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
|
||||||
|
|
||||||
|
|
||||||
|
/* Context Logging Functions
|
||||||
|
* |
||||||
|
* The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext(). |
||||||
|
* Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) |
||||||
|
* before using. |
||||||
|
* |
||||||
|
* clLogMessagesToSystemLog fowards on all log messages to the Apple System Logger
|
||||||
|
*/ |
||||||
|
#define cl_APPLE_ContextLoggingFunctions 1 |
||||||
|
extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * /* errstr */,
|
||||||
|
const void * /* private_info */,
|
||||||
|
size_t /* cb */,
|
||||||
|
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; |
||||||
|
|
||||||
|
/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */ |
||||||
|
extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * /* errstr */,
|
||||||
|
const void * /* private_info */,
|
||||||
|
size_t /* cb */,
|
||||||
|
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; |
||||||
|
|
||||||
|
/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */ |
||||||
|
extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * /* errstr */,
|
||||||
|
const void * /* private_info */,
|
||||||
|
size_t /* cb */,
|
||||||
|
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; |
||||||
|
|
||||||
|
|
||||||
|
/************************
|
||||||
|
* cl_khr_icd extension *
|
||||||
|
************************/ |
||||||
|
#define cl_khr_icd 1 |
||||||
|
|
||||||
|
/* cl_platform_info */ |
||||||
|
#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920 |
||||||
|
|
||||||
|
/* Additional Error Codes */ |
||||||
|
#define CL_PLATFORM_NOT_FOUND_KHR -1001 |
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL |
||||||
|
clIcdGetPlatformIDsKHR(cl_uint /* num_entries */, |
||||||
|
cl_platform_id * /* platforms */, |
||||||
|
cl_uint * /* num_platforms */); |
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)( |
||||||
|
cl_uint /* num_entries */, |
||||||
|
cl_platform_id * /* platforms */, |
||||||
|
cl_uint * /* num_platforms */); |
||||||
|
|
||||||
|
|
||||||
|
/* Extension: cl_khr_image2D_buffer
|
||||||
|
* |
||||||
|
* This extension allows a 2D image to be created from a cl_mem buffer without a copy. |
||||||
|
* The type associated with a 2D image created from a buffer in an OpenCL program is image2d_t. |
||||||
|
* Both the sampler and sampler-less read_image built-in functions are supported for 2D images |
||||||
|
* and 2D images created from a buffer. Similarly, the write_image built-ins are also supported |
||||||
|
* for 2D images created from a buffer. |
||||||
|
* |
||||||
|
* When the 2D image from buffer is created, the client must specify the width, |
||||||
|
* height, image format (i.e. channel order and channel data type) and optionally the row pitch |
||||||
|
* |
||||||
|
* The pitch specified must be a multiple of CL_DEVICE_IMAGE_PITCH_ALIGNMENT pixels. |
||||||
|
* The base address of the buffer must be aligned to CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT pixels. |
||||||
|
*/ |
||||||
|
|
||||||
|
/*************************************
|
||||||
|
* cl_khr_initalize_memory extension * |
||||||
|
*************************************/ |
||||||
|
|
||||||
|
#define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x200E |
||||||
|
|
||||||
|
|
||||||
|
/**************************************
|
||||||
|
* cl_khr_terminate_context extension * |
||||||
|
**************************************/ |
||||||
|
|
||||||
|
#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x200F |
||||||
|
#define CL_CONTEXT_TERMINATE_KHR 0x2010 |
||||||
|
|
||||||
|
#define cl_khr_terminate_context 1 |
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL clTerminateContextKHR(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2; |
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int (CL_API_CALL *clTerminateContextKHR_fn)(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2; |
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Extension: cl_khr_spir |
||||||
|
* |
||||||
|
* This extension adds support to create an OpenCL program object from a
|
||||||
|
* Standard Portable Intermediate Representation (SPIR) instance |
||||||
|
*/ |
||||||
|
|
||||||
|
#define CL_DEVICE_SPIR_VERSIONS 0x40E0 |
||||||
|
#define CL_PROGRAM_BINARY_TYPE_INTERMEDIATE 0x40E1 |
||||||
|
|
||||||
|
|
||||||
|
/******************************************
|
||||||
|
* cl_nv_device_attribute_query extension * |
||||||
|
******************************************/ |
||||||
|
/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */ |
||||||
|
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 |
||||||
|
#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 |
||||||
|
#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002 |
||||||
|
#define CL_DEVICE_WARP_SIZE_NV 0x4003 |
||||||
|
#define CL_DEVICE_GPU_OVERLAP_NV 0x4004 |
||||||
|
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 |
||||||
|
#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 |
||||||
|
|
||||||
|
/*********************************
|
||||||
|
* cl_amd_device_attribute_query * |
||||||
|
*********************************/ |
||||||
|
#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036 |
||||||
|
|
||||||
|
/*********************************
|
||||||
|
* cl_arm_printf extension |
||||||
|
*********************************/ |
||||||
|
#define CL_PRINTF_CALLBACK_ARM 0x40B0 |
||||||
|
#define CL_PRINTF_BUFFERSIZE_ARM 0x40B1 |
||||||
|
|
||||||
|
#ifdef CL_VERSION_1_1 |
||||||
|
/***********************************
|
||||||
|
* cl_ext_device_fission extension * |
||||||
|
***********************************/ |
||||||
|
#define cl_ext_device_fission 1 |
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL |
||||||
|
clReleaseDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int
|
||||||
|
(CL_API_CALL *clReleaseDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; |
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL |
||||||
|
clRetainDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
|
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int
|
||||||
|
(CL_API_CALL *clRetainDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; |
||||||
|
|
||||||
|
typedef cl_ulong cl_device_partition_property_ext; |
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL |
||||||
|
clCreateSubDevicesEXT( cl_device_id /*in_device*/, |
||||||
|
const cl_device_partition_property_ext * /* properties */, |
||||||
|
cl_uint /*num_entries*/, |
||||||
|
cl_device_id * /*out_devices*/, |
||||||
|
cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; |
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int
|
||||||
|
( CL_API_CALL * clCreateSubDevicesEXT_fn)( cl_device_id /*in_device*/, |
||||||
|
const cl_device_partition_property_ext * /* properties */, |
||||||
|
cl_uint /*num_entries*/, |
||||||
|
cl_device_id * /*out_devices*/, |
||||||
|
cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; |
||||||
|
|
||||||
|
/* cl_device_partition_property_ext */ |
||||||
|
#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050 |
||||||
|
#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051 |
||||||
|
#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052 |
||||||
|
#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053 |
||||||
|
|
||||||
|
/* clDeviceGetInfo selectors */ |
||||||
|
#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054 |
||||||
|
#define CL_DEVICE_PARTITION_TYPES_EXT 0x4055 |
||||||
|
#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056 |
||||||
|
#define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057 |
||||||
|
#define CL_DEVICE_PARTITION_STYLE_EXT 0x4058 |
||||||
|
|
||||||
|
/* error codes */ |
||||||
|
#define CL_DEVICE_PARTITION_FAILED_EXT -1057 |
||||||
|
#define CL_INVALID_PARTITION_COUNT_EXT -1058 |
||||||
|
#define CL_INVALID_PARTITION_NAME_EXT -1059 |
||||||
|
|
||||||
|
/* CL_AFFINITY_DOMAINs */ |
||||||
|
#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1 |
||||||
|
#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2 |
||||||
|
#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3 |
||||||
|
#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4 |
||||||
|
#define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10 |
||||||
|
#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100 |
||||||
|
|
||||||
|
/* cl_device_partition_property_ext list terminators */ |
||||||
|
#define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0) |
||||||
|
#define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0) |
||||||
|
#define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1) |
||||||
|
|
||||||
|
/*********************************
|
||||||
|
* cl_qcom_ext_host_ptr extension |
||||||
|
*********************************/ |
||||||
|
|
||||||
|
#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29) |
||||||
|
|
||||||
|
#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0 |
||||||
|
#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1 |
||||||
|
#define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2 |
||||||
|
#define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3 |
||||||
|
#define CL_MEM_HOST_UNCACHED_QCOM 0x40A4 |
||||||
|
#define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5 |
||||||
|
#define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6 |
||||||
|
#define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7 |
||||||
|
|
||||||
|
typedef cl_uint cl_image_pitch_info_qcom; |
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL |
||||||
|
clGetDeviceImageInfoQCOM(cl_device_id device, |
||||||
|
size_t image_width, |
||||||
|
size_t image_height, |
||||||
|
const cl_image_format *image_format, |
||||||
|
cl_image_pitch_info_qcom param_name, |
||||||
|
size_t param_value_size, |
||||||
|
void *param_value, |
||||||
|
size_t *param_value_size_ret); |
||||||
|
|
||||||
|
typedef struct _cl_mem_ext_host_ptr |
||||||
|
{ |
||||||
|
/* Type of external memory allocation. */ |
||||||
|
/* Legal values will be defined in layered extensions. */ |
||||||
|
cl_uint allocation_type; |
||||||
|
|
||||||
|
/* Host cache policy for this external memory allocation. */ |
||||||
|
cl_uint host_cache_policy; |
||||||
|
|
||||||
|
} cl_mem_ext_host_ptr; |
||||||
|
|
||||||
|
/*********************************
|
||||||
|
* cl_qcom_ion_host_ptr extension |
||||||
|
*********************************/ |
||||||
|
|
||||||
|
#define CL_MEM_ION_HOST_PTR_QCOM 0x40A8 |
||||||
|
|
||||||
|
typedef struct _cl_mem_ion_host_ptr |
||||||
|
{ |
||||||
|
/* Type of external memory allocation. */ |
||||||
|
/* Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. */ |
||||||
|
cl_mem_ext_host_ptr ext_host_ptr; |
||||||
|
|
||||||
|
/* ION file descriptor */ |
||||||
|
int ion_filedesc; |
||||||
|
|
||||||
|
/* Host pointer to the ION allocated memory */ |
||||||
|
void* ion_hostptr; |
||||||
|
|
||||||
|
} cl_mem_ion_host_ptr; |
||||||
|
|
||||||
|
#endif /* CL_VERSION_1_1 */ |
||||||
|
|
||||||
|
#ifdef __cplusplus |
||||||
|
} |
||||||
|
#endif |
||||||
|
|
||||||
|
|
||||||
|
#endif /* __CL_EXT_H */ |
158
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_gl.h
generated
vendored
158
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_gl.h
generated
vendored
@ -0,0 +1,158 @@ |
|||||||
|
/**********************************************************************************
|
||||||
|
* Copyright (c) 2008 - 2012 The Khronos Group Inc. |
||||||
|
* |
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a |
||||||
|
* copy of this software and/or associated documentation files (the |
||||||
|
* "Materials"), to deal in the Materials without restriction, including |
||||||
|
* without limitation the rights to use, copy, modify, merge, publish, |
||||||
|
* distribute, sublicense, and/or sell copies of the Materials, and to |
||||||
|
* permit persons to whom the Materials are furnished to do so, subject to |
||||||
|
* the following conditions: |
||||||
|
* |
||||||
|
* The above copyright notice and this permission notice shall be included |
||||||
|
* in all copies or substantial portions of the Materials. |
||||||
|
* |
||||||
|
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||||
|
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. |
||||||
|
**********************************************************************************/ |
||||||
|
|
||||||
|
#ifndef __OPENCL_CL_GL_H |
||||||
|
#define __OPENCL_CL_GL_H |
||||||
|
|
||||||
|
#include <cl.h> |
||||||
|
|
||||||
|
#ifdef __cplusplus |
||||||
|
extern "C" { |
||||||
|
#endif |
||||||
|
|
||||||
|
typedef cl_uint cl_gl_object_type; |
||||||
|
typedef cl_uint cl_gl_texture_info; |
||||||
|
typedef cl_uint cl_gl_platform_info; |
||||||
|
typedef struct __GLsync *cl_GLsync; |
||||||
|
|
||||||
|
/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */ |
||||||
|
#define CL_GL_OBJECT_BUFFER 0x2000 |
||||||
|
#define CL_GL_OBJECT_TEXTURE2D 0x2001 |
||||||
|
#define CL_GL_OBJECT_TEXTURE3D 0x2002 |
||||||
|
#define CL_GL_OBJECT_RENDERBUFFER 0x2003 |
||||||
|
#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E |
||||||
|
#define CL_GL_OBJECT_TEXTURE1D 0x200F |
||||||
|
#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010 |
||||||
|
#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011 |
||||||
|
|
||||||
|
/* cl_gl_texture_info */ |
||||||
|
#define CL_GL_TEXTURE_TARGET 0x2004 |
||||||
|
#define CL_GL_MIPMAP_LEVEL 0x2005 |
||||||
|
#define CL_GL_NUM_SAMPLES 0x2012 |
||||||
|
|
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_mem CL_API_CALL |
||||||
|
clCreateFromGLBuffer(cl_context /* context */, |
||||||
|
cl_mem_flags /* flags */, |
||||||
|
cl_GLuint /* bufobj */, |
||||||
|
int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; |
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_mem CL_API_CALL |
||||||
|
clCreateFromGLTexture(cl_context /* context */, |
||||||
|
cl_mem_flags /* flags */, |
||||||
|
cl_GLenum /* target */, |
||||||
|
cl_GLint /* miplevel */, |
||||||
|
cl_GLuint /* texture */, |
||||||
|
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; |
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_mem CL_API_CALL |
||||||
|
clCreateFromGLRenderbuffer(cl_context /* context */, |
||||||
|
cl_mem_flags /* flags */, |
||||||
|
cl_GLuint /* renderbuffer */, |
||||||
|
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; |
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL |
||||||
|
clGetGLObjectInfo(cl_mem /* memobj */, |
||||||
|
cl_gl_object_type * /* gl_object_type */, |
||||||
|
cl_GLuint * /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0; |
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL |
||||||
|
clGetGLTextureInfo(cl_mem /* memobj */, |
||||||
|
cl_gl_texture_info /* param_name */, |
||||||
|
size_t /* param_value_size */, |
||||||
|
void * /* param_value */, |
||||||
|
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; |
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL |
||||||
|
clEnqueueAcquireGLObjects(cl_command_queue /* command_queue */, |
||||||
|
cl_uint /* num_objects */, |
||||||
|
const cl_mem * /* mem_objects */, |
||||||
|
cl_uint /* num_events_in_wait_list */, |
||||||
|
const cl_event * /* event_wait_list */, |
||||||
|
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; |
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL |
||||||
|
clEnqueueReleaseGLObjects(cl_command_queue /* command_queue */, |
||||||
|
cl_uint /* num_objects */, |
||||||
|
const cl_mem * /* mem_objects */, |
||||||
|
cl_uint /* num_events_in_wait_list */, |
||||||
|
const cl_event * /* event_wait_list */, |
||||||
|
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; |
||||||
|
|
||||||
|
|
||||||
|
/* Deprecated OpenCL 1.1 APIs */ |
||||||
|
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL |
||||||
|
clCreateFromGLTexture2D(cl_context /* context */, |
||||||
|
cl_mem_flags /* flags */, |
||||||
|
cl_GLenum /* target */, |
||||||
|
cl_GLint /* miplevel */, |
||||||
|
cl_GLuint /* texture */, |
||||||
|
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; |
||||||
|
|
||||||
|
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL |
||||||
|
clCreateFromGLTexture3D(cl_context /* context */, |
||||||
|
cl_mem_flags /* flags */, |
||||||
|
cl_GLenum /* target */, |
||||||
|
cl_GLint /* miplevel */, |
||||||
|
cl_GLuint /* texture */, |
||||||
|
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; |
||||||
|
|
||||||
|
/* cl_khr_gl_sharing extension */ |
||||||
|
|
||||||
|
#define cl_khr_gl_sharing 1 |
||||||
|
|
||||||
|
typedef cl_uint cl_gl_context_info; |
||||||
|
|
||||||
|
/* Additional Error Codes */ |
||||||
|
#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000 |
||||||
|
|
||||||
|
/* cl_gl_context_info */ |
||||||
|
#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006 |
||||||
|
#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007 |
||||||
|
|
||||||
|
/* Additional cl_context_properties */ |
||||||
|
#define CL_GL_CONTEXT_KHR 0x2008 |
||||||
|
#define CL_EGL_DISPLAY_KHR 0x2009 |
||||||
|
#define CL_GLX_DISPLAY_KHR 0x200A |
||||||
|
#define CL_WGL_HDC_KHR 0x200B |
||||||
|
#define CL_CGL_SHAREGROUP_KHR 0x200C |
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_int CL_API_CALL |
||||||
|
clGetGLContextInfoKHR(const cl_context_properties * /* properties */, |
||||||
|
cl_gl_context_info /* param_name */, |
||||||
|
size_t /* param_value_size */, |
||||||
|
void * /* param_value */, |
||||||
|
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; |
||||||
|
|
||||||
|
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)( |
||||||
|
const cl_context_properties * properties, |
||||||
|
cl_gl_context_info param_name, |
||||||
|
size_t param_value_size, |
||||||
|
void * param_value, |
||||||
|
size_t * param_value_size_ret); |
||||||
|
|
||||||
|
#ifdef __cplusplus |
||||||
|
} |
||||||
|
#endif |
||||||
|
|
||||||
|
#endif /* __OPENCL_CL_GL_H */ |
65
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_gl_ext.h
generated
vendored
65
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_gl_ext.h
generated
vendored
@ -0,0 +1,65 @@ |
|||||||
|
/**********************************************************************************
|
||||||
|
* Copyright (c) 2008-2012 The Khronos Group Inc. |
||||||
|
* |
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a |
||||||
|
* copy of this software and/or associated documentation files (the |
||||||
|
* "Materials"), to deal in the Materials without restriction, including |
||||||
|
* without limitation the rights to use, copy, modify, merge, publish, |
||||||
|
* distribute, sublicense, and/or sell copies of the Materials, and to |
||||||
|
* permit persons to whom the Materials are furnished to do so, subject to |
||||||
|
* the following conditions: |
||||||
|
* |
||||||
|
* The above copyright notice and this permission notice shall be included |
||||||
|
* in all copies or substantial portions of the Materials. |
||||||
|
* |
||||||
|
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||||
|
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. |
||||||
|
**********************************************************************************/ |
||||||
|
|
||||||
|
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ |
||||||
|
|
||||||
|
/* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have */ |
||||||
|
/* OpenGL dependencies. */ |
||||||
|
|
||||||
|
#ifndef __OPENCL_CL_GL_EXT_H |
||||||
|
#define __OPENCL_CL_GL_EXT_H |
||||||
|
|
||||||
|
#ifdef __cplusplus |
||||||
|
extern "C" { |
||||||
|
#endif |
||||||
|
|
||||||
|
#include <cl_gl.h> |
||||||
|
|
||||||
|
/*
|
||||||
|
* For each extension, follow this template |
||||||
|
* cl_VEN_extname extension */ |
||||||
|
/* #define cl_VEN_extname 1
|
||||||
|
* ... define new types, if any |
||||||
|
* ... define new tokens, if any |
||||||
|
* ... define new APIs, if any |
||||||
|
* |
||||||
|
* If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header |
||||||
|
* This allows us to avoid having to decide whether to include GL headers or GLES here. |
||||||
|
*/ |
||||||
|
|
||||||
|
/*
|
||||||
|
* cl_khr_gl_event extension |
||||||
|
* See section 9.9 in the OpenCL 1.1 spec for more information |
||||||
|
*/ |
||||||
|
#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D |
||||||
|
|
||||||
|
extern CL_API_ENTRY cl_event CL_API_CALL |
||||||
|
clCreateEventFromGLsyncKHR(cl_context /* context */, |
||||||
|
cl_GLsync /* cl_GLsync */, |
||||||
|
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1; |
||||||
|
|
||||||
|
#ifdef __cplusplus |
||||||
|
} |
||||||
|
#endif |
||||||
|
|
||||||
|
#endif /* __OPENCL_CL_GL_EXT_H */ |
1278
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_platform.h
generated
vendored
1278
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_platform.h
generated
vendored
File diff suppressed because it is too large
Load Diff
43
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/opencl.h
generated
vendored
43
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/opencl.h
generated
vendored
@ -0,0 +1,43 @@ |
|||||||
|
/*******************************************************************************
|
||||||
|
* Copyright (c) 2008-2012 The Khronos Group Inc. |
||||||
|
* |
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a |
||||||
|
* copy of this software and/or associated documentation files (the |
||||||
|
* "Materials"), to deal in the Materials without restriction, including |
||||||
|
* without limitation the rights to use, copy, modify, merge, publish, |
||||||
|
* distribute, sublicense, and/or sell copies of the Materials, and to |
||||||
|
* permit persons to whom the Materials are furnished to do so, subject to |
||||||
|
* the following conditions: |
||||||
|
* |
||||||
|
* The above copyright notice and this permission notice shall be included |
||||||
|
* in all copies or substantial portions of the Materials. |
||||||
|
* |
||||||
|
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||||
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||||
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||||
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||||
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||||
|
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. |
||||||
|
******************************************************************************/ |
||||||
|
|
||||||
|
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ |
||||||
|
|
||||||
|
#ifndef __OPENCL_H |
||||||
|
#define __OPENCL_H |
||||||
|
|
||||||
|
#ifdef __cplusplus |
||||||
|
extern "C" { |
||||||
|
#endif |
||||||
|
|
||||||
|
#include <cl.h> |
||||||
|
#include <cl_gl.h> |
||||||
|
#include <cl_gl_ext.h> |
||||||
|
#include <cl_ext.h> |
||||||
|
|
||||||
|
#ifdef __cplusplus |
||||||
|
} |
||||||
|
#endif |
||||||
|
|
||||||
|
#endif /* __OPENCL_H */ |
||||||
|
|
@ -0,0 +1,83 @@ |
|||||||
|
// +build cl12
|
||||||
|
|
||||||
|
package cl |
||||||
|
|
||||||
|
// #ifdef __APPLE__
|
||||||
|
// #include "OpenCL/opencl.h"
|
||||||
|
// #else
|
||||||
|
// #include "cl.h"
|
||||||
|
// #endif
|
||||||
|
import "C" |
||||||
|
import ( |
||||||
|
"image" |
||||||
|
"unsafe" |
||||||
|
) |
||||||
|
|
||||||
|
func (ctx *Context) CreateImage(flags MemFlag, imageFormat ImageFormat, imageDesc ImageDescription, data []byte) (*MemObject, error) { |
||||||
|
format := imageFormat.toCl() |
||||||
|
desc := imageDesc.toCl() |
||||||
|
var dataPtr unsafe.Pointer |
||||||
|
if data != nil { |
||||||
|
dataPtr = unsafe.Pointer(&data[0]) |
||||||
|
} |
||||||
|
var err C.cl_int |
||||||
|
clBuffer := C.clCreateImage(ctx.clContext, C.cl_mem_flags(flags), &format, &desc, dataPtr, &err) |
||||||
|
if err != C.CL_SUCCESS { |
||||||
|
return nil, toError(err) |
||||||
|
} |
||||||
|
if clBuffer == nil { |
||||||
|
return nil, ErrUnknown |
||||||
|
} |
||||||
|
return newMemObject(clBuffer, len(data)), nil |
||||||
|
} |
||||||
|
|
||||||
|
func (ctx *Context) CreateImageSimple(flags MemFlag, width, height int, channelOrder ChannelOrder, channelDataType ChannelDataType, data []byte) (*MemObject, error) { |
||||||
|
format := ImageFormat{channelOrder, channelDataType} |
||||||
|
desc := ImageDescription{ |
||||||
|
Type: MemObjectTypeImage2D, |
||||||
|
Width: width, |
||||||
|
Height: height, |
||||||
|
} |
||||||
|
return ctx.CreateImage(flags, format, desc, data) |
||||||
|
} |
||||||
|
|
||||||
|
func (ctx *Context) CreateImageFromImage(flags MemFlag, img image.Image) (*MemObject, error) { |
||||||
|
switch m := img.(type) { |
||||||
|
case *image.Gray: |
||||||
|
format := ImageFormat{ChannelOrderIntensity, ChannelDataTypeUNormInt8} |
||||||
|
desc := ImageDescription{ |
||||||
|
Type: MemObjectTypeImage2D, |
||||||
|
Width: m.Bounds().Dx(), |
||||||
|
Height: m.Bounds().Dy(), |
||||||
|
RowPitch: m.Stride, |
||||||
|
} |
||||||
|
return ctx.CreateImage(flags, format, desc, m.Pix) |
||||||
|
case *image.RGBA: |
||||||
|
format := ImageFormat{ChannelOrderRGBA, ChannelDataTypeUNormInt8} |
||||||
|
desc := ImageDescription{ |
||||||
|
Type: MemObjectTypeImage2D, |
||||||
|
Width: m.Bounds().Dx(), |
||||||
|
Height: m.Bounds().Dy(), |
||||||
|
RowPitch: m.Stride, |
||||||
|
} |
||||||
|
return ctx.CreateImage(flags, format, desc, m.Pix) |
||||||
|
} |
||||||
|
|
||||||
|
b := img.Bounds() |
||||||
|
w := b.Dx() |
||||||
|
h := b.Dy() |
||||||
|
data := make([]byte, w*h*4) |
||||||
|
dataOffset := 0 |
||||||
|
for y := 0; y < h; y++ { |
||||||
|
for x := 0; x < w; x++ { |
||||||
|
c := img.At(x+b.Min.X, y+b.Min.Y) |
||||||
|
r, g, b, a := c.RGBA() |
||||||
|
data[dataOffset] = uint8(r >> 8) |
||||||
|
data[dataOffset+1] = uint8(g >> 8) |
||||||
|
data[dataOffset+2] = uint8(b >> 8) |
||||||
|
data[dataOffset+3] = uint8(a >> 8) |
||||||
|
dataOffset += 4 |
||||||
|
} |
||||||
|
} |
||||||
|
return ctx.CreateImageSimple(flags, w, h, ChannelOrderRGBA, ChannelDataTypeUNormInt8, data) |
||||||
|
} |
@ -0,0 +1,127 @@ |
|||||||
|
package cl |
||||||
|
|
||||||
|
// #ifdef __APPLE__
|
||||||
|
// #include "OpenCL/opencl.h"
|
||||||
|
// #else
|
||||||
|
// #include "cl.h"
|
||||||
|
// #endif
|
||||||
|
import "C" |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"unsafe" |
||||||
|
) |
||||||
|
|
||||||
|
type ErrUnsupportedArgumentType struct { |
||||||
|
Index int |
||||||
|
Value interface{} |
||||||
|
} |
||||||
|
|
||||||
|
func (e ErrUnsupportedArgumentType) Error() string { |
||||||
|
return fmt.Sprintf("cl: unsupported argument type for index %d: %+v", e.Index, e.Value) |
||||||
|
} |
||||||
|
|
||||||
|
type Kernel struct { |
||||||
|
clKernel C.cl_kernel |
||||||
|
name string |
||||||
|
} |
||||||
|
|
||||||
|
type LocalBuffer int |
||||||
|
|
||||||
|
func releaseKernel(k *Kernel) { |
||||||
|
if k.clKernel != nil { |
||||||
|
C.clReleaseKernel(k.clKernel) |
||||||
|
k.clKernel = nil |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func (k *Kernel) Release() { |
||||||
|
releaseKernel(k) |
||||||
|
} |
||||||
|
|
||||||
|
func (k *Kernel) SetArgs(args ...interface{}) error { |
||||||
|
for index, arg := range args { |
||||||
|
if err := k.SetArg(index, arg); err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
} |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
func (k *Kernel) SetArg(index int, arg interface{}) error { |
||||||
|
switch val := arg.(type) { |
||||||
|
case uint8: |
||||||
|
return k.SetArgUint8(index, val) |
||||||
|
case int8: |
||||||
|
return k.SetArgInt8(index, val) |
||||||
|
case uint32: |
||||||
|
return k.SetArgUint32(index, val) |
||||||
|
case uint64: |
||||||
|
return k.SetArgUint64(index, val) |
||||||
|
case int32: |
||||||
|
return k.SetArgInt32(index, val) |
||||||
|
case float32: |
||||||
|
return k.SetArgFloat32(index, val) |
||||||
|
case *MemObject: |
||||||
|
return k.SetArgBuffer(index, val) |
||||||
|
case LocalBuffer: |
||||||
|
return k.SetArgLocal(index, int(val)) |
||||||
|
default: |
||||||
|
return ErrUnsupportedArgumentType{Index: index, Value: arg} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func (k *Kernel) SetArgBuffer(index int, buffer *MemObject) error { |
||||||
|
return k.SetArgUnsafe(index, int(unsafe.Sizeof(buffer.clMem)), unsafe.Pointer(&buffer.clMem)) |
||||||
|
} |
||||||
|
|
||||||
|
func (k *Kernel) SetArgFloat32(index int, val float32) error { |
||||||
|
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val)) |
||||||
|
} |
||||||
|
|
||||||
|
func (k *Kernel) SetArgInt8(index int, val int8) error { |
||||||
|
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val)) |
||||||
|
} |
||||||
|
|
||||||
|
func (k *Kernel) SetArgUint8(index int, val uint8) error { |
||||||
|
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val)) |
||||||
|
} |
||||||
|
|
||||||
|
func (k *Kernel) SetArgInt32(index int, val int32) error { |
||||||
|
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val)) |
||||||
|
} |
||||||
|
|
||||||
|
func (k *Kernel) SetArgUint32(index int, val uint32) error { |
||||||
|
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val)) |
||||||
|
} |
||||||
|
|
||||||
|
func (k *Kernel) SetArgUint64(index int, val uint64) error { |
||||||
|
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val)) |
||||||
|
} |
||||||
|
|
||||||
|
func (k *Kernel) SetArgLocal(index int, size int) error { |
||||||
|
return k.SetArgUnsafe(index, size, nil) |
||||||
|
} |
||||||
|
|
||||||
|
func (k *Kernel) SetArgUnsafe(index, argSize int, arg unsafe.Pointer) error { |
||||||
|
//fmt.Println("FUNKY: ", index, argSize)
|
||||||
|
return toError(C.clSetKernelArg(k.clKernel, C.cl_uint(index), C.size_t(argSize), arg)) |
||||||
|
} |
||||||
|
|
||||||
|
func (k *Kernel) PreferredWorkGroupSizeMultiple(device *Device) (int, error) { |
||||||
|
var size C.size_t |
||||||
|
err := C.clGetKernelWorkGroupInfo(k.clKernel, device.nullableId(), C.CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, C.size_t(unsafe.Sizeof(size)), unsafe.Pointer(&size), nil) |
||||||
|
return int(size), toError(err) |
||||||
|
} |
||||||
|
|
||||||
|
func (k *Kernel) WorkGroupSize(device *Device) (int, error) { |
||||||
|
var size C.size_t |
||||||
|
err := C.clGetKernelWorkGroupInfo(k.clKernel, device.nullableId(), C.CL_KERNEL_WORK_GROUP_SIZE, C.size_t(unsafe.Sizeof(size)), unsafe.Pointer(&size), nil) |
||||||
|
return int(size), toError(err) |
||||||
|
} |
||||||
|
|
||||||
|
func (k *Kernel) NumArgs() (int, error) { |
||||||
|
var num C.cl_uint |
||||||
|
err := C.clGetKernelInfo(k.clKernel, C.CL_KERNEL_NUM_ARGS, C.size_t(unsafe.Sizeof(num)), unsafe.Pointer(&num), nil) |
||||||
|
return int(num), toError(err) |
||||||
|
} |
@ -0,0 +1,7 @@ |
|||||||
|
// +build !cl12
|
||||||
|
|
||||||
|
package cl |
||||||
|
|
||||||
|
func (k *Kernel) ArgName(index int) (string, error) { |
||||||
|
return "", ErrUnsupported |
||||||
|
} |
@ -0,0 +1,20 @@ |
|||||||
|
// +build cl12
|
||||||
|
|
||||||
|
package cl |
||||||
|
|
||||||
|
// #ifdef __APPLE__
|
||||||
|
// #include "OpenCL/opencl.h"
|
||||||
|
// #else
|
||||||
|
// #include "cl.h"
|
||||||
|
// #endif
|
||||||
|
import "C" |
||||||
|
import "unsafe" |
||||||
|
|
||||||
|
func (k *Kernel) ArgName(index int) (string, error) { |
||||||
|
var strC [1024]byte |
||||||
|
var strN C.size_t |
||||||
|
if err := C.clGetKernelArgInfo(k.clKernel, C.cl_uint(index), C.CL_KERNEL_ARG_NAME, 1024, unsafe.Pointer(&strC[0]), &strN); err != C.CL_SUCCESS { |
||||||
|
return "", toError(err) |
||||||
|
} |
||||||
|
return string(strC[:strN]), nil |
||||||
|
} |
@ -0,0 +1,83 @@ |
|||||||
|
package cl |
||||||
|
|
||||||
|
// #ifdef __APPLE__
|
||||||
|
// #include "OpenCL/opencl.h"
|
||||||
|
// #else
|
||||||
|
// #include "cl.h"
|
||||||
|
// #endif
|
||||||
|
import "C" |
||||||
|
|
||||||
|
import "unsafe" |
||||||
|
|
||||||
|
const maxPlatforms = 32 |
||||||
|
|
||||||
|
type Platform struct { |
||||||
|
id C.cl_platform_id |
||||||
|
} |
||||||
|
|
||||||
|
// Obtain the list of platforms available.
|
||||||
|
func GetPlatforms() ([]*Platform, error) { |
||||||
|
var platformIds [maxPlatforms]C.cl_platform_id |
||||||
|
var nPlatforms C.cl_uint |
||||||
|
if err := C.clGetPlatformIDs(C.cl_uint(maxPlatforms), &platformIds[0], &nPlatforms); err != C.CL_SUCCESS { |
||||||
|
return nil, toError(err) |
||||||
|
} |
||||||
|
platforms := make([]*Platform, nPlatforms) |
||||||
|
for i := 0; i < int(nPlatforms); i++ { |
||||||
|
platforms[i] = &Platform{id: platformIds[i]} |
||||||
|
} |
||||||
|
return platforms, nil |
||||||
|
} |
||||||
|
|
||||||
|
func (p *Platform) GetDevices(deviceType DeviceType) ([]*Device, error) { |
||||||
|
return GetDevices(p, deviceType) |
||||||
|
} |
||||||
|
|
||||||
|
func (p *Platform) getInfoString(param C.cl_platform_info) (string, error) { |
||||||
|
var strC [2048]byte |
||||||
|
var strN C.size_t |
||||||
|
if err := C.clGetPlatformInfo(p.id, param, 2048, unsafe.Pointer(&strC[0]), &strN); err != C.CL_SUCCESS { |
||||||
|
return "", toError(err) |
||||||
|
} |
||||||
|
return string(strC[:(strN - 1)]), nil |
||||||
|
} |
||||||
|
|
||||||
|
func (p *Platform) Name() string { |
||||||
|
if str, err := p.getInfoString(C.CL_PLATFORM_NAME); err != nil { |
||||||
|
panic("Platform.Name() should never fail") |
||||||
|
} else { |
||||||
|
return str |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func (p *Platform) Vendor() string { |
||||||
|
if str, err := p.getInfoString(C.CL_PLATFORM_VENDOR); err != nil { |
||||||
|
panic("Platform.Vendor() should never fail") |
||||||
|
} else { |
||||||
|
return str |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func (p *Platform) Profile() string { |
||||||
|
if str, err := p.getInfoString(C.CL_PLATFORM_PROFILE); err != nil { |
||||||
|
panic("Platform.Profile() should never fail") |
||||||
|
} else { |
||||||
|
return str |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func (p *Platform) Version() string { |
||||||
|
if str, err := p.getInfoString(C.CL_PLATFORM_VERSION); err != nil { |
||||||
|
panic("Platform.Version() should never fail") |
||||||
|
} else { |
||||||
|
return str |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func (p *Platform) Extensions() string { |
||||||
|
if str, err := p.getInfoString(C.CL_PLATFORM_EXTENSIONS); err != nil { |
||||||
|
panic("Platform.Extensions() should never fail") |
||||||
|
} else { |
||||||
|
return str |
||||||
|
} |
||||||
|
} |
@ -0,0 +1,105 @@ |
|||||||
|
package cl |
||||||
|
|
||||||
|
// #include <stdlib.h>
|
||||||
|
// #ifdef __APPLE__
|
||||||
|
// #include "OpenCL/opencl.h"
|
||||||
|
// #else
|
||||||
|
// #include "cl.h"
|
||||||
|
// #endif
|
||||||
|
import "C" |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"runtime" |
||||||
|
"unsafe" |
||||||
|
) |
||||||
|
|
||||||
|
type BuildError struct { |
||||||
|
Message string |
||||||
|
Device *Device |
||||||
|
} |
||||||
|
|
||||||
|
func (e BuildError) Error() string { |
||||||
|
if e.Device != nil { |
||||||
|
return fmt.Sprintf("cl: build error on %q: %s", e.Device.Name(), e.Message) |
||||||
|
} else { |
||||||
|
return fmt.Sprintf("cl: build error: %s", e.Message) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
type Program struct { |
||||||
|
clProgram C.cl_program |
||||||
|
devices []*Device |
||||||
|
} |
||||||
|
|
||||||
|
func releaseProgram(p *Program) { |
||||||
|
if p.clProgram != nil { |
||||||
|
C.clReleaseProgram(p.clProgram) |
||||||
|
p.clProgram = nil |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func (p *Program) Release() { |
||||||
|
releaseProgram(p) |
||||||
|
} |
||||||
|
|
||||||
|
func (p *Program) BuildProgram(devices []*Device, options string) error { |
||||||
|
var cOptions *C.char |
||||||
|
if options != "" { |
||||||
|
cOptions = C.CString(options) |
||||||
|
defer C.free(unsafe.Pointer(cOptions)) |
||||||
|
} |
||||||
|
var deviceList []C.cl_device_id |
||||||
|
var deviceListPtr *C.cl_device_id |
||||||
|
numDevices := C.cl_uint(len(devices)) |
||||||
|
if devices != nil && len(devices) > 0 { |
||||||
|
deviceList = buildDeviceIdList(devices) |
||||||
|
deviceListPtr = &deviceList[0] |
||||||
|
} |
||||||
|
if err := C.clBuildProgram(p.clProgram, numDevices, deviceListPtr, cOptions, nil, nil); err != C.CL_SUCCESS { |
||||||
|
buffer := make([]byte, 4096) |
||||||
|
var bLen C.size_t |
||||||
|
var err C.cl_int |
||||||
|
|
||||||
|
for _, dev := range p.devices { |
||||||
|
for i := 2; i >= 0; i-- { |
||||||
|
err = C.clGetProgramBuildInfo(p.clProgram, dev.id, C.CL_PROGRAM_BUILD_LOG, C.size_t(len(buffer)), unsafe.Pointer(&buffer[0]), &bLen) |
||||||
|
if err == C.CL_INVALID_VALUE && i > 0 && bLen < 1024*1024 { |
||||||
|
// INVALID_VALUE probably means our buffer isn't large enough
|
||||||
|
buffer = make([]byte, bLen) |
||||||
|
} else { |
||||||
|
break |
||||||
|
} |
||||||
|
} |
||||||
|
if err != C.CL_SUCCESS { |
||||||
|
return toError(err) |
||||||
|
} |
||||||
|
|
||||||
|
if bLen > 1 { |
||||||
|
return BuildError{ |
||||||
|
Device: dev, |
||||||
|
Message: string(buffer[:bLen-1]), |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
return BuildError{ |
||||||
|
Device: nil, |
||||||
|
Message: "build failed and produced no log entries", |
||||||
|
} |
||||||
|
} |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
func (p *Program) CreateKernel(name string) (*Kernel, error) { |
||||||
|
cName := C.CString(name) |
||||||
|
defer C.free(unsafe.Pointer(cName)) |
||||||
|
var err C.cl_int |
||||||
|
clKernel := C.clCreateKernel(p.clProgram, cName, &err) |
||||||
|
if err != C.CL_SUCCESS { |
||||||
|
return nil, toError(err) |
||||||
|
} |
||||||
|
kernel := &Kernel{clKernel: clKernel, name: name} |
||||||
|
runtime.SetFinalizer(kernel, releaseKernel) |
||||||
|
return kernel, nil |
||||||
|
} |
@ -0,0 +1,193 @@ |
|||||||
|
package cl |
||||||
|
|
||||||
|
// #ifdef __APPLE__
|
||||||
|
// #include "OpenCL/opencl.h"
|
||||||
|
// #else
|
||||||
|
// #include "cl.h"
|
||||||
|
// #endif
|
||||||
|
import "C" |
||||||
|
|
||||||
|
import "unsafe" |
||||||
|
|
||||||
|
type CommandQueueProperty int |
||||||
|
|
||||||
|
const ( |
||||||
|
CommandQueueOutOfOrderExecModeEnable CommandQueueProperty = C.CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE |
||||||
|
CommandQueueProfilingEnable CommandQueueProperty = C.CL_QUEUE_PROFILING_ENABLE |
||||||
|
) |
||||||
|
|
||||||
|
type CommandQueue struct { |
||||||
|
clQueue C.cl_command_queue |
||||||
|
device *Device |
||||||
|
} |
||||||
|
|
||||||
|
func releaseCommandQueue(q *CommandQueue) { |
||||||
|
if q.clQueue != nil { |
||||||
|
C.clReleaseCommandQueue(q.clQueue) |
||||||
|
q.clQueue = nil |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Call clReleaseCommandQueue on the CommandQueue. Using the CommandQueue after Release will cause a panick.
|
||||||
|
func (q *CommandQueue) Release() { |
||||||
|
releaseCommandQueue(q) |
||||||
|
} |
||||||
|
|
||||||
|
// Blocks until all previously queued OpenCL commands in a command-queue are issued to the associated device and have completed.
|
||||||
|
func (q *CommandQueue) Finish() error { |
||||||
|
return toError(C.clFinish(q.clQueue)) |
||||||
|
} |
||||||
|
|
||||||
|
// Issues all previously queued OpenCL commands in a command-queue to the device associated with the command-queue.
|
||||||
|
func (q *CommandQueue) Flush() error { |
||||||
|
return toError(C.clFlush(q.clQueue)) |
||||||
|
} |
||||||
|
|
||||||
|
// Enqueues a command to map a region of the buffer object given by buffer into the host address space and returns a pointer to this mapped region.
|
||||||
|
func (q *CommandQueue) EnqueueMapBuffer(buffer *MemObject, blocking bool, flags MapFlag, offset, size int, eventWaitList []*Event) (*MappedMemObject, *Event, error) { |
||||||
|
var event C.cl_event |
||||||
|
var err C.cl_int |
||||||
|
ptr := C.clEnqueueMapBuffer(q.clQueue, buffer.clMem, clBool(blocking), flags.toCl(), C.size_t(offset), C.size_t(size), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event, &err) |
||||||
|
if err != C.CL_SUCCESS { |
||||||
|
return nil, nil, toError(err) |
||||||
|
} |
||||||
|
ev := newEvent(event) |
||||||
|
if ptr == nil { |
||||||
|
return nil, ev, ErrUnknown |
||||||
|
} |
||||||
|
return &MappedMemObject{ptr: ptr, size: size}, ev, nil |
||||||
|
} |
||||||
|
|
||||||
|
// Enqueues a command to map a region of an image object into the host address space and returns a pointer to this mapped region.
|
||||||
|
func (q *CommandQueue) EnqueueMapImage(buffer *MemObject, blocking bool, flags MapFlag, origin, region [3]int, eventWaitList []*Event) (*MappedMemObject, *Event, error) { |
||||||
|
cOrigin := sizeT3(origin) |
||||||
|
cRegion := sizeT3(region) |
||||||
|
var event C.cl_event |
||||||
|
var err C.cl_int |
||||||
|
var rowPitch, slicePitch C.size_t |
||||||
|
ptr := C.clEnqueueMapImage(q.clQueue, buffer.clMem, clBool(blocking), flags.toCl(), &cOrigin[0], &cRegion[0], &rowPitch, &slicePitch, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event, &err) |
||||||
|
if err != C.CL_SUCCESS { |
||||||
|
return nil, nil, toError(err) |
||||||
|
} |
||||||
|
ev := newEvent(event) |
||||||
|
if ptr == nil { |
||||||
|
return nil, ev, ErrUnknown |
||||||
|
} |
||||||
|
size := 0 // TODO: could calculate this
|
||||||
|
return &MappedMemObject{ptr: ptr, size: size, rowPitch: int(rowPitch), slicePitch: int(slicePitch)}, ev, nil |
||||||
|
} |
||||||
|
|
||||||
|
// Enqueues a command to unmap a previously mapped region of a memory object.
|
||||||
|
func (q *CommandQueue) EnqueueUnmapMemObject(buffer *MemObject, mappedObj *MappedMemObject, eventWaitList []*Event) (*Event, error) { |
||||||
|
var event C.cl_event |
||||||
|
if err := C.clEnqueueUnmapMemObject(q.clQueue, buffer.clMem, mappedObj.ptr, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event); err != C.CL_SUCCESS { |
||||||
|
return nil, toError(err) |
||||||
|
} |
||||||
|
return newEvent(event), nil |
||||||
|
} |
||||||
|
|
||||||
|
// Enqueues a command to copy a buffer object to another buffer object.
|
||||||
|
func (q *CommandQueue) EnqueueCopyBuffer(srcBuffer, dstBuffer *MemObject, srcOffset, dstOffset, byteCount int, eventWaitList []*Event) (*Event, error) { |
||||||
|
var event C.cl_event |
||||||
|
err := toError(C.clEnqueueCopyBuffer(q.clQueue, srcBuffer.clMem, dstBuffer.clMem, C.size_t(srcOffset), C.size_t(dstOffset), C.size_t(byteCount), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event)) |
||||||
|
return newEvent(event), err |
||||||
|
} |
||||||
|
|
||||||
|
// Enqueue commands to write to a buffer object from host memory.
|
||||||
|
func (q *CommandQueue) EnqueueWriteBuffer(buffer *MemObject, blocking bool, offset, dataSize int, dataPtr unsafe.Pointer, eventWaitList []*Event) (*Event, error) { |
||||||
|
var event C.cl_event |
||||||
|
err := toError(C.clEnqueueWriteBuffer(q.clQueue, buffer.clMem, clBool(blocking), C.size_t(offset), C.size_t(dataSize), dataPtr, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event)) |
||||||
|
return newEvent(event), err |
||||||
|
} |
||||||
|
|
||||||
|
func (q *CommandQueue) EnqueueWriteBufferFloat32(buffer *MemObject, blocking bool, offset int, data []float32, eventWaitList []*Event) (*Event, error) { |
||||||
|
dataPtr := unsafe.Pointer(&data[0]) |
||||||
|
dataSize := int(unsafe.Sizeof(data[0])) * len(data) |
||||||
|
return q.EnqueueWriteBuffer(buffer, blocking, offset, dataSize, dataPtr, eventWaitList) |
||||||
|
} |
||||||
|
|
||||||
|
// Enqueue commands to read from a buffer object to host memory.
|
||||||
|
func (q *CommandQueue) EnqueueReadBuffer(buffer *MemObject, blocking bool, offset, dataSize int, dataPtr unsafe.Pointer, eventWaitList []*Event) (*Event, error) { |
||||||
|
var event C.cl_event |
||||||
|
err := toError(C.clEnqueueReadBuffer(q.clQueue, buffer.clMem, clBool(blocking), C.size_t(offset), C.size_t(dataSize), dataPtr, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event)) |
||||||
|
return newEvent(event), err |
||||||
|
} |
||||||
|
|
||||||
|
func (q *CommandQueue) EnqueueReadBufferFloat32(buffer *MemObject, blocking bool, offset int, data []float32, eventWaitList []*Event) (*Event, error) { |
||||||
|
dataPtr := unsafe.Pointer(&data[0]) |
||||||
|
dataSize := int(unsafe.Sizeof(data[0])) * len(data) |
||||||
|
return q.EnqueueReadBuffer(buffer, blocking, offset, dataSize, dataPtr, eventWaitList) |
||||||
|
} |
||||||
|
|
||||||
|
// Enqueues a command to execute a kernel on a device.
|
||||||
|
func (q *CommandQueue) EnqueueNDRangeKernel(kernel *Kernel, globalWorkOffset, globalWorkSize, localWorkSize []int, eventWaitList []*Event) (*Event, error) { |
||||||
|
workDim := len(globalWorkSize) |
||||||
|
var globalWorkOffsetList []C.size_t |
||||||
|
var globalWorkOffsetPtr *C.size_t |
||||||
|
if globalWorkOffset != nil { |
||||||
|
globalWorkOffsetList = make([]C.size_t, len(globalWorkOffset)) |
||||||
|
for i, off := range globalWorkOffset { |
||||||
|
globalWorkOffsetList[i] = C.size_t(off) |
||||||
|
} |
||||||
|
globalWorkOffsetPtr = &globalWorkOffsetList[0] |
||||||
|
} |
||||||
|
var globalWorkSizeList []C.size_t |
||||||
|
var globalWorkSizePtr *C.size_t |
||||||
|
if globalWorkSize != nil { |
||||||
|
globalWorkSizeList = make([]C.size_t, len(globalWorkSize)) |
||||||
|
for i, off := range globalWorkSize { |
||||||
|
globalWorkSizeList[i] = C.size_t(off) |
||||||
|
} |
||||||
|
globalWorkSizePtr = &globalWorkSizeList[0] |
||||||
|
} |
||||||
|
var localWorkSizeList []C.size_t |
||||||
|
var localWorkSizePtr *C.size_t |
||||||
|
if localWorkSize != nil { |
||||||
|
localWorkSizeList = make([]C.size_t, len(localWorkSize)) |
||||||
|
for i, off := range localWorkSize { |
||||||
|
localWorkSizeList[i] = C.size_t(off) |
||||||
|
} |
||||||
|
localWorkSizePtr = &localWorkSizeList[0] |
||||||
|
} |
||||||
|
var event C.cl_event |
||||||
|
err := toError(C.clEnqueueNDRangeKernel(q.clQueue, kernel.clKernel, C.cl_uint(workDim), globalWorkOffsetPtr, globalWorkSizePtr, localWorkSizePtr, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event)) |
||||||
|
return newEvent(event), err |
||||||
|
} |
||||||
|
|
||||||
|
// Enqueues a command to read from a 2D or 3D image object to host memory.
|
||||||
|
func (q *CommandQueue) EnqueueReadImage(image *MemObject, blocking bool, origin, region [3]int, rowPitch, slicePitch int, data []byte, eventWaitList []*Event) (*Event, error) { |
||||||
|
cOrigin := sizeT3(origin) |
||||||
|
cRegion := sizeT3(region) |
||||||
|
var event C.cl_event |
||||||
|
err := toError(C.clEnqueueReadImage(q.clQueue, image.clMem, clBool(blocking), &cOrigin[0], &cRegion[0], C.size_t(rowPitch), C.size_t(slicePitch), unsafe.Pointer(&data[0]), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event)) |
||||||
|
return newEvent(event), err |
||||||
|
} |
||||||
|
|
||||||
|
// Enqueues a command to write from a 2D or 3D image object to host memory.
|
||||||
|
func (q *CommandQueue) EnqueueWriteImage(image *MemObject, blocking bool, origin, region [3]int, rowPitch, slicePitch int, data []byte, eventWaitList []*Event) (*Event, error) { |
||||||
|
cOrigin := sizeT3(origin) |
||||||
|
cRegion := sizeT3(region) |
||||||
|
var event C.cl_event |
||||||
|
err := toError(C.clEnqueueWriteImage(q.clQueue, image.clMem, clBool(blocking), &cOrigin[0], &cRegion[0], C.size_t(rowPitch), C.size_t(slicePitch), unsafe.Pointer(&data[0]), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event)) |
||||||
|
return newEvent(event), err |
||||||
|
} |
||||||
|
|
||||||
|
func (q *CommandQueue) EnqueueFillBuffer(buffer *MemObject, pattern unsafe.Pointer, patternSize, offset, size int, eventWaitList []*Event) (*Event, error) { |
||||||
|
var event C.cl_event |
||||||
|
err := toError(C.clEnqueueFillBuffer(q.clQueue, buffer.clMem, pattern, C.size_t(patternSize), C.size_t(offset), C.size_t(size), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event)) |
||||||
|
return newEvent(event), err |
||||||
|
} |
||||||
|
|
||||||
|
// A synchronization point that enqueues a barrier operation.
|
||||||
|
func (q *CommandQueue) EnqueueBarrierWithWaitList(eventWaitList []*Event) (*Event, error) { |
||||||
|
var event C.cl_event |
||||||
|
err := toError(C.clEnqueueBarrierWithWaitList(q.clQueue, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event)) |
||||||
|
return newEvent(event), err |
||||||
|
} |
||||||
|
|
||||||
|
// Enqueues a marker command which waits for either a list of events to complete, or all previously enqueued commands to complete.
|
||||||
|
func (q *CommandQueue) EnqueueMarkerWithWaitList(eventWaitList []*Event) (*Event, error) { |
||||||
|
var event C.cl_event |
||||||
|
err := toError(C.clEnqueueMarkerWithWaitList(q.clQueue, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event)) |
||||||
|
return newEvent(event), err |
||||||
|
} |
@ -0,0 +1,487 @@ |
|||||||
|
package cl |
||||||
|
|
||||||
|
// #ifdef __APPLE__
|
||||||
|
// #include "OpenCL/opencl.h"
|
||||||
|
// #else
|
||||||
|
// #include "cl.h"
|
||||||
|
// #endif
|
||||||
|
import "C" |
||||||
|
|
||||||
|
import ( |
||||||
|
"errors" |
||||||
|
"fmt" |
||||||
|
"reflect" |
||||||
|
"runtime" |
||||||
|
"strings" |
||||||
|
"unsafe" |
||||||
|
) |
||||||
|
|
||||||
|
var ( |
||||||
|
ErrUnknown = errors.New("cl: unknown error") // Generally an unexpected result from an OpenCL function (e.g. CL_SUCCESS but null pointer)
|
||||||
|
) |
||||||
|
|
||||||
|
type ErrOther int |
||||||
|
|
||||||
|
func (e ErrOther) Error() string { |
||||||
|
return fmt.Sprintf("cl: error %d", int(e)) |
||||||
|
} |
||||||
|
|
||||||
|
var ( |
||||||
|
ErrDeviceNotFound = errors.New("cl: Device Not Found") |
||||||
|
ErrDeviceNotAvailable = errors.New("cl: Device Not Available") |
||||||
|
ErrCompilerNotAvailable = errors.New("cl: Compiler Not Available") |
||||||
|
ErrMemObjectAllocationFailure = errors.New("cl: Mem Object Allocation Failure") |
||||||
|
ErrOutOfResources = errors.New("cl: Out Of Resources") |
||||||
|
ErrOutOfHostMemory = errors.New("cl: Out Of Host Memory") |
||||||
|
ErrProfilingInfoNotAvailable = errors.New("cl: Profiling Info Not Available") |
||||||
|
ErrMemCopyOverlap = errors.New("cl: Mem Copy Overlap") |
||||||
|
ErrImageFormatMismatch = errors.New("cl: Image Format Mismatch") |
||||||
|
ErrImageFormatNotSupported = errors.New("cl: Image Format Not Supported") |
||||||
|
ErrBuildProgramFailure = errors.New("cl: Build Program Failure") |
||||||
|
ErrMapFailure = errors.New("cl: Map Failure") |
||||||
|
ErrMisalignedSubBufferOffset = errors.New("cl: Misaligned Sub Buffer Offset") |
||||||
|
ErrExecStatusErrorForEventsInWaitList = errors.New("cl: Exec Status Error For Events In Wait List") |
||||||
|
ErrCompileProgramFailure = errors.New("cl: Compile Program Failure") |
||||||
|
ErrLinkerNotAvailable = errors.New("cl: Linker Not Available") |
||||||
|
ErrLinkProgramFailure = errors.New("cl: Link Program Failure") |
||||||
|
ErrDevicePartitionFailed = errors.New("cl: Device Partition Failed") |
||||||
|
ErrKernelArgInfoNotAvailable = errors.New("cl: Kernel Arg Info Not Available") |
||||||
|
ErrInvalidValue = errors.New("cl: Invalid Value") |
||||||
|
ErrInvalidDeviceType = errors.New("cl: Invalid Device Type") |
||||||
|
ErrInvalidPlatform = errors.New("cl: Invalid Platform") |
||||||
|
ErrInvalidDevice = errors.New("cl: Invalid Device") |
||||||
|
ErrInvalidContext = errors.New("cl: Invalid Context") |
||||||
|
ErrInvalidQueueProperties = errors.New("cl: Invalid Queue Properties") |
||||||
|
ErrInvalidCommandQueue = errors.New("cl: Invalid Command Queue") |
||||||
|
ErrInvalidHostPtr = errors.New("cl: Invalid Host Ptr") |
||||||
|
ErrInvalidMemObject = errors.New("cl: Invalid Mem Object") |
||||||
|
ErrInvalidImageFormatDescriptor = errors.New("cl: Invalid Image Format Descriptor") |
||||||
|
ErrInvalidImageSize = errors.New("cl: Invalid Image Size") |
||||||
|
ErrInvalidSampler = errors.New("cl: Invalid Sampler") |
||||||
|
ErrInvalidBinary = errors.New("cl: Invalid Binary") |
||||||
|
ErrInvalidBuildOptions = errors.New("cl: Invalid Build Options") |
||||||
|
ErrInvalidProgram = errors.New("cl: Invalid Program") |
||||||
|
ErrInvalidProgramExecutable = errors.New("cl: Invalid Program Executable") |
||||||
|
ErrInvalidKernelName = errors.New("cl: Invalid Kernel Name") |
||||||
|
ErrInvalidKernelDefinition = errors.New("cl: Invalid Kernel Definition") |
||||||
|
ErrInvalidKernel = errors.New("cl: Invalid Kernel") |
||||||
|
ErrInvalidArgIndex = errors.New("cl: Invalid Arg Index") |
||||||
|
ErrInvalidArgValue = errors.New("cl: Invalid Arg Value") |
||||||
|
ErrInvalidArgSize = errors.New("cl: Invalid Arg Size") |
||||||
|
ErrInvalidKernelArgs = errors.New("cl: Invalid Kernel Args") |
||||||
|
ErrInvalidWorkDimension = errors.New("cl: Invalid Work Dimension") |
||||||
|
ErrInvalidWorkGroupSize = errors.New("cl: Invalid Work Group Size") |
||||||
|
ErrInvalidWorkItemSize = errors.New("cl: Invalid Work Item Size") |
||||||
|
ErrInvalidGlobalOffset = errors.New("cl: Invalid Global Offset") |
||||||
|
ErrInvalidEventWaitList = errors.New("cl: Invalid Event Wait List") |
||||||
|
ErrInvalidEvent = errors.New("cl: Invalid Event") |
||||||
|
ErrInvalidOperation = errors.New("cl: Invalid Operation") |
||||||
|
ErrInvalidGlObject = errors.New("cl: Invalid Gl Object") |
||||||
|
ErrInvalidBufferSize = errors.New("cl: Invalid Buffer Size") |
||||||
|
ErrInvalidMipLevel = errors.New("cl: Invalid Mip Level") |
||||||
|
ErrInvalidGlobalWorkSize = errors.New("cl: Invalid Global Work Size") |
||||||
|
ErrInvalidProperty = errors.New("cl: Invalid Property") |
||||||
|
ErrInvalidImageDescriptor = errors.New("cl: Invalid Image Descriptor") |
||||||
|
ErrInvalidCompilerOptions = errors.New("cl: Invalid Compiler Options") |
||||||
|
ErrInvalidLinkerOptions = errors.New("cl: Invalid Linker Options") |
||||||
|
ErrInvalidDevicePartitionCount = errors.New("cl: Invalid Device Partition Count") |
||||||
|
) |
||||||
|
var errorMap = map[C.cl_int]error{ |
||||||
|
C.CL_SUCCESS: nil, |
||||||
|
C.CL_DEVICE_NOT_FOUND: ErrDeviceNotFound, |
||||||
|
C.CL_DEVICE_NOT_AVAILABLE: ErrDeviceNotAvailable, |
||||||
|
C.CL_COMPILER_NOT_AVAILABLE: ErrCompilerNotAvailable, |
||||||
|
C.CL_MEM_OBJECT_ALLOCATION_FAILURE: ErrMemObjectAllocationFailure, |
||||||
|
C.CL_OUT_OF_RESOURCES: ErrOutOfResources, |
||||||
|
C.CL_OUT_OF_HOST_MEMORY: ErrOutOfHostMemory, |
||||||
|
C.CL_PROFILING_INFO_NOT_AVAILABLE: ErrProfilingInfoNotAvailable, |
||||||
|
C.CL_MEM_COPY_OVERLAP: ErrMemCopyOverlap, |
||||||
|
C.CL_IMAGE_FORMAT_MISMATCH: ErrImageFormatMismatch, |
||||||
|
C.CL_IMAGE_FORMAT_NOT_SUPPORTED: ErrImageFormatNotSupported, |
||||||
|
C.CL_BUILD_PROGRAM_FAILURE: ErrBuildProgramFailure, |
||||||
|
C.CL_MAP_FAILURE: ErrMapFailure, |
||||||
|
C.CL_MISALIGNED_SUB_BUFFER_OFFSET: ErrMisalignedSubBufferOffset, |
||||||
|
C.CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: ErrExecStatusErrorForEventsInWaitList, |
||||||
|
C.CL_INVALID_VALUE: ErrInvalidValue, |
||||||
|
C.CL_INVALID_DEVICE_TYPE: ErrInvalidDeviceType, |
||||||
|
C.CL_INVALID_PLATFORM: ErrInvalidPlatform, |
||||||
|
C.CL_INVALID_DEVICE: ErrInvalidDevice, |
||||||
|
C.CL_INVALID_CONTEXT: ErrInvalidContext, |
||||||
|
C.CL_INVALID_QUEUE_PROPERTIES: ErrInvalidQueueProperties, |
||||||
|
C.CL_INVALID_COMMAND_QUEUE: ErrInvalidCommandQueue, |
||||||
|
C.CL_INVALID_HOST_PTR: ErrInvalidHostPtr, |
||||||
|
C.CL_INVALID_MEM_OBJECT: ErrInvalidMemObject, |
||||||
|
C.CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: ErrInvalidImageFormatDescriptor, |
||||||
|
C.CL_INVALID_IMAGE_SIZE: ErrInvalidImageSize, |
||||||
|
C.CL_INVALID_SAMPLER: ErrInvalidSampler, |
||||||
|
C.CL_INVALID_BINARY: ErrInvalidBinary, |
||||||
|
C.CL_INVALID_BUILD_OPTIONS: ErrInvalidBuildOptions, |
||||||
|
C.CL_INVALID_PROGRAM: ErrInvalidProgram, |
||||||
|
C.CL_INVALID_PROGRAM_EXECUTABLE: ErrInvalidProgramExecutable, |
||||||
|
C.CL_INVALID_KERNEL_NAME: ErrInvalidKernelName, |
||||||
|
C.CL_INVALID_KERNEL_DEFINITION: ErrInvalidKernelDefinition, |
||||||
|
C.CL_INVALID_KERNEL: ErrInvalidKernel, |
||||||
|
C.CL_INVALID_ARG_INDEX: ErrInvalidArgIndex, |
||||||
|
C.CL_INVALID_ARG_VALUE: ErrInvalidArgValue, |
||||||
|
C.CL_INVALID_ARG_SIZE: ErrInvalidArgSize, |
||||||
|
C.CL_INVALID_KERNEL_ARGS: ErrInvalidKernelArgs, |
||||||
|
C.CL_INVALID_WORK_DIMENSION: ErrInvalidWorkDimension, |
||||||
|
C.CL_INVALID_WORK_GROUP_SIZE: ErrInvalidWorkGroupSize, |
||||||
|
C.CL_INVALID_WORK_ITEM_SIZE: ErrInvalidWorkItemSize, |
||||||
|
C.CL_INVALID_GLOBAL_OFFSET: ErrInvalidGlobalOffset, |
||||||
|
C.CL_INVALID_EVENT_WAIT_LIST: ErrInvalidEventWaitList, |
||||||
|
C.CL_INVALID_EVENT: ErrInvalidEvent, |
||||||
|
C.CL_INVALID_OPERATION: ErrInvalidOperation, |
||||||
|
C.CL_INVALID_GL_OBJECT: ErrInvalidGlObject, |
||||||
|
C.CL_INVALID_BUFFER_SIZE: ErrInvalidBufferSize, |
||||||
|
C.CL_INVALID_MIP_LEVEL: ErrInvalidMipLevel, |
||||||
|
C.CL_INVALID_GLOBAL_WORK_SIZE: ErrInvalidGlobalWorkSize, |
||||||
|
C.CL_INVALID_PROPERTY: ErrInvalidProperty, |
||||||
|
} |
||||||
|
|
||||||
|
func toError(code C.cl_int) error { |
||||||
|
if err, ok := errorMap[code]; ok { |
||||||
|
return err |
||||||
|
} |
||||||
|
return ErrOther(code) |
||||||
|
} |
||||||
|
|
||||||
|
type LocalMemType int |
||||||
|
|
||||||
|
const ( |
||||||
|
LocalMemTypeNone LocalMemType = C.CL_NONE |
||||||
|
LocalMemTypeGlobal LocalMemType = C.CL_GLOBAL |
||||||
|
LocalMemTypeLocal LocalMemType = C.CL_LOCAL |
||||||
|
) |
||||||
|
|
||||||
|
var localMemTypeMap = map[LocalMemType]string{ |
||||||
|
LocalMemTypeNone: "None", |
||||||
|
LocalMemTypeGlobal: "Global", |
||||||
|
LocalMemTypeLocal: "Local", |
||||||
|
} |
||||||
|
|
||||||
|
func (t LocalMemType) String() string { |
||||||
|
name := localMemTypeMap[t] |
||||||
|
if name == "" { |
||||||
|
name = "Unknown" |
||||||
|
} |
||||||
|
return name |
||||||
|
} |
||||||
|
|
||||||
|
type ExecCapability int |
||||||
|
|
||||||
|
const ( |
||||||
|
ExecCapabilityKernel ExecCapability = C.CL_EXEC_KERNEL // The OpenCL device can execute OpenCL kernels.
|
||||||
|
ExecCapabilityNativeKernel ExecCapability = C.CL_EXEC_NATIVE_KERNEL // The OpenCL device can execute native kernels.
|
||||||
|
) |
||||||
|
|
||||||
|
func (ec ExecCapability) String() string { |
||||||
|
var parts []string |
||||||
|
if ec&ExecCapabilityKernel != 0 { |
||||||
|
parts = append(parts, "Kernel") |
||||||
|
} |
||||||
|
if ec&ExecCapabilityNativeKernel != 0 { |
||||||
|
parts = append(parts, "NativeKernel") |
||||||
|
} |
||||||
|
if parts == nil { |
||||||
|
return "" |
||||||
|
} |
||||||
|
return strings.Join(parts, "|") |
||||||
|
} |
||||||
|
|
||||||
|
type MemCacheType int |
||||||
|
|
||||||
|
const ( |
||||||
|
MemCacheTypeNone MemCacheType = C.CL_NONE |
||||||
|
MemCacheTypeReadOnlyCache MemCacheType = C.CL_READ_ONLY_CACHE |
||||||
|
MemCacheTypeReadWriteCache MemCacheType = C.CL_READ_WRITE_CACHE |
||||||
|
) |
||||||
|
|
||||||
|
func (ct MemCacheType) String() string { |
||||||
|
switch ct { |
||||||
|
case MemCacheTypeNone: |
||||||
|
return "None" |
||||||
|
case MemCacheTypeReadOnlyCache: |
||||||
|
return "ReadOnly" |
||||||
|
case MemCacheTypeReadWriteCache: |
||||||
|
return "ReadWrite" |
||||||
|
} |
||||||
|
return fmt.Sprintf("Unknown(%x)", int(ct)) |
||||||
|
} |
||||||
|
|
||||||
|
type MemFlag int |
||||||
|
|
||||||
|
const ( |
||||||
|
MemReadWrite MemFlag = C.CL_MEM_READ_WRITE |
||||||
|
MemWriteOnly MemFlag = C.CL_MEM_WRITE_ONLY |
||||||
|
MemReadOnly MemFlag = C.CL_MEM_READ_ONLY |
||||||
|
MemUseHostPtr MemFlag = C.CL_MEM_USE_HOST_PTR |
||||||
|
MemAllocHostPtr MemFlag = C.CL_MEM_ALLOC_HOST_PTR |
||||||
|
MemCopyHostPtr MemFlag = C.CL_MEM_COPY_HOST_PTR |
||||||
|
|
||||||
|
MemWriteOnlyHost MemFlag = C.CL_MEM_HOST_WRITE_ONLY |
||||||
|
MemReadOnlyHost MemFlag = C.CL_MEM_HOST_READ_ONLY |
||||||
|
MemNoAccessHost MemFlag = C.CL_MEM_HOST_NO_ACCESS |
||||||
|
) |
||||||
|
|
||||||
|
type MemObjectType int |
||||||
|
|
||||||
|
const ( |
||||||
|
MemObjectTypeBuffer MemObjectType = C.CL_MEM_OBJECT_BUFFER |
||||||
|
MemObjectTypeImage2D MemObjectType = C.CL_MEM_OBJECT_IMAGE2D |
||||||
|
MemObjectTypeImage3D MemObjectType = C.CL_MEM_OBJECT_IMAGE3D |
||||||
|
) |
||||||
|
|
||||||
|
type MapFlag int |
||||||
|
|
||||||
|
const ( |
||||||
|
// This flag specifies that the region being mapped in the memory object is being mapped for reading.
|
||||||
|
MapFlagRead MapFlag = C.CL_MAP_READ |
||||||
|
MapFlagWrite MapFlag = C.CL_MAP_WRITE |
||||||
|
MapFlagWriteInvalidateRegion MapFlag = C.CL_MAP_WRITE_INVALIDATE_REGION |
||||||
|
) |
||||||
|
|
||||||
|
func (mf MapFlag) toCl() C.cl_map_flags { |
||||||
|
return C.cl_map_flags(mf) |
||||||
|
} |
||||||
|
|
||||||
|
type ChannelOrder int |
||||||
|
|
||||||
|
const ( |
||||||
|
ChannelOrderR ChannelOrder = C.CL_R |
||||||
|
ChannelOrderA ChannelOrder = C.CL_A |
||||||
|
ChannelOrderRG ChannelOrder = C.CL_RG |
||||||
|
ChannelOrderRA ChannelOrder = C.CL_RA |
||||||
|
ChannelOrderRGB ChannelOrder = C.CL_RGB |
||||||
|
ChannelOrderRGBA ChannelOrder = C.CL_RGBA |
||||||
|
ChannelOrderBGRA ChannelOrder = C.CL_BGRA |
||||||
|
ChannelOrderARGB ChannelOrder = C.CL_ARGB |
||||||
|
ChannelOrderIntensity ChannelOrder = C.CL_INTENSITY |
||||||
|
ChannelOrderLuminance ChannelOrder = C.CL_LUMINANCE |
||||||
|
ChannelOrderRx ChannelOrder = C.CL_Rx |
||||||
|
ChannelOrderRGx ChannelOrder = C.CL_RGx |
||||||
|
ChannelOrderRGBx ChannelOrder = C.CL_RGBx |
||||||
|
) |
||||||
|
|
||||||
|
var channelOrderNameMap = map[ChannelOrder]string{ |
||||||
|
ChannelOrderR: "R", |
||||||
|
ChannelOrderA: "A", |
||||||
|
ChannelOrderRG: "RG", |
||||||
|
ChannelOrderRA: "RA", |
||||||
|
ChannelOrderRGB: "RGB", |
||||||
|
ChannelOrderRGBA: "RGBA", |
||||||
|
ChannelOrderBGRA: "BGRA", |
||||||
|
ChannelOrderARGB: "ARGB", |
||||||
|
ChannelOrderIntensity: "Intensity", |
||||||
|
ChannelOrderLuminance: "Luminance", |
||||||
|
ChannelOrderRx: "Rx", |
||||||
|
ChannelOrderRGx: "RGx", |
||||||
|
ChannelOrderRGBx: "RGBx", |
||||||
|
} |
||||||
|
|
||||||
|
func (co ChannelOrder) String() string { |
||||||
|
name := channelOrderNameMap[co] |
||||||
|
if name == "" { |
||||||
|
name = fmt.Sprintf("Unknown(%x)", int(co)) |
||||||
|
} |
||||||
|
return name |
||||||
|
} |
||||||
|
|
||||||
|
type ChannelDataType int |
||||||
|
|
||||||
|
const ( |
||||||
|
ChannelDataTypeSNormInt8 ChannelDataType = C.CL_SNORM_INT8 |
||||||
|
ChannelDataTypeSNormInt16 ChannelDataType = C.CL_SNORM_INT16 |
||||||
|
ChannelDataTypeUNormInt8 ChannelDataType = C.CL_UNORM_INT8 |
||||||
|
ChannelDataTypeUNormInt16 ChannelDataType = C.CL_UNORM_INT16 |
||||||
|
ChannelDataTypeUNormShort565 ChannelDataType = C.CL_UNORM_SHORT_565 |
||||||
|
ChannelDataTypeUNormShort555 ChannelDataType = C.CL_UNORM_SHORT_555 |
||||||
|
ChannelDataTypeUNormInt101010 ChannelDataType = C.CL_UNORM_INT_101010 |
||||||
|
ChannelDataTypeSignedInt8 ChannelDataType = C.CL_SIGNED_INT8 |
||||||
|
ChannelDataTypeSignedInt16 ChannelDataType = C.CL_SIGNED_INT16 |
||||||
|
ChannelDataTypeSignedInt32 ChannelDataType = C.CL_SIGNED_INT32 |
||||||
|
ChannelDataTypeUnsignedInt8 ChannelDataType = C.CL_UNSIGNED_INT8 |
||||||
|
ChannelDataTypeUnsignedInt16 ChannelDataType = C.CL_UNSIGNED_INT16 |
||||||
|
ChannelDataTypeUnsignedInt32 ChannelDataType = C.CL_UNSIGNED_INT32 |
||||||
|
ChannelDataTypeHalfFloat ChannelDataType = C.CL_HALF_FLOAT |
||||||
|
ChannelDataTypeFloat ChannelDataType = C.CL_FLOAT |
||||||
|
) |
||||||
|
|
||||||
|
var channelDataTypeNameMap = map[ChannelDataType]string{ |
||||||
|
ChannelDataTypeSNormInt8: "SNormInt8", |
||||||
|
ChannelDataTypeSNormInt16: "SNormInt16", |
||||||
|
ChannelDataTypeUNormInt8: "UNormInt8", |
||||||
|
ChannelDataTypeUNormInt16: "UNormInt16", |
||||||
|
ChannelDataTypeUNormShort565: "UNormShort565", |
||||||
|
ChannelDataTypeUNormShort555: "UNormShort555", |
||||||
|
ChannelDataTypeUNormInt101010: "UNormInt101010", |
||||||
|
ChannelDataTypeSignedInt8: "SignedInt8", |
||||||
|
ChannelDataTypeSignedInt16: "SignedInt16", |
||||||
|
ChannelDataTypeSignedInt32: "SignedInt32", |
||||||
|
ChannelDataTypeUnsignedInt8: "UnsignedInt8", |
||||||
|
ChannelDataTypeUnsignedInt16: "UnsignedInt16", |
||||||
|
ChannelDataTypeUnsignedInt32: "UnsignedInt32", |
||||||
|
ChannelDataTypeHalfFloat: "HalfFloat", |
||||||
|
ChannelDataTypeFloat: "Float", |
||||||
|
} |
||||||
|
|
||||||
|
func (ct ChannelDataType) String() string { |
||||||
|
name := channelDataTypeNameMap[ct] |
||||||
|
if name == "" { |
||||||
|
name = fmt.Sprintf("Unknown(%x)", int(ct)) |
||||||
|
} |
||||||
|
return name |
||||||
|
} |
||||||
|
|
||||||
|
type ImageFormat struct { |
||||||
|
ChannelOrder ChannelOrder |
||||||
|
ChannelDataType ChannelDataType |
||||||
|
} |
||||||
|
|
||||||
|
func (f ImageFormat) toCl() C.cl_image_format { |
||||||
|
var format C.cl_image_format |
||||||
|
format.image_channel_order = C.cl_channel_order(f.ChannelOrder) |
||||||
|
format.image_channel_data_type = C.cl_channel_type(f.ChannelDataType) |
||||||
|
return format |
||||||
|
} |
||||||
|
|
||||||
|
type ProfilingInfo int |
||||||
|
|
||||||
|
const ( |
||||||
|
// A 64-bit value that describes the current device time counter in
|
||||||
|
// nanoseconds when the command identified by event is enqueued in
|
||||||
|
// a command-queue by the host.
|
||||||
|
ProfilingInfoCommandQueued ProfilingInfo = C.CL_PROFILING_COMMAND_QUEUED |
||||||
|
// A 64-bit value that describes the current device time counter in
|
||||||
|
// nanoseconds when the command identified by event that has been
|
||||||
|
// enqueued is submitted by the host to the device associated with the command-queue.
|
||||||
|
ProfilingInfoCommandSubmit ProfilingInfo = C.CL_PROFILING_COMMAND_SUBMIT |
||||||
|
// A 64-bit value that describes the current device time counter in
|
||||||
|
// nanoseconds when the command identified by event starts execution on the device.
|
||||||
|
ProfilingInfoCommandStart ProfilingInfo = C.CL_PROFILING_COMMAND_START |
||||||
|
// A 64-bit value that describes the current device time counter in
|
||||||
|
// nanoseconds when the command identified by event has finished
|
||||||
|
// execution on the device.
|
||||||
|
ProfilingInfoCommandEnd ProfilingInfo = C.CL_PROFILING_COMMAND_END |
||||||
|
) |
||||||
|
|
||||||
|
type CommmandExecStatus int |
||||||
|
|
||||||
|
const ( |
||||||
|
CommmandExecStatusComplete CommmandExecStatus = C.CL_COMPLETE |
||||||
|
CommmandExecStatusRunning CommmandExecStatus = C.CL_RUNNING |
||||||
|
CommmandExecStatusSubmitted CommmandExecStatus = C.CL_SUBMITTED |
||||||
|
CommmandExecStatusQueued CommmandExecStatus = C.CL_QUEUED |
||||||
|
) |
||||||
|
|
||||||
|
type Event struct { |
||||||
|
clEvent C.cl_event |
||||||
|
} |
||||||
|
|
||||||
|
func releaseEvent(ev *Event) { |
||||||
|
if ev.clEvent != nil { |
||||||
|
C.clReleaseEvent(ev.clEvent) |
||||||
|
ev.clEvent = nil |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func (e *Event) Release() { |
||||||
|
releaseEvent(e) |
||||||
|
} |
||||||
|
|
||||||
|
func (e *Event) GetEventProfilingInfo(paramName ProfilingInfo) (int64, error) { |
||||||
|
var paramValue C.cl_ulong |
||||||
|
if err := C.clGetEventProfilingInfo(e.clEvent, C.cl_profiling_info(paramName), C.size_t(unsafe.Sizeof(paramValue)), unsafe.Pointer(¶mValue), nil); err != C.CL_SUCCESS { |
||||||
|
return 0, toError(err) |
||||||
|
} |
||||||
|
return int64(paramValue), nil |
||||||
|
} |
||||||
|
|
||||||
|
// Sets the execution status of a user event object.
|
||||||
|
//
|
||||||
|
// `status` specifies the new execution status to be set and
|
||||||
|
// can be CL_COMPLETE or a negative integer value to indicate
|
||||||
|
// an error. A negative integer value causes all enqueued commands
|
||||||
|
// that wait on this user event to be terminated. clSetUserEventStatus
|
||||||
|
// can only be called once to change the execution status of event.
|
||||||
|
func (e *Event) SetUserEventStatus(status int) error { |
||||||
|
return toError(C.clSetUserEventStatus(e.clEvent, C.cl_int(status))) |
||||||
|
} |
||||||
|
|
||||||
|
// Waits on the host thread for commands identified by event objects in
|
||||||
|
// events to complete. A command is considered complete if its execution
|
||||||
|
// status is CL_COMPLETE or a negative value. The events specified in
|
||||||
|
// event_list act as synchronization points.
|
||||||
|
//
|
||||||
|
// If the cl_khr_gl_event extension is enabled, event objects can also be
|
||||||
|
// used to reflect the status of an OpenGL sync object. The sync object
|
||||||
|
// in turn refers to a fence command executing in an OpenGL command
|
||||||
|
// stream. This provides another method of coordinating sharing of buffers
|
||||||
|
// and images between OpenGL and OpenCL.
|
||||||
|
func WaitForEvents(events []*Event) error { |
||||||
|
return toError(C.clWaitForEvents(C.cl_uint(len(events)), eventListPtr(events))) |
||||||
|
} |
||||||
|
|
||||||
|
func newEvent(clEvent C.cl_event) *Event { |
||||||
|
ev := &Event{clEvent: clEvent} |
||||||
|
runtime.SetFinalizer(ev, releaseEvent) |
||||||
|
return ev |
||||||
|
} |
||||||
|
|
||||||
|
func eventListPtr(el []*Event) *C.cl_event { |
||||||
|
if el == nil { |
||||||
|
return nil |
||||||
|
} |
||||||
|
elist := make([]C.cl_event, len(el)) |
||||||
|
for i, e := range el { |
||||||
|
elist[i] = e.clEvent |
||||||
|
} |
||||||
|
return (*C.cl_event)(&elist[0]) |
||||||
|
} |
||||||
|
|
||||||
|
func clBool(b bool) C.cl_bool { |
||||||
|
if b { |
||||||
|
return C.CL_TRUE |
||||||
|
} |
||||||
|
return C.CL_FALSE |
||||||
|
} |
||||||
|
|
||||||
|
func sizeT3(i3 [3]int) [3]C.size_t { |
||||||
|
var val [3]C.size_t |
||||||
|
val[0] = C.size_t(i3[0]) |
||||||
|
val[1] = C.size_t(i3[1]) |
||||||
|
val[2] = C.size_t(i3[2]) |
||||||
|
return val |
||||||
|
} |
||||||
|
|
||||||
|
type MappedMemObject struct { |
||||||
|
ptr unsafe.Pointer |
||||||
|
size int |
||||||
|
rowPitch int |
||||||
|
slicePitch int |
||||||
|
} |
||||||
|
|
||||||
|
func (mb *MappedMemObject) ByteSlice() []byte { |
||||||
|
var byteSlice []byte |
||||||
|
sliceHeader := (*reflect.SliceHeader)(unsafe.Pointer(&byteSlice)) |
||||||
|
sliceHeader.Cap = mb.size |
||||||
|
sliceHeader.Len = mb.size |
||||||
|
sliceHeader.Data = uintptr(mb.ptr) |
||||||
|
return byteSlice |
||||||
|
} |
||||||
|
|
||||||
|
func (mb *MappedMemObject) Ptr() unsafe.Pointer { |
||||||
|
return mb.ptr |
||||||
|
} |
||||||
|
|
||||||
|
func (mb *MappedMemObject) Size() int { |
||||||
|
return mb.size |
||||||
|
} |
||||||
|
|
||||||
|
func (mb *MappedMemObject) RowPitch() int { |
||||||
|
return mb.rowPitch |
||||||
|
} |
||||||
|
|
||||||
|
func (mb *MappedMemObject) SlicePitch() int { |
||||||
|
return mb.slicePitch |
||||||
|
} |
@ -0,0 +1,71 @@ |
|||||||
|
// +build cl12
|
||||||
|
|
||||||
|
package cl |
||||||
|
|
||||||
|
// #ifdef __APPLE__
|
||||||
|
// #include "OpenCL/opencl.h"
|
||||||
|
// #else
|
||||||
|
// #include "cl.h"
|
||||||
|
// #endif
|
||||||
|
import "C" |
||||||
|
|
||||||
|
const ( |
||||||
|
ChannelDataTypeUNormInt24 ChannelDataType = C.CL_UNORM_INT24 |
||||||
|
ChannelOrderDepth ChannelOrder = C.CL_DEPTH |
||||||
|
ChannelOrderDepthStencil ChannelOrder = C.CL_DEPTH_STENCIL |
||||||
|
MemHostNoAccess MemFlag = C.CL_MEM_HOST_NO_ACCESS // OpenCL 1.2
|
||||||
|
MemHostReadOnly MemFlag = C.CL_MEM_HOST_READ_ONLY // OpenCL 1.2
|
||||||
|
MemHostWriteOnly MemFlag = C.CL_MEM_HOST_WRITE_ONLY // OpenCL 1.2
|
||||||
|
MemObjectTypeImage1D MemObjectType = C.CL_MEM_OBJECT_IMAGE1D |
||||||
|
MemObjectTypeImage1DArray MemObjectType = C.CL_MEM_OBJECT_IMAGE1D_ARRAY |
||||||
|
MemObjectTypeImage1DBuffer MemObjectType = C.CL_MEM_OBJECT_IMAGE1D_BUFFER |
||||||
|
MemObjectTypeImage2DArray MemObjectType = C.CL_MEM_OBJECT_IMAGE2D_ARRAY |
||||||
|
// This flag specifies that the region being mapped in the memory object is being mapped for writing.
|
||||||
|
//
|
||||||
|
// The contents of the region being mapped are to be discarded. This is typically the case when the
|
||||||
|
// region being mapped is overwritten by the host. This flag allows the implementation to no longer
|
||||||
|
// guarantee that the pointer returned by clEnqueueMapBuffer or clEnqueueMapImage contains the
|
||||||
|
// latest bits in the region being mapped which can be a significant performance enhancement.
|
||||||
|
MapFlagWriteInvalidateRegion MapFlag = C.CL_MAP_WRITE_INVALIDATE_REGION |
||||||
|
) |
||||||
|
|
||||||
|
func init() { |
||||||
|
errorMap[C.CL_COMPILE_PROGRAM_FAILURE] = ErrCompileProgramFailure |
||||||
|
errorMap[C.CL_DEVICE_PARTITION_FAILED] = ErrDevicePartitionFailed |
||||||
|
errorMap[C.CL_INVALID_COMPILER_OPTIONS] = ErrInvalidCompilerOptions |
||||||
|
errorMap[C.CL_INVALID_DEVICE_PARTITION_COUNT] = ErrInvalidDevicePartitionCount |
||||||
|
errorMap[C.CL_INVALID_IMAGE_DESCRIPTOR] = ErrInvalidImageDescriptor |
||||||
|
errorMap[C.CL_INVALID_LINKER_OPTIONS] = ErrInvalidLinkerOptions |
||||||
|
errorMap[C.CL_KERNEL_ARG_INFO_NOT_AVAILABLE] = ErrKernelArgInfoNotAvailable |
||||||
|
errorMap[C.CL_LINK_PROGRAM_FAILURE] = ErrLinkProgramFailure |
||||||
|
errorMap[C.CL_LINKER_NOT_AVAILABLE] = ErrLinkerNotAvailable |
||||||
|
channelOrderNameMap[ChannelOrderDepth] = "Depth" |
||||||
|
channelOrderNameMap[ChannelOrderDepthStencil] = "DepthStencil" |
||||||
|
channelDataTypeNameMap[ChannelDataTypeUNormInt24] = "UNormInt24" |
||||||
|
} |
||||||
|
|
||||||
|
type ImageDescription struct { |
||||||
|
Type MemObjectType |
||||||
|
Width, Height, Depth int |
||||||
|
ArraySize, RowPitch, SlicePitch int |
||||||
|
NumMipLevels, NumSamples int |
||||||
|
Buffer *MemObject |
||||||
|
} |
||||||
|
|
||||||
|
func (d ImageDescription) toCl() C.cl_image_desc { |
||||||
|
var desc C.cl_image_desc |
||||||
|
desc.image_type = C.cl_mem_object_type(d.Type) |
||||||
|
desc.image_width = C.size_t(d.Width) |
||||||
|
desc.image_height = C.size_t(d.Height) |
||||||
|
desc.image_depth = C.size_t(d.Depth) |
||||||
|
desc.image_array_size = C.size_t(d.ArraySize) |
||||||
|
desc.image_row_pitch = C.size_t(d.RowPitch) |
||||||
|
desc.image_slice_pitch = C.size_t(d.SlicePitch) |
||||||
|
desc.num_mip_levels = C.cl_uint(d.NumMipLevels) |
||||||
|
desc.num_samples = C.cl_uint(d.NumSamples) |
||||||
|
desc.buffer = nil |
||||||
|
if d.Buffer != nil { |
||||||
|
desc.buffer = d.Buffer.clMem |
||||||
|
} |
||||||
|
return desc |
||||||
|
} |
45
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types_darwin.go
generated
vendored
45
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types_darwin.go
generated
vendored
@ -0,0 +1,45 @@ |
|||||||
|
package cl |
||||||
|
|
||||||
|
// #ifdef __APPLE__
|
||||||
|
// #include "OpenCL/opencl.h"
|
||||||
|
// #else
|
||||||
|
// #include "cl.h"
|
||||||
|
// #endif
|
||||||
|
import "C" |
||||||
|
|
||||||
|
// Extension: cl_APPLE_fixed_alpha_channel_orders
|
||||||
|
//
|
||||||
|
// These selectors may be passed to clCreateImage2D() in the cl_image_format.image_channel_order field.
|
||||||
|
// They are like CL_BGRA and CL_ARGB except that the alpha channel to be ignored. On calls to read_imagef,
|
||||||
|
// the alpha will be 0xff (1.0f) if the sample falls in the image and 0 if it does not fall in the image.
|
||||||
|
// On calls to write_imagef, the alpha value is ignored and 0xff (1.0f) is written. These formats are
|
||||||
|
// currently only available for the CL_UNORM_INT8 cl_channel_type. They are intended to support legacy
|
||||||
|
// image formats.
|
||||||
|
const ( |
||||||
|
ChannelOrder1RGBApple ChannelOrder = C.CL_1RGB_APPLE // Introduced in MacOS X.7.
|
||||||
|
ChannelOrderBGR1Apple ChannelOrder = C.CL_BGR1_APPLE // Introduced in MacOS X.7.
|
||||||
|
) |
||||||
|
|
||||||
|
// Extension: cl_APPLE_biased_fixed_point_image_formats
|
||||||
|
//
|
||||||
|
// This selector may be passed to clCreateImage2D() in the cl_image_format.image_channel_data_type field.
|
||||||
|
// It defines a biased signed 1.14 fixed point storage format, with range [-1, 3). The conversion from
|
||||||
|
// float to this fixed point format is defined as follows:
|
||||||
|
//
|
||||||
|
// ushort float_to_sfixed14( float x ){
|
||||||
|
// int i = convert_int_sat_rte( x * 0x1.0p14f ); // scale [-1, 3.0) to [-16384, 3*16384), round to nearest integer
|
||||||
|
// i = add_sat( i, 0x4000 ); // apply bias, to convert to [0, 65535) range
|
||||||
|
// return convert_ushort_sat(i); // clamp to destination size
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// The inverse conversion is the reverse process. The formats are currently only available on the CPU with
|
||||||
|
// the CL_RGBA channel layout.
|
||||||
|
const ( |
||||||
|
ChannelDataTypeSFixed14Apple ChannelDataType = C.CL_SFIXED14_APPLE // Introduced in MacOS X.7.
|
||||||
|
) |
||||||
|
|
||||||
|
func init() { |
||||||
|
channelOrderNameMap[ChannelOrder1RGBApple] = "1RGBApple" |
||||||
|
channelOrderNameMap[ChannelOrderBGR1Apple] = "RGB1Apple" |
||||||
|
channelDataTypeNameMap[ChannelDataTypeSFixed14Apple] = "SFixed14Apple" |
||||||
|
} |
@ -0,0 +1,629 @@ |
|||||||
|
// Copyright 2014 The go-ethereum Authors
|
||||||
|
// This file is part of the go-ethereum library.
|
||||||
|
//
|
||||||
|
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Lesser General Public License as published by
|
||||||
|
// the Free Software Foundation, either version 3 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU Lesser General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU Lesser General Public License
|
||||||
|
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
// +build opencl
|
||||||
|
|
||||||
|
package ethash |
||||||
|
|
||||||
|
//#cgo LDFLAGS: -w
|
||||||
|
//#include <stdint.h>
|
||||||
|
//#include <string.h>
|
||||||
|
//#include "src/libethash/internal.h"
|
||||||
|
import "C" |
||||||
|
|
||||||
|
import ( |
||||||
|
crand "crypto/rand" |
||||||
|
"encoding/binary" |
||||||
|
"fmt" |
||||||
|
"math" |
||||||
|
"math/big" |
||||||
|
mrand "math/rand" |
||||||
|
"strconv" |
||||||
|
"strings" |
||||||
|
"sync" |
||||||
|
"sync/atomic" |
||||||
|
"time" |
||||||
|
"unsafe" |
||||||
|
|
||||||
|
"github.com/Gustav-Simonsson/go-opencl/cl" |
||||||
|
"github.com/ethereum/go-ethereum/common" |
||||||
|
"github.com/ethereum/go-ethereum/pow" |
||||||
|
) |
||||||
|
|
||||||
|
/* |
||||||
|
|
||||||
|
This code have two main entry points: |
||||||
|
|
||||||
|
1. The initCL(...) function configures one or more OpenCL device |
||||||
|
(for now only GPU) and loads the Ethash DAG onto device memory |
||||||
|
|
||||||
|
2. The Search(...) function loads a Ethash nonce into device(s) memory and |
||||||
|
executes the Ethash OpenCL kernel. |
||||||
|
|
||||||
|
Throughout the code, we refer to "host memory" and "device memory". |
||||||
|
For most systems (e.g. regular PC GPU miner) the host memory is RAM and |
||||||
|
device memory is the GPU global memory (e.g. GDDR5). |
||||||
|
|
||||||
|
References mentioned in code comments: |
||||||
|
|
||||||
|
1. https://github.com/ethereum/wiki/wiki/Ethash
|
||||||
|
2. https://github.com/ethereum/cpp-ethereum/blob/develop/libethash-cl/ethash_cl_miner.cpp
|
||||||
|
3. https://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/
|
||||||
|
4. http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/12/AMD_OpenCL_Programming_User_Guide.pdf
|
||||||
|
|
||||||
|
*/ |
||||||
|
|
||||||
|
type OpenCLDevice struct { |
||||||
|
deviceId int |
||||||
|
device *cl.Device |
||||||
|
openCL11 bool // OpenCL version 1.1 and 1.2 are handled a bit different
|
||||||
|
openCL12 bool |
||||||
|
|
||||||
|
dagBuf *cl.MemObject // Ethash full DAG in device mem
|
||||||
|
headerBuf *cl.MemObject // Hash of block-to-mine in device mem
|
||||||
|
searchBuffers []*cl.MemObject |
||||||
|
|
||||||
|
searchKernel *cl.Kernel |
||||||
|
hashKernel *cl.Kernel |
||||||
|
|
||||||
|
queue *cl.CommandQueue |
||||||
|
ctx *cl.Context |
||||||
|
workGroupSize int |
||||||
|
|
||||||
|
nonceRand *mrand.Rand // seeded by crypto/rand, see comments where it's initialised
|
||||||
|
result common.Hash |
||||||
|
} |
||||||
|
|
||||||
|
type OpenCLMiner struct { |
||||||
|
mu sync.Mutex |
||||||
|
|
||||||
|
ethash *Ethash // Ethash full DAG & cache in host mem
|
||||||
|
|
||||||
|
deviceIds []int |
||||||
|
devices []*OpenCLDevice |
||||||
|
|
||||||
|
dagSize uint64 |
||||||
|
|
||||||
|
hashRate int32 // Go atomics & uint64 have some issues; int32 is supported on all platforms
|
||||||
|
} |
||||||
|
|
||||||
|
type pendingSearch struct { |
||||||
|
bufIndex uint32 |
||||||
|
startNonce uint64 |
||||||
|
} |
||||||
|
|
||||||
|
const ( |
||||||
|
SIZEOF_UINT32 = 4 |
||||||
|
|
||||||
|
// See [1]
|
||||||
|
ethashMixBytesLen = 128 |
||||||
|
ethashAccesses = 64 |
||||||
|
|
||||||
|
// See [4]
|
||||||
|
workGroupSize = 32 // must be multiple of 8
|
||||||
|
maxSearchResults = 63 |
||||||
|
searchBufSize = 2 |
||||||
|
globalWorkSize = 1024 * 256 |
||||||
|
) |
||||||
|
|
||||||
|
func NewCL(deviceIds []int) *OpenCLMiner { |
||||||
|
ids := make([]int, len(deviceIds)) |
||||||
|
copy(ids, deviceIds) |
||||||
|
return &OpenCLMiner{ |
||||||
|
ethash: New(), |
||||||
|
dagSize: 0, // to see if we need to update DAG.
|
||||||
|
deviceIds: ids, |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func PrintDevices() { |
||||||
|
fmt.Println("=============================================") |
||||||
|
fmt.Println("============ OpenCL Device Info =============") |
||||||
|
fmt.Println("=============================================") |
||||||
|
|
||||||
|
var found []*cl.Device |
||||||
|
|
||||||
|
platforms, err := cl.GetPlatforms() |
||||||
|
if err != nil { |
||||||
|
fmt.Println("Plaform error (check your OpenCL installation): %v", err) |
||||||
|
return |
||||||
|
} |
||||||
|
|
||||||
|
for i, p := range platforms { |
||||||
|
fmt.Println("Platform id ", i) |
||||||
|
fmt.Println("Platform Name ", p.Name()) |
||||||
|
fmt.Println("Platform Vendor ", p.Vendor()) |
||||||
|
fmt.Println("Platform Version ", p.Version()) |
||||||
|
fmt.Println("Platform Extensions ", p.Extensions()) |
||||||
|
fmt.Println("Platform Profile ", p.Profile()) |
||||||
|
fmt.Println("") |
||||||
|
|
||||||
|
devices, err := cl.GetDevices(p, cl.DeviceTypeGPU) |
||||||
|
if err != nil { |
||||||
|
fmt.Println("Device error (check your GPU drivers) :", err) |
||||||
|
return |
||||||
|
} |
||||||
|
|
||||||
|
for _, d := range devices { |
||||||
|
fmt.Println("Device OpenCL id ", i) |
||||||
|
fmt.Println("Device id for mining ", len(found)) |
||||||
|
fmt.Println("Device Name ", d.Name()) |
||||||
|
fmt.Println("Vendor ", d.Vendor()) |
||||||
|
fmt.Println("Version ", d.Version()) |
||||||
|
fmt.Println("Driver version ", d.DriverVersion()) |
||||||
|
fmt.Println("Address bits ", d.AddressBits()) |
||||||
|
fmt.Println("Max clock freq ", d.MaxClockFrequency()) |
||||||
|
fmt.Println("Global mem size ", d.GlobalMemSize()) |
||||||
|
fmt.Println("Max constant buffer size", d.MaxConstantBufferSize()) |
||||||
|
fmt.Println("Max mem alloc size ", d.MaxMemAllocSize()) |
||||||
|
fmt.Println("Max compute units ", d.MaxComputeUnits()) |
||||||
|
fmt.Println("Max work group size ", d.MaxWorkGroupSize()) |
||||||
|
fmt.Println("Max work item sizes ", d.MaxWorkItemSizes()) |
||||||
|
fmt.Println("=============================================") |
||||||
|
|
||||||
|
found = append(found, d) |
||||||
|
} |
||||||
|
} |
||||||
|
if len(found) == 0 { |
||||||
|
fmt.Println("Found no GPU(s). Check that your OS can see the GPU(s)") |
||||||
|
} else { |
||||||
|
var idsFormat string |
||||||
|
for i := 0; i < len(found); i++ { |
||||||
|
idsFormat += strconv.Itoa(i) |
||||||
|
if i != len(found)-1 { |
||||||
|
idsFormat += "," |
||||||
|
} |
||||||
|
} |
||||||
|
fmt.Printf("Found %v devices. Benchmark first GPU: geth gpubench 0\n", len(found)) |
||||||
|
fmt.Printf("Mine using all GPUs: geth --minegpu %v\n", idsFormat) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// See [2]. We basically do the same here, but the Go OpenCL bindings
|
||||||
|
// are at a slightly higher abtraction level.
|
||||||
|
func InitCL(blockNum uint64, c *OpenCLMiner) error { |
||||||
|
platforms, err := cl.GetPlatforms() |
||||||
|
if err != nil { |
||||||
|
return fmt.Errorf("Plaform error: %v\nCheck your OpenCL installation and then run geth gpuinfo", err) |
||||||
|
} |
||||||
|
|
||||||
|
var devices []*cl.Device |
||||||
|
for _, p := range platforms { |
||||||
|
ds, err := cl.GetDevices(p, cl.DeviceTypeGPU) |
||||||
|
if err != nil { |
||||||
|
return fmt.Errorf("Devices error: %v\nCheck your GPU drivers and then run geth gpuinfo", err) |
||||||
|
} |
||||||
|
for _, d := range ds { |
||||||
|
devices = append(devices, d) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
pow := New() |
||||||
|
_ = pow.getDAG(blockNum) // generates DAG if we don't have it
|
||||||
|
pow.Light.getCache(blockNum) // and cache
|
||||||
|
|
||||||
|
c.ethash = pow |
||||||
|
dagSize := uint64(C.ethash_get_datasize(C.uint64_t(blockNum))) |
||||||
|
c.dagSize = dagSize |
||||||
|
|
||||||
|
for _, id := range c.deviceIds { |
||||||
|
if id > len(devices)-1 { |
||||||
|
return fmt.Errorf("Device id not found. See available device ids with: geth gpuinfo") |
||||||
|
} else { |
||||||
|
err := initCLDevice(id, devices[id], c) |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
if len(c.devices) == 0 { |
||||||
|
return fmt.Errorf("No GPU devices found") |
||||||
|
} |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
func initCLDevice(deviceId int, device *cl.Device, c *OpenCLMiner) error { |
||||||
|
devMaxAlloc := uint64(device.MaxMemAllocSize()) |
||||||
|
devGlobalMem := uint64(device.GlobalMemSize()) |
||||||
|
|
||||||
|
// TODO: more fine grained version logic
|
||||||
|
if device.Version() == "OpenCL 1.0" { |
||||||
|
fmt.Println("Device OpenCL version not supported: ", device.Version()) |
||||||
|
return fmt.Errorf("opencl version not supported") |
||||||
|
} |
||||||
|
|
||||||
|
var cl11, cl12 bool |
||||||
|
if device.Version() == "OpenCL 1.1" { |
||||||
|
cl11 = true |
||||||
|
} |
||||||
|
if device.Version() == "OpenCL 1.2" { |
||||||
|
cl12 = true |
||||||
|
} |
||||||
|
|
||||||
|
// log warnings but carry on; some device drivers report inaccurate values
|
||||||
|
if c.dagSize > devGlobalMem { |
||||||
|
fmt.Printf("WARNING: device memory may be insufficient: %v. DAG size: %v.\n", devGlobalMem, c.dagSize) |
||||||
|
} |
||||||
|
|
||||||
|
if c.dagSize > devMaxAlloc { |
||||||
|
fmt.Printf("WARNING: DAG size (%v) larger than device max memory allocation size (%v).\n", c.dagSize, devMaxAlloc) |
||||||
|
fmt.Printf("You probably have to export GPU_MAX_ALLOC_PERCENT=95\n") |
||||||
|
} |
||||||
|
|
||||||
|
fmt.Printf("Initialising device %v: %v\n", deviceId, device.Name()) |
||||||
|
|
||||||
|
context, err := cl.CreateContext([]*cl.Device{device}) |
||||||
|
if err != nil { |
||||||
|
return fmt.Errorf("failed creating context:", err) |
||||||
|
} |
||||||
|
|
||||||
|
// TODO: test running with CL_QUEUE_PROFILING_ENABLE for profiling?
|
||||||
|
queue, err := context.CreateCommandQueue(device, 0) |
||||||
|
if err != nil { |
||||||
|
return fmt.Errorf("command queue err:", err) |
||||||
|
} |
||||||
|
|
||||||
|
// See [4] section 3.2 and [3] "clBuildProgram".
|
||||||
|
// The OpenCL kernel code is compiled at run-time.
|
||||||
|
kvs := make(map[string]string, 4) |
||||||
|
kvs["GROUP_SIZE"] = strconv.FormatUint(workGroupSize, 10) |
||||||
|
kvs["DAG_SIZE"] = strconv.FormatUint(c.dagSize/ethashMixBytesLen, 10) |
||||||
|
kvs["ACCESSES"] = strconv.FormatUint(ethashAccesses, 10) |
||||||
|
kvs["MAX_OUTPUTS"] = strconv.FormatUint(maxSearchResults, 10) |
||||||
|
kernelCode := replaceWords(kernel, kvs) |
||||||
|
|
||||||
|
program, err := context.CreateProgramWithSource([]string{kernelCode}) |
||||||
|
if err != nil { |
||||||
|
return fmt.Errorf("program err:", err) |
||||||
|
} |
||||||
|
|
||||||
|
/* if using AMD OpenCL impl, you can set this to debug on x86 CPU device. |
||||||
|
see AMD OpenCL programming guide section 4.2 |
||||||
|
|
||||||
|
export in shell before running: |
||||||
|
export AMD_OCL_BUILD_OPTIONS_APPEND="-g -O0" |
||||||
|
export CPU_MAX_COMPUTE_UNITS=1 |
||||||
|
|
||||||
|
buildOpts := "-g -cl-opt-disable" |
||||||
|
|
||||||
|
*/ |
||||||
|
buildOpts := "" |
||||||
|
err = program.BuildProgram([]*cl.Device{device}, buildOpts) |
||||||
|
if err != nil { |
||||||
|
return fmt.Errorf("program build err:", err) |
||||||
|
} |
||||||
|
|
||||||
|
var searchKernelName, hashKernelName string |
||||||
|
searchKernelName = "ethash_search" |
||||||
|
hashKernelName = "ethash_hash" |
||||||
|
|
||||||
|
searchKernel, err := program.CreateKernel(searchKernelName) |
||||||
|
hashKernel, err := program.CreateKernel(hashKernelName) |
||||||
|
if err != nil { |
||||||
|
return fmt.Errorf("kernel err:", err) |
||||||
|
} |
||||||
|
|
||||||
|
// TODO: when this DAG size appears, patch the Go bindings
|
||||||
|
// (context.go) to work with uint64 as size_t
|
||||||
|
if c.dagSize > math.MaxInt32 { |
||||||
|
fmt.Println("DAG too large for allocation.") |
||||||
|
return fmt.Errorf("DAG too large for alloc") |
||||||
|
} |
||||||
|
|
||||||
|
// TODO: patch up Go bindings to work with size_t, will overflow if > maxint32
|
||||||
|
// TODO: fuck. shit's gonna overflow around 2017-06-09 12:17:02
|
||||||
|
dagBuf := *(new(*cl.MemObject)) |
||||||
|
dagBuf, err = context.CreateEmptyBuffer(cl.MemReadOnly, int(c.dagSize)) |
||||||
|
if err != nil { |
||||||
|
return fmt.Errorf("allocating dag buf failed: ", err) |
||||||
|
} |
||||||
|
|
||||||
|
// write DAG to device mem
|
||||||
|
dagPtr := unsafe.Pointer(c.ethash.Full.current.ptr.data) |
||||||
|
_, err = queue.EnqueueWriteBuffer(dagBuf, true, 0, int(c.dagSize), dagPtr, nil) |
||||||
|
if err != nil { |
||||||
|
return fmt.Errorf("writing to dag buf failed: ", err) |
||||||
|
} |
||||||
|
|
||||||
|
searchBuffers := make([]*cl.MemObject, searchBufSize) |
||||||
|
for i := 0; i < searchBufSize; i++ { |
||||||
|
searchBuff, err := context.CreateEmptyBuffer(cl.MemWriteOnly, (1+maxSearchResults)*SIZEOF_UINT32) |
||||||
|
if err != nil { |
||||||
|
return fmt.Errorf("search buffer err:", err) |
||||||
|
} |
||||||
|
searchBuffers[i] = searchBuff |
||||||
|
} |
||||||
|
|
||||||
|
headerBuf, err := context.CreateEmptyBuffer(cl.MemReadOnly, 32) |
||||||
|
if err != nil { |
||||||
|
return fmt.Errorf("header buffer err:", err) |
||||||
|
} |
||||||
|
|
||||||
|
// Unique, random nonces are crucial for mining efficieny.
|
||||||
|
// While we do not need cryptographically secure PRNG for nonces,
|
||||||
|
// we want to have uniform distribution and minimal repetition of nonces.
|
||||||
|
// We could guarantee strict uniqueness of nonces by generating unique ranges,
|
||||||
|
// but a int64 seed from crypto/rand should be good enough.
|
||||||
|
// we then use math/rand for speed and to avoid draining OS entropy pool
|
||||||
|
seed, err := crand.Int(crand.Reader, big.NewInt(math.MaxInt64)) |
||||||
|
if err != nil { |
||||||
|
return err |
||||||
|
} |
||||||
|
nonceRand := mrand.New(mrand.NewSource(seed.Int64())) |
||||||
|
|
||||||
|
deviceStruct := &OpenCLDevice{ |
||||||
|
deviceId: deviceId, |
||||||
|
device: device, |
||||||
|
openCL11: cl11, |
||||||
|
openCL12: cl12, |
||||||
|
|
||||||
|
dagBuf: dagBuf, |
||||||
|
headerBuf: headerBuf, |
||||||
|
searchBuffers: searchBuffers, |
||||||
|
|
||||||
|
searchKernel: searchKernel, |
||||||
|
hashKernel: hashKernel, |
||||||
|
|
||||||
|
queue: queue, |
||||||
|
ctx: context, |
||||||
|
|
||||||
|
workGroupSize: workGroupSize, |
||||||
|
|
||||||
|
nonceRand: nonceRand, |
||||||
|
} |
||||||
|
c.devices = append(c.devices, deviceStruct) |
||||||
|
|
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
func (c *OpenCLMiner) Search(block pow.Block, stop <-chan struct{}, index int) (uint64, []byte) { |
||||||
|
c.mu.Lock() |
||||||
|
newDagSize := uint64(C.ethash_get_datasize(C.uint64_t(block.NumberU64()))) |
||||||
|
if newDagSize > c.dagSize { |
||||||
|
// TODO: clean up buffers from previous DAG?
|
||||||
|
err := InitCL(block.NumberU64(), c) |
||||||
|
if err != nil { |
||||||
|
fmt.Println("OpenCL init error: ", err) |
||||||
|
return 0, []byte{0} |
||||||
|
} |
||||||
|
} |
||||||
|
defer c.mu.Unlock() |
||||||
|
|
||||||
|
// Avoid unneeded OpenCL initialisation if we received stop while running InitCL
|
||||||
|
select { |
||||||
|
case <-stop: |
||||||
|
return 0, []byte{0} |
||||||
|
default: |
||||||
|
} |
||||||
|
|
||||||
|
headerHash := block.HashNoNonce() |
||||||
|
diff := block.Difficulty() |
||||||
|
target256 := new(big.Int).Div(maxUint256, diff) |
||||||
|
target64 := new(big.Int).Rsh(target256, 192).Uint64() |
||||||
|
var zero uint32 = 0 |
||||||
|
|
||||||
|
d := c.devices[index] |
||||||
|
|
||||||
|
_, err := d.queue.EnqueueWriteBuffer(d.headerBuf, false, 0, 32, unsafe.Pointer(&headerHash[0]), nil) |
||||||
|
if err != nil { |
||||||
|
fmt.Println("Error in Search clEnqueueWriterBuffer : ", err) |
||||||
|
return 0, []byte{0} |
||||||
|
} |
||||||
|
|
||||||
|
for i := 0; i < searchBufSize; i++ { |
||||||
|
_, err := d.queue.EnqueueWriteBuffer(d.searchBuffers[i], false, 0, 4, unsafe.Pointer(&zero), nil) |
||||||
|
if err != nil { |
||||||
|
fmt.Println("Error in Search clEnqueueWriterBuffer : ", err) |
||||||
|
return 0, []byte{0} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// wait for all search buffers to complete
|
||||||
|
err = d.queue.Finish() |
||||||
|
if err != nil { |
||||||
|
fmt.Println("Error in Search clFinish : ", err) |
||||||
|
return 0, []byte{0} |
||||||
|
} |
||||||
|
|
||||||
|
err = d.searchKernel.SetArg(1, d.headerBuf) |
||||||
|
if err != nil { |
||||||
|
fmt.Println("Error in Search clSetKernelArg : ", err) |
||||||
|
return 0, []byte{0} |
||||||
|
} |
||||||
|
|
||||||
|
err = d.searchKernel.SetArg(2, d.dagBuf) |
||||||
|
if err != nil { |
||||||
|
fmt.Println("Error in Search clSetKernelArg : ", err) |
||||||
|
return 0, []byte{0} |
||||||
|
} |
||||||
|
|
||||||
|
err = d.searchKernel.SetArg(4, target64) |
||||||
|
if err != nil { |
||||||
|
fmt.Println("Error in Search clSetKernelArg : ", err) |
||||||
|
return 0, []byte{0} |
||||||
|
} |
||||||
|
err = d.searchKernel.SetArg(5, uint32(math.MaxUint32)) |
||||||
|
if err != nil { |
||||||
|
fmt.Println("Error in Search clSetKernelArg : ", err) |
||||||
|
return 0, []byte{0} |
||||||
|
} |
||||||
|
|
||||||
|
// wait on this before returning
|
||||||
|
var preReturnEvent *cl.Event |
||||||
|
if d.openCL12 { |
||||||
|
preReturnEvent, err = d.ctx.CreateUserEvent() |
||||||
|
if err != nil { |
||||||
|
fmt.Println("Error in Search create CL user event : ", err) |
||||||
|
return 0, []byte{0} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
pending := make([]pendingSearch, 0, searchBufSize) |
||||||
|
var p *pendingSearch |
||||||
|
searchBufIndex := uint32(0) |
||||||
|
var checkNonce uint64 |
||||||
|
loops := int64(0) |
||||||
|
prevHashRate := int32(0) |
||||||
|
start := time.Now().UnixNano() |
||||||
|
// we grab a single random nonce and sets this as argument to the kernel search function
|
||||||
|
// the device will then add each local threads gid to the nonce, creating a unique nonce
|
||||||
|
// for each device computing unit executing in parallel
|
||||||
|
initNonce := uint64(d.nonceRand.Int63()) |
||||||
|
for nonce := initNonce; ; nonce += uint64(globalWorkSize) { |
||||||
|
select { |
||||||
|
case <-stop: |
||||||
|
|
||||||
|
/* |
||||||
|
if d.openCL12 { |
||||||
|
err = cl.WaitForEvents([]*cl.Event{preReturnEvent}) |
||||||
|
if err != nil { |
||||||
|
fmt.Println("Error in Search WaitForEvents: ", err) |
||||||
|
} |
||||||
|
} |
||||||
|
*/ |
||||||
|
|
||||||
|
atomic.AddInt32(&c.hashRate, -prevHashRate) |
||||||
|
return 0, []byte{0} |
||||||
|
default: |
||||||
|
} |
||||||
|
|
||||||
|
if (loops % (1 << 7)) == 0 { |
||||||
|
elapsed := time.Now().UnixNano() - start |
||||||
|
// TODO: verify if this is correct hash rate calculation
|
||||||
|
hashes := (float64(1e9) / float64(elapsed)) * float64(loops*1024*256) |
||||||
|
hashrateDiff := int32(hashes) - prevHashRate |
||||||
|
prevHashRate = int32(hashes) |
||||||
|
atomic.AddInt32(&c.hashRate, hashrateDiff) |
||||||
|
} |
||||||
|
loops++ |
||||||
|
|
||||||
|
err = d.searchKernel.SetArg(0, d.searchBuffers[searchBufIndex]) |
||||||
|
if err != nil { |
||||||
|
fmt.Println("Error in Search clSetKernelArg : ", err) |
||||||
|
return 0, []byte{0} |
||||||
|
} |
||||||
|
err = d.searchKernel.SetArg(3, nonce) |
||||||
|
if err != nil { |
||||||
|
fmt.Println("Error in Search clSetKernelArg : ", err) |
||||||
|
return 0, []byte{0} |
||||||
|
} |
||||||
|
|
||||||
|
// execute kernel
|
||||||
|
_, err := d.queue.EnqueueNDRangeKernel( |
||||||
|
d.searchKernel, |
||||||
|
[]int{0}, |
||||||
|
[]int{globalWorkSize}, |
||||||
|
[]int{d.workGroupSize}, |
||||||
|
nil) |
||||||
|
if err != nil { |
||||||
|
fmt.Println("Error in Search clEnqueueNDRangeKernel : ", err) |
||||||
|
return 0, []byte{0} |
||||||
|
} |
||||||
|
|
||||||
|
pending = append(pending, pendingSearch{bufIndex: searchBufIndex, startNonce: nonce}) |
||||||
|
searchBufIndex = (searchBufIndex + 1) % searchBufSize |
||||||
|
|
||||||
|
if len(pending) == searchBufSize { |
||||||
|
p = &(pending[searchBufIndex]) |
||||||
|
cres, _, err := d.queue.EnqueueMapBuffer(d.searchBuffers[p.bufIndex], true, |
||||||
|
cl.MapFlagRead, 0, (1+maxSearchResults)*SIZEOF_UINT32, |
||||||
|
nil) |
||||||
|
if err != nil { |
||||||
|
fmt.Println("Error in Search clEnqueueMapBuffer: ", err) |
||||||
|
return 0, []byte{0} |
||||||
|
} |
||||||
|
|
||||||
|
results := cres.ByteSlice() |
||||||
|
nfound := binary.LittleEndian.Uint32(results) |
||||||
|
nfound = uint32(math.Min(float64(nfound), float64(maxSearchResults))) |
||||||
|
// OpenCL returns the offsets from the start nonce
|
||||||
|
for i := uint32(0); i < nfound; i++ { |
||||||
|
lo := (i + 1) * SIZEOF_UINT32 |
||||||
|
hi := (i + 2) * SIZEOF_UINT32 |
||||||
|
upperNonce := uint64(binary.LittleEndian.Uint32(results[lo:hi])) |
||||||
|
checkNonce = p.startNonce + upperNonce |
||||||
|
if checkNonce != 0 { |
||||||
|
cn := C.uint64_t(checkNonce) |
||||||
|
ds := C.uint64_t(c.dagSize) |
||||||
|
// We verify that the nonce is indeed a solution by
|
||||||
|
// executing the Ethash verification function (on the CPU).
|
||||||
|
ret := C.ethash_light_compute_internal(c.ethash.Light.current.ptr, ds, hashToH256(headerHash), cn) |
||||||
|
// TODO: return result first
|
||||||
|
if ret.success && h256ToHash(ret.result).Big().Cmp(target256) <= 0 { |
||||||
|
_, err = d.queue.EnqueueUnmapMemObject(d.searchBuffers[p.bufIndex], cres, nil) |
||||||
|
if err != nil { |
||||||
|
fmt.Println("Error in Search clEnqueueUnmapMemObject: ", err) |
||||||
|
} |
||||||
|
if d.openCL12 { |
||||||
|
err = cl.WaitForEvents([]*cl.Event{preReturnEvent}) |
||||||
|
if err != nil { |
||||||
|
fmt.Println("Error in Search WaitForEvents: ", err) |
||||||
|
} |
||||||
|
} |
||||||
|
return checkNonce, C.GoBytes(unsafe.Pointer(&ret.mix_hash), C.int(32)) |
||||||
|
} |
||||||
|
|
||||||
|
_, err := d.queue.EnqueueWriteBuffer(d.searchBuffers[p.bufIndex], false, 0, 4, unsafe.Pointer(&zero), nil) |
||||||
|
if err != nil { |
||||||
|
fmt.Println("Error in Search cl: EnqueueWriteBuffer", err) |
||||||
|
return 0, []byte{0} |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
_, err = d.queue.EnqueueUnmapMemObject(d.searchBuffers[p.bufIndex], cres, nil) |
||||||
|
if err != nil { |
||||||
|
fmt.Println("Error in Search clEnqueueUnMapMemObject: ", err) |
||||||
|
return 0, []byte{0} |
||||||
|
} |
||||||
|
pending = append(pending[:searchBufIndex], pending[searchBufIndex+1:]...) |
||||||
|
} |
||||||
|
} |
||||||
|
if d.openCL12 { |
||||||
|
err := cl.WaitForEvents([]*cl.Event{preReturnEvent}) |
||||||
|
if err != nil { |
||||||
|
fmt.Println("Error in Search clWaitForEvents: ", err) |
||||||
|
return 0, []byte{0} |
||||||
|
} |
||||||
|
} |
||||||
|
return 0, []byte{0} |
||||||
|
} |
||||||
|
|
||||||
|
func (c *OpenCLMiner) Verify(block pow.Block) bool { |
||||||
|
return c.ethash.Light.Verify(block) |
||||||
|
} |
||||||
|
func (c *OpenCLMiner) GetHashrate() int64 { |
||||||
|
return int64(atomic.LoadInt32(&c.hashRate)) |
||||||
|
} |
||||||
|
func (c *OpenCLMiner) Turbo(on bool) { |
||||||
|
// This is GPU mining. Always be turbo.
|
||||||
|
} |
||||||
|
|
||||||
|
func replaceWords(text string, kvs map[string]string) string { |
||||||
|
for k, v := range kvs { |
||||||
|
text = strings.Replace(text, k, v, -1) |
||||||
|
} |
||||||
|
return text |
||||||
|
} |
||||||
|
|
||||||
|
func logErr(err error) { |
||||||
|
if err != nil { |
||||||
|
fmt.Println("Error in OpenCL call:", err) |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
func argErr(err error) error { |
||||||
|
return fmt.Errorf("arg err: %v", err) |
||||||
|
} |
600
Godeps/_workspace/src/github.com/ethereum/ethash/ethash_opencl_kernel_go_str.go
generated
vendored
600
Godeps/_workspace/src/github.com/ethereum/ethash/ethash_opencl_kernel_go_str.go
generated
vendored
@ -0,0 +1,600 @@ |
|||||||
|
package ethash |
||||||
|
|
||||||
|
/* DO NOT EDIT!!! |
||||||
|
|
||||||
|
This code is version controlled at |
||||||
|
https://github.com/ethereum/cpp-ethereum/blob/develop/libethash-cl/ethash_cl_miner_kernel.cl
|
||||||
|
|
||||||
|
If needed change it there first, then copy over here. |
||||||
|
*/ |
||||||
|
|
||||||
|
const kernel = ` |
||||||
|
// author Tim Hughes <tim@twistedfury.com>
|
||||||
|
// Tested on Radeon HD 7850
|
||||||
|
// Hashrate: 15940347 hashes/s
|
||||||
|
// Bandwidth: 124533 MB/s
|
||||||
|
// search kernel should fit in <= 84 VGPRS (3 wavefronts)
|
||||||
|
|
||||||
|
#define THREADS_PER_HASH (128 / 16) |
||||||
|
#define HASHES_PER_LOOP (GROUP_SIZE / THREADS_PER_HASH) |
||||||
|
|
||||||
|
#define FNV_PRIME 0x01000193 |
||||||
|
|
||||||
|
__constant uint2 const Keccak_f1600_RC[24] = { |
||||||
|
(uint2)(0x00000001, 0x00000000), |
||||||
|
(uint2)(0x00008082, 0x00000000), |
||||||
|
(uint2)(0x0000808a, 0x80000000), |
||||||
|
(uint2)(0x80008000, 0x80000000), |
||||||
|
(uint2)(0x0000808b, 0x00000000), |
||||||
|
(uint2)(0x80000001, 0x00000000), |
||||||
|
(uint2)(0x80008081, 0x80000000), |
||||||
|
(uint2)(0x00008009, 0x80000000), |
||||||
|
(uint2)(0x0000008a, 0x00000000), |
||||||
|
(uint2)(0x00000088, 0x00000000), |
||||||
|
(uint2)(0x80008009, 0x00000000), |
||||||
|
(uint2)(0x8000000a, 0x00000000), |
||||||
|
(uint2)(0x8000808b, 0x00000000), |
||||||
|
(uint2)(0x0000008b, 0x80000000), |
||||||
|
(uint2)(0x00008089, 0x80000000), |
||||||
|
(uint2)(0x00008003, 0x80000000), |
||||||
|
(uint2)(0x00008002, 0x80000000), |
||||||
|
(uint2)(0x00000080, 0x80000000), |
||||||
|
(uint2)(0x0000800a, 0x00000000), |
||||||
|
(uint2)(0x8000000a, 0x80000000), |
||||||
|
(uint2)(0x80008081, 0x80000000), |
||||||
|
(uint2)(0x00008080, 0x80000000), |
||||||
|
(uint2)(0x80000001, 0x00000000), |
||||||
|
(uint2)(0x80008008, 0x80000000), |
||||||
|
}; |
||||||
|
|
||||||
|
void keccak_f1600_round(uint2* a, uint r, uint out_size) |
||||||
|
{ |
||||||
|
#if !__ENDIAN_LITTLE__ |
||||||
|
for (uint i = 0; i != 25; ++i) |
||||||
|
a[i] = a[i].yx; |
||||||
|
#endif |
||||||
|
|
||||||
|
uint2 b[25]; |
||||||
|
uint2 t; |
||||||
|
|
||||||
|
// Theta
|
||||||
|
b[0] = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20]; |
||||||
|
b[1] = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21]; |
||||||
|
b[2] = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22]; |
||||||
|
b[3] = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23]; |
||||||
|
b[4] = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24]; |
||||||
|
t = b[4] ^ (uint2)(b[1].x << 1 | b[1].y >> 31, b[1].y << 1 | b[1].x >> 31); |
||||||
|
a[0] ^= t; |
||||||
|
a[5] ^= t; |
||||||
|
a[10] ^= t; |
||||||
|
a[15] ^= t; |
||||||
|
a[20] ^= t; |
||||||
|
t = b[0] ^ (uint2)(b[2].x << 1 | b[2].y >> 31, b[2].y << 1 | b[2].x >> 31); |
||||||
|
a[1] ^= t; |
||||||
|
a[6] ^= t; |
||||||
|
a[11] ^= t; |
||||||
|
a[16] ^= t; |
||||||
|
a[21] ^= t; |
||||||
|
t = b[1] ^ (uint2)(b[3].x << 1 | b[3].y >> 31, b[3].y << 1 | b[3].x >> 31); |
||||||
|
a[2] ^= t; |
||||||
|
a[7] ^= t; |
||||||
|
a[12] ^= t; |
||||||
|
a[17] ^= t; |
||||||
|
a[22] ^= t; |
||||||
|
t = b[2] ^ (uint2)(b[4].x << 1 | b[4].y >> 31, b[4].y << 1 | b[4].x >> 31); |
||||||
|
a[3] ^= t; |
||||||
|
a[8] ^= t; |
||||||
|
a[13] ^= t; |
||||||
|
a[18] ^= t; |
||||||
|
a[23] ^= t; |
||||||
|
t = b[3] ^ (uint2)(b[0].x << 1 | b[0].y >> 31, b[0].y << 1 | b[0].x >> 31); |
||||||
|
a[4] ^= t; |
||||||
|
a[9] ^= t; |
||||||
|
a[14] ^= t; |
||||||
|
a[19] ^= t; |
||||||
|
a[24] ^= t; |
||||||
|
|
||||||
|
// Rho Pi
|
||||||
|
b[0] = a[0]; |
||||||
|
b[10] = (uint2)(a[1].x << 1 | a[1].y >> 31, a[1].y << 1 | a[1].x >> 31); |
||||||
|
b[7] = (uint2)(a[10].x << 3 | a[10].y >> 29, a[10].y << 3 | a[10].x >> 29); |
||||||
|
b[11] = (uint2)(a[7].x << 6 | a[7].y >> 26, a[7].y << 6 | a[7].x >> 26); |
||||||
|
b[17] = (uint2)(a[11].x << 10 | a[11].y >> 22, a[11].y << 10 | a[11].x >> 22); |
||||||
|
b[18] = (uint2)(a[17].x << 15 | a[17].y >> 17, a[17].y << 15 | a[17].x >> 17); |
||||||
|
b[3] = (uint2)(a[18].x << 21 | a[18].y >> 11, a[18].y << 21 | a[18].x >> 11); |
||||||
|
b[5] = (uint2)(a[3].x << 28 | a[3].y >> 4, a[3].y << 28 | a[3].x >> 4); |
||||||
|
b[16] = (uint2)(a[5].y << 4 | a[5].x >> 28, a[5].x << 4 | a[5].y >> 28); |
||||||
|
b[8] = (uint2)(a[16].y << 13 | a[16].x >> 19, a[16].x << 13 | a[16].y >> 19); |
||||||
|
b[21] = (uint2)(a[8].y << 23 | a[8].x >> 9, a[8].x << 23 | a[8].y >> 9); |
||||||
|
b[24] = (uint2)(a[21].x << 2 | a[21].y >> 30, a[21].y << 2 | a[21].x >> 30); |
||||||
|
b[4] = (uint2)(a[24].x << 14 | a[24].y >> 18, a[24].y << 14 | a[24].x >> 18); |
||||||
|
b[15] = (uint2)(a[4].x << 27 | a[4].y >> 5, a[4].y << 27 | a[4].x >> 5); |
||||||
|
b[23] = (uint2)(a[15].y << 9 | a[15].x >> 23, a[15].x << 9 | a[15].y >> 23); |
||||||
|
b[19] = (uint2)(a[23].y << 24 | a[23].x >> 8, a[23].x << 24 | a[23].y >> 8); |
||||||
|
b[13] = (uint2)(a[19].x << 8 | a[19].y >> 24, a[19].y << 8 | a[19].x >> 24); |
||||||
|
b[12] = (uint2)(a[13].x << 25 | a[13].y >> 7, a[13].y << 25 | a[13].x >> 7); |
||||||
|
b[2] = (uint2)(a[12].y << 11 | a[12].x >> 21, a[12].x << 11 | a[12].y >> 21); |
||||||
|
b[20] = (uint2)(a[2].y << 30 | a[2].x >> 2, a[2].x << 30 | a[2].y >> 2); |
||||||
|
b[14] = (uint2)(a[20].x << 18 | a[20].y >> 14, a[20].y << 18 | a[20].x >> 14); |
||||||
|
b[22] = (uint2)(a[14].y << 7 | a[14].x >> 25, a[14].x << 7 | a[14].y >> 25); |
||||||
|
b[9] = (uint2)(a[22].y << 29 | a[22].x >> 3, a[22].x << 29 | a[22].y >> 3); |
||||||
|
b[6] = (uint2)(a[9].x << 20 | a[9].y >> 12, a[9].y << 20 | a[9].x >> 12); |
||||||
|
b[1] = (uint2)(a[6].y << 12 | a[6].x >> 20, a[6].x << 12 | a[6].y >> 20); |
||||||
|
|
||||||
|
// Chi
|
||||||
|
a[0] = bitselect(b[0] ^ b[2], b[0], b[1]); |
||||||
|
a[1] = bitselect(b[1] ^ b[3], b[1], b[2]); |
||||||
|
a[2] = bitselect(b[2] ^ b[4], b[2], b[3]); |
||||||
|
a[3] = bitselect(b[3] ^ b[0], b[3], b[4]); |
||||||
|
if (out_size >= 4) |
||||||
|
{ |
||||||
|
a[4] = bitselect(b[4] ^ b[1], b[4], b[0]); |
||||||
|
a[5] = bitselect(b[5] ^ b[7], b[5], b[6]); |
||||||
|
a[6] = bitselect(b[6] ^ b[8], b[6], b[7]); |
||||||
|
a[7] = bitselect(b[7] ^ b[9], b[7], b[8]); |
||||||
|
a[8] = bitselect(b[8] ^ b[5], b[8], b[9]); |
||||||
|
if (out_size >= 8) |
||||||
|
{ |
||||||
|
a[9] = bitselect(b[9] ^ b[6], b[9], b[5]); |
||||||
|
a[10] = bitselect(b[10] ^ b[12], b[10], b[11]); |
||||||
|
a[11] = bitselect(b[11] ^ b[13], b[11], b[12]); |
||||||
|
a[12] = bitselect(b[12] ^ b[14], b[12], b[13]); |
||||||
|
a[13] = bitselect(b[13] ^ b[10], b[13], b[14]); |
||||||
|
a[14] = bitselect(b[14] ^ b[11], b[14], b[10]); |
||||||
|
a[15] = bitselect(b[15] ^ b[17], b[15], b[16]); |
||||||
|
a[16] = bitselect(b[16] ^ b[18], b[16], b[17]); |
||||||
|
a[17] = bitselect(b[17] ^ b[19], b[17], b[18]); |
||||||
|
a[18] = bitselect(b[18] ^ b[15], b[18], b[19]); |
||||||
|
a[19] = bitselect(b[19] ^ b[16], b[19], b[15]); |
||||||
|
a[20] = bitselect(b[20] ^ b[22], b[20], b[21]); |
||||||
|
a[21] = bitselect(b[21] ^ b[23], b[21], b[22]); |
||||||
|
a[22] = bitselect(b[22] ^ b[24], b[22], b[23]); |
||||||
|
a[23] = bitselect(b[23] ^ b[20], b[23], b[24]); |
||||||
|
a[24] = bitselect(b[24] ^ b[21], b[24], b[20]); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Iota
|
||||||
|
a[0] ^= Keccak_f1600_RC[r]; |
||||||
|
|
||||||
|
#if !__ENDIAN_LITTLE__ |
||||||
|
for (uint i = 0; i != 25; ++i) |
||||||
|
a[i] = a[i].yx; |
||||||
|
#endif |
||||||
|
} |
||||||
|
|
||||||
|
void keccak_f1600_no_absorb(ulong* a, uint in_size, uint out_size, uint isolate) |
||||||
|
{ |
||||||
|
for (uint i = in_size; i != 25; ++i) |
||||||
|
{ |
||||||
|
a[i] = 0; |
||||||
|
} |
||||||
|
#if __ENDIAN_LITTLE__ |
||||||
|
a[in_size] ^= 0x0000000000000001; |
||||||
|
a[24-out_size*2] ^= 0x8000000000000000; |
||||||
|
#else |
||||||
|
a[in_size] ^= 0x0100000000000000; |
||||||
|
a[24-out_size*2] ^= 0x0000000000000080; |
||||||
|
#endif |
||||||
|
|
||||||
|
// Originally I unrolled the first and last rounds to interface
|
||||||
|
// better with surrounding code, however I haven't done this
|
||||||
|
// without causing the AMD compiler to blow up the VGPR usage.
|
||||||
|
uint r = 0; |
||||||
|
do |
||||||
|
{ |
||||||
|
// This dynamic branch stops the AMD compiler unrolling the loop
|
||||||
|
// and additionally saves about 33% of the VGPRs, enough to gain another
|
||||||
|
// wavefront. Ideally we'd get 4 in flight, but 3 is the best I can
|
||||||
|
// massage out of the compiler. It doesn't really seem to matter how
|
||||||
|
// much we try and help the compiler save VGPRs because it seems to throw
|
||||||
|
// that information away, hence the implementation of keccak here
|
||||||
|
// doesn't bother.
|
||||||
|
if (isolate) |
||||||
|
{ |
||||||
|
keccak_f1600_round((uint2*)a, r++, 25); |
||||||
|
} |
||||||
|
} |
||||||
|
while (r < 23); |
||||||
|
|
||||||
|
// final round optimised for digest size
|
||||||
|
keccak_f1600_round((uint2*)a, r++, out_size); |
||||||
|
} |
||||||
|
|
||||||
|
#define copy(dst, src, count) for (uint i = 0; i != count; ++i) { (dst)[i] = (src)[i]; } |
||||||
|
|
||||||
|
#define countof(x) (sizeof(x) / sizeof(x[0])) |
||||||
|
|
||||||
|
uint fnv(uint x, uint y) |
||||||
|
{ |
||||||
|
return x * FNV_PRIME ^ y; |
||||||
|
} |
||||||
|
|
||||||
|
uint4 fnv4(uint4 x, uint4 y) |
||||||
|
{ |
||||||
|
return x * FNV_PRIME ^ y; |
||||||
|
} |
||||||
|
|
||||||
|
uint fnv_reduce(uint4 v) |
||||||
|
{ |
||||||
|
return fnv(fnv(fnv(v.x, v.y), v.z), v.w); |
||||||
|
} |
||||||
|
|
||||||
|
typedef union |
||||||
|
{ |
||||||
|
ulong ulongs[32 / sizeof(ulong)]; |
||||||
|
uint uints[32 / sizeof(uint)]; |
||||||
|
} hash32_t; |
||||||
|
|
||||||
|
typedef union |
||||||
|
{ |
||||||
|
ulong ulongs[64 / sizeof(ulong)]; |
||||||
|
uint4 uint4s[64 / sizeof(uint4)]; |
||||||
|
} hash64_t; |
||||||
|
|
||||||
|
typedef union |
||||||
|
{ |
||||||
|
uint uints[128 / sizeof(uint)]; |
||||||
|
uint4 uint4s[128 / sizeof(uint4)]; |
||||||
|
} hash128_t; |
||||||
|
|
||||||
|
hash64_t init_hash(__constant hash32_t const* header, ulong nonce, uint isolate) |
||||||
|
{ |
||||||
|
hash64_t init; |
||||||
|
uint const init_size = countof(init.ulongs); |
||||||
|
uint const hash_size = countof(header->ulongs); |
||||||
|
|
||||||
|
// sha3_512(header .. nonce)
|
||||||
|
ulong state[25]; |
||||||
|
copy(state, header->ulongs, hash_size); |
||||||
|
state[hash_size] = nonce; |
||||||
|
keccak_f1600_no_absorb(state, hash_size + 1, init_size, isolate); |
||||||
|
|
||||||
|
copy(init.ulongs, state, init_size); |
||||||
|
return init; |
||||||
|
} |
||||||
|
|
||||||
|
uint inner_loop_chunks(uint4 init, uint thread_id, __local uint* share, __global hash128_t const* g_dag, __global hash128_t const* g_dag1, __global hash128_t const* g_dag2, __global hash128_t const* g_dag3, uint isolate) |
||||||
|
{ |
||||||
|
uint4 mix = init; |
||||||
|
|
||||||
|
// share init0
|
||||||
|
if (thread_id == 0) |
||||||
|
*share = mix.x; |
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
uint init0 = *share; |
||||||
|
|
||||||
|
uint a = 0; |
||||||
|
do |
||||||
|
{ |
||||||
|
bool update_share = thread_id == (a/4) % THREADS_PER_HASH; |
||||||
|
|
||||||
|
#pragma unroll |
||||||
|
for (uint i = 0; i != 4; ++i) |
||||||
|
{ |
||||||
|
if (update_share) |
||||||
|
{ |
||||||
|
uint m[4] = { mix.x, mix.y, mix.z, mix.w }; |
||||||
|
*share = fnv(init0 ^ (a+i), m[i]) % DAG_SIZE; |
||||||
|
} |
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
mix = fnv4(mix, *share>=3 * DAG_SIZE / 4 ? g_dag3[*share - 3 * DAG_SIZE / 4].uint4s[thread_id] : *share>=DAG_SIZE / 2 ? g_dag2[*share - DAG_SIZE / 2].uint4s[thread_id] : *share>=DAG_SIZE / 4 ? g_dag1[*share - DAG_SIZE / 4].uint4s[thread_id]:g_dag[*share].uint4s[thread_id]); |
||||||
|
} |
||||||
|
} while ((a += 4) != (ACCESSES & isolate)); |
||||||
|
|
||||||
|
return fnv_reduce(mix); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
uint inner_loop(uint4 init, uint thread_id, __local uint* share, __global hash128_t const* g_dag, uint isolate) |
||||||
|
{ |
||||||
|
uint4 mix = init; |
||||||
|
|
||||||
|
// share init0
|
||||||
|
if (thread_id == 0) |
||||||
|
*share = mix.x; |
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
uint init0 = *share; |
||||||
|
|
||||||
|
uint a = 0; |
||||||
|
do |
||||||
|
{ |
||||||
|
bool update_share = thread_id == (a/4) % THREADS_PER_HASH; |
||||||
|
|
||||||
|
#pragma unroll |
||||||
|
for (uint i = 0; i != 4; ++i) |
||||||
|
{ |
||||||
|
if (update_share) |
||||||
|
{ |
||||||
|
uint m[4] = { mix.x, mix.y, mix.z, mix.w }; |
||||||
|
*share = fnv(init0 ^ (a+i), m[i]) % DAG_SIZE; |
||||||
|
} |
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
mix = fnv4(mix, g_dag[*share].uint4s[thread_id]); |
||||||
|
} |
||||||
|
} |
||||||
|
while ((a += 4) != (ACCESSES & isolate)); |
||||||
|
|
||||||
|
return fnv_reduce(mix); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
hash32_t final_hash(hash64_t const* init, hash32_t const* mix, uint isolate) |
||||||
|
{ |
||||||
|
ulong state[25]; |
||||||
|
|
||||||
|
hash32_t hash; |
||||||
|
uint const hash_size = countof(hash.ulongs); |
||||||
|
uint const init_size = countof(init->ulongs); |
||||||
|
uint const mix_size = countof(mix->ulongs); |
||||||
|
|
||||||
|
// keccak_256(keccak_512(header..nonce) .. mix);
|
||||||
|
copy(state, init->ulongs, init_size); |
||||||
|
copy(state + init_size, mix->ulongs, mix_size); |
||||||
|
keccak_f1600_no_absorb(state, init_size+mix_size, hash_size, isolate); |
||||||
|
|
||||||
|
// copy out
|
||||||
|
copy(hash.ulongs, state, hash_size); |
||||||
|
return hash; |
||||||
|
} |
||||||
|
|
||||||
|
hash32_t compute_hash_simple( |
||||||
|
__constant hash32_t const* g_header, |
||||||
|
__global hash128_t const* g_dag, |
||||||
|
ulong nonce, |
||||||
|
uint isolate |
||||||
|
) |
||||||
|
{ |
||||||
|
hash64_t init = init_hash(g_header, nonce, isolate); |
||||||
|
|
||||||
|
hash128_t mix; |
||||||
|
for (uint i = 0; i != countof(mix.uint4s); ++i) |
||||||
|
{ |
||||||
|
mix.uint4s[i] = init.uint4s[i % countof(init.uint4s)]; |
||||||
|
} |
||||||
|
|
||||||
|
uint mix_val = mix.uints[0]; |
||||||
|
uint init0 = mix.uints[0]; |
||||||
|
uint a = 0; |
||||||
|
do |
||||||
|
{ |
||||||
|
uint pi = fnv(init0 ^ a, mix_val) % DAG_SIZE; |
||||||
|
uint n = (a+1) % countof(mix.uints); |
||||||
|
|
||||||
|
#pragma unroll |
||||||
|
for (uint i = 0; i != countof(mix.uints); ++i) |
||||||
|
{ |
||||||
|
mix.uints[i] = fnv(mix.uints[i], g_dag[pi].uints[i]); |
||||||
|
mix_val = i == n ? mix.uints[i] : mix_val; |
||||||
|
} |
||||||
|
} |
||||||
|
while (++a != (ACCESSES & isolate)); |
||||||
|
|
||||||
|
// reduce to output
|
||||||
|
hash32_t fnv_mix; |
||||||
|
for (uint i = 0; i != countof(fnv_mix.uints); ++i) |
||||||
|
{ |
||||||
|
fnv_mix.uints[i] = fnv_reduce(mix.uint4s[i]); |
||||||
|
} |
||||||
|
|
||||||
|
return final_hash(&init, &fnv_mix, isolate); |
||||||
|
} |
||||||
|
|
||||||
|
typedef union |
||||||
|
{ |
||||||
|
struct |
||||||
|
{ |
||||||
|
hash64_t init; |
||||||
|
uint pad; // avoid lds bank conflicts
|
||||||
|
}; |
||||||
|
hash32_t mix; |
||||||
|
} compute_hash_share; |
||||||
|
|
||||||
|
|
||||||
|
hash32_t compute_hash( |
||||||
|
__local compute_hash_share* share, |
||||||
|
__constant hash32_t const* g_header, |
||||||
|
__global hash128_t const* g_dag, |
||||||
|
ulong nonce, |
||||||
|
uint isolate |
||||||
|
) |
||||||
|
{ |
||||||
|
uint const gid = get_global_id(0); |
||||||
|
|
||||||
|
// Compute one init hash per work item.
|
||||||
|
hash64_t init = init_hash(g_header, nonce, isolate); |
||||||
|
|
||||||
|
// Threads work together in this phase in groups of 8.
|
||||||
|
uint const thread_id = gid % THREADS_PER_HASH; |
||||||
|
uint const hash_id = (gid % GROUP_SIZE) / THREADS_PER_HASH; |
||||||
|
|
||||||
|
hash32_t mix; |
||||||
|
uint i = 0; |
||||||
|
do |
||||||
|
{ |
||||||
|
// share init with other threads
|
||||||
|
if (i == thread_id) |
||||||
|
share[hash_id].init = init; |
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
uint4 thread_init = share[hash_id].init.uint4s[thread_id % (64 / sizeof(uint4))]; |
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
uint thread_mix = inner_loop(thread_init, thread_id, share[hash_id].mix.uints, g_dag, isolate); |
||||||
|
|
||||||
|
share[hash_id].mix.uints[thread_id] = thread_mix; |
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
if (i == thread_id) |
||||||
|
mix = share[hash_id].mix; |
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
} |
||||||
|
while (++i != (THREADS_PER_HASH & isolate)); |
||||||
|
|
||||||
|
return final_hash(&init, &mix, isolate); |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
hash32_t compute_hash_chunks( |
||||||
|
__local compute_hash_share* share, |
||||||
|
__constant hash32_t const* g_header, |
||||||
|
__global hash128_t const* g_dag, |
||||||
|
__global hash128_t const* g_dag1, |
||||||
|
__global hash128_t const* g_dag2, |
||||||
|
__global hash128_t const* g_dag3, |
||||||
|
ulong nonce, |
||||||
|
uint isolate |
||||||
|
) |
||||||
|
{ |
||||||
|
uint const gid = get_global_id(0); |
||||||
|
|
||||||
|
// Compute one init hash per work item.
|
||||||
|
hash64_t init = init_hash(g_header, nonce, isolate); |
||||||
|
|
||||||
|
// Threads work together in this phase in groups of 8.
|
||||||
|
uint const thread_id = gid % THREADS_PER_HASH; |
||||||
|
uint const hash_id = (gid % GROUP_SIZE) / THREADS_PER_HASH; |
||||||
|
|
||||||
|
hash32_t mix; |
||||||
|
uint i = 0; |
||||||
|
do |
||||||
|
{ |
||||||
|
// share init with other threads
|
||||||
|
if (i == thread_id) |
||||||
|
share[hash_id].init = init; |
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
uint4 thread_init = share[hash_id].init.uint4s[thread_id % (64 / sizeof(uint4))]; |
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
uint thread_mix = inner_loop_chunks(thread_init, thread_id, share[hash_id].mix.uints, g_dag, g_dag1, g_dag2, g_dag3, isolate); |
||||||
|
|
||||||
|
share[hash_id].mix.uints[thread_id] = thread_mix; |
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
|
||||||
|
if (i == thread_id) |
||||||
|
mix = share[hash_id].mix; |
||||||
|
barrier(CLK_LOCAL_MEM_FENCE); |
||||||
|
} |
||||||
|
while (++i != (THREADS_PER_HASH & isolate)); |
||||||
|
|
||||||
|
return final_hash(&init, &mix, isolate); |
||||||
|
} |
||||||
|
|
||||||
|
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) |
||||||
|
__kernel void ethash_hash_simple( |
||||||
|
__global hash32_t* g_hashes, |
||||||
|
__constant hash32_t const* g_header, |
||||||
|
__global hash128_t const* g_dag, |
||||||
|
ulong start_nonce, |
||||||
|
uint isolate |
||||||
|
) |
||||||
|
{ |
||||||
|
uint const gid = get_global_id(0); |
||||||
|
g_hashes[gid] = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate); |
||||||
|
} |
||||||
|
|
||||||
|
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) |
||||||
|
__kernel void ethash_search_simple( |
||||||
|
__global volatile uint* restrict g_output, |
||||||
|
__constant hash32_t const* g_header, |
||||||
|
__global hash128_t const* g_dag, |
||||||
|
ulong start_nonce, |
||||||
|
ulong target, |
||||||
|
uint isolate |
||||||
|
) |
||||||
|
{ |
||||||
|
uint const gid = get_global_id(0); |
||||||
|
hash32_t hash = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate); |
||||||
|
|
||||||
|
if (hash.ulongs[countof(hash.ulongs)-1] < target) |
||||||
|
{ |
||||||
|
uint slot = min(convert_uint(MAX_OUTPUTS), convert_uint(atomic_inc(&g_output[0]) + 1)); |
||||||
|
g_output[slot] = gid; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
|
||||||
|
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) |
||||||
|
__kernel void ethash_hash( |
||||||
|
__global hash32_t* g_hashes, |
||||||
|
__constant hash32_t const* g_header, |
||||||
|
__global hash128_t const* g_dag, |
||||||
|
ulong start_nonce, |
||||||
|
uint isolate |
||||||
|
) |
||||||
|
{ |
||||||
|
__local compute_hash_share share[HASHES_PER_LOOP]; |
||||||
|
|
||||||
|
uint const gid = get_global_id(0); |
||||||
|
g_hashes[gid] = compute_hash(share, g_header, g_dag, start_nonce + gid, isolate); |
||||||
|
} |
||||||
|
|
||||||
|
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) |
||||||
|
__kernel void ethash_search( |
||||||
|
__global volatile uint* restrict g_output, |
||||||
|
__constant hash32_t const* g_header, |
||||||
|
__global hash128_t const* g_dag, |
||||||
|
ulong start_nonce, |
||||||
|
ulong target, |
||||||
|
uint isolate |
||||||
|
) |
||||||
|
{ |
||||||
|
__local compute_hash_share share[HASHES_PER_LOOP]; |
||||||
|
|
||||||
|
uint const gid = get_global_id(0); |
||||||
|
hash32_t hash = compute_hash(share, g_header, g_dag, start_nonce + gid, isolate); |
||||||
|
|
||||||
|
if (as_ulong(as_uchar8(hash.ulongs[0]).s76543210) < target) |
||||||
|
{ |
||||||
|
uint slot = min((uint)MAX_OUTPUTS, atomic_inc(&g_output[0]) + 1); |
||||||
|
g_output[slot] = gid; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) |
||||||
|
__kernel void ethash_hash_chunks( |
||||||
|
__global hash32_t* g_hashes, |
||||||
|
__constant hash32_t const* g_header, |
||||||
|
__global hash128_t const* g_dag, |
||||||
|
__global hash128_t const* g_dag1, |
||||||
|
__global hash128_t const* g_dag2, |
||||||
|
__global hash128_t const* g_dag3, |
||||||
|
ulong start_nonce, |
||||||
|
uint isolate |
||||||
|
) |
||||||
|
{ |
||||||
|
__local compute_hash_share share[HASHES_PER_LOOP]; |
||||||
|
|
||||||
|
uint const gid = get_global_id(0); |
||||||
|
g_hashes[gid] = compute_hash_chunks(share, g_header, g_dag, g_dag1, g_dag2, g_dag3,start_nonce + gid, isolate); |
||||||
|
} |
||||||
|
|
||||||
|
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) |
||||||
|
__kernel void ethash_search_chunks( |
||||||
|
__global volatile uint* restrict g_output, |
||||||
|
__constant hash32_t const* g_header, |
||||||
|
__global hash128_t const* g_dag, |
||||||
|
__global hash128_t const* g_dag1, |
||||||
|
__global hash128_t const* g_dag2, |
||||||
|
__global hash128_t const* g_dag3, |
||||||
|
ulong start_nonce, |
||||||
|
ulong target, |
||||||
|
uint isolate |
||||||
|
) |
||||||
|
{ |
||||||
|
__local compute_hash_share share[HASHES_PER_LOOP]; |
||||||
|
|
||||||
|
uint const gid = get_global_id(0); |
||||||
|
hash32_t hash = compute_hash_chunks(share, g_header, g_dag, g_dag1, g_dag2, g_dag3, start_nonce + gid, isolate); |
||||||
|
|
||||||
|
if (as_ulong(as_uchar8(hash.ulongs[0]).s76543210) < target) |
||||||
|
{ |
||||||
|
uint slot = min(convert_uint(MAX_OUTPUTS), convert_uint(atomic_inc(&g_output[0]) + 1)); |
||||||
|
g_output[slot] = gid; |
||||||
|
} |
||||||
|
} |
||||||
|
` |
@ -0,0 +1,54 @@ |
|||||||
|
// Copyright 2014 The go-ethereum Authors
|
||||||
|
// This file is part of the go-ethereum library.
|
||||||
|
//
|
||||||
|
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Lesser General Public License as published by
|
||||||
|
// the Free Software Foundation, either version 3 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU Lesser General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU Lesser General Public License
|
||||||
|
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
// +build !opencl
|
||||||
|
|
||||||
|
package eth |
||||||
|
|
||||||
|
import ( |
||||||
|
"errors" |
||||||
|
"fmt" |
||||||
|
|
||||||
|
"github.com/ethereum/go-ethereum/logger" |
||||||
|
"github.com/ethereum/go-ethereum/logger/glog" |
||||||
|
) |
||||||
|
|
||||||
|
const disabledInfo = "Set GO_OPENCL and re-build to enable." |
||||||
|
|
||||||
|
func (s *Ethereum) StartMining(threads int, gpus string) error { |
||||||
|
eb, err := s.Etherbase() |
||||||
|
if err != nil { |
||||||
|
err = fmt.Errorf("Cannot start mining without etherbase address: %v", err) |
||||||
|
glog.V(logger.Error).Infoln(err) |
||||||
|
return err |
||||||
|
} |
||||||
|
|
||||||
|
if gpus != "" { |
||||||
|
return errors.New("GPU mining disabled. " + disabledInfo) |
||||||
|
} |
||||||
|
|
||||||
|
// CPU mining
|
||||||
|
go s.miner.Start(eb, threads) |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
func GPUBench(gpuid uint64) { |
||||||
|
fmt.Println("GPU mining disabled. " + disabledInfo) |
||||||
|
} |
||||||
|
|
||||||
|
func PrintOpenCLDevices() { |
||||||
|
fmt.Println("OpenCL disabled. " + disabledInfo) |
||||||
|
} |
@ -0,0 +1,103 @@ |
|||||||
|
// Copyright 2014 The go-ethereum Authors
|
||||||
|
// This file is part of the go-ethereum library.
|
||||||
|
//
|
||||||
|
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||||
|
// it under the terms of the GNU Lesser General Public License as published by
|
||||||
|
// the Free Software Foundation, either version 3 of the License, or
|
||||||
|
// (at your option) any later version.
|
||||||
|
//
|
||||||
|
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
// GNU Lesser General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU Lesser General Public License
|
||||||
|
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
// +build opencl
|
||||||
|
|
||||||
|
package eth |
||||||
|
|
||||||
|
import ( |
||||||
|
"fmt" |
||||||
|
"math/big" |
||||||
|
"strconv" |
||||||
|
"strings" |
||||||
|
"time" |
||||||
|
|
||||||
|
"github.com/ethereum/ethash" |
||||||
|
"github.com/ethereum/go-ethereum/common" |
||||||
|
"github.com/ethereum/go-ethereum/core/types" |
||||||
|
"github.com/ethereum/go-ethereum/logger" |
||||||
|
"github.com/ethereum/go-ethereum/logger/glog" |
||||||
|
"github.com/ethereum/go-ethereum/miner" |
||||||
|
) |
||||||
|
|
||||||
|
func (s *Ethereum) StartMining(threads int, gpus string) error { |
||||||
|
eb, err := s.Etherbase() |
||||||
|
if err != nil { |
||||||
|
err = fmt.Errorf("Cannot start mining without etherbase address: %v", err) |
||||||
|
glog.V(logger.Error).Infoln(err) |
||||||
|
return err |
||||||
|
} |
||||||
|
|
||||||
|
// GPU mining
|
||||||
|
if gpus != "" { |
||||||
|
var ids []int |
||||||
|
for _, s := range strings.Split(gpus, ",") { |
||||||
|
i, err := strconv.Atoi(s) |
||||||
|
if err != nil { |
||||||
|
return fmt.Errorf("Invalid GPU id(s): %v", err) |
||||||
|
} |
||||||
|
if i < 0 { |
||||||
|
return fmt.Errorf("Invalid GPU id: %v", i) |
||||||
|
} |
||||||
|
ids = append(ids, i) |
||||||
|
} |
||||||
|
|
||||||
|
// TODO: re-creating miner is a bit ugly
|
||||||
|
cl := ethash.NewCL(ids) |
||||||
|
s.miner = miner.New(s, s.EventMux(), cl) |
||||||
|
go s.miner.Start(eb, len(ids)) |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
// CPU mining
|
||||||
|
go s.miner.Start(eb, threads) |
||||||
|
return nil |
||||||
|
} |
||||||
|
|
||||||
|
func GPUBench(gpuid uint64) { |
||||||
|
e := ethash.NewCL([]int{int(gpuid)}) |
||||||
|
|
||||||
|
var h common.Hash |
||||||
|
bogoHeader := &types.Header{ |
||||||
|
ParentHash: h, |
||||||
|
Number: big.NewInt(int64(42)), |
||||||
|
Difficulty: big.NewInt(int64(999999999999999)), |
||||||
|
} |
||||||
|
bogoBlock := types.NewBlock(bogoHeader, nil, nil, nil) |
||||||
|
|
||||||
|
err := ethash.InitCL(bogoBlock.NumberU64(), e) |
||||||
|
if err != nil { |
||||||
|
fmt.Println("OpenCL init error: ", err) |
||||||
|
return |
||||||
|
} |
||||||
|
|
||||||
|
stopChan := make(chan struct{}) |
||||||
|
reportHashRate := func() { |
||||||
|
for { |
||||||
|
time.Sleep(3 * time.Second) |
||||||
|
fmt.Printf("hashes/s : %v\n", e.GetHashrate()) |
||||||
|
} |
||||||
|
} |
||||||
|
fmt.Printf("Starting benchmark (%v seconds)\n", 60) |
||||||
|
go reportHashRate() |
||||||
|
go e.Search(bogoBlock, stopChan, 0) |
||||||
|
time.Sleep(60 * time.Second) |
||||||
|
fmt.Println("OK.") |
||||||
|
} |
||||||
|
|
||||||
|
func PrintOpenCLDevices() { |
||||||
|
ethash.PrintDevices() |
||||||
|
} |
Loading…
Reference in new issue