forked from mirror/go-ethereum
parent
8b865fa9bf
commit
ec6a548ee3
@ -0,0 +1,26 @@ |
||||
/* |
||||
Package cl provides a binding to the OpenCL api. It's mostly a low-level |
||||
wrapper that avoids adding functionality while still making the interface |
||||
a little more friendly and easy to use. |
||||
|
||||
Resource life-cycle management: |
||||
|
||||
For any CL object that gets created (buffer, queue, kernel, etc..) you should |
||||
call object.Release() when finished with it to free the CL resources. This |
||||
explicitely calls the related clXXXRelease method for the type. However, |
||||
as a fallback there is a finalizer set for every resource item that takes |
||||
care of it (eventually) if Release isn't called. In this way you can have |
||||
better control over the life cycle of resources while having a fall back |
||||
to avoid leaks. This is similar to how file handles and such are handled |
||||
in the Go standard packages. |
||||
*/ |
||||
package cl |
||||
|
||||
// #include "headers/1.2/opencl.h"
|
||||
// #cgo CFLAGS: -Iheaders/1.2
|
||||
// #cgo darwin LDFLAGS: -framework OpenCL
|
||||
// #cgo linux LDFLAGS: -lOpenCL
|
||||
import "C" |
||||
import "errors" |
||||
|
||||
var ErrUnsupported = errors.New("cl: unsupported") |
@ -0,0 +1,254 @@ |
||||
package cl |
||||
|
||||
import ( |
||||
"math/rand" |
||||
"reflect" |
||||
"strings" |
||||
"testing" |
||||
) |
||||
|
||||
var kernelSource = ` |
||||
__kernel void square( |
||||
__global float* input, |
||||
__global float* output, |
||||
const unsigned int count) |
||||
{ |
||||
int i = get_global_id(0); |
||||
if(i < count) |
||||
output[i] = input[i] * input[i]; |
||||
} |
||||
` |
||||
|
||||
func getObjectStrings(object interface{}) map[string]string { |
||||
v := reflect.ValueOf(object) |
||||
t := reflect.TypeOf(object) |
||||
|
||||
strs := make(map[string]string) |
||||
|
||||
numMethods := t.NumMethod() |
||||
for i := 0; i < numMethods; i++ { |
||||
method := t.Method(i) |
||||
if method.Type.NumIn() == 1 && method.Type.NumOut() == 1 && method.Type.Out(0).Kind() == reflect.String { |
||||
// this is a string-returning method with (presumably) only a pointer receiver parameter
|
||||
// call it
|
||||
outs := v.Method(i).Call([]reflect.Value{}) |
||||
// put the result in our map
|
||||
strs[method.Name] = (outs[0].Interface()).(string) |
||||
} |
||||
} |
||||
|
||||
return strs |
||||
} |
||||
|
||||
func TestPlatformStringsContainNoNULs(t *testing.T) { |
||||
platforms, err := GetPlatforms() |
||||
if err != nil { |
||||
t.Fatalf("Failed to get platforms: %+v", err) |
||||
} |
||||
|
||||
for _, p := range platforms { |
||||
for key, value := range getObjectStrings(p) { |
||||
if strings.Contains(value, "\x00") { |
||||
t.Fatalf("platform string %q = %+q contains NUL", key, value) |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
func TestDeviceStringsContainNoNULs(t *testing.T) { |
||||
platforms, err := GetPlatforms() |
||||
if err != nil { |
||||
t.Fatalf("Failed to get platforms: %+v", err) |
||||
} |
||||
|
||||
for _, p := range platforms { |
||||
devs, err := p.GetDevices(DeviceTypeAll) |
||||
if err != nil { |
||||
t.Fatalf("Failed to get devices for platform %q: %+v", p.Name(), err) |
||||
} |
||||
|
||||
for _, d := range devs { |
||||
for key, value := range getObjectStrings(d) { |
||||
if strings.Contains(value, "\x00") { |
||||
t.Fatalf("device string %q = %+q contains NUL", key, value) |
||||
} |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
func TestHello(t *testing.T) { |
||||
var data [1024]float32 |
||||
for i := 0; i < len(data); i++ { |
||||
data[i] = rand.Float32() |
||||
} |
||||
|
||||
platforms, err := GetPlatforms() |
||||
if err != nil { |
||||
t.Fatalf("Failed to get platforms: %+v", err) |
||||
} |
||||
for i, p := range platforms { |
||||
t.Logf("Platform %d:", i) |
||||
t.Logf(" Name: %s", p.Name()) |
||||
t.Logf(" Vendor: %s", p.Vendor()) |
||||
t.Logf(" Profile: %s", p.Profile()) |
||||
t.Logf(" Version: %s", p.Version()) |
||||
t.Logf(" Extensions: %s", p.Extensions()) |
||||
} |
||||
platform := platforms[0] |
||||
|
||||
devices, err := platform.GetDevices(DeviceTypeAll) |
||||
if err != nil { |
||||
t.Fatalf("Failed to get devices: %+v", err) |
||||
} |
||||
if len(devices) == 0 { |
||||
t.Fatalf("GetDevices returned no devices") |
||||
} |
||||
deviceIndex := -1 |
||||
for i, d := range devices { |
||||
if deviceIndex < 0 && d.Type() == DeviceTypeGPU { |
||||
deviceIndex = i |
||||
} |
||||
t.Logf("Device %d (%s): %s", i, d.Type(), d.Name()) |
||||
t.Logf(" Address Bits: %d", d.AddressBits()) |
||||
t.Logf(" Available: %+v", d.Available()) |
||||
// t.Logf(" Built-In Kernels: %s", d.BuiltInKernels())
|
||||
t.Logf(" Compiler Available: %+v", d.CompilerAvailable()) |
||||
t.Logf(" Double FP Config: %s", d.DoubleFPConfig()) |
||||
t.Logf(" Driver Version: %s", d.DriverVersion()) |
||||
t.Logf(" Error Correction Supported: %+v", d.ErrorCorrectionSupport()) |
||||
t.Logf(" Execution Capabilities: %s", d.ExecutionCapabilities()) |
||||
t.Logf(" Extensions: %s", d.Extensions()) |
||||
t.Logf(" Global Memory Cache Type: %s", d.GlobalMemCacheType()) |
||||
t.Logf(" Global Memory Cacheline Size: %d KB", d.GlobalMemCachelineSize()/1024) |
||||
t.Logf(" Global Memory Size: %d MB", d.GlobalMemSize()/(1024*1024)) |
||||
t.Logf(" Half FP Config: %s", d.HalfFPConfig()) |
||||
t.Logf(" Host Unified Memory: %+v", d.HostUnifiedMemory()) |
||||
t.Logf(" Image Support: %+v", d.ImageSupport()) |
||||
t.Logf(" Image2D Max Dimensions: %d x %d", d.Image2DMaxWidth(), d.Image2DMaxHeight()) |
||||
t.Logf(" Image3D Max Dimenionns: %d x %d x %d", d.Image3DMaxWidth(), d.Image3DMaxHeight(), d.Image3DMaxDepth()) |
||||
// t.Logf(" Image Max Buffer Size: %d", d.ImageMaxBufferSize())
|
||||
// t.Logf(" Image Max Array Size: %d", d.ImageMaxArraySize())
|
||||
// t.Logf(" Linker Available: %+v", d.LinkerAvailable())
|
||||
t.Logf(" Little Endian: %+v", d.EndianLittle()) |
||||
t.Logf(" Local Mem Size Size: %d KB", d.LocalMemSize()/1024) |
||||
t.Logf(" Local Mem Type: %s", d.LocalMemType()) |
||||
t.Logf(" Max Clock Frequency: %d", d.MaxClockFrequency()) |
||||
t.Logf(" Max Compute Units: %d", d.MaxComputeUnits()) |
||||
t.Logf(" Max Constant Args: %d", d.MaxConstantArgs()) |
||||
t.Logf(" Max Constant Buffer Size: %d KB", d.MaxConstantBufferSize()/1024) |
||||
t.Logf(" Max Mem Alloc Size: %d KB", d.MaxMemAllocSize()/1024) |
||||
t.Logf(" Max Parameter Size: %d", d.MaxParameterSize()) |
||||
t.Logf(" Max Read-Image Args: %d", d.MaxReadImageArgs()) |
||||
t.Logf(" Max Samplers: %d", d.MaxSamplers()) |
||||
t.Logf(" Max Work Group Size: %d", d.MaxWorkGroupSize()) |
||||
t.Logf(" Max Work Item Dimensions: %d", d.MaxWorkItemDimensions()) |
||||
t.Logf(" Max Work Item Sizes: %d", d.MaxWorkItemSizes()) |
||||
t.Logf(" Max Write-Image Args: %d", d.MaxWriteImageArgs()) |
||||
t.Logf(" Memory Base Address Alignment: %d", d.MemBaseAddrAlign()) |
||||
t.Logf(" Native Vector Width Char: %d", d.NativeVectorWidthChar()) |
||||
t.Logf(" Native Vector Width Short: %d", d.NativeVectorWidthShort()) |
||||
t.Logf(" Native Vector Width Int: %d", d.NativeVectorWidthInt()) |
||||
t.Logf(" Native Vector Width Long: %d", d.NativeVectorWidthLong()) |
||||
t.Logf(" Native Vector Width Float: %d", d.NativeVectorWidthFloat()) |
||||
t.Logf(" Native Vector Width Double: %d", d.NativeVectorWidthDouble()) |
||||
t.Logf(" Native Vector Width Half: %d", d.NativeVectorWidthHalf()) |
||||
t.Logf(" OpenCL C Version: %s", d.OpenCLCVersion()) |
||||
// t.Logf(" Parent Device: %+v", d.ParentDevice())
|
||||
t.Logf(" Profile: %s", d.Profile()) |
||||
t.Logf(" Profiling Timer Resolution: %d", d.ProfilingTimerResolution()) |
||||
t.Logf(" Vendor: %s", d.Vendor()) |
||||
t.Logf(" Version: %s", d.Version()) |
||||
} |
||||
if deviceIndex < 0 { |
||||
deviceIndex = 0 |
||||
} |
||||
device := devices[deviceIndex] |
||||
t.Logf("Using device %d", deviceIndex) |
||||
context, err := CreateContext([]*Device{device}) |
||||
if err != nil { |
||||
t.Fatalf("CreateContext failed: %+v", err) |
||||
} |
||||
// imageFormats, err := context.GetSupportedImageFormats(0, MemObjectTypeImage2D)
|
||||
// if err != nil {
|
||||
// t.Fatalf("GetSupportedImageFormats failed: %+v", err)
|
||||
// }
|
||||
// t.Logf("Supported image formats: %+v", imageFormats)
|
||||
queue, err := context.CreateCommandQueue(device, 0) |
||||
if err != nil { |
||||
t.Fatalf("CreateCommandQueue failed: %+v", err) |
||||
} |
||||
program, err := context.CreateProgramWithSource([]string{kernelSource}) |
||||
if err != nil { |
||||
t.Fatalf("CreateProgramWithSource failed: %+v", err) |
||||
} |
||||
if err := program.BuildProgram(nil, ""); err != nil { |
||||
t.Fatalf("BuildProgram failed: %+v", err) |
||||
} |
||||
kernel, err := program.CreateKernel("square") |
||||
if err != nil { |
||||
t.Fatalf("CreateKernel failed: %+v", err) |
||||
} |
||||
for i := 0; i < 3; i++ { |
||||
name, err := kernel.ArgName(i) |
||||
if err == ErrUnsupported { |
||||
break |
||||
} else if err != nil { |
||||
t.Errorf("GetKernelArgInfo for name failed: %+v", err) |
||||
break |
||||
} else { |
||||
t.Logf("Kernel arg %d: %s", i, name) |
||||
} |
||||
} |
||||
input, err := context.CreateEmptyBuffer(MemReadOnly, 4*len(data)) |
||||
if err != nil { |
||||
t.Fatalf("CreateBuffer failed for input: %+v", err) |
||||
} |
||||
output, err := context.CreateEmptyBuffer(MemReadOnly, 4*len(data)) |
||||
if err != nil { |
||||
t.Fatalf("CreateBuffer failed for output: %+v", err) |
||||
} |
||||
if _, err := queue.EnqueueWriteBufferFloat32(input, true, 0, data[:], nil); err != nil { |
||||
t.Fatalf("EnqueueWriteBufferFloat32 failed: %+v", err) |
||||
} |
||||
if err := kernel.SetArgs(input, output, uint32(len(data))); err != nil { |
||||
t.Fatalf("SetKernelArgs failed: %+v", err) |
||||
} |
||||
|
||||
local, err := kernel.WorkGroupSize(device) |
||||
if err != nil { |
||||
t.Fatalf("WorkGroupSize failed: %+v", err) |
||||
} |
||||
t.Logf("Work group size: %d", local) |
||||
size, _ := kernel.PreferredWorkGroupSizeMultiple(nil) |
||||
t.Logf("Preferred Work Group Size Multiple: %d", size) |
||||
|
||||
global := len(data) |
||||
d := len(data) % local |
||||
if d != 0 { |
||||
global += local - d |
||||
} |
||||
if _, err := queue.EnqueueNDRangeKernel(kernel, nil, []int{global}, []int{local}, nil); err != nil { |
||||
t.Fatalf("EnqueueNDRangeKernel failed: %+v", err) |
||||
} |
||||
|
||||
if err := queue.Finish(); err != nil { |
||||
t.Fatalf("Finish failed: %+v", err) |
||||
} |
||||
|
||||
results := make([]float32, len(data)) |
||||
if _, err := queue.EnqueueReadBufferFloat32(output, true, 0, results, nil); err != nil { |
||||
t.Fatalf("EnqueueReadBufferFloat32 failed: %+v", err) |
||||
} |
||||
|
||||
correct := 0 |
||||
for i, v := range data { |
||||
if results[i] == v*v { |
||||
correct++ |
||||
} |
||||
} |
||||
|
||||
if correct != len(data) { |
||||
t.Fatalf("%d/%d correct values", correct, len(data)) |
||||
} |
||||
} |
@ -0,0 +1,161 @@ |
||||
package cl |
||||
|
||||
// #include <stdlib.h>
|
||||
// #ifdef __APPLE__
|
||||
// #include "OpenCL/opencl.h"
|
||||
// #else
|
||||
// #include "cl.h"
|
||||
// #endif
|
||||
import "C" |
||||
|
||||
import ( |
||||
"runtime" |
||||
"unsafe" |
||||
) |
||||
|
||||
const maxImageFormats = 256 |
||||
|
||||
type Context struct { |
||||
clContext C.cl_context |
||||
devices []*Device |
||||
} |
||||
|
||||
type MemObject struct { |
||||
clMem C.cl_mem |
||||
size int |
||||
} |
||||
|
||||
func releaseContext(c *Context) { |
||||
if c.clContext != nil { |
||||
C.clReleaseContext(c.clContext) |
||||
c.clContext = nil |
||||
} |
||||
} |
||||
|
||||
func releaseMemObject(b *MemObject) { |
||||
if b.clMem != nil { |
||||
C.clReleaseMemObject(b.clMem) |
||||
b.clMem = nil |
||||
} |
||||
} |
||||
|
||||
func newMemObject(mo C.cl_mem, size int) *MemObject { |
||||
memObject := &MemObject{clMem: mo, size: size} |
||||
runtime.SetFinalizer(memObject, releaseMemObject) |
||||
return memObject |
||||
} |
||||
|
||||
func (b *MemObject) Release() { |
||||
releaseMemObject(b) |
||||
} |
||||
|
||||
// TODO: properties
|
||||
func CreateContext(devices []*Device) (*Context, error) { |
||||
deviceIds := buildDeviceIdList(devices) |
||||
var err C.cl_int |
||||
clContext := C.clCreateContext(nil, C.cl_uint(len(devices)), &deviceIds[0], nil, nil, &err) |
||||
if err != C.CL_SUCCESS { |
||||
return nil, toError(err) |
||||
} |
||||
if clContext == nil { |
||||
return nil, ErrUnknown |
||||
} |
||||
context := &Context{clContext: clContext, devices: devices} |
||||
runtime.SetFinalizer(context, releaseContext) |
||||
return context, nil |
||||
} |
||||
|
||||
func (ctx *Context) GetSupportedImageFormats(flags MemFlag, imageType MemObjectType) ([]ImageFormat, error) { |
||||
var formats [maxImageFormats]C.cl_image_format |
||||
var nFormats C.cl_uint |
||||
if err := C.clGetSupportedImageFormats(ctx.clContext, C.cl_mem_flags(flags), C.cl_mem_object_type(imageType), maxImageFormats, &formats[0], &nFormats); err != C.CL_SUCCESS { |
||||
return nil, toError(err) |
||||
} |
||||
fmts := make([]ImageFormat, nFormats) |
||||
for i, f := range formats[:nFormats] { |
||||
fmts[i] = ImageFormat{ |
||||
ChannelOrder: ChannelOrder(f.image_channel_order), |
||||
ChannelDataType: ChannelDataType(f.image_channel_data_type), |
||||
} |
||||
} |
||||
return fmts, nil |
||||
} |
||||
|
||||
func (ctx *Context) CreateCommandQueue(device *Device, properties CommandQueueProperty) (*CommandQueue, error) { |
||||
var err C.cl_int |
||||
clQueue := C.clCreateCommandQueue(ctx.clContext, device.id, C.cl_command_queue_properties(properties), &err) |
||||
if err != C.CL_SUCCESS { |
||||
return nil, toError(err) |
||||
} |
||||
if clQueue == nil { |
||||
return nil, ErrUnknown |
||||
} |
||||
commandQueue := &CommandQueue{clQueue: clQueue, device: device} |
||||
runtime.SetFinalizer(commandQueue, releaseCommandQueue) |
||||
return commandQueue, nil |
||||
} |
||||
|
||||
func (ctx *Context) CreateProgramWithSource(sources []string) (*Program, error) { |
||||
cSources := make([]*C.char, len(sources)) |
||||
for i, s := range sources { |
||||
cs := C.CString(s) |
||||
cSources[i] = cs |
||||
defer C.free(unsafe.Pointer(cs)) |
||||
} |
||||
var err C.cl_int |
||||
clProgram := C.clCreateProgramWithSource(ctx.clContext, C.cl_uint(len(sources)), &cSources[0], nil, &err) |
||||
if err != C.CL_SUCCESS { |
||||
return nil, toError(err) |
||||
} |
||||
if clProgram == nil { |
||||
return nil, ErrUnknown |
||||
} |
||||
program := &Program{clProgram: clProgram, devices: ctx.devices} |
||||
runtime.SetFinalizer(program, releaseProgram) |
||||
return program, nil |
||||
} |
||||
|
||||
func (ctx *Context) CreateBufferUnsafe(flags MemFlag, size int, dataPtr unsafe.Pointer) (*MemObject, error) { |
||||
var err C.cl_int |
||||
clBuffer := C.clCreateBuffer(ctx.clContext, C.cl_mem_flags(flags), C.size_t(size), dataPtr, &err) |
||||
if err != C.CL_SUCCESS { |
||||
return nil, toError(err) |
||||
} |
||||
if clBuffer == nil { |
||||
return nil, ErrUnknown |
||||
} |
||||
return newMemObject(clBuffer, size), nil |
||||
} |
||||
|
||||
func (ctx *Context) CreateEmptyBuffer(flags MemFlag, size int) (*MemObject, error) { |
||||
return ctx.CreateBufferUnsafe(flags, size, nil) |
||||
} |
||||
|
||||
func (ctx *Context) CreateEmptyBufferFloat32(flags MemFlag, size int) (*MemObject, error) { |
||||
return ctx.CreateBufferUnsafe(flags, 4*size, nil) |
||||
} |
||||
|
||||
func (ctx *Context) CreateBuffer(flags MemFlag, data []byte) (*MemObject, error) { |
||||
return ctx.CreateBufferUnsafe(flags, len(data), unsafe.Pointer(&data[0])) |
||||
} |
||||
|
||||
//float64
|
||||
func (ctx *Context) CreateBufferFloat32(flags MemFlag, data []float32) (*MemObject, error) { |
||||
return ctx.CreateBufferUnsafe(flags, 4*len(data), unsafe.Pointer(&data[0])) |
||||
} |
||||
|
||||
func (ctx *Context) CreateUserEvent() (*Event, error) { |
||||
var err C.cl_int |
||||
clEvent := C.clCreateUserEvent(ctx.clContext, &err) |
||||
if err != C.CL_SUCCESS { |
||||
return nil, toError(err) |
||||
} |
||||
return newEvent(clEvent), nil |
||||
} |
||||
|
||||
func (ctx *Context) Release() { |
||||
releaseContext(ctx) |
||||
} |
||||
|
||||
// http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clCreateSubBuffer.html
|
||||
// func (memObject *MemObject) CreateSubBuffer(flags MemFlag, bufferCreateType BufferCreateType, )
|
@ -0,0 +1,510 @@ |
||||
package cl |
||||
|
||||
// #ifdef __APPLE__
|
||||
// #include "OpenCL/opencl.h"
|
||||
// #else
|
||||
// #include "cl.h"
|
||||
// #include "cl_ext.h"
|
||||
// #endif
|
||||
import "C" |
||||
|
||||
import ( |
||||
"strings" |
||||
"unsafe" |
||||
) |
||||
|
||||
const maxDeviceCount = 64 |
||||
|
||||
type DeviceType uint |
||||
|
||||
const ( |
||||
DeviceTypeCPU DeviceType = C.CL_DEVICE_TYPE_CPU |
||||
DeviceTypeGPU DeviceType = C.CL_DEVICE_TYPE_GPU |
||||
DeviceTypeAccelerator DeviceType = C.CL_DEVICE_TYPE_ACCELERATOR |
||||
DeviceTypeDefault DeviceType = C.CL_DEVICE_TYPE_DEFAULT |
||||
DeviceTypeAll DeviceType = C.CL_DEVICE_TYPE_ALL |
||||
) |
||||
|
||||
type FPConfig int |
||||
|
||||
const ( |
||||
FPConfigDenorm FPConfig = C.CL_FP_DENORM // denorms are supported
|
||||
FPConfigInfNaN FPConfig = C.CL_FP_INF_NAN // INF and NaNs are supported
|
||||
FPConfigRoundToNearest FPConfig = C.CL_FP_ROUND_TO_NEAREST // round to nearest even rounding mode supported
|
||||
FPConfigRoundToZero FPConfig = C.CL_FP_ROUND_TO_ZERO // round to zero rounding mode supported
|
||||
FPConfigRoundToInf FPConfig = C.CL_FP_ROUND_TO_INF // round to positive and negative infinity rounding modes supported
|
||||
FPConfigFMA FPConfig = C.CL_FP_FMA // IEEE754-2008 fused multiply-add is supported
|
||||
FPConfigSoftFloat FPConfig = C.CL_FP_SOFT_FLOAT // Basic floating-point operations (such as addition, subtraction, multiplication) are implemented in software
|
||||
) |
||||
|
||||
var fpConfigNameMap = map[FPConfig]string{ |
||||
FPConfigDenorm: "Denorm", |
||||
FPConfigInfNaN: "InfNaN", |
||||
FPConfigRoundToNearest: "RoundToNearest", |
||||
FPConfigRoundToZero: "RoundToZero", |
||||
FPConfigRoundToInf: "RoundToInf", |
||||
FPConfigFMA: "FMA", |
||||
FPConfigSoftFloat: "SoftFloat", |
||||
} |
||||
|
||||
func (c FPConfig) String() string { |
||||
var parts []string |
||||
for bit, name := range fpConfigNameMap { |
||||
if c&bit != 0 { |
||||
parts = append(parts, name) |
||||
} |
||||
} |
||||
if parts == nil { |
||||
return "" |
||||
} |
||||
return strings.Join(parts, "|") |
||||
} |
||||
|
||||
func (dt DeviceType) String() string { |
||||
var parts []string |
||||
if dt&DeviceTypeCPU != 0 { |
||||
parts = append(parts, "CPU") |
||||
} |
||||
if dt&DeviceTypeGPU != 0 { |
||||
parts = append(parts, "GPU") |
||||
} |
||||
if dt&DeviceTypeAccelerator != 0 { |
||||
parts = append(parts, "Accelerator") |
||||
} |
||||
if dt&DeviceTypeDefault != 0 { |
||||
parts = append(parts, "Default") |
||||
} |
||||
if parts == nil { |
||||
parts = append(parts, "None") |
||||
} |
||||
return strings.Join(parts, "|") |
||||
} |
||||
|
||||
type Device struct { |
||||
id C.cl_device_id |
||||
} |
||||
|
||||
func buildDeviceIdList(devices []*Device) []C.cl_device_id { |
||||
deviceIds := make([]C.cl_device_id, len(devices)) |
||||
for i, d := range devices { |
||||
deviceIds[i] = d.id |
||||
} |
||||
return deviceIds |
||||
} |
||||
|
||||
// Obtain the list of devices available on a platform. 'platform' refers
|
||||
// to the platform returned by GetPlatforms or can be nil. If platform
|
||||
// is nil, the behavior is implementation-defined.
|
||||
func GetDevices(platform *Platform, deviceType DeviceType) ([]*Device, error) { |
||||
var deviceIds [maxDeviceCount]C.cl_device_id |
||||
var numDevices C.cl_uint |
||||
var platformId C.cl_platform_id |
||||
if platform != nil { |
||||
platformId = platform.id |
||||
} |
||||
if err := C.clGetDeviceIDs(platformId, C.cl_device_type(deviceType), C.cl_uint(maxDeviceCount), &deviceIds[0], &numDevices); err != C.CL_SUCCESS { |
||||
return nil, toError(err) |
||||
} |
||||
if numDevices > maxDeviceCount { |
||||
numDevices = maxDeviceCount |
||||
} |
||||
devices := make([]*Device, numDevices) |
||||
for i := 0; i < int(numDevices); i++ { |
||||
devices[i] = &Device{id: deviceIds[i]} |
||||
} |
||||
return devices, nil |
||||
} |
||||
|
||||
func (d *Device) nullableId() C.cl_device_id { |
||||
if d == nil { |
||||
return nil |
||||
} |
||||
return d.id |
||||
} |
||||
|
||||
func (d *Device) GetInfoString(param C.cl_device_info, panicOnError bool) (string, error) { |
||||
var strC [1024]C.char |
||||
var strN C.size_t |
||||
if err := C.clGetDeviceInfo(d.id, param, 1024, unsafe.Pointer(&strC), &strN); err != C.CL_SUCCESS { |
||||
if panicOnError { |
||||
panic("Should never fail") |
||||
} |
||||
return "", toError(err) |
||||
} |
||||
|
||||
// OpenCL strings are NUL-terminated, and the terminator is included in strN
|
||||
// Go strings aren't NUL-terminated, so subtract 1 from the length
|
||||
return C.GoStringN((*C.char)(unsafe.Pointer(&strC)), C.int(strN-1)), nil |
||||
} |
||||
|
||||
func (d *Device) getInfoUint(param C.cl_device_info, panicOnError bool) (uint, error) { |
||||
var val C.cl_uint |
||||
if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS { |
||||
if panicOnError { |
||||
panic("Should never fail") |
||||
} |
||||
return 0, toError(err) |
||||
} |
||||
return uint(val), nil |
||||
} |
||||
|
||||
func (d *Device) getInfoSize(param C.cl_device_info, panicOnError bool) (int, error) { |
||||
var val C.size_t |
||||
if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS { |
||||
if panicOnError { |
||||
panic("Should never fail") |
||||
} |
||||
return 0, toError(err) |
||||
} |
||||
return int(val), nil |
||||
} |
||||
|
||||
func (d *Device) getInfoUlong(param C.cl_device_info, panicOnError bool) (int64, error) { |
||||
var val C.cl_ulong |
||||
if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS { |
||||
if panicOnError { |
||||
panic("Should never fail") |
||||
} |
||||
return 0, toError(err) |
||||
} |
||||
return int64(val), nil |
||||
} |
||||
|
||||
func (d *Device) getInfoBool(param C.cl_device_info, panicOnError bool) (bool, error) { |
||||
var val C.cl_bool |
||||
if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS { |
||||
if panicOnError { |
||||
panic("Should never fail") |
||||
} |
||||
return false, toError(err) |
||||
} |
||||
return val == C.CL_TRUE, nil |
||||
} |
||||
|
||||
func (d *Device) Name() string { |
||||
str, _ := d.GetInfoString(C.CL_DEVICE_NAME, true) |
||||
return str |
||||
} |
||||
|
||||
func (d *Device) Vendor() string { |
||||
str, _ := d.GetInfoString(C.CL_DEVICE_VENDOR, true) |
||||
return str |
||||
} |
||||
|
||||
func (d *Device) Extensions() string { |
||||
str, _ := d.GetInfoString(C.CL_DEVICE_EXTENSIONS, true) |
||||
return str |
||||
} |
||||
|
||||
func (d *Device) OpenCLCVersion() string { |
||||
str, _ := d.GetInfoString(C.CL_DEVICE_OPENCL_C_VERSION, true) |
||||
return str |
||||
} |
||||
|
||||
func (d *Device) Profile() string { |
||||
str, _ := d.GetInfoString(C.CL_DEVICE_PROFILE, true) |
||||
return str |
||||
} |
||||
|
||||
func (d *Device) Version() string { |
||||
str, _ := d.GetInfoString(C.CL_DEVICE_VERSION, true) |
||||
return str |
||||
} |
||||
|
||||
func (d *Device) DriverVersion() string { |
||||
str, _ := d.GetInfoString(C.CL_DRIVER_VERSION, true) |
||||
return str |
||||
} |
||||
|
||||
// The default compute device address space size specified as an
|
||||
// unsigned integer value in bits. Currently supported values are 32 or 64 bits.
|
||||
func (d *Device) AddressBits() int { |
||||
val, _ := d.getInfoUint(C.CL_DEVICE_ADDRESS_BITS, true) |
||||
return int(val) |
||||
} |
||||
|
||||
// Size of global memory cache line in bytes.
|
||||
func (d *Device) GlobalMemCachelineSize() int { |
||||
val, _ := d.getInfoUint(C.CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, true) |
||||
return int(val) |
||||
} |
||||
|
||||
// Maximum configured clock frequency of the device in MHz.
|
||||
func (d *Device) MaxClockFrequency() int { |
||||
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_CLOCK_FREQUENCY, true) |
||||
return int(val) |
||||
} |
||||
|
||||
// The number of parallel compute units on the OpenCL device.
|
||||
// A work-group executes on a single compute unit. The minimum value is 1.
|
||||
func (d *Device) MaxComputeUnits() int { |
||||
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_COMPUTE_UNITS, true) |
||||
return int(val) |
||||
} |
||||
|
||||
// Max number of arguments declared with the __constant qualifier in a kernel.
|
||||
// The minimum value is 8 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
|
||||
func (d *Device) MaxConstantArgs() int { |
||||
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_CONSTANT_ARGS, true) |
||||
return int(val) |
||||
} |
||||
|
||||
// Max number of simultaneous image objects that can be read by a kernel.
|
||||
// The minimum value is 128 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
|
||||
func (d *Device) MaxReadImageArgs() int { |
||||
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_READ_IMAGE_ARGS, true) |
||||
return int(val) |
||||
} |
||||
|
||||
// Maximum number of samplers that can be used in a kernel. The minimum
|
||||
// value is 16 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE. (Also see sampler_t.)
|
||||
func (d *Device) MaxSamplers() int { |
||||
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_SAMPLERS, true) |
||||
return int(val) |
||||
} |
||||
|
||||
// Maximum dimensions that specify the global and local work-item IDs used
|
||||
// by the data parallel execution model. (Refer to clEnqueueNDRangeKernel).
|
||||
// The minimum value is 3 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
|
||||
func (d *Device) MaxWorkItemDimensions() int { |
||||
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, true) |
||||
return int(val) |
||||
} |
||||
|
||||
// Max number of simultaneous image objects that can be written to by a
|
||||
// kernel. The minimum value is 8 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
|
||||
func (d *Device) MaxWriteImageArgs() int { |
||||
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_WRITE_IMAGE_ARGS, true) |
||||
return int(val) |
||||
} |
||||
|
||||
// The minimum value is the size (in bits) of the largest OpenCL built-in
|
||||
// data type supported by the device (long16 in FULL profile, long16 or
|
||||
// int16 in EMBEDDED profile) for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
|
||||
func (d *Device) MemBaseAddrAlign() int { |
||||
val, _ := d.getInfoUint(C.CL_DEVICE_MEM_BASE_ADDR_ALIGN, true) |
||||
return int(val) |
||||
} |
||||
|
||||
func (d *Device) NativeVectorWidthChar() int { |
||||
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, true) |
||||
return int(val) |
||||
} |
||||
|
||||
func (d *Device) NativeVectorWidthShort() int { |
||||
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, true) |
||||
return int(val) |
||||
} |
||||
|
||||
func (d *Device) NativeVectorWidthInt() int { |
||||
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, true) |
||||
return int(val) |
||||
} |
||||
|
||||
func (d *Device) NativeVectorWidthLong() int { |
||||
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, true) |
||||
return int(val) |
||||
} |
||||
|
||||
func (d *Device) NativeVectorWidthFloat() int { |
||||
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, true) |
||||
return int(val) |
||||
} |
||||
|
||||
func (d *Device) NativeVectorWidthDouble() int { |
||||
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, true) |
||||
return int(val) |
||||
} |
||||
|
||||
func (d *Device) NativeVectorWidthHalf() int { |
||||
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, true) |
||||
return int(val) |
||||
} |
||||
|
||||
// Max height of 2D image in pixels. The minimum value is 8192
|
||||
// if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
|
||||
func (d *Device) Image2DMaxHeight() int { |
||||
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE2D_MAX_HEIGHT, true) |
||||
return int(val) |
||||
} |
||||
|
||||
// Max width of 2D image or 1D image not created from a buffer object in
|
||||
// pixels. The minimum value is 8192 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
|
||||
func (d *Device) Image2DMaxWidth() int { |
||||
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE2D_MAX_WIDTH, true) |
||||
return int(val) |
||||
} |
||||
|
||||
// Max depth of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
|
||||
func (d *Device) Image3DMaxDepth() int { |
||||
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE3D_MAX_DEPTH, true) |
||||
return int(val) |
||||
} |
||||
|
||||
// Max height of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
|
||||
func (d *Device) Image3DMaxHeight() int { |
||||
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE3D_MAX_HEIGHT, true) |
||||
return int(val) |
||||
} |
||||
|
||||
// Max width of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
|
||||
func (d *Device) Image3DMaxWidth() int { |
||||
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE3D_MAX_WIDTH, true) |
||||
return int(val) |
||||
} |
||||
|
||||
// Max size in bytes of the arguments that can be passed to a kernel. The
|
||||
// minimum value is 1024 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
|
||||
// For this minimum value, only a maximum of 128 arguments can be passed to a kernel.
|
||||
func (d *Device) MaxParameterSize() int { |
||||
val, _ := d.getInfoSize(C.CL_DEVICE_MAX_PARAMETER_SIZE, true) |
||||
return int(val) |
||||
} |
||||
|
||||
// Maximum number of work-items in a work-group executing a kernel on a
|
||||
// single compute unit, using the data parallel execution model. (Refer
|
||||
// to clEnqueueNDRangeKernel). The minimum value is 1.
|
||||
func (d *Device) MaxWorkGroupSize() int { |
||||
val, _ := d.getInfoSize(C.CL_DEVICE_MAX_WORK_GROUP_SIZE, true) |
||||
return int(val) |
||||
} |
||||
|
||||
// Describes the resolution of device timer. This is measured in nanoseconds.
|
||||
func (d *Device) ProfilingTimerResolution() int { |
||||
val, _ := d.getInfoSize(C.CL_DEVICE_PROFILING_TIMER_RESOLUTION, true) |
||||
return int(val) |
||||
} |
||||
|
||||
// Size of local memory arena in bytes. The minimum value is 32 KB for
|
||||
// devices that are not of type CL_DEVICE_TYPE_CUSTOM.
|
||||
func (d *Device) LocalMemSize() int64 { |
||||
val, _ := d.getInfoUlong(C.CL_DEVICE_LOCAL_MEM_SIZE, true) |
||||
return val |
||||
} |
||||
|
||||
// Max size in bytes of a constant buffer allocation. The minimum value is
|
||||
// 64 KB for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
|
||||
func (d *Device) MaxConstantBufferSize() int64 { |
||||
val, _ := d.getInfoUlong(C.CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, true) |
||||
return val |
||||
} |
||||
|
||||
// Max size of memory object allocation in bytes. The minimum value is max
|
||||
// (1/4th of CL_DEVICE_GLOBAL_MEM_SIZE, 128*1024*1024) for devices that are
|
||||
// not of type CL_DEVICE_TYPE_CUSTOM.
|
||||
func (d *Device) MaxMemAllocSize() int64 { |
||||
val, _ := d.getInfoUlong(C.CL_DEVICE_MAX_MEM_ALLOC_SIZE, true) |
||||
return val |
||||
} |
||||
|
||||
// Size of global device memory in bytes.
|
||||
func (d *Device) GlobalMemSize() int64 { |
||||
val, _ := d.getInfoUlong(C.CL_DEVICE_GLOBAL_MEM_SIZE, true) |
||||
return val |
||||
} |
||||
|
||||
func (d *Device) Available() bool { |
||||
val, _ := d.getInfoBool(C.CL_DEVICE_AVAILABLE, true) |
||||
return val |
||||
} |
||||
|
||||
func (d *Device) CompilerAvailable() bool { |
||||
val, _ := d.getInfoBool(C.CL_DEVICE_COMPILER_AVAILABLE, true) |
||||
return val |
||||
} |
||||
|
||||
func (d *Device) EndianLittle() bool { |
||||
val, _ := d.getInfoBool(C.CL_DEVICE_ENDIAN_LITTLE, true) |
||||
return val |
||||
} |
||||
|
||||
// Is CL_TRUE if the device implements error correction for all
|
||||
// accesses to compute device memory (global and constant). Is
|
||||
// CL_FALSE if the device does not implement such error correction.
|
||||
func (d *Device) ErrorCorrectionSupport() bool { |
||||
val, _ := d.getInfoBool(C.CL_DEVICE_ERROR_CORRECTION_SUPPORT, true) |
||||
return val |
||||
} |
||||
|
||||
func (d *Device) HostUnifiedMemory() bool { |
||||
val, _ := d.getInfoBool(C.CL_DEVICE_HOST_UNIFIED_MEMORY, true) |
||||
return val |
||||
} |
||||
|
||||
func (d *Device) ImageSupport() bool { |
||||
val, _ := d.getInfoBool(C.CL_DEVICE_IMAGE_SUPPORT, true) |
||||
return val |
||||
} |
||||
|
||||
func (d *Device) Type() DeviceType { |
||||
var deviceType C.cl_device_type |
||||
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_TYPE, C.size_t(unsafe.Sizeof(deviceType)), unsafe.Pointer(&deviceType), nil); err != C.CL_SUCCESS { |
||||
panic("Failed to get device type") |
||||
} |
||||
return DeviceType(deviceType) |
||||
} |
||||
|
||||
// Describes double precision floating-point capability of the OpenCL device
|
||||
func (d *Device) DoubleFPConfig() FPConfig { |
||||
var fpConfig C.cl_device_fp_config |
||||
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_DOUBLE_FP_CONFIG, C.size_t(unsafe.Sizeof(fpConfig)), unsafe.Pointer(&fpConfig), nil); err != C.CL_SUCCESS { |
||||
panic("Failed to get double FP config") |
||||
} |
||||
return FPConfig(fpConfig) |
||||
} |
||||
|
||||
// Describes the OPTIONAL half precision floating-point capability of the OpenCL device
|
||||
func (d *Device) HalfFPConfig() FPConfig { |
||||
var fpConfig C.cl_device_fp_config |
||||
err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_HALF_FP_CONFIG, C.size_t(unsafe.Sizeof(fpConfig)), unsafe.Pointer(&fpConfig), nil) |
||||
if err != C.CL_SUCCESS { |
||||
return FPConfig(0) |
||||
} |
||||
return FPConfig(fpConfig) |
||||
} |
||||
|
||||
// Type of local memory supported. This can be set to CL_LOCAL implying dedicated
|
||||
// local memory storage such as SRAM, or CL_GLOBAL. For custom devices, CL_NONE
|
||||
// can also be returned indicating no local memory support.
|
||||
func (d *Device) LocalMemType() LocalMemType { |
||||
var memType C.cl_device_local_mem_type |
||||
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_LOCAL_MEM_TYPE, C.size_t(unsafe.Sizeof(memType)), unsafe.Pointer(&memType), nil); err != C.CL_SUCCESS { |
||||
return LocalMemType(C.CL_NONE) |
||||
} |
||||
return LocalMemType(memType) |
||||
} |
||||
|
||||
// Describes the execution capabilities of the device. The mandated minimum capability is CL_EXEC_KERNEL.
|
||||
func (d *Device) ExecutionCapabilities() ExecCapability { |
||||
var execCap C.cl_device_exec_capabilities |
||||
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_EXECUTION_CAPABILITIES, C.size_t(unsafe.Sizeof(execCap)), unsafe.Pointer(&execCap), nil); err != C.CL_SUCCESS { |
||||
panic("Failed to get execution capabilities") |
||||
} |
||||
return ExecCapability(execCap) |
||||
} |
||||
|
||||
func (d *Device) GlobalMemCacheType() MemCacheType { |
||||
var memType C.cl_device_mem_cache_type |
||||
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, C.size_t(unsafe.Sizeof(memType)), unsafe.Pointer(&memType), nil); err != C.CL_SUCCESS { |
||||
return MemCacheType(C.CL_NONE) |
||||
} |
||||
return MemCacheType(memType) |
||||
} |
||||
|
||||
// Maximum number of work-items that can be specified in each dimension of the work-group to clEnqueueNDRangeKernel.
|
||||
//
|
||||
// Returns n size_t entries, where n is the value returned by the query for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS.
|
||||
//
|
||||
// The minimum value is (1, 1, 1) for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
|
||||
func (d *Device) MaxWorkItemSizes() []int { |
||||
dims := d.MaxWorkItemDimensions() |
||||
sizes := make([]C.size_t, dims) |
||||
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_MAX_WORK_ITEM_SIZES, C.size_t(int(unsafe.Sizeof(sizes[0]))*dims), unsafe.Pointer(&sizes[0]), nil); err != C.CL_SUCCESS { |
||||
panic("Failed to get max work item sizes") |
||||
} |
||||
intSizes := make([]int, dims) |
||||
for i, s := range sizes { |
||||
intSizes[i] = int(s) |
||||
} |
||||
return intSizes |
||||
} |
@ -0,0 +1,51 @@ |
||||
// +build cl12
|
||||
|
||||
package cl |
||||
|
||||
// #ifdef __APPLE__
|
||||
// #include "OpenCL/opencl.h"
|
||||
// #else
|
||||
// #include "cl.h"
|
||||
// #endif
|
||||
import "C" |
||||
import "unsafe" |
||||
|
||||
const FPConfigCorrectlyRoundedDivideSqrt FPConfig = C.CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT |
||||
|
||||
func init() { |
||||
fpConfigNameMap[FPConfigCorrectlyRoundedDivideSqrt] = "CorrectlyRoundedDivideSqrt" |
||||
} |
||||
|
||||
func (d *Device) BuiltInKernels() string { |
||||
str, _ := d.getInfoString(C.CL_DEVICE_BUILT_IN_KERNELS, true) |
||||
return str |
||||
} |
||||
|
||||
// Is CL_FALSE if the implementation does not have a linker available. Is CL_TRUE if the linker is available. This can be CL_FALSE for the embedded platform profile only. This must be CL_TRUE if CL_DEVICE_COMPILER_AVAILABLE is CL_TRUE
|
||||
func (d *Device) LinkerAvailable() bool { |
||||
val, _ := d.getInfoBool(C.CL_DEVICE_LINKER_AVAILABLE, true) |
||||
return val |
||||
} |
||||
|
||||
func (d *Device) ParentDevice() *Device { |
||||
var deviceId C.cl_device_id |
||||
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_PARENT_DEVICE, C.size_t(unsafe.Sizeof(deviceId)), unsafe.Pointer(&deviceId), nil); err != C.CL_SUCCESS { |
||||
panic("ParentDevice failed") |
||||
} |
||||
if deviceId == nil { |
||||
return nil |
||||
} |
||||
return &Device{id: deviceId} |
||||
} |
||||
|
||||
// Max number of pixels for a 1D image created from a buffer object. The minimum value is 65536 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
|
||||
func (d *Device) ImageMaxBufferSize() int { |
||||
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, true) |
||||
return int(val) |
||||
} |
||||
|
||||
// Max number of images in a 1D or 2D image array. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
|
||||
func (d *Device) ImageMaxArraySize() int { |
||||
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, true) |
||||
return int(val) |
||||
} |
1210
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl.h
generated
vendored
1210
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl.h
generated
vendored
File diff suppressed because it is too large
Load Diff
315
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_ext.h
generated
vendored
315
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_ext.h
generated
vendored
@ -0,0 +1,315 @@ |
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2013 The Khronos Group Inc. |
||||
* |
||||
* Permission is hereby granted, free of charge, to any person obtaining a |
||||
* copy of this software and/or associated documentation files (the |
||||
* "Materials"), to deal in the Materials without restriction, including |
||||
* without limitation the rights to use, copy, modify, merge, publish, |
||||
* distribute, sublicense, and/or sell copies of the Materials, and to |
||||
* permit persons to whom the Materials are furnished to do so, subject to |
||||
* the following conditions: |
||||
* |
||||
* The above copyright notice and this permission notice shall be included |
||||
* in all copies or substantial portions of the Materials. |
||||
* |
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. |
||||
******************************************************************************/ |
||||
|
||||
/* $Revision: 11928 $ on $Date: 2010-07-13 09:04:56 -0700 (Tue, 13 Jul 2010) $ */ |
||||
|
||||
/* cl_ext.h contains OpenCL extensions which don't have external */ |
||||
/* (OpenGL, D3D) dependencies. */ |
||||
|
||||
#ifndef __CL_EXT_H |
||||
#define __CL_EXT_H |
||||
|
||||
#ifdef __cplusplus |
||||
extern "C" { |
||||
#endif |
||||
|
||||
#ifdef __APPLE__ |
||||
#include <AvailabilityMacros.h> |
||||
#endif |
||||
|
||||
#include <cl.h> |
||||
|
||||
/* cl_khr_fp16 extension - no extension #define since it has no functions */ |
||||
#define CL_DEVICE_HALF_FP_CONFIG 0x1033 |
||||
|
||||
/* Memory object destruction
|
||||
* |
||||
* Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR |
||||
* |
||||
* Registers a user callback function that will be called when the memory object is deleted and its resources
|
||||
* freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback
|
||||
* stack associated with memobj. The registered user callback functions are called in the reverse order in
|
||||
* which they were registered. The user callback functions are called and then the memory object is deleted
|
||||
* and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be
|
||||
* notified when the memory referenced by host_ptr, specified when the memory object is created and used as
|
||||
* the storage bits for the memory object, can be reused or freed. |
||||
* |
||||
* The application may not call CL api's with the cl_mem object passed to the pfn_notify. |
||||
* |
||||
* Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) |
||||
* before using. |
||||
*/ |
||||
#define cl_APPLE_SetMemObjectDestructor 1 |
||||
cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem /* memobj */,
|
||||
void (* /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
|
||||
void * /*user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
|
||||
|
||||
|
||||
/* Context Logging Functions
|
||||
* |
||||
* The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext(). |
||||
* Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS) |
||||
* before using. |
||||
* |
||||
* clLogMessagesToSystemLog fowards on all log messages to the Apple System Logger
|
||||
*/ |
||||
#define cl_APPLE_ContextLoggingFunctions 1 |
||||
extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * /* errstr */,
|
||||
const void * /* private_info */,
|
||||
size_t /* cb */,
|
||||
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; |
||||
|
||||
/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */ |
||||
extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * /* errstr */,
|
||||
const void * /* private_info */,
|
||||
size_t /* cb */,
|
||||
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; |
||||
|
||||
/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */ |
||||
extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * /* errstr */,
|
||||
const void * /* private_info */,
|
||||
size_t /* cb */,
|
||||
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0; |
||||
|
||||
|
||||
/************************
|
||||
* cl_khr_icd extension *
|
||||
************************/ |
||||
#define cl_khr_icd 1 |
||||
|
||||
/* cl_platform_info */ |
||||
#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920 |
||||
|
||||
/* Additional Error Codes */ |
||||
#define CL_PLATFORM_NOT_FOUND_KHR -1001 |
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL |
||||
clIcdGetPlatformIDsKHR(cl_uint /* num_entries */, |
||||
cl_platform_id * /* platforms */, |
||||
cl_uint * /* num_platforms */); |
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)( |
||||
cl_uint /* num_entries */, |
||||
cl_platform_id * /* platforms */, |
||||
cl_uint * /* num_platforms */); |
||||
|
||||
|
||||
/* Extension: cl_khr_image2D_buffer
|
||||
* |
||||
* This extension allows a 2D image to be created from a cl_mem buffer without a copy. |
||||
* The type associated with a 2D image created from a buffer in an OpenCL program is image2d_t. |
||||
* Both the sampler and sampler-less read_image built-in functions are supported for 2D images |
||||
* and 2D images created from a buffer. Similarly, the write_image built-ins are also supported |
||||
* for 2D images created from a buffer. |
||||
* |
||||
* When the 2D image from buffer is created, the client must specify the width, |
||||
* height, image format (i.e. channel order and channel data type) and optionally the row pitch |
||||
* |
||||
* The pitch specified must be a multiple of CL_DEVICE_IMAGE_PITCH_ALIGNMENT pixels. |
||||
* The base address of the buffer must be aligned to CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT pixels. |
||||
*/ |
||||
|
||||
/*************************************
|
||||
* cl_khr_initalize_memory extension * |
||||
*************************************/ |
||||
|
||||
#define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x200E |
||||
|
||||
|
||||
/**************************************
|
||||
* cl_khr_terminate_context extension * |
||||
**************************************/ |
||||
|
||||
#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x200F |
||||
#define CL_CONTEXT_TERMINATE_KHR 0x2010 |
||||
|
||||
#define cl_khr_terminate_context 1 |
||||
extern CL_API_ENTRY cl_int CL_API_CALL clTerminateContextKHR(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2; |
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clTerminateContextKHR_fn)(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2; |
||||
|
||||
|
||||
/*
|
||||
* Extension: cl_khr_spir |
||||
* |
||||
* This extension adds support to create an OpenCL program object from a
|
||||
* Standard Portable Intermediate Representation (SPIR) instance |
||||
*/ |
||||
|
||||
#define CL_DEVICE_SPIR_VERSIONS 0x40E0 |
||||
#define CL_PROGRAM_BINARY_TYPE_INTERMEDIATE 0x40E1 |
||||
|
||||
|
||||
/******************************************
|
||||
* cl_nv_device_attribute_query extension * |
||||
******************************************/ |
||||
/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */ |
||||
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 |
||||
#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 |
||||
#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002 |
||||
#define CL_DEVICE_WARP_SIZE_NV 0x4003 |
||||
#define CL_DEVICE_GPU_OVERLAP_NV 0x4004 |
||||
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 |
||||
#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 |
||||
|
||||
/*********************************
|
||||
* cl_amd_device_attribute_query * |
||||
*********************************/ |
||||
#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036 |
||||
|
||||
/*********************************
|
||||
* cl_arm_printf extension |
||||
*********************************/ |
||||
#define CL_PRINTF_CALLBACK_ARM 0x40B0 |
||||
#define CL_PRINTF_BUFFERSIZE_ARM 0x40B1 |
||||
|
||||
#ifdef CL_VERSION_1_1 |
||||
/***********************************
|
||||
* cl_ext_device_fission extension * |
||||
***********************************/ |
||||
#define cl_ext_device_fission 1 |
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL |
||||
clReleaseDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL *clReleaseDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; |
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL |
||||
clRetainDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL *clRetainDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; |
||||
|
||||
typedef cl_ulong cl_device_partition_property_ext; |
||||
extern CL_API_ENTRY cl_int CL_API_CALL |
||||
clCreateSubDevicesEXT( cl_device_id /*in_device*/, |
||||
const cl_device_partition_property_ext * /* properties */, |
||||
cl_uint /*num_entries*/, |
||||
cl_device_id * /*out_devices*/, |
||||
cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; |
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
( CL_API_CALL * clCreateSubDevicesEXT_fn)( cl_device_id /*in_device*/, |
||||
const cl_device_partition_property_ext * /* properties */, |
||||
cl_uint /*num_entries*/, |
||||
cl_device_id * /*out_devices*/, |
||||
cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; |
||||
|
||||
/* cl_device_partition_property_ext */ |
||||
#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050 |
||||
#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051 |
||||
#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052 |
||||
#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053 |
||||
|
||||
/* clDeviceGetInfo selectors */ |
||||
#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054 |
||||
#define CL_DEVICE_PARTITION_TYPES_EXT 0x4055 |
||||
#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056 |
||||
#define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057 |
||||
#define CL_DEVICE_PARTITION_STYLE_EXT 0x4058 |
||||
|
||||
/* error codes */ |
||||
#define CL_DEVICE_PARTITION_FAILED_EXT -1057 |
||||
#define CL_INVALID_PARTITION_COUNT_EXT -1058 |
||||
#define CL_INVALID_PARTITION_NAME_EXT -1059 |
||||
|
||||
/* CL_AFFINITY_DOMAINs */ |
||||
#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1 |
||||
#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2 |
||||
#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3 |
||||
#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4 |
||||
#define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10 |
||||
#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100 |
||||
|
||||
/* cl_device_partition_property_ext list terminators */ |
||||
#define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0) |
||||
#define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0) |
||||
#define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1) |
||||
|
||||
/*********************************
|
||||
* cl_qcom_ext_host_ptr extension |
||||
*********************************/ |
||||
|
||||
#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29) |
||||
|
||||
#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0 |
||||
#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1 |
||||
#define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2 |
||||
#define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3 |
||||
#define CL_MEM_HOST_UNCACHED_QCOM 0x40A4 |
||||
#define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5 |
||||
#define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6 |
||||
#define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7 |
||||
|
||||
typedef cl_uint cl_image_pitch_info_qcom; |
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL |
||||
clGetDeviceImageInfoQCOM(cl_device_id device, |
||||
size_t image_width, |
||||
size_t image_height, |
||||
const cl_image_format *image_format, |
||||
cl_image_pitch_info_qcom param_name, |
||||
size_t param_value_size, |
||||
void *param_value, |
||||
size_t *param_value_size_ret); |
||||
|
||||
typedef struct _cl_mem_ext_host_ptr |
||||
{ |
||||
/* Type of external memory allocation. */ |
||||
/* Legal values will be defined in layered extensions. */ |
||||
cl_uint allocation_type; |
||||
|
||||
/* Host cache policy for this external memory allocation. */ |
||||
cl_uint host_cache_policy; |
||||
|
||||
} cl_mem_ext_host_ptr; |
||||
|
||||
/*********************************
|
||||
* cl_qcom_ion_host_ptr extension |
||||
*********************************/ |
||||
|
||||
#define CL_MEM_ION_HOST_PTR_QCOM 0x40A8 |
||||
|
||||
typedef struct _cl_mem_ion_host_ptr |
||||
{ |
||||
/* Type of external memory allocation. */ |
||||
/* Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. */ |
||||
cl_mem_ext_host_ptr ext_host_ptr; |
||||
|
||||
/* ION file descriptor */ |
||||
int ion_filedesc; |
||||
|
||||
/* Host pointer to the ION allocated memory */ |
||||
void* ion_hostptr; |
||||
|
||||
} cl_mem_ion_host_ptr; |
||||
|
||||
#endif /* CL_VERSION_1_1 */ |
||||
|
||||
#ifdef __cplusplus |
||||
} |
||||
#endif |
||||
|
||||
|
||||
#endif /* __CL_EXT_H */ |
158
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_gl.h
generated
vendored
158
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_gl.h
generated
vendored
@ -0,0 +1,158 @@ |
||||
/**********************************************************************************
|
||||
* Copyright (c) 2008 - 2012 The Khronos Group Inc. |
||||
* |
||||
* Permission is hereby granted, free of charge, to any person obtaining a |
||||
* copy of this software and/or associated documentation files (the |
||||
* "Materials"), to deal in the Materials without restriction, including |
||||
* without limitation the rights to use, copy, modify, merge, publish, |
||||
* distribute, sublicense, and/or sell copies of the Materials, and to |
||||
* permit persons to whom the Materials are furnished to do so, subject to |
||||
* the following conditions: |
||||
* |
||||
* The above copyright notice and this permission notice shall be included |
||||
* in all copies or substantial portions of the Materials. |
||||
* |
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. |
||||
**********************************************************************************/ |
||||
|
||||
#ifndef __OPENCL_CL_GL_H |
||||
#define __OPENCL_CL_GL_H |
||||
|
||||
#include <cl.h> |
||||
|
||||
#ifdef __cplusplus |
||||
extern "C" { |
||||
#endif |
||||
|
||||
typedef cl_uint cl_gl_object_type; |
||||
typedef cl_uint cl_gl_texture_info; |
||||
typedef cl_uint cl_gl_platform_info; |
||||
typedef struct __GLsync *cl_GLsync; |
||||
|
||||
/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */ |
||||
#define CL_GL_OBJECT_BUFFER 0x2000 |
||||
#define CL_GL_OBJECT_TEXTURE2D 0x2001 |
||||
#define CL_GL_OBJECT_TEXTURE3D 0x2002 |
||||
#define CL_GL_OBJECT_RENDERBUFFER 0x2003 |
||||
#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E |
||||
#define CL_GL_OBJECT_TEXTURE1D 0x200F |
||||
#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010 |
||||
#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011 |
||||
|
||||
/* cl_gl_texture_info */ |
||||
#define CL_GL_TEXTURE_TARGET 0x2004 |
||||
#define CL_GL_MIPMAP_LEVEL 0x2005 |
||||
#define CL_GL_NUM_SAMPLES 0x2012 |
||||
|
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL |
||||
clCreateFromGLBuffer(cl_context /* context */, |
||||
cl_mem_flags /* flags */, |
||||
cl_GLuint /* bufobj */, |
||||
int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; |
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL |
||||
clCreateFromGLTexture(cl_context /* context */, |
||||
cl_mem_flags /* flags */, |
||||
cl_GLenum /* target */, |
||||
cl_GLint /* miplevel */, |
||||
cl_GLuint /* texture */, |
||||
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2; |
||||
|
||||
extern CL_API_ENTRY cl_mem CL_API_CALL |
||||
clCreateFromGLRenderbuffer(cl_context /* context */, |
||||
cl_mem_flags /* flags */, |
||||
cl_GLuint /* renderbuffer */, |
||||
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0; |
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL |
||||
clGetGLObjectInfo(cl_mem /* memobj */, |
||||
cl_gl_object_type * /* gl_object_type */, |
||||
cl_GLuint * /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0; |
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL |
||||
clGetGLTextureInfo(cl_mem /* memobj */, |
||||
cl_gl_texture_info /* param_name */, |
||||
size_t /* param_value_size */, |
||||
void * /* param_value */, |
||||
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; |
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL |
||||
clEnqueueAcquireGLObjects(cl_command_queue /* command_queue */, |
||||
cl_uint /* num_objects */, |
||||
const cl_mem * /* mem_objects */, |
||||
cl_uint /* num_events_in_wait_list */, |
||||
const cl_event * /* event_wait_list */, |
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; |
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL |
||||
clEnqueueReleaseGLObjects(cl_command_queue /* command_queue */, |
||||
cl_uint /* num_objects */, |
||||
const cl_mem * /* mem_objects */, |
||||
cl_uint /* num_events_in_wait_list */, |
||||
const cl_event * /* event_wait_list */, |
||||
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0; |
||||
|
||||
|
||||
/* Deprecated OpenCL 1.1 APIs */ |
||||
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL |
||||
clCreateFromGLTexture2D(cl_context /* context */, |
||||
cl_mem_flags /* flags */, |
||||
cl_GLenum /* target */, |
||||
cl_GLint /* miplevel */, |
||||
cl_GLuint /* texture */, |
||||
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; |
||||
|
||||
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL |
||||
clCreateFromGLTexture3D(cl_context /* context */, |
||||
cl_mem_flags /* flags */, |
||||
cl_GLenum /* target */, |
||||
cl_GLint /* miplevel */, |
||||
cl_GLuint /* texture */, |
||||
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; |
||||
|
||||
/* cl_khr_gl_sharing extension */ |
||||
|
||||
#define cl_khr_gl_sharing 1 |
||||
|
||||
typedef cl_uint cl_gl_context_info; |
||||
|
||||
/* Additional Error Codes */ |
||||
#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000 |
||||
|
||||
/* cl_gl_context_info */ |
||||
#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006 |
||||
#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007 |
||||
|
||||
/* Additional cl_context_properties */ |
||||
#define CL_GL_CONTEXT_KHR 0x2008 |
||||
#define CL_EGL_DISPLAY_KHR 0x2009 |
||||
#define CL_GLX_DISPLAY_KHR 0x200A |
||||
#define CL_WGL_HDC_KHR 0x200B |
||||
#define CL_CGL_SHAREGROUP_KHR 0x200C |
||||
|
||||
extern CL_API_ENTRY cl_int CL_API_CALL |
||||
clGetGLContextInfoKHR(const cl_context_properties * /* properties */, |
||||
cl_gl_context_info /* param_name */, |
||||
size_t /* param_value_size */, |
||||
void * /* param_value */, |
||||
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0; |
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)( |
||||
const cl_context_properties * properties, |
||||
cl_gl_context_info param_name, |
||||
size_t param_value_size, |
||||
void * param_value, |
||||
size_t * param_value_size_ret); |
||||
|
||||
#ifdef __cplusplus |
||||
} |
||||
#endif |
||||
|
||||
#endif /* __OPENCL_CL_GL_H */ |
65
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_gl_ext.h
generated
vendored
65
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_gl_ext.h
generated
vendored
@ -0,0 +1,65 @@ |
||||
/**********************************************************************************
|
||||
* Copyright (c) 2008-2012 The Khronos Group Inc. |
||||
* |
||||
* Permission is hereby granted, free of charge, to any person obtaining a |
||||
* copy of this software and/or associated documentation files (the |
||||
* "Materials"), to deal in the Materials without restriction, including |
||||
* without limitation the rights to use, copy, modify, merge, publish, |
||||
* distribute, sublicense, and/or sell copies of the Materials, and to |
||||
* permit persons to whom the Materials are furnished to do so, subject to |
||||
* the following conditions: |
||||
* |
||||
* The above copyright notice and this permission notice shall be included |
||||
* in all copies or substantial portions of the Materials. |
||||
* |
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. |
||||
**********************************************************************************/ |
||||
|
||||
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ |
||||
|
||||
/* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have */ |
||||
/* OpenGL dependencies. */ |
||||
|
||||
#ifndef __OPENCL_CL_GL_EXT_H |
||||
#define __OPENCL_CL_GL_EXT_H |
||||
|
||||
#ifdef __cplusplus |
||||
extern "C" { |
||||
#endif |
||||
|
||||
#include <cl_gl.h> |
||||
|
||||
/*
|
||||
* For each extension, follow this template |
||||
* cl_VEN_extname extension */ |
||||
/* #define cl_VEN_extname 1
|
||||
* ... define new types, if any |
||||
* ... define new tokens, if any |
||||
* ... define new APIs, if any |
||||
* |
||||
* If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header |
||||
* This allows us to avoid having to decide whether to include GL headers or GLES here. |
||||
*/ |
||||
|
||||
/*
|
||||
* cl_khr_gl_event extension |
||||
* See section 9.9 in the OpenCL 1.1 spec for more information |
||||
*/ |
||||
#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D |
||||
|
||||
extern CL_API_ENTRY cl_event CL_API_CALL |
||||
clCreateEventFromGLsyncKHR(cl_context /* context */, |
||||
cl_GLsync /* cl_GLsync */, |
||||
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1; |
||||
|
||||
#ifdef __cplusplus |
||||
} |
||||
#endif |
||||
|
||||
#endif /* __OPENCL_CL_GL_EXT_H */ |
1278
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_platform.h
generated
vendored
1278
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_platform.h
generated
vendored
File diff suppressed because it is too large
Load Diff
43
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/opencl.h
generated
vendored
43
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/opencl.h
generated
vendored
@ -0,0 +1,43 @@ |
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2012 The Khronos Group Inc. |
||||
* |
||||
* Permission is hereby granted, free of charge, to any person obtaining a |
||||
* copy of this software and/or associated documentation files (the |
||||
* "Materials"), to deal in the Materials without restriction, including |
||||
* without limitation the rights to use, copy, modify, merge, publish, |
||||
* distribute, sublicense, and/or sell copies of the Materials, and to |
||||
* permit persons to whom the Materials are furnished to do so, subject to |
||||
* the following conditions: |
||||
* |
||||
* The above copyright notice and this permission notice shall be included |
||||
* in all copies or substantial portions of the Materials. |
||||
* |
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY |
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. |
||||
******************************************************************************/ |
||||
|
||||
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */ |
||||
|
||||
#ifndef __OPENCL_H |
||||
#define __OPENCL_H |
||||
|
||||
#ifdef __cplusplus |
||||
extern "C" { |
||||
#endif |
||||
|
||||
#include <cl.h> |
||||
#include <cl_gl.h> |
||||
#include <cl_gl_ext.h> |
||||
#include <cl_ext.h> |
||||
|
||||
#ifdef __cplusplus |
||||
} |
||||
#endif |
||||
|
||||
#endif /* __OPENCL_H */ |
||||
|
@ -0,0 +1,83 @@ |
||||
// +build cl12
|
||||
|
||||
package cl |
||||
|
||||
// #ifdef __APPLE__
|
||||
// #include "OpenCL/opencl.h"
|
||||
// #else
|
||||
// #include "cl.h"
|
||||
// #endif
|
||||
import "C" |
||||
import ( |
||||
"image" |
||||
"unsafe" |
||||
) |
||||
|
||||
func (ctx *Context) CreateImage(flags MemFlag, imageFormat ImageFormat, imageDesc ImageDescription, data []byte) (*MemObject, error) { |
||||
format := imageFormat.toCl() |
||||
desc := imageDesc.toCl() |
||||
var dataPtr unsafe.Pointer |
||||
if data != nil { |
||||
dataPtr = unsafe.Pointer(&data[0]) |
||||
} |
||||
var err C.cl_int |
||||
clBuffer := C.clCreateImage(ctx.clContext, C.cl_mem_flags(flags), &format, &desc, dataPtr, &err) |
||||
if err != C.CL_SUCCESS { |
||||
return nil, toError(err) |
||||
} |
||||
if clBuffer == nil { |
||||
return nil, ErrUnknown |
||||
} |
||||
return newMemObject(clBuffer, len(data)), nil |
||||
} |
||||
|
||||
func (ctx *Context) CreateImageSimple(flags MemFlag, width, height int, channelOrder ChannelOrder, channelDataType ChannelDataType, data []byte) (*MemObject, error) { |
||||
format := ImageFormat{channelOrder, channelDataType} |
||||
desc := ImageDescription{ |
||||
Type: MemObjectTypeImage2D, |
||||
Width: width, |
||||
Height: height, |
||||
} |
||||
return ctx.CreateImage(flags, format, desc, data) |
||||
} |
||||
|
||||
func (ctx *Context) CreateImageFromImage(flags MemFlag, img image.Image) (*MemObject, error) { |
||||
switch m := img.(type) { |
||||
case *image.Gray: |
||||
format := ImageFormat{ChannelOrderIntensity, ChannelDataTypeUNormInt8} |
||||
desc := ImageDescription{ |
||||
Type: MemObjectTypeImage2D, |
||||
Width: m.Bounds().Dx(), |
||||
Height: m.Bounds().Dy(), |
||||
RowPitch: m.Stride, |
||||
} |
||||
return ctx.CreateImage(flags, format, desc, m.Pix) |
||||
case *image.RGBA: |
||||
format := ImageFormat{ChannelOrderRGBA, ChannelDataTypeUNormInt8} |
||||
desc := ImageDescription{ |
||||
Type: MemObjectTypeImage2D, |
||||
Width: m.Bounds().Dx(), |
||||
Height: m.Bounds().Dy(), |
||||
RowPitch: m.Stride, |
||||
} |
||||
return ctx.CreateImage(flags, format, desc, m.Pix) |
||||
} |
||||
|
||||
b := img.Bounds() |
||||
w := b.Dx() |
||||
h := b.Dy() |
||||
data := make([]byte, w*h*4) |
||||
dataOffset := 0 |
||||
for y := 0; y < h; y++ { |
||||
for x := 0; x < w; x++ { |
||||
c := img.At(x+b.Min.X, y+b.Min.Y) |
||||
r, g, b, a := c.RGBA() |
||||
data[dataOffset] = uint8(r >> 8) |
||||
data[dataOffset+1] = uint8(g >> 8) |
||||
data[dataOffset+2] = uint8(b >> 8) |
||||
data[dataOffset+3] = uint8(a >> 8) |
||||
dataOffset += 4 |
||||
} |
||||
} |
||||
return ctx.CreateImageSimple(flags, w, h, ChannelOrderRGBA, ChannelDataTypeUNormInt8, data) |
||||
} |
@ -0,0 +1,127 @@ |
||||
package cl |
||||
|
||||
// #ifdef __APPLE__
|
||||
// #include "OpenCL/opencl.h"
|
||||
// #else
|
||||
// #include "cl.h"
|
||||
// #endif
|
||||
import "C" |
||||
|
||||
import ( |
||||
"fmt" |
||||
"unsafe" |
||||
) |
||||
|
||||
type ErrUnsupportedArgumentType struct { |
||||
Index int |
||||
Value interface{} |
||||
} |
||||
|
||||
func (e ErrUnsupportedArgumentType) Error() string { |
||||
return fmt.Sprintf("cl: unsupported argument type for index %d: %+v", e.Index, e.Value) |
||||
} |
||||
|
||||
type Kernel struct { |
||||
clKernel C.cl_kernel |
||||
name string |
||||
} |
||||
|
||||
type LocalBuffer int |
||||
|
||||
func releaseKernel(k *Kernel) { |
||||
if k.clKernel != nil { |
||||
C.clReleaseKernel(k.clKernel) |
||||
k.clKernel = nil |
||||
} |
||||
} |
||||
|
||||
func (k *Kernel) Release() { |
||||
releaseKernel(k) |
||||
} |
||||
|
||||
func (k *Kernel) SetArgs(args ...interface{}) error { |
||||
for index, arg := range args { |
||||
if err := k.SetArg(index, arg); err != nil { |
||||
return err |
||||
} |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
func (k *Kernel) SetArg(index int, arg interface{}) error { |
||||
switch val := arg.(type) { |
||||
case uint8: |
||||
return k.SetArgUint8(index, val) |
||||
case int8: |
||||
return k.SetArgInt8(index, val) |
||||
case uint32: |
||||
return k.SetArgUint32(index, val) |
||||
case uint64: |
||||
return k.SetArgUint64(index, val) |
||||
case int32: |
||||
return k.SetArgInt32(index, val) |
||||
case float32: |
||||
return k.SetArgFloat32(index, val) |
||||
case *MemObject: |
||||
return k.SetArgBuffer(index, val) |
||||
case LocalBuffer: |
||||
return k.SetArgLocal(index, int(val)) |
||||
default: |
||||
return ErrUnsupportedArgumentType{Index: index, Value: arg} |
||||
} |
||||
} |
||||
|
||||
func (k *Kernel) SetArgBuffer(index int, buffer *MemObject) error { |
||||
return k.SetArgUnsafe(index, int(unsafe.Sizeof(buffer.clMem)), unsafe.Pointer(&buffer.clMem)) |
||||
} |
||||
|
||||
func (k *Kernel) SetArgFloat32(index int, val float32) error { |
||||
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val)) |
||||
} |
||||
|
||||
func (k *Kernel) SetArgInt8(index int, val int8) error { |
||||
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val)) |
||||
} |
||||
|
||||
func (k *Kernel) SetArgUint8(index int, val uint8) error { |
||||
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val)) |
||||
} |
||||
|
||||
func (k *Kernel) SetArgInt32(index int, val int32) error { |
||||
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val)) |
||||
} |
||||
|
||||
func (k *Kernel) SetArgUint32(index int, val uint32) error { |
||||
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val)) |
||||
} |
||||
|
||||
func (k *Kernel) SetArgUint64(index int, val uint64) error { |
||||
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val)) |
||||
} |
||||
|
||||
func (k *Kernel) SetArgLocal(index int, size int) error { |
||||
return k.SetArgUnsafe(index, size, nil) |
||||
} |
||||
|
||||
func (k *Kernel) SetArgUnsafe(index, argSize int, arg unsafe.Pointer) error { |
||||
//fmt.Println("FUNKY: ", index, argSize)
|
||||
return toError(C.clSetKernelArg(k.clKernel, C.cl_uint(index), C.size_t(argSize), arg)) |
||||
} |
||||
|
||||
func (k *Kernel) PreferredWorkGroupSizeMultiple(device *Device) (int, error) { |
||||
var size C.size_t |
||||
err := C.clGetKernelWorkGroupInfo(k.clKernel, device.nullableId(), C.CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, C.size_t(unsafe.Sizeof(size)), unsafe.Pointer(&size), nil) |
||||
return int(size), toError(err) |
||||
} |
||||
|
||||
func (k *Kernel) WorkGroupSize(device *Device) (int, error) { |
||||
var size C.size_t |
||||
err := C.clGetKernelWorkGroupInfo(k.clKernel, device.nullableId(), C.CL_KERNEL_WORK_GROUP_SIZE, C.size_t(unsafe.Sizeof(size)), unsafe.Pointer(&size), nil) |
||||
return int(size), toError(err) |
||||
} |
||||
|
||||
func (k *Kernel) NumArgs() (int, error) { |
||||
var num C.cl_uint |
||||
err := C.clGetKernelInfo(k.clKernel, C.CL_KERNEL_NUM_ARGS, C.size_t(unsafe.Sizeof(num)), unsafe.Pointer(&num), nil) |
||||
return int(num), toError(err) |
||||
} |
@ -0,0 +1,7 @@ |
||||
// +build !cl12
|
||||
|
||||
package cl |
||||
|
||||
func (k *Kernel) ArgName(index int) (string, error) { |
||||
return "", ErrUnsupported |
||||
} |
@ -0,0 +1,20 @@ |
||||
// +build cl12
|
||||
|
||||
package cl |
||||
|
||||
// #ifdef __APPLE__
|
||||
// #include "OpenCL/opencl.h"
|
||||
// #else
|
||||
// #include "cl.h"
|
||||
// #endif
|
||||
import "C" |
||||
import "unsafe" |
||||
|
||||
func (k *Kernel) ArgName(index int) (string, error) { |
||||
var strC [1024]byte |
||||
var strN C.size_t |
||||
if err := C.clGetKernelArgInfo(k.clKernel, C.cl_uint(index), C.CL_KERNEL_ARG_NAME, 1024, unsafe.Pointer(&strC[0]), &strN); err != C.CL_SUCCESS { |
||||
return "", toError(err) |
||||
} |
||||
return string(strC[:strN]), nil |
||||
} |
@ -0,0 +1,83 @@ |
||||
package cl |
||||
|
||||
// #ifdef __APPLE__
|
||||
// #include "OpenCL/opencl.h"
|
||||
// #else
|
||||
// #include "cl.h"
|
||||
// #endif
|
||||
import "C" |
||||
|
||||
import "unsafe" |
||||
|
||||
const maxPlatforms = 32 |
||||
|
||||
type Platform struct { |
||||
id C.cl_platform_id |
||||
} |
||||
|
||||
// Obtain the list of platforms available.
|
||||
func GetPlatforms() ([]*Platform, error) { |
||||
var platformIds [maxPlatforms]C.cl_platform_id |
||||
var nPlatforms C.cl_uint |
||||
if err := C.clGetPlatformIDs(C.cl_uint(maxPlatforms), &platformIds[0], &nPlatforms); err != C.CL_SUCCESS { |
||||
return nil, toError(err) |
||||
} |
||||
platforms := make([]*Platform, nPlatforms) |
||||
for i := 0; i < int(nPlatforms); i++ { |
||||
platforms[i] = &Platform{id: platformIds[i]} |
||||
} |
||||
return platforms, nil |
||||
} |
||||
|
||||
func (p *Platform) GetDevices(deviceType DeviceType) ([]*Device, error) { |
||||
return GetDevices(p, deviceType) |
||||
} |
||||
|
||||
func (p *Platform) getInfoString(param C.cl_platform_info) (string, error) { |
||||
var strC [2048]byte |
||||
var strN C.size_t |
||||
if err := C.clGetPlatformInfo(p.id, param, 2048, unsafe.Pointer(&strC[0]), &strN); err != C.CL_SUCCESS { |
||||
return "", toError(err) |
||||
} |
||||
return string(strC[:(strN - 1)]), nil |
||||
} |
||||
|
||||
func (p *Platform) Name() string { |
||||
if str, err := p.getInfoString(C.CL_PLATFORM_NAME); err != nil { |
||||
panic("Platform.Name() should never fail") |
||||
} else { |
||||
return str |
||||
} |
||||
} |
||||
|
||||
func (p *Platform) Vendor() string { |
||||
if str, err := p.getInfoString(C.CL_PLATFORM_VENDOR); err != nil { |
||||
panic("Platform.Vendor() should never fail") |
||||
} else { |
||||
return str |
||||
} |
||||
} |
||||
|
||||
func (p *Platform) Profile() string { |
||||
if str, err := p.getInfoString(C.CL_PLATFORM_PROFILE); err != nil { |
||||
panic("Platform.Profile() should never fail") |
||||
} else { |
||||
return str |
||||
} |
||||
} |
||||
|
||||
func (p *Platform) Version() string { |
||||
if str, err := p.getInfoString(C.CL_PLATFORM_VERSION); err != nil { |
||||
panic("Platform.Version() should never fail") |
||||
} else { |
||||
return str |
||||
} |
||||
} |
||||
|
||||
func (p *Platform) Extensions() string { |
||||
if str, err := p.getInfoString(C.CL_PLATFORM_EXTENSIONS); err != nil { |
||||
panic("Platform.Extensions() should never fail") |
||||
} else { |
||||
return str |
||||
} |
||||
} |
@ -0,0 +1,105 @@ |
||||
package cl |
||||
|
||||
// #include <stdlib.h>
|
||||
// #ifdef __APPLE__
|
||||
// #include "OpenCL/opencl.h"
|
||||
// #else
|
||||
// #include "cl.h"
|
||||
// #endif
|
||||
import "C" |
||||
|
||||
import ( |
||||
"fmt" |
||||
"runtime" |
||||
"unsafe" |
||||
) |
||||
|
||||
type BuildError struct { |
||||
Message string |
||||
Device *Device |
||||
} |
||||
|
||||
func (e BuildError) Error() string { |
||||
if e.Device != nil { |
||||
return fmt.Sprintf("cl: build error on %q: %s", e.Device.Name(), e.Message) |
||||
} else { |
||||
return fmt.Sprintf("cl: build error: %s", e.Message) |
||||
} |
||||
} |
||||
|
||||
type Program struct { |
||||
clProgram C.cl_program |
||||
devices []*Device |
||||
} |
||||
|
||||
func releaseProgram(p *Program) { |
||||
if p.clProgram != nil { |
||||
C.clReleaseProgram(p.clProgram) |
||||
p.clProgram = nil |
||||
} |
||||
} |
||||
|
||||
func (p *Program) Release() { |
||||
releaseProgram(p) |
||||
} |
||||
|
||||
func (p *Program) BuildProgram(devices []*Device, options string) error { |
||||
var cOptions *C.char |
||||
if options != "" { |
||||
cOptions = C.CString(options) |
||||
defer C.free(unsafe.Pointer(cOptions)) |
||||
} |
||||
var deviceList []C.cl_device_id |
||||
var deviceListPtr *C.cl_device_id |
||||
numDevices := C.cl_uint(len(devices)) |
||||
if devices != nil && len(devices) > 0 { |
||||
deviceList = buildDeviceIdList(devices) |
||||
deviceListPtr = &deviceList[0] |
||||
} |
||||
if err := C.clBuildProgram(p.clProgram, numDevices, deviceListPtr, cOptions, nil, nil); err != C.CL_SUCCESS { |
||||
buffer := make([]byte, 4096) |
||||
var bLen C.size_t |
||||
var err C.cl_int |
||||
|
||||
for _, dev := range p.devices { |
||||
for i := 2; i >= 0; i-- { |
||||
err = C.clGetProgramBuildInfo(p.clProgram, dev.id, C.CL_PROGRAM_BUILD_LOG, C.size_t(len(buffer)), unsafe.Pointer(&buffer[0]), &bLen) |
||||
if err == C.CL_INVALID_VALUE && i > 0 && bLen < 1024*1024 { |
||||
// INVALID_VALUE probably means our buffer isn't large enough
|
||||
buffer = make([]byte, bLen) |
||||
} else { |
||||
break |
||||
} |
||||
} |
||||
if err != C.CL_SUCCESS { |
||||
return toError(err) |
||||
} |
||||
|
||||
if bLen > 1 { |
||||
return BuildError{ |
||||
Device: dev, |
||||
Message: string(buffer[:bLen-1]), |
||||
} |
||||
} |
||||
} |
||||
|
||||
return BuildError{ |
||||
Device: nil, |
||||
Message: "build failed and produced no log entries", |
||||
} |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
func (p *Program) CreateKernel(name string) (*Kernel, error) { |
||||
cName := C.CString(name) |
||||
defer C.free(unsafe.Pointer(cName)) |
||||
var err C.cl_int |
||||
clKernel := C.clCreateKernel(p.clProgram, cName, &err) |
||||
if err != C.CL_SUCCESS { |
||||
return nil, toError(err) |
||||
} |
||||
kernel := &Kernel{clKernel: clKernel, name: name} |
||||
runtime.SetFinalizer(kernel, releaseKernel) |
||||
return kernel, nil |
||||
} |
@ -0,0 +1,193 @@ |
||||
package cl |
||||
|
||||
// #ifdef __APPLE__
|
||||
// #include "OpenCL/opencl.h"
|
||||
// #else
|
||||
// #include "cl.h"
|
||||
// #endif
|
||||
import "C" |
||||
|
||||
import "unsafe" |
||||
|
||||
type CommandQueueProperty int |
||||
|
||||
const ( |
||||
CommandQueueOutOfOrderExecModeEnable CommandQueueProperty = C.CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE |
||||
CommandQueueProfilingEnable CommandQueueProperty = C.CL_QUEUE_PROFILING_ENABLE |
||||
) |
||||
|
||||
type CommandQueue struct { |
||||
clQueue C.cl_command_queue |
||||
device *Device |
||||
} |
||||
|
||||
func releaseCommandQueue(q *CommandQueue) { |
||||
if q.clQueue != nil { |
||||
C.clReleaseCommandQueue(q.clQueue) |
||||
q.clQueue = nil |
||||
} |
||||
} |
||||
|
||||
// Call clReleaseCommandQueue on the CommandQueue. Using the CommandQueue after Release will cause a panick.
|
||||
func (q *CommandQueue) Release() { |
||||
releaseCommandQueue(q) |
||||
} |
||||
|
||||
// Blocks until all previously queued OpenCL commands in a command-queue are issued to the associated device and have completed.
|
||||
func (q *CommandQueue) Finish() error { |
||||
return toError(C.clFinish(q.clQueue)) |
||||
} |
||||
|
||||
// Issues all previously queued OpenCL commands in a command-queue to the device associated with the command-queue.
|
||||
func (q *CommandQueue) Flush() error { |
||||
return toError(C.clFlush(q.clQueue)) |
||||
} |
||||
|
||||
// Enqueues a command to map a region of the buffer object given by buffer into the host address space and returns a pointer to this mapped region.
|
||||
func (q *CommandQueue) EnqueueMapBuffer(buffer *MemObject, blocking bool, flags MapFlag, offset, size int, eventWaitList []*Event) (*MappedMemObject, *Event, error) { |
||||
var event C.cl_event |
||||
var err C.cl_int |
||||
ptr := C.clEnqueueMapBuffer(q.clQueue, buffer.clMem, clBool(blocking), flags.toCl(), C.size_t(offset), C.size_t(size), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event, &err) |
||||
if err != C.CL_SUCCESS { |
||||
return nil, nil, toError(err) |
||||
} |
||||
ev := newEvent(event) |
||||
if ptr == nil { |
||||
return nil, ev, ErrUnknown |
||||
} |
||||
return &MappedMemObject{ptr: ptr, size: size}, ev, nil |
||||
} |
||||
|
||||
// Enqueues a command to map a region of an image object into the host address space and returns a pointer to this mapped region.
|
||||
func (q *CommandQueue) EnqueueMapImage(buffer *MemObject, blocking bool, flags MapFlag, origin, region [3]int, eventWaitList []*Event) (*MappedMemObject, *Event, error) { |
||||
cOrigin := sizeT3(origin) |
||||
cRegion := sizeT3(region) |
||||
var event C.cl_event |
||||
var err C.cl_int |
||||
var rowPitch, slicePitch C.size_t |
||||
ptr := C.clEnqueueMapImage(q.clQueue, buffer.clMem, clBool(blocking), flags.toCl(), &cOrigin[0], &cRegion[0], &rowPitch, &slicePitch, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event, &err) |
||||
if err != C.CL_SUCCESS { |
||||
return nil, nil, toError(err) |
||||
} |
||||
ev := newEvent(event) |
||||
if ptr == nil { |
||||
return nil, ev, ErrUnknown |
||||
} |
||||
size := 0 // TODO: could calculate this
|
||||
return &MappedMemObject{ptr: ptr, size: size, rowPitch: int(rowPitch), slicePitch: int(slicePitch)}, ev, nil |
||||
} |
||||
|
||||
// Enqueues a command to unmap a previously mapped region of a memory object.
|
||||
func (q *CommandQueue) EnqueueUnmapMemObject(buffer *MemObject, mappedObj *MappedMemObject, eventWaitList []*Event) (*Event, error) { |
||||
var event C.cl_event |
||||
if err := C.clEnqueueUnmapMemObject(q.clQueue, buffer.clMem, mappedObj.ptr, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event); err != C.CL_SUCCESS { |
||||
return nil, toError(err) |
||||
} |
||||
return newEvent(event), nil |
||||
} |
||||
|
||||
// Enqueues a command to copy a buffer object to another buffer object.
|
||||
func (q *CommandQueue) EnqueueCopyBuffer(srcBuffer, dstBuffer *MemObject, srcOffset, dstOffset, byteCount int, eventWaitList []*Event) (*Event, error) { |
||||
var event C.cl_event |
||||
err := toError(C.clEnqueueCopyBuffer(q.clQueue, srcBuffer.clMem, dstBuffer.clMem, C.size_t(srcOffset), C.size_t(dstOffset), C.size_t(byteCount), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event)) |
||||
return newEvent(event), err |
||||
} |
||||
|
||||
// Enqueue commands to write to a buffer object from host memory.
|
||||
func (q *CommandQueue) EnqueueWriteBuffer(buffer *MemObject, blocking bool, offset, dataSize int, dataPtr unsafe.Pointer, eventWaitList []*Event) (*Event, error) { |
||||
var event C.cl_event |
||||
err := toError(C.clEnqueueWriteBuffer(q.clQueue, buffer.clMem, clBool(blocking), C.size_t(offset), C.size_t(dataSize), dataPtr, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event)) |
||||
return newEvent(event), err |
||||
} |
||||
|
||||
func (q *CommandQueue) EnqueueWriteBufferFloat32(buffer *MemObject, blocking bool, offset int, data []float32, eventWaitList []*Event) (*Event, error) { |
||||
dataPtr := unsafe.Pointer(&data[0]) |
||||
dataSize := int(unsafe.Sizeof(data[0])) * len(data) |
||||
return q.EnqueueWriteBuffer(buffer, blocking, offset, dataSize, dataPtr, eventWaitList) |
||||
} |
||||
|
||||
// Enqueue commands to read from a buffer object to host memory.
|
||||
func (q *CommandQueue) EnqueueReadBuffer(buffer *MemObject, blocking bool, offset, dataSize int, dataPtr unsafe.Pointer, eventWaitList []*Event) (*Event, error) { |
||||
var event C.cl_event |
||||
err := toError(C.clEnqueueReadBuffer(q.clQueue, buffer.clMem, clBool(blocking), C.size_t(offset), C.size_t(dataSize), dataPtr, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event)) |
||||
return newEvent(event), err |
||||
} |
||||
|
||||
func (q *CommandQueue) EnqueueReadBufferFloat32(buffer *MemObject, blocking bool, offset int, data []float32, eventWaitList []*Event) (*Event, error) { |
||||
dataPtr := unsafe.Pointer(&data[0]) |
||||
dataSize := int(unsafe.Sizeof(data[0])) * len(data) |
||||
return q.EnqueueReadBuffer(buffer, blocking, offset, dataSize, dataPtr, eventWaitList) |
||||
} |
||||
|
||||
// Enqueues a command to execute a kernel on a device.
|
||||
func (q *CommandQueue) EnqueueNDRangeKernel(kernel *Kernel, globalWorkOffset, globalWorkSize, localWorkSize []int, eventWaitList []*Event) (*Event, error) { |
||||
workDim := len(globalWorkSize) |
||||
var globalWorkOffsetList []C.size_t |
||||
var globalWorkOffsetPtr *C.size_t |
||||
if globalWorkOffset != nil { |
||||
globalWorkOffsetList = make([]C.size_t, len(globalWorkOffset)) |
||||
for i, off := range globalWorkOffset { |
||||
globalWorkOffsetList[i] = C.size_t(off) |
||||
} |
||||
globalWorkOffsetPtr = &globalWorkOffsetList[0] |
||||
} |
||||
var globalWorkSizeList []C.size_t |
||||
var globalWorkSizePtr *C.size_t |
||||
if globalWorkSize != nil { |
||||
globalWorkSizeList = make([]C.size_t, len(globalWorkSize)) |
||||
for i, off := range globalWorkSize { |
||||
globalWorkSizeList[i] = C.size_t(off) |
||||
} |
||||
globalWorkSizePtr = &globalWorkSizeList[0] |
||||
} |
||||
var localWorkSizeList []C.size_t |
||||
var localWorkSizePtr *C.size_t |
||||
if localWorkSize != nil { |
||||
localWorkSizeList = make([]C.size_t, len(localWorkSize)) |
||||
for i, off := range localWorkSize { |
||||
localWorkSizeList[i] = C.size_t(off) |
||||
} |
||||
localWorkSizePtr = &localWorkSizeList[0] |
||||
} |
||||
var event C.cl_event |
||||
err := toError(C.clEnqueueNDRangeKernel(q.clQueue, kernel.clKernel, C.cl_uint(workDim), globalWorkOffsetPtr, globalWorkSizePtr, localWorkSizePtr, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event)) |
||||
return newEvent(event), err |
||||
} |
||||
|
||||
// Enqueues a command to read from a 2D or 3D image object to host memory.
|
||||
func (q *CommandQueue) EnqueueReadImage(image *MemObject, blocking bool, origin, region [3]int, rowPitch, slicePitch int, data []byte, eventWaitList []*Event) (*Event, error) { |
||||
cOrigin := sizeT3(origin) |
||||
cRegion := sizeT3(region) |
||||
var event C.cl_event |
||||
err := toError(C.clEnqueueReadImage(q.clQueue, image.clMem, clBool(blocking), &cOrigin[0], &cRegion[0], C.size_t(rowPitch), C.size_t(slicePitch), unsafe.Pointer(&data[0]), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event)) |
||||
return newEvent(event), err |
||||
} |
||||
|
||||
// Enqueues a command to write from a 2D or 3D image object to host memory.
|
||||
func (q *CommandQueue) EnqueueWriteImage(image *MemObject, blocking bool, origin, region [3]int, rowPitch, slicePitch int, data []byte, eventWaitList []*Event) (*Event, error) { |
||||
cOrigin := sizeT3(origin) |
||||
cRegion := sizeT3(region) |
||||
var event C.cl_event |
||||
err := toError(C.clEnqueueWriteImage(q.clQueue, image.clMem, clBool(blocking), &cOrigin[0], &cRegion[0], C.size_t(rowPitch), C.size_t(slicePitch), unsafe.Pointer(&data[0]), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event)) |
||||
return newEvent(event), err |
||||
} |
||||
|
||||
func (q *CommandQueue) EnqueueFillBuffer(buffer *MemObject, pattern unsafe.Pointer, patternSize, offset, size int, eventWaitList []*Event) (*Event, error) { |
||||
var event C.cl_event |
||||
err := toError(C.clEnqueueFillBuffer(q.clQueue, buffer.clMem, pattern, C.size_t(patternSize), C.size_t(offset), C.size_t(size), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event)) |
||||
return newEvent(event), err |
||||
} |
||||
|
||||
// A synchronization point that enqueues a barrier operation.
|
||||
func (q *CommandQueue) EnqueueBarrierWithWaitList(eventWaitList []*Event) (*Event, error) { |
||||
var event C.cl_event |
||||
err := toError(C.clEnqueueBarrierWithWaitList(q.clQueue, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event)) |
||||
return newEvent(event), err |
||||
} |
||||
|
||||
// Enqueues a marker command which waits for either a list of events to complete, or all previously enqueued commands to complete.
|
||||
func (q *CommandQueue) EnqueueMarkerWithWaitList(eventWaitList []*Event) (*Event, error) { |
||||
var event C.cl_event |
||||
err := toError(C.clEnqueueMarkerWithWaitList(q.clQueue, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event)) |
||||
return newEvent(event), err |
||||
} |
@ -0,0 +1,487 @@ |
||||
package cl |
||||
|
||||
// #ifdef __APPLE__
|
||||
// #include "OpenCL/opencl.h"
|
||||
// #else
|
||||
// #include "cl.h"
|
||||
// #endif
|
||||
import "C" |
||||
|
||||
import ( |
||||
"errors" |
||||
"fmt" |
||||
"reflect" |
||||
"runtime" |
||||
"strings" |
||||
"unsafe" |
||||
) |
||||
|
||||
var ( |
||||
ErrUnknown = errors.New("cl: unknown error") // Generally an unexpected result from an OpenCL function (e.g. CL_SUCCESS but null pointer)
|
||||
) |
||||
|
||||
type ErrOther int |
||||
|
||||
func (e ErrOther) Error() string { |
||||
return fmt.Sprintf("cl: error %d", int(e)) |
||||
} |
||||
|
||||
var ( |
||||
ErrDeviceNotFound = errors.New("cl: Device Not Found") |
||||
ErrDeviceNotAvailable = errors.New("cl: Device Not Available") |
||||
ErrCompilerNotAvailable = errors.New("cl: Compiler Not Available") |
||||
ErrMemObjectAllocationFailure = errors.New("cl: Mem Object Allocation Failure") |
||||
ErrOutOfResources = errors.New("cl: Out Of Resources") |
||||
ErrOutOfHostMemory = errors.New("cl: Out Of Host Memory") |
||||
ErrProfilingInfoNotAvailable = errors.New("cl: Profiling Info Not Available") |
||||
ErrMemCopyOverlap = errors.New("cl: Mem Copy Overlap") |
||||
ErrImageFormatMismatch = errors.New("cl: Image Format Mismatch") |
||||
ErrImageFormatNotSupported = errors.New("cl: Image Format Not Supported") |
||||
ErrBuildProgramFailure = errors.New("cl: Build Program Failure") |
||||
ErrMapFailure = errors.New("cl: Map Failure") |
||||
ErrMisalignedSubBufferOffset = errors.New("cl: Misaligned Sub Buffer Offset") |
||||
ErrExecStatusErrorForEventsInWaitList = errors.New("cl: Exec Status Error For Events In Wait List") |
||||
ErrCompileProgramFailure = errors.New("cl: Compile Program Failure") |
||||
ErrLinkerNotAvailable = errors.New("cl: Linker Not Available") |
||||
ErrLinkProgramFailure = errors.New("cl: Link Program Failure") |
||||
ErrDevicePartitionFailed = errors.New("cl: Device Partition Failed") |
||||
ErrKernelArgInfoNotAvailable = errors.New("cl: Kernel Arg Info Not Available") |
||||
ErrInvalidValue = errors.New("cl: Invalid Value") |
||||
ErrInvalidDeviceType = errors.New("cl: Invalid Device Type") |
||||
ErrInvalidPlatform = errors.New("cl: Invalid Platform") |
||||
ErrInvalidDevice = errors.New("cl: Invalid Device") |
||||
ErrInvalidContext = errors.New("cl: Invalid Context") |
||||
ErrInvalidQueueProperties = errors.New("cl: Invalid Queue Properties") |
||||
ErrInvalidCommandQueue = errors.New("cl: Invalid Command Queue") |
||||
ErrInvalidHostPtr = errors.New("cl: Invalid Host Ptr") |
||||
ErrInvalidMemObject = errors.New("cl: Invalid Mem Object") |
||||
ErrInvalidImageFormatDescriptor = errors.New("cl: Invalid Image Format Descriptor") |
||||
ErrInvalidImageSize = errors.New("cl: Invalid Image Size") |
||||
ErrInvalidSampler = errors.New("cl: Invalid Sampler") |
||||
ErrInvalidBinary = errors.New("cl: Invalid Binary") |
||||
ErrInvalidBuildOptions = errors.New("cl: Invalid Build Options") |
||||
ErrInvalidProgram = errors.New("cl: Invalid Program") |
||||
ErrInvalidProgramExecutable = errors.New("cl: Invalid Program Executable") |
||||
ErrInvalidKernelName = errors.New("cl: Invalid Kernel Name") |
||||
ErrInvalidKernelDefinition = errors.New("cl: Invalid Kernel Definition") |
||||
ErrInvalidKernel = errors.New("cl: Invalid Kernel") |
||||
ErrInvalidArgIndex = errors.New("cl: Invalid Arg Index") |
||||
ErrInvalidArgValue = errors.New("cl: Invalid Arg Value") |
||||
ErrInvalidArgSize = errors.New("cl: Invalid Arg Size") |
||||
ErrInvalidKernelArgs = errors.New("cl: Invalid Kernel Args") |
||||
ErrInvalidWorkDimension = errors.New("cl: Invalid Work Dimension") |
||||
ErrInvalidWorkGroupSize = errors.New("cl: Invalid Work Group Size") |
||||
ErrInvalidWorkItemSize = errors.New("cl: Invalid Work Item Size") |
||||
ErrInvalidGlobalOffset = errors.New("cl: Invalid Global Offset") |
||||
ErrInvalidEventWaitList = errors.New("cl: Invalid Event Wait List") |
||||
ErrInvalidEvent = errors.New("cl: Invalid Event") |
||||
ErrInvalidOperation = errors.New("cl: Invalid Operation") |
||||
ErrInvalidGlObject = errors.New("cl: Invalid Gl Object") |
||||
ErrInvalidBufferSize = errors.New("cl: Invalid Buffer Size") |
||||
ErrInvalidMipLevel = errors.New("cl: Invalid Mip Level") |
||||
ErrInvalidGlobalWorkSize = errors.New("cl: Invalid Global Work Size") |
||||
ErrInvalidProperty = errors.New("cl: Invalid Property") |
||||
ErrInvalidImageDescriptor = errors.New("cl: Invalid Image Descriptor") |
||||
ErrInvalidCompilerOptions = errors.New("cl: Invalid Compiler Options") |
||||
ErrInvalidLinkerOptions = errors.New("cl: Invalid Linker Options") |
||||
ErrInvalidDevicePartitionCount = errors.New("cl: Invalid Device Partition Count") |
||||
) |
||||
var errorMap = map[C.cl_int]error{ |
||||
C.CL_SUCCESS: nil, |
||||
C.CL_DEVICE_NOT_FOUND: ErrDeviceNotFound, |
||||
C.CL_DEVICE_NOT_AVAILABLE: ErrDeviceNotAvailable, |
||||
C.CL_COMPILER_NOT_AVAILABLE: ErrCompilerNotAvailable, |
||||
C.CL_MEM_OBJECT_ALLOCATION_FAILURE: ErrMemObjectAllocationFailure, |
||||
C.CL_OUT_OF_RESOURCES: ErrOutOfResources, |
||||
C.CL_OUT_OF_HOST_MEMORY: ErrOutOfHostMemory, |
||||
C.CL_PROFILING_INFO_NOT_AVAILABLE: ErrProfilingInfoNotAvailable, |
||||
C.CL_MEM_COPY_OVERLAP: ErrMemCopyOverlap, |
||||
C.CL_IMAGE_FORMAT_MISMATCH: ErrImageFormatMismatch, |
||||
C.CL_IMAGE_FORMAT_NOT_SUPPORTED: ErrImageFormatNotSupported, |
||||
C.CL_BUILD_PROGRAM_FAILURE: ErrBuildProgramFailure, |
||||
C.CL_MAP_FAILURE: ErrMapFailure, |
||||
C.CL_MISALIGNED_SUB_BUFFER_OFFSET: ErrMisalignedSubBufferOffset, |
||||
C.CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: ErrExecStatusErrorForEventsInWaitList, |
||||
C.CL_INVALID_VALUE: ErrInvalidValue, |
||||
C.CL_INVALID_DEVICE_TYPE: ErrInvalidDeviceType, |
||||
C.CL_INVALID_PLATFORM: ErrInvalidPlatform, |
||||
C.CL_INVALID_DEVICE: ErrInvalidDevice, |
||||
C.CL_INVALID_CONTEXT: ErrInvalidContext, |
||||
C.CL_INVALID_QUEUE_PROPERTIES: ErrInvalidQueueProperties, |
||||
C.CL_INVALID_COMMAND_QUEUE: ErrInvalidCommandQueue, |
||||
C.CL_INVALID_HOST_PTR: ErrInvalidHostPtr, |
||||
C.CL_INVALID_MEM_OBJECT: ErrInvalidMemObject, |
||||
C.CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: ErrInvalidImageFormatDescriptor, |
||||
C.CL_INVALID_IMAGE_SIZE: ErrInvalidImageSize, |
||||
C.CL_INVALID_SAMPLER: ErrInvalidSampler, |
||||
C.CL_INVALID_BINARY: ErrInvalidBinary, |
||||
C.CL_INVALID_BUILD_OPTIONS: ErrInvalidBuildOptions, |
||||
C.CL_INVALID_PROGRAM: ErrInvalidProgram, |
||||
C.CL_INVALID_PROGRAM_EXECUTABLE: ErrInvalidProgramExecutable, |
||||
C.CL_INVALID_KERNEL_NAME: ErrInvalidKernelName, |
||||
C.CL_INVALID_KERNEL_DEFINITION: ErrInvalidKernelDefinition, |
||||
C.CL_INVALID_KERNEL: ErrInvalidKernel, |
||||
C.CL_INVALID_ARG_INDEX: ErrInvalidArgIndex, |
||||
C.CL_INVALID_ARG_VALUE: ErrInvalidArgValue, |
||||
C.CL_INVALID_ARG_SIZE: ErrInvalidArgSize, |
||||
C.CL_INVALID_KERNEL_ARGS: ErrInvalidKernelArgs, |
||||
C.CL_INVALID_WORK_DIMENSION: ErrInvalidWorkDimension, |
||||
C.CL_INVALID_WORK_GROUP_SIZE: ErrInvalidWorkGroupSize, |
||||
C.CL_INVALID_WORK_ITEM_SIZE: ErrInvalidWorkItemSize, |
||||
C.CL_INVALID_GLOBAL_OFFSET: ErrInvalidGlobalOffset, |
||||
C.CL_INVALID_EVENT_WAIT_LIST: ErrInvalidEventWaitList, |
||||
C.CL_INVALID_EVENT: ErrInvalidEvent, |
||||
C.CL_INVALID_OPERATION: ErrInvalidOperation, |
||||
C.CL_INVALID_GL_OBJECT: ErrInvalidGlObject, |
||||
C.CL_INVALID_BUFFER_SIZE: ErrInvalidBufferSize, |
||||
C.CL_INVALID_MIP_LEVEL: ErrInvalidMipLevel, |
||||
C.CL_INVALID_GLOBAL_WORK_SIZE: ErrInvalidGlobalWorkSize, |
||||
C.CL_INVALID_PROPERTY: ErrInvalidProperty, |
||||
} |
||||
|
||||
func toError(code C.cl_int) error { |
||||
if err, ok := errorMap[code]; ok { |
||||
return err |
||||
} |
||||
return ErrOther(code) |
||||
} |
||||
|
||||
type LocalMemType int |
||||
|
||||
const ( |
||||
LocalMemTypeNone LocalMemType = C.CL_NONE |
||||
LocalMemTypeGlobal LocalMemType = C.CL_GLOBAL |
||||
LocalMemTypeLocal LocalMemType = C.CL_LOCAL |
||||
) |
||||
|
||||
var localMemTypeMap = map[LocalMemType]string{ |
||||
LocalMemTypeNone: "None", |
||||
LocalMemTypeGlobal: "Global", |
||||
LocalMemTypeLocal: "Local", |
||||
} |
||||
|
||||
func (t LocalMemType) String() string { |
||||
name := localMemTypeMap[t] |
||||
if name == "" { |
||||
name = "Unknown" |
||||
} |
||||
return name |
||||
} |
||||
|
||||
type ExecCapability int |
||||
|
||||
const ( |
||||
ExecCapabilityKernel ExecCapability = C.CL_EXEC_KERNEL // The OpenCL device can execute OpenCL kernels.
|
||||
ExecCapabilityNativeKernel ExecCapability = C.CL_EXEC_NATIVE_KERNEL // The OpenCL device can execute native kernels.
|
||||
) |
||||
|
||||
func (ec ExecCapability) String() string { |
||||
var parts []string |
||||
if ec&ExecCapabilityKernel != 0 { |
||||
parts = append(parts, "Kernel") |
||||
} |
||||
if ec&ExecCapabilityNativeKernel != 0 { |
||||
parts = append(parts, "NativeKernel") |
||||
} |
||||
if parts == nil { |
||||
return "" |
||||
} |
||||
return strings.Join(parts, "|") |
||||
} |
||||
|
||||
type MemCacheType int |
||||
|
||||
const ( |
||||
MemCacheTypeNone MemCacheType = C.CL_NONE |
||||
MemCacheTypeReadOnlyCache MemCacheType = C.CL_READ_ONLY_CACHE |
||||
MemCacheTypeReadWriteCache MemCacheType = C.CL_READ_WRITE_CACHE |
||||
) |
||||
|
||||
func (ct MemCacheType) String() string { |
||||
switch ct { |
||||
case MemCacheTypeNone: |
||||
return "None" |
||||
case MemCacheTypeReadOnlyCache: |
||||
return "ReadOnly" |
||||
case MemCacheTypeReadWriteCache: |
||||
return "ReadWrite" |
||||
} |
||||
return fmt.Sprintf("Unknown(%x)", int(ct)) |
||||
} |
||||
|
||||
type MemFlag int |
||||
|
||||
const ( |
||||
MemReadWrite MemFlag = C.CL_MEM_READ_WRITE |
||||
MemWriteOnly MemFlag = C.CL_MEM_WRITE_ONLY |
||||
MemReadOnly MemFlag = C.CL_MEM_READ_ONLY |
||||
MemUseHostPtr MemFlag = C.CL_MEM_USE_HOST_PTR |
||||
MemAllocHostPtr MemFlag = C.CL_MEM_ALLOC_HOST_PTR |
||||
MemCopyHostPtr MemFlag = C.CL_MEM_COPY_HOST_PTR |
||||
|
||||
MemWriteOnlyHost MemFlag = C.CL_MEM_HOST_WRITE_ONLY |
||||
MemReadOnlyHost MemFlag = C.CL_MEM_HOST_READ_ONLY |
||||
MemNoAccessHost MemFlag = C.CL_MEM_HOST_NO_ACCESS |
||||
) |
||||
|
||||
type MemObjectType int |
||||
|
||||
const ( |
||||
MemObjectTypeBuffer MemObjectType = C.CL_MEM_OBJECT_BUFFER |
||||
MemObjectTypeImage2D MemObjectType = C.CL_MEM_OBJECT_IMAGE2D |
||||
MemObjectTypeImage3D MemObjectType = C.CL_MEM_OBJECT_IMAGE3D |
||||
) |
||||
|
||||
type MapFlag int |
||||
|
||||
const ( |
||||
// This flag specifies that the region being mapped in the memory object is being mapped for reading.
|
||||
MapFlagRead MapFlag = C.CL_MAP_READ |
||||
MapFlagWrite MapFlag = C.CL_MAP_WRITE |
||||
MapFlagWriteInvalidateRegion MapFlag = C.CL_MAP_WRITE_INVALIDATE_REGION |
||||
) |
||||
|
||||
func (mf MapFlag) toCl() C.cl_map_flags { |
||||
return C.cl_map_flags(mf) |
||||
} |
||||
|
||||
type ChannelOrder int |
||||
|
||||
const ( |
||||
ChannelOrderR ChannelOrder = C.CL_R |
||||
ChannelOrderA ChannelOrder = C.CL_A |
||||
ChannelOrderRG ChannelOrder = C.CL_RG |
||||
ChannelOrderRA ChannelOrder = C.CL_RA |
||||
ChannelOrderRGB ChannelOrder = C.CL_RGB |
||||
ChannelOrderRGBA ChannelOrder = C.CL_RGBA |
||||
ChannelOrderBGRA ChannelOrder = C.CL_BGRA |
||||
ChannelOrderARGB ChannelOrder = C.CL_ARGB |
||||
ChannelOrderIntensity ChannelOrder = C.CL_INTENSITY |
||||
ChannelOrderLuminance ChannelOrder = C.CL_LUMINANCE |
||||
ChannelOrderRx ChannelOrder = C.CL_Rx |
||||
ChannelOrderRGx ChannelOrder = C.CL_RGx |
||||
ChannelOrderRGBx ChannelOrder = C.CL_RGBx |
||||
) |
||||
|
||||
var channelOrderNameMap = map[ChannelOrder]string{ |
||||
ChannelOrderR: "R", |
||||
ChannelOrderA: "A", |
||||
ChannelOrderRG: "RG", |
||||
ChannelOrderRA: "RA", |
||||
ChannelOrderRGB: "RGB", |
||||
ChannelOrderRGBA: "RGBA", |
||||
ChannelOrderBGRA: "BGRA", |
||||
ChannelOrderARGB: "ARGB", |
||||
ChannelOrderIntensity: "Intensity", |
||||
ChannelOrderLuminance: "Luminance", |
||||
ChannelOrderRx: "Rx", |
||||
ChannelOrderRGx: "RGx", |
||||
ChannelOrderRGBx: "RGBx", |
||||
} |
||||
|
||||
func (co ChannelOrder) String() string { |
||||
name := channelOrderNameMap[co] |
||||
if name == "" { |
||||
name = fmt.Sprintf("Unknown(%x)", int(co)) |
||||
} |
||||
return name |
||||
} |
||||
|
||||
type ChannelDataType int |
||||
|
||||
const ( |
||||
ChannelDataTypeSNormInt8 ChannelDataType = C.CL_SNORM_INT8 |
||||
ChannelDataTypeSNormInt16 ChannelDataType = C.CL_SNORM_INT16 |
||||
ChannelDataTypeUNormInt8 ChannelDataType = C.CL_UNORM_INT8 |
||||
ChannelDataTypeUNormInt16 ChannelDataType = C.CL_UNORM_INT16 |
||||
ChannelDataTypeUNormShort565 ChannelDataType = C.CL_UNORM_SHORT_565 |
||||
ChannelDataTypeUNormShort555 ChannelDataType = C.CL_UNORM_SHORT_555 |
||||
ChannelDataTypeUNormInt101010 ChannelDataType = C.CL_UNORM_INT_101010 |
||||
ChannelDataTypeSignedInt8 ChannelDataType = C.CL_SIGNED_INT8 |
||||
ChannelDataTypeSignedInt16 ChannelDataType = C.CL_SIGNED_INT16 |
||||
ChannelDataTypeSignedInt32 ChannelDataType = C.CL_SIGNED_INT32 |
||||
ChannelDataTypeUnsignedInt8 ChannelDataType = C.CL_UNSIGNED_INT8 |
||||
ChannelDataTypeUnsignedInt16 ChannelDataType = C.CL_UNSIGNED_INT16 |
||||
ChannelDataTypeUnsignedInt32 ChannelDataType = C.CL_UNSIGNED_INT32 |
||||
ChannelDataTypeHalfFloat ChannelDataType = C.CL_HALF_FLOAT |
||||
ChannelDataTypeFloat ChannelDataType = C.CL_FLOAT |
||||
) |
||||
|
||||
var channelDataTypeNameMap = map[ChannelDataType]string{ |
||||
ChannelDataTypeSNormInt8: "SNormInt8", |
||||
ChannelDataTypeSNormInt16: "SNormInt16", |
||||
ChannelDataTypeUNormInt8: "UNormInt8", |
||||
ChannelDataTypeUNormInt16: "UNormInt16", |
||||
ChannelDataTypeUNormShort565: "UNormShort565", |
||||
ChannelDataTypeUNormShort555: "UNormShort555", |
||||
ChannelDataTypeUNormInt101010: "UNormInt101010", |
||||
ChannelDataTypeSignedInt8: "SignedInt8", |
||||
ChannelDataTypeSignedInt16: "SignedInt16", |
||||
ChannelDataTypeSignedInt32: "SignedInt32", |
||||
ChannelDataTypeUnsignedInt8: "UnsignedInt8", |
||||
ChannelDataTypeUnsignedInt16: "UnsignedInt16", |
||||
ChannelDataTypeUnsignedInt32: "UnsignedInt32", |
||||
ChannelDataTypeHalfFloat: "HalfFloat", |
||||
ChannelDataTypeFloat: "Float", |
||||
} |
||||
|
||||
func (ct ChannelDataType) String() string { |
||||
name := channelDataTypeNameMap[ct] |
||||
if name == "" { |
||||
name = fmt.Sprintf("Unknown(%x)", int(ct)) |
||||
} |
||||
return name |
||||
} |
||||
|
||||
type ImageFormat struct { |
||||
ChannelOrder ChannelOrder |
||||
ChannelDataType ChannelDataType |
||||
} |
||||
|
||||
func (f ImageFormat) toCl() C.cl_image_format { |
||||
var format C.cl_image_format |
||||
format.image_channel_order = C.cl_channel_order(f.ChannelOrder) |
||||
format.image_channel_data_type = C.cl_channel_type(f.ChannelDataType) |
||||
return format |
||||
} |
||||
|
||||
type ProfilingInfo int |
||||
|
||||
const ( |
||||
// A 64-bit value that describes the current device time counter in
|
||||
// nanoseconds when the command identified by event is enqueued in
|
||||
// a command-queue by the host.
|
||||
ProfilingInfoCommandQueued ProfilingInfo = C.CL_PROFILING_COMMAND_QUEUED |
||||
// A 64-bit value that describes the current device time counter in
|
||||
// nanoseconds when the command identified by event that has been
|
||||
// enqueued is submitted by the host to the device associated with the command-queue.
|
||||
ProfilingInfoCommandSubmit ProfilingInfo = C.CL_PROFILING_COMMAND_SUBMIT |
||||
// A 64-bit value that describes the current device time counter in
|
||||
// nanoseconds when the command identified by event starts execution on the device.
|
||||
ProfilingInfoCommandStart ProfilingInfo = C.CL_PROFILING_COMMAND_START |
||||
// A 64-bit value that describes the current device time counter in
|
||||
// nanoseconds when the command identified by event has finished
|
||||
// execution on the device.
|
||||
ProfilingInfoCommandEnd ProfilingInfo = C.CL_PROFILING_COMMAND_END |
||||
) |
||||
|
||||
type CommmandExecStatus int |
||||
|
||||
const ( |
||||
CommmandExecStatusComplete CommmandExecStatus = C.CL_COMPLETE |
||||
CommmandExecStatusRunning CommmandExecStatus = C.CL_RUNNING |
||||
CommmandExecStatusSubmitted CommmandExecStatus = C.CL_SUBMITTED |
||||
CommmandExecStatusQueued CommmandExecStatus = C.CL_QUEUED |
||||
) |
||||
|
||||
type Event struct { |
||||
clEvent C.cl_event |
||||
} |
||||
|
||||
func releaseEvent(ev *Event) { |
||||
if ev.clEvent != nil { |
||||
C.clReleaseEvent(ev.clEvent) |
||||
ev.clEvent = nil |
||||
} |
||||
} |
||||
|
||||
func (e *Event) Release() { |
||||
releaseEvent(e) |
||||
} |
||||
|
||||
func (e *Event) GetEventProfilingInfo(paramName ProfilingInfo) (int64, error) { |
||||
var paramValue C.cl_ulong |
||||
if err := C.clGetEventProfilingInfo(e.clEvent, C.cl_profiling_info(paramName), C.size_t(unsafe.Sizeof(paramValue)), unsafe.Pointer(¶mValue), nil); err != C.CL_SUCCESS { |
||||
return 0, toError(err) |
||||
} |
||||
return int64(paramValue), nil |
||||
} |
||||
|
||||
// Sets the execution status of a user event object.
|
||||
//
|
||||
// `status` specifies the new execution status to be set and
|
||||
// can be CL_COMPLETE or a negative integer value to indicate
|
||||
// an error. A negative integer value causes all enqueued commands
|
||||
// that wait on this user event to be terminated. clSetUserEventStatus
|
||||
// can only be called once to change the execution status of event.
|
||||
func (e *Event) SetUserEventStatus(status int) error { |
||||
return toError(C.clSetUserEventStatus(e.clEvent, C.cl_int(status))) |
||||
} |
||||
|
||||
// Waits on the host thread for commands identified by event objects in
|
||||
// events to complete. A command is considered complete if its execution
|
||||
// status is CL_COMPLETE or a negative value. The events specified in
|
||||
// event_list act as synchronization points.
|
||||
//
|
||||
// If the cl_khr_gl_event extension is enabled, event objects can also be
|
||||
// used to reflect the status of an OpenGL sync object. The sync object
|
||||
// in turn refers to a fence command executing in an OpenGL command
|
||||
// stream. This provides another method of coordinating sharing of buffers
|
||||
// and images between OpenGL and OpenCL.
|
||||
func WaitForEvents(events []*Event) error { |
||||
return toError(C.clWaitForEvents(C.cl_uint(len(events)), eventListPtr(events))) |
||||
} |
||||
|
||||
func newEvent(clEvent C.cl_event) *Event { |
||||
ev := &Event{clEvent: clEvent} |
||||
runtime.SetFinalizer(ev, releaseEvent) |
||||
return ev |
||||
} |
||||
|
||||
func eventListPtr(el []*Event) *C.cl_event { |
||||
if el == nil { |
||||
return nil |
||||
} |
||||
elist := make([]C.cl_event, len(el)) |
||||
for i, e := range el { |
||||
elist[i] = e.clEvent |
||||
} |
||||
return (*C.cl_event)(&elist[0]) |
||||
} |
||||
|
||||
func clBool(b bool) C.cl_bool { |
||||
if b { |
||||
return C.CL_TRUE |
||||
} |
||||
return C.CL_FALSE |
||||
} |
||||
|
||||
func sizeT3(i3 [3]int) [3]C.size_t { |
||||
var val [3]C.size_t |
||||
val[0] = C.size_t(i3[0]) |
||||
val[1] = C.size_t(i3[1]) |
||||
val[2] = C.size_t(i3[2]) |
||||
return val |
||||
} |
||||
|
||||
type MappedMemObject struct { |
||||
ptr unsafe.Pointer |
||||
size int |
||||
rowPitch int |
||||
slicePitch int |
||||
} |
||||
|
||||
func (mb *MappedMemObject) ByteSlice() []byte { |
||||
var byteSlice []byte |
||||
sliceHeader := (*reflect.SliceHeader)(unsafe.Pointer(&byteSlice)) |
||||
sliceHeader.Cap = mb.size |
||||
sliceHeader.Len = mb.size |
||||
sliceHeader.Data = uintptr(mb.ptr) |
||||
return byteSlice |
||||
} |
||||
|
||||
func (mb *MappedMemObject) Ptr() unsafe.Pointer { |
||||
return mb.ptr |
||||
} |
||||
|
||||
func (mb *MappedMemObject) Size() int { |
||||
return mb.size |
||||
} |
||||
|
||||
func (mb *MappedMemObject) RowPitch() int { |
||||
return mb.rowPitch |
||||
} |
||||
|
||||
func (mb *MappedMemObject) SlicePitch() int { |
||||
return mb.slicePitch |
||||
} |
@ -0,0 +1,71 @@ |
||||
// +build cl12
|
||||
|
||||
package cl |
||||
|
||||
// #ifdef __APPLE__
|
||||
// #include "OpenCL/opencl.h"
|
||||
// #else
|
||||
// #include "cl.h"
|
||||
// #endif
|
||||
import "C" |
||||
|
||||
const ( |
||||
ChannelDataTypeUNormInt24 ChannelDataType = C.CL_UNORM_INT24 |
||||
ChannelOrderDepth ChannelOrder = C.CL_DEPTH |
||||
ChannelOrderDepthStencil ChannelOrder = C.CL_DEPTH_STENCIL |
||||
MemHostNoAccess MemFlag = C.CL_MEM_HOST_NO_ACCESS // OpenCL 1.2
|
||||
MemHostReadOnly MemFlag = C.CL_MEM_HOST_READ_ONLY // OpenCL 1.2
|
||||
MemHostWriteOnly MemFlag = C.CL_MEM_HOST_WRITE_ONLY // OpenCL 1.2
|
||||
MemObjectTypeImage1D MemObjectType = C.CL_MEM_OBJECT_IMAGE1D |
||||
MemObjectTypeImage1DArray MemObjectType = C.CL_MEM_OBJECT_IMAGE1D_ARRAY |
||||
MemObjectTypeImage1DBuffer MemObjectType = C.CL_MEM_OBJECT_IMAGE1D_BUFFER |
||||
MemObjectTypeImage2DArray MemObjectType = C.CL_MEM_OBJECT_IMAGE2D_ARRAY |
||||
// This flag specifies that the region being mapped in the memory object is being mapped for writing.
|
||||
//
|
||||
// The contents of the region being mapped are to be discarded. This is typically the case when the
|
||||
// region being mapped is overwritten by the host. This flag allows the implementation to no longer
|
||||
// guarantee that the pointer returned by clEnqueueMapBuffer or clEnqueueMapImage contains the
|
||||
// latest bits in the region being mapped which can be a significant performance enhancement.
|
||||
MapFlagWriteInvalidateRegion MapFlag = C.CL_MAP_WRITE_INVALIDATE_REGION |
||||
) |
||||
|
||||
func init() { |
||||
errorMap[C.CL_COMPILE_PROGRAM_FAILURE] = ErrCompileProgramFailure |
||||
errorMap[C.CL_DEVICE_PARTITION_FAILED] = ErrDevicePartitionFailed |
||||
errorMap[C.CL_INVALID_COMPILER_OPTIONS] = ErrInvalidCompilerOptions |
||||
errorMap[C.CL_INVALID_DEVICE_PARTITION_COUNT] = ErrInvalidDevicePartitionCount |
||||
errorMap[C.CL_INVALID_IMAGE_DESCRIPTOR] = ErrInvalidImageDescriptor |
||||
errorMap[C.CL_INVALID_LINKER_OPTIONS] = ErrInvalidLinkerOptions |
||||
errorMap[C.CL_KERNEL_ARG_INFO_NOT_AVAILABLE] = ErrKernelArgInfoNotAvailable |
||||
errorMap[C.CL_LINK_PROGRAM_FAILURE] = ErrLinkProgramFailure |
||||
errorMap[C.CL_LINKER_NOT_AVAILABLE] = ErrLinkerNotAvailable |
||||
channelOrderNameMap[ChannelOrderDepth] = "Depth" |
||||
channelOrderNameMap[ChannelOrderDepthStencil] = "DepthStencil" |
||||
channelDataTypeNameMap[ChannelDataTypeUNormInt24] = "UNormInt24" |
||||
} |
||||
|
||||
type ImageDescription struct { |
||||
Type MemObjectType |
||||
Width, Height, Depth int |
||||
ArraySize, RowPitch, SlicePitch int |
||||
NumMipLevels, NumSamples int |
||||
Buffer *MemObject |
||||
} |
||||
|
||||
func (d ImageDescription) toCl() C.cl_image_desc { |
||||
var desc C.cl_image_desc |
||||
desc.image_type = C.cl_mem_object_type(d.Type) |
||||
desc.image_width = C.size_t(d.Width) |
||||
desc.image_height = C.size_t(d.Height) |
||||
desc.image_depth = C.size_t(d.Depth) |
||||
desc.image_array_size = C.size_t(d.ArraySize) |
||||
desc.image_row_pitch = C.size_t(d.RowPitch) |
||||
desc.image_slice_pitch = C.size_t(d.SlicePitch) |
||||
desc.num_mip_levels = C.cl_uint(d.NumMipLevels) |
||||
desc.num_samples = C.cl_uint(d.NumSamples) |
||||
desc.buffer = nil |
||||
if d.Buffer != nil { |
||||
desc.buffer = d.Buffer.clMem |
||||
} |
||||
return desc |
||||
} |
45
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types_darwin.go
generated
vendored
45
Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types_darwin.go
generated
vendored
@ -0,0 +1,45 @@ |
||||
package cl |
||||
|
||||
// #ifdef __APPLE__
|
||||
// #include "OpenCL/opencl.h"
|
||||
// #else
|
||||
// #include "cl.h"
|
||||
// #endif
|
||||
import "C" |
||||
|
||||
// Extension: cl_APPLE_fixed_alpha_channel_orders
|
||||
//
|
||||
// These selectors may be passed to clCreateImage2D() in the cl_image_format.image_channel_order field.
|
||||
// They are like CL_BGRA and CL_ARGB except that the alpha channel to be ignored. On calls to read_imagef,
|
||||
// the alpha will be 0xff (1.0f) if the sample falls in the image and 0 if it does not fall in the image.
|
||||
// On calls to write_imagef, the alpha value is ignored and 0xff (1.0f) is written. These formats are
|
||||
// currently only available for the CL_UNORM_INT8 cl_channel_type. They are intended to support legacy
|
||||
// image formats.
|
||||
const ( |
||||
ChannelOrder1RGBApple ChannelOrder = C.CL_1RGB_APPLE // Introduced in MacOS X.7.
|
||||
ChannelOrderBGR1Apple ChannelOrder = C.CL_BGR1_APPLE // Introduced in MacOS X.7.
|
||||
) |
||||
|
||||
// Extension: cl_APPLE_biased_fixed_point_image_formats
|
||||
//
|
||||
// This selector may be passed to clCreateImage2D() in the cl_image_format.image_channel_data_type field.
|
||||
// It defines a biased signed 1.14 fixed point storage format, with range [-1, 3). The conversion from
|
||||
// float to this fixed point format is defined as follows:
|
||||
//
|
||||
// ushort float_to_sfixed14( float x ){
|
||||
// int i = convert_int_sat_rte( x * 0x1.0p14f ); // scale [-1, 3.0) to [-16384, 3*16384), round to nearest integer
|
||||
// i = add_sat( i, 0x4000 ); // apply bias, to convert to [0, 65535) range
|
||||
// return convert_ushort_sat(i); // clamp to destination size
|
||||
// }
|
||||
//
|
||||
// The inverse conversion is the reverse process. The formats are currently only available on the CPU with
|
||||
// the CL_RGBA channel layout.
|
||||
const ( |
||||
ChannelDataTypeSFixed14Apple ChannelDataType = C.CL_SFIXED14_APPLE // Introduced in MacOS X.7.
|
||||
) |
||||
|
||||
func init() { |
||||
channelOrderNameMap[ChannelOrder1RGBApple] = "1RGBApple" |
||||
channelOrderNameMap[ChannelOrderBGR1Apple] = "RGB1Apple" |
||||
channelDataTypeNameMap[ChannelDataTypeSFixed14Apple] = "SFixed14Apple" |
||||
} |
@ -0,0 +1,629 @@ |
||||
// Copyright 2014 The go-ethereum Authors
|
||||
// This file is part of the go-ethereum library.
|
||||
//
|
||||
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
// +build opencl
|
||||
|
||||
package ethash |
||||
|
||||
//#cgo LDFLAGS: -w
|
||||
//#include <stdint.h>
|
||||
//#include <string.h>
|
||||
//#include "src/libethash/internal.h"
|
||||
import "C" |
||||
|
||||
import ( |
||||
crand "crypto/rand" |
||||
"encoding/binary" |
||||
"fmt" |
||||
"math" |
||||
"math/big" |
||||
mrand "math/rand" |
||||
"strconv" |
||||
"strings" |
||||
"sync" |
||||
"sync/atomic" |
||||
"time" |
||||
"unsafe" |
||||
|
||||
"github.com/Gustav-Simonsson/go-opencl/cl" |
||||
"github.com/ethereum/go-ethereum/common" |
||||
"github.com/ethereum/go-ethereum/pow" |
||||
) |
||||
|
||||
/* |
||||
|
||||
This code have two main entry points: |
||||
|
||||
1. The initCL(...) function configures one or more OpenCL device |
||||
(for now only GPU) and loads the Ethash DAG onto device memory |
||||
|
||||
2. The Search(...) function loads a Ethash nonce into device(s) memory and |
||||
executes the Ethash OpenCL kernel. |
||||
|
||||
Throughout the code, we refer to "host memory" and "device memory". |
||||
For most systems (e.g. regular PC GPU miner) the host memory is RAM and |
||||
device memory is the GPU global memory (e.g. GDDR5). |
||||
|
||||
References mentioned in code comments: |
||||
|
||||
1. https://github.com/ethereum/wiki/wiki/Ethash
|
||||
2. https://github.com/ethereum/cpp-ethereum/blob/develop/libethash-cl/ethash_cl_miner.cpp
|
||||
3. https://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/
|
||||
4. http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/12/AMD_OpenCL_Programming_User_Guide.pdf
|
||||
|
||||
*/ |
||||
|
||||
type OpenCLDevice struct { |
||||
deviceId int |
||||
device *cl.Device |
||||
openCL11 bool // OpenCL version 1.1 and 1.2 are handled a bit different
|
||||
openCL12 bool |
||||
|
||||
dagBuf *cl.MemObject // Ethash full DAG in device mem
|
||||
headerBuf *cl.MemObject // Hash of block-to-mine in device mem
|
||||
searchBuffers []*cl.MemObject |
||||
|
||||
searchKernel *cl.Kernel |
||||
hashKernel *cl.Kernel |
||||
|
||||
queue *cl.CommandQueue |
||||
ctx *cl.Context |
||||
workGroupSize int |
||||
|
||||
nonceRand *mrand.Rand // seeded by crypto/rand, see comments where it's initialised
|
||||
result common.Hash |
||||
} |
||||
|
||||
type OpenCLMiner struct { |
||||
mu sync.Mutex |
||||
|
||||
ethash *Ethash // Ethash full DAG & cache in host mem
|
||||
|
||||
deviceIds []int |
||||
devices []*OpenCLDevice |
||||
|
||||
dagSize uint64 |
||||
|
||||
hashRate int32 // Go atomics & uint64 have some issues; int32 is supported on all platforms
|
||||
} |
||||
|
||||
type pendingSearch struct { |
||||
bufIndex uint32 |
||||
startNonce uint64 |
||||
} |
||||
|
||||
const ( |
||||
SIZEOF_UINT32 = 4 |
||||
|
||||
// See [1]
|
||||
ethashMixBytesLen = 128 |
||||
ethashAccesses = 64 |
||||
|
||||
// See [4]
|
||||
workGroupSize = 32 // must be multiple of 8
|
||||
maxSearchResults = 63 |
||||
searchBufSize = 2 |
||||
globalWorkSize = 1024 * 256 |
||||
) |
||||
|
||||
func NewCL(deviceIds []int) *OpenCLMiner { |
||||
ids := make([]int, len(deviceIds)) |
||||
copy(ids, deviceIds) |
||||
return &OpenCLMiner{ |
||||
ethash: New(), |
||||
dagSize: 0, // to see if we need to update DAG.
|
||||
deviceIds: ids, |
||||
} |
||||
} |
||||
|
||||
func PrintDevices() { |
||||
fmt.Println("=============================================") |
||||
fmt.Println("============ OpenCL Device Info =============") |
||||
fmt.Println("=============================================") |
||||
|
||||
var found []*cl.Device |
||||
|
||||
platforms, err := cl.GetPlatforms() |
||||
if err != nil { |
||||
fmt.Println("Plaform error (check your OpenCL installation): %v", err) |
||||
return |
||||
} |
||||
|
||||
for i, p := range platforms { |
||||
fmt.Println("Platform id ", i) |
||||
fmt.Println("Platform Name ", p.Name()) |
||||
fmt.Println("Platform Vendor ", p.Vendor()) |
||||
fmt.Println("Platform Version ", p.Version()) |
||||
fmt.Println("Platform Extensions ", p.Extensions()) |
||||
fmt.Println("Platform Profile ", p.Profile()) |
||||
fmt.Println("") |
||||
|
||||
devices, err := cl.GetDevices(p, cl.DeviceTypeGPU) |
||||
if err != nil { |
||||
fmt.Println("Device error (check your GPU drivers) :", err) |
||||
return |
||||
} |
||||
|
||||
for _, d := range devices { |
||||
fmt.Println("Device OpenCL id ", i) |
||||
fmt.Println("Device id for mining ", len(found)) |
||||
fmt.Println("Device Name ", d.Name()) |
||||
fmt.Println("Vendor ", d.Vendor()) |
||||
fmt.Println("Version ", d.Version()) |
||||
fmt.Println("Driver version ", d.DriverVersion()) |
||||
fmt.Println("Address bits ", d.AddressBits()) |
||||
fmt.Println("Max clock freq ", d.MaxClockFrequency()) |
||||
fmt.Println("Global mem size ", d.GlobalMemSize()) |
||||
fmt.Println("Max constant buffer size", d.MaxConstantBufferSize()) |
||||
fmt.Println("Max mem alloc size ", d.MaxMemAllocSize()) |
||||
fmt.Println("Max compute units ", d.MaxComputeUnits()) |
||||
fmt.Println("Max work group size ", d.MaxWorkGroupSize()) |
||||
fmt.Println("Max work item sizes ", d.MaxWorkItemSizes()) |
||||
fmt.Println("=============================================") |
||||
|
||||
found = append(found, d) |
||||
} |
||||
} |
||||
if len(found) == 0 { |
||||
fmt.Println("Found no GPU(s). Check that your OS can see the GPU(s)") |
||||
} else { |
||||
var idsFormat string |
||||
for i := 0; i < len(found); i++ { |
||||
idsFormat += strconv.Itoa(i) |
||||
if i != len(found)-1 { |
||||
idsFormat += "," |
||||
} |
||||
} |
||||
fmt.Printf("Found %v devices. Benchmark first GPU: geth gpubench 0\n", len(found)) |
||||
fmt.Printf("Mine using all GPUs: geth --minegpu %v\n", idsFormat) |
||||
} |
||||
} |
||||
|
||||
// See [2]. We basically do the same here, but the Go OpenCL bindings
|
||||
// are at a slightly higher abtraction level.
|
||||
func InitCL(blockNum uint64, c *OpenCLMiner) error { |
||||
platforms, err := cl.GetPlatforms() |
||||
if err != nil { |
||||
return fmt.Errorf("Plaform error: %v\nCheck your OpenCL installation and then run geth gpuinfo", err) |
||||
} |
||||
|
||||
var devices []*cl.Device |
||||
for _, p := range platforms { |
||||
ds, err := cl.GetDevices(p, cl.DeviceTypeGPU) |
||||
if err != nil { |
||||
return fmt.Errorf("Devices error: %v\nCheck your GPU drivers and then run geth gpuinfo", err) |
||||
} |
||||
for _, d := range ds { |
||||
devices = append(devices, d) |
||||
} |
||||
} |
||||
|
||||
pow := New() |
||||
_ = pow.getDAG(blockNum) // generates DAG if we don't have it
|
||||
pow.Light.getCache(blockNum) // and cache
|
||||
|
||||
c.ethash = pow |
||||
dagSize := uint64(C.ethash_get_datasize(C.uint64_t(blockNum))) |
||||
c.dagSize = dagSize |
||||
|
||||
for _, id := range c.deviceIds { |
||||
if id > len(devices)-1 { |
||||
return fmt.Errorf("Device id not found. See available device ids with: geth gpuinfo") |
||||
} else { |
||||
err := initCLDevice(id, devices[id], c) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
} |
||||
} |
||||
if len(c.devices) == 0 { |
||||
return fmt.Errorf("No GPU devices found") |
||||
} |
||||
return nil |
||||
} |
||||
|
||||
func initCLDevice(deviceId int, device *cl.Device, c *OpenCLMiner) error { |
||||
devMaxAlloc := uint64(device.MaxMemAllocSize()) |
||||
devGlobalMem := uint64(device.GlobalMemSize()) |
||||
|
||||
// TODO: more fine grained version logic
|
||||
if device.Version() == "OpenCL 1.0" { |
||||
fmt.Println("Device OpenCL version not supported: ", device.Version()) |
||||
return fmt.Errorf("opencl version not supported") |
||||
} |
||||
|
||||
var cl11, cl12 bool |
||||
if device.Version() == "OpenCL 1.1" { |
||||
cl11 = true |
||||
} |
||||
if device.Version() == "OpenCL 1.2" { |
||||
cl12 = true |
||||
} |
||||
|
||||
// log warnings but carry on; some device drivers report inaccurate values
|
||||
if c.dagSize > devGlobalMem { |
||||
fmt.Printf("WARNING: device memory may be insufficient: %v. DAG size: %v.\n", devGlobalMem, c.dagSize) |
||||
} |
||||
|
||||
if c.dagSize > devMaxAlloc { |
||||
fmt.Printf("WARNING: DAG size (%v) larger than device max memory allocation size (%v).\n", c.dagSize, devMaxAlloc) |
||||
fmt.Printf("You probably have to export GPU_MAX_ALLOC_PERCENT=95\n") |
||||
} |
||||
|
||||
fmt.Printf("Initialising device %v: %v\n", deviceId, device.Name()) |
||||
|
||||
context, err := cl.CreateContext([]*cl.Device{device}) |
||||
if err != nil { |
||||
return fmt.Errorf("failed creating context:", err) |
||||
} |
||||
|
||||
// TODO: test running with CL_QUEUE_PROFILING_ENABLE for profiling?
|
||||
queue, err := context.CreateCommandQueue(device, 0) |
||||
if err != nil { |
||||
return fmt.Errorf("command queue err:", err) |
||||
} |
||||
|
||||
// See [4] section 3.2 and [3] "clBuildProgram".
|
||||
// The OpenCL kernel code is compiled at run-time.
|
||||
kvs := make(map[string]string, 4) |
||||
kvs["GROUP_SIZE"] = strconv.FormatUint(workGroupSize, 10) |
||||
kvs["DAG_SIZE"] = strconv.FormatUint(c.dagSize/ethashMixBytesLen, 10) |
||||
kvs["ACCESSES"] = strconv.FormatUint(ethashAccesses, 10) |
||||
kvs["MAX_OUTPUTS"] = strconv.FormatUint(maxSearchResults, 10) |
||||
kernelCode := replaceWords(kernel, kvs) |
||||
|
||||
program, err := context.CreateProgramWithSource([]string{kernelCode}) |
||||
if err != nil { |
||||
return fmt.Errorf("program err:", err) |
||||
} |
||||
|
||||
/* if using AMD OpenCL impl, you can set this to debug on x86 CPU device. |
||||
see AMD OpenCL programming guide section 4.2 |
||||
|
||||
export in shell before running: |
||||
export AMD_OCL_BUILD_OPTIONS_APPEND="-g -O0" |
||||
export CPU_MAX_COMPUTE_UNITS=1 |
||||
|
||||
buildOpts := "-g -cl-opt-disable" |
||||
|
||||
*/ |
||||
buildOpts := "" |
||||
err = program.BuildProgram([]*cl.Device{device}, buildOpts) |
||||
if err != nil { |
||||
return fmt.Errorf("program build err:", err) |
||||
} |
||||
|
||||
var searchKernelName, hashKernelName string |
||||
searchKernelName = "ethash_search" |
||||
hashKernelName = "ethash_hash" |
||||
|
||||
searchKernel, err := program.CreateKernel(searchKernelName) |
||||
hashKernel, err := program.CreateKernel(hashKernelName) |
||||
if err != nil { |
||||
return fmt.Errorf("kernel err:", err) |
||||
} |
||||
|
||||
// TODO: when this DAG size appears, patch the Go bindings
|
||||
// (context.go) to work with uint64 as size_t
|
||||
if c.dagSize > math.MaxInt32 { |
||||
fmt.Println("DAG too large for allocation.") |
||||
return fmt.Errorf("DAG too large for alloc") |
||||
} |
||||
|
||||
// TODO: patch up Go bindings to work with size_t, will overflow if > maxint32
|
||||
// TODO: fuck. shit's gonna overflow around 2017-06-09 12:17:02
|
||||
dagBuf := *(new(*cl.MemObject)) |
||||
dagBuf, err = context.CreateEmptyBuffer(cl.MemReadOnly, int(c.dagSize)) |
||||
if err != nil { |
||||
return fmt.Errorf("allocating dag buf failed: ", err) |
||||
} |
||||
|
||||
// write DAG to device mem
|
||||
dagPtr := unsafe.Pointer(c.ethash.Full.current.ptr.data) |
||||
_, err = queue.EnqueueWriteBuffer(dagBuf, true, 0, int(c.dagSize), dagPtr, nil) |
||||
if err != nil { |
||||
return fmt.Errorf("writing to dag buf failed: ", err) |
||||
} |
||||
|
||||
searchBuffers := make([]*cl.MemObject, searchBufSize) |
||||
for i := 0; i < searchBufSize; i++ { |
||||
searchBuff, err := context.CreateEmptyBuffer(cl.MemWriteOnly, (1+maxSearchResults)*SIZEOF_UINT32) |
||||
if err != nil { |
||||
return fmt.Errorf("search buffer err:", err) |
||||
} |
||||
searchBuffers[i] = searchBuff |
||||
} |
||||
|
||||
headerBuf, err := context.CreateEmptyBuffer(cl.MemReadOnly, 32) |
||||
if err != nil { |
||||
return fmt.Errorf("header buffer err:", err) |
||||
} |
||||
|
||||
// Unique, random nonces are crucial for mining efficieny.
|
||||
// While we do not need cryptographically secure PRNG for nonces,
|
||||
// we want to have uniform distribution and minimal repetition of nonces.
|
||||
// We could guarantee strict uniqueness of nonces by generating unique ranges,
|
||||
// but a int64 seed from crypto/rand should be good enough.
|
||||
// we then use math/rand for speed and to avoid draining OS entropy pool
|
||||
seed, err := crand.Int(crand.Reader, big.NewInt(math.MaxInt64)) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
nonceRand := mrand.New(mrand.NewSource(seed.Int64())) |
||||
|
||||
deviceStruct := &OpenCLDevice{ |
||||
deviceId: deviceId, |
||||
device: device, |
||||
openCL11: cl11, |
||||
openCL12: cl12, |
||||
|
||||
dagBuf: dagBuf, |
||||
headerBuf: headerBuf, |
||||
searchBuffers: searchBuffers, |
||||
|
||||
searchKernel: searchKernel, |
||||
hashKernel: hashKernel, |
||||
|
||||
queue: queue, |
||||
ctx: context, |
||||
|
||||
workGroupSize: workGroupSize, |
||||
|
||||
nonceRand: nonceRand, |
||||
} |
||||
c.devices = append(c.devices, deviceStruct) |
||||
|
||||
return nil |
||||
} |
||||
|
||||
func (c *OpenCLMiner) Search(block pow.Block, stop <-chan struct{}, index int) (uint64, []byte) { |
||||
c.mu.Lock() |
||||
newDagSize := uint64(C.ethash_get_datasize(C.uint64_t(block.NumberU64()))) |
||||
if newDagSize > c.dagSize { |
||||
// TODO: clean up buffers from previous DAG?
|
||||
err := InitCL(block.NumberU64(), c) |
||||
if err != nil { |
||||
fmt.Println("OpenCL init error: ", err) |
||||
return 0, []byte{0} |
||||
} |
||||
} |
||||
defer c.mu.Unlock() |
||||
|
||||
// Avoid unneeded OpenCL initialisation if we received stop while running InitCL
|
||||
select { |
||||
case <-stop: |
||||
return 0, []byte{0} |
||||
default: |
||||
} |
||||
|
||||
headerHash := block.HashNoNonce() |
||||
diff := block.Difficulty() |
||||
target256 := new(big.Int).Div(maxUint256, diff) |
||||
target64 := new(big.Int).Rsh(target256, 192).Uint64() |
||||
var zero uint32 = 0 |
||||
|
||||
d := c.devices[index] |
||||
|
||||
_, err := d.queue.EnqueueWriteBuffer(d.headerBuf, false, 0, 32, unsafe.Pointer(&headerHash[0]), nil) |
||||
if err != nil { |
||||
fmt.Println("Error in Search clEnqueueWriterBuffer : ", err) |
||||
return 0, []byte{0} |
||||
} |
||||
|
||||
for i := 0; i < searchBufSize; i++ { |
||||
_, err := d.queue.EnqueueWriteBuffer(d.searchBuffers[i], false, 0, 4, unsafe.Pointer(&zero), nil) |
||||
if err != nil { |
||||
fmt.Println("Error in Search clEnqueueWriterBuffer : ", err) |
||||
return 0, []byte{0} |
||||
} |
||||
} |
||||
|
||||
// wait for all search buffers to complete
|
||||
err = d.queue.Finish() |
||||
if err != nil { |
||||
fmt.Println("Error in Search clFinish : ", err) |
||||
return 0, []byte{0} |
||||
} |
||||
|
||||
err = d.searchKernel.SetArg(1, d.headerBuf) |
||||
if err != nil { |
||||
fmt.Println("Error in Search clSetKernelArg : ", err) |
||||
return 0, []byte{0} |
||||
} |
||||
|
||||
err = d.searchKernel.SetArg(2, d.dagBuf) |
||||
if err != nil { |
||||
fmt.Println("Error in Search clSetKernelArg : ", err) |
||||
return 0, []byte{0} |
||||
} |
||||
|
||||
err = d.searchKernel.SetArg(4, target64) |
||||
if err != nil { |
||||
fmt.Println("Error in Search clSetKernelArg : ", err) |
||||
return 0, []byte{0} |
||||
} |
||||
err = d.searchKernel.SetArg(5, uint32(math.MaxUint32)) |
||||
if err != nil { |
||||
fmt.Println("Error in Search clSetKernelArg : ", err) |
||||
return 0, []byte{0} |
||||
} |
||||
|
||||
// wait on this before returning
|
||||
var preReturnEvent *cl.Event |
||||
if d.openCL12 { |
||||
preReturnEvent, err = d.ctx.CreateUserEvent() |
||||
if err != nil { |
||||
fmt.Println("Error in Search create CL user event : ", err) |
||||
return 0, []byte{0} |
||||
} |
||||
} |
||||
|
||||
pending := make([]pendingSearch, 0, searchBufSize) |
||||
var p *pendingSearch |
||||
searchBufIndex := uint32(0) |
||||
var checkNonce uint64 |
||||
loops := int64(0) |
||||
prevHashRate := int32(0) |
||||
start := time.Now().UnixNano() |
||||
// we grab a single random nonce and sets this as argument to the kernel search function
|
||||
// the device will then add each local threads gid to the nonce, creating a unique nonce
|
||||
// for each device computing unit executing in parallel
|
||||
initNonce := uint64(d.nonceRand.Int63()) |
||||
for nonce := initNonce; ; nonce += uint64(globalWorkSize) { |
||||
select { |
||||
case <-stop: |
||||
|
||||
/* |
||||
if d.openCL12 { |
||||
err = cl.WaitForEvents([]*cl.Event{preReturnEvent}) |
||||
if err != nil { |
||||
fmt.Println("Error in Search WaitForEvents: ", err) |
||||
} |
||||
} |
||||
*/ |
||||
|
||||
atomic.AddInt32(&c.hashRate, -prevHashRate) |
||||
return 0, []byte{0} |
||||
default: |
||||
} |
||||
|
||||
if (loops % (1 << 7)) == 0 { |
||||
elapsed := time.Now().UnixNano() - start |
||||
// TODO: verify if this is correct hash rate calculation
|
||||
hashes := (float64(1e9) / float64(elapsed)) * float64(loops*1024*256) |
||||
hashrateDiff := int32(hashes) - prevHashRate |
||||
prevHashRate = int32(hashes) |
||||
atomic.AddInt32(&c.hashRate, hashrateDiff) |
||||
} |
||||
loops++ |
||||
|
||||
err = d.searchKernel.SetArg(0, d.searchBuffers[searchBufIndex]) |
||||
if err != nil { |
||||
fmt.Println("Error in Search clSetKernelArg : ", err) |
||||
return 0, []byte{0} |
||||
} |
||||
err = d.searchKernel.SetArg(3, nonce) |
||||
if err != nil { |
||||
fmt.Println("Error in Search clSetKernelArg : ", err) |
||||
return 0, []byte{0} |
||||
} |
||||
|
||||
// execute kernel
|
||||
_, err := d.queue.EnqueueNDRangeKernel( |
||||
d.searchKernel, |
||||
[]int{0}, |
||||
[]int{globalWorkSize}, |
||||
[]int{d.workGroupSize}, |
||||
nil) |
||||
if err != nil { |
||||
fmt.Println("Error in Search clEnqueueNDRangeKernel : ", err) |
||||
return 0, []byte{0} |
||||
} |
||||
|
||||
pending = append(pending, pendingSearch{bufIndex: searchBufIndex, startNonce: nonce}) |
||||
searchBufIndex = (searchBufIndex + 1) % searchBufSize |
||||
|
||||
if len(pending) == searchBufSize { |
||||
p = &(pending[searchBufIndex]) |
||||
cres, _, err := d.queue.EnqueueMapBuffer(d.searchBuffers[p.bufIndex], true, |
||||
cl.MapFlagRead, 0, (1+maxSearchResults)*SIZEOF_UINT32, |
||||
nil) |
||||
if err != nil { |
||||
fmt.Println("Error in Search clEnqueueMapBuffer: ", err) |
||||
return 0, []byte{0} |
||||
} |
||||
|
||||
results := cres.ByteSlice() |
||||
nfound := binary.LittleEndian.Uint32(results) |
||||
nfound = uint32(math.Min(float64(nfound), float64(maxSearchResults))) |
||||
// OpenCL returns the offsets from the start nonce
|
||||
for i := uint32(0); i < nfound; i++ { |
||||
lo := (i + 1) * SIZEOF_UINT32 |
||||
hi := (i + 2) * SIZEOF_UINT32 |
||||
upperNonce := uint64(binary.LittleEndian.Uint32(results[lo:hi])) |
||||
checkNonce = p.startNonce + upperNonce |
||||
if checkNonce != 0 { |
||||
cn := C.uint64_t(checkNonce) |
||||
ds := C.uint64_t(c.dagSize) |
||||
// We verify that the nonce is indeed a solution by
|
||||
// executing the Ethash verification function (on the CPU).
|
||||
ret := C.ethash_light_compute_internal(c.ethash.Light.current.ptr, ds, hashToH256(headerHash), cn) |
||||
// TODO: return result first
|
||||
if ret.success && h256ToHash(ret.result).Big().Cmp(target256) <= 0 { |
||||
_, err = d.queue.EnqueueUnmapMemObject(d.searchBuffers[p.bufIndex], cres, nil) |
||||
if err != nil { |
||||
fmt.Println("Error in Search clEnqueueUnmapMemObject: ", err) |
||||
} |
||||
if d.openCL12 { |
||||
err = cl.WaitForEvents([]*cl.Event{preReturnEvent}) |
||||
if err != nil { |
||||
fmt.Println("Error in Search WaitForEvents: ", err) |
||||
} |
||||
} |
||||
return checkNonce, C.GoBytes(unsafe.Pointer(&ret.mix_hash), C.int(32)) |
||||
} |
||||
|
||||
_, err := d.queue.EnqueueWriteBuffer(d.searchBuffers[p.bufIndex], false, 0, 4, unsafe.Pointer(&zero), nil) |
||||
if err != nil { |
||||
fmt.Println("Error in Search cl: EnqueueWriteBuffer", err) |
||||
return 0, []byte{0} |
||||
} |
||||
} |
||||
} |
||||
_, err = d.queue.EnqueueUnmapMemObject(d.searchBuffers[p.bufIndex], cres, nil) |
||||
if err != nil { |
||||
fmt.Println("Error in Search clEnqueueUnMapMemObject: ", err) |
||||
return 0, []byte{0} |
||||
} |
||||
pending = append(pending[:searchBufIndex], pending[searchBufIndex+1:]...) |
||||
} |
||||
} |
||||
if d.openCL12 { |
||||
err := cl.WaitForEvents([]*cl.Event{preReturnEvent}) |
||||
if err != nil { |
||||
fmt.Println("Error in Search clWaitForEvents: ", err) |
||||
return 0, []byte{0} |
||||
} |
||||
} |
||||
return 0, []byte{0} |
||||
} |
||||
|
||||
func (c *OpenCLMiner) Verify(block pow.Block) bool { |
||||
return c.ethash.Light.Verify(block) |
||||
} |
||||
func (c *OpenCLMiner) GetHashrate() int64 { |
||||
return int64(atomic.LoadInt32(&c.hashRate)) |
||||
} |
||||
func (c *OpenCLMiner) Turbo(on bool) { |
||||
// This is GPU mining. Always be turbo.
|
||||
} |
||||
|
||||
func replaceWords(text string, kvs map[string]string) string { |
||||
for k, v := range kvs { |
||||
text = strings.Replace(text, k, v, -1) |
||||
} |
||||
return text |
||||
} |
||||
|
||||
func logErr(err error) { |
||||
if err != nil { |
||||
fmt.Println("Error in OpenCL call:", err) |
||||
} |
||||
} |
||||
|
||||
func argErr(err error) error { |
||||
return fmt.Errorf("arg err: %v", err) |
||||
} |
600
Godeps/_workspace/src/github.com/ethereum/ethash/ethash_opencl_kernel_go_str.go
generated
vendored
600
Godeps/_workspace/src/github.com/ethereum/ethash/ethash_opencl_kernel_go_str.go
generated
vendored
@ -0,0 +1,600 @@ |
||||
package ethash |
||||
|
||||
/* DO NOT EDIT!!! |
||||
|
||||
This code is version controlled at |
||||
https://github.com/ethereum/cpp-ethereum/blob/develop/libethash-cl/ethash_cl_miner_kernel.cl
|
||||
|
||||
If needed change it there first, then copy over here. |
||||
*/ |
||||
|
||||
const kernel = ` |
||||
// author Tim Hughes <tim@twistedfury.com>
|
||||
// Tested on Radeon HD 7850
|
||||
// Hashrate: 15940347 hashes/s
|
||||
// Bandwidth: 124533 MB/s
|
||||
// search kernel should fit in <= 84 VGPRS (3 wavefronts)
|
||||
|
||||
#define THREADS_PER_HASH (128 / 16) |
||||
#define HASHES_PER_LOOP (GROUP_SIZE / THREADS_PER_HASH) |
||||
|
||||
#define FNV_PRIME 0x01000193 |
||||
|
||||
__constant uint2 const Keccak_f1600_RC[24] = { |
||||
(uint2)(0x00000001, 0x00000000), |
||||
(uint2)(0x00008082, 0x00000000), |
||||
(uint2)(0x0000808a, 0x80000000), |
||||
(uint2)(0x80008000, 0x80000000), |
||||
(uint2)(0x0000808b, 0x00000000), |
||||
(uint2)(0x80000001, 0x00000000), |
||||
(uint2)(0x80008081, 0x80000000), |
||||
(uint2)(0x00008009, 0x80000000), |
||||
(uint2)(0x0000008a, 0x00000000), |
||||
(uint2)(0x00000088, 0x00000000), |
||||
(uint2)(0x80008009, 0x00000000), |
||||
(uint2)(0x8000000a, 0x00000000), |
||||
(uint2)(0x8000808b, 0x00000000), |
||||
(uint2)(0x0000008b, 0x80000000), |
||||
(uint2)(0x00008089, 0x80000000), |
||||
(uint2)(0x00008003, 0x80000000), |
||||
(uint2)(0x00008002, 0x80000000), |
||||
(uint2)(0x00000080, 0x80000000), |
||||
(uint2)(0x0000800a, 0x00000000), |
||||
(uint2)(0x8000000a, 0x80000000), |
||||
(uint2)(0x80008081, 0x80000000), |
||||
(uint2)(0x00008080, 0x80000000), |
||||
(uint2)(0x80000001, 0x00000000), |
||||
(uint2)(0x80008008, 0x80000000), |
||||
}; |
||||
|
||||
void keccak_f1600_round(uint2* a, uint r, uint out_size) |
||||
{ |
||||
#if !__ENDIAN_LITTLE__ |
||||
for (uint i = 0; i != 25; ++i) |
||||
a[i] = a[i].yx; |
||||
#endif |
||||
|
||||
uint2 b[25]; |
||||
uint2 t; |
||||
|
||||
// Theta
|
||||
b[0] = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20]; |
||||
b[1] = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21]; |
||||
b[2] = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22]; |
||||
b[3] = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23]; |
||||
b[4] = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24]; |
||||
t = b[4] ^ (uint2)(b[1].x << 1 | b[1].y >> 31, b[1].y << 1 | b[1].x >> 31); |
||||
a[0] ^= t; |
||||
a[5] ^= t; |
||||
a[10] ^= t; |
||||
a[15] ^= t; |
||||
a[20] ^= t; |
||||
t = b[0] ^ (uint2)(b[2].x << 1 | b[2].y >> 31, b[2].y << 1 | b[2].x >> 31); |
||||
a[1] ^= t; |
||||
a[6] ^= t; |
||||
a[11] ^= t; |
||||
a[16] ^= t; |
||||
a[21] ^= t; |
||||
t = b[1] ^ (uint2)(b[3].x << 1 | b[3].y >> 31, b[3].y << 1 | b[3].x >> 31); |
||||
a[2] ^= t; |
||||
a[7] ^= t; |
||||
a[12] ^= t; |
||||
a[17] ^= t; |
||||
a[22] ^= t; |
||||
t = b[2] ^ (uint2)(b[4].x << 1 | b[4].y >> 31, b[4].y << 1 | b[4].x >> 31); |
||||
a[3] ^= t; |
||||
a[8] ^= t; |
||||
a[13] ^= t; |
||||
a[18] ^= t; |
||||
a[23] ^= t; |
||||
t = b[3] ^ (uint2)(b[0].x << 1 | b[0].y >> 31, b[0].y << 1 | b[0].x >> 31); |
||||
a[4] ^= t; |
||||
a[9] ^= t; |
||||
a[14] ^= t; |
||||
a[19] ^= t; |
||||
a[24] ^= t; |
||||
|
||||
// Rho Pi
|
||||
b[0] = a[0]; |
||||
b[10] = (uint2)(a[1].x << 1 | a[1].y >> 31, a[1].y << 1 | a[1].x >> 31); |
||||
b[7] = (uint2)(a[10].x << 3 | a[10].y >> 29, a[10].y << 3 | a[10].x >> 29); |
||||
b[11] = (uint2)(a[7].x << 6 | a[7].y >> 26, a[7].y << 6 | a[7].x >> 26); |
||||
b[17] = (uint2)(a[11].x << 10 | a[11].y >> 22, a[11].y << 10 | a[11].x >> 22); |
||||
b[18] = (uint2)(a[17].x << 15 | a[17].y >> 17, a[17].y << 15 | a[17].x >> 17); |
||||
b[3] = (uint2)(a[18].x << 21 | a[18].y >> 11, a[18].y << 21 | a[18].x >> 11); |
||||
b[5] = (uint2)(a[3].x << 28 | a[3].y >> 4, a[3].y << 28 | a[3].x >> 4); |
||||
b[16] = (uint2)(a[5].y << 4 | a[5].x >> 28, a[5].x << 4 | a[5].y >> 28); |
||||
b[8] = (uint2)(a[16].y << 13 | a[16].x >> 19, a[16].x << 13 | a[16].y >> 19); |
||||
b[21] = (uint2)(a[8].y << 23 | a[8].x >> 9, a[8].x << 23 | a[8].y >> 9); |
||||
b[24] = (uint2)(a[21].x << 2 | a[21].y >> 30, a[21].y << 2 | a[21].x >> 30); |
||||
b[4] = (uint2)(a[24].x << 14 | a[24].y >> 18, a[24].y << 14 | a[24].x >> 18); |
||||
b[15] = (uint2)(a[4].x << 27 | a[4].y >> 5, a[4].y << 27 | a[4].x >> 5); |
||||
b[23] = (uint2)(a[15].y << 9 | a[15].x >> 23, a[15].x << 9 | a[15].y >> 23); |
||||
b[19] = (uint2)(a[23].y << 24 | a[23].x >> 8, a[23].x << 24 | a[23].y >> 8); |
||||
b[13] = (uint2)(a[19].x << 8 | a[19].y >> 24, a[19].y << 8 | a[19].x >> 24); |
||||
b[12] = (uint2)(a[13].x << 25 | a[13].y >> 7, a[13].y << 25 | a[13].x >> 7); |
||||
b[2] = (uint2)(a[12].y << 11 | a[12].x >> 21, a[12].x << 11 | a[12].y >> 21); |
||||
b[20] = (uint2)(a[2].y << 30 | a[2].x >> 2, a[2].x << 30 | a[2].y >> 2); |
||||
b[14] = (uint2)(a[20].x << 18 | a[20].y >> 14, a[20].y << 18 | a[20].x >> 14); |
||||
b[22] = (uint2)(a[14].y << 7 | a[14].x >> 25, a[14].x << 7 | a[14].y >> 25); |
||||
b[9] = (uint2)(a[22].y << 29 | a[22].x >> 3, a[22].x << 29 | a[22].y >> 3); |
||||
b[6] = (uint2)(a[9].x << 20 | a[9].y >> 12, a[9].y << 20 | a[9].x >> 12); |
||||
b[1] = (uint2)(a[6].y << 12 | a[6].x >> 20, a[6].x << 12 | a[6].y >> 20); |
||||
|
||||
// Chi
|
||||
a[0] = bitselect(b[0] ^ b[2], b[0], b[1]); |
||||
a[1] = bitselect(b[1] ^ b[3], b[1], b[2]); |
||||
a[2] = bitselect(b[2] ^ b[4], b[2], b[3]); |
||||
a[3] = bitselect(b[3] ^ b[0], b[3], b[4]); |
||||
if (out_size >= 4) |
||||
{ |
||||
a[4] = bitselect(b[4] ^ b[1], b[4], b[0]); |
||||
a[5] = bitselect(b[5] ^ b[7], b[5], b[6]); |
||||
a[6] = bitselect(b[6] ^ b[8], b[6], b[7]); |
||||
a[7] = bitselect(b[7] ^ b[9], b[7], b[8]); |
||||
a[8] = bitselect(b[8] ^ b[5], b[8], b[9]); |
||||
if (out_size >= 8) |
||||
{ |
||||
a[9] = bitselect(b[9] ^ b[6], b[9], b[5]); |
||||
a[10] = bitselect(b[10] ^ b[12], b[10], b[11]); |
||||
a[11] = bitselect(b[11] ^ b[13], b[11], b[12]); |
||||
a[12] = bitselect(b[12] ^ b[14], b[12], b[13]); |
||||
a[13] = bitselect(b[13] ^ b[10], b[13], b[14]); |
||||
a[14] = bitselect(b[14] ^ b[11], b[14], b[10]); |
||||
a[15] = bitselect(b[15] ^ b[17], b[15], b[16]); |
||||
a[16] = bitselect(b[16] ^ b[18], b[16], b[17]); |
||||
a[17] = bitselect(b[17] ^ b[19], b[17], b[18]); |
||||
a[18] = bitselect(b[18] ^ b[15], b[18], b[19]); |
||||
a[19] = bitselect(b[19] ^ b[16], b[19], b[15]); |
||||
a[20] = bitselect(b[20] ^ b[22], b[20], b[21]); |
||||
a[21] = bitselect(b[21] ^ b[23], b[21], b[22]); |
||||
a[22] = bitselect(b[22] ^ b[24], b[22], b[23]); |
||||
a[23] = bitselect(b[23] ^ b[20], b[23], b[24]); |
||||
a[24] = bitselect(b[24] ^ b[21], b[24], b[20]); |
||||
} |
||||
} |
||||
|
||||
// Iota
|
||||
a[0] ^= Keccak_f1600_RC[r]; |
||||
|
||||
#if !__ENDIAN_LITTLE__ |
||||
for (uint i = 0; i != 25; ++i) |
||||
a[i] = a[i].yx; |
||||
#endif |
||||
} |
||||
|
||||
void keccak_f1600_no_absorb(ulong* a, uint in_size, uint out_size, uint isolate) |
||||
{ |
||||
for (uint i = in_size; i != 25; ++i) |
||||
{ |
||||
a[i] = 0; |
||||
} |
||||
#if __ENDIAN_LITTLE__ |
||||
a[in_size] ^= 0x0000000000000001; |
||||
a[24-out_size*2] ^= 0x8000000000000000; |
||||
#else |
||||
a[in_size] ^= 0x0100000000000000; |
||||
a[24-out_size*2] ^= 0x0000000000000080; |
||||
#endif |
||||
|
||||
// Originally I unrolled the first and last rounds to interface
|
||||
// better with surrounding code, however I haven't done this
|
||||
// without causing the AMD compiler to blow up the VGPR usage.
|
||||
uint r = 0; |
||||
do |
||||
{ |
||||
// This dynamic branch stops the AMD compiler unrolling the loop
|
||||
// and additionally saves about 33% of the VGPRs, enough to gain another
|
||||
// wavefront. Ideally we'd get 4 in flight, but 3 is the best I can
|
||||
// massage out of the compiler. It doesn't really seem to matter how
|
||||
// much we try and help the compiler save VGPRs because it seems to throw
|
||||
// that information away, hence the implementation of keccak here
|
||||
// doesn't bother.
|
||||
if (isolate) |
||||
{ |
||||
keccak_f1600_round((uint2*)a, r++, 25); |
||||
} |
||||
} |
||||
while (r < 23); |
||||
|
||||
// final round optimised for digest size
|
||||
keccak_f1600_round((uint2*)a, r++, out_size); |
||||
} |
||||
|
||||
#define copy(dst, src, count) for (uint i = 0; i != count; ++i) { (dst)[i] = (src)[i]; } |
||||
|
||||
#define countof(x) (sizeof(x) / sizeof(x[0])) |
||||
|
||||
uint fnv(uint x, uint y) |
||||
{ |
||||
return x * FNV_PRIME ^ y; |
||||
} |
||||
|
||||
uint4 fnv4(uint4 x, uint4 y) |
||||
{ |
||||
return x * FNV_PRIME ^ y; |
||||
} |
||||
|
||||
uint fnv_reduce(uint4 v) |
||||
{ |
||||
return fnv(fnv(fnv(v.x, v.y), v.z), v.w); |
||||
} |
||||
|
||||
typedef union |
||||
{ |
||||
ulong ulongs[32 / sizeof(ulong)]; |
||||
uint uints[32 / sizeof(uint)]; |
||||
} hash32_t; |
||||
|
||||
typedef union |
||||
{ |
||||
ulong ulongs[64 / sizeof(ulong)]; |
||||
uint4 uint4s[64 / sizeof(uint4)]; |
||||
} hash64_t; |
||||
|
||||
typedef union |
||||
{ |
||||
uint uints[128 / sizeof(uint)]; |
||||
uint4 uint4s[128 / sizeof(uint4)]; |
||||
} hash128_t; |
||||
|
||||
hash64_t init_hash(__constant hash32_t const* header, ulong nonce, uint isolate) |
||||
{ |
||||
hash64_t init; |
||||
uint const init_size = countof(init.ulongs); |
||||
uint const hash_size = countof(header->ulongs); |
||||
|
||||
// sha3_512(header .. nonce)
|
||||
ulong state[25]; |
||||
copy(state, header->ulongs, hash_size); |
||||
state[hash_size] = nonce; |
||||
keccak_f1600_no_absorb(state, hash_size + 1, init_size, isolate); |
||||
|
||||
copy(init.ulongs, state, init_size); |
||||
return init; |
||||
} |
||||
|
||||
uint inner_loop_chunks(uint4 init, uint thread_id, __local uint* share, __global hash128_t const* g_dag, __global hash128_t const* g_dag1, __global hash128_t const* g_dag2, __global hash128_t const* g_dag3, uint isolate) |
||||
{ |
||||
uint4 mix = init; |
||||
|
||||
// share init0
|
||||
if (thread_id == 0) |
||||
*share = mix.x; |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
uint init0 = *share; |
||||
|
||||
uint a = 0; |
||||
do |
||||
{ |
||||
bool update_share = thread_id == (a/4) % THREADS_PER_HASH; |
||||
|
||||
#pragma unroll |
||||
for (uint i = 0; i != 4; ++i) |
||||
{ |
||||
if (update_share) |
||||
{ |
||||
uint m[4] = { mix.x, mix.y, mix.z, mix.w }; |
||||
*share = fnv(init0 ^ (a+i), m[i]) % DAG_SIZE; |
||||
} |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
||||
mix = fnv4(mix, *share>=3 * DAG_SIZE / 4 ? g_dag3[*share - 3 * DAG_SIZE / 4].uint4s[thread_id] : *share>=DAG_SIZE / 2 ? g_dag2[*share - DAG_SIZE / 2].uint4s[thread_id] : *share>=DAG_SIZE / 4 ? g_dag1[*share - DAG_SIZE / 4].uint4s[thread_id]:g_dag[*share].uint4s[thread_id]); |
||||
} |
||||
} while ((a += 4) != (ACCESSES & isolate)); |
||||
|
||||
return fnv_reduce(mix); |
||||
} |
||||
|
||||
|
||||
|
||||
uint inner_loop(uint4 init, uint thread_id, __local uint* share, __global hash128_t const* g_dag, uint isolate) |
||||
{ |
||||
uint4 mix = init; |
||||
|
||||
// share init0
|
||||
if (thread_id == 0) |
||||
*share = mix.x; |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
uint init0 = *share; |
||||
|
||||
uint a = 0; |
||||
do |
||||
{ |
||||
bool update_share = thread_id == (a/4) % THREADS_PER_HASH; |
||||
|
||||
#pragma unroll |
||||
for (uint i = 0; i != 4; ++i) |
||||
{ |
||||
if (update_share) |
||||
{ |
||||
uint m[4] = { mix.x, mix.y, mix.z, mix.w }; |
||||
*share = fnv(init0 ^ (a+i), m[i]) % DAG_SIZE; |
||||
} |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
||||
mix = fnv4(mix, g_dag[*share].uint4s[thread_id]); |
||||
} |
||||
} |
||||
while ((a += 4) != (ACCESSES & isolate)); |
||||
|
||||
return fnv_reduce(mix); |
||||
} |
||||
|
||||
|
||||
hash32_t final_hash(hash64_t const* init, hash32_t const* mix, uint isolate) |
||||
{ |
||||
ulong state[25]; |
||||
|
||||
hash32_t hash; |
||||
uint const hash_size = countof(hash.ulongs); |
||||
uint const init_size = countof(init->ulongs); |
||||
uint const mix_size = countof(mix->ulongs); |
||||
|
||||
// keccak_256(keccak_512(header..nonce) .. mix);
|
||||
copy(state, init->ulongs, init_size); |
||||
copy(state + init_size, mix->ulongs, mix_size); |
||||
keccak_f1600_no_absorb(state, init_size+mix_size, hash_size, isolate); |
||||
|
||||
// copy out
|
||||
copy(hash.ulongs, state, hash_size); |
||||
return hash; |
||||
} |
||||
|
||||
hash32_t compute_hash_simple( |
||||
__constant hash32_t const* g_header, |
||||
__global hash128_t const* g_dag, |
||||
ulong nonce, |
||||
uint isolate |
||||
) |
||||
{ |
||||
hash64_t init = init_hash(g_header, nonce, isolate); |
||||
|
||||
hash128_t mix; |
||||
for (uint i = 0; i != countof(mix.uint4s); ++i) |
||||
{ |
||||
mix.uint4s[i] = init.uint4s[i % countof(init.uint4s)]; |
||||
} |
||||
|
||||
uint mix_val = mix.uints[0]; |
||||
uint init0 = mix.uints[0]; |
||||
uint a = 0; |
||||
do |
||||
{ |
||||
uint pi = fnv(init0 ^ a, mix_val) % DAG_SIZE; |
||||
uint n = (a+1) % countof(mix.uints); |
||||
|
||||
#pragma unroll |
||||
for (uint i = 0; i != countof(mix.uints); ++i) |
||||
{ |
||||
mix.uints[i] = fnv(mix.uints[i], g_dag[pi].uints[i]); |
||||
mix_val = i == n ? mix.uints[i] : mix_val; |
||||
} |
||||
} |
||||
while (++a != (ACCESSES & isolate)); |
||||
|
||||
// reduce to output
|
||||
hash32_t fnv_mix; |
||||
for (uint i = 0; i != countof(fnv_mix.uints); ++i) |
||||
{ |
||||
fnv_mix.uints[i] = fnv_reduce(mix.uint4s[i]); |
||||
} |
||||
|
||||
return final_hash(&init, &fnv_mix, isolate); |
||||
} |
||||
|
||||
typedef union |
||||
{ |
||||
struct |
||||
{ |
||||
hash64_t init; |
||||
uint pad; // avoid lds bank conflicts
|
||||
}; |
||||
hash32_t mix; |
||||
} compute_hash_share; |
||||
|
||||
|
||||
hash32_t compute_hash( |
||||
__local compute_hash_share* share, |
||||
__constant hash32_t const* g_header, |
||||
__global hash128_t const* g_dag, |
||||
ulong nonce, |
||||
uint isolate |
||||
) |
||||
{ |
||||
uint const gid = get_global_id(0); |
||||
|
||||
// Compute one init hash per work item.
|
||||
hash64_t init = init_hash(g_header, nonce, isolate); |
||||
|
||||
// Threads work together in this phase in groups of 8.
|
||||
uint const thread_id = gid % THREADS_PER_HASH; |
||||
uint const hash_id = (gid % GROUP_SIZE) / THREADS_PER_HASH; |
||||
|
||||
hash32_t mix; |
||||
uint i = 0; |
||||
do |
||||
{ |
||||
// share init with other threads
|
||||
if (i == thread_id) |
||||
share[hash_id].init = init; |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
||||
uint4 thread_init = share[hash_id].init.uint4s[thread_id % (64 / sizeof(uint4))]; |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
||||
uint thread_mix = inner_loop(thread_init, thread_id, share[hash_id].mix.uints, g_dag, isolate); |
||||
|
||||
share[hash_id].mix.uints[thread_id] = thread_mix; |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
||||
if (i == thread_id) |
||||
mix = share[hash_id].mix; |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
} |
||||
while (++i != (THREADS_PER_HASH & isolate)); |
||||
|
||||
return final_hash(&init, &mix, isolate); |
||||
} |
||||
|
||||
|
||||
hash32_t compute_hash_chunks( |
||||
__local compute_hash_share* share, |
||||
__constant hash32_t const* g_header, |
||||
__global hash128_t const* g_dag, |
||||
__global hash128_t const* g_dag1, |
||||
__global hash128_t const* g_dag2, |
||||
__global hash128_t const* g_dag3, |
||||
ulong nonce, |
||||
uint isolate |
||||
) |
||||
{ |
||||
uint const gid = get_global_id(0); |
||||
|
||||
// Compute one init hash per work item.
|
||||
hash64_t init = init_hash(g_header, nonce, isolate); |
||||
|
||||
// Threads work together in this phase in groups of 8.
|
||||
uint const thread_id = gid % THREADS_PER_HASH; |
||||
uint const hash_id = (gid % GROUP_SIZE) / THREADS_PER_HASH; |
||||
|
||||
hash32_t mix; |
||||
uint i = 0; |
||||
do |
||||
{ |
||||
// share init with other threads
|
||||
if (i == thread_id) |
||||
share[hash_id].init = init; |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
||||
uint4 thread_init = share[hash_id].init.uint4s[thread_id % (64 / sizeof(uint4))]; |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
||||
uint thread_mix = inner_loop_chunks(thread_init, thread_id, share[hash_id].mix.uints, g_dag, g_dag1, g_dag2, g_dag3, isolate); |
||||
|
||||
share[hash_id].mix.uints[thread_id] = thread_mix; |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
|
||||
if (i == thread_id) |
||||
mix = share[hash_id].mix; |
||||
barrier(CLK_LOCAL_MEM_FENCE); |
||||
} |
||||
while (++i != (THREADS_PER_HASH & isolate)); |
||||
|
||||
return final_hash(&init, &mix, isolate); |
||||
} |
||||
|
||||
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) |
||||
__kernel void ethash_hash_simple( |
||||
__global hash32_t* g_hashes, |
||||
__constant hash32_t const* g_header, |
||||
__global hash128_t const* g_dag, |
||||
ulong start_nonce, |
||||
uint isolate |
||||
) |
||||
{ |
||||
uint const gid = get_global_id(0); |
||||
g_hashes[gid] = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate); |
||||
} |
||||
|
||||
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) |
||||
__kernel void ethash_search_simple( |
||||
__global volatile uint* restrict g_output, |
||||
__constant hash32_t const* g_header, |
||||
__global hash128_t const* g_dag, |
||||
ulong start_nonce, |
||||
ulong target, |
||||
uint isolate |
||||
) |
||||
{ |
||||
uint const gid = get_global_id(0); |
||||
hash32_t hash = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate); |
||||
|
||||
if (hash.ulongs[countof(hash.ulongs)-1] < target) |
||||
{ |
||||
uint slot = min(convert_uint(MAX_OUTPUTS), convert_uint(atomic_inc(&g_output[0]) + 1)); |
||||
g_output[slot] = gid; |
||||
} |
||||
} |
||||
|
||||
|
||||
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) |
||||
__kernel void ethash_hash( |
||||
__global hash32_t* g_hashes, |
||||
__constant hash32_t const* g_header, |
||||
__global hash128_t const* g_dag, |
||||
ulong start_nonce, |
||||
uint isolate |
||||
) |
||||
{ |
||||
__local compute_hash_share share[HASHES_PER_LOOP]; |
||||
|
||||
uint const gid = get_global_id(0); |
||||
g_hashes[gid] = compute_hash(share, g_header, g_dag, start_nonce + gid, isolate); |
||||
} |
||||
|
||||
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) |
||||
__kernel void ethash_search( |
||||
__global volatile uint* restrict g_output, |
||||
__constant hash32_t const* g_header, |
||||
__global hash128_t const* g_dag, |
||||
ulong start_nonce, |
||||
ulong target, |
||||
uint isolate |
||||
) |
||||
{ |
||||
__local compute_hash_share share[HASHES_PER_LOOP]; |
||||
|
||||
uint const gid = get_global_id(0); |
||||
hash32_t hash = compute_hash(share, g_header, g_dag, start_nonce + gid, isolate); |
||||
|
||||
if (as_ulong(as_uchar8(hash.ulongs[0]).s76543210) < target) |
||||
{ |
||||
uint slot = min((uint)MAX_OUTPUTS, atomic_inc(&g_output[0]) + 1); |
||||
g_output[slot] = gid; |
||||
} |
||||
} |
||||
|
||||
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) |
||||
__kernel void ethash_hash_chunks( |
||||
__global hash32_t* g_hashes, |
||||
__constant hash32_t const* g_header, |
||||
__global hash128_t const* g_dag, |
||||
__global hash128_t const* g_dag1, |
||||
__global hash128_t const* g_dag2, |
||||
__global hash128_t const* g_dag3, |
||||
ulong start_nonce, |
||||
uint isolate |
||||
) |
||||
{ |
||||
__local compute_hash_share share[HASHES_PER_LOOP]; |
||||
|
||||
uint const gid = get_global_id(0); |
||||
g_hashes[gid] = compute_hash_chunks(share, g_header, g_dag, g_dag1, g_dag2, g_dag3,start_nonce + gid, isolate); |
||||
} |
||||
|
||||
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1))) |
||||
__kernel void ethash_search_chunks( |
||||
__global volatile uint* restrict g_output, |
||||
__constant hash32_t const* g_header, |
||||
__global hash128_t const* g_dag, |
||||
__global hash128_t const* g_dag1, |
||||
__global hash128_t const* g_dag2, |
||||
__global hash128_t const* g_dag3, |
||||
ulong start_nonce, |
||||
ulong target, |
||||
uint isolate |
||||
) |
||||
{ |
||||
__local compute_hash_share share[HASHES_PER_LOOP]; |
||||
|
||||
uint const gid = get_global_id(0); |
||||
hash32_t hash = compute_hash_chunks(share, g_header, g_dag, g_dag1, g_dag2, g_dag3, start_nonce + gid, isolate); |
||||
|
||||
if (as_ulong(as_uchar8(hash.ulongs[0]).s76543210) < target) |
||||
{ |
||||
uint slot = min(convert_uint(MAX_OUTPUTS), convert_uint(atomic_inc(&g_output[0]) + 1)); |
||||
g_output[slot] = gid; |
||||
} |
||||
} |
||||
` |
@ -0,0 +1,54 @@ |
||||
// Copyright 2014 The go-ethereum Authors
|
||||
// This file is part of the go-ethereum library.
|
||||
//
|
||||
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
// +build !opencl
|
||||
|
||||
package eth |
||||
|
||||
import ( |
||||
"errors" |
||||
"fmt" |
||||
|
||||
"github.com/ethereum/go-ethereum/logger" |
||||
"github.com/ethereum/go-ethereum/logger/glog" |
||||
) |
||||
|
||||
const disabledInfo = "Set GO_OPENCL and re-build to enable." |
||||
|
||||
func (s *Ethereum) StartMining(threads int, gpus string) error { |
||||
eb, err := s.Etherbase() |
||||
if err != nil { |
||||
err = fmt.Errorf("Cannot start mining without etherbase address: %v", err) |
||||
glog.V(logger.Error).Infoln(err) |
||||
return err |
||||
} |
||||
|
||||
if gpus != "" { |
||||
return errors.New("GPU mining disabled. " + disabledInfo) |
||||
} |
||||
|
||||
// CPU mining
|
||||
go s.miner.Start(eb, threads) |
||||
return nil |
||||
} |
||||
|
||||
func GPUBench(gpuid uint64) { |
||||
fmt.Println("GPU mining disabled. " + disabledInfo) |
||||
} |
||||
|
||||
func PrintOpenCLDevices() { |
||||
fmt.Println("OpenCL disabled. " + disabledInfo) |
||||
} |
@ -0,0 +1,103 @@ |
||||
// Copyright 2014 The go-ethereum Authors
|
||||
// This file is part of the go-ethereum library.
|
||||
//
|
||||
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
// +build opencl
|
||||
|
||||
package eth |
||||
|
||||
import ( |
||||
"fmt" |
||||
"math/big" |
||||
"strconv" |
||||
"strings" |
||||
"time" |
||||
|
||||
"github.com/ethereum/ethash" |
||||
"github.com/ethereum/go-ethereum/common" |
||||
"github.com/ethereum/go-ethereum/core/types" |
||||
"github.com/ethereum/go-ethereum/logger" |
||||
"github.com/ethereum/go-ethereum/logger/glog" |
||||
"github.com/ethereum/go-ethereum/miner" |
||||
) |
||||
|
||||
func (s *Ethereum) StartMining(threads int, gpus string) error { |
||||
eb, err := s.Etherbase() |
||||
if err != nil { |
||||
err = fmt.Errorf("Cannot start mining without etherbase address: %v", err) |
||||
glog.V(logger.Error).Infoln(err) |
||||
return err |
||||
} |
||||
|
||||
// GPU mining
|
||||
if gpus != "" { |
||||
var ids []int |
||||
for _, s := range strings.Split(gpus, ",") { |
||||
i, err := strconv.Atoi(s) |
||||
if err != nil { |
||||
return fmt.Errorf("Invalid GPU id(s): %v", err) |
||||
} |
||||
if i < 0 { |
||||
return fmt.Errorf("Invalid GPU id: %v", i) |
||||
} |
||||
ids = append(ids, i) |
||||
} |
||||
|
||||
// TODO: re-creating miner is a bit ugly
|
||||
cl := ethash.NewCL(ids) |
||||
s.miner = miner.New(s, s.EventMux(), cl) |
||||
go s.miner.Start(eb, len(ids)) |
||||
return nil |
||||
} |
||||
|
||||
// CPU mining
|
||||
go s.miner.Start(eb, threads) |
||||
return nil |
||||
} |
||||
|
||||
func GPUBench(gpuid uint64) { |
||||
e := ethash.NewCL([]int{int(gpuid)}) |
||||
|
||||
var h common.Hash |
||||
bogoHeader := &types.Header{ |
||||
ParentHash: h, |
||||
Number: big.NewInt(int64(42)), |
||||
Difficulty: big.NewInt(int64(999999999999999)), |
||||
} |
||||
bogoBlock := types.NewBlock(bogoHeader, nil, nil, nil) |
||||
|
||||
err := ethash.InitCL(bogoBlock.NumberU64(), e) |
||||
if err != nil { |
||||
fmt.Println("OpenCL init error: ", err) |
||||
return |
||||
} |
||||
|
||||
stopChan := make(chan struct{}) |
||||
reportHashRate := func() { |
||||
for { |
||||
time.Sleep(3 * time.Second) |
||||
fmt.Printf("hashes/s : %v\n", e.GetHashrate()) |
||||
} |
||||
} |
||||
fmt.Printf("Starting benchmark (%v seconds)\n", 60) |
||||
go reportHashRate() |
||||
go e.Search(bogoBlock, stopChan, 0) |
||||
time.Sleep(60 * time.Second) |
||||
fmt.Println("OK.") |
||||
} |
||||
|
||||
func PrintOpenCLDevices() { |
||||
ethash.PrintDevices() |
||||
} |
Loading…
Reference in new issue