all: Add GPU mining, disabled by default

pull/1869/head
Gustav Simonsson 10 years ago
parent 8b865fa9bf
commit ec6a548ee3
  1. 4
      Godeps/Godeps.json
  2. 26
      Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/cl.go
  3. 254
      Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/cl_test.go
  4. 161
      Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/context.go
  5. 510
      Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/device.go
  6. 51
      Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/device12.go
  7. 1210
      Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl.h
  8. 315
      Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_ext.h
  9. 158
      Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_gl.h
  10. 65
      Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_gl_ext.h
  11. 1278
      Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/cl_platform.h
  12. 43
      Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/headers/1.2/opencl.h
  13. 83
      Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/image.go
  14. 127
      Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/kernel.go
  15. 7
      Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/kernel10.go
  16. 20
      Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/kernel12.go
  17. 83
      Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/platform.go
  18. 105
      Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/program.go
  19. 193
      Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/queue.go
  20. 487
      Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types.go
  21. 71
      Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types12.go
  22. 45
      Godeps/_workspace/src/github.com/Gustav-Simonsson/go-opencl/cl/types_darwin.go
  23. 24
      Godeps/_workspace/src/github.com/ethereum/ethash/ethash.go
  24. 629
      Godeps/_workspace/src/github.com/ethereum/ethash/ethash_opencl.go
  25. 600
      Godeps/_workspace/src/github.com/ethereum/ethash/ethash_opencl_kernel_go_str.go
  26. 6
      Godeps/_workspace/src/github.com/ethereum/ethash/ethash_test.go
  27. 6
      Makefile
  28. 4
      build/flags.sh
  29. 3
      cmd/geth/js_test.go
  30. 45
      cmd/geth/main.go
  31. 6
      cmd/utils/flags.go
  32. 2
      common/natspec/natspec_e2e_test.go
  33. 2
      core/chain_makers.go
  34. 4
      core/chain_pow_test.go
  35. 12
      eth/backend.go
  36. 54
      eth/cpu_mining.go
  37. 103
      eth/gpu_mining.go
  38. 2
      miner/agent.go
  39. 2
      pow/ezp/pow.go
  40. 2
      pow/pow.go
  41. 2
      rpc/api/miner.go
  42. 2
      xeth/xeth.go

4
Godeps/Godeps.json generated vendored

@ -16,8 +16,8 @@
}, },
{ {
"ImportPath": "github.com/ethereum/ethash", "ImportPath": "github.com/ethereum/ethash",
"Comment": "v23.1-234-g062e40a", "Comment": "v23.1-235-gb39e007",
"Rev": "062e40a1a1671f5a5102862b56e4c56f68a732f5" "Rev": "b39e007d393ab5945b4c0748a7415b7e31c5db04"
}, },
{ {
"ImportPath": "github.com/fatih/color", "ImportPath": "github.com/fatih/color",

@ -0,0 +1,26 @@
/*
Package cl provides a binding to the OpenCL api. It's mostly a low-level
wrapper that avoids adding functionality while still making the interface
a little more friendly and easy to use.
Resource life-cycle management:
For any CL object that gets created (buffer, queue, kernel, etc..) you should
call object.Release() when finished with it to free the CL resources. This
explicitely calls the related clXXXRelease method for the type. However,
as a fallback there is a finalizer set for every resource item that takes
care of it (eventually) if Release isn't called. In this way you can have
better control over the life cycle of resources while having a fall back
to avoid leaks. This is similar to how file handles and such are handled
in the Go standard packages.
*/
package cl
// #include "headers/1.2/opencl.h"
// #cgo CFLAGS: -Iheaders/1.2
// #cgo darwin LDFLAGS: -framework OpenCL
// #cgo linux LDFLAGS: -lOpenCL
import "C"
import "errors"
var ErrUnsupported = errors.New("cl: unsupported")

@ -0,0 +1,254 @@
package cl
import (
"math/rand"
"reflect"
"strings"
"testing"
)
var kernelSource = `
__kernel void square(
__global float* input,
__global float* output,
const unsigned int count)
{
int i = get_global_id(0);
if(i < count)
output[i] = input[i] * input[i];
}
`
func getObjectStrings(object interface{}) map[string]string {
v := reflect.ValueOf(object)
t := reflect.TypeOf(object)
strs := make(map[string]string)
numMethods := t.NumMethod()
for i := 0; i < numMethods; i++ {
method := t.Method(i)
if method.Type.NumIn() == 1 && method.Type.NumOut() == 1 && method.Type.Out(0).Kind() == reflect.String {
// this is a string-returning method with (presumably) only a pointer receiver parameter
// call it
outs := v.Method(i).Call([]reflect.Value{})
// put the result in our map
strs[method.Name] = (outs[0].Interface()).(string)
}
}
return strs
}
func TestPlatformStringsContainNoNULs(t *testing.T) {
platforms, err := GetPlatforms()
if err != nil {
t.Fatalf("Failed to get platforms: %+v", err)
}
for _, p := range platforms {
for key, value := range getObjectStrings(p) {
if strings.Contains(value, "\x00") {
t.Fatalf("platform string %q = %+q contains NUL", key, value)
}
}
}
}
func TestDeviceStringsContainNoNULs(t *testing.T) {
platforms, err := GetPlatforms()
if err != nil {
t.Fatalf("Failed to get platforms: %+v", err)
}
for _, p := range platforms {
devs, err := p.GetDevices(DeviceTypeAll)
if err != nil {
t.Fatalf("Failed to get devices for platform %q: %+v", p.Name(), err)
}
for _, d := range devs {
for key, value := range getObjectStrings(d) {
if strings.Contains(value, "\x00") {
t.Fatalf("device string %q = %+q contains NUL", key, value)
}
}
}
}
}
func TestHello(t *testing.T) {
var data [1024]float32
for i := 0; i < len(data); i++ {
data[i] = rand.Float32()
}
platforms, err := GetPlatforms()
if err != nil {
t.Fatalf("Failed to get platforms: %+v", err)
}
for i, p := range platforms {
t.Logf("Platform %d:", i)
t.Logf(" Name: %s", p.Name())
t.Logf(" Vendor: %s", p.Vendor())
t.Logf(" Profile: %s", p.Profile())
t.Logf(" Version: %s", p.Version())
t.Logf(" Extensions: %s", p.Extensions())
}
platform := platforms[0]
devices, err := platform.GetDevices(DeviceTypeAll)
if err != nil {
t.Fatalf("Failed to get devices: %+v", err)
}
if len(devices) == 0 {
t.Fatalf("GetDevices returned no devices")
}
deviceIndex := -1
for i, d := range devices {
if deviceIndex < 0 && d.Type() == DeviceTypeGPU {
deviceIndex = i
}
t.Logf("Device %d (%s): %s", i, d.Type(), d.Name())
t.Logf(" Address Bits: %d", d.AddressBits())
t.Logf(" Available: %+v", d.Available())
// t.Logf(" Built-In Kernels: %s", d.BuiltInKernels())
t.Logf(" Compiler Available: %+v", d.CompilerAvailable())
t.Logf(" Double FP Config: %s", d.DoubleFPConfig())
t.Logf(" Driver Version: %s", d.DriverVersion())
t.Logf(" Error Correction Supported: %+v", d.ErrorCorrectionSupport())
t.Logf(" Execution Capabilities: %s", d.ExecutionCapabilities())
t.Logf(" Extensions: %s", d.Extensions())
t.Logf(" Global Memory Cache Type: %s", d.GlobalMemCacheType())
t.Logf(" Global Memory Cacheline Size: %d KB", d.GlobalMemCachelineSize()/1024)
t.Logf(" Global Memory Size: %d MB", d.GlobalMemSize()/(1024*1024))
t.Logf(" Half FP Config: %s", d.HalfFPConfig())
t.Logf(" Host Unified Memory: %+v", d.HostUnifiedMemory())
t.Logf(" Image Support: %+v", d.ImageSupport())
t.Logf(" Image2D Max Dimensions: %d x %d", d.Image2DMaxWidth(), d.Image2DMaxHeight())
t.Logf(" Image3D Max Dimenionns: %d x %d x %d", d.Image3DMaxWidth(), d.Image3DMaxHeight(), d.Image3DMaxDepth())
// t.Logf(" Image Max Buffer Size: %d", d.ImageMaxBufferSize())
// t.Logf(" Image Max Array Size: %d", d.ImageMaxArraySize())
// t.Logf(" Linker Available: %+v", d.LinkerAvailable())
t.Logf(" Little Endian: %+v", d.EndianLittle())
t.Logf(" Local Mem Size Size: %d KB", d.LocalMemSize()/1024)
t.Logf(" Local Mem Type: %s", d.LocalMemType())
t.Logf(" Max Clock Frequency: %d", d.MaxClockFrequency())
t.Logf(" Max Compute Units: %d", d.MaxComputeUnits())
t.Logf(" Max Constant Args: %d", d.MaxConstantArgs())
t.Logf(" Max Constant Buffer Size: %d KB", d.MaxConstantBufferSize()/1024)
t.Logf(" Max Mem Alloc Size: %d KB", d.MaxMemAllocSize()/1024)
t.Logf(" Max Parameter Size: %d", d.MaxParameterSize())
t.Logf(" Max Read-Image Args: %d", d.MaxReadImageArgs())
t.Logf(" Max Samplers: %d", d.MaxSamplers())
t.Logf(" Max Work Group Size: %d", d.MaxWorkGroupSize())
t.Logf(" Max Work Item Dimensions: %d", d.MaxWorkItemDimensions())
t.Logf(" Max Work Item Sizes: %d", d.MaxWorkItemSizes())
t.Logf(" Max Write-Image Args: %d", d.MaxWriteImageArgs())
t.Logf(" Memory Base Address Alignment: %d", d.MemBaseAddrAlign())
t.Logf(" Native Vector Width Char: %d", d.NativeVectorWidthChar())
t.Logf(" Native Vector Width Short: %d", d.NativeVectorWidthShort())
t.Logf(" Native Vector Width Int: %d", d.NativeVectorWidthInt())
t.Logf(" Native Vector Width Long: %d", d.NativeVectorWidthLong())
t.Logf(" Native Vector Width Float: %d", d.NativeVectorWidthFloat())
t.Logf(" Native Vector Width Double: %d", d.NativeVectorWidthDouble())
t.Logf(" Native Vector Width Half: %d", d.NativeVectorWidthHalf())
t.Logf(" OpenCL C Version: %s", d.OpenCLCVersion())
// t.Logf(" Parent Device: %+v", d.ParentDevice())
t.Logf(" Profile: %s", d.Profile())
t.Logf(" Profiling Timer Resolution: %d", d.ProfilingTimerResolution())
t.Logf(" Vendor: %s", d.Vendor())
t.Logf(" Version: %s", d.Version())
}
if deviceIndex < 0 {
deviceIndex = 0
}
device := devices[deviceIndex]
t.Logf("Using device %d", deviceIndex)
context, err := CreateContext([]*Device{device})
if err != nil {
t.Fatalf("CreateContext failed: %+v", err)
}
// imageFormats, err := context.GetSupportedImageFormats(0, MemObjectTypeImage2D)
// if err != nil {
// t.Fatalf("GetSupportedImageFormats failed: %+v", err)
// }
// t.Logf("Supported image formats: %+v", imageFormats)
queue, err := context.CreateCommandQueue(device, 0)
if err != nil {
t.Fatalf("CreateCommandQueue failed: %+v", err)
}
program, err := context.CreateProgramWithSource([]string{kernelSource})
if err != nil {
t.Fatalf("CreateProgramWithSource failed: %+v", err)
}
if err := program.BuildProgram(nil, ""); err != nil {
t.Fatalf("BuildProgram failed: %+v", err)
}
kernel, err := program.CreateKernel("square")
if err != nil {
t.Fatalf("CreateKernel failed: %+v", err)
}
for i := 0; i < 3; i++ {
name, err := kernel.ArgName(i)
if err == ErrUnsupported {
break
} else if err != nil {
t.Errorf("GetKernelArgInfo for name failed: %+v", err)
break
} else {
t.Logf("Kernel arg %d: %s", i, name)
}
}
input, err := context.CreateEmptyBuffer(MemReadOnly, 4*len(data))
if err != nil {
t.Fatalf("CreateBuffer failed for input: %+v", err)
}
output, err := context.CreateEmptyBuffer(MemReadOnly, 4*len(data))
if err != nil {
t.Fatalf("CreateBuffer failed for output: %+v", err)
}
if _, err := queue.EnqueueWriteBufferFloat32(input, true, 0, data[:], nil); err != nil {
t.Fatalf("EnqueueWriteBufferFloat32 failed: %+v", err)
}
if err := kernel.SetArgs(input, output, uint32(len(data))); err != nil {
t.Fatalf("SetKernelArgs failed: %+v", err)
}
local, err := kernel.WorkGroupSize(device)
if err != nil {
t.Fatalf("WorkGroupSize failed: %+v", err)
}
t.Logf("Work group size: %d", local)
size, _ := kernel.PreferredWorkGroupSizeMultiple(nil)
t.Logf("Preferred Work Group Size Multiple: %d", size)
global := len(data)
d := len(data) % local
if d != 0 {
global += local - d
}
if _, err := queue.EnqueueNDRangeKernel(kernel, nil, []int{global}, []int{local}, nil); err != nil {
t.Fatalf("EnqueueNDRangeKernel failed: %+v", err)
}
if err := queue.Finish(); err != nil {
t.Fatalf("Finish failed: %+v", err)
}
results := make([]float32, len(data))
if _, err := queue.EnqueueReadBufferFloat32(output, true, 0, results, nil); err != nil {
t.Fatalf("EnqueueReadBufferFloat32 failed: %+v", err)
}
correct := 0
for i, v := range data {
if results[i] == v*v {
correct++
}
}
if correct != len(data) {
t.Fatalf("%d/%d correct values", correct, len(data))
}
}

@ -0,0 +1,161 @@
package cl
// #include <stdlib.h>
// #ifdef __APPLE__
// #include "OpenCL/opencl.h"
// #else
// #include "cl.h"
// #endif
import "C"
import (
"runtime"
"unsafe"
)
const maxImageFormats = 256
type Context struct {
clContext C.cl_context
devices []*Device
}
type MemObject struct {
clMem C.cl_mem
size int
}
func releaseContext(c *Context) {
if c.clContext != nil {
C.clReleaseContext(c.clContext)
c.clContext = nil
}
}
func releaseMemObject(b *MemObject) {
if b.clMem != nil {
C.clReleaseMemObject(b.clMem)
b.clMem = nil
}
}
func newMemObject(mo C.cl_mem, size int) *MemObject {
memObject := &MemObject{clMem: mo, size: size}
runtime.SetFinalizer(memObject, releaseMemObject)
return memObject
}
func (b *MemObject) Release() {
releaseMemObject(b)
}
// TODO: properties
func CreateContext(devices []*Device) (*Context, error) {
deviceIds := buildDeviceIdList(devices)
var err C.cl_int
clContext := C.clCreateContext(nil, C.cl_uint(len(devices)), &deviceIds[0], nil, nil, &err)
if err != C.CL_SUCCESS {
return nil, toError(err)
}
if clContext == nil {
return nil, ErrUnknown
}
context := &Context{clContext: clContext, devices: devices}
runtime.SetFinalizer(context, releaseContext)
return context, nil
}
func (ctx *Context) GetSupportedImageFormats(flags MemFlag, imageType MemObjectType) ([]ImageFormat, error) {
var formats [maxImageFormats]C.cl_image_format
var nFormats C.cl_uint
if err := C.clGetSupportedImageFormats(ctx.clContext, C.cl_mem_flags(flags), C.cl_mem_object_type(imageType), maxImageFormats, &formats[0], &nFormats); err != C.CL_SUCCESS {
return nil, toError(err)
}
fmts := make([]ImageFormat, nFormats)
for i, f := range formats[:nFormats] {
fmts[i] = ImageFormat{
ChannelOrder: ChannelOrder(f.image_channel_order),
ChannelDataType: ChannelDataType(f.image_channel_data_type),
}
}
return fmts, nil
}
func (ctx *Context) CreateCommandQueue(device *Device, properties CommandQueueProperty) (*CommandQueue, error) {
var err C.cl_int
clQueue := C.clCreateCommandQueue(ctx.clContext, device.id, C.cl_command_queue_properties(properties), &err)
if err != C.CL_SUCCESS {
return nil, toError(err)
}
if clQueue == nil {
return nil, ErrUnknown
}
commandQueue := &CommandQueue{clQueue: clQueue, device: device}
runtime.SetFinalizer(commandQueue, releaseCommandQueue)
return commandQueue, nil
}
func (ctx *Context) CreateProgramWithSource(sources []string) (*Program, error) {
cSources := make([]*C.char, len(sources))
for i, s := range sources {
cs := C.CString(s)
cSources[i] = cs
defer C.free(unsafe.Pointer(cs))
}
var err C.cl_int
clProgram := C.clCreateProgramWithSource(ctx.clContext, C.cl_uint(len(sources)), &cSources[0], nil, &err)
if err != C.CL_SUCCESS {
return nil, toError(err)
}
if clProgram == nil {
return nil, ErrUnknown
}
program := &Program{clProgram: clProgram, devices: ctx.devices}
runtime.SetFinalizer(program, releaseProgram)
return program, nil
}
func (ctx *Context) CreateBufferUnsafe(flags MemFlag, size int, dataPtr unsafe.Pointer) (*MemObject, error) {
var err C.cl_int
clBuffer := C.clCreateBuffer(ctx.clContext, C.cl_mem_flags(flags), C.size_t(size), dataPtr, &err)
if err != C.CL_SUCCESS {
return nil, toError(err)
}
if clBuffer == nil {
return nil, ErrUnknown
}
return newMemObject(clBuffer, size), nil
}
func (ctx *Context) CreateEmptyBuffer(flags MemFlag, size int) (*MemObject, error) {
return ctx.CreateBufferUnsafe(flags, size, nil)
}
func (ctx *Context) CreateEmptyBufferFloat32(flags MemFlag, size int) (*MemObject, error) {
return ctx.CreateBufferUnsafe(flags, 4*size, nil)
}
func (ctx *Context) CreateBuffer(flags MemFlag, data []byte) (*MemObject, error) {
return ctx.CreateBufferUnsafe(flags, len(data), unsafe.Pointer(&data[0]))
}
//float64
func (ctx *Context) CreateBufferFloat32(flags MemFlag, data []float32) (*MemObject, error) {
return ctx.CreateBufferUnsafe(flags, 4*len(data), unsafe.Pointer(&data[0]))
}
func (ctx *Context) CreateUserEvent() (*Event, error) {
var err C.cl_int
clEvent := C.clCreateUserEvent(ctx.clContext, &err)
if err != C.CL_SUCCESS {
return nil, toError(err)
}
return newEvent(clEvent), nil
}
func (ctx *Context) Release() {
releaseContext(ctx)
}
// http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clCreateSubBuffer.html
// func (memObject *MemObject) CreateSubBuffer(flags MemFlag, bufferCreateType BufferCreateType, )

@ -0,0 +1,510 @@
package cl
// #ifdef __APPLE__
// #include "OpenCL/opencl.h"
// #else
// #include "cl.h"
// #include "cl_ext.h"
// #endif
import "C"
import (
"strings"
"unsafe"
)
const maxDeviceCount = 64
type DeviceType uint
const (
DeviceTypeCPU DeviceType = C.CL_DEVICE_TYPE_CPU
DeviceTypeGPU DeviceType = C.CL_DEVICE_TYPE_GPU
DeviceTypeAccelerator DeviceType = C.CL_DEVICE_TYPE_ACCELERATOR
DeviceTypeDefault DeviceType = C.CL_DEVICE_TYPE_DEFAULT
DeviceTypeAll DeviceType = C.CL_DEVICE_TYPE_ALL
)
type FPConfig int
const (
FPConfigDenorm FPConfig = C.CL_FP_DENORM // denorms are supported
FPConfigInfNaN FPConfig = C.CL_FP_INF_NAN // INF and NaNs are supported
FPConfigRoundToNearest FPConfig = C.CL_FP_ROUND_TO_NEAREST // round to nearest even rounding mode supported
FPConfigRoundToZero FPConfig = C.CL_FP_ROUND_TO_ZERO // round to zero rounding mode supported
FPConfigRoundToInf FPConfig = C.CL_FP_ROUND_TO_INF // round to positive and negative infinity rounding modes supported
FPConfigFMA FPConfig = C.CL_FP_FMA // IEEE754-2008 fused multiply-add is supported
FPConfigSoftFloat FPConfig = C.CL_FP_SOFT_FLOAT // Basic floating-point operations (such as addition, subtraction, multiplication) are implemented in software
)
var fpConfigNameMap = map[FPConfig]string{
FPConfigDenorm: "Denorm",
FPConfigInfNaN: "InfNaN",
FPConfigRoundToNearest: "RoundToNearest",
FPConfigRoundToZero: "RoundToZero",
FPConfigRoundToInf: "RoundToInf",
FPConfigFMA: "FMA",
FPConfigSoftFloat: "SoftFloat",
}
func (c FPConfig) String() string {
var parts []string
for bit, name := range fpConfigNameMap {
if c&bit != 0 {
parts = append(parts, name)
}
}
if parts == nil {
return ""
}
return strings.Join(parts, "|")
}
func (dt DeviceType) String() string {
var parts []string
if dt&DeviceTypeCPU != 0 {
parts = append(parts, "CPU")
}
if dt&DeviceTypeGPU != 0 {
parts = append(parts, "GPU")
}
if dt&DeviceTypeAccelerator != 0 {
parts = append(parts, "Accelerator")
}
if dt&DeviceTypeDefault != 0 {
parts = append(parts, "Default")
}
if parts == nil {
parts = append(parts, "None")
}
return strings.Join(parts, "|")
}
type Device struct {
id C.cl_device_id
}
func buildDeviceIdList(devices []*Device) []C.cl_device_id {
deviceIds := make([]C.cl_device_id, len(devices))
for i, d := range devices {
deviceIds[i] = d.id
}
return deviceIds
}
// Obtain the list of devices available on a platform. 'platform' refers
// to the platform returned by GetPlatforms or can be nil. If platform
// is nil, the behavior is implementation-defined.
func GetDevices(platform *Platform, deviceType DeviceType) ([]*Device, error) {
var deviceIds [maxDeviceCount]C.cl_device_id
var numDevices C.cl_uint
var platformId C.cl_platform_id
if platform != nil {
platformId = platform.id
}
if err := C.clGetDeviceIDs(platformId, C.cl_device_type(deviceType), C.cl_uint(maxDeviceCount), &deviceIds[0], &numDevices); err != C.CL_SUCCESS {
return nil, toError(err)
}
if numDevices > maxDeviceCount {
numDevices = maxDeviceCount
}
devices := make([]*Device, numDevices)
for i := 0; i < int(numDevices); i++ {
devices[i] = &Device{id: deviceIds[i]}
}
return devices, nil
}
func (d *Device) nullableId() C.cl_device_id {
if d == nil {
return nil
}
return d.id
}
func (d *Device) GetInfoString(param C.cl_device_info, panicOnError bool) (string, error) {
var strC [1024]C.char
var strN C.size_t
if err := C.clGetDeviceInfo(d.id, param, 1024, unsafe.Pointer(&strC), &strN); err != C.CL_SUCCESS {
if panicOnError {
panic("Should never fail")
}
return "", toError(err)
}
// OpenCL strings are NUL-terminated, and the terminator is included in strN
// Go strings aren't NUL-terminated, so subtract 1 from the length
return C.GoStringN((*C.char)(unsafe.Pointer(&strC)), C.int(strN-1)), nil
}
func (d *Device) getInfoUint(param C.cl_device_info, panicOnError bool) (uint, error) {
var val C.cl_uint
if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS {
if panicOnError {
panic("Should never fail")
}
return 0, toError(err)
}
return uint(val), nil
}
func (d *Device) getInfoSize(param C.cl_device_info, panicOnError bool) (int, error) {
var val C.size_t
if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS {
if panicOnError {
panic("Should never fail")
}
return 0, toError(err)
}
return int(val), nil
}
func (d *Device) getInfoUlong(param C.cl_device_info, panicOnError bool) (int64, error) {
var val C.cl_ulong
if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS {
if panicOnError {
panic("Should never fail")
}
return 0, toError(err)
}
return int64(val), nil
}
func (d *Device) getInfoBool(param C.cl_device_info, panicOnError bool) (bool, error) {
var val C.cl_bool
if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS {
if panicOnError {
panic("Should never fail")
}
return false, toError(err)
}
return val == C.CL_TRUE, nil
}
func (d *Device) Name() string {
str, _ := d.GetInfoString(C.CL_DEVICE_NAME, true)
return str
}
func (d *Device) Vendor() string {
str, _ := d.GetInfoString(C.CL_DEVICE_VENDOR, true)
return str
}
func (d *Device) Extensions() string {
str, _ := d.GetInfoString(C.CL_DEVICE_EXTENSIONS, true)
return str
}
func (d *Device) OpenCLCVersion() string {
str, _ := d.GetInfoString(C.CL_DEVICE_OPENCL_C_VERSION, true)
return str
}
func (d *Device) Profile() string {
str, _ := d.GetInfoString(C.CL_DEVICE_PROFILE, true)
return str
}
func (d *Device) Version() string {
str, _ := d.GetInfoString(C.CL_DEVICE_VERSION, true)
return str
}
func (d *Device) DriverVersion() string {
str, _ := d.GetInfoString(C.CL_DRIVER_VERSION, true)
return str
}
// The default compute device address space size specified as an
// unsigned integer value in bits. Currently supported values are 32 or 64 bits.
func (d *Device) AddressBits() int {
val, _ := d.getInfoUint(C.CL_DEVICE_ADDRESS_BITS, true)
return int(val)
}
// Size of global memory cache line in bytes.
func (d *Device) GlobalMemCachelineSize() int {
val, _ := d.getInfoUint(C.CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, true)
return int(val)
}
// Maximum configured clock frequency of the device in MHz.
func (d *Device) MaxClockFrequency() int {
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_CLOCK_FREQUENCY, true)
return int(val)
}
// The number of parallel compute units on the OpenCL device.
// A work-group executes on a single compute unit. The minimum value is 1.
func (d *Device) MaxComputeUnits() int {
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_COMPUTE_UNITS, true)
return int(val)
}
// Max number of arguments declared with the __constant qualifier in a kernel.
// The minimum value is 8 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
func (d *Device) MaxConstantArgs() int {
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_CONSTANT_ARGS, true)
return int(val)
}
// Max number of simultaneous image objects that can be read by a kernel.
// The minimum value is 128 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
func (d *Device) MaxReadImageArgs() int {
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_READ_IMAGE_ARGS, true)
return int(val)
}
// Maximum number of samplers that can be used in a kernel. The minimum
// value is 16 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE. (Also see sampler_t.)
func (d *Device) MaxSamplers() int {
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_SAMPLERS, true)
return int(val)
}
// Maximum dimensions that specify the global and local work-item IDs used
// by the data parallel execution model. (Refer to clEnqueueNDRangeKernel).
// The minimum value is 3 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
func (d *Device) MaxWorkItemDimensions() int {
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, true)
return int(val)
}
// Max number of simultaneous image objects that can be written to by a
// kernel. The minimum value is 8 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
func (d *Device) MaxWriteImageArgs() int {
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_WRITE_IMAGE_ARGS, true)
return int(val)
}
// The minimum value is the size (in bits) of the largest OpenCL built-in
// data type supported by the device (long16 in FULL profile, long16 or
// int16 in EMBEDDED profile) for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
func (d *Device) MemBaseAddrAlign() int {
val, _ := d.getInfoUint(C.CL_DEVICE_MEM_BASE_ADDR_ALIGN, true)
return int(val)
}
func (d *Device) NativeVectorWidthChar() int {
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, true)
return int(val)
}
func (d *Device) NativeVectorWidthShort() int {
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, true)
return int(val)
}
func (d *Device) NativeVectorWidthInt() int {
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, true)
return int(val)
}
func (d *Device) NativeVectorWidthLong() int {
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, true)
return int(val)
}
func (d *Device) NativeVectorWidthFloat() int {
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, true)
return int(val)
}
func (d *Device) NativeVectorWidthDouble() int {
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, true)
return int(val)
}
func (d *Device) NativeVectorWidthHalf() int {
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, true)
return int(val)
}
// Max height of 2D image in pixels. The minimum value is 8192
// if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
func (d *Device) Image2DMaxHeight() int {
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE2D_MAX_HEIGHT, true)
return int(val)
}
// Max width of 2D image or 1D image not created from a buffer object in
// pixels. The minimum value is 8192 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
func (d *Device) Image2DMaxWidth() int {
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE2D_MAX_WIDTH, true)
return int(val)
}
// Max depth of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
func (d *Device) Image3DMaxDepth() int {
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE3D_MAX_DEPTH, true)
return int(val)
}
// Max height of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
func (d *Device) Image3DMaxHeight() int {
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE3D_MAX_HEIGHT, true)
return int(val)
}
// Max width of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
func (d *Device) Image3DMaxWidth() int {
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE3D_MAX_WIDTH, true)
return int(val)
}
// Max size in bytes of the arguments that can be passed to a kernel. The
// minimum value is 1024 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
// For this minimum value, only a maximum of 128 arguments can be passed to a kernel.
func (d *Device) MaxParameterSize() int {
val, _ := d.getInfoSize(C.CL_DEVICE_MAX_PARAMETER_SIZE, true)
return int(val)
}
// Maximum number of work-items in a work-group executing a kernel on a
// single compute unit, using the data parallel execution model. (Refer
// to clEnqueueNDRangeKernel). The minimum value is 1.
func (d *Device) MaxWorkGroupSize() int {
val, _ := d.getInfoSize(C.CL_DEVICE_MAX_WORK_GROUP_SIZE, true)
return int(val)
}
// Describes the resolution of device timer. This is measured in nanoseconds.
func (d *Device) ProfilingTimerResolution() int {
val, _ := d.getInfoSize(C.CL_DEVICE_PROFILING_TIMER_RESOLUTION, true)
return int(val)
}
// Size of local memory arena in bytes. The minimum value is 32 KB for
// devices that are not of type CL_DEVICE_TYPE_CUSTOM.
func (d *Device) LocalMemSize() int64 {
val, _ := d.getInfoUlong(C.CL_DEVICE_LOCAL_MEM_SIZE, true)
return val
}
// Max size in bytes of a constant buffer allocation. The minimum value is
// 64 KB for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
func (d *Device) MaxConstantBufferSize() int64 {
val, _ := d.getInfoUlong(C.CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, true)
return val
}
// Max size of memory object allocation in bytes. The minimum value is max
// (1/4th of CL_DEVICE_GLOBAL_MEM_SIZE, 128*1024*1024) for devices that are
// not of type CL_DEVICE_TYPE_CUSTOM.
func (d *Device) MaxMemAllocSize() int64 {
val, _ := d.getInfoUlong(C.CL_DEVICE_MAX_MEM_ALLOC_SIZE, true)
return val
}
// Size of global device memory in bytes.
func (d *Device) GlobalMemSize() int64 {
val, _ := d.getInfoUlong(C.CL_DEVICE_GLOBAL_MEM_SIZE, true)
return val
}
func (d *Device) Available() bool {
val, _ := d.getInfoBool(C.CL_DEVICE_AVAILABLE, true)
return val
}
func (d *Device) CompilerAvailable() bool {
val, _ := d.getInfoBool(C.CL_DEVICE_COMPILER_AVAILABLE, true)
return val
}
func (d *Device) EndianLittle() bool {
val, _ := d.getInfoBool(C.CL_DEVICE_ENDIAN_LITTLE, true)
return val
}
// Is CL_TRUE if the device implements error correction for all
// accesses to compute device memory (global and constant). Is
// CL_FALSE if the device does not implement such error correction.
func (d *Device) ErrorCorrectionSupport() bool {
val, _ := d.getInfoBool(C.CL_DEVICE_ERROR_CORRECTION_SUPPORT, true)
return val
}
func (d *Device) HostUnifiedMemory() bool {
val, _ := d.getInfoBool(C.CL_DEVICE_HOST_UNIFIED_MEMORY, true)
return val
}
func (d *Device) ImageSupport() bool {
val, _ := d.getInfoBool(C.CL_DEVICE_IMAGE_SUPPORT, true)
return val
}
func (d *Device) Type() DeviceType {
var deviceType C.cl_device_type
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_TYPE, C.size_t(unsafe.Sizeof(deviceType)), unsafe.Pointer(&deviceType), nil); err != C.CL_SUCCESS {
panic("Failed to get device type")
}
return DeviceType(deviceType)
}
// Describes double precision floating-point capability of the OpenCL device
func (d *Device) DoubleFPConfig() FPConfig {
var fpConfig C.cl_device_fp_config
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_DOUBLE_FP_CONFIG, C.size_t(unsafe.Sizeof(fpConfig)), unsafe.Pointer(&fpConfig), nil); err != C.CL_SUCCESS {
panic("Failed to get double FP config")
}
return FPConfig(fpConfig)
}
// Describes the OPTIONAL half precision floating-point capability of the OpenCL device
func (d *Device) HalfFPConfig() FPConfig {
var fpConfig C.cl_device_fp_config
err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_HALF_FP_CONFIG, C.size_t(unsafe.Sizeof(fpConfig)), unsafe.Pointer(&fpConfig), nil)
if err != C.CL_SUCCESS {
return FPConfig(0)
}
return FPConfig(fpConfig)
}
// Type of local memory supported. This can be set to CL_LOCAL implying dedicated
// local memory storage such as SRAM, or CL_GLOBAL. For custom devices, CL_NONE
// can also be returned indicating no local memory support.
func (d *Device) LocalMemType() LocalMemType {
var memType C.cl_device_local_mem_type
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_LOCAL_MEM_TYPE, C.size_t(unsafe.Sizeof(memType)), unsafe.Pointer(&memType), nil); err != C.CL_SUCCESS {
return LocalMemType(C.CL_NONE)
}
return LocalMemType(memType)
}
// Describes the execution capabilities of the device. The mandated minimum capability is CL_EXEC_KERNEL.
func (d *Device) ExecutionCapabilities() ExecCapability {
var execCap C.cl_device_exec_capabilities
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_EXECUTION_CAPABILITIES, C.size_t(unsafe.Sizeof(execCap)), unsafe.Pointer(&execCap), nil); err != C.CL_SUCCESS {
panic("Failed to get execution capabilities")
}
return ExecCapability(execCap)
}
func (d *Device) GlobalMemCacheType() MemCacheType {
var memType C.cl_device_mem_cache_type
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, C.size_t(unsafe.Sizeof(memType)), unsafe.Pointer(&memType), nil); err != C.CL_SUCCESS {
return MemCacheType(C.CL_NONE)
}
return MemCacheType(memType)
}
// Maximum number of work-items that can be specified in each dimension of the work-group to clEnqueueNDRangeKernel.
//
// Returns n size_t entries, where n is the value returned by the query for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS.
//
// The minimum value is (1, 1, 1) for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
func (d *Device) MaxWorkItemSizes() []int {
dims := d.MaxWorkItemDimensions()
sizes := make([]C.size_t, dims)
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_MAX_WORK_ITEM_SIZES, C.size_t(int(unsafe.Sizeof(sizes[0]))*dims), unsafe.Pointer(&sizes[0]), nil); err != C.CL_SUCCESS {
panic("Failed to get max work item sizes")
}
intSizes := make([]int, dims)
for i, s := range sizes {
intSizes[i] = int(s)
}
return intSizes
}

@ -0,0 +1,51 @@
// +build cl12
package cl
// #ifdef __APPLE__
// #include "OpenCL/opencl.h"
// #else
// #include "cl.h"
// #endif
import "C"
import "unsafe"
const FPConfigCorrectlyRoundedDivideSqrt FPConfig = C.CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT
func init() {
fpConfigNameMap[FPConfigCorrectlyRoundedDivideSqrt] = "CorrectlyRoundedDivideSqrt"
}
func (d *Device) BuiltInKernels() string {
str, _ := d.getInfoString(C.CL_DEVICE_BUILT_IN_KERNELS, true)
return str
}
// Is CL_FALSE if the implementation does not have a linker available. Is CL_TRUE if the linker is available. This can be CL_FALSE for the embedded platform profile only. This must be CL_TRUE if CL_DEVICE_COMPILER_AVAILABLE is CL_TRUE
func (d *Device) LinkerAvailable() bool {
val, _ := d.getInfoBool(C.CL_DEVICE_LINKER_AVAILABLE, true)
return val
}
func (d *Device) ParentDevice() *Device {
var deviceId C.cl_device_id
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_PARENT_DEVICE, C.size_t(unsafe.Sizeof(deviceId)), unsafe.Pointer(&deviceId), nil); err != C.CL_SUCCESS {
panic("ParentDevice failed")
}
if deviceId == nil {
return nil
}
return &Device{id: deviceId}
}
// Max number of pixels for a 1D image created from a buffer object. The minimum value is 65536 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
func (d *Device) ImageMaxBufferSize() int {
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, true)
return int(val)
}
// Max number of images in a 1D or 2D image array. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
func (d *Device) ImageMaxArraySize() int {
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, true)
return int(val)
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,315 @@
/*******************************************************************************
* Copyright (c) 2008-2013 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
/* $Revision: 11928 $ on $Date: 2010-07-13 09:04:56 -0700 (Tue, 13 Jul 2010) $ */
/* cl_ext.h contains OpenCL extensions which don't have external */
/* (OpenGL, D3D) dependencies. */
#ifndef __CL_EXT_H
#define __CL_EXT_H
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __APPLE__
#include <AvailabilityMacros.h>
#endif
#include <cl.h>
/* cl_khr_fp16 extension - no extension #define since it has no functions */
#define CL_DEVICE_HALF_FP_CONFIG 0x1033
/* Memory object destruction
*
* Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR
*
* Registers a user callback function that will be called when the memory object is deleted and its resources
* freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback
* stack associated with memobj. The registered user callback functions are called in the reverse order in
* which they were registered. The user callback functions are called and then the memory object is deleted
* and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be
* notified when the memory referenced by host_ptr, specified when the memory object is created and used as
* the storage bits for the memory object, can be reused or freed.
*
* The application may not call CL api's with the cl_mem object passed to the pfn_notify.
*
* Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
* before using.
*/
#define cl_APPLE_SetMemObjectDestructor 1
cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem /* memobj */,
void (* /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
void * /*user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
/* Context Logging Functions
*
* The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext().
* Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
* before using.
*
* clLogMessagesToSystemLog fowards on all log messages to the Apple System Logger
*/
#define cl_APPLE_ContextLoggingFunctions 1
extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * /* errstr */,
const void * /* private_info */,
size_t /* cb */,
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */
extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * /* errstr */,
const void * /* private_info */,
size_t /* cb */,
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */
extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * /* errstr */,
const void * /* private_info */,
size_t /* cb */,
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
/************************
* cl_khr_icd extension *
************************/
#define cl_khr_icd 1
/* cl_platform_info */
#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920
/* Additional Error Codes */
#define CL_PLATFORM_NOT_FOUND_KHR -1001
extern CL_API_ENTRY cl_int CL_API_CALL
clIcdGetPlatformIDsKHR(cl_uint /* num_entries */,
cl_platform_id * /* platforms */,
cl_uint * /* num_platforms */);
typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(
cl_uint /* num_entries */,
cl_platform_id * /* platforms */,
cl_uint * /* num_platforms */);
/* Extension: cl_khr_image2D_buffer
*
* This extension allows a 2D image to be created from a cl_mem buffer without a copy.
* The type associated with a 2D image created from a buffer in an OpenCL program is image2d_t.
* Both the sampler and sampler-less read_image built-in functions are supported for 2D images
* and 2D images created from a buffer. Similarly, the write_image built-ins are also supported
* for 2D images created from a buffer.
*
* When the 2D image from buffer is created, the client must specify the width,
* height, image format (i.e. channel order and channel data type) and optionally the row pitch
*
* The pitch specified must be a multiple of CL_DEVICE_IMAGE_PITCH_ALIGNMENT pixels.
* The base address of the buffer must be aligned to CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT pixels.
*/
/*************************************
* cl_khr_initalize_memory extension *
*************************************/
#define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x200E
/**************************************
* cl_khr_terminate_context extension *
**************************************/
#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x200F
#define CL_CONTEXT_TERMINATE_KHR 0x2010
#define cl_khr_terminate_context 1
extern CL_API_ENTRY cl_int CL_API_CALL clTerminateContextKHR(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clTerminateContextKHR_fn)(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
/*
* Extension: cl_khr_spir
*
* This extension adds support to create an OpenCL program object from a
* Standard Portable Intermediate Representation (SPIR) instance
*/
#define CL_DEVICE_SPIR_VERSIONS 0x40E0
#define CL_PROGRAM_BINARY_TYPE_INTERMEDIATE 0x40E1
/******************************************
* cl_nv_device_attribute_query extension *
******************************************/
/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001
#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002
#define CL_DEVICE_WARP_SIZE_NV 0x4003
#define CL_DEVICE_GPU_OVERLAP_NV 0x4004
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005
#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006
/*********************************
* cl_amd_device_attribute_query *
*********************************/
#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036
/*********************************
* cl_arm_printf extension
*********************************/
#define CL_PRINTF_CALLBACK_ARM 0x40B0
#define CL_PRINTF_BUFFERSIZE_ARM 0x40B1
#ifdef CL_VERSION_1_1
/***********************************
* cl_ext_device_fission extension *
***********************************/
#define cl_ext_device_fission 1
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int
(CL_API_CALL *clReleaseDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int
(CL_API_CALL *clRetainDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
typedef cl_ulong cl_device_partition_property_ext;
extern CL_API_ENTRY cl_int CL_API_CALL
clCreateSubDevicesEXT( cl_device_id /*in_device*/,
const cl_device_partition_property_ext * /* properties */,
cl_uint /*num_entries*/,
cl_device_id * /*out_devices*/,
cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int
( CL_API_CALL * clCreateSubDevicesEXT_fn)( cl_device_id /*in_device*/,
const cl_device_partition_property_ext * /* properties */,
cl_uint /*num_entries*/,
cl_device_id * /*out_devices*/,
cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
/* cl_device_partition_property_ext */
#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050
#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051
#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052
#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053
/* clDeviceGetInfo selectors */
#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054
#define CL_DEVICE_PARTITION_TYPES_EXT 0x4055
#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056
#define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057
#define CL_DEVICE_PARTITION_STYLE_EXT 0x4058
/* error codes */
#define CL_DEVICE_PARTITION_FAILED_EXT -1057
#define CL_INVALID_PARTITION_COUNT_EXT -1058
#define CL_INVALID_PARTITION_NAME_EXT -1059
/* CL_AFFINITY_DOMAINs */
#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1
#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2
#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3
#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4
#define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10
#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100
/* cl_device_partition_property_ext list terminators */
#define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0)
#define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0)
#define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1)
/*********************************
* cl_qcom_ext_host_ptr extension
*********************************/
#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29)
#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0
#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1
#define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2
#define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3
#define CL_MEM_HOST_UNCACHED_QCOM 0x40A4
#define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5
#define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6
#define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7
typedef cl_uint cl_image_pitch_info_qcom;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceImageInfoQCOM(cl_device_id device,
size_t image_width,
size_t image_height,
const cl_image_format *image_format,
cl_image_pitch_info_qcom param_name,
size_t param_value_size,
void *param_value,
size_t *param_value_size_ret);
typedef struct _cl_mem_ext_host_ptr
{
/* Type of external memory allocation. */
/* Legal values will be defined in layered extensions. */
cl_uint allocation_type;
/* Host cache policy for this external memory allocation. */
cl_uint host_cache_policy;
} cl_mem_ext_host_ptr;
/*********************************
* cl_qcom_ion_host_ptr extension
*********************************/
#define CL_MEM_ION_HOST_PTR_QCOM 0x40A8
typedef struct _cl_mem_ion_host_ptr
{
/* Type of external memory allocation. */
/* Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. */
cl_mem_ext_host_ptr ext_host_ptr;
/* ION file descriptor */
int ion_filedesc;
/* Host pointer to the ION allocated memory */
void* ion_hostptr;
} cl_mem_ion_host_ptr;
#endif /* CL_VERSION_1_1 */
#ifdef __cplusplus
}
#endif
#endif /* __CL_EXT_H */

@ -0,0 +1,158 @@
/**********************************************************************************
* Copyright (c) 2008 - 2012 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
#ifndef __OPENCL_CL_GL_H
#define __OPENCL_CL_GL_H
#include <cl.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef cl_uint cl_gl_object_type;
typedef cl_uint cl_gl_texture_info;
typedef cl_uint cl_gl_platform_info;
typedef struct __GLsync *cl_GLsync;
/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */
#define CL_GL_OBJECT_BUFFER 0x2000
#define CL_GL_OBJECT_TEXTURE2D 0x2001
#define CL_GL_OBJECT_TEXTURE3D 0x2002
#define CL_GL_OBJECT_RENDERBUFFER 0x2003
#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E
#define CL_GL_OBJECT_TEXTURE1D 0x200F
#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010
#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011
/* cl_gl_texture_info */
#define CL_GL_TEXTURE_TARGET 0x2004
#define CL_GL_MIPMAP_LEVEL 0x2005
#define CL_GL_NUM_SAMPLES 0x2012
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLBuffer(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLuint /* bufobj */,
int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLTexture(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLenum /* target */,
cl_GLint /* miplevel */,
cl_GLuint /* texture */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLRenderbuffer(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLuint /* renderbuffer */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLObjectInfo(cl_mem /* memobj */,
cl_gl_object_type * /* gl_object_type */,
cl_GLuint * /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLTextureInfo(cl_mem /* memobj */,
cl_gl_texture_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireGLObjects(cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem * /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseGLObjects(cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem * /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
/* Deprecated OpenCL 1.1 APIs */
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
clCreateFromGLTexture2D(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLenum /* target */,
cl_GLint /* miplevel */,
cl_GLuint /* texture */,
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
clCreateFromGLTexture3D(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLenum /* target */,
cl_GLint /* miplevel */,
cl_GLuint /* texture */,
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
/* cl_khr_gl_sharing extension */
#define cl_khr_gl_sharing 1
typedef cl_uint cl_gl_context_info;
/* Additional Error Codes */
#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000
/* cl_gl_context_info */
#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006
#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007
/* Additional cl_context_properties */
#define CL_GL_CONTEXT_KHR 0x2008
#define CL_EGL_DISPLAY_KHR 0x2009
#define CL_GLX_DISPLAY_KHR 0x200A
#define CL_WGL_HDC_KHR 0x200B
#define CL_CGL_SHAREGROUP_KHR 0x200C
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLContextInfoKHR(const cl_context_properties * /* properties */,
cl_gl_context_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
const cl_context_properties * properties,
cl_gl_context_info param_name,
size_t param_value_size,
void * param_value,
size_t * param_value_size_ret);
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_GL_H */

@ -0,0 +1,65 @@
/**********************************************************************************
* Copyright (c) 2008-2012 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
/* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have */
/* OpenGL dependencies. */
#ifndef __OPENCL_CL_GL_EXT_H
#define __OPENCL_CL_GL_EXT_H
#ifdef __cplusplus
extern "C" {
#endif
#include <cl_gl.h>
/*
* For each extension, follow this template
* cl_VEN_extname extension */
/* #define cl_VEN_extname 1
* ... define new types, if any
* ... define new tokens, if any
* ... define new APIs, if any
*
* If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header
* This allows us to avoid having to decide whether to include GL headers or GLES here.
*/
/*
* cl_khr_gl_event extension
* See section 9.9 in the OpenCL 1.1 spec for more information
*/
#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D
extern CL_API_ENTRY cl_event CL_API_CALL
clCreateEventFromGLsyncKHR(cl_context /* context */,
cl_GLsync /* cl_GLsync */,
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_GL_EXT_H */

File diff suppressed because it is too large Load Diff

@ -0,0 +1,43 @@
/*******************************************************************************
* Copyright (c) 2008-2012 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
#ifndef __OPENCL_H
#define __OPENCL_H
#ifdef __cplusplus
extern "C" {
#endif
#include <cl.h>
#include <cl_gl.h>
#include <cl_gl_ext.h>
#include <cl_ext.h>
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_H */

@ -0,0 +1,83 @@
// +build cl12
package cl
// #ifdef __APPLE__
// #include "OpenCL/opencl.h"
// #else
// #include "cl.h"
// #endif
import "C"
import (
"image"
"unsafe"
)
func (ctx *Context) CreateImage(flags MemFlag, imageFormat ImageFormat, imageDesc ImageDescription, data []byte) (*MemObject, error) {
format := imageFormat.toCl()
desc := imageDesc.toCl()
var dataPtr unsafe.Pointer
if data != nil {
dataPtr = unsafe.Pointer(&data[0])
}
var err C.cl_int
clBuffer := C.clCreateImage(ctx.clContext, C.cl_mem_flags(flags), &format, &desc, dataPtr, &err)
if err != C.CL_SUCCESS {
return nil, toError(err)
}
if clBuffer == nil {
return nil, ErrUnknown
}
return newMemObject(clBuffer, len(data)), nil
}
func (ctx *Context) CreateImageSimple(flags MemFlag, width, height int, channelOrder ChannelOrder, channelDataType ChannelDataType, data []byte) (*MemObject, error) {
format := ImageFormat{channelOrder, channelDataType}
desc := ImageDescription{
Type: MemObjectTypeImage2D,
Width: width,
Height: height,
}
return ctx.CreateImage(flags, format, desc, data)
}
func (ctx *Context) CreateImageFromImage(flags MemFlag, img image.Image) (*MemObject, error) {
switch m := img.(type) {
case *image.Gray:
format := ImageFormat{ChannelOrderIntensity, ChannelDataTypeUNormInt8}
desc := ImageDescription{
Type: MemObjectTypeImage2D,
Width: m.Bounds().Dx(),
Height: m.Bounds().Dy(),
RowPitch: m.Stride,
}
return ctx.CreateImage(flags, format, desc, m.Pix)
case *image.RGBA:
format := ImageFormat{ChannelOrderRGBA, ChannelDataTypeUNormInt8}
desc := ImageDescription{
Type: MemObjectTypeImage2D,
Width: m.Bounds().Dx(),
Height: m.Bounds().Dy(),
RowPitch: m.Stride,
}
return ctx.CreateImage(flags, format, desc, m.Pix)
}
b := img.Bounds()
w := b.Dx()
h := b.Dy()
data := make([]byte, w*h*4)
dataOffset := 0
for y := 0; y < h; y++ {
for x := 0; x < w; x++ {
c := img.At(x+b.Min.X, y+b.Min.Y)
r, g, b, a := c.RGBA()
data[dataOffset] = uint8(r >> 8)
data[dataOffset+1] = uint8(g >> 8)
data[dataOffset+2] = uint8(b >> 8)
data[dataOffset+3] = uint8(a >> 8)
dataOffset += 4
}
}
return ctx.CreateImageSimple(flags, w, h, ChannelOrderRGBA, ChannelDataTypeUNormInt8, data)
}

@ -0,0 +1,127 @@
package cl
// #ifdef __APPLE__
// #include "OpenCL/opencl.h"
// #else
// #include "cl.h"
// #endif
import "C"
import (
"fmt"
"unsafe"
)
type ErrUnsupportedArgumentType struct {
Index int
Value interface{}
}
func (e ErrUnsupportedArgumentType) Error() string {
return fmt.Sprintf("cl: unsupported argument type for index %d: %+v", e.Index, e.Value)
}
type Kernel struct {
clKernel C.cl_kernel
name string
}
type LocalBuffer int
func releaseKernel(k *Kernel) {
if k.clKernel != nil {
C.clReleaseKernel(k.clKernel)
k.clKernel = nil
}
}
func (k *Kernel) Release() {
releaseKernel(k)
}
func (k *Kernel) SetArgs(args ...interface{}) error {
for index, arg := range args {
if err := k.SetArg(index, arg); err != nil {
return err
}
}
return nil
}
func (k *Kernel) SetArg(index int, arg interface{}) error {
switch val := arg.(type) {
case uint8:
return k.SetArgUint8(index, val)
case int8:
return k.SetArgInt8(index, val)
case uint32:
return k.SetArgUint32(index, val)
case uint64:
return k.SetArgUint64(index, val)
case int32:
return k.SetArgInt32(index, val)
case float32:
return k.SetArgFloat32(index, val)
case *MemObject:
return k.SetArgBuffer(index, val)
case LocalBuffer:
return k.SetArgLocal(index, int(val))
default:
return ErrUnsupportedArgumentType{Index: index, Value: arg}
}
}
func (k *Kernel) SetArgBuffer(index int, buffer *MemObject) error {
return k.SetArgUnsafe(index, int(unsafe.Sizeof(buffer.clMem)), unsafe.Pointer(&buffer.clMem))
}
func (k *Kernel) SetArgFloat32(index int, val float32) error {
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
}
func (k *Kernel) SetArgInt8(index int, val int8) error {
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
}
func (k *Kernel) SetArgUint8(index int, val uint8) error {
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
}
func (k *Kernel) SetArgInt32(index int, val int32) error {
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
}
func (k *Kernel) SetArgUint32(index int, val uint32) error {
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
}
func (k *Kernel) SetArgUint64(index int, val uint64) error {
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
}
func (k *Kernel) SetArgLocal(index int, size int) error {
return k.SetArgUnsafe(index, size, nil)
}
func (k *Kernel) SetArgUnsafe(index, argSize int, arg unsafe.Pointer) error {
//fmt.Println("FUNKY: ", index, argSize)
return toError(C.clSetKernelArg(k.clKernel, C.cl_uint(index), C.size_t(argSize), arg))
}
func (k *Kernel) PreferredWorkGroupSizeMultiple(device *Device) (int, error) {
var size C.size_t
err := C.clGetKernelWorkGroupInfo(k.clKernel, device.nullableId(), C.CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, C.size_t(unsafe.Sizeof(size)), unsafe.Pointer(&size), nil)
return int(size), toError(err)
}
func (k *Kernel) WorkGroupSize(device *Device) (int, error) {
var size C.size_t
err := C.clGetKernelWorkGroupInfo(k.clKernel, device.nullableId(), C.CL_KERNEL_WORK_GROUP_SIZE, C.size_t(unsafe.Sizeof(size)), unsafe.Pointer(&size), nil)
return int(size), toError(err)
}
func (k *Kernel) NumArgs() (int, error) {
var num C.cl_uint
err := C.clGetKernelInfo(k.clKernel, C.CL_KERNEL_NUM_ARGS, C.size_t(unsafe.Sizeof(num)), unsafe.Pointer(&num), nil)
return int(num), toError(err)
}

@ -0,0 +1,7 @@
// +build !cl12
package cl
func (k *Kernel) ArgName(index int) (string, error) {
return "", ErrUnsupported
}

@ -0,0 +1,20 @@
// +build cl12
package cl
// #ifdef __APPLE__
// #include "OpenCL/opencl.h"
// #else
// #include "cl.h"
// #endif
import "C"
import "unsafe"
func (k *Kernel) ArgName(index int) (string, error) {
var strC [1024]byte
var strN C.size_t
if err := C.clGetKernelArgInfo(k.clKernel, C.cl_uint(index), C.CL_KERNEL_ARG_NAME, 1024, unsafe.Pointer(&strC[0]), &strN); err != C.CL_SUCCESS {
return "", toError(err)
}
return string(strC[:strN]), nil
}

@ -0,0 +1,83 @@
package cl
// #ifdef __APPLE__
// #include "OpenCL/opencl.h"
// #else
// #include "cl.h"
// #endif
import "C"
import "unsafe"
const maxPlatforms = 32
type Platform struct {
id C.cl_platform_id
}
// Obtain the list of platforms available.
func GetPlatforms() ([]*Platform, error) {
var platformIds [maxPlatforms]C.cl_platform_id
var nPlatforms C.cl_uint
if err := C.clGetPlatformIDs(C.cl_uint(maxPlatforms), &platformIds[0], &nPlatforms); err != C.CL_SUCCESS {
return nil, toError(err)
}
platforms := make([]*Platform, nPlatforms)
for i := 0; i < int(nPlatforms); i++ {
platforms[i] = &Platform{id: platformIds[i]}
}
return platforms, nil
}
func (p *Platform) GetDevices(deviceType DeviceType) ([]*Device, error) {
return GetDevices(p, deviceType)
}
func (p *Platform) getInfoString(param C.cl_platform_info) (string, error) {
var strC [2048]byte
var strN C.size_t
if err := C.clGetPlatformInfo(p.id, param, 2048, unsafe.Pointer(&strC[0]), &strN); err != C.CL_SUCCESS {
return "", toError(err)
}
return string(strC[:(strN - 1)]), nil
}
func (p *Platform) Name() string {
if str, err := p.getInfoString(C.CL_PLATFORM_NAME); err != nil {
panic("Platform.Name() should never fail")
} else {
return str
}
}
func (p *Platform) Vendor() string {
if str, err := p.getInfoString(C.CL_PLATFORM_VENDOR); err != nil {
panic("Platform.Vendor() should never fail")
} else {
return str
}
}
func (p *Platform) Profile() string {
if str, err := p.getInfoString(C.CL_PLATFORM_PROFILE); err != nil {
panic("Platform.Profile() should never fail")
} else {
return str
}
}
func (p *Platform) Version() string {
if str, err := p.getInfoString(C.CL_PLATFORM_VERSION); err != nil {
panic("Platform.Version() should never fail")
} else {
return str
}
}
func (p *Platform) Extensions() string {
if str, err := p.getInfoString(C.CL_PLATFORM_EXTENSIONS); err != nil {
panic("Platform.Extensions() should never fail")
} else {
return str
}
}

@ -0,0 +1,105 @@
package cl
// #include <stdlib.h>
// #ifdef __APPLE__
// #include "OpenCL/opencl.h"
// #else
// #include "cl.h"
// #endif
import "C"
import (
"fmt"
"runtime"
"unsafe"
)
type BuildError struct {
Message string
Device *Device
}
func (e BuildError) Error() string {
if e.Device != nil {
return fmt.Sprintf("cl: build error on %q: %s", e.Device.Name(), e.Message)
} else {
return fmt.Sprintf("cl: build error: %s", e.Message)
}
}
type Program struct {
clProgram C.cl_program
devices []*Device
}
func releaseProgram(p *Program) {
if p.clProgram != nil {
C.clReleaseProgram(p.clProgram)
p.clProgram = nil
}
}
func (p *Program) Release() {
releaseProgram(p)
}
func (p *Program) BuildProgram(devices []*Device, options string) error {
var cOptions *C.char
if options != "" {
cOptions = C.CString(options)
defer C.free(unsafe.Pointer(cOptions))
}
var deviceList []C.cl_device_id
var deviceListPtr *C.cl_device_id
numDevices := C.cl_uint(len(devices))
if devices != nil && len(devices) > 0 {
deviceList = buildDeviceIdList(devices)
deviceListPtr = &deviceList[0]
}
if err := C.clBuildProgram(p.clProgram, numDevices, deviceListPtr, cOptions, nil, nil); err != C.CL_SUCCESS {
buffer := make([]byte, 4096)
var bLen C.size_t
var err C.cl_int
for _, dev := range p.devices {
for i := 2; i >= 0; i-- {
err = C.clGetProgramBuildInfo(p.clProgram, dev.id, C.CL_PROGRAM_BUILD_LOG, C.size_t(len(buffer)), unsafe.Pointer(&buffer[0]), &bLen)
if err == C.CL_INVALID_VALUE && i > 0 && bLen < 1024*1024 {
// INVALID_VALUE probably means our buffer isn't large enough
buffer = make([]byte, bLen)
} else {
break
}
}
if err != C.CL_SUCCESS {
return toError(err)
}
if bLen > 1 {
return BuildError{
Device: dev,
Message: string(buffer[:bLen-1]),
}
}
}
return BuildError{
Device: nil,
Message: "build failed and produced no log entries",
}
}
return nil
}
func (p *Program) CreateKernel(name string) (*Kernel, error) {
cName := C.CString(name)
defer C.free(unsafe.Pointer(cName))
var err C.cl_int
clKernel := C.clCreateKernel(p.clProgram, cName, &err)
if err != C.CL_SUCCESS {
return nil, toError(err)
}
kernel := &Kernel{clKernel: clKernel, name: name}
runtime.SetFinalizer(kernel, releaseKernel)
return kernel, nil
}

@ -0,0 +1,193 @@
package cl
// #ifdef __APPLE__
// #include "OpenCL/opencl.h"
// #else
// #include "cl.h"
// #endif
import "C"
import "unsafe"
type CommandQueueProperty int
const (
CommandQueueOutOfOrderExecModeEnable CommandQueueProperty = C.CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
CommandQueueProfilingEnable CommandQueueProperty = C.CL_QUEUE_PROFILING_ENABLE
)
type CommandQueue struct {
clQueue C.cl_command_queue
device *Device
}
func releaseCommandQueue(q *CommandQueue) {
if q.clQueue != nil {
C.clReleaseCommandQueue(q.clQueue)
q.clQueue = nil
}
}
// Call clReleaseCommandQueue on the CommandQueue. Using the CommandQueue after Release will cause a panick.
func (q *CommandQueue) Release() {
releaseCommandQueue(q)
}
// Blocks until all previously queued OpenCL commands in a command-queue are issued to the associated device and have completed.
func (q *CommandQueue) Finish() error {
return toError(C.clFinish(q.clQueue))
}
// Issues all previously queued OpenCL commands in a command-queue to the device associated with the command-queue.
func (q *CommandQueue) Flush() error {
return toError(C.clFlush(q.clQueue))
}
// Enqueues a command to map a region of the buffer object given by buffer into the host address space and returns a pointer to this mapped region.
func (q *CommandQueue) EnqueueMapBuffer(buffer *MemObject, blocking bool, flags MapFlag, offset, size int, eventWaitList []*Event) (*MappedMemObject, *Event, error) {
var event C.cl_event
var err C.cl_int
ptr := C.clEnqueueMapBuffer(q.clQueue, buffer.clMem, clBool(blocking), flags.toCl(), C.size_t(offset), C.size_t(size), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event, &err)
if err != C.CL_SUCCESS {
return nil, nil, toError(err)
}
ev := newEvent(event)
if ptr == nil {
return nil, ev, ErrUnknown
}
return &MappedMemObject{ptr: ptr, size: size}, ev, nil
}
// Enqueues a command to map a region of an image object into the host address space and returns a pointer to this mapped region.
func (q *CommandQueue) EnqueueMapImage(buffer *MemObject, blocking bool, flags MapFlag, origin, region [3]int, eventWaitList []*Event) (*MappedMemObject, *Event, error) {
cOrigin := sizeT3(origin)
cRegion := sizeT3(region)
var event C.cl_event
var err C.cl_int
var rowPitch, slicePitch C.size_t
ptr := C.clEnqueueMapImage(q.clQueue, buffer.clMem, clBool(blocking), flags.toCl(), &cOrigin[0], &cRegion[0], &rowPitch, &slicePitch, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event, &err)
if err != C.CL_SUCCESS {
return nil, nil, toError(err)
}
ev := newEvent(event)
if ptr == nil {
return nil, ev, ErrUnknown
}
size := 0 // TODO: could calculate this
return &MappedMemObject{ptr: ptr, size: size, rowPitch: int(rowPitch), slicePitch: int(slicePitch)}, ev, nil
}
// Enqueues a command to unmap a previously mapped region of a memory object.
func (q *CommandQueue) EnqueueUnmapMemObject(buffer *MemObject, mappedObj *MappedMemObject, eventWaitList []*Event) (*Event, error) {
var event C.cl_event
if err := C.clEnqueueUnmapMemObject(q.clQueue, buffer.clMem, mappedObj.ptr, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event); err != C.CL_SUCCESS {
return nil, toError(err)
}
return newEvent(event), nil
}
// Enqueues a command to copy a buffer object to another buffer object.
func (q *CommandQueue) EnqueueCopyBuffer(srcBuffer, dstBuffer *MemObject, srcOffset, dstOffset, byteCount int, eventWaitList []*Event) (*Event, error) {
var event C.cl_event
err := toError(C.clEnqueueCopyBuffer(q.clQueue, srcBuffer.clMem, dstBuffer.clMem, C.size_t(srcOffset), C.size_t(dstOffset), C.size_t(byteCount), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
return newEvent(event), err
}
// Enqueue commands to write to a buffer object from host memory.
func (q *CommandQueue) EnqueueWriteBuffer(buffer *MemObject, blocking bool, offset, dataSize int, dataPtr unsafe.Pointer, eventWaitList []*Event) (*Event, error) {
var event C.cl_event
err := toError(C.clEnqueueWriteBuffer(q.clQueue, buffer.clMem, clBool(blocking), C.size_t(offset), C.size_t(dataSize), dataPtr, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
return newEvent(event), err
}
func (q *CommandQueue) EnqueueWriteBufferFloat32(buffer *MemObject, blocking bool, offset int, data []float32, eventWaitList []*Event) (*Event, error) {
dataPtr := unsafe.Pointer(&data[0])
dataSize := int(unsafe.Sizeof(data[0])) * len(data)
return q.EnqueueWriteBuffer(buffer, blocking, offset, dataSize, dataPtr, eventWaitList)
}
// Enqueue commands to read from a buffer object to host memory.
func (q *CommandQueue) EnqueueReadBuffer(buffer *MemObject, blocking bool, offset, dataSize int, dataPtr unsafe.Pointer, eventWaitList []*Event) (*Event, error) {
var event C.cl_event
err := toError(C.clEnqueueReadBuffer(q.clQueue, buffer.clMem, clBool(blocking), C.size_t(offset), C.size_t(dataSize), dataPtr, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
return newEvent(event), err
}
func (q *CommandQueue) EnqueueReadBufferFloat32(buffer *MemObject, blocking bool, offset int, data []float32, eventWaitList []*Event) (*Event, error) {
dataPtr := unsafe.Pointer(&data[0])
dataSize := int(unsafe.Sizeof(data[0])) * len(data)
return q.EnqueueReadBuffer(buffer, blocking, offset, dataSize, dataPtr, eventWaitList)
}
// Enqueues a command to execute a kernel on a device.
func (q *CommandQueue) EnqueueNDRangeKernel(kernel *Kernel, globalWorkOffset, globalWorkSize, localWorkSize []int, eventWaitList []*Event) (*Event, error) {
workDim := len(globalWorkSize)
var globalWorkOffsetList []C.size_t
var globalWorkOffsetPtr *C.size_t
if globalWorkOffset != nil {
globalWorkOffsetList = make([]C.size_t, len(globalWorkOffset))
for i, off := range globalWorkOffset {
globalWorkOffsetList[i] = C.size_t(off)
}
globalWorkOffsetPtr = &globalWorkOffsetList[0]
}
var globalWorkSizeList []C.size_t
var globalWorkSizePtr *C.size_t
if globalWorkSize != nil {
globalWorkSizeList = make([]C.size_t, len(globalWorkSize))
for i, off := range globalWorkSize {
globalWorkSizeList[i] = C.size_t(off)
}
globalWorkSizePtr = &globalWorkSizeList[0]
}
var localWorkSizeList []C.size_t
var localWorkSizePtr *C.size_t
if localWorkSize != nil {
localWorkSizeList = make([]C.size_t, len(localWorkSize))
for i, off := range localWorkSize {
localWorkSizeList[i] = C.size_t(off)
}
localWorkSizePtr = &localWorkSizeList[0]
}
var event C.cl_event
err := toError(C.clEnqueueNDRangeKernel(q.clQueue, kernel.clKernel, C.cl_uint(workDim), globalWorkOffsetPtr, globalWorkSizePtr, localWorkSizePtr, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
return newEvent(event), err
}
// Enqueues a command to read from a 2D or 3D image object to host memory.
func (q *CommandQueue) EnqueueReadImage(image *MemObject, blocking bool, origin, region [3]int, rowPitch, slicePitch int, data []byte, eventWaitList []*Event) (*Event, error) {
cOrigin := sizeT3(origin)
cRegion := sizeT3(region)
var event C.cl_event
err := toError(C.clEnqueueReadImage(q.clQueue, image.clMem, clBool(blocking), &cOrigin[0], &cRegion[0], C.size_t(rowPitch), C.size_t(slicePitch), unsafe.Pointer(&data[0]), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
return newEvent(event), err
}
// Enqueues a command to write from a 2D or 3D image object to host memory.
func (q *CommandQueue) EnqueueWriteImage(image *MemObject, blocking bool, origin, region [3]int, rowPitch, slicePitch int, data []byte, eventWaitList []*Event) (*Event, error) {
cOrigin := sizeT3(origin)
cRegion := sizeT3(region)
var event C.cl_event
err := toError(C.clEnqueueWriteImage(q.clQueue, image.clMem, clBool(blocking), &cOrigin[0], &cRegion[0], C.size_t(rowPitch), C.size_t(slicePitch), unsafe.Pointer(&data[0]), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
return newEvent(event), err
}
func (q *CommandQueue) EnqueueFillBuffer(buffer *MemObject, pattern unsafe.Pointer, patternSize, offset, size int, eventWaitList []*Event) (*Event, error) {
var event C.cl_event
err := toError(C.clEnqueueFillBuffer(q.clQueue, buffer.clMem, pattern, C.size_t(patternSize), C.size_t(offset), C.size_t(size), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
return newEvent(event), err
}
// A synchronization point that enqueues a barrier operation.
func (q *CommandQueue) EnqueueBarrierWithWaitList(eventWaitList []*Event) (*Event, error) {
var event C.cl_event
err := toError(C.clEnqueueBarrierWithWaitList(q.clQueue, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
return newEvent(event), err
}
// Enqueues a marker command which waits for either a list of events to complete, or all previously enqueued commands to complete.
func (q *CommandQueue) EnqueueMarkerWithWaitList(eventWaitList []*Event) (*Event, error) {
var event C.cl_event
err := toError(C.clEnqueueMarkerWithWaitList(q.clQueue, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
return newEvent(event), err
}

@ -0,0 +1,487 @@
package cl
// #ifdef __APPLE__
// #include "OpenCL/opencl.h"
// #else
// #include "cl.h"
// #endif
import "C"
import (
"errors"
"fmt"
"reflect"
"runtime"
"strings"
"unsafe"
)
var (
ErrUnknown = errors.New("cl: unknown error") // Generally an unexpected result from an OpenCL function (e.g. CL_SUCCESS but null pointer)
)
type ErrOther int
func (e ErrOther) Error() string {
return fmt.Sprintf("cl: error %d", int(e))
}
var (
ErrDeviceNotFound = errors.New("cl: Device Not Found")
ErrDeviceNotAvailable = errors.New("cl: Device Not Available")
ErrCompilerNotAvailable = errors.New("cl: Compiler Not Available")
ErrMemObjectAllocationFailure = errors.New("cl: Mem Object Allocation Failure")
ErrOutOfResources = errors.New("cl: Out Of Resources")
ErrOutOfHostMemory = errors.New("cl: Out Of Host Memory")
ErrProfilingInfoNotAvailable = errors.New("cl: Profiling Info Not Available")
ErrMemCopyOverlap = errors.New("cl: Mem Copy Overlap")
ErrImageFormatMismatch = errors.New("cl: Image Format Mismatch")
ErrImageFormatNotSupported = errors.New("cl: Image Format Not Supported")
ErrBuildProgramFailure = errors.New("cl: Build Program Failure")
ErrMapFailure = errors.New("cl: Map Failure")
ErrMisalignedSubBufferOffset = errors.New("cl: Misaligned Sub Buffer Offset")
ErrExecStatusErrorForEventsInWaitList = errors.New("cl: Exec Status Error For Events In Wait List")
ErrCompileProgramFailure = errors.New("cl: Compile Program Failure")
ErrLinkerNotAvailable = errors.New("cl: Linker Not Available")
ErrLinkProgramFailure = errors.New("cl: Link Program Failure")
ErrDevicePartitionFailed = errors.New("cl: Device Partition Failed")
ErrKernelArgInfoNotAvailable = errors.New("cl: Kernel Arg Info Not Available")
ErrInvalidValue = errors.New("cl: Invalid Value")
ErrInvalidDeviceType = errors.New("cl: Invalid Device Type")
ErrInvalidPlatform = errors.New("cl: Invalid Platform")
ErrInvalidDevice = errors.New("cl: Invalid Device")
ErrInvalidContext = errors.New("cl: Invalid Context")
ErrInvalidQueueProperties = errors.New("cl: Invalid Queue Properties")
ErrInvalidCommandQueue = errors.New("cl: Invalid Command Queue")
ErrInvalidHostPtr = errors.New("cl: Invalid Host Ptr")
ErrInvalidMemObject = errors.New("cl: Invalid Mem Object")
ErrInvalidImageFormatDescriptor = errors.New("cl: Invalid Image Format Descriptor")
ErrInvalidImageSize = errors.New("cl: Invalid Image Size")
ErrInvalidSampler = errors.New("cl: Invalid Sampler")
ErrInvalidBinary = errors.New("cl: Invalid Binary")
ErrInvalidBuildOptions = errors.New("cl: Invalid Build Options")
ErrInvalidProgram = errors.New("cl: Invalid Program")
ErrInvalidProgramExecutable = errors.New("cl: Invalid Program Executable")
ErrInvalidKernelName = errors.New("cl: Invalid Kernel Name")
ErrInvalidKernelDefinition = errors.New("cl: Invalid Kernel Definition")
ErrInvalidKernel = errors.New("cl: Invalid Kernel")
ErrInvalidArgIndex = errors.New("cl: Invalid Arg Index")
ErrInvalidArgValue = errors.New("cl: Invalid Arg Value")
ErrInvalidArgSize = errors.New("cl: Invalid Arg Size")
ErrInvalidKernelArgs = errors.New("cl: Invalid Kernel Args")
ErrInvalidWorkDimension = errors.New("cl: Invalid Work Dimension")
ErrInvalidWorkGroupSize = errors.New("cl: Invalid Work Group Size")
ErrInvalidWorkItemSize = errors.New("cl: Invalid Work Item Size")
ErrInvalidGlobalOffset = errors.New("cl: Invalid Global Offset")
ErrInvalidEventWaitList = errors.New("cl: Invalid Event Wait List")
ErrInvalidEvent = errors.New("cl: Invalid Event")
ErrInvalidOperation = errors.New("cl: Invalid Operation")
ErrInvalidGlObject = errors.New("cl: Invalid Gl Object")
ErrInvalidBufferSize = errors.New("cl: Invalid Buffer Size")
ErrInvalidMipLevel = errors.New("cl: Invalid Mip Level")
ErrInvalidGlobalWorkSize = errors.New("cl: Invalid Global Work Size")
ErrInvalidProperty = errors.New("cl: Invalid Property")
ErrInvalidImageDescriptor = errors.New("cl: Invalid Image Descriptor")
ErrInvalidCompilerOptions = errors.New("cl: Invalid Compiler Options")
ErrInvalidLinkerOptions = errors.New("cl: Invalid Linker Options")
ErrInvalidDevicePartitionCount = errors.New("cl: Invalid Device Partition Count")
)
var errorMap = map[C.cl_int]error{
C.CL_SUCCESS: nil,
C.CL_DEVICE_NOT_FOUND: ErrDeviceNotFound,
C.CL_DEVICE_NOT_AVAILABLE: ErrDeviceNotAvailable,
C.CL_COMPILER_NOT_AVAILABLE: ErrCompilerNotAvailable,
C.CL_MEM_OBJECT_ALLOCATION_FAILURE: ErrMemObjectAllocationFailure,
C.CL_OUT_OF_RESOURCES: ErrOutOfResources,
C.CL_OUT_OF_HOST_MEMORY: ErrOutOfHostMemory,
C.CL_PROFILING_INFO_NOT_AVAILABLE: ErrProfilingInfoNotAvailable,
C.CL_MEM_COPY_OVERLAP: ErrMemCopyOverlap,
C.CL_IMAGE_FORMAT_MISMATCH: ErrImageFormatMismatch,
C.CL_IMAGE_FORMAT_NOT_SUPPORTED: ErrImageFormatNotSupported,
C.CL_BUILD_PROGRAM_FAILURE: ErrBuildProgramFailure,
C.CL_MAP_FAILURE: ErrMapFailure,
C.CL_MISALIGNED_SUB_BUFFER_OFFSET: ErrMisalignedSubBufferOffset,
C.CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: ErrExecStatusErrorForEventsInWaitList,
C.CL_INVALID_VALUE: ErrInvalidValue,
C.CL_INVALID_DEVICE_TYPE: ErrInvalidDeviceType,
C.CL_INVALID_PLATFORM: ErrInvalidPlatform,
C.CL_INVALID_DEVICE: ErrInvalidDevice,
C.CL_INVALID_CONTEXT: ErrInvalidContext,
C.CL_INVALID_QUEUE_PROPERTIES: ErrInvalidQueueProperties,
C.CL_INVALID_COMMAND_QUEUE: ErrInvalidCommandQueue,
C.CL_INVALID_HOST_PTR: ErrInvalidHostPtr,
C.CL_INVALID_MEM_OBJECT: ErrInvalidMemObject,
C.CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: ErrInvalidImageFormatDescriptor,
C.CL_INVALID_IMAGE_SIZE: ErrInvalidImageSize,
C.CL_INVALID_SAMPLER: ErrInvalidSampler,
C.CL_INVALID_BINARY: ErrInvalidBinary,
C.CL_INVALID_BUILD_OPTIONS: ErrInvalidBuildOptions,
C.CL_INVALID_PROGRAM: ErrInvalidProgram,
C.CL_INVALID_PROGRAM_EXECUTABLE: ErrInvalidProgramExecutable,
C.CL_INVALID_KERNEL_NAME: ErrInvalidKernelName,
C.CL_INVALID_KERNEL_DEFINITION: ErrInvalidKernelDefinition,
C.CL_INVALID_KERNEL: ErrInvalidKernel,
C.CL_INVALID_ARG_INDEX: ErrInvalidArgIndex,
C.CL_INVALID_ARG_VALUE: ErrInvalidArgValue,
C.CL_INVALID_ARG_SIZE: ErrInvalidArgSize,
C.CL_INVALID_KERNEL_ARGS: ErrInvalidKernelArgs,
C.CL_INVALID_WORK_DIMENSION: ErrInvalidWorkDimension,
C.CL_INVALID_WORK_GROUP_SIZE: ErrInvalidWorkGroupSize,
C.CL_INVALID_WORK_ITEM_SIZE: ErrInvalidWorkItemSize,
C.CL_INVALID_GLOBAL_OFFSET: ErrInvalidGlobalOffset,
C.CL_INVALID_EVENT_WAIT_LIST: ErrInvalidEventWaitList,
C.CL_INVALID_EVENT: ErrInvalidEvent,
C.CL_INVALID_OPERATION: ErrInvalidOperation,
C.CL_INVALID_GL_OBJECT: ErrInvalidGlObject,
C.CL_INVALID_BUFFER_SIZE: ErrInvalidBufferSize,
C.CL_INVALID_MIP_LEVEL: ErrInvalidMipLevel,
C.CL_INVALID_GLOBAL_WORK_SIZE: ErrInvalidGlobalWorkSize,
C.CL_INVALID_PROPERTY: ErrInvalidProperty,
}
func toError(code C.cl_int) error {
if err, ok := errorMap[code]; ok {
return err
}
return ErrOther(code)
}
type LocalMemType int
const (
LocalMemTypeNone LocalMemType = C.CL_NONE
LocalMemTypeGlobal LocalMemType = C.CL_GLOBAL
LocalMemTypeLocal LocalMemType = C.CL_LOCAL
)
var localMemTypeMap = map[LocalMemType]string{
LocalMemTypeNone: "None",
LocalMemTypeGlobal: "Global",
LocalMemTypeLocal: "Local",
}
func (t LocalMemType) String() string {
name := localMemTypeMap[t]
if name == "" {
name = "Unknown"
}
return name
}
type ExecCapability int
const (
ExecCapabilityKernel ExecCapability = C.CL_EXEC_KERNEL // The OpenCL device can execute OpenCL kernels.
ExecCapabilityNativeKernel ExecCapability = C.CL_EXEC_NATIVE_KERNEL // The OpenCL device can execute native kernels.
)
func (ec ExecCapability) String() string {
var parts []string
if ec&ExecCapabilityKernel != 0 {
parts = append(parts, "Kernel")
}
if ec&ExecCapabilityNativeKernel != 0 {
parts = append(parts, "NativeKernel")
}
if parts == nil {
return ""
}
return strings.Join(parts, "|")
}
type MemCacheType int
const (
MemCacheTypeNone MemCacheType = C.CL_NONE
MemCacheTypeReadOnlyCache MemCacheType = C.CL_READ_ONLY_CACHE
MemCacheTypeReadWriteCache MemCacheType = C.CL_READ_WRITE_CACHE
)
func (ct MemCacheType) String() string {
switch ct {
case MemCacheTypeNone:
return "None"
case MemCacheTypeReadOnlyCache:
return "ReadOnly"
case MemCacheTypeReadWriteCache:
return "ReadWrite"
}
return fmt.Sprintf("Unknown(%x)", int(ct))
}
type MemFlag int
const (
MemReadWrite MemFlag = C.CL_MEM_READ_WRITE
MemWriteOnly MemFlag = C.CL_MEM_WRITE_ONLY
MemReadOnly MemFlag = C.CL_MEM_READ_ONLY
MemUseHostPtr MemFlag = C.CL_MEM_USE_HOST_PTR
MemAllocHostPtr MemFlag = C.CL_MEM_ALLOC_HOST_PTR
MemCopyHostPtr MemFlag = C.CL_MEM_COPY_HOST_PTR
MemWriteOnlyHost MemFlag = C.CL_MEM_HOST_WRITE_ONLY
MemReadOnlyHost MemFlag = C.CL_MEM_HOST_READ_ONLY
MemNoAccessHost MemFlag = C.CL_MEM_HOST_NO_ACCESS
)
type MemObjectType int
const (
MemObjectTypeBuffer MemObjectType = C.CL_MEM_OBJECT_BUFFER
MemObjectTypeImage2D MemObjectType = C.CL_MEM_OBJECT_IMAGE2D
MemObjectTypeImage3D MemObjectType = C.CL_MEM_OBJECT_IMAGE3D
)
type MapFlag int
const (
// This flag specifies that the region being mapped in the memory object is being mapped for reading.
MapFlagRead MapFlag = C.CL_MAP_READ
MapFlagWrite MapFlag = C.CL_MAP_WRITE
MapFlagWriteInvalidateRegion MapFlag = C.CL_MAP_WRITE_INVALIDATE_REGION
)
func (mf MapFlag) toCl() C.cl_map_flags {
return C.cl_map_flags(mf)
}
type ChannelOrder int
const (
ChannelOrderR ChannelOrder = C.CL_R
ChannelOrderA ChannelOrder = C.CL_A
ChannelOrderRG ChannelOrder = C.CL_RG
ChannelOrderRA ChannelOrder = C.CL_RA
ChannelOrderRGB ChannelOrder = C.CL_RGB
ChannelOrderRGBA ChannelOrder = C.CL_RGBA
ChannelOrderBGRA ChannelOrder = C.CL_BGRA
ChannelOrderARGB ChannelOrder = C.CL_ARGB
ChannelOrderIntensity ChannelOrder = C.CL_INTENSITY
ChannelOrderLuminance ChannelOrder = C.CL_LUMINANCE
ChannelOrderRx ChannelOrder = C.CL_Rx
ChannelOrderRGx ChannelOrder = C.CL_RGx
ChannelOrderRGBx ChannelOrder = C.CL_RGBx
)
var channelOrderNameMap = map[ChannelOrder]string{
ChannelOrderR: "R",
ChannelOrderA: "A",
ChannelOrderRG: "RG",
ChannelOrderRA: "RA",
ChannelOrderRGB: "RGB",
ChannelOrderRGBA: "RGBA",
ChannelOrderBGRA: "BGRA",
ChannelOrderARGB: "ARGB",
ChannelOrderIntensity: "Intensity",
ChannelOrderLuminance: "Luminance",
ChannelOrderRx: "Rx",
ChannelOrderRGx: "RGx",
ChannelOrderRGBx: "RGBx",
}
func (co ChannelOrder) String() string {
name := channelOrderNameMap[co]
if name == "" {
name = fmt.Sprintf("Unknown(%x)", int(co))
}
return name
}
type ChannelDataType int
const (
ChannelDataTypeSNormInt8 ChannelDataType = C.CL_SNORM_INT8
ChannelDataTypeSNormInt16 ChannelDataType = C.CL_SNORM_INT16
ChannelDataTypeUNormInt8 ChannelDataType = C.CL_UNORM_INT8
ChannelDataTypeUNormInt16 ChannelDataType = C.CL_UNORM_INT16
ChannelDataTypeUNormShort565 ChannelDataType = C.CL_UNORM_SHORT_565
ChannelDataTypeUNormShort555 ChannelDataType = C.CL_UNORM_SHORT_555
ChannelDataTypeUNormInt101010 ChannelDataType = C.CL_UNORM_INT_101010
ChannelDataTypeSignedInt8 ChannelDataType = C.CL_SIGNED_INT8
ChannelDataTypeSignedInt16 ChannelDataType = C.CL_SIGNED_INT16
ChannelDataTypeSignedInt32 ChannelDataType = C.CL_SIGNED_INT32
ChannelDataTypeUnsignedInt8 ChannelDataType = C.CL_UNSIGNED_INT8
ChannelDataTypeUnsignedInt16 ChannelDataType = C.CL_UNSIGNED_INT16
ChannelDataTypeUnsignedInt32 ChannelDataType = C.CL_UNSIGNED_INT32
ChannelDataTypeHalfFloat ChannelDataType = C.CL_HALF_FLOAT
ChannelDataTypeFloat ChannelDataType = C.CL_FLOAT
)
var channelDataTypeNameMap = map[ChannelDataType]string{
ChannelDataTypeSNormInt8: "SNormInt8",
ChannelDataTypeSNormInt16: "SNormInt16",
ChannelDataTypeUNormInt8: "UNormInt8",
ChannelDataTypeUNormInt16: "UNormInt16",
ChannelDataTypeUNormShort565: "UNormShort565",
ChannelDataTypeUNormShort555: "UNormShort555",
ChannelDataTypeUNormInt101010: "UNormInt101010",
ChannelDataTypeSignedInt8: "SignedInt8",
ChannelDataTypeSignedInt16: "SignedInt16",
ChannelDataTypeSignedInt32: "SignedInt32",
ChannelDataTypeUnsignedInt8: "UnsignedInt8",
ChannelDataTypeUnsignedInt16: "UnsignedInt16",
ChannelDataTypeUnsignedInt32: "UnsignedInt32",
ChannelDataTypeHalfFloat: "HalfFloat",
ChannelDataTypeFloat: "Float",
}
func (ct ChannelDataType) String() string {
name := channelDataTypeNameMap[ct]
if name == "" {
name = fmt.Sprintf("Unknown(%x)", int(ct))
}
return name
}
type ImageFormat struct {
ChannelOrder ChannelOrder
ChannelDataType ChannelDataType
}
func (f ImageFormat) toCl() C.cl_image_format {
var format C.cl_image_format
format.image_channel_order = C.cl_channel_order(f.ChannelOrder)
format.image_channel_data_type = C.cl_channel_type(f.ChannelDataType)
return format
}
type ProfilingInfo int
const (
// A 64-bit value that describes the current device time counter in
// nanoseconds when the command identified by event is enqueued in
// a command-queue by the host.
ProfilingInfoCommandQueued ProfilingInfo = C.CL_PROFILING_COMMAND_QUEUED
// A 64-bit value that describes the current device time counter in
// nanoseconds when the command identified by event that has been
// enqueued is submitted by the host to the device associated with the command-queue.
ProfilingInfoCommandSubmit ProfilingInfo = C.CL_PROFILING_COMMAND_SUBMIT
// A 64-bit value that describes the current device time counter in
// nanoseconds when the command identified by event starts execution on the device.
ProfilingInfoCommandStart ProfilingInfo = C.CL_PROFILING_COMMAND_START
// A 64-bit value that describes the current device time counter in
// nanoseconds when the command identified by event has finished
// execution on the device.
ProfilingInfoCommandEnd ProfilingInfo = C.CL_PROFILING_COMMAND_END
)
type CommmandExecStatus int
const (
CommmandExecStatusComplete CommmandExecStatus = C.CL_COMPLETE
CommmandExecStatusRunning CommmandExecStatus = C.CL_RUNNING
CommmandExecStatusSubmitted CommmandExecStatus = C.CL_SUBMITTED
CommmandExecStatusQueued CommmandExecStatus = C.CL_QUEUED
)
type Event struct {
clEvent C.cl_event
}
func releaseEvent(ev *Event) {
if ev.clEvent != nil {
C.clReleaseEvent(ev.clEvent)
ev.clEvent = nil
}
}
func (e *Event) Release() {
releaseEvent(e)
}
func (e *Event) GetEventProfilingInfo(paramName ProfilingInfo) (int64, error) {
var paramValue C.cl_ulong
if err := C.clGetEventProfilingInfo(e.clEvent, C.cl_profiling_info(paramName), C.size_t(unsafe.Sizeof(paramValue)), unsafe.Pointer(&paramValue), nil); err != C.CL_SUCCESS {
return 0, toError(err)
}
return int64(paramValue), nil
}
// Sets the execution status of a user event object.
//
// `status` specifies the new execution status to be set and
// can be CL_COMPLETE or a negative integer value to indicate
// an error. A negative integer value causes all enqueued commands
// that wait on this user event to be terminated. clSetUserEventStatus
// can only be called once to change the execution status of event.
func (e *Event) SetUserEventStatus(status int) error {
return toError(C.clSetUserEventStatus(e.clEvent, C.cl_int(status)))
}
// Waits on the host thread for commands identified by event objects in
// events to complete. A command is considered complete if its execution
// status is CL_COMPLETE or a negative value. The events specified in
// event_list act as synchronization points.
//
// If the cl_khr_gl_event extension is enabled, event objects can also be
// used to reflect the status of an OpenGL sync object. The sync object
// in turn refers to a fence command executing in an OpenGL command
// stream. This provides another method of coordinating sharing of buffers
// and images between OpenGL and OpenCL.
func WaitForEvents(events []*Event) error {
return toError(C.clWaitForEvents(C.cl_uint(len(events)), eventListPtr(events)))
}
func newEvent(clEvent C.cl_event) *Event {
ev := &Event{clEvent: clEvent}
runtime.SetFinalizer(ev, releaseEvent)
return ev
}
func eventListPtr(el []*Event) *C.cl_event {
if el == nil {
return nil
}
elist := make([]C.cl_event, len(el))
for i, e := range el {
elist[i] = e.clEvent
}
return (*C.cl_event)(&elist[0])
}
func clBool(b bool) C.cl_bool {
if b {
return C.CL_TRUE
}
return C.CL_FALSE
}
func sizeT3(i3 [3]int) [3]C.size_t {
var val [3]C.size_t
val[0] = C.size_t(i3[0])
val[1] = C.size_t(i3[1])
val[2] = C.size_t(i3[2])
return val
}
type MappedMemObject struct {
ptr unsafe.Pointer
size int
rowPitch int
slicePitch int
}
func (mb *MappedMemObject) ByteSlice() []byte {
var byteSlice []byte
sliceHeader := (*reflect.SliceHeader)(unsafe.Pointer(&byteSlice))
sliceHeader.Cap = mb.size
sliceHeader.Len = mb.size
sliceHeader.Data = uintptr(mb.ptr)
return byteSlice
}
func (mb *MappedMemObject) Ptr() unsafe.Pointer {
return mb.ptr
}
func (mb *MappedMemObject) Size() int {
return mb.size
}
func (mb *MappedMemObject) RowPitch() int {
return mb.rowPitch
}
func (mb *MappedMemObject) SlicePitch() int {
return mb.slicePitch
}

@ -0,0 +1,71 @@
// +build cl12
package cl
// #ifdef __APPLE__
// #include "OpenCL/opencl.h"
// #else
// #include "cl.h"
// #endif
import "C"
const (
ChannelDataTypeUNormInt24 ChannelDataType = C.CL_UNORM_INT24
ChannelOrderDepth ChannelOrder = C.CL_DEPTH
ChannelOrderDepthStencil ChannelOrder = C.CL_DEPTH_STENCIL
MemHostNoAccess MemFlag = C.CL_MEM_HOST_NO_ACCESS // OpenCL 1.2
MemHostReadOnly MemFlag = C.CL_MEM_HOST_READ_ONLY // OpenCL 1.2
MemHostWriteOnly MemFlag = C.CL_MEM_HOST_WRITE_ONLY // OpenCL 1.2
MemObjectTypeImage1D MemObjectType = C.CL_MEM_OBJECT_IMAGE1D
MemObjectTypeImage1DArray MemObjectType = C.CL_MEM_OBJECT_IMAGE1D_ARRAY
MemObjectTypeImage1DBuffer MemObjectType = C.CL_MEM_OBJECT_IMAGE1D_BUFFER
MemObjectTypeImage2DArray MemObjectType = C.CL_MEM_OBJECT_IMAGE2D_ARRAY
// This flag specifies that the region being mapped in the memory object is being mapped for writing.
//
// The contents of the region being mapped are to be discarded. This is typically the case when the
// region being mapped is overwritten by the host. This flag allows the implementation to no longer
// guarantee that the pointer returned by clEnqueueMapBuffer or clEnqueueMapImage contains the
// latest bits in the region being mapped which can be a significant performance enhancement.
MapFlagWriteInvalidateRegion MapFlag = C.CL_MAP_WRITE_INVALIDATE_REGION
)
func init() {
errorMap[C.CL_COMPILE_PROGRAM_FAILURE] = ErrCompileProgramFailure
errorMap[C.CL_DEVICE_PARTITION_FAILED] = ErrDevicePartitionFailed
errorMap[C.CL_INVALID_COMPILER_OPTIONS] = ErrInvalidCompilerOptions
errorMap[C.CL_INVALID_DEVICE_PARTITION_COUNT] = ErrInvalidDevicePartitionCount
errorMap[C.CL_INVALID_IMAGE_DESCRIPTOR] = ErrInvalidImageDescriptor
errorMap[C.CL_INVALID_LINKER_OPTIONS] = ErrInvalidLinkerOptions
errorMap[C.CL_KERNEL_ARG_INFO_NOT_AVAILABLE] = ErrKernelArgInfoNotAvailable
errorMap[C.CL_LINK_PROGRAM_FAILURE] = ErrLinkProgramFailure
errorMap[C.CL_LINKER_NOT_AVAILABLE] = ErrLinkerNotAvailable
channelOrderNameMap[ChannelOrderDepth] = "Depth"
channelOrderNameMap[ChannelOrderDepthStencil] = "DepthStencil"
channelDataTypeNameMap[ChannelDataTypeUNormInt24] = "UNormInt24"
}
type ImageDescription struct {
Type MemObjectType
Width, Height, Depth int
ArraySize, RowPitch, SlicePitch int
NumMipLevels, NumSamples int
Buffer *MemObject
}
func (d ImageDescription) toCl() C.cl_image_desc {
var desc C.cl_image_desc
desc.image_type = C.cl_mem_object_type(d.Type)
desc.image_width = C.size_t(d.Width)
desc.image_height = C.size_t(d.Height)
desc.image_depth = C.size_t(d.Depth)
desc.image_array_size = C.size_t(d.ArraySize)
desc.image_row_pitch = C.size_t(d.RowPitch)
desc.image_slice_pitch = C.size_t(d.SlicePitch)
desc.num_mip_levels = C.cl_uint(d.NumMipLevels)
desc.num_samples = C.cl_uint(d.NumSamples)
desc.buffer = nil
if d.Buffer != nil {
desc.buffer = d.Buffer.clMem
}
return desc
}

@ -0,0 +1,45 @@
package cl
// #ifdef __APPLE__
// #include "OpenCL/opencl.h"
// #else
// #include "cl.h"
// #endif
import "C"
// Extension: cl_APPLE_fixed_alpha_channel_orders
//
// These selectors may be passed to clCreateImage2D() in the cl_image_format.image_channel_order field.
// They are like CL_BGRA and CL_ARGB except that the alpha channel to be ignored. On calls to read_imagef,
// the alpha will be 0xff (1.0f) if the sample falls in the image and 0 if it does not fall in the image.
// On calls to write_imagef, the alpha value is ignored and 0xff (1.0f) is written. These formats are
// currently only available for the CL_UNORM_INT8 cl_channel_type. They are intended to support legacy
// image formats.
const (
ChannelOrder1RGBApple ChannelOrder = C.CL_1RGB_APPLE // Introduced in MacOS X.7.
ChannelOrderBGR1Apple ChannelOrder = C.CL_BGR1_APPLE // Introduced in MacOS X.7.
)
// Extension: cl_APPLE_biased_fixed_point_image_formats
//
// This selector may be passed to clCreateImage2D() in the cl_image_format.image_channel_data_type field.
// It defines a biased signed 1.14 fixed point storage format, with range [-1, 3). The conversion from
// float to this fixed point format is defined as follows:
//
// ushort float_to_sfixed14( float x ){
// int i = convert_int_sat_rte( x * 0x1.0p14f ); // scale [-1, 3.0) to [-16384, 3*16384), round to nearest integer
// i = add_sat( i, 0x4000 ); // apply bias, to convert to [0, 65535) range
// return convert_ushort_sat(i); // clamp to destination size
// }
//
// The inverse conversion is the reverse process. The formats are currently only available on the CPU with
// the CL_RGBA channel layout.
const (
ChannelDataTypeSFixed14Apple ChannelDataType = C.CL_SFIXED14_APPLE // Introduced in MacOS X.7.
)
func init() {
channelOrderNameMap[ChannelOrder1RGBApple] = "1RGBApple"
channelOrderNameMap[ChannelOrderBGR1Apple] = "RGB1Apple"
channelDataTypeNameMap[ChannelDataTypeSFixed14Apple] = "SFixed14Apple"
}

@ -30,8 +30,8 @@ import (
) )
var ( var (
minDifficulty = new(big.Int).Exp(big.NewInt(2), big.NewInt(256), big.NewInt(0)) maxUint256 = new(big.Int).Exp(big.NewInt(2), big.NewInt(256), big.NewInt(0))
sharedLight = new(Light) sharedLight = new(Light)
) )
const ( const (
@ -140,7 +140,7 @@ func (l *Light) Verify(block pow.Block) bool {
// the finalizer before the call completes. // the finalizer before the call completes.
_ = cache _ = cache
// The actual check. // The actual check.
target := new(big.Int).Div(minDifficulty, difficulty) target := new(big.Int).Div(maxUint256, difficulty)
return h256ToHash(ret.result).Big().Cmp(target) <= 0 return h256ToHash(ret.result).Big().Cmp(target) <= 0
} }
@ -199,7 +199,7 @@ func (d *dag) generate() {
if d.dir == "" { if d.dir == "" {
d.dir = DefaultDir d.dir = DefaultDir
} }
glog.V(logger.Info).Infof("Generating DAG for epoch %d (%x)", d.epoch, seedHash) glog.V(logger.Info).Infof("Generating DAG for epoch %d (size %d) (%x)", d.epoch, dagSize, seedHash)
// Generate a temporary cache. // Generate a temporary cache.
// TODO: this could share the cache with Light // TODO: this could share the cache with Light
cache := C.ethash_light_new_internal(cacheSize, (*C.ethash_h256_t)(unsafe.Pointer(&seedHash[0]))) cache := C.ethash_light_new_internal(cacheSize, (*C.ethash_h256_t)(unsafe.Pointer(&seedHash[0])))
@ -220,14 +220,18 @@ func (d *dag) generate() {
}) })
} }
func freeDAG(h *dag) { func freeDAG(d *dag) {
C.ethash_full_delete(h.ptr) C.ethash_full_delete(d.ptr)
h.ptr = nil d.ptr = nil
}
func (d *dag) Ptr() unsafe.Pointer {
return unsafe.Pointer(d.ptr.data)
} }
//export ethashGoCallback //export ethashGoCallback
func ethashGoCallback(percent C.unsigned) C.int { func ethashGoCallback(percent C.unsigned) C.int {
glog.V(logger.Info).Infof("Still generating DAG: %d%%", percent) glog.V(logger.Info).Infof("Generating DAG: %d%%", percent)
return 0 return 0
} }
@ -273,7 +277,7 @@ func (pow *Full) getDAG(blockNum uint64) (d *dag) {
return d return d
} }
func (pow *Full) Search(block pow.Block, stop <-chan struct{}) (nonce uint64, mixDigest []byte) { func (pow *Full) Search(block pow.Block, stop <-chan struct{}, index int) (nonce uint64, mixDigest []byte) {
dag := pow.getDAG(block.NumberU64()) dag := pow.getDAG(block.NumberU64())
r := rand.New(rand.NewSource(time.Now().UnixNano())) r := rand.New(rand.NewSource(time.Now().UnixNano()))
@ -286,7 +290,7 @@ func (pow *Full) Search(block pow.Block, stop <-chan struct{}) (nonce uint64, mi
nonce = uint64(r.Int63()) nonce = uint64(r.Int63())
hash := hashToH256(block.HashNoNonce()) hash := hashToH256(block.HashNoNonce())
target := new(big.Int).Div(minDifficulty, diff) target := new(big.Int).Div(maxUint256, diff)
for { for {
select { select {
case <-stop: case <-stop:

@ -0,0 +1,629 @@
// Copyright 2014 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
// +build opencl
package ethash
//#cgo LDFLAGS: -w
//#include <stdint.h>
//#include <string.h>
//#include "src/libethash/internal.h"
import "C"
import (
crand "crypto/rand"
"encoding/binary"
"fmt"
"math"
"math/big"
mrand "math/rand"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"unsafe"
"github.com/Gustav-Simonsson/go-opencl/cl"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/pow"
)
/*
This code have two main entry points:
1. The initCL(...) function configures one or more OpenCL device
(for now only GPU) and loads the Ethash DAG onto device memory
2. The Search(...) function loads a Ethash nonce into device(s) memory and
executes the Ethash OpenCL kernel.
Throughout the code, we refer to "host memory" and "device memory".
For most systems (e.g. regular PC GPU miner) the host memory is RAM and
device memory is the GPU global memory (e.g. GDDR5).
References mentioned in code comments:
1. https://github.com/ethereum/wiki/wiki/Ethash
2. https://github.com/ethereum/cpp-ethereum/blob/develop/libethash-cl/ethash_cl_miner.cpp
3. https://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/
4. http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/12/AMD_OpenCL_Programming_User_Guide.pdf
*/
type OpenCLDevice struct {
deviceId int
device *cl.Device
openCL11 bool // OpenCL version 1.1 and 1.2 are handled a bit different
openCL12 bool
dagBuf *cl.MemObject // Ethash full DAG in device mem
headerBuf *cl.MemObject // Hash of block-to-mine in device mem
searchBuffers []*cl.MemObject
searchKernel *cl.Kernel
hashKernel *cl.Kernel
queue *cl.CommandQueue
ctx *cl.Context
workGroupSize int
nonceRand *mrand.Rand // seeded by crypto/rand, see comments where it's initialised
result common.Hash
}
type OpenCLMiner struct {
mu sync.Mutex
ethash *Ethash // Ethash full DAG & cache in host mem
deviceIds []int
devices []*OpenCLDevice
dagSize uint64
hashRate int32 // Go atomics & uint64 have some issues; int32 is supported on all platforms
}
type pendingSearch struct {
bufIndex uint32
startNonce uint64
}
const (
SIZEOF_UINT32 = 4
// See [1]
ethashMixBytesLen = 128
ethashAccesses = 64
// See [4]
workGroupSize = 32 // must be multiple of 8
maxSearchResults = 63
searchBufSize = 2
globalWorkSize = 1024 * 256
)
func NewCL(deviceIds []int) *OpenCLMiner {
ids := make([]int, len(deviceIds))
copy(ids, deviceIds)
return &OpenCLMiner{
ethash: New(),
dagSize: 0, // to see if we need to update DAG.
deviceIds: ids,
}
}
func PrintDevices() {
fmt.Println("=============================================")
fmt.Println("============ OpenCL Device Info =============")
fmt.Println("=============================================")
var found []*cl.Device
platforms, err := cl.GetPlatforms()
if err != nil {
fmt.Println("Plaform error (check your OpenCL installation): %v", err)
return
}
for i, p := range platforms {
fmt.Println("Platform id ", i)
fmt.Println("Platform Name ", p.Name())
fmt.Println("Platform Vendor ", p.Vendor())
fmt.Println("Platform Version ", p.Version())
fmt.Println("Platform Extensions ", p.Extensions())
fmt.Println("Platform Profile ", p.Profile())
fmt.Println("")
devices, err := cl.GetDevices(p, cl.DeviceTypeGPU)
if err != nil {
fmt.Println("Device error (check your GPU drivers) :", err)
return
}
for _, d := range devices {
fmt.Println("Device OpenCL id ", i)
fmt.Println("Device id for mining ", len(found))
fmt.Println("Device Name ", d.Name())
fmt.Println("Vendor ", d.Vendor())
fmt.Println("Version ", d.Version())
fmt.Println("Driver version ", d.DriverVersion())
fmt.Println("Address bits ", d.AddressBits())
fmt.Println("Max clock freq ", d.MaxClockFrequency())
fmt.Println("Global mem size ", d.GlobalMemSize())
fmt.Println("Max constant buffer size", d.MaxConstantBufferSize())
fmt.Println("Max mem alloc size ", d.MaxMemAllocSize())
fmt.Println("Max compute units ", d.MaxComputeUnits())
fmt.Println("Max work group size ", d.MaxWorkGroupSize())
fmt.Println("Max work item sizes ", d.MaxWorkItemSizes())
fmt.Println("=============================================")
found = append(found, d)
}
}
if len(found) == 0 {
fmt.Println("Found no GPU(s). Check that your OS can see the GPU(s)")
} else {
var idsFormat string
for i := 0; i < len(found); i++ {
idsFormat += strconv.Itoa(i)
if i != len(found)-1 {
idsFormat += ","
}
}
fmt.Printf("Found %v devices. Benchmark first GPU: geth gpubench 0\n", len(found))
fmt.Printf("Mine using all GPUs: geth --minegpu %v\n", idsFormat)
}
}
// See [2]. We basically do the same here, but the Go OpenCL bindings
// are at a slightly higher abtraction level.
func InitCL(blockNum uint64, c *OpenCLMiner) error {
platforms, err := cl.GetPlatforms()
if err != nil {
return fmt.Errorf("Plaform error: %v\nCheck your OpenCL installation and then run geth gpuinfo", err)
}
var devices []*cl.Device
for _, p := range platforms {
ds, err := cl.GetDevices(p, cl.DeviceTypeGPU)
if err != nil {
return fmt.Errorf("Devices error: %v\nCheck your GPU drivers and then run geth gpuinfo", err)
}
for _, d := range ds {
devices = append(devices, d)
}
}
pow := New()
_ = pow.getDAG(blockNum) // generates DAG if we don't have it
pow.Light.getCache(blockNum) // and cache
c.ethash = pow
dagSize := uint64(C.ethash_get_datasize(C.uint64_t(blockNum)))
c.dagSize = dagSize
for _, id := range c.deviceIds {
if id > len(devices)-1 {
return fmt.Errorf("Device id not found. See available device ids with: geth gpuinfo")
} else {
err := initCLDevice(id, devices[id], c)
if err != nil {
return err
}
}
}
if len(c.devices) == 0 {
return fmt.Errorf("No GPU devices found")
}
return nil
}
func initCLDevice(deviceId int, device *cl.Device, c *OpenCLMiner) error {
devMaxAlloc := uint64(device.MaxMemAllocSize())
devGlobalMem := uint64(device.GlobalMemSize())
// TODO: more fine grained version logic
if device.Version() == "OpenCL 1.0" {
fmt.Println("Device OpenCL version not supported: ", device.Version())
return fmt.Errorf("opencl version not supported")
}
var cl11, cl12 bool
if device.Version() == "OpenCL 1.1" {
cl11 = true
}
if device.Version() == "OpenCL 1.2" {
cl12 = true
}
// log warnings but carry on; some device drivers report inaccurate values
if c.dagSize > devGlobalMem {
fmt.Printf("WARNING: device memory may be insufficient: %v. DAG size: %v.\n", devGlobalMem, c.dagSize)
}
if c.dagSize > devMaxAlloc {
fmt.Printf("WARNING: DAG size (%v) larger than device max memory allocation size (%v).\n", c.dagSize, devMaxAlloc)
fmt.Printf("You probably have to export GPU_MAX_ALLOC_PERCENT=95\n")
}
fmt.Printf("Initialising device %v: %v\n", deviceId, device.Name())
context, err := cl.CreateContext([]*cl.Device{device})
if err != nil {
return fmt.Errorf("failed creating context:", err)
}
// TODO: test running with CL_QUEUE_PROFILING_ENABLE for profiling?
queue, err := context.CreateCommandQueue(device, 0)
if err != nil {
return fmt.Errorf("command queue err:", err)
}
// See [4] section 3.2 and [3] "clBuildProgram".
// The OpenCL kernel code is compiled at run-time.
kvs := make(map[string]string, 4)
kvs["GROUP_SIZE"] = strconv.FormatUint(workGroupSize, 10)
kvs["DAG_SIZE"] = strconv.FormatUint(c.dagSize/ethashMixBytesLen, 10)
kvs["ACCESSES"] = strconv.FormatUint(ethashAccesses, 10)
kvs["MAX_OUTPUTS"] = strconv.FormatUint(maxSearchResults, 10)
kernelCode := replaceWords(kernel, kvs)
program, err := context.CreateProgramWithSource([]string{kernelCode})
if err != nil {
return fmt.Errorf("program err:", err)
}
/* if using AMD OpenCL impl, you can set this to debug on x86 CPU device.
see AMD OpenCL programming guide section 4.2
export in shell before running:
export AMD_OCL_BUILD_OPTIONS_APPEND="-g -O0"
export CPU_MAX_COMPUTE_UNITS=1
buildOpts := "-g -cl-opt-disable"
*/
buildOpts := ""
err = program.BuildProgram([]*cl.Device{device}, buildOpts)
if err != nil {
return fmt.Errorf("program build err:", err)
}
var searchKernelName, hashKernelName string
searchKernelName = "ethash_search"
hashKernelName = "ethash_hash"
searchKernel, err := program.CreateKernel(searchKernelName)
hashKernel, err := program.CreateKernel(hashKernelName)
if err != nil {
return fmt.Errorf("kernel err:", err)
}
// TODO: when this DAG size appears, patch the Go bindings
// (context.go) to work with uint64 as size_t
if c.dagSize > math.MaxInt32 {
fmt.Println("DAG too large for allocation.")
return fmt.Errorf("DAG too large for alloc")
}
// TODO: patch up Go bindings to work with size_t, will overflow if > maxint32
// TODO: fuck. shit's gonna overflow around 2017-06-09 12:17:02
dagBuf := *(new(*cl.MemObject))
dagBuf, err = context.CreateEmptyBuffer(cl.MemReadOnly, int(c.dagSize))
if err != nil {
return fmt.Errorf("allocating dag buf failed: ", err)
}
// write DAG to device mem
dagPtr := unsafe.Pointer(c.ethash.Full.current.ptr.data)
_, err = queue.EnqueueWriteBuffer(dagBuf, true, 0, int(c.dagSize), dagPtr, nil)
if err != nil {
return fmt.Errorf("writing to dag buf failed: ", err)
}
searchBuffers := make([]*cl.MemObject, searchBufSize)
for i := 0; i < searchBufSize; i++ {
searchBuff, err := context.CreateEmptyBuffer(cl.MemWriteOnly, (1+maxSearchResults)*SIZEOF_UINT32)
if err != nil {
return fmt.Errorf("search buffer err:", err)
}
searchBuffers[i] = searchBuff
}
headerBuf, err := context.CreateEmptyBuffer(cl.MemReadOnly, 32)
if err != nil {
return fmt.Errorf("header buffer err:", err)
}
// Unique, random nonces are crucial for mining efficieny.
// While we do not need cryptographically secure PRNG for nonces,
// we want to have uniform distribution and minimal repetition of nonces.
// We could guarantee strict uniqueness of nonces by generating unique ranges,
// but a int64 seed from crypto/rand should be good enough.
// we then use math/rand for speed and to avoid draining OS entropy pool
seed, err := crand.Int(crand.Reader, big.NewInt(math.MaxInt64))
if err != nil {
return err
}
nonceRand := mrand.New(mrand.NewSource(seed.Int64()))
deviceStruct := &OpenCLDevice{
deviceId: deviceId,
device: device,
openCL11: cl11,
openCL12: cl12,
dagBuf: dagBuf,
headerBuf: headerBuf,
searchBuffers: searchBuffers,
searchKernel: searchKernel,
hashKernel: hashKernel,
queue: queue,
ctx: context,
workGroupSize: workGroupSize,
nonceRand: nonceRand,
}
c.devices = append(c.devices, deviceStruct)
return nil
}
func (c *OpenCLMiner) Search(block pow.Block, stop <-chan struct{}, index int) (uint64, []byte) {
c.mu.Lock()
newDagSize := uint64(C.ethash_get_datasize(C.uint64_t(block.NumberU64())))
if newDagSize > c.dagSize {
// TODO: clean up buffers from previous DAG?
err := InitCL(block.NumberU64(), c)
if err != nil {
fmt.Println("OpenCL init error: ", err)
return 0, []byte{0}
}
}
defer c.mu.Unlock()
// Avoid unneeded OpenCL initialisation if we received stop while running InitCL
select {
case <-stop:
return 0, []byte{0}
default:
}
headerHash := block.HashNoNonce()
diff := block.Difficulty()
target256 := new(big.Int).Div(maxUint256, diff)
target64 := new(big.Int).Rsh(target256, 192).Uint64()
var zero uint32 = 0
d := c.devices[index]
_, err := d.queue.EnqueueWriteBuffer(d.headerBuf, false, 0, 32, unsafe.Pointer(&headerHash[0]), nil)
if err != nil {
fmt.Println("Error in Search clEnqueueWriterBuffer : ", err)
return 0, []byte{0}
}
for i := 0; i < searchBufSize; i++ {
_, err := d.queue.EnqueueWriteBuffer(d.searchBuffers[i], false, 0, 4, unsafe.Pointer(&zero), nil)
if err != nil {
fmt.Println("Error in Search clEnqueueWriterBuffer : ", err)
return 0, []byte{0}
}
}
// wait for all search buffers to complete
err = d.queue.Finish()
if err != nil {
fmt.Println("Error in Search clFinish : ", err)
return 0, []byte{0}
}
err = d.searchKernel.SetArg(1, d.headerBuf)
if err != nil {
fmt.Println("Error in Search clSetKernelArg : ", err)
return 0, []byte{0}
}
err = d.searchKernel.SetArg(2, d.dagBuf)
if err != nil {
fmt.Println("Error in Search clSetKernelArg : ", err)
return 0, []byte{0}
}
err = d.searchKernel.SetArg(4, target64)
if err != nil {
fmt.Println("Error in Search clSetKernelArg : ", err)
return 0, []byte{0}
}
err = d.searchKernel.SetArg(5, uint32(math.MaxUint32))
if err != nil {
fmt.Println("Error in Search clSetKernelArg : ", err)
return 0, []byte{0}
}
// wait on this before returning
var preReturnEvent *cl.Event
if d.openCL12 {
preReturnEvent, err = d.ctx.CreateUserEvent()
if err != nil {
fmt.Println("Error in Search create CL user event : ", err)
return 0, []byte{0}
}
}
pending := make([]pendingSearch, 0, searchBufSize)
var p *pendingSearch
searchBufIndex := uint32(0)
var checkNonce uint64
loops := int64(0)
prevHashRate := int32(0)
start := time.Now().UnixNano()
// we grab a single random nonce and sets this as argument to the kernel search function
// the device will then add each local threads gid to the nonce, creating a unique nonce
// for each device computing unit executing in parallel
initNonce := uint64(d.nonceRand.Int63())
for nonce := initNonce; ; nonce += uint64(globalWorkSize) {
select {
case <-stop:
/*
if d.openCL12 {
err = cl.WaitForEvents([]*cl.Event{preReturnEvent})
if err != nil {
fmt.Println("Error in Search WaitForEvents: ", err)
}
}
*/
atomic.AddInt32(&c.hashRate, -prevHashRate)
return 0, []byte{0}
default:
}
if (loops % (1 << 7)) == 0 {
elapsed := time.Now().UnixNano() - start
// TODO: verify if this is correct hash rate calculation
hashes := (float64(1e9) / float64(elapsed)) * float64(loops*1024*256)
hashrateDiff := int32(hashes) - prevHashRate
prevHashRate = int32(hashes)
atomic.AddInt32(&c.hashRate, hashrateDiff)
}
loops++
err = d.searchKernel.SetArg(0, d.searchBuffers[searchBufIndex])
if err != nil {
fmt.Println("Error in Search clSetKernelArg : ", err)
return 0, []byte{0}
}
err = d.searchKernel.SetArg(3, nonce)
if err != nil {
fmt.Println("Error in Search clSetKernelArg : ", err)
return 0, []byte{0}
}
// execute kernel
_, err := d.queue.EnqueueNDRangeKernel(
d.searchKernel,
[]int{0},
[]int{globalWorkSize},
[]int{d.workGroupSize},
nil)
if err != nil {
fmt.Println("Error in Search clEnqueueNDRangeKernel : ", err)
return 0, []byte{0}
}
pending = append(pending, pendingSearch{bufIndex: searchBufIndex, startNonce: nonce})
searchBufIndex = (searchBufIndex + 1) % searchBufSize
if len(pending) == searchBufSize {
p = &(pending[searchBufIndex])
cres, _, err := d.queue.EnqueueMapBuffer(d.searchBuffers[p.bufIndex], true,
cl.MapFlagRead, 0, (1+maxSearchResults)*SIZEOF_UINT32,
nil)
if err != nil {
fmt.Println("Error in Search clEnqueueMapBuffer: ", err)
return 0, []byte{0}
}
results := cres.ByteSlice()
nfound := binary.LittleEndian.Uint32(results)
nfound = uint32(math.Min(float64(nfound), float64(maxSearchResults)))
// OpenCL returns the offsets from the start nonce
for i := uint32(0); i < nfound; i++ {
lo := (i + 1) * SIZEOF_UINT32
hi := (i + 2) * SIZEOF_UINT32
upperNonce := uint64(binary.LittleEndian.Uint32(results[lo:hi]))
checkNonce = p.startNonce + upperNonce
if checkNonce != 0 {
cn := C.uint64_t(checkNonce)
ds := C.uint64_t(c.dagSize)
// We verify that the nonce is indeed a solution by
// executing the Ethash verification function (on the CPU).
ret := C.ethash_light_compute_internal(c.ethash.Light.current.ptr, ds, hashToH256(headerHash), cn)
// TODO: return result first
if ret.success && h256ToHash(ret.result).Big().Cmp(target256) <= 0 {
_, err = d.queue.EnqueueUnmapMemObject(d.searchBuffers[p.bufIndex], cres, nil)
if err != nil {
fmt.Println("Error in Search clEnqueueUnmapMemObject: ", err)
}
if d.openCL12 {
err = cl.WaitForEvents([]*cl.Event{preReturnEvent})
if err != nil {
fmt.Println("Error in Search WaitForEvents: ", err)
}
}
return checkNonce, C.GoBytes(unsafe.Pointer(&ret.mix_hash), C.int(32))
}
_, err := d.queue.EnqueueWriteBuffer(d.searchBuffers[p.bufIndex], false, 0, 4, unsafe.Pointer(&zero), nil)
if err != nil {
fmt.Println("Error in Search cl: EnqueueWriteBuffer", err)
return 0, []byte{0}
}
}
}
_, err = d.queue.EnqueueUnmapMemObject(d.searchBuffers[p.bufIndex], cres, nil)
if err != nil {
fmt.Println("Error in Search clEnqueueUnMapMemObject: ", err)
return 0, []byte{0}
}
pending = append(pending[:searchBufIndex], pending[searchBufIndex+1:]...)
}
}
if d.openCL12 {
err := cl.WaitForEvents([]*cl.Event{preReturnEvent})
if err != nil {
fmt.Println("Error in Search clWaitForEvents: ", err)
return 0, []byte{0}
}
}
return 0, []byte{0}
}
func (c *OpenCLMiner) Verify(block pow.Block) bool {
return c.ethash.Light.Verify(block)
}
func (c *OpenCLMiner) GetHashrate() int64 {
return int64(atomic.LoadInt32(&c.hashRate))
}
func (c *OpenCLMiner) Turbo(on bool) {
// This is GPU mining. Always be turbo.
}
func replaceWords(text string, kvs map[string]string) string {
for k, v := range kvs {
text = strings.Replace(text, k, v, -1)
}
return text
}
func logErr(err error) {
if err != nil {
fmt.Println("Error in OpenCL call:", err)
}
}
func argErr(err error) error {
return fmt.Errorf("arg err: %v", err)
}

@ -0,0 +1,600 @@
package ethash
/* DO NOT EDIT!!!
This code is version controlled at
https://github.com/ethereum/cpp-ethereum/blob/develop/libethash-cl/ethash_cl_miner_kernel.cl
If needed change it there first, then copy over here.
*/
const kernel = `
// author Tim Hughes <tim@twistedfury.com>
// Tested on Radeon HD 7850
// Hashrate: 15940347 hashes/s
// Bandwidth: 124533 MB/s
// search kernel should fit in <= 84 VGPRS (3 wavefronts)
#define THREADS_PER_HASH (128 / 16)
#define HASHES_PER_LOOP (GROUP_SIZE / THREADS_PER_HASH)
#define FNV_PRIME 0x01000193
__constant uint2 const Keccak_f1600_RC[24] = {
(uint2)(0x00000001, 0x00000000),
(uint2)(0x00008082, 0x00000000),
(uint2)(0x0000808a, 0x80000000),
(uint2)(0x80008000, 0x80000000),
(uint2)(0x0000808b, 0x00000000),
(uint2)(0x80000001, 0x00000000),
(uint2)(0x80008081, 0x80000000),
(uint2)(0x00008009, 0x80000000),
(uint2)(0x0000008a, 0x00000000),
(uint2)(0x00000088, 0x00000000),
(uint2)(0x80008009, 0x00000000),
(uint2)(0x8000000a, 0x00000000),
(uint2)(0x8000808b, 0x00000000),
(uint2)(0x0000008b, 0x80000000),
(uint2)(0x00008089, 0x80000000),
(uint2)(0x00008003, 0x80000000),
(uint2)(0x00008002, 0x80000000),
(uint2)(0x00000080, 0x80000000),
(uint2)(0x0000800a, 0x00000000),
(uint2)(0x8000000a, 0x80000000),
(uint2)(0x80008081, 0x80000000),
(uint2)(0x00008080, 0x80000000),
(uint2)(0x80000001, 0x00000000),
(uint2)(0x80008008, 0x80000000),
};
void keccak_f1600_round(uint2* a, uint r, uint out_size)
{
#if !__ENDIAN_LITTLE__
for (uint i = 0; i != 25; ++i)
a[i] = a[i].yx;
#endif
uint2 b[25];
uint2 t;
// Theta
b[0] = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20];
b[1] = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21];
b[2] = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22];
b[3] = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23];
b[4] = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24];
t = b[4] ^ (uint2)(b[1].x << 1 | b[1].y >> 31, b[1].y << 1 | b[1].x >> 31);
a[0] ^= t;
a[5] ^= t;
a[10] ^= t;
a[15] ^= t;
a[20] ^= t;
t = b[0] ^ (uint2)(b[2].x << 1 | b[2].y >> 31, b[2].y << 1 | b[2].x >> 31);
a[1] ^= t;
a[6] ^= t;
a[11] ^= t;
a[16] ^= t;
a[21] ^= t;
t = b[1] ^ (uint2)(b[3].x << 1 | b[3].y >> 31, b[3].y << 1 | b[3].x >> 31);
a[2] ^= t;
a[7] ^= t;
a[12] ^= t;
a[17] ^= t;
a[22] ^= t;
t = b[2] ^ (uint2)(b[4].x << 1 | b[4].y >> 31, b[4].y << 1 | b[4].x >> 31);
a[3] ^= t;
a[8] ^= t;
a[13] ^= t;
a[18] ^= t;
a[23] ^= t;
t = b[3] ^ (uint2)(b[0].x << 1 | b[0].y >> 31, b[0].y << 1 | b[0].x >> 31);
a[4] ^= t;
a[9] ^= t;
a[14] ^= t;
a[19] ^= t;
a[24] ^= t;
// Rho Pi
b[0] = a[0];
b[10] = (uint2)(a[1].x << 1 | a[1].y >> 31, a[1].y << 1 | a[1].x >> 31);
b[7] = (uint2)(a[10].x << 3 | a[10].y >> 29, a[10].y << 3 | a[10].x >> 29);
b[11] = (uint2)(a[7].x << 6 | a[7].y >> 26, a[7].y << 6 | a[7].x >> 26);
b[17] = (uint2)(a[11].x << 10 | a[11].y >> 22, a[11].y << 10 | a[11].x >> 22);
b[18] = (uint2)(a[17].x << 15 | a[17].y >> 17, a[17].y << 15 | a[17].x >> 17);
b[3] = (uint2)(a[18].x << 21 | a[18].y >> 11, a[18].y << 21 | a[18].x >> 11);
b[5] = (uint2)(a[3].x << 28 | a[3].y >> 4, a[3].y << 28 | a[3].x >> 4);
b[16] = (uint2)(a[5].y << 4 | a[5].x >> 28, a[5].x << 4 | a[5].y >> 28);
b[8] = (uint2)(a[16].y << 13 | a[16].x >> 19, a[16].x << 13 | a[16].y >> 19);
b[21] = (uint2)(a[8].y << 23 | a[8].x >> 9, a[8].x << 23 | a[8].y >> 9);
b[24] = (uint2)(a[21].x << 2 | a[21].y >> 30, a[21].y << 2 | a[21].x >> 30);
b[4] = (uint2)(a[24].x << 14 | a[24].y >> 18, a[24].y << 14 | a[24].x >> 18);
b[15] = (uint2)(a[4].x << 27 | a[4].y >> 5, a[4].y << 27 | a[4].x >> 5);
b[23] = (uint2)(a[15].y << 9 | a[15].x >> 23, a[15].x << 9 | a[15].y >> 23);
b[19] = (uint2)(a[23].y << 24 | a[23].x >> 8, a[23].x << 24 | a[23].y >> 8);
b[13] = (uint2)(a[19].x << 8 | a[19].y >> 24, a[19].y << 8 | a[19].x >> 24);
b[12] = (uint2)(a[13].x << 25 | a[13].y >> 7, a[13].y << 25 | a[13].x >> 7);
b[2] = (uint2)(a[12].y << 11 | a[12].x >> 21, a[12].x << 11 | a[12].y >> 21);
b[20] = (uint2)(a[2].y << 30 | a[2].x >> 2, a[2].x << 30 | a[2].y >> 2);
b[14] = (uint2)(a[20].x << 18 | a[20].y >> 14, a[20].y << 18 | a[20].x >> 14);
b[22] = (uint2)(a[14].y << 7 | a[14].x >> 25, a[14].x << 7 | a[14].y >> 25);
b[9] = (uint2)(a[22].y << 29 | a[22].x >> 3, a[22].x << 29 | a[22].y >> 3);
b[6] = (uint2)(a[9].x << 20 | a[9].y >> 12, a[9].y << 20 | a[9].x >> 12);
b[1] = (uint2)(a[6].y << 12 | a[6].x >> 20, a[6].x << 12 | a[6].y >> 20);
// Chi
a[0] = bitselect(b[0] ^ b[2], b[0], b[1]);
a[1] = bitselect(b[1] ^ b[3], b[1], b[2]);
a[2] = bitselect(b[2] ^ b[4], b[2], b[3]);
a[3] = bitselect(b[3] ^ b[0], b[3], b[4]);
if (out_size >= 4)
{
a[4] = bitselect(b[4] ^ b[1], b[4], b[0]);
a[5] = bitselect(b[5] ^ b[7], b[5], b[6]);
a[6] = bitselect(b[6] ^ b[8], b[6], b[7]);
a[7] = bitselect(b[7] ^ b[9], b[7], b[8]);
a[8] = bitselect(b[8] ^ b[5], b[8], b[9]);
if (out_size >= 8)
{
a[9] = bitselect(b[9] ^ b[6], b[9], b[5]);
a[10] = bitselect(b[10] ^ b[12], b[10], b[11]);
a[11] = bitselect(b[11] ^ b[13], b[11], b[12]);
a[12] = bitselect(b[12] ^ b[14], b[12], b[13]);
a[13] = bitselect(b[13] ^ b[10], b[13], b[14]);
a[14] = bitselect(b[14] ^ b[11], b[14], b[10]);
a[15] = bitselect(b[15] ^ b[17], b[15], b[16]);
a[16] = bitselect(b[16] ^ b[18], b[16], b[17]);
a[17] = bitselect(b[17] ^ b[19], b[17], b[18]);
a[18] = bitselect(b[18] ^ b[15], b[18], b[19]);
a[19] = bitselect(b[19] ^ b[16], b[19], b[15]);
a[20] = bitselect(b[20] ^ b[22], b[20], b[21]);
a[21] = bitselect(b[21] ^ b[23], b[21], b[22]);
a[22] = bitselect(b[22] ^ b[24], b[22], b[23]);
a[23] = bitselect(b[23] ^ b[20], b[23], b[24]);
a[24] = bitselect(b[24] ^ b[21], b[24], b[20]);
}
}
// Iota
a[0] ^= Keccak_f1600_RC[r];
#if !__ENDIAN_LITTLE__
for (uint i = 0; i != 25; ++i)
a[i] = a[i].yx;
#endif
}
void keccak_f1600_no_absorb(ulong* a, uint in_size, uint out_size, uint isolate)
{
for (uint i = in_size; i != 25; ++i)
{
a[i] = 0;
}
#if __ENDIAN_LITTLE__
a[in_size] ^= 0x0000000000000001;
a[24-out_size*2] ^= 0x8000000000000000;
#else
a[in_size] ^= 0x0100000000000000;
a[24-out_size*2] ^= 0x0000000000000080;
#endif
// Originally I unrolled the first and last rounds to interface
// better with surrounding code, however I haven't done this
// without causing the AMD compiler to blow up the VGPR usage.
uint r = 0;
do
{
// This dynamic branch stops the AMD compiler unrolling the loop
// and additionally saves about 33% of the VGPRs, enough to gain another
// wavefront. Ideally we'd get 4 in flight, but 3 is the best I can
// massage out of the compiler. It doesn't really seem to matter how
// much we try and help the compiler save VGPRs because it seems to throw
// that information away, hence the implementation of keccak here
// doesn't bother.
if (isolate)
{
keccak_f1600_round((uint2*)a, r++, 25);
}
}
while (r < 23);
// final round optimised for digest size
keccak_f1600_round((uint2*)a, r++, out_size);
}
#define copy(dst, src, count) for (uint i = 0; i != count; ++i) { (dst)[i] = (src)[i]; }
#define countof(x) (sizeof(x) / sizeof(x[0]))
uint fnv(uint x, uint y)
{
return x * FNV_PRIME ^ y;
}
uint4 fnv4(uint4 x, uint4 y)
{
return x * FNV_PRIME ^ y;
}
uint fnv_reduce(uint4 v)
{
return fnv(fnv(fnv(v.x, v.y), v.z), v.w);
}
typedef union
{
ulong ulongs[32 / sizeof(ulong)];
uint uints[32 / sizeof(uint)];
} hash32_t;
typedef union
{
ulong ulongs[64 / sizeof(ulong)];
uint4 uint4s[64 / sizeof(uint4)];
} hash64_t;
typedef union
{
uint uints[128 / sizeof(uint)];
uint4 uint4s[128 / sizeof(uint4)];
} hash128_t;
hash64_t init_hash(__constant hash32_t const* header, ulong nonce, uint isolate)
{
hash64_t init;
uint const init_size = countof(init.ulongs);
uint const hash_size = countof(header->ulongs);
// sha3_512(header .. nonce)
ulong state[25];
copy(state, header->ulongs, hash_size);
state[hash_size] = nonce;
keccak_f1600_no_absorb(state, hash_size + 1, init_size, isolate);
copy(init.ulongs, state, init_size);
return init;
}
uint inner_loop_chunks(uint4 init, uint thread_id, __local uint* share, __global hash128_t const* g_dag, __global hash128_t const* g_dag1, __global hash128_t const* g_dag2, __global hash128_t const* g_dag3, uint isolate)
{
uint4 mix = init;
// share init0
if (thread_id == 0)
*share = mix.x;
barrier(CLK_LOCAL_MEM_FENCE);
uint init0 = *share;
uint a = 0;
do
{
bool update_share = thread_id == (a/4) % THREADS_PER_HASH;
#pragma unroll
for (uint i = 0; i != 4; ++i)
{
if (update_share)
{
uint m[4] = { mix.x, mix.y, mix.z, mix.w };
*share = fnv(init0 ^ (a+i), m[i]) % DAG_SIZE;
}
barrier(CLK_LOCAL_MEM_FENCE);
mix = fnv4(mix, *share>=3 * DAG_SIZE / 4 ? g_dag3[*share - 3 * DAG_SIZE / 4].uint4s[thread_id] : *share>=DAG_SIZE / 2 ? g_dag2[*share - DAG_SIZE / 2].uint4s[thread_id] : *share>=DAG_SIZE / 4 ? g_dag1[*share - DAG_SIZE / 4].uint4s[thread_id]:g_dag[*share].uint4s[thread_id]);
}
} while ((a += 4) != (ACCESSES & isolate));
return fnv_reduce(mix);
}
uint inner_loop(uint4 init, uint thread_id, __local uint* share, __global hash128_t const* g_dag, uint isolate)
{
uint4 mix = init;
// share init0
if (thread_id == 0)
*share = mix.x;
barrier(CLK_LOCAL_MEM_FENCE);
uint init0 = *share;
uint a = 0;
do
{
bool update_share = thread_id == (a/4) % THREADS_PER_HASH;
#pragma unroll
for (uint i = 0; i != 4; ++i)
{
if (update_share)
{
uint m[4] = { mix.x, mix.y, mix.z, mix.w };
*share = fnv(init0 ^ (a+i), m[i]) % DAG_SIZE;
}
barrier(CLK_LOCAL_MEM_FENCE);
mix = fnv4(mix, g_dag[*share].uint4s[thread_id]);
}
}
while ((a += 4) != (ACCESSES & isolate));
return fnv_reduce(mix);
}
hash32_t final_hash(hash64_t const* init, hash32_t const* mix, uint isolate)
{
ulong state[25];
hash32_t hash;
uint const hash_size = countof(hash.ulongs);
uint const init_size = countof(init->ulongs);
uint const mix_size = countof(mix->ulongs);
// keccak_256(keccak_512(header..nonce) .. mix);
copy(state, init->ulongs, init_size);
copy(state + init_size, mix->ulongs, mix_size);
keccak_f1600_no_absorb(state, init_size+mix_size, hash_size, isolate);
// copy out
copy(hash.ulongs, state, hash_size);
return hash;
}
hash32_t compute_hash_simple(
__constant hash32_t const* g_header,
__global hash128_t const* g_dag,
ulong nonce,
uint isolate
)
{
hash64_t init = init_hash(g_header, nonce, isolate);
hash128_t mix;
for (uint i = 0; i != countof(mix.uint4s); ++i)
{
mix.uint4s[i] = init.uint4s[i % countof(init.uint4s)];
}
uint mix_val = mix.uints[0];
uint init0 = mix.uints[0];
uint a = 0;
do
{
uint pi = fnv(init0 ^ a, mix_val) % DAG_SIZE;
uint n = (a+1) % countof(mix.uints);
#pragma unroll
for (uint i = 0; i != countof(mix.uints); ++i)
{
mix.uints[i] = fnv(mix.uints[i], g_dag[pi].uints[i]);
mix_val = i == n ? mix.uints[i] : mix_val;
}
}
while (++a != (ACCESSES & isolate));
// reduce to output
hash32_t fnv_mix;
for (uint i = 0; i != countof(fnv_mix.uints); ++i)
{
fnv_mix.uints[i] = fnv_reduce(mix.uint4s[i]);
}
return final_hash(&init, &fnv_mix, isolate);
}
typedef union
{
struct
{
hash64_t init;
uint pad; // avoid lds bank conflicts
};
hash32_t mix;
} compute_hash_share;
hash32_t compute_hash(
__local compute_hash_share* share,
__constant hash32_t const* g_header,
__global hash128_t const* g_dag,
ulong nonce,
uint isolate
)
{
uint const gid = get_global_id(0);
// Compute one init hash per work item.
hash64_t init = init_hash(g_header, nonce, isolate);
// Threads work together in this phase in groups of 8.
uint const thread_id = gid % THREADS_PER_HASH;
uint const hash_id = (gid % GROUP_SIZE) / THREADS_PER_HASH;
hash32_t mix;
uint i = 0;
do
{
// share init with other threads
if (i == thread_id)
share[hash_id].init = init;
barrier(CLK_LOCAL_MEM_FENCE);
uint4 thread_init = share[hash_id].init.uint4s[thread_id % (64 / sizeof(uint4))];
barrier(CLK_LOCAL_MEM_FENCE);
uint thread_mix = inner_loop(thread_init, thread_id, share[hash_id].mix.uints, g_dag, isolate);
share[hash_id].mix.uints[thread_id] = thread_mix;
barrier(CLK_LOCAL_MEM_FENCE);
if (i == thread_id)
mix = share[hash_id].mix;
barrier(CLK_LOCAL_MEM_FENCE);
}
while (++i != (THREADS_PER_HASH & isolate));
return final_hash(&init, &mix, isolate);
}
hash32_t compute_hash_chunks(
__local compute_hash_share* share,
__constant hash32_t const* g_header,
__global hash128_t const* g_dag,
__global hash128_t const* g_dag1,
__global hash128_t const* g_dag2,
__global hash128_t const* g_dag3,
ulong nonce,
uint isolate
)
{
uint const gid = get_global_id(0);
// Compute one init hash per work item.
hash64_t init = init_hash(g_header, nonce, isolate);
// Threads work together in this phase in groups of 8.
uint const thread_id = gid % THREADS_PER_HASH;
uint const hash_id = (gid % GROUP_SIZE) / THREADS_PER_HASH;
hash32_t mix;
uint i = 0;
do
{
// share init with other threads
if (i == thread_id)
share[hash_id].init = init;
barrier(CLK_LOCAL_MEM_FENCE);
uint4 thread_init = share[hash_id].init.uint4s[thread_id % (64 / sizeof(uint4))];
barrier(CLK_LOCAL_MEM_FENCE);
uint thread_mix = inner_loop_chunks(thread_init, thread_id, share[hash_id].mix.uints, g_dag, g_dag1, g_dag2, g_dag3, isolate);
share[hash_id].mix.uints[thread_id] = thread_mix;
barrier(CLK_LOCAL_MEM_FENCE);
if (i == thread_id)
mix = share[hash_id].mix;
barrier(CLK_LOCAL_MEM_FENCE);
}
while (++i != (THREADS_PER_HASH & isolate));
return final_hash(&init, &mix, isolate);
}
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
__kernel void ethash_hash_simple(
__global hash32_t* g_hashes,
__constant hash32_t const* g_header,
__global hash128_t const* g_dag,
ulong start_nonce,
uint isolate
)
{
uint const gid = get_global_id(0);
g_hashes[gid] = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate);
}
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
__kernel void ethash_search_simple(
__global volatile uint* restrict g_output,
__constant hash32_t const* g_header,
__global hash128_t const* g_dag,
ulong start_nonce,
ulong target,
uint isolate
)
{
uint const gid = get_global_id(0);
hash32_t hash = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate);
if (hash.ulongs[countof(hash.ulongs)-1] < target)
{
uint slot = min(convert_uint(MAX_OUTPUTS), convert_uint(atomic_inc(&g_output[0]) + 1));
g_output[slot] = gid;
}
}
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
__kernel void ethash_hash(
__global hash32_t* g_hashes,
__constant hash32_t const* g_header,
__global hash128_t const* g_dag,
ulong start_nonce,
uint isolate
)
{
__local compute_hash_share share[HASHES_PER_LOOP];
uint const gid = get_global_id(0);
g_hashes[gid] = compute_hash(share, g_header, g_dag, start_nonce + gid, isolate);
}
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
__kernel void ethash_search(
__global volatile uint* restrict g_output,
__constant hash32_t const* g_header,
__global hash128_t const* g_dag,
ulong start_nonce,
ulong target,
uint isolate
)
{
__local compute_hash_share share[HASHES_PER_LOOP];
uint const gid = get_global_id(0);
hash32_t hash = compute_hash(share, g_header, g_dag, start_nonce + gid, isolate);
if (as_ulong(as_uchar8(hash.ulongs[0]).s76543210) < target)
{
uint slot = min((uint)MAX_OUTPUTS, atomic_inc(&g_output[0]) + 1);
g_output[slot] = gid;
}
}
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
__kernel void ethash_hash_chunks(
__global hash32_t* g_hashes,
__constant hash32_t const* g_header,
__global hash128_t const* g_dag,
__global hash128_t const* g_dag1,
__global hash128_t const* g_dag2,
__global hash128_t const* g_dag3,
ulong start_nonce,
uint isolate
)
{
__local compute_hash_share share[HASHES_PER_LOOP];
uint const gid = get_global_id(0);
g_hashes[gid] = compute_hash_chunks(share, g_header, g_dag, g_dag1, g_dag2, g_dag3,start_nonce + gid, isolate);
}
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
__kernel void ethash_search_chunks(
__global volatile uint* restrict g_output,
__constant hash32_t const* g_header,
__global hash128_t const* g_dag,
__global hash128_t const* g_dag1,
__global hash128_t const* g_dag2,
__global hash128_t const* g_dag3,
ulong start_nonce,
ulong target,
uint isolate
)
{
__local compute_hash_share share[HASHES_PER_LOOP];
uint const gid = get_global_id(0);
hash32_t hash = compute_hash_chunks(share, g_header, g_dag, g_dag1, g_dag2, g_dag3, start_nonce + gid, isolate);
if (as_ulong(as_uchar8(hash.ulongs[0]).s76543210) < target)
{
uint slot = min(convert_uint(MAX_OUTPUTS), convert_uint(atomic_inc(&g_output[0]) + 1));
g_output[slot] = gid;
}
}
`

@ -92,7 +92,7 @@ func TestEthashConcurrentVerify(t *testing.T) {
defer os.RemoveAll(eth.Full.Dir) defer os.RemoveAll(eth.Full.Dir)
block := &testBlock{difficulty: big.NewInt(10)} block := &testBlock{difficulty: big.NewInt(10)}
nonce, md := eth.Search(block, nil) nonce, md := eth.Search(block, nil, 0)
block.nonce = nonce block.nonce = nonce
block.mixDigest = common.BytesToHash(md) block.mixDigest = common.BytesToHash(md)
@ -135,7 +135,7 @@ func TestEthashConcurrentSearch(t *testing.T) {
// launch n searches concurrently. // launch n searches concurrently.
for i := 0; i < nsearch; i++ { for i := 0; i < nsearch; i++ {
go func() { go func() {
nonce, md := eth.Search(block, stop) nonce, md := eth.Search(block, stop, 0)
select { select {
case found <- searchRes{n: nonce, md: md}: case found <- searchRes{n: nonce, md: md}:
case <-stop: case <-stop:
@ -167,7 +167,7 @@ func TestEthashSearchAcrossEpoch(t *testing.T) {
for i := epochLength - 40; i < epochLength+40; i++ { for i := epochLength - 40; i < epochLength+40; i++ {
block := &testBlock{number: i, difficulty: big.NewInt(90)} block := &testBlock{number: i, difficulty: big.NewInt(90)}
rand.Read(block.hashNoNonce[:]) rand.Read(block.hashNoNonce[:])
nonce, md := eth.Search(block, nil) nonce, md := eth.Search(block, nil, 0)
block.nonce = nonce block.nonce = nonce
block.mixDigest = common.BytesToHash(md) block.mixDigest = common.BytesToHash(md)
if !eth.Verify(block) { if !eth.Verify(block) {

@ -6,7 +6,7 @@
GOBIN = build/bin GOBIN = build/bin
geth: geth:
build/env.sh go install -v $(shell build/ldflags.sh) ./cmd/geth build/env.sh go install -v $(shell build/flags.sh) ./cmd/geth
@echo "Done building." @echo "Done building."
@echo "Run \"$(GOBIN)/geth\" to launch geth." @echo "Run \"$(GOBIN)/geth\" to launch geth."
@ -39,12 +39,12 @@ evm:
@echo "Done building." @echo "Done building."
@echo "Run \"$(GOBIN)/evm to start the evm." @echo "Run \"$(GOBIN)/evm to start the evm."
mist: mist:
build/env.sh go install -v $(shell build/ldflags.sh) ./cmd/mist build/env.sh go install -v $(shell build/flags.sh) ./cmd/mist
@echo "Done building." @echo "Done building."
@echo "Run \"$(GOBIN)/mist --asset_path=cmd/mist/assets\" to launch mist." @echo "Run \"$(GOBIN)/mist --asset_path=cmd/mist/assets\" to launch mist."
all: all:
build/env.sh go install -v $(shell build/ldflags.sh) ./... build/env.sh go install -v $(shell build/flags.sh) ./...
test: all test: all
build/env.sh go test ./... build/env.sh go test ./...

@ -16,3 +16,7 @@ sep=$(go version | awk '{ if ($3 >= "go1.5" || index($3, "devel")) print "="; el
if [ -f ".git/HEAD" ]; then if [ -f ".git/HEAD" ]; then
echo "-ldflags '-X main.gitCommit$sep$(git rev-parse HEAD)'" echo "-ldflags '-X main.gitCommit$sep$(git rev-parse HEAD)'"
fi fi
if [ ! -z "$GO_OPENCL" ]; then
echo "-tags opencl"
fi

@ -468,8 +468,7 @@ func processTxs(repl *testjethre, t *testing.T, expTxc int) bool {
t.Errorf("incorrect number of pending transactions, expected %v, got %v", expTxc, txc) t.Errorf("incorrect number of pending transactions, expected %v, got %v", expTxc, txc)
return false return false
} }
err = repl.ethereum.StartMining(runtime.NumCPU(), "")
err = repl.ethereum.StartMining(runtime.NumCPU())
if err != nil { if err != nil {
t.Errorf("unexpected error mining: %v", err) t.Errorf("unexpected error mining: %v", err)
return false return false

@ -104,6 +104,22 @@ The makedag command generates an ethash DAG in /tmp/dag.
This command exists to support the system testing project. This command exists to support the system testing project.
Regular users do not need to execute it. Regular users do not need to execute it.
`,
},
{
Action: gpuinfo,
Name: "gpuinfo",
Usage: "gpuinfo",
Description: `
Prints OpenCL device info for all found GPUs.
`,
},
{
Action: gpubench,
Name: "gpubench",
Usage: "benchmark GPU",
Description: `
Runs quick benchmark on first GPU found.
`, `,
}, },
{ {
@ -298,6 +314,7 @@ JavaScript API. See https://github.com/ethereum/go-ethereum/wiki/Javascipt-Conso
utils.GasPriceFlag, utils.GasPriceFlag,
utils.MinerThreadsFlag, utils.MinerThreadsFlag,
utils.MiningEnabledFlag, utils.MiningEnabledFlag,
utils.MiningGPUFlag,
utils.AutoDAGFlag, utils.AutoDAGFlag,
utils.NATFlag, utils.NATFlag,
utils.NatspecEnabledFlag, utils.NatspecEnabledFlag,
@ -586,7 +603,10 @@ func startEth(ctx *cli.Context, eth *eth.Ethereum) {
} }
} }
if ctx.GlobalBool(utils.MiningEnabledFlag.Name) { if ctx.GlobalBool(utils.MiningEnabledFlag.Name) {
if err := eth.StartMining(ctx.GlobalInt(utils.MinerThreadsFlag.Name)); err != nil { err := eth.StartMining(
ctx.GlobalInt(utils.MinerThreadsFlag.Name),
ctx.GlobalString(utils.MiningGPUFlag.Name))
if err != nil {
utils.Fatalf("%v", err) utils.Fatalf("%v", err)
} }
} }
@ -740,6 +760,29 @@ func makedag(ctx *cli.Context) {
} }
} }
func gpuinfo(ctx *cli.Context) {
eth.PrintOpenCLDevices()
}
func gpubench(ctx *cli.Context) {
args := ctx.Args()
wrongArgs := func() {
utils.Fatalf(`Usage: geth gpubench <gpu number>`)
}
switch {
case len(args) == 1:
n, err := strconv.ParseUint(args[0], 0, 64)
if err != nil {
wrongArgs()
}
eth.GPUBench(n)
case len(args) == 0:
eth.GPUBench(0)
default:
wrongArgs()
}
}
func version(c *cli.Context) { func version(c *cli.Context) {
fmt.Println(ClientIdentifier) fmt.Println(ClientIdentifier)
fmt.Println("Version:", Version) fmt.Println("Version:", Version)

@ -149,6 +149,12 @@ var (
} }
// miner settings // miner settings
// TODO: refactor CPU vs GPU mining flags
MiningGPUFlag = cli.StringFlag{
Name: "minegpu",
Usage: "Mine with given GPUs. '--minegpu 0,1' will mine with the first two GPUs found.",
}
MinerThreadsFlag = cli.IntFlag{ MinerThreadsFlag = cli.IntFlag{
Name: "minerthreads", Name: "minerthreads",
Usage: "Number of miner threads", Usage: "Number of miner threads",

@ -306,7 +306,7 @@ func processTxs(repl *testFrontend, t *testing.T, expTxc int) bool {
return false return false
} }
err = repl.ethereum.StartMining(runtime.NumCPU()) err = repl.ethereum.StartMining(runtime.NumCPU(), "")
if err != nil { if err != nil {
t.Errorf("unexpected error mining: %v", err) t.Errorf("unexpected error mining: %v", err)
return false return false

@ -32,7 +32,7 @@ import (
// It returns true from Verify for any block. // It returns true from Verify for any block.
type FakePow struct{} type FakePow struct{}
func (f FakePow) Search(block pow.Block, stop <-chan struct{}) (uint64, []byte) { func (f FakePow) Search(block pow.Block, stop <-chan struct{}, index int) (uint64, []byte) {
return 0, nil return 0, nil
} }
func (f FakePow) Verify(block pow.Block) bool { return true } func (f FakePow) Verify(block pow.Block) bool { return true }

@ -34,7 +34,7 @@ type failPow struct {
failing uint64 failing uint64
} }
func (pow failPow) Search(pow.Block, <-chan struct{}) (uint64, []byte) { func (pow failPow) Search(pow.Block, <-chan struct{}, int) (uint64, []byte) {
return 0, nil return 0, nil
} }
func (pow failPow) Verify(block pow.Block) bool { return block.NumberU64() != pow.failing } func (pow failPow) Verify(block pow.Block) bool { return block.NumberU64() != pow.failing }
@ -47,7 +47,7 @@ type delayedPow struct {
delay time.Duration delay time.Duration
} }
func (pow delayedPow) Search(pow.Block, <-chan struct{}) (uint64, []byte) { func (pow delayedPow) Search(pow.Block, <-chan struct{}, int) (uint64, []byte) {
return 0, nil return 0, nil
} }
func (pow delayedPow) Verify(block pow.Block) bool { time.Sleep(pow.delay); return true } func (pow delayedPow) Verify(block pow.Block) bool { time.Sleep(pow.delay); return true }

@ -481,18 +481,6 @@ func (s *Ethereum) ResetWithGenesisBlock(gb *types.Block) {
s.chainManager.ResetWithGenesisBlock(gb) s.chainManager.ResetWithGenesisBlock(gb)
} }
func (s *Ethereum) StartMining(threads int) error {
eb, err := s.Etherbase()
if err != nil {
err = fmt.Errorf("Cannot start mining without etherbase address: %v", err)
glog.V(logger.Error).Infoln(err)
return err
}
go s.miner.Start(eb, threads)
return nil
}
func (s *Ethereum) Etherbase() (eb common.Address, err error) { func (s *Ethereum) Etherbase() (eb common.Address, err error) {
eb = s.etherbase eb = s.etherbase
if (eb == common.Address{}) { if (eb == common.Address{}) {

@ -0,0 +1,54 @@
// Copyright 2014 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
// +build !opencl
package eth
import (
"errors"
"fmt"
"github.com/ethereum/go-ethereum/logger"
"github.com/ethereum/go-ethereum/logger/glog"
)
const disabledInfo = "Set GO_OPENCL and re-build to enable."
func (s *Ethereum) StartMining(threads int, gpus string) error {
eb, err := s.Etherbase()
if err != nil {
err = fmt.Errorf("Cannot start mining without etherbase address: %v", err)
glog.V(logger.Error).Infoln(err)
return err
}
if gpus != "" {
return errors.New("GPU mining disabled. " + disabledInfo)
}
// CPU mining
go s.miner.Start(eb, threads)
return nil
}
func GPUBench(gpuid uint64) {
fmt.Println("GPU mining disabled. " + disabledInfo)
}
func PrintOpenCLDevices() {
fmt.Println("OpenCL disabled. " + disabledInfo)
}

@ -0,0 +1,103 @@
// Copyright 2014 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
// +build opencl
package eth
import (
"fmt"
"math/big"
"strconv"
"strings"
"time"
"github.com/ethereum/ethash"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/logger"
"github.com/ethereum/go-ethereum/logger/glog"
"github.com/ethereum/go-ethereum/miner"
)
func (s *Ethereum) StartMining(threads int, gpus string) error {
eb, err := s.Etherbase()
if err != nil {
err = fmt.Errorf("Cannot start mining without etherbase address: %v", err)
glog.V(logger.Error).Infoln(err)
return err
}
// GPU mining
if gpus != "" {
var ids []int
for _, s := range strings.Split(gpus, ",") {
i, err := strconv.Atoi(s)
if err != nil {
return fmt.Errorf("Invalid GPU id(s): %v", err)
}
if i < 0 {
return fmt.Errorf("Invalid GPU id: %v", i)
}
ids = append(ids, i)
}
// TODO: re-creating miner is a bit ugly
cl := ethash.NewCL(ids)
s.miner = miner.New(s, s.EventMux(), cl)
go s.miner.Start(eb, len(ids))
return nil
}
// CPU mining
go s.miner.Start(eb, threads)
return nil
}
func GPUBench(gpuid uint64) {
e := ethash.NewCL([]int{int(gpuid)})
var h common.Hash
bogoHeader := &types.Header{
ParentHash: h,
Number: big.NewInt(int64(42)),
Difficulty: big.NewInt(int64(999999999999999)),
}
bogoBlock := types.NewBlock(bogoHeader, nil, nil, nil)
err := ethash.InitCL(bogoBlock.NumberU64(), e)
if err != nil {
fmt.Println("OpenCL init error: ", err)
return
}
stopChan := make(chan struct{})
reportHashRate := func() {
for {
time.Sleep(3 * time.Second)
fmt.Printf("hashes/s : %v\n", e.GetHashrate())
}
}
fmt.Printf("Starting benchmark (%v seconds)\n", 60)
go reportHashRate()
go e.Search(bogoBlock, stopChan, 0)
time.Sleep(60 * time.Second)
fmt.Println("OK.")
}
func PrintOpenCLDevices() {
ethash.PrintDevices()
}

@ -118,7 +118,7 @@ func (self *CpuAgent) mine(work *Work, stop <-chan struct{}) {
glog.V(logger.Debug).Infof("(re)started agent[%d]. mining...\n", self.index) glog.V(logger.Debug).Infof("(re)started agent[%d]. mining...\n", self.index)
// Mine // Mine
nonce, mixDigest := self.pow.Search(work.Block, stop) nonce, mixDigest := self.pow.Search(work.Block, stop, self.index)
if nonce != 0 { if nonce != 0 {
block := work.Block.WithMiningResult(nonce, common.BytesToHash(mixDigest)) block := work.Block.WithMiningResult(nonce, common.BytesToHash(mixDigest))
self.returnCh <- &Result{work, block} self.returnCh <- &Result{work, block}

@ -48,7 +48,7 @@ func (pow *EasyPow) Turbo(on bool) {
pow.turbo = on pow.turbo = on
} }
func (pow *EasyPow) Search(block pow.Block, stop <-chan struct{}) (uint64, []byte) { func (pow *EasyPow) Search(block pow.Block, stop <-chan struct{}, index int) (uint64, []byte) {
r := rand.New(rand.NewSource(time.Now().UnixNano())) r := rand.New(rand.NewSource(time.Now().UnixNano()))
hash := block.HashNoNonce() hash := block.HashNoNonce()
diff := block.Difficulty() diff := block.Difficulty()

@ -17,7 +17,7 @@
package pow package pow
type PoW interface { type PoW interface {
Search(block Block, stop <-chan struct{}) (uint64, []byte) Search(block Block, stop <-chan struct{}, index int) (uint64, []byte)
Verify(block Block) bool Verify(block Block) bool
GetHashrate() int64 GetHashrate() int64
Turbo(bool) Turbo(bool)

@ -100,7 +100,7 @@ func (self *minerApi) StartMiner(req *shared.Request) (interface{}, error) {
} }
self.ethereum.StartAutoDAG() self.ethereum.StartAutoDAG()
err := self.ethereum.StartMining(args.Threads) err := self.ethereum.StartMining(args.Threads, "")
if err == nil { if err == nil {
return true, nil return true, nil
} }

@ -452,7 +452,7 @@ func (self *XEth) ClientVersion() string {
func (self *XEth) SetMining(shouldmine bool, threads int) bool { func (self *XEth) SetMining(shouldmine bool, threads int) bool {
ismining := self.backend.IsMining() ismining := self.backend.IsMining()
if shouldmine && !ismining { if shouldmine && !ismining {
err := self.backend.StartMining(threads) err := self.backend.StartMining(threads, "")
return err == nil return err == nil
} }
if ismining && !shouldmine { if ismining && !shouldmine {

Loading…
Cancel
Save