Game Of Life on GPU via OpenCL

Post your FreeBASIC source, examples, tips and tricks here. Please don’t post code without including an explanation.
Post Reply
D.J.Peters
Posts: 8586
Joined: May 28, 2005 3:28
Contact:

Game Of Life on GPU via OpenCL

Post by D.J.Peters »

file: GameOfLife.bas

Code: Select all

#include once "cl.bi"

#ifndef NULL
#define NULL cptr(any ptr,0)
#endif

const as string code = _
!"__kernel void GameOfLive(constant char* input, global char* output,\n"  & _
!"                         const unsigned int width, const unsigned int height){\n"  & _
!"  int i = get_global_id(0);\n" & _
!"  int rowUp = i - width;\n" & _
!"  int rowDown = i + width;\n" & _
!"  bool outOfBounds = (i < width);\n" & _
!"  outOfBounds |= (i > (width * (height-1)));\n" & _
!"  outOfBounds |= (i % width == 0);\n" & _
!"  outOfBounds |= (i % width == width-1);\n" & _
!"  if (outOfBounds) {\n" & _ 
!"   output[i] = 0; return;\n" & _
!"  }\n" & _
!"  int neighbours = input[rowUp-1] + input[rowUp] + input[rowUp+1];\n" & _
!"  neighbours += input[i-1] + input[i+1];\n" & _
!"  neighbours += input[rowDown-1] + input[rowDown] + input[rowDown+1];\n" & _
!"  if (neighbours == 3 || (input[i] && neighbours == 2))\n" & _
!"    output[i] = 1;\n" & _
!"  else\n"  & _
!"    output[i] = 0;\n" & _
!"}\n"

sub fail(errcode as cl_int,msg as string)
  if (errcode<>CL_SUCCESS) then
    if ScreenPtr() then screen 0
    print msg
    print getCLErrorString(errcode)
    beep:sleep:end
  end if
end sub

function GetPlatformID() as cl_platform_id_t
  dim as cl_platform_id_t patformid
  dim as cl_int failed = clGetPlatformIDs(1,@patformid, NULL)
  fail(failed,"Unable to enumerate platform IDs")
  return patformid
end function


function GetDeviceID(byval patformid as cl_platform_id_t, _
                     byval devicetype as cl_device_type_t=CL_DEVICE_TYPE_ALL) as cl_device_id_t
  dim as cl_device_id_t deviceid
  dim as cl_int failed = clGetDeviceIDs(patformid,devicetype,1,@deviceid,NULL)
  fail(failed,"Unable to enumerate device IDs")
  return deviceid
end function

function CreateContext(deviceid as cl_device_id_t) as cl_context_t
  dim as cl_int failed
  dim as cl_context_t context = clCreateContext(0,1,@deviceid,NULL,NULL,@failed)
  fail(failed,"Unable to create context")
  return context
end function

function CreateCommandQueue(byval context as cl_context_t, _
                            byval deviceid as cl_device_id_t) as cl_command_queue_t
  dim as cl_int failed
  dim as cl_command_queue_t queue = clCreateCommandQueue(context,deviceid,0,@failed)
  fail(failed,"Unable to create command queue")
  return queue
end function

function CreateProgram(byval context as cl_context_t, _
                       byval deviceid as cl_device_id_t, _
                       byval source as zstring ptr) as cl_program_t
  
  dim as cl_int failed
  dim as cl_program_t program = clCreateProgramWithSource(context,1,@source,NULL,@failed)
  if (program=0) or failed then
    fail(failed,"Unable to create program from source code")
  end if
  failed = clBuildProgram(program,0,NULL,NULL,NULL,NULL)
  if failed then
    dim as string strLog=space(2048)
    dim as cl_int nChars
    clGetProgramBuildInfo(program,deviceid,CL_PROGRAM_BUILD_LOG,2048,strptr(strLog),@nChars)
    strLog=left(strLog,nChars)
    fail(failed,!"Unable to build program\n" & strLog)
  end if
  return program
end function

function CreateKernel(byval program as cl_program_t, _
                      byval kernelname as zstring ptr) as cl_kernel_t
  dim as cl_int failed
  dim as cl_kernel_t kernel = clCreateKernel(program,kernelname,@failed)
  if (kernel=0) or failed then
    clReleaseProgram(program)
    fail(failed,"Unable to create kernel")
  end if
  return kernel
end function

function CreateReadBuffer(byval context as cl_context_t, _
                          byval nBytes as size_t) as cl_mem_t
  dim as cl_int failed
  dim as cl_mem_t p = clCreateBuffer(context,CL_MEM_READ_ONLY,nBytes,NULL,@failed)
  fail(failed,"Unable to create READ_ONLY buffer")
  return p
end function

function CreateWriteBuffer(byval context as cl_context_t, _
                           byval nBytes as size_t) as cl_mem_t
  dim as cl_int failed
  dim as cl_mem_t p = clCreateBuffer(context,CL_MEM_WRITE_ONLY,nBytes,NULL,@failed)
  fail(failed,"Unable to create WRITE_ONLY buffer")
  return p
end function

function CreateReadWriteBuffer(byval context as cl_context_t, _
                               byval nBytes as size_t) as cl_mem_t
  dim as cl_int failed
  dim as cl_mem_t p = clCreateBuffer(context,CL_MEM_READ_WRITE,nBytes,NULL,@failed)
  fail(failed,"Unable to create READ_WRITE buffer")
  return p
end function

sub SetKernelArg(byval kernel    as cl_kernel_t, _
                 byval nArgument as cl_uint, _
                 byval ArgSize   as size_t, _
                 byval pValue    as any ptr)
  dim as cl_int failed = clSetKernelArg(kernel,nArgument,ArgSize,pValue)
  fail(failed,"Unable to set kerel arg " & nArgument)
end sub

function GetKernelWorkGroupSize(byval kernel as cl_kernel_t, _
                                byval deviceid as cl_device_id_t) as size_t
  dim as size_t WorkGroupSize
  dim as cl_int failed = clGetKernelWorkGroupInfo(kernel,deviceid,CL_KERNEL_WORK_GROUP_SIZE, _
                                                   sizeof(size_t),@WorkGroupSize,NULL)
  fail(failed,"Unable to get kernel work-group size")
  return WorkGroupSize
end function

'
' main
'
randomize timer()
var platform = GetPlatformID()
var device = GetDeviceID(platform)
var context = CreateContext(device)
var queue = CreateCommandQueue(context,device)
var program = CreateProgram(context,device,strptr(code))
var kernel = CreateKernel(program,"GameOfLive")
var workgroupsize = GetKernelWorkGroupSize(kernel,device)
dim as size_t GAME_WIDTH=workgroupsize\2
dim as size_t GAME_HEIGHT=workgroupsize\2
dim as size_t GAME_SIZE=GAME_WIDTH*GAME_HEIGHT
dim as ubyte game(GAME_WIDTH-1,GAME_HEIGHT-1)
for y as CL_INT=0 to GAME_HEIGHT-1
  for x as CL_INT=0 to GAME_WIDTH-1
    if rnd()>.5 then game(x,y)=1
  next
next

var pInput = CreateReadBuffer(context,GAME_SIZE)
var pOutput= CreateWriteBuffer(context,GAME_SIZE) 
SetKernelArg(kernel, 0, sizeof(cl_mem_t),@pInput)
SetKernelArg(kernel, 1, sizeof(cl_mem_t),@pOutput)
SetKernelArg(kernel, 2, sizeof(cl_int),@GAME_WIDTH)
SetKernelArg(kernel, 3, sizeof(cl_int),@GAME_HEIGHT)
dim as cl_int failed = clEnqueueWriteBuffer(queue,pInput,CL_TRUE,0,GAME_SIZE,@game(0,0),0,NULL,NULL)
fail(failed,"Unable to enqueue buffer")


screenres GAME_WIDTH*3,GAME_HEIGHT*3,8

dim as integer frames
dim as double t1=Timer()
while inkey=""

const ITERA=10
for i as integer=1 to ITERA
  ' Run the kernel on every cell in the board
  failed = clEnqueueNDRangeKernel(queue,kernel,1,NULL,@GAME_SIZE,@workgroupsize,0,NULL,NULL)
  if failed then fail(failed,"Unable to enqueue kernel")
  ' Copy the output to the input for the next iteration
  failed = clEnqueueCopyBuffer(queue,pOutput,pInput,0,0,GAME_SIZE,0,NULL,NULL)
  if failed then fail(failed,"Unable to enqueue copy")
next
  ' read back in game()
  failed = clEnqueueReadBuffer(queue,pOutput,CL_TRUE,0,GAME_SIZE,@game(0,0),0,NULL,NULL)
  if failed then fail(failed,"Unable to read results")

#if 1
  screenlock
  for x as CL_INT=0 to GAME_WIDTH-1
    for y as CL_INT=0 to GAME_HEIGHT-1
      line(x*3,y*3)-step(2,2),game(x,y)+6,BF
    next
  next
  screenunlock
#endif
  frames+=ITERA
  if frames mod (ITERA*5)=0 then
    dim as double t2=timer()
    dim as integer fps=frames/(t2-t1)
    windowtitle "fps " & fps
    t1=t2:frames=0
  end if 
wend

clReleaseMemObject(pInput)
clReleaseMemObject(pOutput)
clReleaseKernel(kernel)
clReleaseCommandQueue(queue)
clReleaseContext(context)


Last edited by D.J.Peters on Mar 05, 2016 12:29, edited 3 times in total.
oyster
Posts: 274
Joined: Oct 11, 2005 10:46

Re: Game Of Life on GPU via OpenCL

Post by oyster »

the bi file is not a good BASIC sourcefile, on my PC, FreeBASIC-1.01.0-win64 says
E:\prg\BASIC\fb\lib\gpu\opencl\cl.bi(19) error 3: Expected End-of-Line, found '#
' in ''/ #ifndef __OPENCL_CL_BI__ #define __OPENCL_CL_BI__ #include once'
...
is there a url to download all correct BI?
thanks
D.J.Peters
Posts: 8586
Joined: May 28, 2005 3:28
Contact:

Re: Game Of Life on GPU via OpenCL

Post by D.J.Peters »

edit: will add new link to cl.bi !"

Joshy
Last edited by D.J.Peters on Mar 08, 2016 21:16, edited 2 times in total.
Provoni
Posts: 514
Joined: Jan 05, 2014 12:33
Location: Belgium

Re: Game Of Life on GPU via OpenCL

Post by Provoni »

Thanks for your work on GPU computing for FreeBASIC D.J Peters. How can the GPU be used to speed up a program? For instance I'm wondering if I can off-load tasks to the GPU with my multi-threaded cryptology program. My program spends most of its time on scoring heuristics. Crude example:

for i=1 to message_length
score+=score_table(msg(i),msg(i+1),msg(i+2),msg(i+3))
next i
D.J.Peters
Posts: 8586
Joined: May 28, 2005 3:28
Contact:

Re: Game Of Life on GPU via OpenCL

Post by D.J.Peters »

You have to learn OpenCL first OpenCL tutorial it's like C
than you have to describe your "problem" from view of parallelism.

Joshy
Post Reply