GPGPU labor VIII. OpenCL bevezetés. Kezdeti teendők. Tantárgy honlapja, OpenCL bevezetés II. A labor kiindulási alapjának letöltése (lab8_base.zip), kitömörítés a D:\GPGPU\ könyvtárba. D:\GPGPU\labs\lab8\lab8_opencl\lab8_opencl.sln indítása.

  2. Kezdeti teendők • Tantárgy honlapja, OpenCL bevezetés II. • A labor kiindulási alapjának letöltése (lab8_base.zip), kitömörítés a D:\GPGPU\ könyvtárba • D:\GPGPU\labs\lab8\lab8_opencl\lab8_opencl.sln indítása • Project tulajdonságai – Configuration Properties – Debugging – Working Directory = $(ProjectDir)\..\..\bin

  3. Platform // OpenCL platform cl_platform_id platform; char* getPlatformInfo(cl_platform_id platform, cl_platform_infoparamName){ size_tinfoSize = 0; CL_SAFE_CALL( clGetPlatformInfo(platform, paramName, 0, NULL, &infoSize) ); char* info = (char*)malloc(infoSize); CL_SAFE_CALL( clGetPlatformInfo(platform, paramName, infoSize, info, NULL) ); return info; } cl_platform_idcreatePlatform(){ cl_platform_id platform; CL_SAFE_CALL( clGetPlatformIDs(1, &platform, NULL)); std::cout << getPlatformInfo(platform, CL_PLATFORM_VERSION) << std::endl; return platform; }

  4. OpenCL eszközök // OpenCL devices of the platform cl_device_iddevice_id; void* getDeviceInfo(cl_device_iddevice_id, cl_device_infoparamName){ size_tinfoSize = 0; CL_SAFE_CALL( clGetDeviceInfo(device_id, paramName, 0, NULL, &infoSize) ); char* info = (char*)malloc(infoSize); CL_SAFE_CALL( clGetDeviceInfo(device_id, paramName, infoSize, info, NULL) ); return info; } cl_device_idcreateDevice(cl_platform_id platform, cl_device_type type){ cl_device_iddevice_id; CL_SAFE_CALL( clGetDeviceIDs(platform, type, 1, &device_id, NULL) ); cl_uint* max_compute_units = (cl_uint*)getDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS); std::cout << "Max computeunits: " << *max_compute_units << std::endl; return device_id; }

  5. Kontextus // OpenCL context cl_context context; cl_contextcreateContext(cl_device_iddevice_id){ cl_context context = 0; context = clCreateContext(0, 1, &device_id, NULL, NULL, NULL); if(!context){ std::cerr << "Context creation failed!\n"; exit(EXIT_FAILURE); } return context; }

  6. Parancs sor // OpenCL command queue cl_command_queue commands; cl_command_queuecreateCommandQueue(cl_contextcontext, cl_device_iddevice){ cl_command_queuecommand_queue = 0; command_queue = clCreateCommandQueue(context, device_id, 0, NULL); if(!command_queue){ std::cerr << "Command queue creation failed!\n"; } returncommand_queue; }

  7. OpenCL program // OpenCL program cl_program program; boolfileToString(const char* path, char*& out, int& len) { std::ifstream file(path, std::ios::ate | std::ios::binary); if(!file.is_open()) { return false; } len = file.tellg(); out = new char[ len+1 ]; file.seekg (0, std::ios::beg); file.read(out, len); file.close(); out[len] = 0; return true; }

  8. OpenCL program cl_programcreateProgram(cl_context context, cl_device_iddevice_id, const char* fileName){ char* programSource = NULL; intlen = 0; interrorFlag = -1; if(!fileToString(fileName, programSource, len)){ std::cerr << "Error loading program: " << fileName << std::endl; exit(EXIT_FAILURE); } cl_program program = 0; program = clCreateProgramWithSource(context, 1, (const char**)&programSource, NULL, NULL); if (!program) { std::cerr << "Error: Failed to create compute program!" << std::endl; exit(EXIT_FAILURE); } cl_int err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if (err != CL_SUCCESS) { size_tlen; char buffer[2048]; std::cerr << "Error: Failed to build program executable!" << std::endl; clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); std::cerr << buffer << std::endl; exit(1); } return program; }

  9. OpenCL kernel // OpenCL kernel cl_kernelcreateKernel(cl_program program, const char* kernelName){ cl_kernel kernel; cl_int err; kernel = clCreateKernel(program, kernelName, &err); if (!kernel || err != CL_SUCCESS) { std::cerr << "Error: Failed to create compute kernel!" << std::endl; exit(1); } return kernel; }

  10. main() // OpenCL init platform = createPlatform(); device_id = createDevice(platform, CL_DEVICE_TYPE_GPU); context = createContext(device_id); commands = createCommandQueue(context, device_id); program = createProgram(context, device_id, "programs.cl"); // OpenCL processing // OpenCL cleanup clReleaseProgram(program); clReleaseCommandQueue(commands); clReleaseContext(context); return 0;

  11. Globális címzés // simple global address void globalAddress(){ cl_kernelglobalAddressKernel = createKernel(program, "globalAddress"); const intdata_size = 1024; float* data = (float*)malloc(sizeof(float)*data_size); cl_memclData = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * data_size, NULL, NULL); CL_SAFE_CALL( clSetKernelArg(globalAddressKernel, 0, sizeof(cl_mem), &clData) ); size_tworkgroupSize = 0; CL_SAFE_CALL( clGetKernelWorkGroupInfo(globalAddressKernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(workgroupSize), &workgroupSize, NULL) ); size_tworkSize = data_size; CL_SAFE_CALL( clEnqueueNDRangeKernel(commands, globalAddressKernel, 1, NULL, &workSize, &workgroupSize, 0, NULL, NULL) ); clFinish(commands); CL_SAFE_CALL( clEnqueueReadBuffer(commands, clData, CL_TRUE, 0, sizeof(float) * data_size, data, 0, NULL, NULL) ); FILE* outFile = fopen("globalAddress.txt", "w"); for(int i = 0; i < data_size; ++i){ fprintf(outFile, "%f\n", data[i]); } fclose(outFile); clReleaseKernel(globalAddressKernel); free(data); }

  // 12. Globális címzés (Global addressing) (programs.cl)
  // Each work-item stores its own global index into the matching slot.
  __kernel void globalAddress(__global float* data)
  {
    const int globalIndex = get_global_id(0);
    data[globalIndex] = globalIndex;
  }

  13. Globális címzés

  14. Lokális címzés // local address void localAddress(){ cl_kernellocalAddressKernel = createKernel(program, "localAddress"); const intdata_size = 1024; float* data = (float*)malloc(sizeof(float)*data_size); cl_memclData = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * data_size, NULL, NULL); CL_SAFE_CALL( clSetKernelArg(localAddressKernel, 0, sizeof(cl_mem), &clData) ); size_tworkgroupSize = 0; CL_SAFE_CALL( clGetKernelWorkGroupInfo(localAddressKernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(workgroupSize), &workgroupSize, NULL) ); workgroupSize = workgroupSize / 4; size_tworkSize = data_size; CL_SAFE_CALL( clEnqueueNDRangeKernel(commands, localAddressKernel, 1, NULL, &workSize, &workgroupSize, 0, NULL, NULL) ); clFinish(commands); CL_SAFE_CALL( clEnqueueReadBuffer(commands, clData, CL_TRUE, 0, sizeof(float) * data_size, data, 0, NULL, NULL) ); FILE* outFile = fopen("localAddress.txt", "w"); for(int i = 0; i < data_size; ++i){ fprintf(outFile, "%f\n", data[i]); } fclose(outFile); clReleaseKernel(localAddressKernel); free(data); }

  // 15. Lokális címzés (Local addressing) (programs.cl)
  // Each work-item stores its index within its work-group; the target slot
  // is rebuilt from the group index, the group size and the local index.
  __kernel void localAddress(__global float* data)
  {
    const int localIndex = get_local_id(0);
    const size_t slot = get_local_id(0) + get_group_id(0) * get_local_size(0);
    data[slot] = localIndex;
  }

  16. Lokális címzés

  17. 2D címzés // 2D address void address2D(){ cl_kernel address2DKernel = createKernel(program, "address2D"); const intdata_size[2] = {1024, 1024}; cl_float4* data = (cl_float4*)malloc(sizeof(cl_float4)*data_size[0] * data_size[1]); cl_memclData = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_float4) * data_size[0] * data_size[1], NULL, NULL); CL_SAFE_CALL( clSetKernelArg(address2DKernel, 0, sizeof(cl_mem), &clData) ); size_tworkgroupSize[2] = {8, 8}; size_tworkSize[2] = { data_size[0], data_size[1] }; CL_SAFE_CALL( clEnqueueNDRangeKernel(commands, address2DKernel, 2, NULL, workSize, workgroupSize, 0, NULL, NULL) ); clFinish(commands); CL_SAFE_CALL( clEnqueueReadBuffer(commands, clData, CL_TRUE, 0, sizeof(cl_float4) * data_size[0] * data_size[1], data, 0, NULL, NULL) ); FILE* outFile = fopen("2DAddress.txt", "w"); for(int i = 0; i < data_size[0] * data_size[1]; ++i){ fprintf(outFile, "G: [%f, %f] L: [%f, %f]\n", data[i].s[0], data[i].s[1], data[i].s[2], data[i].s[3]); } fclose(outFile); clReleaseKernel(address2DKernel); free(data); }

  // 18. 2D címzés (2D addressing) (programs.cl)
  // Each work-item stores (global x, global y, local x, local y) at its
  // row-major position in the output buffer.
  __kernel void address2D(__global float4* data){
    int localIDX = get_local_id(0);
    int localIDY = get_local_id(1);
    int globalIDX = get_global_id(0);
    int globalIDY = get_global_id(1);

    // Row-major index: one row holds get_global_size(0) elements.
    data[globalIDX + get_global_size(0) * globalIDY] = (float4)(globalIDX, globalIDY, localIDX, localIDY);
  }

  19. Adatfeldolgozás // square void square(){ cl_kernelsquareKernel = createKernel(program, "square"); const intdata_size = 1024; float* inputData = (float*)malloc(sizeof(float) * data_size); for(int i = 0; i < data_size; ++i){ inputData[i] = i; } cl_memclInputData = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * data_size, NULL, NULL); CL_SAFE_CALL( clEnqueueWriteBuffer(commands, clInputData, CL_TRUE, 0, sizeof(float) * data_size, inputData, 0, NULL, NULL) ); float* data = (float*)malloc(sizeof(float)*data_size); cl_memclData = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * data_size, NULL, NULL); CL_SAFE_CALL( clSetKernelArg(squareKernel, 0, sizeof(cl_mem), &clInputData) ); CL_SAFE_CALL( clSetKernelArg(squareKernel, 1, sizeof(cl_mem), &clData) ); CL_SAFE_CALL( clSetKernelArg(squareKernel, 2, sizeof(int), &data_size) ); size_tworkgroupSize = 0; CL_SAFE_CALL( clGetKernelWorkGroupInfo(squareKernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(workgroupSize), &workgroupSize, NULL) ); size_tworkSize = data_size; CL_SAFE_CALL( clEnqueueNDRangeKernel(commands, squareKernel, 1, NULL, &workSize, &workgroupSize, 0, NULL, NULL) ); clFinish(commands); CL_SAFE_CALL( clEnqueueReadBuffer(commands, clData, CL_TRUE, 0, sizeof(float) * data_size, data, 0, NULL, NULL) ); int wrong = 0; for(int i = 0; i < data_size; ++i){ if(data[i] != inputData[i] * inputData[i]){ wrong++; } } std::cout << "Wrong squares: " << wrong << std::endl; clReleaseKernel(squareKernel); free(data); free(inputData); }

  // 20. Adatfeldolgozás (Data processing) (programs.cl)
  // Writes the square of each input element to the output buffer.
  // Work-items whose global index is not below data_size do nothing.
  __kernel void square(__global float* inputData, __global float* outputData, const int data_size){
    int id = get_global_id(0);
    if(id < data_size){
      outputData[id] = inputData[id] * inputData[id];
    }
  }

  21. 2D függvény kiértékelés // 2D function void function2D(){ cl_kernel function2DKernel = createKernel(program, "function2D"); const intdata_size[2] = {1024, 1024}; cl_float4* data = (cl_float4*)malloc(sizeof(cl_float4) * data_size[0] * data_size[1]); cl_memclData = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_float4) * data_size[0] * data_size[1], NULL, NULL); CL_SAFE_CALL( clSetKernelArg(function2DKernel, 0, sizeof(cl_mem), &clData) ); size_tworkSize[2] = { data_size[0], data_size[1] }; CL_SAFE_CALL( clEnqueueNDRangeKernel(commands, function2DKernel, 2, NULL, workSize, NULL, 0, NULL, NULL) ); clFinish(commands); CL_SAFE_CALL( clEnqueueReadBuffer(commands, clData, CL_TRUE, 0, sizeof(cl_float4) * data_size[0] * data_size[1], data, 0, NULL, NULL) ); FILE* outFile = fopen("function2D.txt", "w"); for(int i = 0; i < data_size[0] * data_size[1]; ++i){ fprintf(outFile, "%f %f %f\n", data[i].x, data[i].y, data[i].z); } fclose(outFile); clReleaseKernel(function2DKernel); free(data); }

  // 22. 2D függvény kiértékelés (2D function evaluation) (programs.cl)
  // Evaluates sin(x) * cos(y) on a grid: each work-item scales its global
  // index into the range [0, 6) on both axes and stores
  // (index x, index y, function value, 0) at its row-major position.
  __kernel void function2D(__global float4* data){
    int2 id = (int2)(get_global_id(0), get_global_id(1));
    int2 globalSize = (int2)(get_global_size(0), get_global_size(1));

    // Both scale factors are float literals, so the whole expression stays
    // in single precision and needs no double-precision support.
    float2 point = (float2)(id.x / (float)globalSize.x * 6.0f,
                            id.y / (float)globalSize.y * 6.0f);

    data[id.x + id.y * globalSize.x] = (float4)(id.x, id.y, sin(point.x) * cos(point.y), 0);
  }

  23. 2D függvény kiértékelés • GNUPlot • splot 'function2D.txt' every 1000 using 1:2:3 with dots

