โอเพนซีแอล (อังกฤษ: OpenCL: Open Computing Language) เป็นภาษาโปรแกรมที่ใช้แยกงานจากการประมวลผลตามปรกติไปให้หน่วยประมวลผลกราฟิกส์

โอเพนซีแอล (อังกฤษ: OpenCL: Open Computing Language) เป็นภาษาโปรแกรมที่ใช้แยกงานจากการประมวลผลตามปรกติไปให้หน่วยประมวลผลกราฟิกส์ และ CPU อื่น ๆ ประมวลผลร่วมด้วย เพื่อใช้ GPU ในการประมวลผลอื่น ๆ นอกเหนือไปจากการประมวลผลคอมพิวเตอร์กราฟิกส์ตามปรกติ พัฒนาขึ้นโดย Khronos Group ร่วมกับบริษัทอื่น ๆ มี C99 เป็นพื้นฐาน โดยบริษัทแอปเปิลได้เสนอให้ Khronos Group เป็นตัวกลางเพื่อกำหนด OpenCL เป็นมาตรฐานเพื่อจะได้ใช้งานใน Mac OS X v10.6

ตัวอย่างการใช้งาน

ตัวอย่างนี้เป็นการใช้ OpenCL ในการคำนวณการแปลงฟูรีเยแบบเร็ว (FFT):

// create a compute context with GPU device context = clCreateContextFromType(CL_DEVICE_TYPE_GPU); // create a work-queue queue = clCreateWorkQueue(context, NULL, NULL, 0); // allocate the buffer memory objects memobjs[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float)*2*num_entries, srcA); memobjs[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float)*2*num_entries, NULL); // create the compute program program = clCreateProgramFromSource(context, 1, &fft1D_1024_kernel_src, NULL); // build the compute program executable clBuildProgramExecutable(program, false, NULL, NULL); // create the compute kernel kernel = clCreateKernel(program, "fft1D_1024"); // create N-D range object with work-item dimensions global_work_size[0] = n; local_work_size[0] = 64; range = clCreateNDRangeContainer(context, 0, 1, global_work_size, local_work_size); // set the args values clSetKernelArg(kernel, 0, (void *)&memobjs[0], sizeof(cl_mem), NULL); clSetKernelArg(kernel, 1, (void *)&memobjs[1], sizeof(cl_mem), NULL); clSetKernelArg(kernel, 2, NULL, sizeof(float)*(local_work_size[0]+1)*16, NULL); clSetKernelArg(kernel, 3, NULL, sizeof(float)*(local_work_size[0]+1)*16, NULL);  // execute kernel clExecuteKernel(queue, kernel, NULL, range, NULL, 0, NULL);

ส่วนที่ใช้คำนวณจริง (อิงจาก Fitting FFT onto the G80 Architecture):

// This kernel computes FFT of length 1024. The 1024 length FFT is decomposed into
// calls to a radix 16 function, another radix 16 function and then a radix 4 function
//
// Parameters:
//   in, out      - input and output arrays of float2 in __global memory
//   sMemx, sMemy - __local float scratch buffers, passed on to localShuffle
//
// Each work-group addresses a 1024-element slice (get_group_id(0) * 1024) and
// each work-item declares its own 16-element float2 array `data`.
// globalLoads, globalStores, fftRadix16Pass, fftRadix4Pass, twiddleFactorMul
// and localShuffle are helpers defined outside this excerpt.
__kernel void fft1D_1024(__global float2 *in, __global float2 *out,
                         __local float *sMemx, __local float *sMemy)
{
    int tid = get_local_id(0);
    int blockIdx = get_group_id(0) * 1024 + tid;
    float2 data[16];

    // starting index of data to/from global memory
    in = in + blockIdx;
    out = out + blockIdx;

    globalLoads(data, in, 64); // coalesced global reads

    fftRadix16Pass(data); // in-place radix-16 pass
    twiddleFactorMul(data, tid, 1024, 0);

    // local shuffle using local memory
    localShuffle(data, sMemx, sMemy, tid, (((tid & 15) * 65) + (tid >> 4)));

    fftRadix16Pass(data); // in-place radix-16 pass
    twiddleFactorMul(data, tid, 64, 4); // twiddle factor multiplication

    localShuffle(data, sMemx, sMemy, tid, (((tid >> 4) * 64) + (tid & 15)));

    // four radix-4 function calls
    fftRadix4Pass(data);
    fftRadix4Pass(data + 4);
    fftRadix4Pass(data + 8);
    fftRadix4Pass(data + 12);

    // coalesced global writes
    globalStores(data, out, 64);
}

อ้างอิง

"OpenCL" (PDF). SIGGRAPH2008. 2008-08-14. คลังข้อมูลเก่าเก็บจากแหล่งเดิม (PDF) เมื่อ 2012-03-19. สืบค้นเมื่อ 2008-08-14.
"Fitting FFT onto G80 Architecture" (PDF). Vasily Volkov and Brian Kazian, UC Berkeley CS258 project report. May 2008. สืบค้นเมื่อ 2008-11-14.

[siggraph-1] "OpenCL" (PDF). SIGGRAPH2008. 2008-08-14. คลังข้อมูลเก่าเก็บจากแหล่งเดิม (PDF) เมื่อ 2012-03-19. สืบค้นเมื่อ 2008-08-14.

[VolkovKazianFFTG80-2] "Fitting FFT onto G80 Architecture" (PDF). Vasily Volkov and Brian Kazian, UC Berkeley CS258 project report. May 2008. สืบค้นเมื่อ 2008-11-14.