MobinMQL/Scripts/Examples/OpenCL/Double/MatrixMult.mq5
2025-07-22 14:48:34 +03:00

225 lines
8.2 KiB
MQL5

//+------------------------------------------------------------------+
//| MatrixMult.mq5 |
//| Copyright 2000-2025, MetaQuotes Ltd. |
//| https://www.mql5.com |
//+------------------------------------------------------------------+
#property copyright "Copyright 2000-2025, MetaQuotes Ltd."
#property link "https://www.mql5.com"
#property version "1.00"
#include <OpenCL/OpenCL.mqh>
//--- OpenCL kernels
#resource "Kernels/matrixmult.cl" as string cl_program
#define BLOCK_SIZE 10
//+------------------------------------------------------------------+
//| MatrixMult_CPU |
//+------------------------------------------------------------------+
bool MatrixMult_CPU(const double &matrix_a[],const double &matrix_b[],double &matrix_c[],
const int rows_a,const int cols_a,const int cols_b,ulong &time_cpu)
{
int size=rows_a*cols_b;
if(ArrayResize(matrix_c,size)!=size)
return(false);
//--- CPU calculation started
time_cpu=GetMicrosecondCount();
for(int i=0; i<rows_a; i++)
{
for(int j=0; j<cols_b; j++)
{
double sum=0.0;
for(int k=0; k<cols_a; k++)
{
sum+=matrix_a[cols_a*i+k]*matrix_b[cols_b*k+j];
}
matrix_c[cols_b*i+j]=sum;
}
}
//--- CPU calculation finished
time_cpu=ulong((GetMicrosecondCount()-time_cpu)/1000);
//---
return(true);
}
//+------------------------------------------------------------------+
//| MatrixMult_GPU |
//+------------------------------------------------------------------+
bool MatrixMult_GPU(const double &matrix_a[],const double &matrix_b[],double &matrix1_c[],double &matrix2_c[],
const int rows_a,const int cols_a,const int cols_b,const int size_a,const int size_b,
const int size_c,ulong &time1_gpu,ulong &time2_gpu)
{
const int task_dimension=2;
//--- prepare matrices for result
if(ArrayResize(matrix1_c,size_c)!=size_c || ArrayResize(matrix2_c,size_c)!=size_c)
return(false);
ArrayFill(matrix1_c,0,size_c,(double)0.0);
ArrayFill(matrix2_c,0,size_c,(double)0.0);
//--- OpenCL
COpenCL OpenCL;
if(!OpenCL.Initialize(cl_program,true))
{
PrintFormat("Error in OpenCL initialization. Error code=%d",GetLastError());
return(false);
}
//--- check support working with double
if(!OpenCL.SupportDouble())
{
PrintFormat("Working with double (cl_khr_fp64) is not supported on the device.");
return(false);
}
//--- create kernels
OpenCL.SetKernelsCount(2);
OpenCL.KernelCreate(0,"MatrixMult_GPU1");
OpenCL.KernelCreate(1,"MatrixMult_GPU2");
//--- create buffers
OpenCL.SetBuffersCount(3);
//---
if(!OpenCL.BufferFromArray(0,matrix_a,0,size_a,CL_MEM_READ_ONLY))
{
PrintFormat("Error in BufferFromArray for matrix A. Error code=%d",GetLastError());
return(false);
}
if(!OpenCL.BufferFromArray(1,matrix_b,0,size_b,CL_MEM_READ_ONLY))
{
PrintFormat("Error in BufferFromArray for matrix B. Error code=%d",GetLastError());
return(false);
}
if(!OpenCL.BufferCreate(2,size_c*sizeof(double),CL_MEM_WRITE_ONLY))
{
PrintFormat("Error in BufferCreate for matrix C. Error code=%d",GetLastError());
return(false);
}
//--- prepare arguments for kernel 0
int kernel_index=0;
OpenCL.SetArgumentBuffer(kernel_index,0,0);
OpenCL.SetArgumentBuffer(kernel_index,1,1);
OpenCL.SetArgumentBuffer(kernel_index,2,2);
OpenCL.SetArgument(kernel_index,3,rows_a);
OpenCL.SetArgument(kernel_index,4,cols_a);
OpenCL.SetArgument(kernel_index,5,cols_b);
//--- set task dimension a_rows x b_cols
uint global_work_size[2];
//--- set dimensions
global_work_size[0]=rows_a;
global_work_size[1]=cols_b;
uint global_work_offset[2]={0,0};
//--- GPU calculation start kernel 0
time1_gpu=GetMicrosecondCount();
if(!OpenCL.Execute(kernel_index,task_dimension,global_work_offset,global_work_size))
{
PrintFormat("Error in Execute. Error code=%d",GetLastError());
return(false);
}
if(!OpenCL.BufferRead(2,matrix1_c,0,0,size_c))
{
PrintFormat("Error in BufferRead for matrix1 C. Error code=%d",GetLastError());
return(false);
}
//--- GPU calculation finished
time1_gpu=ulong((GetMicrosecondCount()-time1_gpu)/1000);
//--- prepare arguments for kernel 1
kernel_index=1;
//--- set arguments
OpenCL.SetArgumentBuffer(kernel_index,0,0);
OpenCL.SetArgumentBuffer(kernel_index,1,1);
OpenCL.SetArgumentBuffer(kernel_index,2,2);
OpenCL.SetArgument(kernel_index,3,rows_a);
OpenCL.SetArgument(kernel_index,4,cols_a);
OpenCL.SetArgument(kernel_index,5,cols_b);
uint local_work_size[2];
local_work_size[0]=BLOCK_SIZE;
local_work_size[1]=BLOCK_SIZE;
//--- GPU calculation start, kernel1
time2_gpu=GetMicrosecondCount();
if(!OpenCL.Execute(kernel_index,task_dimension,global_work_offset,global_work_size,local_work_size))
{
PrintFormat("Error in Execute. Error code=%d",GetLastError());
return(false);
}
if(!OpenCL.BufferRead(2,matrix2_c,0,0,size_c))
{
PrintFormat("Error in BufferRead for matrix2 C. Error code=%d",GetLastError());
return(false);
}
//--- GPU calculation finished
time2_gpu=ulong((GetMicrosecondCount()-time2_gpu)/1000);
//--- remove OpenCL objects
OpenCL.Shutdown();
//---
return(true);
}
//+------------------------------------------------------------------+
//| Script program start function |
//+------------------------------------------------------------------+
void OnStart()
{
//--- matrix A 1000x2000
int rows_a=1000;
int cols_a=2000;
//--- matrix B 2000x1000
int rows_b=cols_a;
int cols_b=1000;
//--- matrix C 1000x1000
int rows_c=rows_a;
int cols_c=cols_b;
//--- matrix A: size=rows_a*cols_a
int size_a=rows_a*cols_a;
int size_b=rows_b*cols_b;
int size_c=rows_c*cols_c;
//--- prepare matrix A
double matrix_a[];
ArrayResize(matrix_a,rows_a*cols_a);
for(int i=0; i<rows_a; i++)
for(int j=0; j<cols_a; j++)
{
matrix_a[i*cols_a+j]=(double)(10*MathRand()/32767);
}
//--- prepare matrix B
double matrix_b[];
ArrayResize(matrix_b,rows_b*cols_b);
for(int i=0; i<rows_b; i++)
for(int j=0; j<cols_b; j++)
{
matrix_b[i*cols_b+j]=(double)(10*MathRand()/32767);
}
//--- CPU: calculate matrix product matrix_a*matrix_b
double matrix_c_cpu[];
ulong time_cpu=0;
if(!MatrixMult_CPU(matrix_a,matrix_b,matrix_c_cpu,rows_a,cols_a,cols_b,time_cpu))
{
PrintFormat("Error in calculation on CPU. Error code=%d",GetLastError());
return;
}
//--- calculate matrix product using GPU
double matrix_c_gpu_method1[];
double matrix_c_gpu_method2[];
ulong time_gpu_method1=0;
ulong time_gpu_method2=0;
if(!MatrixMult_GPU(matrix_a,matrix_b,matrix_c_gpu_method1,matrix_c_gpu_method2,rows_a,cols_a,cols_b,size_a,size_b,size_c,time_gpu_method1,time_gpu_method2))
{
PrintFormat("Error in calculation on GPU. Error code=%d",GetLastError());
return;
}
//--- calculate CPU/GPU ratio
double CPU_GPU_ratio1=0;
double CPU_GPU_ratio2=0;
if(time_gpu_method1!=0)
CPU_GPU_ratio1=1.0*time_cpu/time_gpu_method1;
if(time_gpu_method2!=0)
CPU_GPU_ratio2=1.0*time_cpu/time_gpu_method2;
PrintFormat("time CPU=%d ms, time GPU global work groups =%d ms, CPU/GPU ratio: %f",time_cpu,time_gpu_method1,CPU_GPU_ratio1);
PrintFormat("time CPU=%d ms, time GPU local work groups =%d ms, CPU/GPU ratio: %f",time_cpu,time_gpu_method2,CPU_GPU_ratio2);
//--- check calculations
double total_error1=0;
double total_error2=0;
for(int i=0; i<rows_c; i++)
{
for(int j=0; j<cols_c; j++)
{
int pos=cols_c*i+j;
total_error1+=MathAbs(matrix_c_gpu_method1[pos]-matrix_c_cpu[pos]);
total_error2+=MathAbs(matrix_c_gpu_method2[pos]-matrix_c_cpu[pos]);
}
}
PrintFormat("Total error for method 1 = %f",total_error1);
PrintFormat("Total error for method 2 = %f",total_error2);
}
//+------------------------------------------------------------------+