L3 API GEMM example
1. xfblasGemm - matrix-matrix multiplication
#include "xf_blas.hpp"
# define IDX2R(i, j, ld) (((i) * (ld)) + (j))
# define m 5 // a - mxk matrix
# define n 5 // b - kxn matrix
# define k 5 // c - mxn matrix
using namespace std;
int main(int argc, char **argv) {
if (argc < 3){
cerr << " usage: \n"
<< " gemm_common_test.exe gemx.xclbin config_info.dat 1\n"
<< " gemm_common_test.exe gemx.xclbin config_info.dat\n";
return EXIT_FAILURE;
}
unsigned int l_argIdx = 1;
string l_xclbinFile(argv[l_argIdx++]);
string l_configFile(argv[l_argIdx++]);
string l_logFile;
ofstream logFile("xrt_report.txt");
logFile.close();
l_logFile = "xrt_report.txt";
int l_numKernel = 1;
if (argc == 4){
cout<<"read custom number of kernels\n";
l_numKernel = stoi(argv[l_argIdx++]);
}
int i, j; // i-row index ,j- column index
XFBLAS_dataType * a, * b, * c;
a = ( XFBLAS_dataType *) malloc (m*k* sizeof ( XFBLAS_dataType )); // host memory for a
b = ( XFBLAS_dataType *) malloc (k*n* sizeof ( XFBLAS_dataType ));
c = ( XFBLAS_dataType *) malloc (m*n* sizeof ( XFBLAS_dataType ));
int ind = 1;
for( i = 0; i< m; i ++){
for( j = 0; j < k; j ++){
a[ IDX2R (i,j,k )]=( XFBLAS_dataType ) ind++;
}
}
for( i = 0; i< k; i ++){
for( j = 0; j < n; j ++){
b[ IDX2R (i,j,n )]=( XFBLAS_dataType ) ind++;
}
}
for( i = 0; i< m; i ++){
for( j = 0; j < n; j ++){
c[ IDX2R (i,j,n )]= 0;
}
}
XFBLAS_dataType * d_a, * d_b, * d_c;
xfblasEngine_t engineName = XFBLAS_ENGINE_GEMM;
xfblasStatus_t status = XFBLAS_STATUS_SUCCESS;
status = xfblasCreate(l_xclbinFile.c_str(), l_configFile, l_logFile.c_str(), engineName, l_numKernel);
if (status != XFBLAS_STATUS_SUCCESS) {
cout<<"Create Handle failed with error code: "<< status << "\n";
return EXIT_FAILURE;
}
status = xfblasMalloc(&d_a, m,k,sizeof(*a), l_numKernel-1);
if (status != XFBLAS_STATUS_SUCCESS) {
cout<<"Malloc memory for matrix A failed with error code: "<< status << "\n";
return EXIT_FAILURE;
}
status = xfblasMalloc(&d_b, k,n,sizeof(*b), l_numKernel-1);
if (status != XFBLAS_STATUS_SUCCESS) {
cout<<"Malloc memory for matrix B failed with error code: "<< status << "\n";
return EXIT_FAILURE;
}
status = xfblasMalloc(&d_c, m,n,sizeof(*c), l_numKernel-1);
if (status != XFBLAS_STATUS_SUCCESS) {
cout<<"Malloc memory for matrix C failed with error code: "<< status << "\n";
return EXIT_FAILURE;
}
status = xfblasSetMatrix(m,k,sizeof(*a),a,k,d_a, l_numKernel-1);
status = xfblasSetMatrix(k,n,sizeof(*b),b,n,d_b, l_numKernel-1);
status = xfblasSetMatrix(m,n,sizeof(*c),c,n,d_c, l_numKernel-1);
if (status != XFBLAS_STATUS_SUCCESS) {
cout<<"Set Matrix failed with error code: "<< status << "\n";
return EXIT_FAILURE;
}
status = xfblasGemm(XFBLAS_OP_N, XFBLAS_OP_N, m, n, k, 1, d_a, k, d_b, n, 1, d_c, n, l_numKernel-1);
if (status != XFBLAS_STATUS_SUCCESS) {
cout<<"Matrix Multiplication failed with error code: "<< status << "\n";
return EXIT_FAILURE;
}
status = xfblasGetMatrix(m,n,sizeof(*c),d_c,c,n, l_numKernel-1);
if (status != XFBLAS_STATUS_SUCCESS) {
cout<<"Get Matirx failed with error code: "<< status << "\n";
return EXIT_FAILURE;
}
for ( i = 0; i < m; i ++){
for ( j = 0; j < n; j ++){
cout<< (c[ IDX2R (i,j, k )])<<" ";
}
cout<<"\n";
}
// 590 605 620 635 650
// 1490 1530 1570 1610 1650
// 2390 2455 2520 2585 2650
// 3290 3380 3470 3560 3650
// 4190 4305 4420 4535 4650
xfblasFree(d_a, l_numKernel-1);
xfblasFree(d_b, l_numKernel-1);
xfblasFree(d_c, l_numKernel-1);
xfblasDestroy(l_numKernel);
free(a);
free(b);
free(c);
}
2. xfblasGemm - restricted memory version
#include <iomanip>
#include "xf_blas.hpp"
# define IDX2R(i, j, ld) (((i) * (ld)) + (j))
# define m 128 // a - mxk matrix
# define n 128 // b - kxn matrix
# define k 128 // c - mxn matrix
using namespace std;
int main(int argc, char **argv) {
if (argc < 3){
cerr << " usage: \n"
<< " gemm_test.exe gemx.xclbin config_info.dat 1\n"
<< " gemm_test.exe gemx.xclbin config_info.dat\n";
return EXIT_FAILURE;
}
unsigned int l_argIdx = 1;
string l_xclbinFile(argv[l_argIdx++]);
string l_configFile(argv[l_argIdx++]);
string l_logFile;
ofstream logFile("xrt_report.txt");
logFile.close();
l_logFile = "xrt_report.txt";
int l_numKernel = 1;
if (argc == 4){
cout<<"read custom number of kernels\n";
l_numKernel = stoi(argv[l_argIdx++]);
}
xfblasEngine_t engineName = XFBLAS_ENGINE_GEMM;
xfblasStatus_t status = xfblasCreate(l_xclbinFile.c_str(), l_configFile, l_logFile.c_str(), engineName, l_numKernel);
if (status != XFBLAS_STATUS_SUCCESS) {
cout<<"Create Handle failed with error code: "<< status << "\n";
xfblasDestroy();
return EXIT_FAILURE;
}
int i, j; // i-row l_numKernel -1 ,j- column l_numKernel -1
XFBLAS_dataType * a, * b, * c;
posix_memalign((void** )&a, 4096, m*k* sizeof ( XFBLAS_dataType ));
posix_memalign((void** )&b, 4096, k*n* sizeof ( XFBLAS_dataType ));
posix_memalign((void** )&c, 4096, m*n* sizeof ( XFBLAS_dataType ));
int ind = 1;
for( i = 0; i< m; i ++){
for( j = 0; j < k; j ++){
a[ IDX2R (i,j,k )]= (XFBLAS_dataType) ind++;
}
}
ind = 1;
for( i = 0; i< k; i ++){
for( j = 0; j < n; j ++){
b[ IDX2R (i,j,n )]= (XFBLAS_dataType) ind++;
}
}
for( i = 0; i< m; i ++){
for( j = 0; j < n; j ++){
c[ IDX2R (i,j,n )]= 0;
}
}
status = xfblasMallocRestricted(m,k,sizeof(*a),a,k, l_numKernel-1);
if (status != XFBLAS_STATUS_SUCCESS) {
cout<<"Malloc memory for matrix A failed with error code: "<< status << "\n";
xfblasDestroy();
return EXIT_FAILURE;
}
status = xfblasMallocRestricted(k,n,sizeof(*b),b,n, l_numKernel-1);
if (status != XFBLAS_STATUS_SUCCESS) {
cout<<"Malloc memory for matrix B failed with error code: "<< status << "\n";
xfblasDestroy();
return EXIT_FAILURE;
}
status = xfblasMallocRestricted(m,n,sizeof(*c),c,n, l_numKernel-1);
if (status != XFBLAS_STATUS_SUCCESS) {
cout<<"Malloc memory for matrix C failed with error code: "<< status << "\n";
xfblasDestroy();
return EXIT_FAILURE;
}
status = xfblasSetMatrixRestricted(a, l_numKernel-1);
status = xfblasSetMatrixRestricted(b, l_numKernel-1);
status = xfblasSetMatrixRestricted(c, l_numKernel-1);
if (status != XFBLAS_STATUS_SUCCESS) {
cout<<"Set Matrix failed with error code: "<< status << "\n";
xfblasDestroy();
return EXIT_FAILURE;
}
status = xfblasGemm(XFBLAS_OP_N, XFBLAS_OP_N, m, n, k, 1, a, k, b, n, 1, c, n, l_numKernel-1);
if (status != XFBLAS_STATUS_SUCCESS) {
cout<<"Matrix Multiplication failed with error code: "<< status << "\n";
xfblasDestroy();
return EXIT_FAILURE;
}
status = xfblasGetMatrixRestricted(c, l_numKernel-1);
if (status != XFBLAS_STATUS_SUCCESS) {
cout<<"Get Matirx failed with error code: "<< status << "\n";
xfblasDestroy();
return EXIT_FAILURE;
}
for ( i = 0; i < m; i ++){
for ( j = 0; j < n; j ++){
cout<< (c[ IDX2R (i,j, k )])<<" ";
}
cout<<"\n";
}
xfblasFree(a, l_numKernel-1);
xfblasFree(b, l_numKernel-1);
xfblasFree(c, l_numKernel-1);
free(a);
free(b);
free(c);
xfblasDestroy(l_numKernel);
return EXIT_SUCCESS;
}
3. xfblasGemm - pre-allocated memory version
#include "xf_blas.hpp"
# define IDX2R(i, j, ld) (((i) * (ld)) + (j))
# define m 5 // a - mxk matrix
# define n 5 // b - kxn matrix
# define k 5 // c - mxn matrix
using namespace std;
int main(int argc, char **argv) {
if (argc < 3){
cerr << " usage: \n"
<< " gemm_pre_allocated_test.exe gemx.xclbin config_info.dat\n";
return EXIT_FAILURE;
}
unsigned int l_argIdx = 1;
string l_xclbinFile(argv[l_argIdx++]);
string l_configFile(argv[l_argIdx++]);
string l_logFile;
ofstream logFile("xrt_report.txt");
logFile.close();
l_logFile = "xrt_report.txt";
int i, j; // i-row index ,j- column index
XFBLAS_dataType * a, * b, * c;
int padded_lda, padded_ldb, padded_ldc;
xfblasEngine_t engineName = XFBLAS_ENGINE_GEMM;
xfblasStatus_t status = XFBLAS_STATUS_SUCCESS;
status = xfblasCreate(l_xclbinFile.c_str(), l_configFile, l_logFile.c_str(), engineName);
if (status != XFBLAS_STATUS_SUCCESS) {
cout<<"Create Handle failed with error code: "<< status << "\n";
return EXIT_FAILURE;
}
status = xfblasMallocManaged(&a, &padded_lda, m,k,sizeof(*a));
if (status != XFBLAS_STATUS_SUCCESS) {
cout<<"Malloc memory for matrix A failed with error code: "<< status << "\n";
return EXIT_FAILURE;
}
status = xfblasMallocManaged(&b, &padded_ldb, k,n,sizeof(*b));
if (status != XFBLAS_STATUS_SUCCESS) {
cout<<"Malloc memory for matrix B failed with error code: "<< status << "\n";
return EXIT_FAILURE;
}
status = xfblasMallocManaged(&c, &padded_ldc, m,n,sizeof(*c));
if (status != XFBLAS_STATUS_SUCCESS) {
cout<<"Malloc memory for matrix C failed with error code: "<< status << "\n";
return EXIT_FAILURE;
}
int ind = 1;
for( i = 0; i< m; i ++){
for( j = 0; j < k; j ++){
a[ IDX2R (i,j,padded_lda)]=( XFBLAS_dataType ) ind++;
}
}
for( i = 0; i< k; i ++){
for( j = 0; j < n; j ++){
b[ IDX2R (i,j,padded_ldb )]=( XFBLAS_dataType ) ind++;
}
}
for( i = 0; i< m; i ++){
for( j = 0; j < n; j ++){
c[ IDX2R (i,j,padded_ldc )]= 1;
}
}
cout<< "C before running GEMM\n";
for ( i = 0; i < m; i ++){
for ( j = 0; j < n; j ++){
cout<< (c[ IDX2R (i,j,padded_ldc)])<<" ";
}
cout<<"\n";
}
status = xfblasGemm(XFBLAS_OP_N, XFBLAS_OP_N, m, n, k, 1, a, k, b, n, 1, c, n);
status = xfblasDeviceSynchronize();
if (status != XFBLAS_STATUS_SUCCESS) {
cout<<"Matrix Multiplication failed with error code: "<< status << "\n";
return EXIT_FAILURE;
}
cout<<"C after running GEMM\n";
for ( i = 0; i < m; i ++){
for ( j = 0; j < n; j ++){
cout<< (c[ IDX2R (i,j, padded_ldc)])<<" ";
}
cout<<"\n";
}
// 591 606 621 636 651
// 1491 1531 1571 1611 1651
// 2391 2456 2521 2586 2651
// 3291 3381 3471 3561 3651
// 4191 4306 4421 4536 4651
xfblasFree(a);
xfblasFree(b);
xfblasFree(c);
xfblasDestroy();
}