Source code for xfblas_L3

# Copyright 2019 Xilinx, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from ctypes import *
import numpy as np
import sys
import argparse
import os


class XFBLASManager:
    '''
    ctypes wrapper around the XFBLAS L3 shared library

    The constructor loads the library and declares the argument/return types
    of the C entry points; every other method forwards to one of them.
    '''

    def __init__(self, libFile):
        '''
        load the shared library and declare its function prototypes

        Parameters

        libFile
            path of the xfblas shared library, handed to cdll.LoadLibrary
        '''
        self._lib = cdll.LoadLibrary(libFile)
        lib = self._lib
        # All matrix/vector arguments are passed as C-contiguous ndarrays.
        mat = np.ctypeslib.ndpointer(flags="C_CONTIGUOUS")

        lib.xfblasCreate.argtypes = [c_char_p, c_char_p, c_uint, c_uint]
        lib.xfblasCreate.restype = c_bool

        lib.xfblasSend.argtypes = [mat, c_ulonglong, c_uint, c_uint, c_uint]
        lib.xfblasSend.restype = c_bool

        lib.xfblasGet.argtypes = [mat, c_uint, c_uint]
        lib.xfblasGet.restype = c_bool

        # No restype is declared for the next three entry points, so ctypes
        # applies its default return type to them.
        lib.xfblasFreeInstr.argtypes = [c_uint, c_uint]
        lib.xfblasDestroy.argtypes = [c_uint, c_uint]
        lib.xfblasFree.argtypes = [mat, c_uint, c_uint]

        lib.xfblasGemm.argtypes = [
            c_uint, c_uint, c_uint, c_uint,
            mat, c_uint,
            mat, c_uint,
            c_uint,
            mat, c_uint,
            c_uint, c_uint]
        lib.xfblasGemm.restype = c_bool

        lib.xfblasGemv.argtypes = [
            c_uint, c_uint, c_uint,
            mat, c_uint,
            mat, c_uint,
            c_uint,
            mat, c_uint,
            c_uint, c_uint]
        lib.xfblasGemv.restype = c_bool

        lib.xfblasGetByAddress.argtypes = [
            mat, c_ulonglong, c_uint, c_uint, c_uint]
        lib.xfblasGetByAddress.restype = c_bool

        # As above: default ctypes return type.
        lib.xfblasExecuteAsync.argtypes = [c_uint, c_uint]
        lib.xfblasExecute.argtypes = [c_uint, c_uint]

    def createGemm(self, xclbin, numKernel, idxDevice):
        '''
        create Gemm Handle

        Parameters

        xclbin
            file path for FPGA bitstream
        numKernel
            number of CUs in the xclbin
        idxDevice
            index of local device to be used
        '''
        b_xclbin = xclbin.encode('utf-8')
        return self._lib.xfblasCreate(b_xclbin, b'Gemm', numKernel, idxDevice)

    def createGemv(self, xclbin, numKernel, idxDevice):
        '''
        create Gemv Handle

        Parameters

        xclbin
            file path for FPGA bitstream
        numKernel
            number of CUs in the xclbin
        idxDevice
            index of local device to be used
        '''
        b_xclbin = xclbin.encode('utf-8')
        return self._lib.xfblasCreate(b_xclbin, b'Gemv', numKernel, idxDevice)

    def sendMat(self, A, idxKernel, idxDevice):
        '''
        send mat from host to device

        Parameters

        A:          ndarray
                    matrix in host memory
        idxKernel:  int
                    index of kernel to be used
        idxDevice:  int
                    index of local device to be used
        '''
        # The library receives the element count and the per-element size.
        return self._lib.xfblasSend(
            A, c_ulonglong(A.size), c_uint(A.itemsize), idxKernel, idxDevice)

    def getMat(self, A, idxKernel, idxDevice):
        '''
        get mat from device to host

        Parameters

        A:          ndarray
                    matrix in host memory
        idxKernel:  int
                    index of kernel to be used
        idxDevice:  int
                    index of local device to be used
        '''
        return self._lib.xfblasGet(A, idxKernel, idxDevice)

    def freeInstr(self, idxKernel, idxDevice):
        '''
        free memory for instructions

        Parameters

        idxKernel
            index of kernel to be used
        idxDevice
            index of local device to be used
        '''
        return self._lib.xfblasFreeInstr(idxKernel, idxDevice)

    def freeMat(self, A, idxKernel, idxDevice):
        '''
        free device memory for mat A

        Parameters

        A:          ndarray
                    matrix in host memory
        idxKernel:  int
                    index of kernel to be used
        idxDevice:  int
                    index of local device to be used
        '''
        return self._lib.xfblasFree(A, idxKernel, idxDevice)

    def destroy(self, numKernel, idxDevice):
        '''
        release handle used by the XFBLAS library

        Parameters

        numKernel
            number of CUs in the xclbin
        idxDevice
            index of local device to be used
        '''
        return self._lib.xfblasDestroy(numKernel, idxDevice)

    def gemmOp(self, A, B, C, idxKernel, idxDevice):
        '''
        perform matrix-matrix multiplication of C=A*B

        Dimensions and row strides are taken from A.shape and B.shape; the
        two scalar arguments of xfblasGemm are both passed as 1 (presumably
        alpha and beta -- confirm against the C API).

        Parameters

        A:          ndarray
                    matrix in host memory
        B:          ndarray
                    matrix in host memory
        C:          ndarray
                    matrix in host memory
        idxKernel:  int
                    index of kernel to be used
        idxDevice:  int
                    index of local device to be used
        '''
        rows = c_uint(A.shape[0])
        cols = c_uint(B.shape[1])
        inner = c_uint(A.shape[1])
        return self._lib.xfblasGemm(
            rows, cols, inner, 1,
            A, c_uint(A.shape[1]),
            B, c_uint(B.shape[1]),
            1,
            C, c_uint(B.shape[1]),
            idxKernel, idxDevice)

    def gemvOp(self, A, x, y, idxKernel, idxDevice):
        '''
        perform matrix-vector multiplication with matrix A and vectors x, y

        Parameters

        A:          ndarray
                    matrix in host memory
        x:          ndarray
                    vector in host memory
        y:          ndarray
                    vector in host memory
        idxKernel:  int
                    index of kernel to be used
        idxDevice:  int
                    index of local device to be used
        '''
        # The xfblasGemv prototype declared in __init__ has two integers
        # between the x and y pointers (presumably incx and beta -- confirm
        # against the C API); both are passed as 1, mirroring gemmOp. Passing
        # only one of them leaves the call one argument short of the
        # prototype.
        return self._lib.xfblasGemv(
            c_uint(A.shape[0]), c_uint(A.shape[1]), 1,
            A, c_uint(A.shape[1]),
            x, 1,
            1,
            y, 1,
            idxKernel, idxDevice)

    def getMatByAddress(self, A, offset, idxKernel, idxDevice):
        '''
        get mat from device to host through xfblasGetByAddress

        Parameters

        A:          ndarray
                    matrix in host memory; its size in bytes
                    (A.size * A.itemsize) is passed to the library
        offset:     int
                    forwarded unchanged to xfblasGetByAddress
        idxKernel:  int
                    index of kernel to be used
        idxDevice:  int
                    index of local device to be used
        '''
        return self._lib.xfblasGetByAddress(
            A, c_ulonglong(A.size * A.itemsize), offset, idxKernel, idxDevice)

    def executeAsync(self, numKernel, idxDevice):
        '''
        run number of kernels async

        Parameters

        numKernel
            number of CUs in the xclbin
        idxDevice
            index of local device to be used
        '''
        return self._lib.xfblasExecuteAsync(numKernel, idxDevice)

    def execute(self, idxKernel, idxDevice):
        '''
        run ith kernel

        Parameters

        idxKernel:  int
                    index of kernel to be used
        idxDevice:  int
                    index of local device to be used
        '''
        return self._lib.xfblasExecute(idxKernel, idxDevice)
# Module-wide XFBLASManager instance; stays None until createManager() runs
# (createGemm() / createGemv() call it). The wrapper functions below
# dereference it directly, so one of those must be called first.
_xfblasManager = None


def createGemm(args, xclbin_opts, numKernel=1, idxDevice=0):
    '''
    create the manager if needed, then create a Gemm handle

    Parameters

    args
        parsed command line; args.lib and args.xclbin are read
    xclbin_opts
        dict of xclbin properties; 'BLAS_runGemm' must convert to 1
    numKernel
        number of CUs in the xclbin
    idxDevice
        index of local device to be used

    Raises Exception when the xclbin options do not enable the gemm engine.
    '''
    if int(xclbin_opts['BLAS_runGemm']) != 1:
        raise Exception('The xclbin does not include gemm engine.')
    createManager(args.lib)
    return _xfblasManager.createGemm(args.xclbin, numKernel, idxDevice)


def createGemv(args, xclbin_opts, numKernel=1, idxDevice=0):
    '''
    create the manager if needed, then create a Gemv handle

    Parameters

    args
        parsed command line; args.lib and args.xclbin are read
    xclbin_opts
        dict of xclbin properties; 'BLAS_runGemv' must convert to 1
    numKernel
        number of CUs in the xclbin
    idxDevice
        index of local device to be used

    Raises Exception when the xclbin options do not enable the gemv engine.
    '''
    if int(xclbin_opts['BLAS_runGemv']) != 1:
        raise Exception('The xclbin does not include gemv engine.')
    createManager(args.lib)
    return _xfblasManager.createGemv(args.xclbin, numKernel, idxDevice)


def sendMat(A, idxKernel=0, idxDevice=0):
    '''forward to the shared manager's sendMat'''
    return _xfblasManager.sendMat(A, idxKernel, idxDevice)


def getMat(A, idxKernel=0, idxDevice=0):
    '''forward to the shared manager's getMat'''
    return _xfblasManager.getMat(A, idxKernel, idxDevice)


def freeInstr(idxKernel=0, idxDevice=0):
    '''forward to the shared manager's freeInstr'''
    return _xfblasManager.freeInstr(idxKernel, idxDevice)


def freeMat(A, idxKernel=0, idxDevice=0):
    '''forward to the shared manager's freeMat'''
    return _xfblasManager.freeMat(A, idxKernel, idxDevice)


def destroy(numKernel=1, idxDevice=0):
    '''forward to the shared manager's destroy'''
    return _xfblasManager.destroy(numKernel, idxDevice)


def gemmOp(A, B, C, idxKernel=0, idxDevice=0):
    '''forward to the shared manager's gemmOp'''
    return _xfblasManager.gemmOp(A, B, C, idxKernel, idxDevice)


def gemvOp(A, x, y, idxKernel=0, idxDevice=0):
    '''forward to the shared manager's gemvOp'''
    return _xfblasManager.gemvOp(A, x, y, idxKernel, idxDevice)


def getMatByAddress(A, offset, idxKernel=0, idxDevice=0):
    '''forward to the shared manager's getMatByAddress'''
    return _xfblasManager.getMatByAddress(A, offset, idxKernel, idxDevice)


def executeAsync(numKernel=1, idxDevice=0):
    '''forward to the shared manager's executeAsync'''
    return _xfblasManager.executeAsync(numKernel, idxDevice)


def execute(idxKernel=0, idxDevice=0):
    '''forward to the shared manager's execute'''
    return _xfblasManager.execute(idxKernel, idxDevice)


def createManager(libFile):
    '''
    create the shared XFBLASManager on first use

    Later calls keep the existing manager and ignore libFile.
    Always returns True.
    '''
    global _xfblasManager
    if _xfblasManager is None:
        _xfblasManager = XFBLASManager(libFile)
    return True


def parse_cfg(filename):
    '''
    read whitespace-separated name=value tokens from a file into a dict

    Parameters

    filename
        path of the text file to read

    Returns a dict mapping each name to its value (both str). A later
    occurrence of a name overrides an earlier one. Each token is split at
    its first '=' so a value may itself contain '='.

    Raises ValueError for a token that contains no '='.
    '''
    myvars = {}
    with open(filename) as myfile:
        for line in myfile:
            # split() already removes all surrounding whitespace, so the
            # tokens need no further stripping.
            for word in line.split():
                name, sep, value = word.partition("=")
                if not sep:
                    raise ValueError(
                        "expected name=value in %s, got %r" % (filename, word))
                myvars[name] = value
    return myvars


def default_args():
    '''
    build the argument parser with the required --xclbin, --lib and --cfg
    options and return it
    '''
    parser = argparse.ArgumentParser(description='xfblas')
    parser.add_argument(
        '--xclbin',
        required=True,
        help='file path to FPGA bitstream')
    parser.add_argument(
        '--lib',
        required=True,
        help='file path to xfblas shared library')
    parser.add_argument(
        '--cfg',
        required=True,
        help='file describing .xclbin properties')
    return parser


def processCommandLine():
    '''
    parse sys.argv with default_args() and load the --cfg file

    Returns the tuple (args, xclbin_opts) where xclbin_opts is the dict
    produced by parse_cfg(args.cfg).
    '''
    parser = default_args()
    args = parser.parse_args()
    xclbin_opts = parse_cfg(args.cfg)
    return args, xclbin_opts