API Functions of xf::sparse

xBarCol

#include "xf_sparse/cscmv.hpp"
template <
    unsigned int t_LogParEntries,
    typename t_DataType,
    typename t_IndexType = unsigned int,
    unsigned int t_DataBits = 32,
    unsigned int t_IndexBits = 32
    >
void xBarCol (
    const unsigned int p_colPtrBlocks,
    const unsigned int p_nnzBlocks
    )

xBarCol function that distributes input col values to the dedicated banks according to their col index pointers

Parameters:

t_LogParEntries log2 of the parallelly processed entries in the input/output vector stream
t_DataType the data type of the matrix and vector entries
t_IndexType the data type of the indices
t_DataBits the number of bits for storing the data
t_IndexBits the number of bits for storing the indices
p_colPtrBlocks the number of col index pointer blocks
p_nnzBlocks the number of NNZ blocks
p_colPtrStr the input col pointer vector stream
p_colValStr the input col value vector stream
p_nnzColValStr the output banked col value vector stream

xBarRow

#include "xf_sparse/cscmv.hpp"
template <
    unsigned int t_LogParEntries,
    typename t_DataType,
    typename t_IndexType = unsigned int,
    unsigned int t_DataBits = 32,
    unsigned int t_IndexBits = 32
    >
void xBarRow (const unsigned int p_nnzBlocks)

xBarRow function that multiplies input NNZs’ values with input vectors and distributes the results to the dedicated banks according to their row indices

Parameters:

t_LogParEntries log2 of the parallelly processed entries in the input/output vector stream
t_DataType the data type of the matrix and vector entries
t_IndexType the data type of the indices
t_DataBits the number of bits for storing the data
t_IndexBits the number of bits for storing the indices
p_nnzBlocks the number of NNZ blocks
p_nnzValStr the input NNZ value stream
p_nnzColValStr the input col value stream
p_rowIndexStr the input NNZ row index stream
p_rowEntryStr the output banked multiplication results stream array
p_isEndStr the output control stream

rowAgg

#include "xf_sparse/cscmv.hpp"
template <
    unsigned int t_ParEntries,
    unsigned int t_ParGroups,
    typename t_DataType,
    typename t_IndexType,
    unsigned int t_DataBits = 32
    >
void rowAgg (
    const unsigned int p_rowBlocks,
    hls::stream <ap_uint <t_DataBits>> p_rowValStr [t_ParEntries],
    hls::stream <ap_uint <t_DataBits*t_ParEntries>>& p_rowAggStr
    )

rowAgg function that aggregates multiple row entry streams into one row entry stream

Parameters:

t_ParEntries the parallelly processed entries in the input/output vector stream
t_ParGroups the number of parallel accumulation paths
t_DataType the data type of the matrix and vector entries
t_IndexType the data type of the indices
t_DataBits the number of bits for storing the data
p_rowBlocks the number of row blocks
p_rowValStr the input row entry stream array
p_rowAggStr the output aggregated row entry stream

cscRow

#include "xf_sparse/cscmv.hpp"
template <
    unsigned int t_MaxRowBlocks,
    unsigned int t_LogParEntries,
    unsigned int t_LogParGroups,
    typename t_DataType,
    typename t_IndexType = unsigned int,
    unsigned int t_DataBits = 32,
    unsigned int t_IndexBits = 32
    >
void cscRow (
    const unsigned int p_nnzBlocks,
    const unsigned int p_rowBlocks
    )

cscRow function that returns the multiplication results of a sparse matrix and a dense vector

Parameters:

t_MaxRowBlocks the maximum number of row entry blocks buffered on-chip per PE
t_LogParEntries log2 of the parallelly processed entries in the input/output vector stream
t_LogParGroups log2 of the number of parallel accumulation paths
t_DataType the data type of the matrix and vector entries
t_IndexType the data type of the indices
t_DataBits the number of bits for storing the data
t_IndexBits the number of bits for storing the indices
p_nnzBlocks the number of NNZ vector blocks
p_rowBlocks the number of result row vector blocks
p_nnzValStr the input NNZ value vector stream
p_nnzColValStr the input col vector stream
p_rowIndexStr the input NNZ index vector stream
p_rowAggStr the output row vector stream

dispColVec

#include "xf_sparse/moverL1.hpp"
template <
    unsigned int t_MaxColParBlocks,
    unsigned int t_ParBlocks4Param,
    unsigned int t_HbmChannels,
    unsigned int t_ParEntries,
    unsigned int t_DataBits
    >
void dispColVec (
    const unsigned int t_chId,
    hls::stream <ap_uint <t_DataBits*t_ParEntries>>& p_datStr,
    hls::stream <ap_uint <t_DataBits*t_ParEntries>>& p_datFwdStr,
    hls::stream <ap_uint <t_DataBits*t_ParEntries>>& p_datOutStr
    )

dispColVec function that forwards and copies the input column vector and parameters

Parameters:

t_MaxColParBlocks the maximum number of parallel processed column blocks buffered in on-chip memory
t_ParBlocks4Param the number of parallelly processed parameter blocks
t_HbmChannels number of HBM channels
t_ParEntries parallelly processed entries
t_DataBits number of bits used to store each entry
t_chId constant HBM channel ID
p_datStr input vector stream
p_datFwdStr the forwarded parameter and column vector stream
p_datOutStr the copied parameter and column vector stream

dispCol

#include "xf_sparse/moverL1.hpp"
template <
    unsigned int t_MaxColParBlocks,
    unsigned int t_ParBlocks4Param,
    unsigned int t_HbmChannels,
    unsigned int t_ParEntries,
    unsigned int t_DataBits
    >
void dispCol (
    hls::stream <ap_uint <t_DataBits*t_ParEntries>>& p_datStr,
    hls::stream <ap_uint <t_DataBits*t_ParEntries>> p_datOutStr [t_HbmChannels]
    )

dispCol function that dispatches input column vectors across parallel CUs for computing SpMV simultaneously

Parameters:

t_MaxColParBlocks the maximum number of parallelly processed column vector entries in the on-chip buffer
t_ParBlocks4Param the number of parallelly processed parameter blocks
t_HbmChannels number of HBM channels
t_ParEntries parallelly processed entries
t_DataBits number of bits used to store each entry
p_datStr input vector stream
p_datOutStr an output array of column vector streams

dispNnzCol

#include "xf_sparse/moverL1.hpp"
template <
    unsigned int t_MaxColParBlocks,
    unsigned int t_ParBlocks4Param,
    unsigned int t_HbmChannels,
    unsigned int t_ParEntries,
    unsigned int t_DataBits
    >
void dispNnzCol (
    hls::stream <ap_uint <t_DataBits*t_ParEntries>>& p_datStr,
    hls::stream <ap_uint <t_DataBits*t_ParEntries>> p_datOutStr [t_HbmChannels]
    )

dispNnzCol function that dispatches NNZ Col pointer entries across parallel compute CUs

Parameters:

t_MaxColParBlocks the maximum number of parallelly processed column entries in the on-chip buffer
t_ParBlocks4Param the number of parallelly processed parameter blocks
t_HbmChannels number of HBM channels
t_ParEntries parallelly processed entries
t_DataBits number of bits used to store each entry
p_datStr input vector stream
p_datOutStr an output array of vector streams