API Functions of xf::sparse

xBarCol

#include "xf_sparse/cscmv.hpp"
template <
    unsigned int t_LogParEntries,
    typename t_DataType,
    typename t_IndexType = unsigned int,
    unsigned int t_DataBits = 32,
    unsigned int t_IndexBits = 32
    >
void xBarCol (
    const unsigned int p_colPtrBlocks,
    const unsigned int p_nnzBlocks
    )

xBarCol function that distributes input col values to the dedicated banks according to their col index pointers

Parameters:

t_LogParEntries log2 of the number of entries in the input/output vector stream
t_DataType the data type of the matrix and vector entries
t_IndexType the data type of the indices
t_DataBits the number of bits for storing the data
t_IndexBits the number of bits for storing the indices
p_colPtrBlocks the number of col index pointer vectors
p_nnzBlocks the number of NNZ vector blocks
p_colPtrStr the input col pointer vector stream
p_colValStr the input col value vector stream
p_nnzColValStr the output banked col value vector stream

xBarRow

#include "xf_sparse/cscmv.hpp"
template <
    unsigned int t_LogParEntries,
    typename t_DataType,
    typename t_IndexType = unsigned int,
    unsigned int t_DataBits = 32,
    unsigned int t_IndexBits = 32
    >
void xBarRow (const unsigned int p_nnzBlocks)

xBarRow function that multiplies input NNZs’ values with input vectors and distributes the results to the dedicated banks according to their row index pointers

Parameters:

t_LogParEntries log2 of the number of entries in the input/output vector stream
t_DataType the data type of the matrix and vector entries
t_IndexType the data type of the indices
t_DataBits the number of bits for storing the data
t_IndexBits the number of bits for storing the indices
p_nnzBlocks the number of NNZ vector blocks
p_nnzValStr the input NNZ value vector stream
p_nnzColValStr the input col value vector stream
p_rowIndexStr the input NNZ row index vector stream
p_rowEntryStr the output banked multiplication results stream array
p_isEndStr the output control stream

rowInterleave

#include "xf_sparse/cscmv.hpp"
template <
    unsigned int t_LogParEntries,
    unsigned int t_LogParGroups,
    typename t_DataType,
    typename t_IndexType = unsigned int,
    unsigned int t_DataBits = 32,
    unsigned int t_IndexBits = 32
    >
void rowInterleave (
    hls::stream <ap_uint <t_DataBits+t_IndexBits>>& p_rowEntryStr,
    hls::stream <ap_uint <1>>& p_isEndStr,
    hls::stream <ap_uint <t_DataBits+t_IndexBits-t_LogParEntries-t_LogParGroups>> p_rowInterleaveStr [1<< t_LogParGroups],
    hls::stream <ap_uint <1>> p_isEndOutStr [1<< t_LogParGroups]
    )

rowInterleave function that interleaves the row entries to parallel accumulators

Parameters:

t_LogParEntries log2 of the number of entries in the input/output vector stream
t_LogParGroups log2 of the number of parallel accumulation paths
t_DataType the data type of the matrix and vector entries
t_IndexType the data type of the indices
t_DataBits the number of bits for storing the data
t_IndexBits the number of bits for storing the indices
p_rowEntryStr the input row entry stream
p_isEndStr the input control stream
p_rowInterleaveStr the output interleaved row entry stream array
p_isEndOutStr the output control stream

rowRegAcc

#include "xf_sparse/cscmv.hpp"
template <
    unsigned int t_LogParEntriess,
    typename t_DataType,
    typename t_IndexType = unsigned int,
    unsigned int t_DataBits = 32,
    unsigned int t_RowOffsetBits = 32
    >
void rowRegAcc (
    hls::stream <ap_uint <t_DataBits+t_RowOffsetBits>>& p_rowEntryStr,
    hls::stream <ap_uint <1>>& p_isEndStr,
    hls::stream <ap_uint <t_DataBits+t_RowOffsetBits>>& p_rowRegAccStr,
    hls::stream <ap_uint <1>>& p_isEndOutStr
    )

rowRegAcc function that returns the accumulated results in the register

Parameters:

t_LogParEntries log2 of the number of entries in the input/output vector stream
t_DataType the data type of the matrix and vector entries
t_IndexType the data type of the indices
t_DataBits the number of bits for storing the data
t_RowOffsetBits the number of bits for storing the row offsets
p_rowEntryStr the input row entry stream
p_isEndStr the input control stream
p_rowRegAccStr the output accumulated row entry stream
p_isEndOutStr the output control stream

rowAcc

#include "xf_sparse/cscmv.hpp"
template <
    unsigned int t_MaxRowBlocks,
    unsigned int t_LogParEntries,
    unsigned int t_LogParGroups,
    typename t_DataType,
    typename t_IndexType = unsigned int,
    unsigned int t_DataBits = 32,
    unsigned int t_RowOffsetBits = 32
    >
void rowAcc (
    const unsigned int p_rowBlocks,
    hls::stream <ap_uint <t_DataBits+t_RowOffsetBits>>& p_rowEntryStr,
    hls::stream <ap_uint <1>>& p_isEndStr,
    hls::stream <ap_uint <t_DataBits>>& p_rowValStr
    )

rowAcc function that returns the accumulated results

Parameters:

t_MaxRowBlocks the maximum number of row entries buffered onchip per PE
t_LogParEntries log2 of the number of entries in the input/output vector stream
t_LogParGroups log2 of the number of parallel accumulation paths
t_DataType the data type of the matrix and vector entries
t_IndexType the data type of the indices
t_DataBits the number of bits for storing the data
t_IndexBits the number of bits for storing the indices
t_RowOffsetBits the number of bits for storing the row offsets
p_rowBlocks the number of row vectors
p_isEndStr the input control stream
p_nnzColValStr the input col vector stream
p_rowIndexStr the input NNZ index vector stream
p_rowValStr the output row entry stream

rowAgg

#include "xf_sparse/cscmv.hpp"
template <
    unsigned int t_ParEntries,
    unsigned int t_ParGroups,
    typename t_DataType,
    typename t_IndexType,
    unsigned int t_DataBits = 32
    >
void rowAgg (
    const unsigned int p_rowBlocks,
    hls::stream <ap_uint <t_DataBits>> p_rowValStr [t_ParEntries][t_ParGroups],
    hls::stream <ap_uint <t_DataBits*t_ParEntries>>& p_rowAggStr
    )

rowAgg function that aggregates multiple row entry streams into one row vector stream

Parameters:

t_ParEntries the number of entries in the input/output vector stream
t_ParGroups the number of parallel accumulation paths
t_DataType the data type of the matrix and vector entries
t_IndexType the data type of the indices
t_DataBits the number of bits for storing the data
p_rowBlocks the number of row vectors
p_rowValStr the input row entry stream array
p_rowAggStr the output aggregated row vector stream

cscRow

#include "xf_sparse/cscmv.hpp"
template <
    unsigned int t_MaxRowBlocks,
    unsigned int t_LogParEntries,
    unsigned int t_LogParGroups,
    typename t_DataType,
    typename t_IndexType = unsigned int,
    unsigned int t_DataBits = 32,
    unsigned int t_IndexBits = 32
    >
void cscRow (
    const unsigned int p_nnzBlocks,
    const unsigned int p_rowBlocks
    )

cscRow function that returns the multiplication results of a sparse matrix and a dense vector

Parameters:

t_MaxRowBlocks the maximum number of row entries buffered onchip per PE
t_LogParEntries log2 of the number of entries in the input/output vector stream
t_LogParGroups log2 of the number of parallel accumulation paths
t_DataType the data type of the matrix and vector entries
t_IndexType the data type of the indices
t_DataBits the number of bits for storing the data
t_IndexBits the number of bits for storing the indices
p_nnzBlocks the number of NNZ vectors
p_rowBlocks the number of row vectors
p_nnzValStr the input NNZ value vector stream
p_nnzColValStr the input col vector stream
p_rowIndexStr the input NNZ index vector stream
p_rowAggStr the output row vector stream

dispColVec

#include "xf_sparse/moverL1.hpp"
template <
    unsigned int t_MaxColParBlocks,
    unsigned int t_HbmChannels,
    unsigned int t_ParEntries,
    unsigned int t_DataBits
    >
void dispColVec (
    const unsigned int t_chId,
    hls::stream <ap_uint <32>>& p_paramStr,
    hls::stream <ap_uint <t_DataBits*t_ParEntries>>& p_datStr,
    hls::stream <ap_uint <32>>& p_paramFwdStr,
    hls::stream <ap_uint <t_DataBits*t_ParEntries>>& p_datFwdStr,
    hls::stream <ap_uint <32>>& p_paramOutStr,
    hls::stream <ap_uint <t_DataBits*t_ParEntries>>& p_datOutStr
    )

dispColVec function that forwards and copies the input column vector and parameter streams

Parameters:

t_MaxColParBlocks the maximum number of parallel processed column vectors buffered in on-chip memory
t_HbmChannels number of HBM channels
t_ParEntries number of entries processed in parallel
t_DataBits number of bits used to store each entry
t_chId constant HBM channel ID
p_paramStr 32-bit input parameter stream
p_datStr input vector stream
p_paramFwdStr a forwarded output 32-bit parameter stream
p_datFwdStr a forwarded column vector stream
p_paramOutStr a copied output 32-bit parameter stream
p_datOutStr a copied column vector stream

dispCol

#include "xf_sparse/moverL1.hpp"
template <
    unsigned int t_MaxColParBlocks,
    unsigned int t_HbmChannels,
    unsigned int t_ParEntries,
    unsigned int t_DataBits
    >
void dispCol (
    hls::stream <ap_uint <32>>& p_paramStr,
    hls::stream <ap_uint <t_DataBits*t_ParEntries>>& p_datStr,
    hls::stream <ap_uint <32>> p_paramOutStr [t_HbmChannels],
    hls::stream <ap_uint <t_DataBits*t_ParEntries>> p_datOutStr [t_HbmChannels]
    )

dispCol function that dispatches input column vectors across parallel cscmv engines

Parameters:

t_MaxColParBlocks the maximum number of parallel processed column vectors buffered in each cscmv engine
t_HbmChannels number of HBM channels
t_ParEntries number of entries processed in parallel
t_DataBits number of bits used to store each entry
p_paramStr 32-bit input parameter stream
p_datStr input vector stream
p_paramOutStr an output array of 32-bit parameter streams
p_datOutStr an output array of column vector streams

dispNnzCol

#include "xf_sparse/moverL1.hpp"
template <
    unsigned int t_MaxColParBlocks,
    unsigned int t_HbmChannels,
    unsigned int t_ParEntries,
    unsigned int t_DataBits
    >
void dispNnzCol (
    hls::stream <ap_uint <32>>& p_paramStr,
    hls::stream <ap_uint <t_DataBits*t_ParEntries>>& p_datStr,
    hls::stream <ap_uint <32>> p_paramOutStr [t_HbmChannels],
    hls::stream <ap_uint <t_DataBits*t_ParEntries>> p_datOutStr [t_HbmChannels]
    )

dispNnzCol function that dispatches NNZ Col pointer vectors across parallel cscmv engines

Parameters:

t_MaxColParBlocks the maximum number of parallel processed column vectors buffered in each cscmv engine
t_HbmChannels number of HBM channels
t_ParEntries number of entries processed in parallel
t_DataBits number of bits used to store each entry
p_paramStr 32-bit input parameter stream
p_datStr input vector stream
p_paramOutStr an output array of 32-bit parameter streams
p_datOutStr an output array of vector streams