API Functions of `xf::sparse`¶

xBarCol¶

#include "xf_sparse/cscmv.hpp"

template <
    unsigned int t_LogParEntries,
    typename t_DataType,
    typename t_IndexType = unsigned int,
    unsigned int t_DataBits = 32,
    unsigned int t_IndexBits = 32
    >
void xBarCol (
    const unsigned int p_colPtrBlocks,
    const unsigned int p_nnzBlocks
    )

xBarCol function that distributes input col values to the dedicated banks according to their col index pointers

Parameters:

t_LogParEntries	log2 of the number of entries in the input/output vector stream
t_DataType	the data type of the matrix and vector entries
t_IndexType	the data type of the indices
t_DataBits	the number of bits for storing the data
t_IndexBits	the number of bits for storing the indices
p_colPtrBlocks	the number of col index pointer vectors
p_nnzBlocks	the number of NNZ vector blocks
p_colPtrStr	the input col pointer vector stream
p_colValStr	the input col value vector stream
p_nnzColValStr	the output banked col value vector stream

xBarRow¶

#include "xf_sparse/cscmv.hpp"

template <
    unsigned int t_LogParEntries,
    typename t_DataType,
    typename t_IndexType = unsigned int,
    unsigned int t_DataBits = 32,
    unsigned int t_IndexBits = 32
    >
void xBarRow (const unsigned int p_nnzBlocks)

xBarRow function that multiplies input NNZs’ values with input vectors and distributes the results to the dedicated banks according to their row index pointers

Parameters:

t_LogParEntries	log2 of the number of entries in the input/output vector stream
t_DataType	the data type of the matrix and vector entries
t_IndexType	the data type of the indices
t_DataBits	the number of bits for storing the data
t_IndexBits	the number of bits for storing the indices
p_nnzBlocks	the number of NNZ vector blocks
p_nnzValStr	the input NNZ value vector stream
p_nnzColValStr	the input col value vector stream
p_rowIndexStr	the input NNZ row index vector stream
p_rowEntryStr	the output banked multiplication results stream array
p_isEndStr	the output control stream

rowInterleave¶

#include "xf_sparse/cscmv.hpp"

template <
    unsigned int t_LogParEntries,
    unsigned int t_LogParGroups,
    typename t_DataType,
    typename t_IndexType = unsigned int,
    unsigned int t_DataBits = 32,
    unsigned int t_IndexBits = 32
    >
void rowInterleave (
    hls::stream <ap_uint <t_DataBits+t_IndexBits>>& p_rowEntryStr,
    hls::stream <ap_uint <1>>& p_isEndStr,
    hls::stream <ap_uint <t_DataBits+t_IndexBits-t_LogParEntries-t_LogParGroups>> p_rowInterleaveStr [1<< t_LogParGroups],
    hls::stream <ap_uint <1>> p_isEndOutStr [1<< t_LogParGroups]
    )

rowInterleave function that interleaves the row entries to parallel accumulators

Parameters:

t_LogParEntries	log2 of the number of entries in the input/output vector stream
t_LogParGroups	log2 of the number of parallel accumulation paths
t_DataType	the data type of the matrix and vector entries
t_IndexType	the data type of the indices
t_DataBits	the number of bits for storing the data
t_IndexBits	the number of bits for storing the indices
p_rowEntryStr	the input row entry stream
p_isEndStr	the input control stream
p_rowInterleaveStr	the output interleaved row entry stream array
p_isEndOutStr	the output control stream

rowRegAcc¶

#include "xf_sparse/cscmv.hpp"

template <
    unsigned int t_LogParEntriess,
    typename t_DataType,
    typename t_IndexType = unsigned int,
    unsigned int t_DataBits = 32,
    unsigned int t_RowOffsetBits = 32
    >
void rowRegAcc (
    hls::stream <ap_uint <t_DataBits+t_RowOffsetBits>>& p_rowEntryStr,
    hls::stream <ap_uint <1>>& p_isEndStr,
    hls::stream <ap_uint <t_DataBits+t_RowOffsetBits>>& p_rowRegAccStr,
    hls::stream <ap_uint <1>>& p_isEndOutStr
    )

rowRegAcc function that returns the accumulated results in the register

Parameters:

t_LogParEntries	log2 of the number of entries in the input/output vector stream
t_DataType	the data type of the matrix and vector entries
t_IndexType	the data type of the indices
t_DataBits	the number of bits for storing the data
t_RowOffsetBits	the number of bits for storing the row offsets
p_rowEntryStr	the input row entry stream
p_isEndStr	the input control stream
p_rowRegAccStr	the output accumulated row entry stream
p_isEndOutStr	the output control stream

rowAcc¶

#include "xf_sparse/cscmv.hpp"

template <
    unsigned int t_MaxRowBlocks,
    unsigned int t_LogParEntries,
    unsigned int t_LogParGroups,
    typename t_DataType,
    typename t_IndexType = unsigned int,
    unsigned int t_DataBits = 32,
    unsigned int t_RowOffsetBits = 32
    >
void rowAcc (
    const unsigned int p_rowBlocks,
    hls::stream <ap_uint <t_DataBits+t_RowOffsetBits>>& p_rowEntryStr,
    hls::stream <ap_uint <1>>& p_isEndStr,
    hls::stream <ap_uint <t_DataBits>>& p_rowValStr
    )

rowAcc function that returns the accumulated results

Parameters:

t_MaxRowBlocks	the maximum number of row entries buffered onchip per PE
t_LogParEntries	log2 of the number of entries in the input/output vector stream
t_LogParGroups	log2 of the number of parallel accumulation paths
t_DataType	the data type of the matrix and vector entries
t_IndexType	the data type of the indices
t_DataBits	the number of bits for storing the data
t_IndexBits	the number of bits for storing the indices
t_RowOffsetBits	the number of bits for storing the row offsets
p_rowBlocks	the number of row vectors
p_isEndStr	the input control stream
p_nnzColValStr	the input col vector stream
p_rowIndexStr	the input NNZ index vector stream
p_rowValStr	the output row entry stream

rowAgg¶

#include "xf_sparse/cscmv.hpp"

template <
    unsigned int t_ParEntries,
    unsigned int t_ParGroups,
    typename t_DataType,
    typename t_IndexType,
    unsigned int t_DataBits = 32
    >
void rowAgg (
    const unsigned int p_rowBlocks,
    hls::stream <ap_uint <t_DataBits>> p_rowValStr [t_ParEntries][t_ParGroups],
    hls::stream <ap_uint <t_DataBits*t_ParEntries>>& p_rowAggStr
    )

rowAgg function that aggregates multiple row entry streams into one row vector stream

Parameters:

t_ParEntries	the number of entries in the input/output vector stream
t_ParGroups	the number of parallel accumulation paths
t_DataType	the data type of the matrix and vector entries
t_IndexType	the data type of the indices
t_DataBits	the number of bits for storing the data
p_rowBlocks	the number of row vectors
p_rowValStr	the input row entry stream array
p_rowAggStr	the output aggregated row vector stream

cscRow¶

#include "xf_sparse/cscmv.hpp"

template <
    unsigned int t_MaxRowBlocks,
    unsigned int t_LogParEntries,
    unsigned int t_LogParGroups,
    typename t_DataType,
    typename t_IndexType = unsigned int,
    unsigned int t_DataBits = 32,
    unsigned int t_IndexBits = 32
    >
void cscRow (
    const unsigned int p_nnzBlocks,
    const unsigned int p_rowBlocks
    )

cscRow function that returns the multiplication results of a sparse matrix and a dense vector

Parameters:

t_MaxRowBlocks	the maximum number of row entries buffered onchip per PE
t_LogParEntries	log2 of the number of entries in the input/output vector stream
t_LogParGroups	log2 of the number of parallel accumulation paths
t_DataType	the data type of the matrix and vector entries
t_IndexType	the data type of the indices
t_DataBits	the number of bits for storing the data
t_IndexBits	the number of bits for storing the indices
p_nnzBlocks	the number of NNZ vectors
p_rowBlocks	the number of row vectors
p_nnzValStr	the input NNZ value vector stream
p_nnzColValStr	the input col vector stream
p_rowIndexStr	the input NNZ index vector stream
p_rowAggStr	the output row vector stream

dispColVec¶

#include "xf_sparse/moverL1.hpp"

template <
    unsigned int t_MaxColParBlocks,
    unsigned int t_HbmChannels,
    unsigned int t_ParEntries,
    unsigned int t_DataBits
    >
void dispColVec (
    const unsigned int t_chId,
    hls::stream <ap_uint <32>>& p_paramStr,
    hls::stream <ap_uint <t_DataBits*t_ParEntries>>& p_datStr,
    hls::stream <ap_uint <32>>& p_paramFwdStr,
    hls::stream <ap_uint <t_DataBits*t_ParEntries>>& p_datFwdStr,
    hls::stream <ap_uint <32>>& p_paramOutStr,
    hls::stream <ap_uint <t_DataBits*t_ParEntries>>& p_datOutStr
    )

dispColVec function that forwards and copies the input column vector and parameter streams

Parameters:

t_MaxColParBlocks	the maximum number of parallel processed column vectors buffered in on-chip memory
t_HbmChannels	number of HBM channels
t_ParEntries	number of entries processed in parallel
t_DataBits	number of bits used to store each entry
t_chId	constant HBM channel ID
p_paramStr	32-bit input parameter stream
p_datStr	input vector stream
p_paramFwdStr	a forwarded output 32-bit parameter stream
p_datFwdStr	a forwarded column vector stream
p_paramOutStr	a copied output 32-bit parameter stream
p_datOutStr	a copied column vector stream

dispCol¶

#include "xf_sparse/moverL1.hpp"

template <
    unsigned int t_MaxColParBlocks,
    unsigned int t_HbmChannels,
    unsigned int t_ParEntries,
    unsigned int t_DataBits
    >
void dispCol (
    hls::stream <ap_uint <32>>& p_paramStr,
    hls::stream <ap_uint <t_DataBits*t_ParEntries>>& p_datStr,
    hls::stream <ap_uint <32>> p_paramOutStr [t_HbmChannels],
    hls::stream <ap_uint <t_DataBits*t_ParEntries>> p_datOutStr [t_HbmChannels]
    )

dispCol function that dispatches input column vectors across parallel cscmv engines

Parameters:

t_MaxColParBlocks	the maximum number of parallel processed column vectors buffered in each cscmv engine
t_HbmChannels	number of HBM channels
t_ParEntries	number of entries processed in parallel
t_DataBits	number of bits used to store each entry
p_paramStr	32-bit input parameter stream
p_datStr	input vector stream
p_paramOutStr	an output array of 32-bit parameter streams
p_datOutStr	an output array of column vector streams

dispNnzCol¶

#include "xf_sparse/moverL1.hpp"

template <
    unsigned int t_MaxColParBlocks,
    unsigned int t_HbmChannels,
    unsigned int t_ParEntries,
    unsigned int t_DataBits
    >
void dispNnzCol (
    hls::stream <ap_uint <32>>& p_paramStr,
    hls::stream <ap_uint <t_DataBits*t_ParEntries>>& p_datStr,
    hls::stream <ap_uint <32>> p_paramOutStr [t_HbmChannels],
    hls::stream <ap_uint <t_DataBits*t_ParEntries>> p_datOutStr [t_HbmChannels]
    )

dispNnzCol function that dispatches NNZ Col pointer vectors across parallel cscmv engines

Parameters:

t_MaxColParBlocks	the maximum number of parallel processed column vectors buffered in each cscmv engine
t_HbmChannels	number of HBM channels
t_ParEntries	number of entries processed in parallel
t_DataBits	number of bits used to store each entry
p_paramStr	32-bit input parameter stream
p_datStr	input vector stream
p_paramOutStr	an output array of 32-bit parameter streams
p_datOutStr	an output array of vector streams

API Functions of xf::sparse¶

xBarCol¶

xBarRow¶

rowInterleave¶

rowRegAcc¶

rowAcc¶

rowAgg¶

cscRow¶

dispColVec¶

dispCol¶

dispNnzCol¶

API Functions of `xf::sparse`¶