API Functions of xf::sparse
¶
xBarCol¶
#include "xf_sparse/cscmv.hpp"
template < unsigned int t_LogParEntries, typename t_DataType, typename t_IndexType = unsigned int, unsigned int t_DataBits = 32, unsigned int t_IndexBits = 32 > void xBarCol ( const unsigned int p_colPtrBlocks, const unsigned int p_nnzBlocks )
xBarCol function that distributes input col values to the dedicated banks according to their col index pointers
Parameters:
t_LogParEntries | log2 of the number of entries in the input/output vector stream |
t_DataType | the data type of the matrix and vector entries |
t_IndexType | the data type of the indices |
t_DataBits | the number of bits for storing the data |
t_IndexBits | the number of bits for storing the indices |
p_colPtrBlocks | the number of col index pointer vectors |
p_nnzBlocks | the number of NNZ vector blocks |
p_colPtrStr | the input col pointer vector stream |
p_colValStr | the input col value vector stream |
p_nnzColValStr | the output banked col value vector stream |
xBarRow¶
#include "xf_sparse/cscmv.hpp"
template < unsigned int t_LogParEntries, typename t_DataType, typename t_IndexType = unsigned int, unsigned int t_DataBits = 32, unsigned int t_IndexBits = 32 > void xBarRow (const unsigned int p_nnzBlocks)
xBarRow function that multiplies input NNZs’ values with input vectors and distributes the results to the dedicated banks according to their row index pointers
Parameters:
t_LogParEntries | log2 of the number of entries in the input/output vector stream |
t_DataType | the data type of the matrix and vector entries |
t_IndexType | the data type of the indices |
t_DataBits | the number of bits for storing the data |
t_IndexBits | the number of bits for storing the indices |
p_nnzBlocks | the number of NNZ vector blocks |
p_nnzValStr | the input NNZ value vector stream |
p_nnzColValStr | the input col value vector stream |
p_rowIndexStr | the input NNZ row index vector stream |
p_rowEntryStr | the output banked multiplication results stream array |
p_isEndStr | the output control stream |
rowInterleave¶
#include "xf_sparse/cscmv.hpp"
template < unsigned int t_LogParEntries, unsigned int t_LogParGroups, typename t_DataType, typename t_IndexType = unsigned int, unsigned int t_DataBits = 32, unsigned int t_IndexBits = 32 > void rowInterleave ( hls::stream <ap_uint <t_DataBits+t_IndexBits>>& p_rowEntryStr, hls::stream <ap_uint <1>>& p_isEndStr, hls::stream <ap_uint <t_DataBits+t_IndexBits-t_LogParEntries-t_LogParGroups>> p_rowInterleaveStr [1<< t_LogParGroups], hls::stream <ap_uint <1>> p_isEndOutStr [1<< t_LogParGroups] )
rowInterleave function that interleaves the row entries to parallel accumulators
Parameters:
t_LogParEntries | log2 of the number of entries in the input/output vector stream |
t_LogParGroups | log2 of the number of parallel accumulation paths |
t_DataType | the data type of the matrix and vector entries |
t_IndexType | the data type of the indices |
t_DataBits | the number of bits for storing the data |
t_IndexBits | the number of bits for storing the indices |
p_rowEntryStr | the input row entry stream |
p_isEndStr | the input control stream |
p_rowInterleaveStr | the output interleaved row entry stream array |
p_isEndOutStr | the output control stream |
rowRegAcc¶
#include "xf_sparse/cscmv.hpp"
template < unsigned int t_LogParEntriess, typename t_DataType, typename t_IndexType = unsigned int, unsigned int t_DataBits = 32, unsigned int t_RowOffsetBits = 32 > void rowRegAcc ( hls::stream <ap_uint <t_DataBits+t_RowOffsetBits>>& p_rowEntryStr, hls::stream <ap_uint <1>>& p_isEndStr, hls::stream <ap_uint <t_DataBits+t_RowOffsetBits>>& p_rowRegAccStr, hls::stream <ap_uint <1>>& p_isEndOutStr )
rowRegAcc function that returns the accumulated results in the register
Parameters:
t_LogParEntries | log2 of the number of entries in the input/output vector stream |
t_DataType | the data type of the matrix and vector entries |
t_IndexType | the data type of the indices |
t_DataBits | the number of bits for storing the data |
t_RowOffsetBits | the number of bits for storing the row offsets |
p_rowEntryStr | the input row entry stream |
p_isEndStr | the input control stream |
p_rowRegAccStr | the output accumulated row entry stream |
p_isEndOutStr | the output control stream |
rowAcc¶
#include "xf_sparse/cscmv.hpp"
template < unsigned int t_MaxRowBlocks, unsigned int t_LogParEntries, unsigned int t_LogParGroups, typename t_DataType, typename t_IndexType = unsigned int, unsigned int t_DataBits = 32, unsigned int t_RowOffsetBits = 32 > void rowAcc ( const unsigned int p_rowBlocks, hls::stream <ap_uint <t_DataBits+t_RowOffsetBits>>& p_rowEntryStr, hls::stream <ap_uint <1>>& p_isEndStr, hls::stream <ap_uint <t_DataBits>>& p_rowValStr )
rowAcc function that returns the accumulated results
Parameters:
t_MaxRowBlocks | the maximum number of row entries buffered onchip per PE |
t_LogParEntries | log2 of the number of entries in the input/output vector stream |
t_LogParGroups | log2 of the number of parallel accumulation paths |
t_DataType | the data type of the matrix and vector entries |
t_IndexType | the data type of the indices |
t_DataBits | the number of bits for storing the data |
t_IndexBits | the number of bits for storing the indices |
t_RowOffsetBits | the number of bits for storing the row offsets |
p_rowBlocks | the number of row vectors |
p_isEndStr | the input control stream |
p_nnzColValStr | the input col vector stream |
p_rowIndexStr | the input NNZ index vector stream |
p_rowValStr | the output row entry stream |
rowAgg¶
#include "xf_sparse/cscmv.hpp"
template < unsigned int t_ParEntries, unsigned int t_ParGroups, typename t_DataType, typename t_IndexType, unsigned int t_DataBits = 32 > void rowAgg ( const unsigned int p_rowBlocks, hls::stream <ap_uint <t_DataBits>> p_rowValStr [t_ParEntries][t_ParGroups], hls::stream <ap_uint <t_DataBits*t_ParEntries>>& p_rowAggStr )
rowAgg function that aggregates multiple row entry streams into one row vector stream
Parameters:
t_ParEntries | the number of entries in the input/output vector stream |
t_ParGroups | the number of parallel accumulation paths |
t_DataType | the data type of the matrix and vector entries |
t_IndexType | the data type of the indices |
t_DataBits | the number of bits for storing the data |
p_rowBlocks | the number of row vectors |
p_rowValStr | the input row entry stream array |
p_rowAggStr | the output aggregated row vector stream |
cscRow¶
#include "xf_sparse/cscmv.hpp"
template < unsigned int t_MaxRowBlocks, unsigned int t_LogParEntries, unsigned int t_LogParGroups, typename t_DataType, typename t_IndexType = unsigned int, unsigned int t_DataBits = 32, unsigned int t_IndexBits = 32 > void cscRow ( const unsigned int p_nnzBlocks, const unsigned int p_rowBlocks )
cscRow function that returns the multiplication results of a sparse matrix and a dense vector
Parameters:
t_MaxRowBlocks | the maximum number of row entries buffered onchip per PE |
t_LogParEntries | log2 of the number of entries in the input/output vector stream |
t_LogParGroups | log2 of the number of parallel accumulation paths |
t_DataType | the data type of the matrix and vector entries |
t_IndexType | the data type of the indices |
t_DataBits | the number of bits for storing the data |
t_IndexBits | the number of bits for storing the indices |
p_nnzBlocks | the number of NNZ vectors |
p_rowBlocks | the number of row vectors |
p_nnzValStr | the input NNZ value vector stream |
p_nnzColValStr | the input col vector stream |
p_rowIndexStr | the input NNZ index vector stream |
p_rowAggStr | the output row vector stream |
dispColVec¶
#include "xf_sparse/moverL1.hpp"
template < unsigned int t_MaxColParBlocks, unsigned int t_HbmChannels, unsigned int t_ParEntries, unsigned int t_DataBits > void dispColVec ( const unsigned int t_chId, hls::stream <ap_uint <32>>& p_paramStr, hls::stream <ap_uint <t_DataBits*t_ParEntries>>& p_datStr, hls::stream <ap_uint <32>>& p_paramFwdStr, hls::stream <ap_uint <t_DataBits*t_ParEntries>>& p_datFwdStr, hls::stream <ap_uint <32>>& p_paramOutStr, hls::stream <ap_uint <t_DataBits*t_ParEntries>>& p_datOutStr )
dispColVec function that forwards and copies the input column vector and parameter streams
Parameters:
t_MaxColParBlocks | the maximum number of parallel processed column vectors buffered in on-chip memory |
t_HbmChannels | number of HBM channels |
t_ParEntries | the number of entries processed in parallel |
t_DataBits | number of bits used to store each entry |
t_chId | constant HBM channel ID |
p_paramStr | 32-bit input parameter stream |
p_datStr | input vector stream |
p_paramFwdStr | the forwarded output 32-bit parameter stream |
p_datFwdStr | the forwarded column vector stream |
p_paramOutStr | the copied output 32-bit parameter stream |
p_datOutStr | the copied column vector stream |
dispCol¶
#include "xf_sparse/moverL1.hpp"
template < unsigned int t_MaxColParBlocks, unsigned int t_HbmChannels, unsigned int t_ParEntries, unsigned int t_DataBits > void dispCol ( hls::stream <ap_uint <32>>& p_paramStr, hls::stream <ap_uint <t_DataBits*t_ParEntries>>& p_datStr, hls::stream <ap_uint <32>> p_paramOutStr [t_HbmChannels], hls::stream <ap_uint <t_DataBits*t_ParEntries>> p_datOutStr [t_HbmChannels] )
dispCol function that dispatches input column vectors across parallel cscmv engines
Parameters:
t_MaxColParBlocks | the maximum number of parallel processed column vectors buffered in each cscmv engine |
t_HbmChannels | number of HBM channels |
t_ParEntries | the number of entries processed in parallel |
t_DataBits | number of bits used to store each entry |
p_paramStr | 32-bit input parameter stream |
p_datStr | input vector stream |
p_paramOutStr | an output array of 32-bit parameter streams |
p_datOutStr | an output array of column vector streams |
dispNnzCol¶
#include "xf_sparse/moverL1.hpp"
template < unsigned int t_MaxColParBlocks, unsigned int t_HbmChannels, unsigned int t_ParEntries, unsigned int t_DataBits > void dispNnzCol ( hls::stream <ap_uint <32>>& p_paramStr, hls::stream <ap_uint <t_DataBits*t_ParEntries>>& p_datStr, hls::stream <ap_uint <32>> p_paramOutStr [t_HbmChannels], hls::stream <ap_uint <t_DataBits*t_ParEntries>> p_datOutStr [t_HbmChannels] )
dispNnzCol function that dispatches NNZ Col pointer vectors across parallel cscmv engines
Parameters:
t_MaxColParBlocks | the maximum number of parallel processed column vectors buffered in each cscmv engine |
t_HbmChannels | number of HBM channels |
t_ParEntries | the number of entries processed in parallel |
t_DataBits | number of bits used to store each entry |
p_paramStr | 32-bit input parameter stream |
p_datStr | input vector stream |
p_paramOutStr | an output array of 32-bit parameter streams |
p_datOutStr | an output array of vector streams |