namespace xf::blas¶
Overview¶
namespace blas { // global functions template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType> void amax(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_IndexType& p_result); template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType> void amin(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_IndexType& p_result); template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int> void asum(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_DataType& p_sum); template <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int> void axpy(unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y, hls::stream<WideType<t_DataType, t_ParEntries>>& p_r); template <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int> void copy(unsigned int p_n, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y); template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int> void dot(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_y, t_DataType& p_res); template <typename t_DataType, unsigned int t_ParEntries, unsigned int t_MaxRows, typename t_IndexType = unsigned int, typename t_MacType = t_DataType> void gbmv(const unsigned int p_m, const unsigned int p_n, const unsigned int p_kl, const unsigned int p_ku, hls::stream<WideType<t_DataType, t_ParEntries>>& p_A, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_MacType, t_ParEntries>>& p_y); template <typename t_DataType, unsigned int t_ParEntries, unsigned int t_MaxRows, typename t_IndexType = unsigned int, typename 
t_MacType = t_DataType> void gbmv(const unsigned int p_m, const unsigned int p_n, const unsigned int p_kl, const unsigned int p_ku, const t_DataType p_alpha, hls::stream<WideType<t_DataType, t_ParEntries>>& p_M, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y, hls::stream<WideType<t_DataType, t_ParEntries>>& p_yr); template <typename t_DataType, unsigned int t_LogParEntries, unsigned int t_NumStreams = (1 << t_LogParEntries), typename t_IndexType = unsigned int> void gemv(const unsigned int p_m, const unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>> p_M [t_NumStreams], hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>> p_x [t_NumStreams], hls::stream<WideType<t_DataType, t_NumStreams>>& p_y); template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int> void gemv(const unsigned int p_m, const unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_M, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, hls::stream<WideType<t_DataType, 1>>& p_y); template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int> void gemv(const unsigned int p_m, const unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_M, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType, 1>>& p_y, hls::stream<WideType<t_DataType, 1>>& p_yr); template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int> void nrm2(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_DataType& p_res); template <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int> void scal(unsigned int p_n, t_DataType p_alpha, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, 
t_ParEntries>>& p_res); template <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int> void swap(unsigned int p_n, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y, hls::stream<WideType<t_DataType, t_ParEntries>>& p_xRes, hls::stream<WideType<t_DataType, t_ParEntries>>& p_yRes); template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int> void symv(const unsigned int p_n); template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int> void symv(const unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_M, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_y, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_yr); template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int, typename t_MacType = t_DataType> void trmv(const bool uplo, const unsigned int p_n); template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int> void trmv(const bool uplo, const unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_M, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType, 1>>& p_y, hls::stream<WideType<t_DataType, 1>>& p_yr); } // namespace blas
Detailed Documentation¶
Global Functions¶
amax¶
template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType> void amax(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_IndexType& p_result)
amax function that returns the position of the vector element that has the maximum magnitude.
Parameters:
t_DataType | the data type of the vector entries |
t_LogParEntries | log2 of the number of parallelly processed entries in the input vector |
t_IndexType | the datatype of the index |
p_n | the number of entries in the input vector p_x, p_n % l_ParEntries == 0 |
p_x | the input stream of packed vector entries |
p_result | the resulting index, which is 0 if p_n <= 0 |
amin¶
template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType> void amin(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_IndexType& p_result)
amin function that returns the position of the vector element that has the minimum magnitude.
Parameters:
t_DataType | the data type of the vector entries |
t_LogParEntries | log2 of the number of parallelly processed entries in the input vector |
t_IndexType | the datatype of the index |
p_n | the number of entries in the input vector p_x, p_n % l_ParEntries == 0 |
p_x | the input stream of packed vector entries |
p_result | the resulting index, which is 0 if p_n <= 0 |
asum¶
template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int> void asum(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_DataType& p_sum)
asum function that returns the sum of the magnitude of vector elements.
Parameters:
t_DataType | the data type of the vector entries |
t_LogParEntries | log2 of the number of parallelly processed entries in the input vector |
t_IndexType | the datatype of the index |
p_n | the number of entries in the input vector p_x, p_n % l_ParEntries == 0 |
p_x | the input stream of packed vector entries |
p_sum | the sum, which is 0 if p_n <= 0 |
axpy¶
template <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int> void axpy(unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y, hls::stream<WideType<t_DataType, t_ParEntries>>& p_r)
axpy function that computes Y = alpha*X + Y.
Parameters:
t_DataType | the data type of the vector entries |
t_ParEntries | number of parallelly processed entries in the input vector |
t_IndexType | the datatype of the index |
p_n | the number of entries in the input vector p_x, p_n % t_ParEntries == 0 |
p_x | the input stream of packed entries of vector X |
p_y | the input stream of packed entries of vector Y |
p_r | the output stream of packed entries of result vector Y |
copy¶
template <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int> void copy(unsigned int p_n, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y)
copy function that computes Y = X
Parameters:
t_DataType | the data type of the vector entries |
t_ParEntries | number of parallelly processed entries in the packed input vector stream |
t_IndexType | the datatype of the index |
p_n | the number of entries in vector X and Y |
p_x | the packed input vector stream |
p_y | the packed output vector stream |
dot¶
template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int> void dot(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_y, t_DataType& p_res)
dot function that returns the dot product of vector x and y.
Parameters:
t_DataType | the data type of the vector entries |
t_LogParEntries | log2 of the number of parallelly processed entries in the input vector |
t_IndexType | the datatype of the index |
p_n | the number of entries in the input vector p_x, p_n % l_ParEntries == 0 |
p_x | the input stream of packed vector entries |
p_res | the dot product of x and y |
gbmv¶
template <typename t_DataType, unsigned int t_ParEntries, unsigned int t_MaxRows, typename t_IndexType = unsigned int, typename t_MacType = t_DataType> void gbmv(const unsigned int p_m, const unsigned int p_n, const unsigned int p_kl, const unsigned int p_ku, const t_DataType p_alpha, hls::stream<WideType<t_DataType, t_ParEntries>>& p_M, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y, hls::stream<WideType<t_DataType, t_ParEntries>>& p_yr)
gbmv function that performs general banded matrix-vector multiplication y = alpha * M * x + beta * y
Parameters:
t_DataType | the data type of the vector entries |
t_ParEntries | the number of parallelly processed entries in the input vector |
t_MaxRows | the maximum size of buffers for output vector |
t_IndexType | the datatype of the index |
t_MacType | the datatype of the output stream |
p_m | the number of rows of input matrix p_M |
p_alpha | scalar alpha |
p_M | the input stream of packed Matrix entries |
p_x | the input stream of packed vector entries |
p_beta | scalar beta |
p_y | the output vector |
gemv¶
template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int> void gemv(const unsigned int p_m, const unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_M, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType, 1>>& p_y, hls::stream<WideType<t_DataType, 1>>& p_yr)
gemv function that returns the result vector of the multiplication of a matrix and a vector y = alpha * M * x + beta * y
Parameters:
t_DataType | the data type of the vector entries |
t_LogParEntries | log2 of the number of parallelly processed entries in the input vector |
t_IndexType | the datatype of the index |
p_m | the number of rows of input matrix p_M |
p_n | the number of cols of input matrix p_M, as well as the number of entries in the input vector p_x, p_n % l_ParEntries == 0 |
p_alpha | scalar alpha |
p_M | the input stream of packed Matrix entries |
p_x | the input stream of packed vector entries |
p_beta | scalar beta |
p_y | the output vector |
nrm2¶
template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int> void nrm2(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_DataType& p_res)
nrm2 function that returns the Euclidean norm of the vector x.
Parameters:
t_DataType | the data type of the vector entries |
t_LogParEntries | log2 of the number of parallelly processed entries in the input vector |
t_IndexType | the datatype of the index |
p_n | the number of entries in the input vector p_x, p_n % (1<<t_LogParEntries) == 0 |
p_x | the input stream of packed vector entries |
p_res | the nrm2 of x |
scal¶
template <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int> void scal(unsigned int p_n, t_DataType p_alpha, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_res)
scal function that computes X = alpha * X
Parameters:
t_DataType | the data type of the vector entries |
t_ParEntries | number of parallelly processed entries in the packed input vector stream |
t_IndexType | the datatype of the index |
p_n | the number of entries in vector X, p_n % t_ParEntries == 0 |
p_x | the packed input vector stream |
p_res | the packed output vector stream |
swap¶
template <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int> void swap(unsigned int p_n, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y, hls::stream<WideType<t_DataType, t_ParEntries>>& p_xRes, hls::stream<WideType<t_DataType, t_ParEntries>>& p_yRes)
swap function that swaps vectors x and y
Parameters:
t_DataType | the data type of the vector entries |
t_ParEntries | number of parallelly processed entries in the packed input vector stream |
t_IndexType | the datatype of the index |
p_n | the number of entries in vector X and Y, p_n % t_ParEntries == 0 |
p_x | the packed input vector stream |
p_y | the packed input vector stream |
p_xRes | the packed output stream |
p_yRes | the packed output stream |
symv¶
template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int> void symv(const unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_M, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_y, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_yr)
symv function that returns the result vector of the multiplication of a symmetric matrix and a vector y = alpha * M * x + beta * y
Parameters:
t_DataType | the data type of the vector entries |
t_LogParEntries | log2 of the number of parallelly processed entries in the input vector |
t_IndexType | the datatype of the index |
p_n | the dimension of input matrix p_M, as well as the number of entries in the input vector p_x, p_n % l_ParEntries == 0 |
p_alpha | scalar alpha |
p_M | the input stream of packed Matrix entries |
p_x | the input stream of packed vector entries |
p_beta | scalar beta |
p_y | the output vector |
trmv¶
template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int> void trmv(const bool uplo, const unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_M, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType, 1>>& p_y, hls::stream<WideType<t_DataType, 1>>& p_yr)
trmv function that returns the result vector of the multiplication of a triangular matrix and a vector y = alpha * M * x + beta * y
Parameters:
t_DataType | the data type of the vector entries |
t_LogParEntries | log2 of the number of parallelly processed entries in the input vector |
t_IndexType | the datatype of the index |
p_n | the number of cols of input matrix p_M, as well as the number of entries in the input vector p_x, p_n % l_ParEntries == 0 |
p_alpha | scalar alpha |
p_M | the input stream of packed Matrix entries |
p_x | the input stream of packed vector entries |
p_beta | scalar beta |
p_y | the output vector |