namespace xf::blas¶
Overview¶
namespace blas { // global functions template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType> void amax(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_IndexType& p_result); template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType> void amin(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_IndexType& p_result); template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int> void asum(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_DataType& p_sum); template <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int> void axpy(unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y, hls::stream<WideType<t_DataType, t_ParEntries>>& p_r); template <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int> void copy(unsigned int p_n, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y); template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int> void dot(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_y, t_DataType& p_res); template <typename t_DataType, unsigned int t_ParEntries, unsigned int t_MaxRows, typename t_IndexType = unsigned int, typename t_MacType = t_DataType> void gbmv(const unsigned int p_m, const unsigned int p_n, const unsigned int p_kl, const unsigned int p_ku, hls::stream<WideType<t_DataType, t_ParEntries>>& p_A, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_MacType, t_ParEntries>>& p_y); template <typename t_DataType, unsigned int t_ParEntries, unsigned int t_MaxRows, typename t_IndexType = unsigned int, typename 
t_MacType = t_DataType> void gbmv(const unsigned int p_m, const unsigned int p_n, const unsigned int p_kl, const unsigned int p_ku, const t_DataType p_alpha, hls::stream<WideType<t_DataType, t_ParEntries>>& p_M, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y, hls::stream<WideType<t_DataType, t_ParEntries>>& p_yr); template <typename t_DataType, unsigned int t_LogParEntries, unsigned int t_NumStreams = (1 << t_LogParEntries), typename t_IndexType = unsigned int> void gemv(const unsigned int p_m, const unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>> p_M [t_NumStreams], hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>> p_x [t_NumStreams], hls::stream<WideType<t_DataType, t_NumStreams>>& p_y); template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int> void gemv(const unsigned int p_m, const unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_M, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, hls::stream<WideType<t_DataType, 1>>& p_y); template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int> void gemv(const unsigned int p_m, const unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_M, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType, 1>>& p_y, hls::stream<WideType<t_DataType, 1>>& p_yr); template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int> void nrm2(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_DataType& p_res); template <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int> void scal(unsigned int p_n, t_DataType p_alpha, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, 
t_ParEntries>>& p_res); template <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int> void swap(unsigned int p_n, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y, hls::stream<WideType<t_DataType, t_ParEntries>>& p_xRes, hls::stream<WideType<t_DataType, t_ParEntries>>& p_yRes); template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int> void symv(const unsigned int p_n); template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int> void symv(const unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_M, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_y, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_yr); template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int, typename t_MacType = t_DataType> void trmv(const bool uplo, const unsigned int p_n); template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int> void trmv(const bool uplo, const unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_M, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType, 1>>& p_y, hls::stream<WideType<t_DataType, 1>>& p_yr); } // namespace blas
Detailed Documentation¶
Global Functions¶
amax¶
template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType> void amax(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_IndexType& p_result)
amax function that returns the position of the vector element that has the maximum magnitude.
Parameters:
t_DataType |
the data type of the vector entries |
t_LogParEntries |
log2 of the number of parallelly processed entries in the input vector |
t_IndexType |
the datatype of the index |
p_n |
the number of entries in the input vector p_x, p_n % (1 << t_LogParEntries) == 0 |
p_x |
the input stream of packed vector entries |
p_result |
the resulting index, which is 0 if p_n <= 0 |
amin¶
template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType> void amin(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_IndexType& p_result)
amin function that returns the position of the vector element that has the minimum magnitude.
Parameters:
t_DataType |
the data type of the vector entries |
t_LogParEntries |
log2 of the number of parallelly processed entries in the input vector |
t_IndexType |
the datatype of the index |
p_n |
the number of entries in the input vector p_x, p_n % (1 << t_LogParEntries) == 0 |
p_x |
the input stream of packed vector entries |
p_result |
the resulting index, which is 0 if p_n <= 0 |
asum¶
template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int> void asum(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_DataType& p_sum)
asum function that returns the sum of the magnitude of vector elements.
Parameters:
t_DataType |
the data type of the vector entries |
t_LogParEntries |
log2 of the number of parallelly processed entries in the input vector |
t_IndexType |
the datatype of the index |
p_n |
the number of entries in the input vector p_x, p_n % (1 << t_LogParEntries) == 0 |
p_x |
the input stream of packed vector entries |
p_sum |
the sum, which is 0 if p_n <= 0 |
axpy¶
template <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int> void axpy(unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y, hls::stream<WideType<t_DataType, t_ParEntries>>& p_r)
axpy function that computes Y = alpha*X + Y.
Parameters:
t_DataType |
the data type of the vector entries |
t_ParEntries |
the number of parallelly processed entries in the input vector |
t_IndexType |
the datatype of the index |
p_n |
the number of entries in the input vector p_x, p_n % t_ParEntries == 0 |
p_x |
the input stream of packed entries of vector X |
p_y |
the input stream of packed entries of vector Y |
p_r |
the output stream of packed entries of result vector Y |
copy¶
template <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int> void copy(unsigned int p_n, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y)
copy function that computes Y = X
Parameters:
t_DataType |
the data type of the vector entries |
t_ParEntries |
number of parallelly processed entries in the packed input vector stream |
t_IndexType |
the datatype of the index |
p_n |
the number of entries in vector X and Y |
p_x |
the packed input vector stream |
p_y |
the packed output vector stream |
dot¶
template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int> void dot(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_y, t_DataType& p_res)
dot function that returns the dot product of vector x and y.
Parameters:
t_DataType |
the data type of the vector entries |
t_LogParEntries |
log2 of the number of parallelly processed entries in the input vector |
t_IndexType |
the datatype of the index |
p_n |
the number of entries in the input vector p_x, p_n % (1 << t_LogParEntries) == 0 |
p_x |
the input stream of packed entries of vector x |
p_y |
the input stream of packed entries of vector y |
p_res |
the dot product of x and y |
gbmv¶
template <typename t_DataType, unsigned int t_ParEntries, unsigned int t_MaxRows, typename t_IndexType = unsigned int, typename t_MacType = t_DataType> void gbmv(const unsigned int p_m, const unsigned int p_n, const unsigned int p_kl, const unsigned int p_ku, const t_DataType p_alpha, hls::stream<WideType<t_DataType, t_ParEntries>>& p_M, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y, hls::stream<WideType<t_DataType, t_ParEntries>>& p_yr)
gbmv function that performs general banded matrix-vector multiplication: y = alpha * M * x + beta * y
Parameters:
t_DataType |
the data type of the vector entries |
t_ParEntries |
the number of parallelly processed entries in the input vector |
t_MaxRows |
the maximum size of buffers for output vector |
t_IndexType |
the datatype of the index |
t_MacType |
the datatype of the output stream |
p_m |
the number of rows of input matrix p_M |
p_alpha |
scalar alpha |
p_M |
the input stream of packed Matrix entries |
p_x |
the input stream of packed vector entries |
p_beta |
scalar beta |
p_y |
the output vector |
gemv¶
template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int> void gemv(const unsigned int p_m, const unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_M, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType, 1>>& p_y, hls::stream<WideType<t_DataType, 1>>& p_yr)
gemv function that returns the result vector of the multiplication of a matrix and a vector y = alpha * M * x + beta * y
Parameters:
t_DataType |
the data type of the vector entries |
t_LogParEntries |
log2 of the number of parallelly processed entries in the input vector |
t_IndexType |
the datatype of the index |
p_m |
the number of rows of input matrix p_M |
p_n |
the number of cols of input matrix p_M, as well as the number of entries in the input vector p_x, p_n % (1 << t_LogParEntries) == 0 |
p_alpha |
scalar alpha |
p_M |
the input stream of packed Matrix entries |
p_x |
the input stream of packed vector entries |
p_beta |
scalar beta |
p_y |
the output vector |
nrm2¶
template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int> void nrm2(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_DataType& p_res)
nrm2 function that returns the Euclidean norm of the vector x.
Parameters:
t_DataType |
the data type of the vector entries |
t_LogParEntries |
log2 of the number of parallelly processed entries in the input vector |
t_IndexType |
the datatype of the index |
p_n |
the number of entries in the input vector p_x, p_n % (1 << t_LogParEntries) == 0 |
p_x |
the input stream of packed vector entries |
p_res |
the nrm2 of x |
scal¶
template <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int> void scal(unsigned int p_n, t_DataType p_alpha, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_res)
scal function that computes X = alpha * X
Parameters:
t_DataType |
the data type of the vector entries |
t_ParEntries |
number of parallelly processed entries in the packed input vector stream |
t_IndexType |
the datatype of the index |
p_n |
the number of entries in vector X, p_n % t_ParEntries == 0 |
p_x |
the packed input vector stream |
p_res |
the packed output vector stream |
swap¶
template <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int> void swap(unsigned int p_n, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y, hls::stream<WideType<t_DataType, t_ParEntries>>& p_xRes, hls::stream<WideType<t_DataType, t_ParEntries>>& p_yRes)
swap function that swaps vectors x and y
Parameters:
t_DataType |
the data type of the vector entries |
t_ParEntries |
number of parallelly processed entries in the packed input vector stream |
t_IndexType |
the datatype of the index |
p_n |
the number of entries in vector X and Y, p_n % t_ParEntries == 0 |
p_x |
the packed input vector stream |
p_y |
the packed input vector stream |
p_xRes |
the packed output stream |
p_yRes |
the packed output stream |
symv¶
template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int> void symv(const unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_M, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_y, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_yr)
symv function that returns the result vector of the multiplication of a symmetric matrix and a vector y = alpha * M * x + beta * y
Parameters:
t_DataType |
the data type of the vector entries |
t_LogParEntries |
log2 of the number of parallelly processed entries in the input vector |
t_IndexType |
the datatype of the index |
p_n |
the dimension of input matrix p_M, as well as the number of entries in the input vector p_x, p_n % (1 << t_LogParEntries) == 0 |
p_alpha |
scalar alpha |
p_M |
the input stream of packed Matrix entries |
p_x |
the input stream of packed vector entries |
p_beta |
scalar beta |
p_y |
the output vector |
trmv¶
template <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int> void trmv(const bool uplo, const unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_M, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType, 1>>& p_y, hls::stream<WideType<t_DataType, 1>>& p_yr)
trmv function that returns the result vector of the multiplication of a triangular matrix and a vector y = alpha * M * x + beta * y
Parameters:
t_DataType |
the data type of the vector entries |
t_LogParEntries |
log2 of the number of parallelly processed entries in the input vector |
t_IndexType |
the datatype of the index |
p_n |
the number of cols of input matrix p_M, as well as the number of entries in the input vector p_x, p_n % (1 << t_LogParEntries) == 0 |
p_alpha |
scalar alpha |
p_M |
the input stream of packed Matrix entries |
p_x |
the input stream of packed vector entries |
p_beta |
scalar beta |
p_y |
the output vector |