namespace xf::blas¶

Overview¶

namespace blas {

// global functions

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType>
void amax(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_IndexType& p_result);

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType>
void amin(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_IndexType& p_result);

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int>
void asum(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_DataType& p_sum);

template  <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int>
void axpy(unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y, hls::stream<WideType<t_DataType, t_ParEntries>>& p_r);

template  <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int>
void copy(unsigned int p_n, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y);

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int>
void dot(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_y, t_DataType& p_res);

template  <typename t_DataType, unsigned int t_ParEntries, unsigned int t_MaxRows, typename t_IndexType = unsigned int, typename t_MacType = t_DataType>
void gbmv(const unsigned int p_m, const unsigned int p_n, const unsigned int p_kl, const unsigned int p_ku, hls::stream<WideType<t_DataType, t_ParEntries>>& p_A, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_MacType, t_ParEntries>>& p_y);

template  <typename t_DataType, unsigned int t_ParEntries, unsigned int t_MaxRows, typename t_IndexType = unsigned int, typename t_MacType = t_DataType>
void gbmv(const unsigned int p_m, const unsigned int p_n, const unsigned int p_kl, const unsigned int p_ku, const t_DataType p_alpha, hls::stream<WideType<t_DataType, t_ParEntries>>& p_M, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y, hls::stream<WideType<t_DataType, t_ParEntries>>& p_yr);

template  <typename t_DataType, unsigned int t_LogParEntries, unsigned int t_NumStreams = (1 << t_LogParEntries), typename t_IndexType = unsigned int>
void gemv(const unsigned int p_m, const unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>> p_M [t_NumStreams], hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>> p_x [t_NumStreams], hls::stream<WideType<t_DataType, t_NumStreams>>& p_y);

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int>
void gemv(const unsigned int p_m, const unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_M, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, hls::stream<WideType<t_DataType, 1>>& p_y);

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int>
void gemv(const unsigned int p_m, const unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_M, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType, 1>>& p_y, hls::stream<WideType<t_DataType, 1>>& p_yr);

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int>
void nrm2(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_DataType& p_res);

template  <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int>
void scal(unsigned int p_n, t_DataType p_alpha, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_res);

template  <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int>
void swap(unsigned int p_n, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y, hls::stream<WideType<t_DataType, t_ParEntries>>& p_xRes, hls::stream<WideType<t_DataType, t_ParEntries>>& p_yRes);

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int>
void symv(const unsigned int p_n);

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int>
void symv(const unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_M, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_y, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_yr);

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int, typename t_MacType = t_DataType>
void trmv(const bool uplo, const unsigned int p_n);

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int>
void trmv(const bool uplo, const unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_M, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType, 1>>& p_y, hls::stream<WideType<t_DataType, 1>>& p_yr);

} // namespace blas

Detailed Documentation¶

Global Functions¶

amax¶

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType>
void amax(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_IndexType& p_result)

amax function that returns the position of the vector element that has the maximum magnitude.

Parameters:

t_DataType	the data type of the vector entries
t_LogParEntries	log2 of the number of parallelly processed entries in the input vector
t_IndexType	the datatype of the index
p_n	the number of entries in the input vector p_x, p_n % (1 << t_LogParEntries) == 0
p_x	the input stream of packed vector entries
p_result	the resulting index, which is 0 if p_n <= 0

amin¶

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType>
void amin(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_IndexType& p_result)

amin function that returns the position of the vector element that has the minimum magnitude.

Parameters:

t_DataType	the data type of the vector entries
t_LogParEntries	log2 of the number of parallelly processed entries in the input vector
t_IndexType	the datatype of the index
p_n	the number of entries in the input vector p_x, p_n % (1 << t_LogParEntries) == 0
p_x	the input stream of packed vector entries
p_result	the resulting index, which is 0 if p_n <= 0

asum¶

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int>
void asum(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_DataType& p_sum)

asum function that returns the sum of the magnitude of vector elements.

Parameters:

t_DataType	the data type of the vector entries
t_LogParEntries	log2 of the number of parallelly processed entries in the input vector
t_IndexType	the datatype of the index
p_n	the number of entries in the input vector p_x, p_n % (1 << t_LogParEntries) == 0
p_x	the input stream of packed vector entries
p_sum	the sum, which is 0 if p_n <= 0

axpy¶

template  <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int>
void axpy(unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y, hls::stream<WideType<t_DataType, t_ParEntries>>& p_r)

axpy function that computes Y = alpha*X + Y.

Parameters:

t_DataType	the data type of the vector entries
t_ParEntries	number of parallelly processed entries in the input vector
t_IndexType	the datatype of the index
p_n	the number of entries in the input vector p_x, p_n % t_ParEntries == 0
p_alpha	scalar alpha
p_x	the input stream of packed entries of vector X
p_y	the input stream of packed entries of vector Y
p_r	the output stream of packed entries of result vector Y

copy¶

template  <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int>
void copy(unsigned int p_n, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y)

copy function that computes Y = X

Parameters:

t_DataType	the data type of the vector entries
t_ParEntries	number of parallelly processed entries in the packed input vector stream
t_IndexType	the datatype of the index
p_n	the number of entries in vector X and Y
p_x	the packed input vector stream
p_y	the packed output vector stream

dot¶

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int>
void dot(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_y, t_DataType& p_res)

dot function that returns the dot product of vector x and y.

Parameters:

t_DataType	the data type of the vector entries
t_LogParEntries	log2 of the number of parallelly processed entries in the input vector
t_IndexType	the datatype of the index
p_n	the number of entries in the input vector p_x, p_n % (1 << t_LogParEntries) == 0
p_x	the input stream of packed entries of vector x
p_y	the input stream of packed entries of vector y
p_res	the dot product of x and y

gbmv¶

template  <typename t_DataType, unsigned int t_ParEntries, unsigned int t_MaxRows, typename t_IndexType = unsigned int, typename t_MacType = t_DataType>
void gbmv(const unsigned int p_m, const unsigned int p_n, const unsigned int p_kl, const unsigned int p_ku, const t_DataType p_alpha, hls::stream<WideType<t_DataType, t_ParEntries>>& p_M, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y, hls::stream<WideType<t_DataType, t_ParEntries>>& p_yr)

gbmv function that performs the general banded matrix-vector multiplication y = alpha * M * x + beta * y

Parameters:

t_DataType	the data type of the vector entries
t_ParEntries	the number of parallelly processed entries in the input vector
t_MaxRows	the maximum size of buffers for output vector
t_IndexType	the datatype of the index
t_MacType	the datatype of the output stream
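p_m	the number of rows of input matrix p_M
p_n	the number of cols of input matrix p_M
p_kl	the number of subdiagonals of the banded matrix p_M
p_ku	the number of superdiagonals of the banded matrix p_M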
p_alpha	scalar alpha
p_M	the input stream of packed Matrix entries
p_x	the input stream of packed vector entries
p_beta	scalar beta
p_y	the input stream of packed entries of vector y
p_yr	the output stream of packed entries of the result vector

gemv¶

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int>
void gemv(const unsigned int p_m, const unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_M, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType, 1>>& p_y, hls::stream<WideType<t_DataType, 1>>& p_yr)

gemv function that returns the result vector of the multiplication of a matrix and a vector y = alpha * M * x + beta * y

Parameters:

t_DataType	the data type of the vector entries
t_LogParEntries	log2 of the number of parallelly processed entries in the input vector
t_IndexType	the datatype of the index
p_m	the number of rows of input matrix p_M
p_n	the number of cols of input matrix p_M, as well as the number of entries in the input vector p_x, p_n % (1 << t_LogParEntries) == 0
p_alpha	scalar alpha
p_M	the input stream of packed Matrix entries
p_x	the input stream of packed vector entries
p_beta	scalar beta
p_y	the input stream of entries of vector y
p_yr	the output stream of entries of the result vector

nrm2¶

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int>
void nrm2(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_DataType& p_res)

nrm2 function that returns the Euclidean norm of the vector x.

Parameters:

t_DataType	the data type of the vector entries
t_LogParEntries	log2 of the number of parallelly processed entries in the input vector
t_IndexType	the datatype of the index
p_n	the number of entries in the input vector p_x, p_n % (1 << t_LogParEntries) == 0
p_x	the input stream of packed vector entries
p_res	the nrm2 of x

scal¶

template  <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int>
void scal(unsigned int p_n, t_DataType p_alpha, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_res)

scal function that computes X = alpha * X

Parameters:

t_DataType	the data type of the vector entries
t_ParEntries	number of parallelly processed entries in the packed input vector stream
t_IndexType	the datatype of the index
p_n	the number of entries in vector X, p_n % t_ParEntries == 0
p_x	the packed input vector stream
p_res	the packed output vector stream

swap¶

template  <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int>
void swap(unsigned int p_n, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y, hls::stream<WideType<t_DataType, t_ParEntries>>& p_xRes, hls::stream<WideType<t_DataType, t_ParEntries>>& p_yRes)

swap function that swaps vectors x and y

Parameters:

t_DataType	the data type of the vector entries
t_ParEntries	number of parallelly processed entries in the packed input vector stream
t_IndexType	the datatype of the index
p_n	the number of entries in vector X and Y, p_n % t_ParEntries == 0
p_x	the packed input vector stream
p_y	the packed input vector stream
p_xRes	the packed output stream
p_yRes	the packed output stream

symv¶

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int>
void symv(const unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_M, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_y, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_yr)

symv function that returns the result vector of the multiplication of a symmetric matrix and a vector y = alpha * M * x + beta * y

Parameters:

t_DataType	the data type of the vector entries
t_LogParEntries	log2 of the number of parallelly processed entries in the input vector
t_IndexType	the datatype of the index
p_n	the dimension of input matrix p_M, as well as the number of entries in the input vector p_x, p_n % (1 << t_LogParEntries) == 0
p_alpha	scalar alpha
p_M	the input stream of packed Matrix entries
p_x	the input stream of packed vector entries
p_beta	scalar beta
p_y	the input stream of packed entries of vector y
p_yr	the output stream of packed entries of the result vector

trmv¶

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int>
void trmv(const bool uplo, const unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_M, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType, 1>>& p_y, hls::stream<WideType<t_DataType, 1>>& p_yr)

trmv function that returns the result vector of the multiplication of a triangular matrix and a vector y = alpha * M * x + beta * y

Parameters:

t_DataType	the data type of the vector entries
t_LogParEntries	log2 of the number of parallelly processed entries in the input vector
t_IndexType	the datatype of the index
p_n	the number of cols of input matrix p_M, as well as the number of entries in the input vector p_x, p_n % (1 << t_LogParEntries) == 0
p_alpha	scalar alpha
p_M	the input stream of packed Matrix entries
p_x	the input stream of packed vector entries
p_beta	scalar beta
p_y	the input stream of entries of vector y
p_yr	the output stream of entries of the result vector