namespace xf::blas

Overview

namespace blas {

// global functions

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType>
void amax(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_IndexType& p_result);

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType>
void amin(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_IndexType& p_result);

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int>
void asum(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_DataType& p_sum);

template  <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int>
void axpy(unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y, hls::stream<WideType<t_DataType, t_ParEntries>>& p_r);

template  <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int>
void copy(unsigned int p_n, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y);

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int>
void dot(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_y, t_DataType& p_res);

template  <typename t_DataType, unsigned int t_ParEntries, unsigned int t_MaxRows, typename t_IndexType = unsigned int, typename t_MacType = t_DataType>
void gbmv(const unsigned int p_m, const unsigned int p_n, const unsigned int p_kl, const unsigned int p_ku, hls::stream<WideType<t_DataType, t_ParEntries>>& p_A, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_MacType, t_ParEntries>>& p_y);

template  <typename t_DataType, unsigned int t_ParEntries, unsigned int t_MaxRows, typename t_IndexType = unsigned int, typename t_MacType = t_DataType>
void gbmv(const unsigned int p_m, const unsigned int p_n, const unsigned int p_kl, const unsigned int p_ku, const t_DataType p_alpha, hls::stream<WideType<t_DataType, t_ParEntries>>& p_M, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y, hls::stream<WideType<t_DataType, t_ParEntries>>& p_yr);

template  <typename t_DataType, unsigned int t_LogParEntries, unsigned int t_NumStreams = (1 << t_LogParEntries), typename t_IndexType = unsigned int>
void gemv(const unsigned int p_m, const unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>> p_M [t_NumStreams], hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>> p_x [t_NumStreams], hls::stream<WideType<t_DataType, t_NumStreams>>& p_y);

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int>
void gemv(const unsigned int p_m, const unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_M, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, hls::stream<WideType<t_DataType, 1>>& p_y);

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int>
void gemv(const unsigned int p_m, const unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_M, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType, 1>>& p_y, hls::stream<WideType<t_DataType, 1>>& p_yr);

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int>
void nrm2(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_DataType& p_res);

template  <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int>
void scal(unsigned int p_n, t_DataType p_alpha, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_res);

template  <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int>
void swap(unsigned int p_n, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y, hls::stream<WideType<t_DataType, t_ParEntries>>& p_xRes, hls::stream<WideType<t_DataType, t_ParEntries>>& p_yRes);

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int>
void symv(const unsigned int p_n);

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int>
void symv(const unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_M, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_y, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_yr);

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int, typename t_MacType = t_DataType>
void trmv(const bool uplo, const unsigned int p_n);

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int>
void trmv(const bool uplo, const unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_M, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType, 1>>& p_y, hls::stream<WideType<t_DataType, 1>>& p_yr);

} // namespace blas

Detailed Documentation

Global Functions

amax

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType>
void amax(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_IndexType& p_result)

amax function that returns the position of the vector element that has the maximum magnitude.

Parameters:

t_DataType

the data type of the vector entries

t_LogParEntries

log2 of the number of parallelly processed entries in the input vector

t_IndexType

the datatype of the index

p_n

the number of entries in the input vector p_x; p_n must be a multiple of the number of parallel entries, i.e. p_n % (1 << t_LogParEntries) == 0

p_x

the input stream of packed vector entries

p_result

the resulting index, which is 0 if p_n <= 0

amin

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType>
void amin(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_IndexType& p_result)

amin function that returns the position of the vector element that has the minimum magnitude.

Parameters:

t_DataType

the data type of the vector entries

t_LogParEntries

log2 of the number of parallelly processed entries in the input vector

t_IndexType

the datatype of the index

p_n

the number of entries in the input vector p_x; p_n must be a multiple of the number of parallel entries, i.e. p_n % (1 << t_LogParEntries) == 0

p_x

the input stream of packed vector entries

p_result

the resulting index, which is 0 if p_n <= 0

asum

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int>
void asum(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_DataType& p_sum)

asum function that returns the sum of the magnitude of vector elements.

Parameters:

t_DataType

the data type of the vector entries

t_LogParEntries

log2 of the number of parallelly processed entries in the input vector

t_IndexType

the datatype of the index

p_n

the number of entries in the input vector p_x; p_n must be a multiple of the number of parallel entries, i.e. p_n % (1 << t_LogParEntries) == 0

p_x

the input stream of packed vector entries

p_sum

the sum, which is 0 if p_n <= 0

axpy

template  <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int>
void axpy(unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y, hls::stream<WideType<t_DataType, t_ParEntries>>& p_r)

axpy function that computes Y = alpha*X + Y.

Parameters:

t_DataType

the data type of the vector entries

t_ParEntries

the number of parallelly processed entries in the input vector

t_IndexType

the datatype of the index

p_n

the number of entries in the input vector p_x, p_n % t_ParEntries == 0

p_x

the input stream of packed entries of vector X

p_y

the input stream of packed entries of vector Y

p_r

the output stream of packed entries of result vector Y

copy

template  <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int>
void copy(unsigned int p_n, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y)

copy function that computes Y = X

Parameters:

t_DataType

the data type of the vector entries

t_ParEntries

number of parallelly processed entries in the packed input vector stream

t_IndexType

the datatype of the index

p_n

the number of entries in vector X and Y

p_x

the packed input vector stream

p_y

the packed output vector stream

dot

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int>
void dot(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_y, t_DataType& p_res)

dot function that returns the dot product of vector x and y.

Parameters:

t_DataType

the data type of the vector entries

t_LogParEntries

log2 of the number of parallelly processed entries in the input vector

t_IndexType

the datatype of the index

p_n

the number of entries in the input vector p_x; p_n must be a multiple of the number of parallel entries, i.e. p_n % (1 << t_LogParEntries) == 0

p_x

the input stream of packed vector entries

p_res

the dot product of x and y

gbmv

template  <typename t_DataType, unsigned int t_ParEntries, unsigned int t_MaxRows, typename t_IndexType = unsigned int, typename t_MacType = t_DataType>
void gbmv(const unsigned int p_m, const unsigned int p_n, const unsigned int p_kl, const unsigned int p_ku, const t_DataType p_alpha, hls::stream<WideType<t_DataType, t_ParEntries>>& p_M, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y, hls::stream<WideType<t_DataType, t_ParEntries>>& p_yr)

gbmv function that performs the multiplication of a general banded matrix and a vector, y = alpha * M * x + beta * y

Parameters:

t_DataType

the data type of the vector entries

t_ParEntries

the number of parallelly processed entries in the input vector

t_MaxRows

the maximum size of buffers for output vector

t_IndexType

the datatype of the index

t_MacType

the datatype of the output stream

p_m

the number of rows of input matrix p_M

p_alpha

scalar alpha

p_M

the input stream of packed Matrix entries

p_x

the input stream of packed vector entries

p_beta

scalar beta

p_y

the output vector

gemv

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int>
void gemv(const unsigned int p_m, const unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_M, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType, 1>>& p_y, hls::stream<WideType<t_DataType, 1>>& p_yr)

gemv function that returns the result vector of the multiplication of a matrix and a vector y = alpha * M * x + beta * y

Parameters:

t_DataType

the data type of the vector entries

t_LogParEntries

log2 of the number of parallelly processed entries in the input vector

t_IndexType

the datatype of the index

p_m

the number of rows of input matrix p_M

p_n

the number of cols of input matrix p_M, as well as the number of entries in the input vector p_x; p_n must be a multiple of the number of parallel entries, i.e. p_n % (1 << t_LogParEntries) == 0

p_alpha

scalar alpha

p_M

the input stream of packed Matrix entries

p_x

the input stream of packed vector entries

p_beta

scalar beta

p_y

the output vector

nrm2

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int>
void nrm2(unsigned int p_n, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, t_DataType& p_res)

nrm2 function that returns the Euclidean norm of the vector x.

Parameters:

t_DataType

the data type of the vector entries

t_LogParEntries

log2 of the number of parallelly processed entries in the input vector

t_IndexType

the datatype of the index

p_n

the number of entries in the input vector p_x, p_n % (1 << t_LogParEntries) == 0

p_x

the input stream of packed vector entries

p_res

the nrm2 of x

scal

template  <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int>
void scal(unsigned int p_n, t_DataType p_alpha, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_res)

scal function that computes X = alpha * X

Parameters:

t_DataType

the data type of the vector entries

t_ParEntries

number of parallelly processed entries in the packed input vector stream

t_IndexType

the datatype of the index

p_n

the number of entries in vector X, p_n % t_ParEntries == 0

p_x

the packed input vector stream

p_res

the packed output vector stream

swap

template  <typename t_DataType, unsigned int t_ParEntries, typename t_IndexType = unsigned int>
void swap(unsigned int p_n, hls::stream<WideType<t_DataType, t_ParEntries>>& p_x, hls::stream<WideType<t_DataType, t_ParEntries>>& p_y, hls::stream<WideType<t_DataType, t_ParEntries>>& p_xRes, hls::stream<WideType<t_DataType, t_ParEntries>>& p_yRes)

swap function that swaps vectors x and y

Parameters:

t_DataType

the data type of the vector entries

t_ParEntries

number of parallelly processed entries in the packed input vector stream

t_IndexType

the datatype of the index

p_n

the number of entries in vector X and Y, p_n % t_ParEntries == 0

p_x

the packed input vector stream

p_y

the packed input vector stream

p_xRes

the packed output stream

p_yRes

the packed output stream

symv

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int>
void symv(const unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_M, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_y, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_yr)

symv function that returns the result vector of the multiplication of a symmetric matrix and a vector y = alpha * M * x + beta * y

Parameters:

t_DataType

the data type of the vector entries

t_LogParEntries

log2 of the number of parallelly processed entries in the input vector

t_IndexType

the datatype of the index

p_n

the dimension of input matrix p_M, as well as the number of entries in the input vector p_x; p_n must be a multiple of the number of parallel entries, i.e. p_n % (1 << t_LogParEntries) == 0

p_alpha

scalar alpha

p_M

the input stream of packed Matrix entries

p_x

the input stream of packed vector entries

p_beta

scalar beta

p_y

the output vector

trmv

template  <typename t_DataType, unsigned int t_LogParEntries, typename t_IndexType = unsigned int>
void trmv(const bool uplo, const unsigned int p_n, const t_DataType p_alpha, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_M, hls::stream<WideType<t_DataType,(1<<t_LogParEntries)>>& p_x, const t_DataType p_beta, hls::stream<WideType<t_DataType, 1>>& p_y, hls::stream<WideType<t_DataType, 1>>& p_yr)

trmv function that returns the result vector of the multiplication of a triangular matrix and a vector y = alpha * M * x + beta * y

Parameters:

t_DataType

the data type of the vector entries

t_LogParEntries

log2 of the number of parallelly processed entries in the input vector

t_IndexType

the datatype of the index

p_n

the number of cols of input matrix p_M, as well as the number of entries in the input vector p_x; p_n must be a multiple of the number of parallel entries, i.e. p_n % (1 << t_LogParEntries) == 0

p_alpha

scalar alpha

p_M

the input stream of packed Matrix entries

p_x

the input stream of packed vector entries

p_beta

scalar beta

p_y

the output vector