namespace blas

// classes

template <typename T>
class BitConv

template <unsigned int W>
class BoolArr

template <
    typename t_FloatType,
    unsigned int t_MemWidth,
    unsigned int t_MemWidthBits
    >
class MemUtil

template <typename t_DataType>
class SpmA

template <typename t_DataType>
class SpmC

template <
    typename t_DataType,
    unsigned int t_MemWidth,
    unsigned int t_IndexWidth,
    unsigned int t_MaxK,
    unsigned int t_MaxM,
    unsigned int t_MaxNnz
    >
class Spmv

template <
    typename TS,
    typename TD
    >
class WideConv

template <
    typename T,
    unsigned int t_Width,
    unsigned int t_DataWidth = sizeof(T) * 8
    >
class WideType


#include "xf_fintech/dimv.hpp"
template <
    typename t_DataType,
    unsigned int t_N,
    unsigned int t_NumDiag,
    unsigned int t_EntriesInParallel
    >
void dimv (
    t_DataType p_in [t_N][t_NumDiag],
    t_DataType p_inV [t_N],
    unsigned int p_n,
    t_DataType p_outV [t_N]
    )

Diagonal matrix-vector multiplication. Preconditions: the matrix is square, p_N is a multiple of p_NumDiag, t_NumDiag > 1, t_NumDiag is an odd number, and t_EntriesInParallel > t_NumDiag/2.


t_DataType data type
t_N maximum number of entries along diagonal line
t_NumDiag number of diagonal lines, indexed from lowest to highest; 3: tridiagonal; 5: pentadiagonal; 7: heptadiagonal
t_EntriesInParallel number of entries in each vector processed in parallel
p_in input diagonal matrix
p_inV input vector
p_n number of entries along diagonal line, must be multiple of t_EntriesInParallel
p_outV output vector