L2 Kernel APIs

// namespaces

namespace xf
    namespace xf::hpc
        namespace xf::hpc::cg
        namespace xf::hpc::rtm

// typedefs

typedef xf::blas::WideType <CG_dataType, CG_parEntries> CG_wideType
typedef CG_wideType::t_TypeInt CG_interface
typedef xf::blas::WideType <CG_dataType, CG_parEntries> CG_wideType
typedef CG_wideType::t_TypeInt CG_interface
typedef xf::blas::WideType <CG_dataType, CG_vecParEntries> CG_vecType
typedef CG_vecType::t_TypeInt CG_vecInterface
typedef hls::stream <uint32_t> CG_paramStrType
typedef hls::stream <ap_uint <32*CG_numChannels>> CG_wideParamStrType
typedef hls::stream <CG_interface> CG_wideStrType
typedef hls::stream <ap_uint <SPARSE_dataBits>> CG_datStrType
typedef hls::stream <ap_uint <CG_tkStrWidth>> CG_tkStrType
typedef hls::stream <ap_uint <SPARSE_indexBits>> CG_idxStrType
typedef xf::blas::WideType <CG_dataType, CG_parEntries> CG_wideType
typedef CG_wideType::t_TypeInt CG_interface
typedef xf::blas::WideType <CG_dataType, CG_parEntries> CG_wideType
typedef CG_wideType::t_TypeInt CG_interface
typedef xf::blas::WideType <CG_dataType, CG_vecParEntries> CG_vecType
typedef CG_vecType::t_TypeInt CG_vecInterface
typedef xf::blas::WideType <CG_dataType, CG_vecParEntries> CG_wideType
typedef CG_wideType::t_TypeInt CG_interface
typedef xf::blas::WideType <CG_dataType, CG_vecParEntries> CG_wideType
typedef CG_wideType::t_TypeInt CG_interface
typedef xf::blas::WideType <CG_dataType, CG_vecParEntries> CG_wideType
typedef CG_wideType::t_TypeInt CG_interface
typedef xf::blas::WideType <CG_dataType, CG_vecParEntries> CG_wideType
typedef CG_wideType::t_TypeInt CG_interface
typedef xf::blas::WideType <CG_dataType, CG_parEntries> CG_wideType
typedef CG_wideType::t_TypeInt CG_interface
typedef RTM2D <RTM_dataType, RTM_order, RTM_maxDim, RTM_MaxB, RTM_nPE> RTM_TYPE
typedef RTM_TYPE::t_PairInType RTM_pairType
typedef RTM_TYPE::t_InType RTM_vtType
typedef RTM_TYPE::t_UpbInType RTM_upbType
typedef xf::blas::WideType <RTM_dataType, RTM_parEntries> RTM_wideType
typedef RTM_wideType::t_TypeInt RTM_interface
typedef RTM2D <RTM_dataType, RTM_order, RTM_maxDim, RTM_MaxB, RTM_nPE> RTM_TYPE
typedef RTM_TYPE::t_PairInType RTM_pairType
typedef RTM_TYPE::t_InType RTM_vtType
typedef RTM_TYPE::t_UpbInType RTM_upbType
typedef xf::blas::WideType <RTM_dataType, RTM_parEntries> RTM_wideType
typedef RTM_wideType::t_TypeInt RTM_interface
typedef Domain3D <RTM_maxZ, RTM_maxY, RTM_order/2, RTM_nPEZ, RTM_nPEX, RTM_numFSMs> DOMAIN_TYPE
typedef RTM3D <DOMAIN_TYPE, RTM_dataType, RTM_order, RTM_maxZ, RTM_maxY, RTM_MaxB, RTM_nPEZ, RTM_nPEX> RTM_TYPE
typedef RTM_TYPE::t_InType RTM_type
typedef RTM_TYPE::t_UpbInType RTM_upbType
typedef Domain3D <RTM_maxZ, RTM_maxY, RTM_order/2, RTM_nPEZ, RTM_nPEX, RTM_numFSMs> DOMAIN_TYPE
typedef RTM3D <DOMAIN_TYPE, RTM_dataType, RTM_order, RTM_maxZ, RTM_maxY, RTM_MaxB, RTM_nPEZ, RTM_nPEX> RTM_TYPE
typedef RTM_TYPE::t_InType RTM_type

// macros

#define AP_CTRL_NONE(NAME)
#define AXIS(NAME)

#define POINTER( \
    NAME, \
    BUNDLE \
    )

#define PRAGMA_HLS(x)
#define SCALAR(NAME)

Global Functions

krnl_control

#include "cgSolver/krnl_control.hpp"
void krnl_control (
    CG_interface* p_instr,
    hls::stream <uint8_t>& p_signal,
    hls::stream <uint64_t>& p_clock,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenIn,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenOut
    )

krnl_control kernel function to load instructions and control the cg solver

Parameters:

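p_instr the memory address to instructions
p_signal the signal stream exchanged with the timer kernel (presumably an output of this kernel; to be confirmed)
p_clock the clock stream exchanged with the timer kernel (presumably an input of this kernel; to be confirmed)
p_tokenIn input stream carries the token for execution
p_tokenOut output stream carries the token for execution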

krnl_duplicate

#include "cgSolver/krnl_duplicate.hpp"
void krnl_duplicate (
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenIn,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenX,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenR
    )

krnl_duplicate kernel function to duplicate the input token stream onto two output token streams

Parameters:

p_tokenIn input stream carries the token for execution
p_tokenX output stream carries the duplicated token for execution (presumably consumed by the xk update kernel)
p_tokenR output stream carries the duplicated token for execution (presumably consumed by the rk update kernel)

krnl_gemv

#include "cgSolver/krnl_gemv.hpp"
void krnl_gemv (
    CG_interface* p_A0,
    CG_interface* p_pk,
    CG_vecInterface* p_pkc,
    CG_vecInterface* p_Apk,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenInA,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenOut
    )

krnl_gemv kernel function to compute A * p

Parameters:

p_A0 the memory address to matrix A
p_pk the input memory address to vector pk
p_pkc the memory address to vector pk accessed through CG_vecInterface (exact role to be confirmed)
p_Apk the output memory address to vector Apk
p_tokenInA input stream carries the token for execution
p_tokenOut output stream carries the token for execution

krnl_timer

#include "cgSolver/krnl_timer.hpp"
void krnl_timer (
    hls::stream <xf::hpc::Signal_t>& p_signal,
    hls::stream <xf::hpc::Clock_t>& p_clock
    )

krnl_timer kernel function to count clock cycles

Parameters:

p_signal the input signal stream
p_clock the output clock stream

krnl_update_pk

#include "cgSolver/krnl_update_pk.hpp"
void krnl_update_pk (
    CG_interface* p_rk,
    CG_interface* p_pk_in,
    CG_interface* p_pk_out,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenIn,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenOut
    )

krnl_update_pk kernel function to update vector pk

Parameters:

p_rk the memory address to vector rk
p_pk_in the input memory address to vector pk
p_pk_out the output memory address to vector pk
p_tokenIn input stream carries the token for execution
p_tokenOut output stream carries the token for execution

krnl_update_rk

#include "cgSolver/krnl_update_rk.hpp"
void krnl_update_rk (
    CG_interface* p_rk_in,
    CG_interface* p_rk_out,
    CG_interface* p_Apk,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenIn,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenOut
    )

krnl_update_rk kernel function to update the vector rk

Parameters:

p_rk_in the input memory address to vector rk
p_rk_out the output memory address to vector rk
p_Apk the memory address to vector Apk
p_tokenIn input stream carries the token for execution
p_tokenOut output stream carries the token for execution

krnl_update_rk_jacobi

#include "cgSolver/krnl_update_rk_jacobi.hpp"
void krnl_update_rk_jacobi (
    CG_interface* p_rk_in,
    CG_interface* p_rk_out,
    CG_interface* p_zk,
    CG_interface* p_jacobi,
    CG_interface* p_Apk,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenIn,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenOut
    )

krnl_update_rk_jacobi kernel function to update the vector rk (Jacobi-preconditioned variant)

Parameters:

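p_rk_in the input memory address to vector rk
p_rk_out the output memory address to vector rk
p_zk the memory address to vector zk (presumably the preconditioned residual; to be confirmed)
p_jacobi the memory address to the Jacobi preconditioner data (to be confirmed)
p_Apk the memory address to vector Apk
p_tokenIn input stream carries the token for execution
p_tokenOut output stream carries the token for execution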

krnl_update_xr

krnl_update_xr overload (1)

#include "cgSolver/krnl_update_xk.hpp"
void krnl_update_xr (
    CG_interface* p_xk_in,
    CG_interface* p_xk_out,
    CG_interface* p_pk,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenIn
    )

krnl_update_xr kernel function to update the vector xk

Parameters:

p_xk_in the input memory address to vector xk
p_xk_out the output memory address to vector xk
p_pk the memory address to vector pk
p_tokenIn input stream carries the token for execution

krnl_update_xr overload (2)

#include "cgSolver/krnl_update_xr.hpp"
void krnl_update_xr (
    CG_interface* p_xk_in,
    CG_interface* p_xk_out,
    CG_interface* p_rk_in,
    CG_interface* p_rk_out,
    CG_interface* p_pk,
    CG_interface* p_Apk,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenIn,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenOut
    )

krnl_update_xr kernel function to update the vector xk and rk

Parameters:

p_xk_in the input memory address to vector xk
p_xk_out the output memory address to vector xk
p_rk_in the input memory address to vector rk
p_rk_out the output memory address to vector rk
p_pk the memory address to vector pk
p_Apk the memory address to vector Apk
p_tokenIn input stream carries the token for execution
p_tokenOut output stream carries the token for execution

fcnKernel

#include "mlp/fcnKernel.hpp"
void fcnKernel (
    DdrIntType* p_DdrRd,
    DdrIntType* p_DdrWr
    )

fcnKernel defines the kernel top function, with DDR/HBM as an interface

Parameters:

p_DdrRd is DDR/HBM memory address used for read
p_DdrWr is DDR/HBM memory address used for write

rtmbackward

#include "rtm2d/rtmbackward.hpp"
void rtmbackward (
    const unsigned int p_z,
    const unsigned int p_x,
    const unsigned int p_t,
    const unsigned int p_recz,
    const RTM_dataType* p_rec,
    const RTM_dataType* p_coefz,
    const RTM_dataType* p_coefx,
    const RTM_dataType* p_taperz,
    const RTM_dataType* p_taperx,
    const RTM_interface* p_v2dt2,
    RTM_interface* p_p0,
    RTM_interface* p_p1,
    RTM_interface* p_r0,
    RTM_interface* p_r1,
    RTM_interface* p_i0,
    RTM_interface* p_i1,
    RTM_upbType* p_upb
    )

rtmbackward kernel function

Parameters:

p_z is the number of grids along detecting depth
p_x is the number of grids along detecting width
p_t is the number of detecting time partitions
p_recz is the z coordinate of all receivers
p_rec is the receiver data wavefields
p_coefz is the laplacian z-direction coefficients
p_coefx is the laplacian x-direction coefficients
p_taperz is the absorbing factor along z
p_taperx is the absorbing factor along x
p_v2dt2 is the velocity model v^2 * dt^2
p_upb is the upper boundary wavefield
p_p0 is the first input memory of source wavefield
p_p1 is the second input memory of source wavefield
p_r0 is the first input memory of receiver wavefield
p_r1 is the second input memory of receiver wavefield
p_i0 is the first input memory of cross-correlation images
p_i1 is the second input memory of cross-correlation images

rtmforward

rtmforward overload (1)

#include "rtm2d/rtmforward.hpp"
void rtmforward (
    const unsigned int p_z,
    const unsigned int p_x,
    const unsigned int p_t,
    const unsigned int p_srcz,
    const unsigned int p_srcx,
    const RTM_dataType* p_src,
    const RTM_dataType* p_coefz,
    const RTM_dataType* p_coefx,
    const RTM_dataType* p_taperz,
    const RTM_dataType* p_taperx,
    const RTM_interface* p_v2dt2,
    RTM_interface* p_p0,
    RTM_interface* p_p1,
    RTM_upbType* p_upb
    )

rtmforward kernel function

Parameters:

p_z is the number of grids along detecting depth
p_x is the number of grids along detecting width
p_t is the number of detecting time partitions
p_srcz is the source z coordinate
p_srcx is the source x coordinate
p_src is the source wavefield
p_coefz is the laplacian z-direction coefficients
p_coefx is the laplacian x-direction coefficients
p_taperz is the absorbing factor along z
p_taperx is the absorbing factor along x
p_v2dt2 is the velocity model v^2 * dt^2
p_p0 is the first input memory of source wavefield
p_p1 is the second input memory of source wavefield
p_upb is the upper boundary wavefield

rtmforward overload (2)

#include "rtm3d/rtmforward_hbc.hpp"
void rtmforward (
    const unsigned int p_z,
    const unsigned int p_y,
    const unsigned int p_x,
    const unsigned int p_t,
    const unsigned int p_srcz,
    const unsigned int p_srcy,
    const unsigned int p_srcx,
    const RTM_dataType* p_src,
    const RTM_dataType* p_coefz,
    const RTM_dataType* p_coefy,
    const RTM_dataType* p_coefx,
    const RTM_dataType* p_taperz,
    const RTM_dataType* p_tapery,
    const RTM_dataType* p_taperx,
    const RTM_type* p_v2dt2,
    RTM_type* p_pi0,
    RTM_type* p_pi1,
    RTM_type* p_po0,
    RTM_type* p_po1,
    RTM_type* p_ppi0,
    RTM_type* p_ppi1,
    RTM_type* p_ppo0,
    RTM_type* p_ppo1,
    RTM_upbType* p_upb
    )

rtmforward kernel function

Parameters:

p_z is the number of grids along z
p_y is the number of grids along y
p_x is the number of grids along x
p_t is the number of detecting time partitions
p_srcz is the source z coordinate
p_srcy is the source y coordinate
p_srcx is the source x coordinate
p_src is the source wavefield
p_coefz is the laplacian z-direction coefficients
p_coefy is the laplacian y-direction coefficients
p_coefx is the laplacian x-direction coefficients
p_taperz is the absorbing factor along z
p_tapery is the absorbing factor along y
p_taperx is the absorbing factor along x
p_v2dt2 is the velocity model v^2 * dt^2
p_pi0 is the first input memory of pressure wavefield at t-1
p_pi1 is the second input memory of pressure wavefield at t-1
p_po0 is the first output memory of pressure wavefield at t-1
p_po1 is the second output memory of pressure wavefield at t-1
p_ppi0 is the first input memory of pressure wavefield at t-2
p_ppi1 is the second input memory of pressure wavefield at t-2
p_ppo0 is the first output memory of pressure wavefield at t-2
p_ppo1 is the second output memory of pressure wavefield at t-2
p_upb is the upper boundary wavefield

rtmforward overload (3)

#include "rtm3d/rtmforward_rbc.hpp"
void rtmforward (
    const unsigned int p_z,
    const unsigned int p_y,
    const unsigned int p_x,
    const unsigned int p_t,
    const unsigned int p_srcz,
    const unsigned int p_srcy,
    const unsigned int p_srcx,
    const RTM_dataType* p_src,
    const RTM_dataType* p_coefz,
    const RTM_dataType* p_coefy,
    const RTM_dataType* p_coefx,
    const RTM_type* p_v2dt2,
    RTM_type* p_pi0,
    RTM_type* p_pi1,
    RTM_type* p_po0,
    RTM_type* p_po1,
    RTM_type* p_ppi0,
    RTM_type* p_ppi1,
    RTM_type* p_ppo0,
    RTM_type* p_ppo1
    )

rtmforward kernel function

Parameters:

p_z is the number of grids along z
p_y is the number of grids along y
p_x is the number of grids along x
p_t is the number of detecting time partitions
p_srcz is the source z coordinate
p_srcy is the source y coordinate
p_srcx is the source x coordinate
p_src is the source wavefield
p_coefz is the laplacian z-direction coefficients
p_coefy is the laplacian y-direction coefficients
p_coefx is the laplacian x-direction coefficients
p_v2dt2 is the velocity model v^2 * dt^2
p_pi0 is the first input memory of pressure wavefield at t-1
p_pi1 is the second input memory of pressure wavefield at t-1
p_po0 is the first output memory of pressure wavefield at t-1
p_po1 is the second output memory of pressure wavefield at t-1
p_ppi0 is the first input memory of pressure wavefield at t-2
p_ppi1 is the second input memory of pressure wavefield at t-2
p_ppo0 is the first output memory of pressure wavefield at t-2
p_ppo1 is the second output memory of pressure wavefield at t-2

streamTimer

#include "streamTimer.hpp"
void streamTimer (
    hls::stream <xf::hpc::Signal_t>& p_signal,
    hls::stream <uint64_t>& p_clock
    )

streamTimer kernel function to count clock cycles

Parameters:

p_signal the input signal stream
p_clock the output clock stream