L2 Kernel APIs¶

// namespaces

namespace xf
    namespace xf::hpc
        namespace xf::hpc::cg
        namespace xf::hpc::rtm

// typedefs

typedef xf::blas::WideType <CG_dataType, CG_parEntries> CG_wideType
typedef CG_wideType::t_TypeInt CG_interface
typedef xf::blas::WideType <CG_dataType, CG_parEntries> CG_wideType
typedef CG_wideType::t_TypeInt CG_interface
typedef xf::blas::WideType <CG_dataType, CG_vecParEntries> CG_vecType
typedef CG_vecType::t_TypeInt CG_vecInterface
typedef hls::stream <uint32_t> CG_paramStrType
typedef hls::stream <ap_uint <32*CG_numChannels>> CG_wideParamStrType
typedef hls::stream <CG_interface> CG_wideStrType
typedef hls::stream <ap_uint <SPARSE_dataBits>> CG_datStrType
typedef hls::stream <ap_uint <CG_tkStrWidth>> CG_tkStrType
typedef hls::stream <ap_uint <SPARSE_indexBits>> CG_idxStrType
typedef xf::blas::WideType <CG_dataType, CG_parEntries> CG_wideType
typedef CG_wideType::t_TypeInt CG_interface
typedef xf::blas::WideType <CG_dataType, CG_parEntries> CG_wideType
typedef CG_wideType::t_TypeInt CG_interface
typedef xf::blas::WideType <CG_dataType, CG_vecParEntries> CG_vecType
typedef CG_vecType::t_TypeInt CG_vecInterface
typedef xf::blas::WideType <CG_dataType, CG_vecParEntries> CG_wideType
typedef CG_wideType::t_TypeInt CG_interface
typedef xf::blas::WideType <CG_dataType, CG_vecParEntries> CG_wideType
typedef CG_wideType::t_TypeInt CG_interface
typedef xf::blas::WideType <CG_dataType, CG_vecParEntries> CG_wideType
typedef CG_wideType::t_TypeInt CG_interface
typedef xf::blas::WideType <CG_dataType, CG_vecParEntries> CG_wideType
typedef CG_wideType::t_TypeInt CG_interface
typedef xf::blas::WideType <CG_dataType, CG_parEntries> CG_wideType
typedef CG_wideType::t_TypeInt CG_interface
typedef RTM2D <RTM_dataType, RTM_order, RTM_maxDim, RTM_MaxB, RTM_nPE> RTM_TYPE
typedef RTM_TYPE::t_PairInType RTM_pairType
typedef RTM_TYPE::t_InType RTM_vtType
typedef RTM_TYPE::t_UpbInType RTM_upbType
typedef xf::blas::WideType <RTM_dataType, RTM_parEntries> RTM_wideType
typedef RTM_wideType::t_TypeInt RTM_interface
typedef RTM2D <RTM_dataType, RTM_order, RTM_maxDim, RTM_MaxB, RTM_nPE> RTM_TYPE
typedef RTM_TYPE::t_PairInType RTM_pairType
typedef RTM_TYPE::t_InType RTM_vtType
typedef RTM_TYPE::t_UpbInType RTM_upbType
typedef xf::blas::WideType <RTM_dataType, RTM_parEntries> RTM_wideType
typedef RTM_wideType::t_TypeInt RTM_interface
typedef Domain3D <RTM_maxZ, RTM_maxY, RTM_order/2, RTM_nPEZ, RTM_nPEX, RTM_numFSMs> DOMAIN_TYPE
typedef RTM3D <DOMAIN_TYPE, RTM_dataType, RTM_order, RTM_maxZ, RTM_maxY, RTM_MaxB, RTM_nPEZ, RTM_nPEX> RTM_TYPE
typedef RTM_TYPE::t_InType RTM_type
typedef RTM_TYPE::t_UpbInType RTM_upbType
typedef Domain3D <RTM_maxZ, RTM_maxY, RTM_order/2, RTM_nPEZ, RTM_nPEX, RTM_numFSMs> DOMAIN_TYPE
typedef RTM3D <DOMAIN_TYPE, RTM_dataType, RTM_order, RTM_maxZ, RTM_maxY, RTM_MaxB, RTM_nPEZ, RTM_nPEX> RTM_TYPE
typedef RTM_TYPE::t_InType RTM_type

// macros

#define AP_CTRL_NONE(NAME)
#define AXIS(NAME)

#define POINTER( \
    NAME, \
    BUNDLE \
    )

#define PRAGMA_HLS(x)
#define SCALAR(NAME)

Global Functions¶

krnl_control¶

#include "cgSolver/krnl_control.hpp"

void krnl_control (
    CG_interface* p_instr,
    hls::stream <uint8_t>& p_signal,
    hls::stream <uint64_t>& p_clock,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenIn,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenOut
    )

krnl_control kernel function to load instructions and control the cg solver

Parameters:

p_instr

the memory address to instructions

krnl_duplicate¶

#include "cgSolver/krnl_duplicate.hpp"

void krnl_duplicate (
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenIn,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenX,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenR
    )

krnl_duplicate kernel function to duplicate the input token stream into two output token streams

Parameters:

p_tokenIn	input stream carries the token for execution
p_tokenX	first output stream carries the duplicated token for execution
p_tokenR	second output stream carries the duplicated token for execution

krnl_gemv¶

#include "cgSolver/krnl_gemv.hpp"

void krnl_gemv (
    CG_interface* p_A0,
    CG_interface* p_pk,
    CG_vecInterface* p_pkc,
    CG_vecInterface* p_Apk,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenInA,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenOut
    )

krnl_gemv kernel function to compute A * p

Parameters:

p_A0	the memory address to matrix A
p_pk	the input memory address to vector pk
p_Apk	the output memory address to vector Apk
p_tokenInA	input stream carries the token for execution
p_tokenOut	output stream carries the token for execution

krnl_timer¶

#include "cgSolver/krnl_timer.hpp"

void krnl_timer (
    hls::stream <xf::hpc::Signal_t>& p_signal,
    hls::stream <xf::hpc::Clock_t>& p_clock
    )

krnl_timer kernel function to count clock cycles

Parameters:

p_signal	the input signal stream
p_clock	the output clock stream

krnl_update_pk¶

#include "cgSolver/krnl_update_pk.hpp"

void krnl_update_pk (
    CG_interface* p_rk,
    CG_interface* p_pk_in,
    CG_interface* p_pk_out,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenIn,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenOut
    )

krnl_update_pk kernel function to update vector pk

Parameters:

p_rk	the memory address to vector rk
p_pk_in	the input memory address to vector pk
p_pk_out	the output memory address to vector pk
p_tokenIn	input stream carries the token for execution
p_tokenOut	output stream carries the token for execution

krnl_update_rk¶

#include "cgSolver/krnl_update_rk.hpp"

void krnl_update_rk (
    CG_interface* p_rk_in,
    CG_interface* p_rk_out,
    CG_interface* p_Apk,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenIn,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenOut
    )

krnl_update_rk kernel function to update the vector rk

Parameters:

p_rk_in	the input memory address to vector rk
p_rk_out	the output memory address to vector rk
p_Apk	the memory address to vector Apk
p_tokenIn	input stream carries the token for execution
p_tokenOut	output stream carries the token for execution

krnl_update_rk_jacobi¶

#include "cgSolver/krnl_update_rk_jacobi.hpp"

void krnl_update_rk_jacobi (
    CG_interface* p_rk_in,
    CG_interface* p_rk_out,
    CG_interface* p_zk,
    CG_interface* p_jacobi,
    CG_interface* p_Apk,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenIn,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenOut
    )

krnl_update_rk_jacobi kernel function to update the vector rk with Jacobi preconditioning

Parameters:

p_rk_in	the input memory address to vector rk
p_rk_out	the output memory address to vector rk
p_Apk	the memory address to vector Apk
p_tokenIn	input stream carries the token for execution
p_tokenOut	output stream carries the token for execution

krnl_update_xr¶

krnl_update_xr overload (1)¶

#include "cgSolver/krnl_update_xk.hpp"

void krnl_update_xr (
    CG_interface* p_xk_in,
    CG_interface* p_xk_out,
    CG_interface* p_pk,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenIn
    )

krnl_update_xr kernel function to update the vector xk

Parameters:

p_xk_in	the input memory address to vector xk
p_xk_out	the output memory address to vector xk
p_pk	the memory address to vector pk
p_tokenIn	input stream carries the token for execution

krnl_update_xr overload (2)¶

#include "cgSolver/krnl_update_xr.hpp"

void krnl_update_xr (
    CG_interface* p_xk_in,
    CG_interface* p_xk_out,
    CG_interface* p_rk_in,
    CG_interface* p_rk_out,
    CG_interface* p_pk,
    CG_interface* p_Apk,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenIn,
    hls::stream <ap_uint <CG_tkStrWidth>>& p_tokenOut
    )

krnl_update_xr kernel function to update the vector xk and rk

Parameters:

p_xk_in	the input memory address to vector xk
p_xk_out	the output memory address to vector xk
p_rk_in	the input memory address to vector rk
p_rk_out	the output memory address to vector rk
p_pk	the memory address to vector pk
p_Apk	the memory address to vector Apk
p_tokenIn	input stream carries the token for execution
p_tokenOut	output stream carries the token for execution

fcnKernel¶

#include "mlp/fcnKernel.hpp"

void fcnKernel (
    DdrIntType* p_DdrRd,
    DdrIntType* p_DdrWr
    )

fcnKernel defines the kernel top function, with DDR/HBM as an interface

Parameters:

p_DdrRd	is DDR/HBM memory address used for read
p_DdrWr	is DDR/HBM memory address used for write

rtmbackward¶

#include "rtm2d/rtmbackward.hpp"

void rtmbackward (
    const unsigned int p_z,
    const unsigned int p_x,
    const unsigned int p_t,
    const unsigned int p_recz,
    const RTM_dataType* p_rec,
    const RTM_dataType* p_coefz,
    const RTM_dataType* p_coefx,
    const RTM_dataType* p_taperz,
    const RTM_dataType* p_taperx,
    const RTM_interface* p_v2dt2,
    RTM_interface* p_p0,
    RTM_interface* p_p1,
    RTM_interface* p_r0,
    RTM_interface* p_r1,
    RTM_interface* p_i0,
    RTM_interface* p_i1,
    RTM_upbType* p_upb
    )

rtmbackward kernel function

Parameters:

p_z	is the number of grids along detecting depth
p_x	is the number of grids along detecting width
p_t	is the number of detecting time partitions
p_recz	is the z coordinate of all receivers
p_rec	is the receiver data wavefields
p_coefz	is the laplacian z-direction coefficients
p_coefx	is the laplacian x-direction coefficients
p_taperz	is the absorbing factor along z
p_taperx	is the absorbing factor along x
p_v2dt2	is the velocity model v^2 * dt^2
p_upb	is the upper boundary wavefield
p_p0	is the first input memory of source wavefield
p_p1	is the second input memory of source wavefield
p_r0	is the first input memory of receiver wavefield
p_r1	is the second input memory of receiver wavefield
p_i0	is the first input memory of cross-correlation images
p_i1	is the second input memory of cross-correlation images

rtmforward¶

rtmforward overload (1)¶

#include "rtm2d/rtmforward.hpp"

void rtmforward (
    const unsigned int p_z,
    const unsigned int p_x,
    const unsigned int p_t,
    const unsigned int p_srcz,
    const unsigned int p_srcx,
    const RTM_dataType* p_src,
    const RTM_dataType* p_coefz,
    const RTM_dataType* p_coefx,
    const RTM_dataType* p_taperz,
    const RTM_dataType* p_taperx,
    const RTM_interface* p_v2dt2,
    RTM_interface* p_p0,
    RTM_interface* p_p1,
    RTM_upbType* p_upb
    )

rtmforward kernel function

Parameters:

p_z	is the number of grids along detecting depth
p_x	is the number of grids along detecting width
p_t	is the number of detecting time partitions
p_srcz	is the source z coordinate
p_srcx	is the source x coordinate
p_src	is the source wavefield
p_coefz	is the laplacian z-direction coefficients
p_coefx	is the laplacian x-direction coefficients
p_taperz	is the absorbing factor along z
p_taperx	is the absorbing factor along x
p_v2dt2	is the velocity model v^2 * dt^2
p_p0	is the first input memory of source wavefield
p_p1	is the second input memory of source wavefield
p_upb	is the upper boundary wavefield

rtmforward overload (2)¶

#include "rtm3d/rtmforward_hbc.hpp"

void rtmforward (
    const unsigned int p_z,
    const unsigned int p_y,
    const unsigned int p_x,
    const unsigned int p_t,
    const unsigned int p_srcz,
    const unsigned int p_srcy,
    const unsigned int p_srcx,
    const RTM_dataType* p_src,
    const RTM_dataType* p_coefz,
    const RTM_dataType* p_coefy,
    const RTM_dataType* p_coefx,
    const RTM_dataType* p_taperz,
    const RTM_dataType* p_tapery,
    const RTM_dataType* p_taperx,
    const RTM_type* p_v2dt2,
    RTM_type* p_pi0,
    RTM_type* p_pi1,
    RTM_type* p_po0,
    RTM_type* p_po1,
    RTM_type* p_ppi0,
    RTM_type* p_ppi1,
    RTM_type* p_ppo0,
    RTM_type* p_ppo1,
    RTM_upbType* p_upb
    )

rtmforward kernel function

Parameters:

p_z	is the number of grids along z
p_y	is the number of grids along y
p_x	is the number of grids along x
p_t	is the number of detecting time partitions
p_srcz	is the source z coordinate
p_srcy	is the source y coordinate
p_srcx	is the source x coordinate
p_src	is the source wavefield
p_coefz	is the laplacian z-direction coefficients
p_coefy	is the laplacian y-direction coefficients
p_coefx	is the laplacian x-direction coefficients
p_taperz	is the absorbing factor along z
p_tapery	is the absorbing factor along y
p_taperx	is the absorbing factor along x
p_v2dt2	is the velocity model v^2 * dt^2
p_pi0	is the first input memory of pressure wavefield at t-1
p_pi1	is the second input memory of pressure wavefield at t-1
p_po0	is the first output memory of pressure wavefield at t-1
p_po1	is the second output memory of pressure wavefield at t-1
p_ppi0	is the first input memory of pressure wavefield at t-2
p_ppi1	is the second input memory of pressure wavefield at t-2
p_ppo0	is the first output memory of pressure wavefield at t-2
p_ppo1	is the second output memory of pressure wavefield at t-2
p_upb	is the upper boundary wavefield

rtmforward overload (3)¶

#include "rtm3d/rtmforward_rbc.hpp"

void rtmforward (
    const unsigned int p_z,
    const unsigned int p_y,
    const unsigned int p_x,
    const unsigned int p_t,
    const unsigned int p_srcz,
    const unsigned int p_srcy,
    const unsigned int p_srcx,
    const RTM_dataType* p_src,
    const RTM_dataType* p_coefz,
    const RTM_dataType* p_coefy,
    const RTM_dataType* p_coefx,
    const RTM_type* p_v2dt2,
    RTM_type* p_pi0,
    RTM_type* p_pi1,
    RTM_type* p_po0,
    RTM_type* p_po1,
    RTM_type* p_ppi0,
    RTM_type* p_ppi1,
    RTM_type* p_ppo0,
    RTM_type* p_ppo1
    )

rtmforward kernel function

Parameters:

p_z	is the number of grids along z
p_y	is the number of grids along y
p_x	is the number of grids along x
p_t	is the number of detecting time partitions
p_srcz	is the source z coordinate
p_srcy	is the source y coordinate
p_srcx	is the source x coordinate
p_src	is the source wavefield
p_coefz	is the laplacian z-direction coefficients
p_coefy	is the laplacian y-direction coefficients
p_coefx	is the laplacian x-direction coefficients
p_v2dt2	is the velocity model v^2 * dt^2
p_pi0	is the first input memory of pressure wavefield at t-1
p_pi1	is the second input memory of pressure wavefield at t-1
p_po0	is the first output memory of pressure wavefield at t-1
p_po1	is the second output memory of pressure wavefield at t-1
p_ppi0	is the first input memory of pressure wavefield at t-2
p_ppi1	is the second input memory of pressure wavefield at t-2
p_ppo0	is the first output memory of pressure wavefield at t-2
p_ppo1	is the second output memory of pressure wavefield at t-2

streamTimer¶

#include "streamTimer.hpp"

void streamTimer (
    hls::stream <xf::hpc::Signal_t>& p_signal,
    hls::stream <uint64_t>& p_clock
    )

streamTimer kernel function to count clock cycles

Parameters:

p_signal	the input signal stream
p_clock	the output clock stream