namespace details

// typedefs

typedef int idct25_t
typedef ap_int <24> idctm_t

// enums

enum
{
    w1 = 2841
    w2 = 2676
    w3 = 2408
    w5 = 1609
    w6 = 1108
    w7 = 565
    w1pw7 = w1 + w7
    w1mw7 = w1 - w7
    w2pw6 = w2 + w6
    w2mw6 = w2 - w6
    w3pw5 = w3 + w5
    w3mw5 = w3 - w5
    r2 = 181
}

decoder_jpg_full_top

#include "jpegDec/XAcc_idct.hpp"
template <int _WAxi>
void decoder_jpg_full_top (
    ap_uint <_WAxi>* ptr,
    const int sz,
    const int c,
    const uint16_t dht_tbl1 [2][2][1<< (9)],
    uint8_t ac_value_buckets [2][165],
    HCODE_T ac_huff_start_code [2][6],
    int16_t ac_huff_start_addr [2][16],
    uint8_t dc_value_buckets [2][12],
    HCODE_T dc_huff_start_code [2][3],
    int16_t dc_huff_start_addr [2][16],
    ap_uint <12> hls_cmp,
    const uint8_t hls_mbs [(3)],
    const uint8_t q_tables [2][8][8],
    const img_info img_info,
    const bas_info bas_info,
    int& rtn2,
    uint32_t& rst_cnt,
    ap_uint <64>* yuv_mcu_pointer
    )

Level 1 : decode all mcu with burst read data from DDR.

Parameters:

_WAxi size of data path in dataflow region, in bits. When _WAxi is 16, the decoder can decode one symbol per cycle in about 99% of cases. When _WAxi is 8, the decoder can decode one symbol per cycle in about 80% of cases, but uses fewer resources.
ptr the pointer to DDR.
sz the total bytes to be read from DDR.
c the column to be read from AXI in the case when AXI_WIDTH > 8*sizeof(char)
dht_tbl1/dht_tbl2 the segment data of the Define Huffman Table (DHT) marker.
hls_cmp the shift register organized by the index of each color component.
hls_mbs the number of blocks in mcu for each component.
q_tables the quantization tables.
img_info includes hls_cs_cmpc/hls_mbs/hls_mcuh/hls_mcuc; it is just for csim tests.
bas_info the basic information for the image.
yuv_mcu_pointer pointer to the hls_mcuc*{hls_mbs[0~2]*{Y/U/V}}

burstWrite

#include "jpegDec/XAcc_idct.hpp"
template <int _WAxi>
void burstWrite (
    ap_uint <_WAxi>* yuv_mcu_pointer,
    hls::stream <idct_out_t> strm_iDCT_x8 [8],
    const uint32_t all_blocks
    )

Template function that bursts the output stream out over an AXI port of width _WAxi.

Parameters:

_WAxi width of axi port.
yuv_mcu_pointer AXI master port to write to, e.g. 64 bits wide.
strm_iDCT_x8 stream width is 8 bits

parser_jpg_top

#include "jpegDec/XAcc_jfifparser.hpp"
void parser_jpg_top (
    ap_uint < (16)>* datatoDDR,
    const int size,
    int& r,
    int& c,
    uint16_t dht_tbl1 [2][2][1<< (9)],
    uint8_t ac_value_buckets [2][165],
    HCODE_T ac_huff_start_code [2][6],
    int16_t ac_huff_start_addr [2][16],
    uint8_t dc_value_buckets [2][12],
    HCODE_T dc_huff_start_code [2][3],
    int16_t dc_huff_start_addr [2][16],
    ap_uint <12>& hls_cmp,
    int& left,
    uint8_t hls_mbs [(3)],
    uint8_t q_tables [2][8][8],
    int& rtn,
    img_info& img_info,
    cmp_info cmp_info [(3)],
    bas_info& bas_info
    )

Level 1 : parse the JFIF register for the JPEG decoder.

Parameters:

CH_W size of data path in dataflow region, in bits. When CH_W is 16, the decoder can decode one symbol per cycle in about 99% of cases. When CH_W is 8, the decoder can decode one symbol per cycle in about 80% of cases, but uses fewer resources.
datatoDDR the pointer to DDR.
size the total bytes to be read from DDR.
r the index of vector to be read from AXI in all cases
c the column to be read from AXI in the case when AXI_WIDTH > 8*sizeof(char)
dht_tbl1/dht_tbl2 the segment data of the Define Huffman Table (DHT) marker.
hls_cmp the shift register organized by the index of each color component.
left the number of bytes to be read from DDR after parsing.
hls_mbs the number of blocks in mcu for each component.
q_tables the quantization tables.
rtn return flag.
img_info includes hls_cs_cmpc/hls_mbs/hls_mcuh/hls_mcuc; it is just for csim tests.
cmp_info image information may be used to generate the bas_info .
bas_info information used by next module.

decoder_jpg_top

#include "jpegDec/XAcc_jfifparser.hpp"
void decoder_jpg_top (
    ap_uint < (16)>* ptr,
    const int sz,
    const int c,
    const uint16_t dht_tbl1 [2][2][1<< (9)],
    uint8_t ac_value_buckets [2][165],
    HCODE_T ac_huff_start_code [2][6],
    int16_t ac_huff_start_addr [2][16],
    uint8_t dc_value_buckets [2][12],
    HCODE_T dc_huff_start_code [2][3],
    int16_t dc_huff_start_addr [2][16],
    ap_uint <12> hls_cmp,
    const uint8_t hls_mbs [(3)],
    const img_info img_info,
    int& rtn2,
    uint32_t& rst_cnt,
    hls::stream <ap_uint <24>>& block_strm
    )

Level 1 : decode all mcu with burst read data from DDR.

Parameters:

CH_W size of data path in dataflow region, in bits. When CH_W is 16, the decoder can decode one symbol per cycle in about 99% of cases. When CH_W is 8, the decoder can decode one symbol per cycle in about 80% of cases, but uses fewer resources.
ptr the pointer to DDR.
sz the total bytes to be read from DDR.
c the column to be read from AXI in the case when AXI_WIDTH > 8*sizeof(char)
dht_tbl1/dht_tbl2 the segment data of the Define Huffman Table (DHT) marker.
hls_cmp the shift register organized by the index of each color component.
hls_mbs the number of blocks in mcu for each component.
img_info includes hls_cs_cmpc/hls_mbs/hls_mcuh/hls_mcuc; it is just for csim tests.
rtn return flag.
block_strm the stream of coefficients in block, 23: is_rst, 22: is_endblock, 21~16: bpos, 15~0: block val

mcu_decoder

#include "jpegDec/XAcc_jpegdecoder.hpp"
template <typename CHTYPE>
void mcu_decoder (
    hls::stream <CHTYPE>& image_strm,
    hls::stream <bool>& eof_strm,
    const uint16_t dht_tbl1 [2][2][1<< (9)],
    const uint8_t ac_value_buckets [2][165],
    const HCODE_T ac_huff_start_code [2][6],
    const int16_t ac_huff_start_addr [2][16],
    const uint8_t dc_value_buckets [2][12],
    const HCODE_T dc_huff_start_code [2][3],
    const int16_t dc_huff_start_addr [2][16],
    ap_uint <12> hls_cmp,
    const uint8_t hls_cs_cmpc,
    const uint8_t hls_mbs [(3)],
    const uint16_t hls_mcuh,
    const uint32_t hls_mcuc,
    int& rtn2,
    uint32_t& rst_cnt,
    hls::stream <ap_uint <24>>& block_strm
    )

Level 1 : decode all mcu.

Parameters:

CH_W size of data path in dataflow region, in bits. When CH_W is 16, the decoder can decode one symbol per cycle in about 99% of cases. When CH_W is 8, the decoder can decode one symbol per cycle in about 80% of cases, but uses fewer resources.
image_strm the stream of compressed data after SOS marker.
eof_strm the stream of end flags for image_strm, a synchronous signal using false and an additional true at the end.
dht_tbl1/dht_tbl2 the segment data of the Define Huffman Table (DHT) marker.
hls_cmp the shift register organized by the index of each color component.
image info includes hls_cs_cmpc/hls_mbs/hls_mcuh/hls_mcuc; it is just for csim tests.
block_strm the stream of coefficients in block, 23: is_rst, 22: is_endblock, 21~16: bpos, 15~0: block val

hls_next_mcupos2

#include "jpegDec/XAcc_jpegdecoder.hpp"
void hls_next_mcupos2 (
    hls::stream <ap_uint <24>>& block_strm,
    int16_t hls_block [(3)*(1036800)*64],
    int hls_sfv [4],
    int hls_sfh [4],
    const uint8_t hls_mbs [4],
    int hls_bch,
    int hls_bc,
    int32_t hls_mcuc,
    uint8_t hls_cs_cmpc,
    bool rtn2,
    int& sta
    )

convert strm to Aligned_block, for other integrations, not just JPEG

Parameters:

block_strm the stream of coefficients in block, 23: is_rst, 22: is_endblock, 21~16: bpos, 15~0: block val
hls_block the maxsize block, will be copied to the aligned_block
hls_sfv the sample factor vertical for each component
hls_sfh the sample factor horizontal for each component
hls_mbs the blocks in mcu for each component.
hls_bch the max block count horizontal (interleaved)
hls_bc the max block count (all) (interleaved)
sta the status of the process, 0: keep doing, 1: reset decoder(todo), 2: decode done