namespace details¶
// typedefs typedef int idct25_t typedef ap_int <24> idctm_t // enums enum { w1 = 2841 w2 = 2676 w3 = 2408 w5 = 1609 w6 = 1108 w7 = 565 w1pw7 = w1 + w7 w1mw7 = w1 - w7 w2pw6 = w2 + w6 w2mw6 = w2 - w6 w3pw5 = w3 + w5 w3mw5 = w3 - w5 r2 = 181 } // structs struct hls_huff_DHT struct hls_huff_segment struct sos_data // classes template < int _W, int _I, int _WBIT, int _NPPC > class BicubicInterpolator // global variables static const uint8_t hls_jpeg_zigzag_to_raster[64] const short hls_icos_base_8192_scaled[64]
decoder_jpg_full_top¶
#include "jpegDec/XAcc_idct.hpp"
template <int _WAxi> void decoder_jpg_full_top ( ap_uint <_WAxi>* ptr, const int sz, const int c, const uint16_t dht_tbl1 [2][2][1<< (9)], uint8_t ac_value_buckets [2][165], HCODE_T ac_huff_start_code [2][6], int16_t ac_huff_start_addr [2][16], uint8_t dc_value_buckets [2][12], HCODE_T dc_huff_start_code [2][3], int16_t dc_huff_start_addr [2][16], ap_uint <12> hls_cmp, const uint8_t hls_mbs [(3)], const uint8_t q_tables [2][8][8], const img_info img_info, const bas_info bas_info, int& rtn2, uint32_t& rst_cnt, ap_uint <64>* yuv_mcu_pointer )
Level 1 : decode all mcu with burst read data from DDR.
Parameters:
| _WAxi | size of data path in dataflow region, in bit. when _WAxi is 16, the decoder could decode one symbol per cycle in about 99% cases. when _WAxi is 8 , the decoder could decode one symbol per cycle in about 80% cases, but use less resource. |
| ptr | the pointer to DDR. |
| sz | the total bytes to be read from DDR. |
| c | the column to be read from AXI in the case when AXI_WIDTH > 8*sizeof(char) |
| dht_tbl1/dht_tbl2 | the segment data of Define huffman table marker. |
| hls_cmp | the shift register organized by the index of each color component. |
| hls_mbs | the number of blocks in mcu for each component. |
| q_tables | the quantization tables. |
| img_info | includes hls_cs_cmpc/hls_mbs/hls_mcuh/hls_mcuc; it is just for csim tests. |
| bas_info | the basic information for the image. |
| yuv_mcu_pointer | pointer to the hls_mcuc*{hls_mbs[0~2]*{Y/U/V}} |
burstWrite¶
#include "jpegDec/XAcc_idct.hpp"
template <int _WAxi> void burstWrite ( ap_uint <_WAxi>* yuv_mcu_pointer, hls::stream <idct_out_t> strm_iDCT_x8 [8], const uint32_t all_blocks )
the template of stream width of _WAxi burst out.
Parameters:
| _WAxi | width of axi port. |
| yuv_mcu_pointer | AXI master port to write to, ex. 64 bits. |
| strm_iDCT_x8 | stream width is 8 bits |
parser_jpg_top¶
#include "jpegDec/XAcc_jfifparser.hpp"
void parser_jpg_top ( ap_uint < (16)>* datatoDDR, const int size, int& r, int& c, uint16_t dht_tbl1 [2][2][1<< (9)], uint8_t ac_value_buckets [2][165], HCODE_T ac_huff_start_code [2][6], int16_t ac_huff_start_addr [2][16], uint8_t dc_value_buckets [2][12], HCODE_T dc_huff_start_code [2][3], int16_t dc_huff_start_addr [2][16], ap_uint <12>& hls_cmp, int& left, uint8_t hls_mbs [(3)], uint8_t q_tables [2][8][8], int& rtn, img_info& img_info, cmp_info cmp_info [(3)], bas_info& bas_info )
Level 1 : parse the JFIF register for the JPEG decoder.
Parameters:
| CH_W | size of data path in dataflow region, in bit. when CH_W is 16, the decoder could decode one symbol per cycle in about 99% cases. when CH_W is 8 , the decoder could decode one symbol per cycle in about 80% cases, but use less resource. |
| datatoDDR | the pointer to DDR. |
| size | the total bytes to be read from DDR. |
| r | the index of vector to be read from AXI in all cases |
| c | the column to be read from AXI in the case when AXI_WIDTH > 8*sizeof(char) |
| dht_tbl1/dht_tbl2 | the segment data of Define huffman table marker. |
| hls_cmp | the shift register organized by the index of each color component. |
| left | the number of bytes to be read from DDR after parser. |
| hls_mbs | the number of blocks in mcu for each component. |
| q_tables | is quantization tables. |
| rtn | return flag. |
| img_info | includes hls_cs_cmpc/hls_mbs/hls_mcuh/hls_mcuc; it is just for csim tests. |
| cmp_info | image information may be used to generate the bas_info . |
| bas_info | information used by next module. |
decoder_jpg_top¶
#include "jpegDec/XAcc_jfifparser.hpp"
void decoder_jpg_top ( ap_uint < (16)>* ptr, const int sz, const int c, const uint16_t dht_tbl1 [2][2][1<< (9)], uint8_t ac_value_buckets [2][165], HCODE_T ac_huff_start_code [2][6], int16_t ac_huff_start_addr [2][16], uint8_t dc_value_buckets [2][12], HCODE_T dc_huff_start_code [2][3], int16_t dc_huff_start_addr [2][16], ap_uint <12> hls_cmp, const uint8_t hls_mbs [(3)], const img_info img_info, int& rtn2, uint32_t& rst_cnt, hls::stream <ap_uint <24>>& block_strm )
Level 1 : decode all mcu with burst read data from DDR.
Parameters:
| CH_W | size of data path in dataflow region, in bit. when CH_W is 16, the decoder could decode one symbol per cycle in about 99% cases. when CH_W is 8 , the decoder could decode one symbol per cycle in about 80% cases, but use less resource. |
| ptr | the pointer to DDR. |
| sz | the total bytes to be read from DDR. |
| c | the column to be read from AXI in the case when AXI_WIDTH > 8*sizeof(char) |
| dht_tbl1/dht_tbl2 | the segment data of Define huffman table marker. |
| hls_cmp | the shift register organized by the index of each color component. |
| hls_mbs | the number of blocks in mcu for each component. |
| img_info | includes hls_cs_cmpc/hls_mbs/hls_mcuh/hls_mcuc; it is just for csim tests. |
| rtn2 | return flag. |
| block_strm | the stream of coefficients in a block; bit 23: is_rst, bit 22: is_endblock, bits 21~16: bpos, bits 15~0: block val |
mcu_decoder¶
#include "jpegDec/XAcc_jpegdecoder.hpp"
template <typename CHTYPE> void mcu_decoder ( hls::stream <CHTYPE>& image_strm, hls::stream <bool>& eof_strm, const uint16_t dht_tbl1 [2][2][1<< (9)], const uint8_t ac_value_buckets [2][165], const HCODE_T ac_huff_start_code [2][6], const int16_t ac_huff_start_addr [2][16], const uint8_t dc_value_buckets [2][12], const HCODE_T dc_huff_start_code [2][3], const int16_t dc_huff_start_addr [2][16], ap_uint <12> hls_cmp, const uint8_t hls_cs_cmpc, const uint8_t hls_mbs [(3)], const uint16_t hls_mcuh, const uint32_t hls_mcuc, int& rtn2, uint32_t& rst_cnt, hls::stream <ap_uint <24>>& block_strm )
Level 1 : decode all mcu.
Parameters:
| CH_W | size of data path in dataflow region, in bit. when CH_W is 16, the decoder could decode one symbol per cycle in about 99% cases. when CH_W is 8 , the decoder could decode one symbol per cycle in about 80% cases, but use less resource. |
| image_strm | the stream of compressed data after SOS marker. |
| eof_strm | the stream of end flag for image_strm, synchronous signal using false and an additional true in the end. |
| dht_tbl1/dht_tbl2 | the segment data of Define huffman table marker. |
| hls_cmp | the shift register organized by the index of each color component. |
| image | info include hls_cs_cmpc/hls_mbs/hls_mcuh/hls_mcuc is just for csim tests. |
| block_strm | the stream of coefficients in a block; bit 23: is_rst, bit 22: is_endblock, bits 21~16: bpos, bits 15~0: block val |
hls_next_mcupos2¶
#include "jpegDec/XAcc_jpegdecoder.hpp"
void hls_next_mcupos2 ( hls::stream <ap_uint <24>>& block_strm, int16_t hls_block [(3)*(1036800)*64], int hls_sfv [4], int hls_sfh [4], const uint8_t hls_mbs [4], int hls_bch, int hls_bc, int32_t hls_mcuc, uint8_t hls_cs_cmpc, bool rtn2, int& sta )
Convert the stream to Aligned_block, for other integrations, not just JPEG.
Parameters:
| block_strm | the stream of coefficients in a block; bit 23: is_rst, bit 22: is_endblock, bits 21~16: bpos, bits 15~0: block val |
| hls_block | the max-size block, which will be copied to the aligned_block |
| hls_sfv | the sample factor vertical for each component |
| hls_sfh | the sample factor horizontal for each component |
| hls_mbs | the blocks in mcu for each component. |
| hls_bch | the max block count horizontal (interleaved) |
| hls_bc | the max block count (all) (interleaved) |
| sta | the status of the process, 0: keep doing, 1: reset decoder(todo), 2: decode done |
loadToStrm¶
#include "resize/bicubicinterpolator.hpp"
void loadToStrm ( ap_uint <32> width, ap_uint <32> height, ap_uint <WDATA>* axi_src, hls::stream <ap_uint <WDATA>>& src_strm )
This function loads the pixels of the image into the stream.
Parameters:
| width | the number of pixels in each row of the input image |
| height | the number of pixels in each column of the input image |
| axi_src | the hbm port for input |
| src_strm | the input stream of bicubic interpolator |
pickOutStrm¶
#include "resize/bicubicinterpolator.hpp"
void pickOutStrm ( hls::stream <ap_uint <72>>& dst_strm, hls::stream <bool>& e_dst, hls::stream <ap_uint <WDATA>>& pixel_strm )
This function picks out the valid interpolation values from the 72-bit word: bits (0, 63) hold the valid values and bits (64, 71) give the number of valid interpolation values.
Parameters:
| dst_strm | the output of bicubic interpolator |
| e_dst | the flag of output |
| pixel_strm | the compacted 64 bits, representing 8 pixels |
loadToImage¶
#include "resize/bicubicinterpolator.hpp"
void loadToImage ( ap_uint <32> width, ap_uint <32> height, hls::stream <ap_uint <WDATA>>& pixel_strm, ap_uint <WDATA>* axi_dst )
This function writes the result of interpolation into HBM memory.
Parameters:
| width | the number of pixels in each row of the output image |
| height | the number of pixels in each column of the output image |
| pixel_strm | the output stream of bicubic interpolator |
| axi_dst | the hbm port for output |