Global Namespace¶
// typedefs typedef ap_uint <8> uintInV_t typedef ap_uint <8> uintOutV_t typedef struct unpackerBlockInfo dt_blockInfo typedef struct unpackerChunkInfo dt_chunkInfo typedef ap_uint <GMEM_DWIDTH> uint512_t // structs struct unpackerBlockInfo struct unpackerChunkInfo // global variables constexpr int c_windowSize constexpr int c_blockSize const int c_streamDWidth const int c_windowSize // macros #define DECODER_TYPE #define DECODER_TYPE #define DECODER_TYPE #define DICT_ELE_WIDTH #define GMEM_BURST_SIZE #define GMEM_BURST_SIZE #define GMEM_BURST_SIZE #define GMEM_BURST_SIZE #define GMEM_BURST_SIZE #define GMEM_BURST_SIZE #define GMEM_BURST_SIZE #define GMEM_BURST_SIZE #define GMEM_BURST_SIZE #define GMEM_BURST_SIZE #define GMEM_BURST_SIZE #define GMEM_DATAWIDTH #define GMEM_DWIDTH #define GMEM_DWIDTH #define GMEM_DWIDTH #define GMEM_DWIDTH #define GMEM_DWIDTH #define GMEM_DWIDTH #define GMEM_DWIDTH #define GMEM_DWIDTH #define GMEM_DWIDTH #define GMEM_DWIDTH #define GMEM_DWIDTH #define GMEM_DWIDTH #define GMEM_DWIDTH #define GMEM_IN_DWIDTH #define GMEM_IN_DWIDTH #define GMEM_OUT_DWIDTH #define GMEM_OUT_DWIDTH #define HISTORY_SIZE #define HISTORY_SIZE #define HISTORY_SIZE #define HISTORY_SIZE #define HISTORY_SIZE #define HISTORY_SIZE #define LOW_OFFSET #define LOW_OFFSET #define LOW_OFFSET #define LZ_MAX_OFFSET_LIMIT #define LZ_MAX_OFFSET_LIMIT #define LZ_MAX_OFFSET_LIMIT #define LZ_MAX_OFFSET_LIMIT #define LZ_MAX_OFFSET_LIMIT #define LZ_MAX_OFFSET_LIMIT #define LZ_MAX_OFFSET_LIMIT #define LZ_MAX_OFFSET_LIMIT #define MATCH_LEN #define MATCH_LEN #define MATCH_LEN #define MATCH_LEN #define MATCH_LEN #define MATCH_LEVEL #define MAX_LIT_COUNT #define MAX_LIT_COUNT #define MAX_LIT_COUNT #define MAX_LIT_COUNT #define MAX_LIT_STREAM_SIZE #define MAX_LIT_STREAM_SIZE #define MAX_MATCH_LEN #define MAX_MATCH_LEN #define MAX_MATCH_LEN #define MAX_MATCH_LEN #define MAX_MATCH_LEN #define MAX_OFFSET #define MAX_OFFSET #define MAX_OFFSET #define MAX_OFFSET #define MAX_OFFSET #define 
MAX_OFFSET #define MAX_OFFSET #define MIN_BLCK_SIZE #define MIN_BLCK_SIZE #define MIN_BLOCK_SIZE #define MIN_BLOCK_SIZE #define MIN_MATCH #define MIN_MATCH #define MIN_MATCH #define MIN_MATCH #define MIN_MATCH #define MULTIPLE_BYTES #define MULTIPLE_BYTES #define MULTIPLE_BYTES #define MULTIPLE_BYTES #define NUM_CORES #define NUM_CORES #define NUM_CORES #define OFFSET_WINDOW #define OFFSET_WINDOW #define OFFSET_WINDOW #define OFFSET_WINDOW #define OUT_BYTES #define PARALLEL_BLOCK #define PARALLEL_BLOCK #define PARALLEL_BLOCK #define PARALLEL_BYTE #define PARALLEL_BYTE #define PARALLEL_BYTES #define PARALLEL_BYTES #define PARALLEL_BYTES #define STRATEGY #define STREAM_IN_DWIDTH #define STREAM_OUT_DWIDTH #define STRTGY #define URAM_BUFFER #define URAM_BUFFER #define ZLIB_BLOCK_SIZE #define ZLIB_BLOCK_SIZE #define ZLIB_BLOCK_SIZE #define ZSTD_BLOCK_SIZE_KB #define ZSTD_BLOCK_SIZE_KB
Global Functions¶
xilAdler32¶
#include "adler32_mm.hpp"
void xilAdler32 ( const ap_uint <PARALLEL_BYTES*8>* in, ap_uint <32>* adlerData, uint32_t inSize )
Adler32 kernel takes the raw data as input and generates the adler32 result.
Parameters:
in | input raw data |
adlerData | Adler data |
inSize | input size |
xilChecksum32¶
#include "checksum_mm.hpp"
void xilChecksum32 ( const ap_uint <PARALLEL_BYTES*8>* in, ap_uint <32>* initData, uint32_t inSize, bool checksumType )
Checksum kernel takes the raw data as input and generates the checksum result.
Parameters:
in | input raw data |
initData | input Initial data |
outData | output checksum |
inSize | input size |
checksumType | CRC/ADLER |
xilCrc32¶
#include "crc32_mm.hpp"
void xilCrc32 ( const ap_uint <PARALLEL_BYTES*8>* in, ap_uint <32>* crcData, uint32_t inSize )
Crc32 kernel takes the raw data as input and generates the crc32 result.
Parameters:
in | input raw data |
crcData | CRC data |
inSize | input size |
xilGzipCompressFixedStreaming¶
#include "gzip_compress_fixed_stream.hpp"
void xilGzipCompressFixedStreaming ( hls::stream <ap_axiu <GMEM_IN_DWIDTH, 0, 0, 0>>& inStream, hls::stream <ap_axiu <GMEM_OUT_DWIDTH, 0, 0, 0>>& outStream, hls::stream <ap_axiu <32, 0, 0, 0>>& inSizeStream )
GZIP compression kernel takes the raw data as input and compresses the data in block based fashion and writes the output to global memory. This kernel uses fixed huffman encoding for compression.
Parameters:
inStream | input raw data |
outStream | output compressed data |
inSizeStream | input data size |
xilGzipCompBlock¶
#include "gzip_compress_multicore_mm.hpp"
void xilGzipCompBlock ( const ap_uint <GMEM_DWIDTH>* in, ap_uint <GMEM_DWIDTH>* out, uint32_t* compressd_size, uint32_t* checksumData, uint32_t input_size, bool checksumType )
GZIP compression kernel takes the raw data as input from DDR and compresses the data using num cores and writes the output to global memory.
Parameters:
in | input raw data |
out | output compressed data |
compressd_size | compressed output size of each block |
input_size | input data size |
xilGzipComp¶
#include "gzip_compress_multicore_stream.hpp"
void xilGzipComp ( hls::stream <ap_axiu <GMEM_DWIDTH, 0, 0, 0>>& inaxistream, hls::stream <ap_axiu <GMEM_DWIDTH, 0, 0, 0>>& outaxistream )
GZIP streaming compression kernel takes the raw data as input from axi interface and compresses the data using num cores and writes the output to an axi interface.
Parameters:
inaxistream | input raw data |
outaxistream | output compressed data |
xilGzipCompressStreaming¶
#include "gzip_compress_stream.hpp"
void xilGzipCompressStreaming ( hls::stream <ap_axiu <GMEM_IN_DWIDTH, 0, 0, 0>>& inStream, hls::stream <ap_axiu <GMEM_OUT_DWIDTH, 0, 0, 0>>& outStream, hls::stream <ap_axiu <32, 0, 0, 0>>& inSizeStream )
GZIP compression kernel takes the raw data as input and compresses the data in block based fashion and writes the output to global memory.
Parameters:
inStream | input raw data |
outStream | output compressed data |
inSizeStream | input data size |
xilLz4Compress¶
#include "lz4_compress_mm.hpp"
void xilLz4Compress ( const xf::compression::uintMemWidth_t* in, xf::compression::uintMemWidth_t* out, uint32_t* compressd_size, uint32_t* in_block_size, uint32_t block_size_in_kb, uint32_t input_size )
LZ4 compression kernel takes the raw data as input and compresses the data in block based fashion and writes the output to global memory.
Parameters:
in | input raw data |
out | output compressed data |
compressd_size | compressed output size of each block |
in_block_size | input block size of each block |
block_size_in_kb | input block size in bytes |
input_size | input data size |
xilLz4CompressStream¶
#include "lz4_compress_stream.hpp"
void xilLz4CompressStream ( hls::stream <ap_axiu <8, 0, 0, 0>>& inaxistream, hls::stream <ap_axiu <8, 0, 0, 0>>& outaxistream, uint32_t inputSize )
LZ4 compression streaming kernel. It takes input from axi kernel stream and writes compressed data back to output axi kernel stream.
Parameters:
inaxistream | Input axi kernel stream |
outaxistream | Output axi kernel stream |
inputSize | Input data size |
xilLz4Decompress¶
#include "lz4_multibyte_decompress_mm.hpp"
void xilLz4Decompress ( const ap_uint <PARALLEL_BYTE*8>* in, ap_uint <PARALLEL_BYTE*8>* out, uint32_t* in_block_size, uint32_t* in_compress_size, uint32_t block_size_in_kb, uint32_t no_blocks )
LZ4 decompression kernel takes compressed data as input, processes it in block based fashion, and writes the raw data to global memory.
Parameters:
in | input compressed data |
out | output raw data |
in_block_size | input block size of each block |
in_compress_size | compress size of each block |
block_size_in_kb | block size in bytes |
no_blocks | number of blocks |
xilLz4DecompressStream¶
#include "lz4_multibyte_decompress_stream.hpp"
void xilLz4DecompressStream ( hls::stream <ap_axiu <MULTIPLE_BYTES*8, 0, 0, 0>>& inaxistream, hls::stream <ap_axiu <MULTIPLE_BYTES*8, 0, 0, 0>>& outaxistream, hls::stream <ap_axiu <32, 0, 0, 0>>& outaxistreamsize, uint32_t inputSize )
LZ4 decompression streaming kernel takes compressed data as input from kernel axi stream, processes it in block based fashion, and writes the raw data to global memory.
Parameters:
inaxistream | input kernel axi stream for compressed data |
outaxistream | output kernel axi stream for decompressed data |
inputSize | input data size |
xilLz4P2PDecompress¶
#include "lz4_p2p_decompress_kernel.hpp"
void xilLz4P2PDecompress ( const xf::compression::uintMemWidth_t* in, xf::compression::uintMemWidth_t* out, dt_blockInfo* bObj, dt_chunkInfo* cObj, uint32_t block_size_in_kb, uint32_t compute_unit, uint8_t total_no_cu, uint32_t num_blocks )
LZ4 P2P decompression kernel is responsible for decompressing data which is in LZ4 encoded form.
Parameters:
in | input stream width |
out | output stream width |
in_block_size | input size |
in_compress_size | output size |
block_start_idx | start index of block |
no_blocks | number of blocks for each compute unit |
block_size_in_kb | block input size |
compute_unit | particular compute unit |
total_no_cu | number of compute units |
num_blocks | number of blocks based on host buffersize |
xilLz4Packer¶
#include "lz4_packer_mm.hpp"
void xilLz4Packer ( uint512_t* in, uint512_t* out, uint32_t* compressd_size, uint32_t* in_block_size, uint32_t* encoded_size, uint512_t* orig_input_data, uint32_t block_size_in_kb, uint32_t no_blocks, uint32_t xxhashVal, uint32_t input_size )
LZ4 packer kernel takes the raw data as input and compresses the data in block based fashion and writes the output to global memory.
Parameters:
in | input raw data |
out | output compressed data |
compressd_size | compressed output size of each block |
in_block_size | input block size of each block |
encoded_size | encoded size of each block |
orig_input_data | raw input data |
block_size_in_kb | input block size in bytes |
no_blocks | number of input blocks |
xxhashVal | Hash Value |
input_size | Total Input File Size |
xilLz4Unpacker¶
#include "lz4_unpacker_kernel.hpp"
void xilLz4Unpacker ( const xf::compression::uintMemWidth_t* in, dt_blockInfo* bObj, dt_chunkInfo* cObj, uint32_t block_size_in_kb, uint8_t first_chunk, uint8_t total_no_cu, uint32_t num_blocks )
LZ4 unpacker kernel is responsible for unpacking LZ4 compressed block information.
Parameters:
in | input stream width |
in_block_size | input block size |
in_compress_size | input compress size |
block_start_idx | start index of each input block |
no_blocks_per_cu | number of blocks for each compute unit |
original_size | original file size |
in_start_index | input start index |
no_blocks | number of blocks |
block_size_in_kb | size of each block |
first_chunk | first chunk to determine header |
total_no_cu | number of decompress compute units |
num_blocks | number of blocks based on host buffersize |
xilSnappyCompress¶
#include "snappy_compress_mm.hpp"
void xilSnappyCompress ( const xf::compression::uintMemWidth_t* in, xf::compression::uintMemWidth_t* out, uint32_t* compressd_size, uint32_t* in_block_size, uint32_t block_size_in_kb, uint32_t input_size )
Snappy compression kernel takes the raw data as input and compresses the data in block based fashion and writes the output to global memory.
Parameters:
in | input raw data |
out | output compressed data |
compressd_size | compressed output size of each block |
in_block_size | input block size of each block |
block_size_in_kb | input block size in bytes |
input_size | input data size |
xilSnappyCompressStream¶
#include "snappy_compress_stream.hpp"
void xilSnappyCompressStream ( hls::stream <ap_axiu <8, 0, 0, 0>>& inaxistream, hls::stream <ap_axiu <8, 0, 0, 0>>& outaxistream, uint32_t inputSize )
Snappy compression streaming kernel takes the raw data as input from kernel axi stream and compresses the data in block based fashion and writes the output to kernel axi stream.
Parameters:
inaxistream | input kernel axi stream for raw data |
outaxistream | output kernel axi stream for compressed data |
inputSize | input data size |
xilSnappyDecompress¶
#include "snappy_decompress_mm.hpp"
void xilSnappyDecompress ( const xf::compression::uintMemWidth_t* in, xf::compression::uintMemWidth_t* out, uint32_t* in_block_size, uint32_t* in_compress_size, uint32_t block_size_in_kb, uint32_t no_blocks )
Snappy decompression kernel takes compressed data as input, processes it in block based fashion, and writes the raw data to global memory.
Parameters:
in | input compressed data |
out | output raw data |
in_block_size | input block size of each block |
in_compress_size | compress size of each block |
block_size_in_kb | block size in bytes |
no_blocks | number of blocks |
xilSnappyDecompressStream¶
#include "snappy_decompress_stream.hpp"
void xilSnappyDecompressStream ( hls::stream <ap_axiu <8, 0, 0, 0>>& inaxistream, hls::stream <ap_axiu <8, 0, 0, 0>>& outaxistream, uint32_t inputSize, uint32_t outputSize )
Snappy decompression streaming kernel takes compressed data as input from kernel axi stream, processes it in block based fashion, and writes the raw data to global memory.
Parameters:
inaxistream | input kernel axi stream for compressed data |
outaxistream | output kernel axi stream for decompressed data |
inputSize | input data size |
xilSnappyDecompress¶
#include "snappy_multibyte_decompress_mm.hpp"
void xilSnappyDecompress ( const ap_uint <PARALLEL_BYTE*8>* in, ap_uint <PARALLEL_BYTE*8>* out, uint32_t* in_block_size, uint32_t* in_compress_size, uint32_t block_size_in_kb, uint32_t no_blocks )
Snappy decompression kernel takes compressed data as input, processes it in block based fashion, and writes the raw data to global memory.
Parameters:
in | input compressed data |
out | output raw data |
in_block_size | input block size of each block |
in_compress_size | compress size of each block |
block_size_in_kb | block size in bytes |
no_blocks | number of blocks |
xilSnappyDecompressStream¶
xilSnappyDecompressStream overload (1)¶
#include "snappy_multibyte_decompress_stream.hpp"
void xilSnappyDecompressStream ( hls::stream <ap_axiu <MULTIPLE_BYTES*8, 0, 0, 0>>& inaxistream, hls::stream <ap_axiu <MULTIPLE_BYTES*8, 0, 0, 0>>& outaxistream, hls::stream <ap_axiu <32, 0, 0, 0>>& outaxistreamsize, uint32_t inputSize )
Snappy decompression streaming kernel takes compressed data as input from kernel axi stream, processes it in block based fashion, and writes the raw data to global memory.
Parameters:
inaxistream | input kernel axi stream for compressed data |
outaxistream | output kernel axi stream for decompressed data |
inputSize | input data size |
xilSnappyDecompressStream overload (2)¶
#include "snappy_multicore_decompress_stream.hpp"
void xilSnappyDecompressStream ( hls::stream <ap_axiu <MULTIPLE_BYTES*8, 0, 0, 0>>& inaxistream, hls::stream <ap_axiu <32, 0, 0, 0>>& inaxistreamsize, hls::stream <ap_axiu <MULTIPLE_BYTES*8, 0, 0, 0>>& outaxistream, hls::stream <ap_axiu <32, 0, 0, 0>>& outaxistreamsize )
Snappy decompression streaming kernel takes compressed data as input from kernel axi stream, processes it in block based fashion, and writes the raw data to global memory.
Parameters:
inaxistream | input kernel axi stream for compressed data |
outaxistream | output kernel axi stream for decompressed data |
inputSize | input data size |
xilZlibCompressFull¶
#include "zlib_compress_multi_engine_mm.hpp"
void xilZlibCompressFull ( const ap_uint <GMEM_DWIDTH>* in, ap_uint <GMEM_DWIDTH>* out, uint32_t* compressd_size, uint32_t input_size )
ZLIB compression kernel takes the raw data as input and compresses the data in parallel block based fashion and writes the output to global memory.
Parameters:
in | input raw data |
out | output compressed data |
compressd_size | compressed output size of each block |
input_size | input data size |
xilHuffmanKernel¶
#include "zlib_huffman_enc_mm.hpp"
void xilHuffmanKernel ( xf::compression::uintMemWidth_t* in, uint32_t* lit_freq, uint32_t* dist_freq, xf::compression::uintMemWidth_t* out, uint32_t* in_block_size, uint32_t* compressd_size, uint32_t block_size_in_kb, uint32_t input_size )
Huffman kernel top function. This is an initial version of Huffman Kernel which does block based bit packing process. It uses dynamic huffman codes and bit lengths to encode the LZ77 (Byte Compressed Data) output. This version operates on 1MB block data per engine as this is suitable for use cases where raw data is over 100MB and compression ratio is over 2.5x in order to achieve best throughput. This can be further optimized to achieve better throughput for smaller file use cases.
Parameters:
in | input stream |
out | output stream |
in_block_size | input block size |
compressd_size | output compressed size |
dyn_litmtree_codes | input literal and match length codes |
dyn_distree_codes | input distance codes |
dyn_bitlentree_codes | input bit-length codes |
dyn_litmtree_blen | input literal and match length bit length data |
dyn_dtree_blen | input distance bit length data |
dyn_bitlentree_blen | input bit-length of bit length data |
dyn_max_codes | input maximum codes |
block_size_in_kb | input block size in bytes |
input_size | input data size |
xilLz77Compress¶
#include "zlib_lz77_compress_mm.hpp"
void xilLz77Compress ( const xf::compression::uintMemWidth_t* in, xf::compression::uintMemWidth_t* out, uint32_t* compressd_size, uint32_t* in_block_size, uint32_t* dyn_ltree_freq, uint32_t* dyn_dtree_freq, uint32_t block_size_in_kb, uint32_t input_size )
LZ77 compression kernel takes the raw data as input and compresses the data in block based fashion and writes the output to global memory. LZ77 is a byte based compression scheme. The resulting output from this kernel is represented in packet form of 32bit length <Literal, Match Length, Distance>. It also generates output of literal and distance frequencies for dynamic huffman tree generation. The output generated by this kernel is referred by TreeGen and Huffman Kernels.
Parameters:
in | input stream |
out | output stream |
compressd_size | compressed output size of each block |
in_block_size | input block size of each block |
dyn_ltree_freq | literal frequency data |
dyn_dtree_freq | distance frequency data |
block_size_in_kb | input block size in bytes |
input_size | input data size |
xilTreegenKernel¶
#include "zlib_treegen_mm.hpp"
void xilTreegenKernel ( uint32_t* dyn_ltree_freq, uint32_t* dyn_dtree_freq, uint32_t* dyn_bltree_freq, uint32_t* dyn_ltree_codes, uint32_t* dyn_dtree_codes, uint32_t* dyn_bltree_codes, uint32_t* dyn_ltree_blen, uint32_t* dyn_dtree_blen, uint32_t* dyn_bltree_blen, uint32_t* max_codes, uint32_t block_size_in_kb, uint32_t input_size, uint32_t blocks_per_chunk )
This is a resource optimized version of huffman treegen kernel. It takes literal and distance frequency data as input through single input stream and generates dynamic huffman codes and bit length data which is output through a single output stream. This kernel does not use DDR in any way and is optimised for both speed and low resource usage.
Parameters:
freqStream | 24-bit input stream for getting frequency data |
codeStream | 20-bit output stream sending huffman codes and bit-lengths data |
xilZstdCompress¶
#include "zstd_compress_stream.hpp"
void xilZstdCompress ( hls::stream <ap_axiu <STREAM_IN_DWIDTH, 0, 0, 0>>& axiInStream, hls::stream <ap_axiu <STREAM_OUT_DWIDTH, 0, 0, 0>>& axiOutStream )
ZSTD compression kernel takes input data from axi stream and compresses it into multiple frames having 1 block each and writes the compressed data to output axi stream.
Parameters:
axiInStream | input raw data |
axiOutStream | output compressed data |
xilZstdDecompressStream¶
#include "zstd_decompress_stream.hpp"
void xilZstdDecompressStream ( hls::stream <ap_axiu <c_streamDWidth, 0, 0, 0>>& inaxistreamd, hls::stream <ap_axiu <c_streamDWidth, 0, 0, 0>>& outaxistreamd )
This is the full ZStandard decompression streaming kernel function. It supports all block sizes and supports window size up to 128KB. It takes entire ZStd compressed file as input and produces decompressed file at the kernel output stream. This kernel does not use DDR memory, it uses streams instead. Intermediate data is stored in internal BRAMs and stream FIFOs, which helps to attain better decompression throughput.
Parameters:
input_size | input size |
inaxistreamd | input kernel axi stream |
outaxistreamd | output kernel axi stream |
sizestreamd | output size kernel axi stream |