Namespace vart
-
namespace vart
Functions
-
std::vector<float> get_input_scale(std::vector<const xir::Tensor*> input_tensors)
-
std::vector<float> get_output_scale(std::vector<const xir::Tensor*> output_tensors)
-
float get_input_scale(const xir::Tensor *input_tensor)
-
float get_output_scale(const xir::Tensor *output_tensor)
-
template<typename InputType, typename OutputType = InputType>
class BaseRunner Subclassed by Runner
Public Functions
-
virtual std::pair<std::uint32_t, int> execute_async(InputType input, OutputType output) = 0
-
- Parameters:
input – inputs with a customized type
output – outputs with a customized type
- Returns:
pair<jobid, status> status 0 for exit successfully, others for customized warnings or errors
-
virtual int wait(int jobid, int timeout = -1) = 0
wait
modes: 1. Blocking wait for specific ID. 2. Non-blocking wait for specific ID. 3. Blocking wait for any ID. 4. Non-blocking wait for any ID
- Parameters:
jobid – job id, neg for any id, others for specific job id
timeout – timeout, neg for block for ever, 0 for non-block, pos for block with a limitation(ms).
- Returns:
status 0 for exit successfully, others for customized warnings or errors
-
virtual std::pair<std::uint32_t, int> execute_async(InputType input, OutputType output) = 0
-
struct DpuMeta : public Meta
-
struct Meta
Subclassed by DpuMeta
-
class Runner : public vart::BaseRunner<const std::vector<TensorBuffer*>&>
- #include <runner.hpp>
Class of the Runner, provides API to use the runner.
The runner instance has a number of member functions to control the execution and get the input and output tensors of the runner.
Sample code:
// This example assumes that you have a DPU subgraph called dpu_subgraph. // The way to create a DPU runner to run dpu_subgraph is shown below. // create runner auto runner = vart::Runner::create_runner(dpu_subgraph, "run"); // get input tensors auto input_tensors = runner->get_input_tensors(); // get input tensor buffers auto input_tensor_buffers = std::vector<vart::TensorBuffer*>(); for (auto input : input_tensors) { auto t = vart::alloc_cpu_flat_tensor_buffer(input); input_tensor_buffers.emplace_back(t.get()); } // get output tensors auto output_tensors = runner->get_output_tensors(); // get output tensor buffers auto output_tensor_buffers = std::vector<vart::TensorBuffer*>(); for (auto output : output_tensors) { auto t = vart::alloc_cpu_flat_tensor_buffer(output); output_tensor_buffers.emplace_back(t.get()); } // sync input tensor buffers for (auto& input : input_tensor_buffers) { input->sync_for_write(0, input->get_tensor()->get_data_size() / input->get_tensor()->get_shape()[0]); } // run runner auto v = runner->execute_async(input_tensor_buffers, output_tensor_buffers); auto status = runner->wait((int)v.first, 1000000000); // sync output tensor buffers for (auto& output : output_tensor_buffers) { output->sync_for_read(0, output->get_tensor()->get_data_size() / output->get_tensor()->get_shape()[0]); }
Subclassed by RunnerExt
Public Functions
-
virtual std::pair<uint32_t, int> execute_async(const std::vector<TensorBuffer*> &input, const std::vector<TensorBuffer*> &output) = 0
Executes the runner.
This is a blocking function.
- Parameters:
input – A vector of TensorBuffer create by all input tensors of runner.
output – A vector of TensorBuffer create by all output tensors of runner.
- Returns:
pair<jobid, status> status 0 for exit successfully, others for customized warnings or errors
-
virtual int wait(int jobid, int timeout) = 0
Waits for the end of DPU processing.
modes: 1. Blocking wait for specific ID. 2. Non-blocking wait for specific ID. 3. Blocking wait for any ID. 4. Non-blocking wait for any ID
- Parameters:
jobid – job id, neg for any id, others for specific job id
timeout – timeout, neg for block for ever, 0 for non-block, pos for block with a limitation(ms).
- Returns:
status 0 for exit successfully, others for customized warnings or errors
-
virtual TensorFormat get_tensor_format()
Get the tensor format of runner.
Sample code:
auto format = runner->get_tensor_format(); switch (format) { case vart::Runner::TensorFormat::NCHW: // do something break; case vart::Runner::TensorFormat::NHWC: // do something break; }
- Returns:
TensorFormat : NHWC / NCHW
-
virtual std::vector<const xir::Tensor*> get_input_tensors() = 0
Get all input tensors of runner.
Sample code:
inputTensors = runner->get_input_tensors(); for (auto input : inputTensor) { input->get_name(); input->get_shape(); input->get_element_num(); }
- Returns:
All input tensors. A vector of raw pointer to the input tensor.
-
virtual std::vector<const xir::Tensor*> get_output_tensors() = 0
Get all output tensors of runner.
Sample code:
outputTensors = runner->get_output_tensors(); for (auto output : outputTensor) { output->get_name(); output->get_shape(); output->get_element_num(); }
- Returns:
All output tensors. A vector of raw pointer to the output tensor.
-
virtual std::pair<std::uint32_t, int> execute_async(InputType input, OutputType output) = 0
-
- Parameters:
input – inputs with a customized type
output – outputs with a customized type
- Returns:
pair<jobid, status> status 0 for exit successfully, others for customized warnings or errors
Public Static Functions
-
static std::unique_ptr<Runner> create_runner(const xir::Subgraph *subgraph, const std::string &mode = std::string(""))
Factory function to create an instance of DPU runner by subgraph.
Sample code:
// This API can be used like: auto runner = vart::Runner::create_runner(subgraph, "run");
- Parameters:
subgraph – XIR Subgraph
mode – 1 mode supported: ‘run’ - DPU runner.
- Returns:
An instance of DPU runner.
-
static std::unique_ptr<Runner> create_runner_with_attrs(const xir::Subgraph *subgraph, xir::Attrs *attrs)
Factory function to create an instance of DPU runner by subgraph, and attrs.
- Parameters:
subgraph – XIR Subgraph
attrs – XIR attrs object, this object is shared among all runners on the same graph.
attrs["mode"] – 1 mode supported: ‘run’ - DPU runner.
- Returns:
An instance of DPU runner.
-
virtual std::pair<uint32_t, int> execute_async(const std::vector<TensorBuffer*> &input, const std::vector<TensorBuffer*> &output) = 0
-
class RunnerExt : public Runner
Public Functions
-
virtual std::vector<vart::TensorBuffer*> get_inputs() = 0
Gets all input TensorBuffers of RunnerExt.
Sample code:
auto runner = vart::RunnerExt::create_runner(subgraph, attrs); auto input_tensor_buffers = runner->get_inputs(); for (auto input : input_tensor_buffers) { auto shape = input->get_tensor()->get_shape(); }
- Returns:
All input TensorBuffers. A vector of raw pointer to the input TensorBuffer.
-
virtual std::vector<vart::TensorBuffer*> get_outputs() = 0
Gets all output TensorBuffers of RunnerExt.
Sample code:
auto runner = vart::RunnerExt::create_runner(subgraph, attrs); auto output_tensor_buffers = runner->get_outputs(); for (auto output : output_tensor_buffers) { auto shape = output->get_tensor()->get_shape(); }
- Returns:
All output TensorBuffers. A vector of raw pointer to the output TensorBuffer.
-
virtual std::pair<uint32_t, int> execute_async(const std::vector<TensorBuffer*> &input, const std::vector<TensorBuffer*> &output) = 0
Executes the runner.
This is a blocking function.
- Parameters:
input – A vector of TensorBuffer create by all input tensors of runner.
output – A vector of TensorBuffer create by all output tensors of runner.
- Returns:
pair<jobid, status> status 0 for exit successfully, others for customized warnings or errors
-
virtual std::pair<std::uint32_t, int> execute_async(InputType input, OutputType output) = 0
-
- Parameters:
input – inputs with a customized type
output – outputs with a customized type
- Returns:
pair<jobid, status> status 0 for exit successfully, others for customized warnings or errors
-
virtual int wait(int jobid, int timeout) = 0
Waits for the end of DPU processing.
modes: 1. Blocking wait for specific ID. 2. Non-blocking wait for specific ID. 3. Blocking wait for any ID. 4. Non-blocking wait for any ID
- Parameters:
jobid – job id, neg for any id, others for specific job id
timeout – timeout, neg for block for ever, 0 for non-block, pos for block with a limitation(ms).
- Returns:
status 0 for exit successfully, others for customized warnings or errors
-
virtual TensorFormat get_tensor_format()
Get the tensor format of runner.
Sample code:
auto format = runner->get_tensor_format(); switch (format) { case vart::Runner::TensorFormat::NCHW: // do something break; case vart::Runner::TensorFormat::NHWC: // do something break; }
- Returns:
TensorFormat : NHWC / NCHW
-
virtual std::vector<const xir::Tensor*> get_input_tensors() = 0
Get all input tensors of runner.
Sample code:
inputTensors = runner->get_input_tensors(); for (auto input : inputTensor) { input->get_name(); input->get_shape(); input->get_element_num(); }
- Returns:
All input tensors. A vector of raw pointer to the input tensor.
-
virtual std::vector<const xir::Tensor*> get_output_tensors() = 0
Get all output tensors of runner.
Sample code:
outputTensors = runner->get_output_tensors(); for (auto output : outputTensor) { output->get_name(); output->get_shape(); output->get_element_num(); }
- Returns:
All output tensors. A vector of raw pointer to the output tensor.
Public Static Functions
-
static std::unique_ptr<RunnerExt> create_runner(const xir::Subgraph *subgraph, xir::Attrs *attrs)
Factory function to create an instance of runner by subgraph and attrs.
- Parameters:
subgraph – XIR Subgraph
attrs – XIR attrs object, this object is shared among all runners on the same graph.
- Returns:
An instance of runner.
-
static std::unique_ptr<Runner> create_runner(const xir::Subgraph *subgraph, const std::string &mode = std::string(""))
Factory function to create an instance of DPU runner by subgraph.
Sample code:
// This API can be used like: auto runner = vart::Runner::create_runner(subgraph, "run");
- Parameters:
subgraph – XIR Subgraph
mode – 1 mode supported: ‘run’ - DPU runner.
- Returns:
An instance of DPU runner.
-
static std::unique_ptr<Runner> create_runner_with_attrs(const xir::Subgraph *subgraph, xir::Attrs *attrs)
Factory function to create an instance of DPU runner by subgraph, and attrs.
- Parameters:
subgraph – XIR Subgraph
attrs – XIR attrs object, this object is shared among all runners on the same graph.
attrs["mode"] – 1 mode supported: ‘run’ - DPU runner.
- Returns:
An instance of DPU runner.
-
virtual std::vector<vart::TensorBuffer*> get_inputs() = 0
-
class TensorBuffer
- #include <tensor_buffer.hpp>
Class of TensorBuffer.
Subclassed by TensorBufferExt
Public Functions
-
virtual std::pair<std::uint64_t, std::size_t> data(const std::vector<std::int32_t> idx = {}) = 0
Get the data address of the index and the size of the data available for use.
Sample code:
vart::TensorBuffer* tb; std::tie(data_addr, tensor_size) = tb->data({0,0,0,0});
- Parameters:
idx – The index of the data to be accessed, its dimension same as the tensor shape.
- Returns:
A pair of the data address of the index and the size of the data available for use in byte unit.
-
inline virtual location_t get_location() const
Get where the tensor buffer located.
Sample code:
vart::TensorBuffer* tb; switch (tb->get_location()) { case vart::TensorBuffer::location_t::HOST_VIRT: // do nothing break; case vart::TensorBuffer::location_t::HOST_PHY: // do nothing break; default: // do nothing break; }
- Returns:
the tensor buffer location, a location_t enum type value: HOST_VIRT/HOST_PHY/DEVICE_*.
-
inline virtual std::pair<uint64_t, size_t> data_phy(const std::vector<std::int32_t> idx)
Get the data physical address of the index and the size of the data available for use.
Sample code:
vart::TensorBuffer* tb; std::tie(phy_data, phy_size) = tb->data_phy({0, 0});
- Parameters:
idx – The index of the data to be accessed, its dimension same to the tensor shape.
- Returns:
A pair of the data physical address of the index and the size of the data available for use in byte unit.
-
inline virtual void sync_for_read(uint64_t offset, size_t size)
Invalidates the cache for reading before a read; it is a no-op in case get_location() returns DEVICE_ONLY or HOST_VIRT.
Sample code:
for (auto& output : output_tensor_buffers) { output->sync_for_read(0, output->get_tensor()->get_data_size() / output->get_tensor()->get_shape()[0]); }
- Parameters:
offset – The start offset address.
size – The data size.
- Returns:
void
-
inline virtual void sync_for_write(uint64_t offset, size_t size)
Flushes the cache for writing after a write; it is a no-op in case get_location() returns DEVICE_ONLY or HOST_VIRT.
Sample code:
for (auto& input : input_tensor_buffers) { input->sync_for_write(0, input->get_tensor()->get_data_size() / input->get_tensor()->get_shape()[0]); }
- Parameters:
offset – The start offset address.
size – The data size.
- Returns:
void
-
virtual void copy_from_host(size_t batch_idx, const void *buf, size_t size, size_t offset)
copy data from source buffer.
- Parameters:
batch_idx – the batch index.
buf – source buffer start address.
size – data size to be copied.
offset – the start offset to be copied.
- Returns:
void
-
virtual void copy_to_host(size_t batch_idx, void *buf, size_t size, size_t offset)
copy data to destination buffer.
Sample code:
vart::TensorBuffer* tb_from; vart::TensorBuffer* tb_to; for (auto batch = 0u; batch < batch_size; ++batch) { std::tie(data, tensor_size) = tb_to->data({(int)batch, 0, 0, 0}); tb_from->copy_to_host(batch, reinterpret_cast<void*>(data), tensor_size, 0u); }
- Parameters:
batch_idx – the batch index.
buf – destination buffer start address.
size – data size to be copied.
offset – the start offset to be copied.
- Returns:
void
-
const xir::Tensor *get_tensor() const
Get tensor of TensorBuffer.
- Returns:
A pointer to the tensor.
-
virtual std::string to_string() const
for fancy log messages
Public Static Functions
-
static std::string to_string(location_t value)
for TensorBuffer location message
-
static void copy_tensor_buffer(vart::TensorBuffer *tb_from, vart::TensorBuffer *tb_to)
copy TensorBuffer from one to another.
Sample code:
vart::TensorBuffer* tb_from; vart::TensorBuffer* tb_to; vart::TensorBuffer::copy_tensor_buffer(tb_from.get(), tb_to.get());
- Parameters:
tb_from – the source TensorBuffer.
tb_to – the destination TensorBuffer.
- Returns:
void
-
static std::unique_ptr<TensorBuffer> create_unowned_device_tensor_buffer(const xir::Tensor *tensor, uint64_t batch_addr[], size_t addr_arrsize)
create unowned device tensor buffer with device physical addresses for a tensor.
There are some limitations on the arguments:
The addr_arrsize must NOT be greater than the tensor batch.
The tensor must have attribute ddr_addr whose value must be 0.
Sample code:
auto runner = vart::RunnerExt::create_runner(subgraph, attrs); auto input_tensors = runner->get_input_tensors(); auto output_tensors = runner->get_output_tensors(); std::vector<vart::TensorBuffer*> input_tensor_buffers; std::vector<vart::TensorBuffer*> output_tensor_buffers; uint64_t in_batch_addr[1]; uint64_t out_batch_addr[1]; in_batch_addr[0] = DEVICE_PHY_ADDRESS_IN; out_batch_addr[0] = DEVICE_PHY_ADDRESS_OUT; auto input_tb = vart::TensorBuffer::create_unowned_device_tensor_buffer( input_tensors[0], in_batch_addr, 1); auto output_tb = vart::TensorBuffer::create_unowned_device_tensor_buffer( output_tensors[0], out_batch_addr, 1); input_tensor_buffers.emplace_back(input_tb.get()); output_tensor_buffers.emplace_back(output_tb.get()); auto v = runner->execute_async(input_tensor_buffers, output_tensor_buffers);
- Parameters:
tensor – XIR tensor pointer
batch_addr – Array which contains device physical address for each batch
addr_arrsize – The array size of batch_addr
- Returns:
Unique pointer of created tensor buffer.
-
virtual std::pair<std::uint64_t, std::size_t> data(const std::vector<std::int32_t> idx = {}) = 0
-
class TensorBufferExt : public TensorBuffer
Public Functions
-
virtual std::string to_string() const
for fancy log messages
-
virtual std::pair<std::uint64_t, std::size_t> data(const std::vector<std::int32_t> idx = {}) = 0
Get the data address of the index and the size of the data available for use.
Sample code:
vart::TensorBuffer* tb; std::tie(data_addr, tensor_size) = tb->data({0,0,0,0});
- Parameters:
idx – The index of the data to be accessed, its dimension same as the tensor shape.
- Returns:
A pair of the data address of the index and the size of the data available for use in byte unit.
-
inline virtual location_t get_location() const
Get where the tensor buffer located.
Sample code:
vart::TensorBuffer* tb; switch (tb->get_location()) { case vart::TensorBuffer::location_t::HOST_VIRT: // do nothing break; case vart::TensorBuffer::location_t::HOST_PHY: // do nothing break; default: // do nothing break; }
- Returns:
the tensor buffer location, a location_t enum type value: HOST_VIRT/HOST_PHY/DEVICE_*.
-
inline virtual std::pair<uint64_t, size_t> data_phy(const std::vector<std::int32_t> idx)
Get the data physical address of the index and the size of the data available for use.
Sample code:
vart::TensorBuffer* tb; std::tie(phy_data, phy_size) = tb->data_phy({0, 0});
- Parameters:
idx – The index of the data to be accessed, its dimension same to the tensor shape.
- Returns:
A pair of the data physical address of the index and the size of the data available for use in byte unit.
-
inline virtual void sync_for_read(uint64_t offset, size_t size)
Invalidates the cache for reading before a read; it is a no-op in case get_location() returns DEVICE_ONLY or HOST_VIRT.
Sample code:
for (auto& output : output_tensor_buffers) { output->sync_for_read(0, output->get_tensor()->get_data_size() / output->get_tensor()->get_shape()[0]); }
- Parameters:
offset – The start offset address.
size – The data size.
- Returns:
void
-
inline virtual void sync_for_write(uint64_t offset, size_t size)
Flushes the cache for writing after a write; it is a no-op in case get_location() returns DEVICE_ONLY or HOST_VIRT.
Sample code:
for (auto& input : input_tensor_buffers) { input->sync_for_write(0, input->get_tensor()->get_data_size() / input->get_tensor()->get_shape()[0]); }
- Parameters:
offset – The start offset address.
size – The data size.
- Returns:
void
-
virtual void copy_from_host(size_t batch_idx, const void *buf, size_t size, size_t offset)
copy data from source buffer.
- Parameters:
batch_idx – the batch index.
buf – source buffer start address.
size – data size to be copied.
offset – the start offset to be copied.
- Returns:
void
-
virtual void copy_to_host(size_t batch_idx, void *buf, size_t size, size_t offset)
copy data to destination buffer.
Sample code:
vart::TensorBuffer* tb_from; vart::TensorBuffer* tb_to; for (auto batch = 0u; batch < batch_size; ++batch) { std::tie(data, tensor_size) = tb_to->data({(int)batch, 0, 0, 0}); tb_from->copy_to_host(batch, reinterpret_cast<void*>(data), tensor_size, 0u); }
- Parameters:
batch_idx – the batch index.
buf – destination buffer start address.
size – data size to be copied.
offset – the start offset to be copied.
- Returns:
void
-
const xir::Tensor *get_tensor() const
Get tensor of TensorBuffer.
- Returns:
A pointer to the tensor.
Public Static Functions
-
static std::string to_string(location_t value)
for TensorBuffer location message
-
static void copy_tensor_buffer(vart::TensorBuffer *tb_from, vart::TensorBuffer *tb_to)
copy TensorBuffer from one to another.
Sample code:
vart::TensorBuffer* tb_from; vart::TensorBuffer* tb_to; vart::TensorBuffer::copy_tensor_buffer(tb_from.get(), tb_to.get());
- Parameters:
tb_from – the source TensorBuffer.
tb_to – the destination TensorBuffer.
- Returns:
void
-
static std::unique_ptr<TensorBuffer> create_unowned_device_tensor_buffer(const xir::Tensor *tensor, uint64_t batch_addr[], size_t addr_arrsize)
create unowned device tensor buffer with device physical addresses for a tensor.
There are some limitations on the arguments:
The addr_arrsize must NOT be greater than the tensor batch.
The tensor must have attribute ddr_addr whose value must be 0.
Sample code:
auto runner = vart::RunnerExt::create_runner(subgraph, attrs); auto input_tensors = runner->get_input_tensors(); auto output_tensors = runner->get_output_tensors(); std::vector<vart::TensorBuffer*> input_tensor_buffers; std::vector<vart::TensorBuffer*> output_tensor_buffers; uint64_t in_batch_addr[1]; uint64_t out_batch_addr[1]; in_batch_addr[0] = DEVICE_PHY_ADDRESS_IN; out_batch_addr[0] = DEVICE_PHY_ADDRESS_OUT; auto input_tb = vart::TensorBuffer::create_unowned_device_tensor_buffer( input_tensors[0], in_batch_addr, 1); auto output_tb = vart::TensorBuffer::create_unowned_device_tensor_buffer( output_tensors[0], out_batch_addr, 1); input_tensor_buffers.emplace_back(input_tb.get()); output_tensor_buffers.emplace_back(output_tb.get()); auto v = runner->execute_async(input_tensor_buffers, output_tensor_buffers);
- Parameters:
tensor – XIR tensor pointer
batch_addr – Array which contains device physical address for each batch
addr_arrsize – The array size of batch_addr
- Returns:
Unique pointer of created tensor buffer.
-
virtual std::string to_string() const
-
struct XclBo
-
std::vector<float> get_input_scale(std::vector<const xir::Tensor*> input_tensors)