Program Listing for File grpc_internal.cpp
↰ Return to documentation for file (/workspace/amdinfer/src/amdinfer/clients/grpc_internal.cpp)
// Copyright 2022 Xilinx, Inc.
// Copyright 2022 Advanced Micro Devices, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "amdinfer/clients/grpc_internal.hpp"
#include <google/protobuf/repeated_ptr_field.h> // for RepeatedPtrField
#include <google/protobuf/stubs/common.h> // for string
#include <cstddef> // for size_t
#include <cstdint> // for int16_t, int32_t
#include <cstring> // for memcpy
#include <memory> // for make_shared, shared...
#include <utility> // for move
#include <variant> // for visit
#include <vector> // for vector, _Bit_reference
#include "amdinfer/build_options.hpp" // for AMDINFER_ENABLE_LO...
#include "amdinfer/core/data_types.hpp" // for DataType, mapTypeToStr
#include "amdinfer/core/predict_api_internal.hpp" // for RequestParameters
#include "amdinfer/declarations.hpp" // for InferenceResponseOu...
#include "amdinfer/observation/observer.hpp" // for kNumTraceData
#include "amdinfer/util/traits.hpp" // IWYU pragma: keep
#include "predict_api.pb.h" // for ModelInferResponse_...
namespace amdinfer {
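// Copies each entry of a gRPC parameter map into a RequestParameters object,
// switching on the oneof case to read the correct value type. Entries with no
// value set are skipped.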
void mapProtoToParameters(
const google::protobuf::Map<std::string, inference::InferParameter>& params,
RequestParameters* parameters) {
using ParameterType = inference::InferParameter::ParameterChoiceCase;
for (const auto& [key, value] : params) {
auto type = value.parameter_choice_case();
switch (type) {
case ParameterType::kBoolParam: {
parameters->put(key, value.bool_param());
break;
}
case ParameterType::kInt64Param: {
// TODO(varunsh): parameters should switch to uint64?
parameters->put(key, static_cast<int>(value.int64_param()));
break;
}
case ParameterType::kDoubleParam: {
parameters->put(key, value.double_param());
break;
}
case ParameterType::kStringParam: {
parameters->put(key, value.string_param());
break;
}
default: {
// the parameter value is not set; skip this entry
break;
}
}
}
}
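// Overload that allocates a new RequestParameters object and returns it as a
// shared pointer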
RequestParametersPtr mapProtoToParameters(
const google::protobuf::Map<std::string, inference::InferParameter>& params) {
auto parameters = std::make_shared<RequestParameters>();
mapProtoToParameters(params, parameters.get());
return parameters;
}
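// Overload that takes the destination RequestParameters by reference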
void mapProtoToParameters(
const google::protobuf::Map<std::string, inference::InferParameter>& params,
RequestParameters& parameters) {
mapProtoToParameters(params, &parameters);
}
// Helper type for constructing an overloaded visitor for std::visit; see the
// std::visit examples on cppreference
template <class... Ts>
struct Overloaded : Ts... {
using Ts::operator()...;
};
// explicit deduction guide (not needed as of C++20)
template <class... Ts>
Overloaded(Ts...) -> Overloaded<Ts...>;
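// Performs the reverse mapping: visits the variant value of each parameter and
// sets the matching oneof field (bool, double, int64 or string) on the gRPC
// parameter before inserting it into the destination map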
void mapParametersToProto(
const std::map<std::string, amdinfer::Parameter, std::less<>>& parameters,
google::protobuf::Map<std::string, inference::InferParameter>*
grpc_parameters) {
for (const auto& [key, value] : parameters) {
inference::InferParameter param;
std::visit(
Overloaded{[&](bool arg) { param.set_bool_param(arg); },
[&](double arg) { param.set_double_param(arg); },
[&](int32_t arg) { param.set_int64_param(arg); },
[&](const std::string& arg) { param.set_string_param(arg); }},
value);
grpc_parameters->insert({key, param});
}
}
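// Functor invoked via switchOverTypes to append a tensor's data to the proto
// tensor contents as the concrete type T. Strings (T == char) are added as a
// single element, fp16 values are widened to float before being added, and all
// other types are appended element by element.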
struct AddDataToTensor {
template <typename T, typename Tensor>
void operator()(const void* source_data, size_t size, Tensor* tensor,
const Observer& observer) const {
const auto* data = static_cast<const T*>(source_data);
auto* contents = getTensorContents<T>(tensor);
if constexpr (std::is_same_v<T, char>) {
contents->Add(data);
} else if constexpr (std::is_same_v<T, fp16>) {
for (auto i = 0U; i < size; ++i) {
contents->Add(static_cast<float>(data[i]));
}
} else {
for (auto i = 0U; i < size; ++i) {
#ifdef AMDINFER_ENABLE_LOGGING
const auto min_size = size > kNumTraceData ? kNumTraceData : size;
if (i < min_size) {
AMDINFER_LOG_TRACE(observer.logger, "Adding data to tensor: " +
std::to_string(data[i]));
}
#endif
contents->Add(data[i]);
}
}
}
};
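// Converts an amdinfer InferenceRequest into a gRPC ModelInferRequest by
// copying the ID and request parameters and serializing each input tensor's
// name, shape, datatype, parameters and data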
void mapRequestToProto(const InferenceRequest& request,
inference::ModelInferRequest& grpc_request,
[[maybe_unused]] const Observer& observer) {
AMDINFER_LOG_TRACE(observer.logger,
"Mapping the InferenceRequest to proto object");
grpc_request.set_id(request.getID());
if (const auto* parameters = request.getParameters(); parameters != nullptr) {
auto params = parameters->data();
auto* grpc_parameters = grpc_request.mutable_parameters();
mapParametersToProto(params, grpc_parameters);
}
const auto& inputs = request.getInputs();
for (const auto& input : inputs) {
auto* tensor = grpc_request.add_inputs();
tensor->set_name(input.getName());
const auto& shape = input.getShape();
auto size = 1U;
for (const auto& index : shape) {
tensor->add_shape(index);
size *= index;
}
auto datatype = input.getDatatype();
tensor->set_datatype(datatype.str());
mapParametersToProto(input.getParameters()->data(),
tensor->mutable_parameters());
switchOverTypes(AddDataToTensor(), input.getDatatype(), input.getData(),
input.getSize(), tensor, observer);
}
// TODO(varunsh): skipping outputs for now
}
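// Functor invoked via switchOverTypes to copy a proto tensor's contents into
// an InferenceResponseOutput as raw bytes. Small integer and fp16 types, which
// the proto presumably stores in wider repeated fields, are copied element by
// element; everything else is copied with a single memcpy.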
struct SetOutputData {
template <typename T, typename Tensor>
void operator()(InferenceResponseOutput* output, size_t size, Tensor* tensor,
const Observer& observer) const {
std::vector<std::byte> data;
const auto bytes_to_copy = size * sizeof(T);
data.resize(bytes_to_copy);
const auto* contents = getTensorContents<T>(tensor);
if constexpr (std::is_same_v<T, char>) {
std::memcpy(data.data(), contents, size * sizeof(std::byte));
output->setData(std::move(data));
} else {
if constexpr (util::is_any_v<T, int8_t, uint8_t, int16_t, uint16_t,
fp16>) {
for (auto i = 0U; i < size; ++i) {
std::memcpy(&(data[i * sizeof(T)]), &(contents[i]), sizeof(T));
}
} else {
std::memcpy(data.data(), contents, bytes_to_copy);
}
output->setData(std::move(data));
}
logTraceBuffer(observer.logger, output->getData(), sizeof(T));
}
};
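// Converts a gRPC ModelInferResponse into an amdinfer InferenceResponse,
// rebuilding each output tensor's name, datatype, shape and data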
void mapProtoToResponse(const inference::ModelInferResponse& reply,
InferenceResponse& response, const Observer& observer) {
response.setModel(reply.model_name());
response.setID(reply.id());
for (const auto& tensor : reply.outputs()) {
InferenceResponseOutput output;
output.setName(tensor.name());
output.setDatatype(DataType(tensor.datatype().c_str()));
std::vector<uint64_t> shape;
shape.reserve(tensor.shape_size());
auto size = 1U;
for (const auto& index : tensor.shape()) {
shape.push_back(static_cast<size_t>(index));
size *= index;
}
output.setShape(shape);
// TODO(varunsh): skipping parameters for now
switchOverTypes(SetOutputData(), output.getDatatype(), &output, size,
&tensor, observer);
response.addOutput(output);
}
}
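// Converts an amdinfer InferenceResponse into a gRPC ModelInferResponse,
// serializing each output tensor's name, datatype, shape and data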
void mapResponseToProto(InferenceResponse response,
inference::ModelInferResponse& reply) {
Observer observer;
AMDINFER_IF_LOGGING(observer.logger = Logger{Loggers::Server});
AMDINFER_LOG_TRACE(observer.logger,
"Mapping the InferenceResponse to proto object");
reply.set_model_name(response.getModel());
reply.set_id(response.getID());
auto outputs = response.getOutputs();
for (const InferenceResponseOutput& output : outputs) {
auto* tensor = reply.add_outputs();
tensor->set_name(output.getName());
// auto* parameters = tensor->mutable_parameters();
tensor->set_datatype(output.getDatatype().str());
const auto& shape = output.getShape();
auto size = 1U;
for (const size_t& index : shape) {
tensor->add_shape(index);
size *= index;
}
switchOverTypes(AddDataToTensor(), output.getDatatype(), output.getData(),
output.getSize(), tensor, observer);
}
}
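// Copies model metadata (the model name, platform, and the name, datatype and
// shape of every input and output tensor) into a gRPC ModelMetadataResponse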
void mapModelMetadataToProto(const ModelMetadata& metadata,
inference::ModelMetadataResponse& resp) {
resp.set_name(metadata.getName());
resp.set_platform(metadata.getPlatform());
const auto& inputs = metadata.getInputs();
for (const auto& input : inputs) {
auto* tensor = resp.add_inputs();
tensor->set_name(input.getName());
tensor->set_datatype(input.getDataType().str());
const auto& shape = input.getShape();
for (const auto& i : shape) {
tensor->add_shape(i);
}
}
const auto& outputs = metadata.getOutputs();
for (const auto& output : outputs) {
auto* tensor = resp.add_outputs();
tensor->set_name(output.getName());
tensor->set_datatype(output.getDataType().str());
const auto& shape = output.getShape();
for (const auto& i : shape) {
tensor->add_shape(i);
}
}
}
} // namespace amdinfer