11#include "../PassDetail.h"
18#include "llvm/Support/Debug.h"
21#include "xaiengine/xaiegbl_defs.h"
23#include "xaiengine/xaie_txn.h"
28#define DEBUG_TYPE "aie-convert-to-config"
// One decoded transaction command, held as an XAie_TxnCmd record (`cmd`).
// NOTE(review): this listing omits several lines of the struct (the rest of
// the constructor parameter list, most of its body and the closing braces),
// so only the visible members are described here.
37struct TransactionBinaryOperation {
38 struct XAie_TxnCmd cmd;
// Constructor. Visible parameters: opcode, mask, 64-bit address, value and a
// pointer to the payload bytes. Any further parameters are not shown.
39 TransactionBinaryOperation(
XAie_TxnOpcode opc, uint32_t mask, uint64_t addr,
40 uint32_t value,
const uint8_t *data,
// The payload pointer is stored in cmd.DataPtr as an integer; the struct keeps
// no copy of the bytes it points at.
46 cmd.DataPtr =
reinterpret_cast<uint64_t
>(data);
// Decodes a serialized transaction binary (`data`) into a list of
// TransactionBinaryOperation records appended to `ops`.
//
// Header fields read below: data[0] = major, data[1] = minor, data[2] = DevGen,
// data[3] = NumRows, data[4] = NumCols, data[5] = NumMemTileRows,
// bytes 8..11 = NumOps, bytes 12..15 = TxnSize (both copied with memcpy).
//
// Two layouts are decoded: version 0.1 and version 1.0. Any other version is
// reported on llvm::errs() as unsupported.
//
// NOTE(review): the return statements are not visible in this listing; callers
// test the std::optional<int> result for failure. What the int carries on
// success should be confirmed against the full file.
// NOTE(review): no size check on `data` is visible before the fixed-offset
// reads (data[0..15]) or the per-op reads at data[i + k]; confirm that the
// elided lines validate the buffer length.
54static std::optional<int>
55parseTransactionBinary(
const std::vector<uint8_t> &data,
56 std::vector<TransactionBinaryOperation> &ops) {
58 uint32_t
major = data[0];
59 uint32_t
minor = data[1];
60 uint32_t num_cols = data[4];
// NumOps and TxnSize are 32-bit fields; memcpy is used rather than a pointer
// cast to read them out of the byte vector.
62 uint32_t num_ops, txn_size;
63 std::memcpy(&num_ops, &data[8], 4);
64 std::memcpy(&txn_size, &data[12], 4);
// Debug-only dump of the header (emitted under DEBUG_TYPE).
// NOTE(review): data[2], data[3] and data[5] are uint8_t and are streamed
// directly; confirm they print as numbers rather than as raw characters.
66 LLVM_DEBUG(llvm::dbgs() <<
"Major: " << major <<
"\n");
67 LLVM_DEBUG(llvm::dbgs() <<
"Minor: " << minor <<
"\n");
68 LLVM_DEBUG(llvm::dbgs() <<
"DevGen: " << data[2] <<
"\n");
69 LLVM_DEBUG(llvm::dbgs() <<
"NumRows: " << data[3] <<
"\n");
70 LLVM_DEBUG(llvm::dbgs() <<
"NumCols: " << num_cols <<
"\n");
71 LLVM_DEBUG(llvm::dbgs() <<
"NumMemTileRows: " << data[5] <<
"\n");
72 LLVM_DEBUG(llvm::dbgs() <<
"NumOps: " << num_ops <<
"\n");
73 LLVM_DEBUG(llvm::dbgs() <<
"TxnSize: " << txn_size <<
" bytes\n");
// Helper lambda that maps a raw opcode byte; the mapping itself is elided in
// this listing. Unknown opcodes are reported on llvm::errs().
78 auto convertOpcode = [](uint8_t opc) {
87 llvm::errs() <<
"Unhandled opcode: " << std::to_string(opc) <<
"\n";
// ---- Version 0.1 layout ----
// The cursor `i` (declared in elided lines) walks the byte stream until the
// end of `data`.
95 if (major == 0 && minor == 1) {
96 while (i < data.size()) {
99 LLVM_DEBUG(llvm::dbgs() <<
"opcode: " + std::to_string(opc) +
"\n");
// data_ptr stays null unless the op is a block write.
105 const uint8_t *data_ptr =
nullptr;
// WRITE: low address word at i+8, high address word at i+12, value at i+16,
// size at i+20. The two address words are combined into one 64-bit address.
108 LLVM_DEBUG(llvm::dbgs() <<
"opcode: WRITE (0x00)\n");
109 uint32_t addr0, addr1;
110 std::memcpy(&addr0, &data[i + 8], 4);
111 std::memcpy(&addr1, &data[i + 12], 4);
112 std::memcpy(&value, &data[i + 16], 4);
113 std::memcpy(&size, &data[i + 20], 4);
114 addr =
static_cast<uint64_t
>(addr1) << 32 | addr0;
// BLOCKWRITE: address at i+8, size at i+12, payload bytes start at i+16.
// data_ptr points into `data` itself, so `data` must outlive the parsed ops.
// NOTE(review): only 4 bytes are copied into `addr` here; if `addr` is a
// 64-bit variable its upper half depends on how it was initialised in the
// elided lines. Confirm.
117 LLVM_DEBUG(llvm::dbgs() <<
"opcode: BLOCKWRITE (0x01)\n");
118 std::memcpy(&addr, &data[i + 8], 4);
119 std::memcpy(&size, &data[i + 12], 4);
120 data_ptr = data.data() + i + 16;
// MASKWRITE: low/high address words at i+8/i+12, value at i+16, mask at i+20,
// size at i+24.
124 LLVM_DEBUG(llvm::dbgs() <<
"opcode: MASKWRITE (0x03)\n");
125 uint32_t addr0, addr1;
126 std::memcpy(&addr0, &data[i + 8], 4);
127 std::memcpy(&addr1, &data[i + 12], 4);
128 std::memcpy(&value, &data[i + 16], 4);
129 std::memcpy(&mask, &data[i + 20], 4);
130 std::memcpy(&size, &data[i + 24], 4);
131 addr =
static_cast<uint64_t
>(addr1) << 32 | addr0;
134 llvm::errs() <<
"Unhandled opcode: " << std::to_string(opc) <<
"\n";
// Record the decoded op, then dump its fields for debugging.
137 ops.emplace_back(opc, mask, addr, value, data_ptr, size);
138 LLVM_DEBUG(llvm::dbgs() <<
"addr: " << addr <<
"\n");
139 LLVM_DEBUG(llvm::dbgs() <<
"value: " << value <<
"\n");
140 LLVM_DEBUG(llvm::dbgs() <<
"size: " << size <<
"\n");
141 LLVM_DEBUG(llvm::dbgs() <<
"mask: " << mask <<
"\n");
142 LLVM_DEBUG(llvm::dbgs()
143 <<
"data: " <<
reinterpret_cast<uintptr_t
>(data_ptr) <<
"\n");
// ---- Version 1.0 layout ----
// Same three opcodes, but with 32-bit addresses and smaller field offsets.
145 }
else if (major == 1 && minor == 0) {
146 while (i < data.size()) {
149 LLVM_DEBUG(llvm::dbgs() <<
"opcode: " + std::to_string(opc) +
"\n");
155 const uint8_t *data_ptr =
nullptr;
// WRITE: address at i+4, value at i+8.
158 LLVM_DEBUG(llvm::dbgs() <<
"opcode: WRITE (0x00)\n");
159 std::memcpy(&addr, &data[i + 4], 4);
160 std::memcpy(&value, &data[i + 8], 4);
// BLOCKWRITE: address at i+4, size at i+8, payload bytes start at i+12.
163 LLVM_DEBUG(llvm::dbgs() <<
"opcode: BLOCKWRITE (0x01)\n");
164 std::memcpy(&addr, &data[i + 4], 4);
165 std::memcpy(&size, &data[i + 8], 4);
166 data_ptr = data.data() + i + 12;
// MASKWRITE: address at i+4, value at i+8, mask at i+12.
170 LLVM_DEBUG(llvm::dbgs() <<
"opcode: MASKWRITE (0x03)\n");
171 std::memcpy(&addr, &data[i + 4], 4);
172 std::memcpy(&value, &data[i + 8], 4);
173 std::memcpy(&mask, &data[i + 12], 4);
176 llvm::errs() <<
"Unhandled opcode: " << std::to_string(opc) <<
"\n";
179 LLVM_DEBUG(llvm::dbgs() <<
"addr: " << addr <<
"\n");
180 LLVM_DEBUG(llvm::dbgs() <<
"value: " << value <<
"\n");
181 LLVM_DEBUG(llvm::dbgs() <<
"size: " << size <<
"\n");
182 LLVM_DEBUG(llvm::dbgs() <<
"mask: " << mask <<
"\n");
183 LLVM_DEBUG(llvm::dbgs()
184 <<
"data: " <<
reinterpret_cast<uintptr_t
>(data_ptr) <<
"\n");
185 ops.emplace_back(opc, mask, addr, value, data_ptr, size);
// Any other (major, minor) pair is rejected with a diagnostic.
188 llvm::errs() <<
"Unsupported TXN binary version: " <<
major <<
"." <<
minor
// Drives an AIERTControl instance for `targetOp`, with each step gated by a
// flag.
// NOTE(review): the tail of the parameter list and the bodies of the guarded
// branches are elided in this listing; only the visible conditions are
// described below.
196static LogicalResult generateTransactions(
AIERTControl &ctl,
197 const StringRef workDirPath,
198 DeviceOp &targetOp,
bool aieSim,
199 bool enableElfs,
bool enableInit,
// ELF step: taken only when enableElfs is set and the device contains at least
// one CoreOp; the condition is true when ctl.addAieElfs(...) fails.
201 if (enableElfs && !targetOp.getOps<CoreOp>().empty() &&
202 failed(ctl.
addAieElfs(targetOp, workDirPath, aieSim)))
// Core step: same CoreOp-presence guard, keyed on enableCores. The call that
// completes this condition is not visible here.
206 if (enableCores && !targetOp.getOps<CoreOp>().empty() &&
// Emits one AIEX NPU write op per decoded transaction operation at the
// builder's current insertion point. `operations` and `global_data` are walked
// in lockstep via llvm::zip, so entry k of `global_data` is the payload global
// paired with operation k.
// NOTE(review): the return type and the opcode dispatch (switch/case lines)
// are elided in this listing; which opcode selects which op below should be
// confirmed against the full file.
215emitTransactionOps(OpBuilder &builder,
216 std::vector<TransactionBinaryOperation> &operations,
217 std::vector<memref::GlobalOp> &global_data) {
// All emitted ops carry an unknown location.
219 auto loc = builder.getUnknownLoc();
222 for (
auto p :
llvm::zip(operations, global_data)) {
223 auto op = std::get<0>(p);
224 memref::GlobalOp payload = std::get<1>(p);
// 32-bit write: register offset and value come straight from the command.
227 builder.create<AIEX::NpuWrite32Op>(loc, op.cmd.RegOff, op.cmd.Value,
228 nullptr,
nullptr,
nullptr);
// Block write: take the payload global via a memref.get_global and pass the
// resulting value to NpuBlockWriteOp, with the register offset as a ui32
// attribute.
230 auto memref = builder.create<memref::GetGlobalOp>(loc, payload.getType(),
232 builder.create<AIEX::NpuBlockWriteOp>(
233 loc, builder.getUI32IntegerAttr(op.cmd.RegOff), memref.getResult(),
234 nullptr,
nullptr,
nullptr);
// Masked 32-bit write: register offset, value and mask from the command.
236 builder.create<AIEX::NpuMaskWrite32Op>(loc, op.cmd.RegOff, op.cmd.Value,
237 op.cmd.Mask,
nullptr,
nullptr,
// Anything else is reported on llvm::errs().
240 llvm::errs() <<
"Unhandled txn opcode: " << op.cmd.Opcode <<
"\n";
// Emits AIEX::NpuControlPacketOp ops for the decoded transaction operations.
// `operations` and `global_data` are walked in lockstep via llvm::zip.
// NOTE(review): the return type and the opcode dispatch lines are elided in
// this listing; the mapping from opcode to the three emission paths below
// should be confirmed against the full file.
250emitControlPacketOps(OpBuilder &builder,
251 std::vector<TransactionBinaryOperation> &operations,
252 std::vector<memref::GlobalOp> &global_data) {
254 auto loc = builder.getUnknownLoc();
255 auto ctx = builder.getContext();
258 for (
auto p :
llvm::zip(operations, global_data)) {
259 auto op = std::get<0>(p);
260 memref::GlobalOp payload = std::get<1>(p);
// Single-value packet: address = RegOff (ui32 attr), two i32 attributes set to
// 0, and a one-element data array holding cmd.Value.
263 builder.create<AIEX::NpuControlPacketOp>(
264 loc, builder.getUI32IntegerAttr(op.cmd.RegOff),
nullptr,
265 builder.getI32IntegerAttr(0),
266 builder.getI32IntegerAttr(0),
267 DenseI32ArrayAttr::get(ctx, ArrayRef<int32_t>(op.cmd.Value)));
// Block-write path: the payload global must have an initial value, and that
// value must be a dense integer elements attribute.
269 if (!std::get<1>(p).getInitialValue())
271 auto blockWriteData =
272 dyn_cast<DenseIntElementsAttr>(*std::get<1>(p).getInitialValue());
273 if (!blockWriteData) {
275 "Global symbol initial value is not a dense int array");
278 auto blockWriteDataValues = blockWriteData.getValues<int32_t>();
// Split the payload into chunks of at most four 32-bit words. Each chunk
// becomes its own control packet, and the target address advances by the
// number of bytes just emitted.
// NOTE(review): currAddr is a signed int initialised from RegOff; confirm the
// address range fits.
280 int currAddr = op.cmd.RegOff;
281 for (
size_t i = 0; i < blockWriteDataValues.size(); i += 4) {
282 auto last = std::min(blockWriteDataValues.size(), i + 4);
283 SmallVector<int32_t> splitData =
284 SmallVector<int32_t>(blockWriteDataValues.begin() + i,
285 blockWriteDataValues.begin() + last);
286 builder.create<AIEX::NpuControlPacketOp>(
287 loc, builder.getUI32IntegerAttr(currAddr),
nullptr,
288 builder.getI32IntegerAttr(0),
289 builder.getI32IntegerAttr(0),
290 DenseI32ArrayAttr::get(ctx, ArrayRef<int32_t>(splitData)));
291 currAddr += splitData.size() *
sizeof(int32_t);
// Third path: emitted with the same arguments as the single-value packet
// above (cmd.Mask is not used in the visible call).
295 builder.create<AIEX::NpuControlPacketOp>(
296 loc, builder.getUI32IntegerAttr(op.cmd.RegOff),
nullptr,
297 builder.getI32IntegerAttr(0),
298 builder.getI32IntegerAttr(0),
299 DenseI32ArrayAttr::get(ctx, ArrayRef<int32_t>(op.cmd.Value)));
// Anything else is reported on llvm::errs().
301 llvm::errs() <<
"Unhandled txn opcode: " << op.cmd.Opcode <<
"\n";
// Body of a pass over NpuControlPacketOp ops that folds consecutive packets
// targeting the same address into one, by OR-ing their data element-wise.
// NOTE(review): the function signature and the walk call that fills ctrlPktOps
// are elided in this listing; presumably this is
// orConsecutiveWritesOnSameAddr(Block *body). Confirm against the full file.
311 SmallVector<AIEX::NpuControlPacketOp> ctrlPktOps;
313 [&](AIEX::NpuControlPacketOp cpOp) { ctrlPktOps.push_back(cpOp); });
// Nothing collected: the handling of this case is on an elided line.
314 if (ctrlPktOps.empty())
// `ctrlPktBuffer` is the op currently accumulating merged data and
// `addrBuffer` is its address; both start at the first collected packet.
317 SmallVector<Operation *> erased;
318 int addrBuffer = ctrlPktOps[0].getAddress();
319 AIEX::NpuControlPacketOp ctrlPktBuffer = ctrlPktOps[0];
320 for (
size_t i = 1; i < ctrlPktOps.size(); i++) {
321 int currentAddrBuffer = ctrlPktOps[i].getAddress();
// Address changed: this packet becomes the new accumulation target.
322 if (addrBuffer != currentAddrBuffer) {
323 addrBuffer = currentAddrBuffer;
324 ctrlPktBuffer = ctrlPktOps[i];
// Same address as the buffered packet: merge the two data arrays.
// NOTE(review): .value() assumes both ops carry a data attribute; confirm.
327 auto bufferedData = ctrlPktBuffer.getData().value();
328 auto currentData = ctrlPktOps[i].getData().value();
329 SmallVector<int> newData;
// Iterate up to the longer of the two arrays: indices present in both are
// OR-ed together, and the remaining indices take a value from one of the two
// arrays (the false arm of the ternary is on an elided line).
330 for (
unsigned j = 0; j < std::max(bufferedData.size(), currentData.size());
332 if (j < std::min(bufferedData.size(), currentData.size())) {
333 newData.push_back(bufferedData[j] | currentData[j]);
336 newData.push_back(j < bufferedData.size() ? bufferedData[j]
// Write the merged data back onto the buffered op and queue the now-redundant
// op for removal.
339 ctrlPktBuffer.getProperties().data = DenseI32ArrayAttr::get(
340 ctrlPktBuffer->getContext(), ArrayRef<int>{newData});
341 erased.push_back(ctrlPktOps[i]);
// Removal is deferred until after the scan; the per-element action is on an
// elided line.
344 for (
auto e : erased)
// Materialises decoded transaction operations as MLIR inside `device`:
//  1. builds `global_data`, one entry per operation, holding either nullptr or
//     a memref.global that contains the operation's block-write payload;
//  2. creates an AIEX::RuntimeSequenceOp with a fresh symbol name;
//  3. fills the sequence body through emitTransactionOps or
//     emitControlPacketOps.
// NOTE(review): the conditions selecting between the branches (payload or no
// payload, which emitter for which outputType) and the declaration of `id` are
// elided in this listing.
356static LogicalResult convertTransactionOpsToMLIR(
357 OpBuilder builder, AIE::DeviceOp device,
OutputType outputType,
358 std::vector<TransactionBinaryOperation> &operations) {
360 auto loc = builder.getUnknownLoc();
363 std::vector<memref::GlobalOp> global_data;
364 for (
auto &op : operations) {
// Placeholder entry, so that global_data stays index-aligned with operations.
366 global_data.push_back(
nullptr);
// Payload path: cmd.Size is divided by 4 to get a 32-bit word count, and the
// words are copied out of the buffer that cmd.DataPtr points at.
// NOTE(review): this reads through a pointer stored as an integer; the buffer
// it refers to must still be alive at this point.
369 uint32_t size = op.cmd.Size / 4;
370 const uint32_t *d =
reinterpret_cast<const uint32_t *
>(op.cmd.DataPtr);
371 std::vector<uint32_t> data32(d, d + size);
// Pick a symbol name not already present in the device: "blockwrite_data",
// then "blockwrite_data_<id>" with an increasing id.
374 std::string name =
"blockwrite_data";
375 while (device.lookupSymbol(name))
376 name =
"blockwrite_data_" + std::to_string(
id++);
// Create a memref.global with "private" visibility, typed as a 1-D i32 memref
// of `size` elements and initialised from the copied words.
378 MemRefType memrefType = MemRefType::get({size}, builder.getI32Type());
379 TensorType tensorType = RankedTensorType::get({size}, builder.getI32Type());
380 auto global = builder.create<memref::GlobalOp>(
381 loc, name, builder.getStringAttr(
"private"), memrefType,
382 DenseElementsAttr::get<uint32_t>(tensorType, data32),
true,
nullptr);
383 global_data.push_back(global);
// Unique name for the runtime sequence: "configure", then "configure<id>".
// NOTE(review): this reuses the `id` counter from the blockwrite naming loop.
388 std::string seq_name =
"configure";
389 while (device.lookupSymbol(seq_name))
390 seq_name =
"configure" + std::to_string(
id++);
391 StringAttr seq_sym_name = builder.getStringAttr(seq_name);
392 auto seq = builder.create<AIEX::RuntimeSequenceOp>(loc, seq_sym_name);
// The sequence body gets one fresh block; the block is handed over to the
// region by push_back.
393 seq.getBody().push_back(
new Block);
// From here on, ops are emitted at the start of the sequence body.
396 builder.setInsertionPointToStart(&seq.getBody().front());
398 if (failed(emitTransactionOps(builder, operations, global_data)))
401 if (failed(emitControlPacketOps(builder, operations, global_data)))
// Reached only for an OutputType with no emitter.
407 llvm_unreachable(
"bad output type");
// Builds a fresh ModuleOp containing a DeviceOp whose body is populated from a
// serialized transaction binary.
// NOTE(review): the line carrying the function name and first parameter is
// elided in this listing; presumably this is
// convertTransactionBinaryToMLIR(mlir::MLIRContext *ctx, ...). Confirm against
// the full file.
417std::optional<mlir::ModuleOp>
419 std::vector<uint8_t> &binary) {
// Step 1: decode the binary; a failed parse is reported on llvm::errs().
422 std::vector<TransactionBinaryOperation> operations;
423 auto c = parseTransactionBinary(binary, operations);
425 llvm::errs() <<
"Failed to parse binary\n";
430 auto loc = mlir::UnknownLoc::get(ctx);
// Step 2: create an empty module and point the builder at its body.
433 auto module = ModuleOp::create(loc);
434 OpBuilder builder(module.getBodyRegion());
435 builder.setInsertionPointToStart(module.getBody());
// Step 3: choose the device from a 4-entry table indexed by `columns - 1`.
// NOTE(review): `columns` is defined on an elided line and no range check is
// visible; a value outside 1..4 would index past the vector. Confirm.
438 std::vector<AIEDevice> devices{AIEDevice::npu1_1col, AIEDevice::npu1_2col,
439 AIEDevice::npu1_3col, AIEDevice::npu1};
440 auto device = builder.create<DeviceOp>(loc, devices[columns - 1]);
// Give the device a body block with a terminator, then insert at its start.
441 device.getRegion().emplaceBlock();
442 DeviceOp::ensureTerminator(device.getBodyRegion(), builder, loc);
443 builder.setInsertionPointToStart(device.getBody());
// Step 4: emit the decoded operations into the device.
446 if (failed(convertTransactionOpsToMLIR(builder, device,
// Shared implementation behind both conversion passes: generates transactions
// for `device`, parses the resulting bytes back into
// TransactionBinaryOperation records, and emits them as MLIR inside the
// device.
// NOTE(review): the rest of the signature, the construction of `ctl` and the
// origin of `txn_data` are elided in this listing.
453static LogicalResult convertAIEToConfiguration(AIE::DeviceOp device,
464 bool xaieDebug =
false;
// ELF generation is requested only when an ELF directory was supplied.
473 bool generateElfs = clElfDir.size() > 0;
474 if (failed(generateTransactions(ctl, clElfDir, device, aieSim, generateElfs,
// Decode the transaction bytes; a failed parse is reported on llvm::errs().
482 std::vector<TransactionBinaryOperation> operations;
483 if (!parseTransactionBinary(txn_data, operations)) {
484 llvm::errs() <<
"Failed to parse binary\n";
// Emit into the existing device body.
488 OpBuilder builder(device.getBodyRegion());
492 convertTransactionOpsToMLIR(builder, device, outputType, operations)))
// Pass wrapper that runs convertAIEToConfiguration on the operation it is
// scheduled on and signals pass failure if the conversion fails.
// NOTE(review): the remaining arguments of the convertAIEToConfiguration call
// and the closing braces are elided in this listing.
500struct ConvertAIEToTransactionPass
501 : ConvertAIEToTransactionBase<ConvertAIEToTransactionPass> {
// Declares the dialects whose ops this pass may create: memref and AIEX.
// NOTE(review): `®istry` looks like an HTML-entity decoding artifact of
// `&registry` (the body uses `registry`); restore it in the real source.
502 void getDependentDialects(DialectRegistry ®istry)
const override {
503 registry.insert<memref::MemRefDialect, AIEX::AIEXDialect>();
505 void runOnOperation()
override {
506 if (failed(convertAIEToConfiguration(getOperation(), clElfDir,
508 return signalPassFailure();
// Pass wrapper that runs convertAIEToConfiguration on the operation it is
// scheduled on and signals pass failure if the conversion fails.
// NOTE(review): the remaining arguments of the convertAIEToConfiguration call
// (presumably including the output-type selector) and the closing braces are
// elided in this listing.
512struct ConvertAIEToControlPacketsPass
513 :
public ConvertAIEToControlPacketsBase<ConvertAIEToControlPacketsPass> {
// Declares the dialects whose ops this pass may create: memref and AIEX.
// NOTE(review): `®istry` looks like an HTML-entity decoding artifact of
// `&registry` (the body uses `registry`); restore it in the real source.
514 void getDependentDialects(DialectRegistry ®istry)
const override {
515 registry.insert<memref::MemRefDialect, AIEX::AIEXDialect>();
517 void runOnOperation()
override {
518 if (failed(convertAIEToConfiguration(getOperation(), clElfDir,
520 return signalPassFailure();
// Factory returning a new ConvertAIEToTransactionPass as an OperationPass on
// xilinx::AIE::DeviceOp.
// NOTE(review): the line carrying the function name is elided in this listing.
526std::unique_ptr<mlir::OperationPass<xilinx::AIE::DeviceOp>>
528 return std::make_unique<ConvertAIEToTransactionPass>();
// Factory returning a new ConvertAIEToControlPacketsPass as an OperationPass
// on xilinx::AIE::DeviceOp.
// NOTE(review): the line carrying the function name is elided in this listing.
531std::unique_ptr<mlir::OperationPass<xilinx::AIE::DeviceOp>>
533 return std::make_unique<ConvertAIEToControlPacketsPass>();
LogicalResult orConsecutiveWritesOnSameAddr(Block *body)
bool hasProperty(ModelProperty Prop) const
std::shared_ptr< Value > value()
Include the generated interface declarations.
std::unique_ptr< mlir::OperationPass< xilinx::AIE::DeviceOp > > createConvertAIEToTransactionPass()
std::optional< mlir::ModuleOp > convertTransactionBinaryToMLIR(mlir::MLIRContext *ctx, std::vector< uint8_t > &binary)
std::unique_ptr< mlir::OperationPass< xilinx::AIE::DeviceOp > > createConvertAIEToControlPacketsPass()
mlir::LogicalResult addAieElfs(DeviceOp &targetOp, const mlir::StringRef workDirPath, bool aieSim)
mlir::LogicalResult setIOBackend(bool aieSim, bool xaieDebug)
std::vector< uint8_t > exportSerializedTransaction()
mlir::LogicalResult addCoreEnable(DeviceOp &targetOp)
mlir::LogicalResult addInitConfig(DeviceOp &targetOp)