11#include "../PassDetail.h"
16#include "llvm/Support/Debug.h"
20#define DEBUG_TYPE "aie-convert-to-config"
// One decoded transaction command, held as an XAie_TxnCmd record.
29struct TransactionBinaryOperation {
30 struct XAie_TxnCmd cmd;
// Constructor: takes the opcode, mask, address, value and payload pointer.
// Call sites in this file pass one more trailing argument (a size); that
// parameter and the other field assignments are not visible in this view.
31 TransactionBinaryOperation(XAie_TxnOpcode opc, uint32_t mask, uint64_t addr,
32 uint32_t value,
const uint8_t *data,
// The payload bytes are not copied: only the pointer value is kept, stored as
// a uint64_t. The buffer behind `data` therefore has to stay alive for as long
// as cmd.DataPtr may be read back.
38 cmd.DataPtr =
reinterpret_cast<uint64_t
>(data);
// Decodes a serialized transaction binary (`data`) into `ops`.
//
// Header fields read here: data[0] = major version, data[1] = minor version,
// data[4] = number of columns, a 32-bit op count at byte offset 8 and a 32-bit
// transaction size at byte offset 12. Two layouts are handled: version 0.1 and
// version 1.0; any other version is reported as unsupported.
//
// Returns std::optional<int>; callers in this file treat an empty optional as
// a parse failure. The returned value itself is not visible in this view
// (presumably the column count -- TODO confirm).
//
// NOTE(review): no check is visible that `data` holds at least 16 bytes before
// the header reads, nor that the `data[i + k]` reads inside the loops stay in
// range. Confirm against the full source before feeding untrusted input.
46static std::optional<int>
47parseTransactionBinary(
const std::vector<uint8_t> &data,
48 std::vector<TransactionBinaryOperation> &ops) {
// Single-byte header fields, widened to 32 bits.
50 uint32_t major = data[0];
51 uint32_t minor = data[1];
52 uint32_t num_cols = data[4];
// 32-bit header fields are copied out with memcpy rather than read through a
// casted pointer.
54 uint32_t num_ops, txn_size;
55 std::memcpy(&num_ops, &data[8], 4);
56 std::memcpy(&txn_size, &data[12], 4);
// Debug dump of the header.
// NOTE(review): data[2], data[3] and data[5] are streamed as uint8_t; the
// stream may print them as characters rather than numbers -- confirm.
58 LLVM_DEBUG(llvm::dbgs() <<
"Major: " << major <<
"\n");
59 LLVM_DEBUG(llvm::dbgs() <<
"Minor: " << minor <<
"\n");
60 LLVM_DEBUG(llvm::dbgs() <<
"DevGen: " << data[2] <<
"\n");
61 LLVM_DEBUG(llvm::dbgs() <<
"NumRows: " << data[3] <<
"\n");
62 LLVM_DEBUG(llvm::dbgs() <<
"NumCols: " << num_cols <<
"\n");
63 LLVM_DEBUG(llvm::dbgs() <<
"NumMemTileRows: " << data[5] <<
"\n");
64 LLVM_DEBUG(llvm::dbgs() <<
"NumOps: " << num_ops <<
"\n");
65 LLVM_DEBUG(llvm::dbgs() <<
"TxnSize: " << txn_size <<
" bytes\n");
// Maps a raw opcode byte to XAie_TxnOpcode. The selecting conditions are not
// visible here; the visible results are WRITE, BLOCKWRITE and MASKWRITE, and
// any other byte is reported and mapped to XAIE_IO_CUSTOM_OP_MAX.
70 auto convertOpcode = [](uint8_t opc) {
73 return XAie_TxnOpcode::XAIE_IO_WRITE;
75 return XAie_TxnOpcode::XAIE_IO_BLOCKWRITE;
77 return XAie_TxnOpcode::XAIE_IO_MASKWRITE;
79 llvm::errs() <<
"Unhandled opcode: " << std::to_string(opc) <<
"\n";
80 return XAie_TxnOpcode::XAIE_IO_CUSTOM_OP_MAX;
// ---- Version 0.1 layout ----
// Each record starts at byte `i`; how `i` advances is not visible in this view.
87 if (major == 0 && minor == 1) {
88 while (i < data.size()) {
90 XAie_TxnOpcode opc = convertOpcode(data[i]);
91 LLVM_DEBUG(llvm::dbgs() <<
"opcode: " + std::to_string(opc) +
"\n");
// Stays null unless the record is a BLOCKWRITE.
97 const uint8_t *data_ptr =
nullptr;
99 if (opc == XAie_TxnOpcode::XAIE_IO_WRITE) {
// WRITE: address low word at +8, high word at +12, value at +16, size at +20.
100 LLVM_DEBUG(llvm::dbgs() <<
"opcode: WRITE (0x00)\n");
101 uint32_t addr0, addr1;
102 std::memcpy(&addr0, &data[i + 8], 4);
103 std::memcpy(&addr1, &data[i + 12], 4);
104 std::memcpy(&value, &data[i + 16], 4);
105 std::memcpy(&size, &data[i + 20], 4);
// Combine the two words into one 64-bit address (addr1 is the high half).
106 addr =
static_cast<uint64_t
>(addr1) << 32 | addr0;
108 }
else if (opc == XAie_TxnOpcode::XAIE_IO_BLOCKWRITE) {
// BLOCKWRITE: 4 address bytes at +8, size at +12, payload starts at +16.
// Only 4 bytes are copied into `addr` here.
109 LLVM_DEBUG(llvm::dbgs() <<
"opcode: BLOCKWRITE (0x01)\n");
110 std::memcpy(&addr, &data[i + 8], 4);
111 std::memcpy(&size, &data[i + 12], 4);
// Points into `data` itself; no copy of the payload is made.
112 data_ptr = data.data() + i + 16;
115 }
else if (opc == XAie_TxnOpcode::XAIE_IO_MASKWRITE) {
// MASKWRITE: address words at +8/+12, value at +16, mask at +20, size at +24.
116 LLVM_DEBUG(llvm::dbgs() <<
"opcode: MASKWRITE (0x03)\n");
117 uint32_t addr0, addr1;
118 std::memcpy(&addr0, &data[i + 8], 4);
119 std::memcpy(&addr1, &data[i + 12], 4);
120 std::memcpy(&value, &data[i + 16], 4);
121 std::memcpy(&mask, &data[i + 20], 4);
122 std::memcpy(&size, &data[i + 24], 4);
123 addr =
static_cast<uint64_t
>(addr1) << 32 | addr0;
126 llvm::errs() <<
"Unhandled opcode: " << std::to_string(opc) <<
"\n";
// Record the decoded command, then dump its fields for debugging.
129 ops.emplace_back(opc, mask, addr, value, data_ptr, size);
130 LLVM_DEBUG(llvm::dbgs() <<
"addr: " << addr <<
"\n");
131 LLVM_DEBUG(llvm::dbgs() <<
"value: " << value <<
"\n");
132 LLVM_DEBUG(llvm::dbgs() <<
"size: " << size <<
"\n");
133 LLVM_DEBUG(llvm::dbgs() <<
"mask: " << mask <<
"\n");
134 LLVM_DEBUG(llvm::dbgs()
135 <<
"data: " <<
reinterpret_cast<uintptr_t
>(data_ptr) <<
"\n");
137 }
else if (major == 1 && minor == 0) {
// ---- Version 1.0 layout ----
// Records are more compact: fields start at +4 and addresses are read as a
// single 4-byte field.
138 while (i < data.size()) {
140 XAie_TxnOpcode opc = convertOpcode(data[i]);
141 LLVM_DEBUG(llvm::dbgs() <<
"opcode: " + std::to_string(opc) +
"\n");
147 const uint8_t *data_ptr =
nullptr;
149 if (opc == XAie_TxnOpcode::XAIE_IO_WRITE) {
// WRITE: address at +4, value at +8.
150 LLVM_DEBUG(llvm::dbgs() <<
"opcode: WRITE (0x00)\n");
151 std::memcpy(&addr, &data[i + 4], 4);
152 std::memcpy(&value, &data[i + 8], 4);
154 }
else if (opc == XAie_TxnOpcode::XAIE_IO_BLOCKWRITE) {
// BLOCKWRITE: address at +4, size at +8, payload starts at +12.
155 LLVM_DEBUG(llvm::dbgs() <<
"opcode: BLOCKWRITE (0x01)\n");
156 std::memcpy(&addr, &data[i + 4], 4);
157 std::memcpy(&size, &data[i + 8], 4);
158 data_ptr = data.data() + i + 12;
161 }
else if (opc == XAie_TxnOpcode::XAIE_IO_MASKWRITE) {
// MASKWRITE: address at +4, value at +8, mask at +12.
162 LLVM_DEBUG(llvm::dbgs() <<
"opcode: MASKWRITE (0x03)\n");
163 std::memcpy(&addr, &data[i + 4], 4);
164 std::memcpy(&value, &data[i + 8], 4);
165 std::memcpy(&mask, &data[i + 12], 4);
168 llvm::errs() <<
"Unhandled opcode: " << std::to_string(opc) <<
"\n";
171 LLVM_DEBUG(llvm::dbgs() <<
"addr: " << addr <<
"\n");
172 LLVM_DEBUG(llvm::dbgs() <<
"value: " << value <<
"\n");
173 LLVM_DEBUG(llvm::dbgs() <<
"size: " << size <<
"\n");
174 LLVM_DEBUG(llvm::dbgs() <<
"mask: " << mask <<
"\n");
175 LLVM_DEBUG(llvm::dbgs()
176 <<
"data: " <<
reinterpret_cast<uintptr_t
>(data_ptr) <<
"\n");
177 ops.emplace_back(opc, mask, addr, value, data_ptr, size);
// Any other version pair is rejected with a diagnostic.
180 llvm::errs() <<
"Unsupported TXN binary version: " << major <<
"." << minor
// Drives the runtime control object `ctl` to configure `targetOp`.
// Only part of the parameter list and body is visible in this view; the
// visible steps are each gated on a flag and on the device containing at least
// one CoreOp.
188static LogicalResult generateTransactions(
AIERTControl &ctl,
189 const StringRef workDirPath,
190 DeviceOp &targetOp,
bool aieSim,
191 bool enableElfs,
bool enableInit,
// ELF loading: attempted only when requested and the device has cores.
193 if (enableElfs && !targetOp.getOps<CoreOp>().empty() &&
194 failed(ctl.
addAieElfs(targetOp, workDirPath, aieSim)))
// Core enabling: same gating on core presence; the call that follows the
// condition is not visible here.
198 if (enableCores && !targetOp.getOps<CoreOp>().empty() &&
// Emits one AIEX op per decoded command at the builder's insertion point.
// `operations` and `global_data` are walked in lockstep, so entry k of
// `global_data` is the payload global for command k.
207emitTransactionOps(OpBuilder &builder,
208 std::vector<TransactionBinaryOperation> &operations,
209 std::vector<memref::GlobalOp> &global_data) {
211 auto loc = builder.getUnknownLoc();
214 for (
auto p :
llvm::zip(operations, global_data)) {
// NOTE(review): `auto op` copies the command on every iteration.
215 auto op = std::get<0>(p);
216 memref::GlobalOp payload = std::get<1>(p);
218 if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_WRITE) {
// WRITE -> NpuWrite32Op with the register offset and the value.
219 builder.create<AIEX::NpuWrite32Op>(loc, op.cmd.RegOff, op.cmd.Value,
220 nullptr,
nullptr,
nullptr);
221 }
else if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_BLOCKWRITE) {
// BLOCKWRITE -> reference the payload global, then an NpuBlockWriteOp that
// takes the register offset and that memref value.
222 auto memref = builder.create<memref::GetGlobalOp>(loc, payload.getType(),
224 builder.create<AIEX::NpuBlockWriteOp>(
225 loc, builder.getUI32IntegerAttr(op.cmd.RegOff), memref.getResult(),
226 nullptr,
nullptr,
nullptr);
227 }
else if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_MASKWRITE) {
// MASKWRITE -> NpuMaskWrite32Op with offset, value and mask.
228 builder.create<AIEX::NpuMaskWrite32Op>(loc, op.cmd.RegOff, op.cmd.Value,
229 op.cmd.Mask,
nullptr,
nullptr,
// Any other opcode is reported on stderr.
232 llvm::errs() <<
"Unhandled txn opcode: " << op.cmd.Opcode <<
"\n";
// Emits AIEX::NpuControlPacketOp ops for each decoded command.
// `operations` and `global_data` are walked in lockstep, as in the
// transaction emitter.
242emitControlPacketOps(OpBuilder &builder,
243 std::vector<TransactionBinaryOperation> &operations,
244 std::vector<memref::GlobalOp> &global_data) {
246 auto loc = builder.getUnknownLoc();
247 auto ctx = builder.getContext();
250 for (
auto p :
llvm::zip(operations, global_data)) {
// NOTE(review): `auto op` copies the command on every iteration.
251 auto op = std::get<0>(p);
252 memref::GlobalOp payload = std::get<1>(p);
254 if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_WRITE) {
// WRITE -> one packet addressed at RegOff whose data is the single Value
// word. The two zero-valued I32 attributes are passed as-is; their meaning is
// defined by the op and is not visible here.
255 builder.create<AIEX::NpuControlPacketOp>(
256 loc, builder.getUI32IntegerAttr(op.cmd.RegOff),
nullptr,
257 builder.getI32IntegerAttr(0),
258 builder.getI32IntegerAttr(0),
259 DenseI32ArrayAttr::get(ctx, ArrayRef<int32_t>(op.cmd.Value)));
260 }
else if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_BLOCKWRITE) {
// BLOCKWRITE -> the payload comes from the global's initial value, which has
// to be present and be a DenseIntElementsAttr.
261 if (!std::get<1>(p).getInitialValue())
263 auto blockWriteData =
264 dyn_cast<DenseIntElementsAttr>(*std::get<1>(p).getInitialValue());
265 if (!blockWriteData) {
267 "Global symbol initial value is not a dense int array");
270 auto blockWriteDataValues = blockWriteData.getValues<int32_t>();
// Split the payload into groups of at most four 32-bit words and emit one
// packet per group.
272 int currAddr = op.cmd.RegOff;
273 for (
size_t i = 0; i < blockWriteDataValues.size(); i += 4) {
274 auto last = std::min(blockWriteDataValues.size(), i + 4);
275 SmallVector<int32_t> splitData =
276 SmallVector<int32_t>(blockWriteDataValues.begin() + i,
277 blockWriteDataValues.begin() + last);
278 builder.create<AIEX::NpuControlPacketOp>(
279 loc, builder.getUI32IntegerAttr(currAddr),
nullptr,
280 builder.getI32IntegerAttr(0),
281 builder.getI32IntegerAttr(0),
282 DenseI32ArrayAttr::get(ctx, ArrayRef<int32_t>(splitData)));
// Advance the target address by the number of bytes just emitted.
283 currAddr += splitData.size() *
sizeof(int32_t);
286 }
else if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_MASKWRITE) {
// MASKWRITE -> same packet shape as WRITE.
// NOTE(review): op.cmd.Mask is not referenced in the visible lines of this
// branch -- confirm the mask is meant to be dropped here.
287 builder.create<AIEX::NpuControlPacketOp>(
288 loc, builder.getUI32IntegerAttr(op.cmd.RegOff),
nullptr,
289 builder.getI32IntegerAttr(0),
290 builder.getI32IntegerAttr(0),
291 DenseI32ArrayAttr::get(ctx, ArrayRef<int32_t>(op.cmd.Value)));
// Any other opcode is reported on stderr.
293 llvm::errs() <<
"Unhandled txn opcode: " << op.cmd.Opcode <<
"\n";
// Merges runs of control packets that target the same address.
// NOTE(review): the signature line is not visible in this view; presumably
// this is the body of orConsecutiveWritesOnSameAddr(Block *body) -- confirm.
//
// Step 1: gather every NpuControlPacketOp (the traversal call itself is not
// visible here).
303 SmallVector<AIEX::NpuControlPacketOp> ctrlPktOps;
305 [&](AIEX::NpuControlPacketOp cpOp) { ctrlPktOps.push_back(cpOp); });
// Guard: index 0 is read below, so an empty list is handled first.
306 if (ctrlPktOps.empty())
// Step 2: keep the first op of each same-address run as the "buffer" op and
// fold the following ops of the run into it.
309 SmallVector<Operation *> erased;
310 int addrBuffer = ctrlPktOps[0].getAddress();
311 AIEX::NpuControlPacketOp ctrlPktBuffer = ctrlPktOps[0];
312 for (
size_t i = 1; i < ctrlPktOps.size(); i++) {
313 int currentAddrBuffer = ctrlPktOps[i].getAddress();
// Address changed: this op becomes the new buffer op.
314 if (addrBuffer != currentAddrBuffer) {
315 addrBuffer = currentAddrBuffer;
316 ctrlPktBuffer = ctrlPktOps[i];
// Same address: combine the data arrays element by element.
319 auto bufferedData = ctrlPktBuffer.getData().value();
320 auto currentData = ctrlPktOps[i].getData().value();
321 SmallVector<int> newData;
322 for (
unsigned j = 0; j < std::max(bufferedData.size(), currentData.size());
// Positions present in both arrays are bitwise OR-ed.
324 if (j < std::min(bufferedData.size(), currentData.size())) {
325 newData.push_back(bufferedData[j] | currentData[j]);
// Positions past the shorter array: the visible part of this expression picks
// bufferedData[j] when j is within it (the other arm is not visible here).
328 newData.push_back(j < bufferedData.size() ? bufferedData[j]
// Write the merged data into the buffer op and queue the current op for
// removal.
331 ctrlPktBuffer.getProperties().data = DenseI32ArrayAttr::get(
332 ctrlPktBuffer->getContext(), ArrayRef<int>{newData});
333 erased.push_back(ctrlPktOps[i]);
// Step 3: process the queued ops after the scan (loop body not visible here).
336 for (
auto e : erased)
// Materializes decoded commands as MLIR inside `device`:
//   1. one memref::GlobalOp per BLOCKWRITE payload,
//   2. a RuntimeSequenceOp with a fresh symbol name,
//   3. the per-command ops inside that sequence, in the form selected by
//      `outputType`.
348static LogicalResult convertTransactionOpsToMLIR(
349 OpBuilder builder, AIE::DeviceOp device,
OutputType outputType,
350 std::vector<TransactionBinaryOperation> &operations) {
352 auto loc = builder.getUnknownLoc();
// `global_data` is kept index-aligned with `operations`: commands that are not
// BLOCKWRITE get a null placeholder.
355 std::vector<memref::GlobalOp> global_data;
356 for (
auto &op : operations) {
357 if (op.cmd.Opcode != XAIE_IO_BLOCKWRITE) {
358 global_data.push_back(
nullptr);
// Payload length in 32-bit words; integer division, so any remainder bytes
// are dropped.
361 uint32_t size = op.cmd.Size / 4;
// DataPtr holds a pointer value stored earlier as an integer.
// NOTE(review): it is read back as uint32_t words here although it was
// produced from a byte pointer -- confirm alignment is guaranteed.
362 const uint32_t *d =
reinterpret_cast<const uint32_t *
>(op.cmd.DataPtr);
363 std::vector<uint32_t> data32(d, d + size);
// Pick a symbol name that does not collide with anything in the device.
366 std::string name =
"blockwrite_data";
367 while (device.lookupSymbol(name))
368 name =
"blockwrite_data_" + std::to_string(
id++);
// 1-D i32 memref / tensor types of `size` elements for the global and its
// initializer.
370 MemRefType memrefType = MemRefType::get({size}, builder.getI32Type());
371 TensorType tensorType = RankedTensorType::get({size}, builder.getI32Type());
372 auto global = builder.create<memref::GlobalOp>(
373 loc, name, builder.getStringAttr(
"private"), memrefType,
374 DenseElementsAttr::get<uint32_t>(tensorType, data32),
true,
nullptr);
375 global_data.push_back(global);
// Sequence symbol: "configure", or "configure<N>" when that name is taken.
// The same `id` counter as above is used.
380 std::string seq_name =
"configure";
381 while (device.lookupSymbol(seq_name))
382 seq_name =
"configure" + std::to_string(
id++);
383 StringAttr seq_sym_name = builder.getStringAttr(seq_name);
384 auto seq = builder.create<AIEX::RuntimeSequenceOp>(loc, seq_sym_name);
// The sequence body takes this heap-allocated block via push_back.
385 seq.getBody().push_back(
new Block);
// Everything emitted below lands at the start of the sequence body.
388 builder.setInsertionPointToStart(&seq.getBody().front());
// The selection between these two emitters by `outputType` is not visible
// here.
390 if (failed(emitTransactionOps(builder, operations, global_data)))
393 if (failed(emitControlPacketOps(builder, operations, global_data)))
399 llvm_unreachable(
"bad output type");
// Builds a fresh module from a serialized transaction binary: parse the bytes,
// create a module and a device op, then convert the decoded commands into that
// device. The function-name line is not visible in this view.
409std::optional<mlir::ModuleOp>
411 std::vector<uint8_t> &binary) {
// Step 1: decode the binary; a diagnostic is printed on failure.
414 std::vector<TransactionBinaryOperation> operations;
415 auto c = parseTransactionBinary(binary, operations);
417 llvm::errs() <<
"Failed to parse binary\n";
422 auto loc = mlir::UnknownLoc::get(ctx);
// Step 2: empty module, with the builder positioned at the start of its body.
425 auto module = ModuleOp::create(loc);
426 OpBuilder builder(module.getBodyRegion());
427 builder.setInsertionPointToStart(module.getBody());
// Step 3: choose the device variant by column count (index = columns - 1).
// NOTE(review): no range check on `columns` is visible before the index below;
// 0 or a value larger than the table would read out of bounds -- confirm.
430 std::vector<AIEDevice> devices{AIEDevice::npu1_1col, AIEDevice::npu1_2col,
431 AIEDevice::npu1_3col, AIEDevice::npu1_4col,
433 auto device = builder.create<DeviceOp>(loc, devices[columns - 1]);
// Give the device a body block with a terminator, then emit into it.
434 device.getRegion().emplaceBlock();
435 DeviceOp::ensureTerminator(device.getBodyRegion(), builder, loc);
436 builder.setInsertionPointToStart(device.getBody());
// Step 4: emit the ops for the decoded commands.
439 if (failed(convertTransactionOpsToMLIR(builder, device,
// Converts an existing device into configuration ops by round-tripping through
// a serialized transaction: record the configuration into a transaction,
// export it as bytes, decode those bytes, and emit ops from the decoded
// commands. Only part of the parameter list and body is visible in this view.
446static LogicalResult convertAIEToConfiguration(AIE::DeviceOp device,
457 bool xaieDebug =
false;
// Open a transaction on the device instance with auto-flush disabled.
464 XAie_StartTransaction(&ctl.
devInst, XAIE_TRANSACTION_DISABLE_AUTO_FLUSH);
// ELF loading is requested only when an ELF directory was supplied.
466 bool generateElfs = clElfDir.size() > 0;
467 if (failed(generateTransactions(ctl, clElfDir, device, aieSim, generateElfs,
// Export the recorded transaction; the header at the start of the buffer
// gives the total byte size, which bounds the copy into `txn_data`.
// NOTE(review): no release of `txn_ptr` is visible in this view -- confirm who
// owns the exported buffer.
472 uint8_t *txn_ptr = XAie_ExportSerializedTransaction(&ctl.
devInst, 0, 0);
473 XAie_TxnHeader *hdr = (XAie_TxnHeader *)txn_ptr;
474 std::vector<uint8_t> txn_data(txn_ptr, txn_ptr + hdr->TxnSize);
// Decode the exported bytes; an empty result means the parse failed.
477 std::vector<TransactionBinaryOperation> operations;
478 if (!parseTransactionBinary(txn_data, operations)) {
479 llvm::errs() <<
"Failed to parse binary\n";
// Emit the resulting ops into the device body.
483 OpBuilder builder(device.getBodyRegion());
487 convertTransactionOpsToMLIR(builder, device, outputType, operations)))
// Pass wrapper: runs convertAIEToConfiguration on the operation the pass is
// applied to and signals pass failure if the conversion fails. The arguments
// after clElfDir are not visible in this view.
495struct ConvertAIEToTransactionPass
496 : ConvertAIEToTransactionBase<ConvertAIEToTransactionPass> {
// Registers the dialects whose ops this pass creates (memref and AIEX).
// NOTE(review): `®istry` below looks like a mis-decoded `&registry` (the body
// uses `registry`); restore it from the original source.
497 void getDependentDialects(DialectRegistry ®istry)
const override {
498 registry.insert<memref::MemRefDialect, AIEX::AIEXDialect>();
500 void runOnOperation()
override {
501 if (failed(convertAIEToConfiguration(getOperation(), clElfDir,
503 return signalPassFailure();
// Pass wrapper: runs convertAIEToConfiguration on the operation the pass is
// applied to and signals pass failure if the conversion fails. The arguments
// after clElfDir are not visible in this view.
507struct ConvertAIEToControlPacketsPass
508 :
public ConvertAIEToControlPacketsBase<ConvertAIEToControlPacketsPass> {
// Registers the dialects whose ops this pass creates (memref and AIEX).
// NOTE(review): `®istry` below looks like a mis-decoded `&registry` (the body
// uses `registry`); restore it from the original source.
509 void getDependentDialects(DialectRegistry ®istry)
const override {
510 registry.insert<memref::MemRefDialect, AIEX::AIEXDialect>();
512 void runOnOperation()
override {
513 if (failed(convertAIEToConfiguration(getOperation(), clElfDir,
515 return signalPassFailure();
// Factory: returns a newly allocated ConvertAIEToTransactionPass.
// The function-name line is not visible in this view.
521std::unique_ptr<mlir::OperationPass<xilinx::AIE::DeviceOp>>
523 return std::make_unique<ConvertAIEToTransactionPass>();
// Factory: returns a newly allocated ConvertAIEToControlPacketsPass.
// The function-name line is not visible in this view.
526std::unique_ptr<mlir::OperationPass<xilinx::AIE::DeviceOp>>
528 return std::make_unique<ConvertAIEToControlPacketsPass>();
LogicalResult orConsecutiveWritesOnSameAddr(Block *body)
bool hasProperty(ModelProperty Prop) const
Include the generated interface declarations.
std::unique_ptr< mlir::OperationPass< xilinx::AIE::DeviceOp > > createConvertAIEToTransactionPass()
std::optional< mlir::ModuleOp > convertTransactionBinaryToMLIR(mlir::MLIRContext *ctx, std::vector< uint8_t > &binary)
std::unique_ptr< mlir::OperationPass< xilinx::AIE::DeviceOp > > createConvertAIEToControlPacketsPass()
mlir::LogicalResult addAieElfs(DeviceOp &targetOp, const mlir::StringRef workDirPath, bool aieSim)
mlir::LogicalResult setIOBackend(bool aieSim, bool xaieDebug)
mlir::LogicalResult addCoreEnable(DeviceOp &targetOp)
mlir::LogicalResult addInitConfig(DeviceOp &targetOp)