MLIR-AIE
AIEToConfiguration.cpp
Go to the documentation of this file.
//===- AIEToConfiguration.cpp -----------------------------------*- C++ -*-===//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved.
//
//===----------------------------------------------------------------------===//
10
11#include "../PassDetail.h"
12
16#include "aie/Targets/AIERT.h"
17
18#include "llvm/Support/Debug.h"
19
20extern "C" {
21#include "xaiengine/xaiegbl_defs.h"
22// above needs to go first for u32, u64 typedefs
23#include "xaiengine/xaie_txn.h"
24}
25
26#include <vector>
27
28#define DEBUG_TYPE "aie-convert-to-config"
29
30using namespace mlir;
31using namespace xilinx;
32using namespace xilinx::AIE;
33
34namespace {
35
36// An TransactionBinaryOperation encapulates an aie-rt TnxCmd struct
37struct TransactionBinaryOperation {
38 struct XAie_TxnCmd cmd;
39 TransactionBinaryOperation(XAie_TxnOpcode opc, uint32_t mask, uint64_t addr,
40 uint32_t value, const uint8_t *data,
41 uint32_t size) {
42 cmd.Opcode = opc;
43 cmd.Mask = mask;
44 cmd.RegOff = addr;
45 cmd.Value = value;
46 cmd.DataPtr = reinterpret_cast<uint64_t>(data);
47 cmd.Size = size;
48 }
49};
50} // namespace
51
52// Parse a TXN binary blob. On success return the number of columns from the
53// header and a vector of parsed operations. On failure return std::nullopt.
54static std::optional<int>
55parseTransactionBinary(const std::vector<uint8_t> &data,
56 std::vector<TransactionBinaryOperation> &ops) {
57
58 uint32_t major = data[0];
59 uint32_t minor = data[1];
60 uint32_t num_cols = data[4];
61
62 uint32_t num_ops, txn_size;
63 std::memcpy(&num_ops, &data[8], 4);
64 std::memcpy(&txn_size, &data[12], 4);
65
66 LLVM_DEBUG(llvm::dbgs() << "Major: " << major << "\n");
67 LLVM_DEBUG(llvm::dbgs() << "Minor: " << minor << "\n");
68 LLVM_DEBUG(llvm::dbgs() << "DevGen: " << data[2] << "\n");
69 LLVM_DEBUG(llvm::dbgs() << "NumRows: " << data[3] << "\n");
70 LLVM_DEBUG(llvm::dbgs() << "NumCols: " << num_cols << "\n");
71 LLVM_DEBUG(llvm::dbgs() << "NumMemTileRows: " << data[5] << "\n");
72 LLVM_DEBUG(llvm::dbgs() << "NumOps: " << num_ops << "\n");
73 LLVM_DEBUG(llvm::dbgs() << "TxnSize: " << txn_size << " bytes\n");
74
75 size_t i = 16;
76
77 // Convert opcode from uint8 to enum
78 auto convertOpcode = [](uint8_t opc) {
79 switch (opc) {
80 case 0:
82 case 1:
84 case 3:
86 default:
87 llvm::errs() << "Unhandled opcode: " << std::to_string(opc) << "\n";
89 }
90 };
91
92 // Parse the binary blob. There are two versions supported, 0.1 and 1.0.
93 // For both versions, build a list of TransactionBinaryOperation objects
94 // representing the parsed operations.
95 if (major == 0 && minor == 1) {
96 while (i < data.size()) {
97
98 XAie_TxnOpcode opc = convertOpcode(data[i]);
99 LLVM_DEBUG(llvm::dbgs() << "opcode: " + std::to_string(opc) + "\n");
100
101 uint64_t addr = 0;
102 uint32_t value = 0;
103 uint32_t size = 0;
104 uint32_t mask = 0;
105 const uint8_t *data_ptr = nullptr;
106
108 LLVM_DEBUG(llvm::dbgs() << "opcode: WRITE (0x00)\n");
109 uint32_t addr0, addr1;
110 std::memcpy(&addr0, &data[i + 8], 4);
111 std::memcpy(&addr1, &data[i + 12], 4);
112 std::memcpy(&value, &data[i + 16], 4);
113 std::memcpy(&size, &data[i + 20], 4);
114 addr = static_cast<uint64_t>(addr1) << 32 | addr0;
115 i += size;
116 } else if (opc == XAie_TxnOpcode::XAIE_IO_BLOCKWRITE) {
117 LLVM_DEBUG(llvm::dbgs() << "opcode: BLOCKWRITE (0x01)\n");
118 std::memcpy(&addr, &data[i + 8], 4);
119 std::memcpy(&size, &data[i + 12], 4);
120 data_ptr = data.data() + i + 16;
121 i += size;
122 size = size - 16;
123 } else if (opc == XAie_TxnOpcode::XAIE_IO_MASKWRITE) {
124 LLVM_DEBUG(llvm::dbgs() << "opcode: MASKWRITE (0x03)\n");
125 uint32_t addr0, addr1;
126 std::memcpy(&addr0, &data[i + 8], 4);
127 std::memcpy(&addr1, &data[i + 12], 4);
128 std::memcpy(&value, &data[i + 16], 4);
129 std::memcpy(&mask, &data[i + 20], 4);
130 std::memcpy(&size, &data[i + 24], 4);
131 addr = static_cast<uint64_t>(addr1) << 32 | addr0;
132 i += size;
133 } else {
134 llvm::errs() << "Unhandled opcode: " << std::to_string(opc) << "\n";
135 return std::nullopt;
136 }
137 ops.emplace_back(opc, mask, addr, value, data_ptr, size);
138 LLVM_DEBUG(llvm::dbgs() << "addr: " << addr << "\n");
139 LLVM_DEBUG(llvm::dbgs() << "value: " << value << "\n");
140 LLVM_DEBUG(llvm::dbgs() << "size: " << size << "\n");
141 LLVM_DEBUG(llvm::dbgs() << "mask: " << mask << "\n");
142 LLVM_DEBUG(llvm::dbgs()
143 << "data: " << reinterpret_cast<uintptr_t>(data_ptr) << "\n");
144 }
145 } else if (major == 1 && minor == 0) {
146 while (i < data.size()) {
147
148 XAie_TxnOpcode opc = convertOpcode(data[i]);
149 LLVM_DEBUG(llvm::dbgs() << "opcode: " + std::to_string(opc) + "\n");
150
151 uint64_t addr = 0;
152 uint32_t value = 0;
153 uint32_t size = 0;
154 uint32_t mask = 0;
155 const uint8_t *data_ptr = nullptr;
156
158 LLVM_DEBUG(llvm::dbgs() << "opcode: WRITE (0x00)\n");
159 std::memcpy(&addr, &data[i + 4], 4);
160 std::memcpy(&value, &data[i + 8], 4);
161 i += 12;
162 } else if (opc == XAie_TxnOpcode::XAIE_IO_BLOCKWRITE) {
163 LLVM_DEBUG(llvm::dbgs() << "opcode: BLOCKWRITE (0x01)\n");
164 std::memcpy(&addr, &data[i + 4], 4);
165 std::memcpy(&size, &data[i + 8], 4);
166 data_ptr = data.data() + i + 12;
167 i += size;
168 size = size - 12;
169 } else if (opc == XAie_TxnOpcode::XAIE_IO_MASKWRITE) {
170 LLVM_DEBUG(llvm::dbgs() << "opcode: MASKWRITE (0x03)\n");
171 std::memcpy(&addr, &data[i + 4], 4);
172 std::memcpy(&value, &data[i + 8], 4);
173 std::memcpy(&mask, &data[i + 12], 4);
174 i += 16;
175 } else {
176 llvm::errs() << "Unhandled opcode: " << std::to_string(opc) << "\n";
177 return std::nullopt;
178 }
179 LLVM_DEBUG(llvm::dbgs() << "addr: " << addr << "\n");
180 LLVM_DEBUG(llvm::dbgs() << "value: " << value << "\n");
181 LLVM_DEBUG(llvm::dbgs() << "size: " << size << "\n");
182 LLVM_DEBUG(llvm::dbgs() << "mask: " << mask << "\n");
183 LLVM_DEBUG(llvm::dbgs()
184 << "data: " << reinterpret_cast<uintptr_t>(data_ptr) << "\n");
185 ops.emplace_back(opc, mask, addr, value, data_ptr, size);
186 }
187 } else {
188 llvm::errs() << "Unsupported TXN binary version: " << major << "." << minor
189 << "\n";
190 return std::nullopt;
191 }
192
193 return num_cols;
194}
195
196static LogicalResult generateTransactions(AIERTControl &ctl,
197 const StringRef workDirPath,
198 DeviceOp &targetOp, bool aieSim,
199 bool enableElfs, bool enableInit,
200 bool enableCores) {
201 if (enableElfs && !targetOp.getOps<CoreOp>().empty() &&
202 failed(ctl.addAieElfs(targetOp, workDirPath, aieSim)))
203 return failure();
204 if (enableInit && failed(ctl.addInitConfig(targetOp)))
205 return failure();
206 if (enableCores && !targetOp.getOps<CoreOp>().empty() &&
207 failed(ctl.addCoreEnable(targetOp)))
208 return failure();
209 return success();
210}
211
212// Translate vector of TransactionBinaryOperation to a sequence of transaction
213// ops (npu.write32, npu.maskwrite32, npu.blockwrite).
214static LogicalResult
215emitTransactionOps(OpBuilder &builder,
216 std::vector<TransactionBinaryOperation> &operations,
217 std::vector<memref::GlobalOp> &global_data) {
218
219 auto loc = builder.getUnknownLoc();
220
221 // create the txn ops
222 for (auto p : llvm::zip(operations, global_data)) {
223 auto op = std::get<0>(p);
224 memref::GlobalOp payload = std::get<1>(p);
225
226 if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_WRITE) {
227 builder.create<AIEX::NpuWrite32Op>(loc, op.cmd.RegOff, op.cmd.Value,
228 nullptr, nullptr, nullptr);
229 } else if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_BLOCKWRITE) {
230 auto memref = builder.create<memref::GetGlobalOp>(loc, payload.getType(),
231 payload.getName());
232 builder.create<AIEX::NpuBlockWriteOp>(
233 loc, builder.getUI32IntegerAttr(op.cmd.RegOff), memref.getResult(),
234 nullptr, nullptr, nullptr);
235 } else if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_MASKWRITE) {
236 builder.create<AIEX::NpuMaskWrite32Op>(loc, op.cmd.RegOff, op.cmd.Value,
237 op.cmd.Mask, nullptr, nullptr,
238 nullptr);
239 } else {
240 llvm::errs() << "Unhandled txn opcode: " << op.cmd.Opcode << "\n";
241 return failure();
242 }
243 }
244 return success();
245}
246
247// Translate vector of TransactionBinaryOperation to a sequence of control
248// packet ops.
249static LogicalResult
250emitControlPacketOps(OpBuilder &builder,
251 std::vector<TransactionBinaryOperation> &operations,
252 std::vector<memref::GlobalOp> &global_data) {
253
254 auto loc = builder.getUnknownLoc();
255 auto ctx = builder.getContext();
256
257 // create the control packet ops
258 for (auto p : llvm::zip(operations, global_data)) {
259 auto op = std::get<0>(p);
260 memref::GlobalOp payload = std::get<1>(p);
261
262 if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_WRITE) {
263 builder.create<AIEX::NpuControlPacketOp>(
264 loc, builder.getUI32IntegerAttr(op.cmd.RegOff), nullptr,
265 /*opcode*/ builder.getI32IntegerAttr(0),
266 /*stream_id*/ builder.getI32IntegerAttr(0),
267 DenseI32ArrayAttr::get(ctx, ArrayRef<int32_t>(op.cmd.Value)));
268 } else if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_BLOCKWRITE) {
269 if (!std::get<1>(p).getInitialValue())
270 continue;
271 auto blockWriteData =
272 dyn_cast<DenseIntElementsAttr>(*std::get<1>(p).getInitialValue());
273 if (!blockWriteData) {
274 payload.emitError(
275 "Global symbol initial value is not a dense int array");
276 break;
277 }
278 auto blockWriteDataValues = blockWriteData.getValues<int32_t>();
279 // Split block write data into beats of 4 or less, in int32_t.
280 int currAddr = op.cmd.RegOff;
281 for (size_t i = 0; i < blockWriteDataValues.size(); i += 4) {
282 auto last = std::min(blockWriteDataValues.size(), i + 4);
283 SmallVector<int32_t> splitData =
284 SmallVector<int32_t>(blockWriteDataValues.begin() + i,
285 blockWriteDataValues.begin() + last);
286 builder.create<AIEX::NpuControlPacketOp>(
287 loc, builder.getUI32IntegerAttr(currAddr), nullptr,
288 /*opcode*/ builder.getI32IntegerAttr(0),
289 /*stream_id*/ builder.getI32IntegerAttr(0),
290 DenseI32ArrayAttr::get(ctx, ArrayRef<int32_t>(splitData)));
291 currAddr += splitData.size() * sizeof(int32_t);
292 }
293
294 } else if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_MASKWRITE) {
295 builder.create<AIEX::NpuControlPacketOp>(
296 loc, builder.getUI32IntegerAttr(op.cmd.RegOff), nullptr,
297 /*opcode*/ builder.getI32IntegerAttr(0),
298 /*stream_id*/ builder.getI32IntegerAttr(0),
299 DenseI32ArrayAttr::get(ctx, ArrayRef<int32_t>(op.cmd.Value)));
300 } else {
301 llvm::errs() << "Unhandled txn opcode: " << op.cmd.Opcode << "\n";
302 return failure();
303 }
304 }
305 return success();
306}
307
308// Perform bitwise or on consecutive control packets operating on the same
309// address, to resolve the lack of mask write in control packets.
310LogicalResult orConsecutiveWritesOnSameAddr(Block *body) {
311 SmallVector<AIEX::NpuControlPacketOp> ctrlPktOps;
312 body->walk(
313 [&](AIEX::NpuControlPacketOp cpOp) { ctrlPktOps.push_back(cpOp); });
314 if (ctrlPktOps.empty())
315 return success();
316
317 SmallVector<Operation *> erased;
318 int addrBuffer = ctrlPktOps[0].getAddress();
319 AIEX::NpuControlPacketOp ctrlPktBuffer = ctrlPktOps[0];
320 for (size_t i = 1; i < ctrlPktOps.size(); i++) {
321 int currentAddrBuffer = ctrlPktOps[i].getAddress();
322 if (addrBuffer != currentAddrBuffer) {
323 addrBuffer = currentAddrBuffer;
324 ctrlPktBuffer = ctrlPktOps[i];
325 continue;
326 }
327 auto bufferedData = ctrlPktBuffer.getData().value();
328 auto currentData = ctrlPktOps[i].getData().value();
329 SmallVector<int> newData;
330 for (unsigned j = 0; j < std::max(bufferedData.size(), currentData.size());
331 j++) {
332 if (j < std::min(bufferedData.size(), currentData.size())) {
333 newData.push_back(bufferedData[j] | currentData[j]);
334 continue;
335 }
336 newData.push_back(j < bufferedData.size() ? bufferedData[j]
337 : currentData[j]);
338 }
339 ctrlPktBuffer.getProperties().data = DenseI32ArrayAttr::get(
340 ctrlPktBuffer->getContext(), ArrayRef<int>{newData});
341 erased.push_back(ctrlPktOps[i]);
342 }
343
344 for (auto e : erased)
345 e->erase();
346
347 return success();
348}
349
// an enum to represent the output type of the transaction binary
enum OutputType {
  Transaction,   // emit npu.write32 / npu.maskwrite32 / npu.blockwrite ops
  ControlPacket, // emit control packet ops
};

356static LogicalResult convertTransactionOpsToMLIR(
357 OpBuilder builder, AIE::DeviceOp device, OutputType outputType,
358 std::vector<TransactionBinaryOperation> &operations) {
359
360 auto loc = builder.getUnknownLoc();
361
362 // for each blockwrite in the binary, create a GlobalOp with the data
363 std::vector<memref::GlobalOp> global_data;
364 for (auto &op : operations) {
365 if (op.cmd.Opcode != XAIE_IO_BLOCKWRITE) {
366 global_data.push_back(nullptr);
367 continue;
368 }
369 uint32_t size = op.cmd.Size / 4;
370 const uint32_t *d = reinterpret_cast<const uint32_t *>(op.cmd.DataPtr);
371 std::vector<uint32_t> data32(d, d + size);
372
373 int id = 0;
374 std::string name = "blockwrite_data";
375 while (device.lookupSymbol(name))
376 name = "blockwrite_data_" + std::to_string(id++);
377
378 MemRefType memrefType = MemRefType::get({size}, builder.getI32Type());
379 TensorType tensorType = RankedTensorType::get({size}, builder.getI32Type());
380 auto global = builder.create<memref::GlobalOp>(
381 loc, name, builder.getStringAttr("private"), memrefType,
382 DenseElementsAttr::get<uint32_t>(tensorType, data32), true, nullptr);
383 global_data.push_back(global);
384 }
385
386 // create aiex.runtime_sequence
387 int id = 0;
388 std::string seq_name = "configure";
389 while (device.lookupSymbol(seq_name))
390 seq_name = "configure" + std::to_string(id++);
391 StringAttr seq_sym_name = builder.getStringAttr(seq_name);
392 auto seq = builder.create<AIEX::RuntimeSequenceOp>(loc, seq_sym_name);
393 seq.getBody().push_back(new Block);
394
395 // create the txn ops
396 builder.setInsertionPointToStart(&seq.getBody().front());
397 if (outputType == OutputType::Transaction) {
398 if (failed(emitTransactionOps(builder, operations, global_data)))
399 return failure();
400 } else if (outputType == OutputType::ControlPacket) {
401 if (failed(emitControlPacketOps(builder, operations, global_data)))
402 return failure();
403 // resolve mask writes; control packet doesn't natively support mask write.
404 if (failed(orConsecutiveWritesOnSameAddr(&seq.getBody().front())))
405 return failure();
406 } else {
407 llvm_unreachable("bad output type");
408 }
409
410 return success();
411}
412
413// Convert (disassemble) a transaction binary to MLIR. On success return a new
414// ModuleOp containing a DeviceOp containing a runtime sequence with the
415// transaction binary encoded as a sequence of npu.write32, npu.maskwrite32 and
416// npu.blockwrite operations. On failure return std::nullopt.
417std::optional<mlir::ModuleOp>
419 std::vector<uint8_t> &binary) {
420
421 // parse the binary
422 std::vector<TransactionBinaryOperation> operations;
423 auto c = parseTransactionBinary(binary, operations);
424 if (!c) {
425 llvm::errs() << "Failed to parse binary\n";
426 return std::nullopt;
427 }
428 int columns = *c;
429
430 auto loc = mlir::UnknownLoc::get(ctx);
431
432 // create a new ModuleOp and set the insertion point
433 auto module = ModuleOp::create(loc);
434 OpBuilder builder(module.getBodyRegion());
435 builder.setInsertionPointToStart(module.getBody());
436
437 // create aie.device
438 std::vector<AIEDevice> devices{AIEDevice::npu1_1col, AIEDevice::npu1_2col,
439 AIEDevice::npu1_3col, AIEDevice::npu1};
440 auto device = builder.create<DeviceOp>(loc, devices[columns - 1]);
441 device.getRegion().emplaceBlock();
442 DeviceOp::ensureTerminator(device.getBodyRegion(), builder, loc);
443 builder.setInsertionPointToStart(device.getBody());
444
445 // convert the parsed ops to MLIR
446 if (failed(convertTransactionOpsToMLIR(builder, device,
447 OutputType::Transaction, operations)))
448 return std::nullopt;
449
450 return module;
451}
452
453static LogicalResult convertAIEToConfiguration(AIE::DeviceOp device,
454 StringRef clElfDir,
455 OutputType outputType) {
456
457 const AIETargetModel &targetModel =
458 (const AIETargetModel &)device.getTargetModel();
459
460 if (!targetModel.hasProperty(AIETargetModel::IsNPU))
461 return failure();
462
463 bool aieSim = false;
464 bool xaieDebug = false;
465
466 AIERTControl ctl(targetModel);
467 if (failed(ctl.setIOBackend(aieSim, xaieDebug)))
468 return failure();
469
470 // start collecting transations
471 ctl.startTransaction();
472
473 bool generateElfs = clElfDir.size() > 0;
474 if (failed(generateTransactions(ctl, clElfDir, device, aieSim, generateElfs,
475 true, true)))
476 return failure();
477
478 // Export the transactions to a binary buffer
479 std::vector<uint8_t> txn_data = ctl.exportSerializedTransaction();
480
481 // parse the binary data
482 std::vector<TransactionBinaryOperation> operations;
483 if (!parseTransactionBinary(txn_data, operations)) {
484 llvm::errs() << "Failed to parse binary\n";
485 return failure();
486 }
487
488 OpBuilder builder(device.getBodyRegion());
489
490 // convert the parsed ops to MLIR
491 if (failed(
492 convertTransactionOpsToMLIR(builder, device, outputType, operations)))
493 return failure();
494
495 return success();
496}
497
498namespace {
499
500struct ConvertAIEToTransactionPass
501 : ConvertAIEToTransactionBase<ConvertAIEToTransactionPass> {
502 void getDependentDialects(DialectRegistry &registry) const override {
503 registry.insert<memref::MemRefDialect, AIEX::AIEXDialect>();
504 }
505 void runOnOperation() override {
506 if (failed(convertAIEToConfiguration(getOperation(), clElfDir,
508 return signalPassFailure();
509 }
510};
511
512struct ConvertAIEToControlPacketsPass
513 : public ConvertAIEToControlPacketsBase<ConvertAIEToControlPacketsPass> {
514 void getDependentDialects(DialectRegistry &registry) const override {
515 registry.insert<memref::MemRefDialect, AIEX::AIEXDialect>();
516 }
517 void runOnOperation() override {
518 if (failed(convertAIEToConfiguration(getOperation(), clElfDir,
520 return signalPassFailure();
521 }
522};
523
524} // end anonymous namespace
525
526std::unique_ptr<mlir::OperationPass<xilinx::AIE::DeviceOp>>
528 return std::make_unique<ConvertAIEToTransactionPass>();
529}
530
531std::unique_ptr<mlir::OperationPass<xilinx::AIE::DeviceOp>>
533 return std::make_unique<ConvertAIEToControlPacketsPass>();
534}
XAie_TxnOpcode
@ XAIE_IO_BLOCKWRITE
@ XAIE_IO_MASKWRITE
@ XAIE_IO_CUSTOM_OP_MAX
@ XAIE_IO_WRITE
LogicalResult orConsecutiveWritesOnSameAddr(Block *body)
@ ControlPacket
bool hasProperty(ModelProperty Prop) const
std::shared_ptr< Value > value()
Definition cxxopts.hpp:1026
uint8_t major
Definition cxxopts.hpp:131
uint8_t minor
Definition cxxopts.hpp:131
Include the generated interface declarations.
std::unique_ptr< mlir::OperationPass< xilinx::AIE::DeviceOp > > createConvertAIEToTransactionPass()
std::optional< mlir::ModuleOp > convertTransactionBinaryToMLIR(mlir::MLIRContext *ctx, std::vector< uint8_t > &binary)
std::unique_ptr< mlir::OperationPass< xilinx::AIE::DeviceOp > > createConvertAIEToControlPacketsPass()
mlir::LogicalResult addAieElfs(DeviceOp &targetOp, const mlir::StringRef workDirPath, bool aieSim)
Definition AIERT.cpp:846
mlir::LogicalResult setIOBackend(bool aieSim, bool xaieDebug)
Definition AIERT.cpp:255
std::vector< uint8_t > exportSerializedTransaction()
Definition AIERT.cpp:886
mlir::LogicalResult addCoreEnable(DeviceOp &targetOp)
Definition AIERT.cpp:813
mlir::LogicalResult addInitConfig(DeviceOp &targetOp)
Definition AIERT.cpp:741