MLIR-AIE
AIEToConfiguration.cpp
Go to the documentation of this file.
1//===- AIEToConfiguration.cpp -----------------------------------*- C++ -*-===//
2//
3// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7// Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved.
8//
9//===----------------------------------------------------------------------===//
10
11#include "../PassDetail.h"
12
14#include "aie/Targets/AIERT.h"
15
16#include "llvm/Support/Debug.h"
17
18#include <vector>
19
20#define DEBUG_TYPE "aie-convert-to-config"
21
22using namespace mlir;
23using namespace xilinx;
24using namespace xilinx::AIE;
25
26namespace {
27
28// An TransactionBinaryOperation encapulates an aie-rt TnxCmd struct
29struct TransactionBinaryOperation {
30 struct XAie_TxnCmd cmd;
31 TransactionBinaryOperation(XAie_TxnOpcode opc, uint32_t mask, uint64_t addr,
32 uint32_t value, const uint8_t *data,
33 uint32_t size) {
34 cmd.Opcode = opc;
35 cmd.Mask = mask;
36 cmd.RegOff = addr;
37 cmd.Value = value;
38 cmd.DataPtr = reinterpret_cast<uint64_t>(data);
39 cmd.Size = size;
40 }
41};
42} // namespace
43
44// Parse a TXN binary blob. On success return the number of columns from the
45// header and a vector of parsed operations. On failure return std::nullopt.
46static std::optional<int>
47parseTransactionBinary(const std::vector<uint8_t> &data,
48 std::vector<TransactionBinaryOperation> &ops) {
49
50 uint32_t major = data[0];
51 uint32_t minor = data[1];
52 uint32_t num_cols = data[4];
53
54 uint32_t num_ops, txn_size;
55 std::memcpy(&num_ops, &data[8], 4);
56 std::memcpy(&txn_size, &data[12], 4);
57
58 LLVM_DEBUG(llvm::dbgs() << "Major: " << major << "\n");
59 LLVM_DEBUG(llvm::dbgs() << "Minor: " << minor << "\n");
60 LLVM_DEBUG(llvm::dbgs() << "DevGen: " << data[2] << "\n");
61 LLVM_DEBUG(llvm::dbgs() << "NumRows: " << data[3] << "\n");
62 LLVM_DEBUG(llvm::dbgs() << "NumCols: " << num_cols << "\n");
63 LLVM_DEBUG(llvm::dbgs() << "NumMemTileRows: " << data[5] << "\n");
64 LLVM_DEBUG(llvm::dbgs() << "NumOps: " << num_ops << "\n");
65 LLVM_DEBUG(llvm::dbgs() << "TxnSize: " << txn_size << " bytes\n");
66
67 size_t i = 16;
68
69 // Convert opcode from uint8 to enum
70 auto convertOpcode = [](uint8_t opc) {
71 switch (opc) {
72 case 0:
73 return XAie_TxnOpcode::XAIE_IO_WRITE;
74 case 1:
75 return XAie_TxnOpcode::XAIE_IO_BLOCKWRITE;
76 case 3:
77 return XAie_TxnOpcode::XAIE_IO_MASKWRITE;
78 default:
79 llvm::errs() << "Unhandled opcode: " << std::to_string(opc) << "\n";
80 return XAie_TxnOpcode::XAIE_IO_CUSTOM_OP_MAX;
81 }
82 };
83
84 // Parse the binary blob. There are two versions supported, 0.1 and 1.0.
85 // For both versions, build a list of TransactionBinaryOperation objects
86 // representing the parsed operations.
87 if (major == 0 && minor == 1) {
88 while (i < data.size()) {
89
90 XAie_TxnOpcode opc = convertOpcode(data[i]);
91 LLVM_DEBUG(llvm::dbgs() << "opcode: " + std::to_string(opc) + "\n");
92
93 uint64_t addr = 0;
94 uint32_t value = 0;
95 uint32_t size = 0;
96 uint32_t mask = 0;
97 const uint8_t *data_ptr = nullptr;
98
99 if (opc == XAie_TxnOpcode::XAIE_IO_WRITE) {
100 LLVM_DEBUG(llvm::dbgs() << "opcode: WRITE (0x00)\n");
101 uint32_t addr0, addr1;
102 std::memcpy(&addr0, &data[i + 8], 4);
103 std::memcpy(&addr1, &data[i + 12], 4);
104 std::memcpy(&value, &data[i + 16], 4);
105 std::memcpy(&size, &data[i + 20], 4);
106 addr = static_cast<uint64_t>(addr1) << 32 | addr0;
107 i += size;
108 } else if (opc == XAie_TxnOpcode::XAIE_IO_BLOCKWRITE) {
109 LLVM_DEBUG(llvm::dbgs() << "opcode: BLOCKWRITE (0x01)\n");
110 std::memcpy(&addr, &data[i + 8], 4);
111 std::memcpy(&size, &data[i + 12], 4);
112 data_ptr = data.data() + i + 16;
113 i += size;
114 size = size - 16;
115 } else if (opc == XAie_TxnOpcode::XAIE_IO_MASKWRITE) {
116 LLVM_DEBUG(llvm::dbgs() << "opcode: MASKWRITE (0x03)\n");
117 uint32_t addr0, addr1;
118 std::memcpy(&addr0, &data[i + 8], 4);
119 std::memcpy(&addr1, &data[i + 12], 4);
120 std::memcpy(&value, &data[i + 16], 4);
121 std::memcpy(&mask, &data[i + 20], 4);
122 std::memcpy(&size, &data[i + 24], 4);
123 addr = static_cast<uint64_t>(addr1) << 32 | addr0;
124 i += size;
125 } else {
126 llvm::errs() << "Unhandled opcode: " << std::to_string(opc) << "\n";
127 return std::nullopt;
128 }
129 ops.emplace_back(opc, mask, addr, value, data_ptr, size);
130 LLVM_DEBUG(llvm::dbgs() << "addr: " << addr << "\n");
131 LLVM_DEBUG(llvm::dbgs() << "value: " << value << "\n");
132 LLVM_DEBUG(llvm::dbgs() << "size: " << size << "\n");
133 LLVM_DEBUG(llvm::dbgs() << "mask: " << mask << "\n");
134 LLVM_DEBUG(llvm::dbgs()
135 << "data: " << reinterpret_cast<uintptr_t>(data_ptr) << "\n");
136 }
137 } else if (major == 1 && minor == 0) {
138 while (i < data.size()) {
139
140 XAie_TxnOpcode opc = convertOpcode(data[i]);
141 LLVM_DEBUG(llvm::dbgs() << "opcode: " + std::to_string(opc) + "\n");
142
143 uint64_t addr = 0;
144 uint32_t value = 0;
145 uint32_t size = 0;
146 uint32_t mask = 0;
147 const uint8_t *data_ptr = nullptr;
148
149 if (opc == XAie_TxnOpcode::XAIE_IO_WRITE) {
150 LLVM_DEBUG(llvm::dbgs() << "opcode: WRITE (0x00)\n");
151 std::memcpy(&addr, &data[i + 4], 4);
152 std::memcpy(&value, &data[i + 8], 4);
153 i += 12;
154 } else if (opc == XAie_TxnOpcode::XAIE_IO_BLOCKWRITE) {
155 LLVM_DEBUG(llvm::dbgs() << "opcode: BLOCKWRITE (0x01)\n");
156 std::memcpy(&addr, &data[i + 4], 4);
157 std::memcpy(&size, &data[i + 8], 4);
158 data_ptr = data.data() + i + 12;
159 i += size;
160 size = size - 12;
161 } else if (opc == XAie_TxnOpcode::XAIE_IO_MASKWRITE) {
162 LLVM_DEBUG(llvm::dbgs() << "opcode: MASKWRITE (0x03)\n");
163 std::memcpy(&addr, &data[i + 4], 4);
164 std::memcpy(&value, &data[i + 8], 4);
165 std::memcpy(&mask, &data[i + 12], 4);
166 i += 16;
167 } else {
168 llvm::errs() << "Unhandled opcode: " << std::to_string(opc) << "\n";
169 return std::nullopt;
170 }
171 LLVM_DEBUG(llvm::dbgs() << "addr: " << addr << "\n");
172 LLVM_DEBUG(llvm::dbgs() << "value: " << value << "\n");
173 LLVM_DEBUG(llvm::dbgs() << "size: " << size << "\n");
174 LLVM_DEBUG(llvm::dbgs() << "mask: " << mask << "\n");
175 LLVM_DEBUG(llvm::dbgs()
176 << "data: " << reinterpret_cast<uintptr_t>(data_ptr) << "\n");
177 ops.emplace_back(opc, mask, addr, value, data_ptr, size);
178 }
179 } else {
180 llvm::errs() << "Unsupported TXN binary version: " << major << "." << minor
181 << "\n";
182 return std::nullopt;
183 }
184
185 return num_cols;
186}
187
188static LogicalResult generateTransactions(AIERTControl &ctl,
189 const StringRef workDirPath,
190 DeviceOp &targetOp, bool aieSim,
191 bool enableElfs, bool enableInit,
192 bool enableCores) {
193 if (enableElfs && !targetOp.getOps<CoreOp>().empty() &&
194 failed(ctl.addAieElfs(targetOp, workDirPath, aieSim)))
195 return failure();
196 if (enableInit && failed(ctl.addInitConfig(targetOp)))
197 return failure();
198 if (enableCores && !targetOp.getOps<CoreOp>().empty() &&
199 failed(ctl.addCoreEnable(targetOp)))
200 return failure();
201 return success();
202}
203
204// Translate vector of TransactionBinaryOperation to a sequence of transaction
205// ops (npu.write32, npu.maskwrite32, npu.blockwrite).
206static LogicalResult
207emitTransactionOps(OpBuilder &builder,
208 std::vector<TransactionBinaryOperation> &operations,
209 std::vector<memref::GlobalOp> &global_data) {
210
211 auto loc = builder.getUnknownLoc();
212
213 // create the txn ops
214 for (auto p : llvm::zip(operations, global_data)) {
215 auto op = std::get<0>(p);
216 memref::GlobalOp payload = std::get<1>(p);
217
218 if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_WRITE) {
219 builder.create<AIEX::NpuWrite32Op>(loc, op.cmd.RegOff, op.cmd.Value,
220 nullptr, nullptr, nullptr);
221 } else if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_BLOCKWRITE) {
222 auto memref = builder.create<memref::GetGlobalOp>(loc, payload.getType(),
223 payload.getName());
224 builder.create<AIEX::NpuBlockWriteOp>(
225 loc, builder.getUI32IntegerAttr(op.cmd.RegOff), memref.getResult(),
226 nullptr, nullptr, nullptr);
227 } else if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_MASKWRITE) {
228 builder.create<AIEX::NpuMaskWrite32Op>(loc, op.cmd.RegOff, op.cmd.Value,
229 op.cmd.Mask, nullptr, nullptr,
230 nullptr);
231 } else {
232 llvm::errs() << "Unhandled txn opcode: " << op.cmd.Opcode << "\n";
233 return failure();
234 }
235 }
236 return success();
237}
238
239// Translate vector of TransactionBinaryOperation to a sequence of control
240// packet ops.
241static LogicalResult
242emitControlPacketOps(OpBuilder &builder,
243 std::vector<TransactionBinaryOperation> &operations,
244 std::vector<memref::GlobalOp> &global_data) {
245
246 auto loc = builder.getUnknownLoc();
247 auto ctx = builder.getContext();
248
249 // create the control packet ops
250 for (auto p : llvm::zip(operations, global_data)) {
251 auto op = std::get<0>(p);
252 memref::GlobalOp payload = std::get<1>(p);
253
254 if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_WRITE) {
255 builder.create<AIEX::NpuControlPacketOp>(
256 loc, builder.getUI32IntegerAttr(op.cmd.RegOff), nullptr,
257 /*opcode*/ builder.getI32IntegerAttr(0),
258 /*stream_id*/ builder.getI32IntegerAttr(0),
259 DenseI32ArrayAttr::get(ctx, ArrayRef<int32_t>(op.cmd.Value)));
260 } else if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_BLOCKWRITE) {
261 if (!std::get<1>(p).getInitialValue())
262 continue;
263 auto blockWriteData =
264 dyn_cast<DenseIntElementsAttr>(*std::get<1>(p).getInitialValue());
265 if (!blockWriteData) {
266 payload.emitError(
267 "Global symbol initial value is not a dense int array");
268 break;
269 }
270 auto blockWriteDataValues = blockWriteData.getValues<int32_t>();
271 // Split block write data into beats of 4 or less, in int32_t.
272 int currAddr = op.cmd.RegOff;
273 for (size_t i = 0; i < blockWriteDataValues.size(); i += 4) {
274 auto last = std::min(blockWriteDataValues.size(), i + 4);
275 SmallVector<int32_t> splitData =
276 SmallVector<int32_t>(blockWriteDataValues.begin() + i,
277 blockWriteDataValues.begin() + last);
278 builder.create<AIEX::NpuControlPacketOp>(
279 loc, builder.getUI32IntegerAttr(currAddr), nullptr,
280 /*opcode*/ builder.getI32IntegerAttr(0),
281 /*stream_id*/ builder.getI32IntegerAttr(0),
282 DenseI32ArrayAttr::get(ctx, ArrayRef<int32_t>(splitData)));
283 currAddr += splitData.size() * sizeof(int32_t);
284 }
285
286 } else if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_MASKWRITE) {
287 builder.create<AIEX::NpuControlPacketOp>(
288 loc, builder.getUI32IntegerAttr(op.cmd.RegOff), nullptr,
289 /*opcode*/ builder.getI32IntegerAttr(0),
290 /*stream_id*/ builder.getI32IntegerAttr(0),
291 DenseI32ArrayAttr::get(ctx, ArrayRef<int32_t>(op.cmd.Value)));
292 } else {
293 llvm::errs() << "Unhandled txn opcode: " << op.cmd.Opcode << "\n";
294 return failure();
295 }
296 }
297 return success();
298}
299
300// Perform bitwise or on consecutive control packets operating on the same
301// address, to resolve the lack of mask write in control packets.
302LogicalResult orConsecutiveWritesOnSameAddr(Block *body) {
303 SmallVector<AIEX::NpuControlPacketOp> ctrlPktOps;
304 body->walk(
305 [&](AIEX::NpuControlPacketOp cpOp) { ctrlPktOps.push_back(cpOp); });
306 if (ctrlPktOps.empty())
307 return success();
308
309 SmallVector<Operation *> erased;
310 int addrBuffer = ctrlPktOps[0].getAddress();
311 AIEX::NpuControlPacketOp ctrlPktBuffer = ctrlPktOps[0];
312 for (size_t i = 1; i < ctrlPktOps.size(); i++) {
313 int currentAddrBuffer = ctrlPktOps[i].getAddress();
314 if (addrBuffer != currentAddrBuffer) {
315 addrBuffer = currentAddrBuffer;
316 ctrlPktBuffer = ctrlPktOps[i];
317 continue;
318 }
319 auto bufferedData = ctrlPktBuffer.getData().value();
320 auto currentData = ctrlPktOps[i].getData().value();
321 SmallVector<int> newData;
322 for (unsigned j = 0; j < std::max(bufferedData.size(), currentData.size());
323 j++) {
324 if (j < std::min(bufferedData.size(), currentData.size())) {
325 newData.push_back(bufferedData[j] | currentData[j]);
326 continue;
327 }
328 newData.push_back(j < bufferedData.size() ? bufferedData[j]
329 : currentData[j]);
330 }
331 ctrlPktBuffer.getProperties().data = DenseI32ArrayAttr::get(
332 ctrlPktBuffer->getContext(), ArrayRef<int>{newData});
333 erased.push_back(ctrlPktOps[i]);
334 }
335
336 for (auto e : erased)
337 e->erase();
338
339 return success();
340}
341
342// an enum to represent the output type of the transaction binary
347
348static LogicalResult convertTransactionOpsToMLIR(
349 OpBuilder builder, AIE::DeviceOp device, OutputType outputType,
350 std::vector<TransactionBinaryOperation> &operations) {
351
352 auto loc = builder.getUnknownLoc();
353
354 // for each blockwrite in the binary, create a GlobalOp with the data
355 std::vector<memref::GlobalOp> global_data;
356 for (auto &op : operations) {
357 if (op.cmd.Opcode != XAIE_IO_BLOCKWRITE) {
358 global_data.push_back(nullptr);
359 continue;
360 }
361 uint32_t size = op.cmd.Size / 4;
362 const uint32_t *d = reinterpret_cast<const uint32_t *>(op.cmd.DataPtr);
363 std::vector<uint32_t> data32(d, d + size);
364
365 int id = 0;
366 std::string name = "blockwrite_data";
367 while (device.lookupSymbol(name))
368 name = "blockwrite_data_" + std::to_string(id++);
369
370 MemRefType memrefType = MemRefType::get({size}, builder.getI32Type());
371 TensorType tensorType = RankedTensorType::get({size}, builder.getI32Type());
372 auto global = builder.create<memref::GlobalOp>(
373 loc, name, builder.getStringAttr("private"), memrefType,
374 DenseElementsAttr::get<uint32_t>(tensorType, data32), true, nullptr);
375 global_data.push_back(global);
376 }
377
378 // create aiex.runtime_sequence
379 int id = 0;
380 std::string seq_name = "configure";
381 while (device.lookupSymbol(seq_name))
382 seq_name = "configure" + std::to_string(id++);
383 StringAttr seq_sym_name = builder.getStringAttr(seq_name);
384 auto seq = builder.create<AIEX::RuntimeSequenceOp>(loc, seq_sym_name);
385 seq.getBody().push_back(new Block);
386
387 // create the txn ops
388 builder.setInsertionPointToStart(&seq.getBody().front());
389 if (outputType == OutputType::Transaction) {
390 if (failed(emitTransactionOps(builder, operations, global_data)))
391 return failure();
392 } else if (outputType == OutputType::ControlPacket) {
393 if (failed(emitControlPacketOps(builder, operations, global_data)))
394 return failure();
395 // resolve mask writes; control packet doesn't natively support mask write.
396 if (failed(orConsecutiveWritesOnSameAddr(&seq.getBody().front())))
397 return failure();
398 } else {
399 llvm_unreachable("bad output type");
400 }
401
402 return success();
403}
404
405// Convert (disassemble) a transaction binary to MLIR. On success return a new
406// ModuleOp containing a DeviceOp containing a runtime sequence with the
407// transaction binary encoded as a sequence of npu.write32, npu.maskwrite32 and
408// npu.blockwrite operations. On failure return std::nullopt.
409std::optional<mlir::ModuleOp>
411 std::vector<uint8_t> &binary) {
412
413 // parse the binary
414 std::vector<TransactionBinaryOperation> operations;
415 auto c = parseTransactionBinary(binary, operations);
416 if (!c) {
417 llvm::errs() << "Failed to parse binary\n";
418 return std::nullopt;
419 }
420 int columns = *c;
421
422 auto loc = mlir::UnknownLoc::get(ctx);
423
424 // create a new ModuleOp and set the insertion point
425 auto module = ModuleOp::create(loc);
426 OpBuilder builder(module.getBodyRegion());
427 builder.setInsertionPointToStart(module.getBody());
428
429 // create aie.device
430 std::vector<AIEDevice> devices{AIEDevice::npu1_1col, AIEDevice::npu1_2col,
431 AIEDevice::npu1_3col, AIEDevice::npu1_4col,
432 AIEDevice::npu1};
433 auto device = builder.create<DeviceOp>(loc, devices[columns - 1]);
434 device.getRegion().emplaceBlock();
435 DeviceOp::ensureTerminator(device.getBodyRegion(), builder, loc);
436 builder.setInsertionPointToStart(device.getBody());
437
438 // convert the parsed ops to MLIR
439 if (failed(convertTransactionOpsToMLIR(builder, device,
440 OutputType::Transaction, operations)))
441 return std::nullopt;
442
443 return module;
444}
445
446static LogicalResult convertAIEToConfiguration(AIE::DeviceOp device,
447 StringRef clElfDir,
448 OutputType outputType) {
449
450 const BaseNPUTargetModel &targetModel =
451 (const BaseNPUTargetModel &)device.getTargetModel();
452
453 if (!targetModel.hasProperty(AIETargetModel::IsNPU))
454 return failure();
455
456 bool aieSim = false;
457 bool xaieDebug = false;
458
459 AIERTControl ctl(targetModel);
460 if (failed(ctl.setIOBackend(aieSim, xaieDebug)))
461 return failure();
462
463 // start collecting transations
464 XAie_StartTransaction(&ctl.devInst, XAIE_TRANSACTION_DISABLE_AUTO_FLUSH);
465
466 bool generateElfs = clElfDir.size() > 0;
467 if (failed(generateTransactions(ctl, clElfDir, device, aieSim, generateElfs,
468 true, true)))
469 return failure();
470
471 // Export the transactions to a binary buffer
472 uint8_t *txn_ptr = XAie_ExportSerializedTransaction(&ctl.devInst, 0, 0);
473 XAie_TxnHeader *hdr = (XAie_TxnHeader *)txn_ptr;
474 std::vector<uint8_t> txn_data(txn_ptr, txn_ptr + hdr->TxnSize);
475
476 // parse the binary data
477 std::vector<TransactionBinaryOperation> operations;
478 if (!parseTransactionBinary(txn_data, operations)) {
479 llvm::errs() << "Failed to parse binary\n";
480 return failure();
481 }
482
483 OpBuilder builder(device.getBodyRegion());
484
485 // convert the parsed ops to MLIR
486 if (failed(
487 convertTransactionOpsToMLIR(builder, device, outputType, operations)))
488 return failure();
489
490 return success();
491}
492
493namespace {
494
495struct ConvertAIEToTransactionPass
496 : ConvertAIEToTransactionBase<ConvertAIEToTransactionPass> {
497 void getDependentDialects(DialectRegistry &registry) const override {
498 registry.insert<memref::MemRefDialect, AIEX::AIEXDialect>();
499 }
500 void runOnOperation() override {
501 if (failed(convertAIEToConfiguration(getOperation(), clElfDir,
503 return signalPassFailure();
504 }
505};
506
507struct ConvertAIEToControlPacketsPass
508 : public ConvertAIEToControlPacketsBase<ConvertAIEToControlPacketsPass> {
509 void getDependentDialects(DialectRegistry &registry) const override {
510 registry.insert<memref::MemRefDialect, AIEX::AIEXDialect>();
511 }
512 void runOnOperation() override {
513 if (failed(convertAIEToConfiguration(getOperation(), clElfDir,
515 return signalPassFailure();
516 }
517};
518
519} // end anonymous namespace
520
521std::unique_ptr<mlir::OperationPass<xilinx::AIE::DeviceOp>>
523 return std::make_unique<ConvertAIEToTransactionPass>();
524}
525
526std::unique_ptr<mlir::OperationPass<xilinx::AIE::DeviceOp>>
528 return std::make_unique<ConvertAIEToControlPacketsPass>();
529}
LogicalResult orConsecutiveWritesOnSameAddr(Block *body)
@ ControlPacket
bool hasProperty(ModelProperty Prop) const
Include the generated interface declarations.
std::unique_ptr< mlir::OperationPass< xilinx::AIE::DeviceOp > > createConvertAIEToTransactionPass()
std::optional< mlir::ModuleOp > convertTransactionBinaryToMLIR(mlir::MLIRContext *ctx, std::vector< uint8_t > &binary)
std::unique_ptr< mlir::OperationPass< xilinx::AIE::DeviceOp > > createConvertAIEToControlPacketsPass()
mlir::LogicalResult addAieElfs(DeviceOp &targetOp, const mlir::StringRef workDirPath, bool aieSim)
Definition AIERT.cpp:685
mlir::LogicalResult setIOBackend(bool aieSim, bool xaieDebug)
Definition AIERT.cpp:109
mlir::LogicalResult addCoreEnable(DeviceOp &targetOp)
Definition AIERT.cpp:655
XAie_DevInst devInst
Definition AIERT.h:189
mlir::LogicalResult addInitConfig(DeviceOp &targetOp)
Definition AIERT.cpp:582