MLIR-AIE
AIEToConfiguration.cpp
Go to the documentation of this file.
1//===- AIEToConfiguration.cpp -----------------------------------*- C++ -*-===//
2//
3// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7// Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved.
8//
9//===----------------------------------------------------------------------===//
10
11#include "../PassDetail.h"
12
16#include "aie/Targets/AIERT.h"
17
18#include "llvm/Support/Debug.h"
19#include <llvm/ADT/APInt.h>
20
21extern "C" {
22#include "xaiengine/xaiegbl_defs.h"
23// above needs to go first for u32, u64 typedefs
24#include "xaiengine/xaie_txn.h"
25}
26
27#include <cstring>
28#include <optional>
29#include <utility>
30#include <vector>
31
32namespace xilinx {
33#define GEN_PASS_DEF_CONVERTAIETOCONTROLPACKETS
34#define GEN_PASS_DEF_CONVERTAIETOTRANSACTION
35#include "aie/Conversion/Passes.h.inc"
36} // namespace xilinx
37
38#define DEBUG_TYPE "aie-convert-to-config"
39
40using namespace mlir;
41using namespace xilinx;
42using namespace xilinx::AIE;
43
44namespace {
45
46// A TransactionBinaryOperation encapsulates an aie-rt XAie_TxnCmd struct and
47// any additional metadata needed for custom operations that do not map cleanly
48// onto the core command fields.
49struct TransactionBinaryOperation {
50 struct XAie_TxnCmd cmd = {};
51
52 struct SyncPayload {
53 int32_t column;
54 int32_t row;
55 int32_t direction;
56 int32_t channel;
57 int32_t columnCount;
58 int32_t rowCount;
59 };
60
61 struct LoadPdiPayload {
62 uint32_t id;
63 uint32_t size;
64 uint64_t address;
65 };
66
67 struct AddressPatchPayload {
68 uint32_t action;
69 uint32_t addr;
70 int32_t argIdx;
71 int32_t argPlus;
72 };
73
74 std::optional<SyncPayload> sync;
75 std::optional<LoadPdiPayload> loadPdi;
76 std::optional<AddressPatchPayload> addressPatch;
77
78 TransactionBinaryOperation() = default;
79
80 TransactionBinaryOperation(XAie_TxnOpcode opc, uint32_t mask, uint64_t addr,
81 uint32_t value, const uint8_t *data,
82 uint32_t size) {
83 cmd.Opcode = opc;
84 cmd.Mask = mask;
85 cmd.RegOff = addr;
86 cmd.Value = value;
87 cmd.DataPtr = reinterpret_cast<uint64_t>(data);
88 cmd.Size = size;
89 }
90};
91
92constexpr size_t kTxnHeaderBytes = 16;
93
// Byte layout of a preempt operation inside a serialized transaction.
// The parser uses sizeof(TxnPreemptHeader) as the number of bytes the
// operation occupies, so the struct must not contain any padding.
struct TxnPreemptHeader {
  uint8_t opcode;
  uint8_t level;
  uint16_t reserved;
};
static_assert(sizeof(TxnPreemptHeader) == 4,
              "TxnPreemptHeader must match the 4-byte encoded preempt op");
99
// Byte layout of a load_pdi operation inside a serialized transaction.
// The parser copies sizeof(TxnLoadPdiHeader) raw bytes into this struct and
// advances by the same amount, so the struct must not contain hidden padding.
struct TxnLoadPdiHeader {
  uint8_t opcode;
  uint8_t padding;
  uint16_t id;
  uint32_t size;
  uint64_t address;
};
static_assert(sizeof(TxnLoadPdiHeader) == 16,
              "TxnLoadPdiHeader must match the 16-byte encoded load_pdi op");
107} // namespace
108
109// Parse a TXN binary blob. On success return the number of columns from the
110// header and a vector of parsed operations. On failure return std::nullopt.
111static std::optional<int>
112parseTransactionBinary(const std::vector<uint8_t> &data,
113 std::vector<TransactionBinaryOperation> &ops) {
114
115 if (data.size() < kTxnHeaderBytes) {
116 llvm::errs() << "Transaction binary is too small for header\n";
117 return std::nullopt;
118 }
119
120 uint32_t major = data[0];
121 uint32_t minor = data[1];
122 uint32_t num_cols = data[4];
123
124 uint32_t num_ops, txn_size;
125 std::memcpy(&num_ops, &data[8], 4);
126 std::memcpy(&txn_size, &data[12], 4);
127
128 LLVM_DEBUG(llvm::dbgs() << "Major: " << major << "\n");
129 LLVM_DEBUG(llvm::dbgs() << "Minor: " << minor << "\n");
130 LLVM_DEBUG(llvm::dbgs() << "DevGen: " << data[2] << "\n");
131 LLVM_DEBUG(llvm::dbgs() << "NumRows: " << data[3] << "\n");
132 LLVM_DEBUG(llvm::dbgs() << "NumCols: " << num_cols << "\n");
133 LLVM_DEBUG(llvm::dbgs() << "NumMemTileRows: " << data[5] << "\n");
134 LLVM_DEBUG(llvm::dbgs() << "NumOps: " << num_ops << "\n");
135 LLVM_DEBUG(llvm::dbgs() << "TxnSize: " << txn_size << " bytes\n");
136
137 size_t i = kTxnHeaderBytes;
138
139 auto requireBytes = [&](size_t offset, size_t length) -> bool {
140 if (offset + length > data.size()) {
141 llvm::errs() << "Transaction binary truncated while parsing opcode\n";
142 return false;
143 }
144 return true;
145 };
146
147 auto read32 = [&](size_t offset) -> uint32_t {
148 uint32_t value;
149 std::memcpy(&value, data.data() + offset, sizeof(uint32_t));
150 return value;
151 };
152
153 // Convert opcode from uint8 to a validated opcode byte
154 auto convertOpcode = [](uint8_t opc) -> std::optional<uint8_t> {
155 switch (opc) {
156 case static_cast<uint8_t>(XAie_TxnOpcode::XAIE_IO_WRITE):
157 case static_cast<uint8_t>(XAie_TxnOpcode::XAIE_IO_BLOCKWRITE):
158 case static_cast<uint8_t>(XAie_TxnOpcode::XAIE_IO_MASKWRITE):
159 case 0x6: // XAie_TxnOpcode::XAIE_IO_PREEMPT
160 case 0x8: // XAie_TxnOpcode::XAIE_IO_LOAD_PDI
161 case static_cast<uint8_t>(XAie_TxnOpcode::XAIE_IO_CUSTOM_OP_TCT):
162 case static_cast<uint8_t>(XAie_TxnOpcode::XAIE_IO_CUSTOM_OP_DDR_PATCH):
163 return opc;
164 default:
165 llvm::errs() << "Unhandled opcode: " << std::to_string(opc) << "\n";
166 return std::nullopt;
167 }
168 };
169
170 // Parse the binary blob. There are two versions supported, 0.1 and 1.0.
171 // For both versions, build a list of TransactionBinaryOperation objects
172 // representing the parsed operations.
173 if (major == 0 && minor == 1) {
174 while (i < data.size()) {
175 auto maybeOpcode = convertOpcode(data[i]);
176 if (!maybeOpcode)
177 return std::nullopt;
178 XAie_TxnOpcode opcode = static_cast<XAie_TxnOpcode>(*maybeOpcode);
179 LLVM_DEBUG(llvm::dbgs() << "opcode: " + std::to_string(opcode) << "\n");
180
181 TransactionBinaryOperation op;
182 op.cmd.Opcode = opcode;
183
184 switch (opcode) {
186 LLVM_DEBUG(llvm::dbgs() << "opcode: WRITE (0x00)\n");
187 if (!requireBytes(i, 24))
188 return std::nullopt;
189 uint32_t addrLo = read32(i + 8);
190 uint32_t addrHi = read32(i + 12);
191 uint32_t value = read32(i + 16);
192 uint32_t opSize = read32(i + 20);
193 if (!requireBytes(i, opSize))
194 return std::nullopt;
195 uint64_t addr = (static_cast<uint64_t>(addrHi) << 32) | addrLo;
196 op.cmd.RegOff = addr;
197 op.cmd.Value = value;
198 op.cmd.Size = 0;
199 i += opSize;
200 break;
201 }
203 LLVM_DEBUG(llvm::dbgs() << "opcode: BLOCKWRITE (0x01)\n");
204 if (!requireBytes(i, 16))
205 return std::nullopt;
206 uint32_t addr = read32(i + 8);
207 uint32_t opSize = read32(i + 12);
208 if (opSize < 16 || !requireBytes(i, opSize))
209 return std::nullopt;
210 const uint8_t *payload = data.data() + i + 16;
211 uint32_t payloadBytes = opSize - 16;
212 op.cmd.RegOff = addr;
213 op.cmd.DataPtr = reinterpret_cast<uint64_t>(payload);
214 op.cmd.Size = payloadBytes;
215 i += opSize;
216 break;
217 }
219 LLVM_DEBUG(llvm::dbgs() << "opcode: MASKWRITE (0x03)\n");
220 if (!requireBytes(i, 28))
221 return std::nullopt;
222 uint32_t addrLo = read32(i + 8);
223 uint32_t addrHi = read32(i + 12);
224 uint32_t value = read32(i + 16);
225 uint32_t mask = read32(i + 20);
226 uint32_t opSize = read32(i + 24);
227 if (!requireBytes(i, opSize))
228 return std::nullopt;
229 uint64_t addr = (static_cast<uint64_t>(addrHi) << 32) | addrLo;
230 op.cmd.RegOff = addr;
231 op.cmd.Value = value;
232 op.cmd.Mask = mask;
233 op.cmd.Size = opSize;
234 i += opSize;
235 break;
236 }
238 uint32_t opSize = read32(i + 4);
239 if (opSize < 16 || !requireBytes(i, opSize))
240 return std::nullopt;
241 uint32_t descriptor = read32(i + 8);
242 uint32_t config = read32(i + 12);
243 TransactionBinaryOperation::SyncPayload payload{
244 /*column=*/static_cast<int32_t>((descriptor >> 16) & 0xff),
245 /*row=*/static_cast<int32_t>((descriptor >> 8) & 0xff),
246 /*direction=*/static_cast<int32_t>(descriptor & 0xff),
247 /*channel=*/static_cast<int32_t>((config >> 24) & 0xff),
248 /*columnCount=*/static_cast<int32_t>((config >> 16) & 0xff),
249 /*rowCount=*/static_cast<int32_t>((config >> 8) & 0xff)};
250 op.sync = payload;
251 op.cmd.Size = opSize;
252 i += opSize;
253 break;
254 }
255 case 0x8: { // XAie_TxnOpcode::XAIE_IO_LOAD_PDI
256 LLVM_DEBUG(llvm::dbgs() << "opcode: LOAD_PDI (0x08)\n");
257 constexpr size_t opSize = sizeof(TxnLoadPdiHeader);
258 if (!requireBytes(i, opSize))
259 return std::nullopt;
260 TxnLoadPdiHeader header;
261 std::memcpy(&header, data.data() + i, opSize);
262 TransactionBinaryOperation::LoadPdiPayload payload{
263 header.id, header.size, header.address};
264 op.loadPdi = payload;
265 op.cmd.Size = opSize;
266 i += opSize;
267 break;
268 }
270 uint32_t opSize = read32(i + 4);
271 if (opSize < 44 || !requireBytes(i, opSize))
272 return std::nullopt;
273 uint32_t action = read32(i + 20);
274 uint32_t addr = read32(i + 24);
275 int32_t argIdx = static_cast<int32_t>(read32(i + 32));
276 int32_t argPlus = static_cast<int32_t>(read32(i + 40));
277 TransactionBinaryOperation::AddressPatchPayload payload{
278 action, addr, argIdx, argPlus};
279 op.addressPatch = payload;
280 op.cmd.Size = opSize;
281 i += opSize;
282 break;
283 }
284 case 0x6: { // XAie_TxnOpcode::XAIE_IO_PREEMPT
285 LLVM_DEBUG(llvm::dbgs() << "opcode: PREEMPT (0x06)\n");
286 constexpr size_t opSize = sizeof(TxnPreemptHeader);
287 if (!requireBytes(i, opSize))
288 return std::nullopt;
289 auto header =
290 reinterpret_cast<const TxnPreemptHeader *>(data.data() + i);
291 op.cmd.Value = header->level;
292 op.cmd.Size = opSize;
293 i += opSize;
294 break;
295 }
296 default:
297 llvm::errs() << "Unhandled opcode: " << std::to_string(opcode)
298 << " for v0.1 transaction\n";
299 return std::nullopt;
300 }
301
302 ops.push_back(std::move(op));
303 }
304 } else if (major == 1 && minor == 0) {
305 while (i < data.size()) {
306 auto maybeOpcode = convertOpcode(data[i]);
307 if (!maybeOpcode)
308 return std::nullopt;
309 XAie_TxnOpcode opcode = static_cast<XAie_TxnOpcode>(*maybeOpcode);
310 LLVM_DEBUG(llvm::dbgs() << "opcode: " + std::to_string(opcode) << "\n");
311
312 TransactionBinaryOperation op;
313 op.cmd.Opcode = opcode;
314
315 switch (opcode) {
317 LLVM_DEBUG(llvm::dbgs() << "opcode: WRITE (0x00)\n");
318 if (!requireBytes(i, 12))
319 return std::nullopt;
320 uint32_t addr = read32(i + 4);
321 uint32_t value = read32(i + 8);
322 op.cmd.RegOff = addr;
323 op.cmd.Value = value;
324 op.cmd.Size = 0;
325 i += 12;
326 break;
327 }
329 LLVM_DEBUG(llvm::dbgs() << "opcode: BLOCKWRITE (0x01)\n");
330 if (!requireBytes(i, 12))
331 return std::nullopt;
332 uint32_t addr = read32(i + 4);
333 uint32_t opSize = read32(i + 8);
334 if (opSize < 12 || !requireBytes(i, opSize))
335 return std::nullopt;
336 const uint8_t *payload = data.data() + i + 12;
337 uint32_t payloadBytes = opSize - 12;
338 op.cmd.RegOff = addr;
339 op.cmd.DataPtr = reinterpret_cast<uint64_t>(payload);
340 op.cmd.Size = payloadBytes;
341 i += opSize;
342 break;
343 }
345 LLVM_DEBUG(llvm::dbgs() << "opcode: MASKWRITE (0x03)\n");
346 if (!requireBytes(i, 16))
347 return std::nullopt;
348 uint32_t addr = read32(i + 4);
349 uint32_t value = read32(i + 8);
350 uint32_t mask = read32(i + 12);
351 op.cmd.RegOff = addr;
352 op.cmd.Value = value;
353 op.cmd.Mask = mask;
354 op.cmd.Size = 0;
355 i += 16;
356 break;
357 }
359 uint32_t opSize = read32(i + 4);
360 if (opSize < 16 || !requireBytes(i, opSize))
361 return std::nullopt;
362 uint32_t descriptor = read32(i + 8);
363 uint32_t config = read32(i + 12);
364 TransactionBinaryOperation::SyncPayload payload{
365 /*column=*/static_cast<int32_t>((descriptor >> 16) & 0xff),
366 /*row=*/static_cast<int32_t>((descriptor >> 8) & 0xff),
367 /*direction=*/static_cast<int32_t>(descriptor & 0xff),
368 /*channel=*/static_cast<int32_t>((config >> 24) & 0xff),
369 /*columnCount=*/static_cast<int32_t>((config >> 16) & 0xff),
370 /*rowCount=*/static_cast<int32_t>((config >> 8) & 0xff)};
371 op.sync = payload;
372 op.cmd.Size = opSize;
373 i += opSize;
374 break;
375 }
376 case 0x8: { // XAie_TxnOpcode::XAIE_IO_LOAD_PDI
377 LLVM_DEBUG(llvm::dbgs() << "opcode: LOAD_PDI (0x08)\n");
378 constexpr size_t opSize = sizeof(TxnLoadPdiHeader);
379 if (!requireBytes(i, opSize))
380 return std::nullopt;
381 TxnLoadPdiHeader header;
382 std::memcpy(&header, data.data() + i, opSize);
383 TransactionBinaryOperation::LoadPdiPayload payload{
384 header.id, header.size, header.address};
385 op.loadPdi = payload;
386 op.cmd.Size = opSize;
387 i += opSize;
388 break;
389 }
391 uint32_t opSize = read32(i + 4);
392 if (opSize < 44 || !requireBytes(i, opSize))
393 return std::nullopt;
394 uint32_t action = read32(i + 20);
395 uint32_t addr = read32(i + 24);
396 int32_t argIdx = static_cast<int32_t>(read32(i + 32));
397 int32_t argPlus = static_cast<int32_t>(read32(i + 40));
398 TransactionBinaryOperation::AddressPatchPayload payload{
399 action, addr, argIdx, argPlus};
400 op.addressPatch = payload;
401 op.cmd.Size = opSize;
402 i += opSize;
403 break;
404 }
405 case 0x6: { // XAie_TxnOpcode::XAIE_IO_PREEMPT
406 LLVM_DEBUG(llvm::dbgs() << "opcode: PREEMPT (0x06)\n");
407 constexpr size_t opSize = sizeof(TxnPreemptHeader);
408 if (!requireBytes(i, opSize))
409 return std::nullopt;
410 auto header =
411 reinterpret_cast<const TxnPreemptHeader *>(data.data() + i);
412 op.cmd.Value = header->level;
413 op.cmd.Size = opSize;
414 i += opSize;
415 break;
416 }
417 default:
418 llvm::errs() << "Unhandled opcode: " << std::to_string(opcode)
419 << " for v1.0 transaction\n";
420 return std::nullopt;
421 }
422
423 ops.push_back(std::move(op));
424 }
425 } else {
426 llvm::errs() << "Unsupported TXN binary version: " << major << "." << minor
427 << "\n";
428 return std::nullopt;
429 }
430
431 return num_cols;
432}
433
434static LogicalResult generateTransactions(AIERTControl &ctl,
435 const StringRef workDirPath,
436 DeviceOp &targetOp, bool aieSim,
437 bool enableElfs, bool enableInit,
438 bool enableCores) {
439 if (enableElfs && !targetOp.getOps<CoreOp>().empty() &&
440 failed(ctl.addAieElfs(targetOp, workDirPath, aieSim)))
441 return failure();
442 if (enableInit && failed(ctl.addInitConfig(targetOp)))
443 return failure();
444 if (enableCores && !targetOp.getOps<CoreOp>().empty() &&
445 failed(ctl.addCoreEnable(targetOp)))
446 return failure();
447 return success();
448}
449
450// Translate vector of TransactionBinaryOperation to a sequence of transaction
451// ops (npu.write32, npu.maskwrite32, npu.blockwrite).
452static LogicalResult
453emitTransactionOps(OpBuilder &builder,
454 std::vector<TransactionBinaryOperation> &operations,
455 std::vector<memref::GlobalOp> &global_data) {
456
457 auto loc = builder.getUnknownLoc();
458
459 // create the txn ops
460 for (auto [op, payload] : llvm::zip(operations, global_data)) {
461
462 if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_WRITE) {
463 AIEX::NpuWrite32Op::create(builder, loc, op.cmd.RegOff, op.cmd.Value,
464 nullptr, nullptr, nullptr);
465 } else if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_BLOCKWRITE) {
466 auto memref = memref::GetGlobalOp::create(builder, loc, payload.getType(),
467 payload.getName());
468 AIEX::NpuBlockWriteOp::create(
469 builder, loc, builder.getUI32IntegerAttr(op.cmd.RegOff),
470 memref.getResult(), nullptr, nullptr, nullptr);
471 } else if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_MASKWRITE) {
472 AIEX::NpuMaskWrite32Op::create(builder, loc, op.cmd.RegOff, op.cmd.Value,
473 op.cmd.Mask, nullptr, nullptr, nullptr);
474 } else if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_CUSTOM_OP_TCT) {
475 if (!op.sync) {
476 llvm::errs() << "Missing sync payload while emitting transaction\n";
477 return failure();
478 }
479 const TransactionBinaryOperation::SyncPayload &sync = *op.sync;
480 AIEX::NpuSyncOp::create(builder, loc,
481 builder.getI32IntegerAttr(sync.column),
482 builder.getI32IntegerAttr(sync.row),
483 builder.getI32IntegerAttr(sync.direction),
484 builder.getI32IntegerAttr(sync.channel),
485 builder.getI32IntegerAttr(sync.columnCount),
486 builder.getI32IntegerAttr(sync.rowCount));
487 } else if (op.cmd.Opcode == 0x8 /* XAie_TxnOpcode::XAIE_IO_LOAD_PDI */) {
488 if (!op.loadPdi) {
489 llvm::errs() << "Missing load_pdi payload while emitting transaction\n";
490 return failure();
491 }
492 const TransactionBinaryOperation::LoadPdiPayload &payloadInfo =
493 *op.loadPdi;
494 auto idAttr =
495 builder.getI32IntegerAttr(static_cast<int32_t>(payloadInfo.id));
496 IntegerAttr sizeAttr =
497 builder.getI32IntegerAttr(static_cast<int32_t>(payloadInfo.size));
498
499 auto ui64Ty =
500 IntegerType::get(builder.getContext(), 64, IntegerType::Unsigned);
501 IntegerAttr addressAttr =
502 IntegerAttr::get(ui64Ty, llvm::APInt(64, payloadInfo.address));
503
504 AIEX::NpuLoadPdiOp::create(builder, loc, nullptr, idAttr, sizeAttr,
505 addressAttr);
506 } else if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_CUSTOM_OP_DDR_PATCH) {
507 if (!op.addressPatch) {
508 llvm::errs()
509 << "Missing address_patch payload while emitting transaction\n";
510 return failure();
511 }
512 const TransactionBinaryOperation::AddressPatchPayload &patch =
513 *op.addressPatch;
514 AIEX::NpuAddressPatchOp::create(builder, loc,
515 builder.getUI32IntegerAttr(patch.addr),
516 builder.getI32IntegerAttr(patch.argIdx),
517 builder.getI32IntegerAttr(patch.argPlus));
518 } else if (op.cmd.Opcode == 0x6 /* XAie_TxnOpcode::XAIE_IO_PREEMPT */) {
519 auto ui8Ty =
520 IntegerType::get(builder.getContext(), 8, IntegerType::Unsigned);
521 auto levelAttr = IntegerAttr::get(ui8Ty, llvm::APInt(8, op.cmd.Value));
522 AIEX::NpuPreemptOp::create(builder, loc, levelAttr);
523 } else {
524 llvm::errs() << "Unhandled txn opcode: " << op.cmd.Opcode << "\n";
525 return failure();
526 }
527 }
528 return success();
529}
530
531// Translate vector of TransactionBinaryOperation to a sequence of control
532// packet ops.
533static LogicalResult
534emitControlPacketOps(OpBuilder &builder,
535 std::vector<TransactionBinaryOperation> &operations,
536 std::vector<memref::GlobalOp> &global_data) {
537
538 auto loc = builder.getUnknownLoc();
539 auto ctx = builder.getContext();
540
541 // create the control packet ops
542 for (auto [op, payload] : llvm::zip(operations, global_data)) {
543
544 if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_WRITE) {
545 AIEX::NpuControlPacketOp::create(
546 builder, loc, builder.getUI32IntegerAttr(op.cmd.RegOff), nullptr,
547 /*opcode*/ builder.getI32IntegerAttr(0),
548 /*stream_id*/ builder.getI32IntegerAttr(0),
549 DenseI32ArrayAttr::get(ctx, ArrayRef<int32_t>(op.cmd.Value)));
550 } else if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_BLOCKWRITE) {
551 if (!payload.getInitialValue())
552 continue;
553 auto blockWriteData =
554 dyn_cast<DenseIntElementsAttr>(*payload.getInitialValue());
555 if (!blockWriteData) {
556 payload.emitError(
557 "Global symbol initial value is not a dense int array");
558 break;
559 }
560 auto blockWriteDataValues = blockWriteData.getValues<int32_t>();
561 // Split block write data into beats of 4 or less, in int32_t.
562 int currAddr = op.cmd.RegOff;
563 for (size_t i = 0; i < blockWriteDataValues.size(); i += 4) {
564 auto last = std::min(blockWriteDataValues.size(), i + 4);
565 SmallVector<int32_t> splitData =
566 SmallVector<int32_t>(blockWriteDataValues.begin() + i,
567 blockWriteDataValues.begin() + last);
568 AIEX::NpuControlPacketOp::create(
569 builder, loc, builder.getUI32IntegerAttr(currAddr), nullptr,
570 /*opcode*/ builder.getI32IntegerAttr(0),
571 /*stream_id*/ builder.getI32IntegerAttr(0),
572 DenseI32ArrayAttr::get(ctx, ArrayRef<int32_t>(splitData)));
573 currAddr += splitData.size() * sizeof(int32_t);
574 }
575
576 } else if (op.cmd.Opcode == XAie_TxnOpcode::XAIE_IO_MASKWRITE) {
577 AIEX::NpuControlPacketOp::create(
578 builder, loc, builder.getUI32IntegerAttr(op.cmd.RegOff), nullptr,
579 /*opcode*/ builder.getI32IntegerAttr(0),
580 /*stream_id*/ builder.getI32IntegerAttr(0),
581 DenseI32ArrayAttr::get(ctx, ArrayRef<int32_t>(op.cmd.Value)));
582 } else {
583 llvm::errs() << "Unhandled txn opcode: " << op.cmd.Opcode << "\n";
584 return failure();
585 }
586 }
587 return success();
588}
589
590// Perform bitwise or on consecutive control packets operating on the same
591// address, to resolve the lack of mask write in control packets.
592LogicalResult orConsecutiveWritesOnSameAddr(Block *body) {
593 SmallVector<AIEX::NpuControlPacketOp> ctrlPktOps;
594 body->walk(
595 [&](AIEX::NpuControlPacketOp cpOp) { ctrlPktOps.push_back(cpOp); });
596 if (ctrlPktOps.empty())
597 return success();
598
599 SmallVector<Operation *> erased;
600 int addrBuffer = ctrlPktOps[0].getAddress();
601 AIEX::NpuControlPacketOp ctrlPktBuffer = ctrlPktOps[0];
602 for (size_t i = 1; i < ctrlPktOps.size(); i++) {
603 int currentAddrBuffer = ctrlPktOps[i].getAddress();
604 if (addrBuffer != currentAddrBuffer) {
605 addrBuffer = currentAddrBuffer;
606 ctrlPktBuffer = ctrlPktOps[i];
607 continue;
608 }
609 auto bufferedData = ctrlPktBuffer.getData().value();
610 auto currentData = ctrlPktOps[i].getData().value();
611 SmallVector<int> newData;
612 for (unsigned j = 0; j < std::max(bufferedData.size(), currentData.size());
613 j++) {
614 if (j < std::min(bufferedData.size(), currentData.size())) {
615 newData.push_back(bufferedData[j] | currentData[j]);
616 continue;
617 }
618 newData.push_back(j < bufferedData.size() ? bufferedData[j]
619 : currentData[j]);
620 }
621 ctrlPktBuffer.getProperties().data = DenseI32ArrayAttr::get(
622 ctrlPktBuffer->getContext(), ArrayRef<int>{newData});
623 erased.push_back(ctrlPktOps[i]);
624 }
625
626 for (auto e : erased)
627 e->erase();
628
629 return success();
630}
631
632// Take transaction operations and insert them at the _current_ insertion point
633// of the supplied builder.
634static LogicalResult convertTransactionOpsToMLIR(
635 OpBuilder builder, AIE::AIEToConfigurationOutputType outputType,
636 std::vector<TransactionBinaryOperation> &operations,
637 std::string blockwrite_prefix = "config_blockwrite_data_") {
638
639 auto loc = builder.getUnknownLoc();
640
641 // for each blockwrite in the binary, create a GlobalOp with the data at the
642 // device level
643 std::vector<memref::GlobalOp> global_data;
644 {
645 DeviceOp device =
646 llvm::dyn_cast<DeviceOp>(builder.getBlock()->getParentOp());
647 if (!device) {
648 device = builder.getBlock()->getParentOp()->getParentOfType<DeviceOp>();
649 }
650 OpBuilder::InsertionGuard guard(builder);
651 builder.setInsertionPointToStart(device.getBody());
652 int id = 0;
653 for (auto &op : operations) {
654 if (op.cmd.Opcode != XAIE_IO_BLOCKWRITE) {
655 global_data.push_back(nullptr);
656 continue;
657 }
658 uint32_t size = op.cmd.Size / 4;
659 const uint32_t *d = reinterpret_cast<const uint32_t *>(op.cmd.DataPtr);
660 std::vector<uint32_t> data32(d, d + size);
661
662 std::string name = blockwrite_prefix;
663 do {
664 name = blockwrite_prefix + std::to_string(id++);
665 } while (device.lookupSymbol(name));
666
667 MemRefType memrefType = MemRefType::get({size}, builder.getI32Type());
668 TensorType tensorType =
669 RankedTensorType::get({size}, builder.getI32Type());
670 auto global = memref::GlobalOp::create(
671 builder, loc, name, builder.getStringAttr("private"), memrefType,
672 DenseElementsAttr::get<uint32_t>(tensorType, data32), true, nullptr);
673 global_data.push_back(global);
674 }
675 }
676
677 // create the txn ops
678 if (outputType == AIE::AIEToConfigurationOutputType::Transaction) {
679 if (failed(emitTransactionOps(builder, operations, global_data)))
680 return failure();
681 } else if (outputType == AIE::AIEToConfigurationOutputType::ControlPacket) {
682 if (failed(emitControlPacketOps(builder, operations, global_data)))
683 return failure();
684 // resolve mask writes; control packet doesn't natively support mask write.
685 if (failed(orConsecutiveWritesOnSameAddr(builder.getBlock())))
686 return failure();
687 } else {
688 llvm_unreachable("bad output type");
689 }
690
691 return success();
692}
693
694// Convert (disassemble) a transaction binary to MLIR. On success return a new
695// ModuleOp containing a DeviceOp containing a runtime sequence with the
696// transaction binary encoded as a sequence of npu.write32, npu.maskwrite32 and
697// npu.blockwrite operations. On failure return std::nullopt.
698std::optional<mlir::ModuleOp>
700 std::vector<uint8_t> &binary) {
701
702 // parse the binary
703 std::vector<TransactionBinaryOperation> operations;
704 auto c = parseTransactionBinary(binary, operations);
705 if (!c) {
706 llvm::errs() << "Failed to parse binary\n";
707 return std::nullopt;
708 }
709 int columns = *c;
710
711 auto loc = mlir::UnknownLoc::get(ctx);
712
713 // create a new ModuleOp and set the insertion point
714 auto module = ModuleOp::create(loc);
715 OpBuilder builder(module.getBodyRegion());
716 builder.setInsertionPointToStart(module.getBody());
717
718 // create aie.device
719 std::vector<AIEDevice> devices{AIEDevice::npu1_1col, AIEDevice::npu1_2col,
720 AIEDevice::npu1_3col, AIEDevice::npu1};
721 auto device = DeviceOp::create(builder, loc, devices[columns - 1],
722 DeviceOp::getDefaultDeviceName());
723 device.getRegion().emplaceBlock();
724 DeviceOp::ensureTerminator(device.getBodyRegion(), builder, loc);
725 builder.setInsertionPointToStart(device.getBody());
726
727 // convert the parsed ops to MLIR
728 if (failed(convertTransactionOpsToMLIR(
729 builder, AIE::AIEToConfigurationOutputType::Transaction, operations)))
730 return std::nullopt;
731
732 return module;
733}
734
736 OpBuilder &builder, xilinx::AIE::DeviceOp device, llvm::StringRef clElfDir,
738 std::string blockwrite_prefix) {
739 const AIETargetModel &targetModel =
740 (const AIETargetModel &)device.getTargetModel();
741
742 if (!targetModel.hasProperty(AIETargetModel::IsNPU))
743 return failure();
744
745 bool aieSim = false;
746 bool xaieDebug = false;
747
748 AIERTControl ctl(targetModel);
749 if (failed(ctl.setIOBackend(aieSim, xaieDebug)))
750 return failure();
751
752 // start collecting transactions
753 ctl.startTransaction();
754
755 bool generateElfs = true;
756 if (failed(generateTransactions(ctl, clElfDir, device, aieSim, generateElfs,
757 true, true)))
758 return failure();
759
760 // Export the transactions to a binary buffer
761 std::vector<uint8_t> txn_data = ctl.exportSerializedTransaction();
762
763 // parse the binary data
764 std::vector<TransactionBinaryOperation> operations;
765 if (!parseTransactionBinary(txn_data, operations)) {
766 llvm::errs() << "Failed to parse binary\n";
767 return failure();
768 }
769
770 if (failed(convertTransactionOpsToMLIR(builder, outputType, operations,
771 blockwrite_prefix))) {
772 return failure();
773 }
774
775 return success();
776}
777
778static LogicalResult
779convertAIEToConfiguration(AIE::DeviceOp device, StringRef clElfDir,
781
782 OpBuilder builder(device.getBodyRegion());
783 // search for aiex.configure ops in runtime sequences by walking the device
784 // and collect them in a vector. If there are none, create a new runtime
785 // sequence. Otherwise assume the insertion point is the first
786 // aiex.configure op.
787 auto loc = builder.getUnknownLoc();
788 SmallVector<AIEX::ConfigureOp> configureOps;
789 device.walk([&](AIEX::ConfigureOp op) { configureOps.push_back(op); });
790
791 if (configureOps.empty()) {
792 // create aiex.runtime_sequence
793 int id = 0;
794 std::string seq_name = "configure";
795 while (device.lookupSymbol(seq_name))
796 seq_name = "configure" + std::to_string(id++);
797 StringAttr seq_sym_name = builder.getStringAttr(seq_name);
798 auto seq = AIE::RuntimeSequenceOp::create(builder, loc, seq_sym_name);
799 seq.getBody().push_back(new Block);
800 builder.setInsertionPointToStart(&seq.getBody().front());
801 } else {
802 builder.setInsertionPoint(configureOps.front());
803 }
804
805 // convert the parsed ops to MLIR
806 if (failed(generateAndInsertConfigOps(builder, device, clElfDir, outputType)))
807 return failure();
808
809 // If we chose the first aiex.configure as insertion point, erase it
810 // and inline its child operations.
811 if (!configureOps.empty()) {
812 // splice the body into the current insertion point
813 builder.getBlock()->getOperations().splice(
814 builder.getInsertionPoint(),
815 configureOps.front().getBody().front().getOperations());
816 configureOps.front().erase();
817 }
818
819 return success();
820}
821
822namespace {
823
824template <typename BaseClass, AIE::AIEToConfigurationOutputType MyOutputType>
825struct ConvertAIEToConfigurationPass : BaseClass {
826 std::string &ref_clElfDir;
827 std::string &ref_clDeviceName;
828 ConvertAIEToConfigurationPass(std::string &clElfDir,
829 std::string &clDeviceName)
830 : ref_clElfDir(clElfDir), ref_clDeviceName(clDeviceName) {}
831
832 void getDependentDialects(DialectRegistry &registry) const override {
833 registry.insert<memref::MemRefDialect, AIEX::AIEXDialect>();
834 }
835
836 void runOnOperation() override {
837 AIE::DeviceOp deviceOp = BaseClass::getOperation();
838 if (!ref_clDeviceName.empty() &&
839 deviceOp.getSymName() != ref_clDeviceName) {
840 return;
841 }
842 if (failed(
843 convertAIEToConfiguration(deviceOp, ref_clElfDir, MyOutputType))) {
844 return BaseClass::signalPassFailure();
845 }
846 }
847};
848
849struct ConvertAIEToTransactionPass
850 : ConvertAIEToConfigurationPass<
851 xilinx::impl::ConvertAIEToTransactionBase<
852 ConvertAIEToTransactionPass>,
853 AIE::AIEToConfigurationOutputType::Transaction> {
854 ConvertAIEToTransactionPass()
855 : ConvertAIEToConfigurationPass<
856 xilinx::impl::ConvertAIEToTransactionBase<
857 ConvertAIEToTransactionPass>,
859 clDeviceName) {}
860};
861
862struct ConvertAIEToControlPacketsPass
863 : ConvertAIEToConfigurationPass<
864 xilinx::impl::ConvertAIEToControlPacketsBase<
865 ConvertAIEToControlPacketsPass>,
866 AIE::AIEToConfigurationOutputType::ControlPacket> {
867 ConvertAIEToControlPacketsPass()
868 : ConvertAIEToConfigurationPass<
869 xilinx::impl::ConvertAIEToControlPacketsBase<
870 ConvertAIEToControlPacketsPass>,
872 clDeviceName) {}
873};
874
875} // end anonymous namespace
876
877std::unique_ptr<mlir::OperationPass<xilinx::AIE::DeviceOp>>
879 return std::make_unique<ConvertAIEToTransactionPass>();
880}
881
882std::unique_ptr<mlir::OperationPass<xilinx::AIE::DeviceOp>>
884 return std::make_unique<ConvertAIEToControlPacketsPass>();
885}
XAie_TxnOpcode
@ XAIE_IO_CUSTOM_OP_TCT
@ XAIE_IO_CUSTOM_OP_DDR_PATCH
@ XAIE_IO_BLOCKWRITE
@ XAIE_IO_MASKWRITE
@ XAIE_IO_WRITE
LogicalResult orConsecutiveWritesOnSameAddr(Block *body)
bool hasProperty(ModelProperty Prop) const
std::shared_ptr< Value > value()
Definition cxxopts.hpp:1026
uint8_t patch
Definition cxxopts.hpp:131
uint8_t major
Definition cxxopts.hpp:131
uint8_t minor
Definition cxxopts.hpp:131
Include the generated interface declarations.
std::unique_ptr< mlir::OperationPass< xilinx::AIE::DeviceOp > > createConvertAIEToTransactionPass()
mlir::LogicalResult generateAndInsertConfigOps(mlir::OpBuilder &builder, xilinx::AIE::DeviceOp device, llvm::StringRef clElfDir="", AIEToConfigurationOutputType outputType=AIEToConfigurationOutputType::Transaction, std::string blockwrite_prefix="config_blockwrite_data_")
std::optional< mlir::ModuleOp > convertTransactionBinaryToMLIR(mlir::MLIRContext *ctx, std::vector< uint8_t > &binary)
std::unique_ptr< mlir::OperationPass< xilinx::AIE::DeviceOp > > createConvertAIEToControlPacketsPass()
mlir::LogicalResult addAieElfs(DeviceOp &targetOp, const mlir::StringRef workDirPath, bool aieSim)
Definition AIERT.cpp:979
mlir::LogicalResult setIOBackend(bool aieSim, bool xaieDebug)
Definition AIERT.cpp:257
std::vector< uint8_t > exportSerializedTransaction()
Definition AIERT.cpp:1026
mlir::LogicalResult addCoreEnable(DeviceOp &targetOp)
Definition AIERT.cpp:815
mlir::LogicalResult addInitConfig(DeviceOp &targetOp)
Definition AIERT.cpp:743