MLIR-AIE
AIETargetNPU.cpp
Go to the documentation of this file.
1//===- AIETargetNPU.cpp -----------------------------------------*- C++ -*-===//
2//
3// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7// (c) Copyright 2023-2025 Advanced Micro Devices, Inc.
8//
9//===----------------------------------------------------------------------===//
10
12
15
16#include "mlir/Dialect/Func/IR/FuncOps.h"
17#include "mlir/Tools/mlir-translate/MlirTranslateMain.h"
18
19#include "llvm/ADT/ArrayRef.h"
20#include "llvm/ADT/TypeSwitch.h"
21#include "llvm/Support/Format.h"
22
23#include <vector>
24
25using namespace mlir;
26using namespace xilinx;
27using namespace xilinx::AIE;
28using namespace xilinx::AIEX;
29
// Opcode values stored in word 0 of each record that the append* helpers in
// this file add to the instruction stream.

// Used by appendWrite32 (XAIE_IO_WRITE).
#define TXN_OPC_WRITE 0x0
// Used by appendBlockWrite (XAIE_IO_BLOCKWRITE).
#define TXN_OPC_BLOCKWRITE 0x1
// Used by appendMaskWrite32 (XAIE_IO_MASKWRITE).
#define TXN_OPC_MASKWRITE 0x3
// Used by appendSync (XAIE_IO_CUSTOM_OP_TCT).
#define TXN_OPC_TCT 0x80
// Used by appendAddressPatch (XAIE_IO_CUSTOM_OP_DDR_PATCH).
#define TXN_OPC_DDR_PATCH 0x81
35
36namespace {
37
38// Example:
39// - instructions = {3,4,5}
40// - tailSize = 2
41// instructions becomes {3,4,5,0,0} and
42// a mutable reference to the tail {0,0} is returned.
43llvm::MutableArrayRef<uint32_t>
44reserveAndGetTail(std::vector<uint32_t> &instructions, uint64_t tailSize) {
45 auto oldSize = instructions.size();
46 auto newSize = oldSize + tailSize;
47 instructions.resize(newSize, 0);
48 return llvm::MutableArrayRef<uint32_t>(instructions.data() + oldSize,
49 tailSize);
50}
51
52void appendSync(std::vector<uint32_t> &instructions, NpuSyncOp op) {
53
54 auto words = reserveAndGetTail(instructions, 4);
55
56 // XAIE_IO_CUSTOM_OP_TCT
57 words[0] = TXN_OPC_TCT;
58
59 words[1] = words.size() * sizeof(uint32_t); // Operation Size
60
61 words[2] |= static_cast<uint32_t>(op.getDirection()) & 0xff;
62 words[2] |= (op.getRow() & 0xff) << 8;
63 words[2] |= (op.getColumn() & 0xff) << 16;
64
65 words[3] |= (op.getRowNum() & 0xff) << 8;
66 words[3] |= (op.getColumnNum() & 0xff) << 16;
67 words[3] |= (op.getChannel() & 0xff) << 24;
68}
69
70void appendWrite32(std::vector<uint32_t> &instructions, NpuWrite32Op op) {
71
72 auto words = reserveAndGetTail(instructions, 6);
73
74 if (op.getBuffer()) {
75 op.emitOpError("Cannot translate symbolic address");
76 return;
77 }
78
79 // XAIE_IO_WRITE
80 words[0] = TXN_OPC_WRITE;
81 words[2] = op.getAddress();
82 auto col = op.getColumn();
83 auto row = op.getRow();
84 if (col && row) {
85 const AIETargetModel &tm = op->getParentOfType<DeviceOp>().getTargetModel();
86 words[2] = ((*col & 0xff) << tm.getColumnShift()) |
87 ((*row & 0xff) << tm.getRowShift()) | (words[2] & 0xFFFFF);
88 }
89 words[3] = 0; // Extra bits for Reg Offset
90 words[4] = op.getValue(); // Value
91 words[5] = words.size() * sizeof(uint32_t); // Operation Size
92}
93
94void appendMaskWrite32(std::vector<uint32_t> &instructions,
95 NpuMaskWrite32Op op) {
96
97 auto words = reserveAndGetTail(instructions, 7);
98
99 if (op.getBuffer()) {
100 op.emitOpError("Cannot translate symbolic address");
101 return;
102 }
103
104 // XAIE_IO_MASKWRITE
105 words[0] = TXN_OPC_MASKWRITE;
106 words[2] = op.getAddress();
107 auto col = op.getColumn();
108 auto row = op.getRow();
109 if (col && row) {
110 const AIETargetModel &tm = op->getParentOfType<DeviceOp>().getTargetModel();
111 words[2] = ((*col & 0xff) << tm.getColumnShift()) |
112 ((*row & 0xff) << tm.getRowShift()) | (words[2] & 0xFFFFF);
113 }
114 words[3] = 0;
115 words[4] = op.getValue(); // Value
116 words[5] = op.getMask(); // Mask
117 words[6] = words.size() * sizeof(uint32_t); // Operation Size
118}
119
120void appendAddressPatch(std::vector<uint32_t> &instructions,
121 NpuAddressPatchOp op) {
122
123 auto words = reserveAndGetTail(instructions, 12);
124
125 // XAIE_IO_CUSTOM_OP_DDR_PATCH
126 words[0] = TXN_OPC_DDR_PATCH;
127 words[1] = words.size() * sizeof(uint32_t); // Operation Size
128
129 words[5] = 0; // Action
130
131 words[6] = op.getAddr();
132
133 words[8] = op.getArgIdx();
134
135 words[10] = op.getArgPlus();
136}
137
138void appendBlockWrite(std::vector<uint32_t> &instructions, NpuBlockWriteOp op) {
139
140 Value memref = op.getData();
141 int64_t width = cast<MemRefType>(memref.getType()).getElementTypeBitWidth();
142 if (width != 32) {
143 op.emitWarning("Only 32-bit data type is supported for now");
144 return;
145 }
146
147 memref::GetGlobalOp getGlobal = memref.getDefiningOp<memref::GetGlobalOp>();
148 if (!getGlobal) {
149 op.emitError("Only MemRefs from memref.get_global are supported");
150 return;
151 }
152
153 auto global = dyn_cast_if_present<memref::GlobalOp>(
154 op->getParentOfType<AIE::DeviceOp>().lookupSymbol(getGlobal.getName()));
155 if (!global) {
156 op.emitError("Global symbol not found");
157 return;
158 }
159
160 auto initVal = global.getInitialValue();
161 if (!initVal) {
162 op.emitError("Global symbol has no initial value");
163 return;
164 }
165
166 auto data = dyn_cast<DenseIntElementsAttr>(*initVal);
167 if (!data) {
168 op.emitError("Global symbol initial value is not a dense int array");
169 return;
170 }
171
172 unsigned payload_start = 4;
173 auto words = reserveAndGetTail(instructions, data.size() + payload_start);
174
175 // XAIE_IO_BLOCKWRITE
176 words[0] = TXN_OPC_BLOCKWRITE;
177 words[2] = op.getAddress();
178 auto col = op.getColumn();
179 auto row = op.getRow();
180 if (col && row) {
181 words[1] = (*col & 0xff) | ((*row & 0xff) << 8);
182 const AIETargetModel &tm = op->getParentOfType<DeviceOp>().getTargetModel();
183 words[2] = ((*col & 0xff) << tm.getColumnShift()) |
184 ((*row & 0xff) << tm.getRowShift()) | (words[2] & 0xFFFFF);
185 }
186 words[3] = words.size() * sizeof(uint32_t); // Operation Size
187
188 unsigned i = payload_start;
189 for (auto d : data)
190 words[i++] = d.getZExtValue();
191}
192
193} // namespace
194
195LogicalResult
197 std::vector<uint32_t> &instructions,
198 StringRef sequenceName) {
199
200 auto words = reserveAndGetTail(instructions, 4);
201
202 DeviceOp deviceOp = *module.getOps<DeviceOp>().begin();
203 const AIETargetModel &tm = deviceOp.getTargetModel();
204
205 // setup txn header
206 uint8_t major = 0;
207 uint8_t minor = 1;
208 uint8_t devGen = 3; // NPU (PHX HWK)
209 if (llvm::isa<AIE::NPU2TargetModel>(AIE::getTargetModel(deviceOp)))
210 devGen = 4; // NPU2 (STX KRK)
211 uint8_t numRows = tm.rows();
212 uint8_t numCols = tm.columns();
213 uint8_t numMemTileRows = tm.getNumMemTileRows();
214 uint32_t count = 0;
215 words[0] = (numRows << 24) | (devGen << 16) | (minor << 8) | major;
216 words[1] = (numMemTileRows << 8) | numCols;
217
218 auto sequenceOps = deviceOp.getOps<AIEX::RuntimeSequenceOp>();
219 for (auto seq : sequenceOps) {
220 if (sequenceName.size() && sequenceName != seq.getSymName())
221 continue;
222 Block &entry = seq.getBody().front();
223 for (auto &o : entry) {
224 llvm::TypeSwitch<Operation *>(&o)
225 .Case<NpuSyncOp>([&](auto op) {
226 count++;
227 appendSync(instructions, op);
228 })
229 .Case<NpuWrite32Op>([&](auto op) {
230 count++;
231 appendWrite32(instructions, op);
232 })
233 .Case<NpuBlockWriteOp>([&](auto op) {
234 count++;
235 appendBlockWrite(instructions, op);
236 })
237 .Case<NpuMaskWrite32Op>([&](auto op) {
238 count++;
239 appendMaskWrite32(instructions, op);
240 })
241 .Case<NpuAddressPatchOp>([&](auto op) {
242 count++;
243 appendAddressPatch(instructions, op);
244 });
245 }
246 }
247
248 // write size fields of the txn header
249 instructions[2] = count;
250 instructions[3] = instructions.size() * sizeof(uint32_t); // size of the txn
251 return success();
252}
253
255 ModuleOp module, std::vector<uint32_t> &instructions,
256 StringRef sequenceName) {
257 DeviceOp deviceOp = *module.getOps<DeviceOp>().begin();
258 auto sequenceOps = deviceOp.getOps<AIEX::RuntimeSequenceOp>();
259 for (auto seq : sequenceOps) {
260 if (sequenceName.size() && sequenceName != seq.getSymName())
261 continue;
262 Block &entry = seq.getBody().front();
263 for (auto &o : entry) {
264 llvm::TypeSwitch<Operation *>(&o).Case<NpuControlPacketOp>([&](auto op) {
265 uint32_t size = 0;
266 auto data = op.getData();
267 auto length = op.getLength();
268 if (data)
269 size = data->size();
270 auto words = reserveAndGetTail(instructions, 1 + size);
271 if (!data && length)
272 size = *length;
273 auto parity = [](uint32_t n) {
274 uint32_t p = 0;
275 while (n) {
276 p += n & 1;
277 n >>= 1;
278 }
279 return (p % 2) == 0;
280 };
281 uint32_t addr = op.getAddress() & 0xFFFFF;
282 uint32_t beats = size - 1;
283 uint32_t opc = op.getOpcode();
284 uint32_t id = op.getStreamId();
285 uint32_t hdr = id << 24 | opc << 22 | beats << 20 | addr;
286 words[0] = hdr | (0x1 & parity(hdr)) << 31;
287 if (opc == 0x0 || opc == 0x2)
288 for (unsigned i = 0; i < size; i++)
289 words[i + 1] = data.value()[i];
290 });
291 }
292 }
293 return success();
294}
#define TXN_OPC_MASKWRITE
#define TXN_OPC_TCT
#define TXN_OPC_BLOCKWRITE
#define TXN_OPC_DDR_PATCH
#define TXN_OPC_WRITE
virtual int rows() const =0
Return the number of rows in the device.
virtual uint32_t getColumnShift() const =0
virtual int columns() const =0
Return the number of columns in the device.
virtual uint32_t getNumMemTileRows() const =0
virtual uint32_t getRowShift() const =0
Include the generated interface declarations.
mlir::LogicalResult AIETranslateNpuToBinary(mlir::ModuleOp, std::vector< uint32_t > &, llvm::StringRef sequenceName="")
const AIETargetModel & getTargetModel(mlir::Operation *op)
mlir::LogicalResult AIETranslateControlPacketsToUI32Vec(mlir::ModuleOp, std::vector< uint32_t > &, llvm::StringRef sequenceName="")