MLIR-AIE
AIETargetNPU.cpp
Go to the documentation of this file.
1//===- AIETargetNPU.cpp -----------------------------------------*- C++ -*-===//
2//
3// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7// (c) Copyright 2023-2025 Advanced Micro Devices, Inc.
8//
9//===----------------------------------------------------------------------===//
10
12
15
16#include "mlir/Dialect/Func/IR/FuncOps.h"
17#include "mlir/Interfaces/DataLayoutInterfaces.h"
18#include "mlir/Tools/mlir-translate/MlirTranslateMain.h"
19
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/TypeSwitch.h"
22#include "llvm/Support/Format.h"
23
24#include <vector>
25
26extern "C" {
27// #include "xaiengine/xaie_txn.h"
28// see aie-rt commit a6196eb, xaiengine/xaie_txn.h for source of this enum
60}
61
62using namespace mlir;
63using namespace xilinx;
64using namespace xilinx::AIE;
65using namespace xilinx::AIEX;
66
67namespace {
68
69// Example:
70// - instructions = {3,4,5}
71// - tailSize = 2
72// instructions becomes {3,4,5,0,0} and
73// a mutable reference to the tail {0,0} is returned.
74llvm::MutableArrayRef<uint32_t>
75reserveAndGetTail(std::vector<uint32_t> &instructions, uint64_t tailSize) {
76 auto oldSize = instructions.size();
77 auto newSize = oldSize + tailSize;
78 instructions.resize(newSize, 0);
79 return llvm::MutableArrayRef<uint32_t>(instructions.data() + oldSize,
80 tailSize);
81}
82
83void appendSync(std::vector<uint32_t> &instructions, NpuSyncOp op) {
84
85 auto words = reserveAndGetTail(instructions, 4);
86
87 // XAIE_IO_CUSTOM_OP_TCT
88 words[0] = XAIE_IO_CUSTOM_OP_TCT;
89
90 words[1] = words.size() * sizeof(uint32_t); // Operation Size
91
92 words[2] |= static_cast<uint32_t>(op.getDirection()) & 0xff;
93 words[2] |= (op.getRow() & 0xff) << 8;
94 words[2] |= (op.getColumn() & 0xff) << 16;
95
96 words[3] |= (op.getRowNum() & 0xff) << 8;
97 words[3] |= (op.getColumnNum() & 0xff) << 16;
98 words[3] |= (op.getChannel() & 0xff) << 24;
99}
100
101void appendWrite32(std::vector<uint32_t> &instructions, NpuWrite32Op op) {
102
103 auto words = reserveAndGetTail(instructions, 6);
104
105 if (op.getBuffer()) {
106 op.emitOpError("Cannot translate symbolic address");
107 return;
108 }
109
110 // XAIE_IO_WRITE
111 words[0] = XAIE_IO_WRITE;
112 words[2] = *op.getAbsoluteAddress();
113 words[3] = 0; // Extra bits for Reg Offset
114 words[4] = op.getValue(); // Value
115 words[5] = words.size() * sizeof(uint32_t); // Operation Size
116}
117
118void appendMaskWrite32(std::vector<uint32_t> &instructions,
119 NpuMaskWrite32Op op) {
120
121 auto words = reserveAndGetTail(instructions, 7);
122
123 if (op.getBuffer()) {
124 op.emitOpError("Cannot translate symbolic address");
125 return;
126 }
127
128 // XAIE_IO_MASKWRITE
129 words[0] = XAIE_IO_MASKWRITE;
130 words[2] = *op.getAbsoluteAddress();
131 words[3] = 0;
132 words[4] = op.getValue(); // Value
133 words[5] = op.getMask(); // Mask
134 words[6] = words.size() * sizeof(uint32_t); // Operation Size
135}
136
137void appendLoadPdi(std::vector<uint32_t> &instructions, NpuLoadPdiOp op) {
138
139 auto words = reserveAndGetTail(instructions, 4);
140
141 // XAIE_IO_LOADPDI
142 words[0] = XAIE_IO_LOADPDI;
143 words[0] |= op.getId() << 16;
144 std::optional<uint32_t> size = op.getSize();
145 if (size)
146 words[1] = *size;
147 std::optional<uint64_t> address = op.getAddress();
148 if (address) {
149 words[2] = *address;
150 words[3] = *address >> 32;
151 }
152}
153
154void appendAddressPatch(std::vector<uint32_t> &instructions,
155 NpuAddressPatchOp op) {
156
157 auto words = reserveAndGetTail(instructions, 12);
158
159 // XAIE_IO_CUSTOM_OP_DDR_PATCH
161 words[1] = words.size() * sizeof(uint32_t); // Operation Size
162
163 words[5] = 0; // Action
164
165 words[6] = op.getAddr();
166
167 words[8] = op.getArgIdx();
168
169 words[10] = op.getArgPlus();
170}
171
172void appendBlockWrite(std::vector<uint32_t> &instructions, NpuBlockWriteOp op) {
173 unsigned payload_start = 4;
174
175 std::optional<uint32_t> address = op.getAbsoluteAddress();
176 DenseIntElementsAttr data = op.getDataWords();
177
178 auto words = reserveAndGetTail(instructions, data.size() + payload_start);
179
180 // XAIE_IO_BLOCKWRITE
181 words[0] = XAIE_IO_BLOCKWRITE;
182 words[2] = op.getAddress();
183 auto col = op.getColumn();
184 auto row = op.getRow();
185 if (col && row) {
186 words[1] = (*col & 0xff) | ((*row & 0xff) << 8);
187 }
188 words[2] = *address;
189 words[3] = words.size() * sizeof(uint32_t); // Operation Size
190
191 unsigned i = payload_start;
192 for (auto d : data)
193 words[i++] = d.getZExtValue();
194}
195
196void appendPreempt(std::vector<uint32_t> &instructions, NpuPreemptOp op) {
197
198 auto words = reserveAndGetTail(instructions, 1);
199 words[0] = XAIE_IO_PREEMPT | (op.getLevel() << 8);
200}
201
202} // namespace
203
205 mlir::ModuleOp moduleOp, std::vector<uint32_t> &instructions,
206 StringRef deviceName, StringRef sequenceName) {
207
208 DeviceOp deviceOp =
209 DeviceOp::getForSymbolInModuleOrError(moduleOp, deviceName);
210 if (!deviceOp) {
211 return failure();
212 }
213
214 auto words = reserveAndGetTail(instructions, 4);
215
216 const AIETargetModel &tm = deviceOp.getTargetModel();
217
218 // setup txn header
219 uint8_t major = 0;
220 uint8_t minor = 1;
221 uint8_t devGen = 3; // NPU (PHX HWK)
222 if (llvm::isa<AIE::BaseNPU2TargetModel>(tm))
223 devGen = 4; // NPU2 (STX KRK)
224 uint8_t numRows = tm.rows();
225 uint8_t numCols = tm.columns();
226 uint8_t numMemTileRows = tm.getNumMemTileRows();
227 uint32_t count = 0;
228 words[0] = (numRows << 24) | (devGen << 16) | (minor << 8) | major;
229 words[1] = (numMemTileRows << 8) | numCols;
230
231 AIE::RuntimeSequenceOp seq =
232 AIE::RuntimeSequenceOp::getForSymbolInDeviceOrError(deviceOp,
233 sequenceName);
234 if (!seq) {
235 return failure();
236 }
237 for (Block &block : seq.getBody()) {
238 for (Operation &o : block) {
239 llvm::TypeSwitch<Operation *>(&o)
240 .Case<NpuSyncOp>([&](auto op) {
241 count++;
242 appendSync(instructions, op);
243 })
244 .Case<NpuWrite32Op>([&](auto op) {
245 count++;
246 appendWrite32(instructions, op);
247 })
248 .Case<NpuBlockWriteOp>([&](auto op) {
249 count++;
250 appendBlockWrite(instructions, op);
251 })
252 .Case<NpuMaskWrite32Op>([&](auto op) {
253 count++;
254 appendMaskWrite32(instructions, op);
255 })
256 .Case<NpuLoadPdiOp>([&](auto op) {
257 count++;
258 appendLoadPdi(instructions, op);
259 })
260 .Case<NpuAddressPatchOp>([&](auto op) {
261 count++;
262 appendAddressPatch(instructions, op);
263 })
264 .Case<NpuPreemptOp>([&](auto op) {
265 count++;
266 appendPreempt(instructions, op);
267 });
268 }
269 }
270
271 // write size fields of the txn header
272 instructions[2] = count;
273 instructions[3] = instructions.size() * sizeof(uint32_t); // size of the txn
274 return success();
275}
276
278 ModuleOp module, std::vector<uint32_t> &instructions, StringRef deviceName,
279 StringRef sequenceName) {
280 DeviceOp deviceOp =
281 AIE::DeviceOp::getForSymbolInModuleOrError(module, deviceName);
282 if (!deviceOp) {
283 return failure();
284 }
285 OpBuilder builder = OpBuilder::atBlockBegin(deviceOp.getBody());
286 AIE::RuntimeSequenceOp seq =
287 AIE::RuntimeSequenceOp::getForSymbolInDeviceOrError(deviceOp,
288 sequenceName);
289 if (!seq) {
290 return failure();
291 }
292
293 Block &entry = seq.getBody().front();
294 for (auto &o : entry) {
295 auto packetOp = dyn_cast<AIEX::NpuControlPacketOp>(o);
296 if (!packetOp)
297 continue;
298
299 uint32_t size = 0;
300 auto data = packetOp.getData();
301 if (data)
302 size = data->size();
303
304 auto words = reserveAndGetTail(instructions, 2 + size);
305
306 if (!data && packetOp.getLength())
307 size = *packetOp.getLength();
308
309 auto parity = [](uint32_t n) {
310 uint32_t p = 0;
311 while (n) {
312 p += n & 1;
313 n >>= 1;
314 }
315 return (p % 2) == 0;
316 };
317
318 // stream header is attached here instead of by shim dma
319 int col = packetOp.getColumnFromAddr();
320 int row = packetOp.getRowFromAddr();
321 auto destTile = TileOp::getOrCreate(builder, deviceOp, col, row);
322 auto info = destTile->getAttrOfType<AIE::PacketInfoAttr>("controller_id");
323 uint32_t hdr = 0;
324 if (info)
325 hdr = (info.getPktType() & 0x7) << 12 | (info.getPktId() & 0xff);
326 else
327 destTile->emitWarning("Expected controller_id attribute");
328 words[0] = hdr | (0x1 & parity(hdr)) << 31;
329
330 // control packet header
331 uint32_t addr = packetOp.getAddress() & 0xFFFFF;
332 uint32_t beats = size - 1;
333 uint32_t opc = packetOp.getOpcode();
334 uint32_t id = packetOp.getStreamId();
335 hdr = id << 24 | opc << 22 | beats << 20 | addr;
336 words[1] = hdr | (0x1 & parity(hdr)) << 31;
337
338 // configuration data
339 if (opc == 0x0 || opc == 0x2)
340 for (unsigned i = 0; i < size; i++)
341 words[i + 2] = data.value()[i];
342 }
343 return success();
344}
XAie_TxnOpcode
@ XAIE_IO_LOAD_PM_START
@ XAIE_IO_MASKPOLL
@ XAIE_IO_PREEMPT
@ XAIE_IO_CUSTOM_OP_NEXT
@ XAIE_IO_UPDATE_REG
@ XAIE_IO_CUSTOM_OP_RECORD_TIMER
@ XAIE_IO_CUSTOM_OP_TCT
@ XAIE_IO_MASKPOLL_BUSY
@ XAIE_IO_CUSTOM_OP_READ_REGS
@ XAIE_CONFIG_SHIMDMA_BD
@ XAIE_IO_UPDATE_STATE_TABLE
@ XAIE_IO_UPDATE_SCRATCH
@ XAIE_IO_CUSTOM_OP_DDR_PATCH
@ XAIE_IO_CUSTOM_OP_MERGE_SYNC
@ XAIE_IO_LOAD_PM_END_INTERNAL
@ XAIE_IO_BLOCKWRITE
@ XAIE_IO_BLOCKSET
@ XAIE_IO_CREATE_SCRATCHPAD
@ XAIE_IO_LOADPDI
@ XAIE_IO_MASKWRITE
@ XAIE_IO_CUSTOM_OP_BEGIN
@ XAIE_IO_CUSTOM_OP_MAX
@ XAIE_IO_NOOP
@ XAIE_CONFIG_SHIMDMA_DMABUF_BD
@ XAIE_IO_WRITE
virtual int rows() const =0
Return the number of rows in the device.
virtual int columns() const =0
Return the number of columns in the device.
virtual uint32_t getNumMemTileRows() const =0
uint8_t major
Definition cxxopts.hpp:131
uint8_t minor
Definition cxxopts.hpp:131
Include the generated interface declarations.
mlir::LogicalResult AIETranslateControlPacketsToUI32Vec(mlir::ModuleOp, std::vector< uint32_t > &, llvm::StringRef deviceName="", llvm::StringRef sequenceName="")
mlir::LogicalResult AIETranslateNpuToBinary(mlir::ModuleOp, std::vector< uint32_t > &, llvm::StringRef deviceName="", llvm::StringRef sequenceName="")