16#include "mlir/Dialect/Func/IR/FuncOps.h"
17#include "mlir/Interfaces/DataLayoutInterfaces.h"
18#include "mlir/Tools/mlir-translate/MlirTranslateMain.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/TypeSwitch.h"
22#include "llvm/Support/Format.h"
// Grows `instructions` by `tailSize` words, zero-filling the new words, and
// returns a mutable view that begins at the first newly added word.
// NOTE(review): the end of the return expression is missing from this listing;
// the view length is presumably `tailSize` - confirm against the full source.
74llvm::MutableArrayRef<uint32_t>
75reserveAndGetTail(std::vector<uint32_t> &instructions, uint64_t tailSize) {
// Remember where the tail will start before the vector is resized.
76 auto oldSize = instructions.size();
77 auto newSize = oldSize + tailSize;
// New elements are value-initialized to 0; callers that use `|=` on the tail
// depend on this.
78 instructions.resize(newSize, 0);
// The pointer is taken from data() after the resize, so it refers to the
// vector's current storage.
79 return llvm::MutableArrayRef<uint32_t>(instructions.data() + oldSize,
// Appends a 4-word record describing `op` to `instructions`.
// NOTE(review): the listing skips lines here; the assignment to words[0] and
// the closing brace are not visible.
83void appendSync(std::vector<uint32_t> &instructions, NpuSyncOp op) {
// The four reserved words start out as zero (see reserveAndGetTail), which is
// why the fields below can be OR-ed in.
85 auto words = reserveAndGetTail(instructions, 4);
// words[1]: size of this record in bytes (number of reserved words times 4).
90 words[1] = words.size() *
sizeof(uint32_t);
// words[2]: direction in bits 0-7, row in bits 8-15, column in bits 16-23.
92 words[2] |=
static_cast<uint32_t
>(op.getDirection()) & 0xff;
93 words[2] |= (op.getRow() & 0xff) << 8;
94 words[2] |= (op.getColumn() & 0xff) << 16;
// words[3]: row count in bits 8-15, column count in bits 16-23, channel in
// bits 24-31. Bits 0-7 are not written by any visible line.
96 words[3] |= (op.getRowNum() & 0xff) << 8;
97 words[3] |= (op.getColumnNum() & 0xff) << 16;
98 words[3] |= (op.getChannel() & 0xff) << 24;
// Appends a 6-word record for a 32-bit write described by `op`.
// NOTE(review): lines are missing from this listing; words[0], words[1] and
// words[3] are not assigned by any visible line.
101void appendWrite32(std::vector<uint32_t> &instructions, NpuWrite32Op op) {
// The tail is reserved before the buffer check below, so `instructions` has
// already grown by six words when the error is emitted.
103 auto words = reserveAndGetTail(instructions, 6);
// A set buffer means the address is still symbolic; an op error is emitted.
// NOTE(review): what follows the diagnostic (e.g. an early return) is not
// visible here - confirm.
105 if (op.getBuffer()) {
106 op.emitOpError(
"Cannot translate symbolic address");
// words[2]: the absolute address. The result of getAbsoluteAddress() is
// dereferenced without a visible has-value check.
112 words[2] = *op.getAbsoluteAddress();
// words[4]: the value to write.
114 words[4] = op.getValue();
// words[5]: size of this record in bytes.
115 words[5] = words.size() *
sizeof(uint32_t);
// Appends a 7-word record for a masked 32-bit write described by `op`.
// NOTE(review): lines are missing from this listing; words[0], words[1] and
// words[3] are not assigned by any visible line.
118void appendMaskWrite32(std::vector<uint32_t> &instructions,
119 NpuMaskWrite32Op op) {
// The tail is reserved before the buffer check below, so `instructions` has
// already grown by seven words when the error is emitted.
121 auto words = reserveAndGetTail(instructions, 7);
// A set buffer means the address is still symbolic; an op error is emitted.
// NOTE(review): what follows the diagnostic is not visible here - confirm.
123 if (op.getBuffer()) {
124 op.emitOpError(
"Cannot translate symbolic address");
// words[2]: the absolute address, dereferenced without a visible has-value
// check.
130 words[2] = *op.getAbsoluteAddress();
// words[4]: the value; words[5]: the mask.
132 words[4] = op.getValue();
133 words[5] = op.getMask();
// words[6]: size of this record in bytes.
134 words[6] = words.size() *
sizeof(uint32_t);
// Appends a 4-word record for `op`.
// NOTE(review): most of the body is missing from this listing; only the
// writes to words[0] and words[3] are visible.
137void appendLoadPdi(std::vector<uint32_t> &instructions, NpuLoadPdiOp op) {
139 auto words = reserveAndGetTail(instructions, 4);
// The op id is OR-ed into words[0] starting at bit 16.
143 words[0] |= op.getId() << 16;
// Size and address are read as optionals.
// NOTE(review): how `size` is used and whether `address` is checked before
// the dereference below is not visible here.
144 std::optional<uint32_t> size = op.getSize();
147 std::optional<uint64_t> address = op.getAddress();
// words[3]: the upper 32 bits of the 64-bit address.
150 words[3] = *address >> 32;
// Appends a 12-word record for `op`.
// NOTE(review): lines are missing from this listing; only words[1], words[6],
// words[8] and words[10] are assigned by visible lines.
154void appendAddressPatch(std::vector<uint32_t> &instructions,
155 NpuAddressPatchOp op) {
157 auto words = reserveAndGetTail(instructions, 12);
// words[1]: size of this record in bytes.
161 words[1] = words.size() *
sizeof(uint32_t);
// words[6]: the op's address; words[8]: its argument index; words[10]: its
// argument offset ("arg plus").
165 words[6] = op.getAddr();
167 words[8] = op.getArgIdx();
169 words[10] = op.getArgPlus();
// Appends a record for `op` made of a 4-word header followed by one word per
// element of the op's data words.
// NOTE(review): lines are missing from this listing, including any
// conditionals around the header writes and the header of the payload loop.
172void appendBlockWrite(std::vector<uint32_t> &instructions, NpuBlockWriteOp op) {
// Index of the first payload word, i.e. the number of header words.
173 unsigned payload_start = 4;
// NOTE(review): no visible line uses `address`.
175 std::optional<uint32_t> address = op.getAbsoluteAddress();
176 DenseIntElementsAttr data = op.getDataWords();
// Reserve header plus payload in a single call.
178 auto words = reserveAndGetTail(instructions, data.size() + payload_start);
// words[2]: the op's address.
182 words[2] = op.getAddress();
183 auto col = op.getColumn();
184 auto row = op.getRow();
// words[1]: column in bits 0-7 and row in bits 8-15. `col` and `row` are
// dereferenced here; any guard around this statement is not visible.
186 words[1] = (*
col & 0xff) | ((*
row & 0xff) << 8);
// words[3]: size of the whole record in bytes, header and payload included.
189 words[3] = words.size() *
sizeof(uint32_t);
// Payload words are written consecutively starting at payload_start, each
// zero-extended from its element `d`.
191 unsigned i = payload_start;
193 words[i++] = d.getZExtValue();
// Appends a single-word record for `op`.
// NOTE(review): only the reservation is visible in this listing; the value
// written to the word is not shown.
196void appendPreempt(std::vector<uint32_t> &instructions, NpuPreemptOp op) {
198 auto words = reserveAndGetTail(instructions, 1);
// Translates the ops of a runtime sequence into 32-bit words appended to
// `instructions`: a 4-word header followed by one record per supported op.
// NOTE(review): the declarator line is missing from this listing; this is
// presumably AIETranslateNpuToBinary. Many body lines are missing as well.
205 mlir::ModuleOp moduleOp, std::vector<uint32_t> &instructions,
206 StringRef deviceName, StringRef sequenceName) {
// Look up the device by symbol name in the module.
209 DeviceOp::getForSymbolInModuleOrError(moduleOp, deviceName);
// Reserve the four header words.
214 auto words = reserveAndGetTail(instructions, 4);
// NOTE(review): the statement controlled by this check is not visible.
222 if (llvm::isa<AIE::BaseNPU2TargetModel>(tm))
// Row and column counts are narrowed to 8 bits here.
224 uint8_t numRows = tm.
rows();
225 uint8_t numCols = tm.
columns();
// words[0]: numRows in bits 24-31, devGen in bits 16-23, minor in bits 8-15,
// major in the low bits.
228 words[0] = (numRows << 24) | (devGen << 16) | (
minor << 8) | major;
// words[1]: number of mem-tile rows shifted to bit 8, column count in the low
// bits.
229 words[1] = (numMemTileRows << 8) | numCols;
// Look up the runtime sequence inside the device.
231 AIE::RuntimeSequenceOp seq =
232 AIE::RuntimeSequenceOp::getForSymbolInDeviceOrError(deviceOp,
// Visit every operation in every block of the sequence body and dispatch on
// its concrete op type to the matching append helper.
237 for (Block &block : seq.getBody()) {
238 for (Operation &o : block) {
239 llvm::TypeSwitch<Operation *>(&o)
240 .Case<NpuSyncOp>([&](
auto op) {
242 appendSync(instructions, op);
244 .Case<NpuWrite32Op>([&](
auto op) {
246 appendWrite32(instructions, op);
248 .Case<NpuBlockWriteOp>([&](
auto op) {
250 appendBlockWrite(instructions, op);
252 .Case<NpuMaskWrite32Op>([&](
auto op) {
254 appendMaskWrite32(instructions, op);
256 .Case<NpuLoadPdiOp>([&](
auto op) {
258 appendLoadPdi(instructions, op);
260 .Case<NpuAddressPatchOp>([&](
auto op) {
262 appendAddressPatch(instructions, op);
264 .Case<NpuPreemptOp>([&](
auto op) {
266 appendPreempt(instructions, op);
// The last two header words are written through `instructions` rather than
// `words`: the append helpers resize the vector, so the earlier view may no
// longer point at its storage.
// NOTE(review): indices 2 and 3 address the start of the vector, which matches
// the header only if `instructions` was empty on entry - confirm with callers.
// NOTE(review): how `count` is computed is not visible in this listing.
272 instructions[2] = count;
// instructions[3]: total size of the vector in bytes.
273 instructions[3] = instructions.size() *
sizeof(uint32_t);
// Walks the entry block of a runtime sequence and, for each control packet
// op, appends two header words plus optional data words to `instructions`.
// NOTE(review): the declarator line is missing from this listing; this is
// presumably AIETranslateControlPacketsToUI32Vec. The declaration of `size`,
// the body of the parity lambda and the handling of non-packet ops are also
// not visible.
278 ModuleOp module, std::vector<uint32_t> &instructions, StringRef deviceName,
279 StringRef sequenceName) {
// Look up the device by symbol name in the module.
281 AIE::DeviceOp::getForSymbolInModuleOrError(module, deviceName);
// Builder positioned at the start of the device body; it is handed to
// TileOp::getOrCreate below.
285 OpBuilder builder = OpBuilder::atBlockBegin(deviceOp.getBody());
286 AIE::RuntimeSequenceOp seq =
287 AIE::RuntimeSequenceOp::getForSymbolInDeviceOrError(deviceOp,
// Only the first block of the sequence body is processed.
293 Block &entry = seq.getBody().front();
294 for (
auto &o : entry) {
// dyn_cast yields a null op when `o` is not a control packet op.
295 auto packetOp = dyn_cast<AIEX::NpuControlPacketOp>(o);
300 auto data = packetOp.getData();
// Two header words plus `size` payload words are reserved. This happens before
// `size` may be replaced by the op's length just below, so that later value
// does not affect how many words are reserved.
304 auto words = reserveAndGetTail(instructions, 2 + size);
// With no inline data, fall back to the explicit length when one is present.
306 if (!data && packetOp.getLength())
307 size = *packetOp.getLength();
// Helper whose result supplies the top bit of each header word.
// NOTE(review): the lambda body is missing from this listing.
309 auto parity = [](uint32_t n) {
// Column and row are taken from the op (derived from its address, going by
// the accessor names) and used to find or create the destination tile.
319 int col = packetOp.getColumnFromAddr();
320 int row = packetOp.getRowFromAddr();
// NOTE(review): getOrCreate presumably inserts a new TileOp when none exists,
// which would modify the module - confirm.
321 auto destTile = TileOp::getOrCreate(builder, deviceOp,
col,
row);
322 auto info = destTile->getAttrOfType<AIE::PacketInfoAttr>(
"controller_id");
// First header: packet type (3 bits) at bit 12, packet id in bits 0-7.
325 hdr = (info.getPktType() & 0x7) << 12 | (info.getPktId() & 0xff);
// NOTE(review): the condition selecting between the assignment above and this
// warning is not visible; presumably it tests whether `info` is set.
327 destTile->emitWarning(
"Expected controller_id attribute");
// words[0]: first header with the parity result placed in bit 31.
328 words[0] = hdr | (0x1 & parity(hdr)) << 31;
// Second header: 20-bit address, (size - 1) at bit 20, opcode at bit 22,
// stream id at bit 24. `size - 1` wraps around if `size` is 0.
331 uint32_t addr = packetOp.getAddress() & 0xFFFFF;
332 uint32_t beats = size - 1;
333 uint32_t opc = packetOp.getOpcode();
334 uint32_t
id = packetOp.getStreamId();
335 hdr =
id << 24 | opc << 22 | beats << 20 | addr;
// words[1]: second header with the parity result placed in bit 31.
336 words[1] = hdr | (0x1 & parity(hdr)) << 31;
// Data words are copied only for opcodes 0x0 and 0x2.
// NOTE(review): data.value() is called without a visible check that `data` is
// set - confirm these opcodes always carry data.
339 if (opc == 0x0 || opc == 0x2)
340 for (
unsigned i = 0; i < size; i++)
341 words[i + 2] = data.value()[i];
@ XAIE_IO_CUSTOM_OP_RECORD_TIMER
@ XAIE_IO_CUSTOM_OP_READ_REGS
@ XAIE_IO_UPDATE_STATE_TABLE
@ XAIE_IO_CUSTOM_OP_DDR_PATCH
@ XAIE_IO_CUSTOM_OP_MERGE_SYNC
@ XAIE_IO_LOAD_PM_END_INTERNAL
@ XAIE_IO_CREATE_SCRATCHPAD
@ XAIE_IO_CUSTOM_OP_BEGIN
@ XAIE_CONFIG_SHIMDMA_DMABUF_BD
virtual int rows() const =0
Return the number of rows in the device.
virtual int columns() const =0
Return the number of columns in the device.
virtual uint32_t getNumMemTileRows() const =0
Include the generated interface declarations.
mlir::LogicalResult AIETranslateControlPacketsToUI32Vec(mlir::ModuleOp, std::vector< uint32_t > &, llvm::StringRef deviceName="", llvm::StringRef sequenceName="")
mlir::LogicalResult AIETranslateNpuToBinary(mlir::ModuleOp, std::vector< uint32_t > &, llvm::StringRef deviceName="", llvm::StringRef sequenceName="")