16#include "mlir/Dialect/Func/IR/FuncOps.h"
17#include "mlir/Interfaces/DataLayoutInterfaces.h"
18#include "mlir/Tools/mlir-translate/MlirTranslateMain.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/TypeSwitch.h"
22#include "llvm/Support/Format.h"
// Grows `instructions` by `tailSize` words and returns a mutable view whose
// first element is the first newly added word.
// NOTE(review): this listing is truncated after the first argument of the
// returned MutableArrayRef; the length argument and the closing brace are not
// visible here (presumably the length is `tailSize` -- confirm).
70llvm::MutableArrayRef<uint32_t>
71reserveAndGetTail(std::vector<uint32_t> &instructions, uint64_t tailSize) {
72 auto oldSize = instructions.size();
73 auto newSize = oldSize + tailSize;
// The appended elements are filled with 0, so callers may build words with |=.
74 instructions.resize(newSize, 0);
// data() is read after resize(), so the pointer refers to the current storage.
// Any later growth of `instructions` may reallocate and invalidate this view.
75 return llvm::MutableArrayRef<uint32_t>(instructions.data() + oldSize,
// Encodes an NpuSyncOp into four words appended to `instructions`.
// NOTE(review): this listing skips lines; the write to words[0], anything else
// in the gaps, and the closing brace are not visible here.
79void appendSync(std::vector<uint32_t> &instructions, NpuSyncOp op) {
81 auto words = reserveAndGetTail(instructions, 4);
// words[1]: element count of the returned view times sizeof(uint32_t),
// i.e. the size of the view in bytes.
86 words[1] = words.size() *
sizeof(uint32_t);
// words[2]: direction in bits 0-7, row in bits 8-15, column in bits 16-23.
// The |= accumulation relies on the zero fill done by reserveAndGetTail.
88 words[2] |=
static_cast<uint32_t
>(op.getDirection()) & 0xff;
89 words[2] |= (op.getRow() & 0xff) << 8;
90 words[2] |= (op.getColumn() & 0xff) << 16;
// words[3]: row count in bits 8-15, column count in bits 16-23, channel in
// bits 24-31. No write to bits 0-7 is visible in this listing.
92 words[3] |= (op.getRowNum() & 0xff) << 8;
93 words[3] |= (op.getColumnNum() & 0xff) << 16;
94 words[3] |= (op.getChannel() & 0xff) << 24;
// Encodes an NpuWrite32Op into six words appended to `instructions`.
// NOTE(review): this listing skips lines; the writes to words[0], words[1] and
// words[3], the use of `col`/`row`, and the closing braces are not visible.
97void appendWrite32(std::vector<uint32_t> &instructions, NpuWrite32Op op) {
99 auto words = reserveAndGetTail(instructions, 6);
// An op that still carries a buffer reference is reported as an error.
// NOTE(review): the six words were already appended above; if the hidden
// lines return early here, they stay in `instructions` as zeros -- confirm.
101 if (op.getBuffer()) {
102 op.emitOpError(
"Cannot translate symbolic address");
// words[2]: the address as stored on the op.
108 words[2] = op.getAddress();
109 auto col = op.getColumn();
110 auto row = op.getRow();
// words[4]: the value to write.
117 words[4] = op.getValue();
// words[5]: element count of the view times sizeof(uint32_t) (size in bytes).
118 words[5] = words.size() *
sizeof(uint32_t);
// Encodes an NpuMaskWrite32Op into seven words appended to `instructions`.
// NOTE(review): this listing skips lines; the writes to words[0], words[1] and
// words[3], the use of `col`/`row`, and the closing braces are not visible.
121void appendMaskWrite32(std::vector<uint32_t> &instructions,
122 NpuMaskWrite32Op op) {
124 auto words = reserveAndGetTail(instructions, 7);
// An op that still carries a buffer reference is reported as an error.
// NOTE(review): the seven words were already appended above; if the hidden
// lines return early here, they stay in `instructions` as zeros -- confirm.
126 if (op.getBuffer()) {
127 op.emitOpError(
"Cannot translate symbolic address");
// words[2]: the address as stored on the op.
133 words[2] = op.getAddress();
134 auto col = op.getColumn();
135 auto row = op.getRow();
// words[4]: the value; words[5]: the mask.
142 words[4] = op.getValue();
143 words[5] = op.getMask();
// words[6]: element count of the view times sizeof(uint32_t) (size in bytes).
144 words[6] = words.size() *
sizeof(uint32_t);
// Encodes an NpuAddressPatchOp into twelve words appended to `instructions`.
// NOTE(review): this listing skips lines; only the writes to words[1], [6],
// [8] and [10] are visible. Words with no hidden write stay at the zero fill
// from reserveAndGetTail.
147void appendAddressPatch(std::vector<uint32_t> &instructions,
148 NpuAddressPatchOp op) {
150 auto words = reserveAndGetTail(instructions, 12);
// words[1]: element count of the view times sizeof(uint32_t) (size in bytes).
154 words[1] = words.size() *
sizeof(uint32_t);
// words[6]: the op's `addr` attribute.
158 words[6] = op.getAddr();
// words[8]: the op's argument index.
160 words[8] = op.getArgIdx();
// words[10]: the op's `arg_plus` value.
162 words[10] = op.getArgPlus();
// Encodes an NpuBlockWriteOp: a four-word header followed by the payload
// taken from the initial value of the memref.global that backs the op's data.
// NOTE(review): this listing skips lines; the conditions guarding each
// diagnostic, the early exits (if any), the writes to words[0], the payload
// loop header and the closing braces are not visible here.
165void appendBlockWrite(std::vector<uint32_t> &instructions, NpuBlockWriteOp op) {
167 Value memref = op.getData();
// Bit width of the memref element type according to the data layout found via
// DataLayout::closest(op).
168 DataLayout dataLayout = DataLayout::closest(op);
169 int64_t width = dataLayout.getTypeSizeInBits(cast<MemRefType>(memref.getType()).getElementType());
// NOTE(review): emitted as a warning, unlike the errors below; the guarding
// condition on `width` is not shown.
171 op.emitWarning(
"Only 32-bit data type is supported for now");
// The data operand is expected to be produced by memref.get_global.
175 memref::GetGlobalOp getGlobal = memref.getDefiningOp<memref::GetGlobalOp>();
177 op.emitError(
"Only MemRefs from memref.get_global are supported");
// Resolve the referenced symbol in the enclosing AIE::DeviceOp.
// dyn_cast_if_present tolerates a null lookup result.
181 auto global = dyn_cast_if_present<memref::GlobalOp>(
182 op->getParentOfType<AIE::DeviceOp>().lookupSymbol(getGlobal.getName()));
184 op.emitError(
"Global symbol not found");
188 auto initVal = global.getInitialValue();
190 op.emitError(
"Global symbol has no initial value");
// The initial value must be a DenseIntElementsAttr for the payload copy below.
194 auto data = dyn_cast<DenseIntElementsAttr>(*initVal);
196 op.emitError(
"Global symbol initial value is not a dense int array");
// Header occupies words[0..3]; payload elements start at index 4.
200 unsigned payload_start = 4;
201 auto words = reserveAndGetTail(instructions, data.size() + payload_start);
// words[2]: the address as stored on the op.
205 words[2] = op.getAddress();
206 auto col = op.getColumn();
207 auto row = op.getRow();
// words[1]: column in bits 0-7, row in bits 8-15. Both are dereferenced with
// `*`; NOTE(review): the check that they hold a value is not visible here.
209 words[1] = (*
col & 0xff) | ((*
row & 0xff) << 8);
// words[3]: element count of the view times sizeof(uint32_t) (size in bytes,
// header plus payload if the view spans everything reserved above).
214 words[3] = words.size() *
sizeof(uint32_t);
// Payload: each element `d` is stored zero-extended, one per word.
216 unsigned i = payload_start;
218 words[i++] = d.getZExtValue();
// Appends one instruction word for a preempt operation.
// NOTE(review): this listing skips lines; the second parameter, the value
// written into the word and the closing brace are not visible here.
221void appendPreempt(std::vector<uint32_t> &instructions,
224 auto words = reserveAndGetTail(instructions, 1);
// NOTE(review): the first line of this signature is missing from the listing.
// The parameters match AIETranslateNpuToBinary(ModuleOp, std::vector<uint32_t>&,
// StringRef sequenceName) -- confirm. The visible code writes a four-word
// header and then appends the encoding of each supported op found in the
// selected runtime sequence(s).
// The definitions of `tm`, `devGen`, `minor`, `major`, `numMemTileRows` and
// `count`, the TypeSwitch lambda tails and the return are in lines not shown.
232 std::vector<uint32_t> &instructions,
233 StringRef sequenceName) {
// Header words; filled in partly here and partly after all ops are appended.
235 auto words = reserveAndGetTail(instructions, 4);
// Uses the first DeviceOp in the module.
// NOTE(review): begin() is dereferenced without a visible emptiness check.
237 DeviceOp deviceOp = *
module.getOps<DeviceOp>().begin();
244 if (llvm::isa<AIE::BaseNPU2TargetModel>(tm))
// NOTE(review): rows()/columns() return int per the target-model interface;
// they are narrowed to uint8_t here.
246 uint8_t numRows = tm.
rows();
247 uint8_t numCols = tm.
columns();
// words[0]: numRows in bits 24-31, devGen from bit 16, minor from bit 8,
// major from bit 0 (the operands are not masked in this expression).
250 words[0] = (numRows << 24) | (devGen << 16) | (
minor << 8) | major;
// words[1]: numMemTileRows from bit 8, numCols in the low bits.
251 words[1] = (numMemTileRows << 8) | numCols;
253 auto sequenceOps = deviceOp.getOps<AIEX::RuntimeSequenceOp>();
254 for (
auto seq : sequenceOps) {
// A non-empty sequenceName selects only the sequence with that symbol name.
// The statement controlled by this `if` is not shown (presumably `continue`).
255 if (sequenceName.size() && sequenceName != seq.getSymName())
// Only the first block of the sequence body is walked.
257 Block &entry = seq.getBody().front();
258 for (
auto &o : entry) {
// Dispatch on the concrete op type to the matching append helper.
259 llvm::TypeSwitch<Operation *>(&o)
260 .Case<NpuSyncOp>([&](
auto op) {
262 appendSync(instructions, op);
264 .Case<NpuWrite32Op>([&](
auto op) {
266 appendWrite32(instructions, op);
268 .Case<NpuBlockWriteOp>([&](
auto op) {
270 appendBlockWrite(instructions, op);
272 .Case<NpuMaskWrite32Op>([&](
auto op) {
274 appendMaskWrite32(instructions, op);
276 .Case<NpuAddressPatchOp>([&](
auto op) {
278 appendAddressPatch(instructions, op);
280 .Case<NpuPreemptOp>([&](
auto op) {
282 appendPreempt(instructions, op);
// The remaining header fields are written through `instructions` itself, not
// the earlier `words` view: the append helpers resize the vector, which may
// reallocate and invalidate views taken before.
// instructions[3] is the total size of the vector in bytes.
288 instructions[2] = count;
289 instructions[3] = instructions.size() *
sizeof(uint32_t);
// NOTE(review): the first line of this signature is missing from the listing.
// The parameters match AIETranslateControlPacketsToUI32Vec(ModuleOp,
// std::vector<uint32_t>&, StringRef sequenceName) -- confirm. For each control
// packet op in the selected runtime sequence(s), the visible code appends two
// header words and, for opcodes 0x0 and 0x2, the packet's data words.
// The declaration of `size`, the body of `parity`, the null checks and the
// return are in lines not shown.
294 ModuleOp module, std::vector<uint32_t> &instructions,
295 StringRef sequenceName) {
// Uses the first DeviceOp in the module.
// NOTE(review): begin() is dereferenced without a visible emptiness check.
296 DeviceOp deviceOp = *
module.getOps<DeviceOp>().begin();
// Builder positioned at the start of the device body; passed to
// TileOp::getOrCreate below.
297 OpBuilder builder = OpBuilder::atBlockBegin(deviceOp.getBody());
299 auto sequenceOps = deviceOp.getOps<AIEX::RuntimeSequenceOp>();
300 for (
auto seq : sequenceOps) {
// A non-empty sequenceName selects only the sequence with that symbol name.
// The statement controlled by this `if` is not shown (presumably `continue`).
301 if (sequenceName.size() && sequenceName != seq.getSymName())
303 Block &entry = seq.getBody().front();
304 for (
auto &o : entry) {
// dyn_cast yields null for other op kinds; the handling of that case is not
// shown in this listing.
305 auto packetOp = dyn_cast<AIEX::NpuControlPacketOp>(o);
310 auto data = packetOp.getData();
// Two header words plus `size` payload words, using the value of `size` at
// this point.
314 auto words = reserveAndGetTail(instructions, 2 + size);
// Without data, an explicit length attribute supplies `size`. This happens
// after the reservation above, so it does not add payload words.
316 if (!data && packetOp.getLength())
317 size = *packetOp.getLength();
// Helper used below to derive bit 31 of each header word (body not shown).
319 auto parity = [](uint32_t n) {
// The destination tile is derived from the packet address.
329 int col = packetOp.getColumnFromAddr();
330 int row = packetOp.getRowFromAddr();
331 auto destTile = TileOp::getOrCreate(builder, deviceOp,
col,
row);
// Packet type and id are read from the tile's "controller_id" attribute.
332 auto info = destTile->getAttrOfType<AIE::PacketInfoAttr>(
"controller_id");
334 return destTile->emitError(
"Expected controller_id attribute");
// First header word: packet type in bits 12-14, packet id in bits 0-7,
// low bit of parity(hdr) in bit 31.
335 uint32_t hdr = (info.getPktType() & 0x7) << 12 | (info.getPktId() & 0xff);
336 words[0] = hdr | (0x1 & parity(hdr)) << 31;
// Second header word: stream id from bit 24, opcode from bit 22, beats from
// bit 20, low 20 address bits in bits 0-19, low bit of parity(hdr) in bit 31.
339 uint32_t addr = packetOp.getAddress() & 0xFFFFF;
// NOTE(review): if `size` is 0 this wraps to 0xFFFFFFFF as a uint32_t, and
// `beats` is not masked before the shift below -- confirm size >= 1 here.
340 uint32_t beats = size - 1;
341 uint32_t opc = packetOp.getOpcode();
342 uint32_t
id = packetOp.getStreamId();
343 hdr =
id << 24 | opc << 22 | beats << 20 | addr;
344 words[1] = hdr | (0x1 & parity(hdr)) << 31;
// Only opcodes 0x0 and 0x2 carry a payload; data words follow the two headers.
// NOTE(review): data.value() assumes `data` is present for these opcodes.
347 if (opc == 0x0 || opc == 0x2)
348 for (
unsigned i = 0; i < size; i++)
349 words[i + 2] = data.value()[i];
@ XAIE_IO_CUSTOM_OP_RECORD_TIMER
@ XAIE_IO_CUSTOM_OP_READ_REGS
@ XAIE_IO_UPDATE_STATE_TABLE
@ XAIE_IO_CUSTOM_OP_DDR_PATCH
@ XAIE_IO_CUSTOM_OP_MERGE_SYNC
@ XAIE_IO_LOAD_PM_END_INTERNAL
@ XAIE_IO_CREATE_SCRATCHPAD
@ XAIE_IO_CUSTOM_OP_BEGIN
@ XAIE_CONFIG_SHIMDMA_DMABUF_BD
virtual int rows() const =0
Return the number of rows in the device.
virtual uint32_t getColumnShift() const =0
virtual int columns() const =0
Return the number of columns in the device.
virtual uint32_t getNumMemTileRows() const =0
virtual uint32_t getRowShift() const =0
Include the generated interface declarations.
mlir::LogicalResult AIETranslateNpuToBinary(mlir::ModuleOp, std::vector< uint32_t > &, llvm::StringRef sequenceName="")
const AIETargetModel & getTargetModel(mlir::Operation *op)
mlir::LogicalResult AIETranslateControlPacketsToUI32Vec(mlir::ModuleOp, std::vector< uint32_t > &, llvm::StringRef sequenceName="")