MLIR-AIE
AIERT.cpp
Go to the documentation of this file.
1//===- AIERT.cpp ------------------------------------------------*- C++ -*-===//
2//
3// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7// Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved.
8//
9//===----------------------------------------------------------------------===//
10
11#include "aie/Targets/AIERT.h"
13
14#include "mlir/Support/LogicalResult.h"
15
16extern "C" {
17#include "xaiengine/xaie_core.h"
18#include "xaiengine/xaie_dma.h"
19#include "xaiengine/xaie_elfloader.h"
20#include "xaiengine/xaie_interrupt.h"
21#include "xaiengine/xaie_locks.h"
22#include "xaiengine/xaie_mem.h"
23#include "xaiengine/xaie_plif.h"
24#include "xaiengine/xaie_ss.h"
25#include "xaiengine/xaie_txn.h"
26#include "xaiengine/xaiegbl.h"
27#include "xaiengine/xaiegbl_defs.h"
28}
29
30#include <filesystem>
31
32using namespace mlir;
33
34#define DEBUG_TYPE "aie-aiert"
35
36llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const XAie_LocType &loc) {
37 os << "XAie_LocType(col: " << std::to_string(loc.Col)
38 << ", row: " << std::to_string(loc.Row) << ")";
39 return os;
40}
41
42llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const XAie_Lock &lock) {
43 os << "XAie_Lock(id: " << std::to_string(lock.LockId)
44 << ", val: " << std::to_string(lock.LockVal) << ")";
45 return os;
46}
47
48llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
49 const XAie_Packet &packet) {
50 os << "XAie_Packet(id: " << std::to_string(packet.PktId)
51 << ", type: " << std::to_string(packet.PktType) << ")";
52 return os;
53}
54
55namespace xilinx::AIE {
56
58 : targetModel(tm) {
59 // The first column in the NPU lacks a shim tile. AIE-RT exposes some of
60 // the internals about how this is modeled in a somewhat awkward way.
61 size_t partitionStartCol =
63 size_t partitionNumCols = tm.columns();
64 size_t deviceRows = tm.rows();
65 size_t deviceCols = tm.columns() + partitionStartCol;
66
67 // Don't put this in the target model, because it's XAIE specific.
68 unsigned char devGen;
69 switch (tm.getTargetArch()) {
70 case AIEArch::AIE1: // probably unreachable.
71 devGen = XAIE_DEV_GEN_AIE;
72 break;
73 case AIEArch::AIE2:
74 // FIXME: What if we don't have an IPU? aie-rt
75 // models non-IPU devices differently.
76 devGen = XAIE_DEV_GEN_AIE2IPU;
77 break;
78 case AIEArch::AIE2p:
79 devGen = XAIE_DEV_GEN_AIE2P_STRIX_B0;
80 break;
81 default:
82 assert(false);
83 }
84 configPtr = XAie_Config{
85 /*AieGen*/ devGen,
86 /*BaseAddr*/ XAIE_BASE_ADDR,
87 /*ColShift*/ static_cast<uint8_t>(tm.getColumnShift()),
88 /*RowShift*/ static_cast<uint8_t>(tm.getRowShift()),
89 /*NumRows*/ static_cast<uint8_t>(deviceRows),
90 /*NumCols*/ static_cast<uint8_t>(deviceCols),
91 /*ShimRowNum*/ XAIE_SHIM_ROW,
92 /*MemTileRowStart*/ XAIE_MEM_TILE_ROW_START,
93 /*MemTileNumRows*/ static_cast<uint8_t>(tm.getNumMemTileRows()),
94 /*AieTileRowStart*/
95 static_cast<uint8_t>(XAIE_MEM_TILE_ROW_START + tm.getNumMemTileRows()),
96 /*AieTileNumRows*/
97 static_cast<uint8_t>(tm.rows() - tm.getNumMemTileRows() - 1),
98 /*PartProp*/ {},
99 /*Backend*/ XAIE_IO_BACKEND_CDO};
100 XAie_InstDeclare(_devInst, &configPtr);
101 devInst = _devInst;
102 TRY_XAIE_API_FATAL_ERROR(XAie_SetupPartitionConfig, &devInst,
103 XAIE_PARTITION_BASE_ADDR, partitionStartCol,
104 partitionNumCols);
105 TRY_XAIE_API_FATAL_ERROR(XAie_CfgInitialize, &devInst, &configPtr);
106 TRY_XAIE_API_FATAL_ERROR(XAie_UpdateNpiAddr, &devInst, NPI_ADDR);
107}
108
109LogicalResult AIERTControl::setIOBackend(bool aieSim, bool xaieDebug) {
110 // Quoting: The instance of a device must be always declared using this
111 // macro. In the future, the same macro will be expanded to
112 // allocate more memory from the user application for resource
113 // management.
114 if (aieSim) {
115 TRY_XAIE_API_FATAL_ERROR(XAie_SetIOBackend, &devInst, XAIE_IO_BACKEND_SIM);
116 } else if (xaieDebug)
117 TRY_XAIE_API_FATAL_ERROR(XAie_SetIOBackend, &devInst,
118 XAIE_IO_BACKEND_DEBUG);
119 else
120 TRY_XAIE_API_FATAL_ERROR(XAie_SetIOBackend, &devInst, XAIE_IO_BACKEND_CDO);
121 return success();
122}
123
// Translate the use_lock ops found in a BD block into the acquire/release
// lock fields of the aie-rt DMA descriptor `dmaTileBd`. The block must
// contain both an acquire-type and a release-type use_lock (asserted below).
LogicalResult AIERTControl::configureLocksInBdBlock(XAie_DmaDesc &dmaTileBd,
                                                    Block &block,
                                                    XAie_LocType &tileLoc) {
  LLVM_DEBUG(llvm::dbgs() << "\nstart configuring bds\n");
  std::optional<int> acqValue, relValue, acqLockId, relLockId;
  bool acqEn = false;

  // `lock` keeps the last lock seen; it is reused below for the memtile
  // offset computation.
  LockOp lock;
  for (auto op : block.getOps<UseLockOp>()) {
    // Only dyn_cast if you are going to check if it was of the type
    // expected; if you aren't checking use cast instead as it will at
    // least assert in debug mode with an easier to understand error than
    // dereferencing.
    lock = cast<LockOp>(op.getLock().getDefiningOp());
    switch (op.getAction()) {
    case LockAction::Acquire:
    case LockAction::AcquireGreaterEqual:
      acqEn = op.getAcqEn();
      acqLockId = lock.getLockIDValue();
      acqValue = op.getLockValue();
      // acquire_ge is encoded for the hardware as a negated acquire value.
      if (op.acquireGE())
        acqValue.value() = -acqValue.value();
      break;
    case LockAction::Release:
      relLockId = lock.getLockIDValue();
      relValue = op.getLockValue();
      break;
    }
  }

  assert(acqValue && relValue && acqLockId && relLockId &&
         "expected both use_lock(acquire) and use_lock(release) with bd");

  // On memtiles a lock owned by a neighboring tile is addressed through a
  // base offset into the local lock index space.
  // NOTE(review): the offset is derived from the *last* use_lock's lock op,
  // i.e. it presumes both locks live on the same tile -- confirm.
  if (targetModel.isMemTile(tileLoc.Col, tileLoc.Row)) {
    auto lockOffset = targetModel.getLockLocalBaseIndex(
        tileLoc.Col, tileLoc.Row, lock.colIndex(), lock.rowIndex());
    if (lockOffset && acqLockId)
      acqLockId.value() += lockOffset.value();
    if (lockOffset && relLockId)
      relLockId.value() += lockOffset.value();
  }

  // no RelEn in the arch spec even though the API requires you to set it?
  bool relEn = false;
  XAie_Lock acqLock = XAie_LockInit(acqLockId.value(), acqValue.value());
  XAie_Lock relLock = XAie_LockInit(relLockId.value(), relValue.value());
  TRY_XAIE_API_EMIT_ERROR((*block.getOps<UseLockOp>().begin()),
                          dmaTileBd.DmaMod->SetLock, &dmaTileBd, acqLock,
                          relLock, acqEn, relEn);
  return success();
}
176
177LogicalResult AIERTControl::configureBdInBlock(XAie_DmaDesc &dmaTileBd,
178 Block &block,
179 XAie_LocType &tileLoc, int bdId,
180 std::optional<int> nextBdId) {
181 std::optional<int> packetType;
182 std::optional<int> packetID;
183
184 // Below should go
185 auto maybePacketOps = block.getOps<DMABDPACKETOp>();
186 if (!maybePacketOps.empty()) {
187 assert(llvm::range_size(maybePacketOps) == 1 &&
188 "expected only one dma_bd_packet");
189 auto packetOp = *maybePacketOps.begin();
190 packetType = packetOp.getPacketType();
191 packetID = packetOp.getPacketID();
192 }
193
194 auto bdOp = *block.getOps<DMABDOp>().begin();
195
196 if (targetModel.isShimNOCTile(tileLoc.Col, tileLoc.Row)) {
197 // write them out like this so they show up with names in debug prints
198 uint8_t smid = 0;
199 uint32_t burstLen =
200 getShimBurstLengthBytes(targetModel, bdOp.getBurstLength());
201 uint8_t qOs = 0;
202 uint8_t cache = 0;
203 uint8_t secure = 0;
204 TRY_XAIE_API_EMIT_ERROR(bdOp, XAie_DmaSetAxi, &dmaTileBd, smid,
205 burstLen / 16, qOs, cache, secure);
206 }
207
208 // get address from BufferOp (core,mem) or ExternalBufferOp (shim)
209 uint64_t baseAddr = 0;
210 if (targetModel.isShimNOCTile(tileLoc.Col, tileLoc.Row)) {
211 auto bufferOp =
212 cast<AIE::ExternalBufferOp>(bdOp.getBuffer().getDefiningOp());
213 // external buffers aren't required to have an address here because the
214 // address might get patched later or the default of zero might be a valid
215 // address.
216 if (bufferOp.getAddress())
217 baseAddr = bufferOp.getAddress().value();
218 } else {
219 auto bufferOp = cast<AIE::BufferOp>(bdOp.getBuffer().getDefiningOp());
220 if (!bufferOp.getAddress())
221 return bufferOp.emitError("buffer must have address assigned");
222 baseAddr = bufferOp.getAddress().value();
223 }
224
225 if (targetModel.isMemTile(tileLoc.Col, tileLoc.Row)) {
226 // check if buffer is allocated on the same memtile, the west, or the east
227 // one
228 auto bufferOp = cast<AIE::BufferOp>(bdOp.getBuffer().getDefiningOp());
229 auto bufferRow = bufferOp.getTileOp().getRow();
230 auto bufferCol = bufferOp.getTileOp().getCol();
231 auto addrOffset = targetModel.getMemLocalBaseAddress(
232 tileLoc.Col, tileLoc.Row, bufferCol, bufferRow);
233 if (addrOffset)
234 baseAddr += addrOffset.value();
235 }
236
237 std::optional<llvm::ArrayRef<BDDimLayoutAttr>> dims = bdOp.getDimensions();
238 uint64_t lenInBytes = bdOp.getLenInBytes();
239 uint64_t basePlusOffsetInBytes = baseAddr + bdOp.getOffsetInBytes();
240 if (!dims) {
241 TRY_XAIE_API_EMIT_ERROR(bdOp, XAie_DmaSetAddrLen, &dmaTileBd,
242 basePlusOffsetInBytes, lenInBytes);
243 } else {
244 XAie_DmaTensor dmaTileBdTensor = {};
245 dmaTileBdTensor.NumDim = dims->size();
246 dmaTileBdTensor.Dim = static_cast<XAie_DmaDimDesc *>(
247 calloc(dmaTileBdTensor.NumDim, sizeof(XAie_DmaDimDesc)));
248 if (!dmaTileBdTensor.Dim)
249 return bdOp.emitError("couldn't allocate array of XAie_DmaDimDesc");
250 // libxaie requires stride in multiples of 32b
251 double elementWidthIn32bWords =
252 static_cast<double>(bdOp.getBufferElementTypeWidthInBytes()) / 4.0;
253 for (size_t i = 0; i < dims->size(); i++) {
254 // Pass down dimensions in reverse order; in the MLIR, this allows
255 // us to specify step sizes/wraps in the same order as we would
256 // access a multi-dim C array, with the highest dimension first.
257 int j = dims->size() - i - 1;
258 uint16_t size;
259 uint32_t stride;
260 if (j > 0) {
261 stride = static_cast<uint32_t>(dims.value()[i].getStride() *
262 elementWidthIn32bWords);
263 size = dims.value()[i].getSize();
264 } else {
265 stride = dims.value()[i].getStride();
266 size = static_cast<uint16_t>(dims.value()[i].getSize() *
267 elementWidthIn32bWords);
268 }
269 stride = stride > 0 ? stride : 1;
270 // Assume AIE-ML architecture (ie use AieMlDimDesc instead of AieDimDesc);
271 // asserted in AIETranslateToCDODirect).
272 dmaTileBdTensor.Dim[j].AieMlDimDesc = {stride, size};
273 }
274 TRY_XAIE_API_EMIT_ERROR(bdOp, XAie_DmaSetMultiDimAddr, &dmaTileBd,
275 &dmaTileBdTensor, basePlusOffsetInBytes,
276 lenInBytes);
277 }
278
279 // ND zero padding.
280 std::optional<llvm::ArrayRef<BDPadLayoutAttr>> padDims =
281 bdOp.getPadDimensions();
282
283 if (padDims) {
284 XAie_DmaPadTensor dmaPadTensor = {};
285 dmaPadTensor.NumDim = padDims->size();
286 dmaPadTensor.PadDesc = static_cast<XAie_PadDesc *>(
287 calloc(dmaPadTensor.NumDim, sizeof(XAie_PadDesc)));
288 if (!dmaPadTensor.PadDesc)
289 return bdOp.emitError("couldn't allocate array of XAie_PadDesc");
290 // libxaie requires stride in multiples of 32b
291 double elementWidthIn32bWords =
292 static_cast<double>(bdOp.getBufferElementTypeWidthInBytes()) / 4.0;
293 for (size_t i = 0; i < padDims->size(); i++) {
294 // Pass down dimensions in reverse order.
295 int j = padDims->size() - i - 1;
296 uint8_t before;
297 uint8_t after;
298 if (j > 0) {
299 before = static_cast<uint8_t>(padDims.value()[i].getConstPadBefore());
300 after = static_cast<uint8_t>(padDims.value()[i].getConstPadAfter());
301 } else {
302 before = static_cast<uint8_t>(padDims.value()[i].getConstPadBefore() *
303 elementWidthIn32bWords);
304 after = static_cast<uint8_t>(padDims.value()[i].getConstPadAfter() *
305 elementWidthIn32bWords);
306 }
307 dmaPadTensor.PadDesc[j] = {before, after};
308 }
309 TRY_XAIE_API_EMIT_ERROR(bdOp, XAie_DmaSetPadding, &dmaTileBd,
310 &dmaPadTensor);
311 }
312 if (nextBdId) {
313 auto enableNextBd = 1;
314 TRY_XAIE_API_EMIT_ERROR(bdOp, XAie_DmaSetNextBd, &dmaTileBd,
315 nextBdId.value(), enableNextBd);
316 }
317
318 if (auto packetInfo = bdOp.getPacket()) {
319 packetType = packetInfo->getPktType();
320 packetID = packetInfo->getPktId();
321 }
322
323 if (packetID) {
324 if (!packetType)
325 bdOp.emitError("must have packetType with packetID");
326 if (bdOp.getLen() == 0)
327 return bdOp.emitOpError(
328 "For MM2S channels, if Buffer_Length=0 then Enable_Packet must be "
329 "set to 0, otherwise behavior is undefined (3.7.8 arch spec)");
331 bdOp, XAie_DmaSetPkt, &dmaTileBd,
332 XAie_PacketInit(packetID.value(), packetType.value()));
333 }
334 TRY_XAIE_API_EMIT_ERROR(bdOp, XAie_DmaEnableBd, &dmaTileBd);
335 TRY_XAIE_API_EMIT_ERROR(bdOp, XAie_DmaWriteBd, &devInst, &dmaTileBd, tileLoc,
336 bdId);
337 LLVM_DEBUG(llvm::dbgs() << "\nend configuring bds\n");
338 return success();
339};
340
341LogicalResult
342AIERTControl::pushToBdQueueAndEnable(Operation &op, XAie_LocType &tileLoc,
343 int chNum, const DMAChannelDir &channelDir,
344 int bdId, int repeatCount) {
345 XAie_DmaDirection direction =
346 channelDir == DMAChannelDir::S2MM ? DMA_S2MM : DMA_MM2S;
347 auto enTokenIssue = tileLoc.Row == 0 && direction == DMA_S2MM;
348 // in english repeat_count==0 means "do it once" and don't repeat but
349 // libxaie treats repeat_count=1 as do it once.
350 repeatCount += 1;
351 TRY_XAIE_API_EMIT_ERROR(op, XAie_DmaChannelSetStartQueue, &devInst, tileLoc,
352 chNum, direction, bdId, repeatCount, enTokenIssue);
353 TRY_XAIE_API_EMIT_ERROR(op, XAie_DmaChannelEnable, &devInst, tileLoc, chNum,
354 direction);
355 return success();
356};
357
358LogicalResult AIERTControl::configureLocksAndBd(Block &block,
359 XAie_LocType tileLoc) {
360 DMABDOp bd = *block.getOps<DMABDOp>().begin();
361 assert(bd.getBdId().has_value() &&
362 "DMABDOp must have assigned bd_id; did you forget to run "
363 "aie-assign-bd-ids?");
364 XAie_DmaDesc dmaTileBd;
365 TRY_XAIE_API_EMIT_ERROR(bd, XAie_DmaDescInit, &devInst, &dmaTileBd, tileLoc);
366 if (!block.getOps<UseLockOp>().empty() &&
367 failed(configureLocksInBdBlock(dmaTileBd, block, tileLoc)))
368 return failure();
369 if (!block.getOps<DMABDOp>().empty() &&
370 failed(configureBdInBlock(dmaTileBd, block, tileLoc, bd.getBdId().value(),
371 bd.getNextBdId())))
372 return failure();
373 return success();
374}
375
376LogicalResult AIERTControl::initLocks(DeviceOp &targetOp) {
377 for (auto tileOp : targetOp.getOps<TileOp>()) {
378 auto tileLoc = XAie_TileLoc(tileOp.colIndex(), tileOp.rowIndex());
379 if (!tileOp.isShimTile() && tileOp.getCoreOp()) {
380 TRY_XAIE_API_EMIT_ERROR(tileOp, XAie_CoreReset, &devInst, tileLoc);
381 TRY_XAIE_API_EMIT_ERROR(tileOp, XAie_CoreUnreset, &devInst, tileLoc);
382 // Set locks to zero
383 for (uint8_t l = 0; l < NUM_LOCKS; l++) {
384 auto locInit = XAie_LockInit(l, 0);
385 TRY_XAIE_API_EMIT_ERROR(tileOp, XAie_LockSetValue, &devInst, tileLoc,
386 locInit);
387 }
388 }
389 }
390
391 // Set locks with explicit initializers
392 targetOp.walk<WalkOrder::PreOrder>([&](LockOp lockOp) {
393 if (lockOp.getLockID() && lockOp.getInit()) {
394 auto tileLoc = XAie_TileLoc(lockOp.getTileOp().colIndex(),
395 lockOp.getTileOp().rowIndex());
396 auto locInit = XAie_LockInit(*lockOp.getLockID(), *lockOp.getInit());
397 TRY_XAIE_API_FATAL_ERROR(XAie_LockSetValue, &devInst, tileLoc, locInit);
398 } else
399 LLVM_DEBUG(llvm::dbgs()
400 << "lock op missing either id or init" << lockOp << "\n");
401 });
402 return success();
403}
404
405LogicalResult AIERTControl::initBuffers(DeviceOp &targetOp) {
406 // Set buffers with explicit initializers
407 targetOp.walk<WalkOrder::PreOrder>([&](BufferOp bufferOp) {
408 auto initialValue = bufferOp.getInitialValue();
409 if (!initialValue)
410 return;
411 mlir::DenseElementsAttr denseInit =
412 dyn_cast<mlir::DenseElementsAttr>(initialValue.value());
413 if (!denseInit)
414 return;
415 auto tileLoc = XAie_TileLoc(bufferOp.getTileOp().colIndex(),
416 bufferOp.getTileOp().rowIndex());
417 std::vector<char> byteVec;
418 if (denseInit.getElementType().isIntOrIndex()) {
419 for (auto intVal : denseInit.getValues<APInt>()) {
420 // Get the size in bytes
421 size_t byteSize = (intVal.getBitWidth() + 7) / 8;
422 // Create a buffer for the integer bytes and copy
423 std::vector<char> bytes(byteSize);
424 std::copy(
425 static_cast<const char *>(static_cast<const void *>(&intVal)),
426 static_cast<const char *>(static_cast<const void *>(&intVal)) +
427 byteSize,
428 bytes.begin());
429 byteVec.insert(byteVec.end(), bytes.begin(), bytes.end());
430 }
431 } else if (isa<FloatType>(denseInit.getElementType())) {
432 for (auto floatVal : denseInit.getValues<APFloat>()) {
433 APInt floatInt = floatVal.bitcastToAPInt();
434 // Get the size in bytes
435 size_t byteSize = (floatInt.getBitWidth() + 7) / 8;
436 // Create a buffer for the float bytes and copy
437 std::vector<char> bytes(byteSize);
438 std::copy(
439 static_cast<const char *>(static_cast<const void *>(&floatInt)),
440 static_cast<const char *>(static_cast<const void *>(&floatInt)) +
441 byteSize,
442 bytes.begin());
443 byteVec.insert(byteVec.end(), bytes.begin(), bytes.end());
444 }
445 } else {
446 llvm::outs() << "buffer op type not supported for initialization "
447 << bufferOp << "\n";
448 return;
449 }
450 TRY_XAIE_API_FATAL_ERROR(XAie_DataMemBlockWrite, &devInst, tileLoc,
451 bufferOp.getAddress().value(), byteVec.data(),
452 byteVec.size());
453 });
454 return success();
455}
456
457LogicalResult AIERTControl::configureSwitches(DeviceOp &targetOp) {
458
459 // StreamSwitch (switchbox) configuration
460 for (auto switchboxOp : targetOp.getOps<SwitchboxOp>()) {
461 int32_t col = switchboxOp.colIndex();
462 int32_t row = switchboxOp.rowIndex();
463 XAie_LocType tileLoc = XAie_TileLoc(col, row);
465 "Only NPU currently supported");
466
467 Block &b = switchboxOp.getConnections().front();
468 for (auto connectOp : b.getOps<ConnectOp>())
470 switchboxOp, XAie_StrmConnCctEnable, &devInst, tileLoc,
471 WIRE_BUNDLE_TO_STRM_SW_PORT_TYPE.at(connectOp.getSourceBundle()),
472 connectOp.sourceIndex(),
473 WIRE_BUNDLE_TO_STRM_SW_PORT_TYPE.at(connectOp.getDestBundle()),
474 connectOp.destIndex());
475
476 for (auto masterSetOp : b.getOps<MasterSetOp>()) {
477 int mask = 0;
478 int arbiter = -1;
479
480 for (auto val : masterSetOp.getAmsels()) {
481 AMSelOp amsel = cast<AMSelOp>(val.getDefiningOp());
482 arbiter = amsel.arbiterIndex();
483 int msel = amsel.getMselValue();
484 mask |= (1 << msel);
485 }
486
487 // the default is to keep header
488 bool keepHeader = true;
489 // the default for dma destinations is to drop the header
490 if (masterSetOp.getDestBundle() == WireBundle::DMA)
491 keepHeader = false;
492 // assume a connection going south from row zero gets wired to shimdma
493 // by a shimmux.
494 if (switchboxOp.rowIndex() == 0 &&
495 masterSetOp.getDestBundle() == WireBundle::South)
496 keepHeader = false;
497
498 // "keep_pkt_header" attribute overrides the above defaults, if set
499 if (auto keep = masterSetOp.getKeepPktHeader())
500 keepHeader = *keep;
501
502 auto dropHeader =
503 keepHeader ? XAIE_SS_PKT_DONOT_DROP_HEADER : XAIE_SS_PKT_DROP_HEADER;
505 masterSetOp, XAie_StrmPktSwMstrPortEnable, &devInst, tileLoc,
506 WIRE_BUNDLE_TO_STRM_SW_PORT_TYPE.at(masterSetOp.getDestBundle()),
507 masterSetOp.destIndex(), dropHeader, arbiter, mask);
508 }
509
510 for (auto packetRulesOp : b.getOps<PacketRulesOp>()) {
511 int slot = 0;
512 Block &block = packetRulesOp.getRules().front();
513 for (auto slotOp : block.getOps<PacketRuleOp>()) {
514 AMSelOp amselOp = cast<AMSelOp>(slotOp.getAmsel().getDefiningOp());
515 int arbiter = amselOp.arbiterIndex();
516 int msel = amselOp.getMselValue();
517 TRY_XAIE_API_EMIT_ERROR(packetRulesOp, XAie_StrmPktSwSlavePortEnable,
518 &devInst, tileLoc,
519 WIRE_BUNDLE_TO_STRM_SW_PORT_TYPE.at(
520 packetRulesOp.getSourceBundle()),
521 packetRulesOp.sourceIndex());
522 auto packetInit = XAie_PacketInit(slotOp.valueInt(), /*PktType*/ 0);
523 // TODO Need to better define packet id,type used here
524 TRY_XAIE_API_EMIT_ERROR(packetRulesOp, XAie_StrmPktSwSlaveSlotEnable,
525 &devInst, tileLoc,
526 WIRE_BUNDLE_TO_STRM_SW_PORT_TYPE.at(
527 packetRulesOp.getSourceBundle()),
528 packetRulesOp.sourceIndex(), slot, packetInit,
529 slotOp.maskInt(), msel, arbiter);
530 slot++;
531 }
532 }
533 }
534
535 for (auto muxOp : targetOp.getOps<ShimMuxOp>()) {
536 // NOTE ShimMux always connects from the south as directions are
537 // defined relative to the tile stream switch.
538 auto tileLoc =
539 XAie_TileLoc(muxOp.getTileOp().getCol(), muxOp.getTileOp().getRow());
540 Block &b = muxOp.getConnections().front();
541 for (auto connectOp : b.getOps<ConnectOp>()) {
542 // demux!
543 if (connectOp.getSourceBundle() == WireBundle::North)
544 TRY_XAIE_API_EMIT_ERROR(muxOp, XAie_EnableAieToShimDmaStrmPort,
545 &devInst, tileLoc, connectOp.sourceIndex());
546 // mux
547 if (connectOp.getDestBundle() == WireBundle::North)
548 TRY_XAIE_API_EMIT_ERROR(muxOp, XAie_EnableShimDmaToAieStrmPort,
549 &devInst, tileLoc, connectOp.destIndex());
550 }
551 }
552
553 for (auto switchboxOp : targetOp.getOps<ShimSwitchboxOp>()) {
554 Block &b = switchboxOp.getConnections().front();
555 auto tileLoc = XAie_TileLoc(switchboxOp.getCol(), 0);
556 for (auto connectOp : b.getOps<ConnectOp>())
558 switchboxOp, XAie_StrmConnCctEnable, &devInst, tileLoc,
559 WIRE_BUNDLE_TO_STRM_SW_PORT_TYPE.at(connectOp.getSourceBundle()),
560 connectOp.sourceIndex(),
561 WIRE_BUNDLE_TO_STRM_SW_PORT_TYPE.at(connectOp.getDestBundle()),
562 connectOp.destIndex());
563 }
564
565 // Cascade configuration
566 if (isa<AIE2TargetModel>(targetModel)) {
567 for (auto configOp : targetOp.getOps<ConfigureCascadeOp>()) {
568 TileOp tile = cast<TileOp>(configOp.getTile().getDefiningOp());
569 auto tileLoc = XAie_TileLoc(tile.getCol(), tile.getRow());
571 targetOp, XAie_CoreConfigAccumulatorControl, &devInst, tileLoc,
572 WIRE_BUNDLE_TO_STRM_SW_PORT_TYPE.at(
573 static_cast<WireBundle>(configOp.getInputDir())),
574 WIRE_BUNDLE_TO_STRM_SW_PORT_TYPE.at(
575 static_cast<WireBundle>(configOp.getOutputDir())));
576 }
577 }
578
579 return success();
580}
581
// Apply the full initial configuration for a device: lock/core resets and
// lock initializers, buffer initial values, every BD (with its locks) in all
// mem/memtile/shim DMA regions, the DMA channel start queues, and finally the
// stream switches.
LogicalResult AIERTControl::addInitConfig(DeviceOp &targetOp) {

  if (failed(initLocks(targetOp))) {
    return failure();
  }

  if (failed(initBuffers(targetOp))) {
    return failure();
  }

  // Gather every op that owns a DMA region: core-tile mems, memtile DMAs, and
  // shim DMAs.
  auto memOps = llvm::to_vector_of<TileElement>(targetOp.getOps<MemOp>());
  llvm::append_range(memOps, targetOp.getOps<MemTileDMAOp>());
  llvm::append_range(memOps, targetOp.getOps<ShimDMAOp>());
  for (TileElement memOp : memOps) {
    int col = memOp.getTileID().col;
    int row = memOp.getTileID().row;
    XAie_LocType tileLoc = XAie_TileLoc(col, row);

    // Get the region's entry block, then start traversing through the chain of
    // blocks.
    llvm::SetVector<Block *> blockVector =
        getOrderedChainOfBlocks(&memOp.getOperation()->getRegion(0));

    // handle DMA ops separately
    auto dmaOps = llvm::to_vector_of<DMAOp>(
        memOp.getOperation()->getRegion(0).getOps<DMAOp>());
    if (!dmaOps.empty()) {
      // Structured form: one BD per bd region of each aie.dma op.
      for (auto dmaOp : dmaOps)
        for (auto &bdRegion : dmaOp.getBds()) {
          Block &block = bdRegion.getBlocks().front();
          if (failed(configureLocksAndBd(block, tileLoc)))
            return failure();
        }
    } else {
      // Legacy form: BDs live in the block chain; skip blocks without one.
      for (Block *block : blockVector) {
        if (block->getOps<DMABDOp>().empty())
          continue;
        if (failed(configureLocksAndBd(*block, tileLoc)))
          return failure();
      }
    }

    // Second pass: push the head BD of each channel onto its start queue and
    // enable the channel.
    if (!dmaOps.empty())
      for (auto dmaOp : dmaOps) {
        auto &block = dmaOp.getBds().front().getBlocks().front();
        DMABDOp bd = *block.getOps<DMABDOp>().begin();
        if (failed(pushToBdQueueAndEnable(
                *dmaOp.getOperation(), tileLoc, dmaOp.getChannelIndex(),
                dmaOp.getChannelDir(), bd.getBdId().value(),
                dmaOp.getRepeatCount())))
          return failure();
      }
    else
      for (Block *block : blockVector) {
        for (auto op : block->getOps<DMAStartOp>()) {
          DMABDOp bd = *op.getDest()->getOps<DMABDOp>().begin();
          int chNum = op.getChannelIndex();
          auto channelDir = op.getChannelDir();
          if (failed(pushToBdQueueAndEnable(*bd.getOperation(), tileLoc, chNum,
                                            channelDir, bd.getBdId().value(),
                                            op.getRepeatCount())))
            return failure();
        }
      }
  }

  if (failed(configureSwitches(targetOp))) {
    return failure();
  }

  return success();
}
654
655LogicalResult AIERTControl::addCoreEnable(DeviceOp &targetOp) {
656 // Start execution of all the cores.
657 for (auto tileOp : targetOp.getOps<TileOp>()) {
658 auto tileLoc = XAie_TileLoc(tileOp.colIndex(), tileOp.rowIndex());
659 if (!tileOp.isShimTile() && tileOp.getCoreOp())
660 TRY_XAIE_API_EMIT_ERROR(targetOp, XAie_CoreEnable, &devInst, tileLoc);
661 }
662 return success();
663}
664
665LogicalResult AIERTControl::addAieElf(uint8_t col, uint8_t row,
666 const StringRef elfPath, bool aieSim) {
667 TRY_XAIE_API_LOGICAL_RESULT(XAie_CoreDisable, &devInst,
668 XAie_TileLoc(col, row));
669 TRY_XAIE_API_LOGICAL_RESULT(XAie_DmaChannelResetAll, &devInst,
670 XAie_TileLoc(col, row),
671 XAie_DmaChReset::DMA_CHANNEL_RESET);
672
673 // loadSym: Load symbols from .map file. This argument is not used when
674 // __AIESIM__ is not defined.
675 TRY_XAIE_API_LOGICAL_RESULT(XAie_LoadElf, &devInst, XAie_TileLoc(col, row),
676 elfPath.str().c_str(), /*loadSym*/ aieSim);
677
678 TRY_XAIE_API_LOGICAL_RESULT(XAie_DmaChannelResetAll, &devInst,
679 XAie_TileLoc(col, row),
680 XAie_DmaChReset::DMA_CHANNEL_UNRESET);
681
682 return success();
683}
684
685LogicalResult AIERTControl::addAieElfs(DeviceOp &targetOp,
686 const StringRef elfPath, bool aieSim) {
687 for (auto tileOp : targetOp.getOps<TileOp>())
688 if (tileOp.isShimNOCorPLTile()) {
689 // Resets no needed with V2 kernel driver
690 } else {
691 int col = tileOp.colIndex();
692 int row = tileOp.rowIndex();
693 if (auto coreOp = tileOp.getCoreOp()) {
694 std::string fileName;
695 if (auto fileAttr = coreOp.getElfFile())
696 fileName = fileAttr->str();
697 else
698 fileName = (llvm::Twine("core_") + std::to_string(col) + "_" +
699 std::to_string(row) + ".elf")
700 .str();
701 auto ps = std::filesystem::path::preferred_separator;
702 if (failed(addAieElf(
703 col, row,
704 (llvm::Twine(elfPath) + std::string(1, ps) + fileName).str(),
705 aieSim)))
706 return failure();
707 }
708 }
709 return success();
710}
711
712void AIERTControl::dmaUpdateBdAddr(int col, int row, size_t addr, size_t bdId) {
713 auto tileLoc = XAie_TileLoc(col, row);
714 TRY_XAIE_API_FATAL_ERROR(XAie_DmaUpdateBdAddr, &devInst, tileLoc, addr, bdId);
715}
716
718 TRY_XAIE_API_FATAL_ERROR(XAie_StartTransaction, &devInst,
719 XAIE_TRANSACTION_DISABLE_AUTO_FLUSH);
720}
721
723 XAie_TxnInst *txnInst = XAie_ExportTransactionInstance(&devInst);
724 std::ios_base::fmtflags f(std::cout.flags());
725 for (size_t i = 0; i < txnInst->NumCmds; ++i) {
726 std::cout.flags(f);
727 std::cout << "Txn OpCode: " << std::hex
728 << AIETXNOPCODETOSTR.at(txnInst->CmdBuf[i].Opcode) << "\n";
729 std::cout.flags(f);
730 std::cout << "RegOff: 0x" << std::hex << txnInst->CmdBuf[i].RegOff << "\n";
731 std::cout.flags(f);
732 std::cout << "Value: 0x" << std::hex << txnInst->CmdBuf[i].Value << "\n";
733 std::cout.flags(f);
734 std::cout << "Mask: 0x" << std::hex << txnInst->CmdBuf[i].Mask << "\n";
735 }
736}
737
738} // namespace xilinx::AIE
llvm::raw_ostream & operator<<(llvm::raw_ostream &os, const XAie_LocType &loc)
Definition AIERT.cpp:36
#define XAIE_SHIM_ROW
Definition AIERT.h:177
#define XAIE_PARTITION_BASE_ADDR
Definition AIERT.h:179
#define TRY_XAIE_API_LOGICAL_RESULT(API,...)
Definition AIERT.h:140
#define NPI_ADDR
Definition AIERT.h:181
#define NUM_LOCKS
Definition AIERT.h:182
#define TRY_XAIE_API_FATAL_ERROR(API,...)
Definition AIERT.h:121
#define XAIE_BASE_ADDR
Definition AIERT.h:176
#define TRY_XAIE_API_EMIT_ERROR(OP, API,...)
Definition AIERT.h:131
#define XAIE_MEM_TILE_ROW_START
Definition AIERT.h:178
uint32_t getRowShift() const override
AIEArch getTargetArch() const override
AIE2 TargetModel.
uint32_t getColumnShift() const override
std::optional< uint32_t > getMemLocalBaseAddress(int localCol, int localRow, int memCol, int memRow) const
Return the memory base address (or offset) in the local tile when accessing a neighbor's memory or an...
std::optional< uint32_t > getLockLocalBaseIndex(int localCol, int localRow, int lockCol, int lockRow) const
Return the lock base index (or offset) in the local tile when accessing a neighbor's lock or an empty...
virtual bool isShimNOCTile(int col, int row) const =0
Return true if the given tile is a Shim NOC tile.
bool hasProperty(ModelProperty Prop) const
virtual int columns() const =0
Return the number of columns in the device.
int rows() const override
Return the number of rows in the device.
bool isMemTile(int col, int row) const override
Return true if the given tile is an AIE2 'Memory' tile.
uint32_t getNumMemTileRows() const override
Include the generated interface declarations.
uint32_t getShimBurstLengthBytes(const AIE::AIETargetModel &tm, uint32_t burstLength)
llvm::SetVector< mlir::Block * > getOrderedChainOfBlocks(mlir::Region *region)
AIERTControl(const xilinx::AIE::BaseNPUTargetModel &tm)
Definition AIERT.cpp:57
mlir::LogicalResult configureLocksAndBd(mlir::Block &block, XAie_LocType tileLoc)
Definition AIERT.cpp:358
void dmaUpdateBdAddr(int col, int row, size_t addr, size_t bdId)
Definition AIERT.cpp:712
mlir::LogicalResult addAieElfs(DeviceOp &targetOp, const mlir::StringRef workDirPath, bool aieSim)
Definition AIERT.cpp:685
mlir::LogicalResult setIOBackend(bool aieSim, bool xaieDebug)
Definition AIERT.cpp:109
mlir::LogicalResult configureBdInBlock(XAie_DmaDesc &dmaTileBd, mlir::Block &block, XAie_LocType &tileLoc, int bdId, std::optional< int > nextBdId)
Definition AIERT.cpp:177
mlir::LogicalResult initLocks(DeviceOp &targetOp)
Definition AIERT.cpp:376
XAie_Config configPtr
Definition AIERT.h:188
mlir::LogicalResult configureSwitches(DeviceOp &targetOp)
Definition AIERT.cpp:457
mlir::LogicalResult initBuffers(DeviceOp &targetOp)
Definition AIERT.cpp:405
mlir::LogicalResult addCoreEnable(DeviceOp &targetOp)
Definition AIERT.cpp:655
mlir::LogicalResult pushToBdQueueAndEnable(mlir::Operation &op, XAie_LocType &tileLoc, int chNum, const DMAChannelDir &channelDir, int bdId, int repeatCount)
Definition AIERT.cpp:342
const BaseNPUTargetModel & targetModel
Definition AIERT.h:190
XAie_DevInst devInst
Definition AIERT.h:189
mlir::LogicalResult addAieElf(uint8_t col, uint8_t row, const mlir::StringRef elfPath, bool aieSim)
Definition AIERT.cpp:665
void exportSerializedTransaction()
Definition AIERT.cpp:722
mlir::LogicalResult addInitConfig(DeviceOp &targetOp)
Definition AIERT.cpp:582
mlir::LogicalResult configureLocksInBdBlock(XAie_DmaDesc &dmaTileBd, mlir::Block &block, XAie_LocType &tileLoc)
Definition AIERT.cpp:124