MLIR-AIE
AIERT.cpp
Go to the documentation of this file.
1//===- AIERT.cpp ------------------------------------------------*- C++ -*-===//
2//
3// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7// Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved.
8//
9//===----------------------------------------------------------------------===//
10
11#include "aie/Targets/AIERT.h"
13
14#include "mlir/Support/LogicalResult.h"
15
16extern "C" {
17#include "xaiengine/xaie_core.h"
18#include "xaiengine/xaie_dma.h"
19#include "xaiengine/xaie_elfloader.h"
20#include "xaiengine/xaie_interrupt.h"
21#include "xaiengine/xaie_locks.h"
22#include "xaiengine/xaie_mem.h"
23#include "xaiengine/xaie_plif.h"
24#include "xaiengine/xaie_ss.h"
25#include "xaiengine/xaie_txn.h"
26#include "xaiengine/xaiegbl.h"
27#include "xaiengine/xaiegbl_defs.h"
28}
29
30#include <filesystem>
31
// Map every aie-rt status code (AieRC) to its printable name so failures can
// be reported as e.g. "XAIE_ERR_OUTOFBOUND" instead of a bare integer.
// AIERC_STR expands to a {value, "name"} pair via the preprocessor
// stringizer; the macro is undefined immediately after use.
#define AIERC_STR(x) x, #x
static const std::map<AieRC, std::string> AIERCTOSTR = {
    {AIERC_STR(XAIE_OK)},
    {AIERC_STR(XAIE_ERR)},
    {AIERC_STR(XAIE_INVALID_DEVICE)},
    {AIERC_STR(XAIE_INVALID_RANGE)},
    {AIERC_STR(XAIE_INVALID_ARGS)},
    {AIERC_STR(XAIE_INVALID_TILE)},
    {AIERC_STR(XAIE_ERR_STREAM_PORT)},
    {AIERC_STR(XAIE_INVALID_DMA_TILE)},
    {AIERC_STR(XAIE_INVALID_BD_NUM)},
    {AIERC_STR(XAIE_ERR_OUTOFBOUND)},
    {AIERC_STR(XAIE_INVALID_DATA_MEM_ADDR)},
    {AIERC_STR(XAIE_INVALID_ELF)},
    {AIERC_STR(XAIE_CORE_STATUS_TIMEOUT)},
    {AIERC_STR(XAIE_INVALID_CHANNEL_NUM)},
    {AIERC_STR(XAIE_INVALID_LOCK)},
    {AIERC_STR(XAIE_INVALID_DMA_DIRECTION)},
    {AIERC_STR(XAIE_INVALID_PLIF_WIDTH)},
    {AIERC_STR(XAIE_INVALID_LOCK_ID)},
    {AIERC_STR(XAIE_INVALID_LOCK_VALUE)},
    {AIERC_STR(XAIE_LOCK_RESULT_FAILED)},
    {AIERC_STR(XAIE_INVALID_DMA_DESC)},
    {AIERC_STR(XAIE_INVALID_ADDRESS)},
    {AIERC_STR(XAIE_FEATURE_NOT_SUPPORTED)},
    {AIERC_STR(XAIE_INVALID_BURST_LENGTH)},
    {AIERC_STR(XAIE_INVALID_BACKEND)},
    {AIERC_STR(XAIE_INSUFFICIENT_BUFFER_SIZE)},
    {AIERC_STR(XAIE_ERR_MAX)}};
#undef AIERC_STR
62
// Translate the MLIR-side WireBundle enum into aie-rt's StrmSwPortType for
// stream-switch configuration calls.  PLIO and NOC bundles have no entry
// here (no aie-rt counterpart in this mapping), so lookups with .at() on
// those bundles would throw — callers are expected to pass only the bundles
// listed below.
static const std::map<xilinx::AIE::WireBundle, StrmSwPortType>
    WIRE_BUNDLE_TO_STRM_SW_PORT_TYPE = {
        {xilinx::AIE::WireBundle::Core, StrmSwPortType::CORE},
        {xilinx::AIE::WireBundle::DMA, StrmSwPortType::DMA},
        {xilinx::AIE::WireBundle::TileControl, StrmSwPortType::CTRL},
        {xilinx::AIE::WireBundle::FIFO, StrmSwPortType::FIFO},
        {xilinx::AIE::WireBundle::South, StrmSwPortType::SOUTH},
        {xilinx::AIE::WireBundle::West, StrmSwPortType::WEST},
        {xilinx::AIE::WireBundle::North, StrmSwPortType::NORTH},
        {xilinx::AIE::WireBundle::East, StrmSwPortType::EAST},
        // missing PLIO from WireBundle
        // missing NOC from WireBundle
        {xilinx::AIE::WireBundle::Trace, StrmSwPortType::TRACE},
};
77
78#ifndef NDEBUG
79
80// https://stackoverflow.com/a/32230306
81template <typename H1>
82llvm::raw_ostream &showAIEXRTArgs(llvm::raw_ostream &out, const char *label,
83 H1 &&value) {
84 return out << label << "=" << std::forward<H1>(value);
85}
86
// Recursive case of the pretty-printer: `label` is the stringized argument
// list ("a, b, c"), so the text up to the first comma names the current
// argument.  Print "name=value," and recurse on the tail of the label and
// the remaining values.  Assumes a comma is present whenever `rest` is
// non-empty, which the SHOW_AIERT_ARGS macro guarantees by passing
// #__VA_ARGS__ as the label.
template <typename H1, typename... T>
llvm::raw_ostream &showAIEXRTArgs(llvm::raw_ostream &out, const char *label,
                                  H1 &&value, T &&...rest) {
  const char *pcomma = strchr(label, ',');
  return showAIEXRTArgs(out.write(label, pcomma - label)
                            << "=" << std::forward<H1>(value) << ',',
                        pcomma + 1, std::forward<T>(rest)...);
}
95
96llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const XAie_LocType &loc);
97
98llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const XAie_Lock &lock);
99
100llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const XAie_Packet &packet);
101
// Print "name=value" for every macro argument, using the stringized argument
// list itself as the labels (consumed by showAIEXRTArgs above).
#define SHOW_AIERT_ARGS(os, ...) showAIEXRTArgs(os, #__VA_ARGS__, __VA_ARGS__)

// So that we can use the pattern if(auto r = TRY_XAIE_API...) { // r is nonzero
// }
static_assert(XAIE_OK == 0);

// Invoke an aie-rt API (debug build: with argument tracing); on any nonzero
// status, abort the process with a fatal error naming the API and the
// decoded status string.
#define TRY_XAIE_API_FATAL_ERROR(API, ...)                                     \
  do {                                                                         \
    LLVM_DEBUG(llvm::dbgs() << "trying XAIE API: " << #API << " with args: "); \
    LLVM_DEBUG(SHOW_AIERT_ARGS(llvm::dbgs(), __VA_ARGS__));                    \
    LLVM_DEBUG(llvm::dbgs() << "\n");                                          \
    if (auto r = API(__VA_ARGS__))                                             \
      llvm::report_fatal_error(llvm::Twine(#API " failed with ") +             \
                               AIERCTOSTR.at(r));                              \
  } while (0)

// As above, but on failure emit an op error diagnostic on OP and return it
// from the enclosing function (the enclosing function must return a
// diagnostic-compatible result type).
#define TRY_XAIE_API_EMIT_ERROR(OP, API, ...)                                  \
  do {                                                                         \
    LLVM_DEBUG(llvm::dbgs() << "trying XAIE API: " << #API << " with args: "); \
    LLVM_DEBUG(SHOW_AIERT_ARGS(llvm::dbgs(), __VA_ARGS__));                    \
    LLVM_DEBUG(llvm::dbgs() << "\n");                                          \
    if (auto r = API(__VA_ARGS__))                                             \
      return OP.emitOpError() << #API " failed with " << AIERCTOSTR.at(r);     \
  } while (0)

// As above, but on failure print to stderr and return mlir::failure() from
// the enclosing function.
#define TRY_XAIE_API_LOGICAL_RESULT(API, ...)                                  \
  do {                                                                         \
    LLVM_DEBUG(llvm::dbgs() << "trying XAIE API: " << #API << " with args: "); \
    LLVM_DEBUG(SHOW_AIERT_ARGS(llvm::dbgs(), __VA_ARGS__));                    \
    LLVM_DEBUG(llvm::dbgs() << "\n");                                          \
    if (auto r = API(__VA_ARGS__)) {                                           \
      llvm::errs() << #API " failed with " << AIERCTOSTR.at(r);                \
      return failure();                                                        \
    }                                                                          \
  } while (0)
137
#else

// Release (NDEBUG) variants of the TRY_* macros: identical error-handling
// control flow, without the LLVM_DEBUG tracing.
#define TRY_XAIE_API_FATAL_ERROR(API, ...)                                     \
  do {                                                                         \
    if (auto r = API(__VA_ARGS__))                                             \
      llvm::report_fatal_error(llvm::Twine(#API " failed with ") +             \
                               AIERCTOSTR.at(r));                              \
  } while (0)

#define TRY_XAIE_API_EMIT_ERROR(OP, API, ...)                                  \
  do {                                                                         \
    if (auto r = API(__VA_ARGS__))                                             \
      return OP.emitOpError() << #API " failed with " << AIERCTOSTR.at(r);     \
  } while (0)

#define TRY_XAIE_API_LOGICAL_RESULT(API, ...)                                  \
  do {                                                                         \
    if (auto r = API(__VA_ARGS__)) {                                           \
      llvm::errs() << #API " failed with " << AIERCTOSTR.at(r);                \
      return failure();                                                        \
    }                                                                          \
  } while (0)

#endif
162
163using namespace mlir;
164using namespace xilinx;
165
166#define DEBUG_TYPE "aie-aiert"
167
168llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const XAie_LocType &loc) {
169 os << "XAie_LocType(col: " << std::to_string(loc.Col)
170 << ", row: " << std::to_string(loc.Row) << ")";
171 return os;
172}
173
174llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const XAie_Lock &lock) {
175 os << "XAie_Lock(id: " << std::to_string(lock.LockId)
176 << ", val: " << std::to_string(lock.LockVal) << ")";
177 return os;
178}
179
180llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
181 const XAie_Packet &packet) {
182 os << "XAie_Packet(id: " << std::to_string(packet.PktId)
183 << ", type: " << std::to_string(packet.PktType) << ")";
184 return os;
185}
186
187#define XAIE_BASE_ADDR 0x40000000
188#define XAIE_SHIM_ROW 0
189#define XAIE_MEM_TILE_ROW_START 1
190#define XAIE_PARTITION_BASE_ADDR 0x0
191
192#define NPI_ADDR 0x0
193#define NUM_LOCKS 16
194#define EVEN_BD_NUM_START 0
195#define ODD_BD_NUM_START 24
196
198 XAie_Config configPtr;
199 XAie_DevInst devInst;
200};
201
203
205 : targetModel(tm), aiert(std::make_unique<AIERtImpl>()) {
206 // The first column in the NPU lacks a shim tile. AIE-RT exposes some of
207 // the internals about how this is modeled in a somewhat awkward way.
208 size_t partitionStartCol =
210 size_t partitionNumCols = tm.columns();
211 size_t deviceRows = tm.rows();
212 size_t deviceCols = tm.columns() + partitionStartCol;
213
214 // Don't put this in the target model, because it's XAIE specific.
215 unsigned char devGen;
216 switch (tm.getTargetArch()) {
217 case AIEArch::AIE1: // probably unreachable.
218 devGen = XAIE_DEV_GEN_AIE;
219 break;
220 case AIEArch::AIE2:
221 // FIXME: What if we don't have an IPU? aie-rt
222 // models non-IPU devices differently.
223 devGen = XAIE_DEV_GEN_AIE2IPU;
224 break;
225 case AIEArch::AIE2p:
226 devGen = XAIE_DEV_GEN_AIE2P_STRIX_B0;
227 break;
228 }
229 aiert->configPtr = XAie_Config{
230 /*AieGen*/ devGen,
231 /*BaseAddr*/ XAIE_BASE_ADDR,
232 /*ColShift*/ static_cast<uint8_t>(tm.getColumnShift()),
233 /*RowShift*/ static_cast<uint8_t>(tm.getRowShift()),
234 /*NumRows*/ static_cast<uint8_t>(deviceRows),
235 /*NumCols*/ static_cast<uint8_t>(deviceCols),
236 /*ShimRowNum*/ XAIE_SHIM_ROW,
237 /*MemTileRowStart*/ XAIE_MEM_TILE_ROW_START,
238 /*MemTileNumRows*/ static_cast<uint8_t>(tm.getNumMemTileRows()),
239 /*AieTileRowStart*/
240 static_cast<uint8_t>(XAIE_MEM_TILE_ROW_START + tm.getNumMemTileRows()),
241 /*AieTileNumRows*/
242 static_cast<uint8_t>(tm.rows() - tm.getNumMemTileRows() - 1),
243 /*PartProp*/ {},
244 /*Backend*/ XAIE_IO_BACKEND_CDO};
245 XAie_InstDeclare(_devInst, &aiert->configPtr);
246 aiert->devInst = _devInst;
247 TRY_XAIE_API_FATAL_ERROR(XAie_SetupPartitionConfig, &aiert->devInst,
248 XAIE_PARTITION_BASE_ADDR, partitionStartCol,
249 partitionNumCols);
250 TRY_XAIE_API_FATAL_ERROR(XAie_CfgInitialize, &aiert->devInst,
251 &aiert->configPtr);
252 TRY_XAIE_API_FATAL_ERROR(XAie_UpdateNpiAddr, &aiert->devInst, NPI_ADDR);
253}
254
256 bool xaieDebug) {
257 // Quoting: The instance of a device must be always declared using this
258 // macro. In the future, the same macro will be expanded to
259 // allocate more memory from the user application for resource
260 // management.
261 if (aieSim) {
262 TRY_XAIE_API_FATAL_ERROR(XAie_SetIOBackend, &aiert->devInst,
263 XAIE_IO_BACKEND_SIM);
264 } else if (xaieDebug)
265 TRY_XAIE_API_FATAL_ERROR(XAie_SetIOBackend, &aiert->devInst,
266 XAIE_IO_BACKEND_DEBUG);
267 else
268 TRY_XAIE_API_FATAL_ERROR(XAie_SetIOBackend, &aiert->devInst,
269 XAIE_IO_BACKEND_CDO);
270 return success();
271}
272
// Program the acquire/release lock pair for one DMA buffer-descriptor block.
// Scans the block's use_lock ops to find the acquire and release halves,
// encodes acquire-greater-equal via aie-rt's negative-value convention,
// rebases lock ids for mem-tile neighbor access, and writes the result into
// `dmaTileBd` via the tile's DmaMod->SetLock hook.
LogicalResult configureLocksInBdBlock(const AIE::AIETargetModel &targetModel,
                                      XAie_DmaDesc &dmaTileBd, Block &block,
                                      int col, int row) {
  LLVM_DEBUG(llvm::dbgs() << "\nstart configuring bds\n");
  std::optional<int> acqValue, relValue, acqLockId, relLockId;
  bool acqEn = false;

  // switch (lock->getAc)
  AIE::LockOp lock;
  for (auto op : block.getOps<AIE::UseLockOp>()) {
    // Only dyn_cast if you are going to check if it was of the type
    // expected; if you aren't checking use cast instead as it will at
    // least assert in debug mode with an easier to understand error than
    // dereferencing.
    lock = cast<AIE::LockOp>(op.getLock().getDefiningOp());
    switch (op.getAction()) {
    case AIE::LockAction::Acquire:
    case AIE::LockAction::AcquireGreaterEqual:
      acqEn = op.getAcqEn();
      acqLockId = lock.getLockIDValue();
      acqValue = op.getLockValue();
      // aie-rt encodes "acquire greater-or-equal" as a negated acquire value.
      if (op.acquireGE())
        acqValue.value() = -acqValue.value();
      break;
    case AIE::LockAction::Release:
      relLockId = lock.getLockIDValue();
      relValue = op.getLockValue();
      break;
    }
  }

  assert(acqValue && relValue && acqLockId && relLockId &&
         "expected both use_lock(acquire) and use_lock(release) with bd");

  // On mem tiles, a neighbor tile's lock is addressed through a local index
  // window, so both ids get rebased by the local base index.
  // NOTE(review): the offset is computed from whichever use_lock op was
  // visited last in the loop above — this assumes the acquire and release
  // locks live on the same tile; confirm against callers.
  if (targetModel.isMemTile(col, row)) {
    auto lockOffset = targetModel.getLockLocalBaseIndex(
        col, row, lock.colIndex(), lock.rowIndex());
    if (lockOffset && acqLockId)
      acqLockId.value() += lockOffset.value();
    if (lockOffset && relLockId)
      relLockId.value() += lockOffset.value();
  }

  // no RelEn in the arch spec even though the API requires you to set it?
  bool relEn = false;
  XAie_Lock acqLock = XAie_LockInit(acqLockId.value(), acqValue.value());
  XAie_Lock relLock = XAie_LockInit(relLockId.value(), relValue.value());
  // Use the first use_lock op as the diagnostic anchor if SetLock fails.
  TRY_XAIE_API_EMIT_ERROR((*block.getOps<AIE::UseLockOp>().begin()),
                          dmaTileBd.DmaMod->SetLock, &dmaTileBd, acqLock,
                          relLock, acqEn, relEn);
  return success();
}
325
// Fill in and write one DMA buffer descriptor from the ops found in `block`:
// AXI burst settings for shim NOC tiles, base address resolution from the
// (external) buffer op, optional multi-dimensional addressing and ND zero
// padding, chaining to the next BD, and optional packet-header insertion.
// Finally enables the BD and writes it to hardware at (col, row)/bdId.
LogicalResult configureBdInBlock(const AIE::AIETargetModel &targetModel,
                                 XAie_DevInst *devInst, XAie_DmaDesc &dmaTileBd,
                                 Block &block, int col, int row, int bdId,
                                 std::optional<int> nextBdId) {
  std::optional<int> packetType;
  std::optional<int> packetID;

  // Below should go
  auto maybePacketOps = block.getOps<AIE::DMABDPACKETOp>();
  if (!maybePacketOps.empty()) {
    assert(llvm::range_size(maybePacketOps) == 1 &&
           "expected only one dma_bd_packet");
    auto packetOp = *maybePacketOps.begin();
    packetType = packetOp.getPacketType();
    packetID = packetOp.getPacketID();
  }

  auto bdOp = *block.getOps<AIE::DMABDOp>().begin();

  if (targetModel.isShimNOCTile(col, row)) {
    // write them out like this so they show up with names in debug prints
    uint8_t smid = 0;
    // burstLen is in bytes; XAie_DmaSetAxi takes it in 16-byte beats.
    uint32_t burstLen =
        getShimBurstLengthBytes(targetModel, bdOp.getBurstLength());
    uint8_t qOs = 0;
    uint8_t cache = 0;
    uint8_t secure = 0;
    TRY_XAIE_API_EMIT_ERROR(bdOp, XAie_DmaSetAxi, &dmaTileBd, smid,
                            burstLen / 16, qOs, cache, secure);
  }

  // get address from BufferOp (core,mem) or ExternalBufferOp (shim)
  uint64_t baseAddr = 0;
  if (targetModel.isShimNOCTile(col, row)) {
    auto bufferOp =
        cast<AIE::ExternalBufferOp>(bdOp.getBuffer().getDefiningOp());
    // external buffers aren't required to have an address here because the
    // address might get patched later or the default of zero might be a valid
    // address.
    if (bufferOp.getAddress())
      baseAddr = bufferOp.getAddress().value();
  } else {
    auto bufferOp = cast<AIE::BufferOp>(bdOp.getBuffer().getDefiningOp());
    if (!bufferOp.getAddress())
      return bufferOp.emitError("buffer must have address assigned");
    baseAddr = bufferOp.getAddress().value();
  }

  if (targetModel.isMemTile(col, row)) {
    // check if buffer is allocated on the same memtile, the west, or the east
    // one
    auto bufferOp = cast<AIE::BufferOp>(bdOp.getBuffer().getDefiningOp());
    auto bufferRow = bufferOp.getTileOp().getRow();
    auto bufferCol = bufferOp.getTileOp().getCol();
    auto addrOffset =
        targetModel.getMemLocalBaseAddress(col, row, bufferCol, bufferRow);
    if (addrOffset)
      baseAddr += addrOffset.value();
  }

  std::optional<llvm::ArrayRef<AIE::BDDimLayoutAttr>> dims =
      bdOp.getDimensions();
  uint64_t lenInBytes = bdOp.getLenInBytes();
  uint64_t basePlusOffsetInBytes = baseAddr + bdOp.getOffsetInBytes();
  if (!dims) {
    // Simple linear transfer: address + length only.
    TRY_XAIE_API_EMIT_ERROR(bdOp, XAie_DmaSetAddrLen, &dmaTileBd,
                            basePlusOffsetInBytes, lenInBytes);
  } else {
    XAie_DmaTensor dmaTileBdTensor = {};
    dmaTileBdTensor.NumDim = dims->size();
    // NOTE(review): this calloc'd array is never freed on the success path —
    // apparent leak; confirm whether aie-rt takes ownership.
    dmaTileBdTensor.Dim = static_cast<XAie_DmaDimDesc *>(
        calloc(dmaTileBdTensor.NumDim, sizeof(XAie_DmaDimDesc)));
    if (!dmaTileBdTensor.Dim)
      return bdOp.emitError("couldn't allocate array of XAie_DmaDimDesc");
    // libxaie requires stride in multiples of 32b
    double elementWidthIn32bWords =
        static_cast<double>(bdOp.getBufferElementTypeWidthInBytes()) / 4.0;
    for (size_t i = 0; i < dims->size(); i++) {
      // Pass down dimensions in reverse order; in the MLIR, this allows
      // us to specify step sizes/wraps in the same order as we would
      // access a multi-dim C array, with the highest dimension first.
      int j = dims->size() - i - 1;
      uint16_t size;
      uint32_t stride;
      if (j > 0) {
        // Outer dimensions: stride is scaled into 32-bit words.
        stride = static_cast<uint32_t>(dims.value()[i].getStride() *
                                       elementWidthIn32bWords);
        size = dims.value()[i].getSize();
      } else {
        // Innermost dimension: the size (not the stride) is scaled.
        stride = dims.value()[i].getStride();
        size = static_cast<uint16_t>(dims.value()[i].getSize() *
                                     elementWidthIn32bWords);
      }
      stride = stride > 0 ? stride : 1;
      // Assume AIE-ML architecture (ie use AieMlDimDesc instead of AieDimDesc);
      // asserted in AIETranslateToCDODirect).
      dmaTileBdTensor.Dim[j].AieMlDimDesc = {stride, size};
    }
    TRY_XAIE_API_EMIT_ERROR(bdOp, XAie_DmaSetMultiDimAddr, &dmaTileBd,
                            &dmaTileBdTensor, basePlusOffsetInBytes,
                            lenInBytes);
  }

  // ND zero padding.
  std::optional<llvm::ArrayRef<AIE::BDPadLayoutAttr>> padDims =
      bdOp.getPadDimensions();

  if (padDims) {
    XAie_DmaPadTensor dmaPadTensor = {};
    dmaPadTensor.NumDim = padDims->size();
    // NOTE(review): same apparent leak as dmaTileBdTensor.Dim above.
    dmaPadTensor.PadDesc = static_cast<XAie_PadDesc *>(
        calloc(dmaPadTensor.NumDim, sizeof(XAie_PadDesc)));
    if (!dmaPadTensor.PadDesc)
      return bdOp.emitError("couldn't allocate array of XAie_PadDesc");
    // libxaie requires stride in multiples of 32b
    double elementWidthIn32bWords =
        static_cast<double>(bdOp.getBufferElementTypeWidthInBytes()) / 4.0;
    for (size_t i = 0; i < padDims->size(); i++) {
      // Pass down dimensions in reverse order.
      int j = padDims->size() - i - 1;
      uint8_t before;
      uint8_t after;
      if (j > 0) {
        before = static_cast<uint8_t>(padDims.value()[i].getConstPadBefore());
        after = static_cast<uint8_t>(padDims.value()[i].getConstPadAfter());
      } else {
        // Innermost dimension: pad counts are scaled into 32-bit words.
        before = static_cast<uint8_t>(padDims.value()[i].getConstPadBefore() *
                                      elementWidthIn32bWords);
        after = static_cast<uint8_t>(padDims.value()[i].getConstPadAfter() *
                                     elementWidthIn32bWords);
      }
      dmaPadTensor.PadDesc[j] = {before, after};
    }
    TRY_XAIE_API_EMIT_ERROR(bdOp, XAie_DmaSetPadding, &dmaTileBd,
                            &dmaPadTensor);
  }
  if (nextBdId) {
    auto enableNextBd = 1;
    TRY_XAIE_API_EMIT_ERROR(bdOp, XAie_DmaSetNextBd, &dmaTileBd,
                            nextBdId.value(), enableNextBd);
  }

  // An inline packet attribute on the BD overrides any dma_bd_packet op.
  if (auto packetInfo = bdOp.getPacket()) {
    packetType = packetInfo->getPktType();
    packetID = packetInfo->getPktId();
  }

  if (packetID) {
    // NOTE(review): this diagnostic is emitted but NOT returned, so
    // execution continues past it — looks like a missing `return`; confirm.
    if (!packetType)
      bdOp.emitError("must have packetType with packetID");
    if (bdOp.getLen() == 0)
      return bdOp.emitOpError(
          "For MM2S channels, if Buffer_Length=0 then Enable_Packet must be "
          "set to 0, otherwise behavior is undefined (3.7.8 arch spec)");
    // NOTE(review): the extraction lost a line here — upstream wraps the
    // following arguments in a TRY_XAIE_API_EMIT_ERROR( invocation.
        bdOp, XAie_DmaSetPkt, &dmaTileBd,
        XAie_PacketInit(packetID.value(), packetType.value()));
  }
  TRY_XAIE_API_EMIT_ERROR(bdOp, XAie_DmaEnableBd, &dmaTileBd);
  auto tileLoc = XAie_TileLoc(col, row);
  TRY_XAIE_API_EMIT_ERROR(bdOp, XAie_DmaWriteBd, devInst, &dmaTileBd, tileLoc,
                          bdId);
  LLVM_DEBUG(llvm::dbgs() << "\nend configuring bds\n");
  return success();
};
491
493 Operation &op, int col, int row, int chNum, const DMAChannelDir &channelDir,
494 int bdId, int repeatCount) {
495 XAie_DmaDirection direction =
496 channelDir == DMAChannelDir::S2MM ? DMA_S2MM : DMA_MM2S;
497 auto tileLoc = XAie_TileLoc(col, row);
498 auto enTokenIssue = tileLoc.Row == 0 && direction == DMA_S2MM;
499 // in english repeat_count==0 means "do it once" and don't repeat but
500 // libxaie treats repeat_count=1 as do it once.
501 repeatCount += 1;
502 TRY_XAIE_API_EMIT_ERROR(op, XAie_DmaChannelSetStartQueue, &aiert->devInst,
503 tileLoc, chNum, direction, bdId, repeatCount,
504 enTokenIssue);
505 TRY_XAIE_API_EMIT_ERROR(op, XAie_DmaChannelEnable, &aiert->devInst, tileLoc,
506 chNum, direction);
507 return success();
508};
509
511 int col, int row) {
512 DMABDOp bd = *block.getOps<DMABDOp>().begin();
513 assert(bd.getBdId().has_value() &&
514 "DMABDOp must have assigned bd_id; did you forget to run "
515 "aie-assign-bd-ids?");
516 XAie_DmaDesc dmaTileBd;
517 auto tileLoc = XAie_TileLoc(col, row);
518 TRY_XAIE_API_EMIT_ERROR(bd, XAie_DmaDescInit, &aiert->devInst, &dmaTileBd,
519 tileLoc);
520 if (!block.getOps<UseLockOp>().empty() &&
521 failed(configureLocksInBdBlock(targetModel, dmaTileBd, block, col, row)))
522 return failure();
523 if (!block.getOps<DMABDOp>().empty() &&
524 failed(configureBdInBlock(targetModel, &aiert->devInst, dmaTileBd, block,
525 col, row, bd.getBdId().value(),
526 bd.getNextBdId())))
527 return failure();
528 return success();
529}
530
// Reset cores and zero their locks, then apply any explicit lock
// initializers found in the design.
LogicalResult xilinx::AIE::AIERTControl::initLocks(DeviceOp &targetOp) {
  for (auto tileOp : targetOp.getOps<TileOp>()) {
    auto tileLoc = XAie_TileLoc(tileOp.colIndex(), tileOp.rowIndex());
    // Only tiles that actually host a core get reset; shim tiles are skipped.
    if (!tileOp.isShimTile() && tileOp.getCoreOp()) {
      TRY_XAIE_API_EMIT_ERROR(tileOp, XAie_CoreReset, &aiert->devInst, tileLoc);
      TRY_XAIE_API_EMIT_ERROR(tileOp, XAie_CoreUnreset, &aiert->devInst,
                              tileLoc);
      // Set locks to zero
      for (uint8_t l = 0; l < NUM_LOCKS; l++) {
        auto locInit = XAie_LockInit(l, 0);
        TRY_XAIE_API_EMIT_ERROR(tileOp, XAie_LockSetValue, &aiert->devInst,
                                tileLoc, locInit);
      }
    }
  }

  // Set locks with explicit initializers
  targetOp.walk<WalkOrder::PreOrder>([&](LockOp lockOp) {
    if (lockOp.getLockID() && lockOp.getInit()) {
      auto tileLoc = XAie_TileLoc(lockOp.getTileOp().colIndex(),
                                  lockOp.getTileOp().rowIndex());
      auto locInit = XAie_LockInit(*lockOp.getLockID(), *lockOp.getInit());
      // Fatal (not emitOpError) because this runs inside a void walk lambda.
      TRY_XAIE_API_FATAL_ERROR(XAie_LockSetValue, &aiert->devInst, tileLoc,
                               locInit);
    } else
      LLVM_DEBUG(llvm::dbgs()
                 << "lock op missing either id or init" << lockOp << "\n");
  });
  return success();
}
561
562LogicalResult xilinx::AIE::AIERTControl::initBuffers(DeviceOp &targetOp) {
563 // Set buffers with explicit initializers
564 targetOp.walk<WalkOrder::PreOrder>([&](BufferOp bufferOp) {
565 auto initialValue = bufferOp.getInitialValue();
566 if (!initialValue)
567 return;
568 mlir::DenseElementsAttr denseInit =
569 dyn_cast<mlir::DenseElementsAttr>(initialValue.value());
570 if (!denseInit)
571 return;
572 auto tileLoc = XAie_TileLoc(bufferOp.getTileOp().colIndex(),
573 bufferOp.getTileOp().rowIndex());
574 std::vector<char> byteVec;
575 if (denseInit.getElementType().isIntOrIndex()) {
576 for (auto intVal : denseInit.getValues<APInt>()) {
577 // Get the size in bytes
578 size_t byteSize = (intVal.getBitWidth() + 7) / 8;
579 // Create a buffer for the integer bytes and copy
580 std::vector<char> bytes(byteSize);
581 std::copy(
582 static_cast<const char *>(static_cast<const void *>(&intVal)),
583 static_cast<const char *>(static_cast<const void *>(&intVal)) +
584 byteSize,
585 bytes.begin());
586 byteVec.insert(byteVec.end(), bytes.begin(), bytes.end());
587 }
588 } else if (isa<FloatType>(denseInit.getElementType())) {
589 for (auto floatVal : denseInit.getValues<APFloat>()) {
590 APInt floatInt = floatVal.bitcastToAPInt();
591 // Get the size in bytes
592 size_t byteSize = (floatInt.getBitWidth() + 7) / 8;
593 // Create a buffer for the float bytes and copy
594 std::vector<char> bytes(byteSize);
595 std::copy(
596 static_cast<const char *>(static_cast<const void *>(&floatInt)),
597 static_cast<const char *>(static_cast<const void *>(&floatInt)) +
598 byteSize,
599 bytes.begin());
600 byteVec.insert(byteVec.end(), bytes.begin(), bytes.end());
601 }
602 } else {
603 llvm::outs() << "buffer op type not supported for initialization "
604 << bufferOp << "\n";
605 return;
606 }
607 TRY_XAIE_API_FATAL_ERROR(XAie_DataMemBlockWrite, &aiert->devInst, tileLoc,
608 bufferOp.getAddress().value(), byteVec.data(),
609 byteVec.size());
610 });
611 return success();
612}
613
// Program every stream switch in the design: circuit-switched connections,
// packet-switched master ports and slave slot rules, shim mux/demux routes,
// shim switchbox connections, and (AIE2) cascade direction configuration.
// NOTE(review): the documentation extraction this text came from dropped the
// `TRY_XAIE_API_EMIT_ERROR(` opener line at four call sites below (marked);
// the dangling argument lists belong to that macro.
LogicalResult xilinx::AIE::AIERTControl::configureSwitches(DeviceOp &targetOp) {

  // StreamSwitch (switchbox) configuration
  for (auto switchboxOp : targetOp.getOps<SwitchboxOp>()) {
    int32_t col = switchboxOp.colIndex();
    int32_t row = switchboxOp.rowIndex();
    XAie_LocType tileLoc = XAie_TileLoc(col, row);
    assert(targetModel.hasProperty(AIETargetModel::IsNPU) &&
           "Only NPU currently supported");

    // Circuit-switched connections.
    Block &b = switchboxOp.getConnections().front();
    for (auto connectOp : b.getOps<ConnectOp>())
      // NOTE(review): missing TRY_XAIE_API_EMIT_ERROR( opener (lost line).
          switchboxOp, XAie_StrmConnCctEnable, &aiert->devInst, tileLoc,
          WIRE_BUNDLE_TO_STRM_SW_PORT_TYPE.at(connectOp.getSourceBundle()),
          connectOp.sourceIndex(),
          WIRE_BUNDLE_TO_STRM_SW_PORT_TYPE.at(connectOp.getDestBundle()),
          connectOp.destIndex());

    // Packet-switched master ports: collect the msel mask per arbiter.
    for (auto masterSetOp : b.getOps<MasterSetOp>()) {
      int mask = 0;
      int arbiter = -1;

      for (auto val : masterSetOp.getAmsels()) {
        AMSelOp amsel = cast<AMSelOp>(val.getDefiningOp());
        arbiter = amsel.arbiterIndex();
        int msel = amsel.getMselValue();
        mask |= (1 << msel);
      }

      // the default is to keep header
      bool keepHeader = true;
      // the default for dma destinations is to drop the header
      if (masterSetOp.getDestBundle() == WireBundle::DMA)
        keepHeader = false;
      // assume a connection going south from row zero gets wired to shimdma
      // by a shimmux.
      if (switchboxOp.rowIndex() == 0 &&
          masterSetOp.getDestBundle() == WireBundle::South)
        keepHeader = false;

      // "keep_pkt_header" attribute overrides the above defaults, if set
      if (auto keep = masterSetOp.getKeepPktHeader())
        keepHeader = *keep;

      auto dropHeader =
          keepHeader ? XAIE_SS_PKT_DONOT_DROP_HEADER : XAIE_SS_PKT_DROP_HEADER;
      // NOTE(review): missing TRY_XAIE_API_EMIT_ERROR( opener (lost line).
          masterSetOp, XAie_StrmPktSwMstrPortEnable, &aiert->devInst, tileLoc,
          WIRE_BUNDLE_TO_STRM_SW_PORT_TYPE.at(masterSetOp.getDestBundle()),
          masterSetOp.destIndex(), dropHeader, arbiter, mask);
    }

    // Packet-switched slave ports: one slot per packet rule.
    for (auto packetRulesOp : b.getOps<PacketRulesOp>()) {
      int slot = 0;
      Block &block = packetRulesOp.getRules().front();
      for (auto slotOp : block.getOps<PacketRuleOp>()) {
        AMSelOp amselOp = cast<AMSelOp>(slotOp.getAmsel().getDefiningOp());
        int arbiter = amselOp.arbiterIndex();
        int msel = amselOp.getMselValue();
        TRY_XAIE_API_EMIT_ERROR(packetRulesOp, XAie_StrmPktSwSlavePortEnable,
                                &aiert->devInst, tileLoc,
                                WIRE_BUNDLE_TO_STRM_SW_PORT_TYPE.at(
                                    packetRulesOp.getSourceBundle()),
                                packetRulesOp.sourceIndex());
        auto packetInit = XAie_PacketInit(slotOp.valueInt(), /*PktType*/ 0);
        // TODO Need to better define packet id,type used here
        TRY_XAIE_API_EMIT_ERROR(packetRulesOp, XAie_StrmPktSwSlaveSlotEnable,
                                &aiert->devInst, tileLoc,
                                WIRE_BUNDLE_TO_STRM_SW_PORT_TYPE.at(
                                    packetRulesOp.getSourceBundle()),
                                packetRulesOp.sourceIndex(), slot, packetInit,
                                slotOp.maskInt(), msel, arbiter);
        slot++;
      }
    }
  }

  for (auto muxOp : targetOp.getOps<ShimMuxOp>()) {
    // NOTE ShimMux always connects from the south as directions are
    // defined relative to the tile stream switch.
    auto tileLoc =
        XAie_TileLoc(muxOp.getTileOp().getCol(), muxOp.getTileOp().getRow());
    Block &b = muxOp.getConnections().front();
    for (auto connectOp : b.getOps<ConnectOp>()) {
      // demux!
      if (connectOp.getSourceBundle() == WireBundle::North)
        TRY_XAIE_API_EMIT_ERROR(muxOp, XAie_EnableAieToShimDmaStrmPort,
                                &aiert->devInst, tileLoc,
                                connectOp.sourceIndex());
      // mux
      if (connectOp.getDestBundle() == WireBundle::North)
        TRY_XAIE_API_EMIT_ERROR(muxOp, XAie_EnableShimDmaToAieStrmPort,
                                &aiert->devInst, tileLoc,
                                connectOp.destIndex());
    }
  }

  for (auto switchboxOp : targetOp.getOps<ShimSwitchboxOp>()) {
    Block &b = switchboxOp.getConnections().front();
    auto tileLoc = XAie_TileLoc(switchboxOp.getCol(), 0);
    for (auto connectOp : b.getOps<ConnectOp>())
      // NOTE(review): missing TRY_XAIE_API_EMIT_ERROR( opener (lost line).
          switchboxOp, XAie_StrmConnCctEnable, &aiert->devInst, tileLoc,
          WIRE_BUNDLE_TO_STRM_SW_PORT_TYPE.at(connectOp.getSourceBundle()),
          connectOp.sourceIndex(),
          WIRE_BUNDLE_TO_STRM_SW_PORT_TYPE.at(connectOp.getDestBundle()),
          connectOp.destIndex());
  }

  // Cascade configuration
  if (isa<AIE2TargetModel>(targetModel)) {
    for (auto configOp : targetOp.getOps<ConfigureCascadeOp>()) {
      TileOp tile = cast<TileOp>(configOp.getTile().getDefiningOp());
      auto tileLoc = XAie_TileLoc(tile.getCol(), tile.getRow());
      // NOTE(review): missing TRY_XAIE_API_EMIT_ERROR( opener (lost line).
          targetOp, XAie_CoreConfigAccumulatorControl, &aiert->devInst, tileLoc,
          WIRE_BUNDLE_TO_STRM_SW_PORT_TYPE.at(
              static_cast<WireBundle>(configOp.getInputDir())),
          WIRE_BUNDLE_TO_STRM_SW_PORT_TYPE.at(
              static_cast<WireBundle>(configOp.getOutputDir())));
    }
  }

  return success();
}
740
// Apply the full initial device configuration: lock/core resets, buffer
// initializers, per-BD lock and descriptor programming for every mem/shim
// DMA region, BD queue pushes + channel enables, and finally the stream
// switches.  Regions written with the newer DMAOp form are handled via their
// BD sub-regions; otherwise the legacy block-chain form is walked.
LogicalResult xilinx::AIE::AIERTControl::addInitConfig(DeviceOp &targetOp) {

  if (failed(initLocks(targetOp))) {
    return failure();
  }

  if (failed(initBuffers(targetOp))) {
    return failure();
  }

  // All three DMA-bearing op kinds are processed uniformly as TileElements.
  auto memOps = llvm::to_vector_of<TileElement>(targetOp.getOps<MemOp>());
  llvm::append_range(memOps, targetOp.getOps<MemTileDMAOp>());
  llvm::append_range(memOps, targetOp.getOps<ShimDMAOp>());
  for (TileElement memOp : memOps) {
    int col = memOp.getTileID().col;
    int row = memOp.getTileID().row;

    // Get the region's entry block, then start traversing through the chain of
    // blocks.
    llvm::SetVector<Block *> blockVector =
        getOrderedChainOfBlocks(&memOp.getOperation()->getRegion(0));

    // handle DMA ops separately
    auto dmaOps = llvm::to_vector_of<DMAOp>(
        memOp.getOperation()->getRegion(0).getOps<DMAOp>());
    if (!dmaOps.empty()) {
      // Newer form: each DMAOp carries its BD blocks in dedicated regions.
      for (auto dmaOp : dmaOps)
        for (auto &bdRegion : dmaOp.getBds()) {
          Block &block = bdRegion.getBlocks().front();
          if (failed(configureLocksAndBd(block, col, row)))
            return failure();
        }
    } else {
      // Legacy form: BDs live directly in the chained blocks.
      for (Block *block : blockVector) {
        if (block->getOps<DMABDOp>().empty())
          continue;
        if (failed(configureLocksAndBd(*block, col, row)))
          return failure();
      }
    }

    // Second pass: push the head BD of each channel onto its queue and
    // enable the channel.
    if (!dmaOps.empty())
      for (auto dmaOp : dmaOps) {
        auto &block = dmaOp.getBds().front().getBlocks().front();
        DMABDOp bd = *block.getOps<DMABDOp>().begin();
        if (failed(pushToBdQueueAndEnable(
                *dmaOp.getOperation(), col, row, dmaOp.getChannelIndex(),
                dmaOp.getChannelDir(), bd.getBdId().value(),
                dmaOp.getRepeatCount())))
          return failure();
      }
    else
      for (Block *block : blockVector) {
        for (auto op : block->getOps<DMAStartOp>()) {
          DMABDOp bd = *op.getDest()->getOps<DMABDOp>().begin();
          int chNum = op.getChannelIndex();
          auto channelDir = op.getChannelDir();
          if (failed(pushToBdQueueAndEnable(*bd.getOperation(), col, row, chNum,
                                            channelDir, bd.getBdId().value(),
                                            op.getRepeatCount())))
            return failure();
        }
      }
  }

  if (failed(configureSwitches(targetOp))) {
    return failure();
  }

  return success();
}
812
813LogicalResult xilinx::AIE::AIERTControl::addCoreEnable(DeviceOp &targetOp) {
814 // Start execution of all the cores.
815 for (auto tileOp : targetOp.getOps<TileOp>()) {
816 auto tileLoc = XAie_TileLoc(tileOp.colIndex(), tileOp.rowIndex());
817 if (!tileOp.isShimTile() && tileOp.getCoreOp())
818 TRY_XAIE_API_EMIT_ERROR(targetOp, XAie_CoreEnable, &aiert->devInst,
819 tileLoc);
820 }
821 return success();
822}
823
// Load one core's ELF at (col, row): disable and reset the core's DMA
// channels, load the ELF image, then take the channels back out of reset.
LogicalResult xilinx::AIE::AIERTControl::addAieElf(uint8_t col, uint8_t row,
                                                   const StringRef elfPath,
                                                   bool aieSim) {
  TRY_XAIE_API_LOGICAL_RESULT(XAie_CoreDisable, &aiert->devInst,
                              XAie_TileLoc(col, row));
  TRY_XAIE_API_LOGICAL_RESULT(XAie_DmaChannelResetAll, &aiert->devInst,
                              XAie_TileLoc(col, row),
                              XAie_DmaChReset::DMA_CHANNEL_RESET);

  // loadSym: Load symbols from .map file. This argument is not used when
  // __AIESIM__ is not defined.
  TRY_XAIE_API_LOGICAL_RESULT(XAie_LoadElf, &aiert->devInst,
                              XAie_TileLoc(col, row), elfPath.str().c_str(),
                              /*loadSym*/ aieSim);

  TRY_XAIE_API_LOGICAL_RESULT(XAie_DmaChannelResetAll, &aiert->devInst,
                              XAie_TileLoc(col, row),
                              XAie_DmaChReset::DMA_CHANNEL_UNRESET);

  return success();
}
845
846LogicalResult xilinx::AIE::AIERTControl::addAieElfs(DeviceOp &targetOp,
847 const StringRef elfPath,
848 bool aieSim) {
849 for (auto tileOp : targetOp.getOps<TileOp>())
850 if (tileOp.isShimNOCorPLTile()) {
851 // Resets no needed with V2 kernel driver
852 } else {
853 int col = tileOp.colIndex();
854 int row = tileOp.rowIndex();
855 if (auto coreOp = tileOp.getCoreOp()) {
856 std::string fileName;
857 if (auto fileAttr = coreOp.getElfFile())
858 fileName = fileAttr->str();
859 else
860 fileName = (llvm::Twine("core_") + std::to_string(col) + "_" +
861 std::to_string(row) + ".elf")
862 .str();
863 auto ps = std::filesystem::path::preferred_separator;
864 if (failed(addAieElf(
865 col, row,
866 (llvm::Twine(elfPath) + std::string(1, ps) + fileName).str(),
867 aieSim)))
868 return failure();
869 }
870 }
871 return success();
872}
873
875 size_t bdId) {
876 auto tileLoc = XAie_TileLoc(col, row);
877 TRY_XAIE_API_FATAL_ERROR(XAie_DmaUpdateBdAddr, &aiert->devInst, tileLoc, addr,
878 bdId);
879}
880
882 TRY_XAIE_API_FATAL_ERROR(XAie_StartTransaction, &aiert->devInst,
883 XAIE_TRANSACTION_DISABLE_AUTO_FLUSH);
884}
885
887 // Export the transactions to a binary buffer
888 uint8_t *txn_ptr = XAie_ExportSerializedTransaction(&aiert->devInst, 0, 0);
889 XAie_TxnHeader *hdr = (XAie_TxnHeader *)txn_ptr;
890 std::vector<uint8_t> txn_data(txn_ptr, txn_ptr + hdr->TxnSize);
891 return txn_data;
892}
#define XAIE_SHIM_ROW
Definition AIERT.cpp:188
llvm::raw_ostream & showAIEXRTArgs(llvm::raw_ostream &out, const char *label, H1 &&value)
Definition AIERT.cpp:82
LogicalResult configureLocksInBdBlock(const AIE::AIETargetModel &targetModel, XAie_DmaDesc &dmaTileBd, Block &block, int col, int row)
Definition AIERT.cpp:273
#define XAIE_PARTITION_BASE_ADDR
Definition AIERT.cpp:190
#define TRY_XAIE_API_LOGICAL_RESULT(API,...)
Definition AIERT.cpp:127
#define NPI_ADDR
Definition AIERT.cpp:192
#define NUM_LOCKS
Definition AIERT.cpp:193
#define TRY_XAIE_API_FATAL_ERROR(API,...)
Definition AIERT.cpp:108
llvm::raw_ostream & operator<<(llvm::raw_ostream &os, const XAie_LocType &loc)
Definition AIERT.cpp:168
#define XAIE_BASE_ADDR
Definition AIERT.cpp:187
#define TRY_XAIE_API_EMIT_ERROR(OP, API,...)
Definition AIERT.cpp:118
#define XAIE_MEM_TILE_ROW_START
Definition AIERT.cpp:189
#define AIERC_STR(x)
Definition AIERT.cpp:32
LogicalResult configureBdInBlock(const AIE::AIETargetModel &targetModel, XAie_DevInst *devInst, XAie_DmaDesc &dmaTileBd, Block &block, int col, int row, int bdId, std::optional< int > nextBdId)
Definition AIERT.cpp:326
std::optional< uint32_t > getMemLocalBaseAddress(int localCol, int localRow, int memCol, int memRow) const
Return the memory base address (or offset) in the local tile when accessing a neighbor's memory or an...
virtual AIEArch getTargetArch() const =0
Return the target architecture.
std::optional< uint32_t > getLockLocalBaseIndex(int localCol, int localRow, int lockCol, int lockRow) const
Return the lock base index (or offset) in the local tile when accessing a neighbor's lock or an empty...
virtual bool isMemTile(int col, int row) const =0
Return true if the given tile is an AIE2 'Memory' tile.
virtual int rows() const =0
Return the number of rows in the device.
virtual bool isShimNOCTile(int col, int row) const =0
Return true if the given tile is a Shim NOC tile.
virtual uint32_t getColumnShift() const =0
bool hasProperty(ModelProperty Prop) const
virtual int columns() const =0
Return the number of columns in the device.
virtual uint32_t getNumMemTileRows() const =0
virtual uint32_t getRowShift() const =0
uint32_t getShimBurstLengthBytes(const AIE::AIETargetModel &tm, uint32_t burstLength)
llvm::SetVector< mlir::Block * > getOrderedChainOfBlocks(mlir::Region *region)
mlir::LogicalResult pushToBdQueueAndEnable(mlir::Operation &op, int col, int row, int chNum, const DMAChannelDir &channelDir, int bdId, int repeatCount)
Definition AIERT.cpp:492
void dmaUpdateBdAddr(int col, int row, size_t addr, size_t bdId)
Definition AIERT.cpp:874
mlir::LogicalResult addAieElfs(DeviceOp &targetOp, const mlir::StringRef workDirPath, bool aieSim)
Definition AIERT.cpp:846
mlir::LogicalResult setIOBackend(bool aieSim, bool xaieDebug)
Definition AIERT.cpp:255
std::vector< uint8_t > exportSerializedTransaction()
Definition AIERT.cpp:886
AIERTControl(const xilinx::AIE::AIETargetModel &tm)
Definition AIERT.cpp:204
mlir::LogicalResult initLocks(DeviceOp &targetOp)
Definition AIERT.cpp:531
mlir::LogicalResult configureSwitches(DeviceOp &targetOp)
Definition AIERT.cpp:614
mlir::LogicalResult initBuffers(DeviceOp &targetOp)
Definition AIERT.cpp:562
mlir::LogicalResult addCoreEnable(DeviceOp &targetOp)
Definition AIERT.cpp:813
mlir::LogicalResult addAieElf(uint8_t col, uint8_t row, const mlir::StringRef elfPath, bool aieSim)
Definition AIERT.cpp:824
mlir::LogicalResult configureLocksAndBd(mlir::Block &block, int col, int row)
Definition AIERT.cpp:510
mlir::LogicalResult addInitConfig(DeviceOp &targetOp)
Definition AIERT.cpp:741