MLIR-AIE
AIEObjectFifoStatefulTransform.cpp
Go to the documentation of this file.
1//===- AIEObjectFifoStatefulTransform.cpp ----------------------*- MLIR -*-===//
2//
3// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7// (c) Copyright 2021 Xilinx Inc.
8//
9// Date: October 18th 2021
10//
11//===----------------------------------------------------------------------===//
12
15
16#include "mlir/Analysis/TopologicalSortUtils.h"
17#include "mlir/Dialect/Arith/IR/Arith.h"
18#include "mlir/Dialect/MemRef/IR/MemRef.h"
19#include "mlir/Dialect/SCF/IR/SCF.h"
20#include "mlir/Dialect/SCF/Utils/Utils.h"
21#include "mlir/IR/Attributes.h"
22#include "mlir/Pass/Pass.h"
23#include "mlir/Transforms/DialectConversion.h"
24
25#include "mlir/IR/Operation.h"
26#include "mlir/Interfaces/DataLayoutInterfaces.h"
27
28#include <numeric>
29#include <set>
30
31#include <iostream>
32
33namespace xilinx::AIE {
34#define GEN_PASS_DEF_AIEOBJECTFIFOSTATEFULTRANSFORM
35#include "aie/Dialect/AIE/Transforms/AIEPasses.h.inc"
36} // namespace xilinx::AIE
37
38using namespace mlir;
39using namespace xilinx;
40using namespace xilinx::AIE;
41
42#define DEBUG_TYPE "aie-objectFifo-stateful-transform"
43
44#define LOOP_VAR_DEPENDENCY (-2)
45
46//===----------------------------------------------------------------------===//
47// Lock Analysis
48//===----------------------------------------------------------------------===//
50 DenseMap<std::pair<Value, int>, int> locksPerTile;
51
52public:
53 LockAnalysis(DeviceOp &device) {
54 // go over the locks created for each tile and update the index in
55 // locksPerTile
56 device.walk([&](LockOp lockOp) {
57 auto tile = lockOp.getTile();
58 auto lockID = lockOp.getLockIDValue();
59 locksPerTile[{tile, lockID}] = 1;
60 });
61 }
62
63 /// Given a tile, returns next usable lockID for that tile.
64 int getLockID(TileOp &tileOp) {
65 const auto &targetModel = getTargetModel(tileOp);
66 for (unsigned i = 0;
67 i < targetModel.getNumLocks(tileOp.getCol(), tileOp.getRow()); i++)
68 if (int usageCnt = locksPerTile[{tileOp, i}]; usageCnt == 0) {
69 locksPerTile[{tileOp, i}] = 1;
70 return i;
71 }
72 return -1;
73 }
74};
75
76//===----------------------------------------------------------------------===//
77// DMA Channel Analysis
78//===----------------------------------------------------------------------===//
80 DenseMap<std::tuple<Value, DMAChannelDir, int>, int> channelsPerTile;
81 DenseMap<std::tuple<Value, DMAChannelDir, int>, int> aieStreamsPerTile;
82
83public:
84 DMAChannelAnalysis(DeviceOp &device) {
85 // go over the channels used for each tile and update channel map
86 for (auto memOp : device.getOps<MemOp>()) {
87 Region &r = memOp.getBody();
88 for (auto &bl : r.getBlocks()) {
89 for (auto op : bl.getOps<DMAStartOp>()) {
90 channelsPerTile[{memOp.getTile(), op.getChannelDir(),
91 op.getChannelIndex()}] = 1;
92 }
93 }
94 }
95 for (auto memOp : device.getOps<MemTileDMAOp>()) {
96 Region &r = memOp.getBody();
97 for (auto &bl : r.getBlocks()) {
98 for (auto op : bl.getOps<DMAStartOp>()) {
99 channelsPerTile[{memOp.getTile(), op.getChannelDir(),
100 op.getChannelIndex()}] = 1;
101 }
102 }
103 }
104 for (auto memOp : device.getOps<ShimDMAOp>()) {
105 Region &r = memOp.getBody();
106 for (auto &bl : r.getBlocks()) {
107 for (auto op : bl.getOps<DMAStartOp>()) {
108 channelsPerTile[{memOp.getTile(), op.getChannelDir(),
109 op.getChannelIndex()}] = 1;
110 }
111 }
112 }
113 for (auto flowOp : device.getOps<FlowOp>()) {
114 if (flowOp.getSourceBundle() == WireBundle::Core)
115 aieStreamsPerTile[{flowOp.getSource(), DMAChannelDir::MM2S,
116 flowOp.getSourceChannel()}] = 1;
117 if (flowOp.getDestBundle() == WireBundle::Core)
118 aieStreamsPerTile[{flowOp.getDest(), DMAChannelDir::S2MM,
119 flowOp.getDestChannel()}] = 1;
120 }
121 }
122
123 /// Given a tile and DMAChannelDir, returns next usable channel index for
124 /// that tile.
125 int getDMAChannelIndex(TileOp tileOp, DMAChannelDir dir,
126 bool requiresAdjacentTileAccessChannels) {
127 int maxChannelNum = 0;
128 if (dir == DMAChannelDir::MM2S)
129 maxChannelNum = tileOp.getNumSourceConnections(WireBundle::DMA);
130 else
131 maxChannelNum = tileOp.getNumDestConnections(WireBundle::DMA);
132
133 const auto &targetModel = getTargetModel(tileOp);
134 int maxChannelNumForAdjacentTile =
135 targetModel.getMaxChannelNumForAdjacentMemTile(tileOp.getCol(),
136 tileOp.getRow());
137
138 // if requires adjacent tile access channels, only allocate on channel 0-3,
139 // and if cannot, return 0
140 if (requiresAdjacentTileAccessChannels) {
141 maxChannelNum = std::min(maxChannelNum, maxChannelNumForAdjacentTile);
142 }
143
144 for (int i = 0; i < maxChannelNum; i++) {
145 if (int usageCnt = channelsPerTile[{tileOp.getResult(), dir, i}];
146 usageCnt == 0) {
147 channelsPerTile[{tileOp.getResult(), dir, i}] = 1;
148 return i;
149 }
150 }
151 return -1;
152 }
153
154 /// Given a tile and DMAChannel, adds entry to aieStreamsPerTile or
155 /// throws an error if the stream is already used.
156 void checkAIEStreamIndex(TileOp tileOp, DMAChannel chan) {
157 if (aieStreamsPerTile.find({tileOp.getResult(), chan.direction,
158 chan.channel}) == aieStreamsPerTile.end()) {
159 aieStreamsPerTile[{tileOp.getResult(), chan.direction, chan.channel}] = 1;
160 } else {
161 if (chan.direction == DMAChannelDir::MM2S)
162 tileOp.emitOpError("number of output Core channels exceeded!");
163 else
164 tileOp.emitOpError("number of input Core channels exceeded!");
165 }
166 }
167};
168
169//===----------------------------------------------------------------------===//
170// Create objectFifos Pass
171//===----------------------------------------------------------------------===//
172
173/// Struct to hold per-device state for the objectFifo transformation.
174/// This is passed to helper functions to avoid member variable pollution
175/// between different device operations.
177 DenseMap<ObjectFifoCreateOp, std::vector<BufferOp>>
178 buffersPerFifo; // maps each objFifo to its corresponding buffer
179 DenseMap<ObjectFifoCreateOp, std::vector<ExternalBufferOp>>
180 externalBuffersPerFifo; // maps each objFifo to its corresponding
181 // external buffers
182 DenseMap<ObjectFifoCreateOp, std::vector<LockOp>>
183 locksPerFifo; // maps each objFifo to its corresponding locks
184 std::vector<std::pair<ObjectFifoCreateOp, std::vector<ObjectFifoCreateOp>>>
185 splitFifos; // maps each objFifo between non-adjacent tiles to its
186 // corresponding consumer objectFifos
187 DenseMap<ObjectFifoLinkOp, ObjectFifoCreateOp>
188 objFifoLinks; // maps each ObjectFifoLinkOp to objFifo whose elements
189 // have been created and should be used
190 std::vector<ObjectFifoCreateOp>
191 splitBecauseLink; // objfifos which have been split because they are
192 // part of a Link, not because they didn't have a shared
193 // memory module
194};
195
198 AIEObjectFifoStatefulTransformPass> {
199
200 /// Function that returns true if two tiles in the AIE array share a memory
201 /// module. share_direction is equal to:
202 /// * 2 if the memory modules on both tiles can be shared,
203 /// * -1 if the shared memory module is that of the first input tile,
204 /// * 1 if it is that of the second input tile,
205 /// * 0 is no memory module is shared.
206 bool isSharedMemory(TileOp a, TileOp b, int *share_direction) {
207 const auto &targetModel = getTargetModel(a.getOperation());
208
209 if ((a.isShimTile() && !b.isShimTile()) ||
210 (!a.isShimTile() && b.isShimTile())) {
211 *share_direction = 0;
212 return false;
213 }
214 if ((targetModel.isMemTile(a.getCol(), a.getRow()) &&
215 !targetModel.isMemTile(b.getCol(), b.getRow())) ||
216 (!targetModel.isMemTile(a.getCol(), a.getRow()) &&
217 targetModel.isMemTile(b.getCol(), b.getRow()))) {
218 *share_direction = 0;
219 return false;
220 }
221 bool rightShared = targetModel.isLegalMemAffinity(
222 a.colIndex(), a.rowIndex(), b.colIndex(), b.rowIndex());
223
224 bool leftShared = targetModel.isLegalMemAffinity(
225 b.colIndex(), b.rowIndex(), a.colIndex(), a.rowIndex());
226
227 if (leftShared && rightShared)
228 *share_direction = 2;
229 else if (leftShared)
230 *share_direction = -1;
231 else if (rightShared)
232 *share_direction = 1;
233 else
234 *share_direction = 0;
235
236 return leftShared || rightShared;
237 }
238
239 /// Function to retrieve ObjectFifoAllocateOp of ObjectFifoCreateOp,
240 /// if it exists.
241 std::optional<ObjectFifoAllocateOp>
242 getOptionalAllocateOp(ObjectFifoCreateOp op) {
243 ObjectFifoAllocateOp allocOp;
244 auto device = op->getParentOfType<DeviceOp>();
245 bool foundAlloc = false;
246 for (ObjectFifoAllocateOp alloc : device.getOps<ObjectFifoAllocateOp>()) {
247 if (alloc.getObjectFifo() == op) {
248 if (foundAlloc)
249 op.emitOpError("has more than one allocate operation");
250 allocOp = alloc;
251 foundAlloc = true;
252 }
253 }
254 if (foundAlloc)
255 return {allocOp};
256 return {};
257 }
258
259 // Return true if the objectFifo created by createOp requires a DMA to be set
260 // up. This is the case if the tiles are not adjacent (no shared memory), if
261 // the objectFifo broadcasts to multiple tiles, if one of the consumers or
262 // the producer wants to use the multi-dimensional address generation
263 // features of the DMA, if the objectFifo is part of a LinkOp, or if the
264 // via_DMA or repeatCount attributes of the objectFifo are set.
265 bool requiresDMAs(ObjectFifoCreateOp createOp, int &share_direction,
266 ObjectFifoState &state) {
267 bool hasSharedMemory = false;
268 bool atLeastOneConsumerWantsTransform = false;
269 bool isUsedInLinkOp = false;
270
271 if (createOp.getVia_DMA())
272 return true;
273
274 if (createOp.getRepeatCount().has_value())
275 return true;
276
277 if (createOp.getAieStream())
278 return true;
279
280 if (createOp.getConsumerTiles().size() == 1 &&
281 createOp.getDimensionsToStream().empty()) {
282
283 // Test for shared memory
284 for (auto consumerTile : createOp.getConsumerTiles()) {
285 if (auto consumerTileOp =
286 dyn_cast<TileOp>(consumerTile.getDefiningOp())) {
287 if (std::count(state.splitBecauseLink.begin(),
288 state.splitBecauseLink.end(), createOp))
289 hasSharedMemory =
290 isSharedMemory(createOp.getProducerTileOp(),
291 createOp.getProducerTileOp(), &share_direction);
292 else
293 hasSharedMemory = isSharedMemory(createOp.getProducerTileOp(),
294 consumerTileOp, &share_direction);
295 }
296 }
297 }
298
299 // Only test for use of data layout transformations if we are in the shared
300 // memory case; otherwise, we will return `true` in any case.
301 if (hasSharedMemory) {
302 // Even if just one of the consumers in the list of consumers wants to
303 // perform a memory transform, we need to use DMAs.
304 for (BDDimLayoutArrayAttr dims :
305 createOp.getDimensionsFromStreamPerConsumer())
306 if (!dims.empty()) {
307 atLeastOneConsumerWantsTransform = true;
308 break;
309 }
310 }
311
312 // Check if the objectfifo operation can use shared memory for linking. If
313 // the link operation is a distribute or a join operation, or if the link
314 // has different memref types, DMAs are required even if shared memory is
315 // available and the objectfifo should be split. Otherwise also check if the
316 // via_shared_memory attribute of the objectfifo operation is set and try to
317 // apply it.
318 if (hasSharedMemory) {
319 if (auto linkOp = getOptionalLinkOp(createOp)) {
320 isUsedInLinkOp = true;
321 if (!linkOp->isDistribute() && !linkOp->isJoin()) {
322 auto fifoInType = llvm::cast<AIEObjectFifoType>(
323 linkOp->getInputObjectFifos()[0].getElemType());
324 auto producerType =
325 llvm::cast<MemRefType>(fifoInType.getElementType());
326 auto fifoOutType = llvm::cast<AIEObjectFifoType>(
327 linkOp->getOutputObjectFifos()[0].getElemType());
328 auto consumerType =
329 llvm::cast<MemRefType>(fifoOutType.getElementType());
330 if (consumerType != producerType) {
331 // TODO: Support for different memref types through shared
332 // memory without DMAs
333 state.splitBecauseLink.push_back(createOp);
334 }
335 std::optional<ObjectFifoAllocateOp> opAlloc =
336 getOptionalAllocateOp(createOp);
337 if (opAlloc.has_value()) {
338 TileOp delegate = opAlloc->getDelegateTileOp();
339 int prodShareDir;
340 int consShareDir;
341 auto consumerTileOp = dyn_cast<TileOp>(
342 createOp.getConsumerTiles()[0].getDefiningOp());
343 isSharedMemory(delegate, createOp.getProducerTileOp(),
344 &prodShareDir);
345 isSharedMemory(delegate, consumerTileOp, &consShareDir);
346 if ((prodShareDir == -1 || prodShareDir == 2) &&
347 (consShareDir == -1 || consShareDir == 2))
348 isUsedInLinkOp = false;
349 else
350 state.splitBecauseLink.push_back(createOp);
351 }
352 } else {
353 state.splitBecauseLink.push_back(createOp);
354 }
355 }
356 }
357
358 return !hasSharedMemory || atLeastOneConsumerWantsTransform ||
359 isUsedInLinkOp;
360 }
361
362 /// Function to retrieve ObjectFifoLinkOp of ObjectFifoCreateOp,
363 /// if it belongs to one.
364 std::optional<ObjectFifoLinkOp> getOptionalLinkOp(ObjectFifoCreateOp op) {
365 auto device = op->getParentOfType<DeviceOp>();
366 for (ObjectFifoLinkOp linkOp : device.getOps<ObjectFifoLinkOp>()) {
367 for (ObjectFifoCreateOp in : linkOp.getInputObjectFifos())
368 if (in == op)
369 return {linkOp};
370 for (ObjectFifoCreateOp out : linkOp.getOutputObjectFifos())
371 if (out == op)
372 return {linkOp};
373 }
374 return {};
375 }
376
377 ObjectFifoCreateOp
378 createObjectFifo(OpBuilder &builder, AIEObjectFifoType datatype,
379 std::string name, Value prodTile, Value consTile,
380 Attribute depth, BDDimLayoutArrayAttr dimensionsToStream,
381 BDDimLayoutArrayArrayAttr dimensionsFromStreamPerConsumer) {
382 auto ofName = builder.getStringAttr(name);
383 auto fifo = ObjectFifoCreateOp::create(
384 builder, builder.getUnknownLoc(), ofName, prodTile, consTile, depth,
385 datatype, dimensionsToStream, dimensionsFromStreamPerConsumer);
386 return fifo;
387 }
388
389 /// Function used to create objectFifo locks based on target architecture.
390 /// Called by createObjectFifoElements().
391 std::vector<LockOp>
392 createObjectFifoLocks(OpBuilder &builder, LockAnalysis &lockAnalysis,
393 ObjectFifoCreateOp op, int numElem,
394 int joinDistribFactor, TileOp creation_tile,
395 int repeatCount, ObjectFifoState &state) {
396 std::vector<LockOp> locks;
397 if (op.getDisableSynchronization())
398 return locks;
399 auto dev = op->getParentOfType<DeviceOp>();
400 auto &target = dev.getTargetModel();
401 // if shimTile external buffers are collected from input code
402 // create as many locks as there are external buffers
403 if (creation_tile.isShimTile()) {
404 numElem = 0;
405 if (!state.externalBuffersPerFifo[op].empty())
406 numElem = state.externalBuffersPerFifo[op].size();
407 }
408 if (target.getTargetArch() == AIEArch::AIE1) {
409 for (int i = 0; i < numElem; i++) {
410 // create corresponding aie1 locks
411 int initValue = op.getInitValues().has_value() ? 1 : 0;
412 int lockID = lockAnalysis.getLockID(creation_tile);
413 assert(lockID >= 0 && "No more locks to allocate!");
414 auto lock = LockOp::create(builder, builder.getUnknownLoc(),
415 creation_tile, lockID, initValue);
416 lock.getOperation()->setAttr(SymbolTable::getSymbolAttrName(),
417 builder.getStringAttr(op.name().str() +
418 "_lock_" +
419 std::to_string(i)));
420 locks.push_back(lock);
421 }
422 } else {
423 // create corresponding aie2 locks
424 for (int i = 0; i < joinDistribFactor; i++) {
425 auto initValues = op.getInitValues().has_value()
426 ? op.getInitValues().value().size()
427 : 0;
428 int prodLockID = lockAnalysis.getLockID(creation_tile);
429 assert(prodLockID >= 0 && "No more locks to allocate!");
430 int prodLockValue = (numElem - initValues) * repeatCount;
431 auto prodLock =
432 LockOp::create(builder, builder.getUnknownLoc(), creation_tile,
433 prodLockID, prodLockValue);
434 prodLock.getOperation()->setAttr(
435 SymbolTable::getSymbolAttrName(),
436 builder.getStringAttr(op.name().str() + "_prod_lock_" +
437 std::to_string(i)));
438 locks.push_back(prodLock);
439
440 int consLockID = lockAnalysis.getLockID(creation_tile);
441 assert(consLockID >= 0 && "No more locks to allocate!");
442 int consLockValue = initValues * repeatCount;
443 auto consLock =
444 LockOp::create(builder, builder.getUnknownLoc(), creation_tile,
445 consLockID, consLockValue);
446 consLock.getOperation()->setAttr(
447 SymbolTable::getSymbolAttrName(),
448 builder.getStringAttr(op.name().str() + "_cons_lock_" +
449 std::to_string(i)));
450 locks.push_back(consLock);
451 }
452 }
453 return locks;
454 }
455
456 /// Computes the total memory already used on one tile by summing
457 /// getAllocationSize() over every buffer located on targetTile, both from the buffersPerFifo map and from the extra buffers list.
// NOTE(review): the declaration line carrying the return type and function
// name is absent from this listing; the call sites in
// createObjectFifoElements() use the name calculateCurrentUsedMemory and store
// the result in an int.
// NOTE(review): a buffer present in both containers would be counted twice —
// confirm callers pass disjoint sets.
459 TileOp targetTile,
460 DenseMap<ObjectFifoCreateOp, std::vector<BufferOp>> &buffersPerFifo,
461 std::vector<BufferOp> &buffers) {
462 int totalUsedMemory = 0;
463
464 // Buffers already registered for any objectFifo
465 for (auto &[fifoOp, bufferList] : buffersPerFifo) {
466 for (auto &buffer : bufferList) {
467 // Only buffers placed on the target tile contribute
468 if (buffer.getTile() == targetTile.getResult()) {
469 auto bufferSizeBytes = buffer.getAllocationSize();
470 totalUsedMemory += bufferSizeBytes;
471 }
472 }
473 }
474
475 // Buffers that are not (yet) part of buffersPerFifo
476 for (auto &buffer : buffers) {
477 // Only buffers placed on the target tile contribute
478 if (buffer.getTile() == targetTile.getResult()) {
479 auto bufferSizeBytes = buffer.getAllocationSize();
480 totalUsedMemory += bufferSizeBytes;
481 }
482 }
483
484 return totalUsedMemory;
485 }
486
487 /// Analyzes the buffers of every objectFifo listed in state.splitFifos and
488 /// returns a map (ObjectFifoCreateOp -> bool) that is true when at least one
489 /// buffer of that objectFifo lives on a tile other than the producer tile of the objectFifo owning the buffers.
// If the objectFifo belongs to a link that has an entry in state.objFifoLinks,
// the buffers and the expected tile are taken from that linked objectFifo.
// ObjectFifos without an entry in state.buffersPerFifo are reported as false.
// NOTE(review): the line carrying this function's name and parameter list is
// absent from this listing; the body reads a variable named `state`.
490 std::map<ObjectFifoCreateOp, bool>
492 std::map<ObjectFifoCreateOp, bool> crossTileMap;
493
494 for (size_t i = 0; i < state.splitFifos.size(); i++) {
495 auto &[producerFifo, consumerFifos] = state.splitFifos[i];
496
497 // Analyze producer buffers
498 bool producerHasCrossTile = false;
499
500 ObjectFifoCreateOp target = producerFifo;
501 auto linkOp = getOptionalLinkOp(producerFifo);
502
503 if (linkOp &&
504 state.objFifoLinks.find(*linkOp) != state.objFifoLinks.end()) {
505 target = state.objFifoLinks[*linkOp]; // Use the linked target FIFO
506 }
507
508 if (state.buffersPerFifo.find(target) != state.buffersPerFifo.end()) {
509 // Compare each buffer's tile with the producer tile of `target`
510 auto &producerBuffers = state.buffersPerFifo[target];
511 TileOp expectedTile = target.getProducerTileOp();
512 for (auto &buffer : producerBuffers) {
513 TileOp bufferTile = buffer.getTile().getDefiningOp<TileOp>();
514 if (bufferTile != expectedTile) {
515 producerHasCrossTile = true;
516 break;
517 }
518 }
519 }
520 crossTileMap[producerFifo] = producerHasCrossTile;
521
522 // Analyze consumer buffers
523 for (auto &consumerFifo : consumerFifos) {
524 bool consumerHasCrossTile = false;
525 ObjectFifoCreateOp target = consumerFifo;
526 auto linkOp = getOptionalLinkOp(consumerFifo);
527 if (linkOp &&
528 state.objFifoLinks.find(*linkOp) != state.objFifoLinks.end()) {
529 target = state.objFifoLinks[*linkOp]; // Use the linked target FIFO
530 }
531
532 if (state.buffersPerFifo.find(target) != state.buffersPerFifo.end()) {
533 // Compare each buffer's tile with the producer tile of `target`
534 auto &consumerBuffers = state.buffersPerFifo[target];
535 TileOp expectedTile = target.getProducerTileOp();
536 for (auto &buffer : consumerBuffers) {
537 TileOp bufferTile = buffer.getTile().getDefiningOp<TileOp>();
538 if (bufferTile != expectedTile) {
539 consumerHasCrossTile = true;
540 break;
541 }
542 }
543 }
544 crossTileMap[consumerFifo] = consumerHasCrossTile;
545 }
546 }
547 return crossTileMap;
548 }
549
550 /// Helper function to find a tile at specific coordinates.
551 /// If a tile is not found, it creates a new one and returns it.
552 /// hostTile is the original tile from which we are searching for neighbors.
553 /// we create the new tile below the hostTile
554 TileOp findOrCreateTile(OpBuilder &builder, DeviceOp &dev, TileOp hostTile,
555 int col, int row) {
556 // First, try to find an existing tile
557 for (auto tile : dev.getOps<TileOp>()) {
558 if (tile.getCol() == col && tile.getRow() == row) {
559 return tile;
560 }
561 }
562
563 // If not found, create a new one.
564 OpBuilder::InsertionGuard g(builder);
565
566 auto savedInsertionPoint = builder.saveInsertionPoint();
567
568 // Find the last buffer operation after the host tile
569 Operation *insertAfter = hostTile.getOperation();
570 Operation *nextOp = insertAfter->getNextNode();
571 while (nextOp && isa<BufferOp>(nextOp)) {
572 insertAfter = nextOp;
573 nextOp = nextOp->getNextNode();
574 }
575
576 builder.setInsertionPointAfter(insertAfter);
577 auto newTile = TileOp::create(builder, builder.getUnknownLoc(), col, row);
578
579 builder.restoreInsertionPoint(savedInsertionPoint);
580
581 return newTile;
582 }
583
584 /// Function used to create objectFifo elements and their locks.
585 /// It maps the input objectFifo to associated buffers and locks.
/// Returns early without creating anything when op.size() is zero, when the
/// objectFifo is an AIE stream, or when `op` is part of a link whose elements
/// are (or will be) created through another objectFifo of that link.
/// share_direction == 1 places the elements on the first consumer tile; any
/// other value places them on the producer tile. An ObjectFifoAllocateOp
/// overrides this with its delegate tile when that tile's memory module is
/// shared with both producer and consumer; otherwise an op error is emitted.
/// On completion the created buffers and locks are stored in
/// state.buffersPerFifo[op] and state.locksPerFifo[op].
586 void createObjectFifoElements(OpBuilder &builder, LockAnalysis &lockAnalysis,
587 ObjectFifoCreateOp op, int share_direction,
588 ObjectFifoState &state) {
589 if (!op.size())
590 return;
591
592 if (op.getAieStream())
593 return;
594
595 std::vector<BufferOp> buffers;
596 auto fifo = llvm::cast<AIEObjectFifoType>(op.getElemType());
597 auto elemType = llvm::cast<MemRefType>(fifo.getElementType());
598 int numElem = op.size();
599 int of_elem_index = 0; // used to give objectFifo elements a symbolic name
600
601 // if this objectFifo is linked to another, check if the other's elements
602 // have already been created: if none of the output objectfifos of the link
603 // have initValues, then the elements that are created are those of the
604 // objFifo with elements of bigger size
605 bool linked = false;
606 auto linkOp = getOptionalLinkOp(op);
607 if (linkOp) {
608 auto fifoIn = linkOp->getInputObjectFifos()[0];
609 auto fifoOut = linkOp->getOutputObjectFifos()[0];
610 linked = true;
611 if (state.objFifoLinks.find(*linkOp) != state.objFifoLinks.end())
612 return; // elements have already been created
613 if (linkOp->isJoin()) {
614 // if join, fifoOut has bigger size
615 if (op.name() != fifoOut.name())
616 return;
617 } else if (linkOp->isDistribute()) {
618 // if distribute, fifoIn has bigger size
619 if (op.name() != fifoIn.name())
620 return;
621 } else {
622 // check if output objectfifo has initValues
623 if (fifoOut.getInitValues().has_value()) {
624 if (fifoOut.name() != op.name())
625 return;
626 } else {
627 // check which objectfifo of the link has bigger size
628 auto fifoInType = llvm::cast<AIEObjectFifoType>(fifoIn.getElemType());
629 auto elemInType = llvm::cast<MemRefType>(fifoInType.getElementType());
630 int inSize = elemInType.getNumElements();
631
632 auto fifoOutType =
633 llvm::cast<AIEObjectFifoType>(fifoOut.getElemType());
634 auto elemOutType =
635 llvm::cast<MemRefType>(fifoOutType.getElementType());
636
// Sizes are compared in number of elements, not in bytes; on a tie the input
// objectFifo owns the elements.
637 if (int outSize = elemOutType.getNumElements(); inSize >= outSize) {
638 if (op.name() != fifoIn.name())
639 return;
640 } else {
641 // When output has padDimensions, MemTile buffer should use
642 // input (smaller) size — padding is applied on-the-fly by DMA
643 bool outHasPadding = fifoOut.getPadDimensions().has_value();
644 if (outHasPadding) {
645 if (op.name() != fifoIn.name())
646 return;
647 } else {
648 if (fifoOut.name() != op.name())
649 return;
650 }
651 }
652 }
653 }
654 }
655
// Pick the tile whose memory holds the elements (see function comment).
656 TileOp creation_tile;
657 auto consumerTileOp =
658 dyn_cast<TileOp>(op.getConsumerTiles()[0].getDefiningOp());
659 if (share_direction != 1)
660 creation_tile = op.getProducerTileOp();
661 else
662 creation_tile = consumerTileOp;
663
664 std::optional<ObjectFifoAllocateOp> opAlloc = getOptionalAllocateOp(op);
665 if (opAlloc.has_value()) {
666 TileOp delegate = opAlloc->getDelegateTileOp();
667 int prodShareDir;
668 int consShareDir;
669 isSharedMemory(delegate, op.getProducerTileOp(), &prodShareDir);
670 isSharedMemory(delegate, consumerTileOp, &consShareDir);
// -1 and 2 are the share directions in which the first argument's (the
// delegate's) memory module is the shared one.
671 if ((prodShareDir == -1 || prodShareDir == 2) &&
672 (consShareDir == -1 || consShareDir == 2))
673 creation_tile = delegate;
674 else
675 opAlloc->emitOpError("objectfifo has no shared memory access to "
676 "delegate tile's memory module");
677 }
678
679 // Reset opbuilder location to after the last tile declaration
680 Operation *t = nullptr;
681 auto dev = op->getParentOfType<DeviceOp>();
682 for (auto tile_op : dev.getBody()->getOps<TileOp>()) {
683 t = tile_op.getOperation();
684 }
685
686 builder.setInsertionPointAfter(t);
687 for (int i = 0; i < numElem; i++) {
688
689 mlir::ElementsAttr initValues = nullptr;
// No BufferOp is created for shim tiles; of_elem_index still advances.
690 if (!creation_tile.isShimTile()) {
691 if (op.getInitValues().has_value()) {
692 initValues =
693 llvm::cast<mlir::ElementsAttr>(op.getInitValues().value()[i]);
694 }
695
696 auto elementType = elemType.getElementType();
697
698 DataLayout dataLayout = DataLayout::closest(op.getOperation());
699 int64_t elementBitWidth = dataLayout.getTypeSizeInBits(elementType);
700
// Size of one objectFifo element in bytes.
701 auto totalSizeBytes = elemType.getNumElements() * elementBitWidth / 8;
702 auto &targetModel = dev.getTargetModel();
703
704 int maxDataMemorySize = 0;
705 if (creation_tile.isMemTile())
706 maxDataMemorySize =
707 targetModel.getMemTileSize(); // getMemTileSize returns in Bytes
708 else
709 maxDataMemorySize =
710 targetModel
711 .getLocalMemorySize(); // getLocalMemorySize returns in Bytes
712
713 // also need to count the buffers that are not in buffersPerFifo
714 int currentUsedMemory = calculateCurrentUsedMemory(
715 creation_tile, state.buffersPerFifo, buffers);
716
717 // Check if current tile can hold the new buffer or not
718 TileOp current_buf_allocation_tile =
719 creation_tile; // used to keep track of the tile where the buffer is
720 // allocated
// Spilling to a neighbour is only attempted when creation_tile is a MemTile.
721 if (creation_tile.isMemTile()) {
722 if (static_cast<int>(currentUsedMemory + totalSizeBytes) >
723 maxDataMemorySize) {
724 // if not, check if the neighbour can hold the new buffer or not
725 // Find neighbor tiles with shared memory
726 std::vector<TileOp> neighborTiles;
727 int currentCol = creation_tile.getCol();
728 int currentRow = creation_tile.getRow();
729
730 // Check tile to the left
731 if (currentCol > 0) {
732 TileOp leftTile = findOrCreateTile(builder, dev, creation_tile,
733 currentCol - 1, currentRow);
734
// NOTE(review): this local shadows the share_direction parameter.
735 int share_direction = 0;
736 if (isSharedMemory(creation_tile, leftTile, &share_direction) &&
737 (share_direction == 1 || share_direction == 2)) {
738 neighborTiles.push_back(leftTile);
739 }
740 }
741
742 // Check tile to the right
743 if (currentCol < (targetModel.columns() - 1)) {
744 TileOp rightTile = findOrCreateTile(builder, dev, creation_tile,
745 currentCol + 1, currentRow);
// NOTE(review): this local shadows the share_direction parameter.
746 int share_direction = 0;
747 if (isSharedMemory(creation_tile, rightTile, &share_direction) &&
748 (share_direction == 1 || share_direction == 2)) {
749 neighborTiles.push_back(rightTile);
750 }
751 }
752
753 // try to allocate on neighbor tiles
// The first neighbour with enough room wins; if none has room the buffer
// is still created on creation_tile.
754 if (!neighborTiles.empty()) {
755 for (auto &tile : neighborTiles) {
756 // Try to allocate on this neighbor tile
757 int neighborUsedMemory = calculateCurrentUsedMemory(
758 tile, state.buffersPerFifo, buffers);
759 if (static_cast<int>(neighborUsedMemory + totalSizeBytes) <=
760 maxDataMemorySize) {
761 // Allocate buffer on neighbor tile, change creation_tile to
762 // be this neighbour tile
763 current_buf_allocation_tile = tile;
764 break;
765 }
766 }
767 }
768 }
769 }
770 auto buff = BufferOp::create(
771 builder, builder.getUnknownLoc(), elemType,
772 current_buf_allocation_tile,
773 builder.getStringAttr(op.name().str() + "_buff_" +
774 std::to_string(of_elem_index)),
775 /*address*/ nullptr, initValues,
776 /*mem_bank*/ nullptr);
777 buffers.push_back(buff);
778 }
779 of_elem_index++;
780 }
781
782 int repeatCount = 1;
783 int joinDistribFactor = 1;
784 if (op.getRepeatCount().has_value())
785 repeatCount = op.getRepeatCount().value();
786 if (linked) {
// A repeat count on the link takes precedence over the one on the objectFifo.
787 if (linkOp->getRepeatCount().has_value())
788 repeatCount = linkOp->getRepeatCount().value();
789 if (linkOp->isDistribute())
790 joinDistribFactor *= linkOp->getFifoOuts().size();
791 else if (linkOp->isJoin())
792 joinDistribFactor *= linkOp->getFifoIns().size();
// Remember that this objectFifo owns the elements of the link.
793 state.objFifoLinks[*linkOp] = op;
794 }
// Locks are created on creation_tile, even if some buffers were placed on a
// neighbouring tile above.
795 std::vector<LockOp> locks = createObjectFifoLocks(
796 builder, lockAnalysis, op, numElem, joinDistribFactor, creation_tile,
797 repeatCount, state);
798 state.buffersPerFifo[op] = buffers;
799 state.locksPerFifo[op] = locks;
800 }
801
802 /// Function that returns a pointer to the block of a Region
803 /// that contains the AIEEndOp.
804 Block *findEndOpBlock(Region &r) {
805 Block *endBlock = nullptr;
806 for (auto &bl : r.getBlocks())
807 if (!bl.getOps<EndOp>().empty())
808 endBlock = &bl;
809 return endBlock;
810 }
811
812 /// Function used to create a Bd block.
813 template <typename MyOp>
814 void createBd(OpBuilder &builder, LockOp acqLock, int acqMode,
815 LockAction acqLockAction, LockOp relLock, int relMode,
816 MyOp buff, int offset, int len, Block *succ,
817 BDDimLayoutArrayAttr dims, BDPadLayoutArrayAttr padDimensions,
818 std::optional<PacketInfoAttr> bdPacket) {
819 if (acqLock)
820 UseLockOp::create(builder, builder.getUnknownLoc(), acqLock,
821 acqLockAction, acqMode);
822 if (bdPacket) {
823 DMABDPACKETOp::create(builder, builder.getUnknownLoc(),
824 bdPacket->getPktType(), bdPacket->getPktId());
825 }
826 if (!dims.getValue().empty() && padDimensions) {
827 DMABDOp::create(builder, builder.getUnknownLoc(), buff, offset, len, dims,
828 padDimensions);
829 } else if (!dims.getValue().empty()) {
830 DMABDOp::create(builder, builder.getUnknownLoc(), buff, offset, len,
831 dims);
832 } else {
833 DMABDOp::create(builder, builder.getUnknownLoc(), buff, offset, len);
834 }
835 if (acqLock)
836 UseLockOp::create(builder, builder.getUnknownLoc(), relLock,
837 LockAction::Release, relMode);
838 NextBDOp::create(builder, builder.getUnknownLoc(), succ);
839 }
840
841 /// Function used to create a Bd block.
842 /// If lockMode is 0 we create a consumerDMA (i.e. on producer tile) else a
843 /// producerDMA (i.e. on consumer tile).
844 template <typename MyOp>
845 void createBdBlock(OpBuilder &builder, ObjectFifoCreateOp op, int lockMode,
846 int acqNum, int relNum, MyOp buff, int offset, int len,
847 DMAChannelDir channelDir, size_t lockIndex, Block *succ,
848 BDDimLayoutArrayAttr dims,
849 BDPadLayoutArrayAttr padDimensions,
850 std::optional<PacketInfoAttr> bdPacket,
851 ObjectFifoState &state, bool distribOrJoin = false) {
852 LockOp acqLock;
853 LockOp relLock;
854 int acqMode = 1;
855 int relMode = 1;
856 auto acqLockAction = LockAction::Acquire;
857 if (state.locksPerFifo[op].size() > 0) {
858 auto dev = op->getParentOfType<DeviceOp>();
859 if (auto &target = dev.getTargetModel();
860 target.getTargetArch() == AIEArch::AIE1) {
861 acqMode = lockMode == 0 ? 1 : 0;
862 relMode = lockMode == 0 ? 0 : 1;
863 acqLock = state.locksPerFifo[op][lockIndex];
864 relLock = state.locksPerFifo[op][lockIndex];
865 } else {
866 acqMode = acqNum;
867 relMode = relNum;
868 acqLockAction = LockAction::AcquireGreaterEqual;
869 int prodLockIndex = 0;
870 int consLockIndex = 1;
871 if (distribOrJoin) {
872 prodLockIndex = lockIndex * 2;
873 consLockIndex = lockIndex * 2 + 1;
874 }
875 acqLock = channelDir == DMAChannelDir::S2MM
876 ? state.locksPerFifo[op][prodLockIndex]
877 : state.locksPerFifo[op][consLockIndex];
878 relLock = channelDir == DMAChannelDir::S2MM
879 ? state.locksPerFifo[op][consLockIndex]
880 : state.locksPerFifo[op][prodLockIndex];
881 }
882 }
883 createBd(builder, acqLock, acqMode, acqLockAction, relLock, relMode, buff,
884 offset, len, succ, dims, padDimensions, bdPacket);
885 }
886
887 /// Function that either calls createAIETileDMA(), createShimDMA() or
888 /// createMemTileDMA() based on op tile row value.
889 void createDMA(DeviceOp &device, OpBuilder &builder, ObjectFifoCreateOp op,
890 DMAChannelDir channelDir, int channelIndex, int lockMode,
891 BDDimLayoutArrayAttr dims, BDPadLayoutArrayAttr pad_dims,
892 std::optional<PacketInfoAttr> bdPacket,
893 ObjectFifoState &state) {
894 if (op.getProducerTileOp().isShimTile()) {
895 createShimDMA(device, builder, op, channelDir, channelIndex, lockMode,
896 dims, bdPacket, state);
897 } else if (op.getProducerTileOp().isMemTile()) {
898 BDPadLayoutArrayAttr padDims = nullptr;
899 if (channelDir == DMAChannelDir::MM2S && pad_dims)
900 padDims = pad_dims;
901 createMemTileDMA(device, builder, op, channelDir, channelIndex, lockMode,
902 dims, padDims, bdPacket, state);
903 } else {
904 createAIETileDMA(device, builder, op, channelDir, channelIndex, lockMode,
905 dims, bdPacket, state);
906 }
907 }
908
909 /// Function used to create a MemOp region with a DMA channel.
910 /// It uses creatBdBlock(), see there for lockMode input.
911 void createAIETileDMA(DeviceOp &device, OpBuilder &builder,
912 ObjectFifoCreateOp op, DMAChannelDir channelDir,
913 int channelIndex, int lockMode,
914 BDDimLayoutArrayAttr dims,
915 std::optional<PacketInfoAttr> bdPacket,
916 ObjectFifoState &state) {
917 size_t numBlocks = op.size();
918 if (numBlocks == 0)
919 return;
920
921 int acqNum = 1;
922 int relNum = 1;
923
924 auto fifo = llvm::cast<AIEObjectFifoType>(op.getElemType());
925 auto elemType = llvm::cast<MemRefType>(fifo.getElementType());
926 int len = elemType.getNumElements();
927
928 // check for repeat count
929 int repeatCount = 1;
930 if (op.getRepeatCount().has_value())
931 repeatCount = op.getRepeatCount().value();
932
933 // search for the buffers/locks (based on if this objFifo has a link)
934 ObjectFifoCreateOp target = op;
935 if (std::optional<ObjectFifoLinkOp> linkOp = getOptionalLinkOp(op);
936 linkOp.has_value()) {
937 if (state.objFifoLinks.find(linkOp.value()) != state.objFifoLinks.end()) {
938 target = state.objFifoLinks[linkOp.value()];
939 if (target == op) {
940 if (linkOp->getRepeatCount().has_value()) {
941 acqNum *= linkOp->getRepeatCount().value();
942 relNum *= linkOp->getRepeatCount().value();
943 }
944 }
945 }
946 }
947
948 // search for MemOp
949 Operation *producerMem = nullptr;
950 for (auto memOp : device.getOps<MemOp>()) {
951 if (memOp.getTile() == op.getProducerTile()) {
952 producerMem = memOp.getOperation();
953 break;
954 }
955 }
956
957 // if none exists, create one
958 TileOp objFifoTileOp = target.getProducerTileOp();
959 if (producerMem == nullptr) {
960 OpBuilder::InsertionGuard g(builder);
961 builder.setInsertionPoint(device.getBody()->getTerminator());
962 auto newMemOp =
963 MemOp::create(builder, builder.getUnknownLoc(), objFifoTileOp);
964 {
965 OpBuilder::InsertionGuard g(builder);
966 builder.setInsertionPointToStart(&newMemOp.getRegion().emplaceBlock());
967 EndOp::create(builder, builder.getUnknownLoc());
968 }
969 producerMem = newMemOp.getOperation();
970 }
971 Block *endBlock = findEndOpBlock(producerMem->getRegion(0));
972 Block *lastDmaBlock = endBlock->getSinglePredecessor();
973 Block *dmaBlock = builder.createBlock(endBlock);
974 Block *bdBlock = builder.createBlock(endBlock);
975
976 // create DMA channel
977 builder.setInsertionPointToStart(dmaBlock);
978 DMAStartOp::create(builder, builder.getUnknownLoc(), channelDir,
979 channelIndex, /*repeatCout*/ 0, bdBlock, endBlock);
980 if (lastDmaBlock != nullptr)
981 lastDmaBlock->getTerminator()->setSuccessor(dmaBlock, 1);
982
983 // create Bd blocks
984 Block *succ;
985 Block *curr = bdBlock;
986 size_t elemIndex = 0;
987 size_t totalBlocks = 0;
988 for (size_t i = 0; i < numBlocks; i++) {
989 if (elemIndex >= state.buffersPerFifo[target].size())
990 break;
991 for (int r = 0; r < repeatCount; r++) {
992 if (totalBlocks == numBlocks * repeatCount - 1)
993 succ = bdBlock;
994 else
995 succ = builder.createBlock(endBlock);
996
997 builder.setInsertionPointToStart(curr);
998 createBdBlock<BufferOp>(builder, target, lockMode, acqNum, relNum,
999 state.buffersPerFifo[target][elemIndex],
1000 /*offset*/ 0, len, channelDir, elemIndex, succ,
1001 dims, nullptr, bdPacket, state);
1002 curr = succ;
1003 totalBlocks++;
1004 }
1005 elemIndex++;
1006 }
1007 }
1008
1009 /// Function used to create a ShimDMAOp region with a DMA channel.
1010 /// It uses creatBdBlock(), see there for lockMode input.
1011 void createShimDMA(DeviceOp &device, OpBuilder &builder,
1012 ObjectFifoCreateOp op, DMAChannelDir channelDir,
1013 int channelIndex, int lockMode, BDDimLayoutArrayAttr dims,
1014 std::optional<PacketInfoAttr> bdPacket,
1015 ObjectFifoState &state) {
1016 size_t numBlocks = state.externalBuffersPerFifo[op].size();
1017 if (numBlocks == 0)
1018 return;
1019
1020 int acqNum = 1;
1021 int relNum = 1;
1022
1023 // search for ShimDMAOp
1024 Operation *producerDMA = nullptr;
1025 for (auto dmaOp : device.getOps<ShimDMAOp>()) {
1026 if (dmaOp.getTile() == op.getProducerTile()) {
1027 producerDMA = dmaOp.getOperation();
1028 break;
1029 }
1030 }
1031
1032 // if none exists, create one
1033 TileOp objFifoTileOp = op.getProducerTileOp();
1034 if (producerDMA == nullptr) {
1035 OpBuilder::InsertionGuard g(builder);
1036 builder.setInsertionPoint(device.getBody()->getTerminator());
1037 auto newDMAOp = ShimDMAOp::create(builder, builder.getUnknownLoc(),
1038 builder.getIndexType(), objFifoTileOp);
1039 {
1040 OpBuilder::InsertionGuard g(builder);
1041 builder.setInsertionPointToStart(&newDMAOp.getRegion().emplaceBlock());
1042 EndOp::create(builder, builder.getUnknownLoc());
1043 }
1044 producerDMA = newDMAOp.getOperation();
1045 }
1046
1047 Block *endBlock = findEndOpBlock(producerDMA->getRegion(0));
1048 Block *lastDmaBlock = endBlock->getSinglePredecessor();
1049 Block *dmaBlock = builder.createBlock(endBlock);
1050 Block *bdBlock = builder.createBlock(endBlock);
1051
1052 // create DMA channel
1053 builder.setInsertionPointToStart(dmaBlock);
1054 DMAStartOp::create(builder, builder.getUnknownLoc(), channelDir,
1055 channelIndex, /*repeatCout*/ 0, bdBlock, endBlock);
1056 if (lastDmaBlock != nullptr)
1057 lastDmaBlock->getTerminator()->setSuccessor(dmaBlock, 1);
1058
1059 // create Bd blocks
1060 Block *succ;
1061 Block *curr = bdBlock;
1062 size_t elemIndex = 0;
1063 for (size_t i = 0; i < numBlocks; i++) {
1064 if (elemIndex >= state.externalBuffersPerFifo[op].size())
1065 break;
1066 if (i == numBlocks - 1)
1067 succ = bdBlock;
1068 else
1069 succ = builder.createBlock(endBlock);
1070
1071 MemRefType buffer = state.externalBuffersPerFifo[op][elemIndex].getType();
1072 int len = buffer.getNumElements();
1073 builder.setInsertionPointToStart(curr);
1074 createBdBlock<ExternalBufferOp>(
1075 builder, op, lockMode, acqNum, relNum,
1076 state.externalBuffersPerFifo[op][elemIndex],
1077 /*offset*/ 0, len, channelDir, elemIndex, succ, dims, nullptr,
1078 bdPacket, state);
1079 curr = succ;
1080 elemIndex++;
1081 }
1082 }
1083
1084 /// Function used to create a MemTileDMAOp region with a DMA channel.
1085 /// It uses creatBdBlock(), see there for lockMode input.
1086 void createMemTileDMA(DeviceOp &device, OpBuilder &builder,
1087 ObjectFifoCreateOp op, DMAChannelDir channelDir,
1088 int channelIndex, int lockMode,
1089 BDDimLayoutArrayAttr dims,
1090 BDPadLayoutArrayAttr padDimensions,
1091 std::optional<PacketInfoAttr> bdPacket,
1092 ObjectFifoState &state) {
1093 size_t numBlocks = op.size();
1094 if (numBlocks == 0)
1095 return;
1096
1097 auto fifo = llvm::cast<AIEObjectFifoType>(op.getElemType());
1098 auto elemType = llvm::cast<MemRefType>(fifo.getElementType());
1099 int lenOut = elemType.getNumElements();
1100 int acqNum = 1;
1101 int relNum = 1;
1102
1103 // check for repeat count
1104 int repeatCount = 1;
1105 if (op.getRepeatCount().has_value())
1106 repeatCount = op.getRepeatCount().value();
1107
1108 // check for BD chain repeat count
1109 auto bdChainIterCount = op.getIterCount();
1110
1111 // search for the buffers/locks (based on if this objFifo has a link)
1112 // identify size difference between input and output memrefs
1113 ObjectFifoCreateOp target = op;
1114 bool isDistribute = false;
1115 bool isJoin = false;
1116 int extraOffset = 0;
1117 int joinDistribFactor = 1;
1118 int joinDistribLockIndex = 0;
1119 auto linkOp = getOptionalLinkOp(op);
1120 if (linkOp) {
1121 if (state.objFifoLinks.find(*linkOp) != state.objFifoLinks.end()) {
1122 target = state.objFifoLinks[*linkOp];
1123 auto srcOffsets = linkOp->getSrcOffsets();
1124 auto dstOffsets = linkOp->getDstOffsets();
1125
1126 if (linkOp->getRepeatCount().has_value())
1127 if (linkOp->getInputObjectFifos()[0] == op) {
1128 acqNum *= linkOp->getRepeatCount().value();
1129 relNum *= linkOp->getRepeatCount().value();
1130 }
1131
1132 if (linkOp->isJoin()) {
1133 // compute offset and length
1134 isJoin = true;
1135 if (target == op) {
1136 joinDistribFactor *= linkOp->getFifoIns().size();
1137 } else {
1138 int i = 0;
1139 for (auto fifoIn : linkOp->getInputObjectFifos()) {
1140 if (fifoIn.name() == op.name())
1141 break;
1142 i++;
1143 }
1144 extraOffset = *getConstantIntValue(srcOffsets[i]);
1145 lenOut = linkOp->getJoinTransferLengths()[i];
1146 joinDistribLockIndex = i;
1147 }
1148 } else if (linkOp->isDistribute()) {
1149 // compute offset and length
1150 isDistribute = true;
1151 if (target == op) {
1152 joinDistribFactor *= linkOp->getFifoOuts().size();
1153 } else {
1154 int i = 0;
1155 for (auto fifoOut : linkOp->getOutputObjectFifos()) {
1156 if (fifoOut.name() == op.name())
1157 break;
1158 i++;
1159 }
1160 extraOffset = *getConstantIntValue(dstOffsets[i]);
1161 lenOut = linkOp->getDistributeTransferLengths()[i];
1162 joinDistribLockIndex = i;
1163 }
1164 } else {
1165 if (target != op) {
1166 auto targetFifo =
1167 llvm::cast<AIEObjectFifoType>(target.getElemType());
1168 auto targetElemType =
1169 llvm::cast<MemRefType>(targetFifo.getElementType());
1170 int targetLen = targetElemType.getNumElements();
1171 // Only override when target is larger or equal. When target
1172 // is smaller (padDimensions size mismatch after buffer
1173 // ownership change), op's own element count is correct.
1174 if (targetLen >= lenOut)
1175 lenOut = targetLen;
1176 }
1177 }
1178
1179 // check if current op is of smaller size in link
1180 if (target != op) {
1181 numBlocks = target.size();
1182 }
1183 }
1184 }
1185
1186 // search for MemTileDMAOp
1187 Operation *producerDMA = nullptr;
1188 for (auto dmaOp : device.getOps<MemTileDMAOp>()) {
1189 if (dmaOp.getTile() == target.getProducerTile()) {
1190 producerDMA = dmaOp.getOperation();
1191 break;
1192 }
1193 }
1194
1195 // if none exists, create one
1196 TileOp objFifoTileOp = target.getProducerTileOp();
1197 if (producerDMA == nullptr) {
1198 OpBuilder::InsertionGuard g(builder);
1199 builder.setInsertionPoint(device.getBody()->getTerminator());
1200 auto newDMAOp =
1201 MemTileDMAOp::create(builder, builder.getUnknownLoc(), objFifoTileOp);
1202 {
1203 OpBuilder::InsertionGuard g(builder);
1204 builder.setInsertionPointToStart(&newDMAOp.getRegion().emplaceBlock());
1205 EndOp::create(builder, builder.getUnknownLoc());
1206 }
1207 producerDMA = newDMAOp.getOperation();
1208 }
1209
1210 Block *endBlock = findEndOpBlock(producerDMA->getRegion(0));
1211 Block *lastDmaBlock = endBlock->getSinglePredecessor();
1212 Block *dmaBlock = builder.createBlock(endBlock);
1213 Block *bdBlock = builder.createBlock(endBlock);
1214
1215 // create DMA channel
1216 builder.setInsertionPointToStart(dmaBlock);
1217
1218 // Use iter_count if available, otherwise default to 0
1219 int taskCount = 0;
1220 bool isBdChainMode = false;
1221 if (bdChainIterCount.has_value()) {
1222 taskCount = bdChainIterCount.value() - 1;
1223 isBdChainMode = true;
1224 }
1225 DMAStartOp::create(builder, builder.getUnknownLoc(), channelDir,
1226 channelIndex, taskCount, bdBlock, endBlock);
1227 if (lastDmaBlock != nullptr)
1228 lastDmaBlock->getTerminator()->setSuccessor(dmaBlock, 1);
1229
1230 // create Bd blocks
1231 Block *succ;
1232 Block *curr = bdBlock;
1233 size_t elemIndex = 0;
1234 size_t lockIndex = 0;
1235 size_t totalBlocks = 0;
1236 bool distribOrJoin = false;
1237
1238 for (size_t i = 0; i < numBlocks; i++) {
1239 if (elemIndex >= state.buffersPerFifo[target].size())
1240 break;
1241 for (int r = 0; r < repeatCount * joinDistribFactor; r++) {
1242 if (totalBlocks == numBlocks * repeatCount * joinDistribFactor - 1) {
1243 // If iter_count attribute is set (BD chain mode), create a
1244 // dedicated terminating block
1245 if (isBdChainMode) {
1246 succ = builder.createBlock(endBlock);
1247 // Create a separate terminating block with aie.end for this
1248 // specific DMA channel
1249 builder.setInsertionPointToStart(succ);
1250 EndOp::create(builder, builder.getUnknownLoc());
1251 } else {
1252 succ = bdBlock;
1253 }
1254 } else {
1255 succ = builder.createBlock(endBlock);
1256 }
1257
1258 builder.setInsertionPointToStart(curr);
1259 int offset = 0;
1260 if (isDistribute || isJoin) {
1261 distribOrJoin = true;
1262 if (target == op) {
1263 if (isDistribute) {
1264 offset = *getConstantIntValue(linkOp->getDstOffsets()[r]);
1265 lenOut = linkOp->getDistributeTransferLengths()[r];
1266 } else {
1267 offset = *getConstantIntValue(linkOp->getSrcOffsets()[r]);
1268 lenOut = linkOp->getJoinTransferLengths()[r];
1269 }
1270 lockIndex = r % joinDistribFactor;
1271 } else {
1272 offset = extraOffset;
1273 lockIndex = joinDistribLockIndex;
1274 }
1275 } else {
1276 lockIndex = elemIndex;
1277 }
1278
1279 createBdBlock<BufferOp>(builder, target, lockMode, acqNum, relNum,
1280 state.buffersPerFifo[target][elemIndex], offset,
1281 lenOut, channelDir, lockIndex, succ, dims,
1282 padDimensions, bdPacket, state, distribOrJoin);
1283 curr = succ;
1284 totalBlocks++;
1285 }
1286 elemIndex++;
1287 }
1288 }
1289
// Function that computes the Least Common Multiple of the values of a set.
//
// Returns 1 for an empty set and 0 as soon as the set contains 0. The
// accumulation uses std::lcm, which divides by the GCD before multiplying, so
// no intermediate product overflows as long as the result itself fits in an
// int. Values are expected to be non-negative.
int computeLCM(const std::set<int> &values) {
  int lcm = 1;
  for (int value : values)
    lcm = std::lcm(lcm, value);
  return lcm;
}
1298
1299 // Function that unrolls for-loops that contain objectFifo operations.
1300 LogicalResult unrollForLoops(DeviceOp &device, OpBuilder &builder,
1301 std::set<TileOp> objectFifoTiles) {
1302 for (auto coreOp : device.getOps<CoreOp>()) {
1303 if (objectFifoTiles.count(coreOp.getTileOp()) > 0) {
1304 std::vector<scf::ForOp> unrolledLoops;
1305 std::map<Operation *, bool> foundMap;
1306 std::map<Operation *, int64_t> remainderMap;
1307 std::map<Operation *, int64_t> tripCountMap;
1308 WalkResult res = coreOp.walk([&](scf::ForOp forLoop) {
1309 // look for operations on objectFifos
1310 // when multiple fifos in same loop, must use the smallest
1311 // common multiplier as the unroll factor
1312 foundMap[forLoop.getOperation()] = false;
1313 std::set<int> objFifoSizes;
1314 Block *body = forLoop.getBody();
1315 remainderMap[forLoop.getOperation()] = 0;
1316 for (auto acqOp : body->getOps<ObjectFifoAcquireOp>()) {
1317 if (acqOp.getOperation()->getParentOp() == forLoop) {
1318 foundMap[forLoop.getOperation()] = true;
1319 ObjectFifoCreateOp op = acqOp.getObjectFifo();
1320 objFifoSizes.insert(op.size());
1321 }
1322 }
1323 // If the loop doesn't have acquire and release locks
1324 // Push it to the unrolledLoops to avoid unrolling
1325 if (!foundMap[forLoop.getOperation()]) {
1326 unrolledLoops.push_back(forLoop);
1327 return WalkResult::advance();
1328 }
1329 // Walk in the loop region to unroll the loop and its remainder
1330 Region *region = forLoop->getParentRegion();
1331 scf::ForOp prevLoop;
1332 prevLoop = forLoop;
1333 tripCountMap[prevLoop.getOperation()] = 0;
1334 while (remainderMap[prevLoop.getOperation()] > 1 ||
1335 foundMap[prevLoop.getOperation()]) {
1336 region->walk([&](scf::ForOp remLoop) {
1337 bool skipLoop = false;
1338 int64_t tripCount = 0;
1339 if (remLoop.getSingleLowerBound() &&
1340 remLoop.getSingleUpperBound() && remLoop.getSingleStep()) {
1341 tripCount = remLoop.getStaticTripCount()->getSExtValue();
1342 }
1343 int unrollFactor =
1344 computeLCM(objFifoSizes); // also counts original loop body
1345 // Loop ids are not unique.
1346 // Sometimes, immediately after unrolling, the unrolled loop
1347 // and the one next to it (can be the remainder loop or an
1348 // independent loop) will have the same ID. This makes it
1349 // difficult to identify which loop needs to be unrolled.
1350 // Once it restarts walking from start, it ends up allocating
1351 // new ID to each loop.
1352 if (remainderMap[prevLoop.getOperation()] > 1 &&
1353 foundMap[remLoop.getOperation()] == false &&
1354 prevLoop != remLoop) {
1355 skipLoop = true;
1356 }
1357 if (std::count(unrolledLoops.begin(), unrolledLoops.end(),
1358 remLoop) == 0 &&
1359 !skipLoop) {
1360 tripCountMap[remLoop.getOperation()] = tripCount;
1361 // if loop iterations < unrollFactor, unroll the loop fully
1362 if (tripCountMap[remLoop.getOperation()] < unrollFactor)
1363 unrollFactor = tripCountMap[remLoop.getOperation()];
1364 // If unrollFactor = 0,divide by zero
1365 if (unrollFactor == 0) {
1366 remLoop.emitOpError()
1367 << "could not be unrolled with unrollFactor = 0, check "
1368 "loop boundaries."
1369 << "\n";
1370 return WalkResult::interrupt();
1371 }
1372 remainderMap[remLoop.getOperation()] =
1373 tripCountMap[remLoop.getOperation()] % unrollFactor;
1374 auto step = remLoop.getStep()
1375 .getDefiningOp<arith::ConstantOp>()
1376 .getValue();
1377 int64_t step_value = llvm::dyn_cast<IntegerAttr>(step).getInt();
1378
1379 if (step_value < unrollFactor ||
1380 foundMap[remLoop.getOperation()]) {
1381 // Process the for loop
1382 if (failed(mlir::loopUnrollByFactor(remLoop, unrollFactor))) {
1383 remLoop.emitOpError()
1384 << "could not be unrolled with unrollFactor: "
1385 << unrollFactor << "\n";
1386 return WalkResult::interrupt();
1387 }
1388 unrolledLoops.push_back(remLoop);
1389 foundMap[remLoop.getOperation()] = false;
1390 } else {
1391 remainderMap[remLoop.getOperation()] = 0;
1392 foundMap[remLoop.getOperation()] = false;
1393 }
1394 } else {
1395 remainderMap[remLoop.getOperation()] = 0;
1396 foundMap[remLoop.getOperation()] = false;
1397 }
1398 prevLoop = remLoop;
1399 return WalkResult::advance();
1400 });
1401 }
1402 return WalkResult::advance();
1403 });
1404 if (res.wasInterrupted())
1405 return failure();
1406 }
1407 }
1408 return success();
1409 }
1410
1411 // Function that generates the IR to update runtime state of objectfifo
1412 // accesses. Called by dynamicGlobalObjectFifos().
1413 void updateGlobalNextIndex(OpBuilder &builder, ObjectFifoReleaseOp relOp,
1414 BufferOp globalNextIndex, arith::ConstantOp index,
1415 arith::ConstantOp size) {
1416 builder.setInsertionPointAfter(relOp);
1417 Value oldCounter = memref::LoadOp::create(
1418 builder, builder.getUnknownLoc(), globalNextIndex,
1419 ValueRange(ArrayRef({index.getResult()})));
1420 Value val =
1421 arith::ConstantOp::create(builder, oldCounter.getLoc(),
1422 builder.getI32IntegerAttr(relOp.getSize()));
1423 Value sum = arith::AddIOp::create(builder, val.getLoc(), oldCounter, val);
1424 Value isGreaterEqual = arith::CmpIOp::create(
1425 builder, sum.getLoc(), arith::CmpIPredicate::sge, sum, size);
1426 Value newCounter = arith::SelectOp::create(
1427 builder, sum.getLoc(), isGreaterEqual,
1428 arith::SubIOp::create(builder, sum.getLoc(), sum, size), sum);
1429 memref::StoreOp::create(builder, size.getLoc(), newCounter, globalNextIndex,
1430 ValueRange(ArrayRef({index.getResult()})));
1431 }
1432
1433 // Function that generates the IR for objectfifo accesses to be handled at
1434 // runtime.
1435 LogicalResult dynamicGlobalObjectFifos(DeviceOp &device, OpBuilder &builder,
1436 std::set<TileOp> objectFifoTiles,
1437 ObjectFifoState &state) {
1438 for (auto coreOp : device.getOps<CoreOp>()) {
1439 if (objectFifoTiles.count(coreOp.getTileOp()) <= 0)
1440 continue;
1441 if (objectFifoTiles.count(coreOp.getTileOp()) > 0) {
1442 // For each core: count the number of objectFifos and create
1443 // a global buffer just before the core to track index of
1444 // next object to access.
1445 // !! NOTE !! objectFifos with same producer / consumer tile
1446 // need two counters (accessed based on the ObjectFifoPort)
1447 std::map<std::pair<ObjectFifoCreateOp, ObjectFifoPort>, int> fifoSizes;
1448 // Also, keep a map of the ConstantOps for the indices per OF
1449 // and a map with the ConstantOps for the sizes per OF.
1450 std::map<std::pair<ObjectFifoCreateOp, ObjectFifoPort>,
1451 arith::ConstantOp>
1452 globalIndices;
1453 std::map<std::pair<ObjectFifoCreateOp, ObjectFifoPort>,
1454 arith::ConstantOp>
1455 constantSizes;
1456
1457 int index = 0;
1458 builder.setInsertionPointToStart(&(coreOp.getBody().front()));
1459 Value initVal = arith::ConstantOp::create(
1460 builder, builder.getUnknownLoc(), builder.getI32IntegerAttr(0));
1461 coreOp.walk([&](ObjectFifoAcquireOp acqOp) {
1462 ObjectFifoCreateOp op = acqOp.getObjectFifo();
1463 ObjectFifoPort port = acqOp.getPort();
1464 if (fifoSizes.find({op, port}) == fifoSizes.end()) {
1465 fifoSizes[{op, port}] = op.size();
1466 auto indexOp = arith::ConstantOp::create(
1467 builder, initVal.getLoc(), builder.getIndexAttr(index));
1468 globalIndices[{op, port}] = indexOp;
1469 index++;
1470 auto size =
1471 arith::ConstantOp::create(builder, indexOp.getLoc(),
1472 builder.getI32IntegerAttr(op.size()));
1473 constantSizes[{op, port}] = size;
1474 }
1475 });
1476 builder.setInsertionPoint(coreOp);
1477 auto memrefTy =
1478 MemRefType::get(SmallVector<int64_t>{(int64_t)fifoSizes.size()},
1479 builder.getI32Type());
1480 auto globalNextIndex = BufferOp::create(
1481 builder, builder.getUnknownLoc(), memrefTy, coreOp.getTile(),
1482 /*sym_name*/ nullptr, /*address*/ nullptr,
1483 /*initial_value*/ nullptr, /*mem_bank*/ nullptr);
1484
1485 // Initialize all counters in the global buffers to 0.
1486 for (auto i : constantSizes) {
1487 builder.setInsertionPointAfter(i.second);
1488 memref::StoreOp::create(
1489 builder, builder.getUnknownLoc(), initVal, globalNextIndex,
1490 ValueRange(ArrayRef({globalIndices[i.first].getResult()})));
1491 }
1492
1493 // Walk the code:
1494 // - after each ObjectFifoReleaseOp:
1495 // - globalNextIndex: add #rel modulo objfifo depth
1496 // - before each ObjectFifoAcquireOp:
1497 // - globalNextIndex: load index and use it to index_switch (one
1498 // IndexSwithOp per AccessOp)
1499 WalkResult res = coreOp.walk([&](Operation *op) {
1500 if (auto relOp = dyn_cast<ObjectFifoReleaseOp>(op)) {
1501 ObjectFifoCreateOp createOp = relOp.getObjectFifo();
1502 ObjectFifoPort port = relOp.getPort();
1503 updateGlobalNextIndex(builder, relOp, globalNextIndex,
1504 globalIndices[{createOp, port}],
1505 constantSizes[{createOp, port}]);
1506 }
1507 if (auto acqOp = dyn_cast<ObjectFifoAcquireOp>(op)) {
1508 std::vector<ObjectFifoSubviewAccessOp> accessOps;
1509 for (auto u : acqOp->getUsers())
1510 if (auto accessOp = dyn_cast<ObjectFifoSubviewAccessOp>(u))
1511 accessOps.push_back(accessOp);
1512
1513 for (auto accessOp : accessOps) {
1514 ObjectFifoCreateOp createOp = acqOp.getObjectFifo();
1515 ObjectFifoPort port = acqOp.getPort();
1516
1517 // Single switch case
1518 if (fifoSizes[{createOp, port}] == 1)
1519 return WalkResult::advance();
1520
1521 // Create a switch for each subview access
1522 builder.setInsertionPointAfter(accessOp);
1523 auto switchIndexAsInteger = memref::LoadOp::create(
1524 builder, builder.getUnknownLoc(), globalNextIndex,
1525 ValueRange(
1526 ArrayRef({globalIndices[{createOp, port}].getResult()})));
1527 auto switchIndex = arith::IndexCastOp::create(
1528 builder, builder.getUnknownLoc(), builder.getIndexType(),
1529 switchIndexAsInteger);
1530 unsigned caseRegionCounts = fifoSizes[{createOp, port}];
1531 SmallVector<int64_t, 4> caseValues;
1532 for (int i = 0; i < fifoSizes[{createOp, port}]; ++i) {
1533 caseValues.push_back(i);
1534 }
1535 auto cases =
1536 DenseI64ArrayAttr::get(builder.getContext(), caseValues);
1537 auto switchOp = scf::IndexSwitchOp::create(
1538 builder, switchIndex.getLoc(),
1539 TypeRange({state.buffersPerFifo[createOp][0].getType()}),
1540 switchIndex, cases, caseRegionCounts);
1541 // Create default case of IndexSwitchOp
1542 builder.createBlock(&switchOp.getDefaultRegion());
1543 auto bufferIndex = (accessOp.getIndex()) % createOp.size();
1544 builder.setInsertionPointToStart(&(switchOp.getDefaultBlock()));
1545 scf::YieldOp::create(
1546 builder, builder.getUnknownLoc(),
1547 state.buffersPerFifo[createOp][bufferIndex].getResult());
1548 for (int i = 0; i < fifoSizes[{createOp, port}]; ++i) {
1549 // Create other cases of IndexSwitchOp
1550 builder.createBlock(&switchOp.getCaseRegions()[i]);
1551 builder.setInsertionPoint(&switchOp.getCaseBlock(i),
1552 switchOp.getCaseBlock(i).begin());
1553 int bufferToBeAccesed =
1554 (accessOp.getIndex() + i) % fifoSizes[{createOp, port}];
1555 scf::YieldOp::create(
1556 builder, switchOp.getCaseRegions()[i].getLoc(),
1557 state.buffersPerFifo[createOp][bufferToBeAccesed]
1558 .getResult());
1559 }
1560
1561 // Replace all uses of accessed objectfifo buffers with
1562 // results of switchOps
1563 accessOp.getOutput().replaceAllUsesWith(switchOp.getResult(0));
1564 }
1565 }
1566 return WalkResult::advance();
1567 });
1568 if (res.wasInterrupted())
1569 return failure();
1570 }
1571 }
1572 return success();
1573 }
1574
1575 /// Function used to create a UseLockOp based on input parameters.
1576 /// acc is an accumulator map that tracks the indices of the next locks to
1577 /// acquire (or release). Uses op to find index of acc for next lockID.
1578 /// Updates acc.
1579 void createUseLocks(OpBuilder &builder, ObjectFifoCreateOp op,
1580 ObjectFifoPort port,
1581 DenseMap<std::pair<ObjectFifoCreateOp, int>, int> &acc,
1582 int numLocks, LockAction lockAction,
1583 ObjectFifoState &state) {
1584 ObjectFifoCreateOp target = op;
1585 auto portNum = port == ObjectFifoPort::Produce ? 0 : 1;
1586 if (auto linkOp = getOptionalLinkOp(op))
1587 if (state.objFifoLinks.find(*linkOp) != state.objFifoLinks.end())
1588 target = state.objFifoLinks[*linkOp];
1589
1590 auto dev = op->getParentOfType<DeviceOp>();
1591 if (!dev.getTargetModel().hasProperty(AIETargetModel::UsesSemaphoreLocks)) {
1592
1593 if (state.locksPerFifo[target].size() == 0) {
1594 for (int i = 0; i < numLocks; i++) {
1595 int lockID = acc[{op, portNum}];
1596 acc[{op, portNum}] =
1597 (lockID + 1) % op.size(); // update to next objFifo elem
1598 }
1599 return;
1600 }
1601
1602 int lockMode = 0;
1603 if ((port == ObjectFifoPort::Produce &&
1604 lockAction == LockAction::Release) ||
1605 (port == ObjectFifoPort::Consume &&
1606 lockAction == LockAction::Acquire))
1607 lockMode = 1;
1608 for (int i = 0; i < numLocks; i++) {
1609 int lockID = acc[{op, portNum}];
1610 UseLockOp::create(builder, builder.getUnknownLoc(),
1611 state.locksPerFifo[target][lockID], lockAction,
1612 lockMode);
1613 acc[{op, portNum}] =
1614 (lockID + 1) % op.size(); // update to next objFifo elem
1615 }
1616 } else {
1617 if (numLocks == 0)
1618 return;
1619
1620 if (state.locksPerFifo[target].size() == 0) {
1621 acc[{op, portNum}] = (acc[{op, portNum}] + numLocks) %
1622 op.size(); // update to next objFifo elem
1623 return;
1624 }
1625
1626 // search for the correct lock based on the port of the acq/rel
1627 // operation e.g. acq as consumer is the read lock (second)
1628 LockOp lock;
1629 if (lockAction == LockAction::AcquireGreaterEqual) {
1630 if (port == ObjectFifoPort::Produce)
1631 lock = state.locksPerFifo[target][0];
1632 else
1633 lock = state.locksPerFifo[target][1];
1634 } else {
1635 if (port == ObjectFifoPort::Produce)
1636 lock = state.locksPerFifo[target][1];
1637 else
1638 lock = state.locksPerFifo[target][0];
1639 }
1640 UseLockOp::create(builder, builder.getUnknownLoc(), lock, lockAction,
1641 numLocks);
1642 acc[{op, portNum}] = (acc[{op, portNum}] + numLocks) %
1643 op.size(); // update to next objFifo elem
1644 }
1645 }
1646
1647 /// Function used to check whether op is already contained in map.
1648 /// If it is then return the associated int, if not create new entry and
1649 /// return 0.
1651 DenseMap<std::pair<ObjectFifoCreateOp, int>, int> &map,
1652 std::pair<ObjectFifoCreateOp, int> pair) {
1653 if (map.find(pair) == map.end()) {
1654 map[pair] = 0;
1655 return 0;
1656 }
1657 return map[pair];
1658 }
1659
1660 /// Function used to add an external buffer to the externalBuffersPerFifo map.
1661 void addExternalBuffer(ObjectFifoCreateOp fifo, ExternalBufferOp buff,
1662 ObjectFifoState &state) {
1663 if (state.externalBuffersPerFifo.find(fifo) ==
1664 state.externalBuffersPerFifo.end()) {
1665 std::vector<ExternalBufferOp> buffs;
1666 state.externalBuffersPerFifo[fifo] = buffs;
1667 }
1668 state.externalBuffersPerFifo[fifo].push_back(buff);
1669 }
1670
1671 /// Function used to detect all external buffers associated with parent
1672 /// objectFifo and tile then map them to child objectFifo.
1673 void detectExternalBuffers(DeviceOp &device, ObjectFifoCreateOp parent,
1674 ObjectFifoCreateOp child, Value tile,
1675 ObjectFifoState &state) {
1676 for (auto regOp : device.getOps<ObjectFifoRegisterExternalBuffersOp>())
1677 if (auto objFifo = regOp.getObjectFifo();
1678 regOp.getTile() == tile && objFifo == parent)
1679 for (auto extBuff : regOp.getExternalBuffers())
1680 addExternalBuffer(child, extBuff.getDefiningOp<ExternalBufferOp>(),
1681 state);
1682 }
1683
1684 /// Function used to replace uses of split objectFifos.
1685 void replaceSplitFifo(ObjectFifoCreateOp originalOp, ObjectFifoCreateOp newOp,
1686 TileOp tile) {
1687 auto original =
1688 originalOp->getAttrOfType<StringAttr>(SymbolTable::getSymbolAttrName());
1689 auto newSymbol =
1690 newOp->getAttrOfType<StringAttr>(SymbolTable::getSymbolAttrName());
1691 for (auto user : tile->getUsers())
1692 if (isa<CoreOp>(user))
1693 if (auto res =
1694 SymbolTable::replaceAllSymbolUses(original, newSymbol, user);
1695 res.failed())
1696 llvm_unreachable("unreachable");
1697 }
1698
1699 /// Function used to find the size of an objectFifo after split based on
1700 /// the maximum number of elements (of the original objectFifo) acquired
1701 /// by a process running on given tile. If no CoreOp exists for this tile
1702 /// return 0.
1703 int findObjectFifoSize(DeviceOp &device, Value tile,
1704 ObjectFifoCreateOp objFifo) {
1705 if (objFifo.size() == 0)
1706 return 0;
1707
1708 // if memTile, size is equal to objFifo size
1709 if (tile.getDefiningOp<TileOp>().isMemTile())
1710 return objFifo.size();
1711
1712 // if shimTile, size is equal to number of external buffers
1713 if (tile.getDefiningOp<TileOp>().isShimTile())
1714 for (auto regOp : device.getOps<ObjectFifoRegisterExternalBuffersOp>()) {
1715 if (regOp.getTile() == tile)
1716 return regOp.getExternalBuffers().size();
1717 }
1718
1719 int maxAcquire = 0;
1720 for (auto coreOp : device.getOps<CoreOp>())
1721 if (coreOp.getTile() == tile)
1722 coreOp.walk([&](ObjectFifoAcquireOp acqOp) {
1723 if (auto createOp = acqOp.getObjectFifo(); createOp == objFifo)
1724 if (acqOp.acqNumber() > maxAcquire)
1725 maxAcquire = acqOp.acqNumber();
1726 });
1727
1728 if (maxAcquire > 0) {
1729 if (maxAcquire == 1 && objFifo.size() == 1)
1730 return 1;
1731 return maxAcquire + 1;
1732 // +1 because objectFifo size is always 1 bigger than maxAcquire to allow
1733 // for prefetching: simplest case scenario is at least a ping-pong buffer
1734 }
1735
1736 return objFifo.size();
1737 }
1738
1739 /// Function used to generate, from an objectFifo with a shimTile endpoint, a
1740 /// shimDMAAllocationOp containing the channelDir, channelIndex and
1741 /// shimTile reference assigned by the objectFifo lowering.
1742 void createObjectFifoAllocationInfo(OpBuilder &builder, MLIRContext *ctx,
1743 ObjectFifoCreateOp &objFifoOp,
1744 TileOp shimTile, DMAChannelDir channelDir,
1745 int channelIndex, bool plio,
1746 std::optional<PacketInfoAttr> packet) {
1747 PacketInfoAttr packetInfo = nullptr;
1748 if (packet)
1749 packetInfo = *packet;
1750 std::string alloc_name = getShimAllocationName(objFifoOp.getName());
1751 // SymbolRefAttr::get(ctx, objFifoOp.getName())
1752 ShimDMAAllocationOp::create(
1753 builder, builder.getUnknownLoc(), StringAttr::get(ctx, alloc_name),
1754 shimTile.getResult(), DMAChannelDirAttr::get(ctx, channelDir),
1755 builder.getI64IntegerAttr(channelIndex), builder.getBoolAttr(plio),
1756 packetInfo);
1757 }
1758
1759 static std::string getShimAllocationName(llvm::StringRef objFifoName) {
1760 return (objFifoName + "_shim_alloc").str();
1761 }
1762
1763 /// Function used to verify that an objectfifo is present in at most one
1764 /// ObjectFifoLinkOp.
1765 LogicalResult verifyObjectFifoLinks(DeviceOp &device) {
1766 DenseSet<ObjectFifoCreateOp> objectfifoset;
1767 bool hasError = false;
1768 for (ObjectFifoLinkOp link : device.getOps<ObjectFifoLinkOp>()) {
1769 for (ObjectFifoCreateOp inOf : link.getInputObjectFifos()) {
1770 if (objectfifoset.count(inOf)) {
1771 inOf.emitOpError("objectfifo cannot be in more than one "
1772 "ObjectFifoLinkOp");
1773 hasError = true;
1774 }
1775 objectfifoset.insert(inOf);
1776 }
1777 for (ObjectFifoCreateOp outOf : link.getOutputObjectFifos()) {
1778 if (objectfifoset.count(outOf)) {
1779 outOf.emitOpError("objectfifo cannot be in more than one "
1780 "ObjectFifoLinkOp");
1781 hasError = true;
1782 }
1783 objectfifoset.insert(outOf);
1784 }
1785 }
1786 return hasError ? failure() : success();
1787 }
1788
1789 /// Account for already used packet IDs and return next available ID.
1790 int getStartPacketID(DeviceOp &device) {
1791 int packetID = 0;
1792 for (PacketFlowOp packetflow : device.getOps<PacketFlowOp>()) {
1793 if (packetflow.getID() > packetID) {
1794 // compute next available ID
1795 packetID = packetflow.getID() + 1;
1796 }
1797 }
1798 return packetID;
1799 }
1800
1801 /// Helper function to assign DMA channel indices for FIFOs based on
1802 /// cross-tile conditions
1804 DMAChannelAnalysis &dmaAnalysis,
1805 const std::map<ObjectFifoCreateOp, bool> &crossTileInfos,
1806 std::map<ObjectFifoCreateOp, int> &fifo_dma_channel_index,
1807 bool assignCrossTileOnly, ObjectFifoState &state) {
1808 for (auto &[producer, consumers] : state.splitFifos) {
1809 // Check if we should process this producer based on cross-tile condition
1810 bool shouldProcessProducer = assignCrossTileOnly
1811 ? crossTileInfos.at(producer)
1812 : !crossTileInfos.at(producer);
1813
1814 if (shouldProcessProducer) {
1815 bool requiresAdjacentTileAccessChannels = crossTileInfos.at(producer);
1816 int channelIndex = dmaAnalysis.getDMAChannelIndex(
1817 producer.getProducerTileOp(), DMAChannelDir::MM2S,
1818 requiresAdjacentTileAccessChannels);
1819 fifo_dma_channel_index[producer] = channelIndex;
1820 }
1821
1822 for (auto consumer : consumers) {
1823 // Check if we should process this consumer based on cross-tile
1824 // condition
1825 bool shouldProcessConsumer = assignCrossTileOnly
1826 ? crossTileInfos.at(consumer)
1827 : !crossTileInfos.at(consumer);
1828
1829 if (shouldProcessConsumer) {
1830 bool requiresAdjacentTileAccessChannels = crossTileInfos.at(consumer);
1831 int channelIndex = dmaAnalysis.getDMAChannelIndex(
1832 consumer.getProducerTileOp(), DMAChannelDir::S2MM,
1833 requiresAdjacentTileAccessChannels);
1834 fifo_dma_channel_index[consumer] = channelIndex;
1835 }
1836 }
1837 }
1838 }
1839
1840 void runOnOperation() override {
1841
1842 DeviceOp device = getOperation();
1843
1844 // Create local state for this device operation - ensures thread and
1845 // multi-device safety
1846 ObjectFifoState state;
1847
1848 LockAnalysis lockAnalysis(device);
1849 DMAChannelAnalysis dmaAnalysis(device);
1850 OpBuilder builder = OpBuilder::atBlockTerminator(device.getBody());
1851 auto ctx = device->getContext();
1852 auto producerWireType = WireBundle::DMA;
1853 auto consumerWireType = WireBundle::DMA;
1854 std::set<TileOp>
1855 objectFifoTiles; // track cores to check for loops during unrolling
1856
1857 if (failed(verifyObjectFifoLinks(device)))
1858 return signalPassFailure();
1859
1860 //===------------------------------------------------------------------===//
1861 // Split objectFifos into a consumer end and producer end if needed
1862 //===------------------------------------------------------------------===//
1863 // We are going to create additional createObjectFifoOps, so get a copy of
1864 // all "original" ones before the loop to avoid looping over newly created
1865 // ones.
1866 std::vector<ObjectFifoCreateOp> createFifoOps;
1867 auto range = device.getOps<ObjectFifoCreateOp>();
1868 createFifoOps.insert(createFifoOps.end(), range.begin(), range.end());
1869 for (auto createOp : createFifoOps) {
1870 std::vector<ObjectFifoCreateOp> splitConsumerFifos;
1871 int consumerIndex = 0;
1872 int consumerDepth = createOp.size();
1873 ArrayRef<BDDimLayoutArrayAttr> consumerDims =
1874 createOp.getDimensionsFromStreamPerConsumer();
1875
1876 // Only FIFOs using DMA are split into two ends;
1877 // skip in shared memory case
1878 if (int share_direction = 0;
1879 !requiresDMAs(createOp, share_direction, state)) {
1880 continue;
1881 }
1882
1883 for (auto consumerTile : createOp.getConsumerTiles()) {
1884 auto consumerTileOp = dyn_cast<TileOp>(consumerTile.getDefiningOp());
1885
1886 if (isa<ArrayAttr>(createOp.getElemNumber())) {
1887 // +1 to account for 1st depth (producer)
1888 consumerDepth = createOp.size(consumerIndex + 1);
1889 } else {
1890 consumerDepth = findObjectFifoSize(device, consumerTileOp, createOp);
1891 }
1892
1893 builder.setInsertionPointAfter(createOp);
1894 auto datatype = llvm::cast<AIEObjectFifoType>(createOp.getElemType());
1895 auto consumerObjFifoSize =
1896 builder.getIntegerAttr(builder.getI32Type(), consumerDepth);
1897 // rename and replace split objectFifo
1898 std::string consumerFifoName;
1899 if (createOp.getConsumerTiles().size() > 1) {
1900 consumerFifoName = createOp.name().str() + "_" +
1901 std::to_string(consumerIndex) + "_cons";
1902 } else {
1903 consumerFifoName = createOp.name().str() + "_cons";
1904 }
1905 BDDimLayoutArrayAttr emptyDims =
1906 BDDimLayoutArrayAttr::get(builder.getContext(), {});
1907 BDDimLayoutArrayAttr singletonFromStreamDims =
1908 BDDimLayoutArrayAttr::get(
1909 builder.getContext(),
1910 ArrayRef<BDDimLayoutAttr>{consumerDims[consumerIndex]});
1911 BDDimLayoutArrayArrayAttr fromStreamDims =
1912 BDDimLayoutArrayArrayAttr::get(builder.getContext(),
1913 singletonFromStreamDims);
1914
1915 ObjectFifoCreateOp consumerFifo = createObjectFifo(
1916 builder, datatype, consumerFifoName, consumerTile, consumerTile,
1917 consumerObjFifoSize, emptyDims, fromStreamDims);
1918 if (createOp.getDisableSynchronization())
1919 consumerFifo.setDisableSynchronization(true);
1920 // Propagate iter_count attribute from the original createOp
1921 // to the new consumerFifo
1922 if (auto bdChainIterCount = createOp.getIterCount()) {
1923 consumerFifo.setIterCountAttr(
1924 builder.getI32IntegerAttr(*bdChainIterCount));
1925 }
1926 replaceSplitFifo(createOp, consumerFifo, consumerTileOp);
1927 if (createOp.getAieStream()) {
1928 int streamEnd = createOp.getAieStream().value();
1929 if (streamEnd > 0) {
1930 consumerFifo->setAttr("aie_stream",
1931 builder.getI32IntegerAttr(streamEnd));
1932 consumerFifo->setAttr(
1933 "aie_stream_port",
1934 builder.getI32IntegerAttr(createOp.getAieStreamPort().value()));
1935 }
1936 if (streamEnd == 1) {
1937 createOp->removeAttr("aie_stream");
1938 createOp->removeAttr("aie_stream_port");
1939 }
1940 }
1941
1942 // identify external buffers that were registered to the consumer fifo
1943 if (consumerTile.getDefiningOp<TileOp>().isShimTile())
1944 detectExternalBuffers(device, createOp, consumerFifo, consumerTile,
1945 state);
1946
1947 // record that this objectFifo was split; it will require DMA config
1948 splitConsumerFifos.push_back(consumerFifo);
1949
1950 // update the linkOp if the split objFifo was originally its start point
1951 if (auto linkOp = getOptionalLinkOp(createOp))
1952 for (ObjectFifoCreateOp fifoIn : linkOp->getInputObjectFifos())
1953 if (fifoIn.name() == createOp.name() &&
1954 consumerTile == *linkOp->getOptionalSharedTile())
1955 if (failed(SymbolTable::replaceAllSymbolUses(
1956 createOp, consumerFifo.name(), linkOp->getOperation())))
1957 llvm::report_fatal_error("unable to update all symbol uses");
1958
1959 consumerIndex++;
1960 }
1961
1962 if (!splitConsumerFifos.empty()) {
1963 state.splitFifos.emplace_back(createOp, splitConsumerFifos);
1964 }
1965 }
1966
1967 //===------------------------------------------------------------------===//
1968 // - Create objectFifo buffers and locks.
1969 // - Populate a list of tiles containing objectFifos for later processing of
1970 // the acquires/releases (uses of the FIFO).
1971 // - Global release counter tracker to keep track of the objectFifo state
1972 //===------------------------------------------------------------------===//
1973 for (auto createOp : device.getOps<ObjectFifoCreateOp>()) {
1974
1975 int share_direction = 0;
1976 bool shared = !requiresDMAs(createOp, share_direction, state);
1977
1978 // add all tiles that contain an objectFifo to objectFifoTiles for later
1979 // loop unrolling pass
1980 objectFifoTiles.insert(createOp.getProducerTileOp());
1981 for (auto consumerTile : createOp.getConsumerTiles()) {
1982 auto consumerTileOp = dyn_cast<TileOp>(consumerTile.getDefiningOp());
1983 objectFifoTiles.insert(consumerTileOp);
1984 }
1985
1986 // identify external buffers that were registered to
1987 // the producer objectFifo
1988 if (createOp.getProducerTileOp().isShimTile())
1989 detectExternalBuffers(device, createOp, createOp,
1990 createOp.getProducerTile(), state);
1991
1992 // if split, the necessary size for producer fifo might change
1993 if (shared) {
1994 createObjectFifoElements(builder, lockAnalysis, createOp,
1995 share_direction, state);
1996 } else {
1997 if (isa<ArrayAttr>(createOp.getElemNumber()))
1998 createOp.setElemNumberAttr(
1999 builder.getI32IntegerAttr(createOp.size()));
2000 else {
2001 if (!createOp.getInitValues().has_value()) {
2002
2003 int prodMaxAcquire = findObjectFifoSize(
2004 device, createOp.getProducerTileOp(), createOp);
2005 createOp.setElemNumberAttr(
2006 builder.getI32IntegerAttr(prodMaxAcquire));
2007 }
2008 }
2009 createObjectFifoElements(builder, lockAnalysis, createOp,
2010 share_direction, state);
2011 }
2012 }
2013
2014 //===------------------------------------------------------------------===//
2015 // Create flows and tile DMAs
2016 //===------------------------------------------------------------------===//
2017 // Only the objectFifos we split above require DMA communication; the others
2018 // rely on shared memory and share the same buffers.
2019
2020 // analyze cross-tile buffer allocations and print results
2021 auto crossTileInfos = analyzeCrossTileFIFOBuffers(state);
2022
2023 // maps ends of split FIFO to DMA channels
2024 std::map<ObjectFifoCreateOp, int> fifo_dma_channel_index;
2025
2026 // assign channel indices for FIFOs with cross-tile issues first
2027 assignDMAChannelIndices(dmaAnalysis, crossTileInfos, fifo_dma_channel_index,
2028 true, state);
2029 // then assign channel indices for FIFOs without cross-tile issues
2030 assignDMAChannelIndices(dmaAnalysis, crossTileInfos, fifo_dma_channel_index,
2031 false, state);
2032
2033 int packetID = getStartPacketID(device);
2034 for (auto &[producer, consumers] : state.splitFifos) {
2035 int producerChanIndex = -1;
2036 DMAChannel producerChan;
2037 PacketFlowOp packetflow;
2038 if (producer.getAieStream()) {
2039 int prodStreamEnd = producer.getAieStream().value();
2040 if (prodStreamEnd == 0 || prodStreamEnd == 2) {
2041 producerChanIndex = producer.getAieStreamPort().value();
2042 producerChan = {DMAChannelDir::MM2S, producerChanIndex};
2043 dmaAnalysis.checkAIEStreamIndex(producer.getProducerTileOp(),
2044 producerChan);
2045 }
2046 } else {
2047 producerChanIndex = fifo_dma_channel_index[producer];
2048 if (producerChanIndex == -1) {
2049 producer.getProducerTileOp().emitOpError(
2050 "number of output DMA channel exceeded!");
2051 return signalPassFailure();
2052 }
2053 producerChan = {DMAChannelDir::MM2S, producerChanIndex};
2054 std::optional<PacketInfoAttr> bdPacket = {};
2055 if (clPacketSwObjectFifos) {
2056 if (packetID > 31) {
2057 device.emitOpError("max number of packet IDs reached");
2058 return signalPassFailure();
2059 }
2060 bdPacket = {AIE::PacketInfoAttr::get(ctx, /*pkt_type*/ 0,
2061 /*pkt_id*/ packetID)};
2062 packetID++;
2063 }
2064 createDMA(device, builder, producer, producerChan.direction,
2065 producerChan.channel, 0, producer.getDimensionsToStreamAttr(),
2066 producer.getPadDimensionsAttr(), bdPacket, state);
2067
2068 // generate objectFifo allocation info
2069 builder.setInsertionPoint(device.getBody()->getTerminator());
2070 if (producer.getProducerTileOp().isShimTile())
2071 createObjectFifoAllocationInfo(
2072 builder, ctx, producer, producer.getProducerTileOp(),
2073 producerChan.direction, producerChan.channel, producer.getPlio(),
2074 bdPacket);
2075
2076 if (clPacketSwObjectFifos) {
2077 // create packet flow
2078 builder.setInsertionPointAfter(producer);
2079 packetflow = builder.create<PacketFlowOp>(
2080 builder.getUnknownLoc(),
2081 builder.getIntegerAttr(builder.getI8Type(), bdPacket->getPktId()),
2082 nullptr, nullptr);
2083 {
2084 OpBuilder::InsertionGuard g(builder);
2085 builder.setInsertionPointToStart(
2086 &packetflow.getRegion().emplaceBlock());
2087 builder.create<EndOp>(builder.getUnknownLoc());
2088 }
2089 }
2090 }
2091
2092 for (auto consumer : consumers) {
2093 // if not aie stream, create consumer tile DMA
2094 int consumerChanIndex = -1;
2095 DMAChannel consumerChan;
2096 if (consumer.getAieStream()) {
2097 int consStreamEnd = consumer.getAieStream().value();
2098 if (consStreamEnd == 1 || consStreamEnd == 2) {
2099 consumerChanIndex = consumer.getAieStreamPort().value();
2100 consumerChan = {DMAChannelDir::S2MM, consumerChanIndex};
2101 dmaAnalysis.checkAIEStreamIndex(consumer.getProducerTileOp(),
2102 consumerChan);
2103 }
2104 } else {
2105 consumerChanIndex = fifo_dma_channel_index[consumer];
2106 if (consumerChanIndex == -1) {
2107 consumer.getProducerTileOp().emitOpError(
2108 "number of input DMA channel exceeded!");
2109 return signalPassFailure();
2110 }
2111 consumerChan = {DMAChannelDir::S2MM, consumerChanIndex};
2112 BDDimLayoutArrayAttr consumerDims =
2113 consumer.getDimensionsFromStreamPerConsumer()[0];
2114 createDMA(device, builder, consumer, consumerChan.direction,
2115 consumerChan.channel, 1, consumerDims, nullptr, {}, state);
2116
2117 // generate objectFifo allocation info
2118 builder.setInsertionPoint(device.getBody()->getTerminator());
2119 if (!consumer.getAieStream()) {
2120 // generate objectFifo allocation info
2121 builder.setInsertionPoint(device.getBody()->getTerminator());
2122 if (consumer.getProducerTileOp().isShimTile())
2123 createObjectFifoAllocationInfo(
2124 builder, ctx, producer, consumer.getProducerTileOp(),
2125 consumerChan.direction, consumerChan.channel,
2126 producer.getPlio(), {});
2127 }
2128
2129 if (clPacketSwObjectFifos) {
2130 builder.setInsertionPointToStart(&packetflow.getPorts().front());
2131 builder.create<PacketDestOp>(builder.getUnknownLoc(),
2132 consumer.getProducerTile(),
2133 WireBundle::DMA, consumerChan.channel);
2134 }
2135 }
2136
2137 // If we have PLIO then figure out the direction and make that a PLIO
2138 if (producer.getPlio()) {
2139 producerWireType = producer.getProducerTileOp().isShimTile()
2140 ? WireBundle::PLIO
2141 : WireBundle::DMA;
2142 consumerWireType = consumer.getProducerTileOp().isShimTile()
2143 ? WireBundle::PLIO
2144 : WireBundle::DMA;
2145 } else {
2146 producerWireType = WireBundle::DMA;
2147 consumerWireType = WireBundle::DMA;
2148 if (producer.getAieStream()) {
2149 int prodStreamEnd = producer.getAieStream().value();
2150 if (prodStreamEnd == 0 || prodStreamEnd == 2)
2151 producerWireType = WireBundle::Core;
2152 }
2153 if (consumer.getAieStream()) {
2154 int consumerStreamEnd = consumer.getAieStream().value();
2155 if (consumerStreamEnd == 1 || consumerStreamEnd == 2)
2156 consumerWireType = WireBundle::Core;
2157 }
2158 }
2159
2160 if (!clPacketSwObjectFifos) {
2161 // create flow
2162 builder.setInsertionPointAfter(producer);
2163 FlowOp::create(builder, builder.getUnknownLoc(),
2164 producer.getProducerTile(), producerWireType,
2165 producerChan.channel, consumer.getProducerTile(),
2166 consumerWireType, consumerChan.channel);
2167 }
2168 }
2169
2170 if (clPacketSwObjectFifos) {
2171 builder.setInsertionPointToStart(&packetflow.getPorts().front());
2172 PacketSourceOp::create(builder, builder.getUnknownLoc(),
2173 producer.getProducerTile(), WireBundle::DMA,
2174 producerChan.channel);
2175 }
2176 }
2177
2178 //===------------------------------------------------------------------===//
2179 // Statically unroll for loops or use dynamic objectFifos
2180 //===------------------------------------------------------------------===//
2181 if (clDynamicObjectFifos) {
2182 if (failed(dynamicGlobalObjectFifos(device, builder, objectFifoTiles,
2183 state)))
2184 return signalPassFailure();
2185 } else {
2186 std::set<TileOp> dynamicTiles;
2187 std::set<TileOp> unrollTiles;
2188 for (auto c : device.getOps<CoreOp>()) {
2189 TileOp t = c.getTileOp();
2190 if (objectFifoTiles.count(t) > 0) {
2191 if (c.getDynamicObjfifoLowering().has_value()) {
2192 if (c.getDynamicObjfifoLowering().value())
2193 dynamicTiles.insert(t);
2194 else
2195 unrollTiles.insert(t);
2196 } else {
2197 unrollTiles.insert(t);
2198 }
2199 }
2200 }
2201 if (failed(
2202 dynamicGlobalObjectFifos(device, builder, dynamicTiles, state)))
2203 return signalPassFailure();
2204 if (failed(unrollForLoops(device, builder, unrollTiles)))
2205 return signalPassFailure();
2206 }
2207
2208 //===------------------------------------------------------------------===//
2209 // Replace ops
2210 //===------------------------------------------------------------------===//
2211 for (auto coreOp : device.getOps<CoreOp>()) {
2212 DenseMap<ObjectFifoAcquireOp, std::vector<BufferOp *>>
2213 subviews; // maps each "subview" to its buffer references (subviews
2214 // are created by AcquireOps)
2215 DenseMap<std::pair<ObjectFifoCreateOp, int>, std::vector<int>>
2216 acquiresPerFifo; // maps each objFifo to indices of buffers acquired
2217 // in latest subview of that objFifo (useful to
2218 // cascade acquired elements to next AcquireOp)
2219 DenseMap<std::pair<ObjectFifoCreateOp, int>,
2220 std::vector<ObjectFifoReleaseOp>>
2221 releaseOps; // useful to check which ReleaseOp has taken place before
2222 // an AcquireOp per objFifo
2223 DenseMap<std::pair<ObjectFifoCreateOp, int>, int>
2224 acqPerFifo; // maps each objFifo to its next index to acquire within
2225 // this CoreOp
2226 DenseMap<std::pair<ObjectFifoCreateOp, int>, int>
2227 relPerFifo; // maps each objFifo to its next index to release within
2228 // this CoreOp
2229
2230 //===----------------------------------------------------------------===//
2231 // Replace objectFifo.release ops
2232 //===----------------------------------------------------------------===//
2233 WalkResult res = coreOp.walk([&](ObjectFifoReleaseOp releaseOp) {
2234 builder.setInsertionPointAfter(releaseOp);
2235 ObjectFifoCreateOp op = releaseOp.getObjectFifo();
2236 auto port = releaseOp.getPort();
2237 auto portNum = port == ObjectFifoPort::Produce ? 0 : 1;
2238 auto core = releaseOp->getParentOfType<CoreOp>();
2239
2240 if (auto linkOp = getOptionalLinkOp(op)) {
2241 if (core.getTile() == *linkOp->getOptionalSharedTile()) {
2242 releaseOp->emitOpError("currently cannot access objectFifo used in "
2243 "ObjectFifoLinkOp");
2244 return WalkResult::interrupt();
2245 ;
2246 }
2247 }
2248
2249 if (op.getAieStream().has_value()) {
2250 int streamEnd = op.getAieStream().value();
2251 if (streamEnd == 2 || streamEnd == portNum)
2252 releaseOp->emitOpError("cannot release from objectfifo stream "
2253 "port");
2254 return WalkResult::interrupt();
2255 }
2256
2257 // update index of next element to release for this objectFifo
2258 updateAndReturnIndex(relPerFifo, {op, portNum});
2259
2260 // release locks
2261 int numLocks = releaseOp.relNumber();
2262 // account for repetition
2263 if (op.getRepeatCount().has_value())
2264 numLocks *= op.getRepeatCount().value();
2265 createUseLocks(builder, op, port, relPerFifo, numLocks,
2266 LockAction::Release, state);
2267
2268 // register release op
2269 if (releaseOps.find({op, portNum}) != releaseOps.end()) {
2270 releaseOps[{op, portNum}].push_back(releaseOp);
2271 } else {
2272 std::vector release = {releaseOp};
2273 releaseOps[{op, portNum}] = release;
2274 }
2275 return WalkResult::advance();
2276 });
2277 if (res.wasInterrupted())
2278 return signalPassFailure();
2279
2280 //===----------------------------------------------------------------===//
2281 // Replace objectFifo.acquire ops
2282 //===----------------------------------------------------------------===//
2283 res = coreOp.walk([&](ObjectFifoAcquireOp acquireOp) {
2284 ObjectFifoCreateOp op = acquireOp.getObjectFifo();
2285 builder.setInsertionPointAfter(acquireOp);
2286 auto port = acquireOp.getPort();
2287 auto portNum = port == ObjectFifoPort::Produce ? 0 : 1;
2288 auto core = acquireOp->getParentOfType<CoreOp>();
2289
2290 auto linkOp = getOptionalLinkOp(op);
2291 if (linkOp) {
2292 if (core.getTile() == *linkOp->getOptionalSharedTile()) {
2293 acquireOp->emitOpError("currently cannot access objectFifo used in "
2294 "ObjectFifoLinkOp");
2295 return WalkResult::interrupt();
2296 ;
2297 }
2298 }
2299
2300 if (op.getAieStream().has_value()) {
2301 int streamEnd = op.getAieStream().value();
2302 if (streamEnd == 2 || streamEnd == portNum)
2303 acquireOp->emitOpError("cannot acquire from objectfifo stream "
2304 "port");
2305 return WalkResult::interrupt();
2306 }
2307
2308 // index of next element to acquire for this objectFifo
2309 int start = updateAndReturnIndex(
2310 acqPerFifo, {op, portNum}); // useful for keeping track of which
2311 // indices are acquired
2312
2313 // check how many elements have been released in between this AcquireOp
2314 // and the previous one
2315 // !!! operations may not be in the same block !!!
2316 int numRel = 0;
2317 for (std::vector<ObjectFifoReleaseOp>::iterator relOp =
2318 releaseOps[{op, portNum}].begin();
2319 relOp != releaseOps[{op, portNum}].end();) {
2320 bool erased = false;
2321 Operation *acqBlockDefOp = acquireOp.getOperation();
2322 do {
2323 Operation *relBlockDefOp = (*relOp).getOperation();
2324 do {
2325 if (acqBlockDefOp->getBlock() == relBlockDefOp->getBlock()) {
2326 if (relBlockDefOp->isBeforeInBlock(acqBlockDefOp)) {
2327 numRel += (*relOp).relNumber();
2328 relOp = releaseOps[{op, portNum}].erase(relOp);
2329 // to ensure that we do not account
2330 // the ReleaseOps again later,
2331 // after the subview is created
2332 erased = true;
2333 }
2334 }
2335 } while ((relBlockDefOp = relBlockDefOp->getParentOp()) &&
2336 !isa<DeviceOp>(relBlockDefOp) && !erased);
2337 } while ((acqBlockDefOp = acqBlockDefOp->getParentOp()) &&
2338 !isa<DeviceOp>(acqBlockDefOp) && !erased);
2339 if (!erased)
2340 ++relOp;
2341 }
2342
2343 // track indices of elements to acquire
2344 std::vector<int> acquiredIndices;
2345 if (!acquiresPerFifo[{op, portNum}].empty()) {
2346 // take into account what has already been acquired by previous
2347 // AcquireOp in program order
2348 acquiredIndices = acquiresPerFifo[{op, portNum}];
2349 // take into account what has been released in-between
2350 if (static_cast<size_t>(numRel) > acquiredIndices.size()) {
2351 acquireOp->emitOpError("cannot release more elements than are "
2352 "already acquired");
2353 return WalkResult::interrupt();
2354 }
2355 for (int i = 0; i < numRel; i++)
2356 acquiredIndices.erase(acquiredIndices.begin());
2357 }
2358
2359 // acquire locks
2360 int numLocks = acquireOp.acqNumber();
2361 int alreadyAcq = acquiredIndices.size();
2362 int numCreate;
2363 if (numLocks > alreadyAcq)
2364 numCreate = numLocks - alreadyAcq;
2365 else
2366 numCreate = 0;
2367
2368 // account for repetition
2369 if (op.getRepeatCount().has_value())
2370 numCreate *= op.getRepeatCount().value();
2371
2372 auto dev = op->getParentOfType<DeviceOp>();
2373 if (auto &targetArch = dev.getTargetModel();
2374 targetArch.getTargetArch() == AIEArch::AIE1)
2375 createUseLocks(builder, op, port, acqPerFifo, numCreate,
2376 LockAction::Acquire, state);
2377 else
2378 createUseLocks(builder, op, port, acqPerFifo, numCreate,
2379 LockAction::AcquireGreaterEqual, state);
2380
2381 // if objFifo was linked with others, find which objFifos
2382 // elements to use
2383 ObjectFifoCreateOp target = op;
2384 if (linkOp)
2385 if (state.objFifoLinks.find(*linkOp) != state.objFifoLinks.end())
2386 target = state.objFifoLinks[*linkOp];
2387
2388 // create subview: buffers that were already acquired + new acquires
2389 for (int i = 0; i < numCreate; i++) {
2390 acquiredIndices.push_back(start);
2391 start = (start + 1) % op.size();
2392 }
2393 std::vector<BufferOp *> subviewRefs;
2394 subviewRefs.reserve(acquiredIndices.size());
2395 for (auto index : acquiredIndices)
2396 subviewRefs.push_back(&state.buffersPerFifo[target][index]);
2397
2398 subviews[acquireOp] = subviewRefs;
2399 acquiresPerFifo[{op, portNum}] = acquiredIndices;
2400
2401 return WalkResult::advance();
2402 });
2403 if (res.wasInterrupted())
2404 return signalPassFailure();
2405
2406 //===----------------------------------------------------------------===//
2407 // Replace subview.access ops
2408 //===----------------------------------------------------------------===//
2409 res = coreOp.walk([&](ObjectFifoSubviewAccessOp accessOp) {
2410 auto acqOp = accessOp.getSubview().getDefiningOp<ObjectFifoAcquireOp>();
2411 if (ObjectFifoCreateOp op = acqOp.getObjectFifo()) {
2412 if (auto linkOp = getOptionalLinkOp(op); linkOp.has_value()) {
2413 if (!linkOp->isDistribute() && !linkOp->isJoin()) {
2414 for (auto consumerTile : op.getConsumerTiles()) {
2415 if (auto consumerTileOp =
2416 dyn_cast<TileOp>(consumerTile.getDefiningOp())) {
2417 int share_dir_value = 0;
2418 bool sharing = isSharedMemory(
2419 op.getProducerTileOp(), consumerTileOp, &share_dir_value);
2420 if (!sharing) {
2421 accessOp->emitOpError(
2422 "currently cannot access objectFifo used in "
2423 "ObjectFifoLinkOp if the tiles don't share memory");
2424 return WalkResult::interrupt();
2425 }
2426 }
2427 }
2428 } else {
2429 accessOp->emitOpError(
2430 "currently cannot access objectFifo used in "
2431 "ObjectFifoLinkOp if it is a distribute or join link");
2432 return WalkResult::interrupt();
2433 }
2434 }
2435 }
2436 accessOp.getOutput().replaceAllUsesWith(
2437 subviews[acqOp][accessOp.getIndex()]->getBuffer());
2438 return WalkResult::advance();
2439 });
2440 if (res.wasInterrupted())
2441 return signalPassFailure();
2442 }
2443
2444 //===------------------------------------------------------------------===//
2445 // Remove old ops
2446 //===------------------------------------------------------------------===//
2447 SetVector<Operation *> opsToErase;
2448 device.walk([&](Operation *op) {
2449 if (isa<ObjectFifoLinkOp, ObjectFifoRegisterExternalBuffersOp,
2450 ObjectFifoAcquireOp, ObjectFifoSubviewAccessOp,
2451 ObjectFifoReleaseOp, ObjectFifoAllocateOp>(op))
2452 opsToErase.insert(op);
2453 });
2454 SmallVector<Operation *> sorted{opsToErase.begin(), opsToErase.end()};
2455 computeTopologicalSorting(sorted);
2456 for (auto *op : llvm::reverse(sorted))
2457 op->erase();
2458
2459 //===------------------------------------------------------------------===//
2460 // Replace any remaining uses of object fifo symbol with symbol of its shim
2461 // dma allocation.
2462 //===------------------------------------------------------------------===//
2463 opsToErase.clear();
2464 for (auto createOp : device.getOps<ObjectFifoCreateOp>()) {
2465 std::string shimAllocName = getShimAllocationName(createOp.getName());
2466 if (failed(SymbolTable::replaceAllSymbolUses(
2467 createOp.getNameAttr(), builder.getStringAttr(shimAllocName),
2468 device))) {
2469 createOp.emitError(
2470 "failed to replace symbol uses with shim allocation");
2471 return signalPassFailure();
2472 }
2473 opsToErase.insert(createOp);
2474 }
2475 for (auto *op : opsToErase) {
2476 op->erase();
2477 }
2478 }
2479};
2480
2481std::unique_ptr<OperationPass<DeviceOp>>
2483 return std::make_unique<AIEObjectFifoStatefulTransformPass>();
2484}
void checkAIEStreamIndex(TileOp tileOp, DMAChannel chan)
Given a tile and DMAChannel, adds entry to aieStreamsPerTile or throws an error if the stream is alre...
int getDMAChannelIndex(TileOp tileOp, DMAChannelDir dir, bool requiresAdjacentTileAccessChannels)
Given a tile and DMAChannelDir, returns next usable channel index for that tile.
int getLockID(TileOp &tileOp)
Given a tile, returns next usable lockID for that tile.
Include the generated interface declarations.
std::unique_ptr< mlir::OperationPass< DeviceOp > > createAIEObjectFifoStatefulTransformPass()
DMAChannel { DMAChannelDir direction DMAChannel
Definition AIEDialect.h:173
const AIETargetModel & getTargetModel(mlir::Operation *op)
void createObjectFifoElements(OpBuilder &builder, LockAnalysis &lockAnalysis, ObjectFifoCreateOp op, int share_direction, ObjectFifoState &state)
Function used to create objectFifo elements and their locks.
int findObjectFifoSize(DeviceOp &device, Value tile, ObjectFifoCreateOp objFifo)
Function used to find the size of an objectFifo after split based on the maximum number of elements (of the original objectFifo) acquired by a process running on given tile.
void createDMA(DeviceOp &device, OpBuilder &builder, ObjectFifoCreateOp op, DMAChannelDir channelDir, int channelIndex, int lockMode, BDDimLayoutArrayAttr dims, BDPadLayoutArrayAttr pad_dims, std::optional< PacketInfoAttr > bdPacket, ObjectFifoState &state)
Function that either calls createAIETileDMA(), createShimDMA() or createMemTileDMA() based on op tile...
std::vector< LockOp > createObjectFifoLocks(OpBuilder &builder, LockAnalysis &lockAnalysis, ObjectFifoCreateOp op, int numElem, int joinDistribFactor, TileOp creation_tile, int repeatCount, ObjectFifoState &state)
Function used to create objectFifo locks based on target architecture.
Block * findEndOpBlock(Region &r)
Function that returns a pointer to the block of a Region that contains the AIEEndOp.
int calculateCurrentUsedMemory(TileOp targetTile, DenseMap< ObjectFifoCreateOp, std::vector< BufferOp > > &buffersPerFifo, std::vector< BufferOp > &buffers)
Function to calculate total memory usage on a specific tile based on all buffers allocated to that ti...
void createAIETileDMA(DeviceOp &device, OpBuilder &builder, ObjectFifoCreateOp op, DMAChannelDir channelDir, int channelIndex, int lockMode, BDDimLayoutArrayAttr dims, std::optional< PacketInfoAttr > bdPacket, ObjectFifoState &state)
Function used to create a MemOp region with a DMA channel.
void replaceSplitFifo(ObjectFifoCreateOp originalOp, ObjectFifoCreateOp newOp, TileOp tile)
Function used to replace uses of split objectFifos.
void detectExternalBuffers(DeviceOp &device, ObjectFifoCreateOp parent, ObjectFifoCreateOp child, Value tile, ObjectFifoState &state)
Function used to detect all external buffers associated with parent objectFifo and tile then map them...
void createObjectFifoAllocationInfo(OpBuilder &builder, MLIRContext *ctx, ObjectFifoCreateOp &objFifoOp, TileOp shimTile, DMAChannelDir channelDir, int channelIndex, bool plio, std::optional< PacketInfoAttr > packet)
Function used to generate, from an objectFifo with a shimTile endpoint, a shimDMAAllocationOp containing the channelDir, channelIndex and shimTile reference assigned by the objectFifo lowering.
LogicalResult verifyObjectFifoLinks(DeviceOp &device)
Function used to verify that an objectfifo is present in at most one ObjectFifoLinkOp.
bool isSharedMemory(TileOp a, TileOp b, int *share_direction)
Function that returns true if two tiles in the AIE array share a memory module.
void assignDMAChannelIndices(DMAChannelAnalysis &dmaAnalysis, const std::map< ObjectFifoCreateOp, bool > &crossTileInfos, std::map< ObjectFifoCreateOp, int > &fifo_dma_channel_index, bool assignCrossTileOnly, ObjectFifoState &state)
Helper function to assign DMA channel indices for FIFOs based on cross-tile conditions.
void updateGlobalNextIndex(OpBuilder &builder, ObjectFifoReleaseOp relOp, BufferOp globalNextIndex, arith::ConstantOp index, arith::ConstantOp size)
std::optional< ObjectFifoLinkOp > getOptionalLinkOp(ObjectFifoCreateOp op)
Function to retrieve ObjectFifoLinkOp of ObjectFifoCreateOp, if it belongs to one.
void createMemTileDMA(DeviceOp &device, OpBuilder &builder, ObjectFifoCreateOp op, DMAChannelDir channelDir, int channelIndex, int lockMode, BDDimLayoutArrayAttr dims, BDPadLayoutArrayAttr padDimensions, std::optional< PacketInfoAttr > bdPacket, ObjectFifoState &state)
Function used to create a MemTileDMAOp region with a DMA channel.
std::optional< ObjectFifoAllocateOp > getOptionalAllocateOp(ObjectFifoCreateOp op)
Function to retrieve ObjectFifoAllocateOp of ObjectFifoCreateOp, if it exists.
void createBdBlock(OpBuilder &builder, ObjectFifoCreateOp op, int lockMode, int acqNum, int relNum, MyOp buff, int offset, int len, DMAChannelDir channelDir, size_t lockIndex, Block *succ, BDDimLayoutArrayAttr dims, BDPadLayoutArrayAttr padDimensions, std::optional< PacketInfoAttr > bdPacket, ObjectFifoState &state, bool distribOrJoin=false)
Function used to create a Bd block.
void createUseLocks(OpBuilder &builder, ObjectFifoCreateOp op, ObjectFifoPort port, DenseMap< std::pair< ObjectFifoCreateOp, int >, int > &acc, int numLocks, LockAction lockAction, ObjectFifoState &state)
Function used to create a UseLockOp based on input parameters.
void createShimDMA(DeviceOp &device, OpBuilder &builder, ObjectFifoCreateOp op, DMAChannelDir channelDir, int channelIndex, int lockMode, BDDimLayoutArrayAttr dims, std::optional< PacketInfoAttr > bdPacket, ObjectFifoState &state)
Function used to create a ShimDMAOp region with a DMA channel.
bool requiresDMAs(ObjectFifoCreateOp createOp, int &share_direction, ObjectFifoState &state)
int updateAndReturnIndex(DenseMap< std::pair< ObjectFifoCreateOp, int >, int > &map, std::pair< ObjectFifoCreateOp, int > pair)
Function used to check whether op is already contained in map.
TileOp findOrCreateTile(OpBuilder &builder, DeviceOp &dev, TileOp hostTile, int col, int row)
Helper function to find a tile at specific coordinates.
static std::string getShimAllocationName(llvm::StringRef objFifoName)
void addExternalBuffer(ObjectFifoCreateOp fifo, ExternalBufferOp buff, ObjectFifoState &state)
Function used to add an external buffer to the externalBuffersPerFifo map.
int getStartPacketID(DeviceOp &device)
Account for already used packet IDs and return next available ID.
LogicalResult unrollForLoops(DeviceOp &device, OpBuilder &builder, std::set< TileOp > objectFifoTiles)
void createBd(OpBuilder &builder, LockOp acqLock, int acqMode, LockAction acqLockAction, LockOp relLock, int relMode, MyOp buff, int offset, int len, Block *succ, BDDimLayoutArrayAttr dims, BDPadLayoutArrayAttr padDimensions, std::optional< PacketInfoAttr > bdPacket)
Function used to create a Bd block.
ObjectFifoCreateOp createObjectFifo(OpBuilder &builder, AIEObjectFifoType datatype, std::string name, Value prodTile, Value consTile, Attribute depth, BDDimLayoutArrayAttr dimensionsToStream, BDDimLayoutArrayArrayAttr dimensionsFromStreamPerConsumer)
std::map< ObjectFifoCreateOp, bool > analyzeCrossTileFIFOBuffers(ObjectFifoState &state)
Function to analyze cross-tile buffer allocations in splitFifos. Returns a simple map of (ObjectFifoCr...
LogicalResult dynamicGlobalObjectFifos(DeviceOp &device, OpBuilder &builder, std::set< TileOp > objectFifoTiles, ObjectFifoState &state)
Struct to hold per-device state for the objectFifo transformation.
DenseMap< ObjectFifoCreateOp, std::vector< BufferOp > > buffersPerFifo
std::vector< std::pair< ObjectFifoCreateOp, std::vector< ObjectFifoCreateOp > > > splitFifos
DenseMap< ObjectFifoCreateOp, std::vector< LockOp > > locksPerFifo
DenseMap< ObjectFifoLinkOp, ObjectFifoCreateOp > objFifoLinks
DenseMap< ObjectFifoCreateOp, std::vector< ExternalBufferOp > > externalBuffersPerFifo
std::vector< ObjectFifoCreateOp > splitBecauseLink