MLIR-AIE
AIEObjectFifoStatefulTransform.cpp
Go to the documentation of this file.
1//===- AIEObjectFifoStatefulTransform.cpp ----------------------*- MLIR -*-===//
2//
3// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7// (c) Copyright 2021 Xilinx Inc.
8//
9// Date: October 18th 2021
10//
11//===----------------------------------------------------------------------===//
12
15
16#include "mlir/Analysis/TopologicalSortUtils.h"
17#include "mlir/Dialect/Arith/IR/Arith.h"
18#include "mlir/Dialect/MemRef/IR/MemRef.h"
19#include "mlir/Dialect/SCF/IR/SCF.h"
20#include "mlir/Dialect/SCF/Utils/Utils.h"
21#include "mlir/IR/Attributes.h"
22#include "mlir/Pass/Pass.h"
23#include "mlir/Transforms/DialectConversion.h"
24
25#include "mlir/IR/Operation.h"
26#include "mlir/Interfaces/DataLayoutInterfaces.h"
27
28#include <numeric>
29#include <set>
30
31#include <iostream>
32
33using namespace mlir;
34using namespace xilinx;
35using namespace xilinx::AIE;
36
37#define DEBUG_TYPE "aie-objectFifo-stateful-transform"
38
39#define LOOP_VAR_DEPENDENCY (-2)
40
41//===----------------------------------------------------------------------===//
42// Lock Analysis
43//===----------------------------------------------------------------------===//
45 DenseMap<std::pair<Value, int>, int> locksPerTile;
46
47public:
48 LockAnalysis(DeviceOp &device) {
49 // go over the locks created for each tile and update the index in
50 // locksPerTile
51 device.walk([&](LockOp lockOp) {
52 auto tile = lockOp.getTile();
53 auto lockID = lockOp.getLockIDValue();
54 locksPerTile[{tile, lockID}] = 1;
55 });
56 }
57
58 /// Given a tile, returns next usable lockID for that tile.
59 int getLockID(TileOp &tileOp) {
60 const auto &targetModel = getTargetModel(tileOp);
61 for (unsigned i = 0;
62 i < targetModel.getNumLocks(tileOp.getCol(), tileOp.getRow()); i++)
63 if (int usageCnt = locksPerTile[{tileOp, i}]; usageCnt == 0) {
64 locksPerTile[{tileOp, i}] = 1;
65 return i;
66 }
67 return -1;
68 }
69};
70
71//===----------------------------------------------------------------------===//
72// DMA Channel Analysis
73//===----------------------------------------------------------------------===//
75 DenseMap<std::tuple<Value, DMAChannelDir, int>, int> channelsPerTile;
76
77public:
78 DMAChannelAnalysis(DeviceOp &device) {
79 // go over the channels used for each tile and update channel map
80 for (auto memOp : device.getOps<MemOp>()) {
81 Region &r = memOp.getBody();
82 for (auto &bl : r.getBlocks()) {
83 for (auto op : bl.getOps<DMAStartOp>()) {
84 channelsPerTile[{memOp.getTile(), op.getChannelDir(),
85 op.getChannelIndex()}] = 1;
86 }
87 }
88 }
89 for (auto memOp : device.getOps<MemTileDMAOp>()) {
90 Region &r = memOp.getBody();
91 for (auto &bl : r.getBlocks()) {
92 for (auto op : bl.getOps<DMAStartOp>()) {
93 channelsPerTile[{memOp.getTile(), op.getChannelDir(),
94 op.getChannelIndex()}] = 1;
95 }
96 }
97 }
98 for (auto memOp : device.getOps<ShimDMAOp>()) {
99 Region &r = memOp.getBody();
100 for (auto &bl : r.getBlocks()) {
101 for (auto op : bl.getOps<DMAStartOp>()) {
102 channelsPerTile[{memOp.getTile(), op.getChannelDir(),
103 op.getChannelIndex()}] = 1;
104 }
105 }
106 }
107 }
108
109 /// Given a tile and DMAChannelDir, returns next usable channel index for
110 /// that tile.
111 int getDMAChannelIndex(TileOp tileOp, DMAChannelDir dir,
112 bool requiresAdjacentTileAccessChannels) {
113 int maxChannelNum = 0;
114 if (dir == DMAChannelDir::MM2S)
115 maxChannelNum = tileOp.getNumSourceConnections(WireBundle::DMA);
116 else
117 maxChannelNum = tileOp.getNumDestConnections(WireBundle::DMA);
118
119 const auto &targetModel = getTargetModel(tileOp);
120 int maxChannelNumForAdjacentTile =
121 targetModel.getMaxChannelNumForAdjacentMemTile(tileOp.getCol(),
122 tileOp.getRow());
123
124 // if requires adjacent tile access channels, only allocate on channel 0-3,
125 // and if cannot, return 0
126 if (requiresAdjacentTileAccessChannels) {
127 maxChannelNum = std::min(maxChannelNum, maxChannelNumForAdjacentTile);
128 }
129
130 for (int i = 0; i < maxChannelNum; i++) {
131 if (int usageCnt = channelsPerTile[{tileOp.getResult(), dir, i}];
132 usageCnt == 0) {
133 channelsPerTile[{tileOp.getResult(), dir, i}] = 1;
134 return i;
135 }
136 }
137 return -1;
138 }
139};
140
141//===----------------------------------------------------------------------===//
142// Create objectFifos Pass
143//===----------------------------------------------------------------------===//
145 : AIEObjectFifoStatefulTransformBase<AIEObjectFifoStatefulTransformPass> {
146 DenseMap<ObjectFifoCreateOp, std::vector<BufferOp>>
147 buffersPerFifo; // maps each objFifo to its corresponding buffer
148 DenseMap<ObjectFifoCreateOp, std::vector<ExternalBufferOp>>
149 externalBuffersPerFifo; // maps each objFifo to its corresponding
150 // external buffers
151 DenseMap<ObjectFifoCreateOp, std::vector<LockOp>>
152 locksPerFifo; // maps each objFifo to its corresponding locks
153 std::vector<std::pair<ObjectFifoCreateOp, std::vector<ObjectFifoCreateOp>>>
154 splitFifos; // maps each objFifo between non-adjacent tiles to its
155 // corresponding consumer objectFifos
156 DenseMap<ObjectFifoLinkOp, ObjectFifoCreateOp>
157 objFifoLinks; // maps each ObjectFifoLinkOp to objFifo whose elements
158 // have been created and should be used
159 std::vector<ObjectFifoCreateOp>
160 splitBecauseLink; // objfifos which have been split because they are
161 // part of a Link, not because they didn't have a shared memory module
162
163 /// Function that returns true if two tiles in the AIE array share a memory
164 /// module. share_direction is equal to:
165 /// * -1 if the shared memory module is that of the first input tile,
166 /// * 1 if it is that of the second input tile,
167 /// * 0 is no memory module is shared.
168 bool isSharedMemory(TileOp a, TileOp b, int *share_direction) {
169 const auto &targetModel = getTargetModel(a.getOperation());
170
171 if ((a.isShimTile() && !b.isShimTile()) ||
172 (!a.isShimTile() && b.isShimTile())) {
173 *share_direction = 0;
174 return false;
175 }
176 if ((targetModel.isMemTile(a.getCol(), a.getRow()) &&
177 !targetModel.isMemTile(b.getCol(), b.getRow())) ||
178 (!targetModel.isMemTile(a.getCol(), a.getRow()) &&
179 targetModel.isMemTile(b.getCol(), b.getRow()))) {
180 *share_direction = 0;
181 return false;
182 }
183 bool rightShared = targetModel.isLegalMemAffinity(
184 a.colIndex(), a.rowIndex(), b.colIndex(), b.rowIndex());
185
186 bool leftShared = targetModel.isLegalMemAffinity(
187 b.colIndex(), b.rowIndex(), a.colIndex(), a.rowIndex());
188
189 if (leftShared)
190 *share_direction = -1;
191 else if (rightShared)
192 *share_direction = 1;
193 else
194 *share_direction = 0;
195
196 return leftShared || rightShared;
197 }
198
199 // Return true if the objectFifo created by createOp requires a DMA to be set
200 // up. This is the case if the tiles are not adjacent (no shared memory), if
201 // the objectFifo broadcasts to multiple tiles, if one of the consumers or
202 // the producer wants to use the multi-dimensional address generation
203 // features of the DMA, if the objectFifo is part of a LinkOp, or if the
204 // via_DMA or repeatCount attributes of the objectFifo are set.
205 bool requiresDMAs(ObjectFifoCreateOp createOp, int &share_direction) {
206 bool hasSharedMemory = false;
207 bool atLeastOneConsumerWantsTransform = false;
208 bool isUsedInLinkOp = false;
209
210 if (createOp.getVia_DMA())
211 return true;
212
213 if (createOp.getRepeatCount().has_value())
214 return true;
215
216 if (createOp.getConsumerTiles().size() == 1 &&
217 createOp.getDimensionsToStream().empty()) {
218
219 // Test for shared memory
220 for (auto consumerTile : createOp.getConsumerTiles()) {
221 if (auto consumerTileOp =
222 dyn_cast<TileOp>(consumerTile.getDefiningOp())) {
223 if (std::count(splitBecauseLink.begin(), splitBecauseLink.end(),
224 createOp))
225 hasSharedMemory =
226 isSharedMemory(createOp.getProducerTileOp(),
227 createOp.getProducerTileOp(), &share_direction);
228 else
229 hasSharedMemory = isSharedMemory(createOp.getProducerTileOp(),
230 consumerTileOp, &share_direction);
231 }
232 }
233 }
234
235 // Only test for use of data layout transformations if we are in the shared
236 // memory case; otherwise, we will return `true` in any case.
237 if (hasSharedMemory) {
238 // Even if just one of the consumers in the list of consumers wants to
239 // perform a memory transform, we need to use DMAs.
240 for (BDDimLayoutArrayAttr dims :
241 createOp.getDimensionsFromStreamPerConsumer())
242 if (!dims.empty()) {
243 atLeastOneConsumerWantsTransform = true;
244 break;
245 }
246 }
247
248 // Check if the objectfifo operation can use shared memory for linking. If
249 // the link operation is a distribute or a join operation, or if the link
250 // has different memref types, DMAs are required even if shared memory is
251 // available and the objectfifo should be split. Otherwise also check if the
252 // via_shared_memory attribute of the objectfifo operation is set and try to
253 // apply it.
254 if (hasSharedMemory) {
255 if (auto linkOp = getOptionalLinkOp(createOp)) {
256 isUsedInLinkOp = true;
257 int share_dir = 0;
258 if (!linkOp->isDistribute() && !linkOp->isJoin()) {
259 auto fifoInType = llvm::cast<AIEObjectFifoType>(
260 linkOp->getInputObjectFifos()[0].getElemType());
261 auto producerType =
262 llvm::cast<MemRefType>(fifoInType.getElementType());
263 auto fifoOutType = llvm::cast<AIEObjectFifoType>(
264 linkOp->getOutputObjectFifos()[0].getElemType());
265 auto consumerType =
266 llvm::cast<MemRefType>(fifoOutType.getElementType());
267 if (consumerType != producerType) {
268 // TODO: Support for different memref types through shared
269 // memory without DMAs
270 splitBecauseLink.push_back(createOp);
271 }
272 if (createOp.getViaSharedMem().has_value()) {
273 checkAndApplyViaSharedMemAttribute(createOp, share_dir);
274 if (share_direction == share_dir)
275 isUsedInLinkOp = false;
276 else
277 splitBecauseLink.push_back(createOp);
278 }
279 } else {
280 splitBecauseLink.push_back(createOp);
281 }
282 }
283 }
284
285 return !hasSharedMemory || atLeastOneConsumerWantsTransform ||
286 isUsedInLinkOp;
287 }
288
289 // Checks if via_shared_mem attribute of the objectfifo is set and if so
290 // tries to apply it. If the desired shared memory module is available to
291 // both producer and consumer then it will be used, otherwise an error is
292 // emitted.
293 void checkAndApplyViaSharedMemAttribute(ObjectFifoCreateOp createOp,
294 int &share_direction) {
295 if (createOp.getViaSharedMem().has_value()) {
296 int desiredSharedTile = createOp.getViaSharedMem().value();
297 int desiredSharedModule = 1;
298 if (desiredSharedTile == 0)
299 desiredSharedModule = -1;
300 if (share_direction != desiredSharedModule) {
301 bool desiredSharedModuleIsShared = false;
302 int newShareDirection = 0;
303 for (auto consumerTile : createOp.getConsumerTiles()) {
304 if (auto consumerTileOp =
305 dyn_cast<TileOp>(consumerTile.getDefiningOp()))
306 if (share_direction == -1)
307 /// * -1 if the shared memory module is that of the first input
308 /// tile,
309 /// * 1 if it is that of the second input tile
310 desiredSharedModuleIsShared =
311 isSharedMemory(consumerTileOp, createOp.getProducerTileOp(),
312 &newShareDirection);
313 }
314 if (desiredSharedModuleIsShared) {
315 if (share_direction == newShareDirection)
316 share_direction = (share_direction == -1) ? 1 : -1;
317 else
318 createOp->emitOpError(
319 "no access to shared memory module specified by "
320 "`via_shared_mem`");
321 }
322 }
323 }
324 }
325
326 /// Function to retrieve ObjectFifoLinkOp of ObjectFifoCreateOp,
327 /// if it belongs to one.
328 std::optional<ObjectFifoLinkOp> getOptionalLinkOp(ObjectFifoCreateOp op) {
329 auto device = op->getParentOfType<DeviceOp>();
330 for (ObjectFifoLinkOp linkOp : device.getOps<ObjectFifoLinkOp>()) {
331 for (ObjectFifoCreateOp in : linkOp.getInputObjectFifos())
332 if (in == op)
333 return {linkOp};
334 for (ObjectFifoCreateOp out : linkOp.getOutputObjectFifos())
335 if (out == op)
336 return {linkOp};
337 }
338 return {};
339 }
340
341 ObjectFifoCreateOp
342 createObjectFifo(OpBuilder &builder, AIEObjectFifoType datatype,
343 std::string name, Value prodTile, Value consTile,
344 Attribute depth, BDDimLayoutArrayAttr dimensionsToStream,
345 BDDimLayoutArrayArrayAttr dimensionsFromStreamPerConsumer) {
346 auto ofName = builder.getStringAttr(name);
347 auto fifo = builder.create<ObjectFifoCreateOp>(
348 builder.getUnknownLoc(), ofName, prodTile, consTile, depth, datatype,
349 dimensionsToStream, dimensionsFromStreamPerConsumer);
350 return fifo;
351 }
352
353 /// Function used to create objectFifo locks based on target architecture.
354 /// Called by createObjectFifoElements().
355 std::vector<LockOp> createObjectFifoLocks(OpBuilder &builder,
356 LockAnalysis &lockAnalysis,
357 ObjectFifoCreateOp op, int numElem,
358 int joinDistribFactor,
359 TileOp creation_tile,
360 int repeatCount) {
361 std::vector<LockOp> locks;
362 if (op.getDisableSynchronization())
363 return locks;
364 auto dev = op->getParentOfType<DeviceOp>();
365 auto &target = dev.getTargetModel();
366 // if shimTile external buffers are collected from input code
367 // create as many locks as there are external buffers
368 if (creation_tile.isShimTile()) {
369 numElem = 0;
370 if (!externalBuffersPerFifo[op].empty())
371 numElem = externalBuffersPerFifo[op].size();
372 }
373 if (target.getTargetArch() == AIEArch::AIE1) {
374 for (int i = 0; i < numElem; i++) {
375 // create corresponding aie1 locks
376 int initValue = op.getInitValues().has_value() ? 1 : 0;
377 int lockID = lockAnalysis.getLockID(creation_tile);
378 assert(lockID >= 0 && "No more locks to allocate!");
379 auto lock = builder.create<LockOp>(builder.getUnknownLoc(),
380 creation_tile, lockID, initValue);
381 lock.getOperation()->setAttr(SymbolTable::getSymbolAttrName(),
382 builder.getStringAttr(op.name().str() +
383 "_lock_" +
384 std::to_string(i)));
385 locks.push_back(lock);
386 }
387 } else {
388 // create corresponding aie2 locks
389 for (int i = 0; i < joinDistribFactor; i++) {
390 auto initValues = op.getInitValues().has_value()
391 ? op.getInitValues().value().size()
392 : 0;
393 int prodLockID = lockAnalysis.getLockID(creation_tile);
394 assert(prodLockID >= 0 && "No more locks to allocate!");
395 int prodLockValue = (numElem - initValues) * repeatCount;
396 auto prodLock = builder.create<LockOp>(
397 builder.getUnknownLoc(), creation_tile, prodLockID, prodLockValue);
398 prodLock.getOperation()->setAttr(
399 SymbolTable::getSymbolAttrName(),
400 builder.getStringAttr(op.name().str() + "_prod_lock_" +
401 std::to_string(i)));
402 locks.push_back(prodLock);
403
404 int consLockID = lockAnalysis.getLockID(creation_tile);
405 assert(consLockID >= 0 && "No more locks to allocate!");
406 int consLockValue = initValues * repeatCount;
407 auto consLock = builder.create<LockOp>(
408 builder.getUnknownLoc(), creation_tile, consLockID, consLockValue);
409 consLock.getOperation()->setAttr(
410 SymbolTable::getSymbolAttrName(),
411 builder.getStringAttr(op.name().str() + "_cons_lock_" +
412 std::to_string(i)));
413 locks.push_back(consLock);
414 }
415 }
416 return locks;
417 }
418
419 /// Function to calculate total memory usage on a specific tile
420 /// based on all buffers allocated to that tile from buffersPerFifo map
422 TileOp targetTile,
423 DenseMap<ObjectFifoCreateOp, std::vector<BufferOp>> &buffersPerFifo,
424 std::vector<BufferOp> &buffers) {
425 int totalUsedMemory = 0;
426
427 // Iterate through all ObjectFifos and their buffers
428 for (auto &[fifoOp, bufferList] : buffersPerFifo) {
429 for (auto &buffer : bufferList) {
430 // Check if this buffer is allocated on the target tile
431 if (buffer.getTile() == targetTile.getResult()) {
432 auto bufferSizeBytes = buffer.getAllocationSize();
433 totalUsedMemory += bufferSizeBytes;
434 }
435 }
436 }
437
438 // Also count buffers that are not in buffersPerFifo
439 for (auto &buffer : buffers) {
440 // Check if this buffer is allocated on the target tile
441 if (buffer.getTile() == targetTile.getResult()) {
442 auto bufferSizeBytes = buffer.getAllocationSize();
443 totalUsedMemory += bufferSizeBytes;
444 }
445 }
446
447 return totalUsedMemory;
448 }
449
450 /// Function to analyze cross-tile buffer allocations in splitFifos
451 /// Returns a simple map of (ObjectFifoCreateOp, bool) indicating cross-tile
452 /// issues
453 std::map<ObjectFifoCreateOp, bool> analyzeCrossTileFIFOBuffers() {
454 std::map<ObjectFifoCreateOp, bool> crossTileMap;
455
456 for (size_t i = 0; i < splitFifos.size(); i++) {
457 auto &[producerFifo, consumerFifos] = splitFifos[i];
458
459 // Analyze producer buffers
460 bool producerHasCrossTile = false;
461
462 ObjectFifoCreateOp target = producerFifo;
463 auto linkOp = getOptionalLinkOp(producerFifo);
464
465 if (linkOp && objFifoLinks.find(*linkOp) != objFifoLinks.end()) {
466 target = objFifoLinks[*linkOp]; // Use the linked target FIFO
467 }
468
469 if (buffersPerFifo.find(target) != buffersPerFifo.end()) {
470 // For each FIFO (producer and consumer):
471 auto &producerBuffers = buffersPerFifo[target];
472 TileOp expectedTile = target.getProducerTileOp();
473 for (auto &buffer : producerBuffers) {
474 TileOp bufferTile = buffer.getTile().getDefiningOp<TileOp>();
475 if (bufferTile != expectedTile) {
476 producerHasCrossTile = true;
477 break;
478 }
479 }
480 }
481 crossTileMap[producerFifo] = producerHasCrossTile;
482
483 // Analyze consumer buffers
484 for (auto &consumerFifo : consumerFifos) {
485 bool consumerHasCrossTile = false;
486 ObjectFifoCreateOp target = consumerFifo;
487 auto linkOp = getOptionalLinkOp(consumerFifo);
488 if (linkOp && objFifoLinks.find(*linkOp) != objFifoLinks.end()) {
489 target = objFifoLinks[*linkOp]; // Use the linked target FIFO
490 }
491
492 if (buffersPerFifo.find(target) != buffersPerFifo.end()) {
493 // For each FIFO (producer and consumer):
494 auto &consumerBuffers = buffersPerFifo[target];
495 TileOp expectedTile = target.getProducerTileOp();
496 for (auto &buffer : consumerBuffers) {
497 TileOp bufferTile = buffer.getTile().getDefiningOp<TileOp>();
498 if (bufferTile != expectedTile) {
499 consumerHasCrossTile = true;
500 break;
501 }
502 }
503 }
504 crossTileMap[consumerFifo] = consumerHasCrossTile;
505 }
506 }
507 return crossTileMap;
508 }
509
510 /// Helper function to find a tile at specific coordinates.
511 /// If a tile is not found, it creates a new one and returns it.
512 /// hostTile is the original tile from which we are searching for neighbors.
513 /// we create the new tile below the hostTile
514 TileOp findOrCreateTile(OpBuilder &builder, DeviceOp &dev, TileOp hostTile,
515 int col, int row) {
516 // First, try to find an existing tile
517 for (auto tile : dev.getOps<TileOp>()) {
518 if (tile.getCol() == col && tile.getRow() == row) {
519 return tile;
520 }
521 }
522
523 // If not found, create a new one.
524 OpBuilder::InsertionGuard g(builder);
525
526 auto savedInsertionPoint = builder.saveInsertionPoint();
527
528 // Find the last buffer operation after the host tile
529 Operation *insertAfter = hostTile.getOperation();
530 Operation *nextOp = insertAfter->getNextNode();
531 while (nextOp && isa<BufferOp>(nextOp)) {
532 insertAfter = nextOp;
533 nextOp = nextOp->getNextNode();
534 }
535
536 builder.setInsertionPointAfter(insertAfter);
537 auto newTile = builder.create<TileOp>(builder.getUnknownLoc(), col, row);
538
539 builder.restoreInsertionPoint(savedInsertionPoint);
540
541 return newTile;
542 }
543
544 /// Function used to create objectFifo elements and their locks.
545 /// It maps the input objectFifo to associated buffers and locks.
546 void createObjectFifoElements(OpBuilder &builder, LockAnalysis &lockAnalysis,
547 ObjectFifoCreateOp op, int share_direction) {
548 if (!op.size())
549 return;
550
551 std::vector<BufferOp> buffers;
552 auto fifo = llvm::cast<AIEObjectFifoType>(op.getElemType());
553 auto elemType = llvm::cast<MemRefType>(fifo.getElementType());
554 int numElem = op.size();
555 int of_elem_index = 0; // used to give objectFifo elements a symbolic name
556
557 // if this objectFifo is linked to another, check if the other's elements
558 // have already been created: if none of the output objectfifos of the link
559 // have initValues, then the elements that are created are those of the
560 // objFifo with elements of bigger size
561 bool linked = false;
562 auto linkOp = getOptionalLinkOp(op);
563 if (linkOp) {
564 auto fifoIn = linkOp->getInputObjectFifos()[0];
565 auto fifoOut = linkOp->getOutputObjectFifos()[0];
566 linked = true;
567 if (objFifoLinks.find(*linkOp) != objFifoLinks.end())
568 return; // elements have already been created
569 if (linkOp->isJoin()) {
570 // if join, fifoOut has bigger size
571 if (op.name() != fifoOut.name())
572 return;
573 } else if (linkOp->isDistribute()) {
574 // if distribute, fifoIn has bigger size
575 if (op.name() != fifoIn.name())
576 return;
577 } else {
578 // check if output objectfifo has initValues
579 if (fifoOut.getInitValues().has_value()) {
580 if (fifoOut.name() != op.name())
581 return;
582 } else {
583 // check which objectfifo of the link has bigger size
584 auto fifoInType = llvm::cast<AIEObjectFifoType>(fifoIn.getElemType());
585 auto elemInType = llvm::cast<MemRefType>(fifoInType.getElementType());
586 int inSize = elemInType.getNumElements();
587
588 auto fifoOutType =
589 llvm::cast<AIEObjectFifoType>(fifoOut.getElemType());
590 auto elemOutType =
591 llvm::cast<MemRefType>(fifoOutType.getElementType());
592
593 if (int outSize = elemOutType.getNumElements(); inSize >= outSize) {
594 if (op.name() != fifoIn.name())
595 return;
596 } else {
597 if (fifoOut.name() != op.name())
598 return;
599 }
600 }
601 }
602 }
603
604 TileOp creation_tile;
605 if (share_direction == 0 || share_direction == -1)
606 creation_tile = op.getProducerTileOp();
607 else {
608 auto consumerTileOp =
609 dyn_cast<TileOp>(op.getConsumerTiles()[0].getDefiningOp());
610 creation_tile = consumerTileOp;
611 }
612
613 // Reset opbuilder location to after the last tile declaration
614 Operation *t = nullptr;
615 auto dev = op->getParentOfType<DeviceOp>();
616 for (auto tile_op : dev.getBody()->getOps<TileOp>()) {
617 t = tile_op.getOperation();
618 }
619
620 builder.setInsertionPointAfter(t);
621 for (int i = 0; i < numElem; i++) {
622
623 mlir::ElementsAttr initValues = nullptr;
624 if (!creation_tile.isShimTile()) {
625 if (op.getInitValues().has_value()) {
626 initValues =
627 llvm::cast<mlir::ElementsAttr>(op.getInitValues().value()[i]);
628 }
629
630 auto elementType = elemType.getElementType();
631
632 DataLayout dataLayout = DataLayout::closest(op.getOperation());
633 int64_t elementBitWidth = dataLayout.getTypeSizeInBits(elementType);
634
635 auto totalSizeBytes = elemType.getNumElements() * elementBitWidth / 8;
636 auto &targetModel = dev.getTargetModel();
637
638 int maxDataMemorySize = 0;
639 if (creation_tile.isMemTile())
640 maxDataMemorySize =
641 targetModel.getMemTileSize(); // getMemTileSize returns in Bytes
642 else
643 maxDataMemorySize =
644 targetModel
645 .getLocalMemorySize(); // getLocalMemorySize returns in Bytes
646
647 // also need to count the buffers that are not in buffersPerFifo
648 int currentUsedMemory =
649 calculateCurrentUsedMemory(creation_tile, buffersPerFifo, buffers);
650
651 // Check if current tile can hold the new buffer or not
652 TileOp current_buf_allocation_tile =
653 creation_tile; // used to keep track of the tile where the buffer is
654 // allocated
655 if (static_cast<int>(currentUsedMemory + totalSizeBytes) >
656 maxDataMemorySize) {
657 // if not, check if the neighbour can hold the new buffer or not
658 // Find neighbor tiles with shared memory
659 std::vector<TileOp> neighborTiles;
660 int currentCol = creation_tile.getCol();
661 int currentRow = creation_tile.getRow();
662
663 // Check tile to the left
664 if (currentCol > 0) {
665 TileOp leftTile = findOrCreateTile(builder, dev, creation_tile,
666 currentCol - 1, currentRow);
667
668 int share_direction = 0;
669 if (isSharedMemory(creation_tile, leftTile, &share_direction)) {
670 neighborTiles.push_back(leftTile);
671 }
672 }
673
674 // Check tile to the right
675 if (currentCol < (targetModel.columns() - 1)) {
676 TileOp rightTile = findOrCreateTile(builder, dev, creation_tile,
677 currentCol + 1, currentRow);
678 int share_direction = 0;
679 if (isSharedMemory(creation_tile, rightTile, &share_direction)) {
680 neighborTiles.push_back(rightTile);
681 }
682 }
683
684 // try to allocate on neighbor tiles
685 if (!neighborTiles.empty()) {
686 for (auto &tile : neighborTiles) {
687 // Try to allocate on this neighbor tile
688 int neighborUsedMemory =
690 if (static_cast<int>(neighborUsedMemory + totalSizeBytes) <=
691 maxDataMemorySize) {
692 // Allocate buffer on neighbor tile, change creation_tile to be
693 // this neighbour tile
694 current_buf_allocation_tile = tile;
695 break;
696 }
697 }
698 }
699 }
700 auto buff = builder.create<BufferOp>(
701 builder.getUnknownLoc(), elemType, current_buf_allocation_tile,
702 builder.getStringAttr(op.name().str() + "_buff_" +
703 std::to_string(of_elem_index)),
704 /*address*/ nullptr, initValues,
705 /*mem_bank*/ nullptr);
706 buffers.push_back(buff);
707 }
708 of_elem_index++;
709 }
710
711 int repeatCount = 1;
712 int joinDistribFactor = 1;
713 if (op.getRepeatCount().has_value())
714 repeatCount = op.getRepeatCount().value();
715 if (linked) {
716 if (linkOp->getRepeatCount().has_value())
717 repeatCount = linkOp->getRepeatCount().value();
718 if (linkOp->isDistribute())
719 joinDistribFactor *= linkOp->getFifoOuts().size();
720 else if (linkOp->isJoin())
721 joinDistribFactor *= linkOp->getFifoIns().size();
722 objFifoLinks[*linkOp] = op;
723 }
724 std::vector<LockOp> locks =
725 createObjectFifoLocks(builder, lockAnalysis, op, numElem,
726 joinDistribFactor, creation_tile, repeatCount);
727 buffersPerFifo[op] = buffers;
728 locksPerFifo[op] = locks;
729 }
730
731 /// Function that returns a pointer to the block of a Region
732 /// that contains the AIEEndOp.
733 Block *findEndOpBlock(Region &r) {
734 Block *endBlock = nullptr;
735 for (auto &bl : r.getBlocks())
736 if (!bl.getOps<EndOp>().empty())
737 endBlock = &bl;
738 return endBlock;
739 }
740
741 /// Function used to create a Bd block.
742 template <typename MyOp>
743 void createBd(OpBuilder &builder, LockOp acqLock, int acqMode,
744 LockAction acqLockAction, LockOp relLock, int relMode,
745 MyOp buff, int offset, int len, Block *succ,
746 BDDimLayoutArrayAttr dims, BDPadLayoutArrayAttr padDimensions) {
747 if (acqLock)
748 builder.create<UseLockOp>(builder.getUnknownLoc(), acqLock, acqLockAction,
749 acqMode);
750
751 if (!dims.getValue().empty() && padDimensions) {
752 builder.create<DMABDOp>(builder.getUnknownLoc(), buff, offset, len, dims,
753 padDimensions);
754 } else if (!dims.getValue().empty()) {
755 builder.create<DMABDOp>(builder.getUnknownLoc(), buff, offset, len, dims);
756 } else {
757 builder.create<DMABDOp>(builder.getUnknownLoc(), buff, offset, len);
758 }
759 if (acqLock)
760 builder.create<UseLockOp>(builder.getUnknownLoc(), relLock,
761 LockAction::Release, relMode);
762 builder.create<NextBDOp>(builder.getUnknownLoc(), succ);
763 }
764
765 /// Function used to create a Bd block.
766 /// If lockMode is 0 we create a consumerDMA (i.e. on producer tile) else a
767 /// producerDMA (i.e. on consumer tile).
768 template <typename MyOp>
769 void createBdBlock(OpBuilder &builder, ObjectFifoCreateOp op, int lockMode,
770 int acqNum, int relNum, MyOp buff, int offset, int len,
771 DMAChannelDir channelDir, size_t lockIndex, Block *succ,
772 BDDimLayoutArrayAttr dims,
773 BDPadLayoutArrayAttr padDimensions,
774 bool distribOrJoin = false) {
775 LockOp acqLock;
776 LockOp relLock;
777 int acqMode = 1;
778 int relMode = 1;
779 auto acqLockAction = LockAction::Acquire;
780 if (locksPerFifo[op].size() > 0) {
781 auto dev = op->getParentOfType<DeviceOp>();
782 if (auto &target = dev.getTargetModel();
783 target.getTargetArch() == AIEArch::AIE1) {
784 acqMode = lockMode == 0 ? 1 : 0;
785 relMode = lockMode == 0 ? 0 : 1;
786 acqLock = locksPerFifo[op][lockIndex];
787 relLock = locksPerFifo[op][lockIndex];
788 } else {
789 acqMode = acqNum;
790 relMode = relNum;
791 acqLockAction = LockAction::AcquireGreaterEqual;
792 int prodLockIndex = 0;
793 int consLockIndex = 1;
794 if (distribOrJoin) {
795 prodLockIndex = lockIndex * 2;
796 consLockIndex = lockIndex * 2 + 1;
797 }
798 acqLock = channelDir == DMAChannelDir::S2MM
799 ? locksPerFifo[op][prodLockIndex]
800 : locksPerFifo[op][consLockIndex];
801 relLock = channelDir == DMAChannelDir::S2MM
802 ? locksPerFifo[op][consLockIndex]
803 : locksPerFifo[op][prodLockIndex];
804 }
805 }
806 createBd(builder, acqLock, acqMode, acqLockAction, relLock, relMode, buff,
807 offset, len, succ, dims, padDimensions);
808 }
809
810 /// Function that either calls createAIETileDMA(), createShimDMA() or
811 /// createMemTileDMA() based on op tile row value.
812 void createDMA(DeviceOp &device, OpBuilder &builder, ObjectFifoCreateOp op,
813 DMAChannelDir channelDir, int channelIndex, int lockMode,
814 BDDimLayoutArrayAttr dims, BDPadLayoutArrayAttr pad_dims) {
815 if (op.getProducerTileOp().isShimTile()) {
816 createShimDMA(device, builder, op, channelDir, channelIndex, lockMode,
817 dims);
818 } else if (op.getProducerTileOp().isMemTile()) {
819 BDPadLayoutArrayAttr padDims = nullptr;
820 if (channelDir == DMAChannelDir::MM2S && pad_dims)
821 padDims = pad_dims;
822 createMemTileDMA(device, builder, op, channelDir, channelIndex, lockMode,
823 dims, padDims);
824 } else {
825 createAIETileDMA(device, builder, op, channelDir, channelIndex, lockMode,
826 dims);
827 }
828 }
829
830 /// Function used to create a MemOp region with a DMA channel.
831 /// It uses creatBdBlock(), see there for lockMode input.
832 void createAIETileDMA(DeviceOp &device, OpBuilder &builder,
833 ObjectFifoCreateOp op, DMAChannelDir channelDir,
834 int channelIndex, int lockMode,
835 BDDimLayoutArrayAttr dims) {
836 size_t numBlocks = op.size();
837 if (numBlocks == 0)
838 return;
839
840 int acqNum = 1;
841 int relNum = 1;
842
843 auto fifo = llvm::cast<AIEObjectFifoType>(op.getElemType());
844 auto elemType = llvm::cast<MemRefType>(fifo.getElementType());
845 int len = elemType.getNumElements();
846
847 // check for repeat count
848 int repeatCount = 1;
849 if (op.getRepeatCount().has_value())
850 repeatCount = op.getRepeatCount().value();
851
852 // search for the buffers/locks (based on if this objFifo has a link)
853 ObjectFifoCreateOp target = op;
854 if (std::optional<ObjectFifoLinkOp> linkOp = getOptionalLinkOp(op);
855 linkOp.has_value()) {
856 if (objFifoLinks.find(linkOp.value()) != objFifoLinks.end()) {
857 target = objFifoLinks[linkOp.value()];
858 if (target == op) {
859 if (linkOp->getRepeatCount().has_value()) {
860 acqNum *= linkOp->getRepeatCount().value();
861 relNum *= linkOp->getRepeatCount().value();
862 }
863 }
864 }
865 }
866
867 // search for MemOp
868 Operation *producerMem = nullptr;
869 for (auto memOp : device.getOps<MemOp>()) {
870 if (memOp.getTile() == op.getProducerTile()) {
871 producerMem = memOp.getOperation();
872 break;
873 }
874 }
875
876 // if none exists, create one
877 TileOp objFifoTileOp = target.getProducerTileOp();
878 if (producerMem == nullptr) {
879 OpBuilder::InsertionGuard g(builder);
880 builder.setInsertionPoint(device.getBody()->getTerminator());
881 auto newMemOp =
882 builder.create<MemOp>(builder.getUnknownLoc(), objFifoTileOp);
883 {
884 OpBuilder::InsertionGuard g(builder);
885 builder.setInsertionPointToStart(&newMemOp.getRegion().emplaceBlock());
886 builder.create<EndOp>(builder.getUnknownLoc());
887 }
888 producerMem = newMemOp.getOperation();
889 }
890 Block *endBlock = findEndOpBlock(producerMem->getRegion(0));
891 Block *lastDmaBlock = endBlock->getSinglePredecessor();
892 Block *dmaBlock = builder.createBlock(endBlock);
893 Block *bdBlock = builder.createBlock(endBlock);
894
895 // create DMA channel
896 builder.setInsertionPointToStart(dmaBlock);
897 builder.create<DMAStartOp>(builder.getUnknownLoc(), channelDir,
898 channelIndex, /*repeatCout*/ 0, bdBlock,
899 endBlock);
900 if (lastDmaBlock != nullptr)
901 lastDmaBlock->getTerminator()->setSuccessor(dmaBlock, 1);
902
903 // create Bd blocks
904 Block *succ;
905 Block *curr = bdBlock;
906 size_t elemIndex = 0;
907 size_t totalBlocks = 0;
908 for (size_t i = 0; i < numBlocks; i++) {
909 if (elemIndex >= buffersPerFifo[target].size())
910 break;
911 for (int r = 0; r < repeatCount; r++) {
912 if (totalBlocks == numBlocks * repeatCount - 1)
913 succ = bdBlock;
914 else
915 succ = builder.createBlock(endBlock);
916
917 builder.setInsertionPointToStart(curr);
918 createBdBlock<BufferOp>(builder, target, lockMode, acqNum, relNum,
919 buffersPerFifo[target][elemIndex], /*offset*/ 0,
920 len, channelDir, elemIndex, succ, dims,
921 nullptr);
922 curr = succ;
923 totalBlocks++;
924 }
925 elemIndex++;
926 }
927 }
928
929 /// Function used to create a ShimDMAOp region with a DMA channel.
930 /// It uses creatBdBlock(), see there for lockMode input.
931 void createShimDMA(DeviceOp &device, OpBuilder &builder,
932 ObjectFifoCreateOp op, DMAChannelDir channelDir,
933 int channelIndex, int lockMode,
934 BDDimLayoutArrayAttr dims) {
935 size_t numBlocks = externalBuffersPerFifo[op].size();
936 if (numBlocks == 0)
937 return;
938
939 int acqNum = 1;
940 int relNum = 1;
941
942 // search for ShimDMAOp
943 Operation *producerDMA = nullptr;
944 for (auto dmaOp : device.getOps<ShimDMAOp>()) {
945 if (dmaOp.getTile() == op.getProducerTile()) {
946 producerDMA = dmaOp.getOperation();
947 break;
948 }
949 }
950
951 // if none exists, create one
952 TileOp objFifoTileOp = op.getProducerTileOp();
953 if (producerDMA == nullptr) {
954 OpBuilder::InsertionGuard g(builder);
955 builder.setInsertionPoint(device.getBody()->getTerminator());
956 auto newDMAOp = builder.create<ShimDMAOp>(
957 builder.getUnknownLoc(), builder.getIndexType(), objFifoTileOp);
958 {
959 OpBuilder::InsertionGuard g(builder);
960 builder.setInsertionPointToStart(&newDMAOp.getRegion().emplaceBlock());
961 builder.create<EndOp>(builder.getUnknownLoc());
962 }
963 producerDMA = newDMAOp.getOperation();
964 }
965
966 Block *endBlock = findEndOpBlock(producerDMA->getRegion(0));
967 Block *lastDmaBlock = endBlock->getSinglePredecessor();
968 Block *dmaBlock = builder.createBlock(endBlock);
969 Block *bdBlock = builder.createBlock(endBlock);
970
971 // create DMA channel
972 builder.setInsertionPointToStart(dmaBlock);
973 builder.create<DMAStartOp>(builder.getUnknownLoc(), channelDir,
974 channelIndex, /*repeatCout*/ 0, bdBlock,
975 endBlock);
976 if (lastDmaBlock != nullptr)
977 lastDmaBlock->getTerminator()->setSuccessor(dmaBlock, 1);
978
979 // create Bd blocks
980 Block *succ;
981 Block *curr = bdBlock;
982 size_t elemIndex = 0;
983 for (size_t i = 0; i < numBlocks; i++) {
984 if (elemIndex >= externalBuffersPerFifo[op].size())
985 break;
986 if (i == numBlocks - 1)
987 succ = bdBlock;
988 else
989 succ = builder.createBlock(endBlock);
990
991 MemRefType buffer = externalBuffersPerFifo[op][elemIndex].getType();
992 int len = buffer.getNumElements();
993 builder.setInsertionPointToStart(curr);
994 createBdBlock<ExternalBufferOp>(builder, op, lockMode, acqNum, relNum,
995 externalBuffersPerFifo[op][elemIndex],
996 /*offset*/ 0, len, channelDir, elemIndex,
997 succ, dims, nullptr);
998 curr = succ;
999 elemIndex++;
1000 }
1001 }
1002
1003 /// Function used to create a MemTileDMAOp region with a DMA channel.
1004 /// It uses creatBdBlock(), see there for lockMode input.
1005 void createMemTileDMA(DeviceOp &device, OpBuilder &builder,
1006 ObjectFifoCreateOp op, DMAChannelDir channelDir,
1007 int channelIndex, int lockMode,
1008 BDDimLayoutArrayAttr dims,
1009 BDPadLayoutArrayAttr padDimensions) {
1010 size_t numBlocks = op.size();
1011 if (numBlocks == 0)
1012 return;
1013
1014 auto fifo = llvm::cast<AIEObjectFifoType>(op.getElemType());
1015 auto elemType = llvm::cast<MemRefType>(fifo.getElementType());
1016 int lenOut = elemType.getNumElements();
1017 int acqNum = 1;
1018 int relNum = 1;
1019
1020 // check for repeat count
1021 int repeatCount = 1;
1022 if (op.getRepeatCount().has_value())
1023 repeatCount = op.getRepeatCount().value();
1024
1025 // search for the buffers/locks (based on if this objFifo has a link)
1026 // identify size difference between input and output memrefs
1027 ObjectFifoCreateOp target = op;
1028 bool isDistribute = false;
1029 bool isJoin = false;
1030 int extraOffset = 0;
1031 int joinDistribFactor = 1;
1032 int joinDistribLockIndex = 0;
1033 auto linkOp = getOptionalLinkOp(op);
1034 if (linkOp) {
1035 if (objFifoLinks.find(*linkOp) != objFifoLinks.end()) {
1036 target = objFifoLinks[*linkOp];
1037 auto srcOffsets = linkOp->getSrcOffsets();
1038 auto dstOffsets = linkOp->getDstOffsets();
1039
1040 if (linkOp->getRepeatCount().has_value())
1041 if (linkOp->getInputObjectFifos()[0] == op) {
1042 acqNum *= linkOp->getRepeatCount().value();
1043 relNum *= linkOp->getRepeatCount().value();
1044 }
1045
1046 if (linkOp->isJoin()) {
1047 // compute offset and length
1048 isJoin = true;
1049 if (target == op) {
1050 joinDistribFactor *= linkOp->getFifoIns().size();
1051 } else {
1052 int i = 0;
1053 for (auto fifoIn : linkOp->getInputObjectFifos()) {
1054 if (fifoIn.name() == op.name())
1055 break;
1056 i++;
1057 }
1058 extraOffset = *getConstantIntValue(srcOffsets[i]);
1059 lenOut = linkOp->getJoinTransferLengths()[i];
1060 joinDistribLockIndex = i;
1061 }
1062 } else if (linkOp->isDistribute()) {
1063 // compute offset and length
1064 isDistribute = true;
1065 if (target == op) {
1066 joinDistribFactor *= linkOp->getFifoOuts().size();
1067 } else {
1068 int i = 0;
1069 for (auto fifoOut : linkOp->getOutputObjectFifos()) {
1070 if (fifoOut.name() == op.name())
1071 break;
1072 i++;
1073 }
1074 extraOffset = *getConstantIntValue(dstOffsets[i]);
1075 lenOut = linkOp->getDistributeTransferLengths()[i];
1076 joinDistribLockIndex = i;
1077 }
1078 } else {
1079 if (target != op) {
1080 auto targetFifo =
1081 llvm::cast<AIEObjectFifoType>(target.getElemType());
1082 auto targetElemType =
1083 llvm::cast<MemRefType>(targetFifo.getElementType());
1084 lenOut = targetElemType.getNumElements();
1085 }
1086 }
1087
1088 // check if current op is of smaller size in link
1089 if (target != op) {
1090 numBlocks = target.size();
1091 }
1092 }
1093 }
1094
1095 // search for MemTileDMAOp
1096 Operation *producerDMA = nullptr;
1097 for (auto dmaOp : device.getOps<MemTileDMAOp>()) {
1098 if (dmaOp.getTile() == target.getProducerTile()) {
1099 producerDMA = dmaOp.getOperation();
1100 break;
1101 }
1102 }
1103
1104 // if none exists, create one
1105 TileOp objFifoTileOp = target.getProducerTileOp();
1106 if (producerDMA == nullptr) {
1107 OpBuilder::InsertionGuard g(builder);
1108 builder.setInsertionPoint(device.getBody()->getTerminator());
1109 auto newDMAOp =
1110 builder.create<MemTileDMAOp>(builder.getUnknownLoc(), objFifoTileOp);
1111 {
1112 OpBuilder::InsertionGuard g(builder);
1113 builder.setInsertionPointToStart(&newDMAOp.getRegion().emplaceBlock());
1114 builder.create<EndOp>(builder.getUnknownLoc());
1115 }
1116 producerDMA = newDMAOp.getOperation();
1117 }
1118
1119 Block *endBlock = findEndOpBlock(producerDMA->getRegion(0));
1120 Block *lastDmaBlock = endBlock->getSinglePredecessor();
1121 Block *dmaBlock = builder.createBlock(endBlock);
1122 Block *bdBlock = builder.createBlock(endBlock);
1123
1124 // create DMA channel
1125 builder.setInsertionPointToStart(dmaBlock);
1126 builder.create<DMAStartOp>(builder.getUnknownLoc(), channelDir,
1127 channelIndex, /*repeatCout*/ 0, bdBlock,
1128 endBlock);
1129 if (lastDmaBlock != nullptr)
1130 lastDmaBlock->getTerminator()->setSuccessor(dmaBlock, 1);
1131
1132 // create Bd blocks
1133 Block *succ;
1134 Block *curr = bdBlock;
1135 size_t elemIndex = 0;
1136 size_t lockIndex = 0;
1137 size_t totalBlocks = 0;
1138 bool distribOrJoin = false;
1139 for (size_t i = 0; i < numBlocks; i++) {
1140 if (elemIndex >= buffersPerFifo[target].size())
1141 break;
1142 for (int r = 0; r < repeatCount * joinDistribFactor; r++) {
1143 if (totalBlocks == numBlocks * repeatCount * joinDistribFactor - 1) {
1144 succ = bdBlock;
1145 } else {
1146 succ = builder.createBlock(endBlock);
1147 }
1148
1149 builder.setInsertionPointToStart(curr);
1150 int offset = 0;
1151 if (isDistribute || isJoin) {
1152 distribOrJoin = true;
1153 if (target == op) {
1154 if (isDistribute) {
1155 offset = *getConstantIntValue(linkOp->getDstOffsets()[r]);
1156 lenOut = linkOp->getDistributeTransferLengths()[r];
1157 } else {
1158 offset = *getConstantIntValue(linkOp->getSrcOffsets()[r]);
1159 lenOut = linkOp->getJoinTransferLengths()[r];
1160 }
1161 lockIndex = r % joinDistribFactor;
1162 } else {
1163 offset = extraOffset;
1164 lockIndex = joinDistribLockIndex;
1165 }
1166 } else {
1167 lockIndex = elemIndex;
1168 }
1169
1170 createBdBlock<BufferOp>(builder, target, lockMode, acqNum, relNum,
1171 buffersPerFifo[target][elemIndex], offset,
1172 lenOut, channelDir, lockIndex, succ, dims,
1173 padDimensions, distribOrJoin);
1174 curr = succ;
1175 totalBlocks++;
1176 }
1177 elemIndex++;
1178 }
1179 }
1180
// Function that computes the Least Common Multiple of the values of a set.
// Returns 1 for an empty set. Uses std::lcm, which divides by the gcd before
// multiplying, so the intermediate product does not overflow when the final
// result is representable as an int.
int computeLCM(const std::set<int> &values) {
  int lcm = 1;
  for (int value : values)
    lcm = std::lcm(lcm, value);
  return lcm;
}
1189
1190 // Function that unrolls for-loops that contain objectFifo operations.
1191 LogicalResult unrollForLoops(DeviceOp &device, OpBuilder &builder,
1192 std::set<TileOp> objectFifoTiles) {
1193 for (auto coreOp : device.getOps<CoreOp>()) {
1194 if (objectFifoTiles.count(coreOp.getTileOp()) > 0) {
1195 std::vector<scf::ForOp> unrolledLoops;
1196 std::map<Operation *, bool> foundMap;
1197 std::map<Operation *, int64_t> remainderMap;
1198 std::map<Operation *, int64_t> tripCountMap;
1199 WalkResult res = coreOp.walk([&](scf::ForOp forLoop) {
1200 // look for operations on objectFifos
1201 // when multiple fifos in same loop, must use the smallest
1202 // common multiplier as the unroll factor
1203 foundMap[forLoop.getOperation()] = false;
1204 std::set<int> objFifoSizes;
1205 Block *body = forLoop.getBody();
1206 remainderMap[forLoop.getOperation()] = 0;
1207 for (auto acqOp : body->getOps<ObjectFifoAcquireOp>()) {
1208 if (acqOp.getOperation()->getParentOp() == forLoop) {
1209 foundMap[forLoop.getOperation()] = true;
1210 ObjectFifoCreateOp op = acqOp.getObjectFifo();
1211 objFifoSizes.insert(op.size());
1212 }
1213 }
1214 // If the loop doesn't have acquire and release locks
1215 // Push it to the unrolledLoops to avoid unrolling
1216 if (!foundMap[forLoop.getOperation()]) {
1217 unrolledLoops.push_back(forLoop);
1218 return WalkResult::advance();
1219 }
1220 // Walk in the loop region to unroll the loop and its remainder
1221 Region *region = forLoop->getParentRegion();
1222 scf::ForOp prevLoop;
1223 prevLoop = forLoop;
1224 tripCountMap[prevLoop.getOperation()] = 0;
1225 while (remainderMap[prevLoop.getOperation()] > 1 ||
1226 foundMap[prevLoop.getOperation()]) {
1227 region->walk([&](scf::ForOp remLoop) {
1228 bool skipLoop = false;
1229 int64_t tripCount = 0;
1230 if (remLoop.getSingleLowerBound() &&
1231 remLoop.getSingleUpperBound() && remLoop.getSingleStep()) {
1232 tripCount = constantTripCount(*(remLoop.getSingleLowerBound()),
1233 *(remLoop.getSingleUpperBound()),
1234 *(remLoop.getSingleStep()))
1235 .value_or(0);
1236 }
1237 int unrollFactor =
1238 computeLCM(objFifoSizes); // also counts original loop body
1239 // Loop ids are not unique.
1240 // Sometimes, immediately after unrolling, the unrolled loop
1241 // and the one next to it (can be the remainder loop or an
1242 // independent loop) will have the same ID. This makes it
1243 // difficult to identify which loop needs to be unrolled.
1244 // Once it restarts walking from start, it ends up allocating
1245 // new ID to each loop.
1246 if (remainderMap[prevLoop.getOperation()] > 1 &&
1247 foundMap[remLoop.getOperation()] == false &&
1248 prevLoop != remLoop) {
1249 skipLoop = true;
1250 }
1251 if (std::count(unrolledLoops.begin(), unrolledLoops.end(),
1252 remLoop) == 0 &&
1253 !skipLoop) {
1254 tripCountMap[remLoop.getOperation()] = tripCount;
1255 // if loop iterations < unrollFactor, unroll the loop fully
1256 if (tripCountMap[remLoop.getOperation()] < unrollFactor)
1257 unrollFactor = tripCountMap[remLoop.getOperation()];
1258 // If unrollFactor = 0,divide by zero
1259 if (unrollFactor == 0) {
1260 remLoop.emitOpError()
1261 << "could not be unrolled with unrollFactor = 0, check "
1262 "loop boundaries."
1263 << "\n";
1264 return WalkResult::interrupt();
1265 }
1266 remainderMap[remLoop.getOperation()] =
1267 tripCountMap[remLoop.getOperation()] % unrollFactor;
1268 auto step = remLoop.getStep()
1269 .getDefiningOp<arith::ConstantOp>()
1270 .getValue();
1271 int64_t step_value = llvm::dyn_cast<IntegerAttr>(step).getInt();
1272
1273 if (step_value < unrollFactor ||
1274 foundMap[remLoop.getOperation()]) {
1275 // Process the for loop
1276 if (failed(mlir::loopUnrollByFactor(remLoop, unrollFactor))) {
1277 remLoop.emitOpError()
1278 << "could not be unrolled with unrollFactor: "
1279 << unrollFactor << "\n";
1280 return WalkResult::interrupt();
1281 }
1282 unrolledLoops.push_back(remLoop);
1283 foundMap[remLoop.getOperation()] = false;
1284 } else {
1285 remainderMap[remLoop.getOperation()] = 0;
1286 foundMap[remLoop.getOperation()] = false;
1287 }
1288 } else {
1289 remainderMap[remLoop.getOperation()] = 0;
1290 foundMap[remLoop.getOperation()] = false;
1291 }
1292 prevLoop = remLoop;
1293 return WalkResult::advance();
1294 });
1295 }
1296 return WalkResult::advance();
1297 });
1298 if (res.wasInterrupted())
1299 return failure();
1300 }
1301 }
1302 return success();
1303 }
1304
1305 // Function that generates the IR to update runtime state of objectfifo
1306 // accesses. Called by dynamicGlobalObjectFifos().
1307 void updateGlobalNextIndex(OpBuilder &builder, ObjectFifoReleaseOp relOp,
1308 BufferOp globalNextIndex, arith::ConstantOp index,
1309 arith::ConstantOp size) {
1310 builder.setInsertionPointAfter(relOp);
1311 Value oldCounter = builder.create<memref::LoadOp>(
1312 builder.getUnknownLoc(), globalNextIndex,
1313 ValueRange(ArrayRef({index.getResult()})));
1314 Value val = builder.create<arith::ConstantOp>(
1315 oldCounter.getLoc(), builder.getI32IntegerAttr(relOp.getSize()));
1316 Value sum = builder.create<arith::AddIOp>(val.getLoc(), oldCounter, val);
1317 Value isGreaterEqual = builder.create<arith::CmpIOp>(
1318 sum.getLoc(), arith::CmpIPredicate::sge, sum, size);
1319 Value newCounter = builder.create<arith::SelectOp>(
1320 sum.getLoc(), isGreaterEqual,
1321 builder.create<arith::SubIOp>(sum.getLoc(), sum, size), sum);
1322 builder.create<memref::StoreOp>(size.getLoc(), newCounter, globalNextIndex,
1323 ValueRange(ArrayRef({index.getResult()})));
1324 }
1325
1326 // Function that generates the IR for objectfifo accesses to be handled at
1327 // runtime.
1328 LogicalResult dynamicGlobalObjectFifos(DeviceOp &device, OpBuilder &builder,
1329 std::set<TileOp> objectFifoTiles) {
1330 for (auto coreOp : device.getOps<CoreOp>()) {
1331 if (objectFifoTiles.count(coreOp.getTileOp()) <= 0)
1332 continue;
1333 if (objectFifoTiles.count(coreOp.getTileOp()) > 0) {
1334 // For each core: count the number of objectFifos and create
1335 // a global buffer just before the core to track index of
1336 // next object to access.
1337 // !! NOTE !! objectFifos with same producer / consumer tile
1338 // need two counters (accessed based on the ObjectFifoPort)
1339 std::map<std::pair<ObjectFifoCreateOp, ObjectFifoPort>, int> fifoSizes;
1340 // Also, keep a map of the ConstantOps for the indices per OF
1341 // and a map with the ConstantOps for the sizes per OF.
1342 std::map<std::pair<ObjectFifoCreateOp, ObjectFifoPort>,
1343 arith::ConstantOp>
1344 globalIndices;
1345 std::map<std::pair<ObjectFifoCreateOp, ObjectFifoPort>,
1346 arith::ConstantOp>
1347 constantSizes;
1348
1349 int index = 0;
1350 builder.setInsertionPointToStart(&(coreOp.getBody().front()));
1351 Value initVal = builder.create<arith::ConstantOp>(
1352 builder.getUnknownLoc(), builder.getI32IntegerAttr(0));
1353 coreOp.walk([&](ObjectFifoAcquireOp acqOp) {
1354 ObjectFifoCreateOp op = acqOp.getObjectFifo();
1355 ObjectFifoPort port = acqOp.getPort();
1356 if (fifoSizes.find({op, port}) == fifoSizes.end()) {
1357 fifoSizes[{op, port}] = op.size();
1358 auto indexOp = builder.create<arith::ConstantOp>(
1359 initVal.getLoc(), builder.getIndexAttr(index));
1360 globalIndices[{op, port}] = indexOp;
1361 index++;
1362 auto size = builder.create<arith::ConstantOp>(
1363 indexOp.getLoc(), builder.getI32IntegerAttr(op.size()));
1364 constantSizes[{op, port}] = size;
1365 }
1366 });
1367 builder.setInsertionPoint(coreOp);
1368 auto memrefTy =
1369 MemRefType::get(SmallVector<int64_t>{(int64_t)fifoSizes.size()},
1370 builder.getI32Type());
1371 auto globalNextIndex = builder.create<BufferOp>(
1372 builder.getUnknownLoc(), memrefTy, coreOp.getTile(),
1373 /*sym_name*/ nullptr, /*address*/ nullptr,
1374 /*initial_value*/ nullptr, /*mem_bank*/ nullptr);
1375
1376 // Initialize all counters in the global buffers to 0.
1377 for (auto i : constantSizes) {
1378 builder.setInsertionPointAfter(i.second);
1379 builder.create<memref::StoreOp>(
1380 builder.getUnknownLoc(), initVal, globalNextIndex,
1381 ValueRange(ArrayRef({globalIndices[i.first].getResult()})));
1382 }
1383
1384 // Walk the code:
1385 // - after each ObjectFifoReleaseOp:
1386 // - globalNextIndex: add #rel modulo objfifo depth
1387 // - before each ObjectFifoAcquireOp:
1388 // - globalNextIndex: load index and use it to index_switch (one
1389 // IndexSwithOp per AccessOp)
1390 WalkResult res = coreOp.walk([&](Operation *op) {
1391 if (auto relOp = dyn_cast<ObjectFifoReleaseOp>(op)) {
1392 ObjectFifoCreateOp createOp = relOp.getObjectFifo();
1393 ObjectFifoPort port = relOp.getPort();
1394 updateGlobalNextIndex(builder, relOp, globalNextIndex,
1395 globalIndices[{createOp, port}],
1396 constantSizes[{createOp, port}]);
1397 }
1398 if (auto acqOp = dyn_cast<ObjectFifoAcquireOp>(op)) {
1399 std::vector<ObjectFifoSubviewAccessOp> accessOps;
1400 for (auto u : acqOp->getUsers())
1401 if (auto accessOp = dyn_cast<ObjectFifoSubviewAccessOp>(u))
1402 accessOps.push_back(accessOp);
1403
1404 for (auto accessOp : accessOps) {
1405 ObjectFifoCreateOp createOp = acqOp.getObjectFifo();
1406 ObjectFifoPort port = acqOp.getPort();
1407
1408 // Single switch case
1409 if (fifoSizes[{createOp, port}] == 1)
1410 return WalkResult::advance();
1411
1412 // Create a switch for each subview access
1413 builder.setInsertionPointAfter(accessOp);
1414 auto switchIndexAsInteger = builder.create<memref::LoadOp>(
1415 builder.getUnknownLoc(), globalNextIndex,
1416 ValueRange(
1417 ArrayRef({globalIndices[{createOp, port}].getResult()})));
1418 auto switchIndex = builder.create<arith::IndexCastOp>(
1419 builder.getUnknownLoc(), builder.getIndexType(),
1420 switchIndexAsInteger);
1421 unsigned caseRegionCounts = fifoSizes[{createOp, port}];
1422 SmallVector<int64_t, 4> caseValues;
1423 for (int i = 0; i < fifoSizes[{createOp, port}]; ++i) {
1424 caseValues.push_back(i);
1425 }
1426 auto cases =
1427 DenseI64ArrayAttr::get(builder.getContext(), caseValues);
1428 auto switchOp = builder.create<scf::IndexSwitchOp>(
1429 switchIndex.getLoc(),
1430 TypeRange({buffersPerFifo[createOp][0].getType()}),
1431 switchIndex, cases, caseRegionCounts);
1432 // Create default case of IndexSwitchOp
1433 builder.createBlock(&switchOp.getDefaultRegion());
1434 auto bufferIndex = (accessOp.getIndex()) % createOp.size();
1435 builder.setInsertionPointToStart(&(switchOp.getDefaultBlock()));
1436 builder.create<scf::YieldOp>(
1437 builder.getUnknownLoc(),
1438 buffersPerFifo[createOp][bufferIndex].getResult());
1439 for (int i = 0; i < fifoSizes[{createOp, port}]; ++i) {
1440 // Create other cases of IndexSwitchOp
1441 builder.createBlock(&switchOp.getCaseRegions()[i]);
1442 builder.setInsertionPoint(&switchOp.getCaseBlock(i),
1443 switchOp.getCaseBlock(i).begin());
1444 int bufferToBeAccesed =
1445 (accessOp.getIndex() + i) % fifoSizes[{createOp, port}];
1446 builder.create<scf::YieldOp>(
1447 switchOp.getCaseRegions()[i].getLoc(),
1448 buffersPerFifo[createOp][bufferToBeAccesed].getResult());
1449 }
1450
1451 // Replace all uses of accessed objectfifo buffers with
1452 // results of switchOps
1453 accessOp.getOutput().replaceAllUsesWith(switchOp.getResult(0));
1454 }
1455 }
1456 return WalkResult::advance();
1457 });
1458 if (res.wasInterrupted())
1459 return failure();
1460 }
1461 }
1462 return success();
1463 }
1464
1465 /// Function used to create a UseLockOp based on input parameters.
1466 /// acc is an accumulator map that tracks the indices of the next locks to
1467 /// acquire (or release). Uses op to find index of acc for next lockID.
1468 /// Updates acc.
1469 void createUseLocks(OpBuilder &builder, ObjectFifoCreateOp op,
1470 ObjectFifoPort port,
1471 DenseMap<std::pair<ObjectFifoCreateOp, int>, int> &acc,
1472 int numLocks, LockAction lockAction) {
1473 ObjectFifoCreateOp target = op;
1474 auto portNum = port == ObjectFifoPort::Produce ? 0 : 1;
1475 if (auto linkOp = getOptionalLinkOp(op))
1476 if (objFifoLinks.find(*linkOp) != objFifoLinks.end())
1477 target = objFifoLinks[*linkOp];
1478
1479 auto dev = op->getParentOfType<DeviceOp>();
1480 if (!dev.getTargetModel().hasProperty(AIETargetModel::UsesSemaphoreLocks)) {
1481
1482 if (locksPerFifo[target].size() == 0) {
1483 for (int i = 0; i < numLocks; i++) {
1484 int lockID = acc[{op, portNum}];
1485 acc[{op, portNum}] =
1486 (lockID + 1) % op.size(); // update to next objFifo elem
1487 }
1488 return;
1489 }
1490
1491 int lockMode = 0;
1492 if ((port == ObjectFifoPort::Produce &&
1493 lockAction == LockAction::Release) ||
1494 (port == ObjectFifoPort::Consume &&
1495 lockAction == LockAction::Acquire))
1496 lockMode = 1;
1497 for (int i = 0; i < numLocks; i++) {
1498 int lockID = acc[{op, portNum}];
1499 builder.create<UseLockOp>(builder.getUnknownLoc(),
1500 locksPerFifo[target][lockID], lockAction,
1501 lockMode);
1502 acc[{op, portNum}] =
1503 (lockID + 1) % op.size(); // update to next objFifo elem
1504 }
1505 } else {
1506 if (numLocks == 0)
1507 return;
1508
1509 if (locksPerFifo[target].size() == 0) {
1510 acc[{op, portNum}] = (acc[{op, portNum}] + numLocks) %
1511 op.size(); // update to next objFifo elem
1512 return;
1513 }
1514
1515 // search for the correct lock based on the port of the acq/rel
1516 // operation e.g. acq as consumer is the read lock (second)
1517 LockOp lock;
1518 if (lockAction == LockAction::AcquireGreaterEqual) {
1519 if (port == ObjectFifoPort::Produce)
1520 lock = locksPerFifo[target][0];
1521 else
1522 lock = locksPerFifo[target][1];
1523 } else {
1524 if (port == ObjectFifoPort::Produce)
1525 lock = locksPerFifo[target][1];
1526 else
1527 lock = locksPerFifo[target][0];
1528 }
1529 builder.create<UseLockOp>(builder.getUnknownLoc(), lock, lockAction,
1530 numLocks);
1531 acc[{op, portNum}] = (acc[{op, portNum}] + numLocks) %
1532 op.size(); // update to next objFifo elem
1533 }
1534 }
1535
1536 /// Function used to check whether op is already contained in map.
1537 /// If it is then return the associated int, if not create new entry and
1538 /// return 0.
1540 DenseMap<std::pair<ObjectFifoCreateOp, int>, int> &map,
1541 std::pair<ObjectFifoCreateOp, int> pair) {
1542 if (map.find(pair) == map.end()) {
1543 map[pair] = 0;
1544 return 0;
1545 }
1546 return map[pair];
1547 }
1548
1549 /// Function used to add an external buffer to the externalBuffersPerFifo map.
1550 void addExternalBuffer(ObjectFifoCreateOp fifo, ExternalBufferOp buff) {
1551 if (externalBuffersPerFifo.find(fifo) == externalBuffersPerFifo.end()) {
1552 std::vector<ExternalBufferOp> buffs;
1553 externalBuffersPerFifo[fifo] = buffs;
1554 }
1555 externalBuffersPerFifo[fifo].push_back(buff);
1556 }
1557
1558 /// Function used to detect all external buffers associated with parent
1559 /// objectFifo and tile then map them to child objectFifo.
1560 void detectExternalBuffers(DeviceOp &device, ObjectFifoCreateOp parent,
1561 ObjectFifoCreateOp child, Value tile) {
1562 for (auto regOp : device.getOps<ObjectFifoRegisterExternalBuffersOp>())
1563 if (auto objFifo = regOp.getObjectFifo();
1564 regOp.getTile() == tile && objFifo == parent)
1565 for (auto extBuff : regOp.getExternalBuffers())
1566 addExternalBuffer(child, extBuff.getDefiningOp<ExternalBufferOp>());
1567 }
1568
1569 /// Function used to replace uses of split objectFifos.
1570 void replaceSplitFifo(ObjectFifoCreateOp originalOp, ObjectFifoCreateOp newOp,
1571 TileOp tile) {
1572 auto original =
1573 originalOp->getAttrOfType<StringAttr>(SymbolTable::getSymbolAttrName());
1574 auto newSymbol =
1575 newOp->getAttrOfType<StringAttr>(SymbolTable::getSymbolAttrName());
1576 for (auto user : tile->getUsers())
1577 if (isa<CoreOp>(user))
1578 if (auto res =
1579 SymbolTable::replaceAllSymbolUses(original, newSymbol, user);
1580 res.failed())
1581 llvm_unreachable("unreachable");
1582 }
1583
1584 /// Function used to find the size of an objectFifo after split based on
1585 /// the maximum number of elements (of the original objectFifo) acquired
1586 /// by a process running on given tile. If no CoreOp exists for this tile
1587 /// return 0.
1588 int findObjectFifoSize(DeviceOp &device, Value tile,
1589 ObjectFifoCreateOp objFifo) {
1590 if (objFifo.size() == 0)
1591 return 0;
1592
1593 // if memTile, size is equal to objFifo size
1594 if (tile.getDefiningOp<TileOp>().isMemTile())
1595 return objFifo.size();
1596
1597 // if shimTile, size is equal to number of external buffers
1598 if (tile.getDefiningOp<TileOp>().isShimTile())
1599 for (auto regOp : device.getOps<ObjectFifoRegisterExternalBuffersOp>()) {
1600 if (regOp.getTile() == tile)
1601 return regOp.getExternalBuffers().size();
1602 }
1603
1604 int maxAcquire = 0;
1605 for (auto coreOp : device.getOps<CoreOp>())
1606 if (coreOp.getTile() == tile)
1607 coreOp.walk([&](ObjectFifoAcquireOp acqOp) {
1608 if (auto createOp = acqOp.getObjectFifo(); createOp == objFifo)
1609 if (acqOp.acqNumber() > maxAcquire)
1610 maxAcquire = acqOp.acqNumber();
1611 });
1612
1613 if (maxAcquire > 0) {
1614 if (maxAcquire == 1 && objFifo.size() == 1)
1615 return 1;
1616 return maxAcquire + 1;
1617 // +1 because objectFifo size is always 1 bigger than maxAcquire to allow
1618 // for prefetching: simplest case scenario is at least a ping-pong buffer
1619 }
1620
1621 return objFifo.size();
1622 }
1623
1624 /// Function used to generate, from an objectFifo with a shimTile endpoint, a
1625 /// shimDMAAllocationOp containing the channelDir, channelIndex and
1626 /// shimTile col assigned by the objectFifo lowering.
1627 void createObjectFifoAllocationInfo(OpBuilder &builder, MLIRContext *ctx,
1628 FlatSymbolRefAttr obj_fifo, int colIndex,
1629 DMAChannelDir channelDir,
1630 int channelIndex, bool plio) {
1631 builder.create<ShimDMAAllocationOp>(builder.getUnknownLoc(), obj_fifo,
1632 DMAChannelDirAttr::get(ctx, channelDir),
1633 builder.getI64IntegerAttr(channelIndex),
1634 builder.getI64IntegerAttr(colIndex),
1635 builder.getBoolAttr(plio));
1636 }
1637
1638 /// Function used to verify that an objectfifo is present in at most one
1639 /// ObjectFifoLinkOp.
1640 void verifyObjectFifoLinks(DeviceOp &device) {
1641 DenseSet<ObjectFifoCreateOp> objectfifoset;
1642 for (ObjectFifoLinkOp link : device.getOps<ObjectFifoLinkOp>()) {
1643 for (ObjectFifoCreateOp inOf : link.getInputObjectFifos()) {
1644 if (objectfifoset.count(inOf))
1645 inOf.emitOpError("objectfifo cannot be in more than one "
1646 "ObjectFifoLinkOp");
1647 objectfifoset.insert(inOf);
1648 }
1649 for (ObjectFifoCreateOp outOf : link.getOutputObjectFifos()) {
1650 if (objectfifoset.count(outOf))
1651 outOf.emitOpError("objectfifo cannot be in more than one "
1652 "ObjectFifoLinkOp");
1653 objectfifoset.insert(outOf);
1654 }
1655 }
1656 }
1657
1658 /// Helper function to assign DMA channel indices for FIFOs based on
1659 /// cross-tile conditions
1661 DMAChannelAnalysis &dmaAnalysis,
1662 const std::map<ObjectFifoCreateOp, bool> &crossTileInfos,
1663 std::map<ObjectFifoCreateOp, int> &fifo_dma_channel_index,
1664 bool assignCrossTileOnly) {
1665 for (auto &[producer, consumers] : splitFifos) {
1666 // Check if we should process this producer based on cross-tile condition
1667 bool shouldProcessProducer = assignCrossTileOnly
1668 ? crossTileInfos.at(producer)
1669 : !crossTileInfos.at(producer);
1670
1671 if (shouldProcessProducer) {
1672 bool requiresAdjacentTileAccessChannels = crossTileInfos.at(producer);
1673 int channelIndex = dmaAnalysis.getDMAChannelIndex(
1674 producer.getProducerTileOp(), DMAChannelDir::MM2S,
1675 requiresAdjacentTileAccessChannels);
1676 fifo_dma_channel_index[producer] = channelIndex;
1677 }
1678
1679 for (auto consumer : consumers) {
1680 // Check if we should process this consumer based on cross-tile
1681 // condition
1682 bool shouldProcessConsumer = assignCrossTileOnly
1683 ? crossTileInfos.at(consumer)
1684 : !crossTileInfos.at(consumer);
1685
1686 if (shouldProcessConsumer) {
1687 bool requiresAdjacentTileAccessChannels = crossTileInfos.at(consumer);
1688 int channelIndex = dmaAnalysis.getDMAChannelIndex(
1689 consumer.getProducerTileOp(), DMAChannelDir::S2MM,
1690 requiresAdjacentTileAccessChannels);
1691 fifo_dma_channel_index[consumer] = channelIndex;
1692 }
1693 }
1694 }
1695 }
1696
1697 void runOnOperation() override {
1698
1699 DeviceOp device = getOperation();
1700 LockAnalysis lockAnalysis(device);
1701 DMAChannelAnalysis dmaAnalysis(device);
1702 OpBuilder builder = OpBuilder::atBlockTerminator(device.getBody());
1703 auto ctx = device->getContext();
1704 auto producerWireType = WireBundle::DMA;
1705 auto consumerWireType = WireBundle::DMA;
1706 std::set<TileOp>
1707 objectFifoTiles; // track cores to check for loops during unrolling
1708
1709 verifyObjectFifoLinks(device);
1710
1711 //===------------------------------------------------------------------===//
1712 // Split objectFifos into a consumer end and producer end if needed
1713 //===------------------------------------------------------------------===//
1714 // We are going to create additional createObjectFifoOps, so get a copy of
1715 // all "original" ones before the loop to avoid looping over newly created
1716 // ones.
1717 std::vector<ObjectFifoCreateOp> createFifoOps;
1718 auto range = device.getOps<ObjectFifoCreateOp>();
1719 createFifoOps.insert(createFifoOps.end(), range.begin(), range.end());
1720 for (auto createOp : createFifoOps) {
1721 std::vector<ObjectFifoCreateOp> splitConsumerFifos;
1722 int consumerIndex = 0;
1723 int consumerDepth = createOp.size();
1724 ArrayRef<BDDimLayoutArrayAttr> consumerDims =
1725 createOp.getDimensionsFromStreamPerConsumer();
1726
1727 // Only FIFOs using DMA are split into two ends;
1728 // skip in shared memory case
1729 if (int share_direction = 0; !requiresDMAs(createOp, share_direction)) {
1730 continue;
1731 }
1732
1733 for (auto consumerTile : createOp.getConsumerTiles()) {
1734 auto consumerTileOp = dyn_cast<TileOp>(consumerTile.getDefiningOp());
1735
1736 if (isa<ArrayAttr>(createOp.getElemNumber())) {
1737 // +1 to account for 1st depth (producer)
1738 consumerDepth = createOp.size(consumerIndex + 1);
1739 } else {
1740 consumerDepth = findObjectFifoSize(device, consumerTileOp, createOp);
1741 }
1742
1743 builder.setInsertionPointAfter(createOp);
1744 auto datatype = llvm::cast<AIEObjectFifoType>(createOp.getElemType());
1745 auto consumerObjFifoSize =
1746 builder.getIntegerAttr(builder.getI32Type(), consumerDepth);
1747 // rename and replace split objectFifo
1748 std::string consumerFifoName;
1749 if (createOp.getConsumerTiles().size() > 1) {
1750 consumerFifoName = createOp.name().str() + "_" +
1751 std::to_string(consumerIndex) + "_cons";
1752 } else {
1753 consumerFifoName = createOp.name().str() + "_cons";
1754 }
1755 BDDimLayoutArrayAttr emptyDims =
1756 BDDimLayoutArrayAttr::get(builder.getContext(), {});
1757 BDDimLayoutArrayAttr singletonFromStreamDims =
1758 BDDimLayoutArrayAttr::get(
1759 builder.getContext(),
1760 ArrayRef<BDDimLayoutAttr>{consumerDims[consumerIndex]});
1761 BDDimLayoutArrayArrayAttr fromStreamDims =
1762 BDDimLayoutArrayArrayAttr::get(builder.getContext(),
1763 singletonFromStreamDims);
1764
1765 ObjectFifoCreateOp consumerFifo = createObjectFifo(
1766 builder, datatype, consumerFifoName, consumerTile, consumerTile,
1767 consumerObjFifoSize, emptyDims, fromStreamDims);
1768 if (createOp.getDisableSynchronization())
1769 consumerFifo.setDisableSynchronization(true);
1770 replaceSplitFifo(createOp, consumerFifo, consumerTileOp);
1771
1772 // identify external buffers that were registered to the consumer fifo
1773 if (consumerTile.getDefiningOp<TileOp>().isShimTile())
1774 detectExternalBuffers(device, createOp, consumerFifo, consumerTile);
1775
1776 // record that this objectFifo was split; it will require DMA config
1777 splitConsumerFifos.push_back(consumerFifo);
1778
1779 // update the linkOp if the split objFifo was originally its start point
1780 if (auto linkOp = getOptionalLinkOp(createOp))
1781 for (ObjectFifoCreateOp fifoIn : linkOp->getInputObjectFifos())
1782 if (fifoIn.name() == createOp.name() &&
1783 consumerTile == *linkOp->getOptionalSharedTile())
1784 if (failed(SymbolTable::replaceAllSymbolUses(
1785 createOp, consumerFifo.name(), linkOp->getOperation())))
1786 llvm::report_fatal_error("unable to update all symbol uses");
1787
1788 consumerIndex++;
1789 }
1790
1791 if (!splitConsumerFifos.empty()) {
1792 splitFifos.emplace_back(createOp, splitConsumerFifos);
1793 }
1794 }
1795
1796 //===------------------------------------------------------------------===//
1797 // - Create objectFifo buffers and locks.
1798 // - Populate a list of tiles containing objectFifos for later processing of
1799 // the acquires/releases (uses of the FIFO).
1800 // - Global release counter tracker to keep track of the objectFifo state
1801 //===------------------------------------------------------------------===//
1802 for (auto createOp : device.getOps<ObjectFifoCreateOp>()) {
1803
1804 int share_direction = 0;
1805 bool shared = !requiresDMAs(createOp, share_direction);
1806
1807 // add all tiles that contain an objectFifo to objectFifoTiles for later
1808 // loop unrolling pass
1809 objectFifoTiles.insert(createOp.getProducerTileOp());
1810 for (auto consumerTile : createOp.getConsumerTiles()) {
1811 auto consumerTileOp = dyn_cast<TileOp>(consumerTile.getDefiningOp());
1812 objectFifoTiles.insert(consumerTileOp);
1813 }
1814
1815 // identify external buffers that were registered to
1816 // the producer objectFifo
1817 if (createOp.getProducerTileOp().isShimTile())
1818 detectExternalBuffers(device, createOp, createOp,
1819 createOp.getProducerTile());
1820
1821 // if split, the necessary size for producer fifo might change
1822 if (shared) {
1823
1824 checkAndApplyViaSharedMemAttribute(createOp, share_direction);
1825 createObjectFifoElements(builder, lockAnalysis, createOp,
1826 share_direction);
1827 } else {
1828
1829 if (createOp.getViaSharedMem().has_value())
1830 createOp->emitOpError(
1831 "no access to shared memory module specified by "
1832 "`via_shared_mem`");
1833
1834 if (isa<ArrayAttr>(createOp.getElemNumber()))
1835 createOp.setElemNumberAttr(
1836 builder.getI32IntegerAttr(createOp.size()));
1837 else {
1838
1839 if (!createOp.getInitValues().has_value()) {
1840
1841 int prodMaxAcquire = findObjectFifoSize(
1842 device, createOp.getProducerTileOp(), createOp);
1843 createOp.setElemNumberAttr(
1844 builder.getI32IntegerAttr(prodMaxAcquire));
1845 }
1846 }
1847 createObjectFifoElements(builder, lockAnalysis, createOp,
1848 share_direction);
1849 }
1850 }
1851
1852 // Analyze cross-tile buffer allocations and print results
1853 auto crossTileInfos = analyzeCrossTileFIFOBuffers();
1854
1855 // assign DMA channels for FIFOs
1856 // assign the channel index for fifos that has cross-tile issues first
1857 // use dmaAnalysis.getDMAChannelIndex() to assign index (which internally
1858 // loop over all of available channels and assign from 0 to maximum)
1859 std::map<ObjectFifoCreateOp, int> fifo_dma_channel_index;
1860
1861 // Assign channel indices for FIFOs with cross-tile issues first
1862 assignDMAChannelIndices(dmaAnalysis, crossTileInfos, fifo_dma_channel_index,
1863 true);
1864
1865 // Then assign channel indices for FIFOs without cross-tile issues
1866 assignDMAChannelIndices(dmaAnalysis, crossTileInfos, fifo_dma_channel_index,
1867 false);
1868
1869 //===------------------------------------------------------------------===//
1870 // Create flows and tile DMAs
1871 //===------------------------------------------------------------------===//
1872 // Only the objectFifos we split above require DMA communication; the others
1873 // rely on shared memory and share the same buffers.
1874 for (auto &[producer, consumers] : splitFifos) {
1875 int producerChanIndex = fifo_dma_channel_index[producer];
1876 if (producerChanIndex == -1)
1877 producer.getProducerTileOp().emitOpError(
1878 "number of output DMA channel exceeded!");
1879 DMAChannel producerChan = {DMAChannelDir::MM2S, producerChanIndex};
1880 createDMA(device, builder, producer, producerChan.direction,
1881 producerChan.channel, 0, producer.getDimensionsToStreamAttr(),
1882 producer.getPadDimensionsAttr());
1883 // generate objectFifo allocation info
1884 builder.setInsertionPoint(device.getBody()->getTerminator());
1885
1886 if (producer.getProducerTileOp().isShimTile())
1887 createObjectFifoAllocationInfo(
1888 builder, ctx, SymbolRefAttr::get(ctx, producer.getName()),
1889 producer.getProducerTileOp().colIndex(), producerChan.direction,
1890 producerChan.channel, producer.getPlio());
1891
1892 for (auto consumer : consumers) {
1893 int consumerChanIndex = fifo_dma_channel_index[consumer];
1894 if (consumerChanIndex == -1)
1895 consumer.getProducerTileOp().emitOpError(
1896 "number of input DMA channel exceeded!");
1897 DMAChannel consumerChan = {DMAChannelDir::S2MM, consumerChanIndex};
1898 BDDimLayoutArrayAttr consumerDims =
1899 consumer.getDimensionsFromStreamPerConsumer()[0];
1900 createDMA(device, builder, consumer, consumerChan.direction,
1901 consumerChan.channel, 1, consumerDims, nullptr);
1902 // generate objectFifo allocation info
1903 builder.setInsertionPoint(device.getBody()->getTerminator());
1904
1905 // If we have PLIO then figure out the direction and make that a PLIO
1906 if (producer.getPlio()) {
1907 producerWireType = producer.getProducerTileOp().isShimTile()
1908 ? WireBundle::PLIO
1909 : WireBundle::DMA;
1910 consumerWireType = consumer.getProducerTileOp().isShimTile()
1911 ? WireBundle::PLIO
1912 : WireBundle::DMA;
1913 } else {
1914 producerWireType = WireBundle::DMA;
1915 consumerWireType = WireBundle::DMA;
1916 }
1917
1918 if (consumer.getProducerTileOp().isShimTile())
1919 createObjectFifoAllocationInfo(
1920 builder, ctx, SymbolRefAttr::get(ctx, producer.getName()),
1921 consumer.getProducerTileOp().colIndex(), consumerChan.direction,
1922 consumerChan.channel, producer.getPlio());
1923
1924 // create flow
1925 builder.setInsertionPointAfter(producer);
1926 builder.create<FlowOp>(builder.getUnknownLoc(),
1927 producer.getProducerTile(), producerWireType,
1928 producerChan.channel, consumer.getProducerTile(),
1929 consumerWireType, consumerChan.channel);
1930 }
1931 }
1932
1933 //===------------------------------------------------------------------===//
1934 // Statically unroll for loops or use dynamic objectFifos
1935 //===------------------------------------------------------------------===//
1936 if (clDynamicObjectFifos) {
1937 if (failed(dynamicGlobalObjectFifos(device, builder, objectFifoTiles)))
1938 signalPassFailure();
1939 } else {
1940 std::set<TileOp> dynamicTiles;
1941 std::set<TileOp> unrollTiles;
1942 for (auto c : device.getOps<CoreOp>()) {
1943 TileOp t = c.getTileOp();
1944 if (objectFifoTiles.count(t) > 0) {
1945 if (c.getDynamicObjfifoLowering().has_value()) {
1946 if (c.getDynamicObjfifoLowering().value())
1947 dynamicTiles.insert(t);
1948 else
1949 unrollTiles.insert(t);
1950 } else {
1951 unrollTiles.insert(t);
1952 }
1953 }
1954 }
1955 if (failed(dynamicGlobalObjectFifos(device, builder, dynamicTiles)))
1956 signalPassFailure();
1957 if (failed(unrollForLoops(device, builder, unrollTiles)))
1958 signalPassFailure();
1959 }
1960
1961 //===------------------------------------------------------------------===//
1962 // Replace ops
1963 //===------------------------------------------------------------------===//
1964 for (auto coreOp : device.getOps<CoreOp>()) {
1965 DenseMap<ObjectFifoAcquireOp, std::vector<BufferOp *>>
1966 subviews; // maps each "subview" to its buffer references (subviews
1967 // are created by AcquireOps)
1968 DenseMap<std::pair<ObjectFifoCreateOp, int>, std::vector<int>>
1969 acquiresPerFifo; // maps each objFifo to indices of buffers acquired
1970 // in latest subview of that objFifo (useful to
1971 // cascade acquired elements to next AcquireOp)
1972 DenseMap<std::pair<ObjectFifoCreateOp, int>,
1973 std::vector<ObjectFifoReleaseOp>>
1974 releaseOps; // useful to check which ReleaseOp has taken place before
1975 // an AcquireOp per objFifo
1976 DenseMap<std::pair<ObjectFifoCreateOp, int>, int>
1977 acqPerFifo; // maps each objFifo to its next index to acquire within
1978 // this CoreOp
1979 DenseMap<std::pair<ObjectFifoCreateOp, int>, int>
1980 relPerFifo; // maps each objFifo to its next index to release within
1981 // this CoreOp
1982
1983 //===----------------------------------------------------------------===//
1984 // Replace objectFifo.release ops
1985 //===----------------------------------------------------------------===//
1986 coreOp.walk([&](ObjectFifoReleaseOp releaseOp) {
1987 builder.setInsertionPointAfter(releaseOp);
1988 ObjectFifoCreateOp op = releaseOp.getObjectFifo();
1989 auto port = releaseOp.getPort();
1990 auto portNum = port == ObjectFifoPort::Produce ? 0 : 1;
1991 auto core = releaseOp->getParentOfType<CoreOp>();
1992
1993 if (auto linkOp = getOptionalLinkOp(op)) {
1994 if (core.getTile() == *linkOp->getOptionalSharedTile()) {
1995 releaseOp->emitOpError("currently cannot access objectFifo used in "
1996 "ObjectFifoLinkOp");
1997 return;
1998 }
1999 }
2000
2001 // update index of next element to release for this objectFifo
2002 updateAndReturnIndex(relPerFifo, {op, portNum});
2003
2004 // release locks
2005 int numLocks = releaseOp.relNumber();
2006 // account for repetition
2007 if (op.getRepeatCount().has_value())
2008 numLocks *= op.getRepeatCount().value();
2009 createUseLocks(builder, op, port, relPerFifo, numLocks,
2010 LockAction::Release);
2011
2012 // register release op
2013 if (releaseOps.find({op, portNum}) != releaseOps.end()) {
2014 releaseOps[{op, portNum}].push_back(releaseOp);
2015 } else {
2016 std::vector release = {releaseOp};
2017 releaseOps[{op, portNum}] = release;
2018 }
2019 });
2020
2021 //===----------------------------------------------------------------===//
2022 // Replace objectFifo.acquire ops
2023 //===----------------------------------------------------------------===//
2024 coreOp.walk([&](ObjectFifoAcquireOp acquireOp) {
2025 ObjectFifoCreateOp op = acquireOp.getObjectFifo();
2026 builder.setInsertionPointAfter(acquireOp);
2027 auto port = acquireOp.getPort();
2028 auto portNum = port == ObjectFifoPort::Produce ? 0 : 1;
2029 auto core = acquireOp->getParentOfType<CoreOp>();
2030
2031 auto linkOp = getOptionalLinkOp(op);
2032 if (linkOp) {
2033 if (core.getTile() == *linkOp->getOptionalSharedTile()) {
2034 acquireOp->emitOpError("currently cannot access objectFifo used in "
2035 "ObjectFifoLinkOp");
2036 return;
2037 }
2038 }
2039
2040 // index of next element to acquire for this objectFifo
2041 int start = updateAndReturnIndex(
2042 acqPerFifo, {op, portNum}); // useful for keeping track of which
2043 // indices are acquired
2044
2045 // check how many elements have been released in between this AcquireOp
2046 // and the previous one
2047 // !!! operations may not be in the same block !!!
2048 int numRel = 0;
2049 for (std::vector<ObjectFifoReleaseOp>::iterator relOp =
2050 releaseOps[{op, portNum}].begin();
2051 relOp != releaseOps[{op, portNum}].end();) {
2052 bool erased = false;
2053 Operation *acqBlockDefOp = acquireOp.getOperation();
2054 do {
2055 Operation *relBlockDefOp = (*relOp).getOperation();
2056 do {
2057 if (acqBlockDefOp->getBlock() == relBlockDefOp->getBlock()) {
2058 if (relBlockDefOp->isBeforeInBlock(acqBlockDefOp)) {
2059 numRel += (*relOp).relNumber();
2060 relOp = releaseOps[{op, portNum}].erase(relOp);
2061 // to ensure that we do not account
2062 // the ReleaseOps again later,
2063 // after the subview is created
2064 erased = true;
2065 }
2066 }
2067 } while ((relBlockDefOp = relBlockDefOp->getParentOp()) &&
2068 !isa<DeviceOp>(relBlockDefOp) && !erased);
2069 } while ((acqBlockDefOp = acqBlockDefOp->getParentOp()) &&
2070 !isa<DeviceOp>(acqBlockDefOp) && !erased);
2071 if (!erased)
2072 ++relOp;
2073 }
2074
2075 // track indices of elements to acquire
2076 std::vector<int> acquiredIndices;
2077 if (!acquiresPerFifo[{op, portNum}].empty()) {
2078 // take into account what has already been acquired by previous
2079 // AcquireOp in program order
2080 acquiredIndices = acquiresPerFifo[{op, portNum}];
2081 // take into account what has been released in-between
2082 if (static_cast<size_t>(numRel) > acquiredIndices.size()) {
2083 acquireOp->emitOpError("cannot release more elements than are "
2084 "already acquired");
2085 return;
2086 }
2087 for (int i = 0; i < numRel; i++)
2088 acquiredIndices.erase(acquiredIndices.begin());
2089 }
2090
2091 // acquire locks
2092 int numLocks = acquireOp.acqNumber();
2093 int alreadyAcq = acquiredIndices.size();
2094 int numCreate;
2095 if (numLocks > alreadyAcq)
2096 numCreate = numLocks - alreadyAcq;
2097 else
2098 numCreate = 0;
2099
2100 // account for repetition
2101 if (op.getRepeatCount().has_value())
2102 numCreate *= op.getRepeatCount().value();
2103
2104 auto dev = op->getParentOfType<DeviceOp>();
2105 if (auto &targetArch = dev.getTargetModel();
2106 targetArch.getTargetArch() == AIEArch::AIE1)
2107 createUseLocks(builder, op, port, acqPerFifo, numCreate,
2108 LockAction::Acquire);
2109 else
2110 createUseLocks(builder, op, port, acqPerFifo, numCreate,
2111 LockAction::AcquireGreaterEqual);
2112
2113 // if objFifo was linked with others, find which objFifos
2114 // elements to use
2115 ObjectFifoCreateOp target = op;
2116 if (linkOp)
2117 if (objFifoLinks.find(*linkOp) != objFifoLinks.end())
2118 target = objFifoLinks[*linkOp];
2119
2120 // create subview: buffers that were already acquired + new acquires
2121 for (int i = 0; i < numCreate; i++) {
2122 acquiredIndices.push_back(start);
2123 start = (start + 1) % op.size();
2124 }
2125 std::vector<BufferOp *> subviewRefs;
2126 subviewRefs.reserve(acquiredIndices.size());
2127 for (auto index : acquiredIndices)
2128 subviewRefs.push_back(&buffersPerFifo[target][index]);
2129
2130 subviews[acquireOp] = subviewRefs;
2131 acquiresPerFifo[{op, portNum}] = acquiredIndices;
2132 });
2133
2134 //===----------------------------------------------------------------===//
2135 // Replace subview.access ops
2136 //===----------------------------------------------------------------===//
2137 coreOp.walk([&](ObjectFifoSubviewAccessOp accessOp) {
2138 auto acqOp = accessOp.getSubview().getDefiningOp<ObjectFifoAcquireOp>();
2139 if (ObjectFifoCreateOp op = acqOp.getObjectFifo()) {
2140 if (auto linkOp = getOptionalLinkOp(op); linkOp.has_value()) {
2141 if (!linkOp->isDistribute() && !linkOp->isJoin()) {
2142 for (auto consumerTile : op.getConsumerTiles()) {
2143 if (auto consumerTileOp =
2144 dyn_cast<TileOp>(consumerTile.getDefiningOp())) {
2145 int share_dir_value = 0;
2146 bool sharing = isSharedMemory(
2147 op.getProducerTileOp(), consumerTileOp, &share_dir_value);
2148 if (!sharing)
2149 accessOp->emitOpError(
2150 "currently cannot access objectFifo used in "
2151 "ObjectFifoLinkOp if the tiles don't share memory");
2152 }
2153 }
2154 } else
2155 accessOp->emitOpError(
2156 "currently cannot access objectFifo used in "
2157 "ObjectFifoLinkOp if it is a distribute or join link");
2158 }
2159 }
2160 accessOp.getOutput().replaceAllUsesWith(
2161 subviews[acqOp][accessOp.getIndex()]->getBuffer());
2162 });
2163 }
2164 // make global symbols to replace the to be erased ObjectFifoCreateOps
2165 for (auto createOp : device.getOps<ObjectFifoCreateOp>()) {
2166 builder.setInsertionPointToStart(device.getBody());
2167 auto sym_name = createOp.getName();
2168 createOp->setAttr(SymbolTable::getSymbolAttrName(),
2169 builder.getStringAttr("__erase_" + sym_name));
2170 auto memrefType = llvm::cast<AIEObjectFifoType>(createOp.getElemType())
2171 .getElementType();
2172 builder.create<memref::GlobalOp>(builder.getUnknownLoc(), sym_name,
2173 builder.getStringAttr("public"),
2174 memrefType, nullptr, false, nullptr);
2175 }
2176
2177 //===------------------------------------------------------------------===//
2178 // Remove old ops
2179 //===------------------------------------------------------------------===//
2180 SetVector<Operation *> opsToErase;
2181 device.walk([&](Operation *op) {
2182 if (isa<ObjectFifoCreateOp, ObjectFifoLinkOp,
2183 ObjectFifoRegisterExternalBuffersOp, ObjectFifoAcquireOp,
2184 ObjectFifoSubviewAccessOp, ObjectFifoReleaseOp>(op))
2185 opsToErase.insert(op);
2186 });
2187 SmallVector<Operation *> sorted{opsToErase.begin(), opsToErase.end()};
2188 computeTopologicalSorting(sorted);
2189 for (auto *op : llvm::reverse(sorted))
2190 op->erase();
2191 }
2192};
2193
2194std::unique_ptr<OperationPass<DeviceOp>>
2196 return std::make_unique<AIEObjectFifoStatefulTransformPass>();
2197}
int getDMAChannelIndex(TileOp tileOp, DMAChannelDir dir, bool requiresAdjacentTileAccessChannels)
Given a tile and DMAChannelDir, returns next usable channel index for that tile.
int getLockID(TileOp &tileOp)
Given a tile, returns next usable lockID for that tile.
Include the generated interface declarations.
std::unique_ptr< mlir::OperationPass< DeviceOp > > createAIEObjectFifoStatefulTransformPass()
typedef struct DMAChannel { DMAChannelDir direction; int channel; } DMAChannel
Definition AIEDialect.h:159
const AIETargetModel & getTargetModel(mlir::Operation *op)
void createObjectFifoElements(OpBuilder &builder, LockAnalysis &lockAnalysis, ObjectFifoCreateOp op, int share_direction)
Function used to create objectFifo elements and their locks.
DenseMap< ObjectFifoCreateOp, std::vector< LockOp > > locksPerFifo
void createAIETileDMA(DeviceOp &device, OpBuilder &builder, ObjectFifoCreateOp op, DMAChannelDir channelDir, int channelIndex, int lockMode, BDDimLayoutArrayAttr dims)
Function used to create a MemOp region with a DMA channel.
void createMemTileDMA(DeviceOp &device, OpBuilder &builder, ObjectFifoCreateOp op, DMAChannelDir channelDir, int channelIndex, int lockMode, BDDimLayoutArrayAttr dims, BDPadLayoutArrayAttr padDimensions)
Function used to create a MemTileDMAOp region with a DMA channel.
int findObjectFifoSize(DeviceOp &device, Value tile, ObjectFifoCreateOp objFifo)
Function used to find the size of an objectFifo after split based on the maximum number of elements (...
void createObjectFifoAllocationInfo(OpBuilder &builder, MLIRContext *ctx, FlatSymbolRefAttr obj_fifo, int colIndex, DMAChannelDir channelDir, int channelIndex, bool plio)
Function used to generate, from an objectFifo with a shimTile endpoint, a shimDMAAllocationOp contain...
LogicalResult dynamicGlobalObjectFifos(DeviceOp &device, OpBuilder &builder, std::set< TileOp > objectFifoTiles)
Block * findEndOpBlock(Region &r)
Function that returns a pointer to the block of a Region that contains the AIEEndOp.
int calculateCurrentUsedMemory(TileOp targetTile, DenseMap< ObjectFifoCreateOp, std::vector< BufferOp > > &buffersPerFifo, std::vector< BufferOp > &buffers)
Function to calculate total memory usage on a specific tile based on all buffers allocated to that ti...
void replaceSplitFifo(ObjectFifoCreateOp originalOp, ObjectFifoCreateOp newOp, TileOp tile)
Function used to replace uses of split objectFifos.
bool isSharedMemory(TileOp a, TileOp b, int *share_direction)
Function that returns true if two tiles in the AIE array share a memory module.
void updateGlobalNextIndex(OpBuilder &builder, ObjectFifoReleaseOp relOp, BufferOp globalNextIndex, arith::ConstantOp index, arith::ConstantOp size)
std::vector< LockOp > createObjectFifoLocks(OpBuilder &builder, LockAnalysis &lockAnalysis, ObjectFifoCreateOp op, int numElem, int joinDistribFactor, TileOp creation_tile, int repeatCount)
Function used to create objectFifo locks based on target architecture.
void createShimDMA(DeviceOp &device, OpBuilder &builder, ObjectFifoCreateOp op, DMAChannelDir channelDir, int channelIndex, int lockMode, BDDimLayoutArrayAttr dims)
Function used to create a ShimDMAOp region with a DMA channel.
void addExternalBuffer(ObjectFifoCreateOp fifo, ExternalBufferOp buff)
Function used to add an external buffer to the externalBuffersPerFifo map.
void verifyObjectFifoLinks(DeviceOp &device)
Function used to verify that an objectfifo is present in at most one ObjectFifoLinkOp.
std::optional< ObjectFifoLinkOp > getOptionalLinkOp(ObjectFifoCreateOp op)
Function to retrieve ObjectFifoLinkOp of ObjectFifoCreateOp, if it belongs to one.
void createDMA(DeviceOp &device, OpBuilder &builder, ObjectFifoCreateOp op, DMAChannelDir channelDir, int channelIndex, int lockMode, BDDimLayoutArrayAttr dims, BDPadLayoutArrayAttr pad_dims)
Function that either calls createAIETileDMA(), createShimDMA() or createMemTileDMA() based on op tile...
void assignDMAChannelIndices(DMAChannelAnalysis &dmaAnalysis, const std::map< ObjectFifoCreateOp, bool > &crossTileInfos, std::map< ObjectFifoCreateOp, int > &fifo_dma_channel_index, bool assignCrossTileOnly)
Helper function to assign DMA channel indices for FIFOs based on cross-tile conditions.
void createBd(OpBuilder &builder, LockOp acqLock, int acqMode, LockAction acqLockAction, LockOp relLock, int relMode, MyOp buff, int offset, int len, Block *succ, BDDimLayoutArrayAttr dims, BDPadLayoutArrayAttr padDimensions)
Function used to create a Bd block.
void createBdBlock(OpBuilder &builder, ObjectFifoCreateOp op, int lockMode, int acqNum, int relNum, MyOp buff, int offset, int len, DMAChannelDir channelDir, size_t lockIndex, Block *succ, BDDimLayoutArrayAttr dims, BDPadLayoutArrayAttr padDimensions, bool distribOrJoin=false)
Function used to create a Bd block.
int updateAndReturnIndex(DenseMap< std::pair< ObjectFifoCreateOp, int >, int > &map, std::pair< ObjectFifoCreateOp, int > pair)
Function used to check whether op is already contained in map.
std::map< ObjectFifoCreateOp, bool > analyzeCrossTileFIFOBuffers()
Function to analyze cross-tile buffer allocations in splitFifos Returns a simple map of (ObjectFifoCr...
TileOp findOrCreateTile(OpBuilder &builder, DeviceOp &dev, TileOp hostTile, int col, int row)
Helper function to find a tile at specific coordinates.
DenseMap< ObjectFifoLinkOp, ObjectFifoCreateOp > objFifoLinks
void detectExternalBuffers(DeviceOp &device, ObjectFifoCreateOp parent, ObjectFifoCreateOp child, Value tile)
Function used to detect all external buffers associated with parent objectFifo and tile then map them...
DenseMap< ObjectFifoCreateOp, std::vector< ExternalBufferOp > > externalBuffersPerFifo
std::vector< ObjectFifoCreateOp > splitBecauseLink
void checkAndApplyViaSharedMemAttribute(ObjectFifoCreateOp createOp, int &share_direction)
LogicalResult unrollForLoops(DeviceOp &device, OpBuilder &builder, std::set< TileOp > objectFifoTiles)
ObjectFifoCreateOp createObjectFifo(OpBuilder &builder, AIEObjectFifoType datatype, std::string name, Value prodTile, Value consTile, Attribute depth, BDDimLayoutArrayAttr dimensionsToStream, BDDimLayoutArrayArrayAttr dimensionsFromStreamPerConsumer)
void createUseLocks(OpBuilder &builder, ObjectFifoCreateOp op, ObjectFifoPort port, DenseMap< std::pair< ObjectFifoCreateOp, int >, int > &acc, int numLocks, LockAction lockAction)
Function used to create a UseLockOp based on input parameters.
bool requiresDMAs(ObjectFifoCreateOp createOp, int &share_direction)
DenseMap< ObjectFifoCreateOp, std::vector< BufferOp > > buffersPerFifo
std::vector< std::pair< ObjectFifoCreateOp, std::vector< ObjectFifoCreateOp > > > splitFifos