MLIR-AIE
AIEObjectFifoStatefulTransform.cpp
Go to the documentation of this file.
1//===- AIEObjectFifoStatefulTransform.cpp ----------------------*- MLIR -*-===//
2//
3// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7// (c) Copyright 2021 Xilinx Inc.
8//
9// Date: October 18th 2021
10//
11//===----------------------------------------------------------------------===//
12
15
16#include "mlir/Analysis/TopologicalSortUtils.h"
17#include "mlir/Dialect/Arith/IR/Arith.h"
18#include "mlir/Dialect/MemRef/IR/MemRef.h"
19#include "mlir/Dialect/SCF/IR/SCF.h"
20#include "mlir/Dialect/SCF/Utils/Utils.h"
21#include "mlir/IR/Attributes.h"
22#include "mlir/Pass/Pass.h"
23#include "mlir/Transforms/DialectConversion.h"
24
25#include <numeric>
26#include <set>
27
28using namespace mlir;
29using namespace xilinx;
30using namespace xilinx::AIE;
31
32#define DEBUG_TYPE "aie-objectFifo-stateful-transform"
33
34#define LOOP_VAR_DEPENDENCY (-2)
35
36//===----------------------------------------------------------------------===//
37// Lock Analysis
38//===----------------------------------------------------------------------===//
40 DenseMap<std::pair<Value, int>, int> locksPerTile;
41
42public:
43 LockAnalysis(DeviceOp &device) {
44 // go over the locks created for each tile and update the index in
45 // locksPerTile
46 device.walk([&](LockOp lockOp) {
47 auto tile = lockOp.getTile();
48 auto lockID = lockOp.getLockIDValue();
49 locksPerTile[{tile, lockID}] = 1;
50 });
51 }
52
53 /// Given a tile, returns next usable lockID for that tile.
54 int getLockID(TileOp &tileOp) {
55 const auto &targetModel = getTargetModel(tileOp);
56 for (unsigned i = 0;
57 i < targetModel.getNumLocks(tileOp.getCol(), tileOp.getRow()); i++)
58 if (int usageCnt = locksPerTile[{tileOp, i}]; usageCnt == 0) {
59 locksPerTile[{tileOp, i}] = 1;
60 return i;
61 }
62 return -1;
63 }
64};
65
66//===----------------------------------------------------------------------===//
67// DMA Channel Analysis
68//===----------------------------------------------------------------------===//
70 DenseMap<std::tuple<Value, DMAChannelDir, int>, int> channelsPerTile;
71
72public:
73 DMAChannelAnalysis(DeviceOp &device) {
74 // go over the channels used for each tile and update channel map
75 for (auto memOp : device.getOps<MemOp>()) {
76 Region &r = memOp.getBody();
77 for (auto &bl : r.getBlocks()) {
78 for (auto op : bl.getOps<DMAStartOp>()) {
79 channelsPerTile[{memOp.getTile(), op.getChannelDir(),
80 op.getChannelIndex()}] = 1;
81 }
82 }
83 }
84 for (auto memOp : device.getOps<MemTileDMAOp>()) {
85 Region &r = memOp.getBody();
86 for (auto &bl : r.getBlocks()) {
87 for (auto op : bl.getOps<DMAStartOp>()) {
88 channelsPerTile[{memOp.getTile(), op.getChannelDir(),
89 op.getChannelIndex()}] = 1;
90 }
91 }
92 }
93 for (auto memOp : device.getOps<ShimDMAOp>()) {
94 Region &r = memOp.getBody();
95 for (auto &bl : r.getBlocks()) {
96 for (auto op : bl.getOps<DMAStartOp>()) {
97 channelsPerTile[{memOp.getTile(), op.getChannelDir(),
98 op.getChannelIndex()}] = 1;
99 }
100 }
101 }
102 }
103
104 /// Given a tile and DMAChannelDir, returns next usable channel index for
105 /// that tile.
106 int getDMAChannelIndex(TileOp tileOp, DMAChannelDir dir) {
107 const auto &targetModel = getTargetModel(tileOp);
108 int maxChannelNum = 0;
109 if (tileOp.isShimTile())
110 maxChannelNum = 2;
111 else {
112 if (dir == DMAChannelDir::MM2S)
113 maxChannelNum = targetModel.getNumSourceSwitchboxConnections(
114 tileOp.getCol(), tileOp.getRow(), WireBundle::DMA);
115 else
116 maxChannelNum = targetModel.getNumDestSwitchboxConnections(
117 tileOp.getCol(), tileOp.getRow(), WireBundle::DMA);
118 }
119 for (int i = 0; i < maxChannelNum; i++)
120 if (int usageCnt = channelsPerTile[{tileOp.getResult(), dir, i}];
121 usageCnt == 0) {
122 channelsPerTile[{tileOp.getResult(), dir, i}] = 1;
123 return i;
124 }
125 return -1;
126 }
127};
128
129//===----------------------------------------------------------------------===//
130// Create objectFifos Pass
131//===----------------------------------------------------------------------===//
133 : AIEObjectFifoStatefulTransformBase<AIEObjectFifoStatefulTransformPass> {
134 DenseMap<ObjectFifoCreateOp, std::vector<BufferOp>>
135 buffersPerFifo; // maps each objFifo to its corresponding buffer
136 DenseMap<ObjectFifoCreateOp, std::vector<ExternalBufferOp>>
137 externalBuffersPerFifo; // maps each objFifo to its corresponding
138 // external buffers
139 DenseMap<ObjectFifoCreateOp, std::vector<LockOp>>
140 locksPerFifo; // maps each objFifo to its corresponding locks
141 std::vector<std::pair<ObjectFifoCreateOp, std::vector<ObjectFifoCreateOp>>>
142 splitFifos; // maps each objFifo between non-adjacent tiles to its
143 // corresponding consumer objectFifos
144 DenseMap<ObjectFifoLinkOp, ObjectFifoCreateOp>
145 objFifoLinks; // maps each ObjectFifoLinkOp to objFifo whose elements
146 // have been created and should be used
147 std::vector<ObjectFifoCreateOp>
148 splitBecauseLink; // objfifos which have been split because they are
149 // part of a Link, not because they didn't have a shared memory module
150
151 /// Function that returns true if two tiles in the AIE array share a memory
152 /// module. share_direction is equal to:
153 /// * -1 if the shared memory module is that of the first input tile,
154 /// * 1 if it is that of the second input tile,
155 /// * 0 is no memory module is shared.
156 bool isSharedMemory(TileOp a, TileOp b, int *share_direction) {
157 const auto &targetModel = getTargetModel(a.getOperation());
158
159 if ((a.isShimTile() && !b.isShimTile()) ||
160 (!a.isShimTile() && b.isShimTile())) {
161 *share_direction = 0;
162 return false;
163 }
164 if ((targetModel.isMemTile(a.getCol(), a.getRow()) &&
165 !targetModel.isMemTile(b.getCol(), b.getRow())) ||
166 (!targetModel.isMemTile(a.getCol(), a.getRow()) &&
167 targetModel.isMemTile(b.getCol(), b.getRow()))) {
168 *share_direction = 0;
169 return false;
170 }
171 bool rightShared = targetModel.isLegalMemAffinity(
172 a.colIndex(), a.rowIndex(), b.colIndex(), b.rowIndex());
173
174 bool leftShared = targetModel.isLegalMemAffinity(
175 b.colIndex(), b.rowIndex(), a.colIndex(), a.rowIndex());
176
177 if (leftShared)
178 *share_direction = -1;
179 else if (rightShared)
180 *share_direction = 1;
181 else
182 *share_direction = 0;
183
184 return leftShared || rightShared;
185 }
186
187 // Return true if the objectFifo created by createOp requires a DMA to be set
188 // up. This is the case if the tiles are not adjacent (no shared memory), if
189 // the objectFifo broadcasts to multiple tiles, if one of the consumers or
190 // the producer wants to use the multi-dimensional address generation
191 // features of the DMA, if the objectFifo is part of a LinkOp, or if the
192 // via_DMA or repeatCount attributes of the objectFifo are set.
//
// share_direction is an output: it is only written (through isSharedMemory)
// when the objectFifo has exactly one consumer and no dimensionsToStream;
// on every other path, including the two early returns, it is left untouched.
// Side effect: createOp may be appended to splitBecauseLink, possibly more
// than once within a single call.
193 bool requiresDMAs(ObjectFifoCreateOp createOp, int &share_direction) {
194 bool hasSharedMemory = false;
195 bool atLeastOneConsumerWantsTransform = false;
196 bool isUsedInLinkOp = false;
197
198 if (createOp.getVia_DMA())
199 return true;
200
201 if (createOp.getRepeatCount().has_value())
202 return true;
203
204 if (createOp.getConsumerTiles().size() == 1 &&
205 createOp.getDimensionsToStream().empty()) {
206
207 // Test for shared memory
208 for (auto consumerTile : createOp.getConsumerTiles()) {
209 if (auto consumerTileOp =
210 dyn_cast<TileOp>(consumerTile.getDefiningOp())) {
// If this objectFifo was already recorded in splitBecauseLink, the producer
// tile is compared against itself instead of against the consumer tile.
211 if (std::count(splitBecauseLink.begin(), splitBecauseLink.end(),
212 createOp))
213 hasSharedMemory =
214 isSharedMemory(createOp.getProducerTileOp(),
215 createOp.getProducerTileOp(), &share_direction);
216 else
217 hasSharedMemory = isSharedMemory(createOp.getProducerTileOp(),
218 consumerTileOp, &share_direction);
219 }
220 }
221 }
222
223 // Only test for use of data layout transformations if we are in the shared
224 // memory case; otherwise, we will return `true` in any case.
225 if (hasSharedMemory) {
226 // Even if just one of the consumers in the list of consumers wants to
227 // perform a memory transform, we need to use DMAs.
228 for (BDDimLayoutArrayAttr dims :
229 createOp.getDimensionsFromStreamPerConsumer())
230 if (!dims.empty()) {
231 atLeastOneConsumerWantsTransform = true;
232 break;
233 }
234 }
235
236 // Check if the objectfifo operation can use shared memory for linking. If
237 // the link operation is a distribute or a join operation, or if the link
238 // has different memref types, DMAs are required even if shared memory is
239 // available and the objectfifo should be split. Otherwise also check if the
240 // via_shared_memory attribute of the objectfifo operation is set and try to
241 // apply it.
242 if (hasSharedMemory) {
243 if (auto linkOp = getOptionalLinkOp(createOp)) {
244 isUsedInLinkOp = true;
// share_dir is a local scratch value starting at 0; it is passed to
// checkAndApplyViaSharedMemAttribute and then compared against the
// share_direction computed above.
245 int share_dir = 0;
246 if (!linkOp->isDistribute() && !linkOp->isJoin()) {
// Only the first input and the first output objectFifo of the link are
// compared here.
247 auto fifoInType = llvm::cast<AIEObjectFifoType>(
248 linkOp->getInputObjectFifos()[0].getElemType());
249 auto producerType =
250 llvm::cast<MemRefType>(fifoInType.getElementType());
251 auto fifoOutType = llvm::cast<AIEObjectFifoType>(
252 linkOp->getOutputObjectFifos()[0].getElemType());
253 auto consumerType =
254 llvm::cast<MemRefType>(fifoOutType.getElementType());
255 if (consumerType != producerType) {
256 // TODO: Support for different memref types through shared
257 // memory without DMAs
258 splitBecauseLink.push_back(createOp);
259 }
// NOTE(review): when the memref types differ AND via_shared_mem is set with
// a non-matching direction, createOp is pushed to splitBecauseLink twice.
260 if (createOp.getViaSharedMem().has_value()) {
261 checkAndApplyViaSharedMemAttribute(createOp, share_dir);
262 if (share_direction == share_dir)
263 isUsedInLinkOp = false;
264 else
265 splitBecauseLink.push_back(createOp);
266 }
267 } else {
268 splitBecauseLink.push_back(createOp);
269 }
270 }
271 }
272
273 return !hasSharedMemory || atLeastOneConsumerWantsTransform ||
274 isUsedInLinkOp;
275 }
276
277 // Checks if via_shared_mem attribute of the objectfifo is set and if so
278 // tries to apply it. If the desired shared memory module is available to
279 // both producer and consumer then it will be used, otherwise an error is
280 // emitted.
//
// share_direction is an in/out parameter using the isSharedMemory()
// convention (-1, 0, 1). Nothing happens when via_shared_mem is unset or when
// share_direction already equals the desired module.
// NOTE(review): as written, the error is only emitted when
// desiredSharedModuleIsShared is true but the directions differ; when it is
// false the function returns silently. Confirm this matches the intent
// described above.
281 void checkAndApplyViaSharedMemAttribute(ObjectFifoCreateOp createOp,
282 int &share_direction) {
283 if (createOp.getViaSharedMem().has_value()) {
284 int desiredSharedTile = createOp.getViaSharedMem().value();
// Map the attribute value onto the share_direction convention:
// 0 -> -1, anything else -> 1.
285 int desiredSharedModule = 1;
286 if (desiredSharedTile == 0)
287 desiredSharedModule = -1;
288 if (share_direction != desiredSharedModule) {
289 bool desiredSharedModuleIsShared = false;
290 int newShareDirection = 0;
291 for (auto consumerTile : createOp.getConsumerTiles()) {
292 if (auto consumerTileOp =
293 dyn_cast<TileOp>(consumerTile.getDefiningOp()))
// The reverse query (consumer first, producer second) is only issued when
// the incoming share_direction is -1.
294 if (share_direction == -1)
295 /// * -1 if the shared memory module is that of the first input
296 /// tile,
297 /// * 1 if it is that of the second input tile
298 desiredSharedModuleIsShared =
299 isSharedMemory(consumerTileOp, createOp.getProducerTileOp(),
300 &newShareDirection);
301 }
302 if (desiredSharedModuleIsShared) {
// Flip the direction when the reverse query reports the same value;
// otherwise report an error on the op. The diagnostic returned by
// emitOpError is discarded here.
303 if (share_direction == newShareDirection)
304 share_direction = (share_direction == -1) ? 1 : -1;
305 else
306 createOp->emitOpError(
307 "no access to shared memory module specified by "
308 "`via_shared_mem`");
309 }
310 }
311 }
312 }
313
314 /// Function to retrieve ObjectFifoLinkOp of ObjectFifoCreateOp,
315 /// if it belongs to one.
316 std::optional<ObjectFifoLinkOp> getOptionalLinkOp(ObjectFifoCreateOp op) {
317 auto device = op->getParentOfType<DeviceOp>();
318 for (ObjectFifoLinkOp linkOp : device.getOps<ObjectFifoLinkOp>()) {
319 for (ObjectFifoCreateOp in : linkOp.getInputObjectFifos())
320 if (in == op)
321 return {linkOp};
322 for (ObjectFifoCreateOp out : linkOp.getOutputObjectFifos())
323 if (out == op)
324 return {linkOp};
325 }
326 return {};
327 }
328
329 ObjectFifoCreateOp
330 createObjectFifo(OpBuilder &builder, AIEObjectFifoType datatype,
331 std::string name, Value prodTile, Value consTile,
332 Attribute depth, BDDimLayoutArrayAttr dimensionsToStream,
333 BDDimLayoutArrayArrayAttr dimensionsFromStreamPerConsumer) {
334 auto ofName = builder.getStringAttr(name);
335 auto fifo = builder.create<ObjectFifoCreateOp>(
336 builder.getUnknownLoc(), ofName, prodTile, consTile, depth, datatype,
337 dimensionsToStream, dimensionsFromStreamPerConsumer);
338 return fifo;
339 }
340
341 /// Function used to create objectFifo locks based on target architecture.
342 /// Called by createObjectFifoElements().
343 std::vector<LockOp> createObjectFifoLocks(OpBuilder &builder,
344 LockAnalysis &lockAnalysis,
345 ObjectFifoCreateOp op, int numElem,
346 int joinDistribFactor,
347 TileOp creation_tile,
348 int repeatCount) {
349 std::vector<LockOp> locks;
350 if (op.getDisableSynchronization())
351 return locks;
352 auto dev = op->getParentOfType<DeviceOp>();
353 auto &target = dev.getTargetModel();
354 // if shimTile external buffers are collected from input code
355 // create as many locks as there are external buffers
356 if (creation_tile.isShimTile()) {
357 numElem = 1;
358 if (!externalBuffersPerFifo[op].empty())
359 numElem = externalBuffersPerFifo[op].size();
360 }
361 if (target.getTargetArch() == AIEArch::AIE1) {
362 for (int i = 0; i < numElem; i++) {
363 // create corresponding aie1 locks
364 int initValue = op.getInitValues().has_value() ? 1 : 0;
365 int lockID = lockAnalysis.getLockID(creation_tile);
366 assert(lockID >= 0 && "No more locks to allocate!");
367 auto lock = builder.create<LockOp>(builder.getUnknownLoc(),
368 creation_tile, lockID, initValue);
369 lock.getOperation()->setAttr(
370 SymbolTable::getSymbolAttrName(),
371 builder.getStringAttr(op.name().str() + "_lock_" +
372 std::to_string(i)));
373 locks.push_back(lock);
374 }
375 } else {
376 // create corresponding aie2 locks
377 for (int i = 0; i < joinDistribFactor; i++) {
378 auto initValues = op.getInitValues().has_value()
379 ? op.getInitValues().value().size()
380 : 0;
381 int prodLockID = lockAnalysis.getLockID(creation_tile);
382 assert(prodLockID >= 0 && "No more locks to allocate!");
383 int prodLockValue = (numElem - initValues) * repeatCount;
384 auto prodLock = builder.create<LockOp>(
385 builder.getUnknownLoc(), creation_tile, prodLockID, prodLockValue);
386 prodLock.getOperation()->setAttr(
387 SymbolTable::getSymbolAttrName(),
388 builder.getStringAttr(op.name().str() + "_prod_lock_" +
389 std::to_string(i)));
390 locks.push_back(prodLock);
391
392 int consLockID = lockAnalysis.getLockID(creation_tile);
393 assert(consLockID >= 0 && "No more locks to allocate!");
394 int consLockValue = initValues * repeatCount;
395 auto consLock = builder.create<LockOp>(
396 builder.getUnknownLoc(), creation_tile, consLockID, consLockValue);
397 consLock.getOperation()->setAttr(
398 SymbolTable::getSymbolAttrName(),
399 builder.getStringAttr(op.name().str() + "_cons_lock_" +
400 std::to_string(i)));
401 locks.push_back(consLock);
402 }
403 }
404 return locks;
405 }
406
407 /// Function used to create objectFifo elements and their locks.
408 /// It maps the input objectFifo to associated buffers and locks.
/// Results are stored in buffersPerFifo[op] and locksPerFifo[op]. If `op` is
/// part of a link, objFifoLinks records `op` as the objectFifo that owns the
/// link's elements. share_direction selects the tile on which elements are
/// created: the producer tile for 0 or -1, otherwise the first consumer tile.
409 void createObjectFifoElements(OpBuilder &builder, LockAnalysis &lockAnalysis,
410 ObjectFifoCreateOp op, int share_direction) {
// Nothing to create for an objectFifo of size 0.
411 if (!op.size())
412 return;
413
414 std::vector<BufferOp> buffers;
415 auto fifo = llvm::cast<AIEObjectFifoType>(op.getElemType());
416 auto elemType = llvm::cast<MemRefType>(fifo.getElementType());
417 int numElem = op.size();
418 int of_elem_index = 0; // used to give objectFifo elements a symbolic name
419
420 // if this objectFifo is linked to another, check if the other's elements
421 // have already been created: if none of the output objectfifos of the link
422 // have initValues, then the elements that are created are those of the
423 // objFifo with elements of bigger size
// Every `return` in the block below means "another objectFifo of this link
// owns (or will own) the elements", so nothing is created for `op`.
424 bool linked = false;
425 auto linkOp = getOptionalLinkOp(op);
426 if (linkOp) {
427 auto fifoIn = linkOp->getInputObjectFifos()[0];
428 auto fifoOut = linkOp->getOutputObjectFifos()[0];
429 linked = true;
430 if (objFifoLinks.find(*linkOp) != objFifoLinks.end())
431 return; // elements have already been created
432 if (linkOp->isJoin()) {
433 // if join, fifoOut has bigger size
434 if (op.name() != fifoOut.name())
435 return;
436 } else if (linkOp->isDistribute()) {
437 // if distribute, fifoIn has bigger size
438 if (op.name() != fifoIn.name())
439 return;
440 } else {
441 // check if output objectfifo has initValues
442 if (fifoOut.getInitValues().has_value()) {
443 if (fifoOut.name() != op.name())
444 return;
445 } else {
446 // check which objectfifo of the link has bigger size
447 auto fifoInType = llvm::cast<AIEObjectFifoType>(fifoIn.getElemType());
448 auto elemInType = llvm::cast<MemRefType>(fifoInType.getElementType());
449 int inSize = elemInType.getNumElements();
450
451 auto fifoOutType =
452 llvm::cast<AIEObjectFifoType>(fifoOut.getElemType());
453 auto elemOutType =
454 llvm::cast<MemRefType>(fifoOutType.getElementType());
455
// On a tie (inSize == outSize) the input objectFifo owns the elements.
456 if (int outSize = elemOutType.getNumElements(); inSize >= outSize) {
457 if (op.name() != fifoIn.name())
458 return;
459 } else {
460 if (fifoOut.name() != op.name())
461 return;
462 }
463 }
464 }
465 }
466
467 TileOp creation_tile;
468 if (share_direction == 0 || share_direction == -1)
469 creation_tile = op.getProducerTileOp();
470 else {
// Only the first consumer tile is considered here.
471 auto consumerTileOp =
472 dyn_cast<TileOp>(op.getConsumerTiles()[0].getDefiningOp());
473 creation_tile = consumerTileOp;
474 }
475
476 // Reset opbuilder location to after the last tile declaration
// NOTE(review): `t` stays nullptr if the device body contains no TileOp;
// presumably that cannot happen because creation_tile is a TileOp -- confirm.
477 Operation *t = nullptr;
478 auto dev = op->getParentOfType<DeviceOp>();
479 for (auto tile_op : dev.getBody()->getOps<TileOp>()) {
480 t = tile_op.getOperation();
481 }
482 builder.setInsertionPointAfter(t);
// One BufferOp per element, named "<fifo>_buff_<index>". No buffers are
// created on shim tiles, but the element index still advances.
483 for (int i = 0; i < numElem; i++) {
484 mlir::ElementsAttr initValues = nullptr;
485 if (!creation_tile.isShimTile()) {
486 if (op.getInitValues().has_value()) {
// The i-th init value initialises the i-th buffer.
487 initValues =
488 llvm::cast<mlir::ElementsAttr>(op.getInitValues().value()[i]);
489 }
490 auto buff = builder.create<BufferOp>(
491 builder.getUnknownLoc(), elemType, creation_tile,
492 builder.getStringAttr(op.name().str() + "_buff_" +
493 std::to_string(of_elem_index)),
494 /*address*/ nullptr, initValues,
495 /*mem_bank*/ nullptr);
496 buffers.push_back(buff);
497 }
498 of_elem_index++;
499 }
500
// A repeat count on the link overrides the one on the objectFifo itself.
// joinDistribFactor becomes the number of link outputs (distribute) or
// inputs (join); it stays 1 otherwise.
501 int repeatCount = 1;
502 int joinDistribFactor = 1;
503 if (op.getRepeatCount().has_value())
504 repeatCount = op.getRepeatCount().value();
505 if (linked) {
506 if (linkOp->getRepeatCount().has_value())
507 repeatCount = linkOp->getRepeatCount().value();
508 if (linkOp->isDistribute())
509 joinDistribFactor *= linkOp->getFifoOuts().size();
510 else if (linkOp->isJoin())
511 joinDistribFactor *= linkOp->getFifoIns().size();
// Record `op` as the owner of this link's elements.
512 objFifoLinks[*linkOp] = op;
513 }
514 std::vector<LockOp> locks = createObjectFifoLocks(
515 builder, lockAnalysis, op, numElem, joinDistribFactor, creation_tile, repeatCount);
516 buffersPerFifo[op] = buffers;
517 locksPerFifo[op] = locks;
518 }
519
520 /// Function that returns a pointer to the block of a Region
521 /// that contains the AIEEndOp.
522 Block *findEndOpBlock(Region &r) {
523 Block *endBlock = nullptr;
524 for (auto &bl : r.getBlocks())
525 if (!bl.getOps<EndOp>().empty())
526 endBlock = &bl;
527 return endBlock;
528 }
529
530 /// Function used to create a Bd block.
531 template <typename MyOp>
532 void createBd(OpBuilder &builder, LockOp acqLock, int acqMode,
533 LockAction acqLockAction, LockOp relLock, int relMode,
534 MyOp buff, int offset, int len, Block *succ,
535 BDDimLayoutArrayAttr dims, BDPadLayoutArrayAttr padDimensions) {
536 if (acqLock)
537 builder.create<UseLockOp>(builder.getUnknownLoc(), acqLock, acqLockAction,
538 acqMode);
539
540 if (!dims.getValue().empty() && padDimensions) {
541 builder.create<DMABDOp>(builder.getUnknownLoc(), buff, offset, len, dims,
542 padDimensions);
543 } else if (!dims.getValue().empty()) {
544 builder.create<DMABDOp>(builder.getUnknownLoc(), buff, offset, len, dims);
545 } else {
546 builder.create<DMABDOp>(builder.getUnknownLoc(), buff, offset, len);
547 }
548 if (acqLock)
549 builder.create<UseLockOp>(builder.getUnknownLoc(), relLock,
550 LockAction::Release, relMode);
551 builder.create<NextBDOp>(builder.getUnknownLoc(), succ);
552 }
553
554 /// Function used to create a Bd block.
555 /// If lockMode is 0 we create a consumerDMA (i.e. on producer tile) else a
556 /// producerDMA (i.e. on consumer tile).
557 template <typename MyOp>
558 void createBdBlock(OpBuilder &builder, ObjectFifoCreateOp op, int lockMode,
559 int acqNum, int relNum, MyOp buff, int offset, int len,
560 DMAChannelDir channelDir, size_t lockIndex, Block *succ,
561 BDDimLayoutArrayAttr dims,
562 BDPadLayoutArrayAttr padDimensions,
563 bool distribOrJoin = false) {
564 LockOp acqLock;
565 LockOp relLock;
566 int acqMode = 1;
567 int relMode = 1;
568 auto acqLockAction = LockAction::Acquire;
569 if (locksPerFifo[op].size() > 0) {
570 auto dev = op->getParentOfType<DeviceOp>();
571 if (auto &target = dev.getTargetModel();
572 target.getTargetArch() == AIEArch::AIE1) {
573 acqMode = lockMode == 0 ? 1 : 0;
574 relMode = lockMode == 0 ? 0 : 1;
575 acqLock = locksPerFifo[op][lockIndex];
576 relLock = locksPerFifo[op][lockIndex];
577 } else {
578 acqMode = acqNum;
579 relMode = relNum;
580 acqLockAction = LockAction::AcquireGreaterEqual;
581 int prodLockIndex = 0;
582 int consLockIndex = 1;
583 if (distribOrJoin) {
584 prodLockIndex = lockIndex * 2;
585 consLockIndex = lockIndex * 2 + 1;
586 }
587 acqLock = channelDir == DMAChannelDir::S2MM
588 ? locksPerFifo[op][prodLockIndex]
589 : locksPerFifo[op][consLockIndex];
590 relLock = channelDir == DMAChannelDir::S2MM
591 ? locksPerFifo[op][consLockIndex]
592 : locksPerFifo[op][prodLockIndex];
593 }
594 }
595 createBd(builder, acqLock, acqMode, acqLockAction, relLock, relMode, buff,
596 offset, len, succ, dims, padDimensions);
597 }
598
599 /// Function that either calls createAIETileDMA(), createShimDMA() or
600 /// createMemTileDMA() based on op tile row value.
601 void createDMA(DeviceOp &device, OpBuilder &builder, ObjectFifoCreateOp op,
602 DMAChannelDir channelDir, int channelIndex, int lockMode,
603 BDDimLayoutArrayAttr dims, BDPadLayoutArrayAttr pad_dims) {
604 if (op.getProducerTileOp().isShimTile()) {
605 createShimDMA(device, builder, op, channelDir, channelIndex, lockMode,
606 dims);
607 } else if (op.getProducerTileOp().isMemTile()) {
608 BDPadLayoutArrayAttr padDims = nullptr;
609 if (channelDir == DMAChannelDir::MM2S && pad_dims)
610 padDims = pad_dims;
611 createMemTileDMA(device, builder, op, channelDir, channelIndex, lockMode,
612 dims, padDims);
613 } else {
614 createAIETileDMA(device, builder, op, channelDir, channelIndex, lockMode,
615 dims);
616 }
617 }
618
619 /// Function used to create a MemOp region with a DMA channel.
620 /// It uses createBdBlock(), see there for lockMode input.
/// Appends one DMAStartOp block for (channelDir, channelIndex) and a ring of
/// buffer-descriptor blocks to the MemOp of the producer tile, creating that
/// MemOp first if the device has none. Does nothing if the objectFifo has
/// size 0.
621 void createAIETileDMA(DeviceOp &device, OpBuilder &builder,
622 ObjectFifoCreateOp op, DMAChannelDir channelDir,
623 int channelIndex, int lockMode,
624 BDDimLayoutArrayAttr dims) {
625 size_t numBlocks = op.size();
626 if (numBlocks == 0)
627 return;
628
629 int acqNum = 1;
630 int relNum = 1;
631
632 auto fifo = llvm::cast<AIEObjectFifoType>(op.getElemType());
633 auto elemType = llvm::cast<MemRefType>(fifo.getElementType());
634 int len = elemType.getNumElements();
635
636 // check for repeat count
637 int repeatCount = 1;
638 if (op.getRepeatCount().has_value())
639 repeatCount = op.getRepeatCount().value();
640
641 // search for the buffers/locks (based on if this objFifo has a link)
// `target` is the objectFifo whose buffers and locks are used: the owner
// recorded in objFifoLinks if `op` is linked, otherwise `op` itself. The
// link's repeat count scales acqNum/relNum only when `op` is that owner.
642 ObjectFifoCreateOp target = op;
643 if (std::optional<ObjectFifoLinkOp> linkOp = getOptionalLinkOp(op);
644 linkOp.has_value()) {
645 if (objFifoLinks.find(linkOp.value()) != objFifoLinks.end()) {
646 target = objFifoLinks[linkOp.value()];
647 if (target == op) {
648 if (linkOp->getRepeatCount().has_value()) {
649 acqNum *= linkOp->getRepeatCount().value();
650 relNum *= linkOp->getRepeatCount().value();
651 }
652 }
653 }
654 }
655
656 // search for MemOp
// NOTE(review): the lookup matches on op's producer tile, while a newly
// created MemOp below is placed on target's producer tile -- confirm these
// always coincide.
657 Operation *producerMem = nullptr;
658 for (auto memOp : device.getOps<MemOp>()) {
659 if (memOp.getTile() == op.getProducerTile()) {
660 producerMem = memOp.getOperation();
661 break;
662 }
663 }
664
665 // if none exists, create one
666 TileOp objFifoTileOp = target.getProducerTileOp();
667 if (producerMem == nullptr) {
// The guards restore the builder's insertion point after the new MemOp
// (inserted before the device terminator) and its EndOp block are built.
668 OpBuilder::InsertionGuard g(builder);
669 builder.setInsertionPoint(device.getBody()->getTerminator());
670 auto newMemOp =
671 builder.create<MemOp>(builder.getUnknownLoc(), objFifoTileOp);
672 {
673 OpBuilder::InsertionGuard g(builder);
674 builder.setInsertionPointToStart(&newMemOp.getRegion().emplaceBlock());
675 builder.create<EndOp>(builder.getUnknownLoc());
676 }
677 producerMem = newMemOp.getOperation();
678 }
// New blocks are inserted in front of the block holding the EndOp.
// lastDmaBlock is the end block's single predecessor (nullptr if it has
// none or several), captured before the new blocks are added.
679 Block *endBlock = findEndOpBlock(producerMem->getRegion(0));
680 Block *lastDmaBlock = endBlock->getSinglePredecessor();
681 Block *dmaBlock = builder.createBlock(endBlock);
682 Block *bdBlock = builder.createBlock(endBlock);
683
684 // create DMA channel
685 builder.setInsertionPointToStart(dmaBlock);
686 builder.create<DMAStartOp>(builder.getUnknownLoc(), channelDir,
687 channelIndex, /*repeatCount*/ 0, bdBlock,
688 endBlock);
// Chain the previous DMA start block to the new one by redirecting its
// successor #1 (which previously led to the end block).
689 if (lastDmaBlock != nullptr)
690 lastDmaBlock->getTerminator()->setSuccessor(dmaBlock, 1);
691
692 // create Bd blocks
// Each element gets repeatCount consecutive BD blocks; the very last block
// branches back to bdBlock so the descriptors form a ring.
// NOTE(review): if the loop exits through the `break` below, the most
// recently created `succ` block is left without contents -- confirm this
// cannot happen for valid inputs.
693 Block *succ;
694 Block *curr = bdBlock;
695 size_t elemIndex = 0;
696 size_t totalBlocks = 0;
697 for (size_t i = 0; i < numBlocks; i++) {
698 if (elemIndex >= buffersPerFifo[target].size())
699 break;
700 for (int r = 0; r < repeatCount; r++) {
701 if (totalBlocks == numBlocks * repeatCount - 1)
702 succ = bdBlock;
703 else
704 succ = builder.createBlock(endBlock);
705
706 builder.setInsertionPointToStart(curr);
707 createBdBlock<BufferOp>(builder, target, lockMode, acqNum, relNum,
708 buffersPerFifo[target][elemIndex], /*offset*/ 0,
709 len, channelDir, elemIndex, succ, dims,
710 nullptr);
711 curr = succ;
712 totalBlocks++;
713 }
714 elemIndex++;
715 }
716 }
717
718 /// Function used to create a ShimDMAOp region with a DMA channel.
719 /// It uses createBdBlock(), see there for lockMode input.
/// Appends one DMAStartOp block for (channelDir, channelIndex) and one
/// buffer-descriptor block per external buffer registered for `op` to the
/// ShimDMAOp of the producer tile, creating that ShimDMAOp first if the
/// device has none. Does nothing if no external buffers are registered.
720 void createShimDMA(DeviceOp &device, OpBuilder &builder,
721 ObjectFifoCreateOp op, DMAChannelDir channelDir,
722 int channelIndex, int lockMode,
723 BDDimLayoutArrayAttr dims) {
724 size_t numBlocks = externalBuffersPerFifo[op].size();
725 if (numBlocks == 0)
726 return;
727
728 int acqNum = 1;
729 int relNum = 1;
730
731 // search for ShimDMAOp
732 Operation *producerDMA = nullptr;
733 for (auto dmaOp : device.getOps<ShimDMAOp>()) {
734 if (dmaOp.getTile() == op.getProducerTile()) {
735 producerDMA = dmaOp.getOperation();
736 break;
737 }
738 }
739
740 // if none exists, create one
741 TileOp objFifoTileOp = op.getProducerTileOp();
742 if (producerDMA == nullptr) {
// The guards restore the builder's insertion point after the new ShimDMAOp
// (inserted before the device terminator) and its EndOp block are built.
743 OpBuilder::InsertionGuard g(builder);
744 builder.setInsertionPoint(device.getBody()->getTerminator());
745 auto newDMAOp = builder.create<ShimDMAOp>(
746 builder.getUnknownLoc(), builder.getIndexType(), objFifoTileOp);
747 {
748 OpBuilder::InsertionGuard g(builder);
749 builder.setInsertionPointToStart(&newDMAOp.getRegion().emplaceBlock());
750 builder.create<EndOp>(builder.getUnknownLoc());
751 }
752 producerDMA = newDMAOp.getOperation();
753 }
754
// New blocks are inserted in front of the block holding the EndOp.
// lastDmaBlock is the end block's single predecessor (nullptr if it has
// none or several), captured before the new blocks are added.
755 Block *endBlock = findEndOpBlock(producerDMA->getRegion(0));
756 Block *lastDmaBlock = endBlock->getSinglePredecessor();
757 Block *dmaBlock = builder.createBlock(endBlock);
758 Block *bdBlock = builder.createBlock(endBlock);
759
760 // create DMA channel
761 builder.setInsertionPointToStart(dmaBlock);
762 builder.create<DMAStartOp>(builder.getUnknownLoc(), channelDir,
763 channelIndex, /*repeatCount*/ 0, bdBlock,
764 endBlock);
// Chain the previous DMA start block to the new one by redirecting its
// successor #1 (which previously led to the end block).
765 if (lastDmaBlock != nullptr)
766 lastDmaBlock->getTerminator()->setSuccessor(dmaBlock, 1);
767
768 // create Bd blocks
// One BD block per external buffer; the last one branches back to bdBlock
// so the descriptors form a ring. The transfer length of each BD is the
// element count of that external buffer's memref type.
769 Block *succ;
770 Block *curr = bdBlock;
771 size_t elemIndex = 0;
772 for (size_t i = 0; i < numBlocks; i++) {
773 if (elemIndex >= externalBuffersPerFifo[op].size())
774 break;
775 if (i == numBlocks - 1)
776 succ = bdBlock;
777 else
778 succ = builder.createBlock(endBlock);
779
780 MemRefType buffer = externalBuffersPerFifo[op][elemIndex].getType();
781 int len = buffer.getNumElements();
782 builder.setInsertionPointToStart(curr);
783 createBdBlock<ExternalBufferOp>(builder, op, lockMode, acqNum, relNum,
784 externalBuffersPerFifo[op][elemIndex],
785 /*offset*/ 0, len, channelDir, elemIndex,
786 succ, dims, nullptr);
787 curr = succ;
788 elemIndex++;
789 }
790 }
791
792 /// Function used to create a MemTileDMAOp region with a DMA channel.
793 /// It uses createBdBlock(), see there for lockMode input.
794 void createMemTileDMA(DeviceOp &device, OpBuilder &builder,
795 ObjectFifoCreateOp op, DMAChannelDir channelDir,
796 int channelIndex, int lockMode,
797 BDDimLayoutArrayAttr dims,
798 BDPadLayoutArrayAttr padDimensions) {
799 size_t numBlocks = op.size();
800 if (numBlocks == 0)
801 return;
802
803 auto fifo = llvm::cast<AIEObjectFifoType>(op.getElemType());
804 auto elemType = llvm::cast<MemRefType>(fifo.getElementType());
805 int lenOut = elemType.getNumElements();
806 int acqNum = 1;
807 int relNum = 1;
808
809 // check for repeat count
810 int repeatCount = 1;
811 if (op.getRepeatCount().has_value())
812 repeatCount = op.getRepeatCount().value();
813
814 // search for the buffers/locks (based on if this objFifo has a link)
815 // identify size difference between input and output memrefs
816 ObjectFifoCreateOp target = op;
817 bool isDistribute = false;
818 bool isJoin = false;
819 int extraOffset = 0;
820 int joinDistribFactor = 1;
821 int joinDistribLockIndex = 0;
822 auto linkOp = getOptionalLinkOp(op);
823 if (linkOp) {
824 if (objFifoLinks.find(*linkOp) != objFifoLinks.end()) {
825 target = objFifoLinks[*linkOp];
826 auto srcOffsets = linkOp->getSrcOffsets();
827 auto dstOffsets = linkOp->getDstOffsets();
828
829 if (linkOp->getRepeatCount().has_value())
830 if (linkOp->getInputObjectFifos()[0] == op) {
831 acqNum *= linkOp->getRepeatCount().value();
832 relNum *= linkOp->getRepeatCount().value();
833 }
834
835 if (linkOp->isJoin()) {
836 // compute offset and length
837 isJoin = true;
838 if (target == op) {
839 joinDistribFactor *= linkOp->getFifoIns().size();
840 } else {
841 int i = 0;
842 for (auto fifoIn : linkOp->getInputObjectFifos()) {
843 if (fifoIn.name() == op.name())
844 break;
845 i++;
846 }
847 extraOffset = *getConstantIntValue(srcOffsets[i]);
848 lenOut = linkOp->getJoinTransferLengths()[i];
849 joinDistribLockIndex = i;
850 }
851 } else if (linkOp->isDistribute()) {
852 // compute offset and length
853 isDistribute = true;
854 if (target == op) {
855 joinDistribFactor *= linkOp->getFifoOuts().size();
856 } else {
857 int i = 0;
858 for (auto fifoOut : linkOp->getOutputObjectFifos()) {
859 if (fifoOut.name() == op.name())
860 break;
861 i++;
862 }
863 extraOffset = *getConstantIntValue(dstOffsets[i]);
864 lenOut = linkOp->getDistributeTransferLengths()[i];
865 joinDistribLockIndex = i;
866 }
867 } else {
868 if (target != op) {
869 auto targetFifo =
870 llvm::cast<AIEObjectFifoType>(target.getElemType());
871 auto targetElemType =
872 llvm::cast<MemRefType>(targetFifo.getElementType());
873 lenOut = targetElemType.getNumElements();
874 }
875 }
876
877 // check if current op is of smaller size in link
878 if (target != op)
879 numBlocks = target.size();
880 }
881 }
882
883 // search for MemTileDMAOp
884 Operation *producerDMA = nullptr;
885 for (auto dmaOp : device.getOps<MemTileDMAOp>()) {
886 if (dmaOp.getTile() == target.getProducerTile()) {
887 producerDMA = dmaOp.getOperation();
888 break;
889 }
890 }
891
892 // if none exists, create one
893 TileOp objFifoTileOp = target.getProducerTileOp();
894 if (producerDMA == nullptr) {
895 OpBuilder::InsertionGuard g(builder);
896 builder.setInsertionPoint(device.getBody()->getTerminator());
897 auto newDMAOp =
898 builder.create<MemTileDMAOp>(builder.getUnknownLoc(), objFifoTileOp);
899 {
900 OpBuilder::InsertionGuard g(builder);
901 builder.setInsertionPointToStart(&newDMAOp.getRegion().emplaceBlock());
902 builder.create<EndOp>(builder.getUnknownLoc());
903 }
904 producerDMA = newDMAOp.getOperation();
905 }
906
907 Block *endBlock = findEndOpBlock(producerDMA->getRegion(0));
908 Block *lastDmaBlock = endBlock->getSinglePredecessor();
909 Block *dmaBlock = builder.createBlock(endBlock);
910 Block *bdBlock = builder.createBlock(endBlock);
911
912 // create DMA channel
913 builder.setInsertionPointToStart(dmaBlock);
914 builder.create<DMAStartOp>(builder.getUnknownLoc(), channelDir,
915 channelIndex, /*repeatCout*/ 0, bdBlock,
916 endBlock);
917 if (lastDmaBlock != nullptr)
918 lastDmaBlock->getTerminator()->setSuccessor(dmaBlock, 1);
919
920 // create Bd blocks
921 Block *succ;
922 Block *curr = bdBlock;
923 size_t elemIndex = 0;
924 size_t lockIndex = 0;
925 size_t totalBlocks = 0;
926 bool distribOrJoin = false;
927 for (size_t i = 0; i < numBlocks; i++) {
928 if (elemIndex >= buffersPerFifo[target].size())
929 break;
930 for (int r = 0; r < repeatCount * joinDistribFactor; r++) {
931 if (totalBlocks == numBlocks * repeatCount * joinDistribFactor - 1)
932 succ = bdBlock;
933 else
934 succ = builder.createBlock(endBlock);
935
936 builder.setInsertionPointToStart(curr);
937 int offset = 0;
938 if (isDistribute || isJoin) {
939 distribOrJoin = true;
940 if (target == op) {
941 if (isDistribute) {
942 offset = *getConstantIntValue(linkOp->getDstOffsets()[r]);
943 lenOut = linkOp->getDistributeTransferLengths()[r];
944 } else {
945 offset = *getConstantIntValue(linkOp->getSrcOffsets()[r]);
946 lenOut = linkOp->getJoinTransferLengths()[r];
947 }
948 lockIndex = r % joinDistribFactor;
949 } else {
950 offset = extraOffset;
951 lockIndex = joinDistribLockIndex;
952 }
953 } else {
954 lockIndex = elemIndex;
955 }
956 createBdBlock<BufferOp>(builder, target, lockMode, acqNum, relNum,
957 buffersPerFifo[target][elemIndex], offset,
958 lenOut, channelDir, lockIndex, succ, dims,
959 padDimensions, distribOrJoin);
960 curr = succ;
961 totalBlocks++;
962 }
963 elemIndex++;
964 }
965 }
966
// Computes the least common multiple of all values in the given set.
// Returns 1 for an empty set. Each step divides the running result by the
// gcd *before* multiplying, so the intermediate value never exceeds the
// final LCM (the naive `i * lcm / gcd` form can overflow even when the
// result itself fits in an int).
int computeLCM(const std::set<int> &values) {
  int lcm = 1;
  for (int value : values)
    lcm = lcm / std::gcd(value, lcm) * value;
  return lcm;
}
975
976 // Function that unrolls for-loops that contain objectFifo operations.
// For every CoreOp of `device` whose tile is in `objectFifoTiles`, each
// scf.for whose body directly contains ObjectFifoAcquireOps is unrolled with
// mlir::loopUnrollByFactor. The unroll factor is the least common multiple of
// the sizes of the objectFifos acquired in that loop, capped at the loop's
// constant trip count. Returns failure() when the factor ends up being 0 or
// when loopUnrollByFactor fails (an op error is emitted on the loop), and
// success() otherwise.
977 LogicalResult unrollForLoops(DeviceOp &device, OpBuilder &builder,
978 std::set<TileOp> objectFifoTiles) {
979 for (auto coreOp : device.getOps<CoreOp>()) {
980 if (objectFifoTiles.count(coreOp.getTileOp()) > 0) {
// Per-core bookkeeping, keyed by the loop's Operation*:
//  - unrolledLoops: loops already processed (or deliberately skipped)
//  - foundMap: loop has objectFifo acquires and still needs unrolling
//  - remainderMap: tripCount % unrollFactor recorded for the loop
//  - tripCountMap: constant trip count (0 when it is not constant)
981 std::vector<scf::ForOp> unrolledLoops;
982 std::map<Operation *, bool> foundMap;
983 std::map<Operation *, int64_t> remainderMap;
984 std::map<Operation *, int64_t> tripCountMap;
985 WalkResult res = coreOp.walk([&](scf::ForOp forLoop) {
986 // look for operations on objectFifos
987 // when multiple fifos in same loop, must use the least
988 // common multiple of their sizes as the unroll factor
989 foundMap[forLoop.getOperation()] = false;
990 std::set<int> objFifoSizes;
991 Block *body = forLoop.getBody();
992 remainderMap[forLoop.getOperation()] = 0;
// NOTE(review): body->getOps only yields ops placed directly in this block,
// so the parent check below looks always true — confirm.
993 for (auto acqOp : body->getOps<ObjectFifoAcquireOp>()) {
994 if (acqOp.getOperation()->getParentOp() == forLoop) {
995 foundMap[forLoop.getOperation()] = true;
996 ObjectFifoCreateOp op = acqOp.getObjectFifo();
997 objFifoSizes.insert(op.size());
998 }
999 }
1000 // If the loop doesn't contain any objectFifo acquire,
1001 // push it to unrolledLoops so that it is never unrolled
1002 if (!foundMap[forLoop.getOperation()]) {
1003 unrolledLoops.push_back(forLoop);
1004 return WalkResult::advance();
1005 }
1006 // Walk in the loop region to unroll the loop and its remainder
1007 Region *region = forLoop->getParentRegion();
1008 scf::ForOp prevLoop;
1009 prevLoop = forLoop;
1010 tripCountMap[prevLoop.getOperation()] = 0;
// Re-walk the parent region for as long as the last visited loop is still
// flagged as pending or left a remainder greater than 1.
1011 while (remainderMap[prevLoop.getOperation()] > 1 ||
1012 foundMap[prevLoop.getOperation()]) {
1013 region->walk([&](scf::ForOp remLoop) {
1014 bool skipLoop = false;
1015 int64_t tripCount = 0;
// The trip count stays 0 unless lower bound, upper bound and step are all
// single values from which constantTripCount can derive a constant.
1016 if (remLoop.getSingleLowerBound() &&
1017 remLoop.getSingleUpperBound() && remLoop.getSingleStep()) {
1018 tripCount = constantTripCount(*(remLoop.getSingleLowerBound()),
1019 *(remLoop.getSingleUpperBound()),
1020 *(remLoop.getSingleStep()))
1021 .value_or(0);
1022 }
1023 int unrollFactor =
1024 computeLCM(objFifoSizes); // also counts original loop body
1025 // Loop ids are not unique.
1026 // Sometimes, immediately after unrolling, the unrolled loop
1027 // and the one next to it (can be the remainder loop or an
1028 // independent loop) will have the same ID. This makes it
1029 // difficult to identify which loop needs to be unrolled.
1030 // Once it restarts walking from start, it ends up allocating
1031 // new ID to each loop.
1032 if (remainderMap[prevLoop.getOperation()] > 1 &&
1033 foundMap[remLoop.getOperation()] == false &&
1034 prevLoop != remLoop) {
1035 skipLoop = true;
1036 }
1037 if (std::count(unrolledLoops.begin(), unrolledLoops.end(),
1038 remLoop) == 0 &&
1039 !skipLoop) {
1040 tripCountMap[remLoop.getOperation()] = tripCount;
1041 // if loop iterations < unrollFactor, unroll the loop fully
1042 if (tripCountMap[remLoop.getOperation()] < unrollFactor)
1043 unrollFactor = tripCountMap[remLoop.getOperation()];
1044 // unrollFactor == 0 would divide by zero in the modulo below
1045 if (unrollFactor == 0) {
1046 remLoop.emitOpError()
1047 << "could not be unrolled with unrollFactor = 0, check "
1048 "loop boundaries."
1049 << "\n";
1050 return WalkResult::interrupt();
1051 }
1052 remainderMap[remLoop.getOperation()] =
1053 tripCountMap[remLoop.getOperation()] % unrollFactor;
// NOTE(review): assumes the step is defined by an arith.constant holding an
// IntegerAttr; neither the getDefiningOp nor the dyn_cast result is checked.
1054 auto step = remLoop.getStep()
1055 .getDefiningOp<arith::ConstantOp>()
1056 .getValue();
1057 int64_t step_value = llvm::dyn_cast<IntegerAttr>(step).getInt();
1058
1059 if (step_value < unrollFactor ||
1060 foundMap[remLoop.getOperation()]) {
1061 // Process the for loop
1062 if (failed(mlir::loopUnrollByFactor(remLoop, unrollFactor))) {
1063 remLoop.emitOpError()
1064 << "could not be unrolled with unrollFactor: "
1065 << unrollFactor << "\n";
1066 return WalkResult::interrupt();
1067 }
1068 unrolledLoops.push_back(remLoop);
1069 foundMap[remLoop.getOperation()] = false;
1070 } else {
1071 remainderMap[remLoop.getOperation()] = 0;
1072 foundMap[remLoop.getOperation()] = false;
1073 }
1074 } else {
// Loop already handled or skipped: clear its state so the enclosing while
// loop can terminate.
1075 remainderMap[remLoop.getOperation()] = 0;
1076 foundMap[remLoop.getOperation()] = false;
1077 }
1078 prevLoop = remLoop;
1079 return WalkResult::advance();
1080 });
1081 }
1082 return WalkResult::advance();
1083 });
1084 if (res.wasInterrupted())
1085 return failure();
1086 }
1087 }
1088 return success();
1089 }
1090
1091 // Function that generates the IR to update runtime state of objectfifo
1092 // accesses. Called by dynamicGlobalObjectFifos().
1093 void updateGlobalNextIndex(OpBuilder &builder, ObjectFifoReleaseOp relOp,
1094 BufferOp globalNextIndex, arith::ConstantOp index,
1095 arith::ConstantOp size) {
1096 builder.setInsertionPointAfter(relOp);
1097 Value oldCounter = builder.create<memref::LoadOp>(
1098 builder.getUnknownLoc(), globalNextIndex,
1099 ValueRange(ArrayRef({index.getResult()})));
1100 Value val = builder.create<arith::ConstantOp>(
1101 oldCounter.getLoc(), builder.getI32IntegerAttr(relOp.getSize()));
1102 Value sum = builder.create<arith::AddIOp>(val.getLoc(), oldCounter, val);
1103 Value isGreaterEqual = builder.create<arith::CmpIOp>(
1104 sum.getLoc(), arith::CmpIPredicate::sge, sum, size);
1105 Value newCounter = builder.create<arith::SelectOp>(
1106 sum.getLoc(), isGreaterEqual,
1107 builder.create<arith::SubIOp>(sum.getLoc(), sum, size), sum);
1108 builder.create<memref::StoreOp>(size.getLoc(), newCounter, globalNextIndex,
1109 ValueRange(ArrayRef({index.getResult()})));
1110 }
1111
1112 // Function that generates the IR for objectfifo accesses to be handled at
1113 // runtime.
// For each CoreOp whose tile is in `objectFifoTiles`:
//  - one i32 counter per (objectFifo, port) pair acquired in the core is
//    allocated in a BufferOp created just before the core and set to 0;
//  - after every ObjectFifoReleaseOp the matching counter is advanced via
//    updateGlobalNextIndex();
//  - every ObjectFifoSubviewAccessOp that uses an acquire is followed by an
//    scf.index_switch on the counter which selects the buffer to use, and all
//    uses of the access result are redirected to the switch result.
// Returns failure() only if the walk over the core is interrupted.
1114 LogicalResult dynamicGlobalObjectFifos(DeviceOp &device, OpBuilder &builder,
1115 std::set<TileOp> objectFifoTiles) {
1116 for (auto coreOp : device.getOps<CoreOp>()) {
1117 if (objectFifoTiles.count(coreOp.getTileOp()) <= 0)
1118 continue;
// NOTE(review): the `continue` above already filters out tiles that are not
// in objectFifoTiles, so this condition is always true at this point.
1119 if (objectFifoTiles.count(coreOp.getTileOp()) > 0) {
1120 // For each core: count the number of objectFifos and create
1121 // a global buffer just before the core to track index of
1122 // next object to access.
1123 // !! NOTE !! objectFifos with same producer / consumer tile
1124 // need two counters (accessed based on the ObjectFifoPort)
1125 std::map<std::pair<ObjectFifoCreateOp, ObjectFifoPort>, int> fifoSizes;
1126 // Also, keep a map of the ConstantOps for the indices per OF
1127 // and a map with the ConstantOps for the sizes per OF.
1128 std::map<std::pair<ObjectFifoCreateOp, ObjectFifoPort>,
1129 arith::ConstantOp>
1130 globalIndices;
1131 std::map<std::pair<ObjectFifoCreateOp, ObjectFifoPort>,
1132 arith::ConstantOp>
1133 constantSizes;
1134
1135 int index = 0;
// The constants (initial value 0, per-fifo counter index and per-fifo size)
// are created at the start of the core body.
1136 builder.setInsertionPointToStart(&(coreOp.getBody().front()));
1137 Value initVal = builder.create<arith::ConstantOp>(
1138 builder.getUnknownLoc(), builder.getI32IntegerAttr(0));
1139 coreOp.walk([&](ObjectFifoAcquireOp acqOp) {
1140 ObjectFifoCreateOp op = acqOp.getObjectFifo();
1141 ObjectFifoPort port = acqOp.getPort();
// First acquire seen for this (fifo, port): assign the next counter slot.
1142 if (fifoSizes.find({op, port}) == fifoSizes.end()) {
1143 fifoSizes[{op, port}] = op.size();
1144 auto indexOp = builder.create<arith::ConstantOp>(
1145 initVal.getLoc(), builder.getIndexAttr(index));
1146 globalIndices[{op, port}] = indexOp;
1147 index++;
1148 auto size = builder.create<arith::ConstantOp>(
1149 indexOp.getLoc(), builder.getI32IntegerAttr(op.size()));
1150 constantSizes[{op, port}] = size;
1151 }
1152 });
// The counter buffer holds one i32 per (fifo, port) pair found above and is
// created on the core's tile, right before the CoreOp.
1153 builder.setInsertionPoint(coreOp);
1154 auto memrefTy =
1155 MemRefType::get(SmallVector<int64_t>{(int64_t)fifoSizes.size()},
1156 builder.getI32Type());
1157 auto globalNextIndex = builder.create<BufferOp>(
1158 builder.getUnknownLoc(), memrefTy, coreOp.getTile(),
1159 /*sym_name*/ nullptr, /*address*/ nullptr,
1160 /*initial_value*/ nullptr, /*mem_bank*/ nullptr);
1161
1162 // Initialize all counters in the global buffers to 0.
1163 for (auto i : constantSizes) {
1164 builder.setInsertionPointAfter(i.second);
1165 builder.create<memref::StoreOp>(
1166 builder.getUnknownLoc(), initVal, globalNextIndex,
1167 ValueRange(ArrayRef({globalIndices[i.first].getResult()})));
1168 }
1169
1170 // Walk the code:
1171 // - after each ObjectFifoReleaseOp:
1172 // - globalNextIndex: add #rel modulo objfifo depth
1173 // - before each ObjectFifoAcquireOp:
1174 // - globalNextIndex: load index and use it to index_switch (one
1175 // IndexSwitchOp per AccessOp)
1176 WalkResult res = coreOp.walk([&](Operation *op) {
1177 if (auto relOp = dyn_cast<ObjectFifoReleaseOp>(op)) {
1178 ObjectFifoCreateOp createOp = relOp.getObjectFifo();
1179 ObjectFifoPort port = relOp.getPort();
// NOTE(review): operator[] default-inserts an entry when this (fifo, port)
// was never acquired in the core — confirm every release has a matching
// acquire, otherwise a default-constructed ConstantOp is passed on.
1180 updateGlobalNextIndex(builder, relOp, globalNextIndex,
1181 globalIndices[{createOp, port}],
1182 constantSizes[{createOp, port}]);
1183 }
1184 if (auto acqOp = dyn_cast<ObjectFifoAcquireOp>(op)) {
// Collect the accesses first: the loop below rewrites their uses.
1185 std::vector<ObjectFifoSubviewAccessOp> accessOps;
1186 for (auto u : acqOp->getUsers())
1187 if (auto accessOp = dyn_cast<ObjectFifoSubviewAccessOp>(u))
1188 accessOps.push_back(accessOp);
1189
1190 for (auto accessOp : accessOps) {
1191 ObjectFifoCreateOp createOp = acqOp.getObjectFifo();
1192 ObjectFifoPort port = acqOp.getPort();
1193
1194 // Single switch case
// A fifo of size 1 needs no switch. This returns from the walk callback, so
// the remaining accesses of this acquire (same fifo, same size) are skipped
// as well.
1195 if (fifoSizes[{createOp, port}] == 1)
1196 return WalkResult::advance();
1197
1198 // Create a switch for each subview access
1199 builder.setInsertionPointAfter(accessOp);
1200 auto switchIndexAsInteger = builder.create<memref::LoadOp>(
1201 builder.getUnknownLoc(), globalNextIndex,
1202 ValueRange(
1203 ArrayRef({globalIndices[{createOp, port}].getResult()})));
1204 auto switchIndex = builder.create<arith::IndexCastOp>(
1205 builder.getUnknownLoc(), builder.getIndexType(),
1206 switchIndexAsInteger);
1207 unsigned caseRegionCounts = fifoSizes[{createOp, port}];
// One case per possible counter value: 0 .. size-1.
1208 SmallVector<int64_t, 4> caseValues;
1209 for (int i = 0; i < fifoSizes[{createOp, port}]; ++i) {
1210 caseValues.push_back(i);
1211 }
1212 auto cases =
1213 DenseI64ArrayAttr::get(builder.getContext(), caseValues);
1214 auto switchOp = builder.create<scf::IndexSwitchOp>(
1215 switchIndex.getLoc(),
1216 TypeRange({buffersPerFifo[createOp][0].getType()}),
1217 switchIndex, cases, caseRegionCounts);
1218 // Create default case of IndexSwitchOp
// The default case yields the buffer at the access op's own index (modulo
// the fifo size).
1219 builder.createBlock(&switchOp.getDefaultRegion());
1220 auto bufferIndex = (accessOp.getIndex()) % createOp.size();
1221 builder.setInsertionPointToStart(&(switchOp.getDefaultBlock()));
1222 builder.create<scf::YieldOp>(
1223 builder.getUnknownLoc(),
1224 buffersPerFifo[createOp][bufferIndex].getResult());
1225 for (int i = 0; i < fifoSizes[{createOp, port}]; ++i) {
1226 // Create other cases of IndexSwitchOp
// Case i yields the buffer at (access index + i) modulo the fifo size.
1227 builder.createBlock(&switchOp.getCaseRegions()[i]);
1228 builder.setInsertionPoint(&switchOp.getCaseBlock(i),
1229 switchOp.getCaseBlock(i).begin());
1230 int bufferToBeAccesed =
1231 (accessOp.getIndex() + i) % fifoSizes[{createOp, port}];
1232 builder.create<scf::YieldOp>(
1233 switchOp.getCaseRegions()[i].getLoc(),
1234 buffersPerFifo[createOp][bufferToBeAccesed].getResult());
1235 }
1236
1237 // Replace all uses of accessed objectfifo buffers with
1238 // results of switchOps
1239 accessOp.getOutput().replaceAllUsesWith(switchOp.getResult(0));
1240 }
1241 }
1242 return WalkResult::advance();
1243 });
1244 if (res.wasInterrupted())
1245 return failure();
1246 }
1247 }
1248 return success();
1249 }
1250
1251 /// Function used to create a UseLockOp based on input parameters.
1252 /// acc is an accumulator map that tracks the indices of the next locks to
1253 /// acquire (or release). Uses op to find index of acc for next lockID.
1254 /// Updates acc.
1255 void createUseLocks(OpBuilder &builder, ObjectFifoCreateOp op,
1256 ObjectFifoPort port,
1257 DenseMap<std::pair<ObjectFifoCreateOp, int>, int> &acc,
1258 int numLocks, LockAction lockAction) {
1259 ObjectFifoCreateOp target = op;
1260 auto portNum = port == ObjectFifoPort::Produce ? 0 : 1;
1261 if (auto linkOp = getOptionalLinkOp(op))
1262 if (objFifoLinks.find(*linkOp) != objFifoLinks.end())
1263 target = objFifoLinks[*linkOp];
1264
1265 auto dev = op->getParentOfType<DeviceOp>();
1266 if (!dev.getTargetModel().hasProperty(AIETargetModel::UsesSemaphoreLocks)) {
1267
1268 if (locksPerFifo[target].size() == 0) {
1269 for (int i = 0; i < numLocks; i++) {
1270 int lockID = acc[{op, portNum}];
1271 acc[{op, portNum}] =
1272 (lockID + 1) % op.size(); // update to next objFifo elem
1273 }
1274 return;
1275 }
1276
1277 int lockMode = 0;
1278 if ((port == ObjectFifoPort::Produce &&
1279 lockAction == LockAction::Release) ||
1280 (port == ObjectFifoPort::Consume &&
1281 lockAction == LockAction::Acquire))
1282 lockMode = 1;
1283 for (int i = 0; i < numLocks; i++) {
1284 int lockID = acc[{op, portNum}];
1285 builder.create<UseLockOp>(builder.getUnknownLoc(),
1286 locksPerFifo[target][lockID], lockAction,
1287 lockMode);
1288 acc[{op, portNum}] =
1289 (lockID + 1) % op.size(); // update to next objFifo elem
1290 }
1291 } else {
1292 if (numLocks == 0)
1293 return;
1294
1295 if (locksPerFifo[target].size() == 0) {
1296 acc[{op, portNum}] = (acc[{op, portNum}] + numLocks) %
1297 op.size(); // update to next objFifo elem
1298 return;
1299 }
1300
1301 // search for the correct lock based on the port of the acq/rel
1302 // operation e.g. acq as consumer is the read lock (second)
1303 LockOp lock;
1304 if (lockAction == LockAction::AcquireGreaterEqual) {
1305 if (port == ObjectFifoPort::Produce)
1306 lock = locksPerFifo[target][0];
1307 else
1308 lock = locksPerFifo[target][1];
1309 } else {
1310 if (port == ObjectFifoPort::Produce)
1311 lock = locksPerFifo[target][1];
1312 else
1313 lock = locksPerFifo[target][0];
1314 }
1315 builder.create<UseLockOp>(builder.getUnknownLoc(), lock, lockAction,
1316 numLocks);
1317 acc[{op, portNum}] = (acc[{op, portNum}] + numLocks) %
1318 op.size(); // update to next objFifo elem
1319 }
1320 }
1321
1322 /// Function used to check whether op is already contained in map.
1323 /// If it is then return the associated int, if not create new entry and
1324 /// return 0.
1326 DenseMap<std::pair<ObjectFifoCreateOp, int>, int> &map,
1327 std::pair<ObjectFifoCreateOp, int> pair) {
1328 if (map.find(pair) == map.end()) {
1329 map[pair] = 0;
1330 return 0;
1331 }
1332 return map[pair];
1333 }
1334
1335 /// Function used to add an external buffer to the externalBuffersPerFifo map.
1336 void addExternalBuffer(ObjectFifoCreateOp fifo, ExternalBufferOp buff) {
1337 if (externalBuffersPerFifo.find(fifo) == externalBuffersPerFifo.end()) {
1338 std::vector<ExternalBufferOp> buffs;
1339 externalBuffersPerFifo[fifo] = buffs;
1340 }
1341 externalBuffersPerFifo[fifo].push_back(buff);
1342 }
1343
1344 /// Function used to detect all external buffers associated with parent
1345 /// objectFifo and tile then map them to child objectFifo.
1346 void detectExternalBuffers(DeviceOp &device, ObjectFifoCreateOp parent,
1347 ObjectFifoCreateOp child, Value tile) {
1348 for (auto regOp : device.getOps<ObjectFifoRegisterExternalBuffersOp>())
1349 if (auto objFifo = regOp.getObjectFifo();
1350 regOp.getTile() == tile && objFifo == parent)
1351 for (auto extBuff : regOp.getExternalBuffers())
1352 addExternalBuffer(child, extBuff.getDefiningOp<ExternalBufferOp>());
1353 }
1354
1355 /// Function used to replace uses of split objectFifos.
1356 void replaceSplitFifo(ObjectFifoCreateOp originalOp, ObjectFifoCreateOp newOp,
1357 TileOp tile) {
1358 auto original =
1359 originalOp->getAttrOfType<StringAttr>(SymbolTable::getSymbolAttrName());
1360 auto newSymbol =
1361 newOp->getAttrOfType<StringAttr>(SymbolTable::getSymbolAttrName());
1362 for (auto user : tile->getUsers())
1363 if (isa<CoreOp>(user))
1364 if (auto res =
1365 SymbolTable::replaceAllSymbolUses(original, newSymbol, user);
1366 res.failed())
1367 llvm_unreachable("unreachable");
1368 }
1369
1370 /// Function used to find the size of an objectFifo after split based on
1371 /// the maximum number of elements (of the original objectFifo) acquired
1372 /// by a process running on given tile. If no CoreOp exists for this tile
1373 /// return 0.
1374 int findObjectFifoSize(DeviceOp &device, Value tile,
1375 ObjectFifoCreateOp objFifo) {
1376 if (objFifo.size() == 0)
1377 return 0;
1378
1379 // if memTile, size is equal to objFifo size
1380 if (tile.getDefiningOp<TileOp>().isMemTile())
1381 return objFifo.size();
1382
1383 // if shimTile, size is equal to number of external buffers
1384 if (tile.getDefiningOp<TileOp>().isShimTile())
1385 for (auto regOp : device.getOps<ObjectFifoRegisterExternalBuffersOp>()) {
1386 if (regOp.getTile() == tile)
1387 return regOp.getExternalBuffers().size();
1388 }
1389
1390 int maxAcquire = 0;
1391 for (auto coreOp : device.getOps<CoreOp>())
1392 if (coreOp.getTile() == tile)
1393 coreOp.walk([&](ObjectFifoAcquireOp acqOp) {
1394 if (auto createOp = acqOp.getObjectFifo(); createOp == objFifo)
1395 if (acqOp.acqNumber() > maxAcquire)
1396 maxAcquire = acqOp.acqNumber();
1397 });
1398
1399 if (maxAcquire > 0) {
1400 if (maxAcquire == 1 && objFifo.size() == 1)
1401 return 1;
1402 return maxAcquire + 1;
1403 // +1 because objectFifo size is always 1 bigger than maxAcquire to allow
1404 // for prefetching: simplest case scenario is at least a ping-pong buffer
1405 }
1406
1407 return objFifo.size();
1408 }
1409
1410 /// Function used to generate, from an objectFifo with a shimTile endpoint, a
1411 /// shimDMAAllocationOp containing the channelDir, channelIndex and
1412 /// shimTile col assigned by the objectFifo lowering.
1413 void createObjectFifoAllocationInfo(OpBuilder &builder, MLIRContext *ctx,
1414 FlatSymbolRefAttr obj_fifo, int colIndex,
1415 DMAChannelDir channelDir,
1416 int channelIndex, bool plio) {
1417 builder.create<ShimDMAAllocationOp>(builder.getUnknownLoc(), obj_fifo,
1418 DMAChannelDirAttr::get(ctx, channelDir),
1419 builder.getI64IntegerAttr(channelIndex),
1420 builder.getI64IntegerAttr(colIndex),
1421 builder.getBoolAttr(plio));
1422 }
1423
1424 /// Function used to verify that an objectfifo is present in at most one
1425 /// ObjectFifoLinkOp.
1426 void verifyObjectFifoLinks(DeviceOp &device) {
1427 DenseSet<ObjectFifoCreateOp> objectfifoset;
1428 for (ObjectFifoLinkOp link : device.getOps<ObjectFifoLinkOp>()) {
1429 for (ObjectFifoCreateOp inOf : link.getInputObjectFifos()) {
1430 if (objectfifoset.count(inOf))
1431 inOf.emitOpError("objectfifo cannot be in more than one "
1432 "ObjectFifoLinkOp");
1433 objectfifoset.insert(inOf);
1434 }
1435 for (ObjectFifoCreateOp outOf : link.getOutputObjectFifos()) {
1436 if (objectfifoset.count(outOf))
1437 outOf.emitOpError("objectfifo cannot be in more than one "
1438 "ObjectFifoLinkOp");
1439 objectfifoset.insert(outOf);
1440 }
1441 }
1442 }
1443
1444 void runOnOperation() override {
1445 DeviceOp device = getOperation();
1446 LockAnalysis lockAnalysis(device);
1447 DMAChannelAnalysis dmaAnalysis(device);
1448 OpBuilder builder = OpBuilder::atBlockTerminator(device.getBody());
1449 auto ctx = device->getContext();
1450 auto producerWireType = WireBundle::DMA;
1451 auto consumerWireType = WireBundle::DMA;
1452 std::set<TileOp>
1453 objectFifoTiles; // track cores to check for loops during unrolling
1454
1455 verifyObjectFifoLinks(device);
1456
1457 //===------------------------------------------------------------------===//
1458 // Split objectFifos into a consumer end and producer end if needed
1459 //===------------------------------------------------------------------===//
1460 // We are going to create additional createObjectFifoOps, so get a copy of
1461 // all "original" ones before the loop to avoid looping over newly created
1462 // ones.
1463 std::vector<ObjectFifoCreateOp> createFifoOps;
1464 auto range = device.getOps<ObjectFifoCreateOp>();
1465 createFifoOps.insert(createFifoOps.end(), range.begin(), range.end());
1466 for (auto createOp : createFifoOps) {
1467 std::vector<ObjectFifoCreateOp> splitConsumerFifos;
1468 int consumerIndex = 0;
1469 int consumerDepth = createOp.size();
1470 ArrayRef<BDDimLayoutArrayAttr> consumerDims =
1471 createOp.getDimensionsFromStreamPerConsumer();
1472
1473 // Only FIFOs using DMA are split into two ends;
1474 // skip in shared memory case
1475 if (int share_direction = 0; !requiresDMAs(createOp, share_direction)) {
1476 continue;
1477 }
1478
1479 for (auto consumerTile : createOp.getConsumerTiles()) {
1480 auto consumerTileOp = dyn_cast<TileOp>(consumerTile.getDefiningOp());
1481
1482 if (isa<ArrayAttr>(createOp.getElemNumber())) {
1483 // +1 to account for 1st depth (producer)
1484 consumerDepth = createOp.size(consumerIndex + 1);
1485 } else {
1486 consumerDepth = findObjectFifoSize(device, consumerTileOp, createOp);
1487 }
1488
1489 builder.setInsertionPointAfter(createOp);
1490 auto datatype = llvm::cast<AIEObjectFifoType>(createOp.getElemType());
1491 auto consumerObjFifoSize =
1492 builder.getIntegerAttr(builder.getI32Type(), consumerDepth);
1493 // rename and replace split objectFifo
1494 std::string consumerFifoName;
1495 if (createOp.getConsumerTiles().size() > 1) {
1496 consumerFifoName = createOp.name().str() + "_" +
1497 std::to_string(consumerIndex) + "_cons";
1498 } else {
1499 consumerFifoName = createOp.name().str() + "_cons";
1500 }
1501 BDDimLayoutArrayAttr emptyDims =
1502 BDDimLayoutArrayAttr::get(builder.getContext(), {});
1503 BDDimLayoutArrayAttr singletonFromStreamDims =
1504 BDDimLayoutArrayAttr::get(
1505 builder.getContext(),
1506 ArrayRef<BDDimLayoutAttr>{consumerDims[consumerIndex]});
1507 BDDimLayoutArrayArrayAttr fromStreamDims =
1508 BDDimLayoutArrayArrayAttr::get(builder.getContext(),
1509 singletonFromStreamDims);
1510
1511 ObjectFifoCreateOp consumerFifo = createObjectFifo(
1512 builder, datatype, consumerFifoName, consumerTile, consumerTile,
1513 consumerObjFifoSize, emptyDims, fromStreamDims);
1514 if (createOp.getDisableSynchronization())
1515 consumerFifo.setDisableSynchronization(true);
1516 replaceSplitFifo(createOp, consumerFifo, consumerTileOp);
1517
1518 // identify external buffers that were registered to the consumer fifo
1519 if (consumerTile.getDefiningOp<TileOp>().isShimTile())
1520 detectExternalBuffers(device, createOp, consumerFifo, consumerTile);
1521
1522 // record that this objectFifo was split; it will require DMA config
1523 splitConsumerFifos.push_back(consumerFifo);
1524
1525 // update the linkOp if the split objFifo was originally its start point
1526 if (auto linkOp = getOptionalLinkOp(createOp))
1527 for (ObjectFifoCreateOp fifoIn : linkOp->getInputObjectFifos())
1528 if (fifoIn.name() == createOp.name() &&
1529 consumerTile == *linkOp->getOptionalSharedTile())
1530 if (failed(SymbolTable::replaceAllSymbolUses(
1531 createOp, consumerFifo.name(), linkOp->getOperation())))
1532 llvm::report_fatal_error("unable to update all symbol uses");
1533
1534 consumerIndex++;
1535 }
1536
1537 if (!splitConsumerFifos.empty()) {
1538 splitFifos.emplace_back(createOp, splitConsumerFifos);
1539 }
1540 }
1541
1542 //===------------------------------------------------------------------===//
1543 // - Create objectFifo buffers and locks.
1544 // - Populate a list of tiles containing objectFifos for later processing of
1545 // the acquires/releases (uses of the FIFO).
1546 // - Global release counter tracker to keep track of the objectFifo state
1547 //===------------------------------------------------------------------===//
1548
1549 for (auto createOp : device.getOps<ObjectFifoCreateOp>()) {
1550 int share_direction = 0;
1551 bool shared = !requiresDMAs(createOp, share_direction);
1552
1553 // add all tiles that contain an objectFifo to objectFifoTiles for later
1554 // loop unrolling pass
1555 objectFifoTiles.insert(createOp.getProducerTileOp());
1556 for (auto consumerTile : createOp.getConsumerTiles()) {
1557 auto consumerTileOp = dyn_cast<TileOp>(consumerTile.getDefiningOp());
1558 objectFifoTiles.insert(consumerTileOp);
1559 }
1560
1561 // identify external buffers that were registered to
1562 // the producer objectFifo
1563 if (createOp.getProducerTileOp().isShimTile())
1564 detectExternalBuffers(device, createOp, createOp,
1565 createOp.getProducerTile());
1566
1567 // if split, the necessary size for producer fifo might change
1568 if (shared) {
1569 checkAndApplyViaSharedMemAttribute(createOp, share_direction);
1570 createObjectFifoElements(builder, lockAnalysis, createOp,
1571 share_direction);
1572 } else {
1573 if (createOp.getViaSharedMem().has_value())
1574 createOp->emitOpError(
1575 "no access to shared memory module specified by "
1576 "`via_shared_mem`");
1577
1578 if (isa<ArrayAttr>(createOp.getElemNumber()))
1579 createOp.setElemNumberAttr(
1580 builder.getI32IntegerAttr(createOp.size()));
1581 else {
1582 if (!createOp.getInitValues().has_value()) {
1583 int prodMaxAcquire = findObjectFifoSize(
1584 device, createOp.getProducerTileOp(), createOp);
1585 createOp.setElemNumberAttr(
1586 builder.getI32IntegerAttr(prodMaxAcquire));
1587 }
1588 }
1589 createObjectFifoElements(builder, lockAnalysis, createOp,
1590 share_direction);
1591 }
1592 }
1593
1594 //===------------------------------------------------------------------===//
1595 // Create flows and tile DMAs
1596 //===------------------------------------------------------------------===//
1597 // Only the objectFifos we split above require DMA communication; the others
1598 // rely on shared memory and share the same buffers.
1599 for (auto &[producer, consumers] : splitFifos) {
1600 // create producer tile DMA
1601 int producerChanIndex = dmaAnalysis.getDMAChannelIndex(
1602 producer.getProducerTileOp(), DMAChannelDir::MM2S);
1603 if (producerChanIndex == -1)
1604 producer.getProducerTileOp().emitOpError(
1605 "number of output DMA channel exceeded!");
1606 DMAChannel producerChan = {DMAChannelDir::MM2S, producerChanIndex};
1607 createDMA(device, builder, producer, producerChan.direction,
1608 producerChan.channel, 0, producer.getDimensionsToStreamAttr(),
1609 producer.getPadDimensionsAttr());
1610 // generate objectFifo allocation info
1611 builder.setInsertionPoint(device.getBody()->getTerminator());
1612
1613 if (producer.getProducerTileOp().isShimTile())
1614 createObjectFifoAllocationInfo(
1615 builder, ctx, SymbolRefAttr::get(ctx, producer.getName()),
1616 producer.getProducerTileOp().colIndex(), producerChan.direction,
1617 producerChan.channel, producer.getPlio());
1618
1619 for (auto consumer : consumers) {
1620
1621 // create consumer tile DMA
1622 int consumerChanIndex = dmaAnalysis.getDMAChannelIndex(
1623 consumer.getProducerTileOp(), DMAChannelDir::S2MM);
1624 if (consumerChanIndex == -1)
1625 consumer.getProducerTileOp().emitOpError(
1626 "number of input DMA channel exceeded!");
1627 DMAChannel consumerChan = {DMAChannelDir::S2MM, consumerChanIndex};
1628 BDDimLayoutArrayAttr consumerDims =
1629 consumer.getDimensionsFromStreamPerConsumer()[0];
1630 createDMA(device, builder, consumer, consumerChan.direction,
1631 consumerChan.channel, 1, consumerDims, nullptr);
1632 // generate objectFifo allocation info
1633 builder.setInsertionPoint(device.getBody()->getTerminator());
1634
1635 // If we have PLIO then figure out the direction and make that a PLIO
1636 if (producer.getPlio()) {
1637 producerWireType = producer.getProducerTileOp().isShimTile()
1638 ? WireBundle::PLIO
1639 : WireBundle::DMA;
1640 consumerWireType = consumer.getProducerTileOp().isShimTile()
1641 ? WireBundle::PLIO
1642 : WireBundle::DMA;
1643 } else {
1644 producerWireType = WireBundle::DMA;
1645 consumerWireType = WireBundle::DMA;
1646 }
1647
1648 if (consumer.getProducerTileOp().isShimTile())
1649 createObjectFifoAllocationInfo(
1650 builder, ctx, SymbolRefAttr::get(ctx, producer.getName()),
1651 consumer.getProducerTileOp().colIndex(), consumerChan.direction,
1652 consumerChan.channel, producer.getPlio());
1653
1654 // create flow
1655 builder.setInsertionPointAfter(producer);
1656 builder.create<FlowOp>(builder.getUnknownLoc(),
1657 producer.getProducerTile(), producerWireType,
1658 producerChan.channel, consumer.getProducerTile(),
1659 consumerWireType, consumerChan.channel);
1660 }
1661 }
1662
1663 //===------------------------------------------------------------------===//
1664 // Statically unroll for loops or use dynamic objectFifos
1665 //===------------------------------------------------------------------===//
1666 if (clDynamicObjectFifos) {
1667 if (failed(dynamicGlobalObjectFifos(device, builder, objectFifoTiles)))
1668 signalPassFailure();
1669 } else {
1670 std::set<TileOp> dynamicTiles;
1671 std::set<TileOp> unrollTiles;
1672 for (auto c : device.getOps<CoreOp>()) {
1673 TileOp t = c.getTileOp();
1674 if (objectFifoTiles.count(t) > 0) {
1675 if (c.getDynamicObjfifoLowering().has_value()) {
1676 if (c.getDynamicObjfifoLowering().value())
1677 dynamicTiles.insert(t);
1678 else
1679 unrollTiles.insert(t);
1680 } else {
1681 unrollTiles.insert(t);
1682 }
1683 }
1684 }
1685 if (failed(dynamicGlobalObjectFifos(device, builder, dynamicTiles)))
1686 signalPassFailure();
1687 if (failed(unrollForLoops(device, builder, unrollTiles)))
1688 signalPassFailure();
1689 }
1690
1691 //===------------------------------------------------------------------===//
1692 // Replace ops
1693 //===------------------------------------------------------------------===//
1694 for (auto coreOp : device.getOps<CoreOp>()) {
1695 DenseMap<ObjectFifoAcquireOp, std::vector<BufferOp *>>
1696 subviews; // maps each "subview" to its buffer references (subviews
1697 // are created by AcquireOps)
1698 DenseMap<std::pair<ObjectFifoCreateOp, int>, std::vector<int>>
1699 acquiresPerFifo; // maps each objFifo to indices of buffers acquired
1700 // in latest subview of that objFifo (useful to
1701 // cascade acquired elements to next AcquireOp)
1702 DenseMap<std::pair<ObjectFifoCreateOp, int>,
1703 std::vector<ObjectFifoReleaseOp>>
1704 releaseOps; // useful to check which ReleaseOp has taken place before
1705 // an AcquireOp per objFifo
1706 DenseMap<std::pair<ObjectFifoCreateOp, int>, int>
1707 acqPerFifo; // maps each objFifo to its next index to acquire within
1708 // this CoreOp
1709 DenseMap<std::pair<ObjectFifoCreateOp, int>, int>
1710 relPerFifo; // maps each objFifo to its next index to release within
1711 // this CoreOp
1712
1713 //===----------------------------------------------------------------===//
1714 // Replace objectFifo.release ops
1715 //===----------------------------------------------------------------===//
1716 coreOp.walk([&](ObjectFifoReleaseOp releaseOp) {
1717 builder.setInsertionPointAfter(releaseOp);
1718 ObjectFifoCreateOp op = releaseOp.getObjectFifo();
1719 auto port = releaseOp.getPort();
1720 auto portNum = port == ObjectFifoPort::Produce ? 0 : 1;
1721 auto core = releaseOp->getParentOfType<CoreOp>();
1722
1723 if (auto linkOp = getOptionalLinkOp(op)) {
1724 if (core.getTile() == *linkOp->getOptionalSharedTile()) {
1725 releaseOp->emitOpError("currently cannot access objectFifo used in "
1726 "ObjectFifoLinkOp");
1727 return;
1728 }
1729 }
1730
1731 // update index of next element to release for this objectFifo
1732 updateAndReturnIndex(relPerFifo, {op, portNum});
1733
1734 // release locks
1735 int numLocks = releaseOp.relNumber();
1736 // account for repetition
1737 if (op.getRepeatCount().has_value())
1738 numLocks *= op.getRepeatCount().value();
1739 createUseLocks(builder, op, port, relPerFifo, numLocks,
1740 LockAction::Release);
1741
1742 // register release op
1743 if (releaseOps.find({op, portNum}) != releaseOps.end()) {
1744 releaseOps[{op, portNum}].push_back(releaseOp);
1745 } else {
1746 std::vector release = {releaseOp};
1747 releaseOps[{op, portNum}] = release;
1748 }
1749 });
1750
1751 //===----------------------------------------------------------------===//
1752 // Replace objectFifo.acquire ops
1753 //===----------------------------------------------------------------===//
1754 coreOp.walk([&](ObjectFifoAcquireOp acquireOp) {
1755 ObjectFifoCreateOp op = acquireOp.getObjectFifo();
1756 builder.setInsertionPointAfter(acquireOp);
1757 auto port = acquireOp.getPort();
1758 auto portNum = port == ObjectFifoPort::Produce ? 0 : 1;
1759 auto core = acquireOp->getParentOfType<CoreOp>();
1760
1761 auto linkOp = getOptionalLinkOp(op);
1762 if (linkOp) {
1763 if (core.getTile() == *linkOp->getOptionalSharedTile()) {
1764 acquireOp->emitOpError("currently cannot access objectFifo used in "
1765 "ObjectFifoLinkOp");
1766 return;
1767 }
1768 }
1769
1770 // index of next element to acquire for this objectFifo
1771 int start = updateAndReturnIndex(
1772 acqPerFifo, {op, portNum}); // useful for keeping track of which
1773 // indices are acquired
1774
1775 // check how many elements have been released in between this AcquireOp
1776 // and the previous one
1777 // !!! operations may not be in the same block !!!
1778 int numRel = 0;
1779 for (std::vector<ObjectFifoReleaseOp>::iterator relOp =
1780 releaseOps[{op, portNum}].begin();
1781 relOp != releaseOps[{op, portNum}].end();) {
1782 bool erased = false;
1783 Operation *acqBlockDefOp = acquireOp.getOperation();
1784 do {
1785 Operation *relBlockDefOp = (*relOp).getOperation();
1786 do {
1787 if (acqBlockDefOp->getBlock() == relBlockDefOp->getBlock()) {
1788 if (relBlockDefOp->isBeforeInBlock(acqBlockDefOp)) {
1789 numRel += (*relOp).relNumber();
1790 relOp = releaseOps[{op, portNum}].erase(relOp);
1791 // to ensure that we do not account
1792 // the ReleaseOps again later,
1793 // after the subview is created
1794 erased = true;
1795 }
1796 }
1797 } while ((relBlockDefOp = relBlockDefOp->getParentOp()) &&
1798 !isa<DeviceOp>(relBlockDefOp) && !erased);
1799 } while ((acqBlockDefOp = acqBlockDefOp->getParentOp()) &&
1800 !isa<DeviceOp>(acqBlockDefOp) && !erased);
1801 if (!erased)
1802 ++relOp;
1803 }
1804
1805 // track indices of elements to acquire
1806 std::vector<int> acquiredIndices;
1807 if (!acquiresPerFifo[{op, portNum}].empty()) {
1808 // take into account what has already been acquired by previous
1809 // AcquireOp in program order
1810 acquiredIndices = acquiresPerFifo[{op, portNum}];
1811 // take into account what has been released in-between
1812 if (static_cast<size_t>(numRel) > acquiredIndices.size()) {
1813 acquireOp->emitOpError("cannot release more elements than are "
1814 "already acquired");
1815 return;
1816 }
1817 for (int i = 0; i < numRel; i++)
1818 acquiredIndices.erase(acquiredIndices.begin());
1819 }
1820
1821 // acquire locks
1822 int numLocks = acquireOp.acqNumber();
1823 int alreadyAcq = acquiredIndices.size();
1824 int numCreate;
1825 if (numLocks > alreadyAcq)
1826 numCreate = numLocks - alreadyAcq;
1827 else
1828 numCreate = 0;
1829
1830 // account for repetition
1831 if (op.getRepeatCount().has_value())
1832 numCreate *= op.getRepeatCount().value();
1833
1834 auto dev = op->getParentOfType<DeviceOp>();
1835 if (auto &targetArch = dev.getTargetModel();
1836 targetArch.getTargetArch() == AIEArch::AIE1)
1837 createUseLocks(builder, op, port, acqPerFifo, numCreate,
1838 LockAction::Acquire);
1839 else
1840 createUseLocks(builder, op, port, acqPerFifo, numCreate,
1841 LockAction::AcquireGreaterEqual);
1842
1843 // if objFifo was linked with others, find which objFifos
1844 // elements to use
1845 ObjectFifoCreateOp target = op;
1846 if (linkOp)
1847 if (objFifoLinks.find(*linkOp) != objFifoLinks.end())
1848 target = objFifoLinks[*linkOp];
1849
1850 // create subview: buffers that were already acquired + new acquires
1851 for (int i = 0; i < numCreate; i++) {
1852 acquiredIndices.push_back(start);
1853 start = (start + 1) % op.size();
1854 }
1855 std::vector<BufferOp *> subviewRefs;
1856 subviewRefs.reserve(acquiredIndices.size());
1857 for (auto index : acquiredIndices)
1858 subviewRefs.push_back(&buffersPerFifo[target][index]);
1859
1860 subviews[acquireOp] = subviewRefs;
1861 acquiresPerFifo[{op, portNum}] = acquiredIndices;
1862 });
1863
1864 //===----------------------------------------------------------------===//
1865 // Replace subview.access ops
1866 //===----------------------------------------------------------------===//
1867 coreOp.walk([&](ObjectFifoSubviewAccessOp accessOp) {
1868 auto acqOp = accessOp.getSubview().getDefiningOp<ObjectFifoAcquireOp>();
1869 if (ObjectFifoCreateOp op = acqOp.getObjectFifo()) {
1870 if (auto linkOp = getOptionalLinkOp(op); linkOp.has_value()) {
1871 if (!linkOp->isDistribute() && !linkOp->isJoin()) {
1872 for (auto consumerTile : op.getConsumerTiles()) {
1873 if (auto consumerTileOp =
1874 dyn_cast<TileOp>(consumerTile.getDefiningOp())) {
1875 int share_dir_value = 0;
1876 bool sharing = isSharedMemory(
1877 op.getProducerTileOp(), consumerTileOp, &share_dir_value);
1878 if (!sharing)
1879 accessOp->emitOpError(
1880 "currently cannot access objectFifo used in "
1881 "ObjectFifoLinkOp if the tiles don't share memory");
1882 }
1883 }
1884 } else
1885 accessOp->emitOpError(
1886 "currently cannot access objectFifo used in "
1887 "ObjectFifoLinkOp if it is a distribute or join link");
1888 }
1889 }
1890 accessOp.getOutput().replaceAllUsesWith(
1891 subviews[acqOp][accessOp.getIndex()]->getBuffer());
1892 });
1893 }
1894 // make global symbols to replace the to be erased ObjectFifoCreateOps
1895 for (auto createOp : device.getOps<ObjectFifoCreateOp>()) {
1896 builder.setInsertionPointToStart(device.getBody());
1897 auto sym_name = createOp.getName();
1898 createOp->setAttr(SymbolTable::getSymbolAttrName(),
1899 builder.getStringAttr("__erase_" + sym_name));
1900 auto memrefType = llvm::cast<AIEObjectFifoType>(createOp.getElemType())
1901 .getElementType();
1902 builder.create<memref::GlobalOp>(builder.getUnknownLoc(), sym_name,
1903 builder.getStringAttr("public"),
1904 memrefType, nullptr, false, nullptr);
1905 }
1906
1907 //===------------------------------------------------------------------===//
1908 // Remove old ops
1909 //===------------------------------------------------------------------===//
1910 SetVector<Operation *> opsToErase;
1911 device.walk([&](Operation *op) {
1912 if (isa<ObjectFifoCreateOp, ObjectFifoLinkOp,
1913 ObjectFifoRegisterExternalBuffersOp, ObjectFifoAcquireOp,
1914 ObjectFifoSubviewAccessOp, ObjectFifoReleaseOp>(op))
1915 opsToErase.insert(op);
1916 });
1917 SmallVector<Operation *> sorted{opsToErase.begin(), opsToErase.end()};
1918 computeTopologicalSorting(sorted);
1919 for (auto *op : llvm::reverse(sorted))
1920 op->erase();
1921 }
1922};
1923
1924std::unique_ptr<OperationPass<DeviceOp>>
1926 return std::make_unique<AIEObjectFifoStatefulTransformPass>();
1927}
int getDMAChannelIndex(TileOp tileOp, DMAChannelDir dir)
Given a tile and DMAChannelDir, returns next usable channel index for that tile.
int getLockID(TileOp &tileOp)
Given a tile, returns next usable lockID for that tile.
Include the generated interface declarations.
std::unique_ptr< mlir::OperationPass< DeviceOp > > createAIEObjectFifoStatefulTransformPass()
DMAChannel { DMAChannelDir direction DMAChannel
Definition AIEDialect.h:180
const AIETargetModel & getTargetModel(mlir::Operation *op)
void createObjectFifoElements(OpBuilder &builder, LockAnalysis &lockAnalysis, ObjectFifoCreateOp op, int share_direction)
Function used to create objectFifo elements and their locks.
DenseMap< ObjectFifoCreateOp, std::vector< LockOp > > locksPerFifo
void createAIETileDMA(DeviceOp &device, OpBuilder &builder, ObjectFifoCreateOp op, DMAChannelDir channelDir, int channelIndex, int lockMode, BDDimLayoutArrayAttr dims)
Function used to create a MemOp region with a DMA channel.
void createMemTileDMA(DeviceOp &device, OpBuilder &builder, ObjectFifoCreateOp op, DMAChannelDir channelDir, int channelIndex, int lockMode, BDDimLayoutArrayAttr dims, BDPadLayoutArrayAttr padDimensions)
Function used to create a MemTileDMAOp region with a DMA channel.
int findObjectFifoSize(DeviceOp &device, Value tile, ObjectFifoCreateOp objFifo)
Function used to find the size of an objectFifo after split based on the maximum number of elements (...
void createObjectFifoAllocationInfo(OpBuilder &builder, MLIRContext *ctx, FlatSymbolRefAttr obj_fifo, int colIndex, DMAChannelDir channelDir, int channelIndex, bool plio)
Function used to generate, from an objectFifo with a shimTile endpoint, a shimDMAAllocationOp contain...
LogicalResult dynamicGlobalObjectFifos(DeviceOp &device, OpBuilder &builder, std::set< TileOp > objectFifoTiles)
Block * findEndOpBlock(Region &r)
Function that returns a pointer to the block of a Region that contains the AIEEndOp.
void replaceSplitFifo(ObjectFifoCreateOp originalOp, ObjectFifoCreateOp newOp, TileOp tile)
Function used to replace uses of split objectFifos.
bool isSharedMemory(TileOp a, TileOp b, int *share_direction)
Function that returns true if two tiles in the AIE array share a memory module.
void updateGlobalNextIndex(OpBuilder &builder, ObjectFifoReleaseOp relOp, BufferOp globalNextIndex, arith::ConstantOp index, arith::ConstantOp size)
std::vector< LockOp > createObjectFifoLocks(OpBuilder &builder, LockAnalysis &lockAnalysis, ObjectFifoCreateOp op, int numElem, int joinDistribFactor, TileOp creation_tile, int repeatCount)
Function used to create objectFifo locks based on target architecture.
void createShimDMA(DeviceOp &device, OpBuilder &builder, ObjectFifoCreateOp op, DMAChannelDir channelDir, int channelIndex, int lockMode, BDDimLayoutArrayAttr dims)
Function used to create a ShimDMAOp region with a DMA channel.
void addExternalBuffer(ObjectFifoCreateOp fifo, ExternalBufferOp buff)
Function used to add an external buffer to the externalBuffersPerFifo map.
void verifyObjectFifoLinks(DeviceOp &device)
Function used to verify that an objectFifo is present in at most one ObjectFifoLinkOp.
std::optional< ObjectFifoLinkOp > getOptionalLinkOp(ObjectFifoCreateOp op)
Function to retrieve ObjectFifoLinkOp of ObjectFifoCreateOp, if it belongs to one.
void createDMA(DeviceOp &device, OpBuilder &builder, ObjectFifoCreateOp op, DMAChannelDir channelDir, int channelIndex, int lockMode, BDDimLayoutArrayAttr dims, BDPadLayoutArrayAttr pad_dims)
Function that either calls createAIETileDMA(), createShimDMA() or createMemTileDMA() based on op tile...
void createBd(OpBuilder &builder, LockOp acqLock, int acqMode, LockAction acqLockAction, LockOp relLock, int relMode, MyOp buff, int offset, int len, Block *succ, BDDimLayoutArrayAttr dims, BDPadLayoutArrayAttr padDimensions)
Function used to create a Bd block.
void createBdBlock(OpBuilder &builder, ObjectFifoCreateOp op, int lockMode, int acqNum, int relNum, MyOp buff, int offset, int len, DMAChannelDir channelDir, size_t lockIndex, Block *succ, BDDimLayoutArrayAttr dims, BDPadLayoutArrayAttr padDimensions, bool distribOrJoin=false)
Function used to create a Bd block.
int updateAndReturnIndex(DenseMap< std::pair< ObjectFifoCreateOp, int >, int > &map, std::pair< ObjectFifoCreateOp, int > pair)
Function used to check whether op is already contained in map.
DenseMap< ObjectFifoLinkOp, ObjectFifoCreateOp > objFifoLinks
void detectExternalBuffers(DeviceOp &device, ObjectFifoCreateOp parent, ObjectFifoCreateOp child, Value tile)
Function used to detect all external buffers associated with parent objectFifo and tile then map them...
DenseMap< ObjectFifoCreateOp, std::vector< ExternalBufferOp > > externalBuffersPerFifo
std::vector< ObjectFifoCreateOp > splitBecauseLink
void checkAndApplyViaSharedMemAttribute(ObjectFifoCreateOp createOp, int &share_direction)
LogicalResult unrollForLoops(DeviceOp &device, OpBuilder &builder, std::set< TileOp > objectFifoTiles)
ObjectFifoCreateOp createObjectFifo(OpBuilder &builder, AIEObjectFifoType datatype, std::string name, Value prodTile, Value consTile, Attribute depth, BDDimLayoutArrayAttr dimensionsToStream, BDDimLayoutArrayArrayAttr dimensionsFromStreamPerConsumer)
void createUseLocks(OpBuilder &builder, ObjectFifoCreateOp op, ObjectFifoPort port, DenseMap< std::pair< ObjectFifoCreateOp, int >, int > &acc, int numLocks, LockAction lockAction)
Function used to create a UseLockOp based on input parameters.
bool requiresDMAs(ObjectFifoCreateOp createOp, int &share_direction)
DenseMap< ObjectFifoCreateOp, std::vector< BufferOp > > buffersPerFifo
std::vector< std::pair< ObjectFifoCreateOp, std::vector< ObjectFifoCreateOp > > > splitFifos