12#include "llvm/Support/Debug.h"
20#define DEBUG_TYPE "aie-placer"
23 this->targetModel = &targetModel;
32 case AIETileType::CoreTile:
35 case AIETileType::MemTile:
36 case AIETileType::ShimNOCTile:
66 if (coresPerCol.has_value()) {
68 std::map<int, int> coresInColumn;
69 for (
const auto &tile : availability.
compTiles) {
70 coresInColumn[tile.col]++;
72 int maxDeviceCoresPerCol = 0;
73 for (
const auto &[
col, count] : coresInColumn) {
74 maxDeviceCoresPerCol = std::max(maxDeviceCoresPerCol, count);
76 deviceCoresPerCol = maxDeviceCoresPerCol;
78 limitCoresPerColumn(*coresPerCol, targetModel.
columns());
// Caps how many compute tiles are kept per column.
//
// Buckets every tile of `availability.compTiles` by its column, then walks
// columns 0 .. numColumns-1 and appends the leading tiles of each bucket to
// `limitedTiles`, bounded by `maxCoresPerCol`.
//
// NOTE(review): the number at the start of each line is a listing line number
// left over from extraction, and the gaps in that numbering mean some
// statements of this function are not visible here (for example the
// declaration that receives the std::min result, and whatever consumes
// `limitedTiles` afterwards -- presumably it replaces
// availability.compTiles; confirm against the full file).
82void SequentialPlacer::limitCoresPerColumn(
int maxCoresPerCol,
int numColumns) {
// Column index -> compute tiles of that column, kept in the order in which
// they appear in availability.compTiles.
84 std::map<int, std::vector<TileID>> tilesByColumn;
86 for (
const auto &tile : availability.compTiles) {
87 tilesByColumn[tile.col].push_back(tile);
// Accumulates the tiles that survive the per-column cap.
91 std::vector<TileID> limitedTiles;
93 for (
int col = 0;
col < numColumns;
col++) {
94 auto it = tilesByColumn.find(
col);
// This column holds no compute tiles. The statement guarded by this check is
// not visible here (presumably `continue`).
95 if (it == tilesByColumn.end())
98 const auto &tilesInCol = it->second;
// Never take more tiles than the column actually holds. A negative
// maxCoresPerCol would convert to a very large size_t and therefore would not
// limit anything -- assumes callers pass a non-negative value; TODO confirm.
100 std::min(tilesInCol.size(),
static_cast<size_t>(maxCoresPerCol));
// Keep the first `numToTake` tiles of this column.
103 limitedTiles.insert(limitedTiles.end(), tilesInCol.begin(),
104 tilesInCol.begin() + numToTake);
113 if (coresPerCol.has_value() && *coresPerCol > deviceCoresPerCol) {
114 return device.emitError() <<
"requested cores-per-col (" << *coresPerCol
115 <<
") exceeds device capacity ("
116 << deviceCoresPerCol <<
" cores per column)";
120 SmallVector<LogicalTileOp> logicalTiles;
121 SmallVector<ObjectFifoCreateOp> objectFifos;
122 SmallVector<ObjectFifoLinkOp> objectFifoLinks;
124 device.walk([&](Operation *op) {
125 if (
auto lt = dyn_cast<LogicalTileOp>(op))
126 logicalTiles.push_back(lt);
127 if (
auto of = dyn_cast<ObjectFifoCreateOp>(op))
128 objectFifos.push_back(of);
129 if (
auto link = dyn_cast<ObjectFifoLinkOp>(op))
130 objectFifoLinks.push_back(link);
134 auto channelRequirements =
135 buildChannelRequirements(objectFifos, objectFifoLinks);
138 size_t nextCompIdx = 0;
139 for (
auto logicalTile : logicalTiles) {
141 auto col = logicalTile.tryGetCol();
142 auto row = logicalTile.tryGetRow();
145 if (failed(validateAndUpdateChannelUsage(logicalTile, tile,
146 channelRequirements,
true)))
149 result[logicalTile] = tile;
153 if (logicalTile.getTileType() == AIETileType::CoreTile)
154 availability.
removeTile(tile, logicalTile.getTileType());
159 if (logicalTile.getTileType() == AIETileType::CoreTile) {
160 std::optional<TileID> placement = std::nullopt;
162 for (
size_t i = nextCompIdx; i < availability.
compTiles.size(); ++i) {
166 if (
col && candidate.col != *
col)
168 if (
row && candidate.row != *
row)
174 placement = availability.
compTiles[nextCompIdx++];
180 return logicalTile.emitError()
181 <<
"no compute tile available matching constraint ("
182 << (
col ? std::to_string(*
col) :
"?") <<
", "
183 << (
row ? std::to_string(*
row) :
"?") <<
")";
185 return logicalTile.emitError(
186 "no available compute tiles for placement");
189 if (failed(validateAndUpdateChannelUsage(logicalTile, *placement,
190 channelRequirements,
false)))
193 result[logicalTile] = *placement;
196 if (logicalTile.getTileType() == AIETileType::ShimPLTile) {
197 return logicalTile.emitError(
198 "DMA channel-based SequentialPlacer does not support unplaced "
199 "ShimPLTiles (no DMAs).");
204 llvm::DenseMap<int, SmallVector<ObjectFifoCreateOp>> groupToFifos;
205 llvm::DenseMap<int, SmallVector<LogicalTileOp>> groupToLogicalTiles;
206 buildObjectFifoGroups(objectFifos, objectFifoLinks, groupToFifos,
207 groupToLogicalTiles);
210 llvm::DenseSet<int> processedGroups;
212 for (
auto &[groupId, logicalTiles] : groupToLogicalTiles) {
213 if (processedGroups.count(groupId))
215 processedGroups.insert(groupId);
218 int groupCommonCol = 0;
219 int totalCoreEndpoints = 0;
222 auto fifosIt = groupToFifos.find(groupId);
223 if (fifosIt != groupToFifos.end()) {
224 for (
auto ofOp : fifosIt->second) {
226 Value producerTile = ofOp.getProducerTile();
227 if (
auto *producerOp = producerTile.getDefiningOp()) {
228 if (
auto prodLogical = dyn_cast<LogicalTileOp>(producerOp)) {
229 if (prodLogical.getTileType() == AIETileType::CoreTile) {
230 if (
result.count(prodLogical.getOperation())) {
231 sumCols +=
result[prodLogical.getOperation()].col;
232 totalCoreEndpoints++;
238 for (Value consumerTile : ofOp.getConsumerTiles()) {
239 if (
auto *consumerOp = consumerTile.getDefiningOp()) {
240 if (
auto consLogical = dyn_cast<LogicalTileOp>(consumerOp)) {
241 if (consLogical.getTileType() == AIETileType::CoreTile) {
242 if (
result.count(consLogical.getOperation())) {
243 sumCols +=
result[consLogical.getOperation()].col;
244 totalCoreEndpoints++;
256 if (totalCoreEndpoints > 0) {
257 groupCommonCol = (sumCols + totalCoreEndpoints / 2) / totalCoreEndpoints;
261 for (
auto logicalTile : logicalTiles) {
263 if (
result.count(logicalTile.getOperation()))
267 auto it = channelRequirements.find(logicalTile.getOperation());
268 int numInputChannels = 0, numOutputChannels = 0;
269 if (it != channelRequirements.end()) {
270 numInputChannels = it->second.first;
271 numOutputChannels = it->second.second;
275 auto colConstraint = logicalTile.tryGetCol();
276 int targetCol = colConstraint ? *colConstraint : groupCommonCol;
279 auto maybeTile = findTileWithCapacity(
281 numOutputChannels, logicalTile.getTileType());
284 return logicalTile.emitError()
285 <<
"no " << stringifyAIETileType(logicalTile.getTileType())
286 <<
" with sufficient DMA capacity";
288 result[logicalTile] = *maybeTile;
291 if (numInputChannels > 0)
292 updateChannelUsage(*maybeTile,
false, numInputChannels);
293 if (numOutputChannels > 0)
294 updateChannelUsage(*maybeTile,
true, numOutputChannels);
299 for (
auto logicalTile : logicalTiles) {
301 if (
result.count(logicalTile.getOperation()))
305 AIETileType tileType = logicalTile.getTileType();
306 if (tileType == AIETileType::CoreTile ||
307 tileType == AIETileType::ShimPLTile)
311 auto colConstraint = logicalTile.tryGetCol();
312 int targetCol = colConstraint ? *colConstraint : 0;
315 auto maybeTile = findTileWithCapacity(targetCol, availability.
nonCompTiles,
319 return logicalTile.emitError()
320 <<
"no " << stringifyAIETileType(tileType) <<
" available";
322 result[logicalTile] = *maybeTile;
// Checks that `tile` can supply the DMA channels `logicalTile` needs and, on
// the visible success path, records that usage through updateChannelUsage.
//
// Parameters:
//   logicalTile         - logical tile being placed; its operation is the key
//                         used to look up `channelRequirements`.
//   tile                - physical tile the logical tile is being placed on.
//   channelRequirements - operation -> (input channels, output channels).
//   isConstrained       - not read on any line visible here; presumably it
//                         selects between the two diagnostic prefixes below.
//
// NOTE(review): the number at the start of each line is a listing line number
// left over from extraction; the gaps in that numbering mean the return
// statements, the computation of `availIn`/`availOut` and several braces are
// not visible in this view.
328LogicalResult SequentialPlacer::validateAndUpdateChannelUsage(
329 LogicalTileOp logicalTile,
TileID tile,
330 const llvm::DenseMap<Operation *, std::pair<int, int>> &channelRequirements,
331 bool isConstrained) {
// A logical tile without an entry in the map needs no channels (both counts
// stay 0).
334 auto it = channelRequirements.find(logicalTile.getOperation());
335 int inChannels = 0, outChannels = 0;
336 if (it != channelRequirements.end()) {
337 inChannels = it->second.first;
338 outChannels = it->second.second;
// Failure path: the tile cannot take the requested channels, so build a
// diagnostic on the logical tile.
342 if (!hasAvailableChannels(tile, inChannels, outChannels)) {
// DMA connection counts reported by the logical tile itself; how they feed
// into availIn/availOut is on lines not visible here.
344 int maxIn = logicalTile.getNumDestConnections(WireBundle::DMA);
345 int maxOut = logicalTile.getNumSourceConnections(WireBundle::DMA);
349 auto diag = logicalTile.emitError();
// Two alternative message prefixes: one naming the tile coordinates, one
// without. The condition choosing between them is not visible here.
351 diag <<
"tile (" << tile.col <<
", " << tile.row <<
") requires ";
353 diag <<
"tile requires ";
354 diag << inChannels <<
" input/" << outChannels
355 <<
" output DMA channels, but only " << availIn <<
" input/"
356 << availOut <<
" output available";
// Record the usage: the second argument is false for input channels and true
// for output channels. Any guards around these calls are not visible here.
362 updateChannelUsage(tile,
false, inChannels);
364 updateChannelUsage(tile,
true, outChannels);
// Computes how many DMA channels each tile-defining operation needs for the
// given object FIFOs and object FIFO links.
//
// Returns a map from operation to a pair of counters: `.first` is incremented
// for consumers (input channels) and `.second` for producers (output
// channels).
//
// NOTE(review): the number at the start of each line is a listing line number
// left over from extraction; the gaps in that numbering mean the statements
// guarded by several `if`s below, and any null checks, are not visible in
// this view.
369llvm::DenseMap<Operation *, std::pair<int, int>>
370SequentialPlacer::buildChannelRequirements(
371 SmallVector<ObjectFifoCreateOp> &objectFifos,
372 SmallVector<ObjectFifoLinkOp> &objectFifoLinks) {
373 llvm::DenseMap<Operation *, std::pair<int, int>> channelRequirements;
// Symbol name -> FIFO op, used further down to resolve the FIFO names that
// links refer to.
376 llvm::StringMap<ObjectFifoCreateOp> fifoNameToOp;
377 for (
auto ofOp : objectFifos) {
378 fifoNameToOp[ofOp.getSymName()] = ofOp;
// Names of every FIFO that appears as an input or an output of any link.
383 llvm::DenseSet<llvm::StringRef> linkedFifoNames;
385 for (
auto linkOp : objectFifoLinks) {
386 for (
auto srcFifoAttr : linkOp.getFifoIns()) {
387 auto srcFifoName = cast<FlatSymbolRefAttr>(srcFifoAttr).getValue();
388 linkedFifoNames.insert(srcFifoName);
391 for (
auto dstFifoAttr : linkOp.getFifoOuts()) {
392 auto dstFifoName = cast<FlatSymbolRefAttr>(dstFifoAttr).getValue();
393 linkedFifoNames.insert(dstFifoName);
// Pass over the individual FIFOs.
398 for (
auto ofOp : objectFifos) {
// This FIFO takes part in a link. The guarded statement is not visible here
// (presumably the FIFO is skipped, since links are accounted for in the loop
// over objectFifoLinks below -- TODO confirm).
400 if (linkedFifoNames.count(ofOp.getSymName()))
403 Value producerTile = ofOp.getProducerTile();
404 auto *producerOp = producerTile.getDefiningOp();
// dyn_cast_or_null tolerates a producer value that has no defining op.
405 auto producerLogicalTile = dyn_cast_or_null<LogicalTileOp>(producerOp);
// Set once any consumer of this FIFO is counted as needing a channel.
408 bool producerNeedsDMA =
false;
410 for (Value consumerTile : ofOp.getConsumerTiles()) {
411 auto *consumerOp = consumerTile.getDefiningOp();
412 auto consumerLogicalTile = dyn_cast_or_null<LogicalTileOp>(consumerOp);
// Producer and consumer are both core tiles. The guarded statement is not
// visible here (presumably such a pair is skipped as not needing DMA
// channels -- TODO confirm).
416 if (producerLogicalTile && consumerLogicalTile &&
417 producerLogicalTile.getTileType() == AIETileType::CoreTile &&
418 consumerLogicalTile.getTileType() == AIETileType::CoreTile)
// One more input channel on the consumer side.
// NOTE(review): consumerOp is null when the consumer value has no defining
// op; no guard is visible in this view -- confirm one exists on the missing
// lines.
423 channelRequirements[consumerOp].first++;
425 producerNeedsDMA =
true;
// The producer is charged a single output channel per FIFO, regardless of
// how many consumers were counted (this appears to sit after the consumer
// loop; the closing brace is not visible).
429 if (producerNeedsDMA && producerOp)
430 channelRequirements[producerOp].second++;
// Pass over the links: each link is attributed to one tile op.
435 for (
auto linkOp : objectFifoLinks) {
438 Operation *linkTileOp =
nullptr;
// Look for the tile op among the consumer tiles of the link's input FIFOs.
441 for (
auto srcFifoAttr : linkOp.getFifoIns()) {
442 auto srcFifoName = cast<FlatSymbolRefAttr>(srcFifoAttr).getValue();
443 auto it = fifoNameToOp.find(srcFifoName);
// Name does not resolve to a known FIFO; the guarded statement is not
// visible here.
444 if (it == fifoNameToOp.end())
447 auto srcFifo = it->second;
448 for (Value consumerTile : srcFifo.getConsumerTiles()) {
449 if (
auto *consumerOp = consumerTile.getDefiningOp()) {
450 linkTileOp = consumerOp;
// Lines between the search above and this point are not visible; whether
// linkTileOp is checked for null before the map updates below cannot be
// confirmed from here.
// The link is charged one input channel per input FIFO and one output
// channel per output FIFO.
464 int numInputChannels = linkOp.getFifoIns().size();
465 int numOutputChannels = linkOp.getFifoOuts().size();
467 channelRequirements[linkTileOp].first += numInputChannels;
468 channelRequirements[linkTileOp].second += numOutputChannels;
471 return channelRequirements;
// Partitions the object FIFOs into numbered groups and fills the two output
// maps.
//
// Outputs:
//   groupToFifos        - group id -> FIFOs assigned to that group.
//   groupToLogicalTiles - group id -> logical tiles that are a producer or a
//                         consumer of a FIFO in that group and whose tile
//                         type is not CoreTile.
//
// Every link gets its own group id shared by all of its input and output
// FIFOs; a FIFO that appears in no link gets a fresh id of its own.
//
// NOTE(review): the number at the start of each line is a listing line number
// left over from extraction; the gaps in that numbering mean the declarations
// of `nextGroupId` and `groupId`, among other lines, are not visible in this
// view.
474void SequentialPlacer::buildObjectFifoGroups(
475 SmallVector<ObjectFifoCreateOp> &objectFifos,
476 SmallVector<ObjectFifoLinkOp> &objectFifoLinks,
477 llvm::DenseMap<
int, SmallVector<ObjectFifoCreateOp>> &groupToFifos,
478 llvm::DenseMap<
int, SmallVector<LogicalTileOp>> &groupToLogicalTiles) {
// FIFO symbol name -> group id, for FIFOs that take part in a link.
481 llvm::StringMap<int> fifoToGroup;
// One new group per link. Because the assignment below overwrites, a FIFO
// named by more than one link ends up in the group of the last such link.
485 for (
auto linkOp : objectFifoLinks) {
486 int groupId = nextGroupId++;
489 for (
auto srcFifoAttr : linkOp.getFifoIns()) {
490 auto srcFifoName = cast<FlatSymbolRefAttr>(srcFifoAttr).getValue();
491 fifoToGroup[srcFifoName] = groupId;
495 for (
auto dstFifoAttr : linkOp.getFifoOuts()) {
496 auto dstFifoName = cast<FlatSymbolRefAttr>(dstFifoAttr).getValue();
497 fifoToGroup[dstFifoName] = groupId;
// Ids for unlinked FIFOs continue after the ids handed out to links.
502 int unlinkedGroupId = nextGroupId;
504 for (
auto ofOp : objectFifos) {
506 auto groupIt = fifoToGroup.find(ofOp.getSymName());
// Linked FIFO: reuse the link's group. Otherwise: take the next unused id.
509 if (groupIt != fifoToGroup.end()) {
510 groupId = groupIt->second;
512 groupId = unlinkedGroupId++;
515 groupToFifos[groupId].push_back(ofOp);
// Register the producer when it is a logical tile other than a core tile.
519 Value producerTile = ofOp.getProducerTile();
520 if (
auto *producerOp = producerTile.getDefiningOp()) {
521 if (
auto prodLogical = dyn_cast<LogicalTileOp>(producerOp)) {
522 if (prodLogical.getTileType() != AIETileType::CoreTile) {
523 groupToLogicalTiles[groupId].push_back(prodLogical);
// Same for every consumer. No de-duplication is visible here, so a tile that
// is an endpoint of several FIFOs in one group may be listed more than once.
529 for (Value consumerTile : ofOp.getConsumerTiles()) {
530 if (
auto *consumerOp = consumerTile.getDefiningOp()) {
531 if (
auto consLogical = dyn_cast<LogicalTileOp>(consumerOp)) {
532 if (consLogical.getTileType() != AIETileType::CoreTile) {
533 groupToLogicalTiles[groupId].push_back(consLogical);
// Searches `tiles` for a tile of type `requestedType` that still has room for
// the requested number of input and output DMA channels.
//
// Columns are examined in the order targetCol, targetCol + 1, ... (the offset
// only grows, so columns to the left of targetCol are not examined by the
// visible code). Within a column, `tiles` is scanned in its stored order.
//
// Returns an optional TileID.
//
// NOTE(review): the number at the start of each line is a listing line number
// left over from extraction; the gaps in that numbering mean the return
// statements and the statements guarded by the `if`s below are not visible in
// this view.
541std::optional<TileID> SequentialPlacer::findTileWithCapacity(
542 int targetCol, std::vector<TileID> &tiles,
int requiredInputChannels,
543 int requiredOutputChannels, AIETileType requestedType) {
// Number of columns of the device, taken from the target model.
544 int maxCol = targetModel->
columns();
547 for (
int offset = 0; offset < maxCol; ++offset) {
548 int searchCol = targetCol + offset;
// searchCol ran past the last column. The guarded statement is not visible
// here (presumably it ends the search or wraps around -- TODO confirm).
549 if (searchCol >= maxCol)
552 for (
auto &tile : tiles) {
// Only tiles whose type, as reported by the target model, equals the
// requested type are considered; the guarded statement is not visible here
// (presumably `continue`).
553 AIETileType tileType = targetModel->
getTileType(tile.col, tile.row);
554 if (tileType != requestedType)
// Candidate must sit in the column currently being searched and must have
// enough free channels.
557 if (tile.col == searchCol) {
559 if (hasAvailableChannels(tile, requiredInputChannels,
560 requiredOutputChannels)) {
// Presumably records DMA channel usage for `tile`; the lines doing so are not
// visible in this view. Call sites in this file pass (tile, false, <input
// channel count>) and (tile, true, <output channel count>), so `isOutput`
// distinguishes output channels from input channels.
//
// NOTE(review): the number at the start of each line is a listing line number
// left over from extraction; the gaps in that numbering mean the rest of the
// parameter list and most of the body are not visible here.
570void SequentialPlacer::updateChannelUsage(
TileID tile,
bool isOutput,
// Called with zero requested channels, hasAvailableChannels can only report
// false when the usage already recorded exceeds the limit (going by that
// function's return expression). What is done with `type` in that case is
// not visible here.
578 if (!hasAvailableChannels(tile, 0, 0)) {
579 AIETileType type = targetModel->
getTileType(tile.col, tile.row);
// Reports whether `tile` can accept `inputChannels` more input channels and
// `outputChannels` more output channels.
//
// NOTE(review): the number at the start of each line is a listing line number
// left over from extraction; the gaps in that numbering mean the lines that
// compute currentIn, currentOut, maxIn and maxOut are not visible here.
584bool SequentialPlacer::hasAvailableChannels(
TileID tile,
int inputChannels,
585 int outputChannels) {
// True only when both directions stay within their limits after adding the
// request; the comparison is inclusive, so exactly reaching the limit is
// allowed.
605 return (currentIn + inputChannels <= maxIn) &&
606 (currentOut + outputChannels <= maxOut);
610 auto removeFromVector = [&](std::vector<TileID> &vec) {
611 vec.erase(std::remove(vec.begin(), vec.end(), tile), vec.end());
615 case AIETileType::CoreTile:
618 case AIETileType::MemTile:
619 case AIETileType::ShimNOCTile:
620 case AIETileType::ShimPLTile:
virtual uint32_t getNumSourceShimMuxConnections(int col, int row, WireBundle bundle) const =0
Return the number of sources of connections inside a shimmux.
virtual AIETileType getTileType(int col, int row) const =0
Return the tile type for the given tile coordinates.
virtual int rows() const =0
Return the number of rows in the device.
virtual int columns() const =0
Return the number of columns in the device.
virtual uint32_t getNumDestShimMuxConnections(int col, int row, WireBundle bundle) const =0
Return the number of destinations of connections inside a shimmux.
virtual uint32_t getNumDestSwitchboxConnections(int col, int row, WireBundle bundle) const =0
Return the number of destinations of connections inside a switchbox.
virtual uint32_t getNumSourceSwitchboxConnections(int col, int row, WireBundle bundle) const =0
Return the number of sources of connections inside a switchbox.
void initialize(const AIETargetModel &targetModel) override
mlir::LogicalResult place(DeviceOp device) override
Include the generated interface declarations.
TileID { friend std::ostream &operator<<(std::ostream &os, const TileID &s) { os<< "TileID("<< s.col<< ", "<< s.row<< ")" TileID
std::vector< TileID > compTiles
llvm::DenseMap< TileID, int > outputChannelsUsed
void removeTile(TileID tile, AIETileType type)
std::vector< TileID > nonCompTiles
llvm::DenseMap< TileID, int > inputChannelsUsed