MLIR-AIE
IntervalReuse.cpp
Go to the documentation of this file.
1//===-IntervalReuse.cpp - Interval Analysis for AIE Vectors -----*- C++ -*-===//
2//
3// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7// (c) Copyright 2022 Xilinx Inc.
8//
9//===----------------------------------------------------------------------===//
10// This file implements the interval abstraction for AIE vectors. The
11// abstraction is essential to generate UPD instructions, and compute the
12// start/offset in AIE MAC/MUL intrinsic.
13//===----------------------------------------------------------------------===//
14
17
18#include "llvm/Support/Debug.h"
19
20using namespace llvm;
21using namespace mlir;
22using namespace xilinx::aievec;
23
24#define DEBUG_TYPE "aie-reuse"
25
26// Return true if the read operation is enclosed with the same loop nests as
27// the other read operations belonging to this IntervalReuse object.
28bool IntervalReuse::sameEnclosingLoops(
29 Operation *op, mlir::DenseMap<Block *, SmallVector<Operation *, 8>>
30 &blockToEnclosingLoops) {
31 // Assert that there are some existing read operations in the interval
32 assert(!extentMap.empty() &&
33 "interval must have at least one read operation");
34 // Get any of the previous read operation belonging to this interval as
35 // reference.
36 Operation *ref = extentMap.begin()->first;
37
38 // Assert that we have computed the enclosing loops for the reference and
39 // current read op.
40 assert(blockToEnclosingLoops.count(op->getBlock()) &&
41 "block to enclosing loop mapping not computed");
42 assert(blockToEnclosingLoops.count(ref->getBlock()) &&
43 "block to enclosing loop mapping not computed");
44 // return true if both reference and op are enclosed in the same loop nest
45 return blockToEnclosingLoops[op->getBlock()] ==
46 blockToEnclosingLoops[ref->getBlock()];
47}
48
49// Given a read operation, return the index of the interval that completely
50// subsumes this operation's access extent.
51size_t IntervalReuse::getIntervalIndex(Operation *op) {
52 // Get the access extent for this operation
53 auto bound = getAccessExtent(op);
54 // Do a binary search to find the interval that subsumes the bound. Note
55 // that the intervals are sorted and disjoint.
56 auto lb = std::lower_bound(intervals.begin(), intervals.end(), bound);
57 // If the intervals are [0,512],[512,1024], and we want to find [256,512],
58 // then the lower_bound will index into [512,1024]. We need to check the
59 // previous entry to find the correct bound.
60 if (lb != intervals.begin()) {
61 auto prev = std::prev(lb);
62 if (prev->first <= bound.first && prev->second >= bound.second)
63 lb = prev;
64 }
65 assert(lb != intervals.end() &&
66 "Failed to find correct interval for read operation");
67 // Assert that we found the right interval
68 assert(lb->first <= bound.first && lb->second >= bound.second &&
69 "Failed to find correct interval for read operation");
70 size_t pos = std::distance(intervals.begin(), lb);
71 return pos;
72}
73
74// Given a read operation belonging to this IntervalReuse object, return its
75// access extent.
76std::pair<int32_t, int32_t> IntervalReuse::getAccessExtent(Operation *op) {
77 assert(extentMap.find(op) != extentMap.end() &&
78 "Could not find the bounds of operator in map");
79 return extentMap[op];
80}
81
82// Set the access extent of a preexisting read operation in this IntervalReuse
83// object.
85 std::pair<int32_t, int32_t> &extent) {
86 assert(extentMap.find(op) != extentMap.end() &&
87 "operation does not belong to this reuse interval");
88 extentMap[op] = extent;
89}
90
91// For a read operation belonging to this IntervalReuse object, get the
92// interval that subsumes its read access extent.
93std::pair<int32_t, int32_t> IntervalReuse::getInterval(Operation *op) {
94 size_t pos = getIntervalIndex(op);
95 return intervals[pos];
96}
97
98// This read operation is only the LHS operand of a mul/mac op. So tag the
99// vector corresponding to its access interval. The tagging helps in coalescing
100// vectors for i8xi8 scheme.
102 // If vecIsLHSOperand is empty, initialize its size to the size of intervals
103 if (vecIsLHSOperand.empty())
104 vecIsLHSOperand.resize(intervals.size(), false);
105 // Get the position of this operation's access in the interval
106 size_t pos = getIntervalIndex(op);
107 assert(pos < vecIsLHSOperand.size());
108 // Set the corresponding index in vecIsLHSOperand to true
109 vecIsLHSOperand[pos] = true;
110}
111
112// For a read operation belonging to this reuse interval, return the interval
113// width. The interval size corresponds to the size of the vector that this
114// read op will get the data from.
115int32_t IntervalReuse::getIntervalWidth(Operation *op) {
116 auto interval = getInterval(op);
117 return interval.second - interval.first;
118}
119
120// Function to detect potential reuse in vector among different read ops. First
121// check if the same array is read by the read ops. Then check if their base
122// expr is the same. Finally, the accesses must belong to the same loop nest.
123// If all these conditions are met, there is a potential data reuse.
125 vector::TransferReadOp readOp, AffineExpr invariantBase,
126 mlir::DenseMap<Block *, SmallVector<Operation *, 8>>
127 &blockToEnclosingLoops) {
128 return sameMemRef(readOp.getSource()) &&
129 sameInvariantIndices(invariantBase) &&
130 sameEnclosingLoops(readOp, blockToEnclosingLoops);
131}
132
133// For the given vector read operation, compute the access bounds. For example,
134// if the vector size is 1x8, and the read is A[N:N+7], then the bound is [N,
135// N+7]. The returned bounds are vector size aligned. This means that if the
136// access is A[N+1:N+8], and the vector size is 256 bits, then the returned
137// bound is [N:N+15]. We assume that each row of the array is properly aligned.
138static std::pair<int32_t, int32_t>
139computeAccessExtent(vector::TransferReadOp readOp, int32_t offset,
140 int32_t loopStepSize, bool isSplat, unsigned minVecSize) {
141 VectorType vType = cast<VectorType>(readOp.getResult().getType());
142 unsigned vecSize = getVectorLaneSize(vType);
143 int32_t elementSizeInBits = getElementSizeInBits(vType);
144 // Create chunks greater in size than minVecSize
145 int32_t vecSizeInBits = std::max(minVecSize, vecSize * elementSizeInBits);
146
147 assert(isPowerOfTwo(vecSizeInBits) &&
148 "Current support is only for power-of-two vector sizes");
149
150 // Below computation is base on the assumption that vectorization factor is
151 // always a power of 2.
152 int32_t lb = (offset * elementSizeInBits) & ~(vecSizeInBits - 1);
153 int32_t ub = (isSplat ? (offset & ~(vecSize - 1)) + vecSize
154 : offset + loopStepSize * vecSize) *
155 elementSizeInBits;
156 // Adjust to the nearest multiple of vecSizeInBits
157 ub = (ub + vecSizeInBits - 1) & ~(vecSizeInBits - 1);
158
159 return std::make_pair(lb, ub);
160}
161
162// Insert a new interval for the read operation into already existing
163// intervals. This gives us an interval reuse graph, which can then be used to
164// determine the AIE vector sizes. The minimum vector size for AIE is 128 bits,
165// so align the access extents to at least 128-bit boundary.
167 vector::TransferReadOp readOp,
168 mlir::DenseMap<Operation *, IntervalReuse *> &opToIntervalMap,
169 int32_t offset, int32_t loopStepSize, bool isSplat, unsigned minVecSize) {
170 // Get the vector-size-aligned lower and upper bounds for the vector read
171 std::pair<int32_t, int32_t> bound =
172 computeAccessExtent(readOp, offset, loopStepSize, isSplat, minVecSize);
173
174 // Make an entry into extentMap
175 extentMap[readOp] = bound;
176 // Make an entry into readOp->interval map
177 opToIntervalMap[readOp] = this;
178
179 LLVM_DEBUG(llvm::dbgs() << "\n\nInserting access extent [" << bound.first
180 << "," << bound.second << "] for read op " << readOp);
181
182 // If the interval already exists, we're done
183 if (std::find(intervals.begin(), intervals.end(), bound) != intervals.end()) {
184 LLVM_DEBUG(
185 llvm::dbgs()
186 << "\n\tPre-existing interval already subsumes the access extent");
187 return;
188 }
189 // Insert bound into the current interval set. We do so by using O(N) space,
190 // creating a new mergedIntervals, and replacing intervals with it.
191 int32_t lb = bound.first, ub = bound.second;
192 SmallVector<std::pair<int32_t, int32_t>, 8> mergedIntervals;
193 bool inserted = false;
194
195 // Iterate over all the existing intervals, and merge them with the bound.
196 // There are three cases: (1) the interval falls before the bound; (2) the
197 // interval intersects with bound, and (3) the interval falls after the
198 // bound. The idea is to take care of (2), while just blindly inserting
199 // intervals that don't intersect with bounds.
200 for (auto iv : intervals) {
201 // No interference, interval i is definitely before new interval
202 if (iv.second <= lb)
203 mergedIntervals.push_back(iv);
204 else if (iv.first >= ub) {
205 // Interference finished at this point. Resolve if necessary
206 if (!inserted) {
207 mergedIntervals.push_back(std::make_pair(lb, ub));
208 inserted = true;
209 }
210 mergedIntervals.push_back(iv);
211 } else {
212 // Interference continues. Compute the overlap
213 lb = std::min(lb, iv.first);
214 ub = std::max(ub, iv.second);
215 }
216 }
217 if (!inserted)
218 mergedIntervals.push_back(std::make_pair(lb, ub));
219
220 intervals.clear();
221 intervals = std::move(mergedIntervals);
222
223 // Verify that each interval is within the AIE vector size limit
224 assert([&] {
225 for (auto iv : intervals) {
226 int32_t width = iv.second - iv.first;
227 if (width > 1024) {
228 printf("Vector width > 1024 currently not supported");
229 return false;
230 }
231 }
232 return true;
233 }());
234
235 // Print out the merged intervals
236 LLVM_DEBUG(llvm::dbgs() << "\n\tAfter inserting access extent, intervals: ");
237#ifndef NDEBUG
238 for (auto iv : intervals)
239 LLVM_DEBUG(llvm::dbgs() << "[" << iv.first << "," << iv.second << "] ");
240#endif
241}
242
243// If a vector corresponding to any interval was tagged as exclusively being
244// the LHS operand of a mul/fma op, run a vector coalescing loop. The loop is
245// pretty simple. It tries to identify two vectors that (1) are consecutive,
246// (2) tagged, and (3) the combined size is 512 bits, and coalesces them into a
247// single vector.
249 // Only proceed if any vector was tagged as exclusive LHS operand of mul/mac
250 // op.
251 if (vecIsLHSOperand.empty())
252 return;
253
254 // First check to see if we can coalesce any vector. The vector should be
255 // tagged, and its size should be <= 256 bits.
256 bool canCoalesce = false;
257 for (size_t i = 0, e = intervals.size(); i < e; ++i) {
258 canCoalesce |=
259 vecIsLHSOperand[i] && intervals[i].second - intervals[i].first <= 256;
260 }
261 if (!canCoalesce)
262 return;
263
264 // Now we try to coalesce
265 SmallVector<std::pair<int32_t, int32_t>, 8> coalescedIntervals;
266 for (size_t i = 0, e = intervals.size(); i < e;) {
267 // Check 1. Two consecutive vectors that can be fused
268 if (vecIsLHSOperand[i] && i < intervals.size() - 1 &&
269 vecIsLHSOperand[i + 1]) {
270 // Get vector sizes
271 int32_t v1size = intervals[i].second - intervals[i].first;
272 int32_t v2size = intervals[i + 1].second - intervals[i + 1].first;
273 // Check 2. Both vectors must be <= 256 bits, so that their sum is
274 // <= 512 bits.
275 if (v1size <= 256 && v2size <= 256) {
276 coalescedIntervals.push_back(
277 std::make_pair(intervals[i].first, intervals[i + 1].second));
278 i += 2;
279 continue;
280 }
281 }
282 coalescedIntervals.push_back(intervals[i]);
283 ++i;
284 }
285 // We are done with the use of the tagging vector. Erase it.
286 vecIsLHSOperand.clear();
287 // Replace intervals with the coalesced intervals
288 intervals.clear();
289 intervals = std::move(coalescedIntervals);
290
291 // Print out coalesced intervals
292 LLVM_DEBUG(llvm::dbgs() << "\n\nAfter coalescing for "
293 << "i8xi8 scheme, intervals: ");
294#ifndef NDEBUG
295 for (auto iv : intervals)
296 LLVM_DEBUG(llvm::dbgs() << "[" << iv.first << "," << iv.second << "] ");
297#endif
298}
void insertInterval(mlir::vector::TransferReadOp readOp, llvm::DenseMap< mlir::Operation *, IntervalReuse * > &dataAccessToIntervalMap, int32_t offset, int32_t forLoopStepSize, bool isSplat=false, unsigned minVecSize=128)
void setAccessExtent(mlir::Operation *op, std::pair< int32_t, int32_t > &extent)
bool potentialReuse(mlir::vector::TransferReadOp readOp, mlir::AffineExpr invariantBase, llvm::DenseMap< mlir::Block *, llvm::SmallVector< mlir::Operation *, 8 > > &blockToEnclosingLoops)
int32_t getIntervalWidth(mlir::Operation *op)
std::pair< int32_t, int32_t > getAccessExtent(mlir::Operation *op)
std::pair< int32_t, int32_t > getInterval(mlir::Operation *op)
void markLHSOperandVec(mlir::Operation *op)
bool isPowerOfTwo(int32_t n)
Definition AIEVecUtils.h:39
unsigned getVectorLaneSize(mlir::VectorType type)
Definition AIEVecUtils.h:55
int32_t getElementSizeInBits(mlir::VectorType type)
Definition AIEVecUtils.h:49