MLIR-AIE
IntervalReuse.cpp
Go to the documentation of this file.
1//===-IntervalReuse.cpp - Interval Analysis for AIE Vectors -----*- C++ -*-===//
2//
3// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7// (c) Copyright 2022 Xilinx Inc.
8//
9//===----------------------------------------------------------------------===//
10// This file implements the interval abstraction for AIE vectors. The
11// abstraction is essential to generate UPD instructions, and compute the
12// start/offset in AIE MAC/MUL intrinsic.
13//===----------------------------------------------------------------------===//
14
17
18#include "llvm/Support/Debug.h"
19
20using namespace llvm;
21using namespace mlir;
22using namespace xilinx::aievec;
23
24#define DEBUG_TYPE "aie-reuse"
25
26// Return true if the read operation is enclosed with the same loop nests as
27// the other read operations belonging to this IntervalReuse object.
28bool IntervalReuse::sameEnclosingLoops(
29 Operation *op, mlir::DenseMap<Block *, SmallVector<Operation *, 8>>
30 &blockToEnclosingLoops) {
31 // Assert that there are some existing read operations in the interval
32 assert(!extentMap.empty() &&
33 "interval must have at least one read operation");
34 // Get any of the previous read operation belonging to this interval as
35 // reference.
36 Operation *ref = extentMap.begin()->first;
37
38 // Assert that we have computed the enclosing loops for the reference and
39 // current read op.
40 assert(blockToEnclosingLoops.count(op->getBlock()) &&
41 "block to enclosing loop mapping not computed");
42 assert(blockToEnclosingLoops.count(ref->getBlock()) &&
43 "block to enclosing loop mapping not computed");
44 // return true if both reference and op are enclosed in the same loop nest
45 return blockToEnclosingLoops[op->getBlock()] ==
46 blockToEnclosingLoops[ref->getBlock()];
47}
48
49// Given a read operation, return the index of the interval that completely
50// subsumes this operation's access extent.
51size_t IntervalReuse::getIntervalIndex(Operation *op) {
52 // Get the access extent for this operation
53 auto bound = getAccessExtent(op);
54 // Do a binary search to find the interval that subsumes the bound. Note
55 // that the intervals are sorted and disjoint.
56 auto lb = std::lower_bound(intervals.begin(), intervals.end(), bound);
57 // If the intervals are [0,512],[512,1024], and we want to find [256,512],
58 // then the lower_bound will index into [512,1024]. We need to check the
59 // previous entry to find the correct bound.
60 if (lb != intervals.begin()) {
61 auto prev = std::prev(lb);
62 if (prev->first <= bound.first && prev->second >= bound.second)
63 lb = prev;
64 }
65 assert(lb != intervals.end() &&
66 "Failed to find correct interval for read operation");
67 // Assert that we found the right interval
68 assert(lb->first <= bound.first && lb->second >= bound.second &&
69 "Failed to find correct interval for read operation");
70 size_t pos = std::distance(intervals.begin(), lb);
71 return pos;
72}
73
74// Given a read operation belonging to this IntervalReuse object, return its
75// access extent.
76std::pair<int32_t, int32_t> IntervalReuse::getAccessExtent(Operation *op) {
77 assert(extentMap.find(op) != extentMap.end() &&
78 "Could not find the bounds of operator in map");
79 return extentMap[op];
80}
81
82// Set the access extent of a preexisting read operation in this IntervalReuse
83// object.
85 std::pair<int32_t, int32_t> &extent) {
86 assert(extentMap.find(op) != extentMap.end() &&
87 "operation does not belong to this reuse interval");
88 extentMap[op] = extent;
89}
90
91// For a read operation belonging to this IntervalReuse object, get the
92// interval that subsumes its read access extent.
93std::pair<int32_t, int32_t> IntervalReuse::getInterval(Operation *op) {
94 size_t pos = getIntervalIndex(op);
95 return intervals[pos];
96}
97
98// This read operation is only the LHS operand of a mul/mac op. So tag the
99// vector corresponding to its access interval. The tagging helps in coalescing
100// vectors for i8xi8 scheme.
102 // If vecIsLHSOperand is empty, initialize its size to the size of intervals
103 if (vecIsLHSOperand.empty())
104 vecIsLHSOperand.resize(intervals.size(), false);
105 // Get the position of this operation's access in the interval
106 size_t pos = getIntervalIndex(op);
107 assert(pos < vecIsLHSOperand.size());
108 // Set the corresponding index in vecIsLHSOperand to true
109 vecIsLHSOperand[pos] = true;
110}
111
112// For a read operation belonging to this reuse interval, return the interval
113// width. The interval size corresponds to the size of the vector that this
114// read op will get the data from.
115int32_t IntervalReuse::getIntervalWidth(Operation *op) {
116 auto interval = getInterval(op);
117 return interval.second - interval.first;
118}
119
120// Function to detect potential reuse in vector among different read ops. First
121// check if the same array is read by the read ops. Then check if their base
122// expr is the same. Finally, the accesses must belong to the same loop nest.
123// If all these conditions are met, there is a potential data reuse.
125 vector::TransferReadOp readOp, AffineExpr invariantBase,
126 mlir::DenseMap<Block *, SmallVector<Operation *, 8>>
127 &blockToEnclosingLoops) {
128 return sameMemRef(readOp.getBase()) && sameInvariantIndices(invariantBase) &&
129 sameEnclosingLoops(readOp, blockToEnclosingLoops);
130}
131
132// For the given vector read operation, compute the access bounds. For example,
133// if the vector size is 1x8, and the read is A[N:N+7], then the bound is [N,
134// N+7]. The returned bounds are vector size aligned. This means that if the
135// access is A[N+1:N+8], and the vector size is 256 bits, then the returned
136// bound is [N:N+15]. We assume that each row of the array is properly aligned.
137static std::pair<int32_t, int32_t>
138computeAccessExtent(vector::TransferReadOp readOp, int32_t offset,
139 int32_t loopStepSize, bool isSplat, unsigned minVecSize) {
140 VectorType vType = cast<VectorType>(readOp.getResult().getType());
141 unsigned vecSize = getVectorLaneSize(vType);
142 int32_t elementSizeInBits = getElementSizeInBits(vType);
143 // Create chunks greater in size than minVecSize
144 int32_t vecSizeInBits = std::max(minVecSize, vecSize * elementSizeInBits);
145
146 assert(isPowerOfTwo(vecSizeInBits) &&
147 "Current support is only for power-of-two vector sizes");
148
149 // Below computation is base on the assumption that vectorization factor is
150 // always a power of 2.
151 int32_t lb = (offset * elementSizeInBits) & ~(vecSizeInBits - 1);
152 int32_t ub = (isSplat ? (offset & ~(vecSize - 1)) + vecSize
153 : offset + loopStepSize * vecSize) *
154 elementSizeInBits;
155 // Adjust to the nearest multiple of vecSizeInBits
156 ub = (ub + vecSizeInBits - 1) & ~(vecSizeInBits - 1);
157
158 return std::make_pair(lb, ub);
159}
160
161// Insert a new interval for the read operation into already existing
162// intervals. This gives us an interval reuse graph, which can then be used to
163// determine the AIE vector sizes. The minimum vector size for AIE is 128 bits,
164// so align the access extents to at least 128-bit boundary.
166 vector::TransferReadOp readOp,
167 mlir::DenseMap<Operation *, IntervalReuse *> &opToIntervalMap,
168 int32_t offset, int32_t loopStepSize, bool isSplat, unsigned minVecSize) {
169 // Get the vector-size-aligned lower and upper bounds for the vector read
170 std::pair<int32_t, int32_t> bound =
171 computeAccessExtent(readOp, offset, loopStepSize, isSplat, minVecSize);
172
173 // Make an entry into extentMap
174 extentMap[readOp] = bound;
175 // Make an entry into readOp->interval map
176 opToIntervalMap[readOp] = this;
177
178 LLVM_DEBUG(llvm::dbgs() << "\n\nInserting access extent [" << bound.first
179 << "," << bound.second << "] for read op " << readOp);
180
181 // If the interval already exists, we're done
182 if (std::find(intervals.begin(), intervals.end(), bound) != intervals.end()) {
183 LLVM_DEBUG(
184 llvm::dbgs()
185 << "\n\tPre-existing interval already subsumes the access extent");
186 return;
187 }
188 // Insert bound into the current interval set. We do so by using O(N) space,
189 // creating a new mergedIntervals, and replacing intervals with it.
190 int32_t lb = bound.first, ub = bound.second;
191 SmallVector<std::pair<int32_t, int32_t>, 8> mergedIntervals;
192 bool inserted = false;
193
194 // Iterate over all the existing intervals, and merge them with the bound.
195 // There are three cases: (1) the interval falls before the bound; (2) the
196 // interval intersects with bound, and (3) the interval falls after the
197 // bound. The idea is to take care of (2), while just blindly inserting
198 // intervals that don't intersect with bounds.
199 for (auto iv : intervals) {
200 // No interference, interval i is definitely before new interval
201 if (iv.second <= lb)
202 mergedIntervals.push_back(iv);
203 else if (iv.first >= ub) {
204 // Interference finished at this point. Resolve if necessary
205 if (!inserted) {
206 mergedIntervals.push_back(std::make_pair(lb, ub));
207 inserted = true;
208 }
209 mergedIntervals.push_back(iv);
210 } else {
211 // Interference continues. Compute the overlap
212 lb = std::min(lb, iv.first);
213 ub = std::max(ub, iv.second);
214 }
215 }
216 if (!inserted)
217 mergedIntervals.push_back(std::make_pair(lb, ub));
218
219 intervals.clear();
220 intervals = std::move(mergedIntervals);
221
222 // Verify that each interval is within the AIE vector size limit
223 assert([&] {
224 for (auto iv : intervals) {
225 int32_t width = iv.second - iv.first;
226 if (width > 1024) {
227 printf("Vector width > 1024 currently not supported");
228 return false;
229 }
230 }
231 return true;
232 }());
233
234 // Print out the merged intervals
235 LLVM_DEBUG(llvm::dbgs() << "\n\tAfter inserting access extent, intervals: ");
236#ifndef NDEBUG
237 for (auto iv : intervals)
238 LLVM_DEBUG(llvm::dbgs() << "[" << iv.first << "," << iv.second << "] ");
239#endif
240}
241
242// If a vector corresponding to any interval was tagged as exclusively being
243// the LHS operand of a mul/fma op, run a vector coalescing loop. The loop is
244// pretty simple. It tries to identify two vectors that (1) are consecutive,
245// (2) tagged, and (3) the combined size is 512 bits, and coalesces them into a
246// single vector.
248 // Only proceed if any vector was tagged as exclusive LHS operand of mul/mac
249 // op.
250 if (vecIsLHSOperand.empty())
251 return;
252
253 // First check to see if we can coalesce any vector. The vector should be
254 // tagged, and its size should be <= 256 bits.
255 bool canCoalesce = false;
256 for (size_t i = 0, e = intervals.size(); i < e; ++i) {
257 canCoalesce |=
258 vecIsLHSOperand[i] && intervals[i].second - intervals[i].first <= 256;
259 }
260 if (!canCoalesce)
261 return;
262
263 // Now we try to coalesce
264 SmallVector<std::pair<int32_t, int32_t>, 8> coalescedIntervals;
265 for (size_t i = 0, e = intervals.size(); i < e;) {
266 // Check 1. Two consecutive vectors that can be fused
267 if (vecIsLHSOperand[i] && i < intervals.size() - 1 &&
268 vecIsLHSOperand[i + 1]) {
269 // Get vector sizes
270 int32_t v1size = intervals[i].second - intervals[i].first;
271 int32_t v2size = intervals[i + 1].second - intervals[i + 1].first;
272 // Check 2. Both vectors must be <= 256 bits, so that their sum is
273 // <= 512 bits.
274 if (v1size <= 256 && v2size <= 256) {
275 coalescedIntervals.push_back(
276 std::make_pair(intervals[i].first, intervals[i + 1].second));
277 i += 2;
278 continue;
279 }
280 }
281 coalescedIntervals.push_back(intervals[i]);
282 ++i;
283 }
284 // We are done with the use of the tagging vector. Erase it.
285 vecIsLHSOperand.clear();
286 // Replace intervals with the coalesced intervals
287 intervals.clear();
288 intervals = std::move(coalescedIntervals);
289
290 // Print out coalesced intervals
291 LLVM_DEBUG(llvm::dbgs() << "\n\nAfter coalescing for "
292 << "i8xi8 scheme, intervals: ");
293#ifndef NDEBUG
294 for (auto iv : intervals)
295 LLVM_DEBUG(llvm::dbgs() << "[" << iv.first << "," << iv.second << "] ");
296#endif
297}
void insertInterval(mlir::vector::TransferReadOp readOp, llvm::DenseMap< mlir::Operation *, IntervalReuse * > &dataAccessToIntervalMap, int32_t offset, int32_t forLoopStepSize, bool isSplat=false, unsigned minVecSize=128)
void setAccessExtent(mlir::Operation *op, std::pair< int32_t, int32_t > &extent)
bool potentialReuse(mlir::vector::TransferReadOp readOp, mlir::AffineExpr invariantBase, llvm::DenseMap< mlir::Block *, llvm::SmallVector< mlir::Operation *, 8 > > &blockToEnclosingLoops)
int32_t getIntervalWidth(mlir::Operation *op)
std::pair< int32_t, int32_t > getAccessExtent(mlir::Operation *op)
std::pair< int32_t, int32_t > getInterval(mlir::Operation *op)
void markLHSOperandVec(mlir::Operation *op)
bool isPowerOfTwo(int32_t n)
Definition AIEVecUtils.h:39
unsigned getVectorLaneSize(mlir::VectorType type)
Definition AIEVecUtils.h:55
int32_t getElementSizeInBits(mlir::VectorType type)
Definition AIEVecUtils.h:49