MLIR-AIE
AIEXDialect.cpp
//===- AIEXDialect.cpp ------------------------------------------*- C++ -*-===//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2019 Xilinx Inc.
//
//===----------------------------------------------------------------------===//

#include "aie/Dialect/AIEX/IR/AIEXDialect.h"

#include "mlir/IR/DialectImplementation.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Interfaces/FoldInterfaces.h"
#include "mlir/Transforms/InliningUtils.h"

#include "llvm/ADT/TypeSwitch.h"

#include <algorithm> // for std::find_if in checkBurstLength below
#include <numeric>

using namespace mlir;
using namespace xilinx;

#include "aie/Dialect/AIEX/IR/AIEXDialect.cpp.inc"

#define GET_TYPEDEF_CLASSES
#include "aie/Dialect/AIEX/IR/AIEXTypes.cpp.inc"

namespace xilinx::AIEX {

// FIXME: use Tablegen'd dialect class
void AIEXDialect::initialize() {
  addOperations<
#define GET_OP_LIST
#include "aie/Dialect/AIEX/IR/AIEX.cpp.inc"
      >();
  addTypes<
#define GET_TYPEDEF_LIST
#include "aie/Dialect/AIEX/IR/AIEXTypes.cpp.inc"
      >();
}

} // namespace xilinx::AIEX

#define GET_OP_CLASSES
#include "aie/Dialect/AIEX/IR/AIEX.cpp.inc"

uint64_t AIEX::getBufferDescriptorAddressRegisterAddress(
    const AIE::AIETargetModel &tm, unsigned bd_id, unsigned col, unsigned row) {
  assert(bd_id < tm.getNumBDs(col, row));
  return ((col & 0xff) << tm.getColumnShift()) |
         ((row & 0xff) << tm.getRowShift()) | (0x1D004 + bd_id * 0x20);
}
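
// As a worked illustration (the shift values are target-model-specific and
// purely hypothetical here): with a column shift of 25 and a row shift of 20,
// BD 3 of tile (2, 0) maps to (2 << 25) | (0 << 20) | (0x1D004 + 3 * 0x20),
// i.e. 0x401D064.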

/* Return the correct values to write to the hardware registers to configure
   strides and wraps given the input user-facing strides and wraps.

   In the IR, we express strides in units of the element data type, but the
   hardware requires them in units of address granularity. Address granularity
   is currently 4 bytes for all hardware.

   User-facing strides/wraps relate to hardware as follows:

   - By default, stride 0 and size 1 are assumed if unspecified.
   - If only N strides/wraps are defined, those define the lowest N dimensions.

   inputStride[3] == iteration_stride / elemSizeFac + 1
   inputWrap[3]   == iteration_size + 1
       The highest-dimension stride/wrap map to the hardware iteration
       stride/count.
   inputStride[2] == d2_stride / elemSizeFac + 1
       Note: d2_size is not specified in hardware, as it is implicit from the
       total buffer transfer length.
   inputStride[1] == d1_stride / elemSizeFac + 1
   inputSize[1]   == d1_size
   inputStride[0] == d0_stride / elemSizeFac + 1
   inputSize[0]   == d0_size / elemSizeFac

   where elemSizeFac        == bufferElementSize / addressGranularity
   where bufferElementSize  == size in bytes of the elements in the buffer,
                               e.g. 4 for int32
   where addressGranularity == transfer granularity in hardware, which is
                               4 bytes for all current hardware

   Note: strides are expressed offset by one from the user input strides,
   because the hardware does not support a 0 stride (repeat).
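
   As a worked example (our own illustration, not from the hardware docs):
   for int16 data (elemWidth == 16) on hardware with 32-bit address
   granularity, inputSizes == [64, 4, 1, 1] and inputStrides == [1, 64, 0, 0]
   (lowest dimension first) produce
       sizes[0]   == 64 * 16 / 32      == 32
       strides[0] == 0  (one-element stride, below the address granularity)
       sizes[1]   == 4
       strides[1] == 64 * 16 / 32 - 1  == 31
   and sizes[2] == 1, strides[2] == 0, sizes[3] == strides[3] == 0.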
 */
void AIEX::getHardwareStridesWraps(const AIE::AIETargetModel &targetModel,
                                   mlir::BaseMemRefType referencedBufType,
                                   llvm::SmallVector<int64_t, 4> inputSizes,
                                   llvm::SmallVector<int64_t, 4> inputStrides,
                                   llvm::SmallVector<int64_t, 4> &sizes,
                                   llvm::SmallVector<int64_t, 4> &strides) {
  assert(inputSizes.size() == inputStrides.size());
  assert(sizes.size() == 4);
  assert(strides.size() == 4);

  auto elemWidth = referencedBufType.getElementTypeBitWidth();
  auto addressGranularity = targetModel.getAddressGenGranularity();

  // Output strides and sizes are default-initialized to 0.
  std::fill(sizes.begin(), sizes.end(), 0);
  std::fill(strides.begin(), strides.end(), 0);

  if (inputSizes[0] == 0) {
    // Illegal input; this won't transfer anything at all.
    // Leave it to the verification functions to complain to the user.
    return;
  }

  // d0_size, d0_stride
  sizes[0] = inputSizes[0] * elemWidth / addressGranularity;
  if (inputStrides[0] * elemWidth < addressGranularity) {
    // While the hardware cannot transfer less than addressGranularity bits at
    // a time, the user may express a contiguous transfer of multiple
    // elements with a stride smaller than addressGranularity. We can thus set
    // the stride to 1 (encoded in hardware as 0) here to allow such transfers.
    // The verification function should ensure that
    //     inputStrides[0] * elemWidth < addressGranularity
    // iff inputSizes[0] * elemWidth > addressGranularity.
    strides[0] = 0;
  } else {
    strides[0] = inputStrides[0] * elemWidth / addressGranularity - 1;
  }

  // d1_size, d1_stride
  sizes[1] = inputSizes[1];
  if (inputSizes[1] > 1) {
    // Stride only matters if we have more than one iteration.
    strides[1] = inputStrides[1] * elemWidth / addressGranularity - 1;
  }

  // d2_size, d2_stride
  sizes[2] = inputSizes[2];
  if (inputSizes[2] > 1) {
    // Stride only matters if we have more than one iteration.
    strides[2] = inputStrides[2] * elemWidth / addressGranularity - 1;
  }

  // iteration_size, iteration_stride
  if (inputSizes[3] > 1) {
    // Stride only matters if we have more than one iteration.
    sizes[3] = inputSizes[3] - 1;
    // Note that the iteration_stride must be positive, just like the other
    // dimensions. However, one can encode a zero-stride "repeat" of the same
    // transfer by setting a positive repeat_count on the pushToQueue
    // instruction and setting the size here to 1. This causes the BD to
    // "wrap" at every single iteration, effectively never adding the
    // specified stride, which amounts to a repeat without a stride.
    if (inputStrides[3] > 0) {
      strides[3] = inputStrides[3] * elemWidth / addressGranularity - 1;
    }
  }
}

mlir::LogicalResult
AIEX::verifyStridesWraps(mlir::Operation *forOp,
                         mlir::BaseMemRefType referencedBufType, int tileCol,
                         int tileRow, llvm::SmallVector<int64_t, 4> inputSizes,
                         llvm::SmallVector<int64_t, 4> inputStrides,
                         llvm::SmallVector<int64_t, 4> hardwareSizes,
                         llvm::SmallVector<int64_t, 4> hardwareStrides,
                         bool skipTransformationChecks) {
  const auto &targetModel = AIE::getTargetModel(forOp);
  auto addressGranularity = targetModel.getAddressGenGranularity();
  auto elemWidth = referencedBufType.getElementTypeBitWidth();

  uint32_t wrap_bits = 0;
  uint32_t step_bits = 0;
  uint32_t iter_bits = 6;
  if (targetModel.isShimNOCTile(tileCol, tileRow)) {
    step_bits = 20; // XAIEMLGBL_NOC_MODULE_DMA_BD0_3_D0_STEPSIZE_WIDTH
    wrap_bits = 10; // XAIEMLGBL_NOC_MODULE_DMA_BD0_3_D0_WRAP_WIDTH
  } else if (targetModel.isMemTile(tileCol, tileRow)) {
    step_bits = 17; // XAIEMLGBL_MEM_TILE_MODULE_DMA_BD0_2_D0_STEPSIZE_WIDTH
    wrap_bits = 10; // XAIEMLGBL_MEM_TILE_MODULE_DMA_BD0_2_D0_WRAP_WIDTH
  } else if (targetModel.isCoreTile(tileCol, tileRow)) {
    step_bits = 13; // XAIEMLGBL_MEMORY_MODULE_DMA_BD0_2_D0_STEPSIZE_WIDTH
    wrap_bits = 8;  // XAIEMLGBL_MEMORY_MODULE_DMA_BD0_3_D0_WRAP_WIDTH
  } else {
    return forOp->emitOpError(
        "Unsupported tile type at (" + std::to_string(tileCol) + ", " +
        std::to_string(tileRow) + "). Must be ShimNOC, Mem or Core.");
  }
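
  // With these widths, a Shim NOC tile, for example, supports hardware wraps
  // of at most 2^10 - 1 == 1023 and user-visible strides of at most 2^20
  // (strides are encoded off by one); the checks below report anything
  // larger back to the user.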

  for (int i = 0; i < 4; i++) {
    if (inputSizes[i] <= 0) {
      return forOp->emitOpError("Size ") << i << " must be a positive integer.";
    }
  }

  if (inputSizes[0] * elemWidth % addressGranularity != 0) {
    std::stringstream msg;
    msg << "Transfer sizes must be multiples of " << (addressGranularity / 8)
        << " bytes. " << inputSizes[0] << " elements at " << (elemWidth / 8)
        << " bytes each equal " << (inputSizes[0] * elemWidth / 8)
        << " bytes, which is not divisible by " << (addressGranularity / 8)
        << ". ";
    return forOp->emitOpError(msg.str());
  }

  for (int i = 0; i < 3; i++) {
    if (inputSizes[i] > 1 && inputStrides[i] < 1) {
      // If inputSizes[i] == 1, anything is allowable in the stride, since
      // that stride will never be applied. For any larger size, we must
      // verify that the stride is positive.
      return forOp->emitOpError("Stride ")
             << i << " must be a positive integer.";
    }
  }
  // A value of zero is allowable for the fourth-dimension stride
  // (this indicates an iteration stride of 0 for the repeat).
  if (inputSizes[3] > 1 && inputStrides[3] < 0) {
    return forOp->emitOpError("Stride 3 must be a non-negative integer.");
  }

  for (int i = 0; i < 4; i++) {
    // strides[0] == 1 is ok iff the transfer size is a multiple of
    // addressGranularity, which is checked above.
    if (i == 0 && inputStrides[i] == 1)
      continue;
    if (inputStrides[i] * elemWidth % addressGranularity != 0) {
      std::stringstream msg;
      msg << "Stride " << i << " is " << inputStrides[i] << " elements * "
          << (elemWidth / 8) << " bytes = " << (inputStrides[i] * elemWidth / 8)
          << " bytes, which is not divisible by " << (addressGranularity / 8)
          << ". ";
      return forOp->emitOpError(msg.str());
    }
  }

  if (!skipTransformationChecks && hardwareSizes[0] > (1 << wrap_bits) - 1)
    return forOp->emitOpError(
        "Size 0 exceeds the [0:" + std::to_string((1 << wrap_bits) - 1) +
        "] range.");
  if (hardwareSizes[1] > (1 << wrap_bits) - 1)
    return forOp->emitOpError(
        "Size 1 exceeds the [0:" + std::to_string((1 << wrap_bits) - 1) +
        "] range.");
  if (hardwareSizes[3] > (1 << iter_bits))
    return forOp->emitOpError(
        "Size 3 exceeds the [1:" + std::to_string(1 << iter_bits) + "] range.");
  if (hardwareStrides[0] > (1 << step_bits))
    return forOp->emitOpError("Stride 0 exceeds the [1:" +
                              std::to_string(1 << step_bits) + "] range.");
  if (hardwareStrides[1] > (1 << step_bits))
    return forOp->emitOpError("Stride 1 exceeds the [1:" +
                              std::to_string(1 << step_bits) + "] range.");
  if (hardwareStrides[2] > (1 << step_bits))
    return forOp->emitOpError("Stride 2 exceeds the [1:" +
                              std::to_string(1 << step_bits) + "] range.");
  // strides[3] exceeding the range is ok iff sizes[3] is one (encoded as 0),
  // which the conjunction below checks.
  if (hardwareStrides[3] > (1 << step_bits) && hardwareSizes[3] > 0)
    return forOp->emitOpError("Stride 3 exceeds the [1:" +
                              std::to_string(1 << step_bits) + "] range.");

  return success();
}

//===----------------------------------------------------------------------===//
// UseTokenOp
//===----------------------------------------------------------------------===//

LogicalResult AIEX::UseTokenOp::verify() {
  auto *parentOp = (*this)->getParentOp();
  if (isa<func::FuncOp>(parentOp) || isa<AIE::CoreOp>(parentOp) ||
      isa<AIE::MemOp>(parentOp) || isa<AIE::ShimDMAOp>(parentOp))
    return success();
  return failure();
}

//===----------------------------------------------------------------------===//
// MulticastOp
//===----------------------------------------------------------------------===//

LogicalResult AIEX::MulticastOp::verify() {
  Region &body = getPorts();
  assert(getOperation()->getNumRegions());
  assert(!body.empty());
  for (auto &ops : body.front())
    if (!isa<MultiDestOp, AIE::EndOp>(ops))
      return ops.emitOpError("cannot be contained in a Multicast op");

  return success();
}

//===----------------------------------------------------------------------===//
// BroadcastPacketOp
//===----------------------------------------------------------------------===//

LogicalResult AIEX::BroadcastPacketOp::verify() {
  Region &body = getPorts();
  assert(getOperation()->getNumRegions());
  assert(!body.empty());
  for (auto &ops : body.front())
    if (!isa<BPIDOp, AIE::EndOp>(ops))
      return ops.emitOpError("cannot be contained in a BroadcastPacket op");

  return success();
}

//===----------------------------------------------------------------------===//
// NpuDmaMemcpyNdOp
//===----------------------------------------------------------------------===//

/* Calculates the offset value, in bytes, to be written to the buffer
   descriptor as the base address of the transfer. */
int64_t AIEX::NpuDmaMemcpyNdOp::getOffsetInBytes() {
  llvm::SmallVector<int64_t, 4> offsets =
      llvm::map_to_vector(llvm::reverse(getMixedOffsets()), [](OpFoldResult s) {
        return getConstantIntValue(s).value();
      });
  llvm::SmallVector<int64_t, 4> strides =
      llvm::map_to_vector(llvm::reverse(getMixedStrides()), [](OpFoldResult s) {
        return getConstantIntValue(s).value();
      });
  size_t offset = 0;
  BaseMemRefType my_memref = getMemref().getType();
  size_t R = offsets.size();
  size_t el_bit_width = my_memref.getElementTypeBitWidth();
  assert(el_bit_width % 8 == 0 &&
         "Expected Memref element bitwidth to be multiple of 8.");
  size_t S = el_bit_width / 8;
  for (size_t i = 0; i < R; i++)
    offset += offsets[i] * strides[i] * S;
  return offset;
}
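
// For instance (our own illustration): with i32 elements (S == 4), offsets
// [0, 0, 2, 8] and strides [0, 0, 64, 1] in IR order arrive here reversed,
// and the returned offset is (8 * 1 + 2 * 64) * 4 == 544 bytes.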

// dma_memcpy_nd transfers of the form [*, 1, 1, len][*, 0, 0, 1] do not
// specify any data layout transformation, but simply express a contiguous
// transfer of `len`. We exclude the 4th dimension from these checks, because
// a repeat count is still possible without a data layout transformation.
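// Note that getMixedSizes()/getMixedStrides() are reversed below, so the IR
// form [*, 1, 1, len][*, 0, 0, 1] arrives as inputSizes == [len, 1, 1, *] and
// inputStrides == [1, 0, 0, *], which is exactly what the return checks.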
bool AIEX::NpuDmaMemcpyNdOp::isLinearTransferWithoutTransformation() {
  llvm::SmallVector<int64_t, 4> inputSizes =
      llvm::map_to_vector(llvm::reverse(getMixedSizes()), [](OpFoldResult s) {
        return getConstantIntValue(s).value();
      });
  llvm::SmallVector<int64_t, 4> inputStrides =
      llvm::map_to_vector(llvm::reverse(getMixedStrides()), [](OpFoldResult s) {
        return getConstantIntValue(s).value();
      });
  return (inputSizes[1] == 1 && inputSizes[2] == 1 && inputStrides[0] == 1 &&
          inputStrides[1] == 0 && inputStrides[2] == 0);
}

// Helper method to check if a requested burst length is supported by the
// target model. Returns an error message if the burst length is not
// supported, or an empty optional otherwise.
static std::optional<std::string>
checkBurstLength(const xilinx::AIE::AIETargetModel &targetModel,
                 uint32_t requestedBurstLength) {
  if (requestedBurstLength != 0) {
    auto bel = targetModel.getShimBurstEncodingsAndLengths();
    auto pair = std::find_if(bel.begin(), bel.end(),
                             [=](const std::pair<uint32_t, uint32_t> &p) {
                               return p.second == requestedBurstLength;
                             });

    if (pair == bel.end()) {
      std::string errorMessage =
          "Requested burst length is not supported by the target. "
          "Supported burst lengths:";

      errorMessage =
          std::accumulate(bel.begin(), bel.end(), errorMessage,
                          [](const std::string &a, auto b) {
                            return a + " " + std::to_string(b.second);
                          });

      return errorMessage;
    }
  }

  return std::nullopt;
}
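
// For example, if the target reports supported burst lengths {64, 128, 256}
// (illustrative values only), checkBurstLength(tm, 128) returns std::nullopt,
// while checkBurstLength(tm, 96) returns the explanatory error message.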

LogicalResult AIEX::NpuDmaMemcpyNdOp::verify() {
  BaseMemRefType buffer = getMemref().getType();
  const auto &targetModel = AIE::getTargetModel(*this);
  auto addressGranularity = targetModel.getAddressGenGranularity();

  if (buffer.getElementTypeBitWidth() > addressGranularity) {
    return emitOpError("Maximum element bit width allowed is ")
           << addressGranularity << " bits.";
  } else if (buffer.hasStaticShape() &&
             (buffer.getNumElements() * buffer.getElementTypeBitWidth()) <
                 addressGranularity) {
    return emitOpError("Minimum data transfer size required is ")
           << addressGranularity << " bits.";
  }
  if (!llvm::all_of(getMixedStrides(), [](OpFoldResult s) {
        return getConstantIntValue(s).has_value();
      }))
    return emitOpError("Only constant strides currently supported.");
  if (!llvm::all_of(getMixedSizes(), [](OpFoldResult s) {
        return getConstantIntValue(s).has_value();
      }))
    return emitOpError("Only constant sizes currently supported.");
  if (!llvm::all_of(getMixedOffsets(), [](OpFoldResult s) {
        return getConstantIntValue(s).has_value();
      }))
    return emitOpError("Only constant offsets currently supported.");

  llvm::SmallVector<int64_t, 4> inputSizes =
      llvm::map_to_vector(llvm::reverse(getMixedSizes()), [](OpFoldResult s) {
        return getConstantIntValue(s).value();
      });
  llvm::SmallVector<int64_t, 4> inputStrides =
      llvm::map_to_vector(llvm::reverse(getMixedStrides()), [](OpFoldResult s) {
        return getConstantIntValue(s).value();
      });
  llvm::SmallVector<int64_t, 4> hardwareSizes(4);
  llvm::SmallVector<int64_t, 4> hardwareStrides(4);
  getHardwareStridesWraps(targetModel, buffer, inputSizes, inputStrides,
                          hardwareSizes, hardwareStrides);
  int64_t offset = getOffsetInBytes();

  auto errorMessage = checkBurstLength(targetModel, getBurstLength());
  if (errorMessage.has_value()) {
    return emitOpError(errorMessage.value());
  }

  // The experimental HSA target uses this op on AIE1; skip all the
  // AIE2-specific checks.
  if (targetModel.getTargetArch() == AIE::AIEArch::AIE1)
    return success();

  if (offset % 4 != 0) {
    return emitOpError("Offset must be 4-byte-aligned.");
  }

  // dma_memcpy_nd transfers of the form [1, 1, 1, len][0, 0, 0, 1] do not
  // specify any data layout transformation, but simply express a contiguous
  // transfer of `len`. For backwards compatibility, we allow this to proceed
  // even if it exceeds the maximum stride/wrap size of any one dimension,
  // and simply do not lower any data layout transformations, since there is
  // no other way to express this at the dma_memcpy_nd interface otherwise.
  AIE::ShimDMAllocationGetter allocGetter;
  AIE::DeviceOp dev = getOperation()->getParentOfType<AIE::DeviceOp>();
  if (auto allocOp = allocGetter.get(dev, getMetadata())) {
    int col = allocOp->getCol();
    bool skipTransformationChecks = isLinearTransferWithoutTransformation();
    if (failed(verifyStridesWraps(*this, buffer, col, 0, inputSizes,
                                  inputStrides, hardwareSizes, hardwareStrides,
                                  skipTransformationChecks))) {
      return failure();
    }
  }

  // packet header
  if (auto packetInfo = getPacket()) {
    if (packetInfo->getPktType() > 7)
      return emitOpError("Packet type field can only hold 3 bits.");
    if (packetInfo->getPktId() > 31)
      return emitOpError("Packet ID field can only hold 5 bits.");
  }

  return success();
}

//===----------------------------------------------------------------------===//
// NpuDmaWaitOp
//===----------------------------------------------------------------------===//

LogicalResult AIEX::NpuDmaWaitOp::verify() {
  AIE::DeviceOp dev = (*this)->getParentOfType<AIE::DeviceOp>();
  // Some passes (e.g. aie-standard-lowering) use aiex ops outside a DeviceOp,
  // so we can't expect the device to always exist.
  if (dev && !dev.lookupSymbol(getSymbol()))
    return emitOpError("couldn't find symbol in parent device");
  return success();
}

//===----------------------------------------------------------------------===//
// NpuPushQueueOp
//===----------------------------------------------------------------------===//

LogicalResult AIEX::NpuPushQueueOp::verify() {
  const auto &targetModel = AIE::getTargetModel(*this);
  auto numBds = targetModel.getNumBDs(getColumn(), getRow());
  if (getBdId() > numBds)
    return emitOpError("BD ID exceeds the maximum ID.");
  if (getRepeatCount() > 255)
    return emitOpError("Repeat count exceeds the [0:255] range.");
  return success();
}

//===----------------------------------------------------------------------===//
// NpuWriteBdOp
//===----------------------------------------------------------------------===//

LogicalResult AIEX::NpuWriteBdOp::verify() {
  const auto &targetModel = AIE::getTargetModel(*this);
  auto numBds = targetModel.getNumBDs(getColumn(), getRow());
  bool isLinearTransfer =
      (getD0Size() >= 1) && (getD1Size() == 1) && (getIterationSize() == 0);
  if (getBdId() > numBds)
    return emitOpError("BD ID exceeds the maximum ID.");
  if (!isLinearTransfer && getD0Size() > 0x3FF)
    return emitOpError("D0 Size exceeds the [0:1023] range.");
  if (getD0Stride() > 0xFFFFF)
    return emitOpError("D0 Stride exceeds the [0:1M-1] range.");
  if (getD1Size() > 0x3FF)
    return emitOpError("D1 Size exceeds the [0:1023] range.");
  if (getD1Stride() > 0xFFFFF)
    return emitOpError("D1 Stride exceeds the [0:1M-1] range.");
  if (getD2Stride() > 0xFFFFF)
    return emitOpError("D2 Stride exceeds the [0:1M-1] range.");
  if (getIterationSize() > 0x3F)
    return emitOpError("Iteration Size exceeds the [0:63] range.");
  if (getIterationStride() > 0xFFFFF)
    return emitOpError("Iteration Stride exceeds the [0:1M-1] range.");
  if (targetModel.isShimNOCTile(getColumn(), getRow()) && getD2Size() != 0)
    return emitOpError("ShimTile only supports 3 dimensions of sizes.");
  if (targetModel.isShimNOCTile(getColumn(), getRow()) &&
      (getD0ZeroBefore() != 0 || getD0ZeroAfter() != 0 ||
       getD1ZeroBefore() != 0 || getD1ZeroAfter() != 0 ||
       getD2ZeroBefore() != 0 || getD2ZeroAfter() != 0))
    return emitOpError("ShimTile doesn't support zero padding.");
  if (!targetModel.isShimNOCTile(getColumn(), getRow()) &&
      getBurstLength() != 0)
    return emitOpError("Only ShimTiles support burst length.");
  auto errorMessage = checkBurstLength(targetModel, getBurstLength());
  if (errorMessage.has_value()) {
    return emitOpError(errorMessage.value());
  }

  return success();
}

//===----------------------------------------------------------------------===//
// RuntimeSequenceOp
//===----------------------------------------------------------------------===//

ParseResult AIEX::RuntimeSequenceOp::parse(OpAsmParser &parser,
                                           OperationState &result) {

  StringAttr nameAttr;
  (void)parser.parseOptionalSymbolName(
      nameAttr, mlir::SymbolTable::getSymbolAttrName(), result.attributes);

  SmallVector<OpAsmParser::Argument> entryArgs;

  // Entry arguments, e.g. (%addr: memref<1xi32>)
  ParseResult argParseResult = parser.parseCommaSeparatedList(
      OpAsmParser::Delimiter::Paren, [&]() -> ParseResult {
        OpAsmParser::Argument argument;
        if (parser.parseArgument(argument, true, true)) {
          return failure();
        }
        entryArgs.push_back(argument);
        return success();
      });
  if (argParseResult) {
    return argParseResult;
  }

  // Body
  auto *body = result.addRegion();
  ParseResult bodyParseResult = parser.parseRegion(*body, entryArgs, false);
  if (bodyParseResult) {
    return bodyParseResult;
  }

  return success();
}

void AIEX::RuntimeSequenceOp::print(OpAsmPrinter &printer) {
  Region &body = getRegion();

  auto nameAttr = (*this)->getAttrOfType<StringAttr>(
      mlir::SymbolTable::getSymbolAttrName());
  if (nameAttr) {
    printer << ' ';
    printer.printSymbolName(nameAttr);
  }

  printer << '(';
  for (unsigned i = 0, n = body.getNumArguments(); i < n; i++) {
    if (i > 0) {
      printer << ", ";
    }
    printer.printRegionArgument(body.getArgument(i));
  }
  printer << ')';

  printer << ' ';
  printer.printRegion(body, false, true);
}

LogicalResult AIEX::RuntimeSequenceOp::verify() {
  AIE::DeviceOp device = (*this)->getParentOfType<AIE::DeviceOp>();
  if (!device) {
    // This check is redundant with the HasParent trait, but can't hurt.
    (*this)->emitOpError() << "must be inside AIE device operation.";
    return failure();
  }
  return success();
}

//===----------------------------------------------------------------------===//
// DMAConfigureTaskOp
//===----------------------------------------------------------------------===//

std::optional<uint32_t> AIEX::DMAConfigureTaskOp::getFirstBdId() {
  Region &body = getBody();
  if (body.empty()) {
    return std::nullopt;
  }
  auto bd_ops = body.front().getOps<AIE::DMABDOp>();
  if (bd_ops.empty() && body.front().getNumSuccessors() == 1) {
    // Allow the first block to be empty and point to the entry point of the
    // chain. This allows for specifying cyclic BD chains (infinite loops)
    // within the constraints of MLIR syntax.
    Block &chain_entry = *body.front().getSuccessor(0);
    bd_ops = chain_entry.getOps<AIE::DMABDOp>();
  }
  if (bd_ops.empty()) {
    return std::nullopt;
  }
  AIE::DMABDOp bd = *bd_ops.begin();
  if (!bd.getBdId().has_value()) {
    return std::nullopt;
  }
  return bd.getBdId().value();
}
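
// A sketch of the cyclic case described above (illustrative only; the actual
// terminator op between BD blocks may differ):
//   ^entry:            // empty entry block pointing into the cycle
//     aie.next_bd ^bd1
//   ^bd1:
//     aie.dma_bd(...)
//     aie.next_bd ^bd2
//   ^bd2:
//     aie.dma_bd(...)
//     aie.next_bd ^bd1 // loops back, so the chain never terminates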

LogicalResult
AIEX::DMAConfigureTaskOp::canonicalize(AIEX::DMAConfigureTaskOp op,
                                       PatternRewriter &rewriter) {
  // Remove blocks that contain nothing but a terminator.
  Region &body = op.getBody();
  bool did_rewrite = false;
  for (auto it = body.begin(); it != body.end(); ++it) {
    Block &block = *it;
    if (block.empty()) {
      continue;
    }
    auto ops_it = block.without_terminator();
    if (std::distance(ops_it.begin(), ops_it.end()) == 0) {
      rewriter.eraseOp(block.getTerminator());
      did_rewrite = true;
    }
  }
  if (did_rewrite) {
    return success();
  }
  return failure();
}

LogicalResult AIEX::DMAConfigureTaskOp::verify() {
  Region &body = getBody();
  for (auto it = body.begin(); it != body.end(); ++it) {
    Block &block = *it;
    if (block.empty()) {
      continue;
    }
    if (block.hasNoPredecessors() && !block.isEntryBlock()) {
      block.getTerminator()->emitError(
          "Block ending in this terminator does not form a chain with "
          "entry block.");
      return failure();
    }

    const AIE::AIETargetModel &targetModel =
        AIE::getTargetModel(getOperation());

    // This is a layering violation on the DMABDOps, but they are never
    // verified otherwise, because DMAConfigureTaskOps are not yet merged into
    // the AIE dialect: the normal DMABDOp verifier skips over any BD inside a
    // DMAConfigureTaskOp.
    LogicalResult result = success();
    block.walk([&](AIE::DMABDOp bd) {
      if (bd.getBurstLength() != 0 &&
          !targetModel.isShimNOCTile(getTileID().col, getTileID().row)) {
        bd.emitOpError("Burst length is only supported in Shim NOC tiles that "
                       "are connected to the memory-mapped NOC.");
        result = failure();
      }
    });
    if (failed(result)) {
      return result;
    }
  }
  return success();
}

//===----------------------------------------------------------------------===//
// DMAStartBdChainOp
//===----------------------------------------------------------------------===//

AIE::BDChainOp AIEX::DMAStartBdChainOp::getBDChainOp() {
  AIE::DeviceOp device = (*this)->getParentOfType<AIE::DeviceOp>();
  AIE::BDChainOp chain = device.lookupSymbol<AIE::BDChainOp>(getSymbol());
  return chain;
}

LogicalResult AIEX::DMAStartBdChainOp::verify() {
  AIE::BDChainOp chain = getBDChainOp();
  if (!chain) {
    return emitOpError("symbol does not reference valid BD chain");
  }

  auto actualArgTypes = getArgs().getTypes();
  auto expectedArgTypes = chain.getRegion().getArgumentTypes();
  if (actualArgTypes.size() != expectedArgTypes.size()) {
    return emitOpError("Number of arguments does not match.");
  }
  for (unsigned i = 0, n = expectedArgTypes.size(); i < n; i++) {
    if (actualArgTypes[i] != expectedArgTypes[i]) {
      return emitOpError("Argument ") << (i + 1) << " type mismatch: "
                                      << "expected " << expectedArgTypes[i]
                                      << " but got " << actualArgTypes[i];
    }
  }
  return success();
}

//===----------------------------------------------------------------------===//
// NpuControlPacketOp
//===----------------------------------------------------------------------===//

uint32_t AIEX::NpuControlPacketOp::getRowFromAddr() {
  const auto &targetModel = AIE::getTargetModel(*this);
  uint32_t addr = getAddress();
  uint32_t rowInt = (addr >> targetModel.getRowShift()) & 0x1f;
  return rowInt;
}

uint32_t AIEX::NpuControlPacketOp::getColumnFromAddr() {
  const auto &targetModel = AIE::getTargetModel(*this);
  uint32_t addr = getAddress();
  uint32_t colInt = (addr >> targetModel.getColumnShift()) & 0x1f;
  return colInt;
}

//===----------------------------------------------------------------------===//
// SetLockOp
//===----------------------------------------------------------------------===//

LogicalResult AIEX::SetLockOp::verify() {
  const auto &targetModel = AIE::getTargetModel(*this);

  if (targetModel.getTargetArch() == AIE::AIEArch::AIE1)
    return emitOpError("SetLockOp is not supported on AIE1.");

  if (getValue() > targetModel.getMaxLockValue())
    return emitOpError("Lock value exceeds the maximum value of " +
                       std::to_string(targetModel.getMaxLockValue()));

  auto lockOp = getLockOp();
  auto lockIDOpt = lockOp.getLockID();
  // Note that the lock ID may not be assigned initially, so let's wait until
  // it is to verify the lock-ID-dependent conditions.
  if (!lockIDOpt) {
    return success();
  }

  auto col = lockOp.colIndex();
  auto row = lockOp.rowIndex();
  uint32_t lockID = lockOp.getLockIDValue();

  if (lockID >= targetModel.getNumLocks(col, row)) {
    return emitOpError("Lock ID out of range for given tile. Max ID: " +
                       std::to_string(targetModel.getNumLocks(col, row) - 1));
  }

  if (!targetModel.getLocalLockAddress(lockID, lockOp.getTileID())) {
    return emitOpError("Invalid lock ID and tile combination when trying to "
                       "retrieve the local lock address.");
  }

  return success();
}