  // Verify that the BD block only contains operation types we know how to
  // lower to NPU instructions.
  WalkResult unsupported_ops = block.walk([&](Operation *inner_op) {
    return llvm::TypeSwitch<Operation *, WalkResult>(inner_op)
        .Case<AIE::DMABDOp>(
            [&](AIE::DMABDOp bd_op) { return WalkResult::advance(); })
        .Case<AIE::UseLockOp>(
            [&](AIE::UseLockOp lock_op) { return WalkResult::advance(); })
        .Case<AIE::NextBDOp>(
            [&](AIE::NextBDOp next_bd_op) { return WalkResult::advance(); })
        .Case<AIE::EndOp>(
            [&](AIE::EndOp end_op) { return WalkResult::advance(); })
        .Default([&](Operation *inner_op) {
          auto error = block.getParentOp()->emitOpError(
              "Unsupported operation within BD block.");
          error.attachNote(inner_op->getLoc())
              << "No lowering to NPU instructions available for this "
                 "operation.";
          return WalkResult::interrupt();
        });
  });
  if (unsupported_ops.wasInterrupted()) {
    return failure();
  }

  // ...

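// Emits the NPU instruction that sets the base address of a single buffer
// descriptor: an address patch for runtime-supplied (DDR) buffers on shim
// tiles, or a direct register write for statically placed aie.buffers.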
LogicalResult setAddressForSingleBD(OpBuilder &builder, AIE::DMABDOp &bd_op,
                                    AIE::TileOp &tile) {
  const AIE::AIETargetModel &target_model = AIE::getTargetModel(bd_op);
  uint32_t bd_id = bd_op.getBdId().value();
  auto buf = bd_op.getBuffer();
  uint64_t register_addr = getBufferDescriptorAddressRegisterAddress(
      target_model, bd_id, tile.getCol(), tile.getRow());
  if (mlir::BlockArgument buf_arg = llvm::dyn_cast<mlir::BlockArgument>(buf)) {
    // The buffer is a runtime sequence argument in DDR; its host address is
    // only known at runtime, so emit an address patch for the driver.
    if (!target_model.isShimNOCTile(tile.getCol(), tile.getRow())) {
      return bd_op->emitOpError("DDR memory (runtime input arguments) can "
                                "only be referred to on shim tiles.");
    }
    unsigned arg_idx = buf_arg.getArgNumber();
    int64_t offset = bd_op.getOffsetInBytes();
    builder.create<NpuAddressPatchOp>(bd_op.getLoc(), register_addr, arg_idx,
                                      offset);
  } else if (AIE::BufferOp buffer =
                 llvm::dyn_cast<AIE::BufferOp>(buf.getDefiningOp())) {
    // The buffer is a statically placed aie.buffer whose address is known at
    // compile time, so write it into the BD register directly.
    if (!buffer.getAddress().has_value()) {
      return bd_op->emitOpError(
          "Cannot lower buffer without associated address. Run pass "
          "--aie-assign-buffer-addresses first or manually assign an "
          "address.");
    }
    uint64_t buf_addr = *buffer.getAddress();
    builder.create<NpuWrite32Op>(bd_op.getLoc(), register_addr, buf_addr,
                                 nullptr, nullptr, nullptr);
  } else {
    return bd_op->emitOpError(
        "Buffer argument must be either a constant aie.buffer or a runtime "
        "sequence input argument.");
  }
  return success();
}

LogicalResult rewriteSingleBD(OpBuilder &builder, Block &block,
                              AIE::TileOp &tile,
                              AIE::DMAChannelDir channelDir) {
  AIE::DMABDOp bd_op = *block.getOps<AIE::DMABDOp>().begin();
  const AIE::AIETargetModel &target_model = AIE::getTargetModel(bd_op);
  MemRefType buffer_type = bd_op.getBuffer().getType();
  uint32_t addr_granularity = target_model.getAddressGenGranularity();

  uint32_t bd_id = bd_op.getBdId().value();
  int64_t offset = bd_op.getOffsetInBytes();
  uint64_t len = bd_op.getLenInBytes();
  uint64_t len_addr_granularity = len * 8 / addr_granularity;
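  // addr_granularity is in bits while len is in bytes; e.g. with a 32-bit
  // address generation granularity, a 128-byte transfer corresponds to
  // 128 * 8 / 32 = 32 granularity units.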

  if (offset * 8 % addr_granularity != 0) {
    return bd_op->emitOpError("Offset must be aligned to ")
           << (addr_granularity / 8) << " byte boundary.";
  }

  if (len < addr_granularity / 8) {
    return bd_op->emitOpError("Transfer size of ")
           << len << " bytes falls below minimum hardware transfer unit of "
           << (addr_granularity / 8) << " bytes.";
  }

  std::optional<llvm::ArrayRef<AIE::BDDimLayoutAttr>> dims =
      bd_op.getDimensions();
  llvm::SmallVector<int64_t, 4> sizes(4, 0);
  llvm::SmallVector<int64_t, 4> strides(4, 0);

  std::optional<llvm::ArrayRef<AIE::BDPadLayoutAttr>> padDims =
      bd_op.getPadDimensions();
  llvm::SmallVector<int64_t, 4> padBefore(4, 0);
  llvm::SmallVector<int64_t, 4> padAfter(4, 0);
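  // Hardware BDs support up to four dimensions. Index 3 (the highest
  // dimension) serves as the iteration/repeat dimension; indices 0-2
  // describe the data layout of one transfer.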

  int64_t d0size = 0;
  int64_t d0stride = 0;
  int64_t d1size = 0;
  int64_t d1stride = 0;
  int64_t d2size = 0;
  int64_t d2stride = 0;
  int64_t iteration_size = 0;
  int64_t iteration_stride = 0;

  if (dims && dims->size() > 0) {
    llvm::SmallVector<int64_t, 4> input_sizes(4, 1);
    llvm::SmallVector<int64_t, 4> input_strides(4, 0);
    if (dims->size() > 4) {
      return bd_op->emitOpError("At most four data layout transformation "
                                "dimensions may be provided.");
    }
    // aie.dma_bd lists dimensions from highest to lowest; reverse them so
    // that index 0 is the lowest (fastest-varying) dimension.
    for (size_t i = 0; i < dims->size(); i++) {
      int j = dims->size() - i - 1;
      input_sizes[i] = (*dims)[j].getSize();
      input_strides[i] = (*dims)[j].getStride();
    }

    bool isLinearTransfer = (input_sizes[0] >= 1) && (input_sizes[1] == 1) &&
                            (input_sizes[2] == 1);
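    // A linear transfer moves data only along the lowest dimension, so no
    // data layout (stride/wrap) registers need to be programmed for it.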

    if (dims->size() > 2) {
      // d2size is only programmed on MemTiles; it stays 0 elsewhere.
      d2size = (target_model.isMemTile(tile.getCol(), tile.getRow()))
                   ? (*dims)[2].getSize()
                   : 0;
    }
    if (padDims.has_value()) {
      if (!target_model.isMemTile(tile.getCol(), tile.getRow()))
        return bd_op->emitOpError()
               << "Padding is only supported by memtile dma bds.";
      if (padDims->size() > dims->size())
        return bd_op->emitOpError()
               << "Mismatched number of dimensions between padding(s) and "
               << "wrap(s) and stride(s).";
      if (channelDir == AIE::DMAChannelDir::MM2S) {
        // Padding dimensions are also listed highest to lowest; reverse them
        // to match, and zero-fill any dimensions without padding.
        for (size_t i = 0; i < padDims->size(); i++) {
          int j = padDims->size() - i - 1;
          padBefore[i] = (*padDims)[j].getConstPadBefore();
          padAfter[i] = (*padDims)[j].getConstPadAfter();
        }
        for (size_t i = padDims->size(); i < dims->size(); i++) {
          padBefore[i] = 0;
          padAfter[i] = 0;
        }
      } else {
        return bd_op->emitOpError()
               << "supports padding only for MM2S direction on MemTiles.";
      }
    }

    getHardwareStridesWraps(target_model, buffer_type, input_sizes,
                            input_strides, sizes, strides);
    if (failed(verifyStridesWraps(bd_op, buffer_type, tile.getCol(),
                                  tile.getRow(), input_sizes, input_strides,
                                  sizes, strides, isLinearTransfer))) {
      return failure();
    }

    // The highest dimension programs the iteration (repeat) registers.
    iteration_size = sizes[3];
    iteration_stride = strides[3];

    if (!isLinearTransfer) {
      d0size = sizes[0];
      d0stride = strides[0];
      d1size = sizes[1];
      d1stride = strides[1];
      // d2size was set above (MemTiles only).
      d2stride = strides[2];
    }
    if (input_sizes[3] > 1 && input_strides[3] == 0) {
      // A highest-dimension stride of 0 encodes a repeat of the same data;
      // the hardware expresses this as an iteration stride of 0.
      iteration_stride = 0;
    }

    uint64_t len_dims_addr_granularity = 1;
    for (size_t i = 0; i < 3; i++) {
      len_dims_addr_granularity *= sizes[i];
    }
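    // For example, with sizes = {8, 4, 2, N} the lowest three dimensions
    // transfer 8 * 4 * 2 = 64 granularity units per iteration; this must
    // equal the BD length, while N only sets the repeat count.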
    if (len_dims_addr_granularity != len_addr_granularity) {
      auto err =
          bd_op->emitOpError(
              "Buffer descriptor length does not match length of transfer "
              "expressed by lowest three dimensions of data layout "
              "transformation strides/wraps. ")
          << "BD length is " << (len_addr_granularity * addr_granularity / 8)
          << " bytes. "
          << "Lowest three dimensions of data layout transformation would "
             "result in transfer of "
          << (len_dims_addr_granularity * addr_granularity / 8) << " bytes. ";
      err.attachNote() << "Do not include the highest dimension size in "
                          "transfer length, as this is the BD repeat count.";
      return failure();
    }
  } else {
    if (padDims && target_model.isMemTile(tile.getCol(), tile.getRow()) &&
        channelDir == AIE::DMAChannelDir::MM2S) {
      return bd_op->emitOpError()
             << "Padding requires n-d data layouts expressed as "
             << "wrap(s) and stride(s).";
    } else if (padDims) {
      return bd_op->emitOpError() << "Padding is supported only on MemTiles.";
    }
  }

  uint32_t use_next_bd = 0;
  uint32_t next_bd_id = 0;
  if (bd_op.getNextBdId().has_value()) {
    next_bd_id = bd_op.getNextBdId().value();
    use_next_bd = 1;
  }
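  // use_next_bd/next_bd_id chain this BD to its successor so the DMA channel
  // processes the BDs as a linked list.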

  builder.create<NpuWriteBdOp>(
      bd_op.getLoc(), tile.getCol(), bd_id, len_addr_granularity, offset, 0,
      // ... (remaining size/stride, lock, and next-BD operands elided) ...
      padBefore[1], padBefore[2],
      padAfter[0], padAfter[1],
      // ...
      bd_op.getBurstLength());
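  // This single op carries the complete BD configuration validated above
  // (length, offset, strides/wraps, padding, and next-BD chaining).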