// --- Fragment: lower a DMA buffer-descriptor's buffer reference into ---
// --- concrete NPU register writes / address patches.                 ---
// NOTE(review): this is a partial view of a larger routine (signature and
// several interior lines are not visible here); comments are hedged where
// the elided code would be needed to confirm behavior.
// Hardware BD slot assigned to this op (assumes getBdId() was verified
// to hold a value earlier — TODO confirm in the elided code).
210 uint32_t bd_id = bd_op.getBdId().value();
// The SSA value backing the transfer: either a runtime-sequence block
// argument (DDR) or an aie.buffer result, dispatched on below.
212 auto buf = bd_op.getBuffer();
213 auto col = tile.getCol();
214 auto row = tile.getRow();
// Physical address of this BD's register block on tile (col, row).
// NOTE(review): the '+' continues onto lines elided from this view.
215 uint64_t register_addr = target_model.
getDmaBdAddress(col, row, bd_id) +
// Root block argument the buffer traces back to, if any.
222 mlir::BlockArgument buf_arg =
nullptr;
// Case 1: the buffer is (or traces back to) a runtime input argument.
225 if (
auto directArg = llvm::dyn_cast<mlir::BlockArgument>(buf)) {
// traceResult is produced by elided code — presumably it walks a chain of
// subviews/casts back to the root block argument; verify against the
// missing lines.
229 buf_arg = traceResult->rootArg;
230 offset = traceResult->offsetInBytes;
// DDR (host memory) is only reachable from shim NOC tiles.
234 if (!target_model.
isShimNOCTile(tile.getCol(), tile.getRow())) {
235 return bd_op->emitOpError(
"DDR memory (runtime input arguments) can "
236 "only be referred to on shim tiles.");
// Record which runtime argument backs this BD and the total byte offset;
// the actual host address is patched in at runtime via NpuAddressPatchOp.
239 unsigned arg_idx = buf_arg.getArgNumber();
240 offset += bd_op.getOffsetInBytes();
// NOTE(review): remaining NpuAddressPatchOp::create arguments are elided.
241 NpuAddressPatchOp::create(builder, bd_op.getLoc(),
245 }
// Case 2: the buffer is a statically placed aie.buffer.
else if (AIE::BufferOp buffer =
246 llvm::dyn_cast<AIE::BufferOp>(buf.getDefiningOp())) {
// A concrete address must already be assigned to the buffer.
248 if (!buffer.getAddress().has_value()) {
249 return bd_op->emitOpError(
250 "Cannot lower buffer without associated address. Run pass "
251 "--aie-assign-buffer-addresses first or manually assign an "
// buf_addr is declared in elided code; start from the buffer's assigned
// address plus the BD's own byte offset.
254 buf_addr = *buffer.getAddress();
255 buf_addr += bd_op.getOffsetInBytes();
// Write the base-address field of the BD register: the address is stored
// in 32-bit words ('/ 4') shifted into the field masked by 0x0fffc000.
// NOTE(review): trailing create() arguments are elided from this view.
257 NpuMaskWrite32Op::create(builder, bd_op.getLoc(), register_addr,
258 (buf_addr / 4) << 14, 0x0fffc000,
nullptr,
260 }
// Memtile case: the buffer may live on a neighboring tile, so translate
// its address into this memtile's local address space.
else if (target_model.
isMemTile(col, row)) {
// NOTE(review): this is the tail of a call (presumably a
// target_model.get*LocalBaseAddress-style lookup) whose head is elided.
268 col, row, buffer.getTileOp().getCol(),
269 buffer.getTileOp().getRow());
271 buf_addr += addrOffset.value();
// Memtile BD base-address field: word address masked by 0x0007FFFF.
// NOTE(review): trailing create() arguments are elided from this view.
273 NpuMaskWrite32Op::create(builder, bd_op.getLoc(), register_addr,
274 buf_addr / 4, 0x0007FFFF,
nullptr,
nullptr,
// Fallback path visible here: plain 32-bit write of the buffer address.
// NOTE(review): the branch structure around this write is partly elided.
277 NpuWrite32Op::create(builder, bd_op.getLoc(), register_addr, buf_addr,
278 nullptr,
nullptr,
nullptr);
// No recognized producer for the buffer value: reject with a diagnostic
// listing the accepted forms.
281 return bd_op->emitOpError(
282 "Buffer argument must be a constant aie.buffer, a runtime sequence "
283 "input argument, or a (chain of) subview(s) or cast(s) of a block "
284 "argument with constant offsets and strides equal to one.");
// --- Fragment: configure one DMA buffer descriptor and emit NpuWriteBdOp. ---
// NOTE(review): the function signature begins before this view (these are
// trailing parameters) and the function continues past it; many interior
// lines are elided, so comments are hedged accordingly.
// channelDir selects S2MM vs MM2S semantics (padding is MM2S-only below);
// packet is a fallback packet-info attribute used when the BD has none.
291 AIE::DMAChannelDir channelDir,
292 std::optional<xilinx::AIE::PacketInfoAttr> packet) {
295 auto buffer_type = llvm::cast<BaseMemRefType>(bd_op.getBuffer().getType());
// Hardware address-generation granularity in BITS (divided by 8 for bytes).
296 uint32_t addr_granularity = target_model.getAddressGenGranularity();
// Assumes getBdId() holds a value — presumably validated earlier; confirm.
298 uint32_t bd_id = bd_op.getBdId().value();
299 int64_t offset = bd_op.getOffsetInBytes();
300 uint64_t len = bd_op.getLenInBytes();
// Transfer length expressed in address-generation units.
301 uint64_t len_addr_granularity = len * 8 / addr_granularity;
// The byte offset must be a multiple of the hardware granularity.
303 if (offset * 8 % addr_granularity != 0) {
304 return bd_op->emitOpError(
"Offset must be aligned to ")
305 << (addr_granularity / 8) <<
" byte boundary.";
// The transfer must cover at least one hardware transfer unit.
308 if (len < addr_granularity / 8) {
309 return bd_op->emitOpError(
"Transfer size of ")
310 << len <<
" bytes falls below minimum hardware transfer unit of "
311 << (addr_granularity / 8) <<
" bytes.";
// Optional n-D data-layout transformation (wraps/strides) on the BD.
314 std::optional<llvm::ArrayRef<AIE::BDDimLayoutAttr>> dims =
315 bd_op.getDimensions();
// Up to 4 hardware dimensions; zero-filled defaults.
316 llvm::SmallVector<int64_t, 4> sizes = llvm::SmallVector<int64_t, 4>(4, 0);
317 llvm::SmallVector<int64_t, 4> strides = llvm::SmallVector<int64_t, 4>(4, 0);
// Optional per-dimension zero-padding (memtile MM2S only, checked below).
320 std::optional<llvm::ArrayRef<AIE::BDPadLayoutAttr>> padDims =
321 bd_op.getPadDimensions();
322 llvm::SmallVector<int64_t, 4> padBefore =
323 llvm::SmallVector<int64_t, 4>(4, 0);
324 llvm::SmallVector<int64_t, 4> padAfter =
325 llvm::SmallVector<int64_t, 4>(4, 0);
// NOTE(review): these fills are redundant — both vectors are already
// value-initialized to 0 by the (4, 0) constructor above.
326 std::fill(padBefore.begin(), padBefore.end(), 0);
327 std::fill(padAfter.begin(), padAfter.end(), 0);
// Packet / ordering / iteration defaults; overwritten below when present.
329 auto enable_packet = 0;
330 auto out_of_order_id = 0;
332 auto packet_type = 0;
339 auto iteration_size = 0;
340 auto iteration_stride = 0;
// A data-layout transformation was supplied: validate and normalize it.
342 if (dims && dims->size() > 0) {
// User-facing sizes default to 1 (no-op dim), strides to 0.
343 llvm::SmallVector<int64_t, 4> input_sizes =
344 llvm::SmallVector<int64_t, 4>(4, 1);
345 llvm::SmallVector<int64_t, 4> input_strides =
346 llvm::SmallVector<int64_t, 4>(4, 0);
347 if (dims->size() > 4) {
348 return bd_op->emitOpError(
"At most four data layout transformation "
349 "dimensions may be provided.");
// Copy dims reversed: attribute order is outermost-first, the hardware
// arrays here are innermost-first.
352 for (
size_t i = 0; i < dims->size(); i++) {
356 int j = dims->size() - i - 1;
357 input_sizes[i] = (*dims)[j].getSize();
358 input_strides[i] = (*dims)[j].getStride();
// d2size handling differs between memtiles and other tiles.
// NOTE(review): the ':' branch of this conditional is elided from view.
365 if (dims->size() > 2) {
366 d2size = (target_model.isMemTile(tile.getCol(), tile.getRow()))
367 ? (*dims)[2].getSize()
// Padding constraints: memtile-only, and no more pad dims than wrap dims.
370 if (padDims.has_value()) {
371 if (!target_model.isMemTile(tile.getCol(), tile.getRow()))
372 return bd_op->emitOpError()
373 <<
"Padding is only supported by memtile dma bds.";
374 if (padDims->size() > dims->size())
375 return bd_op->emitOpError()
376 <<
"Mismatch number of dimensions between padding(s)"
377 <<
" and wrap(s) and stride(s).";
// Padding values are honored only on MM2S; copy them reversed like dims.
378 if (channelDir == AIE::DMAChannelDir::MM2S) {
379 for (
size_t i = 0; i < padDims->size(); i++) {
380 int j = padDims->size() - i - 1;
381 padBefore[i] = (*padDims)[j].getConstPadBefore();
382 padAfter[i] = (*padDims)[j].getConstPadAfter();
// NOTE(review): loop body elided — presumably zero-fills the remaining
// pad entries; the error below belongs to the non-MM2S branch.
384 for (
size_t i = padDims->size(); i < dims->size(); i++) {
389 return bd_op->emitOpError()
390 <<
"supports padding only for MM2S direction on MemTiles.";
// NOTE(review): tails of two elided calls that presumably normalize
// input_sizes/input_strides into the hardware sizes/strides arrays.
393 input_strides, sizes, strides);
396 tile.getRow(), input_sizes, input_strides,
// Highest dimension maps to the BD iteration (repeat) registers.
401 iteration_size = sizes[3];
402 iteration_stride = strides[3];
407 d0stride = strides[0];
411 d1stride = strides[1];
414 d2stride = strides[2];
// A repeat count with stride 0 means "replay the same data"; the
// hardware encodes that as iteration_stride = 0.
417 if (input_sizes[3] > 1 && input_strides[3] == 0) {
423 iteration_stride = 0;
// Cross-check: the product of the lowest three dims must equal the BD
// length (the fourth dim is the repeat count, not part of the length).
430 uint64_t len_dims_addr_granularity = 1;
431 for (
size_t i = 0; i < 3; i++) {
432 len_dims_addr_granularity *= sizes[i];
434 if (len_dims_addr_granularity != len_addr_granularity) {
437 "Buffer descriptor length does not match length of transfer "
438 "expressed by lowest three dimensions of data layout "
439 "transformation strides/wraps. ")
440 <<
"BD length is " << (len_addr_granularity * addr_granularity / 8)
442 <<
"Lowest three dimensions of data layout transformation would "
443 "result in transfer of "
444 << (len_dims_addr_granularity * addr_granularity / 8) <<
" bytes. ";
445 err.attachNote() <<
"Do not include the highest dimension size in "
446 "transfer length, as this is the BD repeat count.";
// Padding without a dims layout is rejected (this branch runs when the
// 'dims' block above was not taken).
450 if (padDims && target_model.isMemTile(tile.getCol(), tile.getRow()) &&
451 channelDir == AIE::DMAChannelDir::MM2S) {
452 return bd_op->emitOpError()
453 <<
"Padding requires n-d data layouts expressed as "
454 <<
"wrap(s) and stride(s).";
455 }
else if (padDims) {
456 return bd_op->emitOpError() <<
"Padding is supported only on MemTiles.";
// BD chaining: link to the next BD when the op names one.
460 uint32_t use_next_bd = 0;
461 uint32_t next_bd_id = 0;
462 if (bd_op.getNextBdId().has_value()) {
463 next_bd_id = bd_op.getNextBdId().value();
// Packet info: the BD's own attribute wins, else the caller-supplied
// fallback; null means no packet header.
469 auto info = bd_op.getPacket().value_or(packet.value_or(
nullptr));
472 packet_type = info.getPktType();
473 packet_id = info.getPktId();
// Lock fields default to "no lock"; filled in from lock_ops (found by
// elided code) when acquire/release ops are attached to this BD.
477 int32_t lock_rel_val = 0;
478 int32_t lock_rel_id = 0;
479 int32_t lock_acq_enable = 0;
480 int32_t lock_acq_val = 0;
481 int32_t lock_acq_id = 0;
485 auto [acquire_op, release_op] = *lock_ops;
488 AIE::LockOp acq_lock = acquire_op.getLockOp();
489 AIE::LockOp rel_lock = release_op.getLockOp();
491 if (acq_lock.getLockID().has_value()) {
492 lock_acq_id = acq_lock.getLockID().value();
493 lock_acq_val = acquire_op.getLockValue();
// AcquireGreaterEqual is encoded as a negative acquire value.
496 if (acquire_op.acquireGE())
497 lock_acq_val = -lock_acq_val;
501 if (rel_lock.getLockID().has_value()) {
502 lock_rel_id = rel_lock.getLockID().value();
503 lock_rel_val = release_op.getLockValue();
// On memtiles, lock IDs are rebased into this tile's local lock index
// space when the lock lives on a neighboring tile.
508 if (target_model.isMemTile(tile.getCol(), tile.getRow())) {
509 auto lockOffset = target_model.getLockLocalBaseIndex(
510 tile.getCol(), tile.getRow(), acq_lock.colIndex(),
511 acq_lock.rowIndex());
// NOTE(review): the same acq-lock-based offset is applied to the release
// lock id — correct only if both locks share a tile; confirm upstream.
512 if (lockOffset && acq_lock.getLockID().has_value())
513 lock_acq_id += lockOffset.value();
514 if (lockOffset && rel_lock.getLockID().has_value())
515 lock_rel_id += lockOffset.value();
// Emit the fully-resolved BD write; most operand lines are elided here.
519 NpuWriteBdOp::create(
520 builder, bd_op.getLoc(), tile.getCol(), bd_id, len_addr_granularity,
535 lock_rel_val, lock_rel_id,
537 lock_acq_val, lock_acq_id,
539 padBefore[1], padBefore[2],
540 padAfter[0], padAfter[1],
542 bd_op.getBurstLength());