  uint32_t bd_id = bd_op.getBdId().value();
  auto buf = bd_op.getBuffer();
  uint64_t register_addr = getBufferDescriptorAddressRegisterAddress(
      target_model, bd_id, tile.getCol(), tile.getRow());
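  // The BD's buffer is either a runtime sequence argument (host DDR, address
  // unknown until runtime, so an address-patch record is emitted) or a
  // statically allocated aie.buffer (address known now, so the address
  // register can be written directly).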
  if (mlir::BlockArgument buf_arg = llvm::dyn_cast<mlir::BlockArgument>(buf)) {
    if (!target_model.isShimNOCTile(tile.getCol(), tile.getRow())) {
      return bd_op->emitOpError("DDR memory (runtime input arguments) can "
                                "only be referred to on shim tiles.");
    }
    unsigned arg_idx = buf_arg.getArgNumber();
    int64_t offset = bd_op.getOffsetInBytes();
    builder.create<NpuAddressPatchOp>(bd_op.getLoc(), /*addr*/ register_addr,
                                      /*arg_idx*/ arg_idx, /*arg_plus*/ offset);
  } else if (AIE::BufferOp buffer =
                 llvm::dyn_cast<AIE::BufferOp>(buf.getDefiningOp())) {
    if (!buffer.getAddress().has_value()) {
      return bd_op->emitOpError(
          "Cannot lower buffer without associated address. Run pass "
          "--aie-assign-buffer-addresses first or manually assign an "
          "address.");
    }
    uint64_t buf_addr = *buffer.getAddress();
    builder.create<NpuWrite32Op>(bd_op.getLoc(), register_addr, buf_addr,
                                 nullptr, nullptr, nullptr);
  } else {
    return bd_op->emitOpError("Buffer argument must be either a constant "
                              "aie.buffer or a runtime "
                              "sequence input argument.");
  }
  return success();
}

LogicalResult rewriteSingleBD(OpBuilder &builder, AIE::DMABDOp &bd_op,
                              AIE::TileOp &tile,
                              AIE::DMAChannelDir channelDir) {
  const AIE::AIETargetModel &target_model = AIE::getTargetModel(bd_op);
  MemRefType buffer_type = bd_op.getBuffer().getType();
  uint32_t addr_granularity = target_model.getAddressGenGranularity();
  uint32_t bd_id = bd_op.getBdId().value();
  int64_t offset = bd_op.getOffsetInBytes();
  uint64_t len = bd_op.getLenInBytes();
  uint64_t len_addr_granularity = len * 8 / addr_granularity;
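  // addr_granularity is expressed in bits, so len_addr_granularity is the
  // transfer length in address-generation words; e.g., with a (hypothetical)
  // 32-bit granularity, a 128-byte transfer gives 128 * 8 / 32 = 32 words.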
  if (offset * 8 % addr_granularity != 0) {
    return bd_op->emitOpError("Offset must be aligned to ")
           << (addr_granularity / 8) << " byte boundary.";
  }
  if (len < addr_granularity / 8) {
    return bd_op->emitOpError("Transfer size of ")
           << len << " bytes falls below minimum hardware transfer unit of "
           << (addr_granularity / 8) << " bytes.";
  }
  std::optional<llvm::ArrayRef<AIE::BDDimLayoutAttr>> dims =
      bd_op.getDimensions();
  llvm::SmallVector<int64_t, 4> sizes(4, 0);
  llvm::SmallVector<int64_t, 4> strides(4, 0);
  std::optional<llvm::ArrayRef<AIE::BDPadLayoutAttr>> padDims =
      bd_op.getPadDimensions();
  llvm::SmallVector<int64_t, 4> padBefore(4, 0);
  llvm::SmallVector<int64_t, 4> padAfter(4, 0);
  auto enable_packet = 0;
  auto out_of_order_id = 0;
  auto packet_id = 0;
  auto packet_type = 0;
  auto d0size = 0;
  auto d0stride = 0;
  auto d1size = 0;
  auto d1stride = 0;
  auto d2size = 0;
  auto d2stride = 0;
  auto iteration_size = 0;
  auto iteration_stride = 0;
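  // If the BD carries an n-d data layout transformation, translate the
  // user-level wraps and strides into the hardware's four-dimensional
  // (d0, d1, d2, iteration) format.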
  if (dims && dims->size() > 0) {
    llvm::SmallVector<int64_t, 4> input_sizes(4, 1);
    llvm::SmallVector<int64_t, 4> input_strides(4, 0);
    if (dims->size() > 4) {
      return bd_op->emitOpError("At most four data layout transformation "
                                "dimensions may be provided.");
    }
    for (size_t i = 0; i < dims->size(); i++) {
      int j = dims->size() - i - 1;
      input_sizes[i] = (*dims)[j].getSize();
      input_strides[i] = (*dims)[j].getStride();
    }
    // The transfer is linear when dimensions 1 and 2 are unit-sized;
    // dimension 3 may still carry a repeat count.
    bool isLinearTransfer = (input_sizes[0] >= 1) && (input_sizes[1] == 1) &&
                            (input_sizes[2] == 1);

    if (dims->size() > 2) {
      d2size = (target_model.isMemTile(tile.getCol(), tile.getRow()))
                   ? (*dims)[2].getSize()
                   : 0;
    }
    if (padDims.has_value()) {
      if (!target_model.isMemTile(tile.getCol(), tile.getRow()))
        return bd_op->emitOpError()
               << "Padding is only supported by memtile dma bds.";
      if (padDims->size() > dims->size())
        return bd_op->emitOpError()
               << "Mismatched number of dimensions between padding(s)"
               << " and wrap(s) and stride(s).";
      if (channelDir == AIE::DMAChannelDir::MM2S) {
        // Padding dimensions are also listed highest-first; reverse them,
        // then leave any remaining dimensions unpadded.
        for (size_t i = 0; i < padDims->size(); i++) {
          int j = padDims->size() - i - 1;
          padBefore[i] = (*padDims)[j].getConstPadBefore();
          padAfter[i] = (*padDims)[j].getConstPadAfter();
        }
        for (size_t i = padDims->size(); i < dims->size(); i++) {
          padBefore[i] = 0;
          padAfter[i] = 0;
        }
      } else {
        return bd_op->emitOpError()
               << "supports padding only for MM2S direction on MemTiles.";
      }
    }
    getHardwareStridesWraps(target_model, buffer_type, input_sizes,
                            input_strides, sizes, strides);

    if (failed(verifyStridesWraps(bd_op, buffer_type, tile.getCol(),
                                  tile.getRow(), input_sizes, input_strides,
                                  sizes, strides, isLinearTransfer))) {
      return failure();
    }
    // Dimension 3 maps onto the BD's iteration (repeat) dimension.
    iteration_size = sizes[3];
    iteration_stride = strides[3];
    if (!isLinearTransfer) {
      d0size = sizes[0];
      d0stride = strides[0];
      d1size = sizes[1];
      d1stride = strides[1];
      d2stride = strides[2];
    }
    if (input_sizes[3] > 1 && input_strides[3] == 0) {
      // A dimension-3 stride of 0 encodes a plain repeat of the transfer.
      // This must lower to an iteration wrap and stride of 0 so that no
      // address increment is ever added; the BD is instead repeated via the
      // repeat count on the push-to-queue instruction.
      iteration_size = 0;
      iteration_stride = 0;
    }

    // The BD length must equal the transfer described by the lowest three
    // dimensions; dimension 3 is the repeat count and does not contribute.
    uint64_t len_dims_addr_granularity = 1;
    for (size_t i = 0; i < 3; i++) {
      len_dims_addr_granularity *= sizes[i];
    }
    if (len_dims_addr_granularity != len_addr_granularity) {
      InFlightDiagnostic err =
          bd_op->emitOpError(
              "Buffer descriptor length does not match length of transfer "
              "expressed by lowest three dimensions of data layout "
              "transformation strides/wraps. ")
          << "BD length is " << (len_addr_granularity * addr_granularity / 8)
          << " bytes. "
          << "Lowest three dimensions of data layout transformation would "
             "result in transfer of "
          << (len_dims_addr_granularity * addr_granularity / 8) << " bytes. ";
      err.attachNote() << "Do not include the highest dimension size in "
                          "transfer length, as this is the BD repeat count.";
      return err;
    }
  } else {
    // Without wraps/strides, padding cannot be expressed at all.
    if (padDims && target_model.isMemTile(tile.getCol(), tile.getRow()) &&
        channelDir == AIE::DMAChannelDir::MM2S) {
      return bd_op->emitOpError()
             << "Padding requires n-d data layouts expressed as "
             << "wrap(s) and stride(s).";
    } else if (padDims) {
      return bd_op->emitOpError() << "Padding is supported only on MemTiles.";
    }
  }
  uint32_t use_next_bd = 0;
  uint32_t next_bd_id = 0;
  if (bd_op.getNextBdId().has_value()) {
    next_bd_id = bd_op.getNextBdId().value();
    use_next_bd = 1;
  }
  if (auto packetInfo = bd_op.getPacket()) {
    enable_packet = 1;
    packet_type = packetInfo->getPktType();
    packet_id = packetInfo->getPktId();
  }
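  // Finally, emit the aiex.npu.write_bd instruction that materializes all of
  // the fields computed above into the BD's registers.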
  builder.create<NpuWriteBdOp>(
      bd_op.getLoc(), tile.getCol(), bd_id, len_addr_granularity, offset,
      // ... packet, dimension, iteration, lock, and chaining operands
      // elided in this excerpt ...
      padBefore[0], padBefore[1], padBefore[2], padAfter[0], padAfter[1],
      padAfter[2], bd_op.getBurstLength());
  return success();
}