16#include "mlir/Pass/Pass.h"
17#include "mlir/Transforms/DialectConversion.h"
// Fragment of the Write32SymToAddr conversion pattern. The enclosing struct
// header and several interior lines are not present in this listing.
// matchAndRewrite resolves the buffer symbol referenced by an NpuWrite32Op to
// a numeric address and replaces the op with an NpuWrite32Op built from it.
28 using OpConversionPattern::OpConversionPattern;
30 Write32SymToAddr(MLIRContext *context, PatternBenefit benefit = 1)
34 matchAndRewrite(NpuWrite32Op op, OpAdaptor adaptor,
35 ConversionPatternRewriter &rewriter)
const override {
// Look the named buffer up in the enclosing AIE device op.
40 auto device = op->getParentOfType<AIE::DeviceOp>();
41 auto buffer = device.lookupSymbol<AIE::BufferOp>(*op.getBuffer());
// Error path for a failed lookup (the guarding condition is on a line that
// is not shown here).
43 return op->emitError(
"buffer '" + *op.getBuffer() +
44 "' not found in device");
// The buffer must already carry an assigned address.
46 if (!buffer.getAddress())
47 return op->emitError(
"buffer must have address assigned");
// Buffer base address plus the op's address field scaled by
// sizeof(uint32_t), i.e. the op's address is used as a 32-bit word offset.
50 uint32_t address =
static_cast<uint32_t
>(*buffer.getAddress()) +
51 op.getAddress() *
sizeof(uint32_t);
// Tile coordinates of the buffer's owning tile.
52 auto col = buffer.getTileOp().getCol();
53 auto row = buffer.getTileOp().getRow();
// Tail of an expression whose first line is missing: the row (masked to 8
// bits) is shifted by the target model's row shift and OR-ed with the low 20
// bits of the address. Presumably the missing line adds the column term and
// assigns back to `address` -- TODO confirm against the full file.
55 ((row & 0xff) << tm.
getRowShift()) | (address & 0xFFFFF);
// Replace the op with one carrying the computed address and the original
// value; the three trailing arguments are passed as null.
57 rewriter.replaceOpWithNewOp<NpuWrite32Op>(op, address, op.getValue(),
58 nullptr,
nullptr,
nullptr);
// Fragment of the BlockWriteSymToAddr conversion pattern. The enclosing
// struct header and several interior lines are not present in this listing.
// matchAndRewrite resolves the buffer symbol referenced by an NpuBlockWriteOp
// to a numeric address and replaces the op with an NpuBlockWriteOp built
// from it.
64 using OpConversionPattern::OpConversionPattern;
66 BlockWriteSymToAddr(MLIRContext *context, PatternBenefit benefit = 1)
70 matchAndRewrite(NpuBlockWriteOp op, OpAdaptor adaptor,
71 ConversionPatternRewriter &rewriter)
const override {
// Look the named buffer up in the enclosing AIE device op.
76 auto device = op->getParentOfType<AIE::DeviceOp>();
78 auto buffer = device.lookupSymbol<AIE::BufferOp>(*op.getBuffer());
// Error path for a failed lookup (the guarding condition is on a line that
// is not shown here).
80 return op->emitError(
"buffer '" + *op.getBuffer() +
81 "' not found in device");
// The buffer must already carry an assigned address.
83 if (!buffer.getAddress())
84 return op->emitError(
"buffer must have address assigned");
// Buffer base address plus the op's address field scaled by
// sizeof(uint32_t), i.e. the op's address is used as a 32-bit word offset.
87 uint32_t address =
static_cast<uint32_t
>(*buffer.getAddress()) +
88 op.getAddress() *
sizeof(uint32_t);
// Tile coordinates of the buffer's owning tile.
89 auto col = buffer.getTileOp().getCol();
90 auto row = buffer.getTileOp().getRow();
// Tail of an expression whose first line is missing: the row (masked to 8
// bits) is shifted by the target model's row shift and OR-ed with the low 20
// bits of the address. Presumably the missing line adds the column term and
// assigns back to `address` -- TODO confirm against the full file.
92 ((row & 0xff) << tm.
getRowShift()) | (address & 0xFFFFF);
// Replace the op with one carrying the computed address and the original
// data operand; the three trailing arguments are passed as null.
94 rewriter.replaceOpWithNewOp<NpuBlockWriteOp>(op, address, op.getData(),
95 nullptr,
nullptr,
nullptr);
// Fragment of the MaskWrite32SymToAddr conversion pattern. The enclosing
// struct header and several interior lines are not present in this listing.
// matchAndRewrite resolves the buffer symbol referenced by an
// NpuMaskWrite32Op to a numeric address and replaces the op with an
// NpuMaskWrite32Op built from it.
101 using OpConversionPattern::OpConversionPattern;
103 MaskWrite32SymToAddr(MLIRContext *context, PatternBenefit benefit = 1)
107 matchAndRewrite(NpuMaskWrite32Op op, OpAdaptor adaptor,
108 ConversionPatternRewriter &rewriter)
const override {
// Look the named buffer up in the enclosing AIE device op.
113 auto device = op->getParentOfType<AIE::DeviceOp>();
115 auto buffer = device.lookupSymbol<AIE::BufferOp>(*op.getBuffer());
// Error path for a failed lookup (the guarding condition is on a line that
// is not shown here).
117 return op->emitError(
"buffer '" + *op.getBuffer() +
118 "' not found in device");
// The buffer must already carry an assigned address.
120 if (!buffer.getAddress())
121 return op->emitError(
"buffer must have address assigned");
// Buffer base address plus the op's address field scaled by
// sizeof(uint32_t), i.e. the op's address is used as a 32-bit word offset.
124 uint32_t address =
static_cast<uint32_t
>(*buffer.getAddress()) +
125 op.getAddress() *
sizeof(uint32_t);
// Tile coordinates of the buffer's owning tile.
126 auto col = buffer.getTileOp().getCol();
127 auto row = buffer.getTileOp().getRow();
// Tail of an expression whose first line is missing: the row (masked to 8
// bits) is shifted by the target model's row shift and OR-ed with the low 20
// bits of the address. Presumably the missing line adds the column term and
// assigns back to `address` -- TODO confirm against the full file.
129 ((row & 0xff) << tm.
getRowShift()) | (address & 0xFFFFF);
// Replace the op with one carrying the computed address plus the original
// value and mask; the three trailing arguments are passed as null.
131 rewriter.replaceOpWithNewOp<NpuMaskWrite32Op>(
132 op, address, op.getValue(), op.getMask(),
nullptr,
nullptr,
nullptr);
// Fragment of the RtpToWrite32Pattern conversion pattern. The enclosing
// struct header and several interior lines are not present in this listing.
// matchAndRewrite lowers an NpuWriteRTPOp into an NpuWrite32Op that targets
// the indexed 32-bit word of the named buffer, then erases the original op.
138 using OpConversionPattern::OpConversionPattern;
140 RtpToWrite32Pattern(MLIRContext *context, PatternBenefit benefit = 1)
144 matchAndRewrite(NpuWriteRTPOp op, OpAdaptor adaptor,
145 ConversionPatternRewriter &rewriter)
const override {
// Look the named buffer up in the enclosing AIE device op.
147 auto device = op->getParentOfType<AIE::DeviceOp>();
149 auto buffer = device.lookupSymbol<AIE::BufferOp>(op.getBuffer());
// Error path for a failed lookup (guarding condition not shown).
// NOTE(review): the diagnostic is not returned on this line, unlike the
// `return op->emitError(...)` form used by the *SymToAddr patterns; the
// following line(s) are missing here, so confirm that a failure is returned.
151 op->emitError(
"buffer '" + op.getBuffer() +
"' not found in device");
// The buffer must already carry an assigned address.
155 if (!buffer.getAddress()) {
156 op->emitError(
"buffer must have address assigned");
159 AIE::TileOp tile = buffer.getTileOp();
// Byte offset of the selected word: index * sizeof(uint32_t), added to the
// buffer's base address.
161 uint32_t idx = op.getIndex() *
sizeof(uint32_t);
162 uint32_t address = buffer.getAddress().value() + idx;
// Emit the 32-bit write with the owning tile's column and row attached as
// i32 attributes; the argument between value and column is passed as null.
164 rewriter.create<NpuWrite32Op>(op->getLoc(), address, op.getValue(),
nullptr,
165 rewriter.getI32IntegerAttr(tile.getCol()),
166 rewriter.getI32IntegerAttr(tile.getRow()));
// The RTP op is fully replaced by the write created above.
168 rewriter.eraseOp(op);
// Fragment of the PushQueuetoWrite32Pattern conversion pattern. The
// enclosing struct header and several interior lines are not present in this
// listing. matchAndRewrite lowers an NpuPushQueueOp into register writes: an
// optional NpuMaskWrite32Op to a control register followed by an
// NpuWrite32Op to a queue register, then erases the original op.
176 using OpConversionPattern::OpConversionPattern;
178 PushQueuetoWrite32Pattern(MLIRContext *context, PatternBenefit benefit = 1)
182 matchAndRewrite(NpuPushQueueOp op, OpAdaptor adaptor,
183 ConversionPatternRewriter &rewriter)
const override {
// All writes emitted here target (op column, row 0).
185 auto column = rewriter.getI32IntegerAttr(op.getColumn());
186 auto row = rewriter.getI32IntegerAttr(0);
187 bool isMM2S = op.getDirection() == AIE::DMAChannelDir::MM2S;
// When a token is requested and the tile at (column, 0) has a
// "controller_id" attribute, write that controller's packet id into the
// channel control register.
190 if (op.getIssueToken()) {
// Fetch the tile at (column, 0), creating it if it does not exist yet.
193 AIE::TileOp shimTile = AIE::TileOp::getOrCreate(
194 rewriter, op->getParentOfType<AIE::DeviceOp>(), op.getColumn(), 0);
195 if (shimTile->hasAttr(
"controller_id")) {
// Control register offset depends on direction; the adjustment applied for
// channel 1 is on a line not shown here.
196 uint32_t ctrl_offset = isMM2S ? 0x1D210 : 0x1D200;
197 if (op.getChannel() == 1)
199 AIE::PacketInfoAttr controller_id_attr =
200 shimTile->getAttrOfType<AIE::PacketInfoAttr>(
"controller_id");
// Packet id is placed at bit 8; the mask selects bits 11:8 only.
201 uint32_t data = controller_id_attr.getPktId() << 8;
202 uint32_t mask = 0x00000F00;
203 rewriter.create<NpuMaskWrite32Op>(op->getLoc(), ctrl_offset, data, mask,
204 nullptr, column,
row);
// Queue register offset depends on direction; the adjustment applied for
// channel 1 is on a line not shown here.
209 uint32_t queue_offset = isMM2S ? 0x1D214 : 0x1D204;
210 if (op.getChannel() == 1)
214 uint32_t bd_id = op.getBdId();
215 uint32_t repeat_cnt = op.getRepeatCount();
// Command word assembly (declaration of `cmd` and some fields are on lines
// not shown): the repeat count occupies 8 bits starting at bit 16.
218 cmd |= (repeat_cnt & 0xFF) << 16;
219 if (op.getIssueToken())
// Write the command word to the queue register, then drop the original op.
222 rewriter.create<NpuWrite32Op>(op->getLoc(), queue_offset, cmd,
nullptr,
224 rewriter.eraseOp(op);
// Fragment of the DmaToNpuPattern conversion pattern. The enclosing struct
// header, the constructor's first lines and many interior lines are not
// present in this listing. matchAndRewrite lowers an NpuDmaMemcpyNdOp into
// three ops -- NpuWriteBdOp, NpuAddressPatchOp and NpuPushQueueOp -- and then
// erases the original op.
230 using OpConversionPattern::OpConversionPattern;
237 PatternBenefit benefit = 1)
241 matchAndRewrite(NpuDmaMemcpyNdOp op, OpAdaptor adaptor,
242 ConversionPatternRewriter &rewriter)
const override {
244 BaseMemRefType bufferType = op.getMemref().getType();
245 auto *ctx = op->getContext();
// `zero` is the shared i32 0 attribute used as the default for every
// buffer-descriptor field below.
246 auto i32ty = IntegerType::get(ctx, 32);
247 auto zero = IntegerAttr::get(i32ty, 0);
248 auto memref = adaptor.getMemref();
250 auto dev = op->getParentOfType<AIE::DeviceOp>();
// Resolve the shim DMA allocation referenced by the op's metadata symbol.
254 auto infoOp = allocGetter.
get(dev, op.getMetadata());
256 return op->emitOpError(
"couldn't find shim_dma_allocation op.");
// Direction and column come from the allocation op.
259 auto channelDir = infoOp->getChannelDir();
260 bool isMM2S = channelDir == AIE::DMAChannelDir::MM2S;
261 int col = infoOp->getCol();
// Buffer-descriptor fields, all defaulting to zero (some declarations, e.g.
// the size fields, are on lines not shown here).
266 auto buffer_length = zero;
267 auto buffer_offset = zero;
268 auto enable_packet = zero;
269 auto out_of_order_id = zero;
270 auto packet_id = zero;
271 auto packet_type = zero;
273 auto d0_stride = zero;
275 auto d1_stride = zero;
277 auto d2_stride = zero;
278 auto iteration_current = zero;
279 auto iteration_size = zero;
280 auto iteration_stride = zero;
283 auto use_next_bd = zero;
284 auto valid_bd = zero;
285 auto lock_rel_val = zero;
286 auto lock_rel_id = zero;
287 auto lock_acq_enable = zero;
288 auto lock_acq_val = zero;
289 auto lock_acq_id = zero;
290 auto d0_zero_before = zero;
291 auto d1_zero_before = zero;
292 auto d2_zero_before = zero;
293 auto d0_zero_after = zero;
294 auto d1_zero_after = zero;
295 auto d2_zero_after = zero;
296 auto burst_length = zero;
298 auto issue_token = BoolAttr::get(ctx,
false);
299 auto repeat_count = zero;
// Sizes and strides are read in reversed order relative to the op's mixed
// lists. Each entry is unwrapped with .value(), so every size/stride is
// assumed to be a constant at this point.
300 llvm::SmallVector<int64_t, 4> inputSizes = llvm::map_to_vector(
301 llvm::reverse(op.getMixedSizes()),
302 [](OpFoldResult s) { return getConstantIntValue(s).value(); });
303 llvm::SmallVector<int64_t, 4> inputStrides = llvm::map_to_vector(
304 llvm::reverse(op.getMixedStrides()),
305 [](OpFoldResult s) { return getConstantIntValue(s).value(); });
// Output vectors for the hardware-encoded sizes/strides (filled by a call
// whose first line is not shown here).
306 llvm::SmallVector<int64_t, 4> sizes(4);
307 llvm::SmallVector<int64_t, 4> strides(4);
310 int64_t offset = op.getOffsetInBytes();
// Target location: the allocation's column, row 0.
313 column = IntegerAttr::get(i32ty, col);
316 row = IntegerAttr::get(i32ty, 0);
// Trailing arguments of a validation call whose opening line is missing
// (presumably verifyStridesWraps -- TODO confirm).
318 bool skipTransformationChecks = op.isLinearTransferWithoutTransformation();
320 inputStrides, sizes, strides,
321 skipTransformationChecks))) {
// The op must sit inside a runtime sequence so that the memref can be mapped
// to one of the sequence's entry-block arguments.
326 AIEX::RuntimeSequenceOp seq_op =
327 op->getParentOfType<AIEX::RuntimeSequenceOp>();
329 op->emitOpError(
"NpuDmaMemcpyNdOps must have RuntimeSequenceOp parent at "
330 "time of lowering.");
333 Block &entryBB = seq_op.getBody().front();
// Search the entry-block arguments for the one equal to the memref (the
// loop body that records the match is on lines not shown here).
335 for (
int i = 0, e = entryBB.getNumArguments(); i < e; i++) {
336 if (entryBB.getArgument(i) == memref) {
345 bd_id = IntegerAttr::get(i32ty, op.getId());
// Transfer length: innermost size scaled by element bit width over the
// target's address-generation granularity, then multiplied by the sizes at
// indices 1 and 2 when present (the loop stops before index 3).
348 uint64_t buffer_length_val = inputSizes[0] *
349 bufferType.getElementTypeBitWidth() /
350 targetModel.getAddressGenGranularity();
351 if (inputSizes.size() > 1) {
352 for (
size_t i = 1; i < std::min(inputSizes.size(), (
size_t)3); i++) {
353 buffer_length_val *= inputSizes[i];
356 buffer_length = IntegerAttr::get(i32ty, buffer_length_val);
359 buffer_offset = IntegerAttr::get(i32ty, 0);
// Packet header fields are populated only when the op carries packet info.
362 if (
auto packetInfo = op.getPacket()) {
363 enable_packet = IntegerAttr::get(i32ty, 1);
364 packet_type = IntegerAttr::get(i32ty, packetInfo->getPktType());
365 packet_id = IntegerAttr::get(i32ty, packetInfo->getPktId());
// Dimension sizes/strides are only programmed for transfers that are not a
// plain linear copy.
370 if (!op.isLinearTransferWithoutTransformation()) {
372 d0_size = IntegerAttr::get(i32ty, sizes[0]);
373 d0_stride = IntegerAttr::get(i32ty, strides[0]);
376 d1_size = IntegerAttr::get(i32ty, sizes[1]);
377 d1_stride = IntegerAttr::get(i32ty, strides[1]);
380 d2_stride = IntegerAttr::get(i32ty, strides[2]);
// d2 size takes sizes[2] when (col, 0) is a MemTile and 0 otherwise (the
// `else` keyword itself is on a line not shown).
383 if (targetModel.isMemTile(col, 0))
384 d2_size = IntegerAttr::get(i32ty, sizes[2]);
386 d2_size = IntegerAttr::get(i32ty, 0);
// Outermost dimension: with a size above 1 and a positive stride it is
// programmed as the iteration size/stride. The zeroing assignments and
// repeat_count below belong to branches whose `else` lines are not shown --
// TODO confirm exact nesting against the full file.
389 if (inputSizes[3] > 1) {
390 if (inputStrides[3] > 0) {
391 iteration_size = IntegerAttr::get(i32ty, sizes[3]);
392 iteration_stride = IntegerAttr::get(i32ty, strides[3]);
398 iteration_size = zero;
399 iteration_stride = zero;
402 repeat_count = IntegerAttr::get(i32ty, sizes[3]);
409 valid_bd = IntegerAttr::get(i32ty, 1);
// Zero-padding and burst-length fields are copied straight from the op.
422 d0_zero_before = IntegerAttr::get(i32ty, op.getD0ZeroBefore());
425 d1_zero_before = IntegerAttr::get(i32ty, op.getD1ZeroBefore());
428 d2_zero_before = IntegerAttr::get(i32ty, op.getD2ZeroBefore());
431 d0_zero_after = IntegerAttr::get(i32ty, op.getD0ZeroAfter());
434 d1_zero_after = IntegerAttr::get(i32ty, op.getD1ZeroAfter());
437 d2_zero_after = IntegerAttr::get(i32ty, op.getD2ZeroAfter());
440 burst_length = IntegerAttr::get(i32ty, op.getBurstLength());
// issue_token follows the op's flag, then is forced to true under a
// condition that is on a line not shown here.
443 issue_token = BoolAttr::get(ctx, op.getIssueToken());
447 issue_token = BoolAttr::get(ctx,
true);
// Any zero padding on a MemTile in a non-MM2S direction is rejected.
// NOTE(review): the diagnostic is not returned on the visible line; confirm
// that a failure is returned on the line that follows in the full file.
449 if (targetModel.isMemTile(col, 0) && (!isMM2S) &&
450 (op.getD0ZeroBefore() != 0 || op.getD0ZeroAfter() != 0 ||
451 op.getD1ZeroBefore() != 0 || op.getD1ZeroAfter() != 0 ||
452 op.getD2ZeroBefore() != 0 || op.getD2ZeroAfter() != 0))
453 op->emitOpError(
"MemTile supports zero padding only on MM2S direction");
// Step 1: materialise the buffer descriptor.
455 rewriter.create<NpuWriteBdOp>(
456 op->getLoc(), column, bd_id, buffer_length, buffer_offset,
457 enable_packet, out_of_order_id, packet_id, packet_type, d0_size,
458 d0_stride, d1_size, d1_stride, d2_size, d2_stride, iteration_current,
459 iteration_size, iteration_stride, next_bd,
row, use_next_bd, valid_bd,
460 lock_rel_val, lock_rel_id, lock_acq_enable, lock_acq_val, lock_acq_id,
461 d0_zero_before, d1_zero_before, d2_zero_before, d0_zero_after,
462 d1_zero_after, d2_zero_after, burst_length);
// Step 2: patch the descriptor's address register with the runtime argument
// index and byte offset. `addr` is computed by a call whose first line is
// missing (presumably getBufferDescriptorAddressRegisterAddress -- confirm).
465 targetModel, op.getId(), col, 0);
467 rewriter.create<NpuAddressPatchOp>(op->getLoc(), addr, arg_idx, offset);
// Step 3: push the descriptor onto the allocation's channel queue.
469 rewriter.create<NpuPushQueueOp>(
470 op->getLoc(), column,
row, infoOp->getChannelDirAttr(),
471 infoOp->getChannelIndexAttr(), issue_token, repeat_count, bd_id);
// The memcpy op has been fully replaced by the three ops above.
473 rewriter.eraseOp(op);
// Fragment of the DmaWaitToSyncPattern conversion pattern. The enclosing
// struct header and some interior lines are not present in this listing.
// matchAndRewrite replaces an NpuDmaWaitOp with an NpuSyncOp whose column,
// direction and channel come from the shim DMA allocation named by the op's
// symbol.
487 using OpConversionPattern::OpConversionPattern;
489 DmaWaitToSyncPattern(MLIRContext *context,
491 PatternBenefit benefit = 1)
495 matchAndRewrite(NpuDmaWaitOp op, OpAdaptor adaptor,
496 ConversionPatternRewriter &rewriter)
const override {
497 AIE::DeviceOp dev = op->getParentOfType<AIE::DeviceOp>();
// Error path for a missing parent device (guarding condition not shown).
499 return op->emitError(
"couldn't find parent of type DeviceOp");
// Resolve the allocation op via the getter; an empty optional is an error.
501 std::optional<AIE::ShimDMAAllocationOp> shimDmaAllocOp =
502 allocGetter.
get(dev, op.getSymbol());
503 if (!shimDmaAllocOp) {
504 return op->emitError(
"couldn't find shim_dma_allocation op");
// Build the sync: allocation column, literal 0, the channel direction cast
// to uint32_t, the channel index, and two literal 1 arguments.
509 (void)rewriter.replaceOpWithNewOp<NpuSyncOp>(
510 op, shimDmaAllocOp->getCol(), 0,
511 static_cast<uint32_t
>(shimDmaAllocOp->getChannelDir()),
512 shimDmaAllocOp->getChannelIndex(), 1, 1);
// Fragment of the WriteBdToBlockWritePattern conversion pattern. The
// enclosing struct header and many interior lines are not present in this
// listing. matchAndRewrite packs the fields of an NpuWriteBdOp into eight
// 32-bit words, stores them in a memref.global, and replaces the op with an
// NpuBlockWriteOp targeting the descriptor's register address.
519 using OpConversionPattern::OpConversionPattern;
525 WriteBdToBlockWritePattern(MLIRContext *context,
int &cachedId,
526 PatternBenefit benefit = 1)
530 matchAndRewrite(NpuWriteBdOp op, OpAdaptor adaptor,
531 ConversionPatternRewriter &rewriter)
const override {
533 AIE::DeviceOp dev = op->getParentOfType<AIE::DeviceOp>();
// Eight descriptor words, zero-initialised and filled by OR-ing fields in.
536 std::vector<uint32_t> words(8, 0);
537 uint32_t bd_id = op.getBdId();
// --- Shim NOC tile layout ---
539 if (tm.isShimNOCTile(op.getColumn(), op.getRow())) {
// Descriptor address: column/row placed via the target model's shifts, plus
// base 0x1D000 with 0x20 bytes per descriptor id.
540 bd_addr = (op.getColumn() << tm.getColumnShift()) |
541 (op.getRow() << tm.getRowShift()) | (0x1D000 + bd_id * 0x20);
544 words[0] = op.getBufferLength();
547 words[1] = op.getBufferOffset();
// Word 2: packet enable (bit 30), out-of-order id (6 bits at 24), packet id
// (5 bits at 19), packet type (3 bits at 16).
551 words[2] |= (op.getEnablePacket() & 0x1) << 30;
552 words[2] |= (op.getOutOfOrderId() & 0x3f) << 24;
553 words[2] |= (op.getPacketId() & 0x1f) << 19;
554 words[2] |= (op.getPacketType() & 0x7) << 16;
// Words 3-5: per-dimension size (10 bits at 20) and stride (low 20 bits).
558 words[3] |= (op.getD0Size() & 0x3ff) << 20;
559 words[3] |= op.getD0Stride() & 0xfffff;
564 words[4] |= (op.getD1Size() & 0x3ff) << 20;
565 words[4] |= op.getD1Stride() & 0xfffff;
569 words[5] = op.getD2Stride() & 0xfffff;
// Word 6: iteration current (6 bits at 26), size (6 bits at 20), stride
// (low 20 bits).
572 words[6] |= (op.getIterationCurrent() & 0x3f) << 26;
573 words[6] |= (op.getIterationSize() & 0x3f) << 20;
574 words[6] |= op.getIterationStride() & 0xfffff;
// Word 7: chaining, valid bit and lock configuration.
// NOTE(review): the lock value masks below are 0xef, which is not a
// contiguous low-bit mask (bit 4 is cleared, bit 7 is kept). With the shifts
// used here, bit 7 of the release value lands on bit 25 (the valid-BD bit)
// and bit 7 of the acquire value lands on bit 12 (the lock-acquire-enable
// bit). The MemTile branch uses 0x7f for the same fields -- confirm the
// intended mask against the shim DMA BD register layout.
578 words[7] |= (op.getNextBd() & 0xf) << 27;
579 words[7] |= (op.getUseNextBd() & 0x1) << 26;
580 words[7] |= (op.getValidBd() & 0x1) << 25;
581 words[7] |= (op.getLockRelVal() & 0xef) << 18;
582 words[7] |= (op.getLockRelId() & 0xf) << 13;
583 words[7] |= (op.getLockAcqEnable() & 0x1) << 12;
584 words[7] |= (op.getLockAcqVal() & 0xef) << 5;
585 words[7] |= op.getLockAcqId() & 0xf;
// Any zero-padding request is an error in this branch.
587 if (op.getD0ZeroBefore() || op.getD1ZeroBefore() ||
588 op.getD2ZeroBefore() || op.getD0ZeroAfter() || op.getD1ZeroAfter() ||
589 op.getD2ZeroAfter()) {
590 op->emitError(
"Zero padding is only available on MemTile");
592 }
// --- MemTile layout ---
else if (tm.isMemTile(op.getColumn(), op.getRow())) {
// Descriptor address: base 0xA0000 with 0x20 bytes per descriptor id.
593 bd_addr = (op.getColumn() << tm.getColumnShift()) |
594 (op.getRow() << tm.getRowShift()) | (0xA0000 + bd_id * 0x20);
// Word 0: packet header fields and 17-bit buffer length.
596 words[0] |= (op.getEnablePacket() & 0x1) << 31;
597 words[0] |= (op.getPacketType() & 0x7) << 28;
598 words[0] |= (op.getPacketId() & 0x1f) << 23;
599 words[0] |= (op.getOutOfOrderId() & 0x3f) << 17;
600 words[0] |= op.getBufferLength() & 0x1ffff;
// Word 1: d0 zero-before, next-BD chaining and 19-bit buffer offset.
603 words[1] |= (op.getD0ZeroBefore() & 0x3F) << 26;
604 words[1] |= (op.getNextBd() & 0x3f) << 20;
605 words[1] |= (op.getUseNextBd() & 0x1) << 19;
606 words[1] |= op.getBufferOffset() & 0x7ffff;
// Words 2-4: per-dimension size (10 bits at 17), stride (low 17 bits) and
// zero-before padding counts.
609 words[2] |= (op.getD0Size() & 0x3ff) << 17;
610 words[2] |= op.getD0Stride() & 0x1ffff;
614 words[3] |= (op.getD1ZeroBefore() & 0x1F) << 27;
615 words[3] |= (op.getD1Size() & 0x3ff) << 17;
616 words[3] |= op.getD1Stride() & 0x1ffff;
620 words[4] |= (op.getD2ZeroBefore() & 0xF) << 27;
621 words[4] |= op.getD2Stride() & 0x1ffff;
// Word 5: zero-after padding counts for d2, d1 and d0.
625 words[5] |= (op.getD2ZeroAfter() & 0xF) << 28;
626 words[5] |= (op.getD1ZeroAfter() & 0x1F) << 23;
627 words[5] |= (op.getD0ZeroAfter() & 0x3F) << 17;
// Word 6: iteration current (6 bits at 23), size (6 bits at 17), stride
// (low 17 bits).
630 words[6] |= (op.getIterationCurrent() & 0x3f) << 23;
631 words[6] |= (op.getIterationSize() & 0x3f) << 17;
632 words[6] |= op.getIterationStride() & 0x1ffff;
// Word 7: valid bit and lock configuration (7-bit values, 8-bit ids).
635 words[7] |= (op.getValidBd() & 0x1) << 31;
636 words[7] |= (op.getLockRelVal() & 0x7f) << 24;
637 words[7] |= (op.getLockRelId() & 0xff) << 16;
638 words[7] |= (op.getLockAcqEnable() & 0x1) << 15;
639 words[7] |= (op.getLockAcqVal() & 0x7f) << 8;
640 words[7] |= op.getLockAcqId() & 0xff;
// Error for any other tile type (the `else` line is not shown here).
643 op->emitError(
"Run-time DMA configuration is supported only for "
644 "ShimTiles and MemTiles currently.");
// Wrap the eight words as the initial value of a memref<8xi32> global.
648 MemRefType memrefType = MemRefType::get({8}, rewriter.getI32Type());
649 TensorType tensorType = RankedTensorType::get({8}, rewriter.getI32Type());
650 memref::GlobalOp global =
nullptr;
651 auto initVal = DenseElementsAttr::get<uint32_t>(tensorType, words);
// Scan the device's existing globals, comparing type and initial value
// against the new data. The bodies of these checks are on lines not shown;
// presumably a matching global is reused -- TODO confirm.
652 auto otherGlobals = dev.getOps<memref::GlobalOp>();
653 for (
auto g : otherGlobals) {
656 if (g.getType() != memrefType)
658 auto otherValue = g.getInitialValue();
661 if (*otherValue != initVal)
// Create a new global just before the enclosing runtime sequence; the guard
// restores the rewriter's insertion point afterwards.
667 OpBuilder::InsertionGuard guard(rewriter);
668 rewriter.setInsertionPoint(
669 op->getParentOfType<AIEX::RuntimeSequenceOp>());
// Pick a symbol name "blockwrite_data_<cachedId>" that is not yet defined in
// the device (the loop body that advances cachedId is on a line not shown).
670 std::string name =
"blockwrite_data_";
671 while (dev.lookupSymbol(name + std::to_string(cachedId)))
673 name += std::to_string(cachedId);
674 global = rewriter.create<memref::GlobalOp>(
675 op->getLoc(), name, rewriter.getStringAttr(
"private"), memrefType,
676 initVal,
true,
nullptr);
// Reference the global and replace the op with a block write at the
// descriptor address (the remaining arguments are on lines not shown).
678 auto memref = rewriter.create<memref::GetGlobalOp>(op->getLoc(), memrefType,
680 (void)rewriter.replaceOpWithNewOp<NpuBlockWriteOp>(
681 op, rewriter.getUI32IntegerAttr(bd_addr), memref.getResult(),
nullptr,
// Out-of-class definition of the static counter WriteBdToBlockWritePattern::cachedId,
// starting at 0. The pattern appends it to "blockwrite_data_" when naming
// newly created memref globals.
687int WriteBdToBlockWritePattern::cachedId = 0;
// Pass that lowers DMA-level NPU ops inside an AIE device to lower-level
// register-write ops by running the conversion patterns registered below as
// a partial dialect conversion. Some member lines and closing braces are not
// present in this listing.
689struct AIEDmaToNpuPass : AIEDmaToNpuBase<AIEDmaToNpuPass> {
// Declares the memref dialect as a dependency (the patterns create
// memref.global / memref.get_global ops).
// NOTE(review): '®istry' below looks like a mis-encoded '&registry' (the
// body uses `registry`); left untouched here because it is a code token.
691 void getDependentDialects(DialectRegistry ®istry)
const override {
692 registry.insert<memref::MemRefDialect>();
695 void runOnOperation()
override {
699 AIE::DeviceOp device = getOperation();
// Legal by default: the AIEX and memref dialects, plus the AIE buffer,
// shim DMA allocation and tile ops.
701 ConversionTarget target(getContext());
702 target.addLegalDialect<AIEXDialect>();
703 target.addLegalDialect<memref::MemRefDialect>();
704 target.addLegalOp<AIE::BufferOp>();
705 target.addLegalOp<AIE::ShimDMAAllocationOp>();
706 target.addLegalOp<AIE::TileOp>();
// These ops must be rewritten away by the patterns below.
708 target.addIllegalOp<NpuDmaMemcpyNdOp>();
709 target.addIllegalOp<NpuDmaWaitOp>();
710 target.addIllegalOp<NpuPushQueueOp>();
711 target.addIllegalOp<NpuWriteRTPOp>();
712 target.addIllegalOp<NpuWriteBdOp>();
// Write ops are legal only once they no longer reference a buffer symbol.
713 target.addDynamicallyLegalOp<NpuWrite32Op>(
714 [&](NpuWrite32Op op) {
return !op.getBuffer(); });
715 target.addDynamicallyLegalOp<NpuBlockWriteOp>(
716 [&](NpuBlockWriteOp op) {
return !op.getBuffer(); });
717 target.addDynamicallyLegalOp<NpuMaskWrite32Op>(
718 [&](NpuMaskWrite32Op op) {
return !op.getBuffer(); });
// Register every lowering pattern. Two of them also receive `cachingGetter`,
// which is declared on a line not shown here.
720 RewritePatternSet patterns(&getContext());
721 patterns.insert<BlockWriteSymToAddr>(&getContext());
722 patterns.insert<DmaToNpuPattern>(&getContext(), cachingGetter);
723 patterns.insert<DmaWaitToSyncPattern>(&getContext(), cachingGetter);
724 patterns.insert<MaskWrite32SymToAddr>(&getContext());
725 patterns.insert<PushQueuetoWrite32Pattern>(&getContext());
726 patterns.insert<RtpToWrite32Pattern>(&getContext());
727 patterns.insert<Write32SymToAddr>(&getContext());
728 patterns.insert<WriteBdToBlockWritePattern>(&getContext());
// Run the conversion over the device; the failure handling is on a line not
// shown here.
730 if (failed(applyPartialConversion(device, target, std::move(patterns))))
// Returns a newly allocated AIEDmaToNpuPass. Presumably the body of the
// createAIEDmaToNpuPass() factory, whose signature line is not shown here.
738 return std::make_unique<AIEDmaToNpuPass>();
virtual uint32_t getColumnShift() const =0
virtual uint32_t getRowShift() const =0
std::unique_ptr< mlir::OperationPass< AIE::DeviceOp > > createAIEDmaToNpuPass()
void getHardwareStridesWraps(const AIE::AIETargetModel &targetModel, mlir::BaseMemRefType referencedBufType, llvm::SmallVector< int64_t, 4 > inputSizes, llvm::SmallVector< int64_t, 4 > inputStrides, llvm::SmallVector< int64_t, 4 > &sizes, llvm::SmallVector< int64_t, 4 > &strides)
uint64_t getBufferDescriptorAddressRegisterAddress(const AIE::AIETargetModel &tm, unsigned bd_id, unsigned col, unsigned row)
mlir::LogicalResult verifyStridesWraps(mlir::Operation *forOp, mlir::BaseMemRefType referencedBufType, int tileCol, int tileRow, llvm::SmallVector< int64_t, 4 > inputSizes, llvm::SmallVector< int64_t, 4 > inputStrides, llvm::SmallVector< int64_t, 4 > hardwareSizes, llvm::SmallVector< int64_t, 4 > hardwareStrides, bool skipTransformationChecks=false)
uint32_t getShimBurstLengthEncoding(const AIE::AIETargetModel &tm, uint32_t burstLength)
const AIETargetModel & getTargetModel(mlir::Operation *op)
std::optional< AIE::ShimDMAAllocationOp > get(DeviceOp dev, mlir::StringRef sym_name)