66 DeviceOp device = getOperation();
67 const auto &targetModel = device.getTargetModel();
68 auto ctx = device->getContext();
69 auto loc = device->getLoc();
71 if (targetModel.getTargetArch() == AIEArch::AIE1)
74 SmallVector<Operation *> erased;
75 auto sequenceOps = device.getOps<AIE::RuntimeSequenceOp>();
76 for (
auto f : sequenceOps) {
78 auto controlPacketOps = f.getOps<AIEX::NpuControlPacketOp>();
79 if (controlPacketOps.empty())
87 AIE::RuntimeSequenceOp::create(builder, loc, f.getSymNameAttr());
88 newSeq.getBody().push_back(
new Block);
91 for (
auto arg : f.getBody().getArguments()) {
93 auto newArg = newSeq.getBody().addArgument(arg.getType(), arg.getLoc());
95 arg.replaceAllUsesWith(newArg);
97 mapping.map(arg, newArg);
101 auto ctrlPktMemrefType = MemRefType::get(
102 ShapedType::kDynamic, IntegerType::get(ctx, 32),
nullptr, 0);
103 auto newBlockArg = newSeq.getBody().addArgument(ctrlPktMemrefType, loc);
105 builder.setInsertionPointToStart(&newSeq.getBody().front());
112 std::string shimDmaAllocName;
116 std::vector<BatchInfo> batches;
118 int64_t ddrOffset = 0;
119 Block &entry = f.getBody().front();
122 bool new_batch =
true;
123 for (Operation &o : entry) {
124 auto ctrlPktOp = dyn_cast<NpuControlPacketOp>(&o);
131 int col = ctrlPktOp.getColumnFromAddr();
132 int row = ctrlPktOp.getRowFromAddr();
135 int64_t ctrlPktSize = 0;
136 auto data = ctrlPktOp.getData();
138 ctrlPktSize = data->size();
139 else if (ctrlPktOp.getLength())
140 ctrlPktSize = *ctrlPktOp.getLength();
145 if (targetModel.getTargetArch() == AIEArch::AIE2p && !new_batch &&
146 batches.back().tileId ==
TileID{col, row}) {
148 batches.back().totalSize += ctrlPktSize;
151 auto rowToShimChanMap =
153 int shimChan = rowToShimChanMap[
row];
155 std::string shimDmaAllocName =
"ctrlpkt";
156 shimDmaAllocName +=
"_col" + std::to_string(
col);
157 shimDmaAllocName +=
"_mm2s";
158 shimDmaAllocName +=
"_chan" + std::to_string(shimChan);
160 batches.push_back({
TileID{
col,
row}, ddrOffset, ctrlPktSize,
161 shimDmaAllocName, shimChan, &o});
164 ddrOffset += ctrlPktSize;
168 auto batchIt = batches.begin();
170 for (Operation &o : entry) {
171 auto ctrlPktOp = dyn_cast<NpuControlPacketOp>(&o);
173 builder.clone(o, mapping);
178 if (batchIt == batches.end())
182 if (batchIt->first != &o)
185 int col = ctrlPktOp.getColumnFromAddr();
188 const std::vector<int64_t> staticOffsets = {0, 0, 0,
189 batchIt->startOffset};
190 const std::vector<int64_t> staticSizes = {1, 1, 1, batchIt->totalSize};
191 const std::vector<int64_t> staticStrides = {0, 0, 0, 1};
193 SymbolRefAttr metadata =
194 SymbolRefAttr::get(builder.getContext(), batchIt->shimDmaAllocName);
195 NpuDmaMemcpyNdOp::create(builder, builder.getUnknownLoc(), newBlockArg,
196 SmallVector<Value>{}, SmallVector<Value>{},
197 SmallVector<Value>{}, ArrayRef(staticOffsets),
198 ArrayRef(staticSizes), ArrayRef(staticStrides),
199 nullptr, metadata, 0,
true, 0, 0, 0, 0, 0, 0);
201 auto shimRow = builder.getI32IntegerAttr(0);
202 auto shimCol = builder.getI32IntegerAttr(
col);
203 auto dir = builder.getI32IntegerAttr(1);
204 auto chan = builder.getI32IntegerAttr(batchIt->shimChan);
205 auto col_num = builder.getI32IntegerAttr(1);
206 auto row_num = builder.getI32IntegerAttr(1);
207 AIEX::NpuSyncOp::create(builder, loc, shimCol, shimRow, dir, chan,
215 for (
auto e : erased)