MLIR-AIE
AIENpuToCert.cpp
Go to the documentation of this file.
1//===- AIENpuToCert.cpp ----------------------------------------*- C++ -*-===//
2//
3// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7// (c) Copyright 2025 Advanced Micro Devices, Inc.
8//
9//===----------------------------------------------------------------------===//
10
14
15#include "mlir/IR/IRMapping.h"
16#include "mlir/Pass/Pass.h"
17#include "mlir/Transforms/DialectConversion.h"
18#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
19
20#include <type_traits>
21#include <vector>
22
23namespace xilinx::AIEX {
24#define GEN_PASS_DEF_AIENPUTOCERT
25#define GEN_PASS_DEF_AIECERTPAGES
26#include "aie/Dialect/AIEX/Transforms/AIEXPasses.h.inc"
27} // namespace xilinx::AIEX
28
29using namespace mlir;
30using namespace xilinx;
31
32#define DEBUG_TYPE "npu-to-cert"
33
34namespace {
35
// Byte budget used when deciding whether content still fits on one CERT page.
// It is deliberately a little below the real page size so that overheads and
// estimation errors are absorbed.
static constexpr uint32_t cert_page_size{8000};
39
40struct RuntimeSequenceToCertJob : OpConversionPattern<AIE::RuntimeSequenceOp> {
41 using OpConversionPattern::OpConversionPattern;
42
43 RuntimeSequenceToCertJob(MLIRContext *context, PatternBenefit benefit = 1)
44 : OpConversionPattern(context, benefit) {}
45
46 LogicalResult
47 matchAndRewrite(AIE::RuntimeSequenceOp op, OpAdaptor adaptor,
48 ConversionPatternRewriter &rewriter) const override {
49
50 auto symName = op.getSymName();
51 uint32_t newJobId = 1;
52 if (symName != "configure") {
53 uint32_t maxJobId = 1;
54 op->getParentOp()->walk([&](AIEX::CertJobOp certJobOp) {
55 maxJobId = std::max(maxJobId, certJobOp.getJobId());
56 });
57 newJobId = maxJobId + 1;
58 }
59 auto jobOp = rewriter.replaceOpWithNewOp<AIEX::CertJobOp>(
60 op, op->getResultTypes(), newJobId);
61 IRMapping remap;
62 op.getRegion().cloneInto(&jobOp.getBody(), remap);
63 AIEX::CertJobOp::ensureTerminator(jobOp.getBody(), rewriter, op->getLoc());
64
65 return success();
66 }
67};
68
69struct NpuWrite32ToCertWrite32 : OpConversionPattern<AIEX::NpuWrite32Op> {
70 using OpConversionPattern::OpConversionPattern;
71
72 LogicalResult
73 matchAndRewrite(AIEX::NpuWrite32Op op, OpAdaptor adaptor,
74 ConversionPatternRewriter &rewriter) const override {
75 rewriter.replaceOpWithNewOp<AIEX::CertWrite32Op>(op, op.getAddress(),
76 op.getValue());
77 return success();
78 }
79};
80
81struct NpuMaskWrite32ToCertMaskWrite32
82 : OpConversionPattern<AIEX::NpuMaskWrite32Op> {
83 using OpConversionPattern::OpConversionPattern;
84
85 LogicalResult
86 matchAndRewrite(AIEX::NpuMaskWrite32Op op, OpAdaptor adaptor,
87 ConversionPatternRewriter &rewriter) const override {
88 rewriter.replaceOpWithNewOp<AIEX::CertMaskWrite32Op>(
89 op, op.getAddress(), op.getMask(), op.getValue());
90 return success();
91 }
92};
93
94struct NpuBlockWriteToCertUcDma : OpConversionPattern<AIEX::NpuBlockWriteOp> {
95 using OpConversionPattern::OpConversionPattern;
96
97 LogicalResult
98 matchAndRewrite(AIEX::NpuBlockWriteOp op, OpAdaptor adaptor,
99 ConversionPatternRewriter &rewriter) const override {
100
101 memref::GetGlobalOp dataOperand =
102 dyn_cast_or_null<memref::GetGlobalOp>(op.getData().getDefiningOp());
103 if (!dataOperand)
104 return failure();
105 MemRefType dataType = cast<MemRefType>(dataOperand.getResult().getType());
106 uint32_t dataSize = dataType.getNumElements();
107
108 int id = 0;
109 std::string symbolName = "chain_" + std::to_string(id);
110 while (op->getParentOfType<AIE::DeviceOp>().lookupSymbol(symbolName))
111 symbolName = "chain_" + std::to_string(++id);
112
113 // Create a new uc_dma_write_des_sync operation
114 rewriter.replaceOpWithNewOp<AIEX::CertUcDmaWriteDesSyncOp>(op, symbolName);
115
116 // Create the uc_dma_chain operation
117 rewriter.setInsertionPoint(op->getParentOfType<AIEX::CertJobOp>());
118 auto symbolAttr = rewriter.getStringAttr(symbolName);
119 auto chainOp =
120 AIEX::CertUcDmaChainOp::create(rewriter, op.getLoc(), symbolAttr);
121
122 Block *bb = new Block();
123 chainOp.getRegion().push_back(bb);
124 rewriter.setInsertionPointToStart(bb);
125 AIEX::CertUcDmaBdOp::create(rewriter, op.getLoc(), dataOperand.getName(),
126 op.getAddress(), dataSize, false);
127
128 AIEX::CertUcDmaChainOp::ensureTerminator(chainOp.getBody(), rewriter,
129 op->getLoc());
130 return success();
131 }
132};
133
134struct NpuSyncToCertWaitTCTS : OpConversionPattern<AIEX::NpuSyncOp> {
135 using OpConversionPattern::OpConversionPattern;
136
137 LogicalResult
138 matchAndRewrite(AIEX::NpuSyncOp op, OpAdaptor adaptor,
139 ConversionPatternRewriter &rewriter) const override {
140 uint32_t row = op.getRow();
141 uint32_t col = op.getColumn();
142
143 // These are the shift amounts from the tct packet format.
144 // The firmware expects the row and column packed and shifted down to zero.
145 const int row_id_shift = 16;
146 const int col_id_shift = 21;
147 uint16_t tile_id = col << (col_id_shift - row_id_shift) | row;
148 uint32_t channel = op.getChannel();
149 uint32_t direction = op.getDirection();
150
151 const std::vector<int> chan2actor_shim_s2mm = {0, 2};
152 const std::vector<int> chan2actor_shim_mm2s = {6, 7, 8, 9};
153
154 const std::vector<int> chan2actor_mem_s2mm = {1, 2, 3, 4, 5, 6, 7};
155 const std::vector<int> chan2actor_mem_mm2s = {16, 17, 18, 19, 20,
156 22, 23, 24, 25, 26};
157 const std::vector<int> chan2actor_tile_s2mm = {0, 1};
158 const std::vector<int> chan2actor_tile_mm2s = {6};
159 const auto &tm = AIE::getTargetModel(op);
160 const bool isS2MM =
161 direction == static_cast<std::underlying_type_t<AIE::DMAChannelDir>>(
162 AIE::DMAChannelDir::S2MM);
163
164 const std::vector<int> *chan2actor = nullptr;
165 if (tm.isCoreTile(col, row))
166 chan2actor = isS2MM ? &chan2actor_tile_s2mm : &chan2actor_tile_mm2s;
167 else if (tm.isMemTile(col, row))
168 chan2actor = isS2MM ? &chan2actor_mem_s2mm : &chan2actor_mem_mm2s;
169 else
170 chan2actor = isS2MM ? &chan2actor_shim_s2mm : &chan2actor_shim_mm2s;
171
172 size_t chanIdx = static_cast<size_t>(channel);
173 if (!chan2actor || chanIdx >= chan2actor->size()) {
174 op.emitError("invalid DMA channel ")
175 << channel << " for " << (isS2MM ? "S2MM" : "MM2S")
176 << " direction in NpuSyncToCertWaitTCTS conversion";
177 return failure();
178 }
179
180 uint8_t actor_id = static_cast<uint8_t>((*chan2actor)[chanIdx]);
181 uint8_t num_tcts = 1;
182 rewriter.replaceOpWithNewOp<AIEX::CertWaitTCTSOp>(op, tile_id, actor_id,
183 num_tcts);
184 return success();
185 }
186};
187
188struct NpuAddressPatchToCertApplyOffset57
189 : OpConversionPattern<AIEX::NpuAddressPatchOp> {
190 using OpConversionPattern::OpConversionPattern;
191
192 LogicalResult
193 matchAndRewrite(AIEX::NpuAddressPatchOp op, OpAdaptor adaptor,
194 ConversionPatternRewriter &rewriter) const override {
195 // find the previous blockwrite operation
196 Block::iterator it(op);
197 while (it != op->getBlock()->begin()) {
198 --it;
199 auto blockWriteOp = dyn_cast<AIEX::NpuBlockWriteOp>(*it);
200 if (!blockWriteOp)
201 continue;
202
203 const auto &tm = AIE::getTargetModel(op);
204 uint32_t addr = op.getAddr();
205 int col = (addr >> tm.getColumnShift()) & 0x1f;
206 int row = (addr >> tm.getRowShift()) & 0x1f;
207 if (!tm.isValidTile({col, row}))
208 return failure();
209
210 // if it's not a matching blockwrite, give up.
211 if (blockWriteOp.getAddress() + tm.getDmaBdAddressOffset(col, row) !=
212 addr)
213 break;
214
215 Value data = blockWriteOp.getData();
216 auto getGlobalOp = dyn_cast<memref::GetGlobalOp>(data.getDefiningOp());
217 if (!getGlobalOp)
218 break;
219
220 // replace the address with the new address
221 rewriter.setInsertionPoint(blockWriteOp);
222 rewriter.replaceOpWithNewOp<AIEX::CertApplyOffset57Op>(
223 op, getGlobalOp.getName(), 1, op.getArgIdx());
224 return success();
225 }
226
227 return failure();
228 }
229};
230
231struct MergeConsecutiveCertUcDmaWriteDesSyncOps
232 : OpRewritePattern<AIEX::CertUcDmaWriteDesSyncOp> {
233 using OpRewritePattern::OpRewritePattern;
234
235 LogicalResult matchAndRewrite(AIEX::CertUcDmaWriteDesSyncOp op,
236 PatternRewriter &rewriter) const override {
237 // Get the previous operation in the block
238 Block::iterator it(op);
239 AIEX::CertUcDmaWriteDesSyncOp prevWriteDesSync = nullptr;
240 while (it != op->getBlock()->begin() && !prevWriteDesSync) {
241 --it;
242 Operation *prevOp = &*it;
243 if (isa<AIEX::CertWrite32Op, AIEX::CertMaskWrite32Op,
244 AIEX::CertApplyOffset57Op, AIEX::CertWaitTCTSOp>(prevOp))
245 return failure();
246 prevWriteDesSync = dyn_cast<AIEX::CertUcDmaWriteDesSyncOp>(prevOp);
247 }
248 if (!prevWriteDesSync)
249 return failure();
250
251 // find the uc_dma_chain
252 StringRef sym_name = op.getSymbol();
253 StringRef prev_sym_name = prevWriteDesSync.getSymbol();
254 auto chain = dyn_cast_if_present<AIEX::CertUcDmaChainOp>(
255 op->getParentOfType<AIE::DeviceOp>().lookupSymbol(sym_name));
256 auto prevChain = dyn_cast_if_present<AIEX::CertUcDmaChainOp>(
257 prevWriteDesSync->getParentOfType<AIE::DeviceOp>().lookupSymbol(
258 prev_sym_name));
259 if (!chain || !prevChain)
260 return failure();
261
262 // Compute the size of the current and previous chains. If their combined
263 // data size is greater than the cert page size, then we cannot merge them.
264 uint32_t prevChainSize = 0;
265 for (auto &o : prevChain.getBody().front().getOperations()) {
266 auto bdOp = dyn_cast<AIEX::CertUcDmaBdOp>(o);
267 if (!bdOp)
268 continue;
269 prevChainSize += bdOp.getLength() * sizeof(int);
270 }
271 uint32_t currChainSize = 0;
272 for (auto &o : chain.getBody().front().getOperations()) {
273 auto bdOp = dyn_cast<AIEX::CertUcDmaBdOp>(o);
274 if (!bdOp)
275 continue;
276 currChainSize += bdOp.getLength() * sizeof(int);
277 }
278 if ((currChainSize + prevChainSize) >= cert_page_size)
279 return failure();
280
281 IRMapping map;
282 rewriter.setInsertionPointToStart(&chain.getBody().front());
283 for (auto &o : prevChain.getBody().front().getOperations()) {
284 auto bdOp = dyn_cast<AIEX::CertUcDmaBdOp>(o);
285 if (!bdOp)
286 continue;
287 AIEX::CertUcDmaBdOp::create(
288 rewriter, bdOp.getLoc(), bdOp.getRemoteAddress(),
289 bdOp.getLocalAddress(), bdOp.getLength(), true);
290 }
291 rewriter.eraseOp(prevChain);
292 rewriter.eraseOp(prevWriteDesSync);
293 return success();
294 }
295};
296
297struct SplitNpuBlockWriteOpPattern : OpRewritePattern<AIEX::NpuBlockWriteOp> {
298 using OpRewritePattern::OpRewritePattern;
299
300 LogicalResult matchAndRewrite(AIEX::NpuBlockWriteOp op,
301 PatternRewriter &rewriter) const override {
302
303 memref::GetGlobalOp dataOperand =
304 dyn_cast_or_null<memref::GetGlobalOp>(op.getData().getDefiningOp());
305 if (!dataOperand)
306 return failure();
307
308 MemRefType dataType = cast<MemRefType>(dataOperand.getResult().getType());
309 uint32_t dataSize = dataType.getNumElements();
310
311 uint32_t dataSizeBytes = dataSize * sizeof(int);
312 if (dataSizeBytes < cert_page_size)
313 return failure();
314
315 auto loc = op.getLoc();
316
317 // Calculate split point (split roughly in half)
318 uint32_t splitElements = dataSize / 2;
319 uint32_t firstChunkSize = splitElements;
320 uint32_t secondChunkSize = dataSize - splitElements;
321
322 // Find the original memref.global operation
323 auto deviceOp = op->getParentOfType<AIE::DeviceOp>();
324 auto originalGlobal = dyn_cast_if_present<memref::GlobalOp>(
325 deviceOp.lookupSymbol(dataOperand.getName()));
326 if (!originalGlobal)
327 return failure();
328
329 // Get the original data attribute
330 auto originalData = originalGlobal.getInitialValue();
331 if (!originalData)
332 return failure();
333
334 auto denseData = dyn_cast<DenseIntElementsAttr>(*originalData);
335 if (!denseData)
336 return failure();
337
338 // Split the data into two chunks
339 auto dataValues = denseData.getValues<APInt>();
340 std::vector<APInt> firstChunkData(dataValues.begin(),
341 dataValues.begin() + firstChunkSize);
342 std::vector<APInt> secondChunkData(dataValues.begin() + firstChunkSize,
343 dataValues.end());
344
345 // Create new global operations for the split data
346 auto elementType = rewriter.getI32Type();
347 auto firstChunkType = MemRefType::get({firstChunkSize}, elementType);
348 auto secondChunkType = MemRefType::get({secondChunkSize}, elementType);
349 TensorType firstTensorType =
350 RankedTensorType::get({firstChunkSize}, elementType);
351 TensorType secondTensorType =
352 RankedTensorType::get({secondChunkSize}, elementType);
353
354 auto firstChunkAttr =
355 DenseIntElementsAttr::get(firstTensorType, firstChunkData);
356 auto secondChunkAttr =
357 DenseIntElementsAttr::get(secondTensorType, secondChunkData);
358
359 // Generate unique names for the new globals
360 std::string firstName = dataOperand.getName().str() + "_split_0";
361 std::string secondName = dataOperand.getName().str() + "_split_1";
362
363 // Ensure unique names
364 int counter = 0;
365 while (deviceOp.lookupSymbol(firstName)) {
366 firstName =
367 dataOperand.getName().str() + "_split_0_" + std::to_string(counter++);
368 }
369 counter = 0;
370 while (deviceOp.lookupSymbol(secondName)) {
371 secondName =
372 dataOperand.getName().str() + "_split_1_" + std::to_string(counter++);
373 }
374
375 // Create the new global operations
376 rewriter.setInsertionPoint(originalGlobal);
377 memref::GlobalOp::create(rewriter, loc, firstName,
378 rewriter.getStringAttr("private"), firstChunkType,
379 firstChunkAttr, true, nullptr);
380
381 memref::GlobalOp::create(rewriter, loc, secondName,
382 rewriter.getStringAttr("private"), secondChunkType,
383 secondChunkAttr, true, nullptr);
384
385 // Create get_global operations for the new data
386 rewriter.setInsertionPoint(op);
387
388 auto firstGetGlobal =
389 memref::GetGlobalOp::create(rewriter, loc, firstChunkType, firstName);
390 auto secondGetGlobal =
391 memref::GetGlobalOp::create(rewriter, loc, secondChunkType, secondName);
392
393 uint32_t baseAddr = op.getAddress();
394
395 AIEX::NpuBlockWriteOp::create(rewriter, loc, baseAddr,
396 firstGetGlobal.getResult(), nullptr, nullptr,
397 nullptr);
398
399 AIEX::NpuBlockWriteOp::create(rewriter, loc, baseAddr + firstChunkSize * 4,
400 secondGetGlobal.getResult(), nullptr, nullptr,
401 nullptr);
402
403 // Replace the original operation
404 rewriter.eraseOp(op);
405
406 LLVM_DEBUG(llvm::outs()
407 << "Split NpuBlockWriteOp with data size: " << dataSizeBytes
408 << " bytes into chunks of " << firstChunkSize << " and "
409 << secondChunkSize << " elements\n");
410
411 return success();
412 }
413};
414
415struct AIENpuToCertPass
416 : xilinx::AIEX::impl::AIENpuToCertBase<AIENpuToCertPass> {
417 void runOnOperation() override {
418 ConversionTarget target(getContext());
419 target.addIllegalOp<AIE::RuntimeSequenceOp>();
420
421 target.addLegalOp<AIEX::CertApplyOffset57Op>();
422 target.addLegalOp<AIEX::CertJobOp>();
423 target.addLegalOp<AIEX::CertMaskWrite32Op>();
424 target.addLegalOp<AIEX::CertUcDmaWriteDesSyncOp>();
425 target.addLegalOp<AIEX::CertUcDmaChainOp>();
426 target.addLegalOp<AIEX::CertUcDmaBdOp>();
427 target.addLegalOp<AIEX::CertWrite32Op>();
428 target.addLegalOp<AIEX::CertWaitTCTSOp>();
429 target.addLegalDialect<AIE::AIEDialect>();
430
431 RewritePatternSet p0(&getContext());
432 p0.insert<RuntimeSequenceToCertJob>(&getContext());
433 p0.insert<NpuAddressPatchToCertApplyOffset57>(&getContext());
434
435 if (failed(applyPartialConversion(getOperation(), target, std::move(p0))))
436 signalPassFailure();
437
438 target.addIllegalOp<AIEX::NpuAddressPatchOp>();
439
440 // patch conversion must come before blockwrite conversion
441 RewritePatternSet p1(&getContext());
442 p1.insert<NpuAddressPatchToCertApplyOffset57>(&getContext());
443
444 if (failed(applyPartialConversion(getOperation(), target, std::move(p1))))
445 signalPassFailure();
446
447 // Split oversized NpuBlockWriteOps before lowering them to cert ops
448 {
449 RewritePatternSet p(&getContext());
450 p.insert<SplitNpuBlockWriteOpPattern>(&getContext());
451 if (failed(applyPatternsGreedily(getOperation(), std::move(p))))
452 signalPassFailure();
453 }
454
455 target.addIllegalOp<AIEX::NpuBlockWriteOp>();
456 target.addIllegalOp<AIEX::NpuMaskWrite32Op>();
457 target.addIllegalOp<AIEX::NpuSyncOp>();
458 target.addIllegalOp<AIEX::NpuWrite32Op>();
459
460 // Run npu to cert conversion patterns
461 {
462 RewritePatternSet p(&getContext());
463 p.insert<NpuBlockWriteToCertUcDma>(&getContext());
464 p.insert<NpuMaskWrite32ToCertMaskWrite32>(&getContext());
465 p.insert<NpuWrite32ToCertWrite32>(&getContext());
466 p.insert<NpuSyncToCertWaitTCTS>(&getContext());
467
468 if (failed(applyPartialConversion(getOperation(), target, std::move(p))))
469 signalPassFailure();
470 }
471
472 // Run the merge pattern for CertUcDmaWriteDesSyncOps
473 {
474 RewritePatternSet p(&getContext());
475 p.insert<MergeConsecutiveCertUcDmaWriteDesSyncOps>(&getContext());
476 if (failed(applyPatternsGreedily(getOperation(), std::move(p))))
477 signalPassFailure();
478 }
479 }
480};
481
482} // namespace
483
484static uint32_t estimateCost(AIEX::CertJobOp op, uint32_t split_target,
485 Block::iterator &split_iter) {
486 // assume a job is on its own page
487 uint32_t text_cost = 32; // page header
488 uint32_t data_cost = 0;
489 uint32_t split_cost = 0;
490 for (auto &o : op.getBody().front().getOperations()) {
491 if (!split_cost && (text_cost + data_cost) >= split_target) {
492 split_iter = Block::iterator(&o);
493 split_cost = text_cost + data_cost;
494 }
495 if (isa<AIEX::CertLocalBarrierOp>(o)) {
496 text_cost += 8; // local barrier
497 } else if (isa<AIEX::CertRemoteBarrierOp>(o)) {
498 text_cost += 8; // remote barrier
499 } else if (isa<AIEX::CertWaitTCTSOp>(o)) {
500 text_cost += 8; // wait tct
501 } else if (isa<AIEX::CertMaskWrite32Op>(o)) {
502 text_cost += 16; // mask write
503 } else if (isa<AIEX::CertWrite32Op>(o)) {
504 text_cost += 12; // write
505 } else if (isa<AIEX::CertApplyOffset57Op>(o)) {
506 text_cost += 16; // apply offset
507 } else if (auto syncOp = dyn_cast<AIEX::CertUcDmaWriteDesSyncOp>(o)) {
508 text_cost += 16; // write des sync
509 // find the uc_dma_chain
510 StringRef sym_name = syncOp.getSymbol();
511 auto chain = dyn_cast_if_present<AIEX::CertUcDmaChainOp>(
512 op->getParentOfType<AIE::DeviceOp>().lookupSymbol(sym_name));
513 if (!chain)
514 continue;
515 for (auto bdOp : chain.getBody().front().getOps<AIEX::CertUcDmaBdOp>()) {
516 data_cost += 16; // bd op
517 StringRef data_sym_name = bdOp.getRemoteAddress();
518 auto global = dyn_cast_if_present<memref::GlobalOp>(
519 op->getParentOfType<AIE::DeviceOp>().lookupSymbol(data_sym_name));
520 if (!global)
521 continue;
522 auto initVal = global.getInitialValue();
523 if (!initVal)
524 continue;
525 auto data = dyn_cast<DenseIntElementsAttr>(*initVal);
526 if (!data)
527 continue;
528 data_cost += data.getNumElements() * 4; // 4 bytes per element
529 }
530 }
531 }
532 return text_cost + data_cost;
533}
534
535namespace {
536struct SplitCertJobOpPattern : OpRewritePattern<AIEX::CertJobOp> {
537 using OpRewritePattern::OpRewritePattern;
538
539 LogicalResult matchAndRewrite(AIEX::CertJobOp op,
540 PatternRewriter &rewriter) const override {
541
542 constexpr uint32_t split_threshold = cert_page_size;
543
544 Block::iterator split_iter;
545 uint32_t cost = estimateCost(op, cert_page_size / 2, split_iter);
546 LLVM_DEBUG(llvm::outs() << "Estimate cost for job: " << op.getJobId()
547 << " is " << cost << "\n");
548
549 if (cost < split_threshold)
550 return failure();
551
552 auto loc = op.getLoc();
553 op->getParentOfType<AIE::DeviceOp>().walk([&](AIEX::CertJobOp certJobOp) {
554 if (certJobOp.getJobId() > op.getJobId())
555 certJobOp.setJobId(certJobOp.getJobId() + 1);
556 });
557
558 // split the job
559 auto jobId = op.getJobId();
560 auto newJobOp0 = AIEX::CertJobOp::create(rewriter, loc, jobId);
561 auto newJobOp1 = AIEX::CertJobOp::create(rewriter, loc, jobId + 1);
562
563 newJobOp0.getBody().push_back(new Block());
564 rewriter.setInsertionPointToStart(&newJobOp0.getBody().front());
565 for (Block::iterator oi = op.getBody().front().getOperations().begin();
566 oi != split_iter; ++oi) {
567 rewriter.clone(*oi);
568 }
569 AIEX::CertJobOp::ensureTerminator(newJobOp0.getBody(), rewriter, loc);
570
571 newJobOp1.getBody().push_back(new Block());
572 rewriter.setInsertionPointToStart(&newJobOp1.getBody().front());
573 for (Block::iterator oi = split_iter;
574 oi != op.getBody().front().getOperations().end(); ++oi) {
575 rewriter.clone(*oi);
576 }
577
578 rewriter.eraseOp(op);
579 return success();
580 }
581};
582
583struct AIECertPagesPass
584 : xilinx::AIEX::impl::AIECertPagesBase<AIECertPagesPass> {
585 void runOnOperation() override {
586 // First apply the blockwrite splitting pattern
587 RewritePatternSet p0(&getContext());
588 p0.insert<SplitNpuBlockWriteOpPattern>(&getContext());
589 if (failed(applyPatternsGreedily(getOperation(), std::move(p0))))
590 signalPassFailure();
591
592 // Then apply the job splitting pattern
593 RewritePatternSet p1(&getContext());
594 p1.insert<SplitCertJobOpPattern>(&getContext());
595 if (failed(applyPatternsGreedily(getOperation(), std::move(p1))))
596 signalPassFailure();
597 }
598};
599
600} // namespace
601
602std::unique_ptr<OperationPass<AIE::DeviceOp>> AIEX::createAIENpuToCertPass() {
603 return std::make_unique<AIENpuToCertPass>();
604}
605
606std::unique_ptr<OperationPass<AIE::DeviceOp>> AIEX::createAIECertPagesPass() {
607 return std::make_unique<AIECertPagesPass>();
608}
std::unique_ptr< mlir::OperationPass< AIE::DeviceOp > > createAIENpuToCertPass()
std::unique_ptr< mlir::OperationPass< AIE::DeviceOp > > createAIECertPagesPass()
const AIETargetModel & getTargetModel(mlir::Operation *op)