MLIR-AIE
TranslateAIEVecToCpp.cpp
Go to the documentation of this file.
1//===- TranslateAIEVecToCpp.cpp - AIE vector dialect to C++ -----*- C++ -*-===//
2//
3// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7// (c) Copyright 2022 Xilinx Inc.
8// (c) Copyright 2023 Advanced Micro Devices, Inc.
9//
10//===----------------------------------------------------------------------===//
11// This file defines helpers to emit C++ code for AIE vector dialect.
12//===----------------------------------------------------------------------===//
13
15
20
21#include "mlir/Dialect/Arith/IR/Arith.h"
22#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
23#include "mlir/Dialect/EmitC/IR/EmitC.h"
24#include "mlir/Dialect/Func/IR/FuncOps.h"
25#include "mlir/Dialect/Index/IR/IndexOps.h"
26#include "mlir/Dialect/MemRef/IR/MemRef.h"
27#include "mlir/Dialect/SCF/IR/SCF.h"
28#include "mlir/Dialect/Vector/IR/VectorOps.h"
29#include "mlir/IR/BuiltinOps.h"
30#include "mlir/IR/BuiltinTypes.h"
31#include "mlir/IR/Operation.h"
32#include "mlir/Support/IndentedOstream.h"
33
34#include "llvm/ADT/ScopedHashTable.h"
35#include "llvm/ADT/SmallSet.h"
36#include "llvm/ADT/StringRef.h"
37#include "llvm/ADT/TypeSwitch.h"
38#include "llvm/Support/CommandLine.h"
39#include "llvm/Support/Debug.h"
40#include "llvm/Support/FormatVariadic.h"
41#include "llvm/Support/MathExtras.h"
42
43#include <limits>
44#include <numeric>
45#include <optional>
46#include <sstream>
47#include <stack>
48
49#define DEBUG_TYPE "aievec-to-cpp"
50
51using namespace mlir;
52using namespace xilinx;
53using namespace xilinx::aievec;
54using llvm::formatv;
55
56/// Convenience functions to produce interleaved output with functions returning
57/// a LogicalResult. This is different than those in STLExtras as functions used
58/// on each element doesn't return a string.
59template <typename ForwardIterator, typename UnaryFunctor,
60 typename NullaryFunctor>
61LogicalResult interleaveWithError(ForwardIterator begin, ForwardIterator end,
62 UnaryFunctor eachFn,
63 NullaryFunctor betweenFn) {
64 if (begin == end)
65 return success();
66 if (failed(eachFn(*begin)))
67 return failure();
68 ++begin;
69 for (; begin != end; ++begin) {
70 betweenFn();
71 if (failed(eachFn(*begin)))
72 return failure();
73 }
74 return success();
75}
76
77template <typename Container, typename UnaryFunctor, typename NullaryFunctor>
78LogicalResult interleaveWithError(const Container &c, UnaryFunctor eachFn,
79 NullaryFunctor betweenFn) {
80 return interleaveWithError(c.begin(), c.end(), eachFn, betweenFn);
81}
82
83template <typename Container, typename UnaryFunctor>
84LogicalResult interleaveCommaWithError(const Container &c, raw_ostream &os,
85 UnaryFunctor eachFn) {
86 return interleaveWithError(c.begin(), c.end(), eachFn, [&] { os << ", "; });
87}
88
89namespace {
90/// Emitter that uses dialect specific emitters to emit C++ code.
91struct CppEmitter {
92 explicit CppEmitter(raw_ostream &os, bool declareVariablesAtTop, bool aie2);
93
94 /// Emits attribute or returns failure.
95 LogicalResult emitAttribute(Location loc, Attribute attr);
96
97 /// Emits operation 'op' with/without training semicolon or returns failure.
98 LogicalResult emitOperation(Operation &op, bool trailingSemicolon);
99
100 /// Generate the C++ type name for a given MLIR type. Name generation can
101 /// fail, returning a value-less optional string. stdintType is true when the
102 /// type is from stdint.h, and isAcc is true if we want to generate a name
103 /// for a vector type that should be stored in an accumulator.
104 std::optional<std::string> genCppTypeName(Type type, bool stdintType = true,
105 bool isAcc = false);
106
107 /// Emits type 'type' or returns failure. stdintType is true when the
108 /// type is from stdint.h
109 LogicalResult emitType(Location loc, Type type, bool stdintType = true,
110 bool isAcc = false);
111
112 /// Emits array of types as a std::tuple of the emitted types.
113 /// - emits void for an empty array;
114 /// - emits the type of the only element for arrays of size one;
115 /// - emits a std::tuple otherwise;
116 LogicalResult emitTypes(Location loc, ArrayRef<Type> types);
117
118 /// Emits array of types as a std::tuple of the emitted types independently of
119 /// the array size.
120 LogicalResult emitTupleType(Location loc, ArrayRef<Type> types);
121
122 /// Emits an assignment for a variable which has been declared previously.
123 LogicalResult emitVariableAssignment(OpResult result);
124
125 /// Emits a variable declaration for a result of an operation.
126 LogicalResult emitVariableDeclaration(OpResult result, bool trailingSemicolon,
127 bool isAcc = false);
128
129 /// Emits the variable declaration and assignment prefix for 'op'.
130 /// - emits separate variable followed by std::tie for multi-valued operation;
131 /// - emits single type followed by variable for single result;
132 /// - emits nothing if no value produced by op;
133 /// Emits final '=' operator where a type is produced. Returns failure if
134 /// any result type could not be converted.
135 LogicalResult emitAssignPrefix(Operation &op, bool isAcc = false);
136
137 /// Emits a label for the block.
138 LogicalResult emitLabel(Block &block);
139
140 /// Emits the operands and atttributes of the operation. All operands are
141 /// emitted first and then all attributes in alphabetical order.
142 LogicalResult emitOperandsAndAttributes(Operation &op,
143 ArrayRef<StringRef> exclude = {});
144
145 /// Emits the operands of the operation. All operands are emitted in order.
146 LogicalResult emitOperands(Operation &op);
147
148 /// Return the existing or a new name for a Value.
149 StringRef getOrCreateName(Value val, std::string prefix = "v");
150
151 /// Set the name of the value to an existing name
152 void setName(Value val, StringRef name);
153
154 /// Return a new name that is not associated with any value
155 std::string getNewName(std::string prefix = "v");
156
157 // Set the dim size at position index of the memref to the parameter
158 void setMemRefDimParam(Value memref, unsigned index,
159 const std::string &parameter);
160
161 // For the dynamic shaped memref, return the parametric size at index
162 StringRef getMemRefDimParam(Value memref, unsigned index);
163
164 // Return true if the specified dim of memref is parametric
165 bool isMemRefDimParam(Value memref, unsigned index);
166
167 /// Return the existing or a new label of a Block.
168 StringRef getOrCreateName(Block &block, std::string prefix = "label");
169
170 /// Whether to map an mlir integer to a unsigned integer in C++.
171 bool shouldMapToUnsigned(IntegerType::SignednessSemantics val);
172
173 /// RAII helper function to manage entering/exiting C++ scopes.
174 struct Scope {
175 Scope(CppEmitter &emitter)
176 : valueMapperScope(emitter.valueMapper),
177 blockMapperScope(emitter.blockMapper), emitter(emitter) {
178 emitter.valueInScopeCount.push(emitter.valueInScopeCount.top());
179 emitter.labelInScopeCount.push(emitter.labelInScopeCount.top());
180 }
181 ~Scope() {
182 emitter.valueInScopeCount.pop();
183 emitter.labelInScopeCount.pop();
184 }
185
186 private:
187 llvm::ScopedHashTableScope<Value, std::string> valueMapperScope;
188 llvm::ScopedHashTableScope<Block *, std::string> blockMapperScope;
189 CppEmitter &emitter;
190 };
191
192 /// Returns wether the Value is assigned to a C++ variable in the scope.
193 bool hasValueInScope(Value val);
194
195 // Returns whether a label is assigned to the block.
196 bool hasBlockLabel(Block &block);
197
198 /// Returns the output stream.
199 raw_indented_ostream &ostream() { return os; }
200
201 /// Returns if all variables for op results and basic block arguments need to
202 /// be declared at the beginning of a function.
203 bool shouldDeclareVariablesAtTop() { return declareVariablesAtTop; }
204
205 bool aie2() { return aie2_; }
206
207private:
208 using ValueMapper = llvm::ScopedHashTable<Value, std::string>;
209 using BlockMapper = llvm::ScopedHashTable<Block *, std::string>;
210
211 /// Output stream to emit to.
212 raw_indented_ostream os;
213
214 /// Boolean to enforce that all variables for op results and block
215 /// arguments are declared at the beginning of the function. This also
216 /// includes results from ops located in nested regions.
217 bool declareVariablesAtTop;
218
219 /// Map from value to name of C++ variable that contain the name.
220 ValueMapper valueMapper;
221
222 /// Map from block to name of C++ label.
223 BlockMapper blockMapper;
224
225 /// Map from a dynamic memref index to the parameter
226 DenseMap<std::pair<Value, unsigned>, std::string> paramIndexMapper;
227
228 /// The number of values in the current scope. This is used to declare the
229 /// names of values in a scope.
230 std::stack<int64_t> valueInScopeCount;
231 std::stack<int64_t> labelInScopeCount;
232
233 llvm::SmallSet<StringRef, 16> includeNames;
234
235 bool aie2_;
236};
237} // namespace
238
239//===----------------------------------------------------------------------===//
240// Helper Routines
241//===----------------------------------------------------------------------===//
242
243// Return true if this op should be skipped in codegen. Ops like memref::DimOp,
244// aievec::srs and aievec::ups for fp operands fall in this category.
245// Certain ops should only be emitted if they are used in the computation of an
246// op that is not skipped. An example of such an op is the index defining op for
247// memref::DimOp. Since DimOp is skipped, we don't need to generate the index
248// defining op. If checkStrongLiveness is true, then also skip such ops.
249static bool skippedOp(Operation *op, CppEmitter &emitter,
250 bool checkStrongLiveness = true) {
251 // Ops that must be skipped:
252 bool skip =
253 TypeSwitch<Operation *, bool>(op)
254 // skip op 1 : all dim op and assume_alignement op
255 .Case<memref::DimOp, memref::AssumeAlignmentOp>(
256 [](auto op) { return true; })
257 // skip op 2 : some aievec::srs for float types
258 .Case<aievec::SRSOp>([&](auto srsOp) {
259 // Get the datatype of the source accumulator and result vector
260 auto accType = cast<VectorType>(srsOp.getSource().getType());
261 Type eltType = accType.getElementType();
262 // If the underlying element types are float, then we do not really
263 // need an srs op if source of srsOp has only one use.
264 Value source = srsOp.getSource();
265 if (!emitter.aie2() && llvm::isa<FloatType>(eltType) &&
266 source.getDefiningOp()->hasOneUse()) {
267 StringRef srcName = emitter.getOrCreateName(source);
268 emitter.setName(srsOp->getResult(0), srcName);
269 return true;
270 }
271 return false;
272 })
273 // skip op 3 : some aievec::ups for float ops
274 .Case<aievec::UPSOp>([&](auto upsOp) {
275 // Get the datatype of the source vector and result accumulator
276 auto accType = cast<VectorType>(upsOp.getResult().getType());
277 Type eltType = accType.getElementType();
278 // If the underlying element types are float, then we do not really
279 // need a ups op if the source accumulator has only one use.
280 Value source = upsOp.getSource();
281 if (!emitter.aie2() && llvm::isa<FloatType>(eltType) &&
282 source.getDefiningOp()->hasOneUse()) {
283 StringRef srcName = emitter.getOrCreateName(source);
284 emitter.setName(upsOp->getResult(0), srcName);
285 return true;
286 }
287 return false;
288 })
289 // skip op 4 : some aievec::cast, when it represents a move to/from
290 // accumulator, but the type is necessarily an
291 // accumulator.
292 .Case<aievec::CastOp>([&](auto castOp) {
293 Value source = castOp.getSource();
294 auto srcVTy = cast<VectorType>(source.getType());
295 auto resVTy = cast<VectorType>(castOp.getResult().getType());
296 if (srcVTy.getElementType() == resVTy.getElementType()) {
297 auto iElTy = dyn_cast<IntegerType>(srcVTy.getElementType());
298 if (iElTy && iElTy.getWidth() == 64) {
299 StringRef srcName = emitter.getOrCreateName(source);
300 emitter.setName(castOp->getResult(0), srcName);
301 return true;
302 }
303 }
304 return false;
305 })
306 // skip op 5: ignore casts between index and integer types.
307 .Case<arith::IndexCastOp, arith::IndexCastUIOp, index::CastSOp,
308 index::CastUOp>([&](auto idxCastOp) {
309 Value source = idxCastOp->getOperand(0);
310 StringRef srcName = emitter.getOrCreateName(source);
311 emitter.setName(idxCastOp->getResult(0), srcName);
312 return true;
313 })
314 // skip op 6: ignore vector shape cast operations.
315 .Case<vector::ShapeCastOp>([&](auto castOp) {
316 Value source = castOp.getSource();
317 StringRef srcName = emitter.getOrCreateName(source);
318 emitter.setName(castOp.getResult(), srcName);
319 return true;
320 })
321 // skip op 7: ignore unrealized conversion casts. These will come
322 // from non-C types interfacing with emitc for math ops.
323 .Case<UnrealizedConversionCastOp>([&](auto uccOp) {
324 auto inputs = uccOp.getInputs();
325 auto outputs = uccOp.getOutputs();
326 if (inputs.size() > 1 || inputs.size() > 1)
327 return false;
328 StringRef inputName = emitter.getOrCreateName(inputs[0]);
329 emitter.setName(outputs[0], inputName);
330 return true;
331 })
332 .Default([&](Operation *) { return false; });
333
334 // Ops whose strong liveness must be determined
335 checkStrongLiveness &= isa<arith::ConstantOp>(op);
336
337 // If we already know that this op must be skipped, or that don't need to
338 // check strong liveness of the op, we are done
339 if (skip || !checkStrongLiveness)
340 return skip;
341
342 // We need to check if this op is strongly live. i.e., its result is used in
343 // an op that is not skipped. We iterate over all its immediate users, and
344 // return false if any of them is not skipped in codegen.
345 for (auto user : op->getUsers()) {
346 if (!skippedOp(user, emitter, false))
347 return false;
348 }
349 return true;
350}
351
352// Print the memref dims, if the memref has dynamic shape
353static LogicalResult parseMemRefDynamicDims(CppEmitter &emitter,
354 func::FuncOp func) {
355 // Step1: Walk over all the operations that are memref dimOp
356 func.walk([&](Operation *Op) {
357 if (auto op = dyn_cast<memref::DimOp>(Op)) {
358 // Extract the source memref, result, and index
359 Value source = op.getSource();
360 Value result = op.getResult();
361 auto indexOp = dyn_cast<arith::ConstantOp>(op.getIndex().getDefiningOp());
362 assert(indexOp && "Failed to get the index value of dimOp");
363 // Get the constant index value
364 APInt idxVal = llvm::cast<IntegerAttr>(indexOp.getValue()).getValue();
365 unsigned index = idxVal.getZExtValue();
366 // Assign a printable name to the result
367 StringRef name = emitter.getOrCreateName(result, "m");
368 emitter.setMemRefDimParam(source, index, name.str());
369 }
370 });
371
372 // Step2: Iterate over all the block arguments, and make sure that the memref
373 // args have a parameter associated with the dynamic sized dimension
374 for (BlockArgument arg : func.getArguments()) {
375 auto argType = llvm::dyn_cast<MemRefType>(arg.getType());
376 if (!argType)
377 continue;
378 for (unsigned dim = 0; dim < argType.getRank(); ++dim) {
379 if (argType.isDynamicDim(dim)) {
380 // If the dynamic dim size is not already parametrized, assign it one
381 if (!emitter.isMemRefDimParam(arg, dim)) {
382 std::string name = emitter.getNewName("m");
383 emitter.setMemRefDimParam(arg, dim, name);
384 }
385 }
386 }
387 }
388 return success();
389}
390
391// Print the memref dims, if the memref has dynamic shape
392static LogicalResult printMemRefDims(CppEmitter &emitter, BlockArgument arg) {
393 raw_indented_ostream &os = emitter.ostream();
394 if (auto argType = llvm::dyn_cast<MemRefType>(arg.getType())) {
395 for (unsigned dim = 0; dim < argType.getRank(); ++dim) {
396 if (argType.isDynamicDim(dim)) {
397 StringRef param = emitter.getMemRefDimParam(arg, dim);
398 os << ", size_t " << param;
399 }
400 }
401 }
402 return success();
403}
404
405// Get the linearized access for the source memref
406static LogicalResult createLinearizedAccess(CppEmitter &emitter, Value source,
407 SmallVector<Value, 4> indices,
408 std::string &access) {
409 auto memRefType = llvm::dyn_cast<MemRefType>(source.getType());
410 assert(memRefType &&
411 "cannot creating linearized expression for non-memref type");
412 ArrayRef<int64_t> stride = memRefType.getShape();
413
414 // The stride and indices size must match
415 if (stride.size() != indices.size() ||
416 static_cast<int64_t>(stride.size()) != memRefType.getRank())
417 return failure();
418
419 // A stride contains two parts:
420 int64_t numPart = 1; // for static shaped dims
421 std::string paramPart; // for dynamic shaped dims
422
423 SmallVector<std::string, 4> accessVec;
424 for (int dim = memRefType.getRank() - 1; dim >= 0; --dim) {
425 // All the indices in the access expression must already be emitted
426 if (!emitter.hasValueInScope(indices[dim]))
427 return failure();
428
429 // Form the access string for this dimension
430 std::string cur;
431 if (!paramPart.empty())
432 cur = paramPart + "*";
433 if (numPart > 1)
434 cur += std::to_string(numPart) + "*";
435 cur += emitter.getOrCreateName(indices[dim]);
436 accessVec.push_back(cur);
437
438 // Now update the numPart and paramPart to form the stride for the next
439 // dimension
440 if (memRefType.isDynamicDim(dim)) {
441 StringRef param = emitter.getMemRefDimParam(source, dim);
442 paramPart = param.str() + (paramPart.empty() ? "" : "*" + paramPart);
443 } else
444 numPart *= stride[dim];
445 }
446 // All the strides are in accessVec. Compose them
447 while (!accessVec.empty()) {
448 access += (access.empty() ? "" : "+") + accessVec.back();
449 accessVec.pop_back();
450 }
451 // If the access is empty, make '0' as default access
452 if (access.empty())
453 access = "0";
454
455 return success();
456}
457
458// Return true if the array accessed by this value is readonly
459static bool isReadOnly(Value read) {
460 return std::none_of(
461 read.getUsers().begin(), read.getUsers().end(),
462 [](auto *user) { return isa<vector::TransferWriteOp>(user); });
463}
464
465//===----------------------------------------------------------------------===//
466// Print non-AIE dialect ops
467//===----------------------------------------------------------------------===//
468
469// Get the loop trip count of the for operator
470static std::pair<bool, int64_t> getTripCount(scf::ForOp forOp) {
471 // If the upper and lower bounds are constant values, return the difference.
472 auto lb = forOp.getLowerBound().getDefiningOp<arith::ConstantOp>();
473 if (auto ub = forOp.getUpperBound().getDefiningOp<arith::ConstantOp>();
474 lb && ub) {
475 APInt ubValue = llvm::cast<IntegerAttr>(ub.getValue()).getValue();
476 APInt lbValue = llvm::cast<IntegerAttr>(lb.getValue()).getValue();
477 return std::make_pair(true,
478 ubValue.getSExtValue() - lbValue.getSExtValue());
479 }
480 return std::make_pair(false, 0);
481}
482
483// Get the loop step size of the for operator
484static std::pair<bool, int64_t> getStep(scf::ForOp forOp) {
485 if (auto step = forOp.getStep().getDefiningOp<arith::ConstantOp>()) {
486 APInt stepValue = llvm::cast<IntegerAttr>(step.getValue()).getValue();
487 return std::make_pair(true, stepValue.getSExtValue());
488 }
489 return std::make_pair(false, 0);
490}
491
492// Return the operator string of the Arith dialect binary operator
493template <typename T>
494static StringRef getOperator(T binOp) {
495 if (isa<arith::AddIOp>(binOp) || isa<arith::AddFOp>(binOp))
496 return " + ";
497 if (isa<arith::MulIOp>(binOp) || isa<arith::MulFOp>(binOp))
498 return " * ";
499 if (isa<arith::SubIOp>(binOp) || isa<arith::SubFOp>(binOp))
500 return " - ";
501 if (isa<arith::DivFOp>(binOp) || isa<arith::DivUIOp>(binOp) ||
502 isa<arith::DivSIOp>(binOp))
503 return " / ";
504 if (isa<arith::RemSIOp>(binOp))
505 return " % ";
506 if (isa<arith::CmpIOp>(binOp)) {
507 auto cmpOp = cast<arith::CmpIOp>(binOp);
508 switch (cmpOp.getPredicate()) {
509 case arith::CmpIPredicate::eq:
510 return " == ";
511 case arith::CmpIPredicate::ne:
512 return " != ";
513 case arith::CmpIPredicate::sge:
514 case arith::CmpIPredicate::uge:
515 return " >= ";
516 case arith::CmpIPredicate::sgt:
517 case arith::CmpIPredicate::ugt:
518 return " > ";
519 case arith::CmpIPredicate::sle:
520 case arith::CmpIPredicate::ule:
521 return " <= ";
522 case arith::CmpIPredicate::slt:
523 case arith::CmpIPredicate::ult:
524 return " < ";
525 }
526 }
527 llvm_unreachable("Cannot print the operation of binary operator");
528}
529
530// Print the Arith dialect binary operation
531template <typename T>
532static LogicalResult printOperation(CppEmitter &emitter, T binOp) {
533 if (failed(emitter.emitAssignPrefix(*binOp)))
534 return failure();
535 raw_indented_ostream &os = emitter.ostream();
536 auto lhs = binOp.getLhs();
537 if (!emitter.hasValueInScope(lhs))
538 return failure();
539 os << emitter.getOrCreateName(lhs);
540 os << getOperator(binOp);
541 auto rhs = binOp.getRhs();
542 if (!emitter.hasValueInScope(rhs))
543 return failure();
544 os << emitter.getOrCreateName(rhs);
545
546 return success();
547}
548
549// Print the ternary operation
550static LogicalResult printOperation(CppEmitter &emitter,
551 arith::SelectOp selectOp) {
552 if (failed(emitter.emitAssignPrefix(*selectOp)))
553 return failure();
554
555 auto cond = selectOp.getCondition();
556 if (!emitter.hasValueInScope(cond))
557 return failure();
558 auto tVal = selectOp.getTrueValue();
559 if (!emitter.hasValueInScope(tVal))
560 return failure();
561 auto fVal = selectOp.getFalseValue();
562 if (!emitter.hasValueInScope(fVal))
563 return failure();
564
565 raw_indented_ostream &os = emitter.ostream();
566 os << emitter.getOrCreateName(cond) << " ? " << emitter.getOrCreateName(tVal)
567 << " : " << emitter.getOrCreateName(fVal);
568
569 return success();
570}
571
572//===----------------------------------------------------------------------===//
573// Print AIE dialect ops
574//===----------------------------------------------------------------------===//
575
576// Print the AIE dialect UPD op
577static LogicalResult printOperation(CppEmitter &emitter, aievec::UPDOp updOp) {
578 Value source = updOp.getSource();
579 // If the source is not already emitted, error out
580 if (!emitter.hasValueInScope(source))
581 return failure();
582
583 // Construct the access expression using memref shape and indices
584 auto indices = updOp.getIndices();
585 std::string access;
586 if (failed(createLinearizedAccess(emitter, source, indices, access)))
587 return failure();
588
589 raw_indented_ostream &os = emitter.ostream();
590 Value result = updOp.getResult();
591 auto resultType = llvm::cast<VectorType>(result.getType());
592 int32_t vecSizeInBits = getVectorSizeInBits(resultType);
593 int32_t elementSizeInBits = getElementSizeInBits(resultType);
594
595 // If the UPD op had an offset, add it to the access expr
596 if (updOp.getOffset() != 0) {
597 if (std::abs(updOp.getOffset()) % elementSizeInBits)
598 return failure();
599 int32_t updOffset = updOp.getOffset() / elementSizeInBits;
600 access += updOffset > 0 ? " + " : " - ";
601 access += std::to_string(std::abs(updOffset));
602 }
603
604 // If the vector size to be loaded is less than or equal to 256, we
605 // can just do a direct memory copy. If the translation is for AIE2,
606 // this number should be doubled
607 if (vecSizeInBits <= (emitter.aie2() ? 1024 : 256)) {
608 // Print the lhs
609 if (failed(emitter.emitAssignPrefix(*updOp)))
610 return failure();
611 os << "*(";
612 if (failed(emitter.emitType(updOp->getLoc(), resultType)))
613 return failure();
614 os << " *)";
615 os << "(";
616 os << emitter.getOrCreateName(source);
617 if (!access.empty())
618 os << " + " << access;
619 os << ")";
620 } else {
621 Value vector = updOp.getVector();
622 // If this is the first upd op (between idx=0 and idx=1), then generate
623 // declaration
624 if (!vector) {
625 if (!emitter.shouldDeclareVariablesAtTop()) {
626 if (failed(emitter.emitVariableDeclaration(updOp->getResult(0), true)))
627 return failure();
628 }
629 } else {
630 if (!emitter.hasValueInScope(vector))
631 return failure();
632 emitter.setName(updOp->getResult(0), emitter.getOrCreateName(vector));
633 }
634
635 // The granularity of upd is 128/256/512 for 256/512/1024 bit values
636 int32_t granularity = vecSizeInBits == 256 ? 128
637 : vecSizeInBits == 512 ? 256
638 : 512;
639 // Create a vector type with number of lanes halved of the result
640 unsigned lanes = getVectorLaneSize(resultType);
641 assert(lanes % 2 == 0 &&
642 "The number of vector lanes of UPD result is not even");
643 SmallVector<int64_t, 4> updShape = {lanes / 2};
644 VectorType updType = VectorType::get(updShape, resultType.getElementType());
645
646 if (!emitter.hasValueInScope(result))
647 return failure();
648 // If the source array of upd is read-only, load from restrict pointer
649 bool readOnly = isReadOnly(source);
650 std::string restrictPrefix =
651 readOnly ? "r_" + emitter.getOrCreateName(result).str() + "_" : "";
652 // Create a restrict pointer
653 if (readOnly && !vector) {
654 if (failed(emitter.emitType(updOp->getLoc(), source.getType())))
655 return failure();
656 os << " " << restrictPrefix << emitter.getOrCreateName(source);
657 os << " = ";
658 os << emitter.getOrCreateName(source);
659 os << ";\n";
660 }
661 os << emitter.getOrCreateName(result);
662 os << " = ";
663 os << (granularity == 128 ? "upd_v"
664 : granularity == 256 ? "upd_w"
665 : "upd_x");
666 os << "(";
667 os << emitter.getOrCreateName(result);
668 os << ", ";
669 os << std::to_string(updOp.getIndex());
670 os << ", ";
671 os << "*(";
672 if (failed(emitter.emitType(updOp->getLoc(), updType)))
673 return failure();
674 os << " *)";
675 os << "(";
676 os << restrictPrefix << emitter.getOrCreateName(source);
677 if (!access.empty())
678 os << " + " << access;
679 os << ")";
680 os << ")";
681 }
682
683 return success();
684}
685
686// Print the UPS intrinsic
687static LogicalResult printOperation(CppEmitter &emitter, aievec::UPSOp upsOp) {
688 Value source = upsOp.getSource();
689 int32_t shift = upsOp.getShift();
690
691 raw_indented_ostream &os = emitter.ostream();
692
693 // Generate the initialization for the accumulator
694 if (failed(emitter.emitAssignPrefix(*upsOp, /*isAcc=*/true)))
695 return failure();
696
697 // The source vector should have already been emitted
698 if (!emitter.hasValueInScope(source))
699 return failure();
700
701 auto accType = llvm::cast<VectorType>(upsOp.getResult().getType());
702 unsigned lanes = getVectorLaneSize(accType);
703 Type eltType = accType.getElementType();
704
705 // If the underlying element types are float, then we do not really need a
706 // ups op. We can simply generate an assignment
707 if (!emitter.aie2() && llvm::isa<FloatType>(eltType)) {
708 os << emitter.getOrCreateName(source);
709 return success();
710 }
711
712 // Determine if it is lups or ups based on accumulator type
713 auto iType = llvm::dyn_cast<IntegerType>(eltType);
714 auto fType = llvm::dyn_cast<FloatType>(eltType);
715 if (iType) {
716 if (iType.getWidth() == 80)
717 os << "l";
718 }
719
720 if (iType && emitter.aie2()) {
721 os << "ups_to_v" << lanes << "acc" << iType.getWidth();
722 } else if (fType && emitter.aie2()) {
723 os << "ups_to_v16accfloat";
724 } else {
725 os << "ups";
726 }
727
728 os << "(";
729 os << emitter.getOrCreateName(source);
730 if (!(fType && emitter.aie2())) {
731 os << ", ";
732 os << std::to_string(shift);
733 }
734 os << ")";
735
736 return success();
737}
738
739// Generate the cast intrinsic for AIE2
740static LogicalResult printOperation(CppEmitter &emitter,
741 aievec::CastOp castOp) {
742 if (!emitter.aie2()) {
743 return failure();
744 }
745
746 // The source should have already been emitted
747 Value source = castOp.getSource();
748 if (!emitter.hasValueInScope(source))
749 return failure();
750
751 bool isResAcc = castOp.getIsResAcc();
752
753 // Generate the initialization for the vector
754 if (failed(emitter.emitAssignPrefix(*castOp, /*isAcc=*/isResAcc)))
755 return failure();
756
757 // Get the datatype of the source and result vector
758 auto resType = llvm::cast<VectorType>(castOp->getResult(0).getType());
759 Type eltType = resType.getElementType();
760 unsigned lanes = getVectorLaneSize(resType);
761
762 raw_indented_ostream &os = emitter.ostream();
763
764 unsigned width;
765 if (isResAcc) {
766 if (llvm::isa<FloatType>(eltType))
767 os << "v" << lanes << "accfloat";
768 else {
769 width = getElementSizeInBits(resType);
770 os << "v" << lanes << "acc" << width;
771 }
772 } else if (llvm::isa<FloatType>(eltType)) {
773 width = llvm::cast<FloatType>(eltType).getWidth();
774 os << "v" << lanes;
775 if (width == 16)
776 os << "bfloat16";
777 else
778 os << "float";
779 } else {
780 width = getElementSizeInBits(resType);
781 os << "v" << lanes << "int" << width;
782 }
783 os << "(";
784 os << emitter.getOrCreateName(source);
785 os << ")";
786 return success();
787}
788
789// Generate the unpack intrinsic for AIE2
790static LogicalResult printOperation(CppEmitter &emitter,
791 aievec::UnpackOp unpackOp) {
792
793 // The source should have already been emitted
794 Value source = unpackOp.getSource();
795 if (!emitter.hasValueInScope(source))
796 return failure();
797
798 // Generate the initialization for the vector
799 if (failed(emitter.emitAssignPrefix(*unpackOp, /*isAcc=*/false)))
800 return failure();
801
802 raw_indented_ostream &os = emitter.ostream();
803
804 os << "unpack(";
805 os << emitter.getOrCreateName(source);
806 os << ")";
807 return success();
808}
809
810// Generate the srs intrinsic
811static LogicalResult printOperation(CppEmitter &emitter, aievec::SRSOp srsOp) {
812 Value source = srsOp.getSource();
813 Value shift = srsOp.getShift();
814
815 // Get the datatype of the source accumulator and result vector
816 auto accType = llvm::cast<VectorType>(srsOp.getSource().getType());
817 auto resType = llvm::cast<VectorType>(srsOp->getResult(0).getType());
818 Type eltType = accType.getElementType();
819 unsigned lanes = getVectorLaneSize(resType);
820
821 raw_indented_ostream &os = emitter.ostream();
822
823 // Generate the initialization for the vector
824 if (failed(emitter.emitAssignPrefix(*srsOp)))
825 return failure();
826
827 // The source accumulator should have already been emitted
828 if (!emitter.hasValueInScope(source))
829 return failure();
830
831 // If the underlying element types are float, then we do not really need an
832 // srs op. We can simply generate an assignment
833 if (llvm::isa<FloatType>(eltType)) {
834 if (emitter.aie2()) {
835 if (unsigned width = getElementSizeInBits(resType); width == 32)
836 os << "srs";
837 else if (width == 16)
838 os << "to_v16bfloat16";
839 os << "(";
840 os << emitter.getOrCreateName(source);
841 os << ")";
842 } else
843 os << emitter.getOrCreateName(source);
844
845 return success();
846 }
847
848 // Otheriwse, get the datatype width of the source accumulator and result
849 // vector
850 unsigned resultWidth = getElementSizeInBits(accType);
851 unsigned resWidth = getElementSizeInBits(resType);
852 unsigned srcWidth = 0;
853 if (auto iType = llvm::dyn_cast<IntegerType>(eltType))
854 srcWidth = iType.getWidth();
855
856 // Based on the datatypes, generate srs version
857 if ((srcWidth == 80 && resultWidth == 64) ||
858 (srcWidth == 48 && resultWidth == 32))
859 os << "l";
860 else if (srcWidth == 48 && resultWidth == 8)
861 os << "b";
862
863 if (emitter.aie2())
864 os << "srs_to_v" << std::to_string(lanes) << "int"
865 << std::to_string(resWidth);
866 else
867 os << "srs";
868
869 os << "(";
870 os << emitter.getOrCreateName(source);
871 os << ", ";
872 if (llvm::cast<IntegerType>(srsOp.getShift().getType()).getWidth() != 32)
873 os << "(int32_t)";
874 os << emitter.getOrCreateName(shift);
875 os << ")";
876
877 return success();
878}
879
880// Generate the broadcast intrinsic
881static LogicalResult printOperation(CppEmitter &emitter,
882 aievec::BroadcastOp broadcastOp) {
883 Value source = broadcastOp.getSource();
884 int8_t idx = broadcastOp.getIdx();
885
886 raw_indented_ostream &os = emitter.ostream();
887
888 // Generate the initialization for the vector
889 if (failed(emitter.emitAssignPrefix(*broadcastOp)))
890 return failure();
891
892 // The source vector should have already been emitted
893 if (!emitter.hasValueInScope(source))
894 return failure();
895
896 os << "broadcast_elem";
897 os << "(";
898 os << emitter.getOrCreateName(source);
899 os << ", ";
900 os << std::to_string(idx);
901 os << ")";
902
903 return success();
904}
905
906// Generate the broadcast_scalar intrinsic
907static LogicalResult
908printOperation(CppEmitter &emitter,
909 aievec::BroadcastScalarOp broadcastScalarOp) {
910 auto source = broadcastScalarOp.getSource();
911 auto resType =
912 llvm::cast<VectorType>(broadcastScalarOp.getResult().getType());
913 unsigned width = getElementSizeInBits(resType);
914 unsigned lanes = getVectorLaneSize(resType);
915 raw_indented_ostream &os = emitter.ostream();
916
917 // Generate the initialization for the vector
918 if (failed(emitter.emitAssignPrefix(*broadcastScalarOp)))
919 return failure();
920
921 Type eltType = resType.getElementType();
922 os << "broadcast_to_v";
923 if (llvm::isa<IntegerType>(eltType)) {
924 os << lanes << "int";
925 os << width;
926 } else if (width == 16)
927 os << lanes << "bfloat16";
928 else
929 os << lanes << "float";
930 os << "(" << emitter.getOrCreateName(source) << ")";
931
932 return success();
933}
934
935// Generate the ext intrinsic
936template <typename T>
937static LogicalResult printExtOperation(CppEmitter &emitter, T extOp) {
938 Value source = extOp.getSource();
939 int8_t index = extOp.getIndex();
940
941 raw_indented_ostream &os = emitter.ostream();
942
943 // Generate the initialization for the result
944 if (failed(emitter.emitAssignPrefix(*extOp)))
945 return failure();
946
947 if (!emitter.hasValueInScope(source))
948 return failure();
949
950 auto resType = llvm::cast<VectorType>(extOp.getResult().getType());
951 Type eltType = resType.getElementType();
952 unsigned lanes = getVectorLaneSize(resType);
953 unsigned resWidth = getElementSizeInBits(resType);
954
955 // Print the version of ext for AIE2
956 if (emitter.aie2()) {
957 os << "extract_v" << std::to_string(lanes);
958 if (llvm::isa<IntegerType>(eltType))
959 os << "int" << std::to_string(resWidth);
960 else if (resWidth == 16)
961 os << "bfloat16";
962 else
963 os << "float";
964 } else {
965 // Print the version of ext for aie1
966 int32_t vecSizeInBits = getVectorSizeInBits(resType);
967 assert(vecSizeInBits == 128 || vecSizeInBits == 256 ||
968 vecSizeInBits == 512);
969 os << (vecSizeInBits == 128 ? "ext_v"
970 : vecSizeInBits == 256 ? "ext_w"
971 : "ext_x");
972 }
973 os << "(";
974 // The source accumulator should have already been emitted
975 os << emitter.getOrCreateName(source);
976 os << ", ";
977 os << std::to_string(index);
978 os << ")";
979
980 return success();
981}
982
983// Generate the aie2 ext intrinsic
984static LogicalResult printOperation(CppEmitter &emitter, aievec::ExtOp extOp) {
985 if (!emitter.aie2())
986 return failure();
987 return printExtOperation<aievec::ExtOp>(emitter, extOp);
988}
989
990// Generate the aie1 ext intrinsic
991static LogicalResult printOperation(CppEmitter &emitter,
992 aievec::aie1::ExtOp extOp) {
993 if (emitter.aie2())
994 return failure();
995 return printExtOperation<aievec::aie1::ExtOp>(emitter, extOp);
996}
997
998// Generate the concat intrinsic
999static LogicalResult printOperation(CppEmitter &emitter,
1000 aievec::ConcatOp concatOp) {
1001 SmallVector<Value> sources = concatOp.getSources();
1002
1003 raw_indented_ostream &os = emitter.ostream();
1004
1005 // Generate the initialization for the result
1006 if (failed(emitter.emitAssignPrefix(*concatOp)))
1007 return failure();
1008
1009 os << "concat";
1010 os << "(";
1011 // Print the sources sources
1012 bool first = true;
1013 for (auto source : sources) {
1014 // source should have already been emitted
1015 if (!emitter.hasValueInScope(source))
1016 return failure();
1017 if (!first)
1018 os << ", ";
1019 os << emitter.getOrCreateName(source);
1020 first = false;
1021 }
1022 os << ")";
1023
1024 return success();
1025}
1026
1027// Generate the shift intrinsic
1028static LogicalResult printOperation(CppEmitter &emitter,
1029 aievec::ShiftOp shiftOp) {
1030 Value lhs = shiftOp.getLhs();
1031 Value rhs = shiftOp.getRhs();
1032 Value shift = shiftOp.getShift();
1033 bool isAcc = shiftOp.getIsAcc();
1034
1035 raw_indented_ostream &os = emitter.ostream();
1036
1037 // Generate the initialization for the result
1038 if (failed(emitter.emitAssignPrefix(*shiftOp, isAcc)))
1039 return failure();
1040
1041 os << "shift_bytes";
1042 os << "(";
1043 // Print the lhs, rhs and shift
1044 if (!emitter.hasValueInScope(lhs) || !emitter.hasValueInScope(rhs))
1045 return failure();
1046 os << emitter.getOrCreateName(lhs);
1047 os << ", ";
1048 os << emitter.getOrCreateName(rhs);
1049 os << ", static_cast<uint32_t>(";
1050
1051 if (!emitter.hasValueInScope(shift))
1052 return failure();
1053 os << emitter.getOrCreateName(shift);
1054 os << "))";
1055
1056 return success();
1057}
1058
1059// Generate the shuffle intrinsic
1060static LogicalResult printOperation(CppEmitter &emitter,
1061 aievec::ShuffleOp shuffleOp) {
1062 Value lhs = shuffleOp.getLhs();
1063 Value rhs = shuffleOp.getRhs();
1064 aievec::ShuffleMode mode = shuffleOp.getMode();
1065
1066 raw_indented_ostream &os = emitter.ostream();
1067
1068 // Generate the initialization for the result
1069 if (failed(emitter.emitAssignPrefix(*shuffleOp)))
1070 return failure();
1071
1072 os << "shuffle";
1073 os << "(";
1074 if (!emitter.hasValueInScope(lhs))
1075 return failure();
1076 os << emitter.getOrCreateName(lhs);
1077 os << ", ";
1078 if (rhs) {
1079 if (!emitter.hasValueInScope(rhs))
1080 return failure();
1081 os << emitter.getOrCreateName(rhs);
1082 os << ", ";
1083 }
1084 os << "eShuffleMode::shuffle_T" << stringifyEnum(mode).substr(1);
1085 os << ")";
1086
1087 return success();
1088}
1089
1090// Generate the shuffle intrinsic
1091static LogicalResult printOperation(CppEmitter &emitter,
1092 aievec::LegacyShuffleOp shuffleOp) {
1093 Value source = shuffleOp.getSource();
1094 unsigned mode = shuffleOp.getMode();
1095
1096 raw_indented_ostream &os = emitter.ostream();
1097
1098 // Generate the initialization for the result
1099 if (failed(emitter.emitAssignPrefix(*shuffleOp)))
1100 return failure();
1101
1102 os << "shuffle";
1103 os << "(";
1104 // Print the source and mode
1105 // source should have already been emitted
1106 if (!emitter.hasValueInScope(source))
1107 return failure();
1108 os << emitter.getOrCreateName(source);
1109 os << ", ";
1110 os << std::to_string(mode);
1111 os << ")";
1112
1113 return success();
1114}
1115
1116// Generate the select intrinsic
1117static LogicalResult printOperation(CppEmitter &emitter,
1118 aievec::aie1::SelectOp selectOp) {
1119 Value xbuff = selectOp.getXbuff();
1120 assert(xbuff && "xbuff empty in select op");
1121
1122 raw_indented_ostream &os = emitter.ostream();
1123
1124 // Generate the initialization for the result
1125 if (failed(emitter.emitAssignPrefix(*selectOp)))
1126 return failure();
1127
1128 // Determine if we want to geneate select32, or select16, or select8
1129 auto xbuffType = llvm::cast<VectorType>(selectOp.getXbuff().getType());
1130 int32_t elementSizeInBits = getElementSizeInBits(xbuffType);
1131 assert(elementSizeInBits == 16 || elementSizeInBits == 32 ||
1132 elementSizeInBits == 64);
1133 // Print name
1134 os << (elementSizeInBits == 16 ? "select32"
1135 : elementSizeInBits == 32 ? "select16"
1136 : "select8");
1137 os << "(";
1138 // Print select bits
1139 assert(!selectOp.getSelect().empty());
1140 os << selectOp.getSelect();
1141 // xbuff should have already been emitted
1142 if (!emitter.hasValueInScope(xbuff))
1143 return failure();
1144 // Print xbuff
1145 os << ", ";
1146 os << emitter.getOrCreateName(xbuff);
1147 // Print attributes related to lower lane selection
1148 if (!selectOp.getXstart().empty())
1149 os << ", " << selectOp.getXstart();
1150 if (!selectOp.getXoffsets().empty())
1151 os << ", " << selectOp.getXoffsets();
1152 if (!selectOp.getXoffsetsHi().empty())
1153 os << ", " << selectOp.getXoffsetsHi();
1154 if (!selectOp.getXsquare().empty())
1155 os << ", " << selectOp.getXsquare();
1156 // If ybuff is not null, print it
1157 if (selectOp.getYbuff()) {
1158 Value ybuff = selectOp.getYbuff();
1159 // ybuff should have already been emitted
1160 if (!emitter.hasValueInScope(ybuff))
1161 return failure();
1162 // Print ybuff
1163 os << ", ";
1164 os << emitter.getOrCreateName(ybuff);
1165 }
1166 // Print attributes related to higher lane selection
1167 if (!selectOp.getYstart().empty())
1168 os << ", " << selectOp.getYstart();
1169 if (!selectOp.getYoffsets().empty())
1170 os << ", " << selectOp.getYoffsets();
1171 if (!selectOp.getYoffsetsHi().empty())
1172 os << ", " << selectOp.getYoffsetsHi();
1173 if (!selectOp.getYsquare().empty())
1174 os << ", " << selectOp.getYsquare();
1175 os << ")";
1176
1177 return success();
1178}
1179
1180// Generate the pack intrinsic
1181static LogicalResult printOperation(CppEmitter &emitter,
1182 aievec::PackOp packOp) {
1183 Value source = packOp.getSource();
1184
1185 raw_indented_ostream &os = emitter.ostream();
1186
1187 // Generate the initialization for the result
1188 if (failed(emitter.emitAssignPrefix(*packOp)))
1189 return failure();
1190
1191 // Determine the flavor of result
1192 auto sourceType = llvm::cast<VectorType>(packOp.getSource().getType());
1193 Type scalarType = sourceType.getElementType();
1194 os << (scalarType.isUnsignedInteger() ? "upack" : "pack");
1195 os << "(";
1196 // source should have already been emitted
1197 if (!emitter.hasValueInScope(source))
1198 return failure();
1199 os << emitter.getOrCreateName(source);
1200 os << ")";
1201
1202 return success();
1203}
1204
1205// Print lhs or rhs operand of add/sub intrinsic
1206template <typename T>
1207static LogicalResult printAddOrSubOperand(CppEmitter &emitter, T op,
1208 unsigned opNum) {
1209 // We currently only support printing operands 0 and 1
1210 if (opNum > 1)
1211 return failure();
1212
1213 // The operand should have already been emitted
1214 Value operand = opNum == 0 ? op.getLhs() : op.getRhs();
1215 if (!emitter.hasValueInScope(operand))
1216 return failure();
1217
1218 raw_indented_ostream &os = emitter.ostream();
1219
1220 StringRef start = op.getStart(opNum);
1221 StringRef offset = op.getOffset(opNum);
1222 StringRef offsetHi = op.getOffsetHi(opNum);
1223 StringRef square = op.getSquare(opNum);
1224
1225 os << emitter.getOrCreateName(operand);
1226 if (!start.empty())
1227 os << ", " << start;
1228 if (!offset.empty())
1229 os << ", " << offset;
1230 if (!offsetHi.empty())
1231 os << ", " << offsetHi;
1232 if (!square.empty())
1233 os << ", " << square;
1234
1235 return success();
1236}
1237
1238// Print lhs or rhs operand of min/max intrinsic
1239template <typename T>
1240static LogicalResult printMinMaxOperand(CppEmitter &emitter, T op,
1241 unsigned opNum) {
1242 // We currently only support printing operands 0 and 1
1243 if (opNum > 1)
1244 return failure();
1245
1246 // The operand should have already been emitted
1247 Value operand = opNum == 0 ? op.getLhs() : op.getRhs();
1248 if (!emitter.hasValueInScope(operand))
1249 return failure();
1250
1251 raw_indented_ostream &os = emitter.ostream();
1252 os << emitter.getOrCreateName(operand);
1253
1254 return success();
1255}
1256
1257// Print lhs or rhs operand of add_elem/sub_elem intrinsic
1258template <typename T>
1259static LogicalResult printAddElemOrSubElemOperand(CppEmitter &emitter, T op,
1260 unsigned opNum) {
1261 // We currently only support printing operands 0 and 1
1262 if (opNum > 1)
1263 return failure();
1264
1265 // The operand should have already been emitted
1266 Value operand = opNum == 0 ? op.getLhs() : op.getRhs();
1267 if (!emitter.hasValueInScope(operand))
1268 return failure();
1269
1270 raw_indented_ostream &os = emitter.ostream();
1271 os << emitter.getOrCreateName(operand);
1272
1273 return success();
1274}
1275
1276// Print lhs or rhs operand of mul/mac intrinsic
1277template <typename T>
1278static LogicalResult printFMAOrMulOperand(CppEmitter &emitter, T op,
1279 unsigned opNum) {
1280 // We currently only support printing operands 0 and 1
1281 if (opNum > 1)
1282 return failure();
1283
1284 // The operand should have already been emitted
1285 Value operand = opNum == 0 ? op.getLhs() : op.getRhs();
1286 if (!emitter.hasValueInScope(operand))
1287 return failure();
1288
1289 raw_indented_ostream &os = emitter.ostream();
1290
1291 StringRef start = op.getStart(opNum);
1292 StringRef offset = op.getOffset(opNum);
1293 StringRef offsetHi = op.getOffsetHi(opNum);
1294 StringRef step = op.getStep(opNum);
1295 StringRef square = op.getSquare(opNum);
1296
1297 os << emitter.getOrCreateName(operand);
1298 if (!start.empty())
1299 os << ", " << start;
1300 if (!offset.empty())
1301 os << ", " << offset;
1302 if (!offsetHi.empty())
1303 os << ", " << offsetHi;
1304 if (!step.empty())
1305 os << ", " << step;
1306 if (!square.empty())
1307 os << ", " << square;
1308
1309 return success();
1310}
1311
1312// Print lhs or rhs operand of mul_elem/mac_elem intrinsic
1313template <typename T>
1314static LogicalResult printFMAOrMulElemOperand(CppEmitter &emitter, T op,
1315 Type iType, int32_t size,
1316 unsigned opNum) {
1317 // We currently only support printing operands 0 and 1
1318 if (opNum > 1)
1319 return failure();
1320
1321 // The operand should have already been emitted
1322 Value operand = opNum == 0 ? op.getLhs() : op.getRhs();
1323 if (!emitter.hasValueInScope(operand))
1324 return failure();
1325
1326 raw_indented_ostream &os = emitter.ostream();
1327 os << emitter.getOrCreateName(operand);
1328 if (size == 32 && iType)
1329 os << ", " << (opNum == 0 ? "undef_v16int32()" : "broadcast_zero_s32()");
1330
1331 return success();
1332}
1333
1334// Print lhs or rhs operand of mul_conv/mac_conv intrinsic
1335template <typename T>
1336static LogicalResult printFMAOrMulConvOperand(CppEmitter &emitter, T op,
1337 unsigned opNum) {
1338 // We currently only support printing operands 0 and 1
1339 if (opNum > 1)
1340 return failure();
1341
1342 // The operand should have already been emitted
1343 Value operand = opNum == 0 ? op.getLhs() : op.getRhs();
1344 if (!emitter.hasValueInScope(operand))
1345 return failure();
1346
1347 raw_indented_ostream &os = emitter.ostream();
1348 os << emitter.getOrCreateName(operand);
1349
1350 return success();
1351}
1352
1353// Generate the Mul op
1354static LogicalResult printOperation(CppEmitter &emitter,
1355 aievec::aie1::MulOp mulOp) {
1356 auto lhs = mulOp.getLhs();
1357 auto rhs = mulOp.getRhs();
1358
1359 // The sources should have already been emitted
1360 if (!emitter.hasValueInScope(lhs) || !emitter.hasValueInScope(rhs))
1361 return failure();
1362
1363 // Determine if the mul scheme is simple or complex
1364 bool simpleScheme = mulOp.getStart(0).empty();
1365
1366 std::string opname;
1367 // Create opname based on the result type
1368 auto resType = llvm::cast<VectorType>(mulOp.getResult().getType());
1369 Type eltType = resType.getElementType();
1370 if (!simpleScheme) {
1371 if (auto iType = llvm::dyn_cast<IntegerType>(eltType)) {
1372 if (iType.getWidth() == 80)
1373 opname = "l";
1374 } else if (llvm::isa<FloatType>(eltType))
1375 opname = "fp";
1376 }
1377
1378 opname += "mul";
1379 if (!simpleScheme && !llvm::isa<FloatType>(eltType))
1380 opname += std::to_string(getVectorLaneSize(resType));
1381
1382 raw_indented_ostream &os = emitter.ostream();
1383
1384 // Generate the initialization for the accumulator
1385 if (failed(emitter.emitAssignPrefix(*mulOp)))
1386 return failure();
1387
1388 os << opname;
1389 os << "(";
1390 if (failed(printFMAOrMulOperand<aievec::aie1::MulOp>(emitter, mulOp, 0)))
1391 return failure();
1392 os << ", ";
1393 if (failed(printFMAOrMulOperand<aievec::aie1::MulOp>(emitter, mulOp, 1)))
1394 return failure();
1395 os << ")";
1396
1397 return success();
1398}
1399// convert operand to 512 bits
1400static std::string printConversionTo512bit(CppEmitter &emitter, Value v) {
1401 std::string vName = emitter.getOrCreateName(v).str();
1402 auto vTy = cast<VectorType>(v.getType());
1403 auto vShape = vTy.getShape();
1404 int64_t elemBitWidth = vTy.getElementTypeBitWidth();
1405 int64_t numElems = std::accumulate(vShape.begin(), vShape.end(), 1,
1406 std::multiplies<int64_t>());
1407 int64_t vBitWidth = numElems * elemBitWidth;
1408 if (vBitWidth >= 512)
1409 return vName;
1410
1411 int64_t newNumElems = 512 / elemBitWidth;
1412
1413 std::string vNewName = emitter.getNewName();
1414 raw_indented_ostream &os = emitter.ostream();
1415 auto newVecTy = VectorType::get({512 / elemBitWidth}, vTy.getElementType());
1416 auto newTyName = *(
1417 emitter.genCppTypeName(newVecTy, /*stdintType=*/false, /*isAcc=*/false));
1418 auto oldTyName =
1419 *(emitter.genCppTypeName(vTy, /*stdintType=*/false, /*isAcc=*/false));
1420
1421 os << newTyName << " " << vNewName << " = concat(";
1422 if (newNumElems / numElems == 4) {
1423 os << "concat(" << vName << ", undef_" << oldTyName << "())";
1424 oldTyName = *(emitter.genCppTypeName(
1425 VectorType::get({256 / elemBitWidth}, vTy.getElementType())));
1426 } else {
1427 os << vName;
1428 }
1429 os << ", undef_" << oldTyName << "());\n";
1430 return vNewName;
1431}
1432
1433// Generate the MulElem op
1434static LogicalResult printOperation(CppEmitter &emitter,
1435 aievec::MulElemOp mulElemOp) {
1436 auto lhs = mulElemOp.getLhs();
1437 auto rhs = mulElemOp.getRhs();
1438
1439 // The sources should have already been emitted
1440 if (!emitter.hasValueInScope(lhs) || !emitter.hasValueInScope(rhs))
1441 return failure();
1442
1443 auto lhsName = printConversionTo512bit(emitter, lhs);
1444 auto rhsName = printConversionTo512bit(emitter, rhs);
1445
1446 std::string opname = "mul_elem";
1447
1448 // Create opname based on the source type
1449 auto lhsType = llvm::cast<VectorType>(mulElemOp.getLhs().getType());
1450 Type eltType = lhsType.getElementType();
1451 int32_t lsize = getElementSizeInBits(lhsType);
1452 auto iType = llvm::dyn_cast<IntegerType>(eltType);
1453
1454 if (iType) {
1455 if (lsize == 32)
1456 opname += "_16_2";
1457 else if (lsize == 16)
1458 opname += "_32";
1459 else if (lsize == 8)
1460 opname += "_32_2";
1461 } else if (llvm::isa<FloatType>(eltType)) {
1462 if (lsize == 32)
1463 opname += "_16";
1464 else if (lsize == 16)
1465 opname += "_16_2";
1466 }
1467
1468 raw_indented_ostream &os = emitter.ostream();
1469
1470 // Generate the initialization for the accumulator
1471 if (failed(emitter.emitAssignPrefix(*mulElemOp, true /*isAcc*/)))
1472 return failure();
1473
1474 os << opname;
1475 os << "(" << lhsName;
1476 if ((lsize == 32) && iType)
1477 os << " ,"
1478 << "undef_v16int32()";
1479 os << " ," << rhsName;
1480 if ((lsize == 32) && iType)
1481 os << " , "
1482 << "broadcast_zero_s32()";
1483 os << ")";
1484 return success();
1485}
1486
1487// Generate the MulConv op
1488static LogicalResult printOperation(CppEmitter &emitter,
1489 aievec::MulConvOp mulConvOp) {
1490 auto lhs = mulConvOp.getLhs();
1491 auto rhs = mulConvOp.getRhs();
1492
1493 // The sources should have already been emitted
1494 if (!emitter.hasValueInScope(lhs) || !emitter.hasValueInScope(rhs))
1495 return failure();
1496
1497 // Create opname based on the source type
1498 auto lhsType = llvm::cast<VectorType>(mulConvOp.getLhs().getType());
1499 Type eltType = lhsType.getElementType();
1500 int32_t lsize = getElementSizeInBits(lhsType);
1501 auto iType = llvm::dyn_cast<IntegerType>(eltType);
1502
1503 // Only support int16 and int8 cases
1504 if (!iType || !(lsize == 16 || lsize == 8)) {
1505 return failure();
1506 }
1507
1508 int32_t M = mulConvOp.getM();
1509 int32_t N = mulConvOp.getN();
1510 std::string opname =
1511 "mul_conv_" + std::to_string(M) + "x" + std::to_string(N);
1512
1513 raw_indented_ostream &os = emitter.ostream();
1514
1515 // Generate the initialization for the accumulator
1516 if (failed(emitter.emitAssignPrefix(*mulConvOp, true /*isAcc*/)))
1517 return failure();
1518
1519 os << opname;
1520 os << "(";
1521
1522 if (failed(
1523 printFMAOrMulConvOperand<aievec::MulConvOp>(emitter, mulConvOp, 0)))
1524 return failure();
1525 os << ", ";
1526 if (failed(
1527 printFMAOrMulConvOperand<aievec::MulConvOp>(emitter, mulConvOp, 1)))
1528 return failure();
1529 os << ")";
1530
1531 return success();
1532}
1533
1534// Generate the Add op
1535static LogicalResult printOperation(CppEmitter &emitter,
1536 aievec::aie1::AddOp addOp) {
1537 auto lhs = addOp.getLhs();
1538 auto rhs = addOp.getRhs();
1539
1540 // The sources should have already been emitted
1541 if (!emitter.hasValueInScope(lhs) || !emitter.hasValueInScope(rhs))
1542 return failure();
1543
1544 raw_indented_ostream &os = emitter.ostream();
1545
1546 // Generate the initialization for the result
1547 if (failed(emitter.emitAssignPrefix(*addOp)))
1548 return failure();
1549
1550 // Get the scalar type of result vector
1551 auto resultType = llvm::cast<VectorType>(addOp.getResult().getType());
1552 unsigned lanes = getVectorLaneSize(resultType);
1553 Type elementType = resultType.getElementType();
1554 bool floatType = llvm::isa<FloatType>(elementType);
1555
1556 // Detemine if the add scheme is simple or complex
1557
1558 if (addOp.getStart(0).empty()) {
1559 // Handle float type operation
1560 if (floatType) {
1561 os << "fpadd";
1562 os << "(";
1563 os << emitter.getOrCreateName(lhs);
1564 os << ", ";
1565 os << emitter.getOrCreateName(rhs);
1566 os << ")";
1567 }
1568 // Otherwise we can simply print this as overloaded +
1569 else {
1570 os << emitter.getOrCreateName(lhs);
1571 os << " + ";
1572 os << emitter.getOrCreateName(rhs);
1573 }
1574 return success();
1575 }
1576 // Otherwise this is complex scheme
1577 os << (floatType ? "fpadd" : "add" + std::to_string(lanes));
1578 os << "(";
1579 if (failed(printAddOrSubOperand<aievec::aie1::AddOp>(emitter, addOp, 0)))
1580 return failure();
1581 os << ", ";
1582 if (failed(printAddOrSubOperand<aievec::aie1::AddOp>(emitter, addOp, 1)))
1583 return failure();
1584 os << ")";
1585
1586 return success();
1587}
1588
1589// Generate the Sub op
1590static LogicalResult printOperation(CppEmitter &emitter,
1591 aievec::aie1::SubOp subOp) {
1592 auto lhs = subOp.getLhs();
1593 auto rhs = subOp.getRhs();
1594
1595 // The sources should have already been emitted
1596 if (!emitter.hasValueInScope(lhs) || !emitter.hasValueInScope(rhs))
1597 return failure();
1598
1599 raw_indented_ostream &os = emitter.ostream();
1600
1601 // Generate the initialization for the result
1602 if (failed(emitter.emitAssignPrefix(*subOp)))
1603 return failure();
1604
1605 // Get the scalar type of result vector
1606 auto resultType = llvm::cast<VectorType>(subOp.getResult().getType());
1607 unsigned lanes = getVectorLaneSize(resultType);
1608 Type elementType = resultType.getElementType();
1609 bool floatType = llvm::isa<FloatType>(elementType);
1610
1611 // Detemine if the sub scheme is simple or complex
1612
1613 if (subOp.getStart(0).empty()) {
1614 // Handle float type operation
1615 if (floatType) {
1616 os << "fpsub";
1617 os << "(";
1618 os << emitter.getOrCreateName(lhs);
1619 os << ", ";
1620 os << emitter.getOrCreateName(rhs);
1621 os << ")";
1622 }
1623 // Otherwise we can simply print this as overloaded -
1624 else {
1625 os << emitter.getOrCreateName(lhs);
1626 os << " - ";
1627 os << emitter.getOrCreateName(rhs);
1628 }
1629 return success();
1630 }
1631 // Otherwise this is complex scheme
1632 os << (floatType ? "fpsub" : "sub" + std::to_string(lanes));
1633 os << "(";
1634 if (failed(printAddOrSubOperand<aievec::aie1::SubOp>(emitter, subOp, 0)))
1635 return failure();
1636 os << ", ";
1637 if (failed(printAddOrSubOperand<aievec::aie1::SubOp>(emitter, subOp, 1)))
1638 return failure();
1639 os << ")";
1640
1641 return success();
1642}
1643
1644// Generate the Min op
1645static LogicalResult printOperation(CppEmitter &emitter, aievec::MinOp minOp) {
1646 auto lhs = minOp.getLhs();
1647 auto rhs = minOp.getRhs();
1648
1649 // The sources should have already been emitted
1650 if (!emitter.hasValueInScope(lhs) || !emitter.hasValueInScope(rhs))
1651 return failure();
1652
1653 raw_indented_ostream &os = emitter.ostream();
1654
1655 // Generate the initialization for the result
1656 if (failed(emitter.emitAssignPrefix(*minOp)))
1657 return failure();
1658
1659 os << "min(";
1660 if (failed(printMinMaxOperand<aievec::MinOp>(emitter, minOp, 0)))
1661 return failure();
1662 os << ", ";
1663 if (failed(printMinMaxOperand<aievec::MinOp>(emitter, minOp, 1)))
1664 return failure();
1665 os << ")";
1666
1667 return success();
1668}
1669
1670// Generate the Max op
1671static LogicalResult printOperation(CppEmitter &emitter, aievec::MaxOp maxOp) {
1672 auto lhs = maxOp.getLhs();
1673 auto rhs = maxOp.getRhs();
1674
1675 // The sources should have already been emitted
1676 if (!emitter.hasValueInScope(lhs) || !emitter.hasValueInScope(rhs))
1677 return failure();
1678
1679 raw_indented_ostream &os = emitter.ostream();
1680
1681 // Generate the initialization for the result
1682 if (failed(emitter.emitAssignPrefix(*maxOp)))
1683 return failure();
1684
1685 os << "max(";
1686 if (failed(printMinMaxOperand<aievec::MaxOp>(emitter, maxOp, 0)))
1687 return failure();
1688 os << ", ";
1689 if (failed(printMinMaxOperand<aievec::MaxOp>(emitter, maxOp, 1)))
1690 return failure();
1691 os << ")";
1692
1693 return success();
1694}
1695
1696// Generate the Neg op
1697static LogicalResult printOperation(CppEmitter &emitter, aievec::NegOp negOp) {
1698 auto src = negOp.getSource();
1699
1700 // The source should have already been emitted
1701 if (!emitter.hasValueInScope(src))
1702 return failure();
1703
1704 raw_indented_ostream &os = emitter.ostream();
1705
1706 // Generate the initialization for the result
1707 if (failed(emitter.emitAssignPrefix(*negOp, true /*isAcc*/)))
1708 return failure();
1709
1710 os << "neg(";
1711 os << emitter.getOrCreateName(src);
1712 os << ")";
1713
1714 return success();
1715}
1716
1717// Generate the Bneg op
1718static LogicalResult printOperation(CppEmitter &emitter,
1719 aievec::BnegOp bnegOp) {
1720 auto src = bnegOp.getSource();
1721
1722 // The source should have already been emitted
1723 if (!emitter.hasValueInScope(src))
1724 return failure();
1725
1726 raw_indented_ostream &os = emitter.ostream();
1727
1728 // Generate the initialization for the result
1729 if (failed(emitter.emitAssignPrefix(*bnegOp)))
1730 return failure();
1731
1732 os << "bneg(";
1733 os << emitter.getOrCreateName(src);
1734 os << ")";
1735
1736 return success();
1737}
1738
1739// Generate the Bxor op
1740static LogicalResult printOperation(CppEmitter &emitter, aievec::BxorOp xorOp) {
1741 auto lhs = xorOp.getLhs();
1742 auto rhs = xorOp.getRhs();
1743
1744 // The source should have already been emitted
1745 if (!emitter.hasValueInScope(lhs) || !emitter.hasValueInScope(rhs))
1746 return failure();
1747
1748 raw_indented_ostream &os = emitter.ostream();
1749
1750 // Generate the initialization for the result
1751 if (failed(emitter.emitAssignPrefix(*xorOp)))
1752 return failure();
1753
1754 os << "bxor(";
1755 os << emitter.getOrCreateName(lhs);
1756 os << ", ";
1757 os << emitter.getOrCreateName(rhs);
1758 os << ")";
1759
1760 return success();
1761}
1762
1763// Generate the Band op
1764static LogicalResult printOperation(CppEmitter &emitter, aievec::BandOp andOp) {
1765 auto lhs = andOp.getLhs();
1766 auto rhs = andOp.getRhs();
1767
1768 // The source should have already been emitted
1769 if (!emitter.hasValueInScope(lhs) || !emitter.hasValueInScope(rhs))
1770 return failure();
1771
1772 raw_indented_ostream &os = emitter.ostream();
1773
1774 // Generate the initialization for the result
1775 if (failed(emitter.emitAssignPrefix(*andOp)))
1776 return failure();
1777
1778 os << "band(";
1779 os << emitter.getOrCreateName(lhs);
1780 os << ", ";
1781 os << emitter.getOrCreateName(rhs);
1782 os << ")";
1783
1784 return success();
1785}
1786
1787// Generate the Bor op
1788static LogicalResult printOperation(CppEmitter &emitter, aievec::BorOp orOp) {
1789 auto lhs = orOp.getLhs();
1790 auto rhs = orOp.getRhs();
1791
1792 // The source should have already been emitted
1793 if (!emitter.hasValueInScope(lhs) || !emitter.hasValueInScope(rhs))
1794 return failure();
1795
1796 raw_indented_ostream &os = emitter.ostream();
1797
1798 // Generate the initialization for the result
1799 if (failed(emitter.emitAssignPrefix(*orOp)))
1800 return failure();
1801
1802 os << "bor(";
1803 os << emitter.getOrCreateName(lhs);
1804 os << ", ";
1805 os << emitter.getOrCreateName(rhs);
1806 os << ")";
1807
1808 return success();
1809}
1810
1811// Generate the AddElem op
1812static LogicalResult printOperation(CppEmitter &emitter,
1813 aievec::AddElemOp addElemOp) {
1814 auto lhs = addElemOp.getLhs();
1815 auto rhs = addElemOp.getRhs();
1816
1817 // The sources should have already been emitted
1818 if (!emitter.hasValueInScope(lhs) || !emitter.hasValueInScope(rhs))
1819 return failure();
1820
1821 raw_indented_ostream &os = emitter.ostream();
1822
1823 // Generate the initialization for the result
1824 // FIXME: move the logic to the op creation and add isAcc to the op attribute
1825 bool isAcc = false;
1826 auto resType = cast<VectorType>(addElemOp.getResult().getType());
1827 auto resElemType = resType.getElementType();
1828 unsigned resBitWidth = resElemType.getIntOrFloatBitWidth();
1829 unsigned resLaneSize = getVectorLaneSize(resType);
1830 if (isa<FloatType>(resElemType) || resBitWidth * resLaneSize == 1024)
1831 isAcc = true;
1832
1833 if (failed(emitter.emitAssignPrefix(*addElemOp, /*isAcc=*/isAcc)))
1834 return failure();
1835
1836 os << "add(";
1837 if (failed(printAddElemOrSubElemOperand<aievec::AddElemOp>(emitter, addElemOp,
1838 0)))
1839 return failure();
1840 os << ", ";
1841 if (failed(printAddElemOrSubElemOperand<aievec::AddElemOp>(emitter, addElemOp,
1842 1)))
1843 return failure();
1844 os << ")";
1845
1846 return success();
1847}
1848
1849// Generate the SubElem op
1850static LogicalResult printOperation(CppEmitter &emitter,
1851 aievec::SubElemOp subElemOp) {
1852 auto lhs = subElemOp.getLhs();
1853 auto rhs = subElemOp.getRhs();
1854
1855 // The sources should have already been emitted
1856 if (!emitter.hasValueInScope(lhs) || !emitter.hasValueInScope(rhs))
1857 return failure();
1858
1859 raw_indented_ostream &os = emitter.ostream();
1860
1861 // Generate the initialization for the result
1862 // FIXME: move the logic to the op creation and add isAcc to the op attribute
1863 bool isAcc = false;
1864 auto resType = cast<VectorType>(subElemOp.getResult().getType());
1865 auto resElemType = resType.getElementType();
1866 unsigned resBitWidth = resElemType.getIntOrFloatBitWidth();
1867 unsigned resLaneSize = getVectorLaneSize(resType);
1868 if (isa<FloatType>(resElemType) || resBitWidth * resLaneSize == 1024)
1869 isAcc = true;
1870
1871 if (failed(emitter.emitAssignPrefix(*subElemOp, /*isAcc=*/isAcc)))
1872 return failure();
1873
1874 os << "sub(";
1875 if (failed(printAddElemOrSubElemOperand<aievec::SubElemOp>(emitter, subElemOp,
1876 0)))
1877 return failure();
1878 os << ", ";
1879 if (failed(printAddElemOrSubElemOperand<aievec::SubElemOp>(emitter, subElemOp,
1880 1)))
1881 return failure();
1882 os << ")";
1883
1884 return success();
1885}
1886
1887// Generate the FMA op
1888static LogicalResult printOperation(CppEmitter &emitter,
1889 aievec::aie1::FMAOp fmaOp) {
1890 auto acc = fmaOp.getAcc();
1891 auto lhs = fmaOp.getLhs();
1892 auto rhs = fmaOp.getRhs();
1893
1894 // The sources should have already been emitted
1895 if (!emitter.hasValueInScope(acc) || !emitter.hasValueInScope(lhs) ||
1896 !emitter.hasValueInScope(rhs))
1897 return failure();
1898
1899 // Detemine if the mul scheme is simple or complex
1900 bool simpleScheme = fmaOp.getStart(0).empty();
1901
1902 std::string opname;
1903 // Create opname based on the result type
1904 auto resType = llvm::cast<VectorType>(fmaOp.getResult().getType());
1905 Type eltType = resType.getElementType();
1906 if (!simpleScheme) {
1907 if (auto iType = llvm::dyn_cast<IntegerType>(eltType)) {
1908 if (iType.getWidth() == 80)
1909 opname = "l";
1910 } else if (llvm::isa<FloatType>(eltType))
1911 opname = "fp";
1912 }
1913
1914 opname += fmaOp.getFmsub() ? "msc" : "mac";
1915 if (!simpleScheme && !llvm::isa<FloatType>(eltType))
1916 opname += std::to_string(getVectorLaneSize(resType));
1917
1918 raw_indented_ostream &os = emitter.ostream();
1919
1920 StringRef accName = emitter.getOrCreateName(acc);
1921 os << accName;
1922 os << " = ";
1923 os << opname;
1924 os << "(";
1925 os << accName;
1926 os << ", ";
1927 if (failed(printFMAOrMulOperand<aievec::aie1::FMAOp>(emitter, fmaOp, 0)))
1928 return failure();
1929 os << ", ";
1930 if (failed(printFMAOrMulOperand<aievec::aie1::FMAOp>(emitter, fmaOp, 1)))
1931 return failure();
1932 os << ")";
1933
1934 // Finally, set the name of the result to the accumulator's name
1935 emitter.setName(fmaOp->getResult(0), accName);
1936
1937 return success();
1938}
1939
1940// Generate the FMAElem op
1941static LogicalResult printOperation(CppEmitter &emitter,
1942 aievec::FMAElemOp fmaElemOp) {
1943 auto acc = fmaElemOp.getAcc();
1944 auto lhs = fmaElemOp.getLhs();
1945 auto rhs = fmaElemOp.getRhs();
1946
1947 // The sources should have already been emitted
1948 if (!emitter.hasValueInScope(acc) || !emitter.hasValueInScope(lhs) ||
1949 !emitter.hasValueInScope(rhs))
1950 return failure();
1951
1952 std::string opname = fmaElemOp.getFmsub() ? "msc_elem" : "mac_elem";
1953 // Create opname based on the lhs and rhs type
1954 auto lhsType = llvm::cast<VectorType>(fmaElemOp.getLhs().getType());
1955 Type eltType = lhsType.getElementType();
1956 int32_t lsize = getElementSizeInBits(lhsType);
1957 auto iType = llvm::dyn_cast<IntegerType>(eltType);
1958
1959 if (iType) {
1960 if (lsize == 32)
1961 opname += "_16_2";
1962 else if (lsize == 16)
1963 opname += "_32";
1964 else if (lsize == 8)
1965 opname += "_32_2";
1966 } else if (llvm::isa<FloatType>(eltType)) {
1967 if (lsize == 32)
1968 opname += "_16";
1969 else if (lsize == 16)
1970 opname += "_16_2";
1971 }
1972
1973 raw_indented_ostream &os = emitter.ostream();
1974
1975 StringRef accName = emitter.getOrCreateName(acc);
1976 os << accName;
1977 os << " = ";
1978 os << opname;
1979 os << "(";
1980 if (failed(printFMAOrMulElemOperand<aievec::FMAElemOp>(emitter, fmaElemOp,
1981 iType, lsize, 1)))
1982 return failure();
1983 os << ", ";
1984 if (failed(printFMAOrMulElemOperand<aievec::FMAElemOp>(emitter, fmaElemOp,
1985 iType, lsize, 0)))
1986 return failure();
1987 os << ", ";
1988 os << accName;
1989 os << ")";
1990
1991 // Finally, set the name of the result to the accumulator's name
1992 emitter.setName(fmaElemOp->getResult(0), accName);
1993
1994 return success();
1995}
1996
1997// Generate the FMAConv op
1998static LogicalResult printOperation(CppEmitter &emitter,
1999 aievec::FMAConvOp fmaConvOp) {
2000 auto acc = fmaConvOp.getAcc();
2001 auto lhs = fmaConvOp.getLhs();
2002 auto rhs = fmaConvOp.getRhs();
2003
2004 // The sources should have already been emitted
2005 if (!emitter.hasValueInScope(acc) || !emitter.hasValueInScope(lhs) ||
2006 !emitter.hasValueInScope(rhs))
2007 return failure();
2008
2009 std::string opname = fmaConvOp.getFmsub() ? "msc_conv" : "mac_conv";
2010 // Create opname based on the lhs and rhs type
2011 auto lhsType = llvm::cast<VectorType>(fmaConvOp.getLhs().getType());
2012 Type eltType = lhsType.getElementType();
2013 int32_t lsize = getElementSizeInBits(lhsType);
2014 auto iType = llvm::dyn_cast<IntegerType>(eltType);
2015
2016 // Only support int16 and int8 cases
2017 if (!iType || !(lsize == 16 || lsize == 8))
2018 return failure();
2019
2020 int32_t M = fmaConvOp.getM();
2021 int32_t N = fmaConvOp.getN();
2022 opname += "_" + std::to_string(M) + "x" + std::to_string(N);
2023
2024 raw_indented_ostream &os = emitter.ostream();
2025
2026 StringRef accName = emitter.getOrCreateName(acc);
2027 os << accName;
2028 os << " = ";
2029 os << opname;
2030 os << "(";
2031 if (failed(
2032 printFMAOrMulConvOperand<aievec::FMAConvOp>(emitter, fmaConvOp, 0)))
2033 return failure();
2034 os << ", ";
2035 if (failed(
2036 printFMAOrMulConvOperand<aievec::FMAConvOp>(emitter, fmaConvOp, 1)))
2037 return failure();
2038 os << ", ";
2039 os << accName;
2040 os << ")";
2041
2042 // Finally, set the name of the result to the accumulator's name
2043 emitter.setName(fmaConvOp->getResult(0), accName);
2044
2045 return success();
2046}
2047
2048// Generate the comparison intrinsics(eq, ne, lt, le, gt, ge) for AIE2
2049static LogicalResult printOperation(CppEmitter &emitter, aievec::CmpOp cmpOp) {
2050 if (!emitter.aie2())
2051 return failure();
2052
2053 // The lhs and rhs should have already been emitted
2054 Value lhs = cmpOp.getLhs();
2055 Value rhs = cmpOp.getRhs();
2056
2057 if (!emitter.hasValueInScope(lhs) || !emitter.hasValueInScope(rhs))
2058 return failure();
2059
2060 // Generate the initialization for the vector
2061 if (failed(emitter.emitAssignPrefix(*cmpOp)))
2062 return failure();
2063
2064 raw_indented_ostream &os = emitter.ostream();
2065
2066 StringRef pred = cmpOp.getPred();
2067 if (pred == "eq")
2068 os << "eq";
2069 else if (pred == "ne")
2070 os << "ne";
2071 else if (pred == "slt" || pred == "ult")
2072 os << "lt";
2073 else if (pred == "sle" || pred == "ule")
2074 os << "le";
2075 else if (pred == "sgt" || pred == "ugt")
2076 os << "gt";
2077 else if (pred == "sge" || pred == "uge")
2078 os << "ge";
2079 else
2080 return failure();
2081
2082 os << "(";
2083 auto vType = llvm::cast<VectorType>(lhs.getType());
2084
2085 if (Type eltType = vType.getElementType();
2086 llvm::isa<IntegerType>(eltType) &&
2087 (pred == "ult" || pred == "ule" || pred == "ugt" || pred == "uge")) {
2088 unsigned lanes = getVectorLaneSize(vType);
2089 unsigned width = getElementSizeInBits(vType);
2090 os << "v" << std::to_string(lanes) << "uint" << std::to_string(width);
2091 os << "(";
2092 os << emitter.getOrCreateName(lhs);
2093 os << "), ";
2094 os << "v" << std::to_string(lanes) << "uint" << std::to_string(width);
2095 os << "(";
2096 os << emitter.getOrCreateName(rhs);
2097 os << ")";
2098 } else {
2099 os << emitter.getOrCreateName(lhs);
2100 os << ", ";
2101 os << emitter.getOrCreateName(rhs);
2102 }
2103 os << ")";
2104
2105 return success();
2106}
2107
2108// Generate the sel intrinsic for AIE2
2109static LogicalResult printOperation(CppEmitter &emitter, aievec::SelOp selOp) {
2110 if (!emitter.aie2())
2111 return failure();
2112
2113 // The lhs, rhs and sel should have already been emitted
2114 Value lhs = selOp.getLhs();
2115 Value rhs = selOp.getRhs();
2116 Value sel = selOp.getSel();
2117
2118 if (!emitter.hasValueInScope(lhs) || !emitter.hasValueInScope(rhs) ||
2119 !emitter.hasValueInScope(sel))
2120 return failure();
2121
2122 // Generate the initialization for the vector
2123 if (failed(emitter.emitAssignPrefix(*selOp)))
2124 return failure();
2125
2126 raw_indented_ostream &os = emitter.ostream();
2127
2128 os << "sel(";
2129 os << emitter.getOrCreateName(rhs);
2130 os << ", ";
2131 os << emitter.getOrCreateName(lhs);
2132 os << ", ";
2133 os << emitter.getOrCreateName(sel);
2134 os << ")";
2135
2136 return success();
2137}
2138
2139// Generate the extract elem intrinsic
2140static LogicalResult printOperation(CppEmitter &emitter,
2141 aievec::ExtElemOp extElemOp) {
2142 Value source = extElemOp.getSource();
2143 Value index = extElemOp.getIndex();
2144
2145 raw_indented_ostream &os = emitter.ostream();
2146
2147 // Generate the initialization for the result
2148 if (failed(emitter.emitAssignPrefix(*extElemOp)))
2149 return failure();
2150
2151 // source should have already been emitted
2152 if (!emitter.hasValueInScope(source))
2153 return failure();
2154
2155 os << "extract_elem";
2156 os << "(";
2157 // Print the source and index
2158 os << emitter.getOrCreateName(source);
2159 os << ", ";
2160 os << emitter.getOrCreateName(index);
2161 os << ")";
2162
2163 return success();
2164}
2165
2166// Generate the transfer write op
2167static LogicalResult printOperation(CppEmitter &emitter,
2168 vector::TransferWriteOp writeOp) {
2169 Value source = writeOp.getBase();
2170 Value vector = writeOp.getVector();
2171
2172 // If the aray, or the vector being outputted is not already emitted,
2173 // error out
2174 if (!emitter.hasValueInScope(source) || !emitter.hasValueInScope(vector))
2175 return failure();
2176
2177 // Construct the access expression using memref shape and indices
2178 std::string access;
2179 auto indices = writeOp.getIndices();
2180 if (failed(createLinearizedAccess(emitter, source, indices, access)))
2181 return failure();
2182
2183 raw_indented_ostream &os = emitter.ostream();
2184
2185 os << "*(";
2186 if (failed(emitter.emitType(writeOp->getLoc(), vector.getType())))
2187 return failure();
2188 os << " *)";
2189 os << "(";
2190 os << emitter.getOrCreateName(source);
2191 if (!access.empty())
2192 os << " + " << access;
2193 os << ")";
2194 os << " = ";
2195 os << emitter.getOrCreateName(vector);
2196
2197 return success();
2198}
2199
2200// Generate the memref store op
2201static LogicalResult printOperation(CppEmitter &emitter,
2202 memref::StoreOp storeOp) {
2203 Value value = storeOp.getValue();
2204 Value memref = storeOp.getMemref();
2205
2206 // If the value, or the memref being outputted is not already emitted,
2207 // error out
2208 if (!emitter.hasValueInScope(value) || !emitter.hasValueInScope(memref))
2209 return failure();
2210
2211 raw_indented_ostream &os = emitter.ostream();
2212
2213 os << "*(";
2214 if (failed(emitter.emitType(
2215 storeOp->getLoc(),
2216 cast<MemRefType>(memref.getType()).getElementType())))
2217 return failure();
2218 os << " *)";
2219 os << emitter.getOrCreateName(memref);
2220 os << " = ";
2221 os << emitter.getOrCreateName(value);
2222
2223 return success();
2224}
2225
2226// Print an operation by forwarding the value to the next op
2227template <typename OpTy>
2228static LogicalResult printValueForwardOperation(CppEmitter &emitter, OpTy op) {
2229 Value source = op.getSrc();
2230
2231 // If the memref being outputted is not already emitted,
2232 // error out
2233 if (!emitter.hasValueInScope(source))
2234 return failure();
2235
2236 if (failed(emitter.emitAssignPrefix(*op)))
2237 return failure();
2238
2239 raw_indented_ostream &os = emitter.ostream();
2240 os << emitter.getOrCreateName(source);
2241
2242 return success();
2243}
2244
2245// Print an expand shape by forwarding the value to the next op
2246static LogicalResult printOperation(CppEmitter &emitter,
2247 memref::ExpandShapeOp expandShapeOp) {
2248 return printValueForwardOperation<memref::ExpandShapeOp>(emitter,
2249 expandShapeOp);
2250}
2251
2252// Print a collapse shape by forwarding the value to the next op
2253static LogicalResult printOperation(CppEmitter &emitter,
2254 memref::CollapseShapeOp collapseShapeOp) {
2255 return printValueForwardOperation<memref::CollapseShapeOp>(emitter,
2256 collapseShapeOp);
2257}
2258
2259static LogicalResult printConstantOp(CppEmitter &emitter, Operation *operation,
2260 Attribute value) {
2261 OpResult result = operation->getResult(0);
2262
2263 // Only emit an assignment as the variable was already declared when printing
2264 // the FuncOp.
2265 if (emitter.shouldDeclareVariablesAtTop()) {
2266 // Skip the assignment if the emitc.constant has no value.
2267 if (auto oAttr = llvm::dyn_cast<emitc::OpaqueAttr>(value))
2268 if (oAttr.getValue().empty())
2269 return success();
2270
2271 if (failed(emitter.emitVariableAssignment(result)))
2272 return failure();
2273 return emitter.emitAttribute(operation->getLoc(), value);
2274 }
2275
2276 // Emit a variable declaration for an emitc.constant op without value.
2277 if (auto oAttr = llvm::dyn_cast<emitc::OpaqueAttr>(value))
2278 if (oAttr.getValue().empty())
2279 // The semicolon gets printed by the emitOperation function.
2280 return emitter.emitVariableDeclaration(result,
2281 /*trailingSemicolon=*/false);
2282
2283 // Emit a variable declaration.
2284 if (failed(emitter.emitAssignPrefix(*operation)))
2285 return failure();
2286 return emitter.emitAttribute(operation->getLoc(), value);
2287}
2288
2289static LogicalResult printOperation(CppEmitter &emitter,
2290 emitc::ConstantOp constantOp) {
2291 Operation *operation = constantOp.getOperation();
2292 Attribute value = constantOp.getValue();
2293 return printConstantOp(emitter, operation, value);
2294}
2295
2296static LogicalResult printOperation(CppEmitter &emitter,
2297 arith::ConstantOp constantOp) {
2298 Operation *operation = constantOp.getOperation();
2299 Attribute value = constantOp.getValue();
2300 return printConstantOp(emitter, operation, value);
2301}
2302
2303static LogicalResult printOperation(CppEmitter &emitter,
2304 cf::BranchOp branchOp) {
2305 raw_ostream &os = emitter.ostream();
2306 Block &successor = *branchOp.getSuccessor();
2307
2308 for (auto pair : zip(branchOp.getOperands(), successor.getArguments())) {
2309 Value &operand = std::get<0>(pair);
2310 BlockArgument &argument = std::get<1>(pair);
2311 os << emitter.getOrCreateName(argument) << " = "
2312 << emitter.getOrCreateName(operand) << ";\n";
2313 }
2314
2315 os << "goto ";
2316 if (!emitter.hasBlockLabel(successor))
2317 return branchOp.emitOpError("unable to find label for successor block");
2318 os << emitter.getOrCreateName(successor);
2319 return success();
2320}
2321
2322static LogicalResult printOperation(CppEmitter &emitter,
2323 cf::CondBranchOp condBranchOp) {
2324 raw_indented_ostream &os = emitter.ostream();
2325 Block &trueSuccessor = *condBranchOp.getTrueDest();
2326 Block &falseSuccessor = *condBranchOp.getFalseDest();
2327
2328 os << "if (" << emitter.getOrCreateName(condBranchOp.getCondition())
2329 << ") {\n";
2330
2331 os.indent();
2332
2333 // If condition is true.
2334 for (auto pair :
2335 zip(condBranchOp.getTrueOperands(), trueSuccessor.getArguments())) {
2336 Value &operand = std::get<0>(pair);
2337 BlockArgument &argument = std::get<1>(pair);
2338 os << emitter.getOrCreateName(argument) << " = "
2339 << emitter.getOrCreateName(operand) << ";\n";
2340 }
2341
2342 os << "goto ";
2343 if (!emitter.hasBlockLabel(trueSuccessor))
2344 return condBranchOp.emitOpError("unable to find label for successor block");
2345 os << emitter.getOrCreateName(trueSuccessor) << ";\n";
2346 os.unindent() << "} else {\n";
2347 os.indent();
2348 // If condition is false.
2349 for (auto pair :
2350 zip(condBranchOp.getFalseOperands(), falseSuccessor.getArguments())) {
2351 Value &operand = std::get<0>(pair);
2352 BlockArgument &argument = std::get<1>(pair);
2353 os << emitter.getOrCreateName(argument) << " = "
2354 << emitter.getOrCreateName(operand) << ";\n";
2355 }
2356
2357 os << "goto ";
2358 if (!emitter.hasBlockLabel(falseSuccessor))
2359 return condBranchOp.emitOpError()
2360 << "unable to find label for successor block";
2361 os << emitter.getOrCreateName(falseSuccessor) << ";\n";
2362 os.unindent() << "}";
2363
2364 return success();
2365}
2366
2367static LogicalResult printOperation(CppEmitter &emitter, func::CallOp callOp) {
2368 if (failed(emitter.emitAssignPrefix(*callOp.getOperation())))
2369 return failure();
2370
2371 raw_ostream &os = emitter.ostream();
2372 os << callOp.getCallee() << "(";
2373 if (failed(emitter.emitOperands(*callOp.getOperation())))
2374 return failure();
2375 os << ")";
2376
2377 return success();
2378}
2379
2380static LogicalResult printOperation(CppEmitter &emitter,
2381 emitc::CallOpaqueOp callOp) {
2382 raw_ostream &os = emitter.ostream();
2383 Operation &op = *callOp.getOperation();
2384 if (callOp.getCallee() == "getTanhBf16" ||
2385 callOp.getCallee() == "getSqrtBf16" ||
2386 callOp.getCallee() == "getRsqrtBf16" ||
2387 callOp.getCallee() == "getErfBf16" || callOp.getCallee() == "getAbs" ||
2388 callOp.getCallee() == "getSigmoidBf16" ||
2389 callOp.getCallee() == "getCeilBf16" ||
2390 callOp.getCallee() == "getFloorBf16") {
2391 if (failed(emitter.emitAssignPrefix(op, /*isAcc*/ false)))
2392 return failure();
2393 } else if (failed(emitter.emitAssignPrefix(op, /*isAcc*/ true)))
2394 return failure();
2395
2396 os << callOp.getCallee();
2397
2398 auto emitArgs = [&](Attribute attr) -> LogicalResult {
2399 // Index attributes are treated specially as operand index.
2400 if (auto t = llvm::dyn_cast<IntegerAttr>(attr))
2401 if (t.getType().isIndex()) {
2402 int64_t idx = t.getInt();
2403 if (idx < 0 || idx >= op.getNumOperands())
2404 return op.emitOpError("invalid operand index");
2405 if (!emitter.hasValueInScope(op.getOperand(idx)))
2406 return op.emitOpError("operand ")
2407 << idx << "'s value not defined in scope";
2408 os << emitter.getOrCreateName(op.getOperand(idx));
2409 return success();
2410 }
2411 if (failed(emitter.emitAttribute(op.getLoc(), attr)))
2412 return failure();
2413
2414 return success();
2415 };
2416
2417 if (callOp.getTemplateArgs()) {
2418 os << "<";
2419 if (failed(
2420 interleaveCommaWithError(*callOp.getTemplateArgs(), os, emitArgs)))
2421 return failure();
2422 os << ">";
2423 }
2424
2425 os << "(";
2426
2427 LogicalResult emittedArgs =
2428 callOp.getArgs()
2429 ? interleaveCommaWithError(*callOp.getArgs(), os, emitArgs)
2430 : emitter.emitOperands(op);
2431 if (failed(emittedArgs))
2432 return failure();
2433 os << ")";
2434
2435 return success();
2436}
2437
2438static LogicalResult printOperation(CppEmitter &emitter,
2439 emitc::ApplyOp applyOp) {
2440 raw_ostream &os = emitter.ostream();
2441
2442 if (Operation &op = *applyOp.getOperation();
2443 failed(emitter.emitAssignPrefix(op)))
2444 return failure();
2445 os << applyOp.getApplicableOperator();
2446 os << emitter.getOrCreateName(applyOp.getOperand());
2447
2448 return success();
2449}
2450
2451static LogicalResult printOperation(CppEmitter &emitter,
2452 emitc::IncludeOp includeOp) {
2453 raw_ostream &os = emitter.ostream();
2454
2455 os << "#include ";
2456 if (includeOp.getIsStandardInclude())
2457 os << "<" << includeOp.getInclude() << ">";
2458 else
2459 os << "\"" << includeOp.getInclude() << "\"";
2460
2461 return success();
2462}
2463
2464static LogicalResult printOperation(CppEmitter &emitter, scf::ForOp forOp) {
2465 raw_indented_ostream &os = emitter.ostream();
2466
2467 OperandRange operands = forOp.getInitArgs();
2468 Block::BlockArgListType iterArgs = forOp.getRegionIterArgs();
2469 Operation::result_range results = forOp.getResults();
2470
2471 if (!emitter.shouldDeclareVariablesAtTop())
2472 for (OpResult result : results)
2473 if (failed(emitter.emitVariableDeclaration(result,
2474 /*trailingSemicolon=*/true)))
2475 return failure();
2476
2477 for (auto pair : zip(iterArgs, operands)) {
2478 if (failed(emitter.emitType(forOp.getLoc(), std::get<0>(pair).getType())))
2479 return failure();
2480 os << " " << emitter.getOrCreateName(std::get<0>(pair)) << " = ";
2481 os << emitter.getOrCreateName(std::get<1>(pair)) << ";";
2482 os << "\n";
2483 }
2484
2485 os << "for (";
2486 if (failed(
2487 emitter.emitType(forOp.getLoc(), forOp.getInductionVar().getType())))
2488 return failure();
2489
2490 os << " ";
2491 os << emitter.getOrCreateName(forOp.getInductionVar());
2492 os << " = ";
2493 os << emitter.getOrCreateName(forOp.getLowerBound());
2494 os << "; ";
2495 os << emitter.getOrCreateName(forOp.getInductionVar());
2496 os << " < ";
2497 os << emitter.getOrCreateName(forOp.getUpperBound());
2498 os << "; ";
2499 os << emitter.getOrCreateName(forOp.getInductionVar());
2500 os << " += ";
2501 os << emitter.getOrCreateName(forOp.getStep());
2502 os << ")\n";
2503 os << "chess_prepare_for_pipelining\n";
2504 // Try to find the upper bound and step of the for operator.
2505 // If the bounds are found, print them
2506 if (auto [constantLoopBound, tripCount] = getTripCount(forOp);
2507 constantLoopBound) {
2508 auto [constantStep, step] = getStep(forOp);
2509 int64_t lb =
2510 constantStep && step > 0 ? llvm::divideFloorSigned(tripCount, step) : 1;
2511 int64_t ub =
2512 constantStep && step > 0 ? llvm::divideCeilSigned(tripCount, step) : 0;
2513 os << "chess_loop_range(";
2514 os << std::to_string(lb);
2515 os << ", ";
2516 if (constantStep && step > 0)
2517 os << std::to_string(ub);
2518 os << ")\n";
2519 }
2520 os << "{\n";
2521 os.indent();
2522
2523 Region &forRegion = forOp.getRegion();
2524 auto regionOps = forRegion.getOps();
2525
2526 // We skip the trailing yield op because this updates the result variables
2527 // of the for op in the generated code. Instead we update the iterArgs at
2528 // the end of a loop iteration and set the result variables after the for
2529 // loop.
2530 for (auto it = regionOps.begin(); std::next(it) != regionOps.end(); ++it) {
2531 if (bool trailingSemicolon =
2532 !isa<scf::IfOp, scf::ForOp, cf::CondBranchOp>(*it);
2533 failed(emitter.emitOperation(*it, trailingSemicolon)))
2534 return failure();
2535 }
2536
2537 Operation *yieldOp = forRegion.getBlocks().front().getTerminator();
2538 // Copy yield operands into iterArgs at the end of a loop iteration.
2539 for (auto pair : zip(iterArgs, yieldOp->getOperands())) {
2540 BlockArgument iterArg = std::get<0>(pair);
2541 Value operand = std::get<1>(pair);
2542 os << emitter.getOrCreateName(iterArg) << " = "
2543 << emitter.getOrCreateName(operand) << ";\n";
2544 }
2545
2546 os.unindent() << "}";
2547
2548 // Copy iterArgs into results after the for loop.
2549 for (auto pair : zip(results, iterArgs)) {
2550 OpResult result = std::get<0>(pair);
2551 BlockArgument iterArg = std::get<1>(pair);
2552 os << "\n"
2553 << emitter.getOrCreateName(result) << " = "
2554 << emitter.getOrCreateName(iterArg) << ";";
2555 }
2556
2557 return success();
2558}
2559
2560static LogicalResult printOperation(CppEmitter &emitter, scf::IfOp ifOp) {
2561 raw_indented_ostream &os = emitter.ostream();
2562
2563 if (!emitter.shouldDeclareVariablesAtTop())
2564 for (OpResult result : ifOp.getResults())
2565 if (failed(emitter.emitVariableDeclaration(result,
2566 /*trailingSemicolon=*/true)))
2567 return failure();
2568
2569 os << "if (";
2570 if (failed(emitter.emitOperands(*ifOp.getOperation())))
2571 return failure();
2572 os << ") {\n";
2573 os.indent();
2574
2575 Region &thenRegion = ifOp.getThenRegion();
2576 // Note: This prints a superfluous semicolon if the terminating yield op has
2577 // zero results.
2578 for (Operation &op : thenRegion.getOps())
2579 if (failed(emitter.emitOperation(op, /*trailingSemicolon=*/true)))
2580 return failure();
2581
2582 os.unindent() << "}";
2583
2584 if (Region &elseRegion = ifOp.getElseRegion(); !elseRegion.empty()) {
2585 os << " else {\n";
2586 os.indent();
2587
2588 // Note: This prints a superfluous semicolon if the terminating yield op
2589 // has zero results.
2590 for (Operation &op : elseRegion.getOps())
2591 if (failed(emitter.emitOperation(op, /*trailingSemicolon=*/true)))
2592 return failure();
2593
2594 os.unindent() << "}";
2595 }
2596
2597 return success();
2598}
2599
2600static LogicalResult printOperation(CppEmitter &emitter, scf::YieldOp yieldOp) {
2601 raw_ostream &os = emitter.ostream();
2602 Operation &parentOp = *yieldOp.getOperation()->getParentOp();
2603
2604 if (yieldOp.getNumOperands() != parentOp.getNumResults())
2605 return yieldOp.emitError("number of operands does not to match the number "
2606 "of the parent op's results");
2607
2608 if (failed(interleaveWithError(
2609 llvm::zip(parentOp.getResults(), yieldOp.getOperands()),
2610 [&](auto pair) -> LogicalResult {
2611 auto result = std::get<0>(pair);
2612 auto operand = std::get<1>(pair);
2613 os << emitter.getOrCreateName(result) << " = ";
2614
2615 if (!emitter.hasValueInScope(operand))
2616 return yieldOp.emitError("operand value not in scope");
2617 os << emitter.getOrCreateName(operand);
2618 return success();
2619 },
2620 [&] { os << ";\n"; })))
2621 return failure();
2622
2623 return success();
2624}
2625
2626static LogicalResult printOperation(CppEmitter &emitter,
2627 func::ReturnOp returnOp) {
2628 raw_ostream &os = emitter.ostream();
2629 os << "return";
2630 switch (returnOp.getNumOperands()) {
2631 case 0:
2632 return success();
2633 case 1:
2634 os << " " << emitter.getOrCreateName(returnOp.getOperand(0));
2635 return success(emitter.hasValueInScope(returnOp.getOperand(0)));
2636 default:
2637 os << " std::make_tuple(";
2638 if (failed(emitter.emitOperandsAndAttributes(*returnOp.getOperation())))
2639 return failure();
2640 os << ")";
2641 }
2642
2643 return success();
2644}
2645
2646static LogicalResult printOperation(CppEmitter &emitter, ModuleOp moduleOp) {
2647 CppEmitter::Scope scope(emitter);
2648
2649 for (Operation &op : moduleOp)
2650 if (failed(emitter.emitOperation(op, /*trailingSemicolon=*/false)))
2651 return failure();
2652
2653 return success();
2654}
2655
2656static LogicalResult printOperation(CppEmitter &emitter,
2657 AIE::DeviceOp deviceOp) {
2658 CppEmitter::Scope scope(emitter);
2659 raw_indented_ostream &os = emitter.ostream();
2660
2661 // Emit device as a comment with device type
2662 os << "aie.device(" << deviceOp.getDevice() << ") {\n";
2663 os.indent();
2664
2665 // Process all operations within the device's body region
2666 Region &region = deviceOp.getBodyRegion();
2667 for (Block &block : region.getBlocks()) {
2668 for (Operation &op : block.getOperations()) {
2669 // Skip terminator operations (like aie.end)
2670 if (op.hasTrait<OpTrait::IsTerminator>())
2671 continue;
2672
2673 if (failed(emitter.emitOperation(op, /*trailingSemicolon=*/false)))
2674 return failure();
2675 }
2676 }
2677
2678 os.unindent() << "}\n";
2679 return success();
2680}
2681
2682static LogicalResult printOperation(CppEmitter &emitter,
2683 func::FuncOp functionOp) {
2684 // We need to declare variables at top if the function has multiple blocks.
2685 if (!emitter.shouldDeclareVariablesAtTop() &&
2686 functionOp.getBlocks().size() > 1)
2687 return functionOp.emitOpError(
2688 "with multiple blocks needs variables declared at top");
2689
2690 CppEmitter::Scope scope(emitter);
2691
2692 // Find any memref dim op in the function, and parse the dimension of each
2693 // dynamic shaped memref
2694 if (failed(parseMemRefDynamicDims(emitter, functionOp)))
2695 return failure();
2696
2697 raw_indented_ostream &os = emitter.ostream();
2698 if (failed(emitter.emitTypes(functionOp.getLoc(),
2699 functionOp.getFunctionType().getResults())))
2700 return failure();
2701 os << " " << functionOp.getName();
2702
2703 os << "(";
2704 if (functionOp.isDeclaration()) {
2705 if (failed(interleaveCommaWithError(
2706 functionOp.getArgumentTypes(), os, [&](Type type) -> LogicalResult {
2707 if (failed(emitter.emitType(functionOp.getLoc(), type)))
2708 return failure();
2709 // If it is a memref argument, we need to check if it has dynamic
2710 // shape. If so, the dimensions have to be printed out
2711 if (auto argType = dyn_cast<MemRefType>(type))
2712 for (unsigned dim = 0; dim < argType.getRank(); ++dim)
2713 if (argType.isDynamicDim(dim))
2714 os << ", size_t";
2715 return success();
2716 })))
2717 return failure();
2718 os << ");\n";
2719 return success();
2720 }
2721
2722 if (failed(interleaveCommaWithError(
2723 functionOp.getArguments(), os,
2724 [&](BlockArgument arg) -> LogicalResult {
2725 if (failed(emitter.emitType(functionOp.getLoc(), arg.getType())))
2726 return failure();
2727 os << " " << emitter.getOrCreateName(arg);
2728 // If it is a memref argument, we need to check if it has dynamic
2729 // shape. If so, the dimensions have to be printed out
2730 if (failed(printMemRefDims(emitter, arg)))
2731 return failure();
2732 return success();
2733 })))
2734 return failure();
2735
2736 os << ") {\n";
2737 os.indent();
2738 if (emitter.shouldDeclareVariablesAtTop()) {
2739 // Declare all variables that hold op results including those from nested
2740 // regions.
2741 WalkResult result =
2742 functionOp.walk<WalkOrder::PreOrder>([&](Operation *op) -> WalkResult {
2743 for (OpResult result : op->getResults()) {
2744 if (failed(emitter.emitVariableDeclaration(
2745 result, /*trailingSemicolon=*/true)))
2746 return {
2747 op->emitError("unable to declare result variable for op")};
2748 }
2749 return WalkResult::advance();
2750 });
2751 if (result.wasInterrupted())
2752 return failure();
2753 }
2754
2755 Region::BlockListType &blocks = functionOp.getBlocks();
2756 // Create label names for basic blocks.
2757 for (Block &block : blocks)
2758 emitter.getOrCreateName(block);
2759
2760 // Declare variables for basic block arguments.
2761 for (auto it = std::next(blocks.begin()); it != blocks.end(); ++it) {
2762 Block &block = *it;
2763 for (BlockArgument &arg : block.getArguments()) {
2764 if (emitter.hasValueInScope(arg))
2765 return functionOp.emitOpError(" block argument #")
2766 << arg.getArgNumber() << " is out of scope";
2767 if (failed(
2768 emitter.emitType(block.getParentOp()->getLoc(), arg.getType())))
2769 return failure();
2770 os << " " << emitter.getOrCreateName(arg) << ";\n";
2771 }
2772 }
2773
2774 for (Block &block : blocks) {
2775 // Only print a label if there is more than one block.
2776 if (blocks.size() > 1)
2777 if (failed(emitter.emitLabel(block)))
2778 return failure();
2779 for (Operation &op : block.getOperations()) {
2780 // When generating code for an scf.if or std.cond_br op no semicolon needs
2781 // to be printed after the closing brace.
2782 // When generating code for an scf.for op, printing a trailing semicolon
2783 // is handled within the printOperation function.
2784 if (bool trailingSemicolon =
2785 !isa<scf::IfOp, scf::ForOp, cf::CondBranchOp>(op);
2786 failed(emitter.emitOperation(
2787 op, /*trailingSemicolon=*/trailingSemicolon)))
2788 return failure();
2789 }
2790 }
2791 os.unindent() << "}\n";
2792
2793 return success();
2794}
2795
2796static LogicalResult printOperation(CppEmitter &emitter,
2797 aievec::MatMulOp matmulOp) {
2798 auto lhs = matmulOp.getLhs();
2799 auto rhs = matmulOp.getRhs();
2800 auto acc = matmulOp.getAcc();
2801
2802 // The sources should have already been emitted
2803 if (!emitter.hasValueInScope(lhs) || !emitter.hasValueInScope(rhs) ||
2804 !emitter.hasValueInScope(acc))
2805 return failure();
2806
2807 auto lhsName = printConversionTo512bit(emitter, lhs);
2808 auto rhsName = printConversionTo512bit(emitter, rhs);
2809
2810 raw_indented_ostream &os = emitter.ostream();
2811
2812 StringRef accName = emitter.getOrCreateName(acc);
2813
2814 auto lhsShape = cast<VectorType>(lhs.getType()).getShape();
2815 auto rhsShape = cast<VectorType>(rhs.getType()).getShape();
2816 os << accName << " = mac_" << lhsShape[0] << "x" << lhsShape[1] << "_"
2817 << rhsShape[0] << "x" << rhsShape[1] << "(";
2818 os << lhsName << ", " << rhsName << ", " << accName << ")";
2819
2820 // Finally, set the name of the result to the accumulator's name
2821 emitter.setName(matmulOp.getResult(), accName);
2822
2823 return success();
2824}
2825
2826CppEmitter::CppEmitter(raw_ostream &os, bool declareVariablesAtTop, bool aie2)
2827 : os(os), declareVariablesAtTop(declareVariablesAtTop), aie2_(aie2) {
2828 valueInScopeCount.push(0);
2829 labelInScopeCount.push(0);
2830}
2831
2832/// Return the existing or a new name for a Value.
2833StringRef CppEmitter::getOrCreateName(Value val, std::string prefix) {
2834 if (!valueMapper.count(val))
2835 valueMapper.insert(val,
2836 formatv("{0}{1}", prefix, ++valueInScopeCount.top()));
2837 return *valueMapper.begin(val);
2838}
2839
2840/// Set the name of a value to an existing name
2841void CppEmitter::setName(Value val, StringRef name) {
2842 valueMapper.insert(val, name.str());
2843}
2844
2845/// Get a new name that is not associated with any value
2846std::string CppEmitter::getNewName(std::string prefix) {
2847 std::string ret = formatv("{0}{1}", prefix, ++valueInScopeCount.top());
2848 return ret;
2849}
2850
2851/// Given a dynamic shaped memref, set its size at position 'index' to
2852// parameter 'result'
2853void CppEmitter::setMemRefDimParam(Value memref, unsigned index,
2854 const std::string &parameter) {
2855 auto p = std::make_pair(memref, index);
2856 assert(!paramIndexMapper.count(p) && "memref dimension already set");
2857 paramIndexMapper[p] = parameter;
2858}
2859
2860/// Return the memref parameteric dimension size at given index
2861StringRef CppEmitter::getMemRefDimParam(Value memref, unsigned index) {
2862 auto p = std::make_pair(memref, index);
2863 assert(paramIndexMapper.count(p) && "memref dimension not found");
2864 return paramIndexMapper[p];
2865}
2866
2867/// Return true if the specified dim of memref has a parameter
2868/// associated with it
2869bool CppEmitter::isMemRefDimParam(Value memref, unsigned index) {
2870 assert([&] {
2871 auto type = llvm::dyn_cast<MemRefType>(memref.getType());
2872 if (!(type && type.isDynamicDim(index))) {
2873 printf("the dimension size at index is not dynamic\n");
2874 return false;
2875 }
2876 return true;
2877 }());
2878
2879 auto p = std::make_pair(memref, index);
2880 return paramIndexMapper.count(p);
2881}
2882
2883/// Return the existing or a new label for a Block.
2884StringRef CppEmitter::getOrCreateName(Block &block, std::string prefix) {
2885 if (!blockMapper.count(&block))
2886 blockMapper.insert(&block,
2887 formatv("{0}{1}", prefix, ++labelInScopeCount.top()));
2888 return *blockMapper.begin(&block);
2889}
2890
2891bool CppEmitter::shouldMapToUnsigned(IntegerType::SignednessSemantics val) {
2892 switch (val) {
2893 case IntegerType::Signless:
2894 case IntegerType::Signed:
2895 return false;
2896 case IntegerType::Unsigned:
2897 return true;
2898 }
2899 llvm::report_fatal_error("Unexpected IntegerType::SignednessSemantics");
2900}
2901
2902bool CppEmitter::hasValueInScope(Value val) { return valueMapper.count(val); }
2903
2904bool CppEmitter::hasBlockLabel(Block &block) {
2905 return blockMapper.count(&block);
2906}
2907
2908// Check whether the int type dense value has a splat value and get the int
2909// value as a string.
2910template <typename ElTy>
2911static std::string getSplatValueOfIntDense(DenseIntElementsAttr dense) {
2912 ElTy splatVal = dense.getSplatValue<ElTy>();
2913 return std::to_string(splatVal);
2914}
2915
2916// Get the first float value of a dense type value as a string.
2917static std::string getSplatValueOfFloatDense(DenseFPElementsAttr dense,
2918 bool isBFloat = false) {
2919 auto apFloat = dense.getSplatValue<APFloat>();
2920 float splatVal = apFloat.convertToFloat();
2921 std::string firstValue = std::to_string(splatVal);
2922
2923 if (apFloat.isPosInfinity())
2924 if (isBFloat)
2925 // TODO: Clean this up; emitting largest finite value in lieu of infinity;
2926 // system headers do not provide a simple way to initialize a bfloat16 to
2927 // infinity.
2928 firstValue = std::to_string(0x1.FEp+127f);
2929 else
2930 firstValue = std::to_string(std::numeric_limits<float>::max());
2931 else if (apFloat.isNegInfinity())
2932 if (isBFloat)
2933 firstValue = std::to_string(-0x1.FEp+127f);
2934 else
2935 firstValue = std::to_string(std::numeric_limits<float>::lowest());
2936 else if (!apFloat.isNonZero())
2937 firstValue = "0";
2938
2939 return firstValue;
2940}
2941
2942LogicalResult CppEmitter::emitAttribute(Location loc, Attribute attr) {
2943 auto printInt = [&](const APInt &val, bool isUnsigned) {
2944 if (val.getBitWidth() == 1)
2945 if (val.getBoolValue())
2946 os << "true";
2947 else
2948 os << "false";
2949 else {
2950 SmallString<128> strValue;
2951 val.toString(strValue, 10, !isUnsigned, false);
2952 os << strValue;
2953 }
2954 };
2955
2956 auto printFloat = [&](const APFloat &val) {
2957 if (val.isFinite()) {
2958 SmallString<128> strValue;
2959 // Use default values of toString except don't truncate zeros.
2960 val.toString(strValue, 0, 0, false);
2961 switch (llvm::APFloatBase::SemanticsToEnum(val.getSemantics())) {
2962 case llvm::APFloatBase::S_IEEEsingle:
2963 os << "(float)";
2964 break;
2965 case llvm::APFloatBase::S_IEEEdouble:
2966 os << "(double)";
2967 break;
2968 default:
2969 break;
2970 }
2971 os << strValue;
2972 } else if (val.isNaN())
2973 os << "NAN";
2974 else if (val.isInfinity()) {
2975 if (val.isNegative())
2976 os << "-";
2977 os << "INFINITY";
2978 }
2979 };
2980
2981 // Print floating point attributes.
2982 if (auto fAttr = llvm::dyn_cast<FloatAttr>(attr)) {
2983 printFloat(fAttr.getValue());
2984 return success();
2985 }
2986
2987 if (auto dense = llvm::dyn_cast<DenseFPElementsAttr>(attr)) {
2988 if (aie2() && dense.isSplat()) {
2989 if (auto vType = llvm::dyn_cast<VectorType>(dense.getType()))
2990 if (auto fType = llvm::dyn_cast<FloatType>(vType.getElementType())) {
2991 unsigned width = fType.getWidth();
2992 std::string splatValue;
2993 if (width == 32)
2994 splatValue = getSplatValueOfFloatDense(dense);
2995 else if (width == 16)
2996 splatValue = getSplatValueOfFloatDense(dense, /*isBFloat*/ true);
2997
2998 if (width == 32 || (width == 16 && getVectorLaneSize(vType) == 32))
2999 if (splatValue == "0") {
3000 os << "broadcast_zero_";
3001 if (failed(emitType(loc, fType)))
3002 return failure();
3003 os << "()";
3004 } else {
3005 os << "broadcast_to_";
3006 if (failed(emitType(loc, vType)))
3007 return failure();
3008 os << "((";
3009 if (failed(emitType(loc, fType)))
3010 return failure();
3011 os << ")";
3012 os << splatValue;
3013 os << ")";
3014 }
3015 else if (width == 16 && getVectorLaneSize(vType) == 16) {
3016 os << "extract_v16bfloat16(";
3017 if (splatValue == "0")
3018 os << "broadcast_zero_bfloat16()";
3019 else {
3020 os << "broadcast_to_v32bfloat16";
3021 os << "((";
3022 if (failed(emitType(loc, fType)))
3023 return failure();
3024 os << ")";
3025 os << splatValue;
3026 os << ")";
3027 }
3028 os << ", 0)";
3029 }
3030 }
3031 // TODO: Deal with multiple dense value case for AIE2.
3032 } else {
3033 os << '{';
3034 interleaveComma(dense, os, [&](const APFloat &val) { printFloat(val); });
3035 os << '}';
3036 }
3037 return success();
3038 }
3039
3040 // Print integer attributes.
3041 if (auto iAttr = llvm::dyn_cast<IntegerAttr>(attr)) {
3042 if (auto iType = llvm::dyn_cast<IntegerType>(iAttr.getType())) {
3043 printInt(iAttr.getValue(), shouldMapToUnsigned(iType.getSignedness()));
3044 return success();
3045 }
3046 if (llvm::dyn_cast<IndexType>(iAttr.getType())) {
3047 printInt(iAttr.getValue(), false);
3048 return success();
3049 }
3050 }
3051
3052 if (auto dense = llvm::dyn_cast<DenseIntElementsAttr>(attr)) {
3053 if (auto tType = llvm::dyn_cast<TensorType>(dense.getType())) {
3054 if (auto iType = llvm::dyn_cast<IntegerType>(tType.getElementType())) {
3055 os << '{';
3056 interleaveComma(dense, os, [&](const APInt &val) {
3057 printInt(val, shouldMapToUnsigned(iType.getSignedness()));
3058 });
3059 os << '}';
3060 return success();
3061 }
3062 if (llvm::dyn_cast<IndexType>(tType.getElementType())) {
3063 os << '{';
3064 interleaveComma(dense, os,
3065 [&](const APInt &val) { printInt(val, false); });
3066 os << '}';
3067 return success();
3068 }
3069 }
3070
3071 if (auto vType = llvm::dyn_cast<VectorType>(dense.getType())) {
3072 if (auto iType = llvm::dyn_cast<IntegerType>(vType.getElementType())) {
3073 unsigned width = iType.getWidth();
3074 if (llvm::all_of(dense, [](const APInt &val) { return val == 0; })) {
3075 if (aie2()) {
3076 if (width * getVectorLaneSize(vType) == 1024) {
3077 os << "concat(broadcast_zero_s" << width << "(), broadcast_zero_s"
3078 << width << "())";
3079 return success();
3080 }
3081 os << "broadcast_zero_s";
3082 os << width;
3083 } else {
3084 os << "null_";
3085 if (failed(emitType(loc, vType)))
3086 return failure();
3087 }
3088 os << "()";
3089 return success();
3090 }
3091
3092 if (aie2() && dense.isSplat()) {
3093 std::string splatValue;
3094 if (width == 32)
3095 splatValue = getSplatValueOfIntDense<int32_t>(dense);
3096 else if (width == 16)
3097 splatValue = getSplatValueOfIntDense<int16_t>(dense);
3098 else if (width == 8)
3099 splatValue = getSplatValueOfIntDense<int8_t>(dense);
3100 os << "broadcast_to_";
3101 if (failed(emitType(loc, vType)))
3102 return failure();
3103 os << "((";
3104 if (failed(emitType(loc, iType)))
3105 return failure();
3106 os << ")";
3107 os << splatValue;
3108 os << ")";
3109 // TODO: Handle multiple dense value case in AIE2.
3110 } else {
3111 os << '{';
3112 interleaveComma(dense, os, [&](const APInt &val) {
3113 printInt(val, shouldMapToUnsigned(iType.getSignedness()));
3114 });
3115 os << '}';
3116 }
3117 return success();
3118 }
3119 if (llvm::dyn_cast<IndexType>(vType.getElementType())) {
3120 os << '{';
3121 interleaveComma(dense, os,
3122 [&](const APInt &val) { printInt(val, false); });
3123 os << '}';
3124 return success();
3125 }
3126 }
3127 }
3128
3129 // Print opaque attributes.
3130 if (auto oAttr = llvm::dyn_cast<emitc::OpaqueAttr>(attr)) {
3131 os << oAttr.getValue();
3132 return success();
3133 }
3134
3135 // Print symbolic reference attributes.
3136 if (auto sAttr = llvm::dyn_cast<SymbolRefAttr>(attr)) {
3137 if (sAttr.getNestedReferences().size() > 1)
3138 return emitError(loc, "attribute has more than 1 nested reference");
3139 os << sAttr.getRootReference().getValue();
3140 return success();
3141 }
3142
3143 // Print type attributes.
3144 if (auto type = llvm::dyn_cast<TypeAttr>(attr))
3145 return emitType(loc, type.getValue());
3146
3147 return emitError(loc, "cannot emit attribute of type ") << attr;
3148}
3149
3150LogicalResult CppEmitter::emitOperands(Operation &op) {
3151 auto emitOperandName = [&](Value result) -> LogicalResult {
3152 if (!hasValueInScope(result))
3153 return op.emitOpError() << "operand value not in scope";
3154 os << getOrCreateName(result);
3155 return success();
3156 };
3157 return interleaveCommaWithError(op.getOperands(), os, emitOperandName);
3158}
3159
3160LogicalResult
3161CppEmitter::emitOperandsAndAttributes(Operation &op,
3162 ArrayRef<StringRef> exclude) {
3163 if (failed(emitOperands(op)))
3164 return failure();
3165 // Insert comma in between operands and non-filtered attributes if needed.
3166 if (op.getNumOperands() > 0)
3167 for (NamedAttribute attr : op.getAttrs())
3168 if (!is_contained(exclude, attr.getName().strref())) {
3169 os << ", ";
3170 break;
3171 }
3172 // Emit attributes.
3173 auto emitNamedAttribute = [&](NamedAttribute attr) -> LogicalResult {
3174 if (is_contained(exclude, attr.getName().strref()))
3175 return success();
3176 os << "/* " << attr.getName().getValue() << " */";
3177 if (failed(emitAttribute(op.getLoc(), attr.getValue())))
3178 return failure();
3179 return success();
3180 };
3181
3182 return interleaveCommaWithError(op.getAttrs(), os, emitNamedAttribute);
3183}
3184
3185LogicalResult CppEmitter::emitVariableAssignment(OpResult result) {
3186 if (!hasValueInScope(result)) {
3187 return result.getDefiningOp()->emitOpError(
3188 "result variable for the operation has not been declared");
3189 }
3190 os << getOrCreateName(result) << " = ";
3191
3192 return success();
3193}
3194
3195LogicalResult CppEmitter::emitVariableDeclaration(OpResult result,
3196 bool trailingSemicolon,
3197 bool isAcc) {
3198 if (hasValueInScope(result))
3199 return result.getDefiningOp()->emitError(
3200 "result variable for the operation already declared");
3201 if (failed(
3202 emitType(result.getOwner()->getLoc(), result.getType(), true, isAcc)))
3203 return failure();
3204 os << " " << getOrCreateName(result);
3205 if (trailingSemicolon)
3206 os << ";\n";
3207
3208 return success();
3209}
3210
3211LogicalResult CppEmitter::emitAssignPrefix(Operation &op, bool isAcc) {
3212 switch (op.getNumResults()) {
3213 case 0:
3214 break;
3215 case 1: {
3216 OpResult result = op.getResult(0);
3217 if (shouldDeclareVariablesAtTop()) {
3218 if (failed(emitVariableAssignment(result)))
3219 return failure();
3220 } else {
3221 if (failed(emitVariableDeclaration(result, /*trailingSemicolon=*/false,
3222 isAcc)))
3223 return failure();
3224 os << " = ";
3225 }
3226 break;
3227 }
3228 default:
3229 if (!shouldDeclareVariablesAtTop())
3230 for (OpResult result : op.getResults())
3231 if (failed(emitVariableDeclaration(result, /*trailingSemicolon=*/true)))
3232 return failure();
3233
3234 os << "std::tie(";
3235 interleaveComma(op.getResults(), os,
3236 [&](Value result) { os << getOrCreateName(result); });
3237 os << ") = ";
3238 }
3239 return success();
3240}
3241
3242LogicalResult CppEmitter::emitLabel(Block &block) {
3243 if (!hasBlockLabel(block))
3244 return block.getParentOp()->emitError("label for block not found");
3245 // FIXME: Add feature in `raw_indented_ostream` to ignore indent for block
3246 // label instead of using `getOStream`.
3247 os.getOStream() << getOrCreateName(block) << ":\n";
3248 return success();
3249}
3250
3251LogicalResult CppEmitter::emitOperation(Operation &op, bool trailingSemicolon) {
3252 // Some operations in AIE become nops. Check if this operation must be skipped
3253 // from codegen
3254 if (skippedOp(&op, *this))
3255 return success();
3256
3257 LogicalResult status =
3258 TypeSwitch<Operation *, LogicalResult>(&op)
3259 // EmitC ops.
3260 .Case<emitc::ApplyOp, emitc::CallOpaqueOp, emitc::ConstantOp>(
3261 [&](auto op) { return printOperation(*this, op); })
3262 .Case<emitc::IncludeOp>([&](auto op) {
3263 if (StringRef name = op.getInclude(); !includeNames.count(name)) {
3264 includeNames.insert(name);
3265 return printOperation(*this, op);
3266 }
3267 return success();
3268 })
3269 // SCF ops.
3270 .Case<scf::ForOp, scf::IfOp, scf::YieldOp>(
3271 [&](auto op) { return printOperation(*this, op); })
3272 // Standard ops.
3273 .Case<cf::BranchOp, func::CallOp, cf::CondBranchOp, func::FuncOp,
3274 ModuleOp, func::ReturnOp>(
3275 [&](auto op) { return printOperation(*this, op); })
3276 // Arith ops.
3277 .Case<arith::ConstantOp>(
3278 [&](auto op) { return printOperation(*this, op); })
3279 // Extra ops added for AIE
3280 // Arith ops.
3281 .Case<arith::AddIOp>(
3282 [&](auto op) { return printOperation<arith::AddIOp>(*this, op); })
3283 .Case<arith::AddFOp>(
3284 [&](auto op) { return printOperation<arith::AddFOp>(*this, op); })
3285 .Case<arith::MulIOp>(
3286 [&](auto op) { return printOperation<arith::MulIOp>(*this, op); })
3287 .Case<arith::MulFOp>(
3288 [&](auto op) { return printOperation<arith::MulFOp>(*this, op); })
3289 .Case<arith::SubIOp>(
3290 [&](auto op) { return printOperation<arith::SubIOp>(*this, op); })
3291 .Case<arith::SubFOp>(
3292 [&](auto op) { return printOperation<arith::SubFOp>(*this, op); })
3293 .Case<arith::DivSIOp>([&](auto op) {
3294 return printOperation<arith::DivSIOp>(*this, op);
3295 })
3296 .Case<arith::DivUIOp>([&](auto op) {
3297 return printOperation<arith::DivUIOp>(*this, op);
3298 })
3299 .Case<arith::DivFOp>(
3300 [&](auto op) { return printOperation<arith::DivFOp>(*this, op); })
3301 .Case<arith::RemSIOp>([&](auto op) {
3302 return printOperation<arith::RemSIOp>(*this, op);
3303 })
3304 .Case<arith::CmpIOp>(
3305 [&](auto op) { return printOperation<arith::CmpIOp>(*this, op); })
3306 .Case<arith::SelectOp>(
3307 [&](auto op) { return printOperation(*this, op); })
3308 // Vector ops.
3309 .Case<vector::TransferWriteOp>(
3310 [&](auto op) { return printOperation(*this, op); })
3311 // Memref ops.
3312 .Case<memref::StoreOp, memref::ExpandShapeOp,
3313 memref::CollapseShapeOp>(
3314 [&](auto op) { return printOperation(*this, op); })
3315 // AievecAie1 ops
3316 .Case<aievec::aie1::AddOp, aievec::aie1::SubOp, aievec::aie1::FMAOp,
3317 aievec::aie1::MulOp, aievec::aie1::SelectOp,
3318 aievec::aie1::ExtOp>(
3319 [&](auto op) { return printOperation(*this, op); })
3320 // Aievec ops
3321 .Case<AddElemOp, ConcatOp, ExtOp, PackOp, SRSOp, SubElemOp, UPDOp,
3322 UPSOp, FMAElemOp, MulElemOp, BroadcastOp, BroadcastScalarOp,
3323 MulConvOp, FMAConvOp, ShiftOp, ShuffleOp, CastOp, MinOp, MaxOp,
3324 NegOp, CmpOp, SelOp, ExtElemOp, BxorOp, BnegOp, BandOp, BorOp,
3325 UnpackOp, MatMulOp, LegacyShuffleOp>(
3326 [&](auto op) { return printOperation(*this, op); })
3327 // AIE dialect ops.
3328 .Case<AIE::DeviceOp>(
3329 [&](auto op) { return printOperation(*this, op); })
3330 .Default([&](Operation *) {
3331 return op.emitOpError("unable to find printer for op");
3332 });
3333
3334 if (failed(status))
3335 return failure();
3336 os << (trailingSemicolon ? ";\n" : "\n");
3337
3338 return success();
3339}
3340
3341std::optional<std::string>
3342CppEmitter::genCppTypeName(Type type, bool stdintType, bool isAcc) {
3343 std::stringstream ss;
3344 if (auto iType = dyn_cast<IntegerType>(type)) {
3345 switch (iType.getWidth()) {
3346 case 1:
3347 return "bool";
3348 case 8:
3349 case 16:
3350 case 32:
3351 case 64:
3352 if (shouldMapToUnsigned(iType.getSignedness()))
3353 ss << "uint" << iType.getWidth() << (stdintType ? "_t" : "");
3354 else
3355 ss << "int" << iType.getWidth() << (stdintType ? "_t" : "");
3356 return ss.str();
3357 case 48:
3358 case 80:
3359 ss << "acc" << iType.getWidth();
3360 return ss.str();
3361 default:
3362 return {};
3363 }
3364 }
3365 if (auto fType = dyn_cast<FloatType>(type)) {
3366 switch (fType.getWidth()) {
3367 case 16:
3368 return "bfloat16";
3369 case 32:
3370 return "float";
3371 case 64:
3372 return "double";
3373 default:
3374 return {};
3375 }
3376 }
3377 if (auto iType = dyn_cast<IndexType>(type))
3378 return "size_t";
3379
3380 if (auto tType = dyn_cast<TensorType>(type)) {
3381 if (!tType.hasRank())
3382 return {};
3383 if (!tType.hasStaticShape())
3384 return {};
3385 ss << "Tensor<";
3386 auto nestedTypeName = genCppTypeName(tType.getElementType());
3387 if (!nestedTypeName)
3388 return {};
3389 ss << *nestedTypeName;
3390 auto shape = tType.getShape();
3391 for (auto dimSize : shape) {
3392 ss << ", ";
3393 ss << dimSize;
3394 }
3395 ss << ">";
3396 return ss.str();
3397 }
3398 if (auto tType = dyn_cast<TupleType>(type)) {
3399 ss << "std::tuple<";
3400 bool itrleaveFailed = false;
3401 llvm::interleave(
3402 tType.getTypes(),
3403 [&](Type type) {
3404 auto optTyNameStr = genCppTypeName(type);
3405 if (optTyNameStr)
3406 ss << *optTyNameStr;
3407 else
3408 itrleaveFailed = true;
3409 },
3410 [&]() { ss << ", "; });
3411 ss << ">";
3412 if (!itrleaveFailed)
3413 return ss.str();
3414 return {};
3415 }
3416 if (auto oType = dyn_cast<emitc::OpaqueType>(type)) {
3417 ss << oType.getValue().str();
3418 return ss.str();
3419 }
3420 // Types added for AIE
3421 // MemRefType: printed as 'eltType'*
3422 if (auto tType = dyn_cast<MemRefType>(type)) {
3423 auto elemTyStrOpt = genCppTypeName(tType.getElementType());
3424 if (!elemTyStrOpt)
3425 return {};
3426 ss << *elemTyStrOpt << " * restrict";
3427 return ss.str();
3428 }
3429 // VectorType: printed as v'lane''eltType'
3430 if (auto tType = dyn_cast<VectorType>(type)) {
3431 Type eltType = tType.getElementType();
3432 // Flatten multidimensional vectors
3433 auto vShape = tType.getShape();
3434 int64_t numElems = std::accumulate(vShape.begin(), vShape.end(), 1,
3435 std::multiplies<int64_t>());
3436 ss << "v" << std::to_string(numElems);
3437
3438 int64_t iElTyBitWidth = 0;
3439 auto iElTy = dyn_cast<IntegerType>(eltType);
3440 if (iElTy)
3441 iElTyBitWidth = iElTy.getWidth();
3442 if (aie2() && (isAcc || iElTyBitWidth == 64)) {
3443 if (iElTy) {
3444 // AIE2 has `ups_to_v16acc32`, `ups_to_v16acc64`, `ups_to_v32acc32`
3445 // intrinsics
3446 if ((numElems == 16 && iElTyBitWidth == 64) ||
3447 (numElems == 32 && iElTyBitWidth == 32) ||
3448 (numElems == 16 && iElTyBitWidth == 32)) {
3449 ss << "acc" << iElTyBitWidth;
3450 return ss.str();
3451 }
3452 return {};
3453 }
3454 if (isa<FloatType>(eltType)) {
3455 // AIE2 only has a `ups_to_v16accfloat` intrinsic
3456 ss << "accfloat";
3457 return ss.str();
3458 }
3459 }
3460 auto elTyNameOpt = genCppTypeName(eltType, false);
3461 if (!elTyNameOpt)
3462 return {};
3463 ss << *elTyNameOpt;
3464 return ss.str();
3465 }
3466 return {};
3467}
3468
3469LogicalResult CppEmitter::emitType(Location loc, Type type, bool stdintType,
3470 bool isAcc) {
3471 auto typeName = genCppTypeName(type, stdintType, isAcc);
3472 if (!typeName)
3473 return emitError(loc, "cannot emit type ") << type;
3474 os << *typeName;
3475 return success();
3476}
3477
3478LogicalResult CppEmitter::emitTypes(Location loc, ArrayRef<Type> types) {
3479 switch (types.size()) {
3480 case 0:
3481 os << "void";
3482 return success();
3483 case 1:
3484 return emitType(loc, types.front());
3485 default:
3486 return emitTupleType(loc, types);
3487 }
3488}
3489
3490LogicalResult CppEmitter::emitTupleType(Location loc, ArrayRef<Type> types) {
3491 os << "std::tuple<";
3492 if (failed(interleaveCommaWithError(
3493 types, os, [&](Type type) { return emitType(loc, type); })))
3494 return failure();
3495 os << ">";
3496 return success();
3497}
3498
3499LogicalResult aievec::translateAIEVecToCpp(Operation *op, bool aie2,
3500 raw_ostream &os) {
3501 CppEmitter emitter(os, false, aie2);
3502 return emitter.emitOperation(*op, /*trailingSemicolon=*/false);
3503}
LogicalResult interleaveCommaWithError(const Container &c, raw_ostream &os, UnaryFunctor eachFn)
LogicalResult interleaveWithError(ForwardIterator begin, ForwardIterator end, UnaryFunctor eachFn, NullaryFunctor betweenFn)
Convenience functions to produce interleaved output with functions returning a LogicalResult.
std::shared_ptr< Value > value()
Definition cxxopts.hpp:1026
PathEndPoint src
mlir::LogicalResult translateAIEVecToCpp(mlir::Operation *op, bool aie2, mlir::raw_ostream &os)
Translates the AIE vector dialect MLIR to C++ code.
int32_t getVectorSizeInBits(mlir::VectorType type)
Definition AIEVecUtils.h:66
unsigned getVectorLaneSize(mlir::VectorType type)
Definition AIEVecUtils.h:55
int32_t getElementSizeInBits(mlir::VectorType type)
Definition AIEVecUtils.h:49