MLIR-AIE
AIETargetHSA.cpp
Go to the documentation of this file.
1//===- AIETargetHSA.cpp -----------------------------------------*- C++ -*-===//
2//
3// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7// (c) Copyright 2021 Xilinx Inc.
8// (c) Copyright 2021-2023, Advanced Micro Devices, Inc.
9//
10//===----------------------------------------------------------------------===//
12
16
17#include "mlir/Dialect/Func/IR/FuncOps.h"
18#include "mlir/IR/Attributes.h"
19#include "mlir/IR/IRMapping.h"
20#include "mlir/Pass/Pass.h"
21#include "mlir/Tools/mlir-translate/MlirTranslateMain.h"
22
23#include "llvm/ADT/StringExtras.h"
24#include "llvm/IR/Module.h"
25
26using namespace mlir;
27using namespace xilinx;
28using namespace xilinx::AIE;
29using namespace xilinx::AIEX;
30
31namespace xilinx::AIE {
32
// Preamble emitted at the top of the lowered C++ code. It defines the
// __mlir_aie_verbose(x) macro, which expands to its argument unless
// MLIR_AIE_QUIET is defined, in which case it expands to nothing.
const char *hsa_cpp_file_header =
    "\n"
    "// This file was auto-generated by aiecc.py --aie-generate-hsa\n"
    "\n"
    "#ifndef MLIR_AIE_QUIET\n"
    "#define __mlir_aie_verbose(x) x\n"
    "#else\n"
    "#define __mlir_aie_verbose(x)\n"
    "#endif\n"
    "\n";
44
45mlir::LogicalResult AIETranslateToHSA(ModuleOp module, raw_ostream &output,
46 llvm::StringRef deviceName) {
47
48 DenseMap<TileID, Operation *> tiles;
49 DenseMap<Operation *, SmallVector<BufferOp, 4>> buffers;
50
51 DeviceOp targetOp = AIE::DeviceOp::getForSymbolInModule(module, deviceName);
52 if (!targetOp)
53 return module.emitOpError("expected AIE.device operation at toplevel");
54
55 // Putting the standard header
56 output << hsa_cpp_file_header;
57
58 // Getting the sequence function op which contains the instructions
59 auto sequenceOps = targetOp.getOps<AIE::RuntimeSequenceOp>();
60 if (sequenceOps.empty()) {
61 // If no sequenceOp then just return
62 return success();
63 } else if (std::distance(sequenceOps.begin(), sequenceOps.end()) > 1) {
64 return module.emitOpError("expected at most one sequence operation");
65 }
66 AIE::RuntimeSequenceOp sequenceOp = *sequenceOps.begin();
67
68 collectTiles(targetOp, tiles);
69 collectBuffers(targetOp, buffers);
70
71 // Generate dynamic data movement
72 output << "void invoke_data_movement(hsa_queue_t *q, hsa_agent_t *a";
73
74 // Looping over every Memcpy operation so we take the correct number of
75 // buffers
76 int num_ops = 0;
77 for (auto op : sequenceOp.getOps<NpuDmaMemcpyNdOp>()) {
78 // Getting the IDs of the buffers
79 auto memref = op.getMemref();
80 Block &entryBB =
81 op->getParentOfType<AIE::RuntimeSequenceOp>().getBody().front();
82 int arg_idx = -1;
83 for (int i = 0, e = entryBB.getNumArguments(); i < e; i++) {
84 if (entryBB.getArgument(i) == memref) {
85 arg_idx = i;
86 break;
87 }
88 }
89 num_ops++;
90
91 output << ", void *buf" << arg_idx;
92 }
93
94 output << ") {\n";
95
96 output << "\tuint64_t wr_idx = 0;\n";
97 output << "\tuint64_t packet_id = 0;\n";
98
99 int op_count = 0;
100 for (auto op : sequenceOp.getOps<NpuDmaMemcpyNdOp>()) {
101 auto dev = sequenceOp->getParentOfType<AIE::DeviceOp>();
102 if (!dev) {
103 op.emitOpError("couldn't get DeviceOp");
104 return failure();
105 }
106
107 AIE::ShimDMAAllocationOp infoOp = AIE::ShimDMAAllocationOp::getForSymbol(
108 dev, op.getMetadata().getRootReference());
109 if (!infoOp) {
110 op.emitOpError("couldn't find shim_dma_allocation op");
111 return failure();
112 }
113
114 AIE::TileOp tile = infoOp.getTileOp();
115 if (!tile) {
116 op.emitOpError("shim_dma_allocation op must reference a valid TileOp");
117 return failure();
118 }
119
120 auto channelDir = infoOp.getChannelDir();
121 uint32_t ChannelId = infoOp.getChannelIndex();
122 bool isMM2S = channelDir == AIE::DMAChannelDir::MM2S;
123 int col = tile.getCol();
124 bool isPlio = infoOp.getPlio();
125
126 llvm::SmallVector<int64_t, 4> strides = llvm::map_to_vector(
127 llvm::reverse(op.getMixedStrides()),
128 [](OpFoldResult s) { return getConstantIntValue(s).value(); });
129 ::SmallVector<int64_t, 4> sizes = llvm::map_to_vector(
130 llvm::reverse(op.getMixedSizes()),
131 [](OpFoldResult s) { return getConstantIntValue(s).value(); });
132 ::SmallVector<int64_t, 4> offsets = llvm::map_to_vector(
133 llvm::reverse(op.getMixedOffsets()),
134 [](OpFoldResult s) { return getConstantIntValue(s).value(); });
135
136 // buffer_offset
137 size_t stride = 1;
138 size_t offset = 0;
139 BaseMemRefType my_memref = op.getMemref().getType();
140 auto shape = my_memref.getShape();
141 size_t R = shape.size();
142 size_t el_bit_width = op.getElementTypeBitwidth();
143 assert(el_bit_width % 8 == 0 &&
144 "Expected Memref element bitwidth to be multiple of 8.");
145 size_t S = el_bit_width / 8;
146 for (size_t i = 0; i < R; i++) {
147 offset += offsets[i] * stride * S;
148 stride *= shape[R - i - 1];
149 }
150
151 // Getting the ID of the buffer that we are using
152 auto memref = op.getMemref();
153 Block &entryBB =
154 op->getParentOfType<AIE::RuntimeSequenceOp>().getBody().front();
155 int arg_idx = -1;
156 for (int i = 0, e = entryBB.getNumArguments(); i < e; i++) {
157 if (entryBB.getArgument(i) == memref) {
158 arg_idx = i;
159 break;
160 }
161 }
162
163 if (strides[0] != 1)
164 return module.emitOpError("nd_memcpy inner-dimension stride != 1 is "
165 "unsupported by HSA target");
166
167 // Writing the packet information to perform the DMA
168 output << "\thsa_agent_dispatch_packet_t pkt" << op_count << " ;\n";
169 output << "\twr_idx = hsa_queue_add_write_index_relaxed(q, 1);\n";
170 output << "\tpacket_id = wr_idx % q->size;\n";
171 output << "\tmlir_aie_packet_nd_memcpy(&pkt" << op_count
172 << ", 0 /* herd_id */, " << col << " /* col */, " << isMM2S
173 << " /* dir */, " << ChannelId
174 << "/* channel */, 4 /* Burst length */, " << (isPlio ? 1 : 2)
175 << " /* Memory space */, "
176 "(uint64_t)buf"
177 << arg_idx << " + " << offset << " /* Address */, " << sizes[0] * 4
178 << " /* 1d_length */, " << (strides[1] ? sizes[1] : 1)
179 << " /* 2d_length */, " << (strides[1] ? strides[1] * 4 : 0)
180 << " /* 2d_stride */, " << (strides[2] ? sizes[2] : 1)
181 << " /* 3d_length */, " << (strides[2] ? strides[2] * 4 : 0)
182 << " /* 3d_stride */ , 1 /* 4d_length */, 0 /* 4d_stride */);\n";
183
184 bool last_op = op_count == (num_ops - 1);
185 // Only ring the doorbell on the last packet
186 if (last_op) {
187 output
188 << "\tmlir_aie_queue_dispatch_and_wait(a, q, packet_id, wr_idx, &pkt"
189 << op_count << ", false);\n\n";
190 } else {
191 output << "\thsa_amd_signal_create_on_agent(1, 0, nullptr, a, 0, &pkt"
192 << op_count << ".completion_signal);\n";
193 output << "\tmlir_aie_write_pkt<hsa_agent_dispatch_packet_t>(q, "
194 "packet_id, &pkt"
195 << op_count << ");\n\n";
196 }
197
198 op_count++;
199 }
200
201 // Waiting to make sure each DMA is complete
202 for (int i = 0; i < op_count; i++) {
203 output << "\twhile (hsa_signal_wait_scacquire(pkt" << i
204 << ".completion_signal,\n";
205 output << "\tHSA_SIGNAL_CONDITION_EQ, 0, 0x80000,\n";
206 output << "\tHSA_WAIT_STATE_ACTIVE) != 0);\n";
207 }
208
209 // Destroying every signal that we created
210 for (int i = 0; i < op_count; i++) {
211 output << "\thsa_signal_destroy(pkt" << i << ".completion_signal);\n";
212 }
213
214 output << "}\n";
215
216 return success();
217}
218} // namespace xilinx::AIE
Include the generated interface declarations.
void collectTiles(DeviceOp &device, llvm::DenseMap< TileID, mlir::Operation * > &tiles)
void collectBuffers(DeviceOp &device, llvm::DenseMap< mlir::Operation *, llvm::SmallVector< BufferOp, 4 > > &buffers)
mlir::LogicalResult AIETranslateToHSA(mlir::ModuleOp module, llvm::raw_ostream &output, llvm::StringRef deviceName="")
const char * hsa_cpp_file_header