MLIR-AIE
AIETargetAirbin.cpp
Go to the documentation of this file.
1//===- AIETargetAirbin.cpp --------------------------------------*- C++ -*-===//
2//
3// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7// (c) Copyright 2021 Xilinx Inc.
8//
9//===----------------------------------------------------------------------===//
10
14
15#include "llvm/Support/Debug.h"
16#include "llvm/Support/Format.h"
17#include "llvm/Support/FormatVariadic.h"
18#include "llvm/Support/raw_ostream.h"
19
20#include <fcntl.h> // open
21#include <gelf.h>
22#include <iostream>
23#include <libelf.h>
24#include <set>
25#include <sys/stat.h>
26#include <unistd.h> // read
27#include <utility> // pair
28#include <vector>
29
30#define DEBUG_TYPE "aie-generate-airbin"
31
32#define EM_AMDAIR 225 /* AMD AIR */
33
34using namespace mlir;
35
36namespace xilinx::AIE {
37
38enum {
52};
53
// Generic register field values for "off" and "on".
static constexpr unsigned DISABLE = 0;
static constexpr unsigned ENABLE = 1;

// Bit layout of a full tile address, from least significant upwards:
// register offset inside the tile, then row, then column, then array offset.
static constexpr unsigned TILE_ADDR_OFF_WIDTH = 18;

static constexpr unsigned TILE_ADDR_ROW_WIDTH = 5;
static constexpr unsigned TILE_ADDR_ROW_SHIFT = TILE_ADDR_OFF_WIDTH;

static constexpr unsigned TILE_ADDR_COL_WIDTH = 7;
static constexpr unsigned TILE_ADDR_COL_SHIFT =
    TILE_ADDR_ROW_SHIFT + TILE_ADDR_ROW_WIDTH;

static constexpr unsigned TILE_ADDR_ARR_SHIFT =
    TILE_ADDR_COL_SHIFT + TILE_ADDR_COL_WIDTH;
68
/********
  ME Tile

  Tile-relative byte offsets of the register and memory regions of an ME tile.
********/
static constexpr unsigned ME_DATA_MEM_BASE = 0x00000;
static constexpr unsigned ME_PROG_MEM_BASE = 0x20000;
static constexpr unsigned ME_DMA_BD_BASE = 0x1D000;
static constexpr unsigned ME_DMA_S2MM_BASE = 0x1DE00;
static constexpr unsigned ME_DMA_MM2S_BASE = 0x1DE10;
static constexpr unsigned ME_SS_MASTER_BASE = 0x3F000;
static constexpr unsigned ME_SS_SLAVE_CFG_BASE = 0x3F100;
static constexpr unsigned ME_SS_SLAVE_SLOT_BASE = 0x3F200;
80
/*
  Tile DMA
*/
static constexpr auto ME_DMA_BD_COUNT = 16;
static constexpr auto ME_DMA_BD_SIZE = 0x20;

/*
  Register image of one tile DMA buffer descriptor (BD).

  Every member has a default initializer so that a default-constructed
  descriptor holds a fully defined register image. Fields are later built up
  with `|=` and written out word by word, so an indeterminate starting value
  would end up in the generated output.
*/
struct MERegDMABD {
  uint32_t addrA{0};
  uint32_t addrB{0};
  uint32_t x2d{0xff0000u | 0x001u}; // wrap at 256, increment by 1
  uint32_t y2d{0xff000000u | 0xff0000u |
               0x100u}; // wrap at 256, increment by 256 every 256 streams
  uint32_t packet{0};
  uint32_t interleave{0};
  uint32_t control{0};
  uint32_t padding{0};
};

static_assert(sizeof(MERegDMABD) == ME_DMA_BD_SIZE,
              "Size of me_reg_dma_bd is incorrect");
101
102using DMABDRegBlock = MERegDMABD[ME_DMA_BD_COUNT];
103static const MERegDMABD *
104 DMABdRegs(reinterpret_cast<MERegDMABD *>(ME_DMA_BD_BASE));
105
106static_assert(sizeof(DMABDRegBlock) == (ME_DMA_BD_COUNT * sizeof(MERegDMABD)),
107 "Size of dma_bd_reg_block is incorrect");
108
109auto regDMAAddrABD = [](auto idx) {
110 return reinterpret_cast<uint64_t>(&DMABdRegs[idx].addrA);
111};
112
113auto regDMAAddrBBD = [](auto idx) {
114 return reinterpret_cast<uint64_t>(&DMABdRegs[idx].addrB);
115};
116
117auto regDMA2DXBD = [](auto idx) {
118 return reinterpret_cast<uint64_t>(&DMABdRegs[idx].x2d);
119};
120
121auto regDMA2DYBD = [](auto idx) {
122 return reinterpret_cast<uint64_t>(&DMABdRegs[idx].y2d);
123};
124
125auto regDMAPktBD = [](auto idx) {
126 return reinterpret_cast<uint64_t>(&DMABdRegs[idx].packet);
127};
128
129auto regDMAIntStateBD = [](auto idx) {
130 return reinterpret_cast<uint64_t>(&DMABdRegs[idx].interleave);
131};
132
133auto regDMACtrlBD = [](auto idx) {
134 return reinterpret_cast<uint64_t>(&DMABdRegs[idx].control);
135};
136
/*
  DMA S2MM channel control
*/
static constexpr auto DMA_S2MM_CHANNEL_COUNT = 2u;
static constexpr auto REG_DMA_S2MM_BLOCK_SIZE = 0x08;

// Register pair controlling one S2MM DMA channel.
// NOTE(review): the `struct RegDMAS2MM {` header line was missing from this
// listing and has been restored from the uses of the type below; confirm
// against the upstream file.
struct RegDMAS2MM {
  uint32_t ctrl;
  uint32_t queue;
};

static_assert(sizeof(RegDMAS2MM) == REG_DMA_S2MM_BLOCK_SIZE,
              "Size of reg_dma_s2mm is incorrect");
150
151using DMAS2MMRegBlock = RegDMAS2MM[DMA_S2MM_CHANNEL_COUNT];
152static const RegDMAS2MM *
153 DMAS2MMRegs(reinterpret_cast<RegDMAS2MM *>(ME_DMA_S2MM_BASE));
154
155auto regDMAS2MMCtrl = [](auto ch) {
156 return reinterpret_cast<uint64_t>(&DMAS2MMRegs[ch].ctrl);
157};
158
159auto regDMAS2MMQueue = [](auto ch) {
160 return reinterpret_cast<uint64_t>(&DMAS2MMRegs[ch].queue);
161};
162
/*
  DMA MM2S channel control
*/
static constexpr auto DMA_MM2S_CHANNEL_COUNT = 2u;
static constexpr auto REG_DMA_MM2S_BLOCK_SIZE = 0x08;

// Register pair controlling one MM2S DMA channel.
// NOTE(review): the `struct RegDMAMM2S {` header line was missing from this
// listing and has been restored from the uses of the type below; confirm
// against the upstream file.
struct RegDMAMM2S {
  uint32_t ctrl;
  uint32_t queue;
};

static_assert(sizeof(RegDMAMM2S) == REG_DMA_MM2S_BLOCK_SIZE,
              "Size of reg_dma_mm2s is incorrect");
176
177using DMAMM2SRegBlock = RegDMAMM2S[DMA_MM2S_CHANNEL_COUNT];
178static const RegDMAMM2S *
179 DMAMM2SRegs(reinterpret_cast<RegDMAMM2S *>(ME_DMA_MM2S_BASE));
180
181auto regDMAMM2SCtrl = [](auto ch) {
182 return reinterpret_cast<uint64_t>(&DMAMM2SRegs[ch].ctrl);
183};
184
185auto regDMAMM2SQueue = [](auto ch) {
186 return reinterpret_cast<uint64_t>(&DMAMM2SRegs[ch].queue);
187};
188
/*
  ME stream switches: number of registers in each register group.
*/
static constexpr int ME_SS_MASTER_COUNT = 25;
static constexpr int ME_SS_SLAVE_CFG_COUNT = 27;
static constexpr int ME_SS_SLAVE_SLOT_COUNT = 108;
static constexpr unsigned SS_SLOT_NUM_PORTS = 4;
196
197using MESSMasterBlock = uint32_t[ME_SS_MASTER_COUNT];
198static const MESSMasterBlock *
199 MESSMaster(reinterpret_cast<MESSMasterBlock *>(ME_SS_MASTER_BASE));
200
201static_assert(sizeof(MESSMasterBlock) ==
202 (ME_SS_MASTER_COUNT * sizeof(uint32_t)),
203 "Size of me_ss_master_block is incorrect");
204
205auto regMESSMaster = [](auto idx) {
206 return reinterpret_cast<uint64_t>(&MESSMaster[idx]);
207};
208
209using MESSSlaveCfgBlock = uint32_t[ME_SS_SLAVE_CFG_COUNT];
210static const MESSSlaveCfgBlock *
211 MESSSlaveCfg(reinterpret_cast<MESSSlaveCfgBlock *>(ME_SS_SLAVE_CFG_BASE));
212
213static_assert(sizeof(MESSSlaveCfgBlock) ==
214 (ME_SS_SLAVE_CFG_COUNT * sizeof(uint32_t)),
215 "Size of me_ss_slave_cfg_block is incorrect");
216
217auto regMESSSlaveCfg = [](auto idx) {
218 return reinterpret_cast<uint64_t>(&MESSSlaveCfg[idx]);
219};
220
221using MESSSlaveSlotBlock = uint32_t[ME_SS_SLAVE_SLOT_COUNT][SS_SLOT_NUM_PORTS];
222static const MESSSlaveSlotBlock *MESSSlaveSlot(
223 reinterpret_cast<MESSSlaveSlotBlock *>(ME_SS_SLAVE_SLOT_BASE));
224
225static_assert(sizeof(MESSSlaveSlotBlock) ==
226 (ME_SS_SLAVE_SLOT_COUNT * SS_SLOT_NUM_PORTS *
227 sizeof(uint32_t)),
228 "Size of me_ss_slave_slot_block is incorrect");
229
230auto regMESSSlaveSlot = [](auto port, auto slot) {
231 return reinterpret_cast<uint64_t>(&MESSSlaveSlot[slot][port]);
232};
233
// Size in bytes of the ME data memory.
static constexpr unsigned DATA_MEM_SIZE = 0x08000; // 32KB

// Size in bytes of the ME program memory.
static constexpr unsigned PROG_MEM_SIZE = 0x4000; // 16KB
239
/**********
  Shim Tile

  Tile-relative byte offsets of the register regions of a shim tile.
**********/
static constexpr unsigned SHIM_DMA_BD_BASE = 0x1D000;
static constexpr unsigned SHIM_DMA_S2MM_BASE = 0x1D140;
static constexpr unsigned SHIM_SS_MASTER_BASE = 0x3F000;
static constexpr unsigned SHIM_SS_SLAVE_CFG_BASE = 0x3F100;
static constexpr unsigned SHIM_SS_SLAVE_SLOT_BASE = 0x3F200;
248
/*
  Shim DMA
*/
static constexpr int SHIM_DMA_BD_COUNT = 16;
static constexpr int REG_SHIM_DMA_BD_SIZE = 0x14;

// Register image of one shim DMA buffer descriptor: five 32-bit words.
struct ShimDMABD {
  uint32_t addrLow;
  uint32_t len;
  uint32_t control;
  uint32_t axiCfg;
  uint32_t packet;
};

static_assert(REG_SHIM_DMA_BD_SIZE == sizeof(ShimDMABD),
              "Size of shim_dma_bd is incorrect");

// The complete set of buffer descriptors of one shim DMA, back to back.
using ShimDMABDBlock = ShimDMABD[SHIM_DMA_BD_COUNT];
267
/*
  Mux/demux
*/
static constexpr unsigned SHIM_MUX_BASE = 0x1F000;

/*
  Shim stream switches: number of registers in each register group, and the
  array types that span each group.
*/
static constexpr int SHIM_SS_MASTER_COUNT = 23;
static constexpr int SHIM_SS_SLAVE_CFG_COUNT = 24;
static constexpr int SHIM_SS_SLAVE_SLOT_COUNT = 96;

using ShimSSMasterBlock = uint32_t[SHIM_SS_MASTER_COUNT];
using ShimSSSlaveCfgBlock = uint32_t[SHIM_SS_SLAVE_CFG_COUNT];
using ShimSSSlaveSlotBlock = uint32_t[SHIM_SS_SLAVE_SLOT_COUNT];
283
284// section names
285static uint8_t secNameOffset[SEC_IDX_MAX];
286
287static const char *secNameStr[SEC_IDX_MAX] = {
288 "null", ".ssmast", ".ssslve", ".sspckt",
289 ".sdma.bd", ".shmmux", ".sdma.ctl", ".prgm.mem",
290 ".tdma.bd", ".tdma.ctl", "deprecated", ".data.mem"};
291
292static size_t stridx;
293
/*
  Every 32-bit write destined for device memory, keyed by its full address.
  The map keeps the entries ordered by address, and a later write to the same
  address replaces the earlier value. All recorded writes are time/order
  invariant, which allows sorting to compact the airbin.
*/
static std::map<uint64_t, uint32_t> memWrites{};
300
301/*
302 * Tile address format:
303 * --------------------------------------------
304 * | 7 bits 5 bits 18 bits |
305 * --------------------------------------------
306 * | Array offset | Column | Row | Tile addr |
307 * --------------------------------------------
308 */
310public:
311 TileAddress(uint8_t column, uint8_t row, uint64_t arrayOffset = 0x000u)
312 : arrayOffset{arrayOffset}, column{column}, row{row} {}
313
314 // SFINAE is used here to choose the copy constructor for `TileAddress`,
315 // and this constructor for all other classes.
316 template <typename Op,
317 std::enable_if_t<!std::is_same_v<Op, TileAddress>, bool> = true>
319 : TileAddress{static_cast<uint8_t>(op.colIndex()),
320 static_cast<uint8_t>(op.rowIndex())} {}
321
322 uint64_t fullAddress(uint64_t registerOffset) const {
323 return (arrayOffset << TILE_ADDR_ARR_SHIFT) |
324 (static_cast<uint64_t>(column) << TILE_ADDR_COL_SHIFT) |
325 (static_cast<uint64_t>(row) << TILE_ADDR_ROW_SHIFT) | registerOffset;
326 }
327
328 bool isShim() const { return row == 0; }
329
330 operator uint16_t() const {
331 return (static_cast<uint16_t>(column) << TILE_ADDR_ROW_WIDTH) | row;
332 }
333
334 uint8_t col() const { return column; }
335
336 void clearRange(uint32_t rangeStart, uint32_t length);
337
338private:
339 uint64_t arrayOffset : 34;
340 uint8_t column : TILE_ADDR_COL_WIDTH;
341 uint8_t row : TILE_ADDR_ROW_WIDTH;
342};
343
344static_assert(sizeof(TileAddress) <= sizeof(uint64_t),
345 "Tile addresses are at most 64-bits");
346
347class Address {
348public:
349 Address(TileAddress tile, uint64_t offset) : tile{tile}, offset{offset} {}
350
351 operator uint64_t() const { return tile.fullAddress(offset); }
352
353 TileAddress destTile() const { return tile; }
354 uint32_t getOffset() const { return offset; }
355
356private:
357 TileAddress tile;
358 uint64_t offset : TILE_ADDR_OFF_WIDTH;
359};
360
// One recorded write: (full address, 32-bit value).
using Write = std::pair<uint64_t, uint32_t>;

// A contiguous run of 32-bit words that starts at a fixed address.
class Section {
public:
  Section(uint64_t addr) : baseAddress{addr} {}

  // Address of the first word of the section.
  uint64_t getAddr() const { return baseAddress; }

  // Length of the section contents in bytes.
  size_t getLength() const { return sizeof(uint32_t) * words.size(); }

  // Append one word to the end of the section.
  void addData(uint32_t value) { words.emplace_back(value); }

  // Pointer to the first stored word.
  const uint32_t *getData() const { return words.data(); }

private:
  uint64_t baseAddress;        // start address of this section
  std::vector<uint32_t> words; // data to be written starting at 'baseAddress'
};
375
// This template can be instantiated to represent a bitfield in a register.
//
// `highBit` and `lowBit` are the inclusive bit positions of the field inside
// a 32-bit register. Calling an instance shifts a value into position and
// masks off everything that does not fit in the field.
template <uint8_t highBit, uint8_t lowBit = highBit>
class Field final {
public:
  static_assert(highBit >= lowBit,
                "The high bit should be higher than the low bit");
  static_assert(highBit < sizeof(uint32_t) * 8u,
                "The field must live in a 32-bit register");

  static constexpr auto NUM_BITS_USED = (highBit - lowBit) + 1u;
  // Built by shifting an all-ones word right, so that a full-width field
  // (NUM_BITS_USED == 32) never shifts by the width of the type.
  static constexpr auto UNSHIFTED_MASK = 0xFFFFFFFFu >> (32u - NUM_BITS_USED);
  static_assert((lowBit != highBit) ^ (UNSHIFTED_MASK == 1),
                "1 is a valid mask iff the field is 1 bit wide");

  static constexpr auto SHIFTED_MASK = UNSHIFTED_MASK << lowBit;

  // Returns `value` placed at the field's position, truncated to its width.
  [[nodiscard]] constexpr uint32_t operator()(uint32_t value) const {
    return (value << lowBit) & SHIFTED_MASK;
  }
};
396
397/*
398 Add or replace a register value in 'mem_writes'
399*/
400static void write32(Address addr, uint32_t value) {
401 if (addr.destTile().col() <= 0)
402 llvm::report_fatal_error(
403 llvm::Twine("address of destination tile <= 0 : ") +
404 std::to_string(addr.destTile().col()));
405
406 auto ret = memWrites.emplace(addr, value);
407 if (!ret.second)
408 ret.first->second = value;
409}
410
411/*
412 Look up a value for a given address
413
414 If the address is found return the value, otherwise 0
415*/
416static uint32_t read32(Address addr) {
417 auto ret = memWrites.find(addr);
418 if (ret != memWrites.end())
419 return ret->second;
420
421 return 0;
422}
423
424/*
425 Set every address in the range to 0
426*/
427void TileAddress::clearRange(uint32_t start, uint32_t length) {
428 if (start % 4 != 0)
429 llvm::report_fatal_error(llvm::Twine("start address ") +
430 std::to_string(start) +
431 " must word 4 byte aligned");
432 if (start % 4 != 0)
433 llvm::report_fatal_error(llvm::Twine("length ") + std::to_string(start) +
434 " must be a multiple of 4 bytes");
435
436 LLVM_DEBUG(llvm::dbgs() << llvm::format("<%u,%u> 0x%x - 0x%x (len: %u)\n",
437 column, row, start, start + length,
438 length));
439 for (auto off = start; off < start + length; off += 4u)
440 write32(Address{*this, off}, 0);
441}
442
443/*
444 Read the ELF produced by the AIE compiler and include its loadable
445 output in the airbin ELF
446*/
447static void loadElf(TileAddress tile, const std::string &filename) {
448 LLVM_DEBUG(llvm::dbgs() << "Reading ELF file " << filename << " for tile "
449 << tile << '\n');
450
451 int elfFd = open(filename.c_str(), O_RDONLY);
452 if (elfFd < 0)
453 llvm::report_fatal_error(llvm::Twine("Can't open elf file ") + filename);
454
455 elf_version(EV_CURRENT);
456 Elf *inElf = elf_begin(elfFd, ELF_C_READ, nullptr);
457
458 // check the characteristics
459 GElf_Ehdr *ehdr;
460 GElf_Ehdr ehdrMem;
461 ehdr = gelf_getehdr(inElf, &ehdrMem);
462 if (!ehdr)
463 llvm::report_fatal_error(llvm::Twine("cannot get ELF header: ") +
464 elf_errmsg(-1));
465
466 // Read data as 32-bit little endian
467 assert(ehdr->e_ident[EI_CLASS] == ELFCLASS32 &&
468 "(ehdr->e_ident[EI_CLASS] != ELFCLASS32");
469 assert(ehdr->e_ident[EI_DATA] == ELFDATA2LSB &&
470 "ehdr->e_ident[EI_DATA] != ELFDATA2LSB");
471
472 size_t phnum;
473 if (elf_getphdrnum(inElf, &phnum) != 0)
474 llvm::report_fatal_error(llvm::Twine("cannot get program header count: ") +
475 elf_errmsg(-1));
476
477 // iterate through all program headers
478 for (unsigned int ndx = 0; ndx < phnum; ndx++) {
479 GElf_Phdr phdrMem;
480 GElf_Phdr *phdr = gelf_getphdr(inElf, ndx, &phdrMem);
481 if (!phdr)
482 llvm::report_fatal_error(llvm::Twine("cannot get program header entry ") +
483 std::to_string(ndx) + ": " + elf_errmsg(-1));
484
485 // for each loadable program header
486 if (phdr->p_type != PT_LOAD)
487 continue;
488
489 // decide destination address based on header attributes
490 uint32_t dest;
491 if (phdr->p_flags & PF_X)
492 dest = ME_PROG_MEM_BASE + phdr->p_vaddr;
493 else
494 dest = ME_DATA_MEM_BASE + (phdr->p_vaddr & (DATA_MEM_SIZE - 1));
495
496 LLVM_DEBUG(llvm::dbgs()
497 << llvm::format("ELF flags=0x%x vaddr=0x%lx dest=0x%x\r\n",
498 phdr->p_flags, phdr->p_vaddr, dest));
499 // read data one word at a time and write it to the output list
500 // TODO since we know these are data and not registers, we could likely
501 // bypass the output list and write a section directly into the AIRBIN
502 size_t elfSize;
503 uint32_t offset;
504 char *raw = elf_rawfile(inElf, &elfSize);
505
506 for (offset = phdr->p_offset; offset < phdr->p_offset + phdr->p_filesz;
507 offset += 4) {
508 Address destAddr{tile, dest};
509 uint32_t data = *reinterpret_cast<uint32_t *>(raw + offset);
510 write32(destAddr, data);
511 dest += 4;
512 }
513 }
514
515 elf_end(inElf);
516 close(elfFd);
517}
518
519/*
520 The SHIM row is always 0.
521 SHIM resets are handled by the runtime.
522*/
523static void configShimTile(TileOp &tileOp) {
524 assert(tileOp.isShimTile() &&
525 "The tile must be a Shim to generate Shim Config");
526
527 TileAddress tileAddress{tileOp};
528
529 if (tileOp.isShimNOCTile())
530 tileAddress.clearRange(SHIM_DMA_BD_BASE, sizeof(ShimDMABDBlock));
531
532 tileAddress.clearRange(SHIM_SS_MASTER_BASE, sizeof(ShimSSMasterBlock));
533 tileAddress.clearRange(SHIM_SS_SLAVE_CFG_BASE, sizeof(ShimSSSlaveCfgBlock));
534 tileAddress.clearRange(SHIM_SS_SLAVE_SLOT_BASE, sizeof(ShimSSSlaveSlotBlock));
535}
536
537/*
538 Generate the config for an ME tile
539*/
540static void configMETile(TileOp tileOp, const std::string &coreFilesDir) {
541 TileAddress tileAddress{tileOp};
542 // Reset configuration
543
544 // clear program and data memory
545 tileAddress.clearRange(ME_PROG_MEM_BASE, PROG_MEM_SIZE);
546 tileAddress.clearRange(ME_DATA_MEM_BASE, DATA_MEM_SIZE);
547
548 // TileDMA
549 tileAddress.clearRange(ME_DMA_BD_BASE, sizeof(DMABDRegBlock));
550 tileAddress.clearRange(ME_DMA_S2MM_BASE, sizeof(DMAS2MMRegBlock));
551 tileAddress.clearRange(ME_DMA_MM2S_BASE, sizeof(DMAMM2SRegBlock));
552
553 // Stream Switches
554 tileAddress.clearRange(ME_SS_MASTER_BASE, sizeof(MESSMasterBlock));
555 tileAddress.clearRange(ME_SS_SLAVE_CFG_BASE, sizeof(MESSSlaveCfgBlock));
556 tileAddress.clearRange(ME_SS_SLAVE_SLOT_BASE, sizeof(MESSSlaveSlotBlock));
557
558 // NOTE: Here is usually where locking is done.
559 // However, the runtime will handle that when loading the airbin.
560
561 // read the AIE executable and copy the loadable parts
562 if (auto coreOp = tileOp.getCoreOp()) {
563 std::string fileName;
564 if (auto fileAttr = coreOp->getAttrOfType<StringAttr>("elf_file"))
565 fileName = fileAttr.str();
566 else
567 fileName = llvm::formatv("{0}/core_{1}_{2}.elf", coreFilesDir,
568 tileOp.colIndex(), tileOp.rowIndex());
569 loadElf(tileAddress, fileName);
570 }
571}
572
573struct BDInfo {
574 bool foundBDPacket = false;
575 int packetType = 0;
576 int packetID = 0;
577 bool foundBD = false;
578 int lenA = 0;
579 int lenB = 0;
580 unsigned bytesA = 0;
581 unsigned bytesB = 0;
582 int offsetA = 0;
583 int offsetB = 0;
584 uint64_t baseAddrA = 0;
585 uint64_t baseAddrB = 0;
586 bool hasA = false;
587 bool hasB = false;
588 std::string bufA = "0";
589 std::string bufB = "0";
590 uint32_t abMode = DISABLE;
591 uint32_t fifoMode = DISABLE; // FIXME: when to enable FIFO mode?
592};
593
594static BDInfo getBDInfo(Block &block) {
595 BDInfo bdInfo;
596 for (auto op : block.getOps<DMABDOp>()) {
597 bdInfo.foundBD = true;
598 assert(op.getBufferOp().getAddress().has_value() &&
599 "buffer op should have address");
600 bdInfo.baseAddrA = op.getBufferOp().getAddress().value();
601 bdInfo.lenA = op.getLenInBytes();
602 bdInfo.bytesA = op.getBufferElementTypeWidthInBytes();
603 bdInfo.offsetA = op.getOffsetInBytes();
604 bdInfo.bufA = "XAIEDMA_TILE_BD_ADDRA";
605 bdInfo.hasA = true;
606 }
607 return bdInfo;
608}
609
610static void configureDMAs(DeviceOp &targetOp) {
611 Field<1> dmaChannelReset;
612 Field<0> dmaChannelEnable;
613
614 for (auto memOp : targetOp.getOps<MemOp>()) {
615 TileAddress tile{memOp};
616 LLVM_DEBUG(llvm::dbgs() << "DMA: tile=" << memOp.getTile());
617 // Clear the CTRL and QUEUE registers for the DMA channels.
618 for (auto chNum = 0u; chNum < DMA_S2MM_CHANNEL_COUNT; ++chNum) {
619 write32({tile, regDMAS2MMCtrl(chNum)},
620 dmaChannelReset(DISABLE) | dmaChannelEnable(DISABLE));
621 write32({tile, regDMAS2MMQueue(chNum)}, 0);
622 }
623 for (auto chNum = 0u; chNum < DMA_MM2S_CHANNEL_COUNT; ++chNum) {
624 write32({tile, regDMAMM2SCtrl(chNum)},
625 dmaChannelReset(DISABLE) | dmaChannelEnable(DISABLE));
626 write32({tile, regDMAMM2SQueue(chNum)}, 0);
627 }
628
629 DenseMap<Block *, int> blockMap;
630
631 {
632 // Assign each block a BD number
633 auto bdNum = 0;
634 for (auto &block : memOp.getBody()) {
635 if (!block.getOps<DMABDOp>().empty()) {
636 blockMap[&block] = bdNum;
637 bdNum++;
638 }
639 }
640 }
641
642 for (auto &block : memOp.getBody()) {
643 auto bdInfo = getBDInfo(block);
644
645 if (bdInfo.hasA and bdInfo.hasB) {
646 bdInfo.abMode = ENABLE;
647 if (bdInfo.lenA != bdInfo.lenB)
648 llvm::errs() << "ABmode must have matching lengths.\n";
649 if (bdInfo.bytesA != bdInfo.bytesB)
650 llvm::errs() << "ABmode must have matching element data types.\n";
651 }
652
653 int acqValue = 0, relValue = 0;
654 auto acqEnable = DISABLE;
655 auto relEnable = DISABLE;
656 std::optional<int> lockID = std::nullopt;
657
658 for (auto op : block.getOps<UseLockOp>()) {
659 LockOp lock = dyn_cast<LockOp>(op.getLock().getDefiningOp());
660 lockID = lock.getLockIDValue();
661 if (op.acquire()) {
662 acqEnable = ENABLE;
663 acqValue = op.getLockValue();
664 } else {
665 relEnable = ENABLE;
666 relValue = op.getLockValue();
667 }
668 }
669
670 // We either
671 // a. went thru the loop once (`lockID` should be something) xor
672 // b. did not enter the loop (the enables should be both disable)
673 assert(lockID.has_value() ^
674 (acqEnable == DISABLE and relEnable == DISABLE) &&
675 "lock invariants not satisfied");
676
677 for (auto op : block.getOps<DMABDPACKETOp>()) {
678 bdInfo.foundBDPacket = true;
679 bdInfo.packetType = op.getPacketType();
680 bdInfo.packetID = op.getPacketID();
681 }
682
683 auto bdNum = blockMap[&block];
684 MERegDMABD bdData;
685 if (bdInfo.foundBD) {
686 Field<25, 22> bdAddressLockID;
687 Field<21> bdAddressReleaseEnable;
688 Field<20> bdAddressReleaseValue;
689 Field<19> bdAddressReleaseValueEnable;
690 Field<18> bdAddressAcquireEnable;
691 Field<17> bdAddressAcquireValue;
692 Field<16> bdAddressAcquireValueEnable;
693
694 if (bdInfo.hasA) {
695 bdData.addrA = bdAddressLockID(lockID.value()) |
696 bdAddressReleaseEnable(relEnable) |
697 bdAddressAcquireEnable(acqEnable);
698 if (relValue != 0xFFu)
699 bdData.addrA |= bdAddressReleaseValueEnable(true) |
700 bdAddressReleaseValue(relValue);
701 if (acqValue != 0xFFu)
702 bdData.addrA |= bdAddressAcquireValueEnable(true) |
703 bdAddressAcquireValue(acqValue);
704 }
705 if (bdInfo.hasB)
706 llvm::report_fatal_error("bdInfo.hasB not supported");
707
708 auto addrA = bdInfo.baseAddrA + bdInfo.offsetA;
709 auto addrB = bdInfo.baseAddrB + bdInfo.offsetB;
710
711 Field<12, 0> bdAddressBase, bdControlLength;
712 Field<30> bdControlABMode;
713 Field<28> bdControlFifo;
714
715 bdData.addrA |= bdAddressBase(addrA >> 2u);
716 bdData.addrB |= bdAddressBase(addrB >> 2u);
717 bdData.control |= bdControlLength(bdInfo.lenA - 1) |
718 bdControlFifo(bdInfo.fifoMode) |
719 bdControlABMode(bdInfo.abMode);
720
721 if (block.getNumSuccessors() > 0) {
722 // should have only one successor block
723 assert(block.getNumSuccessors() == 1 &&
724 "block.getNumSuccessors() != 1");
725 auto *nextBlock = block.getSuccessors()[0];
726 auto nextBDNum = blockMap[nextBlock];
727
728 Field<16, 13> bdControlNextBD;
729 Field<17> bdControlEnableNextBD;
730
731 bdData.control |= bdControlEnableNextBD(nextBDNum != 0xFFu) |
732 bdControlNextBD(nextBDNum);
733 }
734
735 if (bdInfo.foundBDPacket) {
736 Field<14, 12> bdPacketType;
737 Field<4, 0> bdPacketID;
738 Field<27> bdControlEnablePacket;
739
740 bdData.packet =
741 bdPacketID(bdInfo.packetID) | bdPacketType(bdInfo.packetType);
742 bdData.control |= bdControlEnablePacket(ENABLE);
743 }
744
745 Field<31> bdControlValid;
746
747 assert(bdNum < ME_DMA_BD_COUNT && "bdNum >= ME_DMA_BD_COUNT");
748 uint64_t bdOffset = regDMAAddrABD(bdNum);
749
750 write32({tile, bdOffset}, bdData.addrA);
751 write32({tile, regDMAAddrBBD(bdNum)}, bdData.addrB);
752 write32({tile, regDMA2DXBD(bdNum)}, bdData.x2d);
753 write32({tile, regDMA2DYBD(bdNum)}, bdData.y2d);
754 write32({tile, regDMAPktBD(bdNum)}, bdData.packet);
755 write32({tile, regDMAIntStateBD(bdNum)}, bdData.interleave);
756 write32({tile, regDMACtrlBD(bdNum)},
757 bdData.control | bdControlValid(true));
758 }
759 }
760
761 for (auto &block : memOp.getBody()) {
762 for (auto op : block.getOps<DMAStartOp>()) {
763 auto bdNum = blockMap[op.getDest()];
764 if (bdNum != 0xFFU) {
765 Field<4, 0> dmaChannelQueueStartBd;
766
767 uint32_t chNum = op.getChannelIndex();
768 if (op.getChannelDir() == DMAChannelDir::MM2S) {
769 write32(Address{tile, regDMAMM2SQueue(chNum)},
770 dmaChannelQueueStartBd(bdNum));
771 write32({tile, regDMAMM2SCtrl(chNum)},
772 dmaChannelEnable(ENABLE) | dmaChannelReset(DISABLE));
773 } else {
774 write32(Address{tile, regDMAS2MMQueue(chNum)},
775 dmaChannelQueueStartBd(bdNum));
776 write32({tile, regDMAS2MMCtrl(chNum)},
777 dmaChannelEnable(ENABLE) | dmaChannelReset(DISABLE));
778 }
779 }
780 }
781 }
782 }
783}
784
785static uint8_t computeSlavePort(WireBundle bundle, int index, bool isShim) {
786 assert(index >= 0 && "index < 0");
787 assert(index < UINT8_MAX - 21 && "index >= UINT8_MAX - 21");
788
789 switch (bundle) {
790 case WireBundle::DMA:
791 return 2u + index;
792 case WireBundle::East: {
793 if (isShim)
794 return 19u + index;
795 return 21u + index;
796 }
797 case WireBundle::North: {
798 if (isShim)
799 return 15u + index;
800 return 17u + index;
801 }
802 case WireBundle::South: {
803 if (isShim)
804 return 3u + index;
805 return 7u + index;
806 }
807 case WireBundle::West: {
808 if (isShim)
809 return 11u + index;
810 return 13u + index;
811 }
812 default:
813 // To implement a new WireBundle,
814 // look in libXAIE for the macros that handle the port.
815 llvm::report_fatal_error("unexpected bundle");
816 }
817}
818
819static uint8_t computeMasterPort(WireBundle bundle, int index, bool isShim) {
820 assert(index >= 0 && "index < 0");
821 assert(index < UINT8_MAX - 21 && "index >= UINT8_MAX - 21");
822
823 switch (bundle) {
824 case WireBundle::DMA:
825 return 2u + index;
826 case WireBundle::East: {
827 if (isShim)
828 return 19u + index;
829 return 21u + index;
830 }
831 case WireBundle::North: {
832 if (isShim)
833 return 13u + index;
834 return 15u + index;
835 }
836 case WireBundle::South: {
837 if (isShim)
838 return 3u + index;
839 return 7u + index;
840 }
841 case WireBundle::West: {
842 if (isShim)
843 return 9u + index;
844 return 11u + index;
845 }
846 default:
847 // To implement a new WireBundle,
848 // look in libXAIE for the macros that handle the port.
849 llvm::report_fatal_error(llvm::Twine("unexpected bundle") +
850 std::to_string(static_cast<uint32_t>(bundle)));
851 }
852}
853
854static void configureSwitchBoxes(DeviceOp &targetOp) {
855 for (auto switchboxOp : targetOp.getOps<SwitchboxOp>()) {
856 Region &r = switchboxOp.getConnections();
857 Block &b = r.front();
858 bool isEmpty = b.getOps<ConnectOp>().empty() &&
859 b.getOps<MasterSetOp>().empty() &&
860 b.getOps<PacketRulesOp>().empty();
861
862 // NOTE: may not be needed
863 std::set<TileAddress> switchboxSet;
864 if (isa<TileOp>(switchboxOp.getTile().getDefiningOp())) {
865 if (!isEmpty)
866 switchboxSet.emplace(switchboxOp);
867 } else if (AIEX::SelectOp sel = dyn_cast<AIEX::SelectOp>(
868 switchboxOp.getTile().getDefiningOp()))
869 // TODO: Use XAIEV1 target and translate into write32s
870 llvm::report_fatal_error("select op not supported");
871
872 constexpr Field<31> STREAM_ENABLE;
873 constexpr Field<30> STREAM_PACKET_ENABLE;
874 for (auto connectOp : b.getOps<ConnectOp>()) {
875 for (auto tile : switchboxSet) {
876 auto slavePort =
877 computeSlavePort(connectOp.getSourceBundle(),
878 connectOp.sourceIndex(), tile.isShim());
879 auto masterPort = computeMasterPort(
880 connectOp.getDestBundle(), connectOp.destIndex(), tile.isShim());
881
882 Field<7> streamMasterDropHeader;
883 Field<6, 0> streamMasterConfig;
884
885 // Configure master side
886 {
887 Address address{tile, regMESSMaster(masterPort)};
888 // TODO: `Field::extract(uint32_t)`?
889 auto dropHeader = (slavePort & 0x80u) >> 7u;
890 auto value = STREAM_ENABLE(true) | STREAM_PACKET_ENABLE(false) |
891 streamMasterDropHeader(dropHeader) |
892 streamMasterConfig(slavePort);
893 assert(value < UINT32_MAX);
894 write32(address, value);
895 }
896
897 // Configure slave side
898 {
899 Address address{tile, regMESSSlaveCfg(slavePort)};
900 write32(address, STREAM_ENABLE(true) | STREAM_PACKET_ENABLE(false));
901 }
902
903 for (auto connectOp : b.getOps<MasterSetOp>()) {
904 auto mask = 0u;
905 int arbiter = -1;
906 for (auto val : connectOp.getAmsels()) {
907 auto amsel = dyn_cast<AMSelOp>(val.getDefiningOp());
908 arbiter = amsel.arbiterIndex();
909 int msel = amsel.getMselValue();
910 mask |= 1u << msel;
911 }
912
913 static constexpr auto STREAM_SWITCH_MSEL_SHIFT = 3u;
914 static constexpr auto STREAM_SWITCH_ARB_SHIFT = 0u;
915
916 const auto DROP_HEADER = connectOp.getDestBundle() == WireBundle::DMA;
917 auto config = streamMasterDropHeader(DROP_HEADER) |
918 (mask << STREAM_SWITCH_MSEL_SHIFT) |
919 (arbiter << STREAM_SWITCH_ARB_SHIFT);
920 Address dest{tile, regMESSMaster(masterPort)};
921 write32(dest, STREAM_ENABLE(ENABLE) | STREAM_PACKET_ENABLE(ENABLE) |
922 streamMasterDropHeader(DROP_HEADER) |
923 streamMasterConfig(config));
924 }
925 }
926 }
927
928 for (auto connectOp : b.getOps<PacketRulesOp>()) {
929 int slot = 0;
930 Block &block = connectOp.getRules().front();
931 for (auto slotOp : block.getOps<PacketRuleOp>()) {
932 AMSelOp amselOp = dyn_cast<AMSelOp>(slotOp.getAmsel().getDefiningOp());
933 int arbiter = amselOp.arbiterIndex();
934 int msel = amselOp.getMselValue();
935
936 for (auto tile : switchboxSet) {
937 auto slavePort =
938 computeSlavePort(connectOp.getSourceBundle(),
939 connectOp.sourceIndex(), tile.isShim());
940 write32({tile, regMESSSlaveCfg(slavePort)},
941 STREAM_ENABLE(ENABLE) | STREAM_PACKET_ENABLE(ENABLE));
942
943 Field<28, 24> streamSlotId;
944 Field<20, 16> streamSlotMask;
945 Field<8> streamSlotEnable;
946 Field<5, 4> streamSlotMSel;
947 Field<2, 0> streamSlotArbit;
948
949 auto config = streamSlotId(slotOp.valueInt()) |
950 streamSlotMask(slotOp.maskInt()) |
951 streamSlotEnable(ENABLE) | streamSlotMSel(msel) |
952 streamSlotArbit(arbiter);
953 write32({tile, regMESSSlaveSlot(slavePort, slot)}, config);
954 slot++;
955 }
956 }
957 }
958 }
959
960 const auto INPUT_MASK_FOR = [](WireBundle bundle, uint8_t shiftAmt) {
961 switch (bundle) {
962 case WireBundle::PLIO:
963 return 0u << shiftAmt;
964 case WireBundle::DMA:
965 return 1u << shiftAmt;
966 case WireBundle::NOC:
967 return 2u << shiftAmt;
968 default:
969 llvm::report_fatal_error(llvm::Twine("unexpected bundle: ") +
970 std::to_string(static_cast<uint32_t>(bundle)));
971 }
972 };
973
974 std::optional<TileAddress> currentTile = std::nullopt;
975 for (auto op : targetOp.getOps<ShimMuxOp>()) {
976 Region &r = op.getConnections();
977 Block &b = r.front();
978
979 if (isa<TileOp>(op.getTile().getDefiningOp())) {
980 bool isEmpty = b.getOps<ConnectOp>().empty();
981 if (!isEmpty)
982 currentTile = op;
983 }
984
985 for (auto connectOp : b.getOps<ConnectOp>()) {
986 if (connectOp.getSourceBundle() == WireBundle::North) {
987 // demux!
988 // XAieTile_ShimStrmDemuxConfig(&(TileInst[col][0]),
989 // XAIETILE_SHIM_STRM_DEM_SOUTH3, XAIETILE_SHIM_STRM_DEM_DMA);
990 assert(currentTile.has_value() && "current tile not set");
991 auto shiftAmt = [index = connectOp.sourceIndex()] {
992 // NOTE: hardcoded to SOUTH to match definitions from libxaie
993 switch (index) {
994 case 2:
995 return 4u;
996 case 3:
997 return 6u;
998 case 6:
999 return 8u;
1000 case 7:
1001 return 10u;
1002 default: // Unsure about this, but seems safe to assume
1003 llvm::report_fatal_error(llvm::Twine("unexpected source index: ") +
1004 std::to_string(index));
1005 }
1006 }();
1007
1008 // We need to add to the possibly preexisting mask.
1009 Address addr{currentTile.value(), 0x1F004u};
1010 auto currentMask = read32(addr);
1011 write32(addr, currentMask |
1012 INPUT_MASK_FOR(connectOp.getDestBundle(), shiftAmt));
1013 } else if (connectOp.getDestBundle() == WireBundle::North) {
1014 // mux
1015 // XAieTile_ShimStrmMuxConfig(&(TileInst[col][0]),
1016 // XAIETILE_SHIM_STRM_MUX_SOUTH3, XAIETILE_SHIM_STRM_MUX_DMA);
1017 assert(currentTile.has_value() && "no current tile");
1018 auto shiftAmt = [index = connectOp.destIndex()] {
1019 // NOTE: hardcoded to SOUTH to match definitions from libxaie
1020 switch (index) {
1021 case 2:
1022 return 8u;
1023 case 3:
1024 return 10u;
1025 case 6:
1026 return 12u;
1027 case 7:
1028 return 14u;
1029 default: // Unsure about this, but seems safe to assume
1030 llvm::report_fatal_error(llvm::Twine("unexpected dest index ") +
1031 std::to_string(index));
1032 }
1033 }();
1034
1035 Address addr{currentTile.value(), 0x1F000u};
1036 auto currentMask = read32(addr);
1037 write32(addr, currentMask | INPUT_MASK_FOR(connectOp.getSourceBundle(),
1038 shiftAmt));
1039 }
1040 }
1041 }
1042
1043 /* TODO: Implement the following
1044 for (auto switchboxOp : targetOp.getOps<ShimSwitchboxOp>()) {
1045 Region &r = switchboxOp.getConnections();
1046 Block &b = r.front();
1047 for (auto connectOp : b.getOps<ConnectOp>()) {
1048 output << "XAieTile_StrmConnectCct(" << tileInstStr(col, 0) << ",\n";
1049 output << "\tXAIETILE_STRSW_SPORT_"
1050 << stringifyWireBundle(connectOp.sourceBundle()).upper() << "("
1051 << tileInstStr(col, 0) << ", " << connectOp.sourceIndex()
1052 << "),\n";
1053 output << "\tXAIETILE_STRSW_MPORT_"
1054 << stringifyWireBundle(connectOp.destBundle()).upper() << "("
1055 << tileInstStr(col, 0) << ", " << connectOp.destIndex() << "),\n";
1056 output << "\t" << enable << ");\n";
1057 }
1058 }
1059 */
1060}
1061
1062static void configureCascade(DeviceOp &targetOp) {
1063 const auto &target_model = xilinx::AIE::getTargetModel(targetOp);
1064 if (isa<AIE2TargetModel>(targetModel)) {
1065 for (auto configOp : targetOp.getOps<ConfigureCascadeOp>()) {
1066 TileOp tile = cast<TileOp>(configOp.getTile().getDefiningOp());
1067 auto inputDir = stringifyCascadeDir(configOp.getInputDir()).upper();
1068 auto outputDir = stringifyCascadeDir(configOp.getOutputDir()).upper();
1069
1070 Address address{tile, 0x36060u};
1071
1072 /*
1073 * * Register value for output BIT 1: 0 == SOUTH, 1 == EAST
1074 * * Register value for input BIT 0: 0 == NORTH, 1 == WEST
1075 */
1076 uint8_t outputValue = (outputDir == "SOUTH") ? 0 : 1;
1077 uint8_t inputValue = (inputDir == "NORTH") ? 0 : 1;
1078
1079 constexpr Field<1> Output;
1080 constexpr Field<0> Input;
1081
1082 auto regValue = Output(outputValue) | Input(inputValue);
1083
1084 write32(address, regValue);
1085 }
1086 }
1087}
1088
1089/*
1090 Convert memory address to index
1091
1092 Used to look up register/region name
1093*/
1094static uint8_t secAddr2Index(uint64_t in) {
1095 switch (in & ((1 << TILE_ADDR_OFF_WIDTH) - 1)) {
1096 case 0:
1097 return SEC_IDX_DATA_MEM;
1098 case ME_SS_MASTER_BASE:
1099 return SEC_IDX_SSMAST;
1100 case ME_SS_SLAVE_CFG_BASE:
1101 return SEC_IDX_SSSLVE;
1102 case ME_SS_SLAVE_SLOT_BASE:
1103 return SEC_IDX_SSPCKT;
1104 case ME_DMA_BD_BASE:
1105 return SEC_IDX_SDMA_BD;
1106 case SHIM_MUX_BASE:
1107 return SEC_IDX_SHMMUX;
1108 case SHIM_DMA_S2MM_BASE:
1109 return SEC_IDX_SDMA_CTL;
1110 case ME_PROG_MEM_BASE:
1111 return SEC_IDX_PRGM_MEM;
1112 case ME_DMA_S2MM_BASE:
1113 return SEC_IDX_TDMA_CTL;
1114 default:
1115 return 0;
1116 }
1117}
1118
1119/*
1120 Group the writes into contiguous sections
1121*/
1122static void groupSections(std::vector<Section *> &sections) {
1123 uint64_t lastAddr = 0;
1124 Section *section = nullptr;
1125
1126 for (auto write : memWrites) {
1127 if (write.first != lastAddr + 4) {
1128 if (section)
1129 sections.push_back(section);
1130 section = new Section(write.first);
1131 LLVM_DEBUG(llvm::dbgs() << "Starting new section @ "
1132 << llvm::format("0x%lx (last=0x%lx)\n",
1133 write.first, lastAddr));
1134 }
1135 assert(section && "section is null");
1136 section->addData(write.second);
1137 lastAddr = write.first;
1138 }
1139
1140 sections.push_back(section);
1141}
1142
1143/*
1144 Add a string to the section header string table and return the offset of
1145 the start of the string
1146*/
1147static size_t addString(Elf_Scn *scn, const char *str) {
1148 size_t lastidx = stridx;
1149 size_t size = strlen(str) + 1;
1150
1151 Elf_Data *data = elf_newdata(scn);
1152 data->d_buf = (void *)str;
1153 data->d_type = ELF_T_BYTE;
1154 data->d_size = size;
1155 data->d_align = 1;
1156 data->d_version = EV_CURRENT;
1157
1158 stridx += size;
1159 return lastidx;
1160}
1161
1162Elf_Data *sectionAddData(Elf_Scn *scn, const Section *section) {
1163 size_t size = section->getLength();
1164 auto *buf = static_cast<uint32_t *>(malloc(size));
1165
1166 // create a data object for the section
1167 Elf_Data *data = elf_newdata(scn);
1168 data->d_buf = buf;
1169 data->d_type = ELF_T_BYTE;
1170 data->d_size = size;
1171 data->d_off = 0;
1172 data->d_align = 1;
1173 data->d_version = EV_CURRENT;
1174
1175 // fill the data
1176 memcpy(buf, section->getData(), size);
1177
1178 return data;
1179}
1180
1181mlir::LogicalResult AIETranslateToAirbin(mlir::ModuleOp module,
1182 const std::string &outputFilename,
1183 const std::string &coreFilesDir,
1184 bool testAirBin) {
1185 int tmpElfFD;
1186 Elf *outElf;
1187 GElf_Ehdr ehdrMem;
1188 GElf_Ehdr *ehdr;
1189 GElf_Shdr *shdr;
1190 GElf_Shdr shdrMem;
1191 char emptyStr[] = "";
1192 char strTabName[] = ".shstrtab";
1193 std::vector<Section *> sections;
1194
1195 if (module.getOps<DeviceOp>().empty()) {
1196 LLVM_DEBUG(llvm::dbgs() << "no device ops found");
1197 return success();
1198 }
1199
1200 DeviceOp targetOp = *(module.getOps<DeviceOp>().begin());
1201
1202 // Write the initial configuration for every tile specified in the MLIR.
1203 for (auto tileOp : targetOp.getOps<TileOp>()) {
1204 LLVM_DEBUG(llvm::dbgs() << "CC: tile=" << tileOp.getTileID());
1205 if (tileOp.isShimTile())
1206 configShimTile(tileOp);
1207 else
1208 configMETile(tileOp, coreFilesDir);
1209 }
1210
1211 configureSwitchBoxes(targetOp);
1212 configureCascade(targetOp);
1213 configureDMAs(targetOp);
1214 groupSections(sections);
1215
1216 LLVM_DEBUG(llvm::dbgs() << llvm::format("mem_writes: %lu in %lu sections\n",
1217 memWrites.size(), sections.size()));
1218
1219 elf_version(EV_CURRENT);
1220 tmpElfFD =
1221 open(outputFilename.c_str(), O_RDWR | O_CREAT | O_TRUNC, DEFFILEMODE);
1222 outElf = elf_begin(tmpElfFD, ELF_C_WRITE, nullptr);
1223
1224 if (!gelf_newehdr(outElf, ELFCLASS64))
1225 llvm::report_fatal_error(llvm::Twine("Error creating ELF64 header: ") +
1226 elf_errmsg(-1));
1227
1228 ehdr = gelf_getehdr(outElf, &ehdrMem);
1229 if (!ehdr)
1230 llvm::report_fatal_error(llvm::Twine("cannot get ELF header: ") +
1231 elf_errmsg(-1));
1232
1233 // Initialize header.
1234 ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
1235 ehdr->e_ident[EI_OSABI] = ELFOSABI_GNU;
1236 ehdr->e_type = ET_NONE;
1237 ehdr->e_machine = EM_AMDAIR;
1238 ehdr->e_version = EV_CURRENT;
1239 if (gelf_update_ehdr(outElf, ehdr) == 0)
1240 llvm::report_fatal_error(llvm::Twine("cannot update ELF header: ") +
1241 elf_errmsg(-1));
1242
1243 // Create new section for the 'section header string table'
1244 Elf_Scn *shStrTabScn = elf_newscn(outElf);
1245 if (!shStrTabScn)
1246 llvm::report_fatal_error(
1247 llvm::Twine("cannot create new shstrtab section: ") + elf_errmsg(-1));
1248
1249 // the first entry in the string table must be a NULL string
1250 addString(shStrTabScn, emptyStr);
1251
1252 shdr = gelf_getshdr(shStrTabScn, &shdrMem);
1253 if (!shdr)
1254 llvm::report_fatal_error(
1255 llvm::Twine("cannot get header for sh_strings section: ") +
1256 elf_errmsg(-1));
1257
1258 shdr->sh_type = SHT_STRTAB;
1259 shdr->sh_flags = 0;
1260 shdr->sh_addr = 0;
1261 shdr->sh_link = SHN_UNDEF;
1262 shdr->sh_info = SHN_UNDEF;
1263 shdr->sh_addralign = 1;
1264 shdr->sh_entsize = 0;
1265 shdr->sh_name = addString(shStrTabScn, strTabName);
1266
1267 // add all the AIRBIN-specific section names up front and index them
1268 for (uint8_t secIdx = SEC_IDX_SSMAST; secIdx < SEC_IDX_MAX; secIdx++)
1269 secNameOffset[secIdx] = addString(shStrTabScn, secNameStr[secIdx]);
1270 secNameOffset[SEC_IDX_NULL] = 0;
1271
1272 // We have to store the section strtab index in the ELF header so sections
1273 // have actual names.
1274 int ndx = elf_ndxscn(shStrTabScn);
1275 ehdr->e_shstrndx = ndx;
1276
1277 if (!gelf_update_ehdr(outElf, ehdr))
1278 llvm::report_fatal_error(llvm::Twine("cannot update ELF header: ") +
1279 elf_errmsg(-1));
1280
1281 // Finished new shstrtab section, update the header.
1282 if (!gelf_update_shdr(shStrTabScn, shdr))
1283 llvm::report_fatal_error(
1284 llvm::Twine("cannot update new shstrtab section header: ") +
1285 elf_errmsg(-1));
1286
1287 // output the rest of the sections
1288 for (const Section *section : sections) {
1289 uint64_t addr = section->getAddr();
1290 Elf_Scn *scn = elf_newscn(outElf);
1291 if (!scn)
1292 llvm::report_fatal_error(llvm::Twine("cannot create new ") +
1293 secNameStr[secAddr2Index(addr)] +
1294 "section: " + elf_errmsg(-1));
1295
1296 shdr = gelf_getshdr(scn, &shdrMem);
1297 if (!shdr)
1298 llvm::report_fatal_error(llvm::Twine("cannot get header for ") +
1299 secNameStr[secAddr2Index(addr)] +
1300 "section: " + elf_errmsg(-1));
1301
1302 Elf_Data *data = sectionAddData(scn, section);
1303
1304 shdr->sh_type = SHT_PROGBITS;
1305 shdr->sh_flags = SHF_ALLOC;
1306 shdr->sh_addr = section->getAddr();
1307 shdr->sh_link = SHN_UNDEF;
1308 shdr->sh_info = SHN_UNDEF;
1309 shdr->sh_addralign = 1;
1310 shdr->sh_entsize = 0;
1311 shdr->sh_size = data->d_size;
1312 shdr->sh_name = secNameOffset[secAddr2Index(addr)];
1313
1314 if (!gelf_update_shdr(scn, shdr))
1315 llvm::report_fatal_error(llvm::Twine("cannot update section header: ") +
1316 elf_errmsg(-1));
1317 }
1318
1319 if (elf_update(outElf, ELF_C_WRITE) < 0)
1320 llvm::report_fatal_error(llvm::Twine("failure in elf_update: ") +
1321 elf_errmsg(-1));
1322 elf_end(outElf);
1323 close(tmpElfFD);
1324
1325 return success();
1326}
1327} // namespace xilinx::AIE
Address(TileAddress tile, uint64_t offset)
TileAddress destTile() const
uint32_t getOffset() const
constexpr uint32_t operator()(uint32_t value) const
static constexpr auto SHIFTED_MASK
static constexpr auto UNSHIFTED_MASK
static constexpr auto NUM_BITS_USED
uint64_t getAddr() const
const uint32_t * getData() const
void addData(uint32_t value)
void clearRange(uint32_t rangeStart, uint32_t length)
uint64_t fullAddress(uint64_t registerOffset) const
TileAddress(uint8_t column, uint8_t row, uint64_t arrayOffset=0x000u)
const V & get(const std::map< K, V > &pa, K k)
Definition d_ary_heap.h:69
Include the generated interface declarations.
MERegDMABD[ME_DMA_BD_COUNT] DMABDRegBlock
uint32_t[SHIM_SS_SLAVE_CFG_COUNT] ShimSSSlaveCfgBlock
uint32_t[SHIM_SS_MASTER_COUNT] ShimSSMasterBlock
uint32_t[ME_SS_MASTER_COUNT] MESSMasterBlock
RegDMAS2MM[DMA_S2MM_CHANNEL_COUNT] DMAS2MMRegBlock
std::pair< uint64_t, uint32_t > Write
RegDMAMM2S[DMA_MM2S_CHANNEL_COUNT] DMAMM2SRegBlock
ShimDMABD[SHIM_DMA_BD_COUNT] ShimDMABDBlock
uint32_t[ME_SS_SLAVE_SLOT_COUNT][SS_SLOT_NUM_PORTS] MESSSlaveSlotBlock
uint32_t[ME_SS_SLAVE_CFG_COUNT] MESSSlaveCfgBlock
uint32_t[SHIM_SS_SLAVE_SLOT_COUNT] ShimSSSlaveSlotBlock