15#include "llvm/Support/Debug.h"
16#include "llvm/Support/Format.h"
17#include "llvm/Support/FormatVariadic.h"
18#include "llvm/Support/raw_ostream.h"
30#define DEBUG_TYPE "aie-generate-airbin"
// Off/on values passed to register bit fields (e.g. DMA channel enable/reset).
54static constexpr auto DISABLE = 0u;
55static constexpr auto ENABLE = 1u;
// Bit layout of a full tile address, least-significant bits first:
//   [17:0]  register offset inside the tile (TILE_ADDR_OFF_WIDTH = 18 bits)
//   [22:18] row                             (TILE_ADDR_ROW_WIDTH = 5 bits)
//   [29:23] column                          (TILE_ADDR_COL_WIDTH = 7 bits)
//   [30+]   array offset
// Each shift is the sum of the widths of the fields below it.
57static constexpr auto TILE_ADDR_OFF_WIDTH = 18u;
// The row field starts immediately above the register offset (bit 18).
59static constexpr auto TILE_ADDR_ROW_SHIFT = TILE_ADDR_OFF_WIDTH;
60static constexpr auto TILE_ADDR_ROW_WIDTH = 5u;
// The column field starts immediately above the row field (bit 23).
62static constexpr auto TILE_ADDR_COL_SHIFT =
63 TILE_ADDR_ROW_SHIFT + TILE_ADDR_ROW_WIDTH;
64static constexpr auto TILE_ADDR_COL_WIDTH = 7u;
// The array offset starts immediately above the column field (bit 30).
66static constexpr auto TILE_ADDR_ARR_SHIFT =
67 TILE_ADDR_COL_SHIFT + TILE_ADDR_COL_WIDTH;
72static constexpr auto ME_DATA_MEM_BASE = 0x00000u;
73static constexpr auto ME_PROG_MEM_BASE = 0x20000u;
74static constexpr auto ME_DMA_BD_BASE = 0x1D000u;
75static constexpr auto ME_DMA_S2MM_BASE = 0x1DE00u;
76static constexpr auto ME_DMA_MM2S_BASE = 0x1DE10u;
77static constexpr auto ME_SS_MASTER_BASE = 0x3F000u;
78static constexpr auto ME_SS_SLAVE_CFG_BASE = 0x3F100u;
79static constexpr auto ME_SS_SLAVE_SLOT_BASE = 0x3F200u;
84static constexpr auto ME_DMA_BD_COUNT = 16;
85static constexpr auto ME_DMA_BD_SIZE = 0x20;
90 uint32_t
x2d{0xff0000u | 0x001u};
91 uint32_t
y2d{0xff000000u | 0xff0000u |
91 uint32_t
y2d{0xff000000u | 0xff0000u | {
…};
99static_assert(
sizeof(
MERegDMABD) == ME_DMA_BD_SIZE,
100 "Size of me_reg_dma_bd is incorrect");
104 DMABdRegs(
reinterpret_cast<MERegDMABD *
>(ME_DMA_BD_BASE));
107 "Size of dma_bd_reg_block is incorrect");
110 return reinterpret_cast<uint64_t
>(&DMABdRegs[idx].
addrA);
114 return reinterpret_cast<uint64_t
>(&DMABdRegs[idx].
addrB);
118 return reinterpret_cast<uint64_t
>(&DMABdRegs[idx].
x2d);
122 return reinterpret_cast<uint64_t
>(&DMABdRegs[idx].
y2d);
126 return reinterpret_cast<uint64_t
>(&DMABdRegs[idx].
packet);
130 return reinterpret_cast<uint64_t
>(&DMABdRegs[idx].
interleave);
134 return reinterpret_cast<uint64_t
>(&DMABdRegs[idx].
control);
140static constexpr auto DMA_S2MM_CHANNEL_COUNT = 2u;
141static constexpr auto REG_DMA_S2MM_BLOCK_SIZE = 0x08;
148static_assert(
sizeof(
RegDMAS2MM) == REG_DMA_S2MM_BLOCK_SIZE,
149 "Size of reg_dma_s2mm is incorrect");
153 DMAS2MMRegs(
reinterpret_cast<RegDMAS2MM *
>(ME_DMA_S2MM_BASE));
156 return reinterpret_cast<uint64_t
>(&DMAS2MMRegs[ch].
ctrl);
160 return reinterpret_cast<uint64_t
>(&DMAS2MMRegs[ch].
queue);
166static constexpr auto DMA_MM2S_CHANNEL_COUNT = 2u;
167static constexpr auto REG_DMA_MM2S_BLOCK_SIZE = 0x08;
174static_assert(
sizeof(
RegDMAMM2S) == REG_DMA_MM2S_BLOCK_SIZE,
175 "Size of reg_dma_mm2s is incorrect");
179 DMAMM2SRegs(
reinterpret_cast<RegDMAMM2S *
>(ME_DMA_MM2S_BASE));
182 return reinterpret_cast<uint64_t
>(&DMAMM2SRegs[ch].
ctrl);
186 return reinterpret_cast<uint64_t
>(&DMAMM2SRegs[ch].
queue);
192static constexpr auto ME_SS_MASTER_COUNT = 25;
193static constexpr auto ME_SS_SLAVE_CFG_COUNT = 27;
194static constexpr auto ME_SS_SLAVE_SLOT_COUNT = 108;
195static constexpr auto SS_SLOT_NUM_PORTS = 4u;
202 (ME_SS_MASTER_COUNT *
sizeof(uint32_t)),
203 "Size of me_ss_master_block is incorrect");
206 return reinterpret_cast<uint64_t
>(&MESSMaster[idx]);
214 (ME_SS_SLAVE_CFG_COUNT *
sizeof(uint32_t)),
215 "Size of me_ss_slave_cfg_block is incorrect");
218 return reinterpret_cast<uint64_t
>(&MESSSlaveCfg[idx]);
226 (ME_SS_SLAVE_SLOT_COUNT * SS_SLOT_NUM_PORTS *
228 "Size of me_ss_slave_slot_block is incorrect");
231 return reinterpret_cast<uint64_t
>(&MESSSlaveSlot[slot][
port]);
235static constexpr auto DATA_MEM_SIZE = 0x08000u;
238static constexpr auto PROG_MEM_SIZE = 0x4000u;
243static constexpr auto SHIM_DMA_BD_BASE = 0x1D000u;
244static constexpr auto SHIM_DMA_S2MM_BASE = 0x1D140u;
245static constexpr auto SHIM_SS_MASTER_BASE = 0x3F000u;
246static constexpr auto SHIM_SS_SLAVE_CFG_BASE = 0x3F100u;
247static constexpr auto SHIM_SS_SLAVE_SLOT_BASE = 0x3F200u;
252static constexpr auto SHIM_DMA_BD_COUNT = 16;
253static constexpr auto REG_SHIM_DMA_BD_SIZE = 0x14;
263static_assert(
sizeof(
struct ShimDMABD) == REG_SHIM_DMA_BD_SIZE,
264 "Size of shim_dma_bd is incorrect");
271static constexpr auto SHIM_MUX_BASE = 0x1F000u;
276static constexpr auto SHIM_SS_MASTER_COUNT = 23;
277static constexpr auto SHIM_SS_SLAVE_CFG_COUNT = 24;
278static constexpr auto SHIM_SS_SLAVE_SLOT_COUNT = 96;
288 "null",
".ssmast",
".ssslve",
".sspckt",
289 ".sdma.bd",
".shmmux",
".sdma.ctl",
".prgm.mem",
290 ".tdma.bd",
".tdma.ctl",
"deprecated",
".data.mem"};
299static std::map<uint64_t, uint32_t> memWrites;
311 TileAddress(uint8_t column, uint8_t row, uint64_t arrayOffset = 0x000u)
312 : arrayOffset{arrayOffset}, column{column},
row{
row} {}
311 TileAddress(uint8_t column, uint8_t row, uint64_t arrayOffset = 0x000u) {
…}
316 template <
typename Op,
317 std::enable_if_t<!std::is_same_v<Op, TileAddress>,
bool> =
true>
320 static_cast<uint8_t>(op.rowIndex())} {}
323 return (arrayOffset << TILE_ADDR_ARR_SHIFT) |
324 (
static_cast<uint64_t
>(column) << TILE_ADDR_COL_SHIFT) |
325 (
static_cast<uint64_t
>(row) << TILE_ADDR_ROW_SHIFT) | registerOffset;
330 operator uint16_t()
const {
331 return (
static_cast<uint16_t
>(column) << TILE_ADDR_ROW_WIDTH) | row;
330 operator uint16_t()
const {
…}
334 uint8_t
col()
const {
return column; }
336 void clearRange(uint32_t rangeStart, uint32_t length);
339 uint64_t arrayOffset : 34;
340 uint8_t column : TILE_ADDR_COL_WIDTH;
341 uint8_t row : TILE_ADDR_ROW_WIDTH;
344static_assert(
sizeof(TileAddress) <=
sizeof(uint64_t),
345 "Tile addresses are at most 64-bits");
358 uint64_t offset : TILE_ADDR_OFF_WIDTH;
361using Write = std::pair<uint64_t, uint32_t>;
367 size_t getLength()
const {
return data.size() *
sizeof(uint32_t); }
368 void addData(uint32_t value) { data.push_back(value); }
369 const uint32_t *
getData()
const {
return data.data(); }
373 std::vector<uint32_t> data;
377template <u
int8_t highBit, u
int8_t lowBit = highBit>
380 static_assert(highBit >= lowBit,
381 "The high bit should be higher than the low bit");
382 static_assert(highBit <
sizeof(uint32_t) * 8u,
383 "The field must live in a 32-bit register");
388 "1 is a valid mask iff the field is 1 bit wide");
392 [[nodiscard]]
constexpr uint32_t
operator()(uint32_t value)
const {
392 [[nodiscard]]
constexpr uint32_t
operator()(uint32_t value)
const {
…}
// Queue a 32-bit register write: stores `value` under `addr` in the
// file-level `memWrites` map. A destination tile whose column is <= 0 is
// rejected with llvm::report_fatal_error; since col() returns uint8_t, that
// check can only fire for column 0.
400static void write32(Address addr, uint32_t value) {
401 if (addr.destTile().col() <= 0)
402 llvm::report_fatal_error(
403 llvm::Twine(
"address of destination tile <= 0 : ") +
404 std::to_string(addr.destTile().col()));
// emplace() yields an {iterator, inserted} pair; `ret.first` refers to the map
// entry for `addr` whether or not the key was already present.
406 auto ret = memWrites.emplace(addr, value);
// Store `value` in that entry. NOTE(review): listing line 407 is not visible
// here; presumably it restricts this assignment to the already-present case.
408 ret.first->second = value;
416static uint32_t read32(Address addr) {
417 auto ret = memWrites.find(addr);
418 if (ret != memWrites.end())
// Visible body of TileAddress::clearRange(start, length): validates the range
// and then queues a zero write for every 32-bit word in it. The guarding `if`
// conditions sit on listing lines that are not visible in this excerpt.
// Diagnostic for a misaligned start offset.
429 llvm::report_fatal_error(llvm::Twine(
"start address ") +
430 std::to_string(start) +
431 " must word 4 byte aligned");
// Diagnostic for a bad length: report the offending `length` value itself
// (not the start offset) so the message matches what it complains about.
433 llvm::report_fatal_error(llvm::Twine(
"length ") + std::to_string(length) +
434 " must be a multiple of 4 bytes");
436 LLVM_DEBUG(llvm::dbgs() << llvm::format(
"<%u,%u> 0x%x - 0x%x (len: %u)\n",
437 column, row, start, start + length,
// Zero the half-open range [start, start + length) one 32-bit word at a time.
439 for (
auto off = start; off < start + length; off += 4u)
440 write32(
Address{*
this, off}, 0);
447static void loadElf(
TileAddress tile,
const std::string &filename) {
448 LLVM_DEBUG(llvm::dbgs() <<
"Reading ELF file " << filename <<
" for tile "
451 int elfFd = open(filename.c_str(), O_RDONLY);
453 llvm::report_fatal_error(llvm::Twine(
"Can't open elf file ") + filename);
455 elf_version(EV_CURRENT);
456 Elf *inElf = elf_begin(elfFd, ELF_C_READ,
nullptr);
461 ehdr = gelf_getehdr(inElf, &ehdrMem);
463 llvm::report_fatal_error(llvm::Twine(
"cannot get ELF header: ") +
467 assert(ehdr->e_ident[EI_CLASS] == ELFCLASS32 &&
468 "(ehdr->e_ident[EI_CLASS] != ELFCLASS32");
469 assert(ehdr->e_ident[EI_DATA] == ELFDATA2LSB &&
470 "ehdr->e_ident[EI_DATA] != ELFDATA2LSB");
473 if (elf_getphdrnum(inElf, &phnum) != 0)
474 llvm::report_fatal_error(llvm::Twine(
"cannot get program header count: ") +
478 for (
unsigned int ndx = 0; ndx < phnum; ndx++) {
480 GElf_Phdr *phdr = gelf_getphdr(inElf, ndx, &phdrMem);
482 llvm::report_fatal_error(llvm::Twine(
"cannot get program header entry ") +
483 std::to_string(ndx) +
": " + elf_errmsg(-1));
486 if (phdr->p_type != PT_LOAD)
491 if (phdr->p_flags & PF_X)
492 dest = ME_PROG_MEM_BASE + phdr->p_vaddr;
494 dest = ME_DATA_MEM_BASE + (phdr->p_vaddr & (DATA_MEM_SIZE - 1));
496 LLVM_DEBUG(llvm::dbgs()
497 << llvm::format(
"ELF flags=0x%x vaddr=0x%lx dest=0x%x\r\n",
498 phdr->p_flags, phdr->p_vaddr, dest));
504 char *raw = elf_rawfile(inElf, &elfSize);
506 for (offset = phdr->p_offset; offset < phdr->p_offset + phdr->p_filesz;
509 uint32_t data = *
reinterpret_cast<uint32_t *
>(raw + offset);
510 write32(destAddr, data);
523static void configShimTile(TileOp &tileOp) {
524 assert(tileOp.isShimTile() &&
525 "The tile must be a Shim to generate Shim Config");
527 TileAddress tileAddress{tileOp};
529 if (tileOp.isShimNOCTile())
540static void configMETile(TileOp tileOp,
const std::string &coreFilesDir) {
541 TileAddress tileAddress{tileOp};
545 tileAddress.clearRange(ME_PROG_MEM_BASE, PROG_MEM_SIZE);
546 tileAddress.clearRange(ME_DATA_MEM_BASE, DATA_MEM_SIZE);
549 tileAddress.clearRange(ME_DMA_BD_BASE,
sizeof(
DMABDRegBlock));
562 if (
auto coreOp = tileOp.getCoreOp()) {
563 std::string fileName;
564 if (
auto fileAttr = coreOp->getAttrOfType<StringAttr>(
"elf_file"))
565 fileName = fileAttr.str();
567 fileName = llvm::formatv(
"{0}/core_{1}_{2}.elf", coreFilesDir,
568 tileOp.colIndex(), tileOp.rowIndex());
569 loadElf(tileAddress, fileName);
594static BDInfo getBDInfo(Block &block) {
596 for (
auto op : block.getOps<DMABDOp>()) {
598 assert(op.getBufferOp().getAddress().has_value() &&
599 "buffer op should have address");
600 bdInfo.
baseAddrA = op.getBufferOp().getAddress().value();
601 bdInfo.
lenA = op.getLenInBytes();
602 bdInfo.
bytesA = op.getBufferElementTypeWidthInBytes();
603 bdInfo.
offsetA = op.getOffsetInBytes();
604 bdInfo.
bufA =
"XAIEDMA_TILE_BD_ADDRA";
610static void configureDMAs(DeviceOp &targetOp) {
611 Field<1> dmaChannelReset;
612 Field<0> dmaChannelEnable;
614 for (
auto memOp : targetOp.getOps<MemOp>()) {
615 TileAddress tile{memOp};
616 LLVM_DEBUG(llvm::dbgs() <<
"DMA: tile=" << memOp.getTile());
618 for (
auto chNum = 0u; chNum < DMA_S2MM_CHANNEL_COUNT; ++chNum) {
620 dmaChannelReset(DISABLE) | dmaChannelEnable(DISABLE));
623 for (
auto chNum = 0u; chNum < DMA_MM2S_CHANNEL_COUNT; ++chNum) {
625 dmaChannelReset(DISABLE) | dmaChannelEnable(DISABLE));
629 DenseMap<Block *, int> blockMap;
634 for (
auto &block : memOp.getBody()) {
635 if (!block.getOps<DMABDOp>().empty()) {
636 blockMap[&block] = bdNum;
642 for (
auto &block : memOp.getBody()) {
643 auto bdInfo = getBDInfo(block);
645 if (bdInfo.hasA and bdInfo.hasB) {
646 bdInfo.abMode = ENABLE;
647 if (bdInfo.lenA != bdInfo.lenB)
648 llvm::errs() <<
"ABmode must have matching lengths.\n";
649 if (bdInfo.bytesA != bdInfo.bytesB)
650 llvm::errs() <<
"ABmode must have matching element data types.\n";
653 int acqValue = 0, relValue = 0;
654 auto acqEnable = DISABLE;
655 auto relEnable = DISABLE;
656 std::optional<int> lockID = std::nullopt;
658 for (
auto op : block.getOps<UseLockOp>()) {
659 LockOp lock = dyn_cast<LockOp>(op.getLock().getDefiningOp());
660 lockID = lock.getLockIDValue();
663 acqValue = op.getLockValue();
666 relValue = op.getLockValue();
673 assert(lockID.has_value() ^
674 (acqEnable == DISABLE and relEnable == DISABLE) &&
675 "lock invariants not satisfied");
677 for (
auto op : block.getOps<DMABDPACKETOp>()) {
678 bdInfo.foundBDPacket =
true;
679 bdInfo.packetType = op.getPacketType();
680 bdInfo.packetID = op.getPacketID();
683 auto bdNum = blockMap[&block];
685 if (bdInfo.foundBD) {
686 Field<25, 22> bdAddressLockID;
687 Field<21> bdAddressReleaseEnable;
688 Field<20> bdAddressReleaseValue;
689 Field<19> bdAddressReleaseValueEnable;
690 Field<18> bdAddressAcquireEnable;
691 Field<17> bdAddressAcquireValue;
692 Field<16> bdAddressAcquireValueEnable;
695 bdData.addrA = bdAddressLockID(lockID.value()) |
696 bdAddressReleaseEnable(relEnable) |
697 bdAddressAcquireEnable(acqEnable);
698 if (relValue != 0xFFu)
699 bdData.addrA |= bdAddressReleaseValueEnable(
true) |
700 bdAddressReleaseValue(relValue);
701 if (acqValue != 0xFFu)
702 bdData.addrA |= bdAddressAcquireValueEnable(
true) |
703 bdAddressAcquireValue(acqValue);
706 llvm::report_fatal_error(
"bdInfo.hasB not supported");
708 auto addrA = bdInfo.baseAddrA + bdInfo.offsetA;
709 auto addrB = bdInfo.baseAddrB + bdInfo.offsetB;
711 Field<12, 0> bdAddressBase, bdControlLength;
712 Field<30> bdControlABMode;
713 Field<28> bdControlFifo;
715 bdData.addrA |= bdAddressBase(addrA >> 2u);
716 bdData.addrB |= bdAddressBase(addrB >> 2u);
717 bdData.control |= bdControlLength(bdInfo.lenA - 1) |
718 bdControlFifo(bdInfo.fifoMode) |
719 bdControlABMode(bdInfo.abMode);
721 if (block.getNumSuccessors() > 0) {
723 assert(block.getNumSuccessors() == 1 &&
724 "block.getNumSuccessors() != 1");
725 auto *nextBlock = block.getSuccessors()[0];
726 auto nextBDNum = blockMap[nextBlock];
728 Field<16, 13> bdControlNextBD;
729 Field<17> bdControlEnableNextBD;
731 bdData.control |= bdControlEnableNextBD(nextBDNum != 0xFFu) |
732 bdControlNextBD(nextBDNum);
735 if (bdInfo.foundBDPacket) {
736 Field<14, 12> bdPacketType;
737 Field<4, 0> bdPacketID;
738 Field<27> bdControlEnablePacket;
741 bdPacketID(bdInfo.packetID) | bdPacketType(bdInfo.packetType);
742 bdData.control |= bdControlEnablePacket(ENABLE);
745 Field<31> bdControlValid;
747 assert(bdNum < ME_DMA_BD_COUNT && "bdNum >= ME_DMA_BD_COUNT
");
748 uint64_t bdOffset = regDMAAddrABD(bdNum);
750 write32({tile, bdOffset}, bdData.addrA);
751 write32({tile, regDMAAddrBBD(bdNum)}, bdData.addrB);
752 write32({tile, regDMA2DXBD(bdNum)}, bdData.x2d);
753 write32({tile, regDMA2DYBD(bdNum)}, bdData.y2d);
754 write32({tile, regDMAPktBD(bdNum)}, bdData.packet);
755 write32({tile, regDMAIntStateBD(bdNum)}, bdData.interleave);
756 write32({tile, regDMACtrlBD(bdNum)},
757 bdData.control | bdControlValid(true));
761 for (auto &block : memOp.getBody()) {
762 for (auto op : block.getOps<DMAStartOp>()) {
763 auto bdNum = blockMap[op.getDest()];
764 if (bdNum != 0xFFU) {
765 Field<4, 0> dmaChannelQueueStartBd;
767 uint32_t chNum = op.getChannelIndex();
768 if (op.getChannelDir() == DMAChannelDir::MM2S) {
769 write32(Address{tile, regDMAMM2SQueue(chNum)},
770 dmaChannelQueueStartBd(bdNum));
771 write32({tile, regDMAMM2SCtrl(chNum)},
772 dmaChannelEnable(ENABLE) | dmaChannelReset(DISABLE));
774 write32(Address{tile, regDMAS2MMQueue(chNum)},
775 dmaChannelQueueStartBd(bdNum));
776 write32({tile, regDMAS2MMCtrl(chNum)},
777 dmaChannelEnable(ENABLE) | dmaChannelReset(DISABLE));
785static uint8_t computeSlavePort(WireBundle bundle, int index, bool isShim) {
786 assert(index >= 0 && "index < 0
");
787 assert(index < UINT8_MAX - 21 && "index >= UINT8_MAX - 21
");
790 case WireBundle::DMA:
792 case WireBundle::East: {
797 case WireBundle::North: {
802 case WireBundle::South: {
807 case WireBundle::West: {
813 // To implement a new WireBundle,
814 // look in libXAIE for the macros that handle the port.
815 llvm::report_fatal_error("unexpected bundle
");
819static uint8_t computeMasterPort(WireBundle bundle, int index, bool isShim) {
820 assert(index >= 0 && "index < 0
");
821 assert(index < UINT8_MAX - 21 && "index >= UINT8_MAX - 21
");
824 case WireBundle::DMA:
826 case WireBundle::East: {
831 case WireBundle::North: {
836 case WireBundle::South: {
841 case WireBundle::West: {
847 // To implement a new WireBundle,
848 // look in libXAIE for the macros that handle the port.
849 llvm::report_fatal_error(llvm::Twine("unexpected bundle
") +
850 std::to_string(static_cast<uint32_t>(bundle)));
854static void configureSwitchBoxes(DeviceOp &targetOp) {
855 for (auto switchboxOp : targetOp.getOps<SwitchboxOp>()) {
856 Region &r = switchboxOp.getConnections();
857 Block &b = r.front();
858 bool isEmpty = b.getOps<ConnectOp>().empty() &&
859 b.getOps<MasterSetOp>().empty() &&
860 b.getOps<PacketRulesOp>().empty();
862 // NOTE: may not be needed
863 std::set<TileAddress> switchboxSet;
864 if (isa<TileOp>(switchboxOp.getTile().getDefiningOp())) {
866 switchboxSet.emplace(switchboxOp);
867 } else if (AIEX::SelectOp sel = dyn_cast<AIEX::SelectOp>(
868 switchboxOp.getTile().getDefiningOp()))
869 // TODO: Use XAIEV1 target and translate into write32s
870 llvm::report_fatal_error("select op not supported
");
872 constexpr Field<31> STREAM_ENABLE;
873 constexpr Field<30> STREAM_PACKET_ENABLE;
874 for (auto connectOp : b.getOps<ConnectOp>()) {
875 for (auto tile : switchboxSet) {
877 computeSlavePort(connectOp.getSourceBundle(),
878 connectOp.sourceIndex(), tile.isShim());
879 auto masterPort = computeMasterPort(
880 connectOp.getDestBundle(), connectOp.destIndex(), tile.isShim());
882 Field<7> streamMasterDropHeader;
883 Field<6, 0> streamMasterConfig;
885 // Configure master side
887 Address address{tile, regMESSMaster(masterPort)};
888 // TODO: `Field::extract(uint32_t)`?
889 auto dropHeader = (slavePort & 0x80u) >> 7u;
890 auto value = STREAM_ENABLE(true) | STREAM_PACKET_ENABLE(false) |
891 streamMasterDropHeader(dropHeader) |
892 streamMasterConfig(slavePort);
893 assert(value < UINT32_MAX);
894 write32(address, value);
897 // Configure slave side
899 Address address{tile, regMESSSlaveCfg(slavePort)};
900 write32(address, STREAM_ENABLE(true) | STREAM_PACKET_ENABLE(false));
903 for (auto connectOp : b.getOps<MasterSetOp>()) {
906 for (auto val : connectOp.getAmsels()) {
907 auto amsel = dyn_cast<AMSelOp>(val.getDefiningOp());
908 arbiter = amsel.arbiterIndex();
909 int msel = amsel.getMselValue();
913 static constexpr auto STREAM_SWITCH_MSEL_SHIFT = 3u;
914 static constexpr auto STREAM_SWITCH_ARB_SHIFT = 0u;
916 const auto DROP_HEADER = connectOp.getDestBundle() == WireBundle::DMA;
917 auto config = streamMasterDropHeader(DROP_HEADER) |
918 (mask << STREAM_SWITCH_MSEL_SHIFT) |
919 (arbiter << STREAM_SWITCH_ARB_SHIFT);
920 Address dest{tile, regMESSMaster(masterPort)};
921 write32(dest, STREAM_ENABLE(ENABLE) | STREAM_PACKET_ENABLE(ENABLE) |
922 streamMasterDropHeader(DROP_HEADER) |
923 streamMasterConfig(config));
928 for (auto connectOp : b.getOps<PacketRulesOp>()) {
930 Block &block = connectOp.getRules().front();
931 for (auto slotOp : block.getOps<PacketRuleOp>()) {
932 AMSelOp amselOp = dyn_cast<AMSelOp>(slotOp.getAmsel().getDefiningOp());
933 int arbiter = amselOp.arbiterIndex();
934 int msel = amselOp.getMselValue();
936 for (auto tile : switchboxSet) {
938 computeSlavePort(connectOp.getSourceBundle(),
939 connectOp.sourceIndex(), tile.isShim());
940 write32({tile, regMESSSlaveCfg(slavePort)},
941 STREAM_ENABLE(ENABLE) | STREAM_PACKET_ENABLE(ENABLE));
943 Field<28, 24> streamSlotId;
944 Field<20, 16> streamSlotMask;
945 Field<8> streamSlotEnable;
946 Field<5, 4> streamSlotMSel;
947 Field<2, 0> streamSlotArbit;
949 auto config = streamSlotId(slotOp.valueInt()) |
950 streamSlotMask(slotOp.maskInt()) |
951 streamSlotEnable(ENABLE) | streamSlotMSel(msel) |
952 streamSlotArbit(arbiter);
953 write32({tile, regMESSSlaveSlot(slavePort, slot)}, config);
960 const auto INPUT_MASK_FOR = [](WireBundle bundle, uint8_t shiftAmt) {
962 case WireBundle::PLIO:
963 return 0u << shiftAmt;
964 case WireBundle::DMA:
965 return 1u << shiftAmt;
966 case WireBundle::NOC:
967 return 2u << shiftAmt;
969 llvm::report_fatal_error(llvm::Twine("unexpected bundle:
") +
970 std::to_string(static_cast<uint32_t>(bundle)));
974 std::optional<TileAddress> currentTile = std::nullopt;
975 for (auto op : targetOp.getOps<ShimMuxOp>()) {
976 Region &r = op.getConnections();
977 Block &b = r.front();
979 if (isa<TileOp>(op.getTile().getDefiningOp())) {
980 bool isEmpty = b.getOps<ConnectOp>().empty();
985 for (auto connectOp : b.getOps<ConnectOp>()) {
986 if (connectOp.getSourceBundle() == WireBundle::North) {
988 // XAieTile_ShimStrmDemuxConfig(&(TileInst[col][0]),
989 // XAIETILE_SHIM_STRM_DEM_SOUTH3, XAIETILE_SHIM_STRM_DEM_DMA);
990 assert(currentTile.has_value() && "current tile not set
");
991 auto shiftAmt = [index = connectOp.sourceIndex()] {
992 // NOTE: hardcoded to SOUTH to match definitions from libxaie
1002 default: // Unsure about this, but seems safe to assume
1003 llvm::report_fatal_error(llvm::Twine("unexpected source index:
") +
1004 std::to_string(index));
1008 // We need to add to the possibly preexisting mask.
1009 Address addr{currentTile.value(), 0x1F004u};
1010 auto currentMask = read32(addr);
1011 write32(addr, currentMask |
1012 INPUT_MASK_FOR(connectOp.getDestBundle(), shiftAmt));
1013 } else if (connectOp.getDestBundle() == WireBundle::North) {
1015 // XAieTile_ShimStrmMuxConfig(&(TileInst[col][0]),
1016 // XAIETILE_SHIM_STRM_MUX_SOUTH3, XAIETILE_SHIM_STRM_MUX_DMA);
1017 assert(currentTile.has_value() && "no current tile
");
1018 auto shiftAmt = [index = connectOp.destIndex()] {
1019 // NOTE: hardcoded to SOUTH to match definitions from libxaie
1029 default: // Unsure about this, but seems safe to assume
1030 llvm::report_fatal_error(llvm::Twine("unexpected dest index
") +
1031 std::to_string(index));
1035 Address addr{currentTile.value(), 0x1F000u};
1036 auto currentMask = read32(addr);
1037 write32(addr, currentMask | INPUT_MASK_FOR(connectOp.getSourceBundle(),
1043 /* TODO: Implement the following
1044 for (auto switchboxOp : targetOp.getOps<ShimSwitchboxOp>()) {
1045 Region &r = switchboxOp.getConnections();
1046 Block &b = r.front();
1047 for (auto connectOp : b.getOps<ConnectOp>()) {
1048 output << "XAieTile_StrmConnectCct(
" << tileInstStr(col, 0) << ",\n
";
1049 output << "\tXAIETILE_STRSW_SPORT_
"
1050 << stringifyWireBundle(connectOp.sourceBundle()).upper() << "(
"
1051 << tileInstStr(col, 0) << ",
" << connectOp.sourceIndex()
1053 output << "\tXAIETILE_STRSW_MPORT_
"
1054 << stringifyWireBundle(connectOp.destBundle()).upper() << "(
"
1055 << tileInstStr(col, 0) << ",
" << connectOp.destIndex() << "),\n
";
1056 output << "\t
" << enable << ");\n
";
// Queue the cascade-direction register write (tile offset 0x36060) for every
// ConfigureCascadeOp in `targetOp`. The work is guarded by an
// isa<AIE2TargetModel> check on the device's target model.
// Bit 1 of the register value encodes the output direction and bit 0 the
// input direction (see the encoding notes kept in the body).
1062static void configureCascade(DeviceOp &targetOp) {
// Declared as `targetModel` so the isa<> check on the next line refers to a
// variable that actually exists.
1063 const auto &targetModel = xilinx::AIE::getTargetModel(targetOp);
1064 if (isa<AIE2TargetModel>(targetModel)) {
1065 for (auto configOp : targetOp.getOps<ConfigureCascadeOp>()) {
1066 TileOp tile = cast<TileOp>(configOp.getTile().getDefiningOp());
1067 auto inputDir = stringifyCascadeDir(configOp.getInputDir()).upper();
1068 auto outputDir = stringifyCascadeDir(configOp.getOutputDir()).upper();
1070 Address address{tile, 0x36060u};
1073 * * Register value for output BIT 1: 0 == SOUTH, 1 == EAST
1074 * * Register value for input BIT 0: 0 == NORTH, 1 == WEST
1076 uint8_t outputValue = (outputDir == "SOUTH
") ? 0 : 1;
1077 uint8_t inputValue = (inputDir == "NORTH
") ? 0 : 1;
1079 constexpr Field<1> Output;
1080 constexpr Field<0> Input;
1082 auto regValue = Output(outputValue) | Input(inputValue);
1084 write32(address, regValue);
1090 Convert memory address to index
1092 Used to look up register/region name
// Map an address to a SEC_IDX_* section-name index. The address is first
// masked down to its low TILE_ADDR_OFF_WIDTH bits (the in-tile register
// offset) and then compared for exact equality against known region bases.
// NOTE(review): the case labels for the SEC_IDX_DATA_MEM and SEC_IDX_SHMMUX
// returns, and any default arm, are on listing lines not visible here.
1094static uint8_t secAddr2Index(uint64_t in) {
1095 switch (in & ((1 << TILE_ADDR_OFF_WIDTH) - 1)) {
1097 return SEC_IDX_DATA_MEM;
1098 case ME_SS_MASTER_BASE:
1099 return SEC_IDX_SSMAST;
1100 case ME_SS_SLAVE_CFG_BASE:
1101 return SEC_IDX_SSSLVE;
1102 case ME_SS_SLAVE_SLOT_BASE:
1103 return SEC_IDX_SSPCKT;
1104 case ME_DMA_BD_BASE:
1105 return SEC_IDX_SDMA_BD;
1107 return SEC_IDX_SHMMUX;
1108 case SHIM_DMA_S2MM_BASE:
1109 return SEC_IDX_SDMA_CTL;
1110 case ME_PROG_MEM_BASE:
1111 return SEC_IDX_PRGM_MEM;
1112 case ME_DMA_S2MM_BASE:
1113 return SEC_IDX_TDMA_CTL;
1120 Group the writes into contiguous sections
// Walk the recorded writes in `memWrites` (a std::map, so in ascending address
// order) and pack runs of consecutive 32-bit words into Section objects, which
// are appended to the caller's `sections` vector.
// The parameter is a reference named `sections`, which is the name the body
// appends to.
1122static void groupSections(std::vector<Section *> &sections) {
1123 uint64_t lastAddr = 0;
1124 Section *section = nullptr;
1126 for (auto write : memWrites) {
// An address that is not exactly 4 bytes past the previous write begins a new
// section.
1127 if (write.first != lastAddr + 4) {
// Hand over the section built so far. NOTE(review): listing line 1128 is not
// visible here; presumably it skips this when `section` is still null.
1129 sections.push_back(section);
1130 section = new Section(write.first);
1131 LLVM_DEBUG(llvm::dbgs() << "Starting
new section @
"
1132 << llvm::format("0x%lx (last=0x%lx)\n
",
1133 write.first, lastAddr));
1135 assert(section && "section is null
");
// Append this word to the current section and remember where it was written.
1136 section->addData(write.second);
1137 lastAddr = write.first;
// Hand over the final section once all writes have been consumed.
1140 sections.push_back(section);
1144 Add a string to the section header string table and return the offset of
1145 the start of the string
// Append `str` (including its terminating NUL) to ELF section `scn` as a new
// data chunk of byte type.
// NOTE(review): the return statement and the update of `stridx` are on listing
// lines not visible here; `lastidx` captures `stridx` on entry, presumably so
// the string's starting offset can be returned.
1147static size_t addString(Elf_Scn *scn, const char *str) {
1148 size_t lastidx = stridx;
// +1 so the NUL terminator is part of the stored string.
1149 size_t size = strlen(str) + 1;
1151 Elf_Data *data = elf_newdata(scn);
// The chunk points at the caller's storage; the bytes are not copied here.
// NOTE(review): `str` therefore has to stay alive until the ELF is written.
1152 data->d_buf = (void *)str;
1153 data->d_type = ELF_T_BYTE;
1154 data->d_size = size;
1156 data->d_version = EV_CURRENT;
// Create a new data chunk in ELF section `scn` sized to hold the words of
// `section`, and copy those words into a freshly malloc'ed buffer.
// NOTE(review): the lines that would null-check `buf`, attach it to
// `data->d_buf`, and return `data` are not visible in this listing — confirm
// against the full source.
1162Elf_Data *sectionAddData(Elf_Scn *scn, const Section *section) {
// Section::getLength() is the payload size in bytes (word count * 4).
1163 size_t size = section->getLength();
1164 auto *buf = static_cast<uint32_t *>(malloc(size));
1166 // create a data object for the section
1167 Elf_Data *data = elf_newdata(scn);
1169 data->d_type = ELF_T_BYTE;
1170 data->d_size = size;
1173 data->d_version = EV_CURRENT;
// Copy the section's words into the buffer allocated above.
1176 memcpy(buf, section->getData(), size);
1162Elf_Data *sectionAddData(Elf_Scn *scn, const Section *section) { {
…}
1181mlir::LogicalResult AIETranslateToAirbin(mlir::ModuleOp module,
1182 const std::string &outputFilename,
1183 const std::string &coreFilesDir,
1191 char emptyStr[] = "";
1192 char strTabName[] = ".shstrtab
";
1193 std::vector<Section *> sections;
1195 if (module.getOps<DeviceOp>().empty()) {
1196 LLVM_DEBUG(llvm::dbgs() << "no device ops found
");
1200 DeviceOp targetOp = *(module.getOps<DeviceOp>().begin());
1202 // Write the initial configuration for every tile specified in the MLIR.
1203 for (auto tileOp : targetOp.getOps<TileOp>()) {
1204 LLVM_DEBUG(llvm::dbgs() << "CC: tile=
" << tileOp.getTileID());
1205 if (tileOp.isShimTile())
1206 configShimTile(tileOp);
1208 configMETile(tileOp, coreFilesDir);
1211 configureSwitchBoxes(targetOp);
1212 configureCascade(targetOp);
1213 configureDMAs(targetOp);
1214 groupSections(sections);
1216 LLVM_DEBUG(llvm::dbgs() << llvm::format("mem_writes: %lu in %lu sections\n
",
1217 memWrites.size(), sections.size()));
1219 elf_version(EV_CURRENT);
1221 open(outputFilename.c_str(), O_RDWR | O_CREAT | O_TRUNC, DEFFILEMODE);
1222 outElf = elf_begin(tmpElfFD, ELF_C_WRITE, nullptr);
1224 if (!gelf_newehdr(outElf, ELFCLASS64))
1225 llvm::report_fatal_error(llvm::Twine("Error creating ELF64 header:
") +
1228 ehdr = gelf_getehdr(outElf, &ehdrMem);
1230 llvm::report_fatal_error(llvm::Twine("cannot
get ELF header:
") +
1233 // Initialize header.
1234 ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
1235 ehdr->e_ident[EI_OSABI] = ELFOSABI_GNU;
1236 ehdr->e_type = ET_NONE;
1237 ehdr->e_machine = EM_AMDAIR;
1238 ehdr->e_version = EV_CURRENT;
1239 if (gelf_update_ehdr(outElf, ehdr) == 0)
1240 llvm::report_fatal_error(llvm::Twine("cannot update ELF header:
") +
1243 // Create new section for the 'section header string table'
1244 Elf_Scn *shStrTabScn = elf_newscn(outElf);
1246 llvm::report_fatal_error(
1247 llvm::Twine("cannot create
new shstrtab section:
") + elf_errmsg(-1));
1249 // the first entry in the string table must be a NULL string
1250 addString(shStrTabScn, emptyStr);
1252 shdr = gelf_getshdr(shStrTabScn, &shdrMem);
1254 llvm::report_fatal_error(
1255 llvm::Twine("cannot
get header
for sh_strings section:
") +
1258 shdr->sh_type = SHT_STRTAB;
1261 shdr->sh_link = SHN_UNDEF;
1262 shdr->sh_info = SHN_UNDEF;
1263 shdr->sh_addralign = 1;
1264 shdr->sh_entsize = 0;
1265 shdr->sh_name = addString(shStrTabScn, strTabName);
1267 // add all the AIRBIN-specific section names up front and index them
1268 for (uint8_t secIdx = SEC_IDX_SSMAST; secIdx < SEC_IDX_MAX; secIdx++)
1269 secNameOffset[secIdx] = addString(shStrTabScn, secNameStr[secIdx]);
1270 secNameOffset[SEC_IDX_NULL] = 0;
1272 // We have to store the section strtab index in the ELF header so sections
1273 // have actual names.
1274 int ndx = elf_ndxscn(shStrTabScn);
1275 ehdr->e_shstrndx = ndx;
1277 if (!gelf_update_ehdr(outElf, ehdr))
1278 llvm::report_fatal_error(llvm::Twine("cannot update ELF header:
") +
1281 // Finished new shstrtab section, update the header.
1282 if (!gelf_update_shdr(shStrTabScn, shdr))
1283 llvm::report_fatal_error(
1284 llvm::Twine("cannot update
new shstrtab section header:
") +
1287 // output the rest of the sections
1288 for (const Section *section : sections) {
1289 uint64_t addr = section->getAddr();
1290 Elf_Scn *scn = elf_newscn(outElf);
1292 llvm::report_fatal_error(llvm::Twine("cannot create
new ") +
1293 secNameStr[secAddr2Index(addr)] +
1294 "section:
" + elf_errmsg(-1));
1296 shdr = gelf_getshdr(scn, &shdrMem);
1298 llvm::report_fatal_error(llvm::Twine("cannot
get header
for ") +
1299 secNameStr[secAddr2Index(addr)] +
1300 "section:
" + elf_errmsg(-1));
1302 Elf_Data *data = sectionAddData(scn, section);
1304 shdr->sh_type = SHT_PROGBITS;
1305 shdr->sh_flags = SHF_ALLOC;
1306 shdr->sh_addr = section->getAddr();
1307 shdr->sh_link = SHN_UNDEF;
1308 shdr->sh_info = SHN_UNDEF;
1309 shdr->sh_addralign = 1;
1310 shdr->sh_entsize = 0;
1311 shdr->sh_size = data->d_size;
1312 shdr->sh_name = secNameOffset[secAddr2Index(addr)];
1314 if (!gelf_update_shdr(scn, shdr))
1315 llvm::report_fatal_error(llvm::Twine("cannot update section header:
") +
1319 if (elf_update(outElf, ELF_C_WRITE) < 0)
1320 llvm::report_fatal_error(llvm::Twine("failure in elf_update:
") +
1181mlir::LogicalResult AIETranslateToAirbin(mlir::ModuleOp module, {
…}
1327} // namespace xilinx::AIE
Address(TileAddress tile, uint64_t offset)
TileAddress destTile() const
uint32_t getOffset() const
constexpr uint32_t operator()(uint32_t value) const
static constexpr auto SHIFTED_MASK
static constexpr auto UNSHIFTED_MASK
static constexpr auto NUM_BITS_USED
const uint32_t * getData() const
void addData(uint32_t value)
void clearRange(uint32_t rangeStart, uint32_t length)
uint64_t fullAddress(uint64_t registerOffset) const
TileAddress(uint8_t column, uint8_t row, uint64_t arrayOffset=0x000u)
const V & get(const std::map< K, V > &pa, K k)
Include the generated interface declarations.
MERegDMABD[ME_DMA_BD_COUNT] DMABDRegBlock
uint32_t[SHIM_SS_SLAVE_CFG_COUNT] ShimSSSlaveCfgBlock
uint32_t[SHIM_SS_MASTER_COUNT] ShimSSMasterBlock
uint32_t[ME_SS_MASTER_COUNT] MESSMasterBlock
RegDMAS2MM[DMA_S2MM_CHANNEL_COUNT] DMAS2MMRegBlock
std::pair< uint64_t, uint32_t > Write
RegDMAMM2S[DMA_MM2S_CHANNEL_COUNT] DMAMM2SRegBlock
ShimDMABD[SHIM_DMA_BD_COUNT] ShimDMABDBlock
uint32_t[ME_SS_SLAVE_SLOT_COUNT][SS_SLOT_NUM_PORTS] MESSSlaveSlotBlock
uint32_t[ME_SS_SLAVE_CFG_COUNT] MESSSlaveCfgBlock
uint32_t[SHIM_SS_SLAVE_SLOT_COUNT] ShimSSSlaveSlotBlock