// Size bound handed to snprintf() when formatting the
// "/sys/class/amdair/amdair/%02u" sysfs device path.
25#define SYSFS_PATH_MAX 63
28hsa_status_t mlir_aie_packet_req_translation(hsa_agent_dispatch_packet_t *pkt,
35 pkt->header = (HSA_PACKET_TYPE_AGENT_DISPATCH << HSA_PACKET_HEADER_TYPE);
37 return HSA_STATUS_SUCCESS;
40hsa_status_t mlir_aie_packet_nd_memcpy(
41 hsa_agent_dispatch_packet_t *pkt, uint16_t herd_id, uint8_t col,
42 uint8_t direction, uint8_t channel, uint8_t burst_len, uint8_t memory_space,
43 uint64_t phys_addr, uint32_t transfer_length1d, uint32_t transfer_length2d,
44 uint32_t transfer_stride2d, uint32_t transfer_length3d,
45 uint32_t transfer_stride3d, uint32_t transfer_length4d,
46 uint32_t transfer_stride4d) {
49 pkt->arg[0] |= ((uint64_t)memory_space) << 16;
50 pkt->arg[0] |= ((uint64_t)channel) << 24;
51 pkt->arg[0] |= ((uint64_t)col) << 32;
52 pkt->arg[0] |= ((uint64_t)burst_len) << 52;
53 pkt->arg[0] |= ((uint64_t)direction) << 60;
55 pkt->arg[1] = phys_addr;
56 pkt->arg[2] = transfer_length1d;
57 pkt->arg[2] |= ((uint64_t)transfer_length2d) << 32;
58 pkt->arg[2] |= ((uint64_t)transfer_stride2d) << 48;
59 pkt->arg[3] = transfer_length3d;
60 pkt->arg[3] |= ((uint64_t)transfer_stride3d) << 16;
61 pkt->arg[3] |= ((uint64_t)transfer_length4d) << 32;
62 pkt->arg[3] |= ((uint64_t)transfer_stride4d) << 48;
65 pkt->header = (HSA_PACKET_TYPE_AGENT_DISPATCH << HSA_PACKET_HEADER_TYPE);
67 return HSA_STATUS_SUCCESS;
70hsa_status_t get_aie_agents(hsa_agent_t agent,
void *data) {
71 hsa_status_t status(HSA_STATUS_SUCCESS);
72 hsa_device_type_t device_type;
73 std::vector<hsa_agent_t> *aie_agents(
nullptr);
76 status = HSA_STATUS_ERROR_INVALID_ARGUMENT;
80 aie_agents =
static_cast<std::vector<hsa_agent_t> *
>(data);
81 status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
83 if (status != HSA_STATUS_SUCCESS) {
84 printf(
"%s [ERROR] We got a status of 0x%x from hsa_agent_get_info\n",
89 if (device_type == HSA_DEVICE_TYPE_AIE) {
90 aie_agents->push_back(agent);
96hsa_status_t get_global_mem_pool(hsa_amd_memory_pool_t pool,
void *data) {
97 hsa_status_t status(HSA_STATUS_SUCCESS);
98 hsa_region_segment_t segment_type;
99 status = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT,
101 if (segment_type == HSA_REGION_SEGMENT_GLOBAL) {
102 *
reinterpret_cast<hsa_amd_memory_pool_t *
>(data) = pool;
108hsa_status_t mlir_aie_queue_dispatch_and_wait(
109 hsa_agent_t *agent, hsa_queue_t *q, uint64_t packet_id, uint64_t doorbell,
110 hsa_agent_dispatch_packet_t *pkt,
bool destroy_signal) {
114 hsa_amd_signal_create_on_agent(1, 0,
nullptr, agent, 0,
115 &(pkt->completion_signal));
118 mlir_aie_write_pkt<hsa_agent_dispatch_packet_t>(q, packet_id, pkt);
121 hsa_signal_store_screlease(q->doorbell_signal, doorbell);
124 while (hsa_signal_wait_scacquire(pkt->completion_signal,
125 HSA_SIGNAL_CONDITION_EQ, 0, 0x80000,
126 HSA_WAIT_STATE_ACTIVE) != 0)
130 if (destroy_signal) {
131 hsa_signal_destroy(pkt->completion_signal);
134 return HSA_STATUS_SUCCESS;
137hsa_status_t mlir_aie_packet_device_init(hsa_agent_dispatch_packet_t *pkt,
142 pkt->arg[0] |= ((uint64_t)num_cols << 40);
145 pkt->header = (HSA_PACKET_TYPE_AGENT_DISPATCH << HSA_PACKET_HEADER_TYPE);
147 return HSA_STATUS_SUCCESS;
156 printf(
"Failed to finish tiles.\n");
160 if (ctx->cmd_queue != NULL) {
161 hsa_queue_destroy(ctx->cmd_queue);
179 printf(
"[ERROR] %s: Passed context of NULL\n", __func__);
184 hsa_status_t hsa_ret = hsa_init();
185 if (hsa_ret != HSA_STATUS_SUCCESS) {
186 printf(
"hsa_init failed\n");
191 hsa_status_t iterate_agents_ret = hsa_iterate_agents(
192 &get_aie_agents,
reinterpret_cast<void *
>(&(ctx->agents)));
193 if (iterate_agents_ret != HSA_STATUS_SUCCESS) {
194 printf(
"iterate_agents failed with opcode 0x%x\n", iterate_agents_ret);
199 if (ctx->agents.empty()) {
200 printf(
"No agents found. Exiting.\n");
205 hsa_amd_agent_iterate_memory_pools(
206 ctx->agents.front(), get_global_mem_pool,
207 reinterpret_cast<void *
>(&(ctx->global_mem_pool)));
210 hsa_queue_t *q =
nullptr;
211 int aie_max_queue_size = 0;
212 hsa_agent_get_info(ctx->agents[0], HSA_AGENT_INFO_QUEUE_MAX_SIZE,
213 &aie_max_queue_size);
215 auto queue_create_status =
216 hsa_queue_create(ctx->agents[0], aie_max_queue_size,
217 HSA_QUEUE_TYPE_SINGLE,
nullptr,
nullptr, 0, 0, &q);
219 if (queue_create_status != HSA_STATUS_SUCCESS) {
220 printf(
"Failed to create queue. Exiting\n");
225 uint64_t wr_idx = hsa_queue_add_write_index_relaxed(q, 1);
226 uint64_t packet_id = wr_idx % q->size;
227 hsa_agent_dispatch_packet_t shim_pkt;
228 mlir_aie_packet_device_init(&shim_pkt, 50);
229 mlir_aie_queue_dispatch_and_wait(&(ctx->agents[0]), q, packet_id, wr_idx,
237 if (snprintf(sysfs_path,
SYSFS_PATH_MAX,
"/sys/class/amdair/amdair/%02u",
242 XAie_BackendType backend;
243 ctx->
XAieConfig->Backend = XAIE_IO_BACKEND_AMDAIR;
244 backend = XAIE_IO_BACKEND_AMDAIR;
252 printf(
"Driver initialization failed.\n");
258 const XAie_Backend *Backend = ctx->
XAieDevInst->Backend;
259 if (Backend->Type != XAIE_IO_BACKEND_SIM) {
260 RC = XAie_PmRequestTiles(ctx->
XAieDevInst, NULL, 0);
262 printf(
"Failed to request tiles.\n");
269 printf(
"Failed to finish tiles.\n");
281 printf(
"Driver initialization failed.\n");
284 RC = XAie_PmRequestTiles(ctx->
XAieDevInst, NULL, 0);
286 printf(
"Failed to request tiles.\n");
291 if (Backend->Type == XAIE_IO_BACKEND_SIM) {
292 printf(
"Turning ecc off\n");
308 int lockval,
int timeout) {
309 return (XAie_LockAcquire(ctx->
XAieDevInst, XAie_TileLoc(col, row),
310 XAie_LockInit(lockid, lockval), timeout) == XAIE_OK);
322 int lockval,
int timeout) {
323 return (XAie_LockRelease(ctx->
XAieDevInst, XAie_TileLoc(col, row),
324 XAie_LockInit(lockid, lockval), timeout) == XAIE_OK);
350 XAie_DataMemRdWord(ctx->
XAieDevInst, XAie_TileLoc(col, row), addr, &data);
361 u64 addr, u32 data) {
362 XAie_DataMemWrWord(ctx->
XAieDevInst, XAie_TileLoc(col, row), addr, data);
369 return (((u64)row & 0xFFU) << ctx->
XAieDevInst->DevProp.RowShift) |
370 (((u64)col & 0xFFU) << ctx->
XAieDevInst->DevProp.ColShift);
376 for (
int i = 0; i < 0x2000; i++) {
378 AieRC rc = XAie_DataMemRdWord(ctx->
XAieDevInst, XAie_TileLoc(col, row),
380 if (rc == XAIE_OK && d != 0)
381 printf(
"Tile[%d][%d]: mem[%d] = %d\n", col, row, i, d);
388 for (
int i = 0; i < 0x2000; i++) {
389 XAie_DataMemWrWord(ctx->
XAieDevInst, XAie_TileLoc(col, row), (i * 4), 0);
394 int row,
const char *dmatype,
395 const char *channel,
int channelNum,
396 int running,
int stalled) {
397 printf(
"%s [%d, %d] AIE1 %s%d ", dmatype, col, row, channel, channelNum);
410 printf(
"Stalled on lock");
416 int row,
const char *dmatype,
417 const char *channel,
int channelNum,
418 u32 statusOffset, u32 controlOffset,
422 XAie_Read32(ctx->
XAieDevInst, tileAddr + statusOffset, &status);
423 XAie_Read32(ctx->
XAieDevInst, tileAddr + controlOffset, &control);
424 u32 running = status & 0x3;
425 u32 stalled_acq = (status >> 2) & 0x1;
426 u32 stalled_rel = (status >> 3) & 0x1;
427 u32 stalled_data = (status >> 4) & 0x1;
428 u32 stalled_complete = (status >> 5) & 0x1;
429 current_bd = status >> 24;
430 printf(
"%s [%d, %d] AIE2 %s%d ", dmatype, col, row, channel, channelNum);
443 printf(
"Stalled on Acquire ");
445 printf(
"Stalled on Release ");
447 printf(
"Stalled on Data ");
448 if (stalled_complete)
449 printf(
"Stalled on Completion ");
450 printf(
"status:%08X ctrl:%02X\n", status, control);
453static void print_bd(
int bd,
int bd_valid, u32 nextBd, u32 useNextBd,
454 int isPacket, u32 packetID, u32 packetType,
455 int words_to_transfer,
int base_address,
456 int acquireEnabled, u32 acquireLock,
int acquireValue,
457 int releaseEnabled, u32 releaseLock,
int releaseValue,
458 int s2mm_current_bd[],
int mm2s_current_bd[],
462 printf(
"BD %d valid ", bd);
464 printf(
"(Next BD: %d)\n", nextBd);
466 printf(
"(Last BD)\n");
468 for (
int i = 0; i < numchannels; i++) {
469 if (bd == s2mm_current_bd[i]) {
470 printf(
" * Current BD for s2mm channel %d\n", i);
472 if (bd == mm2s_current_bd[i]) {
473 printf(
" * Current BD for mm2s channel %d\n", i);
478 printf(
" Packet ID: %02X\n", packetID);
479 printf(
" Packet Type: %01X\n", packetType);
481 printf(
" Transferring %d 32 bit words to/from byte address %06X\n",
482 words_to_transfer, base_address * 4);
492 if (acquireEnabled) {
493 printf(
" Acquires lock %d ", acquireLock);
494 printf(
"with value %d\n", acquireValue);
496 if (releaseEnabled) {
497 printf(
" Releases lock %d ", releaseLock);
498 printf(
"with value %d\n", releaseValue);
519 auto TileType = ctx->
XAieDevInst->DevOps->GetTTypefromLoc(
521 assert(TileType == XAIEGBL_TILE_TYPE_AIETILE);
523 if (ctx->
XAieConfig->AieGen == XAIE_DEV_GEN_AIEML) {
524 const int num_bds = 2;
525 int s2mm_current_bd[num_bds];
526 int mm2s_current_bd[num_bds];
528 for (
int i = 0; i < num_bds; i++) {
529 print_aie2_dmachannel_status(ctx, col, row,
"DMA",
"s2mm", i,
530 0x0001DF00 + 4 * i, 0x0001DE00 + 8 * i,
533 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001D230 + (0x4 * i),
535 printf(
"DMA [%d, %d] s2mm%d write_count = %d\n", col, row, i,
538 for (
int i = 0; i < num_bds; i++)
539 print_aie2_dmachannel_status(ctx, col, row,
"DMA",
"mm2s", i,
540 0x0001DF10 + 4 * i, 0x0001DE10 + 8 * i,
543 for (
int bd = 0; bd < 8; bd++) {
547 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001D000 + (0x20 * bd),
549 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001D004 + (0x20 * bd),
551 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001D014 + (0x20 * bd),
554 int bd_valid = (dma_bd_control >> 25) & 0x1;
555 u32 nextBd = ((dma_bd_control >> 27) & 0xF);
556 u32 useNextBd = ((dma_bd_control >> 26) & 0x1);
557 int isPacket = (dma_bd_packet >> 30) & 0x1;
558 u32 packetID = (dma_bd_packet >> 19) & 0x1F;
559 u32 packetType = (dma_bd_packet >> 16) & 0x7;
560 int words_to_transfer = (dma_bd_addr & 0x3FFF);
561 int base_address = dma_bd_addr >> 14;
562 int acquireEnabled = (dma_bd_control >> 12) & 0x1;
563 u32 acquireLock = dma_bd_control & 0xf;
564 int acquireValue = (((int)dma_bd_control << 20) >> 25);
565 u32 releaseLock = (dma_bd_control >> 13) & 0xf;
566 int releaseValue = (((int)dma_bd_control << 7) >> 25);
567 int releaseEnabled = releaseValue != 0;
569 print_bd(bd, bd_valid, nextBd, useNextBd, isPacket, packetID, packetType,
570 words_to_transfer, base_address, acquireEnabled, acquireLock,
571 acquireValue, releaseEnabled, releaseLock, releaseValue,
572 s2mm_current_bd, mm2s_current_bd, num_bds);
576 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001DF10, &dma_mm2s_status);
578 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001DF00, &dma_s2mm_status);
579 u32 dma_mm2s0_control;
580 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001DE10, &dma_mm2s0_control);
581 u32 dma_mm2s1_control;
582 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001DE18, &dma_mm2s1_control);
583 u32 dma_s2mm0_control;
584 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001DE00, &dma_s2mm0_control);
585 u32 dma_s2mm1_control;
586 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001DE08, &dma_s2mm1_control);
588 u32 s2mm_ch0_running = dma_s2mm_status & 0x3;
589 u32 s2mm_ch1_running = (dma_s2mm_status >> 2) & 0x3;
590 u32 mm2s_ch0_running = dma_mm2s_status & 0x3;
591 u32 mm2s_ch1_running = (dma_mm2s_status >> 2) & 0x3;
592 int s2mm0_current_bd, s2mm1_current_bd;
593 int mm2s0_current_bd, mm2s1_current_bd;
594 s2mm0_current_bd = (dma_s2mm_status >> 16) & 0xf;
595 s2mm1_current_bd = (dma_s2mm_status >> 20) & 0xf;
596 mm2s0_current_bd = (dma_mm2s_status >> 16) & 0xf;
597 mm2s1_current_bd = (dma_mm2s_status >> 20) & 0xf;
598 u32 s2mm_ch0_stalled = (dma_s2mm_status >> 4) & 0x1;
599 u32 s2mm_ch1_stalled = (dma_s2mm_status >> 5) & 0x1;
600 u32 mm2s_ch0_stalled = (dma_mm2s_status >> 4) & 0x1;
601 u32 mm2s_ch1_stalled = (dma_mm2s_status >> 5) & 0x1;
603 printf(
"DMA [%d, %d] mm2s_status/0ctrl/1ctrl is %08X %02X %02X, "
604 "s2mm_status/0ctrl/1ctrl is %08X %02X %02X\n",
605 col, row, dma_mm2s_status, dma_mm2s0_control, dma_mm2s1_control,
606 dma_s2mm_status, dma_s2mm0_control, dma_s2mm1_control);
607 print_aie1_dmachannel_status(ctx, col, row,
"DMA",
"s2mm", 0,
608 s2mm_ch0_running, s2mm_ch0_stalled);
609 print_aie1_dmachannel_status(ctx, col, row,
"DMA",
"s2mm", 1,
610 s2mm_ch1_running, s2mm_ch1_stalled);
611 print_aie1_dmachannel_status(ctx, col, row,
"DMA",
"mm2s", 0,
612 mm2s_ch0_running, mm2s_ch0_stalled);
613 print_aie1_dmachannel_status(ctx, col, row,
"DMA",
"mm2s", 1,
614 mm2s_ch1_running, mm2s_ch1_stalled);
615 for (
int bd = 0; bd < 8; bd++) {
617 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001D000 + (0x20 * bd),
620 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001D018 + (0x20 * bd),
624 if ((dma_bd_control >> 31) & 0x1 && (dma_bd_control != 0xFFFFFFFF)) {
625 printf(
"BD %d valid ", bd);
626 u32 nextBd = ((dma_bd_control >> 13) & 0xF);
627 u32 useNextBd = ((dma_bd_control >> 17) & 0x1);
629 printf(
"(Next BD: %d)\n", nextBd);
631 printf(
"(Last BD)\n");
633 if (bd == s2mm0_current_bd) {
634 printf(
" * Current BD for s2mm channel 0\n");
636 if (bd == s2mm1_current_bd) {
637 printf(
" * Current BD for s2mm channel 1\n");
639 if (bd == mm2s0_current_bd) {
640 printf(
" * Current BD for mm2s channel 0\n");
642 if (bd == mm2s1_current_bd) {
643 printf(
" * Current BD for mm2s channel 1\n");
646 if (dma_bd_control & 0x08000000) {
648 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001D010 + (0x20 * bd),
650 printf(
" Packet mode: %02X\n", dma_packet & 0x1F);
652 int words_to_transfer = 1 + (dma_bd_control & 0x1FFF);
653 int base_address = dma_bd_addr_a & 0x1FFF;
654 printf(
" Transferring %d 32 bit words to/from byte address %06X\n",
655 words_to_transfer, base_address * 4);
658 for (
int w = 0; w < 7; w++) {
660 XAie_DataMemRdWord(ctx->
XAieDevInst, XAie_TileLoc(col, row),
661 (base_address + w) * 4, &tmpd);
662 printf(
"%08X ", tmpd);
665 int hasAcquire = (dma_bd_addr_a >> 18) & 0x1;
666 int hasRelease = (dma_bd_addr_a >> 21) & 0x1;
667 if (hasAcquire || hasRelease) {
668 u32 lock_id = (dma_bd_addr_a >> 22) & 0xf;
670 printf(
" Acquires lock %d ", lock_id);
671 if ((dma_bd_addr_a >> 16) & 0x1)
672 printf(
"with value %d ", (dma_bd_addr_a >> 17) & 0x1);
675 printf(
" Releases lock %d ", lock_id);
676 if ((dma_bd_addr_a >> 19) & 0x1)
677 printf(
"with value %d ", (dma_bd_addr_a >> 20) & 0x1);
680 printf(
"currently ");
682 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001EF00, &locks);
683 u32 two_bits = (locks >> (lock_id * 2)) & 0x3;
685 u32 acquired = two_bits & 0x1;
686 u32 value = two_bits & 0x2;
689 printf(value ?
"1" :
"0");
695 if (dma_bd_control & 0x30000000) {
696 int FIFO = (dma_bd_control >> 28) & 0x3;
697 u32 dma_fifo_counter;
698 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001DF20,
700 printf(
" Using FIFO Cnt%d : %08X\n", FIFO, dma_fifo_counter);
708 const char *type,
int lockOffset,
int locks) {
710 printf(
"%s [%d, %d] AIE2 locks are: ", type, col, row);
711 int lockAddr = tileAddr + lockOffset;
712 for (
int lock = 0; lock < locks; lock++) {
725 auto TileType = ctx->
XAieDevInst->DevOps->GetTTypefromLoc(
727 assert(TileType == XAIEGBL_TILE_TYPE_MEMTILE);
728 assert(ctx->
XAieConfig->AieGen == XAIE_DEV_GEN_AIEML);
730 int s2mm_current_bd[6];
731 int mm2s_current_bd[6];
733 for (
int i = 0; i < 6; i++) {
734 print_aie2_dmachannel_status(ctx, col, row,
"MemTileDMA",
"s2mm", i,
735 0x000A0660 + 4 * i, 0x000A0600 + 8 * i,
738 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x000A06B0 + (0x4 * i),
740 printf(
"MemTileDMA [%d, %d] s2mm%d write_count = %d\n", col, row, i,
743 for (
int i = 0; i < 6; i++)
744 print_aie2_dmachannel_status(ctx, col, row,
"MemTileDMA",
"mm2s", i,
745 0x000A0680 + 4 * i, 0x000A0630 + 8 * i,
750 for (
int bd = 0; bd < 8; bd++) {
754 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x000A0000 + (0x20 * bd),
756 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x000A0004 + (0x20 * bd),
758 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x000A001C + (0x20 * bd),
761 int bd_valid = (dma_bd_7 >> 31) & 0x1;
762 u32 nextBd = ((dma_bd_1 >> 20) & 0xF);
763 u32 useNextBd = ((dma_bd_1 >> 19) & 0x1);
764 int isPacket = (dma_bd_0 >> 31) & 0x1;
765 u32 packetID = (dma_bd_0 >> 23) & 0x1F;
766 u32 packetType = (dma_bd_0 >> 28) & 0x7;
767 int words_to_transfer = (dma_bd_0 & 0x1FFFF);
768 int base_address = dma_bd_1 & 0x7FFFF;
769 int acquireEnabled = (dma_bd_7 >> 12) & 0x1;
770 u32 acquireLock = dma_bd_7 & 0xff;
771 int acquireValue = (((int)dma_bd_7 << 17) >> 25);
772 u32 releaseLock = (dma_bd_7 >> 16) & 0xff;
773 int releaseValue = (((int)dma_bd_7 << 1) >> 25);
774 int releaseEnabled = releaseValue != 0;
776 print_bd(bd, bd_valid, nextBd, useNextBd, isPacket, packetID, packetType,
777 words_to_transfer, base_address, acquireEnabled, acquireLock,
778 acquireValue, releaseEnabled, releaseLock, releaseValue,
779 s2mm_current_bd, mm2s_current_bd, 6);
788 auto TileType = ctx->
XAieDevInst->DevOps->GetTTypefromLoc(
790 assert(TileType == XAIEGBL_TILE_TYPE_SHIMNOC);
792 const int num_bds = 2;
793 int s2mm_current_bd[num_bds];
794 int mm2s_current_bd[num_bds];
795 if (ctx->
XAieConfig->AieGen == XAIE_DEV_GEN_AIEML) {
796 for (
int i = 0; i < num_bds; i++) {
797 print_aie2_dmachannel_status(ctx, col, row,
"ShimDMA",
"s2mm", i,
798 0x0001D220 + 4 * i, 0x0001D200 + 8 * i,
801 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001D230 + (0x4 * i),
803 printf(
"ShimDMA [%d, %d] s2mm%d write_count = %d\n", col, row, i,
806 for (
int i = 0; i < num_bds; i++)
807 print_aie2_dmachannel_status(ctx, col, row,
"ShimDMA",
"mm2s", i,
808 0x0001D228 + 4 * i, 0x0001D210 + 8 * i,
811 u32 dma_mm2s_status, dma_s2mm_status;
812 u32 dma_mm2s0_control, dma_mm2s1_control;
813 u32 dma_s2mm0_control, dma_s2mm1_control;
814 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001D164, &dma_mm2s_status);
815 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001D160, &dma_s2mm_status);
816 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001D150, &dma_mm2s0_control);
817 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001D158, &dma_mm2s1_control);
818 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001D140, &dma_s2mm0_control);
819 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001D148, &dma_s2mm1_control);
821 u32 s2mm_ch0_running = dma_s2mm_status & 0x3;
822 u32 s2mm_ch1_running = (dma_s2mm_status >> 2) & 0x3;
823 u32 mm2s_ch0_running = dma_mm2s_status & 0x3;
824 u32 mm2s_ch1_running = (dma_mm2s_status >> 2) & 0x3;
825 s2mm_current_bd[0] = (dma_s2mm_status >> 16) & 0xf;
826 s2mm_current_bd[1] = (dma_s2mm_status >> 20) & 0xf;
827 mm2s_current_bd[0] = (dma_mm2s_status >> 16) & 0xf;
828 mm2s_current_bd[1] = (dma_mm2s_status >> 20) & 0xf;
829 u32 s2mm_ch0_stalled = (dma_s2mm_status >> 4) & 0x1;
830 u32 s2mm_ch1_stalled = (dma_s2mm_status >> 5) & 0x1;
831 u32 mm2s_ch0_stalled = (dma_mm2s_status >> 4) & 0x1;
832 u32 mm2s_ch1_stalled = (dma_mm2s_status >> 5) & 0x1;
834 printf(
"ShimDMA [%d, %d] AIE1 mm2s_status/0ctrl/1ctrl is %08X %02X %02X, "
835 "s2mm_status/0ctrl/1ctrl is %08X %02X %02X\n",
836 col, row, dma_mm2s_status, dma_mm2s0_control, dma_mm2s1_control,
837 dma_s2mm_status, dma_s2mm0_control, dma_s2mm1_control);
838 print_aie1_dmachannel_status(ctx, col, row,
"ShimDMA",
"s2mm", 0,
839 s2mm_ch0_running, s2mm_ch0_stalled);
840 print_aie1_dmachannel_status(ctx, col, row,
"ShimDMA",
"s2mm", 1,
841 s2mm_ch1_running, s2mm_ch1_stalled);
842 print_aie1_dmachannel_status(ctx, col, row,
"ShimDMA",
"mm2s", 0,
843 mm2s_ch0_running, mm2s_ch0_stalled);
844 print_aie1_dmachannel_status(ctx, col, row,
"ShimDMA",
"mm2s", 1,
845 mm2s_ch1_running, mm2s_ch1_stalled);
849 if (ctx->
XAieConfig->AieGen == XAIE_DEV_GEN_AIEML) {
851 int overflowAddr = tileAddr + 0x00014120;
852 int underflowAddr = tileAddr + 0x00014128;
853 u32 overflow, underflow;
856 printf(
" overflow?:%x underflow?:%x\n", overflow, underflow);
858 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x00014F00, &locks);
859 printf(
"ShimDMA [%d, %d] AIE1 locks are %08X\n", col, row, locks);
860 for (
int lock = 0; lock < 16; lock++) {
861 u32 two_bits = (locks >> (lock * 2)) & 0x3;
863 printf(
"Lock %d: ", lock);
864 u32 acquired = two_bits & 0x1;
865 u32 value = two_bits & 0x2;
868 printf(value ?
"1" :
"0");
874 for (
int bd = 0; bd < 8; bd++) {
875 int words_to_transfer;
880 int acquire_lockID, release_lockID;
881 int enable_lock_release;
882 int lock_release_val;
884 int enable_lock_acquire;
885 int lock_acquire_val;
888 if (ctx->
XAieConfig->AieGen == XAIE_DEV_GEN_AIEML) {
890 u32 dma_bd_buffer_length;
893 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001D000 + (0x20 * bd),
894 &dma_bd_buffer_length);
895 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001D004 + (0x20 * bd),
897 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001D008 + (0x20 * bd),
899 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001D01C + (0x20 * bd),
903 words_to_transfer = dma_bd_buffer_length;
905 u64(dma_bd_addr_low & 0xFFFC) + (u64(dma_bd_2 & 0xFF) << 32);
906 bd_valid = (dma_bd_7 >> 25) & 0x1;
907 use_next_bd = ((dma_bd_7 >> 26) & 0x1);
908 next_bd = ((dma_bd_7 >> 27) & 0xF);
909 acquire_lockID = ((dma_bd_7 >> 0) & 0xF);
910 release_lockID = ((dma_bd_7 >> 13) & 0xF);
911 lock_release_val = (s32(dma_bd_7) << 7) >> 25;
912 enable_lock_release = lock_release_val != 0;
914 lock_acquire_val = (s32(dma_bd_7) << 20) >> 25;
915 enable_lock_acquire = ((dma_bd_7 >> 12) & 0x1);
919 u32 dma_bd_buffer_length;
921 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001D000 + (0x14 * bd),
923 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001D004 + (0x14 * bd),
924 &dma_bd_buffer_length);
925 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001D008 + (0x14 * bd),
927 words_to_transfer = dma_bd_buffer_length;
929 (u64)dma_bd_addr_a + ((u64)((dma_bd_control >> 16) & 0xFFFF) << 32);
930 bd_valid = dma_bd_control & 0x1;
931 use_next_bd = ((dma_bd_control >> 15) & 0x1);
932 next_bd = ((dma_bd_control >> 11) & 0xF);
933 release_lockID = acquire_lockID = ((dma_bd_control >> 7) & 0xF);
934 enable_lock_release = ((dma_bd_control >> 6) & 0x1);
935 lock_release_val = ((dma_bd_control >> 5) & 0x1);
936 use_release_val = ((dma_bd_control >> 4) & 0x1);
937 enable_lock_acquire = ((dma_bd_control >> 3) & 0x1);
938 lock_acquire_val = ((dma_bd_control >> 2) & 0x1);
939 use_acquire_val = ((dma_bd_control >> 1) & 0x1);
941 bool isPacket =
false;
944 print_bd(bd, bd_valid, next_bd, use_next_bd, isPacket, packetID, packetType,
945 words_to_transfer, base_address, enable_lock_acquire,
946 acquire_lockID, lock_acquire_val, enable_lock_release,
947 release_lockID, lock_release_val, s2mm_current_bd, mm2s_current_bd,
958 u32 status, coreTimerLow, PC, LR, SP, locks, R0, R4;
960 if (ctx->
XAieConfig->AieGen == XAIE_DEV_GEN_AIEML) {
961 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x032004, &status);
962 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0340F8, &coreTimerLow);
963 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x00031100, &PC);
964 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x00031130, &LR);
965 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x00031120, &SP);
966 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x000340D8, &trace_status);
968 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x00030C00, &R0);
969 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x00030C40, &R4);
972 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x032004, &status);
973 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0340F8, &coreTimerLow);
974 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x00030280, &PC);
975 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x000302B0, &LR);
976 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x000302A0, &SP);
977 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x000140D8, &trace_status);
979 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x00030000, &R0);
980 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x00030040, &R4);
982 printf(
"Core [%d, %d] status is %08X, timer is %u, PC is %08X"
983 ", LR is %08X, SP is %08X, R0 is %08X,R4 is %08X\n",
984 col, row, status, coreTimerLow, PC, LR, SP, R0, R4);
985 printf(
"Core [%d, %d] trace status is %08X\n", col, row, trace_status);
987 if (ctx->
XAieConfig->AieGen == XAIE_DEV_GEN_AIEML) {
990 XAie_Read32(ctx->
XAieDevInst, tileAddr + 0x0001EF00, &locks);
991 printf(
"Core [%d, %d] AIE1 locks are %08X\n", col, row, locks);
992 for (
int lock = 0; lock < 16; lock++) {
993 u32 two_bits = (locks >> (lock * 2)) & 0x3;
995 printf(
"Lock %d: ", lock);
996 u32 acquired = two_bits & 0x1;
997 u32 value = two_bits & 0x2;
1000 printf(value ?
"1" :
"0");
1007 const char *core_status_strings[] = {
1022 "Cascade Stall Slave",
1023 "Cascade Stall Master",
1029 "Core Processor Bus Stall",
1032 printf(
"Core Status: ");
1033 for (
int i = 0; i <= 21; i++) {
1034 if ((status >> i) & 0x1)
1035 printf(
"%s ", core_status_strings[i]);
1040static void clear_range(XAie_DevInst *devInst, u64 tileAddr, u64 low,
1042 for (
int i = low; i <= high; i += 4) {
1043 XAie_Write32(devInst, tileAddr + i, 0);
1061 XAie_CoreDisable(ctx->
XAieDevInst, XAie_TileLoc(col, row));
1064 clear_range(ctx->
XAieDevInst, tileAddr, 0x20000, 0x200FF);
1066 clear_range(ctx->
XAieDevInst, tileAddr, 0x1D000, 0x1D1F8);
1067 XAie_Write32(ctx->
XAieDevInst, tileAddr + 0x1DE00, 0);
1068 XAie_Write32(ctx->
XAieDevInst, tileAddr + 0x1DE08, 0);
1069 XAie_Write32(ctx->
XAieDevInst, tileAddr + 0x1DE10, 0);
1070 XAie_Write32(ctx->
XAieDevInst, tileAddr + 0x1DE08, 0);
1072 clear_range(ctx->
XAieDevInst, tileAddr, 0x3F000, 0x3F060);
1074 clear_range(ctx->
XAieDevInst, tileAddr, 0x3F100, 0x3F168);
1076 clear_range(ctx->
XAieDevInst, tileAddr, 0x3F200, 0x3F3AC);
1079 XAie_CoreEnable(ctx->
XAieDevInst, XAie_TileLoc(col, row));
1089 clear_range(ctx->
XAieDevInst, tileAddr, 0x1D000, 0x1D13C);
1090 XAie_Write32(ctx->
XAieDevInst, tileAddr + 0x1D140, 0);
1091 XAie_Write32(ctx->
XAieDevInst, tileAddr + 0x1D148, 0);
1092 XAie_Write32(ctx->
XAieDevInst, tileAddr + 0x1D150, 0);
1093 XAie_Write32(ctx->
XAieDevInst, tileAddr + 0x1D158, 0);
1096 clear_range(ctx->
XAieDevInst, tileAddr, 0x3F000, 0x3F058);
1098 clear_range(ctx->
XAieDevInst, tileAddr, 0x3F100, 0x3F15C);
1100 clear_range(ctx->
XAieDevInst, tileAddr, 0x3F200, 0x3F37C);
1116 for (
int i = 0; i < n; i++) {
1117 total_0 += performance_counter[i];
1120 float mean_0 = (float)total_0 / n;
1124 for (
int i = 0; i < n; i++) {
1125 float x = (float)performance_counter[i] - mean_0;
1129 sdev_0 = sqrtf(sdev_0 / n);
1131 printf(
"Mean and Standard Devation: %f, %f \n", mean_0, sdev_0);
@ AIR_PKT_TYPE_DEVICE_INITIALIZE
#define AIR_ADDRESS_ABSOLUTE_RANGE
XAie_DevInst * XAieDevInst
void mlir_aie_print_memtiledma_status(aie_libxaie_ctx_t *ctx, int col, int row)
Print a summary of the status of the given MemTile DMA.
int mlir_aie_acquire_lock(aie_libxaie_ctx_t *ctx, int col, int row, int lockid, int lockval, int timeout)
Acquire a physical lock.
u32 mlir_aie_data_mem_rd_word(aie_libxaie_ctx_t *ctx, int col, int row, u64 addr)
Read a value from the data memory of a particular tile memory.
void mlir_aie_clear_tile_memory(aie_libxaie_ctx_t *ctx, int col, int row)
Fill the tile memory of the given tile with zeros.
void print_aie2_lock_status(aie_libxaie_ctx_t *ctx, int col, int row, const char *type, int lockOffset, int locks)
u64 mlir_aie_get_tile_addr(aie_libxaie_ctx_t *ctx, int col, int row)
Return the base address of the given tile.
void mlir_aie_clear_shim_config(aie_libxaie_ctx_t *ctx, int col, int row)
Clear the configuration of the given shim tile.
void mlir_aie_clear_config(aie_libxaie_ctx_t *ctx, int col, int row)
Clear the configuration of the given (non-shim) tile.
void mlir_aie_print_dma_status(aie_libxaie_ctx_t *ctx, int col, int row)
Print a summary of the status of the given Tile DMA.
int mlir_aie_release_lock(aie_libxaie_ctx_t *ctx, int col, int row, int lockid, int lockval, int timeout)
Release a physical lock.
int mlir_aie_init_device(aie_libxaie_ctx_t *ctx, uint32_t device_id)
Initialize the device represented by the context.
void mlir_aie_print_shimdma_status(aie_libxaie_ctx_t *ctx, int col, int row)
Print a summary of the status of the given Shim DMA.
u32 mlir_aie_read32(aie_libxaie_ctx_t *ctx, u64 addr)
Read the AIE configuration memory at the given physical address.
void mlir_aie_dump_tile_memory(aie_libxaie_ctx_t *ctx, int col, int row)
Dump the tile memory of the given tile. Values that are zero are not shown.
void computeStats(u32 performance_counter[], int n)
Given an array of values, compute and print statistics about those values.
void mlir_aie_write32(aie_libxaie_ctx_t *ctx, u64 addr, u32 val)
Write the AIE configuration memory at the given physical address.
void mlir_aie_data_mem_wr_word(aie_libxaie_ctx_t *ctx, int col, int row, u64 addr, u32 data)
Write a value to the data memory of a particular tile memory.
void mlir_aie_deinit_libxaie(aie_libxaie_ctx_t *ctx)
Release access to the libXAIE context.
void mlir_aie_print_tile_status(aie_libxaie_ctx_t *ctx, int col, int row)
Print the status of a core represented by the given tile, at the given coordinates.