25#define SYSFS_PATH_MAX 63
28hsa_status_t mlir_aie_packet_req_translation(hsa_agent_dispatch_packet_t *pkt,
35 pkt->header = (HSA_PACKET_TYPE_AGENT_DISPATCH << HSA_PACKET_HEADER_TYPE);
37 return HSA_STATUS_SUCCESS;
40hsa_status_t mlir_aie_packet_nd_memcpy(
41 hsa_agent_dispatch_packet_t *pkt, uint16_t herd_id, uint8_t col,
42 uint8_t direction, uint8_t channel, uint8_t burst_len, uint8_t memory_space,
43 uint64_t phys_addr, uint32_t transfer_length1d, uint32_t transfer_length2d,
44 uint32_t transfer_stride2d, uint32_t transfer_length3d,
45 uint32_t transfer_stride3d, uint32_t transfer_length4d,
46 uint32_t transfer_stride4d) {
49 pkt->arg[0] |= ((uint64_t)memory_space) << 16;
50 pkt->arg[0] |= ((uint64_t)channel) << 24;
51 pkt->arg[0] |= ((uint64_t)col) << 32;
52 pkt->arg[0] |= ((uint64_t)burst_len) << 52;
53 pkt->arg[0] |= ((uint64_t)direction) << 60;
55 pkt->arg[1] = phys_addr;
56 pkt->arg[2] = transfer_length1d;
57 pkt->arg[2] |= ((uint64_t)transfer_length2d) << 32;
58 pkt->arg[2] |= ((uint64_t)transfer_stride2d) << 48;
59 pkt->arg[3] = transfer_length3d;
60 pkt->arg[3] |= ((uint64_t)transfer_stride3d) << 16;
61 pkt->arg[3] |= ((uint64_t)transfer_length4d) << 32;
62 pkt->arg[3] |= ((uint64_t)transfer_stride4d) << 48;
65 pkt->header = (HSA_PACKET_TYPE_AGENT_DISPATCH << HSA_PACKET_HEADER_TYPE);
67 return HSA_STATUS_SUCCESS;
70hsa_status_t get_aie_agents(hsa_agent_t agent,
void *data) {
71 hsa_status_t status(HSA_STATUS_SUCCESS);
72 hsa_device_type_t device_type;
73 std::vector<hsa_agent_t> *aie_agents(
nullptr);
76 status = HSA_STATUS_ERROR_INVALID_ARGUMENT;
80 aie_agents =
static_cast<std::vector<hsa_agent_t> *
>(data);
81 status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
83 if (status != HSA_STATUS_SUCCESS) {
84 printf(
"%s [ERROR] We got a status of 0x%x from hsa_agent_get_info\n",
89 if (device_type == HSA_DEVICE_TYPE_AIE) {
90 aie_agents->push_back(agent);
96hsa_status_t get_global_mem_pool(hsa_amd_memory_pool_t pool,
void *data) {
97 hsa_status_t status(HSA_STATUS_SUCCESS);
98 hsa_region_segment_t segment_type;
99 status = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT,
101 if (segment_type == HSA_REGION_SEGMENT_GLOBAL) {
102 *
reinterpret_cast<hsa_amd_memory_pool_t *
>(data) = pool;
108hsa_status_t mlir_aie_queue_dispatch_and_wait(
109 hsa_agent_t *agent, hsa_queue_t *q, uint64_t packet_id, uint64_t doorbell,
110 hsa_agent_dispatch_packet_t *pkt,
bool destroy_signal) {
114 hsa_amd_signal_create_on_agent(1, 0,
nullptr, agent, 0,
115 &(pkt->completion_signal));
118 mlir_aie_write_pkt<hsa_agent_dispatch_packet_t>(q, packet_id, pkt);
121 hsa_signal_store_screlease(q->doorbell_signal, doorbell);
124 while (hsa_signal_wait_scacquire(pkt->completion_signal,
125 HSA_SIGNAL_CONDITION_EQ, 0, 0x80000,
126 HSA_WAIT_STATE_ACTIVE) != 0)
130 if (destroy_signal) {
131 hsa_signal_destroy(pkt->completion_signal);
134 return HSA_STATUS_SUCCESS;
137hsa_status_t mlir_aie_packet_device_init(hsa_agent_dispatch_packet_t *pkt,
142 pkt->arg[0] |= ((uint64_t)num_cols << 40);
145 pkt->header = (HSA_PACKET_TYPE_AGENT_DISPATCH << HSA_PACKET_HEADER_TYPE);
147 return HSA_STATUS_SUCCESS;
154 AieRC RC = XAie_Finish(&(ctx->
DevInst));
156 printf(
"Failed to finish tiles.\n");
160 if (ctx->cmd_queue != NULL) {
161 hsa_queue_destroy(ctx->cmd_queue);
176 printf(
"[ERROR] %s: Passed context of NULL\n", __func__);
181 hsa_status_t hsa_ret = hsa_init();
182 if (hsa_ret != HSA_STATUS_SUCCESS) {
183 printf(
"hsa_init failed\n");
188 hsa_status_t iterate_agents_ret = hsa_iterate_agents(
189 &get_aie_agents,
reinterpret_cast<void *
>(&(ctx->agents)));
190 if (iterate_agents_ret != HSA_STATUS_SUCCESS) {
191 printf(
"iterate_agents failed with opcode 0x%x\n", iterate_agents_ret);
196 if (ctx->agents.empty()) {
197 printf(
"No agents found. Exiting.\n");
202 hsa_amd_agent_iterate_memory_pools(
203 ctx->agents.front(), get_global_mem_pool,
204 reinterpret_cast<void *
>(&(ctx->global_mem_pool)));
207 hsa_queue_t *q =
nullptr;
208 int aie_max_queue_size = 0;
209 hsa_agent_get_info(ctx->agents[0], HSA_AGENT_INFO_QUEUE_MAX_SIZE,
210 &aie_max_queue_size);
212 auto queue_create_status =
213 hsa_queue_create(ctx->agents[0], aie_max_queue_size,
214 HSA_QUEUE_TYPE_SINGLE,
nullptr,
nullptr, 0, 0, &q);
216 if (queue_create_status != HSA_STATUS_SUCCESS) {
217 printf(
"Failed to create queue. Exiting\n");
222 uint64_t wr_idx = hsa_queue_add_write_index_relaxed(q, 1);
223 uint64_t packet_id = wr_idx % q->size;
224 hsa_agent_dispatch_packet_t shim_pkt;
225 mlir_aie_packet_device_init(&shim_pkt, 50);
226 mlir_aie_queue_dispatch_and_wait(&(ctx->agents[0]), q, packet_id, wr_idx,
234 if (snprintf(sysfs_path,
SYSFS_PATH_MAX,
"/sys/class/amdair/amdair/%02u",
239 XAie_BackendType backend;
241 backend = XAIE_IO_BACKEND_AMDAIR;
243 ctx->
DevInst.IOInst = (
void *)sysfs_path;
249 printf(
"Driver initialization failed.\n");
255 const XAie_Backend *Backend = ctx->
DevInst.Backend;
256 if (Backend->Type != XAIE_IO_BACKEND_SIM) {
257 RC = XAie_PmRequestTiles(&(ctx->
DevInst), NULL, 0);
259 printf(
"Failed to request tiles.\n");
264 RC = XAie_Finish(&(ctx->
DevInst));
266 printf(
"Failed to finish tiles.\n");
273 ctx->
DevInst.IOInst = (
void *)sysfs_path;
278 printf(
"Driver initialization failed.\n");
281 RC = XAie_PmRequestTiles(&(ctx->
DevInst), NULL, 0);
283 printf(
"Failed to request tiles.\n");
288 if (Backend->Type == XAIE_IO_BACKEND_SIM) {
289 printf(
"Turning ecc off\n");
290 XAie_TurnEccOff(&(ctx->
DevInst));
305 int lockval,
int timeout) {
306 return (XAie_LockAcquire(&(ctx->
DevInst), XAie_TileLoc(col, row),
307 XAie_LockInit(lockid, lockval), timeout) == XAIE_OK);
319 int lockval,
int timeout) {
320 return (XAie_LockRelease(&(ctx->
DevInst), XAie_TileLoc(col, row),
321 XAie_LockInit(lockid, lockval), timeout) == XAIE_OK);
327 XAie_Read32(&(ctx->
DevInst), addr, &val);
335 XAie_Write32(&(ctx->
DevInst), addr, val);
344 XAie_DataMemRdWord(&(ctx->
DevInst), XAie_TileLoc(col, row), addr, &data);
352 u64 addr, u32 data) {
353 XAie_DataMemWrWord(&(ctx->
DevInst), XAie_TileLoc(col, row), addr, data);
360 return (((u64)row & 0xFFU) << ctx->
DevInst.DevProp.RowShift) |
361 (((u64)col & 0xFFU) << ctx->
DevInst.DevProp.ColShift);
367 for (
int i = 0; i < 0x2000; i++) {
369 AieRC rc = XAie_DataMemRdWord(&(ctx->
DevInst), XAie_TileLoc(col, row),
371 if (rc == XAIE_OK && d != 0)
372 printf(
"Tile[%d][%d]: mem[%d] = %d\n", col, row, i, d);
379 for (
int i = 0; i < 0x2000; i++) {
380 XAie_DataMemWrWord(&(ctx->
DevInst), XAie_TileLoc(col, row), (i * 4), 0);
385 int row,
const char *dmatype,
386 const char *channel,
int channelNum,
387 int running,
int stalled) {
388 printf(
"%s [%d, %d] AIE1 %s%d ", dmatype, col, row, channel, channelNum);
401 printf(
"Stalled on lock");
407 int row,
const char *dmatype,
408 const char *channel,
int channelNum,
409 u32 statusOffset, u32 controlOffset,
413 XAie_Read32(&(ctx->
DevInst), tileAddr + statusOffset, &status);
414 XAie_Read32(&(ctx->
DevInst), tileAddr + controlOffset, &control);
415 u32 running = status & 0x3;
416 u32 stalled_acq = (status >> 2) & 0x1;
417 u32 stalled_rel = (status >> 3) & 0x1;
418 u32 stalled_data = (status >> 4) & 0x1;
419 u32 stalled_complete = (status >> 5) & 0x1;
420 current_bd = status >> 24;
421 printf(
"%s [%d, %d] AIE2 %s%d ", dmatype, col, row, channel, channelNum);
434 printf(
"Stalled on Acquire ");
436 printf(
"Stalled on Release ");
438 printf(
"Stalled on Data ");
439 if (stalled_complete)
440 printf(
"Stalled on Completion ");
441 printf(
"status:%08X ctrl:%02X\n", status, control);
444static void print_bd(
int bd,
int bd_valid, u32 nextBd, u32 useNextBd,
445 int isPacket, u32 packetID, u32 packetType,
446 int words_to_transfer,
int base_address,
447 int acquireEnabled, u32 acquireLock,
int acquireValue,
448 int releaseEnabled, u32 releaseLock,
int releaseValue,
449 int s2mm_current_bd[],
int mm2s_current_bd[],
453 printf(
"BD %d valid ", bd);
455 printf(
"(Next BD: %d)\n", nextBd);
457 printf(
"(Last BD)\n");
459 for (
int i = 0; i < numchannels; i++) {
460 if (bd == s2mm_current_bd[i]) {
461 printf(
" * Current BD for s2mm channel %d\n", i);
463 if (bd == mm2s_current_bd[i]) {
464 printf(
" * Current BD for mm2s channel %d\n", i);
469 printf(
" Packet ID: %02X\n", packetID);
470 printf(
" Packet Type: %01X\n", packetType);
472 printf(
" Transferring %d 32 bit words to/from byte address %06X\n",
473 words_to_transfer, base_address * 4);
483 if (acquireEnabled) {
484 printf(
" Acquires lock %d ", acquireLock);
485 printf(
"with value %d\n", acquireValue);
487 if (releaseEnabled) {
488 printf(
" Releases lock %d ", releaseLock);
489 printf(
"with value %d\n", releaseValue);
510 auto TileType = ctx->
DevInst.DevOps->GetTTypefromLoc(&(ctx->
DevInst),
511 XAie_TileLoc(col, row));
512 assert(TileType == XAIEGBL_TILE_TYPE_AIETILE);
515 const int num_bds = 2;
516 int s2mm_current_bd[num_bds];
517 int mm2s_current_bd[num_bds];
519 for (
int i = 0; i < num_bds; i++) {
520 print_aie2_dmachannel_status(ctx, col, row,
"DMA",
"s2mm", i,
521 0x0001DF00 + 4 * i, 0x0001DE00 + 8 * i,
524 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001D230 + (0x4 * i),
526 printf(
"DMA [%d, %d] s2mm%d write_count = %d\n", col, row, i,
529 for (
int i = 0; i < num_bds; i++)
530 print_aie2_dmachannel_status(ctx, col, row,
"DMA",
"mm2s", i,
531 0x0001DF10 + 4 * i, 0x0001DE10 + 8 * i,
534 for (
int bd = 0; bd < 8; bd++) {
538 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001D000 + (0x20 * bd),
540 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001D004 + (0x20 * bd),
542 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001D014 + (0x20 * bd),
545 int bd_valid = (dma_bd_control >> 25) & 0x1;
546 u32 nextBd = ((dma_bd_control >> 27) & 0xF);
547 u32 useNextBd = ((dma_bd_control >> 26) & 0x1);
548 int isPacket = (dma_bd_packet >> 30) & 0x1;
549 u32 packetID = (dma_bd_packet >> 19) & 0x1F;
550 u32 packetType = (dma_bd_packet >> 16) & 0x7;
551 int words_to_transfer = (dma_bd_addr & 0x3FFF);
552 int base_address = dma_bd_addr >> 14;
553 int acquireEnabled = (dma_bd_control >> 12) & 0x1;
554 u32 acquireLock = dma_bd_control & 0xf;
555 int acquireValue = (((int)dma_bd_control << 20) >> 25);
556 u32 releaseLock = (dma_bd_control >> 13) & 0xf;
557 int releaseValue = (((int)dma_bd_control << 7) >> 25);
558 int releaseEnabled = releaseValue != 0;
560 print_bd(bd, bd_valid, nextBd, useNextBd, isPacket, packetID, packetType,
561 words_to_transfer, base_address, acquireEnabled, acquireLock,
562 acquireValue, releaseEnabled, releaseLock, releaseValue,
563 s2mm_current_bd, mm2s_current_bd, num_bds);
567 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001DF10, &dma_mm2s_status);
569 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001DF00, &dma_s2mm_status);
570 u32 dma_mm2s0_control;
571 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001DE10, &dma_mm2s0_control);
572 u32 dma_mm2s1_control;
573 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001DE18, &dma_mm2s1_control);
574 u32 dma_s2mm0_control;
575 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001DE00, &dma_s2mm0_control);
576 u32 dma_s2mm1_control;
577 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001DE08, &dma_s2mm1_control);
579 u32 s2mm_ch0_running = dma_s2mm_status & 0x3;
580 u32 s2mm_ch1_running = (dma_s2mm_status >> 2) & 0x3;
581 u32 mm2s_ch0_running = dma_mm2s_status & 0x3;
582 u32 mm2s_ch1_running = (dma_mm2s_status >> 2) & 0x3;
583 int s2mm0_current_bd, s2mm1_current_bd;
584 int mm2s0_current_bd, mm2s1_current_bd;
585 s2mm0_current_bd = (dma_s2mm_status >> 16) & 0xf;
586 s2mm1_current_bd = (dma_s2mm_status >> 20) & 0xf;
587 mm2s0_current_bd = (dma_mm2s_status >> 16) & 0xf;
588 mm2s1_current_bd = (dma_mm2s_status >> 20) & 0xf;
589 u32 s2mm_ch0_stalled = (dma_s2mm_status >> 4) & 0x1;
590 u32 s2mm_ch1_stalled = (dma_s2mm_status >> 5) & 0x1;
591 u32 mm2s_ch0_stalled = (dma_mm2s_status >> 4) & 0x1;
592 u32 mm2s_ch1_stalled = (dma_mm2s_status >> 5) & 0x1;
594 printf(
"DMA [%d, %d] mm2s_status/0ctrl/1ctrl is %08X %02X %02X, "
595 "s2mm_status/0ctrl/1ctrl is %08X %02X %02X\n",
596 col, row, dma_mm2s_status, dma_mm2s0_control, dma_mm2s1_control,
597 dma_s2mm_status, dma_s2mm0_control, dma_s2mm1_control);
598 print_aie1_dmachannel_status(ctx, col, row,
"DMA",
"s2mm", 0,
599 s2mm_ch0_running, s2mm_ch0_stalled);
600 print_aie1_dmachannel_status(ctx, col, row,
"DMA",
"s2mm", 1,
601 s2mm_ch1_running, s2mm_ch1_stalled);
602 print_aie1_dmachannel_status(ctx, col, row,
"DMA",
"mm2s", 0,
603 mm2s_ch0_running, mm2s_ch0_stalled);
604 print_aie1_dmachannel_status(ctx, col, row,
"DMA",
"mm2s", 1,
605 mm2s_ch1_running, mm2s_ch1_stalled);
606 for (
int bd = 0; bd < 8; bd++) {
608 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001D000 + (0x20 * bd),
611 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001D018 + (0x20 * bd),
615 if ((dma_bd_control >> 31) & 0x1 && (dma_bd_control != 0xFFFFFFFF)) {
616 printf(
"BD %d valid ", bd);
617 u32 nextBd = ((dma_bd_control >> 13) & 0xF);
618 u32 useNextBd = ((dma_bd_control >> 17) & 0x1);
620 printf(
"(Next BD: %d)\n", nextBd);
622 printf(
"(Last BD)\n");
624 if (bd == s2mm0_current_bd) {
625 printf(
" * Current BD for s2mm channel 0\n");
627 if (bd == s2mm1_current_bd) {
628 printf(
" * Current BD for s2mm channel 1\n");
630 if (bd == mm2s0_current_bd) {
631 printf(
" * Current BD for mm2s channel 0\n");
633 if (bd == mm2s1_current_bd) {
634 printf(
" * Current BD for mm2s channel 1\n");
637 if (dma_bd_control & 0x08000000) {
639 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001D010 + (0x20 * bd),
641 printf(
" Packet mode: %02X\n", dma_packet & 0x1F);
643 int words_to_transfer = 1 + (dma_bd_control & 0x1FFF);
644 int base_address = dma_bd_addr_a & 0x1FFF;
645 printf(
" Transferring %d 32 bit words to/from byte address %06X\n",
646 words_to_transfer, base_address * 4);
649 for (
int w = 0; w < 7; w++) {
651 XAie_DataMemRdWord(&(ctx->
DevInst), XAie_TileLoc(col, row),
652 (base_address + w) * 4, &tmpd);
653 printf(
"%08X ", tmpd);
656 int hasAcquire = (dma_bd_addr_a >> 18) & 0x1;
657 int hasRelease = (dma_bd_addr_a >> 21) & 0x1;
658 if (hasAcquire || hasRelease) {
659 u32 lock_id = (dma_bd_addr_a >> 22) & 0xf;
661 printf(
" Acquires lock %d ", lock_id);
662 if ((dma_bd_addr_a >> 16) & 0x1)
663 printf(
"with value %d ", (dma_bd_addr_a >> 17) & 0x1);
666 printf(
" Releases lock %d ", lock_id);
667 if ((dma_bd_addr_a >> 19) & 0x1)
668 printf(
"with value %d ", (dma_bd_addr_a >> 20) & 0x1);
671 printf(
"currently ");
673 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001EF00, &locks);
674 u32 two_bits = (locks >> (lock_id * 2)) & 0x3;
676 u32 acquired = two_bits & 0x1;
677 u32 value = two_bits & 0x2;
680 printf(value ?
"1" :
"0");
686 if (dma_bd_control & 0x30000000) {
687 int FIFO = (dma_bd_control >> 28) & 0x3;
688 u32 dma_fifo_counter;
689 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001DF20,
691 printf(
" Using FIFO Cnt%d : %08X\n", FIFO, dma_fifo_counter);
699 const char *type,
int lockOffset,
int locks) {
701 printf(
"%s [%d, %d] AIE2 locks are: ", type, col, row);
702 int lockAddr = tileAddr + lockOffset;
703 for (
int lock = 0; lock < locks; lock++) {
705 XAie_Read32(&(ctx->
DevInst), lockAddr, &val);
716 auto TileType = ctx->
DevInst.DevOps->GetTTypefromLoc(&(ctx->
DevInst),
717 XAie_TileLoc(col, row));
718 assert(TileType == XAIEGBL_TILE_TYPE_MEMTILE);
721 int s2mm_current_bd[6];
722 int mm2s_current_bd[6];
724 for (
int i = 0; i < 6; i++) {
725 print_aie2_dmachannel_status(ctx, col, row,
"MemTileDMA",
"s2mm", i,
726 0x000A0660 + 4 * i, 0x000A0600 + 8 * i,
729 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x000A06B0 + (0x4 * i),
731 printf(
"MemTileDMA [%d, %d] s2mm%d write_count = %d\n", col, row, i,
734 for (
int i = 0; i < 6; i++)
735 print_aie2_dmachannel_status(ctx, col, row,
"MemTileDMA",
"mm2s", i,
736 0x000A0680 + 4 * i, 0x000A0630 + 8 * i,
741 for (
int bd = 0; bd < 8; bd++) {
745 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x000A0000 + (0x20 * bd),
747 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x000A0004 + (0x20 * bd),
749 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x000A001C + (0x20 * bd),
752 int bd_valid = (dma_bd_7 >> 31) & 0x1;
753 u32 nextBd = ((dma_bd_1 >> 20) & 0xF);
754 u32 useNextBd = ((dma_bd_1 >> 19) & 0x1);
755 int isPacket = (dma_bd_0 >> 31) & 0x1;
756 u32 packetID = (dma_bd_0 >> 23) & 0x1F;
757 u32 packetType = (dma_bd_0 >> 28) & 0x7;
758 int words_to_transfer = (dma_bd_0 & 0x1FFFF);
759 int base_address = dma_bd_1 & 0x7FFFF;
760 int acquireEnabled = (dma_bd_7 >> 12) & 0x1;
761 u32 acquireLock = dma_bd_7 & 0xff;
762 int acquireValue = (((int)dma_bd_7 << 17) >> 25);
763 u32 releaseLock = (dma_bd_7 >> 16) & 0xff;
764 int releaseValue = (((int)dma_bd_7 << 1) >> 25);
765 int releaseEnabled = releaseValue != 0;
767 print_bd(bd, bd_valid, nextBd, useNextBd, isPacket, packetID, packetType,
768 words_to_transfer, base_address, acquireEnabled, acquireLock,
769 acquireValue, releaseEnabled, releaseLock, releaseValue,
770 s2mm_current_bd, mm2s_current_bd, 6);
779 auto TileType = ctx->
DevInst.DevOps->GetTTypefromLoc(&(ctx->
DevInst),
780 XAie_TileLoc(col, row));
781 assert(TileType == XAIEGBL_TILE_TYPE_SHIMNOC);
783 const int num_bds = 2;
784 int s2mm_current_bd[num_bds];
785 int mm2s_current_bd[num_bds];
787 for (
int i = 0; i < num_bds; i++) {
788 print_aie2_dmachannel_status(ctx, col, row,
"ShimDMA",
"s2mm", i,
789 0x0001D220 + 4 * i, 0x0001D200 + 8 * i,
792 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001D230 + (0x4 * i),
794 printf(
"ShimDMA [%d, %d] s2mm%d write_count = %d\n", col, row, i,
797 for (
int i = 0; i < num_bds; i++)
798 print_aie2_dmachannel_status(ctx, col, row,
"ShimDMA",
"mm2s", i,
799 0x0001D228 + 4 * i, 0x0001D210 + 8 * i,
802 u32 dma_mm2s_status, dma_s2mm_status;
803 u32 dma_mm2s0_control, dma_mm2s1_control;
804 u32 dma_s2mm0_control, dma_s2mm1_control;
805 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001D164, &dma_mm2s_status);
806 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001D160, &dma_s2mm_status);
807 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001D150, &dma_mm2s0_control);
808 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001D158, &dma_mm2s1_control);
809 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001D140, &dma_s2mm0_control);
810 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001D148, &dma_s2mm1_control);
812 u32 s2mm_ch0_running = dma_s2mm_status & 0x3;
813 u32 s2mm_ch1_running = (dma_s2mm_status >> 2) & 0x3;
814 u32 mm2s_ch0_running = dma_mm2s_status & 0x3;
815 u32 mm2s_ch1_running = (dma_mm2s_status >> 2) & 0x3;
816 s2mm_current_bd[0] = (dma_s2mm_status >> 16) & 0xf;
817 s2mm_current_bd[1] = (dma_s2mm_status >> 20) & 0xf;
818 mm2s_current_bd[0] = (dma_mm2s_status >> 16) & 0xf;
819 mm2s_current_bd[1] = (dma_mm2s_status >> 20) & 0xf;
820 u32 s2mm_ch0_stalled = (dma_s2mm_status >> 4) & 0x1;
821 u32 s2mm_ch1_stalled = (dma_s2mm_status >> 5) & 0x1;
822 u32 mm2s_ch0_stalled = (dma_mm2s_status >> 4) & 0x1;
823 u32 mm2s_ch1_stalled = (dma_mm2s_status >> 5) & 0x1;
825 printf(
"ShimDMA [%d, %d] AIE1 mm2s_status/0ctrl/1ctrl is %08X %02X %02X, "
826 "s2mm_status/0ctrl/1ctrl is %08X %02X %02X\n",
827 col, row, dma_mm2s_status, dma_mm2s0_control, dma_mm2s1_control,
828 dma_s2mm_status, dma_s2mm0_control, dma_s2mm1_control);
829 print_aie1_dmachannel_status(ctx, col, row,
"ShimDMA",
"s2mm", 0,
830 s2mm_ch0_running, s2mm_ch0_stalled);
831 print_aie1_dmachannel_status(ctx, col, row,
"ShimDMA",
"s2mm", 1,
832 s2mm_ch1_running, s2mm_ch1_stalled);
833 print_aie1_dmachannel_status(ctx, col, row,
"ShimDMA",
"mm2s", 0,
834 mm2s_ch0_running, mm2s_ch0_stalled);
835 print_aie1_dmachannel_status(ctx, col, row,
"ShimDMA",
"mm2s", 1,
836 mm2s_ch1_running, mm2s_ch1_stalled);
842 int overflowAddr = tileAddr + 0x00014120;
843 int underflowAddr = tileAddr + 0x00014128;
844 u32 overflow, underflow;
847 printf(
" overflow?:%x underflow?:%x\n", overflow, underflow);
849 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x00014F00, &locks);
850 printf(
"ShimDMA [%d, %d] AIE1 locks are %08X\n", col, row, locks);
851 for (
int lock = 0; lock < 16; lock++) {
852 u32 two_bits = (locks >> (lock * 2)) & 0x3;
854 printf(
"Lock %d: ", lock);
855 u32 acquired = two_bits & 0x1;
856 u32 value = two_bits & 0x2;
859 printf(value ?
"1" :
"0");
865 for (
int bd = 0; bd < 8; bd++) {
866 int words_to_transfer;
871 int acquire_lockID, release_lockID;
872 int enable_lock_release;
873 int lock_release_val;
875 int enable_lock_acquire;
876 int lock_acquire_val;
881 u32 dma_bd_buffer_length;
884 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001D000 + (0x20 * bd),
885 &dma_bd_buffer_length);
886 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001D004 + (0x20 * bd),
888 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001D008 + (0x20 * bd),
890 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001D01C + (0x20 * bd),
894 words_to_transfer = dma_bd_buffer_length;
896 u64(dma_bd_addr_low & 0xFFFC) + (u64(dma_bd_2 & 0xFF) << 32);
897 bd_valid = (dma_bd_7 >> 25) & 0x1;
898 use_next_bd = ((dma_bd_7 >> 26) & 0x1);
899 next_bd = ((dma_bd_7 >> 27) & 0xF);
900 acquire_lockID = ((dma_bd_7 >> 0) & 0xF);
901 release_lockID = ((dma_bd_7 >> 13) & 0xF);
902 lock_release_val = (s32(dma_bd_7) << 7) >> 25;
903 enable_lock_release = lock_release_val != 0;
905 lock_acquire_val = (s32(dma_bd_7) << 20) >> 25;
906 enable_lock_acquire = ((dma_bd_7 >> 12) & 0x1);
910 u32 dma_bd_buffer_length;
912 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001D000 + (0x14 * bd),
914 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001D004 + (0x14 * bd),
915 &dma_bd_buffer_length);
916 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001D008 + (0x14 * bd),
918 words_to_transfer = dma_bd_buffer_length;
920 (u64)dma_bd_addr_a + ((u64)((dma_bd_control >> 16) & 0xFFFF) << 32);
921 bd_valid = dma_bd_control & 0x1;
922 use_next_bd = ((dma_bd_control >> 15) & 0x1);
923 next_bd = ((dma_bd_control >> 11) & 0xF);
924 release_lockID = acquire_lockID = ((dma_bd_control >> 7) & 0xF);
925 enable_lock_release = ((dma_bd_control >> 6) & 0x1);
926 lock_release_val = ((dma_bd_control >> 5) & 0x1);
927 use_release_val = ((dma_bd_control >> 4) & 0x1);
928 enable_lock_acquire = ((dma_bd_control >> 3) & 0x1);
929 lock_acquire_val = ((dma_bd_control >> 2) & 0x1);
930 use_acquire_val = ((dma_bd_control >> 1) & 0x1);
932 bool isPacket =
false;
935 print_bd(bd, bd_valid, next_bd, use_next_bd, isPacket, packetID, packetType,
936 words_to_transfer, base_address, enable_lock_acquire,
937 acquire_lockID, lock_acquire_val, enable_lock_release,
938 release_lockID, lock_release_val, s2mm_current_bd, mm2s_current_bd,
949 u32 status, coreTimerLow, PC, LR, SP, locks, R0, R4;
952 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x032004, &status);
953 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0340F8, &coreTimerLow);
954 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x00031100, &PC);
955 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x00031130, &LR);
956 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x00031120, &SP);
957 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x000340D8, &trace_status);
959 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x00030C00, &R0);
960 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x00030C40, &R4);
963 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x032004, &status);
964 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0340F8, &coreTimerLow);
965 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x00030280, &PC);
966 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x000302B0, &LR);
967 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x000302A0, &SP);
968 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x000140D8, &trace_status);
970 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x00030000, &R0);
971 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x00030040, &R4);
973 printf(
"Core [%d, %d] status is %08X, timer is %u, PC is %08X"
974 ", LR is %08X, SP is %08X, R0 is %08X,R4 is %08X\n",
975 col, row, status, coreTimerLow, PC, LR, SP, R0, R4);
976 printf(
"Core [%d, %d] trace status is %08X\n", col, row, trace_status);
981 XAie_Read32(&(ctx->
DevInst), tileAddr + 0x0001EF00, &locks);
982 printf(
"Core [%d, %d] AIE1 locks are %08X\n", col, row, locks);
983 for (
int lock = 0; lock < 16; lock++) {
984 u32 two_bits = (locks >> (lock * 2)) & 0x3;
986 printf(
"Lock %d: ", lock);
987 u32 acquired = two_bits & 0x1;
988 u32 value = two_bits & 0x2;
991 printf(value ?
"1" :
"0");
998 const char *core_status_strings[] = {
1013 "Cascade Stall Slave",
1014 "Cascade Stall Master",
1020 "Core Processor Bus Stall",
1023 printf(
"Core Status: ");
1024 for (
int i = 0; i <= 21; i++) {
1025 if ((status >> i) & 0x1)
1026 printf(
"%s ", core_status_strings[i]);
1031static void clear_range(XAie_DevInst *devInst, u64 tileAddr, u64 low,
1033 for (
int i = low; i <= high; i += 4) {
1034 XAie_Write32(devInst, tileAddr + i, 0);
1052 XAie_CoreDisable(&(ctx->
DevInst), XAie_TileLoc(col, row));
1055 clear_range(&(ctx->
DevInst), tileAddr, 0x20000, 0x200FF);
1057 clear_range(&(ctx->
DevInst), tileAddr, 0x1D000, 0x1D1F8);
1058 XAie_Write32(&(ctx->
DevInst), tileAddr + 0x1DE00, 0);
1059 XAie_Write32(&(ctx->
DevInst), tileAddr + 0x1DE08, 0);
1060 XAie_Write32(&(ctx->
DevInst), tileAddr + 0x1DE10, 0);
1061 XAie_Write32(&(ctx->
DevInst), tileAddr + 0x1DE08, 0);
1063 clear_range(&(ctx->
DevInst), tileAddr, 0x3F000, 0x3F060);
1065 clear_range(&(ctx->
DevInst), tileAddr, 0x3F100, 0x3F168);
1067 clear_range(&(ctx->
DevInst), tileAddr, 0x3F200, 0x3F3AC);
1070 XAie_CoreEnable(&(ctx->
DevInst), XAie_TileLoc(col, row));
1080 clear_range(&(ctx->
DevInst), tileAddr, 0x1D000, 0x1D13C);
1081 XAie_Write32(&(ctx->
DevInst), tileAddr + 0x1D140, 0);
1082 XAie_Write32(&(ctx->
DevInst), tileAddr + 0x1D148, 0);
1083 XAie_Write32(&(ctx->
DevInst), tileAddr + 0x1D150, 0);
1084 XAie_Write32(&(ctx->
DevInst), tileAddr + 0x1D158, 0);
1087 clear_range(&(ctx->
DevInst), tileAddr, 0x3F000, 0x3F058);
1089 clear_range(&(ctx->
DevInst), tileAddr, 0x3F100, 0x3F15C);
1091 clear_range(&(ctx->
DevInst), tileAddr, 0x3F200, 0x3F37C);
1107 for (
int i = 0; i < n; i++) {
1108 total_0 += performance_counter[i];
1111 float mean_0 = (float)total_0 / n;
1115 for (
int i = 0; i < n; i++) {
1116 float x = (float)performance_counter[i] - mean_0;
1120 sdev_0 = sqrtf(sdev_0 / n);
1122 printf(
"Mean and Standard Devation: %f, %f \n", mean_0, sdev_0);
@ AIR_PKT_TYPE_DEVICE_INITIALIZE
#define AIR_ADDRESS_ABSOLUTE_RANGE
void mlir_aie_print_memtiledma_status(aie_libxaie_ctx_t *ctx, int col, int row)
Print a summary of the status of the given MemTile DMA.
int mlir_aie_acquire_lock(aie_libxaie_ctx_t *ctx, int col, int row, int lockid, int lockval, int timeout)
Acquire a physical lock.
u32 mlir_aie_data_mem_rd_word(aie_libxaie_ctx_t *ctx, int col, int row, u64 addr)
Read a value from the data memory of a particular tile memory.
void mlir_aie_clear_tile_memory(aie_libxaie_ctx_t *ctx, int col, int row)
Fill the tile memory of the given tile with zeros.
void print_aie2_lock_status(aie_libxaie_ctx_t *ctx, int col, int row, const char *type, int lockOffset, int locks)
u64 mlir_aie_get_tile_addr(aie_libxaie_ctx_t *ctx, int col, int row)
Return the base address of the given tile.
void mlir_aie_clear_shim_config(aie_libxaie_ctx_t *ctx, int col, int row)
Clear the configuration of the given shim tile.
void mlir_aie_clear_config(aie_libxaie_ctx_t *ctx, int col, int row)
Clear the configuration of the given (non-shim) tile.
void mlir_aie_print_dma_status(aie_libxaie_ctx_t *ctx, int col, int row)
Print a summary of the status of the given Tile DMA.
int mlir_aie_release_lock(aie_libxaie_ctx_t *ctx, int col, int row, int lockid, int lockval, int timeout)
Release a physical lock.
int mlir_aie_init_device(aie_libxaie_ctx_t *ctx, uint32_t device_id)
Initialize the device represented by the context.
void mlir_aie_print_shimdma_status(aie_libxaie_ctx_t *ctx, int col, int row)
Print a summary of the status of the given Shim DMA.
u32 mlir_aie_read32(aie_libxaie_ctx_t *ctx, u64 addr)
Read the AIE configuration memory at the given physical address.
void mlir_aie_dump_tile_memory(aie_libxaie_ctx_t *ctx, int col, int row)
Dump the tile memory of the given tile. Values that are zero are not shown.
void computeStats(u32 performance_counter[], int n)
Given an array of values, compute and print statistics about those values.
void mlir_aie_write32(aie_libxaie_ctx_t *ctx, u64 addr, u32 val)
Write the AIE configuration memory at the given physical address.
void mlir_aie_data_mem_wr_word(aie_libxaie_ctx_t *ctx, int col, int row, u64 addr, u32 data)
Write a value to the data memory of a particular tile memory.
void mlir_aie_deinit_libxaie(aie_libxaie_ctx_t *ctx)
Release access to the libXAIE context.
void mlir_aie_print_tile_status(aie_libxaie_ctx_t *ctx, int col, int row)
Print the status of a core represented by the given tile, at the given coordinates.