MLIR-AIE
test_library.cpp
Go to the documentation of this file.
1//===- test_library.cpp -----------------------------------------*- C++ -*-===//
2//
3// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7// (c) Copyright 2021 Xilinx Inc.
8//
9//===----------------------------------------------------------------------===//
10
11/// \file
12/// This file contains common libraries used for testing. Many of these
13/// functions are relatively thin wrappers around underlying libXAIE calls and
14/// are provided to expose a relatively consistent API. Others are more
15/// complex.
16
17#include "test_library.h"
18#include "math.h"
19#include <assert.h>
20#include <fcntl.h>
21#include <stdio.h>
22#include <sys/mman.h>
23#include <vector>
24
25#define SYSFS_PATH_MAX 63
26
27#ifdef HSA_RUNTIME
28hsa_status_t mlir_aie_packet_req_translation(hsa_agent_dispatch_packet_t *pkt,
29 uint64_t va) {
30
31 pkt->arg[0] = 0;
32 pkt->arg[0] = va;
33
34 pkt->type = AIR_PKT_TYPE_TRANSLATE;
35 pkt->header = (HSA_PACKET_TYPE_AGENT_DISPATCH << HSA_PACKET_HEADER_TYPE);
36
37 return HSA_STATUS_SUCCESS;
38}
39
40hsa_status_t mlir_aie_packet_nd_memcpy(
41 hsa_agent_dispatch_packet_t *pkt, uint16_t herd_id, uint8_t col,
42 uint8_t direction, uint8_t channel, uint8_t burst_len, uint8_t memory_space,
43 uint64_t phys_addr, uint32_t transfer_length1d, uint32_t transfer_length2d,
44 uint32_t transfer_stride2d, uint32_t transfer_length3d,
45 uint32_t transfer_stride3d, uint32_t transfer_length4d,
46 uint32_t transfer_stride4d) {
47
48 pkt->arg[0] = 0;
49 pkt->arg[0] |= ((uint64_t)memory_space) << 16;
50 pkt->arg[0] |= ((uint64_t)channel) << 24;
51 pkt->arg[0] |= ((uint64_t)col) << 32;
52 pkt->arg[0] |= ((uint64_t)burst_len) << 52;
53 pkt->arg[0] |= ((uint64_t)direction) << 60;
54
55 pkt->arg[1] = phys_addr;
56 pkt->arg[2] = transfer_length1d;
57 pkt->arg[2] |= ((uint64_t)transfer_length2d) << 32;
58 pkt->arg[2] |= ((uint64_t)transfer_stride2d) << 48;
59 pkt->arg[3] = transfer_length3d;
60 pkt->arg[3] |= ((uint64_t)transfer_stride3d) << 16;
61 pkt->arg[3] |= ((uint64_t)transfer_length4d) << 32;
62 pkt->arg[3] |= ((uint64_t)transfer_stride4d) << 48;
63
64 pkt->type = AIR_PKT_TYPE_ND_MEMCPY;
65 pkt->header = (HSA_PACKET_TYPE_AGENT_DISPATCH << HSA_PACKET_HEADER_TYPE);
66
67 return HSA_STATUS_SUCCESS;
68}
69
70hsa_status_t get_aie_agents(hsa_agent_t agent, void *data) {
71 hsa_status_t status(HSA_STATUS_SUCCESS);
72 hsa_device_type_t device_type;
73 std::vector<hsa_agent_t> *aie_agents(nullptr);
74
75 if (!data) {
76 status = HSA_STATUS_ERROR_INVALID_ARGUMENT;
77 return status;
78 }
79
80 aie_agents = static_cast<std::vector<hsa_agent_t> *>(data);
81 status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
82
83 if (status != HSA_STATUS_SUCCESS) {
84 printf("%s [ERROR] We got a status of 0x%x from hsa_agent_get_info\n",
85 __func__, status);
86 return status;
87 }
88
89 if (device_type == HSA_DEVICE_TYPE_AIE) {
90 aie_agents->push_back(agent);
91 }
92
93 return status;
94}
95
96hsa_status_t get_global_mem_pool(hsa_amd_memory_pool_t pool, void *data) {
97 hsa_status_t status(HSA_STATUS_SUCCESS);
98 hsa_region_segment_t segment_type;
99 status = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT,
100 &segment_type);
101 if (segment_type == HSA_REGION_SEGMENT_GLOBAL) {
102 *reinterpret_cast<hsa_amd_memory_pool_t *>(data) = pool;
103 }
104
105 return status;
106}
107
108hsa_status_t mlir_aie_queue_dispatch_and_wait(
109 hsa_agent_t *agent, hsa_queue_t *q, uint64_t packet_id, uint64_t doorbell,
110 hsa_agent_dispatch_packet_t *pkt, bool destroy_signal) {
111
112 // dispatch and wait has blocking semantics so we can internally create the
113 // signal
114 hsa_amd_signal_create_on_agent(1, 0, nullptr, agent, 0,
115 &(pkt->completion_signal));
116
117 // Write the packet to the queue
118 mlir_aie_write_pkt<hsa_agent_dispatch_packet_t>(q, packet_id, pkt);
119
120 // Ringing the doorbell
121 hsa_signal_store_screlease(q->doorbell_signal, doorbell);
122
123 // wait for packet completion
124 while (hsa_signal_wait_scacquire(pkt->completion_signal,
125 HSA_SIGNAL_CONDITION_EQ, 0, 0x80000,
126 HSA_WAIT_STATE_ACTIVE) != 0)
127 ;
128
129 // Optionally destroying the signal
130 if (destroy_signal) {
131 hsa_signal_destroy(pkt->completion_signal);
132 }
133
134 return HSA_STATUS_SUCCESS;
135}
136
137hsa_status_t mlir_aie_packet_device_init(hsa_agent_dispatch_packet_t *pkt,
138 uint32_t num_cols) {
139
140 pkt->arg[0] = 0;
141 pkt->arg[0] |= (AIR_ADDRESS_ABSOLUTE_RANGE << 48);
142 pkt->arg[0] |= ((uint64_t)num_cols << 40);
143
145 pkt->header = (HSA_PACKET_TYPE_AGENT_DISPATCH << HSA_PACKET_HEADER_TYPE);
146
147 return HSA_STATUS_SUCCESS;
148}
149#endif
150
151/// @brief Release access to the libXAIE context.
152/// @param ctx The context
154 AieRC RC = XAie_Finish(ctx->XAieDevInst);
155 if (RC != XAIE_OK) {
156 printf("Failed to finish tiles.\n");
157 }
158
159#ifdef HSA_RUNTIME
160 if (ctx->cmd_queue != NULL) {
161 hsa_queue_destroy(ctx->cmd_queue);
162 }
163 hsa_shut_down();
164#endif
165 delete ctx->XAieConfig;
166 delete ctx->XAieDevInst;
167 delete ctx;
168}
169
170/// @brief Initialize the device represented by the context.
171/// @param ctx The AIE context
172/// @param device_id The device ID to initialize
173/// @return Zero on success, negative value on error
174int mlir_aie_init_device(aie_libxaie_ctx_t *ctx, uint32_t device_id) {
175 AieRC RC = XAIE_OK;
176
177#ifdef HSA_RUNTIME
178 if (ctx == NULL) {
179 printf("[ERROR] %s: Passed context of NULL\n", __func__);
180 return -1;
181 }
182
183 // Initializing HSA
184 hsa_status_t hsa_ret = hsa_init();
185 if (hsa_ret != HSA_STATUS_SUCCESS) {
186 printf("hsa_init failed\n");
187 return -1;
188 }
189
190 // Finding all AIE HSA agents
191 hsa_status_t iterate_agents_ret = hsa_iterate_agents(
192 &get_aie_agents, reinterpret_cast<void *>(&(ctx->agents)));
193 if (iterate_agents_ret != HSA_STATUS_SUCCESS) {
194 printf("iterate_agents failed with opcode 0x%x\n", iterate_agents_ret);
195 return -1;
196 }
197
198 // Checking if the agents are empty
199 if (ctx->agents.empty()) {
200 printf("No agents found. Exiting.\n");
201 return -1;
202 }
203
204 // Iterating over memory pools to initialize our allocator
205 hsa_amd_agent_iterate_memory_pools(
206 ctx->agents.front(), get_global_mem_pool,
207 reinterpret_cast<void *>(&(ctx->global_mem_pool)));
208
209 // Creating a queue on the first agent that we see
210 hsa_queue_t *q = nullptr;
211 int aie_max_queue_size = 0;
212 hsa_agent_get_info(ctx->agents[0], HSA_AGENT_INFO_QUEUE_MAX_SIZE,
213 &aie_max_queue_size);
214
215 auto queue_create_status =
216 hsa_queue_create(ctx->agents[0], aie_max_queue_size,
217 HSA_QUEUE_TYPE_SINGLE, nullptr, nullptr, 0, 0, &q);
218
219 if (queue_create_status != HSA_STATUS_SUCCESS) {
220 printf("Failed to create queue. Exiting\n");
221 return -1;
222 }
223
224 // Initializing the device
225 uint64_t wr_idx = hsa_queue_add_write_index_relaxed(q, 1);
226 uint64_t packet_id = wr_idx % q->size;
227 hsa_agent_dispatch_packet_t shim_pkt;
228 mlir_aie_packet_device_init(&shim_pkt, 50);
229 mlir_aie_queue_dispatch_and_wait(&(ctx->agents[0]), q, packet_id, wr_idx,
230 &shim_pkt, true);
231
232 // Attaching the queue to the context so we can send more packets if needed
233 ctx->cmd_queue = q;
234
235 // Creating the sysfs path to issue read/write 32 commands
236 char sysfs_path[SYSFS_PATH_MAX + 1];
237 if (snprintf(sysfs_path, SYSFS_PATH_MAX, "/sys/class/amdair/amdair/%02u",
238 device_id) == SYSFS_PATH_MAX)
239 sysfs_path[SYSFS_PATH_MAX] = 0;
240
241 // Using the AMDAIR libxaie backend, which utilizes the AMDAIR driver
242 XAie_BackendType backend;
243 ctx->XAieConfig->Backend = XAIE_IO_BACKEND_AMDAIR;
244 backend = XAIE_IO_BACKEND_AMDAIR;
245 ctx->XAieConfig->BaseAddr = 0;
246 ctx->XAieDevInst->IOInst = (void *)sysfs_path;
247
248#endif
249
250 RC = XAie_CfgInitialize(ctx->XAieDevInst, ctx->XAieConfig);
251 if (RC != XAIE_OK) {
252 printf("Driver initialization failed.\n");
253 return -1;
254 }
255
256 // Without this special case, the simulator generates
257 // FATAL::[ xtlm::907 ] b_transport_cb is not registered with the utils
258 const XAie_Backend *Backend = ctx->XAieDevInst->Backend;
259 if (Backend->Type != XAIE_IO_BACKEND_SIM) {
260 RC = XAie_PmRequestTiles(ctx->XAieDevInst, NULL, 0);
261 if (RC != XAIE_OK) {
262 printf("Failed to request tiles.\n");
263 return -1;
264 }
265
266 // TODO Extra code to really teardown the partitions
267 RC = XAie_Finish(ctx->XAieDevInst);
268 if (RC != XAIE_OK) {
269 printf("Failed to finish tiles.\n");
270 return -1;
271 }
272
273#ifdef HSA_RUNTIME
274 // Because we tear this down, need to do it again
275 ctx->XAieConfig->BaseAddr = 0;
276 ctx->XAieDevInst->IOInst = (void *)sysfs_path;
277#endif
278
279 RC = XAie_CfgInitialize(ctx->XAieDevInst, ctx->XAieConfig);
280 if (RC != XAIE_OK) {
281 printf("Driver initialization failed.\n");
282 return -1;
283 }
284 RC = XAie_PmRequestTiles(ctx->XAieDevInst, NULL, 0);
285 if (RC != XAIE_OK) {
286 printf("Failed to request tiles.\n");
287 return -1;
288 }
289 }
290
291 if (Backend->Type == XAIE_IO_BACKEND_SIM) {
292 printf("Turning ecc off\n");
293 XAie_TurnEccOff(ctx->XAieDevInst);
294 }
295
296 return 0;
297}
298
299/// @brief Acquire a physical lock
300/// @param ctx The context
301/// @param col The column of the lock
302/// @param row The row of the lock
303/// @param lockid The ID of the lock in the tile.
304/// @param lockval The value to acquire the lock with.
305/// @param timeout The number of microseconds to wait
306/// @return Return non-zero on success, i.e. the operation did not timeout.
307int mlir_aie_acquire_lock(aie_libxaie_ctx_t *ctx, int col, int row, int lockid,
308 int lockval, int timeout) {
309 return (XAie_LockAcquire(ctx->XAieDevInst, XAie_TileLoc(col, row),
310 XAie_LockInit(lockid, lockval), timeout) == XAIE_OK);
311}
312
313/// @brief Release a physical lock
314/// @param ctx The context
315/// @param col The column of the lock
316/// @param row The row of the lock
317/// @param lockid The ID of the lock in the tile.
318/// @param lockval The value to acquire the lock with.
319/// @param timeout The number of microseconds to wait
320/// @return Return non-zero on success, i.e. the operation did not timeout.
321int mlir_aie_release_lock(aie_libxaie_ctx_t *ctx, int col, int row, int lockid,
322 int lockval, int timeout) {
323 return (XAie_LockRelease(ctx->XAieDevInst, XAie_TileLoc(col, row),
324 XAie_LockInit(lockid, lockval), timeout) == XAIE_OK);
325}
326
327/// @brief Read the AIE configuration memory at the given physical address.
329 u32 val;
330 XAie_Read32(ctx->XAieDevInst, addr, &val);
331 return val;
332}
333
334/// @brief Write the AIE configuration memory at the given physical address.
335/// It's almost always better to use some more indirect method of accessing
336/// configuration registers, but this is provided as a last resort.
337void mlir_aie_write32(aie_libxaie_ctx_t *ctx, u64 addr, u32 val) {
338 XAie_Write32(ctx->XAieDevInst, addr, val);
339}
340
341/// @brief Read a value from the data memory of a particular tile memory
342/// @param ctx The AIE context
343/// @param col The column coordinate of the tile
344/// @param row The row coordinate of the tile
345/// @param addr The address within the tile's data memory
346/// @return The 32-bit data value read from the specified address
348 u64 addr) {
349 u32 data;
350 XAie_DataMemRdWord(ctx->XAieDevInst, XAie_TileLoc(col, row), addr, &data);
351 return data;
352}
353
354/// @brief Write a value to the data memory of a particular tile memory
355/// @param ctx The AIE context
356/// @param col The column coordinate of the tile
357/// @param row The row coordinate of the tile
358/// @param addr The address within the tile's data memory
359/// @param data The 32-bit data value to write
361 u64 addr, u32 data) {
362 XAie_DataMemWrWord(ctx->XAieDevInst, XAie_TileLoc(col, row), addr, data);
363}
364
365/// @brief Return the base address of the given tile.
366/// The configuration address space of most tiles is very similar,
367/// relative to this base address.
368u64 mlir_aie_get_tile_addr(aie_libxaie_ctx_t *ctx, int col, int row) {
369 return (((u64)row & 0xFFU) << ctx->XAieDevInst->DevProp.RowShift) |
370 (((u64)col & 0xFFU) << ctx->XAieDevInst->DevProp.ColShift);
371}
372
373/// @brief Dump the tile memory of the given tile
374/// Values that are zero are not shown
375void mlir_aie_dump_tile_memory(aie_libxaie_ctx_t *ctx, int col, int row) {
376 for (int i = 0; i < 0x2000; i++) {
377 uint32_t d;
378 AieRC rc = XAie_DataMemRdWord(ctx->XAieDevInst, XAie_TileLoc(col, row),
379 (i * 4), &d);
380 if (rc == XAIE_OK && d != 0)
381 printf("Tile[%d][%d]: mem[%d] = %d\n", col, row, i, d);
382 }
383}
384
385/// @brief Fill the tile memory of the given tile with zeros.
386/// Values that are zero are not shown
387void mlir_aie_clear_tile_memory(aie_libxaie_ctx_t *ctx, int col, int row) {
388 for (int i = 0; i < 0x2000; i++) {
389 XAie_DataMemWrWord(ctx->XAieDevInst, XAie_TileLoc(col, row), (i * 4), 0);
390 }
391}
392
393static void print_aie1_dmachannel_status(aie_libxaie_ctx_t *ctx, int col,
394 int row, const char *dmatype,
395 const char *channel, int channelNum,
396 int running, int stalled) {
397 printf("%s [%d, %d] AIE1 %s%d ", dmatype, col, row, channel, channelNum);
398 switch (running) {
399 case 0:
400 printf("IDLE ");
401 break;
402 case 1:
403 printf("STARTING ");
404 break;
405 case 2:
406 printf("RUNNING ");
407 break;
408 }
409 if (stalled) {
410 printf("Stalled on lock");
411 }
412 printf("\n");
413}
414
415static void print_aie2_dmachannel_status(aie_libxaie_ctx_t *ctx, int col,
416 int row, const char *dmatype,
417 const char *channel, int channelNum,
418 u32 statusOffset, u32 controlOffset,
419 int &current_bd) {
420 u64 tileAddr = mlir_aie_get_tile_addr(ctx, row, col);
421 u32 status, control;
422 XAie_Read32(ctx->XAieDevInst, tileAddr + statusOffset, &status);
423 XAie_Read32(ctx->XAieDevInst, tileAddr + controlOffset, &control);
424 u32 running = status & 0x3;
425 u32 stalled_acq = (status >> 2) & 0x1;
426 u32 stalled_rel = (status >> 3) & 0x1;
427 u32 stalled_data = (status >> 4) & 0x1;
428 u32 stalled_complete = (status >> 5) & 0x1;
429 current_bd = status >> 24;
430 printf("%s [%d, %d] AIE2 %s%d ", dmatype, col, row, channel, channelNum);
431 switch (running) {
432 case 0:
433 printf("IDLE ");
434 break;
435 case 1:
436 printf("STARTING ");
437 break;
438 case 2:
439 printf("RUNNING ");
440 break;
441 }
442 if (stalled_acq)
443 printf("Stalled on Acquire ");
444 if (stalled_rel)
445 printf("Stalled on Release ");
446 if (stalled_data)
447 printf("Stalled on Data ");
448 if (stalled_complete)
449 printf("Stalled on Completion ");
450 printf("status:%08X ctrl:%02X\n", status, control);
451};
452
453static void print_bd(int bd, int bd_valid, u32 nextBd, u32 useNextBd,
454 int isPacket, u32 packetID, u32 packetType,
455 int words_to_transfer, int base_address,
456 int acquireEnabled, u32 acquireLock, int acquireValue,
457 int releaseEnabled, u32 releaseLock, int releaseValue,
458 int s2mm_current_bd[], int mm2s_current_bd[],
459 int numchannels) {
460
461 if (bd_valid) {
462 printf("BD %d valid ", bd);
463 if (useNextBd)
464 printf("(Next BD: %d)\n", nextBd);
465 else
466 printf("(Last BD)\n");
467
468 for (int i = 0; i < numchannels; i++) {
469 if (bd == s2mm_current_bd[i]) {
470 printf(" * Current BD for s2mm channel %d\n", i);
471 }
472 if (bd == mm2s_current_bd[i]) {
473 printf(" * Current BD for mm2s channel %d\n", i);
474 }
475 }
476
477 if (isPacket) {
478 printf(" Packet ID: %02X\n", packetID);
479 printf(" Packet Type: %01X\n", packetType);
480 }
481 printf(" Transferring %d 32 bit words to/from byte address %06X\n",
482 words_to_transfer, base_address * 4);
483
484 // printf(" ");
485 // for (int w = 0; w < 7; w++) {
486 // u32 tmpd;
487 // XAie_DataMemRdWord(ctx->XAieDevInst, XAie_TileLoc(col, row),
488 // (base_address + w) * 4, &tmpd);
489 // printf("%08X ", tmpd);
490 // }
491 // printf("\n");
492 if (acquireEnabled) { // acquire is enabled
493 printf(" Acquires lock %d ", acquireLock);
494 printf("with value %d\n", acquireValue);
495 }
496 if (releaseEnabled) {
497 printf(" Releases lock %d ", releaseLock);
498 printf("with value %d\n", releaseValue);
499 }
500 // printf("currently ");
501 // u32 locks;
502 // XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001EF00, &locks);
503 // u32 two_bits = (locks >> (lock_id * 2)) & 0x3;
504 // if (two_bits) {
505 // u32 acquired = two_bits & 0x1;
506 // u32 value = two_bits & 0x2;
507 // if (acquired)
508 // printf("Acquired ");
509 // printf(value ? "1" : "0");
510 // } else
511 // printf("0");
512 // }
513 }
514}
515
516/// @brief Print a summary of the status of the given Tile DMA.
517void mlir_aie_print_dma_status(aie_libxaie_ctx_t *ctx, int col, int row) {
518 u64 tileAddr = mlir_aie_get_tile_addr(ctx, row, col);
519 auto TileType = ctx->XAieDevInst->DevOps->GetTTypefromLoc(
520 ctx->XAieDevInst, XAie_TileLoc(col, row));
521 assert(TileType == XAIEGBL_TILE_TYPE_AIETILE);
522
523 if (ctx->XAieConfig->AieGen == XAIE_DEV_GEN_AIEML) {
524 const int num_bds = 2;
525 int s2mm_current_bd[num_bds];
526 int mm2s_current_bd[num_bds];
527
528 for (int i = 0; i < num_bds; i++) {
529 print_aie2_dmachannel_status(ctx, col, row, "DMA", "s2mm", i,
530 0x0001DF00 + 4 * i, 0x0001DE00 + 8 * i,
531 s2mm_current_bd[i]);
532 u32 write_count;
533 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001D230 + (0x4 * i),
534 &write_count);
535 printf("DMA [%d, %d] s2mm%d write_count = %d\n", col, row, i,
536 write_count);
537 }
538 for (int i = 0; i < num_bds; i++)
539 print_aie2_dmachannel_status(ctx, col, row, "DMA", "mm2s", i,
540 0x0001DF10 + 4 * i, 0x0001DE10 + 8 * i,
541 mm2s_current_bd[i]);
542
543 for (int bd = 0; bd < 8; bd++) {
544 u32 dma_bd_addr;
545 u32 dma_bd_packet;
546 u32 dma_bd_control;
547 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001D000 + (0x20 * bd),
548 &dma_bd_addr);
549 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001D004 + (0x20 * bd),
550 &dma_bd_packet);
551 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001D014 + (0x20 * bd),
552 &dma_bd_control);
553
554 int bd_valid = (dma_bd_control >> 25) & 0x1;
555 u32 nextBd = ((dma_bd_control >> 27) & 0xF);
556 u32 useNextBd = ((dma_bd_control >> 26) & 0x1);
557 int isPacket = (dma_bd_packet >> 30) & 0x1;
558 u32 packetID = (dma_bd_packet >> 19) & 0x1F;
559 u32 packetType = (dma_bd_packet >> 16) & 0x7;
560 int words_to_transfer = (dma_bd_addr & 0x3FFF);
561 int base_address = dma_bd_addr >> 14;
562 int acquireEnabled = (dma_bd_control >> 12) & 0x1;
563 u32 acquireLock = dma_bd_control & 0xf;
564 int acquireValue = (((int)dma_bd_control << 20) >> 25);
565 u32 releaseLock = (dma_bd_control >> 13) & 0xf;
566 int releaseValue = (((int)dma_bd_control << 7) >> 25);
567 int releaseEnabled = releaseValue != 0;
568
569 print_bd(bd, bd_valid, nextBd, useNextBd, isPacket, packetID, packetType,
570 words_to_transfer, base_address, acquireEnabled, acquireLock,
571 acquireValue, releaseEnabled, releaseLock, releaseValue,
572 s2mm_current_bd, mm2s_current_bd, num_bds);
573 }
574 } else { // AIE1
575 u32 dma_mm2s_status;
576 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001DF10, &dma_mm2s_status);
577 u32 dma_s2mm_status;
578 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001DF00, &dma_s2mm_status);
579 u32 dma_mm2s0_control;
580 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001DE10, &dma_mm2s0_control);
581 u32 dma_mm2s1_control;
582 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001DE18, &dma_mm2s1_control);
583 u32 dma_s2mm0_control;
584 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001DE00, &dma_s2mm0_control);
585 u32 dma_s2mm1_control;
586 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001DE08, &dma_s2mm1_control);
587
588 u32 s2mm_ch0_running = dma_s2mm_status & 0x3;
589 u32 s2mm_ch1_running = (dma_s2mm_status >> 2) & 0x3;
590 u32 mm2s_ch0_running = dma_mm2s_status & 0x3;
591 u32 mm2s_ch1_running = (dma_mm2s_status >> 2) & 0x3;
592 int s2mm0_current_bd, s2mm1_current_bd;
593 int mm2s0_current_bd, mm2s1_current_bd;
594 s2mm0_current_bd = (dma_s2mm_status >> 16) & 0xf;
595 s2mm1_current_bd = (dma_s2mm_status >> 20) & 0xf;
596 mm2s0_current_bd = (dma_mm2s_status >> 16) & 0xf;
597 mm2s1_current_bd = (dma_mm2s_status >> 20) & 0xf;
598 u32 s2mm_ch0_stalled = (dma_s2mm_status >> 4) & 0x1;
599 u32 s2mm_ch1_stalled = (dma_s2mm_status >> 5) & 0x1;
600 u32 mm2s_ch0_stalled = (dma_mm2s_status >> 4) & 0x1;
601 u32 mm2s_ch1_stalled = (dma_mm2s_status >> 5) & 0x1;
602
603 printf("DMA [%d, %d] mm2s_status/0ctrl/1ctrl is %08X %02X %02X, "
604 "s2mm_status/0ctrl/1ctrl is %08X %02X %02X\n",
605 col, row, dma_mm2s_status, dma_mm2s0_control, dma_mm2s1_control,
606 dma_s2mm_status, dma_s2mm0_control, dma_s2mm1_control);
607 print_aie1_dmachannel_status(ctx, col, row, "DMA", "s2mm", 0,
608 s2mm_ch0_running, s2mm_ch0_stalled);
609 print_aie1_dmachannel_status(ctx, col, row, "DMA", "s2mm", 1,
610 s2mm_ch1_running, s2mm_ch1_stalled);
611 print_aie1_dmachannel_status(ctx, col, row, "DMA", "mm2s", 0,
612 mm2s_ch0_running, mm2s_ch0_stalled);
613 print_aie1_dmachannel_status(ctx, col, row, "DMA", "mm2s", 1,
614 mm2s_ch1_running, mm2s_ch1_stalled);
615 for (int bd = 0; bd < 8; bd++) {
616 u32 dma_bd_addr_a;
617 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001D000 + (0x20 * bd),
618 &dma_bd_addr_a);
619 u32 dma_bd_control;
620 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001D018 + (0x20 * bd),
621 &dma_bd_control);
622 // It appears that in the simulator, bd's are not initialized according to
623 // the spec, and instead the control word is all 1's.
624 if ((dma_bd_control >> 31) & 0x1 && (dma_bd_control != 0xFFFFFFFF)) {
625 printf("BD %d valid ", bd);
626 u32 nextBd = ((dma_bd_control >> 13) & 0xF);
627 u32 useNextBd = ((dma_bd_control >> 17) & 0x1);
628 if (useNextBd)
629 printf("(Next BD: %d)\n", nextBd);
630 else
631 printf("(Last BD)\n");
632
633 if (bd == s2mm0_current_bd) {
634 printf(" * Current BD for s2mm channel 0\n");
635 }
636 if (bd == s2mm1_current_bd) {
637 printf(" * Current BD for s2mm channel 1\n");
638 }
639 if (bd == mm2s0_current_bd) {
640 printf(" * Current BD for mm2s channel 0\n");
641 }
642 if (bd == mm2s1_current_bd) {
643 printf(" * Current BD for mm2s channel 1\n");
644 }
645
646 if (dma_bd_control & 0x08000000) {
647 u32 dma_packet;
648 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001D010 + (0x20 * bd),
649 &dma_packet);
650 printf(" Packet mode: %02X\n", dma_packet & 0x1F);
651 }
652 int words_to_transfer = 1 + (dma_bd_control & 0x1FFF);
653 int base_address = dma_bd_addr_a & 0x1FFF;
654 printf(" Transferring %d 32 bit words to/from byte address %06X\n",
655 words_to_transfer, base_address * 4);
656
657 printf(" ");
658 for (int w = 0; w < 7; w++) {
659 u32 tmpd;
660 XAie_DataMemRdWord(ctx->XAieDevInst, XAie_TileLoc(col, row),
661 (base_address + w) * 4, &tmpd);
662 printf("%08X ", tmpd);
663 }
664 printf("\n");
665 int hasAcquire = (dma_bd_addr_a >> 18) & 0x1;
666 int hasRelease = (dma_bd_addr_a >> 21) & 0x1;
667 if (hasAcquire || hasRelease) {
668 u32 lock_id = (dma_bd_addr_a >> 22) & 0xf;
669 if (hasAcquire) {
670 printf(" Acquires lock %d ", lock_id);
671 if ((dma_bd_addr_a >> 16) & 0x1)
672 printf("with value %d ", (dma_bd_addr_a >> 17) & 0x1);
673 }
674 if (hasRelease) {
675 printf(" Releases lock %d ", lock_id);
676 if ((dma_bd_addr_a >> 19) & 0x1)
677 printf("with value %d ", (dma_bd_addr_a >> 20) & 0x1);
678 }
679
680 printf("currently ");
681 u32 locks;
682 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001EF00, &locks);
683 u32 two_bits = (locks >> (lock_id * 2)) & 0x3;
684 if (two_bits) {
685 u32 acquired = two_bits & 0x1;
686 u32 value = two_bits & 0x2;
687 if (acquired)
688 printf("Acquired ");
689 printf(value ? "1" : "0");
690 } else
691 printf("0");
692 printf("\n");
693 }
694
695 if (dma_bd_control & 0x30000000) { // FIFO MODE
696 int FIFO = (dma_bd_control >> 28) & 0x3;
697 u32 dma_fifo_counter;
698 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001DF20,
699 &dma_fifo_counter);
700 printf(" Using FIFO Cnt%d : %08X\n", FIFO, dma_fifo_counter);
701 }
702 }
703 }
704 }
705}
706
707void print_aie2_lock_status(aie_libxaie_ctx_t *ctx, int col, int row,
708 const char *type, int lockOffset, int locks) {
709 u64 tileAddr = mlir_aie_get_tile_addr(ctx, row, col);
710 printf("%s [%d, %d] AIE2 locks are: ", type, col, row);
711 int lockAddr = tileAddr + lockOffset;
712 for (int lock = 0; lock < locks; lock++) {
713 u32 val;
714 XAie_Read32(ctx->XAieDevInst, lockAddr, &val);
715 printf("%X ", val);
716 lockAddr += 0x10;
717 }
718 printf("\n");
719}
720
721/// @brief Print a summary of the status of the given MemTile DMA.
723 int row) {
724 u64 tileAddr = mlir_aie_get_tile_addr(ctx, row, col);
725 auto TileType = ctx->XAieDevInst->DevOps->GetTTypefromLoc(
726 ctx->XAieDevInst, XAie_TileLoc(col, row));
727 assert(TileType == XAIEGBL_TILE_TYPE_MEMTILE);
728 assert(ctx->XAieConfig->AieGen == XAIE_DEV_GEN_AIEML);
729
730 int s2mm_current_bd[6];
731 int mm2s_current_bd[6];
732
733 for (int i = 0; i < 6; i++) {
734 print_aie2_dmachannel_status(ctx, col, row, "MemTileDMA", "s2mm", i,
735 0x000A0660 + 4 * i, 0x000A0600 + 8 * i,
736 s2mm_current_bd[i]);
737 u32 write_count;
738 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x000A06B0 + (0x4 * i),
739 &write_count);
740 printf("MemTileDMA [%d, %d] s2mm%d write_count = %d\n", col, row, i,
741 write_count);
742 }
743 for (int i = 0; i < 6; i++)
744 print_aie2_dmachannel_status(ctx, col, row, "MemTileDMA", "mm2s", i,
745 0x000A0680 + 4 * i, 0x000A0630 + 8 * i,
746 mm2s_current_bd[i]);
747
748 print_aie2_lock_status(ctx, col, row, "MemTileDMA", 0x000C0000, 64);
749
750 for (int bd = 0; bd < 8; bd++) {
751 u32 dma_bd_0;
752 u32 dma_bd_1;
753 u32 dma_bd_7;
754 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x000A0000 + (0x20 * bd),
755 &dma_bd_0);
756 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x000A0004 + (0x20 * bd),
757 &dma_bd_1);
758 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x000A001C + (0x20 * bd),
759 &dma_bd_7);
760
761 int bd_valid = (dma_bd_7 >> 31) & 0x1;
762 u32 nextBd = ((dma_bd_1 >> 20) & 0xF);
763 u32 useNextBd = ((dma_bd_1 >> 19) & 0x1);
764 int isPacket = (dma_bd_0 >> 31) & 0x1;
765 u32 packetID = (dma_bd_0 >> 23) & 0x1F;
766 u32 packetType = (dma_bd_0 >> 28) & 0x7;
767 int words_to_transfer = (dma_bd_0 & 0x1FFFF);
768 int base_address = dma_bd_1 & 0x7FFFF;
769 int acquireEnabled = (dma_bd_7 >> 12) & 0x1;
770 u32 acquireLock = dma_bd_7 & 0xff;
771 int acquireValue = (((int)dma_bd_7 << 17) >> 25);
772 u32 releaseLock = (dma_bd_7 >> 16) & 0xff;
773 int releaseValue = (((int)dma_bd_7 << 1) >> 25);
774 int releaseEnabled = releaseValue != 0;
775
776 print_bd(bd, bd_valid, nextBd, useNextBd, isPacket, packetID, packetType,
777 words_to_transfer, base_address, acquireEnabled, acquireLock,
778 acquireValue, releaseEnabled, releaseLock, releaseValue,
779 s2mm_current_bd, mm2s_current_bd, 6);
780 }
781}
782
783/// @brief Print a summary of the status of the given Shim DMA.
785 // int col = loc.Col;
786 // int row = loc.Row;
787 u64 tileAddr = mlir_aie_get_tile_addr(ctx, row, col);
788 auto TileType = ctx->XAieDevInst->DevOps->GetTTypefromLoc(
789 ctx->XAieDevInst, XAie_TileLoc(col, row));
790 assert(TileType == XAIEGBL_TILE_TYPE_SHIMNOC);
791
792 const int num_bds = 2;
793 int s2mm_current_bd[num_bds];
794 int mm2s_current_bd[num_bds];
795 if (ctx->XAieConfig->AieGen == XAIE_DEV_GEN_AIEML) {
796 for (int i = 0; i < num_bds; i++) {
797 print_aie2_dmachannel_status(ctx, col, row, "ShimDMA", "s2mm", i,
798 0x0001D220 + 4 * i, 0x0001D200 + 8 * i,
799 s2mm_current_bd[i]);
800 u32 write_count;
801 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001D230 + (0x4 * i),
802 &write_count);
803 printf("ShimDMA [%d, %d] s2mm%d write_count = %d\n", col, row, i,
804 write_count);
805 }
806 for (int i = 0; i < num_bds; i++)
807 print_aie2_dmachannel_status(ctx, col, row, "ShimDMA", "mm2s", i,
808 0x0001D228 + 4 * i, 0x0001D210 + 8 * i,
809 mm2s_current_bd[i]);
810 } else {
811 u32 dma_mm2s_status, dma_s2mm_status;
812 u32 dma_mm2s0_control, dma_mm2s1_control;
813 u32 dma_s2mm0_control, dma_s2mm1_control;
814 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001D164, &dma_mm2s_status);
815 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001D160, &dma_s2mm_status);
816 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001D150, &dma_mm2s0_control);
817 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001D158, &dma_mm2s1_control);
818 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001D140, &dma_s2mm0_control);
819 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001D148, &dma_s2mm1_control);
820
821 u32 s2mm_ch0_running = dma_s2mm_status & 0x3;
822 u32 s2mm_ch1_running = (dma_s2mm_status >> 2) & 0x3;
823 u32 mm2s_ch0_running = dma_mm2s_status & 0x3;
824 u32 mm2s_ch1_running = (dma_mm2s_status >> 2) & 0x3;
825 s2mm_current_bd[0] = (dma_s2mm_status >> 16) & 0xf;
826 s2mm_current_bd[1] = (dma_s2mm_status >> 20) & 0xf;
827 mm2s_current_bd[0] = (dma_mm2s_status >> 16) & 0xf;
828 mm2s_current_bd[1] = (dma_mm2s_status >> 20) & 0xf;
829 u32 s2mm_ch0_stalled = (dma_s2mm_status >> 4) & 0x1;
830 u32 s2mm_ch1_stalled = (dma_s2mm_status >> 5) & 0x1;
831 u32 mm2s_ch0_stalled = (dma_mm2s_status >> 4) & 0x1;
832 u32 mm2s_ch1_stalled = (dma_mm2s_status >> 5) & 0x1;
833
834 printf("ShimDMA [%d, %d] AIE1 mm2s_status/0ctrl/1ctrl is %08X %02X %02X, "
835 "s2mm_status/0ctrl/1ctrl is %08X %02X %02X\n",
836 col, row, dma_mm2s_status, dma_mm2s0_control, dma_mm2s1_control,
837 dma_s2mm_status, dma_s2mm0_control, dma_s2mm1_control);
838 print_aie1_dmachannel_status(ctx, col, row, "ShimDMA", "s2mm", 0,
839 s2mm_ch0_running, s2mm_ch0_stalled);
840 print_aie1_dmachannel_status(ctx, col, row, "ShimDMA", "s2mm", 1,
841 s2mm_ch1_running, s2mm_ch1_stalled);
842 print_aie1_dmachannel_status(ctx, col, row, "ShimDMA", "mm2s", 0,
843 mm2s_ch0_running, mm2s_ch0_stalled);
844 print_aie1_dmachannel_status(ctx, col, row, "ShimDMA", "mm2s", 1,
845 mm2s_ch1_running, mm2s_ch1_stalled);
846 }
847
848 u32 locks;
849 if (ctx->XAieConfig->AieGen == XAIE_DEV_GEN_AIEML) {
850 print_aie2_lock_status(ctx, col, row, "ShimDMA", 0x00014000, 16);
851 int overflowAddr = tileAddr + 0x00014120;
852 int underflowAddr = tileAddr + 0x00014128;
853 u32 overflow, underflow;
854 // XAie_Read32(ctx->XAieDevInst, overflowAddr, &overflow);
855 // XAie_Read32(ctx->XAieDevInst, underflowAddr, &underflow);
856 printf(" overflow?:%x underflow?:%x\n", overflow, underflow);
857 } else {
858 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x00014F00, &locks);
859 printf("ShimDMA [%d, %d] AIE1 locks are %08X\n", col, row, locks);
860 for (int lock = 0; lock < 16; lock++) {
861 u32 two_bits = (locks >> (lock * 2)) & 0x3;
862 if (two_bits) {
863 printf("Lock %d: ", lock);
864 u32 acquired = two_bits & 0x1;
865 u32 value = two_bits & 0x2;
866 if (acquired)
867 printf("Acquired ");
868 printf(value ? "1" : "0");
869 printf("\n");
870 }
871 }
872 }
873
874 for (int bd = 0; bd < 8; bd++) {
875 int words_to_transfer; // transfer size in 32-bit words
876 u64 base_address; // address in bytes
877 bool bd_valid;
878 int use_next_bd;
879 int next_bd;
880 int acquire_lockID, release_lockID;
881 int enable_lock_release;
882 int lock_release_val;
883 int use_release_val;
884 int enable_lock_acquire;
885 int lock_acquire_val;
886 int use_acquire_val;
887
888 if (ctx->XAieConfig->AieGen == XAIE_DEV_GEN_AIEML) {
889 u32 dma_bd_addr_low;
890 u32 dma_bd_buffer_length;
891 u32 dma_bd_2;
892 u32 dma_bd_7;
893 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001D000 + (0x20 * bd),
894 &dma_bd_buffer_length);
895 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001D004 + (0x20 * bd),
896 &dma_bd_addr_low);
897 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001D008 + (0x20 * bd),
898 &dma_bd_2);
899 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001D01C + (0x20 * bd),
900 &dma_bd_7);
901 // printf("test: %d %d %d %d\n", dma_bd_buffer_length, dma_bd_addr_low,
902 // dma_bd_2, dma_bd_7);
903 words_to_transfer = dma_bd_buffer_length;
904 base_address =
905 u64(dma_bd_addr_low & 0xFFFC) + (u64(dma_bd_2 & 0xFF) << 32);
906 bd_valid = (dma_bd_7 >> 25) & 0x1;
907 use_next_bd = ((dma_bd_7 >> 26) & 0x1);
908 next_bd = ((dma_bd_7 >> 27) & 0xF);
909 acquire_lockID = ((dma_bd_7 >> 0) & 0xF);
910 release_lockID = ((dma_bd_7 >> 13) & 0xF);
911 lock_release_val = (s32(dma_bd_7) << 7) >> 25; // sign extend
912 enable_lock_release = lock_release_val != 0;
913 use_release_val = 1;
914 lock_acquire_val = (s32(dma_bd_7) << 20) >> 25; // sign extend
915 enable_lock_acquire = ((dma_bd_7 >> 12) & 0x1);
916 use_acquire_val = 1;
917 } else {
918 u32 dma_bd_addr_a;
919 u32 dma_bd_buffer_length;
920 u32 dma_bd_control;
921 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001D000 + (0x14 * bd),
922 &dma_bd_addr_a);
923 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001D004 + (0x14 * bd),
924 &dma_bd_buffer_length);
925 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001D008 + (0x14 * bd),
926 &dma_bd_control);
927 words_to_transfer = dma_bd_buffer_length;
928 base_address =
929 (u64)dma_bd_addr_a + ((u64)((dma_bd_control >> 16) & 0xFFFF) << 32);
930 bd_valid = dma_bd_control & 0x1;
931 use_next_bd = ((dma_bd_control >> 15) & 0x1);
932 next_bd = ((dma_bd_control >> 11) & 0xF);
933 release_lockID = acquire_lockID = ((dma_bd_control >> 7) & 0xF);
934 enable_lock_release = ((dma_bd_control >> 6) & 0x1);
935 lock_release_val = ((dma_bd_control >> 5) & 0x1);
936 use_release_val = ((dma_bd_control >> 4) & 0x1);
937 enable_lock_acquire = ((dma_bd_control >> 3) & 0x1);
938 lock_acquire_val = ((dma_bd_control >> 2) & 0x1);
939 use_acquire_val = ((dma_bd_control >> 1) & 0x1);
940 }
941 bool isPacket = false;
942 int packetID = 0;
943 int packetType = 0;
944 print_bd(bd, bd_valid, next_bd, use_next_bd, isPacket, packetID, packetType,
945 words_to_transfer, base_address, enable_lock_acquire,
946 acquire_lockID, lock_acquire_val, enable_lock_release,
947 release_lockID, lock_release_val, s2mm_current_bd, mm2s_current_bd,
948 num_bds);
949 }
950}
951
952/// @brief Print the status of a core represented by the given tile, at the
953/// given coordinates.
954void mlir_aie_print_tile_status(aie_libxaie_ctx_t *ctx, int col, int row) {
955 // int col = loc.Col;
956 // int row = loc.Row;
957 u64 tileAddr = mlir_aie_get_tile_addr(ctx, row, col);
958 u32 status, coreTimerLow, PC, LR, SP, locks, R0, R4;
959 u32 trace_status;
960 if (ctx->XAieConfig->AieGen == XAIE_DEV_GEN_AIEML) {
961 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x032004, &status);
962 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0340F8, &coreTimerLow);
963 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x00031100, &PC);
964 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x00031130, &LR);
965 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x00031120, &SP);
966 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x000340D8, &trace_status);
967
968 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x00030C00, &R0);
969 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x00030C40, &R4);
970
971 } else {
972 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x032004, &status);
973 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0340F8, &coreTimerLow);
974 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x00030280, &PC);
975 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x000302B0, &LR);
976 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x000302A0, &SP);
977 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x000140D8, &trace_status);
978
979 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x00030000, &R0);
980 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x00030040, &R4);
981 }
982 printf("Core [%d, %d] status is %08X, timer is %u, PC is %08X"
983 ", LR is %08X, SP is %08X, R0 is %08X,R4 is %08X\n",
984 col, row, status, coreTimerLow, PC, LR, SP, R0, R4);
985 printf("Core [%d, %d] trace status is %08X\n", col, row, trace_status);
986
987 if (ctx->XAieConfig->AieGen == XAIE_DEV_GEN_AIEML) {
988 print_aie2_lock_status(ctx, col, row, "Core", 0x0001F000, 16);
989 } else {
990 XAie_Read32(ctx->XAieDevInst, tileAddr + 0x0001EF00, &locks);
991 printf("Core [%d, %d] AIE1 locks are %08X\n", col, row, locks);
992 for (int lock = 0; lock < 16; lock++) {
993 u32 two_bits = (locks >> (lock * 2)) & 0x3;
994 if (two_bits) {
995 printf("Lock %d: ", lock);
996 u32 acquired = two_bits & 0x1;
997 u32 value = two_bits & 0x2;
998 if (acquired)
999 printf("Acquired ");
1000 printf(value ? "1" : "0");
1001 printf("\n");
1002 }
1003 }
1004 }
1005
1006 // Note that not all strings are valid for all architectures
1007 const char *core_status_strings[] = {
1008 "Enabled",
1009 "In Reset",
1010 "Memory Stall S",
1011 "Memory Stall W",
1012 "Memory Stall N",
1013 "Memory Stall E",
1014 "Lock Stall S",
1015 "Lock Stall W",
1016 "Lock Stall N",
1017 "Lock Stall E",
1018 "Stream Stall SS0",
1019 "Stream Stall SS1", // AIE1 only
1020 "Stream Stall MS0",
1021 "Stream Stall MS1", // AIE1 only
1022 "Cascade Stall Slave",
1023 "Cascade Stall Master",
1024 "Debug Halt",
1025 "ECC Error",
1026 "ECC Scrubbing",
1027 "Error Halt",
1028 "Core Done",
1029 "Core Processor Bus Stall", // AIE2 only
1030 };
1031
1032 printf("Core Status: ");
1033 for (int i = 0; i <= 21; i++) {
1034 if ((status >> i) & 0x1)
1035 printf("%s ", core_status_strings[i]);
1036 }
1037 printf("\n");
1038}
1039
1040static void clear_range(XAie_DevInst *devInst, u64 tileAddr, u64 low,
1041 u64 high) {
1042 for (int i = low; i <= high; i += 4) {
1043 XAie_Write32(devInst, tileAddr + i, 0);
1044 // int x = XAie_Read32(ctx->XAieDevInst,tileAddr+i);
1045 // if(x != 0) {
1046 // printf("@%x = %x\n", i, x);
1047 // XAie_Write32(ctx->XAieDevInst,tileAddr+i, 0);
1048 // }
1049 }
1050}
1051
1052/// @brief Clear the configuration of the given (non-shim) tile.
1053/// This includes: clearing the program memory, data memory,
1054/// DMA descriptors, and stream switch configuration.
1055void mlir_aie_clear_config(aie_libxaie_ctx_t *ctx, int col, int row) {
1056 u64 tileAddr = mlir_aie_get_tile_addr(ctx, row, col);
1057
1058 // Put the core in reset first, otherwise bus collisions
1059 // result in arm bus errors.
1060 // TODO Check if this works
1061 XAie_CoreDisable(ctx->XAieDevInst, XAie_TileLoc(col, row));
1062
1063 // Program Memory
1064 clear_range(ctx->XAieDevInst, tileAddr, 0x20000, 0x200FF);
1065 // TileDMA
1066 clear_range(ctx->XAieDevInst, tileAddr, 0x1D000, 0x1D1F8);
1067 XAie_Write32(ctx->XAieDevInst, tileAddr + 0x1DE00, 0);
1068 XAie_Write32(ctx->XAieDevInst, tileAddr + 0x1DE08, 0);
1069 XAie_Write32(ctx->XAieDevInst, tileAddr + 0x1DE10, 0);
1070 XAie_Write32(ctx->XAieDevInst, tileAddr + 0x1DE08, 0);
1071 // Stream Switch master config
1072 clear_range(ctx->XAieDevInst, tileAddr, 0x3F000, 0x3F060);
1073 // Stream Switch slave config
1074 clear_range(ctx->XAieDevInst, tileAddr, 0x3F100, 0x3F168);
1075 // Stream Switch slave slot config
1076 clear_range(ctx->XAieDevInst, tileAddr, 0x3F200, 0x3F3AC);
1077
1078 // TODO Check if this works
1079 XAie_CoreEnable(ctx->XAieDevInst, XAie_TileLoc(col, row));
1080}
1081
1082/// @brief Clear the configuration of the given shim tile.
1083/// This includes: clearing the program memory, data memory,
1084/// DMA descriptors, and stream switch configuration.
1085void mlir_aie_clear_shim_config(aie_libxaie_ctx_t *ctx, int col, int row) {
1086 u64 tileAddr = mlir_aie_get_tile_addr(ctx, row, col);
1087
1088 // ShimDMA
1089 clear_range(ctx->XAieDevInst, tileAddr, 0x1D000, 0x1D13C);
1090 XAie_Write32(ctx->XAieDevInst, tileAddr + 0x1D140, 0);
1091 XAie_Write32(ctx->XAieDevInst, tileAddr + 0x1D148, 0);
1092 XAie_Write32(ctx->XAieDevInst, tileAddr + 0x1D150, 0);
1093 XAie_Write32(ctx->XAieDevInst, tileAddr + 0x1D158, 0);
1094
1095 // Stream Switch master config
1096 clear_range(ctx->XAieDevInst, tileAddr, 0x3F000, 0x3F058);
1097 // Stream Switch slave config
1098 clear_range(ctx->XAieDevInst, tileAddr, 0x3F100, 0x3F15C);
1099 // Stream Switch slave slot config
1100 clear_range(ctx->XAieDevInst, tileAddr, 0x3F200, 0x3F37C);
1101}
1102
1103/*
1104 ******************************************************************************
1105 * COMMON
1106 ******************************************************************************
1107 */
1108
1109/// @brief Given an array of values, compute and print statistics about those
1110/// values.
1111/// @param performance_counter An array of values
1112/// @param n The number of values
1113void computeStats(u32 performance_counter[], int n) {
1114 u32 total_0 = 0;
1115
1116 for (int i = 0; i < n; i++) {
1117 total_0 += performance_counter[i];
1118 }
1119
1120 float mean_0 = (float)total_0 / n;
1121
1122 float sdev_0 = 0;
1123
1124 for (int i = 0; i < n; i++) {
1125 float x = (float)performance_counter[i] - mean_0;
1126 sdev_0 += x * x;
1127 }
1128
1129 sdev_0 = sqrtf(sdev_0 / n);
1130
1131 printf("Mean and Standard Devation: %f, %f \n", mean_0, sdev_0);
1132}
@ AIR_PKT_TYPE_TRANSLATE
Definition hsa_ext_air.h:87
@ AIR_PKT_TYPE_DEVICE_INITIALIZE
Definition hsa_ext_air.h:68
@ AIR_PKT_TYPE_ND_MEMCPY
Definition hsa_ext_air.h:92
#define AIR_ADDRESS_ABSOLUTE_RANGE
Definition hsa_ext_air.h:49
XAie_DevInst * XAieDevInst
Definition target.h:33
XAie_Config * XAieConfig
Definition target.h:32
void mlir_aie_print_memtiledma_status(aie_libxaie_ctx_t *ctx, int col, int row)
Print a summary of the status of the given MemTile DMA.
int mlir_aie_acquire_lock(aie_libxaie_ctx_t *ctx, int col, int row, int lockid, int lockval, int timeout)
Acquire a physical lock.
u32 mlir_aie_data_mem_rd_word(aie_libxaie_ctx_t *ctx, int col, int row, u64 addr)
Read a value from the data memory of a particular tile memory.
void mlir_aie_clear_tile_memory(aie_libxaie_ctx_t *ctx, int col, int row)
Fill the tile memory of the given tile with zeros.
void print_aie2_lock_status(aie_libxaie_ctx_t *ctx, int col, int row, const char *type, int lockOffset, int locks)
u64 mlir_aie_get_tile_addr(aie_libxaie_ctx_t *ctx, int col, int row)
Return the base address of the given tile.
void mlir_aie_clear_shim_config(aie_libxaie_ctx_t *ctx, int col, int row)
Clear the configuration of the given shim tile.
void mlir_aie_clear_config(aie_libxaie_ctx_t *ctx, int col, int row)
Clear the configuration of the given (non-shim) tile.
void mlir_aie_print_dma_status(aie_libxaie_ctx_t *ctx, int col, int row)
Print a summary of the status of the given Tile DMA.
int mlir_aie_release_lock(aie_libxaie_ctx_t *ctx, int col, int row, int lockid, int lockval, int timeout)
Release a physical lock.
int mlir_aie_init_device(aie_libxaie_ctx_t *ctx, uint32_t device_id)
Initialize the device represented by the context.
void mlir_aie_print_shimdma_status(aie_libxaie_ctx_t *ctx, int col, int row)
Print a summary of the status of the given Shim DMA.
u32 mlir_aie_read32(aie_libxaie_ctx_t *ctx, u64 addr)
Read the AIE configuration memory at the given physical address.
void mlir_aie_dump_tile_memory(aie_libxaie_ctx_t *ctx, int col, int row)
Dump the tile memory of the given tile. Values that are zero are not shown.
void computeStats(u32 performance_counter[], int n)
Given an array of values, compute and print statistics about those values.
void mlir_aie_write32(aie_libxaie_ctx_t *ctx, u64 addr, u32 val)
Write the AIE configuration memory at the given physical address.
void mlir_aie_data_mem_wr_word(aie_libxaie_ctx_t *ctx, int col, int row, u64 addr, u32 data)
Write a value to the data memory of a particular tile memory.
void mlir_aie_deinit_libxaie(aie_libxaie_ctx_t *ctx)
Release access to the libXAIE context.
void mlir_aie_print_tile_status(aie_libxaie_ctx_t *ctx, int col, int row)
Print the status of a core represented by the given tile, at the given coordinates.
#define SYSFS_PATH_MAX