MLIR-AIE
test_library.cpp
Go to the documentation of this file.
1//===- test_library.cpp -----------------------------------------*- C++ -*-===//
2//
3// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7// (c) Copyright 2021 Xilinx Inc.
8//
9//===----------------------------------------------------------------------===//
10
11/// \file
12/// This file contains common libraries used for testing. Many of these
13/// functions are relatively thin wrappers around underlying libXAIE calls and
14/// are provided to expose a relatively consistent API. Others are more
15/// complex.
16
17#include "test_library.h"
18#include "math.h"
19#include <assert.h>
20#include <fcntl.h>
21#include <stdio.h>
22#include <sys/mman.h>
23#include <vector>
24
25#define SYSFS_PATH_MAX 63
26
27#ifdef HSA_RUNTIME
28hsa_status_t mlir_aie_packet_req_translation(hsa_agent_dispatch_packet_t *pkt,
29 uint64_t va) {
30
31 pkt->arg[0] = 0;
32 pkt->arg[0] = va;
33
34 pkt->type = AIR_PKT_TYPE_TRANSLATE;
35 pkt->header = (HSA_PACKET_TYPE_AGENT_DISPATCH << HSA_PACKET_HEADER_TYPE);
36
37 return HSA_STATUS_SUCCESS;
38}
39
40hsa_status_t mlir_aie_packet_nd_memcpy(
41 hsa_agent_dispatch_packet_t *pkt, uint16_t herd_id, uint8_t col,
42 uint8_t direction, uint8_t channel, uint8_t burst_len, uint8_t memory_space,
43 uint64_t phys_addr, uint32_t transfer_length1d, uint32_t transfer_length2d,
44 uint32_t transfer_stride2d, uint32_t transfer_length3d,
45 uint32_t transfer_stride3d, uint32_t transfer_length4d,
46 uint32_t transfer_stride4d) {
47
48 pkt->arg[0] = 0;
49 pkt->arg[0] |= ((uint64_t)memory_space) << 16;
50 pkt->arg[0] |= ((uint64_t)channel) << 24;
51 pkt->arg[0] |= ((uint64_t)col) << 32;
52 pkt->arg[0] |= ((uint64_t)burst_len) << 52;
53 pkt->arg[0] |= ((uint64_t)direction) << 60;
54
55 pkt->arg[1] = phys_addr;
56 pkt->arg[2] = transfer_length1d;
57 pkt->arg[2] |= ((uint64_t)transfer_length2d) << 32;
58 pkt->arg[2] |= ((uint64_t)transfer_stride2d) << 48;
59 pkt->arg[3] = transfer_length3d;
60 pkt->arg[3] |= ((uint64_t)transfer_stride3d) << 16;
61 pkt->arg[3] |= ((uint64_t)transfer_length4d) << 32;
62 pkt->arg[3] |= ((uint64_t)transfer_stride4d) << 48;
63
64 pkt->type = AIR_PKT_TYPE_ND_MEMCPY;
65 pkt->header = (HSA_PACKET_TYPE_AGENT_DISPATCH << HSA_PACKET_HEADER_TYPE);
66
67 return HSA_STATUS_SUCCESS;
68}
69
70hsa_status_t get_aie_agents(hsa_agent_t agent, void *data) {
71 hsa_status_t status(HSA_STATUS_SUCCESS);
72 hsa_device_type_t device_type;
73 std::vector<hsa_agent_t> *aie_agents(nullptr);
74
75 if (!data) {
76 status = HSA_STATUS_ERROR_INVALID_ARGUMENT;
77 return status;
78 }
79
80 aie_agents = static_cast<std::vector<hsa_agent_t> *>(data);
81 status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
82
83 if (status != HSA_STATUS_SUCCESS) {
84 printf("%s [ERROR] We got a status of 0x%x from hsa_agent_get_info\n",
85 __func__, status);
86 return status;
87 }
88
89 if (device_type == HSA_DEVICE_TYPE_AIE) {
90 aie_agents->push_back(agent);
91 }
92
93 return status;
94}
95
96hsa_status_t get_global_mem_pool(hsa_amd_memory_pool_t pool, void *data) {
97 hsa_status_t status(HSA_STATUS_SUCCESS);
98 hsa_region_segment_t segment_type;
99 status = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT,
100 &segment_type);
101 if (segment_type == HSA_REGION_SEGMENT_GLOBAL) {
102 *reinterpret_cast<hsa_amd_memory_pool_t *>(data) = pool;
103 }
104
105 return status;
106}
107
108hsa_status_t mlir_aie_queue_dispatch_and_wait(
109 hsa_agent_t *agent, hsa_queue_t *q, uint64_t packet_id, uint64_t doorbell,
110 hsa_agent_dispatch_packet_t *pkt, bool destroy_signal) {
111
112 // dispatch and wait has blocking semantics so we can internally create the
113 // signal
114 hsa_amd_signal_create_on_agent(1, 0, nullptr, agent, 0,
115 &(pkt->completion_signal));
116
117 // Write the packet to the queue
118 mlir_aie_write_pkt<hsa_agent_dispatch_packet_t>(q, packet_id, pkt);
119
120 // Ringing the doorbell
121 hsa_signal_store_screlease(q->doorbell_signal, doorbell);
122
123 // wait for packet completion
124 while (hsa_signal_wait_scacquire(pkt->completion_signal,
125 HSA_SIGNAL_CONDITION_EQ, 0, 0x80000,
126 HSA_WAIT_STATE_ACTIVE) != 0)
127 ;
128
129 // Optionally destroying the signal
130 if (destroy_signal) {
131 hsa_signal_destroy(pkt->completion_signal);
132 }
133
134 return HSA_STATUS_SUCCESS;
135}
136
137hsa_status_t mlir_aie_packet_device_init(hsa_agent_dispatch_packet_t *pkt,
138 uint32_t num_cols) {
139
140 pkt->arg[0] = 0;
141 pkt->arg[0] |= (AIR_ADDRESS_ABSOLUTE_RANGE << 48);
142 pkt->arg[0] |= ((uint64_t)num_cols << 40);
143
145 pkt->header = (HSA_PACKET_TYPE_AGENT_DISPATCH << HSA_PACKET_HEADER_TYPE);
146
147 return HSA_STATUS_SUCCESS;
148}
149#endif
150
151/// @brief Release access to the libXAIE context.
152/// @param ctx The context
154 AieRC RC = XAie_Finish(&(ctx->DevInst));
155 if (RC != XAIE_OK) {
156 printf("Failed to finish tiles.\n");
157 }
158
159#ifdef HSA_RUNTIME
160 if (ctx->cmd_queue != NULL) {
161 hsa_queue_destroy(ctx->cmd_queue);
162 }
163 hsa_shut_down();
164#endif
165 free(ctx);
166}
167
168/// @brief Initialize the device represented by the context.
169/// @param ctx The context
170/// @return Zero on success
171int mlir_aie_init_device(aie_libxaie_ctx_t *ctx, uint32_t device_id) {
172 AieRC RC = XAIE_OK;
173
174#ifdef HSA_RUNTIME
175 if (ctx == NULL) {
176 printf("[ERROR] %s: Passed context of NULL\n", __func__);
177 return -1;
178 }
179
180 // Initializing HSA
181 hsa_status_t hsa_ret = hsa_init();
182 if (hsa_ret != HSA_STATUS_SUCCESS) {
183 printf("hsa_init failed\n");
184 return -1;
185 }
186
187 // Finding all AIE HSA agents
188 hsa_status_t iterate_agents_ret = hsa_iterate_agents(
189 &get_aie_agents, reinterpret_cast<void *>(&(ctx->agents)));
190 if (iterate_agents_ret != HSA_STATUS_SUCCESS) {
191 printf("iterate_agents failed with opcode 0x%x\n", iterate_agents_ret);
192 return -1;
193 }
194
195 // Checking if the agents are empty
196 if (ctx->agents.empty()) {
197 printf("No agents found. Exiting.\n");
198 return -1;
199 }
200
201 // Iterating over memory pools to initialize our allocator
202 hsa_amd_agent_iterate_memory_pools(
203 ctx->agents.front(), get_global_mem_pool,
204 reinterpret_cast<void *>(&(ctx->global_mem_pool)));
205
206 // Creating a queue on the first agent that we see
207 hsa_queue_t *q = nullptr;
208 int aie_max_queue_size = 0;
209 hsa_agent_get_info(ctx->agents[0], HSA_AGENT_INFO_QUEUE_MAX_SIZE,
210 &aie_max_queue_size);
211
212 auto queue_create_status =
213 hsa_queue_create(ctx->agents[0], aie_max_queue_size,
214 HSA_QUEUE_TYPE_SINGLE, nullptr, nullptr, 0, 0, &q);
215
216 if (queue_create_status != HSA_STATUS_SUCCESS) {
217 printf("Failed to create queue. Exiting\n");
218 return -1;
219 }
220
221 // Initializing the device
222 uint64_t wr_idx = hsa_queue_add_write_index_relaxed(q, 1);
223 uint64_t packet_id = wr_idx % q->size;
224 hsa_agent_dispatch_packet_t shim_pkt;
225 mlir_aie_packet_device_init(&shim_pkt, 50);
226 mlir_aie_queue_dispatch_and_wait(&(ctx->agents[0]), q, packet_id, wr_idx,
227 &shim_pkt, true);
228
229 // Attaching the queue to the context so we can send more packets if needed
230 ctx->cmd_queue = q;
231
232 // Creating the sysfs path to issue read/write 32 commands
233 char sysfs_path[SYSFS_PATH_MAX + 1];
234 if (snprintf(sysfs_path, SYSFS_PATH_MAX, "/sys/class/amdair/amdair/%02u",
235 device_id) == SYSFS_PATH_MAX)
236 sysfs_path[SYSFS_PATH_MAX] = 0;
237
238 // Using the AMDAIR libxaie backend, which utilizes the AMDAIR driver
239 XAie_BackendType backend;
240 ctx->AieConfigPtr.Backend = XAIE_IO_BACKEND_AMDAIR;
241 backend = XAIE_IO_BACKEND_AMDAIR;
242 ctx->AieConfigPtr.BaseAddr = 0;
243 ctx->DevInst.IOInst = (void *)sysfs_path;
244
245#endif
246
247 RC = XAie_CfgInitialize(&(ctx->DevInst), &(ctx->AieConfigPtr));
248 if (RC != XAIE_OK) {
249 printf("Driver initialization failed.\n");
250 return -1;
251 }
252
253 // Without this special case, the simulator generates
254 // FATAL::[ xtlm::907 ] b_transport_cb is not registered with the utils
255 const XAie_Backend *Backend = ctx->DevInst.Backend;
256 if (Backend->Type != XAIE_IO_BACKEND_SIM) {
257 RC = XAie_PmRequestTiles(&(ctx->DevInst), NULL, 0);
258 if (RC != XAIE_OK) {
259 printf("Failed to request tiles.\n");
260 return -1;
261 }
262
263 // TODO Extra code to really teardown the partitions
264 RC = XAie_Finish(&(ctx->DevInst));
265 if (RC != XAIE_OK) {
266 printf("Failed to finish tiles.\n");
267 return -1;
268 }
269
270#ifdef HSA_RUNTIME
271 // Because we tear this down, need to do it again
272 ctx->AieConfigPtr.BaseAddr = 0;
273 ctx->DevInst.IOInst = (void *)sysfs_path;
274#endif
275
276 RC = XAie_CfgInitialize(&(ctx->DevInst), &(ctx->AieConfigPtr));
277 if (RC != XAIE_OK) {
278 printf("Driver initialization failed.\n");
279 return -1;
280 }
281 RC = XAie_PmRequestTiles(&(ctx->DevInst), NULL, 0);
282 if (RC != XAIE_OK) {
283 printf("Failed to request tiles.\n");
284 return -1;
285 }
286 }
287
288 if (Backend->Type == XAIE_IO_BACKEND_SIM) {
289 printf("Turning ecc off\n");
290 XAie_TurnEccOff(&(ctx->DevInst));
291 }
292
293 return 0;
294}
295
296/// @brief Acquire a physical lock
297/// @param ctx The context
298/// @param col The column of the lock
299/// @param row The row of the lock
300/// @param lockid The ID of the lock in the tile.
301/// @param lockval The value to acquire the lock with.
302/// @param timeout The number of microseconds to wait
303/// @return Return non-zero on success, i.e. the operation did not timeout.
304int mlir_aie_acquire_lock(aie_libxaie_ctx_t *ctx, int col, int row, int lockid,
305 int lockval, int timeout) {
306 return (XAie_LockAcquire(&(ctx->DevInst), XAie_TileLoc(col, row),
307 XAie_LockInit(lockid, lockval), timeout) == XAIE_OK);
308}
309
310/// @brief Release a physical lock
311/// @param ctx The context
312/// @param col The column of the lock
313/// @param row The row of the lock
314/// @param lockid The ID of the lock in the tile.
315/// @param lockval The value to acquire the lock with.
316/// @param timeout The number of microseconds to wait
317/// @return Return non-zero on success, i.e. the operation did not timeout.
318int mlir_aie_release_lock(aie_libxaie_ctx_t *ctx, int col, int row, int lockid,
319 int lockval, int timeout) {
320 return (XAie_LockRelease(&(ctx->DevInst), XAie_TileLoc(col, row),
321 XAie_LockInit(lockid, lockval), timeout) == XAIE_OK);
322}
323
324/// @brief Read the AIE configuration memory at the given physical address.
326 u32 val;
327 XAie_Read32(&(ctx->DevInst), addr, &val);
328 return val;
329}
330
331/// @brief Write the AIE configuration memory at the given physical address.
332/// It's almost always better to use some more indirect method of accessing
333/// configuration registers, but this is provided as a last resort.
334void mlir_aie_write32(aie_libxaie_ctx_t *ctx, u64 addr, u32 val) {
335 XAie_Write32(&(ctx->DevInst), addr, val);
336}
337
338/// @brief Read a value from the data memory of a particular tile memory
339/// @param addr The address in the given tile.
340/// @return The data
342 u64 addr) {
343 u32 data;
344 XAie_DataMemRdWord(&(ctx->DevInst), XAie_TileLoc(col, row), addr, &data);
345 return data;
346}
347
348/// @brief Write a value to the data memory of a particular tile memory
349/// @param addr The address in the given tile.
350/// @param data The data
352 u64 addr, u32 data) {
353 XAie_DataMemWrWord(&(ctx->DevInst), XAie_TileLoc(col, row), addr, data);
354}
355
356/// @brief Return the base address of the given tile.
357/// The configuration address space of most tiles is very similar,
358/// relative to this base address.
359u64 mlir_aie_get_tile_addr(aie_libxaie_ctx_t *ctx, int col, int row) {
360 return (((u64)row & 0xFFU) << ctx->DevInst.DevProp.RowShift) |
361 (((u64)col & 0xFFU) << ctx->DevInst.DevProp.ColShift);
362}
363
364/// @brief Dump the tile memory of the given tile
365/// Values that are zero are not shown
366void mlir_aie_dump_tile_memory(aie_libxaie_ctx_t *ctx, int col, int row) {
367 for (int i = 0; i < 0x2000; i++) {
368 uint32_t d;
369 AieRC rc = XAie_DataMemRdWord(&(ctx->DevInst), XAie_TileLoc(col, row),
370 (i * 4), &d);
371 if (rc == XAIE_OK && d != 0)
372 printf("Tile[%d][%d]: mem[%d] = %d\n", col, row, i, d);
373 }
374}
375
376/// @brief Fill the tile memory of the given tile with zeros.
377/// Values that are zero are not shown
378void mlir_aie_clear_tile_memory(aie_libxaie_ctx_t *ctx, int col, int row) {
379 for (int i = 0; i < 0x2000; i++) {
380 XAie_DataMemWrWord(&(ctx->DevInst), XAie_TileLoc(col, row), (i * 4), 0);
381 }
382}
383
384static void print_aie1_dmachannel_status(aie_libxaie_ctx_t *ctx, int col,
385 int row, const char *dmatype,
386 const char *channel, int channelNum,
387 int running, int stalled) {
388 printf("%s [%d, %d] AIE1 %s%d ", dmatype, col, row, channel, channelNum);
389 switch (running) {
390 case 0:
391 printf("IDLE ");
392 break;
393 case 1:
394 printf("STARTING ");
395 break;
396 case 2:
397 printf("RUNNING ");
398 break;
399 }
400 if (stalled) {
401 printf("Stalled on lock");
402 }
403 printf("\n");
404}
405
406static void print_aie2_dmachannel_status(aie_libxaie_ctx_t *ctx, int col,
407 int row, const char *dmatype,
408 const char *channel, int channelNum,
409 u32 statusOffset, u32 controlOffset,
410 int &current_bd) {
411 u64 tileAddr = mlir_aie_get_tile_addr(ctx, row, col);
412 u32 status, control;
413 XAie_Read32(&(ctx->DevInst), tileAddr + statusOffset, &status);
414 XAie_Read32(&(ctx->DevInst), tileAddr + controlOffset, &control);
415 u32 running = status & 0x3;
416 u32 stalled_acq = (status >> 2) & 0x1;
417 u32 stalled_rel = (status >> 3) & 0x1;
418 u32 stalled_data = (status >> 4) & 0x1;
419 u32 stalled_complete = (status >> 5) & 0x1;
420 current_bd = status >> 24;
421 printf("%s [%d, %d] AIE2 %s%d ", dmatype, col, row, channel, channelNum);
422 switch (running) {
423 case 0:
424 printf("IDLE ");
425 break;
426 case 1:
427 printf("STARTING ");
428 break;
429 case 2:
430 printf("RUNNING ");
431 break;
432 }
433 if (stalled_acq)
434 printf("Stalled on Acquire ");
435 if (stalled_rel)
436 printf("Stalled on Release ");
437 if (stalled_data)
438 printf("Stalled on Data ");
439 if (stalled_complete)
440 printf("Stalled on Completion ");
441 printf("status:%08X ctrl:%02X\n", status, control);
442};
443
444static void print_bd(int bd, int bd_valid, u32 nextBd, u32 useNextBd,
445 int isPacket, u32 packetID, u32 packetType,
446 int words_to_transfer, int base_address,
447 int acquireEnabled, u32 acquireLock, int acquireValue,
448 int releaseEnabled, u32 releaseLock, int releaseValue,
449 int s2mm_current_bd[], int mm2s_current_bd[],
450 int numchannels) {
451
452 if (bd_valid) {
453 printf("BD %d valid ", bd);
454 if (useNextBd)
455 printf("(Next BD: %d)\n", nextBd);
456 else
457 printf("(Last BD)\n");
458
459 for (int i = 0; i < numchannels; i++) {
460 if (bd == s2mm_current_bd[i]) {
461 printf(" * Current BD for s2mm channel %d\n", i);
462 }
463 if (bd == mm2s_current_bd[i]) {
464 printf(" * Current BD for mm2s channel %d\n", i);
465 }
466 }
467
468 if (isPacket) {
469 printf(" Packet ID: %02X\n", packetID);
470 printf(" Packet Type: %01X\n", packetType);
471 }
472 printf(" Transferring %d 32 bit words to/from byte address %06X\n",
473 words_to_transfer, base_address * 4);
474
475 // printf(" ");
476 // for (int w = 0; w < 7; w++) {
477 // u32 tmpd;
478 // XAie_DataMemRdWord(&(ctx->DevInst), XAie_TileLoc(col, row),
479 // (base_address + w) * 4, &tmpd);
480 // printf("%08X ", tmpd);
481 // }
482 // printf("\n");
483 if (acquireEnabled) { // acquire is enabled
484 printf(" Acquires lock %d ", acquireLock);
485 printf("with value %d\n", acquireValue);
486 }
487 if (releaseEnabled) {
488 printf(" Releases lock %d ", releaseLock);
489 printf("with value %d\n", releaseValue);
490 }
491 // printf("currently ");
492 // u32 locks;
493 // XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001EF00, &locks);
494 // u32 two_bits = (locks >> (lock_id * 2)) & 0x3;
495 // if (two_bits) {
496 // u32 acquired = two_bits & 0x1;
497 // u32 value = two_bits & 0x2;
498 // if (acquired)
499 // printf("Acquired ");
500 // printf(value ? "1" : "0");
501 // } else
502 // printf("0");
503 // }
504 }
505}
506
507/// @brief Print a summary of the status of the given Tile DMA.
508void mlir_aie_print_dma_status(aie_libxaie_ctx_t *ctx, int col, int row) {
509 u64 tileAddr = mlir_aie_get_tile_addr(ctx, row, col);
510 auto TileType = ctx->DevInst.DevOps->GetTTypefromLoc(&(ctx->DevInst),
511 XAie_TileLoc(col, row));
512 assert(TileType == XAIEGBL_TILE_TYPE_AIETILE);
513
514 if (ctx->AieConfigPtr.AieGen == XAIE_DEV_GEN_AIEML) {
515 const int num_bds = 2;
516 int s2mm_current_bd[num_bds];
517 int mm2s_current_bd[num_bds];
518
519 for (int i = 0; i < num_bds; i++) {
520 print_aie2_dmachannel_status(ctx, col, row, "DMA", "s2mm", i,
521 0x0001DF00 + 4 * i, 0x0001DE00 + 8 * i,
522 s2mm_current_bd[i]);
523 u32 write_count;
524 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001D230 + (0x4 * i),
525 &write_count);
526 printf("DMA [%d, %d] s2mm%d write_count = %d\n", col, row, i,
527 write_count);
528 }
529 for (int i = 0; i < num_bds; i++)
530 print_aie2_dmachannel_status(ctx, col, row, "DMA", "mm2s", i,
531 0x0001DF10 + 4 * i, 0x0001DE10 + 8 * i,
532 mm2s_current_bd[i]);
533
534 for (int bd = 0; bd < 8; bd++) {
535 u32 dma_bd_addr;
536 u32 dma_bd_packet;
537 u32 dma_bd_control;
538 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001D000 + (0x20 * bd),
539 &dma_bd_addr);
540 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001D004 + (0x20 * bd),
541 &dma_bd_packet);
542 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001D014 + (0x20 * bd),
543 &dma_bd_control);
544
545 int bd_valid = (dma_bd_control >> 25) & 0x1;
546 u32 nextBd = ((dma_bd_control >> 27) & 0xF);
547 u32 useNextBd = ((dma_bd_control >> 26) & 0x1);
548 int isPacket = (dma_bd_packet >> 30) & 0x1;
549 u32 packetID = (dma_bd_packet >> 19) & 0x1F;
550 u32 packetType = (dma_bd_packet >> 16) & 0x7;
551 int words_to_transfer = (dma_bd_addr & 0x3FFF);
552 int base_address = dma_bd_addr >> 14;
553 int acquireEnabled = (dma_bd_control >> 12) & 0x1;
554 u32 acquireLock = dma_bd_control & 0xf;
555 int acquireValue = (((int)dma_bd_control << 20) >> 25);
556 u32 releaseLock = (dma_bd_control >> 13) & 0xf;
557 int releaseValue = (((int)dma_bd_control << 7) >> 25);
558 int releaseEnabled = releaseValue != 0;
559
560 print_bd(bd, bd_valid, nextBd, useNextBd, isPacket, packetID, packetType,
561 words_to_transfer, base_address, acquireEnabled, acquireLock,
562 acquireValue, releaseEnabled, releaseLock, releaseValue,
563 s2mm_current_bd, mm2s_current_bd, num_bds);
564 }
565 } else { // AIE1
566 u32 dma_mm2s_status;
567 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001DF10, &dma_mm2s_status);
568 u32 dma_s2mm_status;
569 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001DF00, &dma_s2mm_status);
570 u32 dma_mm2s0_control;
571 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001DE10, &dma_mm2s0_control);
572 u32 dma_mm2s1_control;
573 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001DE18, &dma_mm2s1_control);
574 u32 dma_s2mm0_control;
575 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001DE00, &dma_s2mm0_control);
576 u32 dma_s2mm1_control;
577 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001DE08, &dma_s2mm1_control);
578
579 u32 s2mm_ch0_running = dma_s2mm_status & 0x3;
580 u32 s2mm_ch1_running = (dma_s2mm_status >> 2) & 0x3;
581 u32 mm2s_ch0_running = dma_mm2s_status & 0x3;
582 u32 mm2s_ch1_running = (dma_mm2s_status >> 2) & 0x3;
583 int s2mm0_current_bd, s2mm1_current_bd;
584 int mm2s0_current_bd, mm2s1_current_bd;
585 s2mm0_current_bd = (dma_s2mm_status >> 16) & 0xf;
586 s2mm1_current_bd = (dma_s2mm_status >> 20) & 0xf;
587 mm2s0_current_bd = (dma_mm2s_status >> 16) & 0xf;
588 mm2s1_current_bd = (dma_mm2s_status >> 20) & 0xf;
589 u32 s2mm_ch0_stalled = (dma_s2mm_status >> 4) & 0x1;
590 u32 s2mm_ch1_stalled = (dma_s2mm_status >> 5) & 0x1;
591 u32 mm2s_ch0_stalled = (dma_mm2s_status >> 4) & 0x1;
592 u32 mm2s_ch1_stalled = (dma_mm2s_status >> 5) & 0x1;
593
594 printf("DMA [%d, %d] mm2s_status/0ctrl/1ctrl is %08X %02X %02X, "
595 "s2mm_status/0ctrl/1ctrl is %08X %02X %02X\n",
596 col, row, dma_mm2s_status, dma_mm2s0_control, dma_mm2s1_control,
597 dma_s2mm_status, dma_s2mm0_control, dma_s2mm1_control);
598 print_aie1_dmachannel_status(ctx, col, row, "DMA", "s2mm", 0,
599 s2mm_ch0_running, s2mm_ch0_stalled);
600 print_aie1_dmachannel_status(ctx, col, row, "DMA", "s2mm", 1,
601 s2mm_ch1_running, s2mm_ch1_stalled);
602 print_aie1_dmachannel_status(ctx, col, row, "DMA", "mm2s", 0,
603 mm2s_ch0_running, mm2s_ch0_stalled);
604 print_aie1_dmachannel_status(ctx, col, row, "DMA", "mm2s", 1,
605 mm2s_ch1_running, mm2s_ch1_stalled);
606 for (int bd = 0; bd < 8; bd++) {
607 u32 dma_bd_addr_a;
608 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001D000 + (0x20 * bd),
609 &dma_bd_addr_a);
610 u32 dma_bd_control;
611 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001D018 + (0x20 * bd),
612 &dma_bd_control);
613 // It appears that in the simulator, bd's are not initialized according to
614 // the spec, and instead the control word is all 1's.
615 if ((dma_bd_control >> 31) & 0x1 && (dma_bd_control != 0xFFFFFFFF)) {
616 printf("BD %d valid ", bd);
617 u32 nextBd = ((dma_bd_control >> 13) & 0xF);
618 u32 useNextBd = ((dma_bd_control >> 17) & 0x1);
619 if (useNextBd)
620 printf("(Next BD: %d)\n", nextBd);
621 else
622 printf("(Last BD)\n");
623
624 if (bd == s2mm0_current_bd) {
625 printf(" * Current BD for s2mm channel 0\n");
626 }
627 if (bd == s2mm1_current_bd) {
628 printf(" * Current BD for s2mm channel 1\n");
629 }
630 if (bd == mm2s0_current_bd) {
631 printf(" * Current BD for mm2s channel 0\n");
632 }
633 if (bd == mm2s1_current_bd) {
634 printf(" * Current BD for mm2s channel 1\n");
635 }
636
637 if (dma_bd_control & 0x08000000) {
638 u32 dma_packet;
639 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001D010 + (0x20 * bd),
640 &dma_packet);
641 printf(" Packet mode: %02X\n", dma_packet & 0x1F);
642 }
643 int words_to_transfer = 1 + (dma_bd_control & 0x1FFF);
644 int base_address = dma_bd_addr_a & 0x1FFF;
645 printf(" Transferring %d 32 bit words to/from byte address %06X\n",
646 words_to_transfer, base_address * 4);
647
648 printf(" ");
649 for (int w = 0; w < 7; w++) {
650 u32 tmpd;
651 XAie_DataMemRdWord(&(ctx->DevInst), XAie_TileLoc(col, row),
652 (base_address + w) * 4, &tmpd);
653 printf("%08X ", tmpd);
654 }
655 printf("\n");
656 int hasAcquire = (dma_bd_addr_a >> 18) & 0x1;
657 int hasRelease = (dma_bd_addr_a >> 21) & 0x1;
658 if (hasAcquire || hasRelease) {
659 u32 lock_id = (dma_bd_addr_a >> 22) & 0xf;
660 if (hasAcquire) {
661 printf(" Acquires lock %d ", lock_id);
662 if ((dma_bd_addr_a >> 16) & 0x1)
663 printf("with value %d ", (dma_bd_addr_a >> 17) & 0x1);
664 }
665 if (hasRelease) {
666 printf(" Releases lock %d ", lock_id);
667 if ((dma_bd_addr_a >> 19) & 0x1)
668 printf("with value %d ", (dma_bd_addr_a >> 20) & 0x1);
669 }
670
671 printf("currently ");
672 u32 locks;
673 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001EF00, &locks);
674 u32 two_bits = (locks >> (lock_id * 2)) & 0x3;
675 if (two_bits) {
676 u32 acquired = two_bits & 0x1;
677 u32 value = two_bits & 0x2;
678 if (acquired)
679 printf("Acquired ");
680 printf(value ? "1" : "0");
681 } else
682 printf("0");
683 printf("\n");
684 }
685
686 if (dma_bd_control & 0x30000000) { // FIFO MODE
687 int FIFO = (dma_bd_control >> 28) & 0x3;
688 u32 dma_fifo_counter;
689 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001DF20,
690 &dma_fifo_counter);
691 printf(" Using FIFO Cnt%d : %08X\n", FIFO, dma_fifo_counter);
692 }
693 }
694 }
695 }
696}
697
698void print_aie2_lock_status(aie_libxaie_ctx_t *ctx, int col, int row,
699 const char *type, int lockOffset, int locks) {
700 u64 tileAddr = mlir_aie_get_tile_addr(ctx, row, col);
701 printf("%s [%d, %d] AIE2 locks are: ", type, col, row);
702 int lockAddr = tileAddr + lockOffset;
703 for (int lock = 0; lock < locks; lock++) {
704 u32 val;
705 XAie_Read32(&(ctx->DevInst), lockAddr, &val);
706 printf("%X ", val);
707 lockAddr += 0x10;
708 }
709 printf("\n");
710}
711
712/// @brief Print a summary of the status of the given MemTile DMA.
714 int row) {
715 u64 tileAddr = mlir_aie_get_tile_addr(ctx, row, col);
716 auto TileType = ctx->DevInst.DevOps->GetTTypefromLoc(&(ctx->DevInst),
717 XAie_TileLoc(col, row));
718 assert(TileType == XAIEGBL_TILE_TYPE_MEMTILE);
719 assert(ctx->AieConfigPtr.AieGen == XAIE_DEV_GEN_AIEML);
720
721 int s2mm_current_bd[6];
722 int mm2s_current_bd[6];
723
724 for (int i = 0; i < 6; i++) {
725 print_aie2_dmachannel_status(ctx, col, row, "MemTileDMA", "s2mm", i,
726 0x000A0660 + 4 * i, 0x000A0600 + 8 * i,
727 s2mm_current_bd[i]);
728 u32 write_count;
729 XAie_Read32(&(ctx->DevInst), tileAddr + 0x000A06B0 + (0x4 * i),
730 &write_count);
731 printf("MemTileDMA [%d, %d] s2mm%d write_count = %d\n", col, row, i,
732 write_count);
733 }
734 for (int i = 0; i < 6; i++)
735 print_aie2_dmachannel_status(ctx, col, row, "MemTileDMA", "mm2s", i,
736 0x000A0680 + 4 * i, 0x000A0630 + 8 * i,
737 mm2s_current_bd[i]);
738
739 print_aie2_lock_status(ctx, col, row, "MemTileDMA", 0x000C0000, 64);
740
741 for (int bd = 0; bd < 8; bd++) {
742 u32 dma_bd_0;
743 u32 dma_bd_1;
744 u32 dma_bd_7;
745 XAie_Read32(&(ctx->DevInst), tileAddr + 0x000A0000 + (0x20 * bd),
746 &dma_bd_0);
747 XAie_Read32(&(ctx->DevInst), tileAddr + 0x000A0004 + (0x20 * bd),
748 &dma_bd_1);
749 XAie_Read32(&(ctx->DevInst), tileAddr + 0x000A001C + (0x20 * bd),
750 &dma_bd_7);
751
752 int bd_valid = (dma_bd_7 >> 31) & 0x1;
753 u32 nextBd = ((dma_bd_1 >> 20) & 0xF);
754 u32 useNextBd = ((dma_bd_1 >> 19) & 0x1);
755 int isPacket = (dma_bd_0 >> 31) & 0x1;
756 u32 packetID = (dma_bd_0 >> 23) & 0x1F;
757 u32 packetType = (dma_bd_0 >> 28) & 0x7;
758 int words_to_transfer = (dma_bd_0 & 0x1FFFF);
759 int base_address = dma_bd_1 & 0x7FFFF;
760 int acquireEnabled = (dma_bd_7 >> 12) & 0x1;
761 u32 acquireLock = dma_bd_7 & 0xff;
762 int acquireValue = (((int)dma_bd_7 << 17) >> 25);
763 u32 releaseLock = (dma_bd_7 >> 16) & 0xff;
764 int releaseValue = (((int)dma_bd_7 << 1) >> 25);
765 int releaseEnabled = releaseValue != 0;
766
767 print_bd(bd, bd_valid, nextBd, useNextBd, isPacket, packetID, packetType,
768 words_to_transfer, base_address, acquireEnabled, acquireLock,
769 acquireValue, releaseEnabled, releaseLock, releaseValue,
770 s2mm_current_bd, mm2s_current_bd, 6);
771 }
772}
773
774/// @brief Print a summary of the status of the given Shim DMA.
776 // int col = loc.Col;
777 // int row = loc.Row;
778 u64 tileAddr = mlir_aie_get_tile_addr(ctx, row, col);
779 auto TileType = ctx->DevInst.DevOps->GetTTypefromLoc(&(ctx->DevInst),
780 XAie_TileLoc(col, row));
781 assert(TileType == XAIEGBL_TILE_TYPE_SHIMNOC);
782
783 const int num_bds = 2;
784 int s2mm_current_bd[num_bds];
785 int mm2s_current_bd[num_bds];
786 if (ctx->AieConfigPtr.AieGen == XAIE_DEV_GEN_AIEML) {
787 for (int i = 0; i < num_bds; i++) {
788 print_aie2_dmachannel_status(ctx, col, row, "ShimDMA", "s2mm", i,
789 0x0001D220 + 4 * i, 0x0001D200 + 8 * i,
790 s2mm_current_bd[i]);
791 u32 write_count;
792 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001D230 + (0x4 * i),
793 &write_count);
794 printf("ShimDMA [%d, %d] s2mm%d write_count = %d\n", col, row, i,
795 write_count);
796 }
797 for (int i = 0; i < num_bds; i++)
798 print_aie2_dmachannel_status(ctx, col, row, "ShimDMA", "mm2s", i,
799 0x0001D228 + 4 * i, 0x0001D210 + 8 * i,
800 mm2s_current_bd[i]);
801 } else {
802 u32 dma_mm2s_status, dma_s2mm_status;
803 u32 dma_mm2s0_control, dma_mm2s1_control;
804 u32 dma_s2mm0_control, dma_s2mm1_control;
805 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001D164, &dma_mm2s_status);
806 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001D160, &dma_s2mm_status);
807 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001D150, &dma_mm2s0_control);
808 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001D158, &dma_mm2s1_control);
809 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001D140, &dma_s2mm0_control);
810 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001D148, &dma_s2mm1_control);
811
812 u32 s2mm_ch0_running = dma_s2mm_status & 0x3;
813 u32 s2mm_ch1_running = (dma_s2mm_status >> 2) & 0x3;
814 u32 mm2s_ch0_running = dma_mm2s_status & 0x3;
815 u32 mm2s_ch1_running = (dma_mm2s_status >> 2) & 0x3;
816 s2mm_current_bd[0] = (dma_s2mm_status >> 16) & 0xf;
817 s2mm_current_bd[1] = (dma_s2mm_status >> 20) & 0xf;
818 mm2s_current_bd[0] = (dma_mm2s_status >> 16) & 0xf;
819 mm2s_current_bd[1] = (dma_mm2s_status >> 20) & 0xf;
820 u32 s2mm_ch0_stalled = (dma_s2mm_status >> 4) & 0x1;
821 u32 s2mm_ch1_stalled = (dma_s2mm_status >> 5) & 0x1;
822 u32 mm2s_ch0_stalled = (dma_mm2s_status >> 4) & 0x1;
823 u32 mm2s_ch1_stalled = (dma_mm2s_status >> 5) & 0x1;
824
825 printf("ShimDMA [%d, %d] AIE1 mm2s_status/0ctrl/1ctrl is %08X %02X %02X, "
826 "s2mm_status/0ctrl/1ctrl is %08X %02X %02X\n",
827 col, row, dma_mm2s_status, dma_mm2s0_control, dma_mm2s1_control,
828 dma_s2mm_status, dma_s2mm0_control, dma_s2mm1_control);
829 print_aie1_dmachannel_status(ctx, col, row, "ShimDMA", "s2mm", 0,
830 s2mm_ch0_running, s2mm_ch0_stalled);
831 print_aie1_dmachannel_status(ctx, col, row, "ShimDMA", "s2mm", 1,
832 s2mm_ch1_running, s2mm_ch1_stalled);
833 print_aie1_dmachannel_status(ctx, col, row, "ShimDMA", "mm2s", 0,
834 mm2s_ch0_running, mm2s_ch0_stalled);
835 print_aie1_dmachannel_status(ctx, col, row, "ShimDMA", "mm2s", 1,
836 mm2s_ch1_running, mm2s_ch1_stalled);
837 }
838
839 u32 locks;
840 if (ctx->AieConfigPtr.AieGen == XAIE_DEV_GEN_AIEML) {
841 print_aie2_lock_status(ctx, col, row, "ShimDMA", 0x00014000, 16);
842 int overflowAddr = tileAddr + 0x00014120;
843 int underflowAddr = tileAddr + 0x00014128;
844 u32 overflow, underflow;
845 // XAie_Read32(&(ctx->DevInst), overflowAddr, &overflow);
846 // XAie_Read32(&(ctx->DevInst), underflowAddr, &underflow);
847 printf(" overflow?:%x underflow?:%x\n", overflow, underflow);
848 } else {
849 XAie_Read32(&(ctx->DevInst), tileAddr + 0x00014F00, &locks);
850 printf("ShimDMA [%d, %d] AIE1 locks are %08X\n", col, row, locks);
851 for (int lock = 0; lock < 16; lock++) {
852 u32 two_bits = (locks >> (lock * 2)) & 0x3;
853 if (two_bits) {
854 printf("Lock %d: ", lock);
855 u32 acquired = two_bits & 0x1;
856 u32 value = two_bits & 0x2;
857 if (acquired)
858 printf("Acquired ");
859 printf(value ? "1" : "0");
860 printf("\n");
861 }
862 }
863 }
864
865 for (int bd = 0; bd < 8; bd++) {
866 int words_to_transfer; // transfer size in 32-bit words
867 u64 base_address; // address in bytes
868 bool bd_valid;
869 int use_next_bd;
870 int next_bd;
871 int acquire_lockID, release_lockID;
872 int enable_lock_release;
873 int lock_release_val;
874 int use_release_val;
875 int enable_lock_acquire;
876 int lock_acquire_val;
877 int use_acquire_val;
878
879 if (ctx->AieConfigPtr.AieGen == XAIE_DEV_GEN_AIEML) {
880 u32 dma_bd_addr_low;
881 u32 dma_bd_buffer_length;
882 u32 dma_bd_2;
883 u32 dma_bd_7;
884 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001D000 + (0x20 * bd),
885 &dma_bd_buffer_length);
886 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001D004 + (0x20 * bd),
887 &dma_bd_addr_low);
888 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001D008 + (0x20 * bd),
889 &dma_bd_2);
890 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001D01C + (0x20 * bd),
891 &dma_bd_7);
892 // printf("test: %d %d %d %d\n", dma_bd_buffer_length, dma_bd_addr_low,
893 // dma_bd_2, dma_bd_7);
894 words_to_transfer = dma_bd_buffer_length;
895 base_address =
896 u64(dma_bd_addr_low & 0xFFFC) + (u64(dma_bd_2 & 0xFF) << 32);
897 bd_valid = (dma_bd_7 >> 25) & 0x1;
898 use_next_bd = ((dma_bd_7 >> 26) & 0x1);
899 next_bd = ((dma_bd_7 >> 27) & 0xF);
900 acquire_lockID = ((dma_bd_7 >> 0) & 0xF);
901 release_lockID = ((dma_bd_7 >> 13) & 0xF);
902 lock_release_val = (s32(dma_bd_7) << 7) >> 25; // sign extend
903 enable_lock_release = lock_release_val != 0;
904 use_release_val = 1;
905 lock_acquire_val = (s32(dma_bd_7) << 20) >> 25; // sign extend
906 enable_lock_acquire = ((dma_bd_7 >> 12) & 0x1);
907 use_acquire_val = 1;
908 } else {
909 u32 dma_bd_addr_a;
910 u32 dma_bd_buffer_length;
911 u32 dma_bd_control;
912 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001D000 + (0x14 * bd),
913 &dma_bd_addr_a);
914 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001D004 + (0x14 * bd),
915 &dma_bd_buffer_length);
916 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001D008 + (0x14 * bd),
917 &dma_bd_control);
918 words_to_transfer = dma_bd_buffer_length;
919 base_address =
920 (u64)dma_bd_addr_a + ((u64)((dma_bd_control >> 16) & 0xFFFF) << 32);
921 bd_valid = dma_bd_control & 0x1;
922 use_next_bd = ((dma_bd_control >> 15) & 0x1);
923 next_bd = ((dma_bd_control >> 11) & 0xF);
924 release_lockID = acquire_lockID = ((dma_bd_control >> 7) & 0xF);
925 enable_lock_release = ((dma_bd_control >> 6) & 0x1);
926 lock_release_val = ((dma_bd_control >> 5) & 0x1);
927 use_release_val = ((dma_bd_control >> 4) & 0x1);
928 enable_lock_acquire = ((dma_bd_control >> 3) & 0x1);
929 lock_acquire_val = ((dma_bd_control >> 2) & 0x1);
930 use_acquire_val = ((dma_bd_control >> 1) & 0x1);
931 }
932 bool isPacket = false;
933 int packetID = 0;
934 int packetType = 0;
935 print_bd(bd, bd_valid, next_bd, use_next_bd, isPacket, packetID, packetType,
936 words_to_transfer, base_address, enable_lock_acquire,
937 acquire_lockID, lock_acquire_val, enable_lock_release,
938 release_lockID, lock_release_val, s2mm_current_bd, mm2s_current_bd,
939 num_bds);
940 }
941}
942
943/// @brief Print the status of a core represented by the given tile, at the
944/// given coordinates.
945void mlir_aie_print_tile_status(aie_libxaie_ctx_t *ctx, int col, int row) {
946 // int col = loc.Col;
947 // int row = loc.Row;
948 u64 tileAddr = mlir_aie_get_tile_addr(ctx, row, col);
949 u32 status, coreTimerLow, PC, LR, SP, locks, R0, R4;
950 u32 trace_status;
951 if (ctx->AieConfigPtr.AieGen == XAIE_DEV_GEN_AIEML) {
952 XAie_Read32(&(ctx->DevInst), tileAddr + 0x032004, &status);
953 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0340F8, &coreTimerLow);
954 XAie_Read32(&(ctx->DevInst), tileAddr + 0x00031100, &PC);
955 XAie_Read32(&(ctx->DevInst), tileAddr + 0x00031130, &LR);
956 XAie_Read32(&(ctx->DevInst), tileAddr + 0x00031120, &SP);
957 XAie_Read32(&(ctx->DevInst), tileAddr + 0x000340D8, &trace_status);
958
959 XAie_Read32(&(ctx->DevInst), tileAddr + 0x00030C00, &R0);
960 XAie_Read32(&(ctx->DevInst), tileAddr + 0x00030C40, &R4);
961
962 } else {
963 XAie_Read32(&(ctx->DevInst), tileAddr + 0x032004, &status);
964 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0340F8, &coreTimerLow);
965 XAie_Read32(&(ctx->DevInst), tileAddr + 0x00030280, &PC);
966 XAie_Read32(&(ctx->DevInst), tileAddr + 0x000302B0, &LR);
967 XAie_Read32(&(ctx->DevInst), tileAddr + 0x000302A0, &SP);
968 XAie_Read32(&(ctx->DevInst), tileAddr + 0x000140D8, &trace_status);
969
970 XAie_Read32(&(ctx->DevInst), tileAddr + 0x00030000, &R0);
971 XAie_Read32(&(ctx->DevInst), tileAddr + 0x00030040, &R4);
972 }
973 printf("Core [%d, %d] status is %08X, timer is %u, PC is %08X"
974 ", LR is %08X, SP is %08X, R0 is %08X,R4 is %08X\n",
975 col, row, status, coreTimerLow, PC, LR, SP, R0, R4);
976 printf("Core [%d, %d] trace status is %08X\n", col, row, trace_status);
977
978 if (ctx->AieConfigPtr.AieGen == XAIE_DEV_GEN_AIEML) {
979 print_aie2_lock_status(ctx, col, row, "Core", 0x0001F000, 16);
980 } else {
981 XAie_Read32(&(ctx->DevInst), tileAddr + 0x0001EF00, &locks);
982 printf("Core [%d, %d] AIE1 locks are %08X\n", col, row, locks);
983 for (int lock = 0; lock < 16; lock++) {
984 u32 two_bits = (locks >> (lock * 2)) & 0x3;
985 if (two_bits) {
986 printf("Lock %d: ", lock);
987 u32 acquired = two_bits & 0x1;
988 u32 value = two_bits & 0x2;
989 if (acquired)
990 printf("Acquired ");
991 printf(value ? "1" : "0");
992 printf("\n");
993 }
994 }
995 }
996
997 // Note that not all strings are valid for all architectures
998 const char *core_status_strings[] = {
999 "Enabled",
1000 "In Reset",
1001 "Memory Stall S",
1002 "Memory Stall W",
1003 "Memory Stall N",
1004 "Memory Stall E",
1005 "Lock Stall S",
1006 "Lock Stall W",
1007 "Lock Stall N",
1008 "Lock Stall E",
1009 "Stream Stall SS0",
1010 "Stream Stall SS1", // AIE1 only
1011 "Stream Stall MS0",
1012 "Stream Stall MS1", // AIE1 only
1013 "Cascade Stall Slave",
1014 "Cascade Stall Master",
1015 "Debug Halt",
1016 "ECC Error",
1017 "ECC Scrubbing",
1018 "Error Halt",
1019 "Core Done",
1020 "Core Processor Bus Stall", // AIE2 only
1021 };
1022
1023 printf("Core Status: ");
1024 for (int i = 0; i <= 21; i++) {
1025 if ((status >> i) & 0x1)
1026 printf("%s ", core_status_strings[i]);
1027 }
1028 printf("\n");
1029}
1030
1031static void clear_range(XAie_DevInst *devInst, u64 tileAddr, u64 low,
1032 u64 high) {
1033 for (int i = low; i <= high; i += 4) {
1034 XAie_Write32(devInst, tileAddr + i, 0);
1035 // int x = XAie_Read32(ctx->DevInst,tileAddr+i);
1036 // if(x != 0) {
1037 // printf("@%x = %x\n", i, x);
1038 // XAie_Write32(ctx->DevInst,tileAddr+i, 0);
1039 // }
1040 }
1041}
1042
1043/// @brief Clear the configuration of the given (non-shim) tile.
1044/// This includes: clearing the program memory, data memory,
1045/// DMA descriptors, and stream switch configuration.
1046void mlir_aie_clear_config(aie_libxaie_ctx_t *ctx, int col, int row) {
1047 u64 tileAddr = mlir_aie_get_tile_addr(ctx, row, col);
1048
1049 // Put the core in reset first, otherwise bus collisions
1050 // result in arm bus errors.
1051 // TODO Check if this works
1052 XAie_CoreDisable(&(ctx->DevInst), XAie_TileLoc(col, row));
1053
1054 // Program Memory
1055 clear_range(&(ctx->DevInst), tileAddr, 0x20000, 0x200FF);
1056 // TileDMA
1057 clear_range(&(ctx->DevInst), tileAddr, 0x1D000, 0x1D1F8);
1058 XAie_Write32(&(ctx->DevInst), tileAddr + 0x1DE00, 0);
1059 XAie_Write32(&(ctx->DevInst), tileAddr + 0x1DE08, 0);
1060 XAie_Write32(&(ctx->DevInst), tileAddr + 0x1DE10, 0);
1061 XAie_Write32(&(ctx->DevInst), tileAddr + 0x1DE08, 0);
1062 // Stream Switch master config
1063 clear_range(&(ctx->DevInst), tileAddr, 0x3F000, 0x3F060);
1064 // Stream Switch slave config
1065 clear_range(&(ctx->DevInst), tileAddr, 0x3F100, 0x3F168);
1066 // Stream Switch slave slot config
1067 clear_range(&(ctx->DevInst), tileAddr, 0x3F200, 0x3F3AC);
1068
1069 // TODO Check if this works
1070 XAie_CoreEnable(&(ctx->DevInst), XAie_TileLoc(col, row));
1071}
1072
1073/// @brief Clear the configuration of the given shim tile.
1074/// This includes: clearing the program memory, data memory,
1075/// DMA descriptors, and stream switch configuration.
1076void mlir_aie_clear_shim_config(aie_libxaie_ctx_t *ctx, int col, int row) {
1077 u64 tileAddr = mlir_aie_get_tile_addr(ctx, row, col);
1078
1079 // ShimDMA
1080 clear_range(&(ctx->DevInst), tileAddr, 0x1D000, 0x1D13C);
1081 XAie_Write32(&(ctx->DevInst), tileAddr + 0x1D140, 0);
1082 XAie_Write32(&(ctx->DevInst), tileAddr + 0x1D148, 0);
1083 XAie_Write32(&(ctx->DevInst), tileAddr + 0x1D150, 0);
1084 XAie_Write32(&(ctx->DevInst), tileAddr + 0x1D158, 0);
1085
1086 // Stream Switch master config
1087 clear_range(&(ctx->DevInst), tileAddr, 0x3F000, 0x3F058);
1088 // Stream Switch slave config
1089 clear_range(&(ctx->DevInst), tileAddr, 0x3F100, 0x3F15C);
1090 // Stream Switch slave slot config
1091 clear_range(&(ctx->DevInst), tileAddr, 0x3F200, 0x3F37C);
1092}
1093
1094/*
1095 ******************************************************************************
1096 * COMMON
1097 ******************************************************************************
1098 */
1099
1100/// @brief Given an array of values, compute and print statistics about those
1101/// values.
1102/// @param performance_counter An array of values
1103/// @param n The number of values
1104void computeStats(u32 performance_counter[], int n) {
1105 u32 total_0 = 0;
1106
1107 for (int i = 0; i < n; i++) {
1108 total_0 += performance_counter[i];
1109 }
1110
1111 float mean_0 = (float)total_0 / n;
1112
1113 float sdev_0 = 0;
1114
1115 for (int i = 0; i < n; i++) {
1116 float x = (float)performance_counter[i] - mean_0;
1117 sdev_0 += x * x;
1118 }
1119
1120 sdev_0 = sqrtf(sdev_0 / n);
1121
1122 printf("Mean and Standard Devation: %f, %f \n", mean_0, sdev_0);
1123}
@ AIR_PKT_TYPE_TRANSLATE
Definition hsa_ext_air.h:87
@ AIR_PKT_TYPE_DEVICE_INITIALIZE
Definition hsa_ext_air.h:68
@ AIR_PKT_TYPE_ND_MEMCPY
Definition hsa_ext_air.h:92
#define AIR_ADDRESS_ABSOLUTE_RANGE
Definition hsa_ext_air.h:49
XAie_DevInst DevInst
Definition target.h:33
XAie_Config AieConfigPtr
Definition target.h:32
void mlir_aie_print_memtiledma_status(aie_libxaie_ctx_t *ctx, int col, int row)
Print a summary of the status of the given MemTile DMA.
int mlir_aie_acquire_lock(aie_libxaie_ctx_t *ctx, int col, int row, int lockid, int lockval, int timeout)
Acquire a physical lock.
u32 mlir_aie_data_mem_rd_word(aie_libxaie_ctx_t *ctx, int col, int row, u64 addr)
Read a value from the data memory of a particular tile memory.
void mlir_aie_clear_tile_memory(aie_libxaie_ctx_t *ctx, int col, int row)
Fill the tile memory of the given tile with zeros.
void print_aie2_lock_status(aie_libxaie_ctx_t *ctx, int col, int row, const char *type, int lockOffset, int locks)
u64 mlir_aie_get_tile_addr(aie_libxaie_ctx_t *ctx, int col, int row)
Return the base address of the given tile.
void mlir_aie_clear_shim_config(aie_libxaie_ctx_t *ctx, int col, int row)
Clear the configuration of the given shim tile.
void mlir_aie_clear_config(aie_libxaie_ctx_t *ctx, int col, int row)
Clear the configuration of the given (non-shim) tile.
void mlir_aie_print_dma_status(aie_libxaie_ctx_t *ctx, int col, int row)
Print a summary of the status of the given Tile DMA.
int mlir_aie_release_lock(aie_libxaie_ctx_t *ctx, int col, int row, int lockid, int lockval, int timeout)
Release a physical lock.
int mlir_aie_init_device(aie_libxaie_ctx_t *ctx, uint32_t device_id)
Initialize the device represented by the context.
void mlir_aie_print_shimdma_status(aie_libxaie_ctx_t *ctx, int col, int row)
Print a summary of the status of the given Shim DMA.
u32 mlir_aie_read32(aie_libxaie_ctx_t *ctx, u64 addr)
Read the AIE configuration memory at the given physical address.
void mlir_aie_dump_tile_memory(aie_libxaie_ctx_t *ctx, int col, int row)
Dump the tile memory of the given tile. Values that are zero are not shown.
void computeStats(u32 performance_counter[], int n)
Given an array of values, compute and print statistics about those values.
void mlir_aie_write32(aie_libxaie_ctx_t *ctx, u64 addr, u32 val)
Write the AIE configuration memory at the given physical address.
void mlir_aie_data_mem_wr_word(aie_libxaie_ctx_t *ctx, int col, int row, u64 addr, u32 data)
Write a value to the data memory of a particular tile memory.
void mlir_aie_deinit_libxaie(aie_libxaie_ctx_t *ctx)
Release access to the libXAIE context.
void mlir_aie_print_tile_status(aie_libxaie_ctx_t *ctx, int col, int row)
Print the status of a core represented by the given tile, at the given coordinates.
#define SYSFS_PATH_MAX