53 void (*init_bufIn2)(T2 *, int),
void (*init_bufOut)(T3 *, int),
63 std::cout <<
"Sequence instr count: " << instr_v.size() <<
"\n";
73 auto bo_instr = xrt::bo(device, instr_v.size() *
sizeof(
int),
74 XCL_BO_FLAGS_CACHEABLE,
kernel.group_id(1));
75 auto bo_in1 = xrt::bo(device, IN1_VOLUME *
sizeof(T1), XRT_BO_FLAGS_HOST_ONLY,
77 auto bo_in2 = xrt::bo(device, IN2_VOLUME *
sizeof(T2), XRT_BO_FLAGS_HOST_ONLY,
79 auto bo_out = xrt::bo(device, OUT_VOLUME *
sizeof(T3), XRT_BO_FLAGS_HOST_ONLY,
84 auto bo_trace = xrt::bo(device, tmp_trace_size, XRT_BO_FLAGS_HOST_ONLY,
88 std::cout <<
"Writing data into buffer objects.\n";
91 void *bufInstr = bo_instr.map<
void *>();
92 memcpy(bufInstr, instr_v.data(), instr_v.size() *
sizeof(
int));
95 T1 *bufIn1 = bo_in1.map<T1 *>();
96 T2 *bufIn2 = bo_in2.map<T2 *>();
97 T3 *bufOut = bo_out.map<T3 *>();
98 char *bufTrace = bo_trace.map<
char *>();
100 init_bufIn1(bufIn1, IN1_VOLUME);
101 init_bufIn2(bufIn2, IN2_VOLUME);
102 init_bufOut(bufOut, OUT_VOLUME);
108 bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE);
109 bo_in1.sync(XCL_BO_SYNC_BO_TO_DEVICE);
110 bo_in2.sync(XCL_BO_SYNC_BO_TO_DEVICE);
111 bo_out.sync(XCL_BO_SYNC_BO_TO_DEVICE);
113 bo_trace.sync(XCL_BO_SYNC_BO_TO_DEVICE);
119 float npu_time_total = 0;
120 float npu_time_min = 9999999;
121 float npu_time_max = 0;
128 for (
unsigned iter = 0; iter < num_iter; iter++) {
131 std::cout <<
"Running Kernel.\n";
135 std::cout <<
"Running Kernel.\n";
136 auto start = std::chrono::high_resolution_clock::now();
137 unsigned int opcode = 3;
138 auto run =
kernel(opcode, bo_instr, instr_v.size(), bo_in1, bo_in2, bo_out,
141 auto stop = std::chrono::high_resolution_clock::now();
142 bo_out.sync(XCL_BO_SYNC_BO_FROM_DEVICE);
144 bo_trace.sync(XCL_BO_SYNC_BO_FROM_DEVICE);
153 std::cout <<
"Verifying results ..." << std::endl;
155 auto vstart = std::chrono::system_clock::now();
158 verify_results(bufIn1, bufIn2, bufOut, IN1_VOLUME, myargs.
verbosity);
160 auto vstop = std::chrono::system_clock::now();
162 std::chrono::duration_cast<std::chrono::seconds>(vstop - vstart)
165 std::cout <<
"Verify time: " << vtime <<
"secs." << std::endl;
168 std::cout <<
"WARNING: results not verified." << std::endl;
179 std::chrono::duration_cast<std::chrono::microseconds>(stop - start)
182 npu_time_total += npu_time;
183 npu_time_min = (npu_time < npu_time_min) ? npu_time : npu_time_min;
184 npu_time_max = (npu_time > npu_time_max) ? npu_time : npu_time_max;
194 std::cout << std::endl
195 <<
"Avg NPU time: " << npu_time_total / myargs.
n_iterations <<
"us."
198 std::cout <<
"Avg NPU gflops: "
202 std::cout << std::endl
203 <<
"Min NPU time: " << npu_time_min <<
"us." << std::endl;
205 std::cout <<
"Max NPU gflops: " << macs / (1000 * npu_time_min)
208 std::cout << std::endl
209 <<
"Max NPU time: " << npu_time_max <<
"us." << std::endl;
211 std::cout <<
"Min NPU gflops: " << macs / (1000 * npu_time_max)
215 std::cout <<
"\nPASS!\n\n";
218 std::cout <<
"\nError count: " << errors <<
"\n\n";
219 std::cout <<
"\nFailed.\n\n";
239 std::cout <<
"Sequence instr count: " << instr_v.size() <<
"\n";
249 auto bo_instr = xrt::bo(device, instr_v.size() *
sizeof(
int),
250 XCL_BO_FLAGS_CACHEABLE,
kernel.group_id(1));
251 auto bo_in1 = xrt::bo(device, IN1_VOLUME *
sizeof(T1), XRT_BO_FLAGS_HOST_ONLY,
253 auto bo_out = xrt::bo(device, OUT_VOLUME *
sizeof(T3), XRT_BO_FLAGS_HOST_ONLY,
257 auto bo_trace = xrt::bo(device, tmp_trace_size, XRT_BO_FLAGS_HOST_ONLY,
261 std::cout <<
"Writing data into buffer objects.\n";
264 void *bufInstr = bo_instr.map<
void *>();
265 memcpy(bufInstr, instr_v.data(), instr_v.size() *
sizeof(
int));
268 T1 *bufIn1 = bo_in1.map<T1 *>();
269 T3 *bufOut = bo_out.map<T3 *>();
270 char *bufTrace = bo_trace.map<
char *>();
272 init_bufIn1(bufIn1, IN1_VOLUME);
279 bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE);
280 bo_in1.sync(XCL_BO_SYNC_BO_TO_DEVICE);
281 bo_out.sync(XCL_BO_SYNC_BO_TO_DEVICE);
283 bo_trace.sync(XCL_BO_SYNC_BO_TO_DEVICE);
289 float npu_time_total = 0;
290 float npu_time_min = 9999999;
291 float npu_time_max = 0;
298 for (
unsigned iter = 0; iter < num_iter; iter++) {
301 std::cout <<
"Running Kernel.\n";
305 std::cout <<
"Running Kernel.\n";
306 auto start = std::chrono::high_resolution_clock::now();
307 unsigned int opcode = 3;
309 auto run =
kernel(opcode, bo_instr, instr_v.size(), bo_in1, bo_out, 0, 0,
312 auto stop = std::chrono::high_resolution_clock::now();
313 bo_out.sync(XCL_BO_SYNC_BO_FROM_DEVICE);
315 bo_trace.sync(XCL_BO_SYNC_BO_FROM_DEVICE);
324 std::cout <<
"Verifying results ..." << std::endl;
326 auto vstart = std::chrono::system_clock::now();
328 errors += verify_results(bufIn1, bufOut, IN1_VOLUME, myargs.
verbosity);
330 auto vstop = std::chrono::system_clock::now();
332 std::chrono::duration_cast<std::chrono::seconds>(vstop - vstart)
335 std::cout <<
"Verify time: " << vtime <<
"secs." << std::endl;
338 std::cout <<
"WARNING: results not verified." << std::endl;
345 std::cout <<
"Writing trace of size " << myargs.
trace_size << std::endl;
353 std::chrono::duration_cast<std::chrono::microseconds>(stop - start)
356 npu_time_total += npu_time;
357 npu_time_min = (npu_time < npu_time_min) ? npu_time : npu_time_min;
358 npu_time_max = (npu_time > npu_time_max) ? npu_time : npu_time_max;
368 std::cout << std::endl
369 <<
"Avg NPU time: " << npu_time_total / myargs.
n_iterations <<
"us."
372 std::cout <<
"Avg NPU gflops: "
376 std::cout << std::endl
377 <<
"Min NPU time: " << npu_time_min <<
"us." << std::endl;
379 std::cout <<
"Max NPU gflops: " << macs / (1000 * npu_time_min)
382 std::cout << std::endl
383 <<
"Max NPU time: " << npu_time_max <<
"us." << std::endl;
385 std::cout <<
"Min NPU gflops: " << macs / (1000 * npu_time_max)
389 std::cout <<
"\nPASS!\n\n";
392 std::cout <<
"\nError count: " << errors <<
"\n\n";
393 std::cout <<
"\nFailed.\n\n";