60 std::cout <<
"Sequence instr count: " << instr_v.size() <<
"\n";
70 auto bo_instr = xrt::bo(device, instr_v.size() *
sizeof(
int),
71 XCL_BO_FLAGS_CACHEABLE,
kernel.group_id(1));
72 auto bo_in1 = xrt::bo(device, IN1_VOLUME *
sizeof(T1), XRT_BO_FLAGS_HOST_ONLY,
74 auto bo_in2 = xrt::bo(device, IN2_VOLUME *
sizeof(T2), XRT_BO_FLAGS_HOST_ONLY,
76 auto bo_out = xrt::bo(device, OUT_VOLUME *
sizeof(T3), XRT_BO_FLAGS_HOST_ONLY,
80 auto bo_tmp1 = xrt::bo(device, 4, XRT_BO_FLAGS_HOST_ONLY,
kernel.group_id(6));
85 auto bo_trace = xrt::bo(device, tmp_trace_size, XRT_BO_FLAGS_HOST_ONLY,
89 std::cout <<
"Writing data into buffer objects.\n";
92 void *bufInstr = bo_instr.map<
void *>();
93 memcpy(bufInstr, instr_v.data(), instr_v.size() *
sizeof(
int));
96 T1 *bufIn1 = bo_in1.map<T1 *>();
97 T2 *bufIn2 = bo_in2.map<T2 *>();
98 T3 *bufOut = bo_out.map<T3 *>();
99 char *bufTrace = bo_trace.map<
char *>();
101 init_bufIn1(bufIn1, IN1_VOLUME);
102 init_bufIn2(bufIn2, IN2_VOLUME);
103 init_bufOut(bufOut, OUT_VOLUME);
105 char *bufTmp1 = bo_tmp1.map<
char *>();
106 memset(bufTmp1, 0, 4);
112 bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE);
113 bo_in1.sync(XCL_BO_SYNC_BO_TO_DEVICE);
114 bo_in2.sync(XCL_BO_SYNC_BO_TO_DEVICE);
115 bo_out.sync(XCL_BO_SYNC_BO_TO_DEVICE);
116 bo_tmp1.sync(XCL_BO_SYNC_BO_TO_DEVICE);
118 bo_trace.sync(XCL_BO_SYNC_BO_TO_DEVICE);
124 float npu_time_total = 0;
125 float npu_time_min = 9999999;
126 float npu_time_max = 0;
133 for (
unsigned iter = 0; iter < num_iter; iter++) {
136 std::cout <<
"Running Kernel.\n";
140 std::cout <<
"Running Kernel.\n";
141 auto start = std::chrono::high_resolution_clock::now();
142 unsigned int opcode = 3;
143 auto run =
kernel(opcode, bo_instr, instr_v.size(), bo_in1, bo_in2, bo_out,
146 auto stop = std::chrono::high_resolution_clock::now();
147 bo_out.sync(XCL_BO_SYNC_BO_FROM_DEVICE);
149 bo_trace.sync(XCL_BO_SYNC_BO_FROM_DEVICE);
158 std::cout <<
"Verifying results ..." << std::endl;
160 auto vstart = std::chrono::system_clock::now();
163 verify_results(bufIn1, bufIn2, bufOut, IN1_VOLUME, myargs.
verbosity);
165 auto vstop = std::chrono::system_clock::now();
167 std::chrono::duration_cast<std::chrono::seconds>(vstop - vstart)
170 std::cout <<
"Verify time: " << vtime <<
"secs." << std::endl;
173 std::cout <<
"WARNING: results not verified." << std::endl;
184 std::chrono::duration_cast<std::chrono::microseconds>(stop - start)
187 npu_time_total += npu_time;
188 npu_time_min = (npu_time < npu_time_min) ? npu_time : npu_time_min;
189 npu_time_max = (npu_time > npu_time_max) ? npu_time : npu_time_max;
199 std::cout << std::endl
200 <<
"Avg NPU time: " << npu_time_total / myargs.
n_iterations <<
"us."
203 std::cout <<
"Avg NPU gflops: "
207 std::cout << std::endl
208 <<
"Min NPU time: " << npu_time_min <<
"us." << std::endl;
210 std::cout <<
"Max NPU gflops: " << macs / (1000 * npu_time_min)
213 std::cout << std::endl
214 <<
"Max NPU time: " << npu_time_max <<
"us." << std::endl;
216 std::cout <<
"Min NPU gflops: " << macs / (1000 * npu_time_max)
220 std::cout <<
"\nPASS!\n\n";
223 std::cout <<
"\nError count: " << errors <<
"\n\n";
224 std::cout <<
"\nFailed.\n\n";
244 std::cout <<
"Sequence instr count: " << instr_v.size() <<
"\n";
254 auto bo_instr = xrt::bo(device, instr_v.size() *
sizeof(
int),
255 XCL_BO_FLAGS_CACHEABLE,
kernel.group_id(1));
256 auto bo_in1 = xrt::bo(device, IN1_VOLUME *
sizeof(T1), XRT_BO_FLAGS_HOST_ONLY,
258 auto bo_out = xrt::bo(device, OUT_VOLUME *
sizeof(T3), XRT_BO_FLAGS_HOST_ONLY,
262 auto bo_tmp1 = xrt::bo(device, 1, XRT_BO_FLAGS_HOST_ONLY,
kernel.group_id(5));
263 auto bo_tmp2 = xrt::bo(device, 1, XRT_BO_FLAGS_HOST_ONLY,
kernel.group_id(6));
268 auto bo_trace = xrt::bo(device, tmp_trace_size, XRT_BO_FLAGS_HOST_ONLY,
272 std::cout <<
"Writing data into buffer objects.\n";
275 void *bufInstr = bo_instr.map<
void *>();
276 memcpy(bufInstr, instr_v.data(), instr_v.size() *
sizeof(
int));
279 T1 *bufIn1 = bo_in1.map<T1 *>();
280 T3 *bufOut = bo_out.map<T3 *>();
282 char *bufTrace = bo_trace.map<
char *>();
284 init_bufIn1(bufIn1, IN1_VOLUME);
291 bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE);
292 bo_in1.sync(XCL_BO_SYNC_BO_TO_DEVICE);
293 bo_out.sync(XCL_BO_SYNC_BO_TO_DEVICE);
295 bo_trace.sync(XCL_BO_SYNC_BO_TO_DEVICE);
301 float npu_time_total = 0;
302 float npu_time_min = 9999999;
303 float npu_time_max = 0;
310 for (
unsigned iter = 0; iter < num_iter; iter++) {
313 std::cout <<
"Running Kernel.\n";
317 std::cout <<
"Running Kernel.\n";
318 auto start = std::chrono::high_resolution_clock::now();
319 unsigned int opcode = 3;
322 auto run =
kernel(opcode, bo_instr, instr_v.size(), bo_in1, bo_out, bo_tmp1,
325 auto stop = std::chrono::high_resolution_clock::now();
326 bo_out.sync(XCL_BO_SYNC_BO_FROM_DEVICE);
328 bo_trace.sync(XCL_BO_SYNC_BO_FROM_DEVICE);
337 std::cout <<
"Verifying results ..." << std::endl;
339 auto vstart = std::chrono::system_clock::now();
341 errors += verify_results(bufIn1, bufOut, IN1_VOLUME, myargs.
verbosity);
343 auto vstop = std::chrono::system_clock::now();
345 std::chrono::duration_cast<std::chrono::seconds>(vstop - vstart)
348 std::cout <<
"Verify time: " << vtime <<
"secs." << std::endl;
351 std::cout <<
"WARNING: results not verified." << std::endl;
358 std::cout <<
"Writing trace of size " << myargs.
trace_size << std::endl;
366 std::chrono::duration_cast<std::chrono::microseconds>(stop - start)
369 npu_time_total += npu_time;
370 npu_time_min = (npu_time < npu_time_min) ? npu_time : npu_time_min;
371 npu_time_max = (npu_time > npu_time_max) ? npu_time : npu_time_max;
381 std::cout << std::endl
382 <<
"Avg NPU time: " << npu_time_total / myargs.
n_iterations <<
"us."
385 std::cout <<
"Avg NPU gflops: "
389 std::cout << std::endl
390 <<
"Min NPU time: " << npu_time_min <<
"us." << std::endl;
392 std::cout <<
"Max NPU gflops: " << macs / (1000 * npu_time_min)
395 std::cout << std::endl
396 <<
"Max NPU time: " << npu_time_max <<
"us." << std::endl;
398 std::cout <<
"Min NPU gflops: " << macs / (1000 * npu_time_max)
402 std::cout <<
"\nPASS!\n\n";
405 std::cout <<
"\nError count: " << errors <<
"\n\n";
406 std::cout <<
"\nFailed.\n\n";