mirror of
https://github.com/fumiama/base16384-sycl.git
synced 2026-06-05 00:32:49 +08:00
feat(test): add result comparison to basic
Run on i7-14700K:
```
> .\tests\basic.exe
执行设备: Intel(R) UHD Graphics 770
设备类型: GPU
Intel GPU 特性:
XeCore 数量: 4
每个 XeCore 的向量引擎数: 8
向量引擎总数: 32
每个 XeCore 的硬件线程数: 56
每个向量引擎的硬件线程数: 7
硬件线程总数: 224
GPU 内存大小: 31712935936 B (29.53 GB)
每个工作组的共享本地内存: 65536 B
最大工作组大小: 512
支持的子组大小: 8 16 32
推荐选择子组大小: 8
100% 占用率工作组大小: 256
CPU (2944.7 ms): 54 85 110 101 0 102 101 101 85 86 85 86 110 110 110 206 94 110 104 198 110 102 102 206 94 110 110 94 104 94 206 102 101 102 94 54 101 110 254 86 86 104 102 198 104 110 0 104 110 86 0 110 110 198 110 110 94 85 110 0 254 101 101 101...
GPU 基本并行 (471.7 ms): 54 85 110 101 0 102 101 101 85 86 85 86 110 110 110 206 94 110 104 198 110 102 102 206 94 110 110 94 104 94 206 102 101 102 94 54 101 110 254 86 86 104 102 198 104 110 0 104 110 86 0 110 110 198 110 110 94 85 110 0 254 101 101 101...
GPU 高级并行 (448.2 ms): 54 85 110 101 0 102 101 101 85 86 85 86 110 110 110 206 94 110 104 198 110 102 102 206 94 110 110 94 104 94 206 102 101 102 94 54 101 110 254 86 86 104 102 198 104 110 0 104 110 86 0 110 110 198 110 110 94 85 110 0 254 101 101 101...
```
This commit is contained in:
@@ -1,14 +1,14 @@
|
||||
#ifndef _TEST_KERNELS_H_
|
||||
#define _TEST_KERNELS_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include <iostream>
|
||||
|
||||
namespace base16384 {
|
||||
class test {
|
||||
public:
|
||||
// kernels_basic is a demo calculation that implements
|
||||
// mod, bit, plus and mul calculations.
|
||||
SYCL_EXTERNAL static uint8_t kernels_basic(uint8_t in);
|
||||
SYCL_EXTERNAL static std::uint8_t kernels_basic(uint8_t in);
|
||||
};
|
||||
} // namespace base16384
|
||||
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
#include <stdint.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <sycl/sycl.hpp>
|
||||
|
||||
#include "test.hpp"
|
||||
|
||||
SYCL_EXTERNAL uint8_t base16384::test::kernels_basic(uint8_t in) {
|
||||
SYCL_EXTERNAL std::uint8_t base16384::test::kernels_basic(uint8_t in) {
|
||||
in *= in;
|
||||
in %= 251;
|
||||
in ^= in >> 2;
|
||||
|
||||
@@ -100,6 +100,17 @@ int main() {
|
||||
for (int i = 0; i < std::min(N, 64); i++) std::cout << " " << static_cast<int>(data[i]);
|
||||
std::cout << "..." << std::endl;
|
||||
|
||||
// Verify GPU basic parallel result
|
||||
for (int i = 0; i < N; i++) {
|
||||
if (data[i] != cpu_data[i]) {
|
||||
std::cerr << "GPU 基本并行结果验证失败:位置 " << i << " 期望值 "
|
||||
<< static_cast<int>(cpu_data[i]) << " 实际值 " << static_cast<int>(data[i])
|
||||
<< std::endl;
|
||||
sycl::free(data, q);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
std::copy(initial_data.cbegin(), initial_data.cend(), data);
|
||||
|
||||
start_time = std::chrono::high_resolution_clock::now();
|
||||
@@ -122,6 +133,17 @@ int main() {
|
||||
for (int i = 0; i < std::min(N, 64); i++) std::cout << " " << static_cast<int>(data[i]);
|
||||
std::cout << "..." << std::endl;
|
||||
|
||||
// Verify GPU advanced parallel result
|
||||
for (int i = 0; i < N; i++) {
|
||||
if (data[i] != cpu_data[i]) {
|
||||
std::cerr << "GPU 高级并行结果验证失败:位置 " << i << " 期望值 "
|
||||
<< static_cast<int>(cpu_data[i]) << " 实际值 " << static_cast<int>(data[i])
|
||||
<< std::endl;
|
||||
sycl::free(data, q);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
sycl::free(data, q);
|
||||
|
||||
return 0;
|
||||
|
||||
Reference in New Issue
Block a user