diff --git a/CMakeLists.txt b/CMakeLists.txt index c2710f7..d768890 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -73,6 +73,7 @@ message(STATUS "Collected libs: ${B14LIBS}") enable_testing() add_subdirectory(tests) message(STATUS "Collected tests: ${B14TESTS}") + foreach(TARGET_NAME ${B14TESTS}) target_link_libraries(${TARGET_NAME} ${B14LIBS}) endforeach() diff --git a/include/test.hpp b/include/test.hpp new file mode 100644 index 0000000..7667032 --- /dev/null +++ b/include/test.hpp @@ -0,0 +1,15 @@ +#ifndef _TEST_KERNELS_H_ +#define _TEST_KERNELS_H_ + +#include + +namespace base16384 { +class test { + public: + // base16384_test_kernels_basic is a demo calculation that implements + // mod, bit, plus and mul calculations. + SYCL_EXTERNAL static uint8_t kernels_basic(uint8_t in); +}; +} // namespace base16384 + +#endif \ No newline at end of file diff --git a/include/xeinfo.hpp b/include/xeinfo.hpp index 11e8968..9beb080 100644 --- a/include/xeinfo.hpp +++ b/include/xeinfo.hpp @@ -1,6 +1,9 @@ #ifndef _XEINFO_HPP_ #define _XEINFO_HPP_ +#include + +#include #include #include #include @@ -59,7 +62,7 @@ class xeinfo { const int num_subslices_per_slice; const int num_eus_per_subslice; const int num_threads_per_eu; - const int global_mem_size; + const uint64_t global_mem_size; const int local_mem_size; const int max_work_group_size; const std::vector sub_group_sizes; @@ -84,8 +87,9 @@ class xeinfo { builder << " 每个 XeCore 的硬件线程数: " << num_thread_per_xecore << "\n"; builder << " 每个向量引擎的硬件线程数: " << num_threads_per_eu << "\n"; builder << " 硬件线程总数: " << total_hardware_threads << "\n"; - builder << " GPU 内存大小: " << global_mem_size << " 字节\n"; - builder << " 每个工作组的共享本地内存: " << local_mem_size << " 字节\n"; + builder << " GPU 内存大小: " << global_mem_size << " B (" << std::fixed << std::setprecision(2) + << (double)global_mem_size / 1024 / 1024 / 1024 << " GB)\n"; + builder << " 每个工作组的共享本地内存: " << local_mem_size << " B\n"; builder << " 最大工作组大小: " << max_work_group_size << "\n"; builder << " 支持的子组大小:"; for (size_t i = 0; i < sub_group_sizes.size(); i++) builder << " " << sub_group_sizes[i]; diff --git a/libs/CMakeLists.txt b/libs/CMakeLists.txt index 9cba6ed..057638a 100644 --- a/libs/CMakeLists.txt +++ b/libs/CMakeLists.txt @@ -5,12 +5,12 @@ set(LOCAL_B14LIBS "") foreach(CPP_FILE ${CPP_FILES}) # name without .cpp get_filename_component(TARGET_NAME ${CPP_FILE} NAME_WE) - message(STATUS "Add lib: ${TARGET_NAME}") + message(STATUS "Add CPP lib: ${TARGET_NAME}") add_library(${TARGET_NAME} STATIC ${CPP_FILE}) set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS}") set_target_properties(${TARGET_NAME} PROPERTIES LINK_FLAGS "${LINK_FLAGS}") - + list(APPEND LOCAL_B14LIBS ${TARGET_NAME}) endforeach() diff --git a/libs/test_kernels.cpp b/libs/test_kernels.cpp new file mode 100644 index 0000000..94d9a73 --- /dev/null +++ b/libs/test_kernels.cpp @@ -0,0 +1,14 @@ +#include + +#include + +#include "test.hpp" + +SYCL_EXTERNAL uint8_t base16384::test::kernels_basic(uint8_t in) { + in *= in; + in %= 251; + in ^= in >> 2; + in += 17; + in *= 3; + return in ^ (in << 1); +} \ No newline at end of file diff --git a/tests/basic.cpp b/tests/basic.cpp index 64f272d..7b2a4ef 100644 --- a/tests/basic.cpp +++ b/tests/basic.cpp @@ -14,6 +14,7 @@ #include #include "errors.hpp" +#include "test.hpp" #include "xeinfo.hpp" constexpr int iter_count = 65536; @@ -67,13 +68,7 @@ int main() { auto start_time = std::chrono::high_resolution_clock::now(); for (int j = 0; j < iter_count; j++) { for (auto& byte : cpu_data) { - // 复杂计算:多步数学运算组合 - uint8_t temp = byte; - temp = (temp * temp) % 251; // 使用质数避免快速收敛 - temp = temp ^ (temp >> 2); // 位运算 - temp = (temp + 17) % 256; // 加法和模运算 - temp = temp * 3 % 256; // 乘法 - byte = temp ^ (temp << 1); // 最终位运算 + byte = base16384::test::kernels_basic(byte); } } auto end_time = std::chrono::high_resolution_clock::now(); @@ -91,15 +86,8 @@ int main() { start_time = std::chrono::high_resolution_clock::now(); auto errn = base16384::errors::try_failed([&]() { for (int j = 0; j < iter_count; j++) { - q.parallel_for(sycl::range<1>(N), [=](sycl::id<1> i) { - // 复杂计算:多步数学运算组合 - uint8_t temp = data[i]; - temp = (temp * temp) % 251; // 使用质数避免快速收敛 - temp = temp ^ (temp >> 2); // 位运算 - temp = (temp + 17) % 256; // 加法和模运算 - temp = temp * 3 % 256; // 乘法 - data[i] = temp ^ (temp << 1); // 最终位运算 - }); + q.parallel_for(sycl::range<1>(N), + [=](sycl::id<1> i) { data[i] = base16384::test::kernels_basic(data[i]); }); } q.wait(); }); @@ -120,13 +108,7 @@ int main() { q.parallel_for(sycl::nd_range<1>(N, work_group_size), [=](sycl::nd_item<1> item) { // sub-group size const auto i = item.get_global_id(0); - // 复杂计算:多步数学运算组合 - uint8_t temp = data[i]; - temp = (temp * temp) % 251; // 使用质数避免快速收敛 - temp = temp ^ (temp >> 2); // 位运算 - temp = (temp + 17) % 256; // 加法和模运算 - temp = temp * 3 % 256; // 乘法 - data[i] = temp ^ (temp << 1); // 最终位运算 + data[i] = base16384::test::kernels_basic(data[i]); }); } q.wait();