mirror of
https://github.com/fumiama/base16384-sycl.git
synced 2026-06-05 00:32:49 +08:00
doc: add README&LICENSE
This commit is contained in:
@@ -1,3 +1,4 @@
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
#include <sycl/sycl.hpp>
|
||||
#include <vector>
|
||||
@@ -7,7 +8,8 @@
|
||||
|
||||
#include "errors.hpp"
|
||||
|
||||
static const int N = 4;
|
||||
static const int N = 65536;
|
||||
static const int work_group_size = 64;
|
||||
|
||||
int main() {
|
||||
#ifdef _WIN32
|
||||
@@ -28,20 +30,48 @@ int main() {
|
||||
std::cout << "其他" << std::endl;
|
||||
}
|
||||
|
||||
// CPU baseline test
|
||||
std::vector<int> cpu_data(N);
|
||||
for (int i = 0; i < N; i++) cpu_data[i] = i;
|
||||
|
||||
auto start_time = std::chrono::high_resolution_clock::now();
|
||||
for (int i = 0; i < N; i++) cpu_data[i] *= 2;
|
||||
auto end_time = std::chrono::high_resolution_clock::now();
|
||||
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
|
||||
|
||||
std::cout << "CPU (" << duration.count() << " us):" << std::endl;
|
||||
for (int i = 0; i < min(N, 64); i++) std::cout << " " << cpu_data[i];
|
||||
std::cout << "..." << std::endl;
|
||||
|
||||
int *data = sycl::malloc_shared<int>(N, q);
|
||||
for (int i = 0; i < N; i++) data[i] = i;
|
||||
|
||||
auto errn = failed([&]() {
|
||||
q.parallel_for(sycl::range<1>(1), [=](sycl::id<1>) {
|
||||
for (int i = 0; i < N; i++) {
|
||||
data[i] *= 2;
|
||||
}
|
||||
}).wait();
|
||||
});
|
||||
|
||||
// test basic parallel kernel
|
||||
start_time = std::chrono::high_resolution_clock::now();
|
||||
auto errn = base16384_try_failed(
|
||||
[&]() { q.parallel_for(sycl::range<1>(N), [=](sycl::id<1> i) { data[i] *= 2; }).wait(); });
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
|
||||
if (errn) return errn;
|
||||
|
||||
for (int i = 0; i < N; i++) std::cout << data[i] << std::endl;
|
||||
std::cout << "GPU基本并行 (" << duration.count() << " us):" << std::endl;
|
||||
for (int i = 0; i < min(N, 64); i++) std::cout << " " << data[i];
|
||||
std::cout << "..." << std::endl;
|
||||
|
||||
start_time = std::chrono::high_resolution_clock::now();
|
||||
errn = base16384_try_failed([&]() {
|
||||
q.parallel_for(sycl::nd_range<1>(N, work_group_size), [=](sycl::nd_item<1> item) {
|
||||
int i = item.get_global_id(0);
|
||||
data[i] /= 2;
|
||||
}).wait();
|
||||
});
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
|
||||
if (errn) return errn;
|
||||
|
||||
std::cout << "GPU高级并行 (" << duration.count() << " us):" << std::endl;
|
||||
for (int i = 0; i < min(N, 64); i++) std::cout << " " << data[i];
|
||||
std::cout << "..." << std::endl;
|
||||
|
||||
sycl::free(data, q);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user