From 25cb3b974115268a542e4d46fe6a048f6f5818cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Wed, 25 Mar 2026 00:25:24 +0800 Subject: [PATCH] feat(example): impl. vadd --- .gitignore | 4 +- cmd/examples/gemm/main.go | 15 --- cmd/examples/vadd/.gitignore | 1 + cmd/examples/vadd/main.cpp | 9 ++ cmd/examples/vadd/main.go | 186 +++++++++++++++++++++++++++++++++++ cmd/examples/vadd/main.ll | 52 ++++++++++ cmd/examples/vadd/main.spv | Bin 0 -> 924 bytes cmd/func2kernel/main.go | 28 ++++++ internal/zecall/generic.go | 5 +- ze/command.go | 91 +++++++++++++++++ ze/context.go | 21 ++++ ze/device.go | 21 ++++ ze/init.go | 15 ++- ze/kernel.go | 41 ++++++++ ze/mem.go | 35 +++++++ ze/module.go | 31 ++++++ 16 files changed, 531 insertions(+), 24 deletions(-) delete mode 100644 cmd/examples/gemm/main.go create mode 100644 cmd/examples/vadd/.gitignore create mode 100644 cmd/examples/vadd/main.cpp create mode 100644 cmd/examples/vadd/main.go create mode 100644 cmd/examples/vadd/main.ll create mode 100644 cmd/examples/vadd/main.spv create mode 100644 cmd/func2kernel/main.go create mode 100644 ze/command.go create mode 100644 ze/context.go create mode 100644 ze/device.go create mode 100644 ze/kernel.go create mode 100644 ze/mem.go create mode 100644 ze/module.go diff --git a/.gitignore b/.gitignore index 4defa92..8e48f2e 100644 --- a/.gitignore +++ b/.gitignore @@ -28,8 +28,8 @@ go.work.sum .env # Editor/IDE -# .idea/ -# .vscode/ +.idea/ +.vscode/ # Local spec /spec diff --git a/cmd/examples/gemm/main.go b/cmd/examples/gemm/main.go deleted file mode 100644 index 1fd19b4..0000000 --- a/cmd/examples/gemm/main.go +++ /dev/null @@ -1,15 +0,0 @@ -package main - -import ( - "fmt" - - "github.com/fumiama/gozel/ze" -) - -func main() { - hs, err := ze.InitGPUDrivers() - if err != nil { - panic(err) - } - fmt.Println(hs) -} diff --git a/cmd/examples/vadd/.gitignore b/cmd/examples/vadd/.gitignore new file mode 100644 index 0000000..2f777a5 --- /dev/null +++ b/cmd/examples/vadd/.gitignore @@ -0,0 +1 @@ +/device* diff --git a/cmd/examples/vadd/main.cpp b/cmd/examples/vadd/main.cpp new file mode 100644 index 0000000..c755b18 --- /dev/null +++ b/cmd/examples/vadd/main.cpp @@ -0,0 +1,9 @@ +#include + +extern "C" SYCL_EXTERNAL +void vector_add(double* a, double* b) { + auto item = sycl::ext::oneapi::this_work_item::get_nd_item<1>(); + int idx = item.get_global_id(0); + + a[idx] += b[idx]; +} diff --git a/cmd/examples/vadd/main.go b/cmd/examples/vadd/main.go new file mode 100644 index 0000000..eb4eb4c --- /dev/null +++ b/cmd/examples/vadd/main.go @@ -0,0 +1,186 @@ +package main + +import ( + _ "embed" + "fmt" + "math/rand" + "os" + "unsafe" + + "github.com/fumiama/gozel" + "github.com/fumiama/gozel/ze" +) + +//go:generate clang++ -fsycl -fsycl-device-only -fno-sycl-use-footer -faddrsig -Xclang -emit-llvm-bc main.cpp -o device_func.bc +//go:generate sycl-post-link -symbols -split=auto -o device_func.table device_func.bc +//go:generate llvm-spirv -o device_func.spv device_func_0.bc +//go:generate clang++ -target spir64-unknown-unknown -S -emit-llvm -x ir device_func_0.bc -o device_func.ll +//go:generate go run ../../func2kernel device_func.ll device_kern.ll +//go:generate clang++ -target spir64-unknown-unknown -c -emit-llvm -x ir device_kern.ll -o device_kern.bc +//go:generate llvm-spirv -o main.spv device_kern.bc +//go:generate clang++ -target spir64-unknown-unknown -S -emit-llvm -x ir device_kern.bc -o main.ll + +//go:embed main.spv +var kernelspv []byte + +const ( + X, Y, Z = 1024, 1, 1 + N = X * Y * Z + bufsz = N * unsafe.Sizeof(float64(0)) +) + +func main() { + floatbuf := make([]float64, 2*N) + for i := range floatbuf { + floatbuf[i] = rand.Float64() + } + + gpus, err := ze.InitGPUDrivers() + if err != nil { + panic(err) + } + if len(gpus) == 0 { + panic("no gpu available") + } + gpu := gpus[0] + + ctx, err := gpu.ContextCreate() + if err != nil { + panic(err) + } + + devs, err := gpu.DeviceGet() + if err != nil { + panic(err) + } + if len(devs) == 0 { + panic("no device available") + } + dev := devs[0] + + q, err := ctx.CommandQueueCreate(dev) + if err != nil { + panic(err) + } + defer q.Destroy() + + hbuf_v1, err := ctx.MemAllocHost(bufsz, 1) + if err != nil { + panic(err) + } + defer ctx.MemFree(hbuf_v1) + + hbuf_v2, err := ctx.MemAllocHost(bufsz, 1) + if err != nil { + panic(err) + } + defer ctx.MemFree(hbuf_v2) + + dbuf_v1, err := ctx.MemAllocDevice(dev, bufsz, 1) + if err != nil { + panic(err) + } + defer ctx.MemFree(dbuf_v1) + + dbuf_v2, err := ctx.MemAllocDevice(dev, bufsz, 1) + if err != nil { + panic(err) + } + defer ctx.MemFree(dbuf_v2) + + zev1, zev2 := unsafe.Slice((*float64)(hbuf_v1), N), unsafe.Slice((*float64)(hbuf_v2), N) + copy(zev1, floatbuf[:N]) + copy(zev2, floatbuf[N:]) + + mod, err := ctx.ModuleCreate(dev, kernelspv) + if err != nil { + panic(err) + } + defer mod.Destroy() + + krn, err := mod.KernelCreate("vector_add") + if err != nil { + panic(err) + } + defer krn.Destroy() + + err = krn.SetArgumentValue(0, unsafe.Sizeof(uintptr(0)), unsafe.Pointer(&dbuf_v1)) + if err != nil { + panic(err) + } + err = krn.SetArgumentValue(1, unsafe.Sizeof(uintptr(0)), unsafe.Pointer(&dbuf_v2)) + if err != nil { + panic(err) + } + err = krn.SetGroupSize(X, Y, Z) + if err != nil { + panic(err) + } + + lst, err := ctx.CommandListCreate(dev) + if err != nil { + panic(err) + } + defer lst.Destroy() + + err = lst.AppendMemoryCopy(dbuf_v1, hbuf_v1, bufsz) + if err != nil { + panic(err) + } + err = lst.AppendMemoryCopy(dbuf_v2, hbuf_v2, bufsz) + if err != nil { + panic(err) + } + + err = lst.AppendBarrier() + if err != nil { + panic(err) + } + + err = lst.AppendLaunchKernel(krn, &gozel.ZeGroupCount{ + Groupcountx: 1, Groupcounty: 1, Groupcountz: 1, + }) + if err != nil { + panic(err) + } + + err = lst.AppendBarrier() + if err != nil { + panic(err) + } + + err = lst.AppendMemoryCopy(hbuf_v1, dbuf_v1, bufsz) + if err != nil { + panic(err) + } + + err = lst.Close() + if err != nil { + panic(err) + } + + err = q.ExecuteCommandLists(lst) + if err != nil { + panic(err) + } + + err = q.Synchronize() + if err != nil { + panic(err) + } + + fail := false + for i := range N { + expect := floatbuf[i] + floatbuf[N+i] + if zev1[i] != expect { + fail = true + fmt.Printf("[%05d] expect %f = %f + %f, got %f.\n", i, expect, floatbuf[i], floatbuf[N+i], zev1[i]) + } else { + fmt.Printf("[%05d] valid %f = %f + %f, got %f.\n", i, expect, floatbuf[i], floatbuf[N+i], zev1[i]) + } + } + + if fail { + os.Exit(1) + } +} diff --git a/cmd/examples/vadd/main.ll b/cmd/examples/vadd/main.ll new file mode 100644 index 0000000..68cf7ad --- /dev/null +++ b/cmd/examples/vadd/main.ll @@ -0,0 +1,52 @@ +; ModuleID = 'device_kern.bc' +source_filename = "main.cpp" +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spir64-unknown-unknown" + +@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 + +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write) +define dso_local spir_kernel void @vector_add(ptr addrspace(1) noundef captures(none) %0, ptr addrspace(1) noundef readonly captures(none) %1) local_unnamed_addr #0 !sycl_used_aspects !8 !sycl_fixed_targets !10 { + %3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32, !noalias !11 + %4 = icmp ult i64 %3, 2147483648 + tail call void @llvm.assume(i1 %4) + %5 = getelementptr inbounds nuw double, ptr addrspace(1) %1, i64 %3 + %6 = load double, ptr addrspace(1) %5, align 8 + %7 = getelementptr inbounds nuw double, ptr addrspace(1) %0, i64 %3 + %8 = load double, ptr addrspace(1) %7, align 8 + %9 = fadd double %8, %6 + store double %9, ptr addrspace(1) %7, align 8 + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) +declare void @llvm.assume(i1 noundef) #1 + +attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-entry-point" "sycl-module-id"="main.cpp" "sycl-optlevel"="2" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } + +!llvm.dependent-libraries = !{!0} +!llvm.module.flags = !{!1, !2, !3} +!opencl.spir.version = !{!4} +!spirv.Source = !{!5} +!llvm.ident = !{!6} +!sycl-esimd-split-status = !{!7} + +!0 = !{!"libcpmt"} +!1 = !{i32 1, !"wchar_size", i32 2} +!2 = !{i32 1, !"sycl-device", i32 1} +!3 = !{i32 7, !"frame-pointer", i32 2} +!4 = !{i32 1, i32 2} +!5 = !{i32 4, i32 100000} +!6 = !{!"clang version 21.0.0git (https://github.com/intel/llvm d5f649b706f63b5c74e1929bc95db8de91085560)"} +!7 = !{i8 0} +!8 = !{!9} +!9 = !{!"fp64", i32 6} +!10 = !{} +!11 = !{!12, !14, !16} +!12 = distinct !{!12, !13, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"} +!13 = distinct !{!13, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv"} +!14 = distinct !{!14, !15, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"} +!15 = distinct !{!15, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v"} +!16 = distinct !{!16, !17, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv: argument 0"} +!17 = distinct !{!17, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv"} diff --git a/cmd/examples/vadd/main.spv b/cmd/examples/vadd/main.spv new file mode 100644 index 0000000000000000000000000000000000000000..400381eb3e532db5dffefd61441a55c434a7e17c GIT binary patch literal 924 zcma))$w~u36h&WVHD)rCvk^fM#iawH1O-Dt@DrMtWDyA6A!)O6p&P%$Pw?~H2;S3n zLn8#NaPy{H_f@^>RPrEa>4X(6YrFQlDwedgTx=$^+0d?pHZPl3H6i;r8M=25r!#+G z(n)ET1t?igs%9I)eAxHBsnZ*XgJxMg@_Rx`Kl**VCsae#%yDLu(RA+IK99zJ*X@kG zr{1{h&b@xmA9-$9GU8>7^Tf3~RtWYV{ZoVUchsMrjlfR_KKI*>#V7JxL7WSEq95<8 zP&#Ry-t167k9c)K%v`8%{;6**h~d}8XSQvDb*wJLl_0WLOjDecEO~58F|*}7#8p+$ zYADLdwgos#vT(@l%UhO(M;^c_%ECdrrvEkBtC&8uGcnl5yRgtXS1&Y}P2@9|FKHBH zUqbDYd^E15K##IO4!