1
0
mirror of https://github.com/fumiama/gozel.git synced 2026-06-05 00:10:24 +08:00

feat(example): impl. vadd

This commit is contained in:
源文雨
2026-03-25 00:25:24 +08:00
parent 1111b0ecc1
commit 25cb3b9741
16 changed files with 531 additions and 24 deletions

View File

@@ -1,15 +0,0 @@
package main
import (
"fmt"
"github.com/fumiama/gozel/ze"
)
// main calls ze.InitGPUDrivers and prints whatever it returns; it panics if
// the call reports an error.
func main() {
	drivers, initErr := ze.InitGPUDrivers()
	if initErr != nil {
		panic(initErr)
	}
	fmt.Println(drivers)
}

1
cmd/examples/vadd/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
/device*

View File

@@ -0,0 +1,9 @@
#include <sycl/sycl.hpp>
// vector_add adds b element-wise into a: each work item updates the single
// element selected by its global id in dimension 0. No bounds check is done,
// so the launch must not cover more work items than a and b have elements.
extern "C" SYCL_EXTERNAL
void vector_add(double* a, double* b) {
  const auto work_item = sycl::ext::oneapi::this_work_item::get_nd_item<1>();
  const int i = work_item.get_global_id(0);
  a[i] = a[i] + b[i];
}

186
cmd/examples/vadd/main.go Normal file
View File

@@ -0,0 +1,186 @@
package main
import (
_ "embed"
"fmt"
"math/rand"
"os"
"unsafe"
"github.com/fumiama/gozel"
"github.com/fumiama/gozel/ze"
)
// Build pipeline for the embedded kernel (run with `go generate`):
//  1. compile the device code of main.cpp to LLVM bitcode (device_func.bc);
//  2. run sycl-post-link on it (the following steps consume device_func_0.bc,
//     presumably one of its outputs — confirm against the tool's docs);
//  3. emit device_func.spv and a textual device_func.ll from device_func_0.bc;
//  4. rewrite device_func.ll into device_kern.ll with ../../func2kernel;
//  5. assemble device_kern.ll to device_kern.bc, translate it to main.spv and
//     dump it back to text as main.ll.
//go:generate clang++ -fsycl -fsycl-device-only -fno-sycl-use-footer -faddrsig -Xclang -emit-llvm-bc main.cpp -o device_func.bc
//go:generate sycl-post-link -symbols -split=auto -o device_func.table device_func.bc
//go:generate llvm-spirv -o device_func.spv device_func_0.bc
//go:generate clang++ -target spir64-unknown-unknown -S -emit-llvm -x ir device_func_0.bc -o device_func.ll
//go:generate go run ../../func2kernel device_func.ll device_kern.ll
//go:generate clang++ -target spir64-unknown-unknown -c -emit-llvm -x ir device_kern.ll -o device_kern.bc
//go:generate llvm-spirv -o main.spv device_kern.bc
//go:generate clang++ -target spir64-unknown-unknown -S -emit-llvm -x ir device_kern.bc -o main.ll

// kernelspv holds the bytes of main.spv, embedded at build time; main passes
// it to ModuleCreate.
//go:embed main.spv
var kernelspv []byte
// Problem size for the example.
const (
// X, Y, Z are the values main passes to SetGroupSize.
X, Y, Z = 1024, 1, 1
// N is the number of float64 elements in each of the two vectors.
N = X * Y * Z
// bufsz is the size in bytes of one vector of N float64 values.
bufsz = N * unsafe.Sizeof(float64(0))
)
// main runs the vector-add example and exits with status 1 if any element
// read back from the device differs from the sum computed on the host.
func main() {
	// os.Exit does not run deferred calls. All work that registers cleanup
	// therefore lives in run, and the exit status is set only after run has
	// returned and its deferred Destroy/MemFree calls have executed.
	if !run() {
		os.Exit(1)
	}
}

// run fills two vectors of N random float64 values, adds the second into the
// first on the first device of the first GPU driver, and compares the result
// with the host-side sum. It prints one line per element and reports whether
// every element matched. Any error returned by a ze call causes a panic.
func run() bool {
	// floatbuf[:N] is the first operand, floatbuf[N:] the second; it is kept
	// untouched as the reference for the final comparison.
	floatbuf := make([]float64, 2*N)
	for i := range floatbuf {
		floatbuf[i] = rand.Float64()
	}

	gpus, err := ze.InitGPUDrivers()
	if err != nil {
		panic(err)
	}
	if len(gpus) == 0 {
		panic("no gpu available")
	}
	gpu := gpus[0]

	// NOTE(review): ctx is never released here — confirm whether the context
	// type offers a Destroy method that should be deferred.
	ctx, err := gpu.ContextCreate()
	if err != nil {
		panic(err)
	}

	devs, err := gpu.DeviceGet()
	if err != nil {
		panic(err)
	}
	if len(devs) == 0 {
		panic("no device available")
	}
	dev := devs[0]

	q, err := ctx.CommandQueueCreate(dev)
	if err != nil {
		panic(err)
	}
	defer q.Destroy()

	// Host-side staging buffers, one per vector.
	hbufV1, err := ctx.MemAllocHost(bufsz, 1)
	if err != nil {
		panic(err)
	}
	defer ctx.MemFree(hbufV1)
	hbufV2, err := ctx.MemAllocHost(bufsz, 1)
	if err != nil {
		panic(err)
	}
	defer ctx.MemFree(hbufV2)

	// Device-side buffers the kernel operates on.
	dbufV1, err := ctx.MemAllocDevice(dev, bufsz, 1)
	if err != nil {
		panic(err)
	}
	defer ctx.MemFree(dbufV1)
	dbufV2, err := ctx.MemAllocDevice(dev, bufsz, 1)
	if err != nil {
		panic(err)
	}
	defer ctx.MemFree(dbufV2)

	// View the host allocations as []float64 and load the inputs into them.
	zev1 := unsafe.Slice((*float64)(hbufV1), N)
	zev2 := unsafe.Slice((*float64)(hbufV2), N)
	copy(zev1, floatbuf[:N])
	copy(zev2, floatbuf[N:])

	mod, err := ctx.ModuleCreate(dev, kernelspv)
	if err != nil {
		panic(err)
	}
	defer mod.Destroy()

	krn, err := mod.KernelCreate("vector_add")
	if err != nil {
		panic(err)
	}
	defer krn.Destroy()

	// Each kernel argument is the device pointer itself, so the address of the
	// variable holding it is passed together with the size of a pointer.
	err = krn.SetArgumentValue(0, unsafe.Sizeof(uintptr(0)), unsafe.Pointer(&dbufV1))
	if err != nil {
		panic(err)
	}
	err = krn.SetArgumentValue(1, unsafe.Sizeof(uintptr(0)), unsafe.Pointer(&dbufV2))
	if err != nil {
		panic(err)
	}
	err = krn.SetGroupSize(X, Y, Z)
	if err != nil {
		panic(err)
	}

	lst, err := ctx.CommandListCreate(dev)
	if err != nil {
		panic(err)
	}
	defer lst.Destroy()

	// Recorded sequence: upload both inputs, barrier, launch, barrier,
	// download the result into the first host buffer.
	err = lst.AppendMemoryCopy(dbufV1, hbufV1, bufsz)
	if err != nil {
		panic(err)
	}
	err = lst.AppendMemoryCopy(dbufV2, hbufV2, bufsz)
	if err != nil {
		panic(err)
	}
	err = lst.AppendBarrier()
	if err != nil {
		panic(err)
	}
	// A single group of X*Y*Z work items covers all N elements.
	err = lst.AppendLaunchKernel(krn, &gozel.ZeGroupCount{
		Groupcountx: 1, Groupcounty: 1, Groupcountz: 1,
	})
	if err != nil {
		panic(err)
	}
	err = lst.AppendBarrier()
	if err != nil {
		panic(err)
	}
	err = lst.AppendMemoryCopy(hbufV1, dbufV1, bufsz)
	if err != nil {
		panic(err)
	}
	err = lst.Close()
	if err != nil {
		panic(err)
	}

	err = q.ExecuteCommandLists(lst)
	if err != nil {
		panic(err)
	}
	err = q.Synchronize()
	if err != nil {
		panic(err)
	}

	// Compare every element read back from the device with the host sum.
	ok := true
	for i := range N {
		expect := floatbuf[i] + floatbuf[N+i]
		if zev1[i] != expect {
			ok = false
			fmt.Printf("[%05d] expect %f = %f + %f, got %f.\n", i, expect, floatbuf[i], floatbuf[N+i], zev1[i])
		} else {
			fmt.Printf("[%05d] valid %f = %f + %f, got %f.\n", i, expect, floatbuf[i], floatbuf[N+i], zev1[i])
		}
	}
	return ok
}

52
cmd/examples/vadd/main.ll Normal file
View File

@@ -0,0 +1,52 @@
; ModuleID = 'device_kern.bc'
source_filename = "main.cpp"
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1"
target triple = "spir64-unknown-unknown"
@__spirv_BuiltInGlobalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
; vector_add kernel: adds the double at index id of %1 into the double at
; index id of %0, where id is this invocation's global id in dimension 0.
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write)
define dso_local spir_kernel void @vector_add(ptr addrspace(1) noundef captures(none) %0, ptr addrspace(1) noundef readonly captures(none) %1) local_unnamed_addr #0 !sycl_used_aspects !8 !sycl_fixed_targets !10 {
; %3 = first i64 element of the GlobalInvocationId builtin vector.
%3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32, !noalias !11
; Tell the optimizer the id is below 2^31.
%4 = icmp ult i64 %3, 2147483648
tail call void @llvm.assume(i1 %4)
; %6 = value at index %3 of the second argument.
%5 = getelementptr inbounds nuw double, ptr addrspace(1) %1, i64 %3
%6 = load double, ptr addrspace(1) %5, align 8
; %8 = value at index %3 of the first argument.
%7 = getelementptr inbounds nuw double, ptr addrspace(1) %0, i64 %3
%8 = load double, ptr addrspace(1) %7, align 8
; Store the sum back into the first argument at the same index.
%9 = fadd double %8, %6
store double %9, ptr addrspace(1) %7, align 8
ret void
}
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
declare void @llvm.assume(i1 noundef) #1
attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-entry-point" "sycl-module-id"="main.cpp" "sycl-optlevel"="2" }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
!llvm.dependent-libraries = !{!0}
!llvm.module.flags = !{!1, !2, !3}
!opencl.spir.version = !{!4}
!spirv.Source = !{!5}
!llvm.ident = !{!6}
!sycl-esimd-split-status = !{!7}
!0 = !{!"libcpmt"}
!1 = !{i32 1, !"wchar_size", i32 2}
!2 = !{i32 1, !"sycl-device", i32 1}
!3 = !{i32 7, !"frame-pointer", i32 2}
!4 = !{i32 1, i32 2}
!5 = !{i32 4, i32 100000}
!6 = !{!"clang version 21.0.0git (https://github.com/intel/llvm d5f649b706f63b5c74e1929bc95db8de91085560)"}
!7 = !{i8 0}
!8 = !{!9}
!9 = !{!"fp64", i32 6}
!10 = !{}
!11 = !{!12, !14, !16}
!12 = distinct !{!12, !13, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"}
!13 = distinct !{!13, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv"}
!14 = distinct !{!14, !15, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"}
!15 = distinct !{!15, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v"}
!16 = distinct !{!16, !17, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv: argument 0"}
!17 = distinct !{!17, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv"}

BIN
cmd/examples/vadd/main.spv Normal file

Binary file not shown.

28
cmd/func2kernel/main.go Normal file
View File

@@ -0,0 +1,28 @@
package main
import (
"bufio"
"os"
"strings"
)
// main copies the text file named by the first command-line argument to the
// file named by the second one, line by line, replacing every " spir_func "
// with " spir_kernel " and every "ptr addrspace(4)" with "ptr addrspace(1)".
// Each output line is terminated by a single "\n". It panics on a missing
// argument or on any I/O error, including a read that ends before the input
// is exhausted.
func main() {
	// Upper bound for a single input line. The default bufio.Scanner limit
	// (64 KiB) is easily exceeded by long lines, and hitting it would stop the
	// copy early.
	const maxLineLen = 64 << 20

	if len(os.Args) < 3 {
		panic("usage: func2kernel <input.ll> <output.ll>")
	}

	in, err := os.Open(os.Args[1])
	if err != nil {
		panic(err)
	}
	defer in.Close()

	out, err := os.Create(os.Args[2])
	if err != nil {
		panic(err)
	}
	// Safety net for the panic paths below; the success path closes the file
	// explicitly so that a failing Close is reported.
	defer out.Close()

	w := bufio.NewWriter(out)
	scan := bufio.NewScanner(in)
	scan.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxLineLen)
	for scan.Scan() {
		// Write errors are sticky on bufio.Writer and resurface from Flush,
		// so they are checked once after the loop.
		w.WriteString(rewriteLine(scan.Text()))
		w.WriteByte('\n')
	}
	// Scan returning false can mean a read error or an over-long line, not
	// only end of input; without this check the output would be silently
	// truncated.
	if err := scan.Err(); err != nil {
		panic(err)
	}
	if err := w.Flush(); err != nil {
		panic(err)
	}
	if err := out.Close(); err != nil {
		panic(err)
	}
}

// rewriteLine applies the two textual substitutions to one line.
func rewriteLine(line string) string {
	line = strings.ReplaceAll(line, " spir_func ", " spir_kernel ")
	return strings.ReplaceAll(line, "ptr addrspace(4)", "ptr addrspace(1)")
}