diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cbfbe66..10cf30b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -39,9 +39,6 @@ jobs: - name: Build cmd/gen run: go build ./cmd/gen - - name: Build cmd/func2kernel - run: go build ./cmd/func2kernel - - name: Run tests run: go test -v -count=1 ./... diff --git a/README.md b/README.md index 1ec34a4..732c68e 100644 --- a/README.md +++ b/README.md @@ -215,7 +215,6 @@ gozel/ │ └── command.go # Command queues, lists, barriers ├── internal/zecall/ # purego FFI layer (loads ze_loader at runtime) ├── cmd/gen/ # Code generator: parses L0 headers → Go source -├── cmd/func2kernel/ # LLVM IR transformer for SPIR-V kernel preparation ├── spec/ # Optional L0 SDK headers for dev purpose (input to cmd/gen) └── examples/ ├── quick_start/ # The quick start shown in this README @@ -267,18 +266,14 @@ This invokes `cmd/gen` with the local `spec/` directory as configured in `doc.go GPU kernels in the [examples](examples) folder are written in SYCL C++ and compiled to SPIR-V for embedding into Go programs, which is a little bit hacky. You can also use `ocloc`, which is a common practice and you can search for the build doc elsewhere. The build pipeline uses `go generate` directives: ``` -main.cpp ──clang++ -fsycl──▶ device_func.bc +main.cpp ──clang++ -fsycl──▶ device_kern.bc │ sycl-post-link │ - ▼ device_func_0.bc + ▼ device_kern_0.bc │ clang++ -emit-llvm -S │ - ▼ device_func.ll - │ - cmd/func2kernel ← transforms spir_func → spir_kernel - │ ▼ device_kern.ll │ llvm-spirv @@ -297,5 +292,3 @@ cd examples/vadd go generate # compiles main.cpp → main.spv go run main.go # runs vector addition on GPU ``` - -The `cmd/func2kernel` tool handles the LLVM IR transformation (`spir_func` → `spir_kernel`, address space 4 → 1) required by the SPIR-V backend. diff --git a/cmd/func2kernel/main.go b/cmd/func2kernel/main.go deleted file mode 100644 index 778b5de..0000000 --- a/cmd/func2kernel/main.go +++ /dev/null @@ -1,28 +0,0 @@ -package main - -import ( - "bufio" - "os" - "strings" -) - -func main() { - f, err := os.Open(os.Args[1]) - if err != nil { - panic(err) - } - defer f.Close() - fo, err := os.Create(os.Args[2]) - if err != nil { - panic(err) - } - defer fo.Close() - scan := bufio.NewScanner(f) - for scan.Scan() { - t := scan.Text() - t = strings.ReplaceAll(t, " spir_func ", " spir_kernel ") - t = strings.ReplaceAll(t, "ptr addrspace(4)", "ptr addrspace(1)") - fo.WriteString(t) - fo.WriteString("\n") - } -} diff --git a/examples/vadd/main.cpp b/examples/vadd/main.cpp index b81eae0..702557f 100644 --- a/examples/vadd/main.cpp +++ b/examples/vadd/main.cpp @@ -1,6 +1,6 @@ #include -extern "C" SYCL_EXTERNAL +extern "C" SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((sycl::ext::oneapi::experimental::nd_range_kernel<1>)) void vector_add(float* a, float* b) { auto item = sycl::ext::oneapi::this_work_item::get_nd_item<1>(); int idx = item.get_global_id(0); diff --git a/examples/vadd/main.go b/examples/vadd/main.go index 774b436..63887dc 100644 --- a/examples/vadd/main.go +++ b/examples/vadd/main.go @@ -15,11 +15,9 @@ import ( "github.com/fumiama/gozel/ze" ) -//go:generate clang++ -fsycl -fsycl-device-only -fsycl-targets=spirv64 -faddrsig -Xclang -emit-llvm-bc main.cpp -o device_func.bc -//go:generate sycl-post-link -symbols -split=auto -o device_func.table device_func.bc -//go:generate clang++ -target spirv64-unknown-unknown -S -emit-llvm -x ir device_func_0.bc -o device_func.ll -//go:generate go run ../../cmd/func2kernel device_func.ll device_kern.ll -//go:generate clang++ -target spirv64-unknown-unknown -c -emit-llvm -x ir device_kern.ll -o device_kern.bc +//go:generate clang++ -fsycl -fsycl-device-only -fsycl-targets=spirv64 -Xclang -emit-llvm-bc main.cpp -o device_kern.bc +//go:generate sycl-post-link -symbols -split=auto -o device_kern.table device_kern.bc +//go:generate clang++ -target spirv64-unknown-unknown -S -emit-llvm -x ir device_kern_0.bc -o device_kern.ll //go:generate llvm-spirv -o main.spv device_kern.bc //go:generate clang++ -target spirv64-unknown-unknown -S -emit-llvm -x ir device_kern.bc -o main.ll @@ -131,7 +129,7 @@ func main() { } defer mod.Destroy() - krn, err := mod.KernelCreate("vector_add") + krn, err := mod.KernelCreate("__sycl_kernel_vector_add") if err != nil { panic(err) } diff --git a/examples/vadd/main.ll b/examples/vadd/main.ll index 9ffc1b6..e742496 100644 --- a/examples/vadd/main.ll +++ b/examples/vadd/main.ll @@ -6,30 +6,31 @@ target triple = "spirv64-unknown-unknown" @__spirv_BuiltInGlobalInvocationId = external local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write) -define spir_kernel void @vector_add(ptr addrspace(1) noundef captures(none) %0, ptr addrspace(1) noundef readonly captures(none) %1) local_unnamed_addr #0 !sycl_fixed_targets !7 { - %3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32, !noalias !8 +define spir_kernel void @__sycl_kernel_vector_add(ptr addrspace(1) noundef align 4 captures(none) %0, ptr addrspace(1) noundef readonly align 4 captures(none) %1) local_unnamed_addr #0 !kernel_arg_buffer_location !6 !sycl_fixed_targets !7 !sycl_kernel_omit_args !8 { + %3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32, !noalias !9 %4 = icmp ult i64 %3, 2147483648 tail call void @llvm.assume(i1 %4) - %5 = getelementptr inbounds nuw float, ptr addrspace(1) %1, i64 %3 - %6 = load float, ptr addrspace(1) %5, align 4, !tbaa !15 - %7 = getelementptr inbounds nuw float, ptr addrspace(1) %0, i64 %3 - %8 = load float, ptr addrspace(1) %7, align 4, !tbaa !15 + %5 = getelementptr inbounds float, ptr addrspace(1) %1, i64 %3 + %6 = load float, ptr addrspace(1) %5, align 4, !tbaa !16 + %7 = getelementptr inbounds float, ptr addrspace(1) %0, i64 %3 + %8 = load float, ptr addrspace(1) %7, align 4, !tbaa !16 %9 = fadd float %8, %6 - store float %9, ptr addrspace(1) %7, align 4, !tbaa !15 + store float %9, ptr addrspace(1) %7, align 4, !tbaa !16 ret void } ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) declare void @llvm.assume(i1 noundef) #1 -attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-entry-point" "sycl-module-id"="main.cpp" "sycl-optlevel"="2" } +declare dso_local spir_func i32 @_Z18__spirv_ocl_printfPU3AS2Kcz(ptr addrspace(2), ...) + +attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="main.cpp" "sycl-nd-range-kernel"="1" "sycl-optlevel"="2" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } !llvm.module.flags = !{!0, !1, !2} !opencl.spir.version = !{!3} !spirv.Source = !{!4} !llvm.ident = !{!5} -!sycl-esimd-split-status = !{!6} !0 = !{i32 1, !"wchar_size", i32 4} !1 = !{i32 1, !"sycl-device", i32 1} @@ -37,16 +38,17 @@ attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessib !3 = !{i32 1, i32 2} !4 = !{i32 4, i32 100000} !5 = !{!"clang version 21.0.0git (https://github.com/intel/llvm d5f649b706f63b5c74e1929bc95db8de91085560)"} -!6 = !{i8 0} +!6 = !{i32 -1, i32 -1} !7 = !{} -!8 = !{!9, !11, !13} -!9 = distinct !{!9, !10, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"} -!10 = distinct !{!10, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv"} -!11 = distinct !{!11, !12, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"} -!12 = distinct !{!12, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v"} -!13 = distinct !{!13, !14, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv: argument 0"} -!14 = distinct !{!14, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv"} -!15 = !{!16, !16, i64 0} -!16 = !{!"float", !17, i64 0} -!17 = !{!"omnipotent char", !18, i64 0} -!18 = !{!"Simple C++ TBAA"} +!8 = !{i1 false, i1 false} +!9 = !{!10, !12, !14} +!10 = distinct !{!10, !11, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"} +!11 = distinct !{!11, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv"} +!12 = distinct !{!12, !13, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"} +!13 = distinct !{!13, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v"} +!14 = distinct !{!14, !15, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv: argument 0"} +!15 = distinct !{!15, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv"} +!16 = !{!17, !17, i64 0} +!17 = !{!"float", !18, i64 0} +!18 = !{!"omnipotent char", !19, i64 0} +!19 = !{!"Simple C++ TBAA"} diff --git a/examples/vadd/main.spv b/examples/vadd/main.spv index 51c5691..0adf1e1 100644 Binary files a/examples/vadd/main.spv and b/examples/vadd/main.spv differ