mirror of
https://github.com/fumiama/gozel.git
synced 2026-06-05 08:20:24 +08:00
feat(examples): simplify vadd spv gen (#4)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
This commit is contained in:
3
.github/workflows/ci.yml
vendored
3
.github/workflows/ci.yml
vendored
@@ -39,9 +39,6 @@ jobs:
|
|||||||
- name: Build cmd/gen
|
- name: Build cmd/gen
|
||||||
run: go build ./cmd/gen
|
run: go build ./cmd/gen
|
||||||
|
|
||||||
- name: Build cmd/func2kernel
|
|
||||||
run: go build ./cmd/func2kernel
|
|
||||||
|
|
||||||
- name: Run tests
|
- name: Run tests
|
||||||
run: go test -v -count=1 ./...
|
run: go test -v -count=1 ./...
|
||||||
|
|
||||||
|
|||||||
11
README.md
11
README.md
@@ -215,7 +215,6 @@ gozel/
|
|||||||
│ └── command.go # Command queues, lists, barriers
|
│ └── command.go # Command queues, lists, barriers
|
||||||
├── internal/zecall/ # purego FFI layer (loads ze_loader at runtime)
|
├── internal/zecall/ # purego FFI layer (loads ze_loader at runtime)
|
||||||
├── cmd/gen/ # Code generator: parses L0 headers → Go source
|
├── cmd/gen/ # Code generator: parses L0 headers → Go source
|
||||||
├── cmd/func2kernel/ # LLVM IR transformer for SPIR-V kernel preparation
|
|
||||||
├── spec/ # Optional L0 SDK headers for dev purpose (input to cmd/gen)
|
├── spec/ # Optional L0 SDK headers for dev purpose (input to cmd/gen)
|
||||||
└── examples/
|
└── examples/
|
||||||
├── quick_start/ # The quick start shown in this README
|
├── quick_start/ # The quick start shown in this README
|
||||||
@@ -267,18 +266,14 @@ This invokes `cmd/gen` with the local `spec/` directory as configured in `doc.go
|
|||||||
GPU kernels in the [examples](examples) folder are written in SYCL C++ and compiled to SPIR-V for embedding into Go programs, which is a little bit hacky. You can also use `ocloc`, which is a common practice and you can search for the build doc elsewhere. The build pipeline uses `go generate` directives:
|
GPU kernels in the [examples](examples) folder are written in SYCL C++ and compiled to SPIR-V for embedding into Go programs, which is a little bit hacky. You can also use `ocloc`, which is a common practice and you can search for the build doc elsewhere. The build pipeline uses `go generate` directives:
|
||||||
|
|
||||||
```
|
```
|
||||||
main.cpp ──clang++ -fsycl──▶ device_func.bc
|
main.cpp ──clang++ -fsycl──▶ device_kern.bc
|
||||||
│
|
│
|
||||||
sycl-post-link
|
sycl-post-link
|
||||||
│
|
│
|
||||||
▼ device_func_0.bc
|
▼ device_kern_0.bc
|
||||||
│
|
│
|
||||||
clang++ -emit-llvm -S
|
clang++ -emit-llvm -S
|
||||||
│
|
│
|
||||||
▼ device_func.ll
|
|
||||||
│
|
|
||||||
cmd/func2kernel ← transforms spir_func → spir_kernel
|
|
||||||
│
|
|
||||||
▼ device_kern.ll
|
▼ device_kern.ll
|
||||||
│
|
│
|
||||||
llvm-spirv
|
llvm-spirv
|
||||||
@@ -297,5 +292,3 @@ cd examples/vadd
|
|||||||
go generate # compiles main.cpp → main.spv
|
go generate # compiles main.cpp → main.spv
|
||||||
go run main.go # runs vector addition on GPU
|
go run main.go # runs vector addition on GPU
|
||||||
```
|
```
|
||||||
|
|
||||||
The `cmd/func2kernel` tool handles the LLVM IR transformation (`spir_func` → `spir_kernel`, address space 4 → 1) required by the SPIR-V backend.
|
|
||||||
|
|||||||
@@ -1,28 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bufio"
|
|
||||||
"os"
|
|
||||||
"strings"
|
|
||||||
)
|
|
||||||
|
|
||||||
func main() {
|
|
||||||
f, err := os.Open(os.Args[1])
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
fo, err := os.Create(os.Args[2])
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
defer fo.Close()
|
|
||||||
scan := bufio.NewScanner(f)
|
|
||||||
for scan.Scan() {
|
|
||||||
t := scan.Text()
|
|
||||||
t = strings.ReplaceAll(t, " spir_func ", " spir_kernel ")
|
|
||||||
t = strings.ReplaceAll(t, "ptr addrspace(4)", "ptr addrspace(1)")
|
|
||||||
fo.WriteString(t)
|
|
||||||
fo.WriteString("\n")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,6 +1,6 @@
|
|||||||
#include <sycl/sycl.hpp>
|
#include <sycl/sycl.hpp>
|
||||||
|
|
||||||
extern "C" SYCL_EXTERNAL
|
extern "C" SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((sycl::ext::oneapi::experimental::nd_range_kernel<1>))
|
||||||
void vector_add(float* a, float* b) {
|
void vector_add(float* a, float* b) {
|
||||||
auto item = sycl::ext::oneapi::this_work_item::get_nd_item<1>();
|
auto item = sycl::ext::oneapi::this_work_item::get_nd_item<1>();
|
||||||
int idx = item.get_global_id(0);
|
int idx = item.get_global_id(0);
|
||||||
|
|||||||
@@ -15,11 +15,9 @@ import (
|
|||||||
"github.com/fumiama/gozel/ze"
|
"github.com/fumiama/gozel/ze"
|
||||||
)
|
)
|
||||||
|
|
||||||
//go:generate clang++ -fsycl -fsycl-device-only -fsycl-targets=spirv64 -faddrsig -Xclang -emit-llvm-bc main.cpp -o device_func.bc
|
//go:generate clang++ -fsycl -fsycl-device-only -fsycl-targets=spirv64 -Xclang -emit-llvm-bc main.cpp -o device_kern.bc
|
||||||
//go:generate sycl-post-link -symbols -split=auto -o device_func.table device_func.bc
|
//go:generate sycl-post-link -symbols -split=auto -o device_kern.table device_kern.bc
|
||||||
//go:generate clang++ -target spirv64-unknown-unknown -S -emit-llvm -x ir device_func_0.bc -o device_func.ll
|
//go:generate clang++ -target spirv64-unknown-unknown -S -emit-llvm -x ir device_kern_0.bc -o device_kern.ll
|
||||||
//go:generate go run ../../cmd/func2kernel device_func.ll device_kern.ll
|
|
||||||
//go:generate clang++ -target spirv64-unknown-unknown -c -emit-llvm -x ir device_kern.ll -o device_kern.bc
|
|
||||||
//go:generate llvm-spirv -o main.spv device_kern.bc
|
//go:generate llvm-spirv -o main.spv device_kern.bc
|
||||||
//go:generate clang++ -target spirv64-unknown-unknown -S -emit-llvm -x ir device_kern.bc -o main.ll
|
//go:generate clang++ -target spirv64-unknown-unknown -S -emit-llvm -x ir device_kern.bc -o main.ll
|
||||||
|
|
||||||
@@ -131,7 +129,7 @@ func main() {
|
|||||||
}
|
}
|
||||||
defer mod.Destroy()
|
defer mod.Destroy()
|
||||||
|
|
||||||
krn, err := mod.KernelCreate("vector_add")
|
krn, err := mod.KernelCreate("__sycl_kernel_vector_add")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,30 +6,31 @@ target triple = "spirv64-unknown-unknown"
|
|||||||
@__spirv_BuiltInGlobalInvocationId = external local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
|
@__spirv_BuiltInGlobalInvocationId = external local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
|
||||||
|
|
||||||
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write)
|
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write)
|
||||||
define spir_kernel void @vector_add(ptr addrspace(1) noundef captures(none) %0, ptr addrspace(1) noundef readonly captures(none) %1) local_unnamed_addr #0 !sycl_fixed_targets !7 {
|
define spir_kernel void @__sycl_kernel_vector_add(ptr addrspace(1) noundef align 4 captures(none) %0, ptr addrspace(1) noundef readonly align 4 captures(none) %1) local_unnamed_addr #0 !kernel_arg_buffer_location !6 !sycl_fixed_targets !7 !sycl_kernel_omit_args !8 {
|
||||||
%3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32, !noalias !8
|
%3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32, !noalias !9
|
||||||
%4 = icmp ult i64 %3, 2147483648
|
%4 = icmp ult i64 %3, 2147483648
|
||||||
tail call void @llvm.assume(i1 %4)
|
tail call void @llvm.assume(i1 %4)
|
||||||
%5 = getelementptr inbounds nuw float, ptr addrspace(1) %1, i64 %3
|
%5 = getelementptr inbounds float, ptr addrspace(1) %1, i64 %3
|
||||||
%6 = load float, ptr addrspace(1) %5, align 4, !tbaa !15
|
%6 = load float, ptr addrspace(1) %5, align 4, !tbaa !16
|
||||||
%7 = getelementptr inbounds nuw float, ptr addrspace(1) %0, i64 %3
|
%7 = getelementptr inbounds float, ptr addrspace(1) %0, i64 %3
|
||||||
%8 = load float, ptr addrspace(1) %7, align 4, !tbaa !15
|
%8 = load float, ptr addrspace(1) %7, align 4, !tbaa !16
|
||||||
%9 = fadd float %8, %6
|
%9 = fadd float %8, %6
|
||||||
store float %9, ptr addrspace(1) %7, align 4, !tbaa !15
|
store float %9, ptr addrspace(1) %7, align 4, !tbaa !16
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
|
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
|
||||||
declare void @llvm.assume(i1 noundef) #1
|
declare void @llvm.assume(i1 noundef) #1
|
||||||
|
|
||||||
attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-entry-point" "sycl-module-id"="main.cpp" "sycl-optlevel"="2" }
|
declare dso_local spir_func i32 @_Z18__spirv_ocl_printfPU3AS2Kcz(ptr addrspace(2), ...)
|
||||||
|
|
||||||
|
attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="main.cpp" "sycl-nd-range-kernel"="1" "sycl-optlevel"="2" "uniform-work-group-size"="true" }
|
||||||
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
|
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
|
||||||
|
|
||||||
!llvm.module.flags = !{!0, !1, !2}
|
!llvm.module.flags = !{!0, !1, !2}
|
||||||
!opencl.spir.version = !{!3}
|
!opencl.spir.version = !{!3}
|
||||||
!spirv.Source = !{!4}
|
!spirv.Source = !{!4}
|
||||||
!llvm.ident = !{!5}
|
!llvm.ident = !{!5}
|
||||||
!sycl-esimd-split-status = !{!6}
|
|
||||||
|
|
||||||
!0 = !{i32 1, !"wchar_size", i32 4}
|
!0 = !{i32 1, !"wchar_size", i32 4}
|
||||||
!1 = !{i32 1, !"sycl-device", i32 1}
|
!1 = !{i32 1, !"sycl-device", i32 1}
|
||||||
@@ -37,16 +38,17 @@ attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessib
|
|||||||
!3 = !{i32 1, i32 2}
|
!3 = !{i32 1, i32 2}
|
||||||
!4 = !{i32 4, i32 100000}
|
!4 = !{i32 4, i32 100000}
|
||||||
!5 = !{!"clang version 21.0.0git (https://github.com/intel/llvm d5f649b706f63b5c74e1929bc95db8de91085560)"}
|
!5 = !{!"clang version 21.0.0git (https://github.com/intel/llvm d5f649b706f63b5c74e1929bc95db8de91085560)"}
|
||||||
!6 = !{i8 0}
|
!6 = !{i32 -1, i32 -1}
|
||||||
!7 = !{}
|
!7 = !{}
|
||||||
!8 = !{!9, !11, !13}
|
!8 = !{i1 false, i1 false}
|
||||||
!9 = distinct !{!9, !10, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"}
|
!9 = !{!10, !12, !14}
|
||||||
!10 = distinct !{!10, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv"}
|
!10 = distinct !{!10, !11, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"}
|
||||||
!11 = distinct !{!11, !12, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"}
|
!11 = distinct !{!11, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv"}
|
||||||
!12 = distinct !{!12, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v"}
|
!12 = distinct !{!12, !13, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"}
|
||||||
!13 = distinct !{!13, !14, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv: argument 0"}
|
!13 = distinct !{!13, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v"}
|
||||||
!14 = distinct !{!14, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv"}
|
!14 = distinct !{!14, !15, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv: argument 0"}
|
||||||
!15 = !{!16, !16, i64 0}
|
!15 = distinct !{!15, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv"}
|
||||||
!16 = !{!"float", !17, i64 0}
|
!16 = !{!17, !17, i64 0}
|
||||||
!17 = !{!"omnipotent char", !18, i64 0}
|
!17 = !{!"float", !18, i64 0}
|
||||||
!18 = !{!"Simple C++ TBAA"}
|
!18 = !{!"omnipotent char", !19, i64 0}
|
||||||
|
!19 = !{!"Simple C++ TBAA"}
|
||||||
|
|||||||
Binary file not shown.
Reference in New Issue
Block a user