diff --git a/README.md b/README.md index 732c68e..1aab001 100644 --- a/README.md +++ b/README.md @@ -190,7 +190,8 @@ Contributions of all kinds are welcome. Some particularly impactful areas: ## License -This project is licensed under the [GNU Affero General Public License v3.0](LICENSE). +- This project is generally licensed under the [GNU Affero General Public License v3.0](LICENSE). +- The files in [gozel](gozel) folder follows their original license, which is [MIT](https://github.com/oneapi-src/level-zero/blob/master/LICENSE). --- diff --git a/cmd/gen/api.go b/cmd/gen/api.go index dd8c8d8..b2f328b 100644 --- a/cmd/gen/api.go +++ b/cmd/gen/api.go @@ -1,11 +1,14 @@ package main -import "os" +import ( + "os" + "path" +) var apif *os.File func init() { - f, err := os.Create("api.go") + f, err := os.Create(path.Join("gozel", "api.go")) if err != nil { panic(err) } diff --git a/cmd/gen/scan.go b/cmd/gen/scan.go index 81e7824..4e5bac9 100644 --- a/cmd/gen/scan.go +++ b/cmd/gen/scan.go @@ -6,6 +6,7 @@ import ( "fmt" "io" "os" + "path" "strconv" "strings" "unicode" @@ -134,7 +135,7 @@ func scanHeader(name string, scan *bufio.Scanner) { } fmt.Println(infh(name), "scanning region", region) k := fmt.Sprint(name, "_", region) - f, err := os.Create(fmt.Sprint(k, ".go")) + f, err := os.Create(path.Join("gozel", fmt.Sprint(k, ".go"))) if err != nil { panic(fmt.Sprintf("%s L%d: cannot create region %s, err: %v", name, ln, region, err)) } diff --git a/examples/vadd/main.cpp b/examples/vadd/main.cpp index 702557f..86a1055 100644 --- a/examples/vadd/main.cpp +++ b/examples/vadd/main.cpp @@ -3,7 +3,7 @@ extern "C" SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((sycl::ext::oneapi::experimental::nd_range_kernel<1>)) void vector_add(float* a, float* b) { auto item = sycl::ext::oneapi::this_work_item::get_nd_item<1>(); - int idx = item.get_global_id(0); + int idx = item.get_global_linear_id(); a[idx] += b[idx]; } diff --git a/examples/vadd/main.go b/examples/vadd/main.go index 7b9bda1..4ee0dd4 100644 --- a/examples/vadd/main.go +++ b/examples/vadd/main.go @@ -4,6 +4,7 @@ package main import ( _ "embed" "fmt" + "math" "math/rand" "os" "strconv" @@ -11,7 +12,7 @@ import ( "time" "unsafe" - "github.com/fumiama/gozel" + "github.com/fumiama/gozel/gozel" "github.com/fumiama/gozel/ze" ) @@ -69,8 +70,7 @@ func main() { fmt.Printf("%-28s (%d, %d, %d)\n", "Max Group Count (X, Y, Z):", cprop.Maxgroupcountx, cprop.Maxgroupcounty, cprop.Maxgroupcountz) fmt.Printf("%-28s %d\n", "Max Total Group Size:", cprop.Maxtotalgroupsize) fmt.Printf("%-28s %d\n", "Max Shared Local Memory:", cprop.Maxsharedlocalmemory) - fmt.Printf("%-28s %d\n", "Num Subgroup Sizes:", cprop.Numsubgroupsizes) - fmt.Printf("%-28s %v\n", "Subgroup Sizes:", cprop.Subgroupsizes[:]) + fmt.Printf("%-28s %v\n", "Subgroup Sizes:", cprop.Subgroupsizes[:cprop.Numsubgroupsizes]) var ( X, Y, Z = uintptr(cprop.Maxgroupsizex), uintptr(1), uintptr(1) @@ -84,7 +84,7 @@ func main() { fmt.Printf("%-28s %d\n", "Total Elements (N):", N) fmt.Printf("%-28s %d MiB\n", "Buffer Size:", bufsz/1024/1024) - q, err := ctx.CommandQueueCreate(dev) + q, err := ctx.CommandQueueCreate(dev, gozel.ZE_COMMAND_QUEUE_MODE_DEFAULT) if err != nil { panic(err) } @@ -135,11 +135,11 @@ func main() { } defer krn.Destroy() - err = krn.SetArgumentValue(0, unsafe.Sizeof(uintptr(0)), unsafe.Pointer(&dbufV1)) + err = krn.SetArgumentValue(0, &dbufV1) if err != nil { panic(err) } - err = krn.SetArgumentValue(1, unsafe.Sizeof(uintptr(0)), unsafe.Pointer(&dbufV2)) + err = krn.SetArgumentValue(1, &dbufV2) if err != nil { panic(err) } @@ -154,16 +154,16 @@ func main() { } defer lstpre.Destroy() - err = lstpre.AppendMemoryCopy(dbufV1, hbufV1, bufsz) + err = lstpre.AppendMemoryCopy(dbufV1, hbufV1, bufsz, 0) if err != nil { panic(err) } - err = lstpre.AppendMemoryCopy(dbufV2, hbufV2, bufsz) + err = lstpre.AppendMemoryCopy(dbufV2, hbufV2, bufsz, 0) if err != nil { panic(err) } - err = lstpre.AppendBarrier() + err = lstpre.AppendBarrier(0) if err != nil { panic(err) } @@ -181,12 +181,12 @@ func main() { err = lstcalc.AppendLaunchKernel(krn, &gozel.ZeGroupCount{ Groupcountx: uint32(groupCount), Groupcounty: 1, Groupcountz: 1, - }) + }, 0) if err != nil { panic(err) } - err = lstcalc.AppendBarrier() + err = lstcalc.AppendBarrier(0) if err != nil { panic(err) } @@ -202,7 +202,7 @@ func main() { } defer lstpost.Destroy() - err = lstpost.AppendMemoryCopy(hbufV1, dbufV1, bufsz) + err = lstpost.AppendMemoryCopy(hbufV1, dbufV1, bufsz, 0) if err != nil { panic(err) } @@ -217,7 +217,7 @@ func main() { if err != nil { panic(err) } - err = q.Synchronize() + err = q.Synchronize(math.MaxUint64) if err != nil { panic(err) } diff --git a/examples/vadd/main.ll b/examples/vadd/main.ll index e742496..96d403c 100644 --- a/examples/vadd/main.ll +++ b/examples/vadd/main.ll @@ -4,18 +4,21 @@ target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256: target triple = "spirv64-unknown-unknown" @__spirv_BuiltInGlobalInvocationId = external local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 +@__spirv_BuiltInGlobalOffset = external local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write) define spir_kernel void @__sycl_kernel_vector_add(ptr addrspace(1) noundef align 4 captures(none) %0, ptr addrspace(1) noundef readonly align 4 captures(none) %1) local_unnamed_addr #0 !kernel_arg_buffer_location !6 !sycl_fixed_targets !7 !sycl_kernel_omit_args !8 { %3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32, !noalias !9 - %4 = icmp ult i64 %3, 2147483648 - tail call void @llvm.assume(i1 %4) - %5 = getelementptr inbounds float, ptr addrspace(1) %1, i64 %3 - %6 = load float, ptr addrspace(1) %5, align 4, !tbaa !16 - %7 = getelementptr inbounds float, ptr addrspace(1) %0, i64 %3 - %8 = load float, ptr addrspace(1) %7, align 4, !tbaa !16 - %9 = fadd float %8, %6 - store float %9, ptr addrspace(1) %7, align 4, !tbaa !16 + %4 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalOffset, align 32, !noalias !16 + %5 = sub i64 %3, %4 + %6 = icmp ult i64 %5, 2147483648 + tail call void @llvm.assume(i1 %6) + %7 = getelementptr inbounds float, ptr addrspace(1) %1, i64 %5 + %8 = load float, ptr addrspace(1) %7, align 4, !tbaa !23 + %9 = getelementptr inbounds float, ptr addrspace(1) %0, i64 %5 + %10 = load float, ptr addrspace(1) %9, align 4, !tbaa !23 + %11 = fadd float %10, %8 + store float %11, ptr addrspace(1) %9, align 4, !tbaa !23 ret void } @@ -48,7 +51,14 @@ attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessib !13 = distinct !{!13, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v"} !14 = distinct !{!14, !15, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv: argument 0"} !15 = distinct !{!15, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv"} -!16 = !{!17, !17, i64 0} -!17 = !{!"float", !18, i64 0} -!18 = !{!"omnipotent char", !19, i64 0} -!19 = !{!"Simple C++ TBAA"} +!16 = !{!17, !19, !21} +!17 = distinct !{!17, !18, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"} +!18 = distinct !{!18, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN4sycl3_V12idILi1EEEE8initSizeEv"} +!19 = distinct !{!19, !20, !"_ZN7__spirv16initGlobalOffsetILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"} +!20 = distinct !{!20, !"_ZN7__spirv16initGlobalOffsetILi1EN4sycl3_V12idILi1EEEEET0_v"} +!21 = distinct !{!21, !22, !"_ZNK4sycl3_V17nd_itemILi1EE10get_offsetEv: argument 0"} +!22 = distinct !{!22, !"_ZNK4sycl3_V17nd_itemILi1EE10get_offsetEv"} +!23 = !{!24, !24, i64 0} +!24 = !{!"float", !25, i64 0} +!25 = !{!"omnipotent char", !26, i64 0} +!26 = !{!"Simple C++ TBAA"} diff --git a/examples/vadd/main.spv b/examples/vadd/main.spv index 0adf1e1..7f901ad 100644 Binary files a/examples/vadd/main.spv and b/examples/vadd/main.spv differ diff --git a/examples/vadd_event/.gitignore b/examples/vadd_event/.gitignore new file mode 100644 index 0000000..2f777a5 --- /dev/null +++ b/examples/vadd_event/.gitignore @@ -0,0 +1 @@ +/device* diff --git a/examples/vadd_event/main.cpp b/examples/vadd_event/main.cpp new file mode 100644 index 0000000..86a1055 --- /dev/null +++ b/examples/vadd_event/main.cpp @@ -0,0 +1,9 @@ +#include + +extern "C" SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((sycl::ext::oneapi::experimental::nd_range_kernel<1>)) +void vector_add(float* a, float* b) { + auto item = sycl::ext::oneapi::this_work_item::get_nd_item<1>(); + int idx = item.get_global_linear_id(); + + a[idx] += b[idx]; +} diff --git a/examples/vadd_event/main.go b/examples/vadd_event/main.go new file mode 100644 index 0000000..b0a611d --- /dev/null +++ b/examples/vadd_event/main.go @@ -0,0 +1,231 @@ +// Package main demonstrates vector addition using the gozel Level Zero bindings. +package main + +import ( + _ "embed" + "fmt" + "math" + "math/rand" + "os" + "strconv" + "strings" + "time" + "unsafe" + + "github.com/fumiama/gozel/gozel" + "github.com/fumiama/gozel/ze" +) + +//go:generate clang++ -fsycl -fsycl-device-only -fsycl-targets=spirv64 -Xclang -emit-llvm-bc main.cpp -o device_kern.bc +//go:generate sycl-post-link -symbols -split=auto -o device_kern.table device_kern.bc +//go:generate clang++ -target spirv64-unknown-unknown -S -emit-llvm -x ir device_kern_0.bc -o device_kern.ll +//go:generate llvm-spirv -o main.spv device_kern.bc +//go:generate clang++ -target spirv64-unknown-unknown -S -emit-llvm -x ir device_kern.bc -o main.ll + +//go:embed main.spv +var kernelspv []byte + +func main() { + gpus, err := ze.InitGPUDrivers() + if err != nil { + panic(err) + } + if len(gpus) == 0 { + panic("no gpu available") + } + gpu := gpus[0] + + ctx, err := gpu.ContextCreate() + if err != nil { + panic(err) + } + + devs, err := gpu.DeviceGet() + if err != nil { + panic(err) + } + if len(devs) == 0 { + panic("no device available") + } + dev := devs[0] + + prop, err := dev.DeviceGetProperties() + if err != nil { + panic(err) + } + + fmt.Println("=============== Device Basic Properties ===============") + name, _, _ := strings.Cut(string(prop.Name[:]), "\x00") + fmt.Println( + "Running on device: ID =", prop.Deviceid, ", Name =", name, + "@", strconv.FormatFloat(float64(prop.Coreclockrate)/1024/1024/1024, 'f', 2, 64), "GHz.", + ) + + cprop, err := dev.DeviceGetComputeProperties() + if err != nil { + panic(err) + } + fmt.Println("=============== Device Compute Properties ===============") + fmt.Printf("%-28s (%d, %d, %d)\n", "Max Group Size (X, Y, Z):", cprop.Maxgroupsizex, cprop.Maxgroupsizey, cprop.Maxgroupsizez) + fmt.Printf("%-28s (%d, %d, %d)\n", "Max Group Count (X, Y, Z):", cprop.Maxgroupcountx, cprop.Maxgroupcounty, cprop.Maxgroupcountz) + fmt.Printf("%-28s %d\n", "Max Total Group Size:", cprop.Maxtotalgroupsize) + fmt.Printf("%-28s %d\n", "Max Shared Local Memory:", cprop.Maxsharedlocalmemory) + fmt.Printf("%-28s %d\n", "Num Subgroup Sizes:", cprop.Numsubgroupsizes) + fmt.Printf("%-28s %v\n", "Subgroup Sizes:", cprop.Subgroupsizes[:]) + + var ( + X, Y, Z = uintptr(cprop.Maxgroupsizex), uintptr(1), uintptr(1) + groupCount = uintptr(65536) + N = X * groupCount + bufsz = N * unsafe.Sizeof(float32(0)) + ) + fmt.Println("=============== Computation Configuration ===============") + fmt.Printf("%-28s (%d, %d, %d)\n", "Group Size (X, Y, Z):", X, Y, Z) + fmt.Printf("%-28s %d\n", "Group Count:", groupCount) + fmt.Printf("%-28s %d\n", "Total Elements (N):", N) + fmt.Printf("%-28s %d MiB\n", "Buffer Size:", bufsz/1024/1024) + + hbufV1, err := ctx.MemAllocHost(bufsz, 1) + if err != nil { + panic(err) + } + defer ctx.MemFree(hbufV1) + + hbufV2, err := ctx.MemAllocHost(bufsz, 1) + if err != nil { + panic(err) + } + defer ctx.MemFree(hbufV2) + + dbufV1, err := ctx.MemAllocDevice(dev, bufsz, 1) + if err != nil { + panic(err) + } + defer ctx.MemFree(dbufV1) + + dbufV2, err := ctx.MemAllocDevice(dev, bufsz, 1) + if err != nil { + panic(err) + } + defer ctx.MemFree(dbufV2) + + floatbuf := make([]float32, 2*N) + for i := range floatbuf { + floatbuf[i] = rand.Float32() + } + + zev1, zev2 := unsafe.Slice((*float32)(hbufV1), N), unsafe.Slice((*float32)(hbufV2), N) + copy(zev1, floatbuf[:N]) + copy(zev2, floatbuf[N:]) + + mod, err := ctx.ModuleCreate(dev, kernelspv) + if err != nil { + panic(err) + } + defer mod.Destroy() + + krn, err := mod.KernelCreate("__sycl_kernel_vector_add") + if err != nil { + panic(err) + } + defer krn.Destroy() + + err = krn.SetArgumentValue(0, &dbufV1) + if err != nil { + panic(err) + } + err = krn.SetArgumentValue(1, &dbufV2) + if err != nil { + panic(err) + } + err = krn.SetGroupSize(uint32(X), uint32(Y), uint32(Z)) + if err != nil { + panic(err) + } + + evp, err := ctx.EventPoolCreate(3, dev) + if err != nil { + panic(err) + } + defer evp.Destroy() + + evcph2dv1, err := evp.EventCreate(0, gozel.ZE_EVENT_SCOPE_FLAG_HOST, 0) + if err != nil { + panic(err) + } + defer evcph2dv1.Destroy() + evcph2dv2, err := evp.EventCreate(1, gozel.ZE_EVENT_SCOPE_FLAG_HOST, 0) + if err != nil { + panic(err) + } + defer evcph2dv2.Destroy() + + start := time.Now() + + lst, err := ctx.CommandListCreateImmediate(dev, gozel.ZE_COMMAND_QUEUE_MODE_DEFAULT) + if err != nil { + panic(err) + } + err = lst.AppendMemoryCopy(dbufV1, hbufV1, bufsz, evcph2dv1) + if err != nil { + panic(err) + } + err = lst.AppendMemoryCopy(dbufV2, hbufV2, bufsz, evcph2dv2) + if err != nil { + panic(err) + } + + evk, err := evp.EventCreate(2, gozel.ZE_EVENT_SCOPE_FLAG_HOST, 0) + if err != nil { + panic(err) + } + defer evk.Destroy() + + err = lst.AppendLaunchKernel(krn, &gozel.ZeGroupCount{ + Groupcountx: uint32(groupCount), Groupcounty: 1, Groupcountz: 1, + }, evk, evcph2dv1, evcph2dv2) + if err != nil { + panic(err) + } + + err = lst.AppendMemoryCopy(hbufV1, dbufV1, bufsz, 0, evk) + if err != nil { + panic(err) + } + err = lst.HostSynchronize(math.MaxUint64) + if err != nil { + panic(err) + } + + elapsed := time.Since(start) + + fmt.Println("=============== Calculation Results ===============") + fmt.Printf("%-28s %.6f ms\n", "GPU Execution Time:", elapsed.Seconds()*1000) + fmt.Printf("%-28s %.2f GiB/s\n", "GPU Throughput:", float64(bufsz)/elapsed.Seconds()/1e9) + + tmpbuf := make([]float32, N) + start = time.Now() + for i := range N { + tmpbuf[i] = floatbuf[i] + floatbuf[N+i] + } + elapsed = time.Since(start) + + fmt.Println("=============== Validation Results ===============") + fmt.Printf("%-28s %.6f ms\n", "CPU Execution Time:", elapsed.Seconds()*1000) + fmt.Printf("%-28s %.2f GiB/s\n", "CPU Throughput:", float64(bufsz)/elapsed.Seconds()/1e9) + + fail := false + for i := range N { + expect := floatbuf[i] + floatbuf[N+i] + if zev1[i] != expect { + fail = true + fmt.Printf("[%05d] expect %f = %f + %f, got %f.\n", i, expect, floatbuf[i], floatbuf[N+i], zev1[i]) + } + } + + if fail { + os.Exit(1) + } + + fmt.Println("Test Passed!!!") +} diff --git a/examples/vadd_event/main.ll b/examples/vadd_event/main.ll new file mode 100644 index 0000000..96d403c --- /dev/null +++ b/examples/vadd_event/main.ll @@ -0,0 +1,64 @@ +; ModuleID = 'device_kern.bc' +source_filename = "main.cpp" +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spirv64-unknown-unknown" + +@__spirv_BuiltInGlobalInvocationId = external local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 +@__spirv_BuiltInGlobalOffset = external local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 + +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write) +define spir_kernel void @__sycl_kernel_vector_add(ptr addrspace(1) noundef align 4 captures(none) %0, ptr addrspace(1) noundef readonly align 4 captures(none) %1) local_unnamed_addr #0 !kernel_arg_buffer_location !6 !sycl_fixed_targets !7 !sycl_kernel_omit_args !8 { + %3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32, !noalias !9 + %4 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalOffset, align 32, !noalias !16 + %5 = sub i64 %3, %4 + %6 = icmp ult i64 %5, 2147483648 + tail call void @llvm.assume(i1 %6) + %7 = getelementptr inbounds float, ptr addrspace(1) %1, i64 %5 + %8 = load float, ptr addrspace(1) %7, align 4, !tbaa !23 + %9 = getelementptr inbounds float, ptr addrspace(1) %0, i64 %5 + %10 = load float, ptr addrspace(1) %9, align 4, !tbaa !23 + %11 = fadd float %10, %8 + store float %11, ptr addrspace(1) %9, align 4, !tbaa !23 + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) +declare void @llvm.assume(i1 noundef) #1 + +declare dso_local spir_func i32 @_Z18__spirv_ocl_printfPU3AS2Kcz(ptr addrspace(2), ...) + +attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="main.cpp" "sycl-nd-range-kernel"="1" "sycl-optlevel"="2" "uniform-work-group-size"="true" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } + +!llvm.module.flags = !{!0, !1, !2} +!opencl.spir.version = !{!3} +!spirv.Source = !{!4} +!llvm.ident = !{!5} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"sycl-device", i32 1} +!2 = !{i32 7, !"frame-pointer", i32 2} +!3 = !{i32 1, i32 2} +!4 = !{i32 4, i32 100000} +!5 = !{!"clang version 21.0.0git (https://github.com/intel/llvm d5f649b706f63b5c74e1929bc95db8de91085560)"} +!6 = !{i32 -1, i32 -1} +!7 = !{} +!8 = !{i1 false, i1 false} +!9 = !{!10, !12, !14} +!10 = distinct !{!10, !11, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"} +!11 = distinct !{!11, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv"} +!12 = distinct !{!12, !13, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"} +!13 = distinct !{!13, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v"} +!14 = distinct !{!14, !15, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv: argument 0"} +!15 = distinct !{!15, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv"} +!16 = !{!17, !19, !21} +!17 = distinct !{!17, !18, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"} +!18 = distinct !{!18, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN4sycl3_V12idILi1EEEE8initSizeEv"} +!19 = distinct !{!19, !20, !"_ZN7__spirv16initGlobalOffsetILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"} +!20 = distinct !{!20, !"_ZN7__spirv16initGlobalOffsetILi1EN4sycl3_V12idILi1EEEEET0_v"} +!21 = distinct !{!21, !22, !"_ZNK4sycl3_V17nd_itemILi1EE10get_offsetEv: argument 0"} +!22 = distinct !{!22, !"_ZNK4sycl3_V17nd_itemILi1EE10get_offsetEv"} +!23 = !{!24, !24, i64 0} +!24 = !{!"float", !25, i64 0} +!25 = !{!"omnipotent char", !26, i64 0} +!26 = !{!"Simple C++ TBAA"} diff --git a/examples/vadd_event/main.spv b/examples/vadd_event/main.spv new file mode 100644 index 0000000..7f901ad Binary files /dev/null and b/examples/vadd_event/main.spv differ diff --git a/api.go b/gozel/api.go similarity index 100% rename from api.go rename to gozel/api.go diff --git a/core_CacheLineSize.go b/gozel/core_CacheLineSize.go similarity index 100% rename from core_CacheLineSize.go rename to gozel/core_CacheLineSize.go diff --git a/core_EUCount.go b/gozel/core_EUCount.go similarity index 100% rename from core_EUCount.go rename to gozel/core_EUCount.go diff --git a/core_PCIProperties.go b/gozel/core_PCIProperties.go similarity index 100% rename from core_PCIProperties.go rename to gozel/core_PCIProperties.go diff --git a/core_RTAS.go b/gozel/core_RTAS.go similarity index 100% rename from core_RTAS.go rename to gozel/core_RTAS.go diff --git a/core_RTASBuilder.go b/gozel/core_RTASBuilder.go similarity index 100% rename from core_RTASBuilder.go rename to gozel/core_RTASBuilder.go diff --git a/core_SRGB.go b/gozel/core_SRGB.go similarity index 100% rename from core_SRGB.go rename to gozel/core_SRGB.go diff --git a/core_bandwidth.go b/gozel/core_bandwidth.go similarity index 100% rename from core_bandwidth.go rename to gozel/core_bandwidth.go diff --git a/core_barrier.go b/gozel/core_barrier.go similarity index 100% rename from core_barrier.go rename to gozel/core_barrier.go diff --git a/core_bfloat16conversions.go b/gozel/core_bfloat16conversions.go similarity index 100% rename from core_bfloat16conversions.go rename to gozel/core_bfloat16conversions.go diff --git a/core_bindlessimages.go b/gozel/core_bindlessimages.go similarity index 100% rename from core_bindlessimages.go rename to gozel/core_bindlessimages.go diff --git a/core_cacheReservation.go b/gozel/core_cacheReservation.go similarity index 100% rename from core_cacheReservation.go rename to gozel/core_cacheReservation.go diff --git a/core_callbacks.go b/gozel/core_callbacks.go similarity index 100% rename from core_callbacks.go rename to gozel/core_callbacks.go diff --git a/core_cmdlist.go b/gozel/core_cmdlist.go similarity index 100% rename from core_cmdlist.go rename to gozel/core_cmdlist.go diff --git a/core_cmdqueue.go b/gozel/core_cmdqueue.go similarity index 100% rename from core_cmdqueue.go rename to gozel/core_cmdqueue.go diff --git a/core_commandListClone.go b/gozel/core_commandListClone.go similarity index 100% rename from core_commandListClone.go rename to gozel/core_commandListClone.go diff --git a/core_common.go b/gozel/core_common.go similarity index 100% rename from core_common.go rename to gozel/core_common.go diff --git a/core_context.go b/gozel/core_context.go similarity index 100% rename from core_context.go rename to gozel/core_context.go diff --git a/core_copy.go b/gozel/core_copy.go similarity index 100% rename from core_copy.go rename to gozel/core_copy.go diff --git a/core_counterbasedeventpool.go b/gozel/core_counterbasedeventpool.go similarity index 100% rename from core_counterbasedeventpool.go rename to gozel/core_counterbasedeventpool.go diff --git a/core_device.go b/gozel/core_device.go similarity index 100% rename from core_device.go rename to gozel/core_device.go diff --git a/core_deviceLUID.go b/gozel/core_deviceLUID.go similarity index 100% rename from core_deviceLUID.go rename to gozel/core_deviceLUID.go diff --git a/core_deviceVectorSizes.go b/gozel/core_deviceVectorSizes.go similarity index 100% rename from core_deviceVectorSizes.go rename to gozel/core_deviceVectorSizes.go diff --git a/core_deviceipversion.go b/gozel/core_deviceipversion.go similarity index 100% rename from core_deviceipversion.go rename to gozel/core_deviceipversion.go diff --git a/core_deviceusablememproperties.go b/gozel/core_deviceusablememproperties.go similarity index 100% rename from core_deviceusablememproperties.go rename to gozel/core_deviceusablememproperties.go diff --git a/core_driver.go b/gozel/core_driver.go similarity index 100% rename from core_driver.go rename to gozel/core_driver.go diff --git a/core_driverDDIHandles.go b/gozel/core_driverDDIHandles.go similarity index 100% rename from core_driverDDIHandles.go rename to gozel/core_driverDDIHandles.go diff --git a/core_event.go b/gozel/core_event.go similarity index 100% rename from core_event.go rename to gozel/core_event.go diff --git a/core_eventQueryKernelTimestamps.go b/gozel/core_eventQueryKernelTimestamps.go similarity index 100% rename from core_eventQueryKernelTimestamps.go rename to gozel/core_eventQueryKernelTimestamps.go diff --git a/core_eventquerytimestamps.go b/gozel/core_eventquerytimestamps.go similarity index 100% rename from core_eventquerytimestamps.go rename to gozel/core_eventquerytimestamps.go diff --git a/core_externalMemMap.go b/gozel/core_externalMemMap.go similarity index 100% rename from core_externalMemMap.go rename to gozel/core_externalMemMap.go diff --git a/core_externalSemaphores.go b/gozel/core_externalSemaphores.go similarity index 100% rename from core_externalSemaphores.go rename to gozel/core_externalSemaphores.go diff --git a/core_fabric.go b/gozel/core_fabric.go similarity index 100% rename from core_fabric.go rename to gozel/core_fabric.go diff --git a/core_fence.go b/gozel/core_fence.go similarity index 100% rename from core_fence.go rename to gozel/core_fence.go diff --git a/core_floatAtomics.go b/gozel/core_floatAtomics.go similarity index 100% rename from core_floatAtomics.go rename to gozel/core_floatAtomics.go diff --git a/core_globaloffset.go b/gozel/core_globaloffset.go similarity index 100% rename from core_globaloffset.go rename to gozel/core_globaloffset.go diff --git a/core_image.go b/gozel/core_image.go similarity index 100% rename from core_image.go rename to gozel/core_image.go diff --git a/core_imageCopy.go b/gozel/core_imageCopy.go similarity index 100% rename from core_imageCopy.go rename to gozel/core_imageCopy.go diff --git a/core_imageFormatSupport.go b/gozel/core_imageFormatSupport.go similarity index 100% rename from core_imageFormatSupport.go rename to gozel/core_imageFormatSupport.go diff --git a/core_imageQueryAllocProperties.go b/gozel/core_imageQueryAllocProperties.go similarity index 100% rename from core_imageQueryAllocProperties.go rename to gozel/core_imageQueryAllocProperties.go diff --git a/core_imagememoryproperties.go b/gozel/core_imagememoryproperties.go similarity index 100% rename from core_imagememoryproperties.go rename to gozel/core_imagememoryproperties.go diff --git a/core_imageview.go b/gozel/core_imageview.go similarity index 100% rename from core_imageview.go rename to gozel/core_imageview.go diff --git a/core_imageviewplanar.go b/gozel/core_imageviewplanar.go similarity index 100% rename from core_imageviewplanar.go rename to gozel/core_imageviewplanar.go diff --git a/core_immediateCommandListAppend.go b/gozel/core_immediateCommandListAppend.go similarity index 100% rename from core_immediateCommandListAppend.go rename to gozel/core_immediateCommandListAppend.go diff --git a/core_ipcMemHandleType.go b/gozel/core_ipcMemHandleType.go similarity index 100% rename from core_ipcMemHandleType.go rename to gozel/core_ipcMemHandleType.go diff --git a/core_kernelAllocationProperties.go b/gozel/core_kernelAllocationProperties.go similarity index 100% rename from core_kernelAllocationProperties.go rename to gozel/core_kernelAllocationProperties.go diff --git a/core_kernelBinary.go b/gozel/core_kernelBinary.go similarity index 100% rename from core_kernelBinary.go rename to gozel/core_kernelBinary.go diff --git a/core_kernelMaxGroupSizeProperties.go b/gozel/core_kernelMaxGroupSizeProperties.go similarity index 100% rename from core_kernelMaxGroupSizeProperties.go rename to gozel/core_kernelMaxGroupSizeProperties.go diff --git a/core_kernelSchedulingHints.go b/gozel/core_kernelSchedulingHints.go similarity index 100% rename from core_kernelSchedulingHints.go rename to gozel/core_kernelSchedulingHints.go diff --git a/core_linkageInspection.go b/gozel/core_linkageInspection.go similarity index 100% rename from core_linkageInspection.go rename to gozel/core_linkageInspection.go diff --git a/core_linkonceodr.go b/gozel/core_linkonceodr.go similarity index 100% rename from core_linkonceodr.go rename to gozel/core_linkonceodr.go diff --git a/core_memory.go b/gozel/core_memory.go similarity index 100% rename from core_memory.go rename to gozel/core_memory.go diff --git a/core_memoryCompressionHints.go b/gozel/core_memoryCompressionHints.go similarity index 100% rename from core_memoryCompressionHints.go rename to gozel/core_memoryCompressionHints.go diff --git a/core_memoryFreePolicies.go b/gozel/core_memoryFreePolicies.go similarity index 100% rename from core_memoryFreePolicies.go rename to gozel/core_memoryFreePolicies.go diff --git a/core_memoryProperties.go b/gozel/core_memoryProperties.go similarity index 100% rename from core_memoryProperties.go rename to gozel/core_memoryProperties.go diff --git a/core_module.go b/gozel/core_module.go similarity index 100% rename from core_module.go rename to gozel/core_module.go diff --git a/core_mutableCommandList.go b/gozel/core_mutableCommandList.go similarity index 100% rename from core_mutableCommandList.go rename to gozel/core_mutableCommandList.go diff --git a/core_powersavinghint.go b/gozel/core_powersavinghint.go similarity index 100% rename from core_powersavinghint.go rename to gozel/core_powersavinghint.go diff --git a/core_program.go b/gozel/core_program.go similarity index 100% rename from core_program.go rename to gozel/core_program.go diff --git a/core_raytracing.go b/gozel/core_raytracing.go similarity index 100% rename from core_raytracing.go rename to gozel/core_raytracing.go diff --git a/core_relaxedAllocLimits.go b/gozel/core_relaxedAllocLimits.go similarity index 100% rename from core_relaxedAllocLimits.go rename to gozel/core_relaxedAllocLimits.go diff --git a/core_residency.go b/gozel/core_residency.go similarity index 100% rename from core_residency.go rename to gozel/core_residency.go diff --git a/core_sampler.go b/gozel/core_sampler.go similarity index 100% rename from core_sampler.go rename to gozel/core_sampler.go diff --git a/core_subAllocationsProperties.go b/gozel/core_subAllocationsProperties.go similarity index 100% rename from core_subAllocationsProperties.go rename to gozel/core_subAllocationsProperties.go diff --git a/core_subgroups.go b/gozel/core_subgroups.go similarity index 100% rename from core_subgroups.go rename to gozel/core_subgroups.go diff --git a/core_virtual.go b/gozel/core_virtual.go similarity index 100% rename from core_virtual.go rename to gozel/core_virtual.go diff --git a/rntm_common.go b/gozel/rntm_common.go similarity index 100% rename from rntm_common.go rename to gozel/rntm_common.go diff --git a/sysm_Overclock.go b/gozel/sysm_Overclock.go similarity index 100% rename from sysm_Overclock.go rename to gozel/sysm_Overclock.go diff --git a/sysm_common.go b/gozel/sysm_common.go similarity index 100% rename from sysm_common.go rename to gozel/sysm_common.go diff --git a/sysm_device.go b/gozel/sysm_device.go similarity index 100% rename from sysm_device.go rename to gozel/sysm_device.go diff --git a/sysm_diagnostics.go b/gozel/sysm_diagnostics.go similarity index 100% rename from sysm_diagnostics.go rename to gozel/sysm_diagnostics.go diff --git a/sysm_driver.go b/gozel/sysm_driver.go similarity index 100% rename from sysm_driver.go rename to gozel/sysm_driver.go diff --git a/sysm_ecc.go b/gozel/sysm_ecc.go similarity index 100% rename from sysm_ecc.go rename to gozel/sysm_ecc.go diff --git a/sysm_eccState.go b/gozel/sysm_eccState.go similarity index 100% rename from sysm_eccState.go rename to gozel/sysm_eccState.go diff --git a/sysm_engine.go b/gozel/sysm_engine.go similarity index 100% rename from sysm_engine.go rename to gozel/sysm_engine.go diff --git a/sysm_engineActivity.go b/gozel/sysm_engineActivity.go similarity index 100% rename from sysm_engineActivity.go rename to gozel/sysm_engineActivity.go diff --git a/sysm_events.go b/gozel/sysm_events.go similarity index 100% rename from sysm_events.go rename to gozel/sysm_events.go diff --git a/sysm_fabric.go b/gozel/sysm_fabric.go similarity index 100% rename from sysm_fabric.go rename to gozel/sysm_fabric.go diff --git a/sysm_fan.go b/gozel/sysm_fan.go similarity index 100% rename from sysm_fan.go rename to gozel/sysm_fan.go diff --git a/sysm_firmware.go b/gozel/sysm_firmware.go similarity index 100% rename from sysm_firmware.go rename to gozel/sysm_firmware.go diff --git a/sysm_firmwareSecurityVersion.go b/gozel/sysm_firmwareSecurityVersion.go similarity index 100% rename from sysm_firmwareSecurityVersion.go rename to gozel/sysm_firmwareSecurityVersion.go diff --git a/sysm_frequency.go b/gozel/sysm_frequency.go similarity index 100% rename from sysm_frequency.go rename to gozel/sysm_frequency.go diff --git a/sysm_led.go b/gozel/sysm_led.go similarity index 100% rename from sysm_led.go rename to gozel/sysm_led.go diff --git a/sysm_memPageOfflineState.go b/gozel/sysm_memPageOfflineState.go similarity index 100% rename from sysm_memPageOfflineState.go rename to gozel/sysm_memPageOfflineState.go diff --git a/sysm_memory.go b/gozel/sysm_memory.go similarity index 100% rename from sysm_memory.go rename to gozel/sysm_memory.go diff --git a/sysm_memoryBwCounterValidBits.go b/gozel/sysm_memoryBwCounterValidBits.go similarity index 100% rename from sysm_memoryBwCounterValidBits.go rename to gozel/sysm_memoryBwCounterValidBits.go diff --git a/sysm_pciLinkSpeedDowngrade.go b/gozel/sysm_pciLinkSpeedDowngrade.go similarity index 100% rename from sysm_pciLinkSpeedDowngrade.go rename to gozel/sysm_pciLinkSpeedDowngrade.go diff --git a/sysm_performance.go b/gozel/sysm_performance.go similarity index 100% rename from sysm_performance.go rename to gozel/sysm_performance.go diff --git a/sysm_power.go b/gozel/sysm_power.go similarity index 100% rename from sysm_power.go rename to gozel/sysm_power.go diff --git a/sysm_powerDomainProperties.go b/gozel/sysm_powerDomainProperties.go similarity index 100% rename from sysm_powerDomainProperties.go rename to gozel/sysm_powerDomainProperties.go diff --git a/sysm_powerLimits.go b/gozel/sysm_powerLimits.go similarity index 100% rename from sysm_powerLimits.go rename to gozel/sysm_powerLimits.go diff --git a/sysm_psu.go b/gozel/sysm_psu.go similarity index 100% rename from sysm_psu.go rename to gozel/sysm_psu.go diff --git a/sysm_ras.go b/gozel/sysm_ras.go similarity index 100% rename from sysm_ras.go rename to gozel/sysm_ras.go diff --git a/sysm_rasState.go b/gozel/sysm_rasState.go similarity index 100% rename from sysm_rasState.go rename to gozel/sysm_rasState.go diff --git a/sysm_scheduler.go b/gozel/sysm_scheduler.go similarity index 100% rename from sysm_scheduler.go rename to gozel/sysm_scheduler.go diff --git a/sysm_standby.go b/gozel/sysm_standby.go similarity index 100% rename from sysm_standby.go rename to gozel/sysm_standby.go diff --git a/sysm_sysmanDeviceMapping.go b/gozel/sysm_sysmanDeviceMapping.go similarity index 100% rename from sysm_sysmanDeviceMapping.go rename to gozel/sysm_sysmanDeviceMapping.go diff --git a/sysm_temperature.go b/gozel/sysm_temperature.go similarity index 100% rename from sysm_temperature.go rename to gozel/sysm_temperature.go diff --git a/sysm_virtualFunctionManagement.go b/gozel/sysm_virtualFunctionManagement.go similarity index 100% rename from sysm_virtualFunctionManagement.go rename to gozel/sysm_virtualFunctionManagement.go diff --git a/tols_GlobalTimestamps.go b/gozel/tols_GlobalTimestamps.go similarity index 100% rename from tols_GlobalTimestamps.go rename to gozel/tols_GlobalTimestamps.go diff --git a/tols_common.go b/gozel/tols_common.go similarity index 100% rename from tols_common.go rename to gozel/tols_common.go diff --git a/tols_concurrentMetricGroup.go b/gozel/tols_concurrentMetricGroup.go similarity index 100% rename from tols_concurrentMetricGroup.go rename to gozel/tols_concurrentMetricGroup.go diff --git a/tols_debug.go b/gozel/tols_debug.go similarity index 100% rename from tols_debug.go rename to gozel/tols_debug.go diff --git a/tols_metric.go b/gozel/tols_metric.go similarity index 100% rename from tols_metric.go rename to gozel/tols_metric.go diff --git a/tols_metricExportData.go b/gozel/tols_metricExportData.go similarity index 100% rename from tols_metricExportData.go rename to gozel/tols_metricExportData.go diff --git a/tols_metricExportMemory.go b/gozel/tols_metricExportMemory.go similarity index 100% rename from tols_metricExportMemory.go rename to gozel/tols_metricExportMemory.go diff --git a/tols_metricGroupMarker.go b/gozel/tols_metricGroupMarker.go similarity index 100% rename from tols_metricGroupMarker.go rename to gozel/tols_metricGroupMarker.go diff --git a/tols_metricProgrammable.go b/gozel/tols_metricProgrammable.go similarity index 100% rename from tols_metricProgrammable.go rename to gozel/tols_metricProgrammable.go diff --git a/tols_metricRuntimeEnableDisable.go b/gozel/tols_metricRuntimeEnableDisable.go similarity index 100% rename from tols_metricRuntimeEnableDisable.go rename to gozel/tols_metricRuntimeEnableDisable.go diff --git a/tols_metricTracer.go b/gozel/tols_metricTracer.go similarity index 100% rename from tols_metricTracer.go rename to gozel/tols_metricTracer.go diff --git a/tols_module.go b/gozel/tols_module.go similarity index 100% rename from tols_module.go rename to gozel/tols_module.go diff --git a/tols_multiMetricValues.go b/gozel/tols_multiMetricValues.go similarity index 100% rename from tols_multiMetricValues.go rename to gozel/tols_multiMetricValues.go diff --git a/tols_pin.go b/gozel/tols_pin.go similarity index 100% rename from tols_pin.go rename to gozel/tols_pin.go diff --git a/tols_tracing.go b/gozel/tols_tracing.go similarity index 100% rename from tols_tracing.go rename to gozel/tols_tracing.go diff --git a/ze/command.go b/ze/command.go index 77d3f62..a916e29 100644 --- a/ze/command.go +++ b/ze/command.go @@ -2,24 +2,23 @@ package ze import ( - "math" "runtime" "unsafe" - "github.com/fumiama/gozel" + "github.com/fumiama/gozel/gozel" ) // CommandQueueHandle is a handle to a Level Zero command queue. type CommandQueueHandle gozel.ZeCommandQueueHandle // CommandQueueCreate creates a command queue on the given device with default mode and normal priority. -func (h ContextHandle) CommandQueueCreate(hDevice DeviceHandle) ( +func (h ContextHandle) CommandQueueCreate(hDevice DeviceHandle, mode gozel.ZeCommandQueueMode) ( CommandQueueHandle, error, ) { var q gozel.ZeCommandQueueHandle _, err := gozel.ZeCommandQueueCreate(gozel.ZeContextHandle(h), gozel.ZeDeviceHandle(hDevice), &gozel.ZeCommandQueueDesc{ Stype: gozel.ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, - Mode: gozel.ZE_COMMAND_QUEUE_MODE_DEFAULT, + Mode: mode, Priority: gozel.ZE_COMMAND_QUEUE_PRIORITY_NORMAL, }, &q) return CommandQueueHandle(q), err @@ -36,8 +35,8 @@ func (h CommandQueueHandle) ExecuteCommandLists(hCommandList ...CommandListHandl } // Synchronize blocks the host until all commands in the command queue have completed. -func (h CommandQueueHandle) Synchronize() error { - _, err := gozel.ZeCommandQueueSynchronize(gozel.ZeCommandQueueHandle(h), math.MaxUint64) +func (h CommandQueueHandle) Synchronize(timeout uint64) error { + _, err := gozel.ZeCommandQueueSynchronize(gozel.ZeCommandQueueHandle(h), timeout) return err } @@ -61,11 +60,30 @@ func (h ContextHandle) CommandListCreate(hDevice DeviceHandle) ( return CommandListHandle(cl), err } +// CommandListCreateImmediate creates a command list on the given device, also creates an implicit command queue. +func (h ContextHandle) CommandListCreateImmediate(hDevice DeviceHandle, mode gozel.ZeCommandQueueMode) ( + CommandListHandle, error, +) { + var cl gozel.ZeCommandListHandle + _, err := gozel.ZeCommandListCreateImmediate(gozel.ZeContextHandle(h), gozel.ZeDeviceHandle(hDevice), &gozel.ZeCommandQueueDesc{ + Stype: gozel.ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, + Mode: mode, + Priority: gozel.ZE_COMMAND_QUEUE_PRIORITY_NORMAL, + }, &cl) + return CommandListHandle(cl), err +} + // AppendLaunchKernel appends a kernel launch command to the command list. func (h CommandListHandle) AppendLaunchKernel( hKernel KernelHandle, pLaunchFuncArgs *gozel.ZeGroupCount, + hSignalEvent EventHandle, waitEvents ...EventHandle, ) error { - _, err := gozel.ZeCommandListAppendLaunchKernel(gozel.ZeCommandListHandle(h), gozel.ZeKernelHandle(hKernel), pLaunchFuncArgs, 0, 0, nil) + _, err := gozel.ZeCommandListAppendLaunchKernel( + gozel.ZeCommandListHandle(h), gozel.ZeKernelHandle(hKernel), + pLaunchFuncArgs, gozel.ZeEventHandle(hSignalEvent), uint32(len(waitEvents)), + (*gozel.ZeEventHandle)(unsafe.SliceData(waitEvents)), + ) + runtime.KeepAlive(waitEvents) return err } @@ -73,9 +91,15 @@ func (h CommandListHandle) AppendLaunchKernel( func (h CommandListHandle) AppendLaunchKernelWithArguments( hKernel KernelHandle, groupCounts *gozel.ZeGroupCount, groupSizes *gozel.ZeGroupSize, pArguments *unsafe.Pointer, + hSignalEvent EventHandle, waitEvents ...EventHandle, ) error { _, err := gozel.ZeCommandListAppendLaunchKernelWithArguments( - gozel.ZeCommandListHandle(h), gozel.ZeKernelHandle(hKernel), groupCounts, groupSizes, pArguments, nil, 0, 0, nil) + gozel.ZeCommandListHandle(h), gozel.ZeKernelHandle(hKernel), + groupCounts, groupSizes, pArguments, + nil, gozel.ZeEventHandle(hSignalEvent), uint32(len(waitEvents)), + (*gozel.ZeEventHandle)(unsafe.SliceData(waitEvents)), + ) + runtime.KeepAlive(waitEvents) return err } @@ -88,8 +112,14 @@ func (h CommandListHandle) Close() error { // AppendMemoryCopy appends a memory copy command from srcptr to dstptr of the given size. func (h CommandListHandle) AppendMemoryCopy( dstptr unsafe.Pointer, srcptr unsafe.Pointer, size uintptr, + hSignalEvent EventHandle, waitEvents ...EventHandle, ) error { - _, err := gozel.ZeCommandListAppendMemoryCopy(gozel.ZeCommandListHandle(h), dstptr, srcptr, size, 0, 0, nil) + _, err := gozel.ZeCommandListAppendMemoryCopy( + gozel.ZeCommandListHandle(h), dstptr, srcptr, size, + gozel.ZeEventHandle(hSignalEvent), uint32(len(waitEvents)), + (*gozel.ZeEventHandle)(unsafe.SliceData(waitEvents)), + ) + runtime.KeepAlive(waitEvents) return err } @@ -100,7 +130,21 @@ func (h CommandListHandle) Destroy() error { } // AppendBarrier appends an execution barrier to the command list. -func (h CommandListHandle) AppendBarrier() error { - _, err := gozel.ZeCommandListAppendBarrier(gozel.ZeCommandListHandle(h), 0, 0, nil) +func (h CommandListHandle) AppendBarrier( + hSignalEvent EventHandle, waitEvents ...EventHandle, +) error { + _, err := gozel.ZeCommandListAppendBarrier( + gozel.ZeCommandListHandle(h), + gozel.ZeEventHandle(hSignalEvent), uint32(len(waitEvents)), + (*gozel.ZeEventHandle)(unsafe.SliceData(waitEvents)), + ) + runtime.KeepAlive(waitEvents) + return err +} + +// HostSynchronize Synchronizes an immediate command list by waiting on the host for the +// completion of all commands previously submitted to it. +func (h CommandListHandle) HostSynchronize(timeout uint64) error { + _, err := gozel.ZeCommandListHostSynchronize(gozel.ZeCommandListHandle(h), timeout) return err } diff --git a/ze/context.go b/ze/context.go index e151e36..d8af198 100644 --- a/ze/context.go +++ b/ze/context.go @@ -1,6 +1,6 @@ package ze -import "github.com/fumiama/gozel" +import "github.com/fumiama/gozel/gozel" // ContextHandle is a handle to a Level Zero context. type ContextHandle gozel.ZeContextHandle diff --git a/ze/device.go b/ze/device.go index c34a431..480ee05 100644 --- a/ze/device.go +++ b/ze/device.go @@ -1,6 +1,6 @@ package ze -import "github.com/fumiama/gozel" +import "github.com/fumiama/gozel/gozel" // DeviceHandle is a handle to a Level Zero driver's device object. type DeviceHandle gozel.ZeDeviceHandle diff --git a/ze/event.go b/ze/event.go new file mode 100644 index 0000000..00c7a05 --- /dev/null +++ b/ze/event.go @@ -0,0 +1,60 @@ +package ze + +import ( + "runtime" + "unsafe" + + "github.com/fumiama/gozel/gozel" +) + +// EventPoolHandle (ze_event_pool_handle_t) Handle of driver's event pool object +type EventPoolHandle gozel.ZeEventPoolHandle + +// EventPoolCreate Creates a pool of events on the context. +func (h ContextHandle) EventPoolCreate( + evcount uint32, devices ...DeviceHandle, +) (eph EventPoolHandle, err error) { + _, err = gozel.ZeEventPoolCreate(gozel.ZeContextHandle(h), &gozel.ZeEventPoolDesc{ + Stype: gozel.ZE_STRUCTURE_TYPE_EVENT_POOL_DESC, + Flags: gozel.ZE_EVENT_POOL_FLAG_HOST_VISIBLE, + Count: evcount, + }, uint32(len(devices)), (*gozel.ZeDeviceHandle)(unsafe.SliceData(devices)), + (*gozel.ZeEventPoolHandle)(&eph), + ) + runtime.KeepAlive(devices) + return +} + +// Destroy Deletes an event pool object. +func (h EventPoolHandle) Destroy() error { + _, err := gozel.ZeEventPoolDestroy(gozel.ZeEventPoolHandle(h)) + return err +} + +// EventHandle (ze_event_handle_t) Handle of driver's event object +type EventHandle gozel.ZeEventHandle + +// EventCreate Creates an event from the pool. +func (h EventPoolHandle) EventCreate( + index uint32, signal, wait gozel.ZeEventScopeFlags, +) (eh EventHandle, err error) { + _, err = gozel.ZeEventCreate(gozel.ZeEventPoolHandle(h), &gozel.ZeEventDesc{ + Stype: gozel.ZE_STRUCTURE_TYPE_EVENT_DESC, + Index: index, + Signal: signal, + Wait: wait, + }, (*gozel.ZeEventHandle)(&eh)) + return +} + +// HostSynchronize The current host thread waits on an event to be signaled. +func (h EventHandle) HostSynchronize(timeout uint64) error { + _, err := gozel.ZeEventHostSynchronize(gozel.ZeEventHandle(h), timeout) + return err +} + +// Destroy Deletes an event object. +func (h EventHandle) Destroy() error { + _, err := gozel.ZeEventDestroy(gozel.ZeEventHandle(h)) + return err +} diff --git a/ze/init.go b/ze/init.go index d75baa1..941f816 100644 --- a/ze/init.go +++ b/ze/init.go @@ -3,7 +3,7 @@ package ze import ( "unsafe" - "github.com/fumiama/gozel" + "github.com/fumiama/gozel/gozel" ) // DriverHandle is a handle to a Level Zero driver instance. diff --git a/ze/kernel.go b/ze/kernel.go index 75497c9..331bb97 100644 --- a/ze/kernel.go +++ b/ze/kernel.go @@ -1,10 +1,11 @@ package ze import ( + "reflect" "runtime" "unsafe" - "github.com/fumiama/gozel" + "github.com/fumiama/gozel/gozel" ) // KernelHandle is a handle to a Level Zero kernel. @@ -23,8 +24,15 @@ func (h ModuleHandle) KernelCreate(kernelName string) (KernelHandle, error) { } // SetArgumentValue sets the value of a kernel argument at the given index. -func (h KernelHandle) SetArgumentValue(argIndex uint32, argSize uintptr, pArgValue unsafe.Pointer) error { - _, err := gozel.ZeKernelSetArgumentValue(gozel.ZeKernelHandle(h), argIndex, argSize, pArgValue) +func (h KernelHandle) SetArgumentValue(argIndex uint32, arg any) error { + _, err := gozel.ZeKernelSetArgumentValue( + gozel.ZeKernelHandle(h), argIndex, reflect.TypeOf(arg).Size(), + *(*unsafe.Pointer)( + unsafe.Add(unsafe.Pointer(&arg), + unsafe.Sizeof(uintptr(0))), + ), + ) + runtime.KeepAlive(arg) return err } diff --git a/ze/mem.go b/ze/mem.go index 6e001de..6265874 100644 --- a/ze/mem.go +++ b/ze/mem.go @@ -3,7 +3,7 @@ package ze import ( "unsafe" - "github.com/fumiama/gozel" + "github.com/fumiama/gozel/gozel" ) // MemAllocDevice allocates device memory on the given device with the specified size and alignment. diff --git a/ze/module.go b/ze/module.go index 584482c..3fd5d9d 100644 --- a/ze/module.go +++ b/ze/module.go @@ -5,7 +5,7 @@ import ( "runtime" "strings" - "github.com/fumiama/gozel" + "github.com/fumiama/gozel/gozel" ) // ModuleHandle is a handle to a Level Zero module.