From b821801ecdd533ad675517b5cda279312550e6b2 Mon Sep 17 00:00:00 2001 From: fumiama <41315874+fumiama@users.noreply.github.com> Date: Sat, 28 Mar 2026 18:00:12 +0800 Subject: [PATCH] feat(ze): add event support & vadd demo & refactor (#5) Co-authored-by: github-actions[bot] --- README.md | 3 +- cmd/gen/api.go | 7 +- cmd/gen/scan.go | 3 +- examples/vadd/main.cpp | 2 +- examples/vadd/main.go | 26 +- examples/vadd/main.ll | 34 ++- examples/vadd/main.spv | Bin 1028 -> 1232 bytes examples/vadd_event/.gitignore | 1 + examples/vadd_event/main.cpp | 9 + examples/vadd_event/main.go | 231 ++++++++++++++++++ examples/vadd_event/main.ll | 64 +++++ examples/vadd_event/main.spv | Bin 0 -> 1232 bytes api.go => gozel/api.go | 0 .../core_CacheLineSize.go | 0 core_EUCount.go => gozel/core_EUCount.go | 0 .../core_PCIProperties.go | 0 core_RTAS.go => gozel/core_RTAS.go | 0 .../core_RTASBuilder.go | 0 core_SRGB.go => gozel/core_SRGB.go | 0 core_bandwidth.go => gozel/core_bandwidth.go | 0 core_barrier.go => gozel/core_barrier.go | 0 .../core_bfloat16conversions.go | 0 .../core_bindlessimages.go | 0 .../core_cacheReservation.go | 0 core_callbacks.go => gozel/core_callbacks.go | 0 core_cmdlist.go => gozel/core_cmdlist.go | 0 core_cmdqueue.go => gozel/core_cmdqueue.go | 0 .../core_commandListClone.go | 0 core_common.go => gozel/core_common.go | 0 core_context.go => gozel/core_context.go | 0 core_copy.go => gozel/core_copy.go | 0 .../core_counterbasedeventpool.go | 0 core_device.go => gozel/core_device.go | 0 .../core_deviceLUID.go | 0 .../core_deviceVectorSizes.go | 0 .../core_deviceipversion.go | 0 .../core_deviceusablememproperties.go | 0 core_driver.go => gozel/core_driver.go | 0 .../core_driverDDIHandles.go | 0 core_event.go => gozel/core_event.go | 0 .../core_eventQueryKernelTimestamps.go | 0 .../core_eventquerytimestamps.go | 0 .../core_externalMemMap.go | 0 .../core_externalSemaphores.go | 0 core_fabric.go => gozel/core_fabric.go | 0 core_fence.go => gozel/core_fence.go | 0 .../core_floatAtomics.go | 0 .../core_globaloffset.go | 0 core_image.go => gozel/core_image.go | 0 core_imageCopy.go => gozel/core_imageCopy.go | 0 .../core_imageFormatSupport.go | 0 .../core_imageQueryAllocProperties.go | 0 .../core_imagememoryproperties.go | 0 core_imageview.go => gozel/core_imageview.go | 0 .../core_imageviewplanar.go | 0 .../core_immediateCommandListAppend.go | 0 .../core_ipcMemHandleType.go | 0 .../core_kernelAllocationProperties.go | 0 .../core_kernelBinary.go | 0 .../core_kernelMaxGroupSizeProperties.go | 0 .../core_kernelSchedulingHints.go | 0 .../core_linkageInspection.go | 0 .../core_linkonceodr.go | 0 core_memory.go => gozel/core_memory.go | 0 .../core_memoryCompressionHints.go | 0 .../core_memoryFreePolicies.go | 0 .../core_memoryProperties.go | 0 core_module.go => gozel/core_module.go | 0 .../core_mutableCommandList.go | 0 .../core_powersavinghint.go | 0 core_program.go => gozel/core_program.go | 0 .../core_raytracing.go | 0 .../core_relaxedAllocLimits.go | 0 core_residency.go => gozel/core_residency.go | 0 core_sampler.go => gozel/core_sampler.go | 0 .../core_subAllocationsProperties.go | 0 core_subgroups.go => gozel/core_subgroups.go | 0 core_virtual.go => gozel/core_virtual.go | 0 rntm_common.go => gozel/rntm_common.go | 0 sysm_Overclock.go => gozel/sysm_Overclock.go | 0 sysm_common.go => gozel/sysm_common.go | 0 sysm_device.go => gozel/sysm_device.go | 0 .../sysm_diagnostics.go | 0 sysm_driver.go => gozel/sysm_driver.go | 0 sysm_ecc.go => gozel/sysm_ecc.go | 0 sysm_eccState.go => gozel/sysm_eccState.go | 0 sysm_engine.go => gozel/sysm_engine.go | 0 .../sysm_engineActivity.go | 0 sysm_events.go => gozel/sysm_events.go | 0 sysm_fabric.go => gozel/sysm_fabric.go | 0 sysm_fan.go => gozel/sysm_fan.go | 0 sysm_firmware.go => gozel/sysm_firmware.go | 0 .../sysm_firmwareSecurityVersion.go | 0 sysm_frequency.go => gozel/sysm_frequency.go | 0 sysm_led.go => gozel/sysm_led.go | 0 .../sysm_memPageOfflineState.go | 0 sysm_memory.go => gozel/sysm_memory.go | 0 .../sysm_memoryBwCounterValidBits.go | 0 .../sysm_pciLinkSpeedDowngrade.go | 0 .../sysm_performance.go | 0 sysm_power.go => gozel/sysm_power.go | 0 .../sysm_powerDomainProperties.go | 0 .../sysm_powerLimits.go | 0 sysm_psu.go => gozel/sysm_psu.go | 0 sysm_ras.go => gozel/sysm_ras.go | 0 sysm_rasState.go => gozel/sysm_rasState.go | 0 sysm_scheduler.go => gozel/sysm_scheduler.go | 0 sysm_standby.go => gozel/sysm_standby.go | 0 .../sysm_sysmanDeviceMapping.go | 0 .../sysm_temperature.go | 0 .../sysm_virtualFunctionManagement.go | 0 .../tols_GlobalTimestamps.go | 0 tols_common.go => gozel/tols_common.go | 0 .../tols_concurrentMetricGroup.go | 0 tols_debug.go => gozel/tols_debug.go | 0 tols_metric.go => gozel/tols_metric.go | 0 .../tols_metricExportData.go | 0 .../tols_metricExportMemory.go | 0 .../tols_metricGroupMarker.go | 0 .../tols_metricProgrammable.go | 0 .../tols_metricRuntimeEnableDisable.go | 0 .../tols_metricTracer.go | 0 tols_module.go => gozel/tols_module.go | 0 .../tols_multiMetricValues.go | 0 tols_pin.go => gozel/tols_pin.go | 0 tols_tracing.go => gozel/tols_tracing.go | 0 ze/command.go | 66 ++++- ze/context.go | 2 +- ze/device.go | 2 +- ze/event.go | 60 +++++ ze/init.go | 2 +- ze/kernel.go | 14 +- ze/mem.go | 2 +- ze/module.go | 2 +- 134 files changed, 481 insertions(+), 49 deletions(-) create mode 100644 examples/vadd_event/.gitignore create mode 100644 examples/vadd_event/main.cpp create mode 100644 examples/vadd_event/main.go create mode 100644 examples/vadd_event/main.ll create mode 100644 examples/vadd_event/main.spv rename api.go => gozel/api.go (100%) rename core_CacheLineSize.go => gozel/core_CacheLineSize.go (100%) rename core_EUCount.go => gozel/core_EUCount.go (100%) rename core_PCIProperties.go => gozel/core_PCIProperties.go (100%) rename core_RTAS.go => gozel/core_RTAS.go (100%) rename core_RTASBuilder.go => gozel/core_RTASBuilder.go (100%) rename core_SRGB.go => gozel/core_SRGB.go (100%) rename core_bandwidth.go => gozel/core_bandwidth.go (100%) rename core_barrier.go => gozel/core_barrier.go (100%) rename core_bfloat16conversions.go => gozel/core_bfloat16conversions.go (100%) rename core_bindlessimages.go => gozel/core_bindlessimages.go (100%) rename core_cacheReservation.go => gozel/core_cacheReservation.go (100%) rename core_callbacks.go => gozel/core_callbacks.go (100%) rename core_cmdlist.go => gozel/core_cmdlist.go (100%) rename core_cmdqueue.go => gozel/core_cmdqueue.go (100%) rename core_commandListClone.go => gozel/core_commandListClone.go (100%) rename core_common.go => gozel/core_common.go (100%) rename core_context.go => gozel/core_context.go (100%) rename core_copy.go => gozel/core_copy.go (100%) rename core_counterbasedeventpool.go => gozel/core_counterbasedeventpool.go (100%) rename core_device.go => gozel/core_device.go (100%) rename core_deviceLUID.go => gozel/core_deviceLUID.go (100%) rename core_deviceVectorSizes.go => gozel/core_deviceVectorSizes.go (100%) rename core_deviceipversion.go => gozel/core_deviceipversion.go (100%) rename core_deviceusablememproperties.go => gozel/core_deviceusablememproperties.go (100%) rename core_driver.go => gozel/core_driver.go (100%) rename core_driverDDIHandles.go => gozel/core_driverDDIHandles.go (100%) rename core_event.go => gozel/core_event.go (100%) rename core_eventQueryKernelTimestamps.go => gozel/core_eventQueryKernelTimestamps.go (100%) rename core_eventquerytimestamps.go => gozel/core_eventquerytimestamps.go (100%) rename core_externalMemMap.go => gozel/core_externalMemMap.go (100%) rename core_externalSemaphores.go => gozel/core_externalSemaphores.go (100%) rename core_fabric.go => gozel/core_fabric.go (100%) rename core_fence.go => gozel/core_fence.go (100%) rename core_floatAtomics.go => gozel/core_floatAtomics.go (100%) rename core_globaloffset.go => gozel/core_globaloffset.go (100%) rename core_image.go => gozel/core_image.go (100%) rename core_imageCopy.go => gozel/core_imageCopy.go (100%) rename core_imageFormatSupport.go => gozel/core_imageFormatSupport.go (100%) rename core_imageQueryAllocProperties.go => gozel/core_imageQueryAllocProperties.go (100%) rename core_imagememoryproperties.go => gozel/core_imagememoryproperties.go (100%) rename core_imageview.go => gozel/core_imageview.go (100%) rename core_imageviewplanar.go => gozel/core_imageviewplanar.go (100%) rename core_immediateCommandListAppend.go => gozel/core_immediateCommandListAppend.go (100%) rename core_ipcMemHandleType.go => gozel/core_ipcMemHandleType.go (100%) rename core_kernelAllocationProperties.go => gozel/core_kernelAllocationProperties.go (100%) rename core_kernelBinary.go => gozel/core_kernelBinary.go (100%) rename core_kernelMaxGroupSizeProperties.go => gozel/core_kernelMaxGroupSizeProperties.go (100%) rename core_kernelSchedulingHints.go => gozel/core_kernelSchedulingHints.go (100%) rename core_linkageInspection.go => gozel/core_linkageInspection.go (100%) rename core_linkonceodr.go => gozel/core_linkonceodr.go (100%) rename core_memory.go => gozel/core_memory.go (100%) rename core_memoryCompressionHints.go => gozel/core_memoryCompressionHints.go (100%) rename core_memoryFreePolicies.go => gozel/core_memoryFreePolicies.go (100%) rename core_memoryProperties.go => gozel/core_memoryProperties.go (100%) rename core_module.go => gozel/core_module.go (100%) rename core_mutableCommandList.go => gozel/core_mutableCommandList.go (100%) rename core_powersavinghint.go => gozel/core_powersavinghint.go (100%) rename core_program.go => gozel/core_program.go (100%) rename core_raytracing.go => gozel/core_raytracing.go (100%) rename core_relaxedAllocLimits.go => gozel/core_relaxedAllocLimits.go (100%) rename core_residency.go => gozel/core_residency.go (100%) rename core_sampler.go => gozel/core_sampler.go (100%) rename core_subAllocationsProperties.go => gozel/core_subAllocationsProperties.go (100%) rename core_subgroups.go => gozel/core_subgroups.go (100%) rename core_virtual.go => gozel/core_virtual.go (100%) rename rntm_common.go => gozel/rntm_common.go (100%) rename sysm_Overclock.go => gozel/sysm_Overclock.go (100%) rename sysm_common.go => gozel/sysm_common.go (100%) rename sysm_device.go => gozel/sysm_device.go (100%) rename sysm_diagnostics.go => gozel/sysm_diagnostics.go (100%) rename sysm_driver.go => gozel/sysm_driver.go (100%) rename sysm_ecc.go => gozel/sysm_ecc.go (100%) rename sysm_eccState.go => gozel/sysm_eccState.go (100%) rename sysm_engine.go => gozel/sysm_engine.go (100%) rename sysm_engineActivity.go => gozel/sysm_engineActivity.go (100%) rename sysm_events.go => gozel/sysm_events.go (100%) rename sysm_fabric.go => gozel/sysm_fabric.go (100%) rename sysm_fan.go => gozel/sysm_fan.go (100%) rename sysm_firmware.go => gozel/sysm_firmware.go (100%) rename sysm_firmwareSecurityVersion.go => gozel/sysm_firmwareSecurityVersion.go (100%) rename sysm_frequency.go => gozel/sysm_frequency.go (100%) rename sysm_led.go => gozel/sysm_led.go (100%) rename sysm_memPageOfflineState.go => gozel/sysm_memPageOfflineState.go (100%) rename sysm_memory.go => gozel/sysm_memory.go (100%) rename sysm_memoryBwCounterValidBits.go => gozel/sysm_memoryBwCounterValidBits.go (100%) rename sysm_pciLinkSpeedDowngrade.go => gozel/sysm_pciLinkSpeedDowngrade.go (100%) rename sysm_performance.go => gozel/sysm_performance.go (100%) rename sysm_power.go => gozel/sysm_power.go (100%) rename sysm_powerDomainProperties.go => gozel/sysm_powerDomainProperties.go (100%) rename sysm_powerLimits.go => gozel/sysm_powerLimits.go (100%) rename sysm_psu.go => gozel/sysm_psu.go (100%) rename sysm_ras.go => gozel/sysm_ras.go (100%) rename sysm_rasState.go => gozel/sysm_rasState.go (100%) rename sysm_scheduler.go => gozel/sysm_scheduler.go (100%) rename sysm_standby.go => gozel/sysm_standby.go (100%) rename sysm_sysmanDeviceMapping.go => gozel/sysm_sysmanDeviceMapping.go (100%) rename sysm_temperature.go => gozel/sysm_temperature.go (100%) rename sysm_virtualFunctionManagement.go => gozel/sysm_virtualFunctionManagement.go (100%) rename tols_GlobalTimestamps.go => gozel/tols_GlobalTimestamps.go (100%) rename tols_common.go => gozel/tols_common.go (100%) rename tols_concurrentMetricGroup.go => gozel/tols_concurrentMetricGroup.go (100%) rename tols_debug.go => gozel/tols_debug.go (100%) rename tols_metric.go => gozel/tols_metric.go (100%) rename tols_metricExportData.go => gozel/tols_metricExportData.go (100%) rename tols_metricExportMemory.go => gozel/tols_metricExportMemory.go (100%) rename tols_metricGroupMarker.go => gozel/tols_metricGroupMarker.go (100%) rename tols_metricProgrammable.go => gozel/tols_metricProgrammable.go (100%) rename tols_metricRuntimeEnableDisable.go => gozel/tols_metricRuntimeEnableDisable.go (100%) rename tols_metricTracer.go => gozel/tols_metricTracer.go (100%) rename tols_module.go => gozel/tols_module.go (100%) rename tols_multiMetricValues.go => gozel/tols_multiMetricValues.go (100%) rename tols_pin.go => gozel/tols_pin.go (100%) rename tols_tracing.go => gozel/tols_tracing.go (100%) create mode 100644 ze/event.go diff --git a/README.md b/README.md index 732c68e..1aab001 100644 --- a/README.md +++ b/README.md @@ -190,7 +190,8 @@ Contributions of all kinds are welcome. Some particularly impactful areas: ## License -This project is licensed under the [GNU Affero General Public License v3.0](LICENSE). +- This project is generally licensed under the [GNU Affero General Public License v3.0](LICENSE). +- The files in [gozel](gozel) folder follows their original license, which is [MIT](https://github.com/oneapi-src/level-zero/blob/master/LICENSE). --- diff --git a/cmd/gen/api.go b/cmd/gen/api.go index dd8c8d8..b2f328b 100644 --- a/cmd/gen/api.go +++ b/cmd/gen/api.go @@ -1,11 +1,14 @@ package main -import "os" +import ( + "os" + "path" +) var apif *os.File func init() { - f, err := os.Create("api.go") + f, err := os.Create(path.Join("gozel", "api.go")) if err != nil { panic(err) } diff --git a/cmd/gen/scan.go b/cmd/gen/scan.go index 81e7824..4e5bac9 100644 --- a/cmd/gen/scan.go +++ b/cmd/gen/scan.go @@ -6,6 +6,7 @@ import ( "fmt" "io" "os" + "path" "strconv" "strings" "unicode" @@ -134,7 +135,7 @@ func scanHeader(name string, scan *bufio.Scanner) { } fmt.Println(infh(name), "scanning region", region) k := fmt.Sprint(name, "_", region) - f, err := os.Create(fmt.Sprint(k, ".go")) + f, err := os.Create(path.Join("gozel", fmt.Sprint(k, ".go"))) if err != nil { panic(fmt.Sprintf("%s L%d: cannot create region %s, err: %v", name, ln, region, err)) } diff --git a/examples/vadd/main.cpp b/examples/vadd/main.cpp index 702557f..86a1055 100644 --- a/examples/vadd/main.cpp +++ b/examples/vadd/main.cpp @@ -3,7 +3,7 @@ extern "C" SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((sycl::ext::oneapi::experimental::nd_range_kernel<1>)) void vector_add(float* a, float* b) { auto item = sycl::ext::oneapi::this_work_item::get_nd_item<1>(); - int idx = item.get_global_id(0); + int idx = item.get_global_linear_id(); a[idx] += b[idx]; } diff --git a/examples/vadd/main.go b/examples/vadd/main.go index 7b9bda1..4ee0dd4 100644 --- a/examples/vadd/main.go +++ b/examples/vadd/main.go @@ -4,6 +4,7 @@ package main import ( _ "embed" "fmt" + "math" "math/rand" "os" "strconv" @@ -11,7 +12,7 @@ import ( "time" "unsafe" - "github.com/fumiama/gozel" + "github.com/fumiama/gozel/gozel" "github.com/fumiama/gozel/ze" ) @@ -69,8 +70,7 @@ func main() { fmt.Printf("%-28s (%d, %d, %d)\n", "Max Group Count (X, Y, Z):", cprop.Maxgroupcountx, cprop.Maxgroupcounty, cprop.Maxgroupcountz) fmt.Printf("%-28s %d\n", "Max Total Group Size:", cprop.Maxtotalgroupsize) fmt.Printf("%-28s %d\n", "Max Shared Local Memory:", cprop.Maxsharedlocalmemory) - fmt.Printf("%-28s %d\n", "Num Subgroup Sizes:", cprop.Numsubgroupsizes) - fmt.Printf("%-28s %v\n", "Subgroup Sizes:", cprop.Subgroupsizes[:]) + fmt.Printf("%-28s %v\n", "Subgroup Sizes:", cprop.Subgroupsizes[:cprop.Numsubgroupsizes]) var ( X, Y, Z = uintptr(cprop.Maxgroupsizex), uintptr(1), uintptr(1) @@ -84,7 +84,7 @@ func main() { fmt.Printf("%-28s %d\n", "Total Elements (N):", N) fmt.Printf("%-28s %d MiB\n", "Buffer Size:", bufsz/1024/1024) - q, err := ctx.CommandQueueCreate(dev) + q, err := ctx.CommandQueueCreate(dev, gozel.ZE_COMMAND_QUEUE_MODE_DEFAULT) if err != nil { panic(err) } @@ -135,11 +135,11 @@ func main() { } defer krn.Destroy() - err = krn.SetArgumentValue(0, unsafe.Sizeof(uintptr(0)), unsafe.Pointer(&dbufV1)) + err = krn.SetArgumentValue(0, &dbufV1) if err != nil { panic(err) } - err = krn.SetArgumentValue(1, unsafe.Sizeof(uintptr(0)), unsafe.Pointer(&dbufV2)) + err = krn.SetArgumentValue(1, &dbufV2) if err != nil { panic(err) } @@ -154,16 +154,16 @@ func main() { } defer lstpre.Destroy() - err = lstpre.AppendMemoryCopy(dbufV1, hbufV1, bufsz) + err = lstpre.AppendMemoryCopy(dbufV1, hbufV1, bufsz, 0) if err != nil { panic(err) } - err = lstpre.AppendMemoryCopy(dbufV2, hbufV2, bufsz) + err = lstpre.AppendMemoryCopy(dbufV2, hbufV2, bufsz, 0) if err != nil { panic(err) } - err = lstpre.AppendBarrier() + err = lstpre.AppendBarrier(0) if err != nil { panic(err) } @@ -181,12 +181,12 @@ func main() { err = lstcalc.AppendLaunchKernel(krn, &gozel.ZeGroupCount{ Groupcountx: uint32(groupCount), Groupcounty: 1, Groupcountz: 1, - }) + }, 0) if err != nil { panic(err) } - err = lstcalc.AppendBarrier() + err = lstcalc.AppendBarrier(0) if err != nil { panic(err) } @@ -202,7 +202,7 @@ func main() { } defer lstpost.Destroy() - err = lstpost.AppendMemoryCopy(hbufV1, dbufV1, bufsz) + err = lstpost.AppendMemoryCopy(hbufV1, dbufV1, bufsz, 0) if err != nil { panic(err) } @@ -217,7 +217,7 @@ func main() { if err != nil { panic(err) } - err = q.Synchronize() + err = q.Synchronize(math.MaxUint64) if err != nil { panic(err) } diff --git a/examples/vadd/main.ll b/examples/vadd/main.ll index e742496..96d403c 100644 --- a/examples/vadd/main.ll +++ b/examples/vadd/main.ll @@ -4,18 +4,21 @@ target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256: target triple = "spirv64-unknown-unknown" @__spirv_BuiltInGlobalInvocationId = external local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 +@__spirv_BuiltInGlobalOffset = external local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write) define spir_kernel void @__sycl_kernel_vector_add(ptr addrspace(1) noundef align 4 captures(none) %0, ptr addrspace(1) noundef readonly align 4 captures(none) %1) local_unnamed_addr #0 !kernel_arg_buffer_location !6 !sycl_fixed_targets !7 !sycl_kernel_omit_args !8 { %3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32, !noalias !9 - %4 = icmp ult i64 %3, 2147483648 - tail call void @llvm.assume(i1 %4) - %5 = getelementptr inbounds float, ptr addrspace(1) %1, i64 %3 - %6 = load float, ptr addrspace(1) %5, align 4, !tbaa !16 - %7 = getelementptr inbounds float, ptr addrspace(1) %0, i64 %3 - %8 = load float, ptr addrspace(1) %7, align 4, !tbaa !16 - %9 = fadd float %8, %6 - store float %9, ptr addrspace(1) %7, align 4, !tbaa !16 + %4 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalOffset, align 32, !noalias !16 + %5 = sub i64 %3, %4 + %6 = icmp ult i64 %5, 2147483648 + tail call void @llvm.assume(i1 %6) + %7 = getelementptr inbounds float, ptr addrspace(1) %1, i64 %5 + %8 = load float, ptr addrspace(1) %7, align 4, !tbaa !23 + %9 = getelementptr inbounds float, ptr addrspace(1) %0, i64 %5 + %10 = load float, ptr addrspace(1) %9, align 4, !tbaa !23 + %11 = fadd float %10, %8 + store float %11, ptr addrspace(1) %9, align 4, !tbaa !23 ret void } @@ -48,7 +51,14 @@ attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessib !13 = distinct !{!13, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v"} !14 = distinct !{!14, !15, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv: argument 0"} !15 = distinct !{!15, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv"} -!16 = !{!17, !17, i64 0} -!17 = !{!"float", !18, i64 0} -!18 = !{!"omnipotent char", !19, i64 0} -!19 = !{!"Simple C++ TBAA"} +!16 = !{!17, !19, !21} +!17 = distinct !{!17, !18, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"} +!18 = distinct !{!18, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN4sycl3_V12idILi1EEEE8initSizeEv"} +!19 = distinct !{!19, !20, !"_ZN7__spirv16initGlobalOffsetILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"} +!20 = distinct !{!20, !"_ZN7__spirv16initGlobalOffsetILi1EN4sycl3_V12idILi1EEEEET0_v"} +!21 = distinct !{!21, !22, !"_ZNK4sycl3_V17nd_itemILi1EE10get_offsetEv: argument 0"} +!22 = distinct !{!22, !"_ZNK4sycl3_V17nd_itemILi1EE10get_offsetEv"} +!23 = !{!24, !24, i64 0} +!24 = !{!"float", !25, i64 0} +!25 = !{!"omnipotent char", !26, i64 0} +!26 = !{!"Simple C++ TBAA"} diff --git a/examples/vadd/main.spv b/examples/vadd/main.spv index 0adf1e119be79bb3b68b1487664ff7adcc0e30ae..7f901ad337956d633eb506ad8b62b1a49111c973 100644 GIT binary patch literal 1232 zcma)*+iDYG6oq%1X{)AfT6^+TohTGU>7`mk5d}j)=pzgmTX~3*6vV^(@$DvZj8_ zK0j-y&!oU{G8|8{&p(Dm)hoNj=zG8DmDACnUkyiP53+f?8)j^W}feWe;6xM(0+DlMplKNtC}MY7{DZ3+|21|5GW*BN6q3_9qqmIGTmWK!K4c*!Q`-q>O z=jbPYqdPo&11@;qYBropQOq7M#JM8>v!1o87>*D6qRd>AQ6o-ej=Qgzd5&9GOwDnB z#4^)7%QA9x^1Uwoyf$@<2hx{tjcvF9)A zeBSa}#yV7}G8&DD_BP0)&71KNO5EoTJdqvTdu4oa!tzxLfaYZ{O0$313IrdHHBuN`W&LWaRM6vM*IRvnZp+ z+;_^My(B}EJo}krSw^1y+=1uvyP=mPx1^Z7=e|o9ja3;nw62tcL(cPlEM;dhKR5pw cR&s9M+|SK>GB>}~bM?txZOOPVc+|b-FFZ + +extern "C" SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((sycl::ext::oneapi::experimental::nd_range_kernel<1>)) +void vector_add(float* a, float* b) { + auto item = sycl::ext::oneapi::this_work_item::get_nd_item<1>(); + int idx = item.get_global_linear_id(); + + a[idx] += b[idx]; +} diff --git a/examples/vadd_event/main.go b/examples/vadd_event/main.go new file mode 100644 index 0000000..b0a611d --- /dev/null +++ b/examples/vadd_event/main.go @@ -0,0 +1,231 @@ +// Package main demonstrates vector addition using the gozel Level Zero bindings. +package main + +import ( + _ "embed" + "fmt" + "math" + "math/rand" + "os" + "strconv" + "strings" + "time" + "unsafe" + + "github.com/fumiama/gozel/gozel" + "github.com/fumiama/gozel/ze" +) + +//go:generate clang++ -fsycl -fsycl-device-only -fsycl-targets=spirv64 -Xclang -emit-llvm-bc main.cpp -o device_kern.bc +//go:generate sycl-post-link -symbols -split=auto -o device_kern.table device_kern.bc +//go:generate clang++ -target spirv64-unknown-unknown -S -emit-llvm -x ir device_kern_0.bc -o device_kern.ll +//go:generate llvm-spirv -o main.spv device_kern.bc +//go:generate clang++ -target spirv64-unknown-unknown -S -emit-llvm -x ir device_kern.bc -o main.ll + +//go:embed main.spv +var kernelspv []byte + +func main() { + gpus, err := ze.InitGPUDrivers() + if err != nil { + panic(err) + } + if len(gpus) == 0 { + panic("no gpu available") + } + gpu := gpus[0] + + ctx, err := gpu.ContextCreate() + if err != nil { + panic(err) + } + + devs, err := gpu.DeviceGet() + if err != nil { + panic(err) + } + if len(devs) == 0 { + panic("no device available") + } + dev := devs[0] + + prop, err := dev.DeviceGetProperties() + if err != nil { + panic(err) + } + + fmt.Println("=============== Device Basic Properties ===============") + name, _, _ := strings.Cut(string(prop.Name[:]), "\x00") + fmt.Println( + "Running on device: ID =", prop.Deviceid, ", Name =", name, + "@", strconv.FormatFloat(float64(prop.Coreclockrate)/1024/1024/1024, 'f', 2, 64), "GHz.", + ) + + cprop, err := dev.DeviceGetComputeProperties() + if err != nil { + panic(err) + } + fmt.Println("=============== Device Compute Properties ===============") + fmt.Printf("%-28s (%d, %d, %d)\n", "Max Group Size (X, Y, Z):", cprop.Maxgroupsizex, cprop.Maxgroupsizey, cprop.Maxgroupsizez) + fmt.Printf("%-28s (%d, %d, %d)\n", "Max Group Count (X, Y, Z):", cprop.Maxgroupcountx, cprop.Maxgroupcounty, cprop.Maxgroupcountz) + fmt.Printf("%-28s %d\n", "Max Total Group Size:", cprop.Maxtotalgroupsize) + fmt.Printf("%-28s %d\n", "Max Shared Local Memory:", cprop.Maxsharedlocalmemory) + fmt.Printf("%-28s %d\n", "Num Subgroup Sizes:", cprop.Numsubgroupsizes) + fmt.Printf("%-28s %v\n", "Subgroup Sizes:", cprop.Subgroupsizes[:]) + + var ( + X, Y, Z = uintptr(cprop.Maxgroupsizex), uintptr(1), uintptr(1) + groupCount = uintptr(65536) + N = X * groupCount + bufsz = N * unsafe.Sizeof(float32(0)) + ) + fmt.Println("=============== Computation Configuration ===============") + fmt.Printf("%-28s (%d, %d, %d)\n", "Group Size (X, Y, Z):", X, Y, Z) + fmt.Printf("%-28s %d\n", "Group Count:", groupCount) + fmt.Printf("%-28s %d\n", "Total Elements (N):", N) + fmt.Printf("%-28s %d MiB\n", "Buffer Size:", bufsz/1024/1024) + + hbufV1, err := ctx.MemAllocHost(bufsz, 1) + if err != nil { + panic(err) + } + defer ctx.MemFree(hbufV1) + + hbufV2, err := ctx.MemAllocHost(bufsz, 1) + if err != nil { + panic(err) + } + defer ctx.MemFree(hbufV2) + + dbufV1, err := ctx.MemAllocDevice(dev, bufsz, 1) + if err != nil { + panic(err) + } + defer ctx.MemFree(dbufV1) + + dbufV2, err := ctx.MemAllocDevice(dev, bufsz, 1) + if err != nil { + panic(err) + } + defer ctx.MemFree(dbufV2) + + floatbuf := make([]float32, 2*N) + for i := range floatbuf { + floatbuf[i] = rand.Float32() + } + + zev1, zev2 := unsafe.Slice((*float32)(hbufV1), N), unsafe.Slice((*float32)(hbufV2), N) + copy(zev1, floatbuf[:N]) + copy(zev2, floatbuf[N:]) + + mod, err := ctx.ModuleCreate(dev, kernelspv) + if err != nil { + panic(err) + } + defer mod.Destroy() + + krn, err := mod.KernelCreate("__sycl_kernel_vector_add") + if err != nil { + panic(err) + } + defer krn.Destroy() + + err = krn.SetArgumentValue(0, &dbufV1) + if err != nil { + panic(err) + } + err = krn.SetArgumentValue(1, &dbufV2) + if err != nil { + panic(err) + } + err = krn.SetGroupSize(uint32(X), uint32(Y), uint32(Z)) + if err != nil { + panic(err) + } + + evp, err := ctx.EventPoolCreate(3, dev) + if err != nil { + panic(err) + } + defer evp.Destroy() + + evcph2dv1, err := evp.EventCreate(0, gozel.ZE_EVENT_SCOPE_FLAG_HOST, 0) + if err != nil { + panic(err) + } + defer evcph2dv1.Destroy() + evcph2dv2, err := evp.EventCreate(1, gozel.ZE_EVENT_SCOPE_FLAG_HOST, 0) + if err != nil { + panic(err) + } + defer evcph2dv2.Destroy() + + start := time.Now() + + lst, err := ctx.CommandListCreateImmediate(dev, gozel.ZE_COMMAND_QUEUE_MODE_DEFAULT) + if err != nil { + panic(err) + } + err = lst.AppendMemoryCopy(dbufV1, hbufV1, bufsz, evcph2dv1) + if err != nil { + panic(err) + } + err = lst.AppendMemoryCopy(dbufV2, hbufV2, bufsz, evcph2dv2) + if err != nil { + panic(err) + } + + evk, err := evp.EventCreate(2, gozel.ZE_EVENT_SCOPE_FLAG_HOST, 0) + if err != nil { + panic(err) + } + defer evk.Destroy() + + err = lst.AppendLaunchKernel(krn, &gozel.ZeGroupCount{ + Groupcountx: uint32(groupCount), Groupcounty: 1, Groupcountz: 1, + }, evk, evcph2dv1, evcph2dv2) + if err != nil { + panic(err) + } + + err = lst.AppendMemoryCopy(hbufV1, dbufV1, bufsz, 0, evk) + if err != nil { + panic(err) + } + err = lst.HostSynchronize(math.MaxUint64) + if err != nil { + panic(err) + } + + elapsed := time.Since(start) + + fmt.Println("=============== Calculation Results ===============") + fmt.Printf("%-28s %.6f ms\n", "GPU Execution Time:", elapsed.Seconds()*1000) + fmt.Printf("%-28s %.2f GiB/s\n", "GPU Throughput:", float64(bufsz)/elapsed.Seconds()/1e9) + + tmpbuf := make([]float32, N) + start = time.Now() + for i := range N { + tmpbuf[i] = floatbuf[i] + floatbuf[N+i] + } + elapsed = time.Since(start) + + fmt.Println("=============== Validation Results ===============") + fmt.Printf("%-28s %.6f ms\n", "CPU Execution Time:", elapsed.Seconds()*1000) + fmt.Printf("%-28s %.2f GiB/s\n", "CPU Throughput:", float64(bufsz)/elapsed.Seconds()/1e9) + + fail := false + for i := range N { + expect := floatbuf[i] + floatbuf[N+i] + if zev1[i] != expect { + fail = true + fmt.Printf("[%05d] expect %f = %f + %f, got %f.\n", i, expect, floatbuf[i], floatbuf[N+i], zev1[i]) + } + } + + if fail { + os.Exit(1) + } + + fmt.Println("Test Passed!!!") +} diff --git a/examples/vadd_event/main.ll b/examples/vadd_event/main.ll new file mode 100644 index 0000000..96d403c --- /dev/null +++ b/examples/vadd_event/main.ll @@ -0,0 +1,64 @@ +; ModuleID = 'device_kern.bc' +source_filename = "main.cpp" +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spirv64-unknown-unknown" + +@__spirv_BuiltInGlobalInvocationId = external local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 +@__spirv_BuiltInGlobalOffset = external local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 + +; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write) +define spir_kernel void @__sycl_kernel_vector_add(ptr addrspace(1) noundef align 4 captures(none) %0, ptr addrspace(1) noundef readonly align 4 captures(none) %1) local_unnamed_addr #0 !kernel_arg_buffer_location !6 !sycl_fixed_targets !7 !sycl_kernel_omit_args !8 { + %3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32, !noalias !9 + %4 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalOffset, align 32, !noalias !16 + %5 = sub i64 %3, %4 + %6 = icmp ult i64 %5, 2147483648 + tail call void @llvm.assume(i1 %6) + %7 = getelementptr inbounds float, ptr addrspace(1) %1, i64 %5 + %8 = load float, ptr addrspace(1) %7, align 4, !tbaa !23 + %9 = getelementptr inbounds float, ptr addrspace(1) %0, i64 %5 + %10 = load float, ptr addrspace(1) %9, align 4, !tbaa !23 + %11 = fadd float %10, %8 + store float %11, ptr addrspace(1) %9, align 4, !tbaa !23 + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) +declare void @llvm.assume(i1 noundef) #1 + +declare dso_local spir_func i32 @_Z18__spirv_ocl_printfPU3AS2Kcz(ptr addrspace(2), ...) + +attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="main.cpp" "sycl-nd-range-kernel"="1" "sycl-optlevel"="2" "uniform-work-group-size"="true" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } + +!llvm.module.flags = !{!0, !1, !2} +!opencl.spir.version = !{!3} +!spirv.Source = !{!4} +!llvm.ident = !{!5} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"sycl-device", i32 1} +!2 = !{i32 7, !"frame-pointer", i32 2} +!3 = !{i32 1, i32 2} +!4 = !{i32 4, i32 100000} +!5 = !{!"clang version 21.0.0git (https://github.com/intel/llvm d5f649b706f63b5c74e1929bc95db8de91085560)"} +!6 = !{i32 -1, i32 -1} +!7 = !{} +!8 = !{i1 false, i1 false} +!9 = !{!10, !12, !14} +!10 = distinct !{!10, !11, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"} +!11 = distinct !{!11, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv"} +!12 = distinct !{!12, !13, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"} +!13 = distinct !{!13, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v"} +!14 = distinct !{!14, !15, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv: argument 0"} +!15 = distinct !{!15, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv"} +!16 = !{!17, !19, !21} +!17 = distinct !{!17, !18, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"} +!18 = distinct !{!18, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN4sycl3_V12idILi1EEEE8initSizeEv"} +!19 = distinct !{!19, !20, !"_ZN7__spirv16initGlobalOffsetILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"} +!20 = distinct !{!20, !"_ZN7__spirv16initGlobalOffsetILi1EN4sycl3_V12idILi1EEEEET0_v"} +!21 = distinct !{!21, !22, !"_ZNK4sycl3_V17nd_itemILi1EE10get_offsetEv: argument 0"} +!22 = distinct !{!22, !"_ZNK4sycl3_V17nd_itemILi1EE10get_offsetEv"} +!23 = !{!24, !24, i64 0} +!24 = !{!"float", !25, i64 0} +!25 = !{!"omnipotent char", !26, i64 0} +!26 = !{!"Simple C++ TBAA"} diff --git a/examples/vadd_event/main.spv b/examples/vadd_event/main.spv new file mode 100644 index 0000000000000000000000000000000000000000..7f901ad337956d633eb506ad8b62b1a49111c973 GIT binary patch literal 1232 zcma)*+iDYG6oq%1X{)AfT6^+TohTGU>7`mk5d}j)=pzgmTX~3*6vV^(@$DvZj8_ zK0j-y&!oU{G8|8{&p(Dm)hoNj=zG8DmDACnUkyiP53+f?8)j^W}feWe;6xM(0+DlMplKNtC}MY7{DZ3+|21|5GW*BN6q3_9qqmIGTmWK!K4c*!Q`-q>O z=jbPYqdPo&11@;qYBropQOq7M#JM8>v!1o87>*D6qRd>AQ6o-ej=Qgzd5&9GOwDnB z#4^)7