mirror of
https://github.com/fumiama/gozel.git
synced 2026-07-01 00:00:24 +08:00
feat(ze): add event support & vadd demo & refactor (#5)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
This commit is contained in:
@@ -190,7 +190,8 @@ Contributions of all kinds are welcome. Some particularly impactful areas:
|
|||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
This project is licensed under the [GNU Affero General Public License v3.0](LICENSE).
|
- This project is generally licensed under the [GNU Affero General Public License v3.0](LICENSE).
|
||||||
|
- The files in [gozel](gozel) folder follows their original license, which is [MIT](https://github.com/oneapi-src/level-zero/blob/master/LICENSE).
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
@@ -1,11 +1,14 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import "os"
|
import (
|
||||||
|
"os"
|
||||||
|
"path"
|
||||||
|
)
|
||||||
|
|
||||||
var apif *os.File
|
var apif *os.File
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
f, err := os.Create("api.go")
|
f, err := os.Create(path.Join("gozel", "api.go"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
|
"path"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"unicode"
|
"unicode"
|
||||||
@@ -134,7 +135,7 @@ func scanHeader(name string, scan *bufio.Scanner) {
|
|||||||
}
|
}
|
||||||
fmt.Println(infh(name), "scanning region", region)
|
fmt.Println(infh(name), "scanning region", region)
|
||||||
k := fmt.Sprint(name, "_", region)
|
k := fmt.Sprint(name, "_", region)
|
||||||
f, err := os.Create(fmt.Sprint(k, ".go"))
|
f, err := os.Create(path.Join("gozel", fmt.Sprint(k, ".go")))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(fmt.Sprintf("%s L%d: cannot create region %s, err: %v", name, ln, region, err))
|
panic(fmt.Sprintf("%s L%d: cannot create region %s, err: %v", name, ln, region, err))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
extern "C" SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((sycl::ext::oneapi::experimental::nd_range_kernel<1>))
|
extern "C" SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((sycl::ext::oneapi::experimental::nd_range_kernel<1>))
|
||||||
void vector_add(float* a, float* b) {
|
void vector_add(float* a, float* b) {
|
||||||
auto item = sycl::ext::oneapi::this_work_item::get_nd_item<1>();
|
auto item = sycl::ext::oneapi::this_work_item::get_nd_item<1>();
|
||||||
int idx = item.get_global_id(0);
|
int idx = item.get_global_linear_id();
|
||||||
|
|
||||||
a[idx] += b[idx];
|
a[idx] += b[idx];
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
_ "embed"
|
_ "embed"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"math"
|
||||||
"math/rand"
|
"math/rand"
|
||||||
"os"
|
"os"
|
||||||
"strconv"
|
"strconv"
|
||||||
@@ -11,7 +12,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
|
||||||
"github.com/fumiama/gozel"
|
"github.com/fumiama/gozel/gozel"
|
||||||
"github.com/fumiama/gozel/ze"
|
"github.com/fumiama/gozel/ze"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -69,8 +70,7 @@ func main() {
|
|||||||
fmt.Printf("%-28s (%d, %d, %d)\n", "Max Group Count (X, Y, Z):", cprop.Maxgroupcountx, cprop.Maxgroupcounty, cprop.Maxgroupcountz)
|
fmt.Printf("%-28s (%d, %d, %d)\n", "Max Group Count (X, Y, Z):", cprop.Maxgroupcountx, cprop.Maxgroupcounty, cprop.Maxgroupcountz)
|
||||||
fmt.Printf("%-28s %d\n", "Max Total Group Size:", cprop.Maxtotalgroupsize)
|
fmt.Printf("%-28s %d\n", "Max Total Group Size:", cprop.Maxtotalgroupsize)
|
||||||
fmt.Printf("%-28s %d\n", "Max Shared Local Memory:", cprop.Maxsharedlocalmemory)
|
fmt.Printf("%-28s %d\n", "Max Shared Local Memory:", cprop.Maxsharedlocalmemory)
|
||||||
fmt.Printf("%-28s %d\n", "Num Subgroup Sizes:", cprop.Numsubgroupsizes)
|
fmt.Printf("%-28s %v\n", "Subgroup Sizes:", cprop.Subgroupsizes[:cprop.Numsubgroupsizes])
|
||||||
fmt.Printf("%-28s %v\n", "Subgroup Sizes:", cprop.Subgroupsizes[:])
|
|
||||||
|
|
||||||
var (
|
var (
|
||||||
X, Y, Z = uintptr(cprop.Maxgroupsizex), uintptr(1), uintptr(1)
|
X, Y, Z = uintptr(cprop.Maxgroupsizex), uintptr(1), uintptr(1)
|
||||||
@@ -84,7 +84,7 @@ func main() {
|
|||||||
fmt.Printf("%-28s %d\n", "Total Elements (N):", N)
|
fmt.Printf("%-28s %d\n", "Total Elements (N):", N)
|
||||||
fmt.Printf("%-28s %d MiB\n", "Buffer Size:", bufsz/1024/1024)
|
fmt.Printf("%-28s %d MiB\n", "Buffer Size:", bufsz/1024/1024)
|
||||||
|
|
||||||
q, err := ctx.CommandQueueCreate(dev)
|
q, err := ctx.CommandQueueCreate(dev, gozel.ZE_COMMAND_QUEUE_MODE_DEFAULT)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
@@ -135,11 +135,11 @@ func main() {
|
|||||||
}
|
}
|
||||||
defer krn.Destroy()
|
defer krn.Destroy()
|
||||||
|
|
||||||
err = krn.SetArgumentValue(0, unsafe.Sizeof(uintptr(0)), unsafe.Pointer(&dbufV1))
|
err = krn.SetArgumentValue(0, &dbufV1)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
err = krn.SetArgumentValue(1, unsafe.Sizeof(uintptr(0)), unsafe.Pointer(&dbufV2))
|
err = krn.SetArgumentValue(1, &dbufV2)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
@@ -154,16 +154,16 @@ func main() {
|
|||||||
}
|
}
|
||||||
defer lstpre.Destroy()
|
defer lstpre.Destroy()
|
||||||
|
|
||||||
err = lstpre.AppendMemoryCopy(dbufV1, hbufV1, bufsz)
|
err = lstpre.AppendMemoryCopy(dbufV1, hbufV1, bufsz, 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
err = lstpre.AppendMemoryCopy(dbufV2, hbufV2, bufsz)
|
err = lstpre.AppendMemoryCopy(dbufV2, hbufV2, bufsz, 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
err = lstpre.AppendBarrier()
|
err = lstpre.AppendBarrier(0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
@@ -181,12 +181,12 @@ func main() {
|
|||||||
|
|
||||||
err = lstcalc.AppendLaunchKernel(krn, &gozel.ZeGroupCount{
|
err = lstcalc.AppendLaunchKernel(krn, &gozel.ZeGroupCount{
|
||||||
Groupcountx: uint32(groupCount), Groupcounty: 1, Groupcountz: 1,
|
Groupcountx: uint32(groupCount), Groupcounty: 1, Groupcountz: 1,
|
||||||
})
|
}, 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
err = lstcalc.AppendBarrier()
|
err = lstcalc.AppendBarrier(0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
@@ -202,7 +202,7 @@ func main() {
|
|||||||
}
|
}
|
||||||
defer lstpost.Destroy()
|
defer lstpost.Destroy()
|
||||||
|
|
||||||
err = lstpost.AppendMemoryCopy(hbufV1, dbufV1, bufsz)
|
err = lstpost.AppendMemoryCopy(hbufV1, dbufV1, bufsz, 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
@@ -217,7 +217,7 @@ func main() {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
err = q.Synchronize()
|
err = q.Synchronize(math.MaxUint64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,18 +4,21 @@ target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:
|
|||||||
target triple = "spirv64-unknown-unknown"
|
target triple = "spirv64-unknown-unknown"
|
||||||
|
|
||||||
@__spirv_BuiltInGlobalInvocationId = external local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
|
@__spirv_BuiltInGlobalInvocationId = external local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
|
||||||
|
@__spirv_BuiltInGlobalOffset = external local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
|
||||||
|
|
||||||
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write)
|
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write)
|
||||||
define spir_kernel void @__sycl_kernel_vector_add(ptr addrspace(1) noundef align 4 captures(none) %0, ptr addrspace(1) noundef readonly align 4 captures(none) %1) local_unnamed_addr #0 !kernel_arg_buffer_location !6 !sycl_fixed_targets !7 !sycl_kernel_omit_args !8 {
|
define spir_kernel void @__sycl_kernel_vector_add(ptr addrspace(1) noundef align 4 captures(none) %0, ptr addrspace(1) noundef readonly align 4 captures(none) %1) local_unnamed_addr #0 !kernel_arg_buffer_location !6 !sycl_fixed_targets !7 !sycl_kernel_omit_args !8 {
|
||||||
%3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32, !noalias !9
|
%3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32, !noalias !9
|
||||||
%4 = icmp ult i64 %3, 2147483648
|
%4 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalOffset, align 32, !noalias !16
|
||||||
tail call void @llvm.assume(i1 %4)
|
%5 = sub i64 %3, %4
|
||||||
%5 = getelementptr inbounds float, ptr addrspace(1) %1, i64 %3
|
%6 = icmp ult i64 %5, 2147483648
|
||||||
%6 = load float, ptr addrspace(1) %5, align 4, !tbaa !16
|
tail call void @llvm.assume(i1 %6)
|
||||||
%7 = getelementptr inbounds float, ptr addrspace(1) %0, i64 %3
|
%7 = getelementptr inbounds float, ptr addrspace(1) %1, i64 %5
|
||||||
%8 = load float, ptr addrspace(1) %7, align 4, !tbaa !16
|
%8 = load float, ptr addrspace(1) %7, align 4, !tbaa !23
|
||||||
%9 = fadd float %8, %6
|
%9 = getelementptr inbounds float, ptr addrspace(1) %0, i64 %5
|
||||||
store float %9, ptr addrspace(1) %7, align 4, !tbaa !16
|
%10 = load float, ptr addrspace(1) %9, align 4, !tbaa !23
|
||||||
|
%11 = fadd float %10, %8
|
||||||
|
store float %11, ptr addrspace(1) %9, align 4, !tbaa !23
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -48,7 +51,14 @@ attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessib
|
|||||||
!13 = distinct !{!13, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v"}
|
!13 = distinct !{!13, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v"}
|
||||||
!14 = distinct !{!14, !15, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv: argument 0"}
|
!14 = distinct !{!14, !15, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv: argument 0"}
|
||||||
!15 = distinct !{!15, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv"}
|
!15 = distinct !{!15, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv"}
|
||||||
!16 = !{!17, !17, i64 0}
|
!16 = !{!17, !19, !21}
|
||||||
!17 = !{!"float", !18, i64 0}
|
!17 = distinct !{!17, !18, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"}
|
||||||
!18 = !{!"omnipotent char", !19, i64 0}
|
!18 = distinct !{!18, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN4sycl3_V12idILi1EEEE8initSizeEv"}
|
||||||
!19 = !{!"Simple C++ TBAA"}
|
!19 = distinct !{!19, !20, !"_ZN7__spirv16initGlobalOffsetILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"}
|
||||||
|
!20 = distinct !{!20, !"_ZN7__spirv16initGlobalOffsetILi1EN4sycl3_V12idILi1EEEEET0_v"}
|
||||||
|
!21 = distinct !{!21, !22, !"_ZNK4sycl3_V17nd_itemILi1EE10get_offsetEv: argument 0"}
|
||||||
|
!22 = distinct !{!22, !"_ZNK4sycl3_V17nd_itemILi1EE10get_offsetEv"}
|
||||||
|
!23 = !{!24, !24, i64 0}
|
||||||
|
!24 = !{!"float", !25, i64 0}
|
||||||
|
!25 = !{!"omnipotent char", !26, i64 0}
|
||||||
|
!26 = !{!"Simple C++ TBAA"}
|
||||||
|
|||||||
Binary file not shown.
1
examples/vadd_event/.gitignore
vendored
Normal file
1
examples/vadd_event/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
/device*
|
||||||
9
examples/vadd_event/main.cpp
Normal file
9
examples/vadd_event/main.cpp
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
#include <sycl/sycl.hpp>
|
||||||
|
|
||||||
|
extern "C" SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((sycl::ext::oneapi::experimental::nd_range_kernel<1>))
|
||||||
|
void vector_add(float* a, float* b) {
|
||||||
|
auto item = sycl::ext::oneapi::this_work_item::get_nd_item<1>();
|
||||||
|
int idx = item.get_global_linear_id();
|
||||||
|
|
||||||
|
a[idx] += b[idx];
|
||||||
|
}
|
||||||
231
examples/vadd_event/main.go
Normal file
231
examples/vadd_event/main.go
Normal file
@@ -0,0 +1,231 @@
|
|||||||
|
// Package main demonstrates vector addition using the gozel Level Zero bindings.
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
_ "embed"
|
||||||
|
"fmt"
|
||||||
|
"math"
|
||||||
|
"math/rand"
|
||||||
|
"os"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
"unsafe"
|
||||||
|
|
||||||
|
"github.com/fumiama/gozel/gozel"
|
||||||
|
"github.com/fumiama/gozel/ze"
|
||||||
|
)
|
||||||
|
|
||||||
|
//go:generate clang++ -fsycl -fsycl-device-only -fsycl-targets=spirv64 -Xclang -emit-llvm-bc main.cpp -o device_kern.bc
|
||||||
|
//go:generate sycl-post-link -symbols -split=auto -o device_kern.table device_kern.bc
|
||||||
|
//go:generate clang++ -target spirv64-unknown-unknown -S -emit-llvm -x ir device_kern_0.bc -o device_kern.ll
|
||||||
|
//go:generate llvm-spirv -o main.spv device_kern.bc
|
||||||
|
//go:generate clang++ -target spirv64-unknown-unknown -S -emit-llvm -x ir device_kern.bc -o main.ll
|
||||||
|
|
||||||
|
//go:embed main.spv
|
||||||
|
var kernelspv []byte
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
gpus, err := ze.InitGPUDrivers()
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
if len(gpus) == 0 {
|
||||||
|
panic("no gpu available")
|
||||||
|
}
|
||||||
|
gpu := gpus[0]
|
||||||
|
|
||||||
|
ctx, err := gpu.ContextCreate()
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
devs, err := gpu.DeviceGet()
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
if len(devs) == 0 {
|
||||||
|
panic("no device available")
|
||||||
|
}
|
||||||
|
dev := devs[0]
|
||||||
|
|
||||||
|
prop, err := dev.DeviceGetProperties()
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Println("=============== Device Basic Properties ===============")
|
||||||
|
name, _, _ := strings.Cut(string(prop.Name[:]), "\x00")
|
||||||
|
fmt.Println(
|
||||||
|
"Running on device: ID =", prop.Deviceid, ", Name =", name,
|
||||||
|
"@", strconv.FormatFloat(float64(prop.Coreclockrate)/1024/1024/1024, 'f', 2, 64), "GHz.",
|
||||||
|
)
|
||||||
|
|
||||||
|
cprop, err := dev.DeviceGetComputeProperties()
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
fmt.Println("=============== Device Compute Properties ===============")
|
||||||
|
fmt.Printf("%-28s (%d, %d, %d)\n", "Max Group Size (X, Y, Z):", cprop.Maxgroupsizex, cprop.Maxgroupsizey, cprop.Maxgroupsizez)
|
||||||
|
fmt.Printf("%-28s (%d, %d, %d)\n", "Max Group Count (X, Y, Z):", cprop.Maxgroupcountx, cprop.Maxgroupcounty, cprop.Maxgroupcountz)
|
||||||
|
fmt.Printf("%-28s %d\n", "Max Total Group Size:", cprop.Maxtotalgroupsize)
|
||||||
|
fmt.Printf("%-28s %d\n", "Max Shared Local Memory:", cprop.Maxsharedlocalmemory)
|
||||||
|
fmt.Printf("%-28s %d\n", "Num Subgroup Sizes:", cprop.Numsubgroupsizes)
|
||||||
|
fmt.Printf("%-28s %v\n", "Subgroup Sizes:", cprop.Subgroupsizes[:])
|
||||||
|
|
||||||
|
var (
|
||||||
|
X, Y, Z = uintptr(cprop.Maxgroupsizex), uintptr(1), uintptr(1)
|
||||||
|
groupCount = uintptr(65536)
|
||||||
|
N = X * groupCount
|
||||||
|
bufsz = N * unsafe.Sizeof(float32(0))
|
||||||
|
)
|
||||||
|
fmt.Println("=============== Computation Configuration ===============")
|
||||||
|
fmt.Printf("%-28s (%d, %d, %d)\n", "Group Size (X, Y, Z):", X, Y, Z)
|
||||||
|
fmt.Printf("%-28s %d\n", "Group Count:", groupCount)
|
||||||
|
fmt.Printf("%-28s %d\n", "Total Elements (N):", N)
|
||||||
|
fmt.Printf("%-28s %d MiB\n", "Buffer Size:", bufsz/1024/1024)
|
||||||
|
|
||||||
|
hbufV1, err := ctx.MemAllocHost(bufsz, 1)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
defer ctx.MemFree(hbufV1)
|
||||||
|
|
||||||
|
hbufV2, err := ctx.MemAllocHost(bufsz, 1)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
defer ctx.MemFree(hbufV2)
|
||||||
|
|
||||||
|
dbufV1, err := ctx.MemAllocDevice(dev, bufsz, 1)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
defer ctx.MemFree(dbufV1)
|
||||||
|
|
||||||
|
dbufV2, err := ctx.MemAllocDevice(dev, bufsz, 1)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
defer ctx.MemFree(dbufV2)
|
||||||
|
|
||||||
|
floatbuf := make([]float32, 2*N)
|
||||||
|
for i := range floatbuf {
|
||||||
|
floatbuf[i] = rand.Float32()
|
||||||
|
}
|
||||||
|
|
||||||
|
zev1, zev2 := unsafe.Slice((*float32)(hbufV1), N), unsafe.Slice((*float32)(hbufV2), N)
|
||||||
|
copy(zev1, floatbuf[:N])
|
||||||
|
copy(zev2, floatbuf[N:])
|
||||||
|
|
||||||
|
mod, err := ctx.ModuleCreate(dev, kernelspv)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
defer mod.Destroy()
|
||||||
|
|
||||||
|
krn, err := mod.KernelCreate("__sycl_kernel_vector_add")
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
defer krn.Destroy()
|
||||||
|
|
||||||
|
err = krn.SetArgumentValue(0, &dbufV1)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
err = krn.SetArgumentValue(1, &dbufV2)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
err = krn.SetGroupSize(uint32(X), uint32(Y), uint32(Z))
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
evp, err := ctx.EventPoolCreate(3, dev)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
defer evp.Destroy()
|
||||||
|
|
||||||
|
evcph2dv1, err := evp.EventCreate(0, gozel.ZE_EVENT_SCOPE_FLAG_HOST, 0)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
defer evcph2dv1.Destroy()
|
||||||
|
evcph2dv2, err := evp.EventCreate(1, gozel.ZE_EVENT_SCOPE_FLAG_HOST, 0)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
defer evcph2dv2.Destroy()
|
||||||
|
|
||||||
|
start := time.Now()
|
||||||
|
|
||||||
|
lst, err := ctx.CommandListCreateImmediate(dev, gozel.ZE_COMMAND_QUEUE_MODE_DEFAULT)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
err = lst.AppendMemoryCopy(dbufV1, hbufV1, bufsz, evcph2dv1)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
err = lst.AppendMemoryCopy(dbufV2, hbufV2, bufsz, evcph2dv2)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
evk, err := evp.EventCreate(2, gozel.ZE_EVENT_SCOPE_FLAG_HOST, 0)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
defer evk.Destroy()
|
||||||
|
|
||||||
|
err = lst.AppendLaunchKernel(krn, &gozel.ZeGroupCount{
|
||||||
|
Groupcountx: uint32(groupCount), Groupcounty: 1, Groupcountz: 1,
|
||||||
|
}, evk, evcph2dv1, evcph2dv2)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
err = lst.AppendMemoryCopy(hbufV1, dbufV1, bufsz, 0, evk)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
err = lst.HostSynchronize(math.MaxUint64)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
elapsed := time.Since(start)
|
||||||
|
|
||||||
|
fmt.Println("=============== Calculation Results ===============")
|
||||||
|
fmt.Printf("%-28s %.6f ms\n", "GPU Execution Time:", elapsed.Seconds()*1000)
|
||||||
|
fmt.Printf("%-28s %.2f GiB/s\n", "GPU Throughput:", float64(bufsz)/elapsed.Seconds()/1e9)
|
||||||
|
|
||||||
|
tmpbuf := make([]float32, N)
|
||||||
|
start = time.Now()
|
||||||
|
for i := range N {
|
||||||
|
tmpbuf[i] = floatbuf[i] + floatbuf[N+i]
|
||||||
|
}
|
||||||
|
elapsed = time.Since(start)
|
||||||
|
|
||||||
|
fmt.Println("=============== Validation Results ===============")
|
||||||
|
fmt.Printf("%-28s %.6f ms\n", "CPU Execution Time:", elapsed.Seconds()*1000)
|
||||||
|
fmt.Printf("%-28s %.2f GiB/s\n", "CPU Throughput:", float64(bufsz)/elapsed.Seconds()/1e9)
|
||||||
|
|
||||||
|
fail := false
|
||||||
|
for i := range N {
|
||||||
|
expect := floatbuf[i] + floatbuf[N+i]
|
||||||
|
if zev1[i] != expect {
|
||||||
|
fail = true
|
||||||
|
fmt.Printf("[%05d] expect %f = %f + %f, got %f.\n", i, expect, floatbuf[i], floatbuf[N+i], zev1[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if fail {
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Println("Test Passed!!!")
|
||||||
|
}
|
||||||
64
examples/vadd_event/main.ll
Normal file
64
examples/vadd_event/main.ll
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
; ModuleID = 'device_kern.bc'
|
||||||
|
source_filename = "main.cpp"
|
||||||
|
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1"
|
||||||
|
target triple = "spirv64-unknown-unknown"
|
||||||
|
|
||||||
|
@__spirv_BuiltInGlobalInvocationId = external local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
|
||||||
|
@__spirv_BuiltInGlobalOffset = external local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
|
||||||
|
|
||||||
|
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write)
|
||||||
|
define spir_kernel void @__sycl_kernel_vector_add(ptr addrspace(1) noundef align 4 captures(none) %0, ptr addrspace(1) noundef readonly align 4 captures(none) %1) local_unnamed_addr #0 !kernel_arg_buffer_location !6 !sycl_fixed_targets !7 !sycl_kernel_omit_args !8 {
|
||||||
|
%3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32, !noalias !9
|
||||||
|
%4 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalOffset, align 32, !noalias !16
|
||||||
|
%5 = sub i64 %3, %4
|
||||||
|
%6 = icmp ult i64 %5, 2147483648
|
||||||
|
tail call void @llvm.assume(i1 %6)
|
||||||
|
%7 = getelementptr inbounds float, ptr addrspace(1) %1, i64 %5
|
||||||
|
%8 = load float, ptr addrspace(1) %7, align 4, !tbaa !23
|
||||||
|
%9 = getelementptr inbounds float, ptr addrspace(1) %0, i64 %5
|
||||||
|
%10 = load float, ptr addrspace(1) %9, align 4, !tbaa !23
|
||||||
|
%11 = fadd float %10, %8
|
||||||
|
store float %11, ptr addrspace(1) %9, align 4, !tbaa !23
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
|
||||||
|
declare void @llvm.assume(i1 noundef) #1
|
||||||
|
|
||||||
|
declare dso_local spir_func i32 @_Z18__spirv_ocl_printfPU3AS2Kcz(ptr addrspace(2), ...)
|
||||||
|
|
||||||
|
attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="main.cpp" "sycl-nd-range-kernel"="1" "sycl-optlevel"="2" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
|
||||||
|
|
||||||
|
!llvm.module.flags = !{!0, !1, !2}
|
||||||
|
!opencl.spir.version = !{!3}
|
||||||
|
!spirv.Source = !{!4}
|
||||||
|
!llvm.ident = !{!5}
|
||||||
|
|
||||||
|
!0 = !{i32 1, !"wchar_size", i32 4}
|
||||||
|
!1 = !{i32 1, !"sycl-device", i32 1}
|
||||||
|
!2 = !{i32 7, !"frame-pointer", i32 2}
|
||||||
|
!3 = !{i32 1, i32 2}
|
||||||
|
!4 = !{i32 4, i32 100000}
|
||||||
|
!5 = !{!"clang version 21.0.0git (https://github.com/intel/llvm d5f649b706f63b5c74e1929bc95db8de91085560)"}
|
||||||
|
!6 = !{i32 -1, i32 -1}
|
||||||
|
!7 = !{}
|
||||||
|
!8 = !{i1 false, i1 false}
|
||||||
|
!9 = !{!10, !12, !14}
|
||||||
|
!10 = distinct !{!10, !11, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"}
|
||||||
|
!11 = distinct !{!11, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv"}
|
||||||
|
!12 = distinct !{!12, !13, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"}
|
||||||
|
!13 = distinct !{!13, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v"}
|
||||||
|
!14 = distinct !{!14, !15, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv: argument 0"}
|
||||||
|
!15 = distinct !{!15, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv"}
|
||||||
|
!16 = !{!17, !19, !21}
|
||||||
|
!17 = distinct !{!17, !18, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"}
|
||||||
|
!18 = distinct !{!18, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN4sycl3_V12idILi1EEEE8initSizeEv"}
|
||||||
|
!19 = distinct !{!19, !20, !"_ZN7__spirv16initGlobalOffsetILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"}
|
||||||
|
!20 = distinct !{!20, !"_ZN7__spirv16initGlobalOffsetILi1EN4sycl3_V12idILi1EEEEET0_v"}
|
||||||
|
!21 = distinct !{!21, !22, !"_ZNK4sycl3_V17nd_itemILi1EE10get_offsetEv: argument 0"}
|
||||||
|
!22 = distinct !{!22, !"_ZNK4sycl3_V17nd_itemILi1EE10get_offsetEv"}
|
||||||
|
!23 = !{!24, !24, i64 0}
|
||||||
|
!24 = !{!"float", !25, i64 0}
|
||||||
|
!25 = !{!"omnipotent char", !26, i64 0}
|
||||||
|
!26 = !{!"Simple C++ TBAA"}
|
||||||
BIN
examples/vadd_event/main.spv
Normal file
BIN
examples/vadd_event/main.spv
Normal file
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user