From e781f418d5e01345c72cb267031a3df6586d606e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Sat, 28 Mar 2026 18:51:25 +0800 Subject: [PATCH] fix: remove duplicated gens --- README.md | 4 -- examples/vadd/main.go | 5 +-- examples/vadd/main.ll | 81 ++++++++++++++++++------------------- examples/vadd_event/main.go | 5 +-- examples/vadd_event/main.ll | 81 ++++++++++++++++++------------------- 5 files changed, 84 insertions(+), 92 deletions(-) diff --git a/README.md b/README.md index 516afc3..cc08563 100644 --- a/README.md +++ b/README.md @@ -274,10 +274,6 @@ main.cpp ──clang++ -fsycl──▶ device_kern.bc │ ▼ device_kern_0.bc │ - clang++ -emit-llvm -S - │ - ▼ device_kern.ll - │ llvm-spirv │ ▼ main.spv ← embedded via //go:embed diff --git a/examples/vadd/main.go b/examples/vadd/main.go index 4ee0dd4..2e732e5 100644 --- a/examples/vadd/main.go +++ b/examples/vadd/main.go @@ -18,9 +18,8 @@ import ( //go:generate clang++ -fsycl -fsycl-device-only -fsycl-targets=spirv64 -Xclang -emit-llvm-bc main.cpp -o device_kern.bc //go:generate sycl-post-link -symbols -split=auto -o device_kern.table device_kern.bc -//go:generate clang++ -target spirv64-unknown-unknown -S -emit-llvm -x ir device_kern_0.bc -o device_kern.ll -//go:generate llvm-spirv -o main.spv device_kern.bc -//go:generate clang++ -target spirv64-unknown-unknown -S -emit-llvm -x ir device_kern.bc -o main.ll +//go:generate llvm-spirv -o main.spv device_kern_0.bc +//go:generate clang++ -target spirv64-unknown-unknown -S -emit-llvm -x ir device_kern_0.bc -o main.ll //go:embed main.spv var kernelspv []byte diff --git a/examples/vadd/main.ll b/examples/vadd/main.ll index 96d403c..8c9d3e3 100644 --- a/examples/vadd/main.ll +++ b/examples/vadd/main.ll @@ -1,4 +1,4 @@ -; ModuleID = 'device_kern.bc' +; ModuleID = 'device_kern_0.bc' source_filename = "main.cpp" target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" target triple = "spirv64-unknown-unknown" @@ -7,58 +7,57 @@ target triple = "spirv64-unknown-unknown" @__spirv_BuiltInGlobalOffset = external local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write) -define spir_kernel void @__sycl_kernel_vector_add(ptr addrspace(1) noundef align 4 captures(none) %0, ptr addrspace(1) noundef readonly align 4 captures(none) %1) local_unnamed_addr #0 !kernel_arg_buffer_location !6 !sycl_fixed_targets !7 !sycl_kernel_omit_args !8 { - %3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32, !noalias !9 - %4 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalOffset, align 32, !noalias !16 +define spir_kernel void @__sycl_kernel_vector_add(ptr addrspace(1) noundef align 4 captures(none) %0, ptr addrspace(1) noundef readonly align 4 captures(none) %1) local_unnamed_addr #0 !kernel_arg_buffer_location !9 !sycl_fixed_targets !10 !sycl_kernel_omit_args !11 { + %3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32, !noalias !12 + %4 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalOffset, align 32, !noalias !19 %5 = sub i64 %3, %4 %6 = icmp ult i64 %5, 2147483648 tail call void @llvm.assume(i1 %6) %7 = getelementptr inbounds float, ptr addrspace(1) %1, i64 %5 - %8 = load float, ptr addrspace(1) %7, align 4, !tbaa !23 + %8 = load float, ptr addrspace(1) %7, align 4 %9 = getelementptr inbounds float, ptr addrspace(1) %0, i64 %5 - %10 = load float, ptr addrspace(1) %9, align 4, !tbaa !23 + %10 = load float, ptr addrspace(1) %9, align 4 %11 = fadd float %10, %8 - store float %11, ptr addrspace(1) %9, align 4, !tbaa !23 + store float %11, ptr addrspace(1) %9, align 4 ret void } ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) declare void @llvm.assume(i1 noundef) #1 -declare dso_local spir_func i32 @_Z18__spirv_ocl_printfPU3AS2Kcz(ptr addrspace(2), ...) - -attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="main.cpp" "sycl-nd-range-kernel"="1" "sycl-optlevel"="2" "uniform-work-group-size"="true" } +attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-entry-point" "sycl-module-id"="main.cpp" "sycl-nd-range-kernel"="1" "sycl-optlevel"="2" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } -!llvm.module.flags = !{!0, !1, !2} -!opencl.spir.version = !{!3} -!spirv.Source = !{!4} -!llvm.ident = !{!5} +!llvm.linker.options = !{!0, !1} +!llvm.module.flags = !{!2, !3, !4} +!opencl.spir.version = !{!5} +!spirv.Source = !{!6} +!llvm.ident = !{!7} +!sycl-esimd-split-status = !{!8} -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 1, !"sycl-device", i32 1} -!2 = !{i32 7, !"frame-pointer", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{i32 4, i32 100000} -!5 = !{!"clang version 21.0.0git (https://github.com/intel/llvm d5f649b706f63b5c74e1929bc95db8de91085560)"} -!6 = !{i32 -1, i32 -1} -!7 = !{} -!8 = !{i1 false, i1 false} -!9 = !{!10, !12, !14} -!10 = distinct !{!10, !11, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"} -!11 = distinct !{!11, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv"} -!12 = distinct !{!12, !13, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"} -!13 = distinct !{!13, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v"} -!14 = distinct !{!14, !15, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv: argument 0"} -!15 = distinct !{!15, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv"} -!16 = !{!17, !19, !21} -!17 = distinct !{!17, !18, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"} -!18 = distinct !{!18, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN4sycl3_V12idILi1EEEE8initSizeEv"} -!19 = distinct !{!19, !20, !"_ZN7__spirv16initGlobalOffsetILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"} -!20 = distinct !{!20, !"_ZN7__spirv16initGlobalOffsetILi1EN4sycl3_V12idILi1EEEEET0_v"} -!21 = distinct !{!21, !22, !"_ZNK4sycl3_V17nd_itemILi1EE10get_offsetEv: argument 0"} -!22 = distinct !{!22, !"_ZNK4sycl3_V17nd_itemILi1EE10get_offsetEv"} -!23 = !{!24, !24, i64 0} -!24 = !{!"float", !25, i64 0} -!25 = !{!"omnipotent char", !26, i64 0} -!26 = !{!"Simple C++ TBAA"} +!0 = !{!"-llibcpmt"} +!1 = !{!"/alternatename:_Avx2WmemEnabled=_Avx2WmemEnabledWeakValue"} +!2 = !{i32 1, !"wchar_size", i32 2} +!3 = !{i32 1, !"sycl-device", i32 1} +!4 = !{i32 7, !"frame-pointer", i32 2} +!5 = !{i32 1, i32 2} +!6 = !{i32 4, i32 100000} +!7 = !{!"clang version 21.0.0git (https://github.com/intel/llvm d5f649b706f63b5c74e1929bc95db8de91085560)"} +!8 = !{i8 0} +!9 = !{i32 -1, i32 -1} +!10 = !{} +!11 = !{i1 false, i1 false} +!12 = !{!13, !15, !17} +!13 = distinct !{!13, !14, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"} +!14 = distinct !{!14, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv"} +!15 = distinct !{!15, !16, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"} +!16 = distinct !{!16, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v"} +!17 = distinct !{!17, !18, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv: argument 0"} +!18 = distinct !{!18, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv"} +!19 = !{!20, !22, !24} +!20 = distinct !{!20, !21, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"} +!21 = distinct !{!21, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN4sycl3_V12idILi1EEEE8initSizeEv"} +!22 = distinct !{!22, !23, !"_ZN7__spirv16initGlobalOffsetILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"} +!23 = distinct !{!23, !"_ZN7__spirv16initGlobalOffsetILi1EN4sycl3_V12idILi1EEEEET0_v"} +!24 = distinct !{!24, !25, !"_ZNK4sycl3_V17nd_itemILi1EE10get_offsetEv: argument 0"} +!25 = distinct !{!25, !"_ZNK4sycl3_V17nd_itemILi1EE10get_offsetEv"} diff --git a/examples/vadd_event/main.go b/examples/vadd_event/main.go index b0a611d..ce3ba3b 100644 --- a/examples/vadd_event/main.go +++ b/examples/vadd_event/main.go @@ -18,9 +18,8 @@ import ( //go:generate clang++ -fsycl -fsycl-device-only -fsycl-targets=spirv64 -Xclang -emit-llvm-bc main.cpp -o device_kern.bc //go:generate sycl-post-link -symbols -split=auto -o device_kern.table device_kern.bc -//go:generate clang++ -target spirv64-unknown-unknown -S -emit-llvm -x ir device_kern_0.bc -o device_kern.ll -//go:generate llvm-spirv -o main.spv device_kern.bc -//go:generate clang++ -target spirv64-unknown-unknown -S -emit-llvm -x ir device_kern.bc -o main.ll +//go:generate llvm-spirv -o main.spv device_kern_0.bc +//go:generate clang++ -target spirv64-unknown-unknown -S -emit-llvm -x ir device_kern_0.bc -o main.ll //go:embed main.spv var kernelspv []byte diff --git a/examples/vadd_event/main.ll b/examples/vadd_event/main.ll index 96d403c..8c9d3e3 100644 --- a/examples/vadd_event/main.ll +++ b/examples/vadd_event/main.ll @@ -1,4 +1,4 @@ -; ModuleID = 'device_kern.bc' +; ModuleID = 'device_kern_0.bc' source_filename = "main.cpp" target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" target triple = "spirv64-unknown-unknown" @@ -7,58 +7,57 @@ target triple = "spirv64-unknown-unknown" @__spirv_BuiltInGlobalOffset = external local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write) -define spir_kernel void @__sycl_kernel_vector_add(ptr addrspace(1) noundef align 4 captures(none) %0, ptr addrspace(1) noundef readonly align 4 captures(none) %1) local_unnamed_addr #0 !kernel_arg_buffer_location !6 !sycl_fixed_targets !7 !sycl_kernel_omit_args !8 { - %3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32, !noalias !9 - %4 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalOffset, align 32, !noalias !16 +define spir_kernel void @__sycl_kernel_vector_add(ptr addrspace(1) noundef align 4 captures(none) %0, ptr addrspace(1) noundef readonly align 4 captures(none) %1) local_unnamed_addr #0 !kernel_arg_buffer_location !9 !sycl_fixed_targets !10 !sycl_kernel_omit_args !11 { + %3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32, !noalias !12 + %4 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalOffset, align 32, !noalias !19 %5 = sub i64 %3, %4 %6 = icmp ult i64 %5, 2147483648 tail call void @llvm.assume(i1 %6) %7 = getelementptr inbounds float, ptr addrspace(1) %1, i64 %5 - %8 = load float, ptr addrspace(1) %7, align 4, !tbaa !23 + %8 = load float, ptr addrspace(1) %7, align 4 %9 = getelementptr inbounds float, ptr addrspace(1) %0, i64 %5 - %10 = load float, ptr addrspace(1) %9, align 4, !tbaa !23 + %10 = load float, ptr addrspace(1) %9, align 4 %11 = fadd float %10, %8 - store float %11, ptr addrspace(1) %9, align 4, !tbaa !23 + store float %11, ptr addrspace(1) %9, align 4 ret void } ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) declare void @llvm.assume(i1 noundef) #1 -declare dso_local spir_func i32 @_Z18__spirv_ocl_printfPU3AS2Kcz(ptr addrspace(2), ...) - -attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="main.cpp" "sycl-nd-range-kernel"="1" "sycl-optlevel"="2" "uniform-work-group-size"="true" } +attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-entry-point" "sycl-module-id"="main.cpp" "sycl-nd-range-kernel"="1" "sycl-optlevel"="2" "uniform-work-group-size"="true" } attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } -!llvm.module.flags = !{!0, !1, !2} -!opencl.spir.version = !{!3} -!spirv.Source = !{!4} -!llvm.ident = !{!5} +!llvm.linker.options = !{!0, !1} +!llvm.module.flags = !{!2, !3, !4} +!opencl.spir.version = !{!5} +!spirv.Source = !{!6} +!llvm.ident = !{!7} +!sycl-esimd-split-status = !{!8} -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 1, !"sycl-device", i32 1} -!2 = !{i32 7, !"frame-pointer", i32 2} -!3 = !{i32 1, i32 2} -!4 = !{i32 4, i32 100000} -!5 = !{!"clang version 21.0.0git (https://github.com/intel/llvm d5f649b706f63b5c74e1929bc95db8de91085560)"} -!6 = !{i32 -1, i32 -1} -!7 = !{} -!8 = !{i1 false, i1 false} -!9 = !{!10, !12, !14} -!10 = distinct !{!10, !11, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"} -!11 = distinct !{!11, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv"} -!12 = distinct !{!12, !13, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"} -!13 = distinct !{!13, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v"} -!14 = distinct !{!14, !15, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv: argument 0"} -!15 = distinct !{!15, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv"} -!16 = !{!17, !19, !21} -!17 = distinct !{!17, !18, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"} -!18 = distinct !{!18, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN4sycl3_V12idILi1EEEE8initSizeEv"} -!19 = distinct !{!19, !20, !"_ZN7__spirv16initGlobalOffsetILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"} -!20 = distinct !{!20, !"_ZN7__spirv16initGlobalOffsetILi1EN4sycl3_V12idILi1EEEEET0_v"} -!21 = distinct !{!21, !22, !"_ZNK4sycl3_V17nd_itemILi1EE10get_offsetEv: argument 0"} -!22 = distinct !{!22, !"_ZNK4sycl3_V17nd_itemILi1EE10get_offsetEv"} -!23 = !{!24, !24, i64 0} -!24 = !{!"float", !25, i64 0} -!25 = !{!"omnipotent char", !26, i64 0} -!26 = !{!"Simple C++ TBAA"} +!0 = !{!"-llibcpmt"} +!1 = !{!"/alternatename:_Avx2WmemEnabled=_Avx2WmemEnabledWeakValue"} +!2 = !{i32 1, !"wchar_size", i32 2} +!3 = !{i32 1, !"sycl-device", i32 1} +!4 = !{i32 7, !"frame-pointer", i32 2} +!5 = !{i32 1, i32 2} +!6 = !{i32 4, i32 100000} +!7 = !{!"clang version 21.0.0git (https://github.com/intel/llvm d5f649b706f63b5c74e1929bc95db8de91085560)"} +!8 = !{i8 0} +!9 = !{i32 -1, i32 -1} +!10 = !{} +!11 = !{i1 false, i1 false} +!12 = !{!13, !15, !17} +!13 = distinct !{!13, !14, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"} +!14 = distinct !{!14, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv"} +!15 = distinct !{!15, !16, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"} +!16 = distinct !{!16, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v"} +!17 = distinct !{!17, !18, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv: argument 0"} +!18 = distinct !{!18, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv"} +!19 = !{!20, !22, !24} +!20 = distinct !{!20, !21, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"} +!21 = distinct !{!21, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN4sycl3_V12idILi1EEEE8initSizeEv"} +!22 = distinct !{!22, !23, !"_ZN7__spirv16initGlobalOffsetILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"} +!23 = distinct !{!23, !"_ZN7__spirv16initGlobalOffsetILi1EN4sycl3_V12idILi1EEEEET0_v"} +!24 = distinct !{!24, !25, !"_ZNK4sycl3_V17nd_itemILi1EE10get_offsetEv: argument 0"} +!25 = distinct !{!25, !"_ZNK4sycl3_V17nd_itemILi1EE10get_offsetEv"}