1
0
mirror of https://github.com/fumiama/gozel.git synced 2026-06-21 10:30:30 +08:00

fix: remove duplicated gens

This commit is contained in:
源文雨
2026-03-28 18:51:25 +08:00
parent 95852e65c5
commit e781f418d5
5 changed files with 84 additions and 92 deletions

View File

@@ -274,10 +274,6 @@ main.cpp ──clang++ -fsycl──▶ device_kern.bc
▼ device_kern_0.bc ▼ device_kern_0.bc
clang++ -emit-llvm -S
▼ device_kern.ll
llvm-spirv llvm-spirv
▼ main.spv ← embedded via //go:embed ▼ main.spv ← embedded via //go:embed

View File

@@ -18,9 +18,8 @@ import (
//go:generate clang++ -fsycl -fsycl-device-only -fsycl-targets=spirv64 -Xclang -emit-llvm-bc main.cpp -o device_kern.bc //go:generate clang++ -fsycl -fsycl-device-only -fsycl-targets=spirv64 -Xclang -emit-llvm-bc main.cpp -o device_kern.bc
//go:generate sycl-post-link -symbols -split=auto -o device_kern.table device_kern.bc //go:generate sycl-post-link -symbols -split=auto -o device_kern.table device_kern.bc
//go:generate clang++ -target spirv64-unknown-unknown -S -emit-llvm -x ir device_kern_0.bc -o device_kern.ll //go:generate llvm-spirv -o main.spv device_kern_0.bc
//go:generate llvm-spirv -o main.spv device_kern.bc //go:generate clang++ -target spirv64-unknown-unknown -S -emit-llvm -x ir device_kern_0.bc -o main.ll
//go:generate clang++ -target spirv64-unknown-unknown -S -emit-llvm -x ir device_kern.bc -o main.ll
//go:embed main.spv //go:embed main.spv
var kernelspv []byte var kernelspv []byte

View File

@@ -1,4 +1,4 @@
; ModuleID = 'device_kern.bc' ; ModuleID = 'device_kern_0.bc'
source_filename = "main.cpp" source_filename = "main.cpp"
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1"
target triple = "spirv64-unknown-unknown" target triple = "spirv64-unknown-unknown"
@@ -7,58 +7,57 @@ target triple = "spirv64-unknown-unknown"
@__spirv_BuiltInGlobalOffset = external local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 @__spirv_BuiltInGlobalOffset = external local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write) ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write)
define spir_kernel void @__sycl_kernel_vector_add(ptr addrspace(1) noundef align 4 captures(none) %0, ptr addrspace(1) noundef readonly align 4 captures(none) %1) local_unnamed_addr #0 !kernel_arg_buffer_location !6 !sycl_fixed_targets !7 !sycl_kernel_omit_args !8 { define spir_kernel void @__sycl_kernel_vector_add(ptr addrspace(1) noundef align 4 captures(none) %0, ptr addrspace(1) noundef readonly align 4 captures(none) %1) local_unnamed_addr #0 !kernel_arg_buffer_location !9 !sycl_fixed_targets !10 !sycl_kernel_omit_args !11 {
%3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32, !noalias !9 %3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32, !noalias !12
%4 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalOffset, align 32, !noalias !16 %4 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalOffset, align 32, !noalias !19
%5 = sub i64 %3, %4 %5 = sub i64 %3, %4
%6 = icmp ult i64 %5, 2147483648 %6 = icmp ult i64 %5, 2147483648
tail call void @llvm.assume(i1 %6) tail call void @llvm.assume(i1 %6)
%7 = getelementptr inbounds float, ptr addrspace(1) %1, i64 %5 %7 = getelementptr inbounds float, ptr addrspace(1) %1, i64 %5
%8 = load float, ptr addrspace(1) %7, align 4, !tbaa !23 %8 = load float, ptr addrspace(1) %7, align 4
%9 = getelementptr inbounds float, ptr addrspace(1) %0, i64 %5 %9 = getelementptr inbounds float, ptr addrspace(1) %0, i64 %5
%10 = load float, ptr addrspace(1) %9, align 4, !tbaa !23 %10 = load float, ptr addrspace(1) %9, align 4
%11 = fadd float %10, %8 %11 = fadd float %10, %8
store float %11, ptr addrspace(1) %9, align 4, !tbaa !23 store float %11, ptr addrspace(1) %9, align 4
ret void ret void
} }
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
declare void @llvm.assume(i1 noundef) #1 declare void @llvm.assume(i1 noundef) #1
declare dso_local spir_func i32 @_Z18__spirv_ocl_printfPU3AS2Kcz(ptr addrspace(2), ...) attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-entry-point" "sycl-module-id"="main.cpp" "sycl-nd-range-kernel"="1" "sycl-optlevel"="2" "uniform-work-group-size"="true" }
attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="main.cpp" "sycl-nd-range-kernel"="1" "sycl-optlevel"="2" "uniform-work-group-size"="true" }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
!llvm.module.flags = !{!0, !1, !2} !llvm.linker.options = !{!0, !1}
!opencl.spir.version = !{!3} !llvm.module.flags = !{!2, !3, !4}
!spirv.Source = !{!4} !opencl.spir.version = !{!5}
!llvm.ident = !{!5} !spirv.Source = !{!6}
!llvm.ident = !{!7}
!sycl-esimd-split-status = !{!8}
!0 = !{i32 1, !"wchar_size", i32 4} !0 = !{!"-llibcpmt"}
!1 = !{i32 1, !"sycl-device", i32 1} !1 = !{!"/alternatename:_Avx2WmemEnabled=_Avx2WmemEnabledWeakValue"}
!2 = !{i32 7, !"frame-pointer", i32 2} !2 = !{i32 1, !"wchar_size", i32 2}
!3 = !{i32 1, i32 2} !3 = !{i32 1, !"sycl-device", i32 1}
!4 = !{i32 4, i32 100000} !4 = !{i32 7, !"frame-pointer", i32 2}
!5 = !{!"clang version 21.0.0git (https://github.com/intel/llvm d5f649b706f63b5c74e1929bc95db8de91085560)"} !5 = !{i32 1, i32 2}
!6 = !{i32 -1, i32 -1} !6 = !{i32 4, i32 100000}
!7 = !{} !7 = !{!"clang version 21.0.0git (https://github.com/intel/llvm d5f649b706f63b5c74e1929bc95db8de91085560)"}
!8 = !{i1 false, i1 false} !8 = !{i8 0}
!9 = !{!10, !12, !14} !9 = !{i32 -1, i32 -1}
!10 = distinct !{!10, !11, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"} !10 = !{}
!11 = distinct !{!11, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv"} !11 = !{i1 false, i1 false}
!12 = distinct !{!12, !13, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"} !12 = !{!13, !15, !17}
!13 = distinct !{!13, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v"} !13 = distinct !{!13, !14, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"}
!14 = distinct !{!14, !15, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv: argument 0"} !14 = distinct !{!14, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv"}
!15 = distinct !{!15, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv"} !15 = distinct !{!15, !16, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"}
!16 = !{!17, !19, !21} !16 = distinct !{!16, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v"}
!17 = distinct !{!17, !18, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"} !17 = distinct !{!17, !18, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv: argument 0"}
!18 = distinct !{!18, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN4sycl3_V12idILi1EEEE8initSizeEv"} !18 = distinct !{!18, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv"}
!19 = distinct !{!19, !20, !"_ZN7__spirv16initGlobalOffsetILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"} !19 = !{!20, !22, !24}
!20 = distinct !{!20, !"_ZN7__spirv16initGlobalOffsetILi1EN4sycl3_V12idILi1EEEEET0_v"} !20 = distinct !{!20, !21, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"}
!21 = distinct !{!21, !22, !"_ZNK4sycl3_V17nd_itemILi1EE10get_offsetEv: argument 0"} !21 = distinct !{!21, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN4sycl3_V12idILi1EEEE8initSizeEv"}
!22 = distinct !{!22, !"_ZNK4sycl3_V17nd_itemILi1EE10get_offsetEv"} !22 = distinct !{!22, !23, !"_ZN7__spirv16initGlobalOffsetILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"}
!23 = !{!24, !24, i64 0} !23 = distinct !{!23, !"_ZN7__spirv16initGlobalOffsetILi1EN4sycl3_V12idILi1EEEEET0_v"}
!24 = !{!"float", !25, i64 0} !24 = distinct !{!24, !25, !"_ZNK4sycl3_V17nd_itemILi1EE10get_offsetEv: argument 0"}
!25 = !{!"omnipotent char", !26, i64 0} !25 = distinct !{!25, !"_ZNK4sycl3_V17nd_itemILi1EE10get_offsetEv"}
!26 = !{!"Simple C++ TBAA"}

View File

@@ -18,9 +18,8 @@ import (
//go:generate clang++ -fsycl -fsycl-device-only -fsycl-targets=spirv64 -Xclang -emit-llvm-bc main.cpp -o device_kern.bc //go:generate clang++ -fsycl -fsycl-device-only -fsycl-targets=spirv64 -Xclang -emit-llvm-bc main.cpp -o device_kern.bc
//go:generate sycl-post-link -symbols -split=auto -o device_kern.table device_kern.bc //go:generate sycl-post-link -symbols -split=auto -o device_kern.table device_kern.bc
//go:generate clang++ -target spirv64-unknown-unknown -S -emit-llvm -x ir device_kern_0.bc -o device_kern.ll //go:generate llvm-spirv -o main.spv device_kern_0.bc
//go:generate llvm-spirv -o main.spv device_kern.bc //go:generate clang++ -target spirv64-unknown-unknown -S -emit-llvm -x ir device_kern_0.bc -o main.ll
//go:generate clang++ -target spirv64-unknown-unknown -S -emit-llvm -x ir device_kern.bc -o main.ll
//go:embed main.spv //go:embed main.spv
var kernelspv []byte var kernelspv []byte

View File

@@ -1,4 +1,4 @@
; ModuleID = 'device_kern.bc' ; ModuleID = 'device_kern_0.bc'
source_filename = "main.cpp" source_filename = "main.cpp"
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1"
target triple = "spirv64-unknown-unknown" target triple = "spirv64-unknown-unknown"
@@ -7,58 +7,57 @@ target triple = "spirv64-unknown-unknown"
@__spirv_BuiltInGlobalOffset = external local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 @__spirv_BuiltInGlobalOffset = external local_unnamed_addr addrspace(1) constant <3 x i64>, align 32
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write) ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write)
define spir_kernel void @__sycl_kernel_vector_add(ptr addrspace(1) noundef align 4 captures(none) %0, ptr addrspace(1) noundef readonly align 4 captures(none) %1) local_unnamed_addr #0 !kernel_arg_buffer_location !6 !sycl_fixed_targets !7 !sycl_kernel_omit_args !8 { define spir_kernel void @__sycl_kernel_vector_add(ptr addrspace(1) noundef align 4 captures(none) %0, ptr addrspace(1) noundef readonly align 4 captures(none) %1) local_unnamed_addr #0 !kernel_arg_buffer_location !9 !sycl_fixed_targets !10 !sycl_kernel_omit_args !11 {
%3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32, !noalias !9 %3 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalInvocationId, align 32, !noalias !12
%4 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalOffset, align 32, !noalias !16 %4 = load i64, ptr addrspace(1) @__spirv_BuiltInGlobalOffset, align 32, !noalias !19
%5 = sub i64 %3, %4 %5 = sub i64 %3, %4
%6 = icmp ult i64 %5, 2147483648 %6 = icmp ult i64 %5, 2147483648
tail call void @llvm.assume(i1 %6) tail call void @llvm.assume(i1 %6)
%7 = getelementptr inbounds float, ptr addrspace(1) %1, i64 %5 %7 = getelementptr inbounds float, ptr addrspace(1) %1, i64 %5
%8 = load float, ptr addrspace(1) %7, align 4, !tbaa !23 %8 = load float, ptr addrspace(1) %7, align 4
%9 = getelementptr inbounds float, ptr addrspace(1) %0, i64 %5 %9 = getelementptr inbounds float, ptr addrspace(1) %0, i64 %5
%10 = load float, ptr addrspace(1) %9, align 4, !tbaa !23 %10 = load float, ptr addrspace(1) %9, align 4
%11 = fadd float %10, %8 %11 = fadd float %10, %8
store float %11, ptr addrspace(1) %9, align 4, !tbaa !23 store float %11, ptr addrspace(1) %9, align 4
ret void ret void
} }
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write)
declare void @llvm.assume(i1 noundef) #1 declare void @llvm.assume(i1 noundef) #1
declare dso_local spir_func i32 @_Z18__spirv_ocl_printfPU3AS2Kcz(ptr addrspace(2), ...) attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-entry-point" "sycl-module-id"="main.cpp" "sycl-nd-range-kernel"="1" "sycl-optlevel"="2" "uniform-work-group-size"="true" }
attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: write) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="main.cpp" "sycl-nd-range-kernel"="1" "sycl-optlevel"="2" "uniform-work-group-size"="true" }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) } attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
!llvm.module.flags = !{!0, !1, !2} !llvm.linker.options = !{!0, !1}
!opencl.spir.version = !{!3} !llvm.module.flags = !{!2, !3, !4}
!spirv.Source = !{!4} !opencl.spir.version = !{!5}
!llvm.ident = !{!5} !spirv.Source = !{!6}
!llvm.ident = !{!7}
!sycl-esimd-split-status = !{!8}
!0 = !{i32 1, !"wchar_size", i32 4} !0 = !{!"-llibcpmt"}
!1 = !{i32 1, !"sycl-device", i32 1} !1 = !{!"/alternatename:_Avx2WmemEnabled=_Avx2WmemEnabledWeakValue"}
!2 = !{i32 7, !"frame-pointer", i32 2} !2 = !{i32 1, !"wchar_size", i32 2}
!3 = !{i32 1, i32 2} !3 = !{i32 1, !"sycl-device", i32 1}
!4 = !{i32 4, i32 100000} !4 = !{i32 7, !"frame-pointer", i32 2}
!5 = !{!"clang version 21.0.0git (https://github.com/intel/llvm d5f649b706f63b5c74e1929bc95db8de91085560)"} !5 = !{i32 1, i32 2}
!6 = !{i32 -1, i32 -1} !6 = !{i32 4, i32 100000}
!7 = !{} !7 = !{!"clang version 21.0.0git (https://github.com/intel/llvm d5f649b706f63b5c74e1929bc95db8de91085560)"}
!8 = !{i1 false, i1 false} !8 = !{i8 0}
!9 = !{!10, !12, !14} !9 = !{i32 -1, i32 -1}
!10 = distinct !{!10, !11, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"} !10 = !{}
!11 = distinct !{!11, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv"} !11 = !{i1 false, i1 false}
!12 = distinct !{!12, !13, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"} !12 = !{!13, !15, !17}
!13 = distinct !{!13, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v"} !13 = distinct !{!13, !14, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"}
!14 = distinct !{!14, !15, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv: argument 0"} !14 = distinct !{!14, !"_ZN7__spirv29InitSizesSTGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEE8initSizeEv"}
!15 = distinct !{!15, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv"} !15 = distinct !{!15, !16, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"}
!16 = !{!17, !19, !21} !16 = distinct !{!16, !"_ZN7__spirv22initGlobalInvocationIdILi1EN4sycl3_V12idILi1EEEEET0_v"}
!17 = distinct !{!17, !18, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"} !17 = distinct !{!17, !18, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv: argument 0"}
!18 = distinct !{!18, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN4sycl3_V12idILi1EEEE8initSizeEv"} !18 = distinct !{!18, !"_ZNK4sycl3_V17nd_itemILi1EE13get_global_idEv"}
!19 = distinct !{!19, !20, !"_ZN7__spirv16initGlobalOffsetILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"} !19 = !{!20, !22, !24}
!20 = distinct !{!20, !"_ZN7__spirv16initGlobalOffsetILi1EN4sycl3_V12idILi1EEEEET0_v"} !20 = distinct !{!20, !21, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN4sycl3_V12idILi1EEEE8initSizeEv: argument 0"}
!21 = distinct !{!21, !22, !"_ZNK4sycl3_V17nd_itemILi1EE10get_offsetEv: argument 0"} !21 = distinct !{!21, !"_ZN7__spirv23InitSizesSTGlobalOffsetILi1EN4sycl3_V12idILi1EEEE8initSizeEv"}
!22 = distinct !{!22, !"_ZNK4sycl3_V17nd_itemILi1EE10get_offsetEv"} !22 = distinct !{!22, !23, !"_ZN7__spirv16initGlobalOffsetILi1EN4sycl3_V12idILi1EEEEET0_v: argument 0"}
!23 = !{!24, !24, i64 0} !23 = distinct !{!23, !"_ZN7__spirv16initGlobalOffsetILi1EN4sycl3_V12idILi1EEEEET0_v"}
!24 = !{!"float", !25, i64 0} !24 = distinct !{!24, !25, !"_ZNK4sycl3_V17nd_itemILi1EE10get_offsetEv: argument 0"}
!25 = !{!"omnipotent char", !26, i64 0} !25 = distinct !{!25, !"_ZNK4sycl3_V17nd_itemILi1EE10get_offsetEv"}
!26 = !{!"Simple C++ TBAA"}