; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=dse -S < %s | FileCheck %s
;
; Exercises the dse pass on sequences of llvm.masked.store calls:
;   @f0 - three masked stores through the same pointer with masked loads from
;         other pointers in between; the CHECK lines keep only the last store.
;   @f1 - a <4 x i32> masked store followed by a <4 x i8> masked store to the
;         same pointer; the CHECK lines keep both.
;   @f2 - same as @f1, but with a non-constant mask argument; both are kept.
;
; NOTE(review): in this copy of the file every literal constant vector operand
; (the first stored value in @f0, the constant masks, and the shufflevector
; masks) appears to be missing -- e.g. "<128 x i1> ," and "<4 x i1> )" have no
; value after the type. The IR will not parse until those operands are restored
; from the original test; they are deliberately left untouched here.
target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"

; @f0: %v2 is written three times by llvm.masked.store.v128i8 (tagged !tbaa !3).
; Between the stores, two llvm.masked.load.v128i8 calls (tagged !tbaa !8) read
; through %v8 and %v19, which are loaded from %a1 rather than %a0.
; The expected output contains the loads, the shuffles, the icmp/select and only
; the final masked store of %v25; the first two stores are absent.
define dllexport i32 @f0(ptr %a0, ptr %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) #0 {
; CHECK-LABEL: @f0(
; CHECK-NEXT:  b0:
; CHECK-NEXT:    [[V0:%.*]] = getelementptr inbounds ptr, ptr [[A0:%.*]], i32 [[A2:%.*]]
; CHECK-NEXT:    [[V1:%.*]] = load ptr, ptr [[V0]], align 4, !tbaa [[TBAA0:![0-9]+]]
; CHECK-NEXT:    [[V2:%.*]] = getelementptr i8, ptr [[V1]], i32 [[A3:%.*]]
; CHECK-NEXT:    [[V6:%.*]] = getelementptr inbounds ptr, ptr [[A1:%.*]], i32 [[A4:%.*]]
; CHECK-NEXT:    [[V7:%.*]] = load ptr, ptr [[V6]], align 4, !tbaa [[TBAA3:![0-9]+]]
; CHECK-NEXT:    [[V8:%.*]] = getelementptr i8, ptr [[V7]], i32 [[A5:%.*]]
; CHECK-NEXT:    [[V10:%.*]] = tail call <128 x i8> @llvm.masked.load.v128i8.p0(ptr [[V8]], i32 32, <128 x i1> , <128 x i8> undef), !tbaa [[TBAA5:![0-9]+]]
; CHECK-NEXT:    [[V11:%.*]] = shufflevector <128 x i8> [[V10]], <128 x i8> undef, <32 x i32>
; CHECK-NEXT:    [[V14:%.*]] = shufflevector <32 x i8> [[V11]], <32 x i8> undef, <128 x i32>
; CHECK-NEXT:    [[V16:%.*]] = shufflevector <128 x i8> [[V14]], <128 x i8> undef, <32 x i32>
; CHECK-NEXT:    [[V17:%.*]] = getelementptr inbounds ptr, ptr [[A1]], i32 [[A6:%.*]]
; CHECK-NEXT:    [[V18:%.*]] = load ptr, ptr [[V17]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    [[V19:%.*]] = getelementptr i8, ptr [[V18]], i32 [[A7:%.*]]
; CHECK-NEXT:    [[V21:%.*]] = tail call <128 x i8> @llvm.masked.load.v128i8.p0(ptr [[V19]], i32 32, <128 x i1> , <128 x i8> undef), !tbaa [[TBAA5]]
; CHECK-NEXT:    [[V22:%.*]] = shufflevector <128 x i8> [[V21]], <128 x i8> undef, <32 x i32>
; CHECK-NEXT:    [[V23:%.*]] = icmp ugt <32 x i8> [[V16]], [[V22]]
; CHECK-NEXT:    [[V24:%.*]] = select <32 x i1> [[V23]], <32 x i8> [[V16]], <32 x i8> [[V22]]
; CHECK-NEXT:    [[V25:%.*]] = shufflevector <32 x i8> [[V24]], <32 x i8> undef, <128 x i32>
; CHECK-NEXT:    tail call void @llvm.masked.store.v128i8.p0(<128 x i8> [[V25]], ptr [[V2]], i32 32, <128 x i1> ), !tbaa [[TBAA8:![0-9]+]]
; CHECK-NEXT:    ret i32 0
;
b0:
  ; Destination pointer: %v2 = (*(%a0 + %a2)) + %a3 bytes.
  %v0 = getelementptr inbounds ptr, ptr %a0, i32 %a2
  %v1 = load ptr, ptr %v0, align 4, !tbaa !0
  %v2 = getelementptr i8, ptr %v1, i32 %a3
  ; First store to %v2 (constant value operand missing in this copy); not
  ; present in the expected output.
  tail call void @llvm.masked.store.v128i8.p0(<128 x i8> , ptr %v2, i32 32, <128 x i1> ), !tbaa !3
  ; First source pointer: %v8 = (*(%a1 + %a4)) + %a5 bytes.
  %v6 = getelementptr inbounds ptr, ptr %a1, i32 %a4
  %v7 = load ptr, ptr %v6, align 4, !tbaa !6
  %v8 = getelementptr i8, ptr %v7, i32 %a5
  %v10 = tail call <128 x i8> @llvm.masked.load.v128i8.p0(ptr %v8, i32 32, <128 x i1> , <128 x i8> undef), !tbaa !8
  ; Narrow the 128-lane load to 32 lanes, then widen it back to 128 lanes.
  %v11 = shufflevector <128 x i8> %v10, <128 x i8> undef, <32 x i32>
  %v14 = shufflevector <32 x i8> %v11, <32 x i8> undef, <128 x i32>
  ; Second store to %v2; also not present in the expected output.
  tail call void @llvm.masked.store.v128i8.p0(<128 x i8> %v14, ptr %v2, i32 32, <128 x i1> ), !tbaa !3
  %v16 = shufflevector <128 x i8> %v14, <128 x i8> undef, <32 x i32>
  ; Second source pointer: %v19 = (*(%a1 + %a6)) + %a7 bytes.
  %v17 = getelementptr inbounds ptr, ptr %a1, i32 %a6
  %v18 = load ptr, ptr %v17, align 4, !tbaa !6
  %v19 = getelementptr i8, ptr %v18, i32 %a7
  %v21 = tail call <128 x i8> @llvm.masked.load.v128i8.p0(ptr %v19, i32 32, <128 x i1> , <128 x i8> undef), !tbaa !8
  %v22 = shufflevector <128 x i8> %v21, <128 x i8> undef, <32 x i32>
  ; Lane-wise unsigned maximum of the two 32-lane vectors.
  %v23 = icmp ugt <32 x i8> %v16, %v22
  %v24 = select <32 x i1> %v23, <32 x i8> %v16, <32 x i8> %v22
  %v25 = shufflevector <32 x i8> %v24, <32 x i8> undef, <128 x i32>
  ; Third and final store to %v2; the only one the CHECK lines expect to remain.
  tail call void @llvm.masked.store.v128i8.p0(<128 x i8> %v25, ptr %v2, i32 32, <128 x i1> ), !tbaa !3
  ret i32 0
}

; @f1: two masked stores through the same pointer %a with different value
; types (<4 x i32> first, then <4 x i8>). The CHECK lines expect both calls in
; the output, each with a "splat (i1 true)" mask.
; NOTE(review): the mask operands of the two input calls are missing in this
; copy; judging by the CHECK lines they were all-true constants -- confirm
; against the original test before restoring them.
define dllexport i32 @f1(ptr %a, <4 x i8> %v1, <4 x i32> %v2) {
; CHECK-LABEL: @f1(
; CHECK-NEXT:    tail call void @llvm.masked.store.v4i32.p0(<4 x i32> [[V2:%.*]], ptr [[A:%.*]], i32 1, <4 x i1> splat (i1 true))
; CHECK-NEXT:    tail call void @llvm.masked.store.v4i8.p0(<4 x i8> [[V1:%.*]], ptr [[A]], i32 1, <4 x i1> splat (i1 true))
; CHECK-NEXT:    ret i32 0
;
  tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %v2, ptr %a, i32 1, <4 x i1> )
  tail call void @llvm.masked.store.v4i8.p0(<4 x i8> %v1, ptr %a, i32 1, <4 x i1> )
  ret i32 0
}

; @f2: same shape as @f1, but both stores take the run-time mask %mask instead
; of a constant. The CHECK lines expect both calls to remain.
define dllexport i32 @f2(ptr %a, <4 x i8> %v1, <4 x i32> %v2, <4 x i1> %mask) {
; CHECK-LABEL: @f2(
; CHECK-NEXT:    tail call void @llvm.masked.store.v4i32.p0(<4 x i32> [[V2:%.*]], ptr [[A:%.*]], i32 1, <4 x i1> [[MASK:%.*]])
; CHECK-NEXT:    tail call void @llvm.masked.store.v4i8.p0(<4 x i8> [[V1:%.*]], ptr [[A]], i32 1, <4 x i1> [[MASK]])
; CHECK-NEXT:    ret i32 0
;
  tail call void @llvm.masked.store.v4i32.p0(<4 x i32> %v2, ptr %a, i32 1, <4 x i1> %mask)
  tail call void @llvm.masked.store.v4i8.p0(<4 x i8> %v1, ptr %a, i32 1, <4 x i1> %mask)
  ret i32 0
}

; Masked store/load intrinsic declarations used by the tests above.
declare void @llvm.masked.store.v4i8.p0(<4 x i8>, ptr, i32, <4 x i1>)
declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32, <4 x i1>)
declare void @llvm.masked.store.v128i8.p0(<128 x i8>, ptr, i32 immarg, <128 x i1>) #1
declare <128 x i8> @llvm.masked.load.v128i8.p0(ptr, i32 immarg, <128 x i1>, <128 x i8>) #2

; #0 is attached to @f0, #1 to the v128i8 masked store declaration, and #2 to
; the v128i8 masked load declaration.
attributes #0 = { nounwind willreturn }
attributes #1 = { argmemonly nounwind willreturn }
attributes #2 = { argmemonly nounwind readonly willreturn }

; TBAA metadata, all rooted at !2 ("tvm-tbaa").
;   !0 - access tag on the pointer load through %a0 (%v1).
;   !3 - access tag on the masked stores in @f0 ("i8" under "0x2c6ebb0").
;   !6 - access tag on the pointer loads through %a1 (%v7, %v18).
;   !8 - access tag on the masked loads in @f0 ("i8" under "0x2c6c3c0").
!0 = !{!1, !1, i64 0}
!1 = !{!"0x2cf74d0", !2, i64 0}
!2 = !{!"tvm-tbaa"}
!3 = !{!4, !4, i64 0}
!4 = !{!"i8", !5, i64 0}
!5 = !{!"0x2c6ebb0", !2, i64 0}
!6 = !{!7, !7, i64 0}
!7 = !{!"0x2cff870", !2, i64 0}
!8 = !{!9, !9, i64 0}
!9 = !{!"i8", !10, i64 0}
!10 = !{!"0x2c6c3c0", !2, i64 0}