; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 -slp-threshold=-1 | FileCheck %s
; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.2 | FileCheck %s
; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s
; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s
; RUN: opt < %s -passes=slp-vectorizer,instcombine -S -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512bw,+avx512vl | FileCheck %s

; Overview: every function below performs the same scalar sequence on four
; consecutive elements starting at %0 -- load, multiply by %1, logical shift
; right by 15, clamp to 255 through an unsigned compare + select, store back.
; The assertions expect the four scalar chains to be merged into one 4-wide
; vector chain.
;
; All five invocations above share the default check prefix, so the expected
; output has to be identical for every listed feature set.
; NOTE(review): only the SSE2 invocation passes -slp-threshold=-1; presumably
; the default threshold would not vectorize at that feature level -- confirm
; before changing or removing the flag.
;
; The third parameter (%2) is not referenced by any of the functions.

; i32 elements: the compare + select clamp is expected to become a single
; llvm.umin.v4i32 call on the vectorized value.
define void @store_i32(ptr nocapture %0, i32 %1, i32 %2) {
; CHECK-LABEL: @store_i32(
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[TMP0:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]]
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1:%.*]], i64 0
; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = mul <4 x i32> [[TMP4]], [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = lshr <4 x i32> [[TMP7]], splat (i32 15)
; CHECK-NEXT:    [[TMP9:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[TMP8]], <4 x i32> splat (i32 255))
; CHECK-NEXT:    store <4 x i32> [[TMP9]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
; CHECK-NEXT:    ret void
;
  ; Element 0 (accessed directly through %0).
  %4 = load i32, ptr %0, align 4, !tbaa !2
  %5 = mul i32 %4, %1
  %6 = lshr i32 %5, 15
  %7 = icmp ult i32 %6, 255
  %8 = select i1 %7, i32 %6, i32 255
  store i32 %8, ptr %0, align 4, !tbaa !2
  ; Element 1.
  %9 = getelementptr inbounds i32, ptr %0, i64 1
  %10 = load i32, ptr %9, align 4, !tbaa !2
  %11 = mul i32 %10, %1
  %12 = lshr i32 %11, 15
  %13 = icmp ult i32 %12, 255
  %14 = select i1 %13, i32 %12, i32 255
  store i32 %14, ptr %9, align 4, !tbaa !2
  ; Element 2.
  %15 = getelementptr inbounds i32, ptr %0, i64 2
  %16 = load i32, ptr %15, align 4, !tbaa !2
  %17 = mul i32 %16, %1
  %18 = lshr i32 %17, 15
  %19 = icmp ult i32 %18, 255
  %20 = select i1 %19, i32 %18, i32 255
  store i32 %20, ptr %15, align 4, !tbaa !2
  ; Element 3.
  %21 = getelementptr inbounds i32, ptr %0, i64 3
  %22 = load i32, ptr %21, align 4, !tbaa !2
  %23 = mul i32 %22, %1
  %24 = lshr i32 %23, 15
  %25 = icmp ult i32 %24, 255
  %26 = select i1 %25, i32 %24, i32 255
  store i32 %26, ptr %21, align 4, !tbaa !2
  ret void
}

; i8 elements: each byte is zero-extended to i32 before the arithmetic and the
; clamped result is truncated back to i8. The expected output loads <4 x i8>,
; widens once to <4 x i32>, clamps with llvm.umin.v4i32 and narrows with a
; single "trunc nuw" before the vector store.
define void @store_i8(ptr nocapture %0, i32 %1, i32 %2) {
; CHECK-LABEL: @store_i8(
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i8>, ptr [[TMP0:%.*]], align 1, !tbaa [[TBAA4:![0-9]+]]
; CHECK-NEXT:    [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1:%.*]], i64 0
; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP8:%.*]] = mul <4 x i32> [[TMP7]], [[TMP5]]
; CHECK-NEXT:    [[TMP9:%.*]] = lshr <4 x i32> [[TMP8]], splat (i32 15)
; CHECK-NEXT:    [[TMP10:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[TMP9]], <4 x i32> splat (i32 255))
; CHECK-NEXT:    [[TMP11:%.*]] = trunc nuw <4 x i32> [[TMP10]] to <4 x i8>
; CHECK-NEXT:    store <4 x i8> [[TMP11]], ptr [[TMP0]], align 1, !tbaa [[TBAA4]]
; CHECK-NEXT:    ret void
;
  ; Element 0 (accessed directly through %0).
  %4 = load i8, ptr %0, align 1, !tbaa !6
  %5 = zext i8 %4 to i32
  %6 = mul i32 %5, %1
  %7 = lshr i32 %6, 15
  %8 = icmp ult i32 %7, 255
  %9 = select i1 %8, i32 %7, i32 255
  %10 = trunc i32 %9 to i8
  store i8 %10, ptr %0, align 1, !tbaa !6
  ; Element 1.
  %11 = getelementptr inbounds i8, ptr %0, i64 1
  %12 = load i8, ptr %11, align 1, !tbaa !6
  %13 = zext i8 %12 to i32
  %14 = mul i32 %13, %1
  %15 = lshr i32 %14, 15
  %16 = icmp ult i32 %15, 255
  %17 = select i1 %16, i32 %15, i32 255
  %18 = trunc i32 %17 to i8
  store i8 %18, ptr %11, align 1, !tbaa !6
  ; Element 2.
  %19 = getelementptr inbounds i8, ptr %0, i64 2
  %20 = load i8, ptr %19, align 1, !tbaa !6
  %21 = zext i8 %20 to i32
  %22 = mul i32 %21, %1
  %23 = lshr i32 %22, 15
  %24 = icmp ult i32 %23, 255
  %25 = select i1 %24, i32 %23, i32 255
  %26 = trunc i32 %25 to i8
  store i8 %26, ptr %19, align 1, !tbaa !6
  ; Element 3.
  %27 = getelementptr inbounds i8, ptr %0, i64 3
  %28 = load i8, ptr %27, align 1, !tbaa !6
  %29 = zext i8 %28 to i32
  %30 = mul i32 %29, %1
  %31 = lshr i32 %30, 15
  %32 = icmp ult i32 %31, 255
  %33 = select i1 %32, i32 %31, i32 255
  %34 = trunc i32 %33 to i8
  store i8 %34, ptr %27, align 1, !tbaa !6
  ret void
}

; i64 elements: %1 is zero-extended to i64 once and used as the multiplier.
; The clamp compares only the low 32 bits of the shifted value (trunc to i32)
; while the selected value is the i64 masked with 0xFFFFFFFF. The expected
; output keeps this as a vector trunc / icmp / and / select sequence rather
; than a umin call.
define void @store_i64(ptr nocapture %0, i32 %1, i32 %2) {
; CHECK-LABEL: @store_i64(
; CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP1:%.*]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 8, !tbaa [[TBAA5:![0-9]+]]
; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i64> poison, i64 [[TMP4]], i64 0
; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP8:%.*]] = mul <4 x i64> [[TMP5]], [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = lshr <4 x i64> [[TMP8]], splat (i64 15)
; CHECK-NEXT:    [[TMP10:%.*]] = trunc <4 x i64> [[TMP9]] to <4 x i32>
; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult <4 x i32> [[TMP10]], splat (i32 255)
; CHECK-NEXT:    [[TMP12:%.*]] = and <4 x i64> [[TMP9]], splat (i64 4294967295)
; CHECK-NEXT:    [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i64> [[TMP12]], <4 x i64> splat (i64 255)
; CHECK-NEXT:    store <4 x i64> [[TMP13]], ptr [[TMP0]], align 8, !tbaa [[TBAA5]]
; CHECK-NEXT:    ret void
;
  ; Multiplier shared by all four elements.
  %4 = zext i32 %1 to i64
  ; Element 0 (accessed directly through %0).
  %5 = load i64, ptr %0, align 8, !tbaa !7
  %6 = mul i64 %5, %4
  %7 = lshr i64 %6, 15
  %8 = trunc i64 %7 to i32
  %9 = icmp ult i32 %8, 255
  %10 = and i64 %7, 4294967295
  %11 = select i1 %9, i64 %10, i64 255
  store i64 %11, ptr %0, align 8, !tbaa !7
  ; Element 1.
  %12 = getelementptr inbounds i64, ptr %0, i64 1
  %13 = load i64, ptr %12, align 8, !tbaa !7
  %14 = mul i64 %13, %4
  %15 = lshr i64 %14, 15
  %16 = trunc i64 %15 to i32
  %17 = icmp ult i32 %16, 255
  %18 = and i64 %15, 4294967295
  %19 = select i1 %17, i64 %18, i64 255
  store i64 %19, ptr %12, align 8, !tbaa !7
  ; Element 2.
  %20 = getelementptr inbounds i64, ptr %0, i64 2
  %21 = load i64, ptr %20, align 8, !tbaa !7
  %22 = mul i64 %21, %4
  %23 = lshr i64 %22, 15
  %24 = trunc i64 %23 to i32
  %25 = icmp ult i32 %24, 255
  %26 = and i64 %23, 4294967295
  %27 = select i1 %25, i64 %26, i64 255
  store i64 %27, ptr %20, align 8, !tbaa !7
  ; Element 3.
  %28 = getelementptr inbounds i64, ptr %0, i64 3
  %29 = load i64, ptr %28, align 8, !tbaa !7
  %30 = mul i64 %29, %4
  %31 = lshr i64 %30, 15
  %32 = trunc i64 %31 to i32
  %33 = icmp ult i32 %32, 255
  %34 = and i64 %31, 4294967295
  %35 = select i1 %33, i64 %34, i64 255
  store i64 %35, ptr %28, align 8, !tbaa !7
  ret void
}

; TBAA metadata. Access tags used above: !2 ("int") in @store_i32,
; !6 ("omnipotent char") in @store_i8, !7 ("long") in @store_i64.
!2 = !{!3, !3, i64 0}
!3 = !{!"int", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C++ TBAA"}
!6 = !{!4, !4, i64 0}
!7 = !{!8, !8, i64 0}
!8 = !{!"long", !4, i64 0}