; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=instcombine -S < %s | FileCheck %s

; These tests exercise InstCombine on chains of insertelement instructions
; that write one scalar into the lanes of a vector. The "good" and
; "splat_undef" cases expect the chain to become a single insertelement into
; lane 0 followed by a splat shufflevector; the "bad" cases expect the chain
; to stay a sequence of insertelement instructions.

; All four lanes of a poison vector are written in order with the same scalar.
define <4 x float> @good1(float %arg) {
; CHECK-LABEL: @good1(
; CHECK-NEXT:    [[T:%.*]] = insertelement <4 x float> poison, float [[ARG:%.*]], i64 0
; CHECK-NEXT:    [[T6:%.*]] = shufflevector <4 x float> [[T]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    ret <4 x float> [[T6]]
;
  %t = insertelement <4 x float> poison, float %arg, i32 0
  %t4 = insertelement <4 x float> %t, float %arg, i32 1
  %t5 = insertelement <4 x float> %t4, float %arg, i32 2
  %t6 = insertelement <4 x float> %t5, float %arg, i32 3
  ret <4 x float> %t6
}

; Same as above, but the lanes are written out of order (1, 2, 0, 3).
define <4 x float> @good2(float %arg) {
; CHECK-LABEL: @good2(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[ARG:%.*]], i64 0
; CHECK-NEXT:    [[T6:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    ret <4 x float> [[T6]]
;
  %t = insertelement <4 x float> poison, float %arg, i32 1
  %t4 = insertelement <4 x float> %t, float %arg, i32 2
  %t5 = insertelement <4 x float> %t4, float %arg, i32 0
  %t6 = insertelement <4 x float> %t5, float %arg, i32 3
  ret <4 x float> %t6
}

; The base vector is zeroinitializer, but every lane is overwritten, so the
; expected output starts from poison.
define <4 x float> @good3(float %arg) {
; CHECK-LABEL: @good3(
; CHECK-NEXT:    [[T:%.*]] = insertelement <4 x float> poison, float [[ARG:%.*]], i64 0
; CHECK-NEXT:    [[T6:%.*]] = shufflevector <4 x float> [[T]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    ret <4 x float> [[T6]]
;
  %t = insertelement <4 x float> zeroinitializer, float %arg, i32 0
  %t4 = insertelement <4 x float> %t, float %arg, i32 1
  %t5 = insertelement <4 x float> %t4, float %arg, i32 2
  %t6 = insertelement <4 x float> %t5, float %arg, i32 3
  ret <4 x float> %t6
}

; The fully written vector feeds an fadd with itself; the expected output does
; the fadd on the lane-0 insert and splats the result.
define <4 x float> @good4(float %arg) {
; CHECK-LABEL: @good4(
; CHECK-NEXT:    [[T:%.*]] = insertelement <4 x float> poison, float [[ARG:%.*]], i64 0
; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x float> [[T]], [[T]]
; CHECK-NEXT:    [[T7:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    ret <4 x float> [[T7]]
;
  %t = insertelement <4 x float> zeroinitializer, float %arg, i32 0
  %t4 = insertelement <4 x float> %t, float %arg, i32 1
  %t5 = insertelement <4 x float> %t4, float %arg, i32 2
  %t6 = insertelement <4 x float> %t5, float %arg, i32 3
  %t7 = fadd <4 x float> %t6, %t6
  ret <4 x float> %t7
}

; The first insert (lane 0) has an extra use in %a1; the rest of the chain is
; still expected to become a splat shuffle of that insert.
define <4 x float> @good5(float %v) {
; CHECK-LABEL: @good5(
; CHECK-NEXT:    [[INS1:%.*]] = insertelement <4 x float> poison, float [[V:%.*]], i64 0
; CHECK-NEXT:    [[A1:%.*]] = fadd <4 x float> [[INS1]], [[INS1]]
; CHECK-NEXT:    [[INS4:%.*]] = shufflevector <4 x float> [[INS1]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = fadd <4 x float> [[A1]], [[INS4]]
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
  %ins1 = insertelement <4 x float> poison, float %v, i32 0
  %a1 = fadd <4 x float> %ins1, %ins1
  %ins2 = insertelement<4 x float> %ins1, float %v, i32 1
  %ins3 = insertelement<4 x float> %ins2, float %v, i32 2
  %ins4 = insertelement<4 x float> %ins3, float %v, i32 3
  %res = fadd <4 x float> %a1, %ins4
  ret <4 x float> %res
}

; The insert is changed to allow the canonical shuffle-splat pattern from element 0.
; Lane 0 is never written by the input (lane 1 is written twice), so the
; expected shuffle mask has poison in lane 0.

define <4 x float> @splat_undef1(float %arg) {
; CHECK-LABEL: @splat_undef1(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[ARG:%.*]], i64 0
; CHECK-NEXT:    [[T6:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0>
; CHECK-NEXT:    ret <4 x float> [[T6]]
;
  %t = insertelement <4 x float> poison, float %arg, i32 1
  %t4 = insertelement <4 x float> %t, float %arg, i32 1
  %t5 = insertelement <4 x float> %t4, float %arg, i32 2
  %t6 = insertelement <4 x float> %t5, float %arg, i32 3
  ret <4 x float> %t6
}

; Re-uses the existing first insertelement.
; Lane 1 is never written, so the expected shuffle mask has poison in lane 1.

define <4 x float> @splat_undef2(float %arg) {
; CHECK-LABEL: @splat_undef2(
; CHECK-NEXT:    [[T:%.*]] = insertelement <4 x float> poison, float [[ARG:%.*]], i64 0
; CHECK-NEXT:    [[T6:%.*]] = shufflevector <4 x float> [[T]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 0, i32 0>
; CHECK-NEXT:    ret <4 x float> [[T6]]
;
  %t = insertelement <4 x float> poison, float %arg, i32 0
  %t5 = insertelement <4 x float> %t, float %arg, i32 2
  %t6 = insertelement <4 x float> %t5, float %arg, i32 3
  ret <4 x float> %t6
}

; Lane 1 receives a different scalar (%arg2), so the chain is expected to stay
; as insertelements; only the constant lane indices change from i32 to i64.
define <4 x float> @bad3(float %arg, float %arg2) {
; CHECK-LABEL: @bad3(
; CHECK-NEXT:    [[T:%.*]] = insertelement <4 x float> poison, float [[ARG:%.*]], i64 0
; CHECK-NEXT:    [[T4:%.*]] = insertelement <4 x float> [[T]], float [[ARG2:%.*]], i64 1
; CHECK-NEXT:    [[T5:%.*]] = insertelement <4 x float> [[T4]], float [[ARG]], i64 2
; CHECK-NEXT:    [[T6:%.*]] = insertelement <4 x float> [[T5]], float [[ARG]], i64 3
; CHECK-NEXT:    ret <4 x float> [[T6]]
;
  %t = insertelement <4 x float> poison, float %arg, i32 0
  %t4 = insertelement <4 x float> %t, float %arg2, i32 1
  %t5 = insertelement <4 x float> %t4, float %arg, i32 2
  %t6 = insertelement <4 x float> %t5, float %arg, i32 3
  ret <4 x float> %t6
}

; A single-element vector: the lone insertelement is expected to remain and no
; shufflevector is created.
define <1 x float> @bad4(float %arg) {
; CHECK-LABEL: @bad4(
; CHECK-NEXT:    [[T:%.*]] = insertelement <1 x float> poison, float [[ARG:%.*]], i64 0
; CHECK-NEXT:    ret <1 x float> [[T]]
;
  %t = insertelement <1 x float> poison, float %arg, i32 0
  ret <1 x float> %t
}

; Multiple undef elements are ok.
; TODO: Multiple uses trigger the transform at %t4, but we should sink/scalarize/CSE the splats?

define <4 x float> @splat_undef3(float %arg) {
; CHECK-LABEL: @splat_undef3(
; CHECK-NEXT:    [[T:%.*]] = insertelement <4 x float> poison, float [[ARG:%.*]], i64 0
; CHECK-NEXT:    [[T4:%.*]] = shufflevector <4 x float> [[T]], <4 x float> poison, <4 x i32> <i32 0, i32 0, i32 poison, i32 poison>
; CHECK-NEXT:    [[T6:%.*]] = shufflevector <4 x float> [[T]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[T7:%.*]] = fadd <4 x float> [[T6]], [[T4]]
; CHECK-NEXT:    ret <4 x float> [[T7]]
;
  %t = insertelement <4 x float> poison, float %arg, i32 0
  %t4 = insertelement <4 x float> %t, float %arg, i32 1
  %t5 = insertelement <4 x float> %t4, float %arg, i32 2
  %t6 = insertelement <4 x float> %t5, float %arg, i32 3
  %t7 = fadd <4 x float> %t6, %t4
  ret <4 x float> %t7
}

; One insert in the chain uses a variable lane index (%k), so the chain is
; expected to stay as insertelements.
define <4 x float> @bad6(float %arg, i32 %k) {
; CHECK-LABEL: @bad6(
; CHECK-NEXT:    [[T:%.*]] = insertelement <4 x float> poison, float [[ARG:%.*]], i64 0
; CHECK-NEXT:    [[T4:%.*]] = insertelement <4 x float> [[T]], float [[ARG]], i64 1
; CHECK-NEXT:    [[T5:%.*]] = insertelement <4 x float> [[T4]], float [[ARG]], i32 [[K:%.*]]
; CHECK-NEXT:    [[T6:%.*]] = insertelement <4 x float> [[T5]], float [[ARG]], i64 3
; CHECK-NEXT:    ret <4 x float> [[T6]]
;
  %t = insertelement <4 x float> poison, float %arg, i32 0
  %t4 = insertelement <4 x float> %t, float %arg, i32 1
  %t5 = insertelement <4 x float> %t4, float %arg, i32 %k
  %t6 = insertelement <4 x float> %t5, float %arg, i32 3
  ret <4 x float> %t6
}

; The first insert writes lane 1 (not lane 0) and has an extra use in %a1; the
; expected output keeps an insertelement chain rather than forming a splat.
define <4 x float> @bad7(float %v) {
; CHECK-LABEL: @bad7(
; CHECK-NEXT:    [[INS1:%.*]] = insertelement <4 x float> poison, float [[V:%.*]], i64 1
; CHECK-NEXT:    [[A1:%.*]] = fadd <4 x float> [[INS1]], [[INS1]]
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> [[INS1]], float [[V]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[V]], i64 2
; CHECK-NEXT:    [[INS4:%.*]] = insertelement <4 x float> [[TMP2]], float [[V]], i64 3
; CHECK-NEXT:    [[RES:%.*]] = fadd <4 x float> [[A1]], [[INS4]]
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
  %ins1 = insertelement <4 x float> poison, float %v, i32 1
  %a1 = fadd <4 x float> %ins1, %ins1
  %ins2 = insertelement<4 x float> %ins1, float %v, i32 2
  %ins3 = insertelement<4 x float> %ins2, float %v, i32 3
  %ins4 = insertelement<4 x float> %ins3, float %v, i32 0
  %res = fadd <4 x float> %a1, %ins4
  ret <4 x float> %res
}