; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt %s -passes='function(scalarizer<load-store>,dce)' -S | FileCheck %s
; The lit command above runs the scalarizer (with its load-store option) and
; then dce over every function, and compares the printed IR against the
; expected-output comments embedded in each function below.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

declare <4 x float> @ext(<4 x float>)
@g = global <4 x float> zeroinitializer

; f1: a loop that loads a <4 x float>, mixes it with the vector accumulator PHI
; through <2 x double> bitcasts and shufflevectors, fadds the two results,
; passes the sum to @ext, and stores a per-lane select back through %ptr.
; The expected output splits the PHI, load, fadd, fcmp, select and store into
; per-element scalar operations; no bitcast or shufflevector remains, and the
; fadds read the load/accumulator elements directly. Only the @ext call keeps
; a <4 x float> operand (rebuilt with insertelement) and result (re-split with
; extractelement).
define void @f1(<4 x float> %init, ptr %base, i32 %count) {
; CHECK-LABEL: @f1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x float> [[INIT:%.*]], i64 0
; CHECK-NEXT: [[INIT_I1:%.*]] = extractelement <4 x float> [[INIT]], i64 1
; CHECK-NEXT: [[INIT_I2:%.*]] = extractelement <4 x float> [[INIT]], i64 2
; CHECK-NEXT: [[INIT_I3:%.*]] = extractelement <4 x float> [[INIT]], i64 3
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[ACC_I0:%.*]] = phi float [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[ACC_I1:%.*]] = phi float [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[ACC_I2:%.*]] = phi float [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[ACC_I3:%.*]] = phi float [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[NEXTI]] = sub i32 [[I]], 1
; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x float>, ptr [[BASE:%.*]], i32 [[I]]
; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[PTR]], align 16
; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
; CHECK-NEXT: [[VAL_I1:%.*]] = load float, ptr [[PTR_I1]], align 4
; CHECK-NEXT: [[PTR_I2:%.*]] = getelementptr float, ptr [[PTR]], i32 2
; CHECK-NEXT: [[VAL_I2:%.*]] = load float, ptr [[PTR_I2]], align 8
; CHECK-NEXT: [[PTR_I3:%.*]] = getelementptr float, ptr [[PTR]], i32 3
; CHECK-NEXT: [[VAL_I3:%.*]] = load float, ptr [[PTR_I3]], align 4
; CHECK-NEXT: [[ADD_I0:%.*]] = fadd float [[VAL_I0]], [[VAL_I2]]
; CHECK-NEXT: [[ADD_I1:%.*]] = fadd float [[VAL_I1]], [[VAL_I3]]
; CHECK-NEXT: [[ADD_I2:%.*]] = fadd float [[ACC_I0]], [[ACC_I2]]
; CHECK-NEXT: [[ADD_I3:%.*]] = fadd float [[ACC_I1]], [[ACC_I3]]
; CHECK-NEXT: [[ADD_UPTO0:%.*]] = insertelement <4 x float> poison, float [[ADD_I0]], i64 0
; CHECK-NEXT: [[ADD_UPTO1:%.*]] = insertelement <4 x float> [[ADD_UPTO0]], float [[ADD_I1]], i64 1
; CHECK-NEXT: [[ADD_UPTO2:%.*]] = insertelement <4 x float> [[ADD_UPTO1]], float [[ADD_I2]], i64 2
; CHECK-NEXT: [[ADD:%.*]] = insertelement <4 x float> [[ADD_UPTO2]], float [[ADD_I3]], i64 3
; CHECK-NEXT: [[CALL:%.*]] = call <4 x float> @ext(<4 x float> [[ADD]])
; CHECK-NEXT: [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i64 0
; CHECK-NEXT: [[CMP_I0:%.*]] = fcmp ogt float [[CALL_I0]], 1.000000e+00
; CHECK-NEXT: [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i64 1
; CHECK-NEXT: [[CMP_I1:%.*]] = fcmp ogt float [[CALL_I1]], 2.000000e+00
; CHECK-NEXT: [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i64 2
; CHECK-NEXT: [[CMP_I2:%.*]] = fcmp ogt float [[CALL_I2]], 3.000000e+00
; CHECK-NEXT: [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i64 3
; CHECK-NEXT: [[CMP_I3:%.*]] = fcmp ogt float [[CALL_I3]], 4.000000e+00
; CHECK-NEXT: [[SEL_I0]] = select i1 [[CMP_I0]], float [[CALL_I0]], float 5.000000e+00
; CHECK-NEXT: [[SEL_I1]] = select i1 [[CMP_I1]], float [[CALL_I1]], float 6.000000e+00
; CHECK-NEXT: [[SEL_I2]] = select i1 [[CMP_I2]], float [[CALL_I2]], float 7.000000e+00
; CHECK-NEXT: [[SEL_I3]] = select i1 [[CMP_I3]], float [[CALL_I3]], float 8.000000e+00
; CHECK-NEXT: store float [[SEL_I0]], ptr [[PTR]], align 16
; CHECK-NEXT: store float [[SEL_I1]], ptr [[PTR_I1]], align 4
; CHECK-NEXT: store float [[SEL_I2]], ptr [[PTR_I2]], align 8
; CHECK-NEXT: store float [[SEL_I3]], ptr [[PTR_I3]], align 4
; CHECK-NEXT: [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
  br label %loop

loop:
  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
  %acc = phi <4 x float> [ %init, %entry ], [ %sel, %loop ]
  %nexti = sub i32 %i, 1

  %ptr = getelementptr <4 x float>, ptr %base, i32 %i
  %val = load <4 x float> , ptr %ptr
  %dval = bitcast <4 x float> %val to <2 x double>
  %dacc = bitcast <4 x float> %acc to <2 x double>
  %shuffle1 = shufflevector <2 x double> %dval, <2 x double> %dacc,
                            <2 x i32> <i32 0, i32 2>
  %shuffle2 = shufflevector <2 x double> %dval, <2 x double> %dacc,
                            <2 x i32> <i32 1, i32 3>
  %f1 = bitcast <2 x double> %shuffle1 to <4 x float>
  %f2 = bitcast <2 x double> %shuffle2 to <4 x float>
  %add = fadd <4 x float> %f1, %f2
  %call = call <4 x float> @ext(<4 x float> %add)
  %cmp = fcmp ogt <4 x float> %call,
                  <float 1.0, float 2.0, float 3.0, float 4.0>
  %sel = select <4 x i1> %cmp, <4 x float> %call,
                <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
  store <4 x float> %sel, ptr %ptr

  %test = icmp eq i32 %nexti, 0
  br i1 %test, label %loop, label %exit

exit:
  ret void
}

; f2: integer variant of the loop above. It loads a <4 x i8>, sign-extends it
; to <4 x i32>, adds the accumulator PHI, compares against per-lane constants,
; selects between the sum and a splat of %i (built with insertelement plus a
; zero-mask shufflevector), truncates and stores.
; The expected output is fully scalar: i8 loads/stores with alignments
; 4, 1, 2, 1, and the splat replaced by direct uses of %i in each select.
define void @f2(<4 x i32> %init, ptr %base, i32 %count) {
; CHECK-LABEL: @f2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[INIT_I0:%.*]] = extractelement <4 x i32> [[INIT:%.*]], i64 0
; CHECK-NEXT: [[INIT_I1:%.*]] = extractelement <4 x i32> [[INIT]], i64 1
; CHECK-NEXT: [[INIT_I2:%.*]] = extractelement <4 x i32> [[INIT]], i64 2
; CHECK-NEXT: [[INIT_I3:%.*]] = extractelement <4 x i32> [[INIT]], i64 3
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[ACC_I0:%.*]] = phi i32 [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[ACC_I1:%.*]] = phi i32 [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[ACC_I2:%.*]] = phi i32 [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[ACC_I3:%.*]] = phi i32 [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[NEXTI]] = sub i32 [[I]], 1
; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x i8>, ptr [[BASE:%.*]], i32 [[I]]
; CHECK-NEXT: [[VAL_I0:%.*]] = load i8, ptr [[PTR]], align 4
; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr i8, ptr [[PTR]], i32 1
; CHECK-NEXT: [[VAL_I1:%.*]] = load i8, ptr [[PTR_I1]], align 1
; CHECK-NEXT: [[PTR_I2:%.*]] = getelementptr i8, ptr [[PTR]], i32 2
; CHECK-NEXT: [[VAL_I2:%.*]] = load i8, ptr [[PTR_I2]], align 2
; CHECK-NEXT: [[PTR_I3:%.*]] = getelementptr i8, ptr [[PTR]], i32 3
; CHECK-NEXT: [[VAL_I3:%.*]] = load i8, ptr [[PTR_I3]], align 1
; CHECK-NEXT: [[EXT_I0:%.*]] = sext i8 [[VAL_I0]] to i32
; CHECK-NEXT: [[EXT_I1:%.*]] = sext i8 [[VAL_I1]] to i32
; CHECK-NEXT: [[EXT_I2:%.*]] = sext i8 [[VAL_I2]] to i32
; CHECK-NEXT: [[EXT_I3:%.*]] = sext i8 [[VAL_I3]] to i32
; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[EXT_I0]], [[ACC_I0]]
; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[EXT_I1]], [[ACC_I1]]
; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[EXT_I2]], [[ACC_I2]]
; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[EXT_I3]], [[ACC_I3]]
; CHECK-NEXT: [[CMP_I0:%.*]] = icmp slt i32 [[ADD_I0]], -10
; CHECK-NEXT: [[CMP_I1:%.*]] = icmp slt i32 [[ADD_I1]], -11
; CHECK-NEXT: [[CMP_I2:%.*]] = icmp slt i32 [[ADD_I2]], -12
; CHECK-NEXT: [[CMP_I3:%.*]] = icmp slt i32 [[ADD_I3]], -13
; CHECK-NEXT: [[SEL_I0]] = select i1 [[CMP_I0]], i32 [[ADD_I0]], i32 [[I]]
; CHECK-NEXT: [[SEL_I1]] = select i1 [[CMP_I1]], i32 [[ADD_I1]], i32 [[I]]
; CHECK-NEXT: [[SEL_I2]] = select i1 [[CMP_I2]], i32 [[ADD_I2]], i32 [[I]]
; CHECK-NEXT: [[SEL_I3]] = select i1 [[CMP_I3]], i32 [[ADD_I3]], i32 [[I]]
; CHECK-NEXT: [[TRUNC_I0:%.*]] = trunc i32 [[SEL_I0]] to i8
; CHECK-NEXT: [[TRUNC_I1:%.*]] = trunc i32 [[SEL_I1]] to i8
; CHECK-NEXT: [[TRUNC_I2:%.*]] = trunc i32 [[SEL_I2]] to i8
; CHECK-NEXT: [[TRUNC_I3:%.*]] = trunc i32 [[SEL_I3]] to i8
; CHECK-NEXT: store i8 [[TRUNC_I0]], ptr [[PTR]], align 4
; CHECK-NEXT: store i8 [[TRUNC_I1]], ptr [[PTR_I1]], align 1
; CHECK-NEXT: store i8 [[TRUNC_I2]], ptr [[PTR_I2]], align 2
; CHECK-NEXT: store i8 [[TRUNC_I3]], ptr [[PTR_I3]], align 1
; CHECK-NEXT: [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
  br label %loop

loop:
  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
  %acc = phi <4 x i32> [ %init, %entry ], [ %sel, %loop ]
  %nexti = sub i32 %i, 1

  %ptr = getelementptr <4 x i8>, ptr %base, i32 %i
  %val = load <4 x i8> , ptr %ptr
  %ext = sext <4 x i8> %val to <4 x i32>
  %add = add <4 x i32> %ext, %acc
  %cmp = icmp slt <4 x i32> %add, <i32 -10, i32 -11, i32 -12, i32 -13>
  %single = insertelement <4 x i32> undef, i32 %i, i32 0
  %limit = shufflevector <4 x i32> %single, <4 x i32> undef,
                         <4 x i32> zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x i32> %add, <4 x i32> %limit
  %trunc = trunc <4 x i32> %sel to <4 x i8>
  store <4 x i8> %trunc, ptr %ptr

  %test = icmp eq i32 %nexti, 0
  br i1 %test, label %loop, label %exit

exit:
  ret void
}

; Check that !tbaa information is preserved.
; f3: a <4 x i32> load, add and store where the load carries !tbaa !1 and the
; store carries !tbaa !2. In the expected output the four scalar loads all
; share one !tbaa tag and the four scalar stores share another; the element
; alignments are 16, 4, 8, 4.
define void @f3(ptr %src, ptr %dst) {
; CHECK-LABEL: @f3(
; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 1
; CHECK-NEXT: [[DST_I2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
; CHECK-NEXT: [[DST_I3:%.*]] = getelementptr i32, ptr [[DST]], i32 3
; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC:%.*]], align 16, !tbaa [[TBAA0:![0-9]+]]
; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr i32, ptr [[SRC]], i32 1
; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, ptr [[SRC_I1]], align 4, !tbaa [[TBAA0]]
; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr i32, ptr [[SRC]], i32 2
; CHECK-NEXT: [[VAL_I2:%.*]] = load i32, ptr [[SRC_I2]], align 8, !tbaa [[TBAA0]]
; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr i32, ptr [[SRC]], i32 3
; CHECK-NEXT: [[VAL_I3:%.*]] = load i32, ptr [[SRC_I3]], align 4, !tbaa [[TBAA0]]
; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
; CHECK-NEXT: store i32 [[ADD_I0]], ptr [[DST]], align 16, !tbaa [[TBAA3:![0-9]+]]
; CHECK-NEXT: store i32 [[ADD_I1]], ptr [[DST_I1]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT: store i32 [[ADD_I2]], ptr [[DST_I2]], align 8, !tbaa [[TBAA3]]
; CHECK-NEXT: store i32 [[ADD_I3]], ptr [[DST_I3]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT: ret void
;
  %val = load <4 x i32> , ptr %src, !tbaa !1
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, ptr %dst, !tbaa !2
  ret void
}

; Check that !tbaa.struct information is preserved.
; f4: same load/add/store shape as a plain <4 x i32> copy-and-double, but both
; the vector load and the vector store carry !tbaa.struct !5. In the expected
; output all eight scalar accesses reference the same !tbaa.struct node.
define void @f4(ptr %src, ptr %dst) {
; CHECK-LABEL: @f4(
; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 1
; CHECK-NEXT: [[DST_I2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
; CHECK-NEXT: [[DST_I3:%.*]] = getelementptr i32, ptr [[DST]], i32 3
; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC:%.*]], align 16, !tbaa.struct [[TBAA_STRUCT5:![0-9]+]]
; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr i32, ptr [[SRC]], i32 1
; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, ptr [[SRC_I1]], align 4, !tbaa.struct [[TBAA_STRUCT5]]
; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr i32, ptr [[SRC]], i32 2
; CHECK-NEXT: [[VAL_I2:%.*]] = load i32, ptr [[SRC_I2]], align 8, !tbaa.struct [[TBAA_STRUCT5]]
; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr i32, ptr [[SRC]], i32 3
; CHECK-NEXT: [[VAL_I3:%.*]] = load i32, ptr [[SRC_I3]], align 4, !tbaa.struct [[TBAA_STRUCT5]]
; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
; CHECK-NEXT: store i32 [[ADD_I0]], ptr [[DST]], align 16, !tbaa.struct [[TBAA_STRUCT5]]
; CHECK-NEXT: store i32 [[ADD_I1]], ptr [[DST_I1]], align 4, !tbaa.struct [[TBAA_STRUCT5]]
; CHECK-NEXT: store i32 [[ADD_I2]], ptr [[DST_I2]], align 8, !tbaa.struct [[TBAA_STRUCT5]]
; CHECK-NEXT: store i32 [[ADD_I3]], ptr [[DST_I3]], align 4, !tbaa.struct [[TBAA_STRUCT5]]
; CHECK-NEXT: ret void
;
  %val = load <4 x i32> , ptr %src, !tbaa.struct !5
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, ptr %dst, !tbaa.struct !5
  ret void
}

; Check that llvm.access.group information is preserved.
; f5: a loop whose <4 x i32> load and store both carry !llvm.access.group !13
; and whose back-edge branch carries !llvm.loop !3. In the expected output
; every scalar load and store keeps an !llvm.access.group attachment and the
; branch keeps its !llvm.loop attachment.
define void @f5(i32 %count, ptr %src, ptr %dst) {
; CHECK-LABEL: @f5(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT_INDEX:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[THIS_SRC:%.*]] = getelementptr <4 x i32>, ptr [[SRC:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[THIS_SRC_I1:%.*]] = getelementptr i32, ptr [[THIS_SRC]], i32 1
; CHECK-NEXT: [[THIS_SRC_I2:%.*]] = getelementptr i32, ptr [[THIS_SRC]], i32 2
; CHECK-NEXT: [[THIS_SRC_I3:%.*]] = getelementptr i32, ptr [[THIS_SRC]], i32 3
; CHECK-NEXT: [[THIS_DST:%.*]] = getelementptr <4 x i32>, ptr [[DST:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[THIS_DST_I1:%.*]] = getelementptr i32, ptr [[THIS_DST]], i32 1
; CHECK-NEXT: [[THIS_DST_I2:%.*]] = getelementptr i32, ptr [[THIS_DST]], i32 2
; CHECK-NEXT: [[THIS_DST_I3:%.*]] = getelementptr i32, ptr [[THIS_DST]], i32 3
; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[THIS_SRC]], align 16, !llvm.access.group [[ACC_GRP6:![0-9]+]]
; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, ptr [[THIS_SRC_I1]], align 4, !llvm.access.group [[ACC_GRP6]]
; CHECK-NEXT: [[VAL_I2:%.*]] = load i32, ptr [[THIS_SRC_I2]], align 8, !llvm.access.group [[ACC_GRP6]]
; CHECK-NEXT: [[VAL_I3:%.*]] = load i32, ptr [[THIS_SRC_I3]], align 4, !llvm.access.group [[ACC_GRP6]]
; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
; CHECK-NEXT: store i32 [[ADD_I0]], ptr [[THIS_DST]], align 16, !llvm.access.group [[ACC_GRP6]]
; CHECK-NEXT: store i32 [[ADD_I1]], ptr [[THIS_DST_I1]], align 4, !llvm.access.group [[ACC_GRP6]]
; CHECK-NEXT: store i32 [[ADD_I2]], ptr [[THIS_DST_I2]], align 8, !llvm.access.group [[ACC_GRP6]]
; CHECK-NEXT: store i32 [[ADD_I3]], ptr [[THIS_DST_I3]], align 4, !llvm.access.group [[ACC_GRP6]]
; CHECK-NEXT: [[NEXT_INDEX]] = add i32 [[INDEX]], -1
; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ne i32 [[NEXT_INDEX]], [[COUNT:%.*]]
; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[END:%.*]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: end:
; CHECK-NEXT: ret void
;
entry:
  br label %loop

loop:
  %index = phi i32 [ 0, %entry ], [ %next_index, %loop ]
  %this_src = getelementptr <4 x i32>, ptr %src, i32 %index
  %this_dst = getelementptr <4 x i32>, ptr %dst, i32 %index
  %val = load <4 x i32> , ptr %this_src, !llvm.access.group !13
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, ptr %this_dst, !llvm.access.group !13
  %next_index = add i32 %index, -1
  %continue = icmp ne i32 %next_index, %count
  br i1 %continue, label %loop, label %end, !llvm.loop !3

end:
  ret void
}

; Check that fpmath information is preserved.
; f6: a single <4 x float> fadd with !fpmath !4 whose result is returned. In
; the expected output each of the four scalar fadds keeps an !fpmath
; attachment, and the vector return value is rebuilt with insertelement.
define <4 x float> @f6(<4 x float> %x) {
; CHECK-LABEL: @f6(
; CHECK-NEXT: [[X_I0:%.*]] = extractelement <4 x float> [[X:%.*]], i64 0
; CHECK-NEXT: [[RES_I0:%.*]] = fadd float [[X_I0]], 1.000000e+00, !fpmath [[META9:![0-9]+]]
; CHECK-NEXT: [[X_I1:%.*]] = extractelement <4 x float> [[X]], i64 1
; CHECK-NEXT: [[RES_I1:%.*]] = fadd float [[X_I1]], 2.000000e+00, !fpmath [[META9]]
; CHECK-NEXT: [[X_I2:%.*]] = extractelement <4 x float> [[X]], i64 2
; CHECK-NEXT: [[RES_I2:%.*]] = fadd float [[X_I2]], 3.000000e+00, !fpmath [[META9]]
; CHECK-NEXT: [[X_I3:%.*]] = extractelement <4 x float> [[X]], i64 3
; CHECK-NEXT: [[RES_I3:%.*]] = fadd float [[X_I3]], 4.000000e+00, !fpmath [[META9]]
; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <4 x float> poison, float [[RES_I0]], i64 0
; CHECK-NEXT: [[RES_UPTO1:%.*]] = insertelement <4 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1
; CHECK-NEXT: [[RES_UPTO2:%.*]] = insertelement <4 x float> [[RES_UPTO1]], float [[RES_I2]], i64 2
; CHECK-NEXT: [[RES:%.*]] = insertelement <4 x float> [[RES_UPTO2]], float [[RES_I3]], i64 3
; CHECK-NEXT: ret <4 x float> [[RES]]
;
  %res = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>,
    !fpmath !4
  ret <4 x float> %res
}

; Check that random metadata isn't kept.
; f7: the vector load and store carry a custom !foo attachment; in the
; expected output the scalar loads and stores have no metadata attached.
define void @f7(ptr %src, ptr %dst) {
; CHECK-LABEL: @f7(
; CHECK-NEXT: [[DST_I1:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 1
; CHECK-NEXT: [[DST_I2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
; CHECK-NEXT: [[DST_I3:%.*]] = getelementptr i32, ptr [[DST]], i32 3
; CHECK-NEXT: [[VAL_I0:%.*]] = load i32, ptr [[SRC:%.*]], align 16
; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr i32, ptr [[SRC]], i32 1
; CHECK-NEXT: [[VAL_I1:%.*]] = load i32, ptr [[SRC_I1]], align 4
; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr i32, ptr [[SRC]], i32 2
; CHECK-NEXT: [[VAL_I2:%.*]] = load i32, ptr [[SRC_I2]], align 8
; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr i32, ptr [[SRC]], i32 3
; CHECK-NEXT: [[VAL_I3:%.*]] = load i32, ptr [[SRC_I3]], align 4
; CHECK-NEXT: [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
; CHECK-NEXT: [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
; CHECK-NEXT: [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
; CHECK-NEXT: [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
; CHECK-NEXT: store i32 [[ADD_I0]], ptr [[DST]], align 16
; CHECK-NEXT: store i32 [[ADD_I1]], ptr [[DST_I1]], align 4
; CHECK-NEXT: store i32 [[ADD_I2]], ptr [[DST_I2]], align 8
; CHECK-NEXT: store i32 [[ADD_I3]], ptr [[DST_I3]], align 4
; CHECK-NEXT: ret void
;
  %val = load <4 x i32> , ptr %src, !foo !5
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, ptr %dst, !foo !5
  ret void
}

; Test GEP with vectors.
; f8: a vector getelementptr whose index vector has lanes 0 and 2 overwritten
; with the constant 100 and whose pointer vector has lane 1 overwritten with
; %other. The expected output is four scalar GEPs that use the constant and
; %other directly, so only pointer lanes 0, 2, 3 and index lanes 1, 3 are
; extracted from the incoming vectors. The resulting pointers are stored
; element-wise with alignments 32, 8, 16, 8.
define void @f8(ptr %dest, <4 x ptr> %ptr0, <4 x i32> %i0,
; CHECK-LABEL: @f8(
; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr ptr, ptr [[DEST:%.*]], i32 1
; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr ptr, ptr [[DEST]], i32 2
; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr ptr, ptr [[DEST]], i32 3
; CHECK-NEXT: [[PTR0_I0:%.*]] = extractelement <4 x ptr> [[PTR0:%.*]], i64 0
; CHECK-NEXT: [[PTR0_I2:%.*]] = extractelement <4 x ptr> [[PTR0]], i64 2
; CHECK-NEXT: [[PTR0_I3:%.*]] = extractelement <4 x ptr> [[PTR0]], i64 3
; CHECK-NEXT: [[I0_I1:%.*]] = extractelement <4 x i32> [[I0:%.*]], i64 1
; CHECK-NEXT: [[I0_I3:%.*]] = extractelement <4 x i32> [[I0]], i64 3
; CHECK-NEXT: [[VAL_I0:%.*]] = getelementptr float, ptr [[PTR0_I0]], i32 100
; CHECK-NEXT: [[VAL_I1:%.*]] = getelementptr float, ptr [[OTHER:%.*]], i32 [[I0_I1]]
; CHECK-NEXT: [[VAL_I2:%.*]] = getelementptr float, ptr [[PTR0_I2]], i32 100
; CHECK-NEXT: [[VAL_I3:%.*]] = getelementptr float, ptr [[PTR0_I3]], i32 [[I0_I3]]
; CHECK-NEXT: store ptr [[VAL_I0]], ptr [[DEST]], align 32
; CHECK-NEXT: store ptr [[VAL_I1]], ptr [[DEST_I1]], align 8
; CHECK-NEXT: store ptr [[VAL_I2]], ptr [[DEST_I2]], align 16
; CHECK-NEXT: store ptr [[VAL_I3]], ptr [[DEST_I3]], align 8
; CHECK-NEXT: ret void
;
                ptr %other) {
  %i1 = insertelement <4 x i32> %i0, i32 100, i32 0
  %i2 = insertelement <4 x i32> %i1, i32 100, i32 2
  %ptr1 = insertelement <4 x ptr> %ptr0, ptr %other, i32 1
  %val = getelementptr float, <4 x ptr> %ptr1, <4 x i32> %i2
  store <4 x ptr> %val, ptr %dest
  ret void
}

; Test the handling of unaligned loads.
; f9: a <4 x float> copy where the vector load is only 4-byte aligned and the
; vector store only 8-byte aligned. In the expected output every scalar load
; is align 4, while the scalar stores alternate align 8, 4, 8, 4.
define void @f9(ptr %dest, ptr %src) {
; CHECK-LABEL: @f9(
; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float, ptr [[DEST:%.*]], i32 1
; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr float, ptr [[DEST]], i32 2
; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr float, ptr [[DEST]], i32 3
; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[SRC:%.*]], align 4
; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr float, ptr [[SRC]], i32 1
; CHECK-NEXT: [[VAL_I1:%.*]] = load float, ptr [[SRC_I1]], align 4
; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr float, ptr [[SRC]], i32 2
; CHECK-NEXT: [[VAL_I2:%.*]] = load float, ptr [[SRC_I2]], align 4
; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr float, ptr [[SRC]], i32 3
; CHECK-NEXT: [[VAL_I3:%.*]] = load float, ptr [[SRC_I3]], align 4
; CHECK-NEXT: store float [[VAL_I0]], ptr [[DEST]], align 8
; CHECK-NEXT: store float [[VAL_I1]], ptr [[DEST_I1]], align 4
; CHECK-NEXT: store float [[VAL_I2]], ptr [[DEST_I2]], align 8
; CHECK-NEXT: store float [[VAL_I3]], ptr [[DEST_I3]], align 4
; CHECK-NEXT: ret void
;
  %val = load <4 x float> , ptr %src, align 4
  store <4 x float> %val, ptr %dest, align 8
  ret void
}

; ...and again with subelement alignment.
; f10: a <4 x float> copy where the vector load is align 1 and the vector
; store is align 2, i.e. both below the 4-byte element size. In the expected
; output every scalar load stays align 1 and every scalar store stays align 2.
define void @f10(ptr %dest, ptr %src) {
; CHECK-LABEL: @f10(
; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr float, ptr [[DEST:%.*]], i32 1
; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr float, ptr [[DEST]], i32 2
; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr float, ptr [[DEST]], i32 3
; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[SRC:%.*]], align 1
; CHECK-NEXT: [[SRC_I1:%.*]] = getelementptr float, ptr [[SRC]], i32 1
; CHECK-NEXT: [[VAL_I1:%.*]] = load float, ptr [[SRC_I1]], align 1
; CHECK-NEXT: [[SRC_I2:%.*]] = getelementptr float, ptr [[SRC]], i32 2
; CHECK-NEXT: [[VAL_I2:%.*]] = load float, ptr [[SRC_I2]], align 1
; CHECK-NEXT: [[SRC_I3:%.*]] = getelementptr float, ptr [[SRC]], i32 3
; CHECK-NEXT: [[VAL_I3:%.*]] = load float, ptr [[SRC_I3]], align 1
; CHECK-NEXT: store float [[VAL_I0]], ptr [[DEST]], align 2
; CHECK-NEXT: store float [[VAL_I1]], ptr [[DEST_I1]], align 2
; CHECK-NEXT: store float [[VAL_I2]], ptr [[DEST_I2]], align 2
; CHECK-NEXT: store float [[VAL_I3]], ptr [[DEST_I3]], align 2
; CHECK-NEXT: ret void
;
  %val = load <4 x float> , ptr %src, align 1
  store <4 x float> %val, ptr %dest, align 2
  ret void
}

; Test that sub-byte loads aren't scalarized.
412define void @f11(ptr %dest, ptr %src0) { 413; CHECK-LABEL: @f11( 414; CHECK-NEXT: [[SRC1:%.*]] = getelementptr <32 x i1>, ptr [[SRC0:%.*]], i32 1 415; CHECK-NEXT: [[VAL0:%.*]] = load <32 x i1>, ptr [[SRC0]], align 4 416; CHECK-NEXT: [[VAL0_I0:%.*]] = extractelement <32 x i1> [[VAL0]], i64 0 417; CHECK-NEXT: [[VAL0_I1:%.*]] = extractelement <32 x i1> [[VAL0]], i64 1 418; CHECK-NEXT: [[VAL0_I2:%.*]] = extractelement <32 x i1> [[VAL0]], i64 2 419; CHECK-NEXT: [[VAL0_I3:%.*]] = extractelement <32 x i1> [[VAL0]], i64 3 420; CHECK-NEXT: [[VAL0_I4:%.*]] = extractelement <32 x i1> [[VAL0]], i64 4 421; CHECK-NEXT: [[VAL0_I5:%.*]] = extractelement <32 x i1> [[VAL0]], i64 5 422; CHECK-NEXT: [[VAL0_I6:%.*]] = extractelement <32 x i1> [[VAL0]], i64 6 423; CHECK-NEXT: [[VAL0_I7:%.*]] = extractelement <32 x i1> [[VAL0]], i64 7 424; CHECK-NEXT: [[VAL0_I8:%.*]] = extractelement <32 x i1> [[VAL0]], i64 8 425; CHECK-NEXT: [[VAL0_I9:%.*]] = extractelement <32 x i1> [[VAL0]], i64 9 426; CHECK-NEXT: [[VAL0_I10:%.*]] = extractelement <32 x i1> [[VAL0]], i64 10 427; CHECK-NEXT: [[VAL0_I11:%.*]] = extractelement <32 x i1> [[VAL0]], i64 11 428; CHECK-NEXT: [[VAL0_I12:%.*]] = extractelement <32 x i1> [[VAL0]], i64 12 429; CHECK-NEXT: [[VAL0_I13:%.*]] = extractelement <32 x i1> [[VAL0]], i64 13 430; CHECK-NEXT: [[VAL0_I14:%.*]] = extractelement <32 x i1> [[VAL0]], i64 14 431; CHECK-NEXT: [[VAL0_I15:%.*]] = extractelement <32 x i1> [[VAL0]], i64 15 432; CHECK-NEXT: [[VAL0_I16:%.*]] = extractelement <32 x i1> [[VAL0]], i64 16 433; CHECK-NEXT: [[VAL0_I17:%.*]] = extractelement <32 x i1> [[VAL0]], i64 17 434; CHECK-NEXT: [[VAL0_I18:%.*]] = extractelement <32 x i1> [[VAL0]], i64 18 435; CHECK-NEXT: [[VAL0_I19:%.*]] = extractelement <32 x i1> [[VAL0]], i64 19 436; CHECK-NEXT: [[VAL0_I20:%.*]] = extractelement <32 x i1> [[VAL0]], i64 20 437; CHECK-NEXT: [[VAL0_I21:%.*]] = extractelement <32 x i1> [[VAL0]], i64 21 438; CHECK-NEXT: [[VAL0_I22:%.*]] = extractelement <32 x i1> [[VAL0]], i64 22 439; 
CHECK-NEXT: [[VAL0_I23:%.*]] = extractelement <32 x i1> [[VAL0]], i64 23 440; CHECK-NEXT: [[VAL0_I24:%.*]] = extractelement <32 x i1> [[VAL0]], i64 24 441; CHECK-NEXT: [[VAL0_I25:%.*]] = extractelement <32 x i1> [[VAL0]], i64 25 442; CHECK-NEXT: [[VAL0_I26:%.*]] = extractelement <32 x i1> [[VAL0]], i64 26 443; CHECK-NEXT: [[VAL0_I27:%.*]] = extractelement <32 x i1> [[VAL0]], i64 27 444; CHECK-NEXT: [[VAL0_I28:%.*]] = extractelement <32 x i1> [[VAL0]], i64 28 445; CHECK-NEXT: [[VAL0_I29:%.*]] = extractelement <32 x i1> [[VAL0]], i64 29 446; CHECK-NEXT: [[VAL0_I30:%.*]] = extractelement <32 x i1> [[VAL0]], i64 30 447; CHECK-NEXT: [[VAL0_I31:%.*]] = extractelement <32 x i1> [[VAL0]], i64 31 448; CHECK-NEXT: [[VAL1:%.*]] = load <32 x i1>, ptr [[SRC1]], align 4 449; CHECK-NEXT: [[VAL1_I0:%.*]] = extractelement <32 x i1> [[VAL1]], i64 0 450; CHECK-NEXT: [[AND_I0:%.*]] = and i1 [[VAL0_I0]], [[VAL1_I0]] 451; CHECK-NEXT: [[VAL1_I1:%.*]] = extractelement <32 x i1> [[VAL1]], i64 1 452; CHECK-NEXT: [[AND_I1:%.*]] = and i1 [[VAL0_I1]], [[VAL1_I1]] 453; CHECK-NEXT: [[VAL1_I2:%.*]] = extractelement <32 x i1> [[VAL1]], i64 2 454; CHECK-NEXT: [[AND_I2:%.*]] = and i1 [[VAL0_I2]], [[VAL1_I2]] 455; CHECK-NEXT: [[VAL1_I3:%.*]] = extractelement <32 x i1> [[VAL1]], i64 3 456; CHECK-NEXT: [[AND_I3:%.*]] = and i1 [[VAL0_I3]], [[VAL1_I3]] 457; CHECK-NEXT: [[VAL1_I4:%.*]] = extractelement <32 x i1> [[VAL1]], i64 4 458; CHECK-NEXT: [[AND_I4:%.*]] = and i1 [[VAL0_I4]], [[VAL1_I4]] 459; CHECK-NEXT: [[VAL1_I5:%.*]] = extractelement <32 x i1> [[VAL1]], i64 5 460; CHECK-NEXT: [[AND_I5:%.*]] = and i1 [[VAL0_I5]], [[VAL1_I5]] 461; CHECK-NEXT: [[VAL1_I6:%.*]] = extractelement <32 x i1> [[VAL1]], i64 6 462; CHECK-NEXT: [[AND_I6:%.*]] = and i1 [[VAL0_I6]], [[VAL1_I6]] 463; CHECK-NEXT: [[VAL1_I7:%.*]] = extractelement <32 x i1> [[VAL1]], i64 7 464; CHECK-NEXT: [[AND_I7:%.*]] = and i1 [[VAL0_I7]], [[VAL1_I7]] 465; CHECK-NEXT: [[VAL1_I8:%.*]] = extractelement <32 x i1> [[VAL1]], i64 8 466; CHECK-NEXT: 
[[AND_I8:%.*]] = and i1 [[VAL0_I8]], [[VAL1_I8]] 467; CHECK-NEXT: [[VAL1_I9:%.*]] = extractelement <32 x i1> [[VAL1]], i64 9 468; CHECK-NEXT: [[AND_I9:%.*]] = and i1 [[VAL0_I9]], [[VAL1_I9]] 469; CHECK-NEXT: [[VAL1_I10:%.*]] = extractelement <32 x i1> [[VAL1]], i64 10 470; CHECK-NEXT: [[AND_I10:%.*]] = and i1 [[VAL0_I10]], [[VAL1_I10]] 471; CHECK-NEXT: [[VAL1_I11:%.*]] = extractelement <32 x i1> [[VAL1]], i64 11 472; CHECK-NEXT: [[AND_I11:%.*]] = and i1 [[VAL0_I11]], [[VAL1_I11]] 473; CHECK-NEXT: [[VAL1_I12:%.*]] = extractelement <32 x i1> [[VAL1]], i64 12 474; CHECK-NEXT: [[AND_I12:%.*]] = and i1 [[VAL0_I12]], [[VAL1_I12]] 475; CHECK-NEXT: [[VAL1_I13:%.*]] = extractelement <32 x i1> [[VAL1]], i64 13 476; CHECK-NEXT: [[AND_I13:%.*]] = and i1 [[VAL0_I13]], [[VAL1_I13]] 477; CHECK-NEXT: [[VAL1_I14:%.*]] = extractelement <32 x i1> [[VAL1]], i64 14 478; CHECK-NEXT: [[AND_I14:%.*]] = and i1 [[VAL0_I14]], [[VAL1_I14]] 479; CHECK-NEXT: [[VAL1_I15:%.*]] = extractelement <32 x i1> [[VAL1]], i64 15 480; CHECK-NEXT: [[AND_I15:%.*]] = and i1 [[VAL0_I15]], [[VAL1_I15]] 481; CHECK-NEXT: [[VAL1_I16:%.*]] = extractelement <32 x i1> [[VAL1]], i64 16 482; CHECK-NEXT: [[AND_I16:%.*]] = and i1 [[VAL0_I16]], [[VAL1_I16]] 483; CHECK-NEXT: [[VAL1_I17:%.*]] = extractelement <32 x i1> [[VAL1]], i64 17 484; CHECK-NEXT: [[AND_I17:%.*]] = and i1 [[VAL0_I17]], [[VAL1_I17]] 485; CHECK-NEXT: [[VAL1_I18:%.*]] = extractelement <32 x i1> [[VAL1]], i64 18 486; CHECK-NEXT: [[AND_I18:%.*]] = and i1 [[VAL0_I18]], [[VAL1_I18]] 487; CHECK-NEXT: [[VAL1_I19:%.*]] = extractelement <32 x i1> [[VAL1]], i64 19 488; CHECK-NEXT: [[AND_I19:%.*]] = and i1 [[VAL0_I19]], [[VAL1_I19]] 489; CHECK-NEXT: [[VAL1_I20:%.*]] = extractelement <32 x i1> [[VAL1]], i64 20 490; CHECK-NEXT: [[AND_I20:%.*]] = and i1 [[VAL0_I20]], [[VAL1_I20]] 491; CHECK-NEXT: [[VAL1_I21:%.*]] = extractelement <32 x i1> [[VAL1]], i64 21 492; CHECK-NEXT: [[AND_I21:%.*]] = and i1 [[VAL0_I21]], [[VAL1_I21]] 493; CHECK-NEXT: [[VAL1_I22:%.*]] = 
extractelement <32 x i1> [[VAL1]], i64 22 494; CHECK-NEXT: [[AND_I22:%.*]] = and i1 [[VAL0_I22]], [[VAL1_I22]] 495; CHECK-NEXT: [[VAL1_I23:%.*]] = extractelement <32 x i1> [[VAL1]], i64 23 496; CHECK-NEXT: [[AND_I23:%.*]] = and i1 [[VAL0_I23]], [[VAL1_I23]] 497; CHECK-NEXT: [[VAL1_I24:%.*]] = extractelement <32 x i1> [[VAL1]], i64 24 498; CHECK-NEXT: [[AND_I24:%.*]] = and i1 [[VAL0_I24]], [[VAL1_I24]] 499; CHECK-NEXT: [[VAL1_I25:%.*]] = extractelement <32 x i1> [[VAL1]], i64 25 500; CHECK-NEXT: [[AND_I25:%.*]] = and i1 [[VAL0_I25]], [[VAL1_I25]] 501; CHECK-NEXT: [[VAL1_I26:%.*]] = extractelement <32 x i1> [[VAL1]], i64 26 502; CHECK-NEXT: [[AND_I26:%.*]] = and i1 [[VAL0_I26]], [[VAL1_I26]] 503; CHECK-NEXT: [[VAL1_I27:%.*]] = extractelement <32 x i1> [[VAL1]], i64 27 504; CHECK-NEXT: [[AND_I27:%.*]] = and i1 [[VAL0_I27]], [[VAL1_I27]] 505; CHECK-NEXT: [[VAL1_I28:%.*]] = extractelement <32 x i1> [[VAL1]], i64 28 506; CHECK-NEXT: [[AND_I28:%.*]] = and i1 [[VAL0_I28]], [[VAL1_I28]] 507; CHECK-NEXT: [[VAL1_I29:%.*]] = extractelement <32 x i1> [[VAL1]], i64 29 508; CHECK-NEXT: [[AND_I29:%.*]] = and i1 [[VAL0_I29]], [[VAL1_I29]] 509; CHECK-NEXT: [[VAL1_I30:%.*]] = extractelement <32 x i1> [[VAL1]], i64 30 510; CHECK-NEXT: [[AND_I30:%.*]] = and i1 [[VAL0_I30]], [[VAL1_I30]] 511; CHECK-NEXT: [[VAL1_I31:%.*]] = extractelement <32 x i1> [[VAL1]], i64 31 512; CHECK-NEXT: [[AND_I31:%.*]] = and i1 [[VAL0_I31]], [[VAL1_I31]] 513; CHECK-NEXT: [[AND_UPTO0:%.*]] = insertelement <32 x i1> poison, i1 [[AND_I0]], i64 0 514; CHECK-NEXT: [[AND_UPTO1:%.*]] = insertelement <32 x i1> [[AND_UPTO0]], i1 [[AND_I1]], i64 1 515; CHECK-NEXT: [[AND_UPTO2:%.*]] = insertelement <32 x i1> [[AND_UPTO1]], i1 [[AND_I2]], i64 2 516; CHECK-NEXT: [[AND_UPTO3:%.*]] = insertelement <32 x i1> [[AND_UPTO2]], i1 [[AND_I3]], i64 3 517; CHECK-NEXT: [[AND_UPTO4:%.*]] = insertelement <32 x i1> [[AND_UPTO3]], i1 [[AND_I4]], i64 4 518; CHECK-NEXT: [[AND_UPTO5:%.*]] = insertelement <32 x i1> [[AND_UPTO4]], i1 
[[AND_I5]], i64 5 519; CHECK-NEXT: [[AND_UPTO6:%.*]] = insertelement <32 x i1> [[AND_UPTO5]], i1 [[AND_I6]], i64 6 520; CHECK-NEXT: [[AND_UPTO7:%.*]] = insertelement <32 x i1> [[AND_UPTO6]], i1 [[AND_I7]], i64 7 521; CHECK-NEXT: [[AND_UPTO8:%.*]] = insertelement <32 x i1> [[AND_UPTO7]], i1 [[AND_I8]], i64 8 522; CHECK-NEXT: [[AND_UPTO9:%.*]] = insertelement <32 x i1> [[AND_UPTO8]], i1 [[AND_I9]], i64 9 523; CHECK-NEXT: [[AND_UPTO10:%.*]] = insertelement <32 x i1> [[AND_UPTO9]], i1 [[AND_I10]], i64 10 524; CHECK-NEXT: [[AND_UPTO11:%.*]] = insertelement <32 x i1> [[AND_UPTO10]], i1 [[AND_I11]], i64 11 525; CHECK-NEXT: [[AND_UPTO12:%.*]] = insertelement <32 x i1> [[AND_UPTO11]], i1 [[AND_I12]], i64 12 526; CHECK-NEXT: [[AND_UPTO13:%.*]] = insertelement <32 x i1> [[AND_UPTO12]], i1 [[AND_I13]], i64 13 527; CHECK-NEXT: [[AND_UPTO14:%.*]] = insertelement <32 x i1> [[AND_UPTO13]], i1 [[AND_I14]], i64 14 528; CHECK-NEXT: [[AND_UPTO15:%.*]] = insertelement <32 x i1> [[AND_UPTO14]], i1 [[AND_I15]], i64 15 529; CHECK-NEXT: [[AND_UPTO16:%.*]] = insertelement <32 x i1> [[AND_UPTO15]], i1 [[AND_I16]], i64 16 530; CHECK-NEXT: [[AND_UPTO17:%.*]] = insertelement <32 x i1> [[AND_UPTO16]], i1 [[AND_I17]], i64 17 531; CHECK-NEXT: [[AND_UPTO18:%.*]] = insertelement <32 x i1> [[AND_UPTO17]], i1 [[AND_I18]], i64 18 532; CHECK-NEXT: [[AND_UPTO19:%.*]] = insertelement <32 x i1> [[AND_UPTO18]], i1 [[AND_I19]], i64 19 533; CHECK-NEXT: [[AND_UPTO20:%.*]] = insertelement <32 x i1> [[AND_UPTO19]], i1 [[AND_I20]], i64 20 534; CHECK-NEXT: [[AND_UPTO21:%.*]] = insertelement <32 x i1> [[AND_UPTO20]], i1 [[AND_I21]], i64 21 535; CHECK-NEXT: [[AND_UPTO22:%.*]] = insertelement <32 x i1> [[AND_UPTO21]], i1 [[AND_I22]], i64 22 536; CHECK-NEXT: [[AND_UPTO23:%.*]] = insertelement <32 x i1> [[AND_UPTO22]], i1 [[AND_I23]], i64 23 537; CHECK-NEXT: [[AND_UPTO24:%.*]] = insertelement <32 x i1> [[AND_UPTO23]], i1 [[AND_I24]], i64 24 538; CHECK-NEXT: [[AND_UPTO25:%.*]] = insertelement <32 x i1> [[AND_UPTO24]], 
i1 [[AND_I25]], i64 25 539; CHECK-NEXT: [[AND_UPTO26:%.*]] = insertelement <32 x i1> [[AND_UPTO25]], i1 [[AND_I26]], i64 26 540; CHECK-NEXT: [[AND_UPTO27:%.*]] = insertelement <32 x i1> [[AND_UPTO26]], i1 [[AND_I27]], i64 27 541; CHECK-NEXT: [[AND_UPTO28:%.*]] = insertelement <32 x i1> [[AND_UPTO27]], i1 [[AND_I28]], i64 28 542; CHECK-NEXT: [[AND_UPTO29:%.*]] = insertelement <32 x i1> [[AND_UPTO28]], i1 [[AND_I29]], i64 29 543; CHECK-NEXT: [[AND_UPTO30:%.*]] = insertelement <32 x i1> [[AND_UPTO29]], i1 [[AND_I30]], i64 30 544; CHECK-NEXT: [[AND:%.*]] = insertelement <32 x i1> [[AND_UPTO30]], i1 [[AND_I31]], i64 31 545; CHECK-NEXT: store <32 x i1> [[AND]], ptr [[DEST:%.*]], align 4 546; CHECK-NEXT: ret void 547; 548 %src1 = getelementptr <32 x i1>, ptr %src0, i32 1 549 %val0 = load <32 x i1> , ptr %src0 550 %val1 = load <32 x i1> , ptr %src1 551 %and = and <32 x i1> %val0, %val1 552 store <32 x i1> %and, ptr %dest 553 ret void 554} 555 556; Test vector GEPs with more than one index: each lane becomes a scalar GEP that uses that lane's pointer, constant index and variable index. 557define void @f13(ptr %dest, <4 x ptr> %ptr, <4 x i32> %i, 558; CHECK-LABEL: @f13( 559; CHECK-NEXT: [[DEST_I1:%.*]] = getelementptr ptr, ptr [[DEST:%.*]], i32 1 560; CHECK-NEXT: [[DEST_I2:%.*]] = getelementptr ptr, ptr [[DEST]], i32 2 561; CHECK-NEXT: [[DEST_I3:%.*]] = getelementptr ptr, ptr [[DEST]], i32 3 562; CHECK-NEXT: [[PTR_I0:%.*]] = extractelement <4 x ptr> [[PTR:%.*]], i64 0 563; CHECK-NEXT: [[I_I0:%.*]] = extractelement <4 x i32> [[I:%.*]], i64 0 564; CHECK-NEXT: [[VAL_I0:%.*]] = getelementptr inbounds [4 x float], ptr [[PTR_I0]], i32 0, i32 [[I_I0]] 565; CHECK-NEXT: [[PTR_I1:%.*]] = extractelement <4 x ptr> [[PTR]], i64 1 566; CHECK-NEXT: [[I_I1:%.*]] = extractelement <4 x i32> [[I]], i64 1 567; CHECK-NEXT: [[VAL_I1:%.*]] = getelementptr inbounds [4 x float], ptr [[PTR_I1]], i32 1, i32 [[I_I1]] 568; CHECK-NEXT: [[PTR_I2:%.*]] = extractelement <4 x ptr> [[PTR]], i64 2 569; CHECK-NEXT: [[I_I2:%.*]] = extractelement <4 x i32> [[I]], i64 2 570; CHECK-NEXT: [[VAL_I2:%.*]] = 
getelementptr inbounds [4 x float], ptr [[PTR_I2]], i32 2, i32 [[I_I2]] 571; CHECK-NEXT: [[PTR_I3:%.*]] = extractelement <4 x ptr> [[PTR]], i64 3 572; CHECK-NEXT: [[I_I3:%.*]] = extractelement <4 x i32> [[I]], i64 3 573; CHECK-NEXT: [[VAL_I3:%.*]] = getelementptr inbounds [4 x float], ptr [[PTR_I3]], i32 3, i32 [[I_I3]] 574; CHECK-NEXT: store ptr [[VAL_I0]], ptr [[DEST]], align 32 575; CHECK-NEXT: store ptr [[VAL_I1]], ptr [[DEST_I1]], align 8 576; CHECK-NEXT: store ptr [[VAL_I2]], ptr [[DEST_I2]], align 16 577; CHECK-NEXT: store ptr [[VAL_I3]], ptr [[DEST_I3]], align 8 578; CHECK-NEXT: ret void 579; 580 ptr %other) { 581 %val = getelementptr inbounds [4 x float], <4 x ptr> %ptr, 582 <4 x i32> <i32 0, i32 1, i32 2, i32 3>, 583 <4 x i32> %i 584 store <4 x ptr> %val, ptr %dest 585 ret void 586} 587 588; Test combinations of vector and non-vector PHIs: the vector PHI is split per element, the i32 PHI is kept as is, and the vector is rebuilt only where the @ext call and the return need it. 589define <4 x float> @f14(<4 x float> %acc, i32 %count) { 590; CHECK-LABEL: @f14( 591; CHECK-NEXT: entry: 592; CHECK-NEXT: [[ACC_I0:%.*]] = extractelement <4 x float> [[ACC:%.*]], i64 0 593; CHECK-NEXT: [[ACC_I1:%.*]] = extractelement <4 x float> [[ACC]], i64 1 594; CHECK-NEXT: [[ACC_I2:%.*]] = extractelement <4 x float> [[ACC]], i64 2 595; CHECK-NEXT: [[ACC_I3:%.*]] = extractelement <4 x float> [[ACC]], i64 3 596; CHECK-NEXT: br label [[LOOP:%.*]] 597; CHECK: loop: 598; CHECK-NEXT: [[THIS_ACC_I0:%.*]] = phi float [ [[ACC_I0]], [[ENTRY:%.*]] ], [ [[NEXT_ACC_I0:%.*]], [[LOOP]] ] 599; CHECK-NEXT: [[THIS_ACC_I1:%.*]] = phi float [ [[ACC_I1]], [[ENTRY]] ], [ [[NEXT_ACC_I1:%.*]], [[LOOP]] ] 600; CHECK-NEXT: [[THIS_ACC_I2:%.*]] = phi float [ [[ACC_I2]], [[ENTRY]] ], [ [[NEXT_ACC_I2:%.*]], [[LOOP]] ] 601; CHECK-NEXT: [[THIS_ACC_I3:%.*]] = phi float [ [[ACC_I3]], [[ENTRY]] ], [ [[NEXT_ACC_I3:%.*]], [[LOOP]] ] 602; CHECK-NEXT: [[THIS_COUNT:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY]] ], [ [[NEXT_COUNT:%.*]], [[LOOP]] ] 603; CHECK-NEXT: [[THIS_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[THIS_ACC_I0]], i64 0 604; 
CHECK-NEXT: [[THIS_ACC_UPTO1:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO0]], float [[THIS_ACC_I1]], i64 1 605; CHECK-NEXT: [[THIS_ACC_UPTO2:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO1]], float [[THIS_ACC_I2]], i64 2 606; CHECK-NEXT: [[THIS_ACC:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO2]], float [[THIS_ACC_I3]], i64 3 607; CHECK-NEXT: [[FOO:%.*]] = call <4 x float> @ext(<4 x float> [[THIS_ACC]]) 608; CHECK-NEXT: [[FOO_I0:%.*]] = extractelement <4 x float> [[FOO]], i64 0 609; CHECK-NEXT: [[NEXT_ACC_I0]] = fadd float [[THIS_ACC_I0]], [[FOO_I0]] 610; CHECK-NEXT: [[FOO_I1:%.*]] = extractelement <4 x float> [[FOO]], i64 1 611; CHECK-NEXT: [[NEXT_ACC_I1]] = fadd float [[THIS_ACC_I1]], [[FOO_I1]] 612; CHECK-NEXT: [[FOO_I2:%.*]] = extractelement <4 x float> [[FOO]], i64 2 613; CHECK-NEXT: [[NEXT_ACC_I2]] = fadd float [[THIS_ACC_I2]], [[FOO_I2]] 614; CHECK-NEXT: [[FOO_I3:%.*]] = extractelement <4 x float> [[FOO]], i64 3 615; CHECK-NEXT: [[NEXT_ACC_I3]] = fadd float [[THIS_ACC_I3]], [[FOO_I3]] 616; CHECK-NEXT: [[NEXT_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEXT_ACC_I0]], i64 0 617; CHECK-NEXT: [[NEXT_ACC_UPTO1:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO0]], float [[NEXT_ACC_I1]], i64 1 618; CHECK-NEXT: [[NEXT_ACC_UPTO2:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO1]], float [[NEXT_ACC_I2]], i64 2 619; CHECK-NEXT: [[NEXT_ACC:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO2]], float [[NEXT_ACC_I3]], i64 3 620; CHECK-NEXT: [[NEXT_COUNT]] = sub i32 [[THIS_COUNT]], 1 621; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[NEXT_COUNT]], 0 622; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] 623; CHECK: exit: 624; CHECK-NEXT: ret <4 x float> [[NEXT_ACC]] 625; 626entry: 627 br label %loop 628 629loop: 630 %this_acc = phi <4 x float> [ %acc, %entry ], [ %next_acc, %loop ] 631 %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ] 632 %foo = call <4 x float> @ext(<4 x float> %this_acc) 633 %next_acc = fadd <4 x float> 
%this_acc, %foo 634 %next_count = sub i32 %this_count, 1 635 %cmp = icmp eq i32 %next_count, 0 636 br i1 %cmp, label %loop, label %exit 637 638exit: 639 ret <4 x float> %next_acc 640} 641 642; Test unary operator scalarization: the vector fneg becomes one scalar fneg per element. 643define void @f15(<4 x float> %init, ptr %base, i32 %count) { 644; CHECK-LABEL: @f15( 645; CHECK-NEXT: entry: 646; CHECK-NEXT: br label [[LOOP:%.*]] 647; CHECK: loop: 648; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] 649; CHECK-NEXT: [[NEXTI]] = sub i32 [[I]], 1 650; CHECK-NEXT: [[PTR:%.*]] = getelementptr <4 x float>, ptr [[BASE:%.*]], i32 [[I]] 651; CHECK-NEXT: [[VAL_I0:%.*]] = load float, ptr [[PTR]], align 16 652; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr float, ptr [[PTR]], i32 1 653; CHECK-NEXT: [[VAL_I1:%.*]] = load float, ptr [[PTR_I1]], align 4 654; CHECK-NEXT: [[PTR_I2:%.*]] = getelementptr float, ptr [[PTR]], i32 2 655; CHECK-NEXT: [[VAL_I2:%.*]] = load float, ptr [[PTR_I2]], align 8 656; CHECK-NEXT: [[PTR_I3:%.*]] = getelementptr float, ptr [[PTR]], i32 3 657; CHECK-NEXT: [[VAL_I3:%.*]] = load float, ptr [[PTR_I3]], align 4 658; CHECK-NEXT: [[NEG_I0:%.*]] = fneg float [[VAL_I0]] 659; CHECK-NEXT: [[NEG_I1:%.*]] = fneg float [[VAL_I1]] 660; CHECK-NEXT: [[NEG_I2:%.*]] = fneg float [[VAL_I2]] 661; CHECK-NEXT: [[NEG_I3:%.*]] = fneg float [[VAL_I3]] 662; CHECK-NEXT: [[NEG_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEG_I0]], i64 0 663; CHECK-NEXT: [[NEG_UPTO1:%.*]] = insertelement <4 x float> [[NEG_UPTO0]], float [[NEG_I1]], i64 1 664; CHECK-NEXT: [[NEG_UPTO2:%.*]] = insertelement <4 x float> [[NEG_UPTO1]], float [[NEG_I2]], i64 2 665; CHECK-NEXT: [[NEG:%.*]] = insertelement <4 x float> [[NEG_UPTO2]], float [[NEG_I3]], i64 3 666; CHECK-NEXT: [[CALL:%.*]] = call <4 x float> @ext(<4 x float> [[NEG]]) 667; CHECK-NEXT: [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i64 0 668; CHECK-NEXT: [[CMP_I0:%.*]] = fcmp ogt float [[CALL_I0]], 1.000000e+00 669; CHECK-NEXT: 
[[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i64 1 670; CHECK-NEXT: [[CMP_I1:%.*]] = fcmp ogt float [[CALL_I1]], 2.000000e+00 671; CHECK-NEXT: [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i64 2 672; CHECK-NEXT: [[CMP_I2:%.*]] = fcmp ogt float [[CALL_I2]], 3.000000e+00 673; CHECK-NEXT: [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i64 3 674; CHECK-NEXT: [[CMP_I3:%.*]] = fcmp ogt float [[CALL_I3]], 4.000000e+00 675; CHECK-NEXT: [[SEL_I0:%.*]] = select i1 [[CMP_I0]], float [[CALL_I0]], float 5.000000e+00 676; CHECK-NEXT: [[SEL_I1:%.*]] = select i1 [[CMP_I1]], float [[CALL_I1]], float 6.000000e+00 677; CHECK-NEXT: [[SEL_I2:%.*]] = select i1 [[CMP_I2]], float [[CALL_I2]], float 7.000000e+00 678; CHECK-NEXT: [[SEL_I3:%.*]] = select i1 [[CMP_I3]], float [[CALL_I3]], float 8.000000e+00 679; CHECK-NEXT: store float [[SEL_I0]], ptr [[PTR]], align 16 680; CHECK-NEXT: store float [[SEL_I1]], ptr [[PTR_I1]], align 4 681; CHECK-NEXT: store float [[SEL_I2]], ptr [[PTR_I2]], align 8 682; CHECK-NEXT: store float [[SEL_I3]], ptr [[PTR_I3]], align 4 683; CHECK-NEXT: [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0 684; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]] 685; CHECK: exit: 686; CHECK-NEXT: ret void 687; 688entry: 689 br label %loop 690 691loop: 692 %i = phi i32 [ %count, %entry ], [ %nexti, %loop ] 693 %acc = phi <4 x float> [ %init, %entry ], [ %sel, %loop ] 694 %nexti = sub i32 %i, 1 695; %acc has no users in this function, and no PHI for it appears in the CHECK lines above. 696 %ptr = getelementptr <4 x float>, ptr %base, i32 %i 697 %val = load <4 x float> , ptr %ptr 698 %neg = fneg <4 x float> %val 699 %call = call <4 x float> @ext(<4 x float> %neg) 700 %cmp = fcmp ogt <4 x float> %call, 701 <float 1.0, float 2.0, float 3.0, float 4.0> 702 %sel = select <4 x i1> %cmp, <4 x float> %call, 703 <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0> 704 store <4 x float> %sel, ptr %ptr 705 706 %test = icmp eq i32 %nexti, 0 707 br i1 %test, label %loop, label %exit 708 709exit: 710 ret void 711} 712 713; Check that IR flags 
are preserved. 714define <2 x i32> @f16(<2 x i32> %i, <2 x i32> %j) { 715; CHECK-LABEL: @f16( 716; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i64 0 717; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i64 0 718; CHECK-NEXT: [[RES_I0:%.*]] = add nuw nsw i32 [[I_I0]], [[J_I0]] 719; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i64 1 720; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i64 1 721; CHECK-NEXT: [[RES_I1:%.*]] = add nuw nsw i32 [[I_I1]], [[J_I1]] 722; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i64 0 723; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i64 1 724; CHECK-NEXT: ret <2 x i32> [[RES]] 725; 726 %res = add nuw nsw <2 x i32> %i, %j 727 ret <2 x i32> %res 728} 729define <2 x i32> @f17(<2 x i32> %i, <2 x i32> %j) { 730; CHECK-LABEL: @f17( 731; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i64 0 732; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i64 0 733; CHECK-NEXT: [[RES_I0:%.*]] = sdiv exact i32 [[I_I0]], [[J_I0]] 734; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i64 1 735; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i64 1 736; CHECK-NEXT: [[RES_I1:%.*]] = sdiv exact i32 [[I_I1]], [[J_I1]] 737; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i64 0 738; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i64 1 739; CHECK-NEXT: ret <2 x i32> [[RES]] 740; 741 %res = sdiv exact <2 x i32> %i, %j 742 ret <2 x i32> %res 743} 744define <2 x float> @f18(<2 x float> %x, <2 x float> %y) { 745; CHECK-LABEL: @f18( 746; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 747; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0 748; CHECK-NEXT: [[RES_I0:%.*]] = fadd fast float [[X_I0]], [[Y_I0]] 749; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 750; 
CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1 751; CHECK-NEXT: [[RES_I1:%.*]] = fadd fast float [[X_I1]], [[Y_I1]] 752; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i64 0 753; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1 754; CHECK-NEXT: ret <2 x float> [[RES]] 755; 756 %res = fadd fast <2 x float> %x, %y 757 ret <2 x float> %res 758} 759define <2 x float> @f19(<2 x float> %x) { 760; CHECK-LABEL: @f19( 761; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 762; CHECK-NEXT: [[RES_I0:%.*]] = fneg fast float [[X_I0]] 763; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 764; CHECK-NEXT: [[RES_I1:%.*]] = fneg fast float [[X_I1]] 765; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i64 0 766; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1 767; CHECK-NEXT: ret <2 x float> [[RES]] 768; 769 %res = fneg fast <2 x float> %x 770 ret <2 x float> %res 771} 772define <2 x i1> @f20(<2 x float> %x, <2 x float> %y) { 773; CHECK-LABEL: @f20( 774; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 775; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0 776; CHECK-NEXT: [[RES_I0:%.*]] = fcmp fast ogt float [[X_I0]], [[Y_I0]] 777; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 778; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1 779; CHECK-NEXT: [[RES_I1:%.*]] = fcmp fast ogt float [[X_I1]], [[Y_I1]] 780; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[RES_I0]], i64 0 781; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i1> [[RES_UPTO0]], i1 [[RES_I1]], i64 1 782; CHECK-NEXT: ret <2 x i1> [[RES]] 783; 784 %res = fcmp fast ogt <2 x float> %x, %y 785 ret <2 x i1> %res 786} 787declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) 788define <2 x float> @f21(<2 x float> %x) { 
789; CHECK-LABEL: @f21( 790; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 791; CHECK-NEXT: [[RES_I0:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I0]]) 792; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 793; CHECK-NEXT: [[RES_I1:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I1]]) 794; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i64 0 795; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1 796; CHECK-NEXT: ret <2 x float> [[RES]] 797; 798 %res = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %x) 799 ret <2 x float> %res 800} 801declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) 802define <2 x float> @f22(<2 x float> %x, <2 x float> %y, <2 x float> %z) { 803; CHECK-LABEL: @f22( 804; CHECK-NEXT: [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0 805; CHECK-NEXT: [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0 806; CHECK-NEXT: [[Z_I0:%.*]] = extractelement <2 x float> [[Z:%.*]], i64 0 807; CHECK-NEXT: [[RES_I0:%.*]] = call fast float @llvm.fma.f32(float [[X_I0]], float [[Y_I0]], float [[Z_I0]]) 808; CHECK-NEXT: [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1 809; CHECK-NEXT: [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1 810; CHECK-NEXT: [[Z_I1:%.*]] = extractelement <2 x float> [[Z]], i64 1 811; CHECK-NEXT: [[RES_I1:%.*]] = call fast float @llvm.fma.f32(float [[X_I1]], float [[Y_I1]], float [[Z_I1]]) 812; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i64 0 813; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1 814; CHECK-NEXT: ret <2 x float> [[RES]] 815; 816 %res = call fast <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z) 817 ret <2 x float> %res 818} 819; An extractelement feeding an insertelement chain that starts from undef (presumably a crash reproducer, given the function name). 820; See https://reviews.llvm.org/D83101#2133062 821define <2 x i32> @f23_crash(<2 x i32> %srcvec, i32 %v1) { 822; 
CHECK-LABEL: @f23_crash( 823; CHECK-NEXT: [[SRCVEC_I0:%.*]] = extractelement <2 x i32> [[SRCVEC:%.*]], i64 0 824; CHECK-NEXT: [[T1_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[SRCVEC_I0]], i64 0 825; CHECK-NEXT: [[T1:%.*]] = insertelement <2 x i32> [[T1_UPTO0]], i32 [[V1:%.*]], i64 1 826; CHECK-NEXT: ret <2 x i32> [[T1]] 827; 828 %v0 = extractelement <2 x i32> %srcvec, i32 0 829 %t0 = insertelement <2 x i32> undef, i32 %v0, i32 0 830 %t1 = insertelement <2 x i32> %t0, i32 %v1, i32 1 831 ret <2 x i32> %t1 832} 833; Check that a vector freeze is scalarized into one freeze per element. 834define <2 x i32> @f24(<2 x i32> %src) { 835; CHECK-LABEL: @f24( 836; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <2 x i32> [[SRC:%.*]], i64 0 837; CHECK-NEXT: [[FRZ_I0:%.*]] = freeze i32 [[SRC_I0]] 838; CHECK-NEXT: [[SRC_I1:%.*]] = extractelement <2 x i32> [[SRC]], i64 1 839; CHECK-NEXT: [[FRZ_I1:%.*]] = freeze i32 [[SRC_I1]] 840; CHECK-NEXT: [[FRZ_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[FRZ_I0]], i64 0 841; CHECK-NEXT: [[FRZ:%.*]] = insertelement <2 x i32> [[FRZ_UPTO0]], i32 [[FRZ_I1]], i64 1 842; CHECK-NEXT: ret <2 x i32> [[FRZ]] 843; 844 %frz = freeze <2 x i32> %src 845 ret <2 x i32> %frz 846} 847; Check a freeze placed between two scalarized operations: the scalar freezes consume the scalar fadds directly and the vector is rebuilt only for the return. 848define <2 x float> @f25(<2 x float> %src) { 849; CHECK-LABEL: @f25( 850; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <2 x float> [[SRC:%.*]], i64 0 851; CHECK-NEXT: [[ADD_I0:%.*]] = fadd float [[SRC_I0]], [[SRC_I0]] 852; CHECK-NEXT: [[SRC_I1:%.*]] = extractelement <2 x float> [[SRC]], i64 1 853; CHECK-NEXT: [[ADD_I1:%.*]] = fadd float [[SRC_I1]], [[SRC_I1]] 854; CHECK-NEXT: [[FRZ_I0:%.*]] = freeze float [[ADD_I0]] 855; CHECK-NEXT: [[FRZ_I1:%.*]] = freeze float [[ADD_I1]] 856; CHECK-NEXT: [[MUL_I0:%.*]] = fmul float [[FRZ_I0]], [[FRZ_I0]] 857; CHECK-NEXT: [[MUL_I1:%.*]] = fmul float [[FRZ_I1]], [[FRZ_I1]] 858; CHECK-NEXT: [[MUL_UPTO0:%.*]] = insertelement <2 x float> poison, float [[MUL_I0]], i64 0 859; CHECK-NEXT: [[MUL:%.*]] = insertelement <2 x float> [[MUL_UPTO0]], float [[MUL_I1]], i64 1 860; CHECK-NEXT: ret <2 x float> [[MUL]] 
861; 862 %add = fadd <2 x float> %src, %src 863 %frz = freeze <2 x float> %add 864 %mul = fmul <2 x float> %frz, %frz 865 ret <2 x float> %mul 866} 867; Check that the nuw and nsw flags on a vector trunc are copied to each scalar trunc. 868define <2 x i8> @test_copy_trunc_flags(<2 x i32> %src) { 869; CHECK-LABEL: @test_copy_trunc_flags( 870; CHECK-NEXT: [[SRC_I0:%.*]] = extractelement <2 x i32> [[SRC:%.*]], i64 0 871; CHECK-NEXT: [[TRUNC_I0:%.*]] = trunc nuw nsw i32 [[SRC_I0]] to i8 872; CHECK-NEXT: [[SRC_I1:%.*]] = extractelement <2 x i32> [[SRC]], i64 1 873; CHECK-NEXT: [[TRUNC_I1:%.*]] = trunc nuw nsw i32 [[SRC_I1]] to i8 874; CHECK-NEXT: [[TRUNC_UPTO0:%.*]] = insertelement <2 x i8> poison, i8 [[TRUNC_I0]], i64 0 875; CHECK-NEXT: [[TRUNC:%.*]] = insertelement <2 x i8> [[TRUNC_UPTO0]], i8 [[TRUNC_I1]], i64 1 876; CHECK-NEXT: ret <2 x i8> [[TRUNC]] 877; 878 %trunc = trunc nuw nsw <2 x i32> %src to <2 x i8> 879 ret <2 x i8> %trunc 880} 881; Metadata nodes; none of the functions directly above refer to them (presumably used by tests earlier in the file). 882!0 = !{ !"root" } 883!1 = !{ !"set1", !0 } 884!2 = !{ !"set2", !0 } 885!3 = !{ !3, !{!"llvm.loop.parallel_accesses", !13} } 886!4 = !{ float 4.0 } 887!5 = !{ i64 0, i64 8, null } 888!13 = distinct !{} 889