1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s 3 4define void @julia_2xdouble(ptr sret([2 x double]), ptr, ptr, ptr) { 5; CHECK-LABEL: @julia_2xdouble( 6; CHECK-NEXT: top: 7; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, ptr [[TMP2:%.*]], align 4 8; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, ptr [[TMP3:%.*]], align 4 9; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP5]], [[TMP7]] 10; CHECK-NEXT: [[TMP10:%.*]] = load <2 x double>, ptr [[TMP1:%.*]], align 4 11; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP8]], [[TMP10]] 12; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[TMP11]], i32 0 13; CHECK-NEXT: [[I0:%.*]] = insertvalue [2 x double] undef, double [[TMP12]], 0 14; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x double> [[TMP11]], i32 1 15; CHECK-NEXT: [[I1:%.*]] = insertvalue [2 x double] [[I0]], double [[TMP13]], 1 16; CHECK-NEXT: store [2 x double] [[I1]], ptr [[TMP0:%.*]], align 4 17; CHECK-NEXT: ret void 18; 19top: 20 %x0 = load double, ptr %2, align 4 21 %y0 = load double, ptr %3, align 4 22 %m0 = fmul double %x0, %y0 23 %px1 = getelementptr inbounds [2 x double], ptr %2, i64 0, i64 1 24 %x1 = load double, ptr %px1, align 4 25 %py1 = getelementptr inbounds [2 x double], ptr %3, i64 0, i64 1 26 %y1 = load double, ptr %py1, align 4 27 %m1 = fmul double %x1, %y1 28 %z0 = load double, ptr %1, align 4 29 %a0 = fadd double %m0, %z0 30 %i0 = insertvalue [2 x double] undef, double %a0, 0 31 %pz1 = getelementptr inbounds [2 x double], ptr %1, i64 0, i64 1 32 %z1 = load double, ptr %pz1, align 4 33 %a1 = fadd double %m1, %z1 34 %i1 = insertvalue [2 x double] %i0, double %a1, 1 35 store [2 x double] %i1, ptr %0, align 4 36 ret void 37} 38 39define void @julia_4xfloat(ptr sret([4 x float]), ptr, ptr, ptr) { 40; CHECK-LABEL: @julia_4xfloat( 41; CHECK-NEXT: top: 42; CHECK-NEXT: [[TMP5:%.*]] = load <4 x float>, ptr [[TMP2:%.*]], align 4 43; CHECK-NEXT: [[TMP7:%.*]] = load <4 x float>, ptr [[TMP3:%.*]], align 4 44; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x float> [[TMP5]], [[TMP7]] 45; CHECK-NEXT: [[TMP10:%.*]] = load <4 x float>, ptr [[TMP1:%.*]], align 4 46; CHECK-NEXT: [[TMP11:%.*]] = fadd <4 x float> [[TMP8]], [[TMP10]] 47; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP11]], i32 0 48; CHECK-NEXT: [[I0:%.*]] = insertvalue [4 x float] undef, float [[TMP12]], 0 49; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP11]], i32 1 50; CHECK-NEXT: [[I1:%.*]] = insertvalue [4 x float] [[I0]], float [[TMP13]], 1 51; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[TMP11]], i32 2 52; CHECK-NEXT: [[I2:%.*]] = insertvalue [4 x float] [[I1]], float [[TMP14]], 2 53; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[TMP11]], i32 3 54; CHECK-NEXT: [[I3:%.*]] = insertvalue [4 x float] [[I2]], float [[TMP15]], 3 55; CHECK-NEXT: store [4 x float] [[I3]], ptr [[TMP0:%.*]], align 4 56; CHECK-NEXT: ret void 57; 58top: 59 %x0 = load float, ptr %2, align 4 60 %y0 = load float, ptr %3, align 4 61 %m0 = fmul float %x0, %y0 62 %px1 = getelementptr inbounds [4 x float], ptr %2, i64 0, i64 1 63 %x1 = load float, ptr %px1, align 4 64 %py1 = getelementptr inbounds [4 x float], ptr %3, i64 0, i64 1 65 %y1 = load float, ptr %py1, align 4 66 %m1 = fmul float %x1, %y1 67 %px2 = getelementptr inbounds [4 x float], ptr %2, i64 0, i64 2 68 %x2 = load float, ptr %px2, align 4 69 %py2 = getelementptr inbounds [4 x float], ptr %3, i64 0, i64 2 70 %y2 = load float, ptr %py2, align 4 71 %m2 = fmul float %x2, %y2 72 %px3 = getelementptr inbounds [4 x float], ptr %2, i64 0, i64 3 73 %x3 = load float, ptr %px3, align 4 74 %py3 = getelementptr inbounds [4 x float], ptr %3, i64 0, i64 3 75 %y3 = load float, ptr %py3, align 4 76 %m3 = fmul float %x3, %y3 77 %z0 = load float, ptr %1, align 4 78 %a0 = fadd float %m0, %z0 79 %i0 = insertvalue [4 x float] undef, float %a0, 0 80 %pz1 = getelementptr inbounds [4 x float], ptr %1, i64 0, i64 1 81 %z1 = load float, ptr %pz1, align 4 82 %a1 = fadd float %m1, %z1 83 %i1 = insertvalue [4 x float] %i0, float %a1, 1 84 %pz2 = getelementptr inbounds [4 x float], ptr %1, i64 0, i64 2 85 %z2 = load float, ptr %pz2, align 4 86 %a2 = fadd float %m2, %z2 87 %i2 = insertvalue [4 x float] %i1, float %a2, 2 88 %pz3 = getelementptr inbounds [4 x float], ptr %1, i64 0, i64 3 89 %z3 = load float, ptr %pz3, align 4 90 %a3 = fadd float %m3, %z3 91 %i3 = insertvalue [4 x float] %i2, float %a3, 3 92 store [4 x float] %i3, ptr %0, align 4 93 ret void 94} 95 96define void @julia_load_array_of_float(ptr %a, ptr %b, ptr %c) { 97; CHECK-LABEL: @julia_load_array_of_float( 98; CHECK-NEXT: top: 99; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4 100; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4 101; CHECK-NEXT: [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]] 102; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0 103; CHECK-NEXT: [[C_ARR0:%.*]] = insertvalue [4 x float] undef, float [[TMP5]], 0 104; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP4]], i32 1 105; CHECK-NEXT: [[C_ARR1:%.*]] = insertvalue [4 x float] [[C_ARR0]], float [[TMP6]], 1 106; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP4]], i32 2 107; CHECK-NEXT: [[C_ARR2:%.*]] = insertvalue [4 x float] [[C_ARR1]], float [[TMP7]], 2 108; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP4]], i32 3 109; CHECK-NEXT: [[C_ARR3:%.*]] = insertvalue [4 x float] [[C_ARR2]], float [[TMP8]], 3 110; CHECK-NEXT: store [4 x float] [[C_ARR3]], ptr [[C:%.*]], align 4 111; CHECK-NEXT: ret void 112; 113top: 114 %a_arr = load [4 x float], ptr %a, align 4 115 %a0 = extractvalue [4 x float] %a_arr, 0 116 %a2 = extractvalue [4 x float] %a_arr, 2 117 %a1 = extractvalue [4 x float] %a_arr, 1 118 %b_arr = load [4 x float], ptr %b, align 4 119 %b0 = extractvalue [4 x float] %b_arr, 0 120 %b2 = extractvalue [4 x float] %b_arr, 2 121 %b1 = extractvalue [4 x float] %b_arr, 1 122 %a3 = extractvalue [4 x float] %a_arr, 3 123 %c1 = fsub float %a1, %b1 124 %b3 = extractvalue [4 x float] %b_arr, 3 125 %c0 = fsub float %a0, %b0 126 %c2 = fsub float %a2, %b2 127 %c_arr0 = insertvalue [4 x float] undef, float %c0, 0 128 %c_arr1 = insertvalue [4 x float] %c_arr0, float %c1, 1 129 %c3 = fsub float %a3, %b3 130 %c_arr2 = insertvalue [4 x float] %c_arr1, float %c2, 2 131 %c_arr3 = insertvalue [4 x float] %c_arr2, float %c3, 3 132 store [4 x float] %c_arr3, ptr %c, align 4 133 ret void 134} 135 136define void @julia_load_array_of_i32(ptr %a, ptr %b, ptr %c) { 137; CHECK-LABEL: @julia_load_array_of_i32( 138; CHECK-NEXT: top: 139; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A:%.*]], align 4 140; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4 141; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]] 142; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0 143; CHECK-NEXT: [[C_ARR0:%.*]] = insertvalue [4 x i32] undef, i32 [[TMP5]], 0 144; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1 145; CHECK-NEXT: [[C_ARR1:%.*]] = insertvalue [4 x i32] [[C_ARR0]], i32 [[TMP6]], 1 146; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2 147; CHECK-NEXT: [[C_ARR2:%.*]] = insertvalue [4 x i32] [[C_ARR1]], i32 [[TMP7]], 2 148; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3 149; CHECK-NEXT: [[C_ARR3:%.*]] = insertvalue [4 x i32] [[C_ARR2]], i32 [[TMP8]], 3 150; CHECK-NEXT: store [4 x i32] [[C_ARR3]], ptr [[C:%.*]], align 4 151; CHECK-NEXT: ret void 152; 153top: 154 %a_arr = load [4 x i32], ptr %a, align 4 155 %a0 = extractvalue [4 x i32] %a_arr, 0 156 %a2 = extractvalue [4 x i32] %a_arr, 2 157 %a1 = extractvalue [4 x i32] %a_arr, 1 158 %b_arr = load [4 x i32], ptr %b, align 4 159 %b0 = extractvalue [4 x i32] %b_arr, 0 160 %b2 = extractvalue [4 x i32] %b_arr, 2 161 %b1 = extractvalue [4 x i32] %b_arr, 1 162 %a3 = extractvalue [4 x i32] %a_arr, 3 163 %c1 = sub i32 %a1, %b1 164 %b3 = extractvalue [4 x i32] %b_arr, 3 165 %c0 = sub i32 %a0, %b0 166 %c2 = sub i32 %a2, %b2 167 %c_arr0 = insertvalue [4 x i32] undef, i32 %c0, 0 168 %c_arr1 = insertvalue [4 x i32] %c_arr0, i32 %c1, 1 169 %c3 = sub i32 %a3, %b3 170 %c_arr2 = insertvalue [4 x i32] %c_arr1, i32 %c2, 2 171 %c_arr3 = insertvalue [4 x i32] %c_arr2, i32 %c3, 3 172 store [4 x i32] %c_arr3, ptr %c, align 4 173 ret void 174} 175 176; Almost identical to previous test, but for type that should NOT be vectorized. 177; 178define void @julia_load_array_of_i16(ptr %a, ptr %b, ptr %c) { 179; CHECK-LABEL: @julia_load_array_of_i16( 180; CHECK-NEXT: top: 181; CHECK-NEXT: [[A_ARR:%.*]] = load [4 x i16], ptr [[A:%.*]], align 4 182; CHECK-NEXT: [[A0:%.*]] = extractvalue [4 x i16] [[A_ARR]], 0 183; CHECK-NEXT: [[A2:%.*]] = extractvalue [4 x i16] [[A_ARR]], 2 184; CHECK-NEXT: [[A1:%.*]] = extractvalue [4 x i16] [[A_ARR]], 1 185; CHECK-NEXT: [[B_ARR:%.*]] = load [4 x i16], ptr [[B:%.*]], align 4 186; CHECK-NEXT: [[B0:%.*]] = extractvalue [4 x i16] [[B_ARR]], 0 187; CHECK-NEXT: [[B2:%.*]] = extractvalue [4 x i16] [[B_ARR]], 2 188; CHECK-NEXT: [[B1:%.*]] = extractvalue [4 x i16] [[B_ARR]], 1 189; CHECK-NEXT: [[A3:%.*]] = extractvalue [4 x i16] [[A_ARR]], 3 190; CHECK-NEXT: [[C1:%.*]] = sub i16 [[A1]], [[B1]] 191; CHECK-NEXT: [[B3:%.*]] = extractvalue [4 x i16] [[B_ARR]], 3 192; CHECK-NEXT: [[C0:%.*]] = sub i16 [[A0]], [[B0]] 193; CHECK-NEXT: [[C2:%.*]] = sub i16 [[A2]], [[B2]] 194; CHECK-NEXT: [[C_ARR0:%.*]] = insertvalue [4 x i16] undef, i16 [[C0]], 0 195; CHECK-NEXT: [[C_ARR1:%.*]] = insertvalue [4 x i16] [[C_ARR0]], i16 [[C1]], 1 196; CHECK-NEXT: [[C3:%.*]] = sub i16 [[A3]], [[B3]] 197; CHECK-NEXT: [[C_ARR2:%.*]] = insertvalue [4 x i16] [[C_ARR1]], i16 [[C2]], 2 198; CHECK-NEXT: [[C_ARR3:%.*]] = insertvalue [4 x i16] [[C_ARR2]], i16 [[C3]], 3 199; CHECK-NEXT: store [4 x i16] [[C_ARR3]], ptr [[C:%.*]], align 4 200; CHECK-NEXT: ret void 201; 202top: 203 %a_arr = load [4 x i16], ptr %a, align 4 204 %a0 = extractvalue [4 x i16] %a_arr, 0 205 %a2 = extractvalue [4 x i16] %a_arr, 2 206 %a1 = extractvalue [4 x i16] %a_arr, 1 207 %b_arr = load [4 x i16], ptr %b, align 4 208 %b0 = extractvalue [4 x i16] %b_arr, 0 209 %b2 = extractvalue [4 x i16] %b_arr, 2 210 %b1 = extractvalue [4 x i16] %b_arr, 1 211 %a3 = extractvalue [4 x i16] %a_arr, 3 212 %c1 = sub i16 %a1, %b1 213 %b3 = extractvalue [4 x i16] %b_arr, 3 214 %c0 = sub i16 %a0, %b0 215 %c2 = sub i16 %a2, %b2 216 %c_arr0 = insertvalue [4 x i16] undef, i16 %c0, 0 217 %c_arr1 = insertvalue [4 x i16] %c_arr0, i16 %c1, 1 218 %c3 = sub i16 %a3, %b3 219 %c_arr2 = insertvalue [4 x i16] %c_arr1, i16 %c2, 2 220 %c_arr3 = insertvalue [4 x i16] %c_arr2, i16 %c3, 3 221 store [4 x i16] %c_arr3, ptr %c, align 4 222 ret void 223} 224 225%pseudovec = type { float, float, float, float } 226 227define void @julia_load_struct_of_float(ptr %a, ptr %b, ptr %c) { 228; CHECK-LABEL: @julia_load_struct_of_float( 229; CHECK-NEXT: top: 230; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4 231; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4 232; CHECK-NEXT: [[TMP4:%.*]] = fsub <4 x float> [[TMP1]], [[TMP3]] 233; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0 234; CHECK-NEXT: [[C_STRUCT0:%.*]] = insertvalue [[PSEUDOVEC:%.*]] undef, float [[TMP5]], 0 235; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP4]], i32 1 236; CHECK-NEXT: [[C_STRUCT1:%.*]] = insertvalue [[PSEUDOVEC]] [[C_STRUCT0]], float [[TMP6]], 1 237; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP4]], i32 2 238; CHECK-NEXT: [[C_STRUCT2:%.*]] = insertvalue [[PSEUDOVEC]] [[C_STRUCT1]], float [[TMP7]], 2 239; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP4]], i32 3 240; CHECK-NEXT: [[C_STRUCT3:%.*]] = insertvalue [[PSEUDOVEC]] [[C_STRUCT2]], float [[TMP8]], 3 241; CHECK-NEXT: store [[PSEUDOVEC]] [[C_STRUCT3]], ptr [[C:%.*]], align 4 242; CHECK-NEXT: ret void 243; 244top: 245 %a_struct = load %pseudovec, ptr %a, align 4 246 %a0 = extractvalue %pseudovec %a_struct, 0 247 %a1 = extractvalue %pseudovec %a_struct, 1 248 %b_struct = load %pseudovec, ptr %b, align 4 249 %a2 = extractvalue %pseudovec %a_struct, 2 250 %b0 = extractvalue %pseudovec %b_struct, 0 251 %a3 = extractvalue %pseudovec %a_struct, 3 252 %c0 = fsub float %a0, %b0 253 %b1 = extractvalue %pseudovec %b_struct, 1 254 %b2 = extractvalue %pseudovec %b_struct, 2 255 %c1 = fsub float %a1, %b1 256 %c_struct0 = insertvalue %pseudovec undef, float %c0, 0 257 %b3 = extractvalue %pseudovec %b_struct, 3 258 %c3 = fsub float %a3, %b3 259 %c_struct1 = insertvalue %pseudovec %c_struct0, float %c1, 1 260 %c2 = fsub float %a2, %b2 261 %c_struct2 = insertvalue %pseudovec %c_struct1, float %c2, 2 262 %c_struct3 = insertvalue %pseudovec %c_struct2, float %c3, 3 263 store %pseudovec %c_struct3, ptr %c, align 4 264 ret void 265} 266