; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -passes=gvn -S < %s | FileCheck %s

; Each function below writes or reads a whole vector at %loc and then loads
; from the same memory with a different type and/or offset. The expected
; output records whether GVN replaces that second load with a value computed
; from the vector (bitcast / lshr / trunc sequences) or leaves it in place.
; No datalayout is specified in this file; the expected shift amounts place
; the element at byte offset N at bit offset 8*N of the bitcast integer.

; A full <4 x float> load followed by scalar float loads of elements 1 and 2
; of the same memory. The scalar loads are expected to be rewritten as
; extractions from the already-loaded vector.
define <4 x float> @ConvertVectors_ByRef(ptr %loc) {
; CHECK-LABEL: define <4 x float> @ConvertVectors_ByRef
; CHECK-SAME: (ptr [[LOC:%.*]]) {
; CHECK-NEXT:    [[LOAD_VEC:%.*]] = load <4 x float>, ptr [[LOC]], align 16
; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x float> [[LOAD_VEC]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [4 x float], ptr [[LOC]], i64 0, i64 1
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x float> [[LOAD_VEC]] to i128
; CHECK-NEXT:    [[TMP2:%.*]] = lshr i128 [[TMP1]], 32
; CHECK-NEXT:    [[TMP3:%.*]] = trunc i128 [[TMP2]] to i32
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32 [[TMP3]] to float
; CHECK-NEXT:    [[INS1:%.*]] = insertelement <4 x float> [[SHUF]], float [[TMP4]], i64 1
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [4 x float], ptr [[LOC]], i64 0, i64 2
; CHECK-NEXT:    [[TMP5:%.*]] = lshr i128 [[TMP1]], 64
; CHECK-NEXT:    [[TMP6:%.*]] = trunc i128 [[TMP5]] to i32
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32 [[TMP6]] to float
; CHECK-NEXT:    [[INS2:%.*]] = insertelement <4 x float> [[INS1]], float [[TMP7]], i64 2
; CHECK-NEXT:    [[INS3:%.*]] = insertelement <4 x float> [[INS2]], float [[TMP7]], i64 3
; CHECK-NEXT:    ret <4 x float> [[INS3]]
;
  %load_vec = load <4 x float>, ptr %loc, align 16
  %shuf = shufflevector <4 x float> %load_vec, <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
  %gep1 = getelementptr inbounds [4 x float], ptr %loc, i64 0, i64 1
  %load1 = load float, ptr %gep1, align 4
  %ins1 = insertelement <4 x float> %shuf, float %load1, i64 1
  %gep2 = getelementptr inbounds [4 x float], ptr %loc, i64 0, i64 2
  %load2 = load float, ptr %gep2, align 8
  %ins2 = insertelement <4 x float> %ins1, float %load2, i64 2
  %ins3 = insertelement <4 x float> %ins2, float %load2, i64 3
  ret <4 x float> %ins3
}

; Stores <4 x i32> and then loads an i64 (wider than one i32 element) starting
; at element 2. The load is expected to be replaced by bits taken from %v.
define i64 @store_element_smaller_than_load(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define i64 @store_element_smaller_than_load
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [4 x i32], ptr [[LOC]], i64 0, i64 2
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to i128
; CHECK-NEXT:    [[TMP1:%.*]] = lshr i128 [[TMP0]], 64
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i128 [[TMP1]] to i64
; CHECK-NEXT:    ret i64 [[TMP2]]
;
entry:
  store <4 x i32> %v, ptr %loc
  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0, i64 2
  %ref = load i64, ptr %gep
  ret i64 %ref
}

; Same store/load pair as above, but with a call to @f (declared without any
; memory attribute) between the store and the load. The expected output keeps
; the load.
define i64 @call_before_load(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define i64 @call_before_load
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT:    call void @f(<4 x i32> [[V]])
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [4 x i32], ptr [[LOC]], i64 0, i64 2
; CHECK-NEXT:    [[REF:%.*]] = load i64, ptr [[GEP]], align 4
; CHECK-NEXT:    ret i64 [[REF]]
;
entry:
  store <4 x i32> %v, ptr %loc
  call void @f(<4 x i32> %v)
  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0, i64 2
  %ref = load i64, ptr %gep
  ret i64 %ref
}

; Same as @call_before_load, but the intervening callee @f_no_mem is declared
; memory(none). The load is expected to be replaced by bits taken from %v.
define i64 @call_before_load_memory_none(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define i64 @call_before_load_memory_none
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT:    call void @f_no_mem(<4 x i32> [[V]])
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [4 x i32], ptr [[LOC]], i64 0, i64 2
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to i128
; CHECK-NEXT:    [[TMP1:%.*]] = lshr i128 [[TMP0]], 64
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i128 [[TMP1]] to i64
; CHECK-NEXT:    ret i64 [[TMP2]]
;
entry:
  store <4 x i32> %v, ptr %loc
  call void @f_no_mem(<4 x i32> %v)
  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0, i64 2
  %ref = load i64, ptr %gep
  ret i64 %ref
}

; The call to @f comes after the load, so nothing sits between the store and
; the load. The load is expected to be replaced by bits taken from %v.
define i64 @call_after_load(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define i64 @call_after_load
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [4 x i32], ptr [[LOC]], i64 0, i64 2
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to i128
; CHECK-NEXT:    [[TMP1:%.*]] = lshr i128 [[TMP0]], 64
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i128 [[TMP1]] to i64
; CHECK-NEXT:    call void @f(<4 x i32> [[V]])
; CHECK-NEXT:    ret i64 [[TMP2]]
;
entry:
  store <4 x i32> %v, ptr %loc
  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0, i64 2
  %ref = load i64, ptr %gep
  call void @f(<4 x i32> %v)
  ret i64 %ref
}

; Floating-point variant: stores <4 x float> and loads a double starting at
; element 2. The extracted i64 is expected to be bitcast to double.
define double @store_element_smaller_than_load_float(ptr %loc, <4 x float> %v) {
; CHECK-LABEL: define double @store_element_smaller_than_load_float
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x float> [[V:%.*]]) {
; CHECK-NEXT:    store <4 x float> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [4 x float], ptr [[LOC]], i64 0, i64 2
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x float> [[V]] to i128
; CHECK-NEXT:    [[TMP2:%.*]] = lshr i128 [[TMP1]], 64
; CHECK-NEXT:    [[TMP3:%.*]] = trunc i128 [[TMP2]] to i64
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64 [[TMP3]] to double
; CHECK-NEXT:    ret double [[TMP4]]
;
  store <4 x float> %v, ptr %loc
  %gep = getelementptr inbounds [4 x float], ptr %loc, i64 0, i64 2
  %ref = load double, ptr %gep
  ret double %ref
}

; Stores <2 x i32> and reloads all 64 bits as a single i64 through a
; zero-index GEP. A single bitcast of %v is expected.
define i64 @load_as_scalar(ptr %loc, <2 x i32> %v) {
; CHECK-LABEL: define i64 @load_as_scalar
; CHECK-SAME: (ptr [[LOC:%.*]], <2 x i32> [[V:%.*]]) {
; CHECK-NEXT:    store <2 x i32> [[V]], ptr [[LOC]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[V]] to i64
; CHECK-NEXT:    ret i64 [[TMP1]]
;
  store <2 x i32> %v, ptr %loc
  %gep = getelementptr inbounds [4 x float], ptr %loc, i64 0
  %ref = load i64, ptr %gep
  ret i64 %ref
}


; Stores <4 x i6> (24 bits) and loads an i9, which is wider than one element.
; Expected: bitcast to i24, then truncation through i16 down to i9.
define i9 @load_as_scalar_larger(ptr %loc, <4 x i6> %v) {
; CHECK-LABEL: define i9 @load_as_scalar_larger
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i6> [[V:%.*]]) {
; CHECK-NEXT:    store <4 x i6> [[V]], ptr [[LOC]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i6> [[V]] to i24
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i24 [[TMP1]] to i16
; CHECK-NEXT:    [[TMP3:%.*]] = trunc i16 [[TMP2]] to i9
; CHECK-NEXT:    ret i9 [[TMP3]]
;
  store <4 x i6> %v, ptr %loc
  %gep = getelementptr i9, ptr %loc, i64 0
  %ref = load i9, ptr %gep
  ret i9 %ref
}


; Stores <4 x i6> (24 bits) and loads an i4, which is narrower than one
; element. Expected: bitcast to i24, then truncation through i8 down to i4.
define i4 @load_as_scalar_smaller(ptr %loc, <4 x i6> %v) {
; CHECK-LABEL: define i4 @load_as_scalar_smaller
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i6> [[V:%.*]]) {
; CHECK-NEXT:    store <4 x i6> [[V]], ptr [[LOC]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i6> [[V]] to i24
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i24 [[TMP1]] to i8
; CHECK-NEXT:    [[TMP3:%.*]] = trunc i8 [[TMP2]] to i4
; CHECK-NEXT:    ret i4 [[TMP3]]
;
  store <4 x i6> %v, ptr %loc
  %gep = getelementptr i4, ptr %loc, i64 0
  %ref = load i4, ptr %gep
  ret i4 %ref
}


; Stores <4 x i32> and reloads the same vector type. The extractelement is
; expected to read directly from %v with no conversion.
define i32 @load_vec_same_type(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define i32 @load_vec_same_type
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i32> [[V]], i32 1
; CHECK-NEXT:    ret i32 [[R]]
;
entry:
  store <4 x i32> %v, ptr %loc
  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0
  %ref = load <4 x i32>, ptr %gep
  %r = extractelement <4 x i32> %ref, i32 1
  ret i32 %r
}

; Stores <4 x i32> and reloads the same 128 bits as <2 x i64>. Expected: a
; bitcast through i128 to the loaded vector type.
define i64 @load_vec_same_size_different_type1(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define i64 @load_vec_same_size_different_type1
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to i128
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[TMP0]] to <2 x i64>
; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
; CHECK-NEXT:    ret i64 [[R]]
;
entry:
  store <4 x i32> %v, ptr %loc
  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0
  %ref = load <2 x i64>, ptr %gep
  %r = extractelement <2 x i64> %ref, i32 1
  ret i64 %r
}

; Stores <4 x i32> and reloads the same 128 bits as <2 x double>. Expected: a
; bitcast through i128 to the loaded vector type.
define double @load_vec_same_size_different_type2(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define double @load_vec_same_size_different_type2
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to i128
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[TMP0]] to <2 x double>
; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
; CHECK-NEXT:    ret double [[R]]
;
entry:
  store <4 x i32> %v, ptr %loc
  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0
  %ref = load <2 x double>, ptr %gep
  %r = extractelement <2 x double> %ref, i32 1
  ret double %r
}

; Stores <4 x i32> and loads a <2 x i32> from the same address. Expected:
; bitcast to i128, truncate to i64, bitcast to <2 x i32>.
define i32 @load_subvector_same_type(ptr %loc, <4 x i32> %v) {
; CHECK-LABEL: define i32 @load_subvector_same_type
; CHECK-SAME: (ptr [[LOC:%.*]], <4 x i32> [[V:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <4 x i32> [[V]], ptr [[LOC]], align 16
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[V]] to i128
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i128 [[TMP0]] to i64
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64 [[TMP1]] to <2 x i32>
; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
; CHECK-NEXT:    ret i32 [[R]]
;
entry:
  store <4 x i32> %v, ptr %loc
  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0
  %ref = load <2 x i32>, ptr %gep
  %r = extractelement <2 x i32> %ref, i32 1
  ret i32 %r
}

; Stores <8 x i32> (256 bits) and loads a <2 x i64> from the same address.
; Expected: bitcast to i256, truncate to i128, bitcast to <2 x i64>.
define i64 @load_subvector_different_type(ptr %loc, <8 x i32> %v) {
; CHECK-LABEL: define i64 @load_subvector_different_type
; CHECK-SAME: (ptr [[LOC:%.*]], <8 x i32> [[V:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <8 x i32> [[V]], ptr [[LOC]], align 32
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i32> [[V]] to i256
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i256 [[TMP0]] to i128
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <2 x i64>
; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
; CHECK-NEXT:    ret i64 [[R]]
;
entry:
  store <8 x i32> %v, ptr %loc
  %gep = getelementptr inbounds [4 x i32], ptr %loc, i64 0
  %ref = load <2 x i64>, ptr %gep
  %r = extractelement <2 x i64> %ref, i32 1
  ret i64 %r
}

; Stores <8 x i32> and loads a <2 x i16> (32 bits) from the same address.
; Expected: bitcast to i256, truncate to i32, bitcast to <2 x i16>.
define i16 @load_subvector_different_type2(ptr %loc, <8 x i32> %v) {
; CHECK-LABEL: define i16 @load_subvector_different_type2
; CHECK-SAME: (ptr [[LOC:%.*]], <8 x i32> [[V:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <8 x i32> [[V]], ptr [[LOC]], align 32
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i32> [[V]] to i256
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i256 [[TMP0]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[TMP1]] to <2 x i16>
; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x i16> [[TMP2]], i32 1
; CHECK-NEXT:    ret i16 [[R]]
;
entry:
  store <8 x i32> %v, ptr %loc
  %gep = getelementptr [2 x i16], ptr %loc, i64 0
  %ref = load <2 x i16>, ptr %gep
  %r = extractelement <2 x i16> %ref, i32 1
  ret i16 %r
}

; Stores <8 x i8> and loads a <3 x i4> (12 bits, not a whole number of bytes).
; Expected: bitcast to i64, truncate to i16 and then i12, bitcast to <3 x i4>.
define i4 @load_subvector_different_type3(ptr %loc, <8 x i8> %v) {
; CHECK-LABEL: define i4 @load_subvector_different_type3
; CHECK-SAME: (ptr [[LOC:%.*]], <8 x i8> [[V:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <8 x i8> [[V]], ptr [[LOC]], align 8
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i16
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i16 [[TMP1]] to i12
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i12 [[TMP2]] to <3 x i4>
; CHECK-NEXT:    [[R:%.*]] = extractelement <3 x i4> [[TMP3]], i32 1
; CHECK-NEXT:    ret i4 [[R]]
;
entry:
  store <8 x i8> %v, ptr %loc
  %gep = getelementptr [3 x i4], ptr %loc, i64 0
  %ref = load <3 x i4>, ptr %gep
  %r = extractelement <3 x i4> %ref, i32 1
  ret i4 %r
}

; Stores <8 x i8> and loads a <2 x i12> (24 bits) whose elements are wider
; than the stored i8 elements. Expected: bitcast to i64, truncate to i24,
; bitcast to <2 x i12>.
define i12 @load_subvector_different_type4(ptr %loc, <8 x i8> %v) {
; CHECK-LABEL: define i12 @load_subvector_different_type4
; CHECK-SAME: (ptr [[LOC:%.*]], <8 x i8> [[V:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <8 x i8> [[V]], ptr [[LOC]], align 8
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i24
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i24 [[TMP1]] to <2 x i12>
; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x i12> [[TMP2]], i32 1
; CHECK-NEXT:    ret i12 [[R]]
;
entry:
  store <8 x i8> %v, ptr %loc
  %gep = getelementptr [2 x i12], ptr %loc, i64 0
  %ref = load <2 x i12>, ptr %gep
  %r = extractelement <2 x i12> %ref, i32 1
  ret i12 %r
}

; Stores <8 x i8> and loads a <2 x i6> (12 bits) whose elements are narrower
; than the stored i8 elements. Expected: bitcast to i64, truncate to i16 and
; then i12, bitcast to <2 x i6>.
define i6 @load_subvector_different_type5(ptr %loc, <8 x i8> %v) {
; CHECK-LABEL: define i6 @load_subvector_different_type5
; CHECK-SAME: (ptr [[LOC:%.*]], <8 x i8> [[V:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    store <8 x i8> [[V]], ptr [[LOC]], align 8
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i8> [[V]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i16
; CHECK-NEXT:    [[TMP2:%.*]] = trunc i16 [[TMP1]] to i12
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i12 [[TMP2]] to <2 x i6>
; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x i6> [[TMP3]], i32 1
; CHECK-NEXT:    ret i6 [[R]]
;
entry:
  store <8 x i8> %v, ptr %loc
  %gep = getelementptr [2 x i6], ptr %loc, i64 0
  %ref = load <2 x i6>, ptr %gep
  %r = extractelement <2 x i6> %ref, i32 1
  ret i6 %r
}

; External callees used by the call_* tests above: @f carries no memory
; attribute, @f_no_mem is declared memory(none).
declare void @f(<4 x i32>)
declare void @f_no_mem(<4 x i32>) memory(none)