; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=vector-combine -mtriple=arm64-apple-darwinos -S %s | FileCheck --check-prefixes=CHECK,LIMIT-DEFAULT %s
; RUN: opt -passes=vector-combine -mtriple=arm64-apple-darwinos -vector-combine-max-scan-instrs=2 -S %s | FileCheck --check-prefixes=CHECK,LIMIT2 %s

; NOTE(review): the name says lane 0, but the body extracts lane 3 and is
; identical to @load_extract_idx_3 below (the CHECK lines match lane 3 too).
; Presumably the function was meant to extract lane 0 -- confirm against the
; upstream test before regenerating checks.
define i32 @load_extract_idx_0(ptr %x) {
; CHECK-LABEL: @load_extract_idx_0(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 3
; CHECK-NEXT:    [[R:%.*]] = load i32, ptr [[TMP1]], align 4
; CHECK-NEXT:    ret i32 [[R]]
;
  %lv = load <4 x i32>, ptr %x
  %r = extractelement <4 x i32> %lv, i32 3
  ret i32 %r
}

define i32 @vscale_load_extract_idx_0(ptr %x) {
; CHECK-LABEL: @vscale_load_extract_idx_0(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <vscale x 4 x i32>, ptr [[X:%.*]], i32 0, i32 0
; CHECK-NEXT:    [[R:%.*]] = load i32, ptr [[TMP1]], align 16
; CHECK-NEXT:    ret i32 [[R]]
;
  %lv = load <vscale x 4 x i32>, ptr %x
  %r = extractelement <vscale x 4 x i32> %lv, i32 0
  ret i32 %r
}

; If the original load had a smaller alignment than the scalar type, the
; smaller alignment should be used.
29define i32 @load_extract_idx_0_small_alignment(ptr %x) { 30; CHECK-LABEL: @load_extract_idx_0_small_alignment( 31; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 3 32; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 2 33; CHECK-NEXT: ret i32 [[R]] 34; 35 %lv = load <4 x i32>, ptr %x, align 2 36 %r = extractelement <4 x i32> %lv, i32 3 37 ret i32 %r 38} 39 40define i32 @load_extract_idx_1(ptr %x) { 41; CHECK-LABEL: @load_extract_idx_1( 42; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 1 43; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 4 44; CHECK-NEXT: ret i32 [[R]] 45; 46 %lv = load <4 x i32>, ptr %x 47 %r = extractelement <4 x i32> %lv, i32 1 48 ret i32 %r 49} 50 51define i32 @load_extract_idx_2(ptr %x) { 52; CHECK-LABEL: @load_extract_idx_2( 53; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 2 54; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 8 55; CHECK-NEXT: ret i32 [[R]] 56; 57 %lv = load <4 x i32>, ptr %x 58 %r = extractelement <4 x i32> %lv, i32 2 59 ret i32 %r 60} 61 62define i32 @vscale_load_extract_idx_2(ptr %x) { 63; CHECK-LABEL: @vscale_load_extract_idx_2( 64; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <vscale x 4 x i32>, ptr [[X:%.*]], i32 0, i32 2 65; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 8 66; CHECK-NEXT: ret i32 [[R]] 67; 68 %lv = load <vscale x 4 x i32>, ptr %x 69 %r = extractelement <vscale x 4 x i32> %lv, i32 2 70 ret i32 %r 71} 72 73define i32 @load_extract_idx_3(ptr %x) { 74; CHECK-LABEL: @load_extract_idx_3( 75; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 3 76; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 4 77; CHECK-NEXT: ret i32 [[R]] 78; 79 %lv = load <4 x i32>, ptr %x 80 %r = extractelement <4 x i32> %lv, i32 3 81 ret i32 %r 82} 83 84; Out-of-bounds index for extractelement, should not be converted to narrow 85; load, 
because it would introduce a dereference of a poison pointer. 86define i32 @load_extract_idx_4(ptr %x) { 87; CHECK-LABEL: @load_extract_idx_4( 88; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 89; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 4 90; CHECK-NEXT: ret i32 [[R]] 91; 92 %lv = load <4 x i32>, ptr %x 93 %r = extractelement <4 x i32> %lv, i32 4 94 ret i32 %r 95} 96 97define i32 @vscale_load_extract_idx_4(ptr %x) { 98; CHECK-LABEL: @vscale_load_extract_idx_4( 99; CHECK-NEXT: [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16 100; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i32 4 101; CHECK-NEXT: ret i32 [[R]] 102; 103 %lv = load <vscale x 4 x i32>, ptr %x 104 %r = extractelement <vscale x 4 x i32> %lv, i32 4 105 ret i32 %r 106} 107 108define i32 @load_extract_idx_var_i64(ptr %x, i64 %idx) { 109; CHECK-LABEL: @load_extract_idx_var_i64( 110; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 111; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX:%.*]] 112; CHECK-NEXT: ret i32 [[R]] 113; 114 %lv = load <4 x i32>, ptr %x 115 %r = extractelement <4 x i32> %lv, i64 %idx 116 ret i32 %r 117} 118 119declare void @maythrow() readnone 120 121define i32 @load_extract_idx_var_i64_known_valid_by_assume(ptr %x, i64 %idx) { 122; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_assume( 123; CHECK-NEXT: entry: 124; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 4 125; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) 126; CHECK-NEXT: call void @maythrow() 127; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX]] 128; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP0]], align 4 129; CHECK-NEXT: ret i32 [[R]] 130; 131entry: 132 %cmp = icmp ult i64 %idx, 4 133 call void @llvm.assume(i1 %cmp) 134 %lv = load <4 x i32>, ptr %x 135 call void @maythrow() 136 %r = extractelement <4 x i32> %lv, i64 %idx 137 ret i32 %r 138} 139 140define i32 
@vscale_load_extract_idx_var_i64_known_valid_by_assume(ptr %x, i64 %idx) { 141; CHECK-LABEL: @vscale_load_extract_idx_var_i64_known_valid_by_assume( 142; CHECK-NEXT: entry: 143; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 4 144; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) 145; CHECK-NEXT: call void @maythrow() 146; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <vscale x 4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX]] 147; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP0]], align 4 148; CHECK-NEXT: ret i32 [[R]] 149; 150entry: 151 %cmp = icmp ult i64 %idx, 4 152 call void @llvm.assume(i1 %cmp) 153 %lv = load <vscale x 4 x i32>, ptr %x 154 call void @maythrow() 155 %r = extractelement <vscale x 4 x i32> %lv, i64 %idx 156 ret i32 %r 157} 158 159declare i1 @cond() 160 161define i32 @load_extract_idx_var_i64_known_valid_by_assume_in_dominating_block(ptr %x, i64 %idx, i1 %c.1) { 162; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_assume_in_dominating_block( 163; CHECK-NEXT: entry: 164; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 4 165; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) 166; CHECK-NEXT: br i1 [[C_1:%.*]], label [[LOOP:%.*]], label [[EXIT:%.*]] 167; CHECK: loop: 168; CHECK-NEXT: call void @maythrow() 169; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX]] 170; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP0]], align 4 171; CHECK-NEXT: [[C_2:%.*]] = call i1 @cond() 172; CHECK-NEXT: br i1 [[C_2]], label [[LOOP]], label [[EXIT]] 173; CHECK: exit: 174; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[R]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] 175; CHECK-NEXT: ret i32 [[P]] 176; 177entry: 178 %cmp = icmp ult i64 %idx, 4 179 call void @llvm.assume(i1 %cmp) 180 br i1 %c.1, label %loop, label %exit 181 182loop: 183 %lv = load <4 x i32>, ptr %x 184 call void @maythrow() 185 %r = extractelement <4 x i32> %lv, i64 %idx 186 %c.2 = call i1 @cond() 187 br i1 %c.2, label %loop, label %exit 188 189exit: 190 %p = phi i32 [ %r, %loop 
], [ 0, %entry ] 191 ret i32 %p 192} 193 194define i32 @load_extract_idx_var_i64_known_valid_by_assume_in_non_dominating_block(ptr %x, i64 %idx, i1 %c.1, i1 %c.2) { 195; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_assume_in_non_dominating_block( 196; CHECK-NEXT: entry: 197; CHECK-NEXT: br i1 [[C_1:%.*]], label [[ASSUME_CHECK:%.*]], label [[LOOP:%.*]] 198; CHECK: assume_check: 199; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 4 200; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) 201; CHECK-NEXT: br i1 [[C_2:%.*]], label [[LOOP]], label [[EXIT:%.*]] 202; CHECK: loop: 203; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 204; CHECK-NEXT: call void @maythrow() 205; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX]] 206; CHECK-NEXT: [[C_3:%.*]] = call i1 @cond() 207; CHECK-NEXT: br i1 [[C_3]], label [[LOOP]], label [[EXIT]] 208; CHECK: exit: 209; CHECK-NEXT: [[P:%.*]] = phi i32 [ [[R]], [[LOOP]] ], [ 0, [[ASSUME_CHECK]] ] 210; CHECK-NEXT: ret i32 0 211; 212entry: 213 br i1 %c.1, label %assume_check, label %loop 214 215assume_check: 216 %cmp = icmp ult i64 %idx, 4 217 call void @llvm.assume(i1 %cmp) 218 br i1 %c.2, label %loop, label %exit 219 220loop: 221 %lv = load <4 x i32>, ptr %x 222 call void @maythrow() 223 %r = extractelement <4 x i32> %lv, i64 %idx 224 %c.3 = call i1 @cond() 225 br i1 %c.3, label %loop, label %exit 226 227exit: 228 %p = phi i32 [ %r, %loop ], [ 0, %assume_check ] 229 ret i32 0 230} 231 232define i32 @load_extract_idx_var_i64_not_known_valid_by_assume_after_load(ptr %x, i64 %idx) { 233; CHECK-LABEL: @load_extract_idx_var_i64_not_known_valid_by_assume_after_load( 234; CHECK-NEXT: entry: 235; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 4 236; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 237; CHECK-NEXT: call void @maythrow() 238; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) 239; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX]] 240; CHECK-NEXT: ret 
i32 [[R]] 241; 242entry: 243 %cmp = icmp ult i64 %idx, 4 244 %lv = load <4 x i32>, ptr %x 245 call void @maythrow() 246 call void @llvm.assume(i1 %cmp) 247 %r = extractelement <4 x i32> %lv, i64 %idx 248 ret i32 %r 249} 250 251define i32 @load_extract_idx_var_i64_not_known_valid_by_assume(ptr %x, i64 %idx) { 252; CHECK-LABEL: @load_extract_idx_var_i64_not_known_valid_by_assume( 253; CHECK-NEXT: entry: 254; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 5 255; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) 256; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 257; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX]] 258; CHECK-NEXT: ret i32 [[R]] 259; 260entry: 261 %cmp = icmp ult i64 %idx, 5 262 call void @llvm.assume(i1 %cmp) 263 %lv = load <4 x i32>, ptr %x 264 %r = extractelement <4 x i32> %lv, i64 %idx 265 ret i32 %r 266} 267 268define i32 @vscale_load_extract_idx_var_i64_not_known_valid_by_assume_0(ptr %x, i64 %idx) { 269; CHECK-LABEL: @vscale_load_extract_idx_var_i64_not_known_valid_by_assume_0( 270; CHECK-NEXT: entry: 271; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 5 272; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) 273; CHECK-NEXT: [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16 274; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i64 [[IDX]] 275; CHECK-NEXT: ret i32 [[R]] 276; 277entry: 278 %cmp = icmp ult i64 %idx, 5 279 call void @llvm.assume(i1 %cmp) 280 %lv = load <vscale x 4 x i32>, ptr %x 281 %r = extractelement <vscale x 4 x i32> %lv, i64 %idx 282 ret i32 %r 283} 284 285define i32 @vscale_load_extract_idx_var_i64_not_known_valid_by_assume_1(ptr %x, i64 %idx) { 286; CHECK-LABEL: @vscale_load_extract_idx_var_i64_not_known_valid_by_assume_1( 287; CHECK-NEXT: entry: 288; CHECK-NEXT: [[VS:%.*]] = call i64 @llvm.vscale.i64() 289; CHECK-NEXT: [[VM:%.*]] = mul i64 [[VS]], 4 290; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], [[VM]] 291; CHECK-NEXT: call void @llvm.assume(i1 
[[CMP]]) 292; CHECK-NEXT: [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16 293; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i64 [[IDX]] 294; CHECK-NEXT: ret i32 [[R]] 295; 296entry: 297 %vs = call i64 @llvm.vscale.i64() 298 %vm = mul i64 %vs, 4 299 %cmp = icmp ult i64 %idx, %vm 300 call void @llvm.assume(i1 %cmp) 301 %lv = load <vscale x 4 x i32>, ptr %x 302 %r = extractelement <vscale x 4 x i32> %lv, i64 %idx 303 ret i32 %r 304} 305 306declare i64 @llvm.vscale.i64() 307declare void @llvm.assume(i1) 308 309define i32 @load_extract_idx_var_i64_known_valid_by_and(ptr %x, i64 %idx) { 310; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_and( 311; CHECK-NEXT: entry: 312; CHECK-NEXT: [[IDX_FROZEN:%.*]] = freeze i64 [[IDX:%.*]] 313; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i64 [[IDX_FROZEN]], 3 314; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]] 315; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP0]], align 4 316; CHECK-NEXT: ret i32 [[R]] 317; 318entry: 319 %idx.clamped = and i64 %idx, 3 320 %lv = load <4 x i32>, ptr %x 321 %r = extractelement <4 x i32> %lv, i64 %idx.clamped 322 ret i32 %r 323} 324 325define i32 @vscale_load_extract_idx_var_i64_known_valid_by_and(ptr %x, i64 %idx) { 326; CHECK-LABEL: @vscale_load_extract_idx_var_i64_known_valid_by_and( 327; CHECK-NEXT: entry: 328; CHECK-NEXT: [[IDX_FROZEN:%.*]] = freeze i64 [[IDX:%.*]] 329; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i64 [[IDX_FROZEN]], 3 330; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <vscale x 4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]] 331; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP0]], align 4 332; CHECK-NEXT: ret i32 [[R]] 333; 334entry: 335 %idx.clamped = and i64 %idx, 3 336 %lv = load <vscale x 4 x i32>, ptr %x 337 %r = extractelement <vscale x 4 x i32> %lv, i64 %idx.clamped 338 ret i32 %r 339} 340 341define i32 @load_extract_idx_var_i64_known_valid_by_and_noundef(ptr %x, i64 noundef %idx) { 342; 
CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_and_noundef( 343; CHECK-NEXT: entry: 344; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i64 [[IDX:%.*]], 3 345; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]] 346; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP0]], align 4 347; CHECK-NEXT: ret i32 [[R]] 348; 349entry: 350 %idx.clamped = and i64 %idx, 3 351 %lv = load <4 x i32>, ptr %x 352 %r = extractelement <4 x i32> %lv, i64 %idx.clamped 353 ret i32 %r 354} 355 356define i32 @load_extract_idx_var_i64_not_known_valid_by_and(ptr %x, i64 %idx) { 357; CHECK-LABEL: @load_extract_idx_var_i64_not_known_valid_by_and( 358; CHECK-NEXT: entry: 359; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i64 [[IDX:%.*]], 4 360; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 361; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_CLAMPED]] 362; CHECK-NEXT: ret i32 [[R]] 363; 364entry: 365 %idx.clamped = and i64 %idx, 4 366 %lv = load <4 x i32>, ptr %x 367 %r = extractelement <4 x i32> %lv, i64 %idx.clamped 368 ret i32 %r 369} 370 371define i32 @vscale_load_extract_idx_var_i64_not_known_valid_by_and(ptr %x, i64 %idx) { 372; CHECK-LABEL: @vscale_load_extract_idx_var_i64_not_known_valid_by_and( 373; CHECK-NEXT: entry: 374; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = and i64 [[IDX:%.*]], 4 375; CHECK-NEXT: [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16 376; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i64 [[IDX_CLAMPED]] 377; CHECK-NEXT: ret i32 [[R]] 378; 379entry: 380 %idx.clamped = and i64 %idx, 4 381 %lv = load <vscale x 4 x i32>, ptr %x 382 %r = extractelement <vscale x 4 x i32> %lv, i64 %idx.clamped 383 ret i32 %r 384} 385 386define i32 @load_extract_idx_var_i64_known_valid_by_urem(ptr %x, i64 %idx) { 387; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_urem( 388; CHECK-NEXT: entry: 389; CHECK-NEXT: [[IDX_FROZEN:%.*]] = freeze i64 [[IDX:%.*]] 390; CHECK-NEXT: [[IDX_CLAMPED:%.*]] 
= urem i64 [[IDX_FROZEN]], 4 391; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]] 392; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP0]], align 4 393; CHECK-NEXT: ret i32 [[R]] 394; 395entry: 396 %idx.clamped = urem i64 %idx, 4 397 %lv = load <4 x i32>, ptr %x 398 %r = extractelement <4 x i32> %lv, i64 %idx.clamped 399 ret i32 %r 400} 401 402define i32 @vscale_load_extract_idx_var_i64_known_valid_by_urem(ptr %x, i64 %idx) { 403; CHECK-LABEL: @vscale_load_extract_idx_var_i64_known_valid_by_urem( 404; CHECK-NEXT: entry: 405; CHECK-NEXT: [[IDX_FROZEN:%.*]] = freeze i64 [[IDX:%.*]] 406; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i64 [[IDX_FROZEN]], 4 407; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <vscale x 4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]] 408; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP0]], align 4 409; CHECK-NEXT: ret i32 [[R]] 410; 411entry: 412 %idx.clamped = urem i64 %idx, 4 413 %lv = load <vscale x 4 x i32>, ptr %x 414 %r = extractelement <vscale x 4 x i32> %lv, i64 %idx.clamped 415 ret i32 %r 416} 417 418define i32 @load_extract_idx_var_i64_known_valid_by_urem_noundef(ptr %x, i64 noundef %idx) { 419; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_urem_noundef( 420; CHECK-NEXT: entry: 421; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i64 [[IDX:%.*]], 4 422; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]] 423; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP0]], align 4 424; CHECK-NEXT: ret i32 [[R]] 425; 426entry: 427 %idx.clamped = urem i64 %idx, 4 428 %lv = load <4 x i32>, ptr %x 429 %r = extractelement <4 x i32> %lv, i64 %idx.clamped 430 ret i32 %r 431} 432 433define i32 @load_extract_idx_var_i64_not_known_valid_by_urem(ptr %x, i64 %idx) { 434; CHECK-LABEL: @load_extract_idx_var_i64_not_known_valid_by_urem( 435; CHECK-NEXT: entry: 436; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i64 [[IDX:%.*]], 5 437; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, 
ptr [[X:%.*]], align 16 438; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_CLAMPED]] 439; CHECK-NEXT: ret i32 [[R]] 440; 441entry: 442 %idx.clamped = urem i64 %idx, 5 443 %lv = load <4 x i32>, ptr %x 444 %r = extractelement <4 x i32> %lv, i64 %idx.clamped 445 ret i32 %r 446} 447 448define i32 @vscale_load_extract_idx_var_i64_not_known_valid_by_urem(ptr %x, i64 %idx) { 449; CHECK-LABEL: @vscale_load_extract_idx_var_i64_not_known_valid_by_urem( 450; CHECK-NEXT: entry: 451; CHECK-NEXT: [[IDX_CLAMPED:%.*]] = urem i64 [[IDX:%.*]], 5 452; CHECK-NEXT: [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16 453; CHECK-NEXT: [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i64 [[IDX_CLAMPED]] 454; CHECK-NEXT: ret i32 [[R]] 455; 456entry: 457 %idx.clamped = urem i64 %idx, 5 458 %lv = load <vscale x 4 x i32>, ptr %x 459 %r = extractelement <vscale x 4 x i32> %lv, i64 %idx.clamped 460 ret i32 %r 461} 462 463define i32 @load_extract_idx_var_i32(ptr %x, i32 %idx) { 464; CHECK-LABEL: @load_extract_idx_var_i32( 465; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 466; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 [[IDX:%.*]] 467; CHECK-NEXT: ret i32 [[R]] 468; 469 %lv = load <4 x i32>, ptr %x 470 %r = extractelement <4 x i32> %lv, i32 %idx 471 ret i32 %r 472} 473 474declare void @clobber() 475 476define i32 @load_extract_clobber_call_before(ptr %x) { 477; CHECK-LABEL: @load_extract_clobber_call_before( 478; CHECK-NEXT: call void @clobber() 479; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 2 480; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 8 481; CHECK-NEXT: ret i32 [[R]] 482; 483 call void @clobber() 484 %lv = load <4 x i32>, ptr %x 485 %r = extractelement <4 x i32> %lv, i32 2 486 ret i32 %r 487} 488 489define i32 @load_extract_clobber_call_between(ptr %x) { 490; CHECK-LABEL: @load_extract_clobber_call_between( 491; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr 
[[X:%.*]], align 16 492; CHECK-NEXT: call void @clobber() 493; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 2 494; CHECK-NEXT: ret i32 [[R]] 495; 496 %lv = load <4 x i32>, ptr %x 497 call void @clobber() 498 %r = extractelement <4 x i32> %lv, i32 2 499 ret i32 %r 500} 501 502define i32 @load_extract_clobber_call_after(ptr %x) { 503; CHECK-LABEL: @load_extract_clobber_call_after( 504; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 2 505; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 8 506; CHECK-NEXT: call void @clobber() 507; CHECK-NEXT: ret i32 [[R]] 508; 509 %lv = load <4 x i32>, ptr %x 510 %r = extractelement <4 x i32> %lv, i32 2 511 call void @clobber() 512 ret i32 %r 513} 514 515define i32 @load_extract_clobber_store_before(ptr %x, ptr %y) { 516; CHECK-LABEL: @load_extract_clobber_store_before( 517; CHECK-NEXT: store i8 0, ptr [[Y:%.*]], align 1 518; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 2 519; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 8 520; CHECK-NEXT: ret i32 [[R]] 521; 522 store i8 0, ptr %y 523 %lv = load <4 x i32>, ptr %x 524 %r = extractelement <4 x i32> %lv, i32 2 525 ret i32 %r 526} 527 528define i32 @load_extract_clobber_store_between(ptr %x, ptr %y) { 529; CHECK-LABEL: @load_extract_clobber_store_between( 530; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 531; CHECK-NEXT: store i8 0, ptr [[Y:%.*]], align 1 532; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 2 533; CHECK-NEXT: ret i32 [[R]] 534; 535 %lv = load <4 x i32>, ptr %x 536 store i8 0, ptr %y 537 %r = extractelement <4 x i32> %lv, i32 2 538 ret i32 %r 539} 540 541define i32 @load_extract_clobber_store_between_limit(ptr %x, ptr %y, <8 x i32> %z) { 542; CHECK-LABEL: @load_extract_clobber_store_between_limit( 543; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 544; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i32> 
[[Z1:%.*]], <8 x i32> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 545; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i32> [[Z1]], [[SHIFT]] 546; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 547; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i32> [[TMP1]], [[SHIFT1]] 548; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 549; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[SHIFT2]] 550; CHECK-NEXT: [[SHIFT3:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 551; CHECK-NEXT: [[Z:%.*]] = add <8 x i32> [[TMP3]], [[SHIFT3]] 552; CHECK-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z]], i32 0 553; CHECK-NEXT: store i8 0, ptr [[Y:%.*]], align 1 554; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 2 555; CHECK-NEXT: [[ADD_4:%.*]] = add i32 [[Z_0]], [[R]] 556; CHECK-NEXT: ret i32 [[ADD_4]] 557; 558 %lv = load <4 x i32>, ptr %x 559 %z.0 = extractelement <8 x i32> %z, i32 0 560 %z.1 = extractelement <8 x i32> %z, i32 1 561 %add.0 = add i32 %z.0, %z.1 562 %z.2 = extractelement <8 x i32> %z, i32 2 563 %add.1 = add i32 %add.0, %z.2 564 %z.3 = extractelement <8 x i32> %z, i32 3 565 %add.2 = add i32 %add.1, %z.3 566 %z.4 = extractelement <8 x i32> %z, i32 4 567 %add.3 = add i32 %add.2, %z.4 568 store i8 0, ptr %y 569 %r = extractelement <4 x i32> %lv, i32 2 570 %add.4 = add i32 %add.3, %r 571 ret i32 %add.4 572} 573 574define i32 @load_extract_clobber_store_after_limit(ptr %x, ptr %y, <8 x i32> %z) { 575; LIMIT-DEFAULT-LABEL: @load_extract_clobber_store_after_limit( 576; LIMIT-DEFAULT-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i32> [[Z1:%.*]], <8 x 
i32> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 577; LIMIT-DEFAULT-NEXT: [[TMP4:%.*]] = add <8 x i32> [[Z1]], [[SHIFT]] 578; LIMIT-DEFAULT-NEXT: [[SHIFT1:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 579; LIMIT-DEFAULT-NEXT: [[TMP2:%.*]] = add <8 x i32> [[TMP4]], [[SHIFT1]] 580; LIMIT-DEFAULT-NEXT: [[SHIFT2:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 581; LIMIT-DEFAULT-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[SHIFT2]] 582; LIMIT-DEFAULT-NEXT: [[SHIFT3:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 583; LIMIT-DEFAULT-NEXT: [[Z:%.*]] = add <8 x i32> [[TMP3]], [[SHIFT3]] 584; LIMIT-DEFAULT-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z]], i32 0 585; LIMIT-DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 2 586; LIMIT-DEFAULT-NEXT: [[R:%.*]] = load i32, ptr [[TMP1]], align 8 587; LIMIT-DEFAULT-NEXT: store i8 0, ptr [[Y:%.*]], align 1 588; LIMIT-DEFAULT-NEXT: [[ADD_4:%.*]] = add i32 [[Z_0]], [[R]] 589; LIMIT-DEFAULT-NEXT: ret i32 [[ADD_4]] 590; 591; LIMIT2-LABEL: @load_extract_clobber_store_after_limit( 592; LIMIT2-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 593; LIMIT2-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i32> [[Z1:%.*]], <8 x i32> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 594; LIMIT2-NEXT: [[TMP1:%.*]] = add <8 x i32> [[Z1]], [[SHIFT]] 595; LIMIT2-NEXT: [[SHIFT1:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 596; 
LIMIT2-NEXT: [[TMP2:%.*]] = add <8 x i32> [[TMP1]], [[SHIFT1]] 597; LIMIT2-NEXT: [[SHIFT2:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 598; LIMIT2-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[SHIFT2]] 599; LIMIT2-NEXT: [[SHIFT3:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 600; LIMIT2-NEXT: [[Z:%.*]] = add <8 x i32> [[TMP3]], [[SHIFT3]] 601; LIMIT2-NEXT: [[Z_0:%.*]] = extractelement <8 x i32> [[Z]], i32 0 602; LIMIT2-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 2 603; LIMIT2-NEXT: store i8 0, ptr [[Y:%.*]], align 1 604; LIMIT2-NEXT: [[ADD_4:%.*]] = add i32 [[Z_0]], [[R]] 605; LIMIT2-NEXT: ret i32 [[ADD_4]] 606; 607 %lv = load <4 x i32>, ptr %x 608 %z.0 = extractelement <8 x i32> %z, i32 0 609 %z.1 = extractelement <8 x i32> %z, i32 1 610 %add.0 = add i32 %z.0, %z.1 611 %z.2 = extractelement <8 x i32> %z, i32 2 612 %add.1 = add i32 %add.0, %z.2 613 %z.3 = extractelement <8 x i32> %z, i32 3 614 %add.2 = add i32 %add.1, %z.3 615 %z.4 = extractelement <8 x i32> %z, i32 4 616 %add.3 = add i32 %add.2, %z.4 617 %r = extractelement <4 x i32> %lv, i32 2 618 store i8 0, ptr %y 619 %add.4 = add i32 %add.3, %r 620 ret i32 %add.4 621} 622 623declare void @use.v4i32(<4 x i32>) 624 625define i32 @load_extract_idx_different_bbs(ptr %x, i1 %c) { 626; CHECK-LABEL: @load_extract_idx_different_bbs( 627; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 628; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]] 629; CHECK: then: 630; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 1 631; CHECK-NEXT: ret i32 [[R]] 632; CHECK: else: 633; CHECK-NEXT: call void @use.v4i32(<4 x i32> [[LV]]) 634; CHECK-NEXT: ret i32 20 635; 636 %lv = load <4 x i32>, ptr %x 637 br i1 %c, label %then, label %else 638 639then: 640 %r 
= extractelement <4 x i32> %lv, i32 1 641 ret i32 %r 642 643else: 644 call void @use.v4i32(<4 x i32> %lv) 645 ret i32 20 646} 647 648define i31 @load_with_non_power_of_2_element_type(ptr %x) { 649; CHECK-LABEL: @load_with_non_power_of_2_element_type( 650; CHECK-NEXT: [[LV:%.*]] = load <4 x i31>, ptr [[X:%.*]], align 16 651; CHECK-NEXT: [[R:%.*]] = extractelement <4 x i31> [[LV]], i32 1 652; CHECK-NEXT: ret i31 [[R]] 653; 654 %lv = load <4 x i31>, ptr %x 655 %r = extractelement <4 x i31> %lv, i32 1 656 ret i31 %r 657} 658 659define i1 @load_with_non_power_of_2_element_type_2(ptr %x) { 660; CHECK-LABEL: @load_with_non_power_of_2_element_type_2( 661; CHECK-NEXT: [[LV:%.*]] = load <8 x i1>, ptr [[X:%.*]], align 1 662; CHECK-NEXT: [[R:%.*]] = extractelement <8 x i1> [[LV]], i32 1 663; CHECK-NEXT: ret i1 [[R]] 664; 665 %lv = load <8 x i1>, ptr %x 666 %r = extractelement <8 x i1> %lv, i32 1 667 ret i1 %r 668} 669 670; Scalarizing the load for multiple constant indices may not be profitable. 671define i32 @load_multiple_extracts_with_constant_idx(ptr %x) { 672; CHECK-LABEL: @load_multiple_extracts_with_constant_idx( 673; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 674; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[LV]], <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison> 675; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[LV]], [[SHIFT]] 676; CHECK-NEXT: [[RES:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0 677; CHECK-NEXT: ret i32 [[RES]] 678; 679 %lv = load <4 x i32>, ptr %x 680 %e.0 = extractelement <4 x i32> %lv, i32 0 681 %e.1 = extractelement <4 x i32> %lv, i32 1 682 %res = add i32 %e.0, %e.1 683 ret i32 %res 684} 685 686; Scalarizing the load for multiple extracts is profitable in this case, 687; because the large vector requires 2 vector registers.
688define i32 @load_multiple_extracts_with_constant_idx_profitable(ptr %x) { 689; CHECK-LABEL: @load_multiple_extracts_with_constant_idx_profitable( 690; CHECK-NEXT: [[LV:%.*]] = load <8 x i32>, ptr [[X:%.*]], align 16 691; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <8 x i32> [[LV]], <8 x i32> poison, <8 x i32> <i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison> 692; CHECK-NEXT: [[TMP1:%.*]] = add <8 x i32> [[LV]], [[SHIFT]] 693; CHECK-NEXT: [[RES:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0 694; CHECK-NEXT: ret i32 [[RES]] 695; 696 %lv = load <8 x i32>, ptr %x, align 16 697 %e.0 = extractelement <8 x i32> %lv, i32 0 698 %e.1 = extractelement <8 x i32> %lv, i32 6 699 %res = add i32 %e.0, %e.1 700 ret i32 %res 701} 702 703; Scalarizing may or may not be profitable, depending on the target. 704define i32 @load_multiple_2_with_variable_indices(ptr %x, i64 %idx.0, i64 %idx.1) { 705; CHECK-LABEL: @load_multiple_2_with_variable_indices( 706; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 707; CHECK-NEXT: [[E_0:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_0:%.*]] 708; CHECK-NEXT: [[E_1:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_1:%.*]] 709; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] 710; CHECK-NEXT: ret i32 [[RES]] 711; 712 %lv = load <4 x i32>, ptr %x 713 %e.0 = extractelement <4 x i32> %lv, i64 %idx.0 714 %e.1 = extractelement <4 x i32> %lv, i64 %idx.1 715 %res = add i32 %e.0, %e.1 716 ret i32 %res 717} 718 719define i32 @load_4_extracts_with_variable_indices_short_vector(ptr %x, i64 %idx.0, i64 %idx.1, i64 %idx.2, i64 %idx.3) { 720; CHECK-LABEL: @load_4_extracts_with_variable_indices_short_vector( 721; CHECK-NEXT: [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16 722; CHECK-NEXT: [[E_0:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_0:%.*]] 723; CHECK-NEXT: [[E_1:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_1:%.*]] 724; CHECK-NEXT: [[E_2:%.*]] = extractelement <4 x i32> 
[[LV]], i64 [[IDX_2:%.*]] 725; CHECK-NEXT: [[E_3:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_3:%.*]] 726; CHECK-NEXT: [[RES_0:%.*]] = add i32 [[E_0]], [[E_1]] 727; CHECK-NEXT: [[RES_1:%.*]] = add i32 [[RES_0]], [[E_2]] 728; CHECK-NEXT: [[RES_2:%.*]] = add i32 [[RES_1]], [[E_3]] 729; CHECK-NEXT: ret i32 [[RES_2]] 730; 731 %lv = load <4 x i32>, ptr %x 732 %e.0 = extractelement <4 x i32> %lv, i64 %idx.0 733 %e.1 = extractelement <4 x i32> %lv, i64 %idx.1 734 %e.2 = extractelement <4 x i32> %lv, i64 %idx.2 735 %e.3 = extractelement <4 x i32> %lv, i64 %idx.3 736 %res.0 = add i32 %e.0, %e.1 737 %res.1 = add i32 %res.0, %e.2 738 %res.2 = add i32 %res.1, %e.3 739 ret i32 %res.2 740} 741 742define i32 @load_multiple_extracts_with_variable_indices_large_vector_only_first_valid(ptr %x, i64 %idx.0, i64 %idx.1) { 743; CHECK-LABEL: @load_multiple_extracts_with_variable_indices_large_vector_only_first_valid( 744; CHECK-NEXT: [[CMP_IDX_0:%.*]] = icmp ult i64 [[IDX_0:%.*]], 16 745; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_IDX_0]]) 746; CHECK-NEXT: [[LV:%.*]] = load <16 x i32>, ptr [[X:%.*]], align 64 747; CHECK-NEXT: [[E_0:%.*]] = extractelement <16 x i32> [[LV]], i64 [[IDX_0]] 748; CHECK-NEXT: [[E_1:%.*]] = extractelement <16 x i32> [[LV]], i64 [[IDX_1:%.*]] 749; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] 750; CHECK-NEXT: ret i32 [[RES]] 751; 752 %cmp.idx.0 = icmp ult i64 %idx.0, 16 753 call void @llvm.assume(i1 %cmp.idx.0) 754 755 %lv = load <16 x i32>, ptr %x 756 %e.0 = extractelement <16 x i32> %lv, i64 %idx.0 757 %e.1 = extractelement <16 x i32> %lv, i64 %idx.1 758 %res = add i32 %e.0, %e.1 759 ret i32 %res 760} 761 762define i32 @load_multiple_extracts_with_variable_indices_large_vector_only_all_valid(ptr %x, i64 %idx.0, i64 %idx.1) { 763; CHECK-LABEL: @load_multiple_extracts_with_variable_indices_large_vector_only_all_valid( 764; CHECK-NEXT: [[CMP_IDX_0:%.*]] = icmp ult i64 [[IDX_0:%.*]], 16 765; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_IDX_0]]) 766; 
CHECK-NEXT: [[CMP_IDX_1:%.*]] = icmp ult i64 [[IDX_1:%.*]], 16 767; CHECK-NEXT: call void @llvm.assume(i1 [[CMP_IDX_1]]) 768; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <16 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_0]] 769; CHECK-NEXT: [[E_0:%.*]] = load i32, ptr [[TMP1]], align 4 770; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <16 x i32>, ptr [[X]], i32 0, i64 [[IDX_1]] 771; CHECK-NEXT: [[E_1:%.*]] = load i32, ptr [[TMP2]], align 4 772; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] 773; CHECK-NEXT: ret i32 [[RES]] 774; 775 %cmp.idx.0 = icmp ult i64 %idx.0, 16 776 call void @llvm.assume(i1 %cmp.idx.0) 777 %cmp.idx.1 = icmp ult i64 %idx.1, 16 778 call void @llvm.assume(i1 %cmp.idx.1) 779 780 %lv = load <16 x i32>, ptr %x 781 %e.0 = extractelement <16 x i32> %lv, i64 %idx.0 782 %e.1 = extractelement <16 x i32> %lv, i64 %idx.1 783 %res = add i32 %e.0, %e.1 784 ret i32 %res 785} 786 787define i32 @load_multiple_extracts_with_variable_indices_large_vector_only_first_valid_by_and(ptr %x, i64 %idx.0, i64 %idx.1) { 788; CHECK-LABEL: @load_multiple_extracts_with_variable_indices_large_vector_only_first_valid_by_and( 789; CHECK-NEXT: [[IDX_0_CLAMPED:%.*]] = and i64 [[IDX_0:%.*]], 15 790; CHECK-NEXT: [[LV:%.*]] = load <16 x i32>, ptr [[X:%.*]], align 64 791; CHECK-NEXT: [[E_0:%.*]] = extractelement <16 x i32> [[LV]], i64 [[IDX_0_CLAMPED]] 792; CHECK-NEXT: [[E_1:%.*]] = extractelement <16 x i32> [[LV]], i64 [[IDX_1:%.*]] 793; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] 794; CHECK-NEXT: ret i32 [[RES]] 795; 796 %idx.0.clamped = and i64 %idx.0, 15 797 798 %lv = load <16 x i32>, ptr %x 799 %e.0 = extractelement <16 x i32> %lv, i64 %idx.0.clamped 800 %e.1 = extractelement <16 x i32> %lv, i64 %idx.1 801 %res = add i32 %e.0, %e.1 802 ret i32 %res 803} 804 805define i32 @load_multiple_extracts_with_variable_indices_large_vector_all_valid_by_and(ptr %x, i64 %idx.0, i64 %idx.1) { 806; CHECK-LABEL: 
@load_multiple_extracts_with_variable_indices_large_vector_all_valid_by_and( 807; CHECK-NEXT: [[IDX_0_FROZEN:%.*]] = freeze i64 [[IDX_0:%.*]] 808; CHECK-NEXT: [[IDX_0_CLAMPED:%.*]] = and i64 [[IDX_0_FROZEN]], 15 809; CHECK-NEXT: [[IDX_1_FROZEN:%.*]] = freeze i64 [[IDX_1:%.*]] 810; CHECK-NEXT: [[IDX_1_CLAMPED:%.*]] = and i64 [[IDX_1_FROZEN]], 15 811; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <16 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_0_CLAMPED]] 812; CHECK-NEXT: [[E_0:%.*]] = load i32, ptr [[TMP1]], align 4 813; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <16 x i32>, ptr [[X]], i32 0, i64 [[IDX_1_CLAMPED]] 814; CHECK-NEXT: [[E_1:%.*]] = load i32, ptr [[TMP2]], align 4 815; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] 816; CHECK-NEXT: ret i32 [[RES]] 817; 818 %idx.0.clamped = and i64 %idx.0, 15 819 %idx.1.clamped = and i64 %idx.1, 15 820 821 %lv = load <16 x i32>, ptr %x 822 %e.0 = extractelement <16 x i32> %lv, i64 %idx.0.clamped 823 %e.1 = extractelement <16 x i32> %lv, i64 %idx.1.clamped 824 %res = add i32 %e.0, %e.1 825 ret i32 %res 826} 827 828define i32 @load_multiple_extracts_with_unique_variable_indices_large_vector_valid_by_and(ptr %x, ptr %y, i64 %idx) { 829; LIMIT-DEFAULT-LABEL: @load_multiple_extracts_with_unique_variable_indices_large_vector_valid_by_and( 830; LIMIT-DEFAULT-NEXT: [[IDX_FROZEN:%.*]] = freeze i64 [[IDX:%.*]] 831; LIMIT-DEFAULT-NEXT: [[IDX_CLAMPED:%.*]] = and i64 [[IDX_FROZEN]], 15 832; LIMIT-DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <16 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]] 833; LIMIT-DEFAULT-NEXT: [[E_0:%.*]] = load i32, ptr [[TMP1]], align 4 834; LIMIT-DEFAULT-NEXT: [[TMP2:%.*]] = getelementptr inbounds <16 x i32>, ptr [[Y:%.*]], i32 0, i64 [[IDX_CLAMPED]] 835; LIMIT-DEFAULT-NEXT: [[E_1:%.*]] = load i32, ptr [[TMP2]], align 4 836; LIMIT-DEFAULT-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] 837; LIMIT-DEFAULT-NEXT: ret i32 [[RES]] 838; 839; LIMIT2-LABEL: 
@load_multiple_extracts_with_unique_variable_indices_large_vector_valid_by_and( 840; LIMIT2-NEXT: [[IDX_FROZEN:%.*]] = freeze i64 [[IDX:%.*]] 841; LIMIT2-NEXT: [[IDX_CLAMPED:%.*]] = and i64 [[IDX_FROZEN]], 15 842; LIMIT2-NEXT: [[LY:%.*]] = load <16 x i32>, ptr [[Y:%.*]], align 64 843; LIMIT2-NEXT: [[TMP1:%.*]] = getelementptr inbounds <16 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]] 844; LIMIT2-NEXT: [[E_0:%.*]] = load i32, ptr [[TMP1]], align 4 845; LIMIT2-NEXT: [[E_1:%.*]] = extractelement <16 x i32> [[LY]], i64 [[IDX_CLAMPED]] 846; LIMIT2-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] 847; LIMIT2-NEXT: ret i32 [[RES]] 848; 849 %idx.clamped = and i64 %idx, 15 850 851 %lx = load <16 x i32>, ptr %x 852 %ly = load <16 x i32>, ptr %y 853 %e.0 = extractelement <16 x i32> %lx, i64 %idx.clamped 854 %e.1 = extractelement <16 x i32> %ly, i64 %idx.clamped 855 %res = add i32 %e.0, %e.1 856 ret i32 %res 857} 858 859define i32 @load_multiple_extracts_with_variable_indices_large_vector_all_valid_by_and_some_noundef(ptr %x, i64 %idx.0, i64 noundef %idx.1) { 860; CHECK-LABEL: @load_multiple_extracts_with_variable_indices_large_vector_all_valid_by_and_some_noundef( 861; CHECK-NEXT: [[IDX_0_FROZEN:%.*]] = freeze i64 [[IDX_0:%.*]] 862; CHECK-NEXT: [[IDX_0_CLAMPED:%.*]] = and i64 [[IDX_0_FROZEN]], 15 863; CHECK-NEXT: [[IDX_1_CLAMPED:%.*]] = and i64 [[IDX_1:%.*]], 15 864; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <16 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_0_CLAMPED]] 865; CHECK-NEXT: [[E_0:%.*]] = load i32, ptr [[TMP1]], align 4 866; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <16 x i32>, ptr [[X]], i32 0, i64 [[IDX_1_CLAMPED]] 867; CHECK-NEXT: [[E_1:%.*]] = load i32, ptr [[TMP2]], align 4 868; CHECK-NEXT: [[RES:%.*]] = add i32 [[E_0]], [[E_1]] 869; CHECK-NEXT: ret i32 [[RES]] 870; 871 %idx.0.clamped = and i64 %idx.0, 15 872 %idx.1.clamped = and i64 %idx.1, 15 873 874 %lv = load <16 x i32>, ptr %x 875 %e.0 = extractelement <16 x i32> %lv, i64 %idx.0.clamped 876 %e.1 
= extractelement <16 x i32> %lv, i64 %idx.1.clamped 877 %res = add i32 %e.0, %e.1 878 ret i32 %res 879} 880 881; Test case from PR51992. 882define i8 @load_extract_safe_due_to_branch_on_poison(<8 x i8> %in, ptr %src) { 883; CHECK-LABEL: @load_extract_safe_due_to_branch_on_poison( 884; CHECK-NEXT: entry: 885; CHECK-NEXT: [[EXT_IDX:%.*]] = extractelement <8 x i8> [[IN:%.*]], i32 0 886; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[EXT_IDX]], 99 887; CHECK-NEXT: br i1 [[CMP]], label [[THEN:%.*]], label [[EXIT:%.*]] 888; CHECK: then: 889; CHECK-NEXT: br label [[EXIT]] 890; CHECK: exit: 891; CHECK-NEXT: ret i8 0 892; 893entry: 894 %ext.idx = extractelement <8 x i8> %in, i32 0 895 %ext.idx.i32 = zext i8 %ext.idx to i32 896 %cmp = icmp ult i8 %ext.idx, 99 897 br i1 %cmp, label %then, label %exit 898 899then: 900 %load = load <16 x i8>, ptr %src, align 16 901 %and = and i32 %ext.idx.i32, 15 902 %ext = extractelement <16 x i8> %load, i32 %and 903 br label %exit 904 905exit: 906 %p = phi i8 [ 0, %entry ], [ %ext, %then ] 907 ret i8 0 908} 909 910declare void @use(...) 911 912; Make sure we don't assert. 913define void @pr69820(ptr %p, i32 %arg) { 914; CHECK-LABEL: @pr69820( 915; CHECK-NEXT: [[V:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16 916; CHECK-NEXT: [[AND:%.*]] = and i32 [[ARG:%.*]], 3 917; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x float> [[V]], i32 [[AND]] 918; CHECK-NEXT: call void @use(<4 x float> [[V]], float [[EXT]]) 919; CHECK-NEXT: ret void 920; 921 %v = load <4 x float>, ptr %p, align 16 922 %and = and i32 %arg, 3 923 %ext = extractelement <4 x float> %v, i32 %and 924 call void @use(<4 x float> %v, float %ext) 925 ret void 926 927; uselistorder directives 928 uselistorder <4 x float> %v, { 1, 0 } 929} 930