; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=vector-combine -mtriple=arm64-apple-darwinos -S %s | FileCheck --check-prefixes=CHECK,LIMIT-DEFAULT %s
; RUN: opt -passes=vector-combine -mtriple=arm64-apple-darwinos -vector-combine-max-scan-instrs=2 -S %s | FileCheck --check-prefixes=CHECK,LIMIT2 %s

define i32 @load_extract_idx_0(ptr %x) {
; CHECK-LABEL: @load_extract_idx_0(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 3
; CHECK-NEXT:    [[R:%.*]] = load i32, ptr [[TMP1]], align 4
; CHECK-NEXT:    ret i32 [[R]]
;
  %lv = load <4 x i32>, ptr %x
  %r = extractelement <4 x i32> %lv, i32 3
  ret i32 %r
}

define i32 @vscale_load_extract_idx_0(ptr %x) {
; CHECK-LABEL: @vscale_load_extract_idx_0(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <vscale x 4 x i32>, ptr [[X:%.*]], i32 0, i32 0
; CHECK-NEXT:    [[R:%.*]] = load i32, ptr [[TMP1]], align 16
; CHECK-NEXT:    ret i32 [[R]]
;
  %lv = load <vscale x 4 x i32>, ptr %x
  %r = extractelement <vscale x 4 x i32> %lv, i32 0
  ret i32 %r
}

; If the original load had a smaller alignment than the scalar type, the
; smaller alignment should be used.
define i32 @load_extract_idx_0_small_alignment(ptr %x) {
; CHECK-LABEL: @load_extract_idx_0_small_alignment(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 3
; CHECK-NEXT:    [[R:%.*]] = load i32, ptr [[TMP1]], align 2
; CHECK-NEXT:    ret i32 [[R]]
;
  %lv = load <4 x i32>, ptr %x, align 2
  %r = extractelement <4 x i32> %lv, i32 3
  ret i32 %r
}

define i32 @load_extract_idx_1(ptr %x) {
; CHECK-LABEL: @load_extract_idx_1(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 1
; CHECK-NEXT:    [[R:%.*]] = load i32, ptr [[TMP1]], align 4
; CHECK-NEXT:    ret i32 [[R]]
;
  %lv = load <4 x i32>, ptr %x
  %r = extractelement <4 x i32> %lv, i32 1
  ret i32 %r
}

define i32 @load_extract_idx_2(ptr %x) {
; CHECK-LABEL: @load_extract_idx_2(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 2
; CHECK-NEXT:    [[R:%.*]] = load i32, ptr [[TMP1]], align 8
; CHECK-NEXT:    ret i32 [[R]]
;
  %lv = load <4 x i32>, ptr %x
  %r = extractelement <4 x i32> %lv, i32 2
  ret i32 %r
}

define i32 @vscale_load_extract_idx_2(ptr %x) {
; CHECK-LABEL: @vscale_load_extract_idx_2(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <vscale x 4 x i32>, ptr [[X:%.*]], i32 0, i32 2
; CHECK-NEXT:    [[R:%.*]] = load i32, ptr [[TMP1]], align 8
; CHECK-NEXT:    ret i32 [[R]]
;
  %lv = load <vscale x 4 x i32>, ptr %x
  %r = extractelement <vscale x 4 x i32> %lv, i32 2
  ret i32 %r
}

define i32 @load_extract_idx_3(ptr %x) {
; CHECK-LABEL: @load_extract_idx_3(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 3
; CHECK-NEXT:    [[R:%.*]] = load i32, ptr [[TMP1]], align 4
; CHECK-NEXT:    ret i32 [[R]]
;
  %lv = load <4 x i32>, ptr %x
  %r = extractelement <4 x i32> %lv, i32 3
  ret i32 %r
}

; An out-of-bounds index for extractelement should not be converted to a narrow
; load, because it would introduce a dereference of a poison pointer.
define i32 @load_extract_idx_4(ptr %x) {
; CHECK-LABEL: @load_extract_idx_4(
; CHECK-NEXT:    [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 4
; CHECK-NEXT:    ret i32 [[R]]
;
  %lv = load <4 x i32>, ptr %x
  %r = extractelement <4 x i32> %lv, i32 4
  ret i32 %r
}

define i32 @vscale_load_extract_idx_4(ptr %x) {
; CHECK-LABEL: @vscale_load_extract_idx_4(
; CHECK-NEXT:    [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
; CHECK-NEXT:    [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i32 4
; CHECK-NEXT:    ret i32 [[R]]
;
  %lv = load <vscale x 4 x i32>, ptr %x
  %r = extractelement <vscale x 4 x i32> %lv, i32 4
  ret i32 %r
}

define i32 @load_extract_idx_var_i64(ptr %x, i64 %idx) {
; CHECK-LABEL: @load_extract_idx_var_i64(
; CHECK-NEXT:    [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX:%.*]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %lv = load <4 x i32>, ptr %x
  %r = extractelement <4 x i32> %lv, i64 %idx
  ret i32 %r
}

declare void @maythrow() readnone

define i32 @load_extract_idx_var_i64_known_valid_by_assume(ptr %x, i64 %idx) {
; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_assume(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 4
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    call void @maythrow()
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX]]
; CHECK-NEXT:    [[R:%.*]] = load i32, ptr [[TMP0]], align 4
; CHECK-NEXT:    ret i32 [[R]]
;
entry:
  %cmp = icmp ult i64 %idx, 4
  call void @llvm.assume(i1 %cmp)
  %lv = load <4 x i32>, ptr %x
  call void @maythrow()
  %r = extractelement <4 x i32> %lv, i64 %idx
  ret i32 %r
}

define i32 @vscale_load_extract_idx_var_i64_known_valid_by_assume(ptr %x, i64 %idx) {
; CHECK-LABEL: @vscale_load_extract_idx_var_i64_known_valid_by_assume(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 4
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    call void @maythrow()
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <vscale x 4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX]]
; CHECK-NEXT:    [[R:%.*]] = load i32, ptr [[TMP0]], align 4
; CHECK-NEXT:    ret i32 [[R]]
;
entry:
  %cmp = icmp ult i64 %idx, 4
  call void @llvm.assume(i1 %cmp)
  %lv = load <vscale x 4 x i32>, ptr %x
  call void @maythrow()
  %r = extractelement <vscale x 4 x i32> %lv, i64 %idx
  ret i32 %r
}

declare i1 @cond()

define i32 @load_extract_idx_var_i64_known_valid_by_assume_in_dominating_block(ptr %x, i64 %idx, i1 %c.1) {
; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_assume_in_dominating_block(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 4
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    br i1 [[C_1:%.*]], label [[LOOP:%.*]], label [[EXIT:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    call void @maythrow()
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX]]
; CHECK-NEXT:    [[R:%.*]] = load i32, ptr [[TMP0]], align 4
; CHECK-NEXT:    [[C_2:%.*]] = call i1 @cond()
; CHECK-NEXT:    br i1 [[C_2]], label [[LOOP]], label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    [[P:%.*]] = phi i32 [ [[R]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    ret i32 [[P]]
;
entry:
  %cmp = icmp ult i64 %idx, 4
  call void @llvm.assume(i1 %cmp)
  br i1 %c.1, label %loop, label %exit

loop:
  %lv = load <4 x i32>, ptr %x
  call void @maythrow()
  %r = extractelement <4 x i32> %lv, i64 %idx
  %c.2 = call i1 @cond()
  br i1 %c.2, label %loop, label %exit

exit:
  %p = phi i32 [ %r, %loop ], [ 0, %entry ]
  ret i32 %p
}

define i32 @load_extract_idx_var_i64_known_valid_by_assume_in_non_dominating_block(ptr %x, i64 %idx, i1 %c.1, i1 %c.2) {
; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_assume_in_non_dominating_block(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[C_1:%.*]], label [[ASSUME_CHECK:%.*]], label [[LOOP:%.*]]
; CHECK:       assume_check:
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 4
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    br i1 [[C_2:%.*]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
; CHECK-NEXT:    call void @maythrow()
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX]]
; CHECK-NEXT:    [[C_3:%.*]] = call i1 @cond()
; CHECK-NEXT:    br i1 [[C_3]], label [[LOOP]], label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    [[P:%.*]] = phi i32 [ [[R]], [[LOOP]] ], [ 0, [[ASSUME_CHECK]] ]
; CHECK-NEXT:    ret i32 0
;
entry:
  br i1 %c.1, label %assume_check, label %loop

assume_check:
  %cmp = icmp ult i64 %idx, 4
  call void @llvm.assume(i1 %cmp)
  br i1 %c.2, label %loop, label %exit

loop:
  %lv = load <4 x i32>, ptr %x
  call void @maythrow()
  %r = extractelement <4 x i32> %lv, i64 %idx
  %c.3 = call i1 @cond()
  br i1 %c.3, label %loop, label %exit

exit:
  %p = phi i32 [ %r, %loop ], [ 0, %assume_check ]
  ret i32 0
}

define i32 @load_extract_idx_var_i64_not_known_valid_by_assume_after_load(ptr %x, i64 %idx) {
; CHECK-LABEL: @load_extract_idx_var_i64_not_known_valid_by_assume_after_load(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 4
; CHECK-NEXT:    [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
; CHECK-NEXT:    call void @maythrow()
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX]]
; CHECK-NEXT:    ret i32 [[R]]
;
entry:
  %cmp = icmp ult i64 %idx, 4
  %lv = load <4 x i32>, ptr %x
  call void @maythrow()
  call void @llvm.assume(i1 %cmp)
  %r = extractelement <4 x i32> %lv, i64 %idx
  ret i32 %r
}

define i32 @load_extract_idx_var_i64_not_known_valid_by_assume(ptr %x, i64 %idx) {
; CHECK-LABEL: @load_extract_idx_var_i64_not_known_valid_by_assume(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 5
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX]]
; CHECK-NEXT:    ret i32 [[R]]
;
entry:
  %cmp = icmp ult i64 %idx, 5
  call void @llvm.assume(i1 %cmp)
  %lv = load <4 x i32>, ptr %x
  %r = extractelement <4 x i32> %lv, i64 %idx
  ret i32 %r
}

define i32 @vscale_load_extract_idx_var_i64_not_known_valid_by_assume_0(ptr %x, i64 %idx) {
; CHECK-LABEL: @vscale_load_extract_idx_var_i64_not_known_valid_by_assume_0(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], 5
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
; CHECK-NEXT:    [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i64 [[IDX]]
; CHECK-NEXT:    ret i32 [[R]]
;
entry:
  %cmp = icmp ult i64 %idx, 5
  call void @llvm.assume(i1 %cmp)
  %lv = load <vscale x 4 x i32>, ptr %x
  %r = extractelement <vscale x 4 x i32> %lv, i64 %idx
  ret i32 %r
}

define i32 @vscale_load_extract_idx_var_i64_not_known_valid_by_assume_1(ptr %x, i64 %idx) {
; CHECK-LABEL: @vscale_load_extract_idx_var_i64_not_known_valid_by_assume_1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VS:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[VM:%.*]] = mul i64 [[VS]], 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX:%.*]], [[VM]]
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
; CHECK-NEXT:    [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
; CHECK-NEXT:    [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i64 [[IDX]]
; CHECK-NEXT:    ret i32 [[R]]
;
entry:
  %vs = call i64 @llvm.vscale.i64()
  %vm = mul i64 %vs, 4
  %cmp = icmp ult i64 %idx, %vm
  call void @llvm.assume(i1 %cmp)
  %lv = load <vscale x 4 x i32>, ptr %x
  %r = extractelement <vscale x 4 x i32> %lv, i64 %idx
  ret i32 %r
}

declare i64 @llvm.vscale.i64()
declare void @llvm.assume(i1)

define i32 @load_extract_idx_var_i64_known_valid_by_and(ptr %x, i64 %idx) {
; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_and(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_FROZEN:%.*]] = freeze i64 [[IDX:%.*]]
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i64 [[IDX_FROZEN]], 3
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]]
; CHECK-NEXT:    [[R:%.*]] = load i32, ptr [[TMP0]], align 4
; CHECK-NEXT:    ret i32 [[R]]
;
entry:
  %idx.clamped = and i64 %idx, 3
  %lv = load <4 x i32>, ptr %x
  %r = extractelement <4 x i32> %lv, i64 %idx.clamped
  ret i32 %r
}

define i32 @vscale_load_extract_idx_var_i64_known_valid_by_and(ptr %x, i64 %idx) {
; CHECK-LABEL: @vscale_load_extract_idx_var_i64_known_valid_by_and(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_FROZEN:%.*]] = freeze i64 [[IDX:%.*]]
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i64 [[IDX_FROZEN]], 3
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <vscale x 4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]]
; CHECK-NEXT:    [[R:%.*]] = load i32, ptr [[TMP0]], align 4
; CHECK-NEXT:    ret i32 [[R]]
;
entry:
  %idx.clamped = and i64 %idx, 3
  %lv = load <vscale x 4 x i32>, ptr %x
  %r = extractelement <vscale x 4 x i32> %lv, i64 %idx.clamped
  ret i32 %r
}

define i32 @load_extract_idx_var_i64_known_valid_by_and_noundef(ptr %x, i64 noundef %idx) {
; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_and_noundef(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i64 [[IDX:%.*]], 3
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]]
; CHECK-NEXT:    [[R:%.*]] = load i32, ptr [[TMP0]], align 4
; CHECK-NEXT:    ret i32 [[R]]
;
entry:
  %idx.clamped = and i64 %idx, 3
  %lv = load <4 x i32>, ptr %x
  %r = extractelement <4 x i32> %lv, i64 %idx.clamped
  ret i32 %r
}

define i32 @load_extract_idx_var_i64_not_known_valid_by_and(ptr %x, i64 %idx) {
; CHECK-LABEL: @load_extract_idx_var_i64_not_known_valid_by_and(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i64 [[IDX:%.*]], 4
; CHECK-NEXT:    [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_CLAMPED]]
; CHECK-NEXT:    ret i32 [[R]]
;
entry:
  %idx.clamped = and i64 %idx, 4
  %lv = load <4 x i32>, ptr %x
  %r = extractelement <4 x i32> %lv, i64 %idx.clamped
  ret i32 %r
}

define i32 @vscale_load_extract_idx_var_i64_not_known_valid_by_and(ptr %x, i64 %idx) {
; CHECK-LABEL: @vscale_load_extract_idx_var_i64_not_known_valid_by_and(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = and i64 [[IDX:%.*]], 4
; CHECK-NEXT:    [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
; CHECK-NEXT:    [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i64 [[IDX_CLAMPED]]
; CHECK-NEXT:    ret i32 [[R]]
;
entry:
  %idx.clamped = and i64 %idx, 4
  %lv = load <vscale x 4 x i32>, ptr %x
  %r = extractelement <vscale x 4 x i32> %lv, i64 %idx.clamped
  ret i32 %r
}

define i32 @load_extract_idx_var_i64_known_valid_by_urem(ptr %x, i64 %idx) {
; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_urem(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_FROZEN:%.*]] = freeze i64 [[IDX:%.*]]
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i64 [[IDX_FROZEN]], 4
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]]
; CHECK-NEXT:    [[R:%.*]] = load i32, ptr [[TMP0]], align 4
; CHECK-NEXT:    ret i32 [[R]]
;
entry:
  %idx.clamped = urem i64 %idx, 4
  %lv = load <4 x i32>, ptr %x
  %r = extractelement <4 x i32> %lv, i64 %idx.clamped
  ret i32 %r
}

define i32 @vscale_load_extract_idx_var_i64_known_valid_by_urem(ptr %x, i64 %idx) {
; CHECK-LABEL: @vscale_load_extract_idx_var_i64_known_valid_by_urem(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_FROZEN:%.*]] = freeze i64 [[IDX:%.*]]
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i64 [[IDX_FROZEN]], 4
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <vscale x 4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]]
; CHECK-NEXT:    [[R:%.*]] = load i32, ptr [[TMP0]], align 4
; CHECK-NEXT:    ret i32 [[R]]
;
entry:
  %idx.clamped = urem i64 %idx, 4
  %lv = load <vscale x 4 x i32>, ptr %x
  %r = extractelement <vscale x 4 x i32> %lv, i64 %idx.clamped
  ret i32 %r
}

define i32 @load_extract_idx_var_i64_known_valid_by_urem_noundef(ptr %x, i64 noundef %idx) {
; CHECK-LABEL: @load_extract_idx_var_i64_known_valid_by_urem_noundef(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i64 [[IDX:%.*]], 4
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]]
; CHECK-NEXT:    [[R:%.*]] = load i32, ptr [[TMP0]], align 4
; CHECK-NEXT:    ret i32 [[R]]
;
entry:
  %idx.clamped = urem i64 %idx, 4
  %lv = load <4 x i32>, ptr %x
  %r = extractelement <4 x i32> %lv, i64 %idx.clamped
  ret i32 %r
}

define i32 @load_extract_idx_var_i64_not_known_valid_by_urem(ptr %x, i64 %idx) {
; CHECK-LABEL: @load_extract_idx_var_i64_not_known_valid_by_urem(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i64 [[IDX:%.*]], 5
; CHECK-NEXT:    [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_CLAMPED]]
; CHECK-NEXT:    ret i32 [[R]]
;
entry:
  %idx.clamped = urem i64 %idx, 5
  %lv = load <4 x i32>, ptr %x
  %r = extractelement <4 x i32> %lv, i64 %idx.clamped
  ret i32 %r
}

define i32 @vscale_load_extract_idx_var_i64_not_known_valid_by_urem(ptr %x, i64 %idx) {
; CHECK-LABEL: @vscale_load_extract_idx_var_i64_not_known_valid_by_urem(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_CLAMPED:%.*]] = urem i64 [[IDX:%.*]], 5
; CHECK-NEXT:    [[LV:%.*]] = load <vscale x 4 x i32>, ptr [[X:%.*]], align 16
; CHECK-NEXT:    [[R:%.*]] = extractelement <vscale x 4 x i32> [[LV]], i64 [[IDX_CLAMPED]]
; CHECK-NEXT:    ret i32 [[R]]
;
entry:
  %idx.clamped = urem i64 %idx, 5
  %lv = load <vscale x 4 x i32>, ptr %x
  %r = extractelement <vscale x 4 x i32> %lv, i64 %idx.clamped
  ret i32 %r
}

define i32 @load_extract_idx_var_i32(ptr %x, i32 %idx) {
; CHECK-LABEL: @load_extract_idx_var_i32(
; CHECK-NEXT:    [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 [[IDX:%.*]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %lv = load <4 x i32>, ptr %x
  %r = extractelement <4 x i32> %lv, i32 %idx
  ret i32 %r
}

declare void @clobber()

define i32 @load_extract_clobber_call_before(ptr %x) {
; CHECK-LABEL: @load_extract_clobber_call_before(
; CHECK-NEXT:    call void @clobber()
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 2
; CHECK-NEXT:    [[R:%.*]] = load i32, ptr [[TMP1]], align 8
; CHECK-NEXT:    ret i32 [[R]]
;
  call void @clobber()
  %lv = load <4 x i32>, ptr %x
  %r = extractelement <4 x i32> %lv, i32 2
  ret i32 %r
}

define i32 @load_extract_clobber_call_between(ptr %x) {
; CHECK-LABEL: @load_extract_clobber_call_between(
; CHECK-NEXT:    [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
; CHECK-NEXT:    call void @clobber()
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 2
; CHECK-NEXT:    ret i32 [[R]]
;
  %lv = load <4 x i32>, ptr %x
  call void @clobber()
  %r = extractelement <4 x i32> %lv, i32 2
  ret i32 %r
}

define i32 @load_extract_clobber_call_after(ptr %x) {
; CHECK-LABEL: @load_extract_clobber_call_after(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 2
; CHECK-NEXT:    [[R:%.*]] = load i32, ptr [[TMP1]], align 8
; CHECK-NEXT:    call void @clobber()
; CHECK-NEXT:    ret i32 [[R]]
;
  %lv = load <4 x i32>, ptr %x
  %r = extractelement <4 x i32> %lv, i32 2
  call void @clobber()
  ret i32 %r
}

define i32 @load_extract_clobber_store_before(ptr %x, ptr %y) {
; CHECK-LABEL: @load_extract_clobber_store_before(
; CHECK-NEXT:    store i8 0, ptr [[Y:%.*]], align 1
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 2
; CHECK-NEXT:    [[R:%.*]] = load i32, ptr [[TMP1]], align 8
; CHECK-NEXT:    ret i32 [[R]]
;
  store i8 0, ptr %y
  %lv = load <4 x i32>, ptr %x
  %r = extractelement <4 x i32> %lv, i32 2
  ret i32 %r
}

define i32 @load_extract_clobber_store_between(ptr %x, ptr %y) {
; CHECK-LABEL: @load_extract_clobber_store_between(
; CHECK-NEXT:    [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
; CHECK-NEXT:    store i8 0, ptr [[Y:%.*]], align 1
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 2
; CHECK-NEXT:    ret i32 [[R]]
;
  %lv = load <4 x i32>, ptr %x
  store i8 0, ptr %y
  %r = extractelement <4 x i32> %lv, i32 2
  ret i32 %r
}

define i32 @load_extract_clobber_store_between_limit(ptr %x, ptr %y, <8 x i32> %z) {
; CHECK-LABEL: @load_extract_clobber_store_between_limit(
; CHECK-NEXT:    [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <8 x i32> [[Z1:%.*]], <8 x i32> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP1:%.*]] = add <8 x i32> [[Z1]], [[SHIFT]]
; CHECK-NEXT:    [[SHIFT1:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP2:%.*]] = add <8 x i32> [[TMP1]], [[SHIFT1]]
; CHECK-NEXT:    [[SHIFT2:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[SHIFT2]]
; CHECK-NEXT:    [[SHIFT3:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[Z:%.*]] = add <8 x i32> [[TMP3]], [[SHIFT3]]
; CHECK-NEXT:    [[Z_0:%.*]] = extractelement <8 x i32> [[Z]], i32 0
; CHECK-NEXT:    store i8 0, ptr [[Y:%.*]], align 1
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 2
; CHECK-NEXT:    [[ADD_4:%.*]] = add i32 [[Z_0]], [[R]]
; CHECK-NEXT:    ret i32 [[ADD_4]]
;
  %lv = load <4 x i32>, ptr %x
  %z.0 = extractelement <8 x i32> %z, i32 0
  %z.1 = extractelement <8 x i32> %z, i32 1
  %add.0 = add i32 %z.0, %z.1
  %z.2 = extractelement <8 x i32> %z, i32 2
  %add.1 = add i32 %add.0, %z.2
  %z.3 = extractelement <8 x i32> %z, i32 3
  %add.2 = add i32 %add.1, %z.3
  %z.4 = extractelement <8 x i32> %z, i32 4
  %add.3 = add i32 %add.2, %z.4
  store i8 0, ptr %y
  %r = extractelement <4 x i32> %lv, i32 2
  %add.4 = add i32 %add.3, %r
  ret i32 %add.4
}

define i32 @load_extract_clobber_store_after_limit(ptr %x, ptr %y, <8 x i32> %z) {
; LIMIT-DEFAULT-LABEL: @load_extract_clobber_store_after_limit(
; LIMIT-DEFAULT-NEXT:    [[SHIFT:%.*]] = shufflevector <8 x i32> [[Z1:%.*]], <8 x i32> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; LIMIT-DEFAULT-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[Z1]], [[SHIFT]]
; LIMIT-DEFAULT-NEXT:    [[SHIFT1:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; LIMIT-DEFAULT-NEXT:    [[TMP2:%.*]] = add <8 x i32> [[TMP4]], [[SHIFT1]]
; LIMIT-DEFAULT-NEXT:    [[SHIFT2:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; LIMIT-DEFAULT-NEXT:    [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[SHIFT2]]
; LIMIT-DEFAULT-NEXT:    [[SHIFT3:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; LIMIT-DEFAULT-NEXT:    [[Z:%.*]] = add <8 x i32> [[TMP3]], [[SHIFT3]]
; LIMIT-DEFAULT-NEXT:    [[Z_0:%.*]] = extractelement <8 x i32> [[Z]], i32 0
; LIMIT-DEFAULT-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[X:%.*]], i32 0, i32 2
; LIMIT-DEFAULT-NEXT:    [[R:%.*]] = load i32, ptr [[TMP1]], align 8
; LIMIT-DEFAULT-NEXT:    store i8 0, ptr [[Y:%.*]], align 1
; LIMIT-DEFAULT-NEXT:    [[ADD_4:%.*]] = add i32 [[Z_0]], [[R]]
; LIMIT-DEFAULT-NEXT:    ret i32 [[ADD_4]]
;
; LIMIT2-LABEL: @load_extract_clobber_store_after_limit(
; LIMIT2-NEXT:    [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
; LIMIT2-NEXT:    [[SHIFT:%.*]] = shufflevector <8 x i32> [[Z1:%.*]], <8 x i32> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; LIMIT2-NEXT:    [[TMP1:%.*]] = add <8 x i32> [[Z1]], [[SHIFT]]
; LIMIT2-NEXT:    [[SHIFT1:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; LIMIT2-NEXT:    [[TMP2:%.*]] = add <8 x i32> [[TMP1]], [[SHIFT1]]
; LIMIT2-NEXT:    [[SHIFT2:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; LIMIT2-NEXT:    [[TMP3:%.*]] = add <8 x i32> [[TMP2]], [[SHIFT2]]
; LIMIT2-NEXT:    [[SHIFT3:%.*]] = shufflevector <8 x i32> [[Z1]], <8 x i32> poison, <8 x i32> <i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; LIMIT2-NEXT:    [[Z:%.*]] = add <8 x i32> [[TMP3]], [[SHIFT3]]
; LIMIT2-NEXT:    [[Z_0:%.*]] = extractelement <8 x i32> [[Z]], i32 0
; LIMIT2-NEXT:    [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 2
; LIMIT2-NEXT:    store i8 0, ptr [[Y:%.*]], align 1
; LIMIT2-NEXT:    [[ADD_4:%.*]] = add i32 [[Z_0]], [[R]]
; LIMIT2-NEXT:    ret i32 [[ADD_4]]
;
  %lv = load <4 x i32>, ptr %x
  %z.0 = extractelement <8 x i32> %z, i32 0
  %z.1 = extractelement <8 x i32> %z, i32 1
  %add.0 = add i32 %z.0, %z.1
  %z.2 = extractelement <8 x i32> %z, i32 2
  %add.1 = add i32 %add.0, %z.2
  %z.3 = extractelement <8 x i32> %z, i32 3
  %add.2 = add i32 %add.1, %z.3
  %z.4 = extractelement <8 x i32> %z, i32 4
  %add.3 = add i32 %add.2, %z.4
  %r = extractelement <4 x i32> %lv, i32 2
  store i8 0, ptr %y
  %add.4 = add i32 %add.3, %r
  ret i32 %add.4
}

declare void @use.v4i32(<4 x i32>)

define i32 @load_extract_idx_different_bbs(ptr %x, i1 %c) {
; CHECK-LABEL: @load_extract_idx_different_bbs(
; CHECK-NEXT:    [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
; CHECK:       then:
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i32> [[LV]], i32 1
; CHECK-NEXT:    ret i32 [[R]]
; CHECK:       else:
; CHECK-NEXT:    call void @use.v4i32(<4 x i32> [[LV]])
; CHECK-NEXT:    ret i32 20
;
  %lv = load <4 x i32>, ptr %x
  br i1 %c, label %then, label %else

then:
  %r = extractelement <4 x i32> %lv, i32 1
  ret i32 %r

else:
  call void @use.v4i32(<4 x i32> %lv)
  ret i32 20
}

define i31 @load_with_non_power_of_2_element_type(ptr %x) {
; CHECK-LABEL: @load_with_non_power_of_2_element_type(
; CHECK-NEXT:    [[LV:%.*]] = load <4 x i31>, ptr [[X:%.*]], align 16
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i31> [[LV]], i32 1
; CHECK-NEXT:    ret i31 [[R]]
;
  %lv = load <4 x i31>, ptr %x
  %r = extractelement <4 x i31> %lv, i32 1
  ret i31 %r
}

define i1 @load_with_non_power_of_2_element_type_2(ptr %x) {
; CHECK-LABEL: @load_with_non_power_of_2_element_type_2(
; CHECK-NEXT:    [[LV:%.*]] = load <8 x i1>, ptr [[X:%.*]], align 1
; CHECK-NEXT:    [[R:%.*]] = extractelement <8 x i1> [[LV]], i32 1
; CHECK-NEXT:    ret i1 [[R]]
;
  %lv = load <8 x i1>, ptr %x
  %r = extractelement <8 x i1> %lv, i32 1
  ret i1 %r
}

; Scalarizing the load for multiple constant indices may not be profitable.
define i32 @load_multiple_extracts_with_constant_idx(ptr %x) {
; CHECK-LABEL: @load_multiple_extracts_with_constant_idx(
; CHECK-NEXT:    [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i32> [[LV]], <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[LV]], [[SHIFT]]
; CHECK-NEXT:    [[RES:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
; CHECK-NEXT:    ret i32 [[RES]]
;
  %lv = load <4 x i32>, ptr %x
  %e.0 = extractelement <4 x i32> %lv, i32 0
  %e.1 = extractelement <4 x i32> %lv, i32 1
  %res = add i32 %e.0, %e.1
  ret i32 %res
}

; Scalarizing the load for multiple extracts is profitable in this case,
; because the large vector requires 2 vector registers.
define i32 @load_multiple_extracts_with_constant_idx_profitable(ptr %x) {
; CHECK-LABEL: @load_multiple_extracts_with_constant_idx_profitable(
; CHECK-NEXT:    [[LV:%.*]] = load <8 x i32>, ptr [[X:%.*]], align 16
; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <8 x i32> [[LV]], <8 x i32> poison, <8 x i32> <i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP1:%.*]] = add <8 x i32> [[LV]], [[SHIFT]]
; CHECK-NEXT:    [[RES:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0
; CHECK-NEXT:    ret i32 [[RES]]
;
  %lv = load <8 x i32>, ptr %x, align 16
  %e.0 = extractelement <8 x i32> %lv, i32 0
  %e.1 = extractelement <8 x i32> %lv, i32 6
  %res = add i32 %e.0, %e.1
  ret i32 %res
}

; Scalarizing may or may not be profitable, depending on the target.
define i32 @load_multiple_2_with_variable_indices(ptr %x, i64 %idx.0, i64 %idx.1) {
; CHECK-LABEL: @load_multiple_2_with_variable_indices(
; CHECK-NEXT:    [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
; CHECK-NEXT:    [[E_0:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_0:%.*]]
; CHECK-NEXT:    [[E_1:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_1:%.*]]
; CHECK-NEXT:    [[RES:%.*]] = add i32 [[E_0]], [[E_1]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %lv = load <4 x i32>, ptr %x
  %e.0 = extractelement <4 x i32> %lv, i64 %idx.0
  %e.1 = extractelement <4 x i32> %lv, i64 %idx.1
  %res = add i32 %e.0, %e.1
  ret i32 %res
}

define i32 @load_4_extracts_with_variable_indices_short_vector(ptr %x, i64 %idx.0, i64 %idx.1, i64 %idx.2, i64 %idx.3) {
; CHECK-LABEL: @load_4_extracts_with_variable_indices_short_vector(
; CHECK-NEXT:    [[LV:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
; CHECK-NEXT:    [[E_0:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_0:%.*]]
; CHECK-NEXT:    [[E_1:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_1:%.*]]
; CHECK-NEXT:    [[E_2:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_2:%.*]]
; CHECK-NEXT:    [[E_3:%.*]] = extractelement <4 x i32> [[LV]], i64 [[IDX_3:%.*]]
; CHECK-NEXT:    [[RES_0:%.*]] = add i32 [[E_0]], [[E_1]]
; CHECK-NEXT:    [[RES_1:%.*]] = add i32 [[RES_0]], [[E_2]]
; CHECK-NEXT:    [[RES_2:%.*]] = add i32 [[RES_1]], [[E_3]]
; CHECK-NEXT:    ret i32 [[RES_2]]
;
  %lv = load <4 x i32>, ptr %x
  %e.0 = extractelement <4 x i32> %lv, i64 %idx.0
  %e.1 = extractelement <4 x i32> %lv, i64 %idx.1
  %e.2 = extractelement <4 x i32> %lv, i64 %idx.2
  %e.3 = extractelement <4 x i32> %lv, i64 %idx.3
  %res.0 = add i32 %e.0, %e.1
  %res.1 = add i32 %res.0, %e.2
  %res.2 = add i32 %res.1, %e.3
  ret i32 %res.2
}

define i32 @load_multiple_extracts_with_variable_indices_large_vector_only_first_valid(ptr %x, i64 %idx.0, i64 %idx.1) {
; CHECK-LABEL: @load_multiple_extracts_with_variable_indices_large_vector_only_first_valid(
; CHECK-NEXT:    [[CMP_IDX_0:%.*]] = icmp ult i64 [[IDX_0:%.*]], 16
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_IDX_0]])
; CHECK-NEXT:    [[LV:%.*]] = load <16 x i32>, ptr [[X:%.*]], align 64
; CHECK-NEXT:    [[E_0:%.*]] = extractelement <16 x i32> [[LV]], i64 [[IDX_0]]
; CHECK-NEXT:    [[E_1:%.*]] = extractelement <16 x i32> [[LV]], i64 [[IDX_1:%.*]]
; CHECK-NEXT:    [[RES:%.*]] = add i32 [[E_0]], [[E_1]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %cmp.idx.0 = icmp ult i64 %idx.0, 16
  call void @llvm.assume(i1 %cmp.idx.0)

  %lv = load <16 x i32>, ptr %x
  %e.0 = extractelement <16 x i32> %lv, i64 %idx.0
  %e.1 = extractelement <16 x i32> %lv, i64 %idx.1
  %res = add i32 %e.0, %e.1
  ret i32 %res
}

define i32 @load_multiple_extracts_with_variable_indices_large_vector_only_all_valid(ptr %x, i64 %idx.0, i64 %idx.1) {
; CHECK-LABEL: @load_multiple_extracts_with_variable_indices_large_vector_only_all_valid(
; CHECK-NEXT:    [[CMP_IDX_0:%.*]] = icmp ult i64 [[IDX_0:%.*]], 16
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_IDX_0]])
; CHECK-NEXT:    [[CMP_IDX_1:%.*]] = icmp ult i64 [[IDX_1:%.*]], 16
; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP_IDX_1]])
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <16 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_0]]
; CHECK-NEXT:    [[E_0:%.*]] = load i32, ptr [[TMP1]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <16 x i32>, ptr [[X]], i32 0, i64 [[IDX_1]]
; CHECK-NEXT:    [[E_1:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[RES:%.*]] = add i32 [[E_0]], [[E_1]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %cmp.idx.0 = icmp ult i64 %idx.0, 16
  call void @llvm.assume(i1 %cmp.idx.0)
  %cmp.idx.1 = icmp ult i64 %idx.1, 16
  call void @llvm.assume(i1 %cmp.idx.1)

  %lv = load <16 x i32>, ptr %x
  %e.0 = extractelement <16 x i32> %lv, i64 %idx.0
  %e.1 = extractelement <16 x i32> %lv, i64 %idx.1
  %res = add i32 %e.0, %e.1
  ret i32 %res
}

define i32 @load_multiple_extracts_with_variable_indices_large_vector_only_first_valid_by_and(ptr %x, i64 %idx.0, i64 %idx.1) {
; CHECK-LABEL: @load_multiple_extracts_with_variable_indices_large_vector_only_first_valid_by_and(
; CHECK-NEXT:    [[IDX_0_CLAMPED:%.*]] = and i64 [[IDX_0:%.*]], 15
; CHECK-NEXT:    [[LV:%.*]] = load <16 x i32>, ptr [[X:%.*]], align 64
; CHECK-NEXT:    [[E_0:%.*]] = extractelement <16 x i32> [[LV]], i64 [[IDX_0_CLAMPED]]
; CHECK-NEXT:    [[E_1:%.*]] = extractelement <16 x i32> [[LV]], i64 [[IDX_1:%.*]]
; CHECK-NEXT:    [[RES:%.*]] = add i32 [[E_0]], [[E_1]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %idx.0.clamped = and i64 %idx.0, 15

  %lv = load <16 x i32>, ptr %x
  %e.0 = extractelement <16 x i32> %lv, i64 %idx.0.clamped
  %e.1 = extractelement <16 x i32> %lv, i64 %idx.1
  %res = add i32 %e.0, %e.1
  ret i32 %res
}

define i32 @load_multiple_extracts_with_variable_indices_large_vector_all_valid_by_and(ptr %x, i64 %idx.0, i64 %idx.1) {
; CHECK-LABEL: @load_multiple_extracts_with_variable_indices_large_vector_all_valid_by_and(
; CHECK-NEXT:    [[IDX_0_FROZEN:%.*]] = freeze i64 [[IDX_0:%.*]]
; CHECK-NEXT:    [[IDX_0_CLAMPED:%.*]] = and i64 [[IDX_0_FROZEN]], 15
; CHECK-NEXT:    [[IDX_1_FROZEN:%.*]] = freeze i64 [[IDX_1:%.*]]
; CHECK-NEXT:    [[IDX_1_CLAMPED:%.*]] = and i64 [[IDX_1_FROZEN]], 15
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <16 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_0_CLAMPED]]
; CHECK-NEXT:    [[E_0:%.*]] = load i32, ptr [[TMP1]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <16 x i32>, ptr [[X]], i32 0, i64 [[IDX_1_CLAMPED]]
; CHECK-NEXT:    [[E_1:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[RES:%.*]] = add i32 [[E_0]], [[E_1]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %idx.0.clamped = and i64 %idx.0, 15
  %idx.1.clamped = and i64 %idx.1, 15

  %lv = load <16 x i32>, ptr %x
  %e.0 = extractelement <16 x i32> %lv, i64 %idx.0.clamped
  %e.1 = extractelement <16 x i32> %lv, i64 %idx.1.clamped
  %res = add i32 %e.0, %e.1
  ret i32 %res
}

define i32 @load_multiple_extracts_with_unique_variable_indices_large_vector_valid_by_and(ptr %x, ptr %y, i64 %idx) {
; LIMIT-DEFAULT-LABEL: @load_multiple_extracts_with_unique_variable_indices_large_vector_valid_by_and(
; LIMIT-DEFAULT-NEXT:    [[IDX_FROZEN:%.*]] = freeze i64 [[IDX:%.*]]
; LIMIT-DEFAULT-NEXT:    [[IDX_CLAMPED:%.*]] = and i64 [[IDX_FROZEN]], 15
; LIMIT-DEFAULT-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <16 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]]
; LIMIT-DEFAULT-NEXT:    [[E_0:%.*]] = load i32, ptr [[TMP1]], align 4
; LIMIT-DEFAULT-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <16 x i32>, ptr [[Y:%.*]], i32 0, i64 [[IDX_CLAMPED]]
; LIMIT-DEFAULT-NEXT:    [[E_1:%.*]] = load i32, ptr [[TMP2]], align 4
; LIMIT-DEFAULT-NEXT:    [[RES:%.*]] = add i32 [[E_0]], [[E_1]]
; LIMIT-DEFAULT-NEXT:    ret i32 [[RES]]
;
; LIMIT2-LABEL: @load_multiple_extracts_with_unique_variable_indices_large_vector_valid_by_and(
; LIMIT2-NEXT:    [[IDX_FROZEN:%.*]] = freeze i64 [[IDX:%.*]]
; LIMIT2-NEXT:    [[IDX_CLAMPED:%.*]] = and i64 [[IDX_FROZEN]], 15
; LIMIT2-NEXT:    [[LY:%.*]] = load <16 x i32>, ptr [[Y:%.*]], align 64
; LIMIT2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <16 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_CLAMPED]]
; LIMIT2-NEXT:    [[E_0:%.*]] = load i32, ptr [[TMP1]], align 4
; LIMIT2-NEXT:    [[E_1:%.*]] = extractelement <16 x i32> [[LY]], i64 [[IDX_CLAMPED]]
; LIMIT2-NEXT:    [[RES:%.*]] = add i32 [[E_0]], [[E_1]]
; LIMIT2-NEXT:    ret i32 [[RES]]
;
  %idx.clamped = and i64 %idx, 15

  %lx = load <16 x i32>, ptr %x
  %ly = load <16 x i32>, ptr %y
  %e.0 = extractelement <16 x i32> %lx, i64 %idx.clamped
  %e.1 = extractelement <16 x i32> %ly, i64 %idx.clamped
  %res = add i32 %e.0, %e.1
  ret i32 %res
}

define i32 @load_multiple_extracts_with_variable_indices_large_vector_all_valid_by_and_some_noundef(ptr %x, i64 %idx.0, i64 noundef %idx.1) {
; CHECK-LABEL: @load_multiple_extracts_with_variable_indices_large_vector_all_valid_by_and_some_noundef(
; CHECK-NEXT:    [[IDX_0_FROZEN:%.*]] = freeze i64 [[IDX_0:%.*]]
; CHECK-NEXT:    [[IDX_0_CLAMPED:%.*]] = and i64 [[IDX_0_FROZEN]], 15
; CHECK-NEXT:    [[IDX_1_CLAMPED:%.*]] = and i64 [[IDX_1:%.*]], 15
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds <16 x i32>, ptr [[X:%.*]], i32 0, i64 [[IDX_0_CLAMPED]]
; CHECK-NEXT:    [[E_0:%.*]] = load i32, ptr [[TMP1]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds <16 x i32>, ptr [[X]], i32 0, i64 [[IDX_1_CLAMPED]]
; CHECK-NEXT:    [[E_1:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[RES:%.*]] = add i32 [[E_0]], [[E_1]]
; CHECK-NEXT:    ret i32 [[RES]]
;
  %idx.0.clamped = and i64 %idx.0, 15
  %idx.1.clamped = and i64 %idx.1, 15

  %lv = load <16 x i32>, ptr %x
  %e.0 = extractelement <16 x i32> %lv, i64 %idx.0.clamped
  %e.1 = extractelement <16 x i32> %lv, i64 %idx.1.clamped
  %res = add i32 %e.0, %e.1
  ret i32 %res
}

; Test case from PR51992.
define i8 @load_extract_safe_due_to_branch_on_poison(<8 x i8> %in, ptr %src) {
; CHECK-LABEL: @load_extract_safe_due_to_branch_on_poison(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[EXT_IDX:%.*]] = extractelement <8 x i8> [[IN:%.*]], i32 0
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[EXT_IDX]], 99
; CHECK-NEXT:    br i1 [[CMP]], label [[THEN:%.*]], label [[EXIT:%.*]]
; CHECK:       then:
; CHECK-NEXT:    br label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    ret i8 0
;
entry:
  %ext.idx = extractelement <8 x i8> %in, i32 0
  %ext.idx.i32 = zext i8 %ext.idx to i32
  %cmp = icmp ult i8 %ext.idx, 99
  br i1 %cmp, label %then, label %exit

then:
  %load = load <16 x i8>, ptr %src, align 16
  %and = and i32 %ext.idx.i32, 15
  %ext = extractelement <16 x i8> %load, i32 %and
  br label %exit

exit:
  %p = phi i8 [ 0, %entry ], [ %ext, %then ]
  ret i8 0
}

declare void @use(...)

; Make sure we don't assert.
define void @pr69820(ptr %p, i32 %arg) {
; CHECK-LABEL: @pr69820(
; CHECK-NEXT:    [[V:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
; CHECK-NEXT:    [[AND:%.*]] = and i32 [[ARG:%.*]], 3
; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x float> [[V]], i32 [[AND]]
; CHECK-NEXT:    call void @use(<4 x float> [[V]], float [[EXT]])
; CHECK-NEXT:    ret void
;
  %v = load <4 x float>, ptr %p, align 16
  %and = and i32 %arg, 3
  %ext = extractelement <4 x float> %v, i32 %and
  call void @use(<4 x float> %v, float %ext)
  ret void

; uselistorder directives
  uselistorder <4 x float> %v, { 1, 0 }
}