xref: /llvm-project/llvm/test/CodeGen/ARM/vselect_imax.ll (revision e0ed0333f0fed2e73f805afd58b61176a87aa3ad)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: opt < %s -passes='print<cost-model>' -mtriple=arm-apple-ios6.0.0 -mcpu=cortex-a8 2>&1 -disable-output | FileCheck %s --check-prefix=COST
3; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
4; Make sure that the ARM backend with NEON handles vselect.
5
; Signed per-lane maximum of two <4 x i32> arguments, stored through %m.
; The llc assertions below expect the compare+select pair to be emitted as a
; single vmax.s32.
6define void @vmax_v4i32(ptr %m, <4 x i32> %a, <4 x i32> %b) {
7; CHECK-LABEL: vmax_v4i32:
8; CHECK:       @ %bb.0:
9; CHECK-NEXT:    add r1, sp, #8
10; CHECK-NEXT:    vldr d17, [sp]
11; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
12; CHECK-NEXT:    vmov d16, r2, r3
13; CHECK-NEXT:    vmax.s32 q8, q8, q9
14; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
15; CHECK-NEXT:    mov pc, lr
    ; Lane mask: true where %a is signed-greater-than %b.
16    %cmpres = icmp sgt <4 x i32> %a, %b
    ; Keep %a where the mask is set and %b elsewhere.
17    %maxres = select <4 x i1> %cmpres, <4 x i32> %a,  <4 x i32> %b
18    store <4 x i32> %maxres, ptr %m
19    ret void
20}
21
; Signed per-lane minimum of two <16 x i16> vectors loaded from %loadaddr and
; %loadaddr2, stored to %storeaddr. %blend is not referenced in the body.
; The llc assertions expect two vmin.s16 operations; the cost-model assertions
; expect cost 0 for the icmp and cost 4 for the select.
22%T0_10 = type <16 x i16>
23%T1_10 = type <16 x i1>
24define void @func_blend10(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storeaddr) {
25; CHECK-LABEL: func_blend10:
26; CHECK:       @ %bb.0:
27; CHECK-NEXT:    vld1.16 {d16, d17}, [r1:128]!
28; CHECK-NEXT:    vld1.16 {d18, d19}, [r0:128]!
29; CHECK-NEXT:    vmin.s16 q8, q9, q8
30; CHECK-NEXT:    vld1.64 {d20, d21}, [r1:128]
31; CHECK-NEXT:    vld1.64 {d18, d19}, [r0:128]
32; CHECK-NEXT:    vmin.s16 q9, q9, q10
33; CHECK-NEXT:    vst1.16 {d16, d17}, [r3:128]!
34; CHECK-NEXT:    vst1.64 {d18, d19}, [r3:128]
35; CHECK-NEXT:    mov pc, lr
36; COST: func_blend10
37; COST: cost of 0 {{.*}} icmp
38; COST: cost of 4 {{.*}} select
39
  ; %c is true in lanes where %v0 < %v1 (signed); the select keeps %v0 in those
  ; lanes and %v1 in the rest.
40  %v0 = load %T0_10, ptr %loadaddr
41  %v1 = load %T0_10, ptr %loadaddr2
42  %c = icmp slt %T0_10 %v0, %v1
43  %r = select %T1_10 %c, %T0_10 %v0, %T0_10 %v1
44  store %T0_10 %r, ptr %storeaddr
45  ret void
46}
47
; Signed per-lane minimum of two <8 x i32> vectors loaded from %loadaddr and
; %loadaddr2, stored to %storeaddr. %blend is not referenced in the body.
; The llc assertions expect two vmin.s32 operations; the cost-model assertions
; expect cost 0 for the icmp and cost 4 for the select.
48%T0_14 = type <8 x i32>
49%T1_14 = type <8 x i1>
50define void @func_blend14(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storeaddr) {
51; CHECK-LABEL: func_blend14:
52; CHECK:       @ %bb.0:
53; CHECK-NEXT:    vld1.32 {d16, d17}, [r1:128]!
54; CHECK-NEXT:    vld1.32 {d18, d19}, [r0:128]!
55; CHECK-NEXT:    vmin.s32 q8, q9, q8
56; CHECK-NEXT:    vld1.64 {d20, d21}, [r1:128]
57; CHECK-NEXT:    vld1.64 {d18, d19}, [r0:128]
58; CHECK-NEXT:    vmin.s32 q9, q9, q10
59; CHECK-NEXT:    vst1.32 {d16, d17}, [r3:128]!
60; CHECK-NEXT:    vst1.64 {d18, d19}, [r3:128]
61; CHECK-NEXT:    mov pc, lr
62; COST: func_blend14
63; COST: cost of 0 {{.*}} icmp
64; COST: cost of 4 {{.*}} select
  ; %c is true in lanes where %v0 < %v1 (signed); the select keeps %v0 in those
  ; lanes and %v1 in the rest.
65  %v0 = load %T0_14, ptr %loadaddr
66  %v1 = load %T0_14, ptr %loadaddr2
67  %c = icmp slt %T0_14 %v0, %v1
68  %r = select %T1_14 %c, %T0_14 %v0, %T0_14 %v1
69  store %T0_14 %r, ptr %storeaddr
70  ret void
71}
72
; Signed per-lane minimum of two <16 x i32> vectors loaded from %loadaddr and
; %loadaddr2, stored to %storeaddr. %blend is not referenced in the body.
; The llc assertions expect four vmin.s32 operations; the cost-model assertions
; expect cost 0 for the icmp and cost 8 for the select.
73%T0_15 = type <16 x i32>
74%T1_15 = type <16 x i1>
75define void @func_blend15(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storeaddr) {
76; CHECK-LABEL: func_blend15:
77; CHECK:       @ %bb.0:
78; CHECK-NEXT:    vld1.32 {d16, d17}, [r1:128]!
79; CHECK-NEXT:    vld1.32 {d18, d19}, [r0:128]!
80; CHECK-NEXT:    vmin.s32 q8, q9, q8
81; CHECK-NEXT:    vld1.32 {d20, d21}, [r1:128]!
82; CHECK-NEXT:    vld1.32 {d22, d23}, [r0:128]!
83; CHECK-NEXT:    vmin.s32 q10, q11, q10
84; CHECK-NEXT:    vld1.32 {d24, d25}, [r1:128]!
85; CHECK-NEXT:    vld1.32 {d26, d27}, [r0:128]!
86; CHECK-NEXT:    vmin.s32 q12, q13, q12
87; CHECK-NEXT:    vld1.64 {d18, d19}, [r1:128]
88; CHECK-NEXT:    vld1.64 {d22, d23}, [r0:128]
89; CHECK-NEXT:    vmin.s32 q9, q11, q9
90; CHECK-NEXT:    vst1.32 {d16, d17}, [r3:128]!
91; CHECK-NEXT:    vst1.32 {d20, d21}, [r3:128]!
92; CHECK-NEXT:    vst1.32 {d24, d25}, [r3:128]!
93; CHECK-NEXT:    vst1.64 {d18, d19}, [r3:128]
94; CHECK-NEXT:    mov pc, lr
95; COST: func_blend15
96; COST: cost of 0 {{.*}} icmp
97; COST: cost of 8 {{.*}} select
98
  ; %c is true in lanes where %v0 < %v1 (signed); the select keeps %v0 in those
  ; lanes and %v1 in the rest.
99  %v0 = load %T0_15, ptr %loadaddr
100  %v1 = load %T0_15, ptr %loadaddr2
101  %c = icmp slt %T0_15 %v0, %v1
102  %r = select %T1_15 %c, %T0_15 %v0, %T0_15 %v1
103  store %T0_15 %r, ptr %storeaddr
104  ret void
105}
106
107; The cost model was adjusted for the following selects. If the code lowering
108; for them is improved, the costs asserted below must be adjusted as well.
; Signed-less-than compare + select on two <4 x i64> vectors loaded from
; %loadaddr and %loadaddr2, stored to %storeaddr. %blend is not referenced in
; the body.
; The llc assertions expect no vector min instruction here: each 64-bit lane is
; compared in core registers (subs/sbcs), the outcome is turned into an
; all-ones-or-zero value (movlt + mvnne), broadcast with vdup.32 and applied
; with vbit. The cost-model assertions expect cost 0 for the icmp and cost 21
; for the select.
109%T0_18 = type <4 x i64>
110%T1_18 = type <4 x i1>
111define void @func_blend18(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storeaddr) {
112; CHECK-LABEL: func_blend18:
113; CHECK:       @ %bb.0:
114; CHECK-NEXT:    .save {r4, r5, r6, lr}
115; CHECK-NEXT:    push {r4, r5, r6, lr}
116; CHECK-NEXT:    vld1.64 {d16, d17}, [r1:128]!
117; CHECK-NEXT:    vld1.64 {d22, d23}, [r0:128]!
118; CHECK-NEXT:    vmov r4, r6, d16
119; CHECK-NEXT:    vld1.64 {d18, d19}, [r1:128]
120; CHECK-NEXT:    vld1.64 {d20, d21}, [r0:128]
121; CHECK-NEXT:    vmov lr, r12, d18
122; CHECK-NEXT:    mov r0, #0
123; CHECK-NEXT:    vmov r2, r1, d20
124; CHECK-NEXT:    subs r2, r2, lr
125; CHECK-NEXT:    vmov r2, r5, d22
126; CHECK-NEXT:    sbcs r1, r1, r12
127; CHECK-NEXT:    mov r1, #0
128; CHECK-NEXT:    movlt r1, #1
129; CHECK-NEXT:    cmp r1, #0
130; CHECK-NEXT:    mvnne r1, #0
131; CHECK-NEXT:    subs r2, r2, r4
132; CHECK-NEXT:    sbcs r6, r5, r6
133; CHECK-NEXT:    vmov r2, r12, d17
134; CHECK-NEXT:    vmov r5, r4, d23
135; CHECK-NEXT:    mov r6, #0
136; CHECK-NEXT:    movlt r6, #1
137; CHECK-NEXT:    cmp r6, #0
138; CHECK-NEXT:    mvnne r6, #0
139; CHECK-NEXT:    subs r2, r5, r2
140; CHECK-NEXT:    sbcs r2, r4, r12
141; CHECK-NEXT:    vmov lr, r12, d19
142; CHECK-NEXT:    vmov r4, r5, d21
143; CHECK-NEXT:    mov r2, #0
144; CHECK-NEXT:    movlt r2, #1
145; CHECK-NEXT:    cmp r2, #0
146; CHECK-NEXT:    mvnne r2, #0
147; CHECK-NEXT:    vdup.32 d25, r2
148; CHECK-NEXT:    vdup.32 d24, r6
149; CHECK-NEXT:    vbit q8, q11, q12
150; CHECK-NEXT:    subs r4, r4, lr
151; CHECK-NEXT:    sbcs r5, r5, r12
152; CHECK-NEXT:    movlt r0, #1
153; CHECK-NEXT:    cmp r0, #0
154; CHECK-NEXT:    mvnne r0, #0
155; CHECK-NEXT:    vdup.32 d27, r0
156; CHECK-NEXT:    vdup.32 d26, r1
157; CHECK-NEXT:    vbit q9, q10, q13
158; CHECK-NEXT:    vst1.64 {d16, d17}, [r3:128]!
159; CHECK-NEXT:    vst1.64 {d18, d19}, [r3:128]
160; CHECK-NEXT:    pop {r4, r5, r6, lr}
161; CHECK-NEXT:    mov pc, lr
162; COST: func_blend18
163; COST: cost of 0 {{.*}} icmp
164; COST: cost of 21 {{.*}} select
  ; %c is true in lanes where %v0 < %v1 (signed); the select keeps %v0 in those
  ; lanes and %v1 in the rest.
165  %v0 = load %T0_18, ptr %loadaddr
166  %v1 = load %T0_18, ptr %loadaddr2
167  %c = icmp slt %T0_18 %v0, %v1
168  %r = select %T1_18 %c, %T0_18 %v0, %T0_18 %v1
169  store %T0_18 %r, ptr %storeaddr
170  ret void
171}
172
; Signed-less-than compare + select on two <8 x i64> vectors loaded from
; %loadaddr and %loadaddr2, stored to %storeaddr. %blend is not referenced in
; the body.
; The llc assertions expect each 64-bit lane to be compared in core registers
; (subs/sbcs), with the resulting masks broadcast via vdup.32 and applied with
; vbit; no vector min instruction appears. The cost-model assertions expect
; cost 0 for the icmp and cost 54 for the select.
173%T0_19 = type <8 x i64>
174%T1_19 = type <8 x i1>
175define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storeaddr) {
176; CHECK-LABEL: func_blend19:
177; CHECK:       @ %bb.0:
178; CHECK-NEXT:    .save {r4, r5, r6, lr}
179; CHECK-NEXT:    push {r4, r5, r6, lr}
180; CHECK-NEXT:    vld1.64 {d28, d29}, [r1:128]!
181; CHECK-NEXT:    mov lr, #0
182; CHECK-NEXT:    vld1.64 {d30, d31}, [r0:128]!
183; CHECK-NEXT:    vld1.64 {d20, d21}, [r1:128]!
184; CHECK-NEXT:    vld1.64 {d24, d25}, [r0:128]!
185; CHECK-NEXT:    vld1.64 {d22, d23}, [r1:128]!
186; CHECK-NEXT:    vld1.64 {d26, d27}, [r0:128]!
187; CHECK-NEXT:    vld1.64 {d16, d17}, [r1:128]
188; CHECK-NEXT:    vld1.64 {d18, d19}, [r0:128]
189; CHECK-NEXT:    vmov r0, r12, d16
190; CHECK-NEXT:    vmov r1, r2, d18
191; CHECK-NEXT:    subs r0, r1, r0
192; CHECK-NEXT:    vmov r1, r4, d25
193; CHECK-NEXT:    sbcs r0, r2, r12
194; CHECK-NEXT:    mov r12, #0
195; CHECK-NEXT:    vmov r2, r0, d21
196; CHECK-NEXT:    movlt r12, #1
197; CHECK-NEXT:    cmp r12, #0
198; CHECK-NEXT:    mvnne r12, #0
199; CHECK-NEXT:    subs r1, r1, r2
200; CHECK-NEXT:    sbcs r0, r4, r0
201; CHECK-NEXT:    vmov r2, r4, d24
202; CHECK-NEXT:    mov r0, #0
203; CHECK-NEXT:    movlt r0, #1
204; CHECK-NEXT:    cmp r0, #0
205; CHECK-NEXT:    mvnne r0, #0
206; CHECK-NEXT:    vdup.32 d1, r0
207; CHECK-NEXT:    vmov r0, r1, d20
208; CHECK-NEXT:    subs r0, r2, r0
209; CHECK-NEXT:    sbcs r0, r4, r1
210; CHECK-NEXT:    vmov r2, r4, d26
211; CHECK-NEXT:    mov r0, #0
212; CHECK-NEXT:    movlt r0, #1
213; CHECK-NEXT:    cmp r0, #0
214; CHECK-NEXT:    mvnne r0, #0
215; CHECK-NEXT:    vdup.32 d0, r0
216; CHECK-NEXT:    vmov r0, r1, d22
217; CHECK-NEXT:    subs r0, r2, r0
218; CHECK-NEXT:    mov r2, #0
219; CHECK-NEXT:    sbcs r0, r4, r1
220; CHECK-NEXT:    vmov r4, r5, d31
221; CHECK-NEXT:    vmov r0, r1, d29
222; CHECK-NEXT:    movlt r2, #1
223; CHECK-NEXT:    cmp r2, #0
224; CHECK-NEXT:    mvnne r2, #0
225; CHECK-NEXT:    subs r0, r4, r0
226; CHECK-NEXT:    sbcs r0, r5, r1
227; CHECK-NEXT:    vmov r4, r5, d30
228; CHECK-NEXT:    mov r0, #0
229; CHECK-NEXT:    movlt r0, #1
230; CHECK-NEXT:    cmp r0, #0
231; CHECK-NEXT:    mvnne r0, #0
232; CHECK-NEXT:    vdup.32 d3, r0
233; CHECK-NEXT:    vmov r0, r1, d28
234; CHECK-NEXT:    subs r0, r4, r0
235; CHECK-NEXT:    sbcs r0, r5, r1
236; CHECK-NEXT:    vmov r4, r5, d27
237; CHECK-NEXT:    mov r0, #0
238; CHECK-NEXT:    movlt r0, #1
239; CHECK-NEXT:    cmp r0, #0
240; CHECK-NEXT:    mvnne r0, #0
241; CHECK-NEXT:    vdup.32 d2, r0
242; CHECK-NEXT:    vmov r0, r1, d23
243; CHECK-NEXT:    vbit q14, q15, q1
244; CHECK-NEXT:    vbit q10, q12, q0
245; CHECK-NEXT:    subs r0, r4, r0
246; CHECK-NEXT:    sbcs r0, r5, r1
247; CHECK-NEXT:    vmov r1, r4, d17
248; CHECK-NEXT:    vmov r5, r6, d19
249; CHECK-NEXT:    mov r0, #0
250; CHECK-NEXT:    movlt r0, #1
251; CHECK-NEXT:    cmp r0, #0
252; CHECK-NEXT:    mvnne r0, #0
253; CHECK-NEXT:    vdup.32 d31, r0
254; CHECK-NEXT:    vdup.32 d30, r2
255; CHECK-NEXT:    vbit q11, q13, q15
256; CHECK-NEXT:    vst1.64 {d28, d29}, [r3:128]!
257; CHECK-NEXT:    subs r1, r5, r1
258; CHECK-NEXT:    sbcs r1, r6, r4
259; CHECK-NEXT:    movlt lr, #1
260; CHECK-NEXT:    cmp lr, #0
261; CHECK-NEXT:    mvnne lr, #0
262; CHECK-NEXT:    vdup.32 d3, lr
263; CHECK-NEXT:    vdup.32 d2, r12
264; CHECK-NEXT:    vbit q8, q9, q1
265; CHECK-NEXT:    vst1.64 {d20, d21}, [r3:128]!
266; CHECK-NEXT:    vst1.64 {d22, d23}, [r3:128]!
267; CHECK-NEXT:    vst1.64 {d16, d17}, [r3:128]
268; CHECK-NEXT:    pop {r4, r5, r6, lr}
269; CHECK-NEXT:    mov pc, lr
270; COST: func_blend19
271; COST: cost of 0 {{.*}} icmp
272; COST: cost of 54 {{.*}} select
  ; %c is true in lanes where %v0 < %v1 (signed); the select keeps %v0 in those
  ; lanes and %v1 in the rest.
273  %v0 = load %T0_19, ptr %loadaddr
274  %v1 = load %T0_19, ptr %loadaddr2
275  %c = icmp slt %T0_19 %v0, %v1
276  %r = select %T1_19 %c, %T0_19 %v0, %T0_19 %v1
277  store %T0_19 %r, ptr %storeaddr
278  ret void
279}
280
; Signed-less-than compare + select on two <16 x i64> vectors loaded from
; %loadaddr and %loadaddr2, stored to %storeaddr. %blend is not referenced in
; the body.
; The llc assertions expect each 64-bit lane to be compared in core registers
; (subs/sbcs), with the resulting masks broadcast via vdup.32 and applied with
; vbit/vbif/vbsl; the expected code also saves and restores d8-d9 in addition
; to r4-r8 and lr. The cost-model assertions expect cost 0 for the icmp and
; cost 108 for the select.
281%T0_20 = type <16 x i64>
282%T1_20 = type <16 x i1>
283define void @func_blend20(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storeaddr) {
284; CHECK-LABEL: func_blend20:
285; CHECK:       @ %bb.0:
286; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
287; CHECK-NEXT:    push {r4, r5, r6, r7, r8, lr}
288; CHECK-NEXT:    .vsave {d8, d9}
289; CHECK-NEXT:    vpush {d8, d9}
290; CHECK-NEXT:    add r8, r1, #64
291; CHECK-NEXT:    add lr, r0, #64
292; CHECK-NEXT:    vld1.64 {d16, d17}, [r1:128]!
293; CHECK-NEXT:    mov r12, #0
294; CHECK-NEXT:    vld1.64 {d24, d25}, [r0:128]!
295; CHECK-NEXT:    vmov r4, r5, d17
296; CHECK-NEXT:    vmov r6, r7, d25
297; CHECK-NEXT:    vld1.64 {d18, d19}, [lr:128]!
298; CHECK-NEXT:    vld1.64 {d20, d21}, [r8:128]!
299; CHECK-NEXT:    vld1.64 {d22, d23}, [r8:128]!
300; CHECK-NEXT:    vld1.64 {d0, d1}, [lr:128]!
301; CHECK-NEXT:    subs r4, r6, r4
302; CHECK-NEXT:    sbcs r4, r7, r5
303; CHECK-NEXT:    vmov r5, r6, d16
304; CHECK-NEXT:    vmov r7, r2, d24
305; CHECK-NEXT:    mov r4, #0
306; CHECK-NEXT:    movlt r4, #1
307; CHECK-NEXT:    cmp r4, #0
308; CHECK-NEXT:    mvnne r4, #0
309; CHECK-NEXT:    vdup.32 d27, r4
310; CHECK-NEXT:    subs r5, r7, r5
311; CHECK-NEXT:    sbcs r2, r2, r6
312; CHECK-NEXT:    vmov r5, r6, d1
313; CHECK-NEXT:    mov r2, #0
314; CHECK-NEXT:    movlt r2, #1
315; CHECK-NEXT:    cmp r2, #0
316; CHECK-NEXT:    mvnne r2, #0
317; CHECK-NEXT:    vdup.32 d26, r2
318; CHECK-NEXT:    vmov r2, r4, d23
319; CHECK-NEXT:    vbit q8, q12, q13
320; CHECK-NEXT:    vld1.64 {d24, d25}, [r0:128]!
321; CHECK-NEXT:    vld1.64 {d26, d27}, [r1:128]!
322; CHECK-NEXT:    vld1.64 {d28, d29}, [lr:128]!
323; CHECK-NEXT:    subs r2, r5, r2
324; CHECK-NEXT:    sbcs r2, r6, r4
325; CHECK-NEXT:    vmov r4, r5, d22
326; CHECK-NEXT:    vmov r6, r7, d0
327; CHECK-NEXT:    mov r2, #0
328; CHECK-NEXT:    movlt r2, #1
329; CHECK-NEXT:    cmp r2, #0
330; CHECK-NEXT:    mvnne r2, #0
331; CHECK-NEXT:    vdup.32 d3, r2
332; CHECK-NEXT:    subs r4, r6, r4
333; CHECK-NEXT:    sbcs r4, r7, r5
334; CHECK-NEXT:    vmov r2, r5, d27
335; CHECK-NEXT:    vmov r6, r7, d25
336; CHECK-NEXT:    mov r4, #0
337; CHECK-NEXT:    movlt r4, #1
338; CHECK-NEXT:    cmp r4, #0
339; CHECK-NEXT:    mvnne r4, #0
340; CHECK-NEXT:    vdup.32 d2, r4
341; CHECK-NEXT:    subs r2, r6, r2
342; CHECK-NEXT:    sbcs r2, r7, r5
343; CHECK-NEXT:    vmov r6, r7, d24
344; CHECK-NEXT:    mov r2, #0
345; CHECK-NEXT:    movlt r2, #1
346; CHECK-NEXT:    cmp r2, #0
347; CHECK-NEXT:    mvnne r2, #0
348; CHECK-NEXT:    vdup.32 d5, r2
349; CHECK-NEXT:    vmov r2, r5, d26
350; CHECK-NEXT:    subs r2, r6, r2
351; CHECK-NEXT:    sbcs r2, r7, r5
352; CHECK-NEXT:    vmov r6, r7, d19
353; CHECK-NEXT:    mov r2, #0
354; CHECK-NEXT:    movlt r2, #1
355; CHECK-NEXT:    cmp r2, #0
356; CHECK-NEXT:    mvnne r2, #0
357; CHECK-NEXT:    vdup.32 d4, r2
358; CHECK-NEXT:    vmov r2, r5, d21
359; CHECK-NEXT:    subs r2, r6, r2
360; CHECK-NEXT:    sbcs r2, r7, r5
361; CHECK-NEXT:    vmov r6, r7, d18
362; CHECK-NEXT:    mov r2, #0
363; CHECK-NEXT:    movlt r2, #1
364; CHECK-NEXT:    cmp r2, #0
365; CHECK-NEXT:    mvnne r2, #0
366; CHECK-NEXT:    vdup.32 d31, r2
367; CHECK-NEXT:    vmov r2, r5, d20
368; CHECK-NEXT:    subs r2, r6, r2
369; CHECK-NEXT:    sbcs r2, r7, r5
370; CHECK-NEXT:    mov r2, #0
371; CHECK-NEXT:    movlt r2, #1
372; CHECK-NEXT:    cmp r2, #0
373; CHECK-NEXT:    mvnne r2, #0
374; CHECK-NEXT:    vdup.32 d30, r2
375; CHECK-NEXT:    vbif q9, q10, q15
376; CHECK-NEXT:    vld1.64 {d30, d31}, [r8:128]!
377; CHECK-NEXT:    vld1.64 {d20, d21}, [r8:128]
378; CHECK-NEXT:    vbit q13, q12, q2
379; CHECK-NEXT:    vld1.64 {d24, d25}, [lr:128]
380; CHECK-NEXT:    vmov r2, r7, d21
381; CHECK-NEXT:    vbit q11, q0, q1
382; CHECK-NEXT:    mov lr, #0
383; CHECK-NEXT:    vmov r6, r5, d25
384; CHECK-NEXT:    vld1.64 {d4, d5}, [r1:128]!
385; CHECK-NEXT:    vld1.64 {d6, d7}, [r0:128]!
386; CHECK-NEXT:    vld1.64 {d0, d1}, [r1:128]
387; CHECK-NEXT:    vld1.64 {d2, d3}, [r0:128]
388; CHECK-NEXT:    subs r1, r6, r2
389; CHECK-NEXT:    vmov r0, r6, d2
390; CHECK-NEXT:    sbcs r1, r5, r7
391; CHECK-NEXT:    vmov r2, r7, d0
392; CHECK-NEXT:    movlt lr, #1
393; CHECK-NEXT:    cmp lr, #0
394; CHECK-NEXT:    mvnne lr, #0
395; CHECK-NEXT:    subs r0, r0, r2
396; CHECK-NEXT:    sbcs r0, r6, r7
397; CHECK-NEXT:    vmov r2, r7, d30
398; CHECK-NEXT:    vmov r6, r5, d28
399; CHECK-NEXT:    mov r0, #0
400; CHECK-NEXT:    movlt r0, #1
401; CHECK-NEXT:    cmp r0, #0
402; CHECK-NEXT:    mvnne r0, #0
403; CHECK-NEXT:    subs r2, r6, r2
404; CHECK-NEXT:    sbcs r2, r5, r7
405; CHECK-NEXT:    vmov r7, r6, d31
406; CHECK-NEXT:    vmov r5, r4, d29
407; CHECK-NEXT:    mov r2, #0
408; CHECK-NEXT:    movlt r2, #1
409; CHECK-NEXT:    cmp r2, #0
410; CHECK-NEXT:    mvnne r2, #0
411; CHECK-NEXT:    subs r7, r5, r7
412; CHECK-NEXT:    vmov r5, r1, d7
413; CHECK-NEXT:    sbcs r7, r4, r6
414; CHECK-NEXT:    mov r4, #0
415; CHECK-NEXT:    vmov r7, r6, d5
416; CHECK-NEXT:    movlt r4, #1
417; CHECK-NEXT:    cmp r4, #0
418; CHECK-NEXT:    mvnne r4, #0
419; CHECK-NEXT:    subs r5, r5, r7
420; CHECK-NEXT:    sbcs r1, r1, r6
421; CHECK-NEXT:    vmov r6, r7, d6
422; CHECK-NEXT:    mov r1, #0
423; CHECK-NEXT:    movlt r1, #1
424; CHECK-NEXT:    cmp r1, #0
425; CHECK-NEXT:    mvnne r1, #0
426; CHECK-NEXT:    vdup.32 d9, r1
427; CHECK-NEXT:    vmov r1, r5, d4
428; CHECK-NEXT:    subs r1, r6, r1
429; CHECK-NEXT:    sbcs r1, r7, r5
430; CHECK-NEXT:    vmov r6, r7, d3
431; CHECK-NEXT:    mov r1, #0
432; CHECK-NEXT:    movlt r1, #1
433; CHECK-NEXT:    cmp r1, #0
434; CHECK-NEXT:    mvnne r1, #0
435; CHECK-NEXT:    vdup.32 d8, r1
436; CHECK-NEXT:    vmov r1, r5, d1
437; CHECK-NEXT:    vbit q2, q3, q4
438; CHECK-NEXT:    vdup.32 d9, r4
439; CHECK-NEXT:    vdup.32 d8, r2
440; CHECK-NEXT:    subs r1, r6, r1
441; CHECK-NEXT:    sbcs r1, r7, r5
442; CHECK-NEXT:    vmov r5, r6, d24
443; CHECK-NEXT:    mov r1, #0
444; CHECK-NEXT:    movlt r1, #1
445; CHECK-NEXT:    cmp r1, #0
446; CHECK-NEXT:    mvnne r1, #0
447; CHECK-NEXT:    vdup.32 d7, r1
448; CHECK-NEXT:    vmov r1, r4, d20
449; CHECK-NEXT:    vdup.32 d6, r0
450; CHECK-NEXT:    subs r1, r5, r1
451; CHECK-NEXT:    mov r1, r3
452; CHECK-NEXT:    sbcs r0, r6, r4
453; CHECK-NEXT:    vst1.64 {d16, d17}, [r1:128]!
454; CHECK-NEXT:    vorr q8, q4, q4
455; CHECK-NEXT:    movlt r12, #1
456; CHECK-NEXT:    cmp r12, #0
457; CHECK-NEXT:    vbsl q8, q14, q15
458; CHECK-NEXT:    vdup.32 d29, lr
459; CHECK-NEXT:    vorr q15, q3, q3
460; CHECK-NEXT:    mvnne r12, #0
461; CHECK-NEXT:    vdup.32 d28, r12
462; CHECK-NEXT:    add r0, r3, #64
463; CHECK-NEXT:    vbsl q15, q1, q0
464; CHECK-NEXT:    vst1.64 {d26, d27}, [r1:128]!
465; CHECK-NEXT:    vbit q10, q12, q14
466; CHECK-NEXT:    vst1.64 {d18, d19}, [r0:128]!
467; CHECK-NEXT:    vst1.64 {d22, d23}, [r0:128]!
468; CHECK-NEXT:    vst1.64 {d4, d5}, [r1:128]!
469; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]!
470; CHECK-NEXT:    vst1.64 {d30, d31}, [r1:128]
471; CHECK-NEXT:    vst1.64 {d20, d21}, [r0:128]
472; CHECK-NEXT:    vpop {d8, d9}
473; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, lr}
474; CHECK-NEXT:    mov pc, lr
475; COST: func_blend20
476; COST: cost of 0 {{.*}} icmp
477; COST: cost of 108 {{.*}} select
  ; %c is true in lanes where %v0 < %v1 (signed); the select keeps %v0 in those
  ; lanes and %v1 in the rest.
478  %v0 = load %T0_20, ptr %loadaddr
479  %v1 = load %T0_20, ptr %loadaddr2
480  %c = icmp slt %T0_20 %v0, %v1
481  %r = select %T1_20 %c, %T0_20 %v0, %T0_20 %v1
482  store %T0_20 %r, ptr %storeaddr
483  ret void
484}
485