; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes='print<cost-model>' -mtriple=arm-apple-ios6.0.0 -mcpu=cortex-a8 2>&1 -disable-output | FileCheck %s --check-prefix=COST
; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
; Make sure that ARM backend with NEON handles vselect.
;
; Every function below is a compare-and-select (integer min/max) over a vector
; type. Two independent sets of expectations are attached to each function:
;   * the llc assertions (default prefix) pin the exact NEON instruction
;     sequence emitted for arm-eabi with +neon;
;   * the cost-model assertions pin the cost that the cost-model printer
;     reports for the icmp and the select on cortex-a8.
; The llc assertions are autogenerated; regenerate them with the script named
; on the first line instead of editing them by hand.

; <4 x i32> signed max written as icmp sgt + select. The expected output is a
; single vmax.s32 on q registers; the instructions before it only assemble the
; two vector arguments from r2/r3 and the stack.
define void @vmax_v4i32(ptr %m, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vmax_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    add r1, sp, #8
; CHECK-NEXT:    vldr d17, [sp]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vmov d16, r2, r3
; CHECK-NEXT:    vmax.s32 q8, q8, q9
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0]
; CHECK-NEXT:    mov pc, lr
  %cmpres = icmp sgt <4 x i32> %a, %b
  %maxres = select <4 x i1> %cmpres, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %maxres, ptr %m
  ret void
}

; <16 x i16> signed min (icmp slt + select). The 256-bit vector is expected to
; be handled as two 128-bit halves, each lowered to one vmin.s16.
; %blend is never referenced in the body.
%T0_10 = type <16 x i16>
%T1_10 = type <16 x i1>
define void @func_blend10(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storeaddr) {
; CHECK-LABEL: func_blend10:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.16 {d16, d17}, [r1:128]!
; CHECK-NEXT:    vld1.16 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vmin.s16 q8, q9, q8
; CHECK-NEXT:    vld1.64 {d20, d21}, [r1:128]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0:128]
; CHECK-NEXT:    vmin.s16 q9, q9, q10
; CHECK-NEXT:    vst1.16 {d16, d17}, [r3:128]!
; CHECK-NEXT:    vst1.64 {d18, d19}, [r3:128]
; CHECK-NEXT:    mov pc, lr
; COST: func_blend10
; COST: cost of 0 {{.*}} icmp
; COST: cost of 4 {{.*}} select

  %v0 = load %T0_10, ptr %loadaddr
  %v1 = load %T0_10, ptr %loadaddr2
  %c = icmp slt %T0_10 %v0, %v1
  %r = select %T1_10 %c, %T0_10 %v0, %T0_10 %v1
  store %T0_10 %r, ptr %storeaddr
  ret void
}

; <8 x i32> signed min (icmp slt + select): expected as two vmin.s32, one per
; 128-bit half. %blend is never referenced in the body.
%T0_14 = type <8 x i32>
%T1_14 = type <8 x i1>
define void @func_blend14(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storeaddr) {
; CHECK-LABEL: func_blend14:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.32 {d16, d17}, [r1:128]!
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vmin.s32 q8, q9, q8
; CHECK-NEXT:    vld1.64 {d20, d21}, [r1:128]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0:128]
; CHECK-NEXT:    vmin.s32 q9, q9, q10
; CHECK-NEXT:    vst1.32 {d16, d17}, [r3:128]!
; CHECK-NEXT:    vst1.64 {d18, d19}, [r3:128]
; CHECK-NEXT:    mov pc, lr
; COST: func_blend14
; COST: cost of 0 {{.*}} icmp
; COST: cost of 4 {{.*}} select
  %v0 = load %T0_14, ptr %loadaddr
  %v1 = load %T0_14, ptr %loadaddr2
  %c = icmp slt %T0_14 %v0, %v1
  %r = select %T1_14 %c, %T0_14 %v0, %T0_14 %v1
  store %T0_14 %r, ptr %storeaddr
  ret void
}

; <16 x i32> signed min (icmp slt + select): expected as four vmin.s32, one per
; 128-bit quarter. %blend is never referenced in the body.
%T0_15 = type <16 x i32>
%T1_15 = type <16 x i1>
define void @func_blend15(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storeaddr) {
; CHECK-LABEL: func_blend15:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.32 {d16, d17}, [r1:128]!
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vmin.s32 q8, q9, q8
; CHECK-NEXT:    vld1.32 {d20, d21}, [r1:128]!
; CHECK-NEXT:    vld1.32 {d22, d23}, [r0:128]!
; CHECK-NEXT:    vmin.s32 q10, q11, q10
; CHECK-NEXT:    vld1.32 {d24, d25}, [r1:128]!
; CHECK-NEXT:    vld1.32 {d26, d27}, [r0:128]!
; CHECK-NEXT:    vmin.s32 q12, q13, q12
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1:128]
; CHECK-NEXT:    vld1.64 {d22, d23}, [r0:128]
; CHECK-NEXT:    vmin.s32 q9, q11, q9
; CHECK-NEXT:    vst1.32 {d16, d17}, [r3:128]!
; CHECK-NEXT:    vst1.32 {d20, d21}, [r3:128]!
; CHECK-NEXT:    vst1.32 {d24, d25}, [r3:128]!
; CHECK-NEXT:    vst1.64 {d18, d19}, [r3:128]
; CHECK-NEXT:    mov pc, lr
; COST: func_blend15
; COST: cost of 0 {{.*}} icmp
; COST: cost of 8 {{.*}} select

  %v0 = load %T0_15, ptr %loadaddr
  %v1 = load %T0_15, ptr %loadaddr2
  %c = icmp slt %T0_15 %v0, %v1
  %r = select %T1_15 %c, %T0_15 %v0, %T0_15 %v1
  store %T0_15 %r, ptr %storeaddr
  ret void
}

; We adjusted the cost model of the following selects. When we improve code
; lowering we also need to adjust the cost.
; <4 x i64> signed min (icmp slt + select). The expected output contains no
; vector min for 64-bit lanes: each lane is compared with a scalar subs/sbcs
; pair, the result is turned into an all-ones/zero word (mov #0, movlt #1,
; cmp, mvnne #0), vdup.32 builds the mask registers and vbit performs the
; blend. The cost-model assertions pin the select at 21.
; %blend is never referenced in the body.
%T0_18 = type <4 x i64>
%T1_18 = type <4 x i1>
define void @func_blend18(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storeaddr) {
; CHECK-LABEL: func_blend18:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1:128]!
; CHECK-NEXT:    vld1.64 {d22, d23}, [r0:128]!
; CHECK-NEXT:    vmov r4, r6, d16
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1:128]
; CHECK-NEXT:    vld1.64 {d20, d21}, [r0:128]
; CHECK-NEXT:    vmov lr, r12, d18
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    vmov r2, r1, d20
; CHECK-NEXT:    subs r2, r2, lr
; CHECK-NEXT:    vmov r2, r5, d22
; CHECK-NEXT:    sbcs r1, r1, r12
; CHECK-NEXT:    mov r1, #0
; CHECK-NEXT:    movlt r1, #1
; CHECK-NEXT:    cmp r1, #0
; CHECK-NEXT:    mvnne r1, #0
; CHECK-NEXT:    subs r2, r2, r4
; CHECK-NEXT:    sbcs r6, r5, r6
; CHECK-NEXT:    vmov r2, r12, d17
; CHECK-NEXT:    vmov r5, r4, d23
; CHECK-NEXT:    mov r6, #0
; CHECK-NEXT:    movlt r6, #1
; CHECK-NEXT:    cmp r6, #0
; CHECK-NEXT:    mvnne r6, #0
; CHECK-NEXT:    subs r2, r5, r2
; CHECK-NEXT:    sbcs r2, r4, r12
; CHECK-NEXT:    vmov lr, r12, d19
; CHECK-NEXT:    vmov r4, r5, d21
; CHECK-NEXT:    mov r2, #0
; CHECK-NEXT:    movlt r2, #1
; CHECK-NEXT:    cmp r2, #0
; CHECK-NEXT:    mvnne r2, #0
; CHECK-NEXT:    vdup.32 d25, r2
; CHECK-NEXT:    vdup.32 d24, r6
; CHECK-NEXT:    vbit q8, q11, q12
; CHECK-NEXT:    subs r4, r4, lr
; CHECK-NEXT:    sbcs r5, r5, r12
; CHECK-NEXT:    movlt r0, #1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    mvnne r0, #0
; CHECK-NEXT:    vdup.32 d27, r0
; CHECK-NEXT:    vdup.32 d26, r1
; CHECK-NEXT:    vbit q9, q10, q13
; CHECK-NEXT:    vst1.64 {d16, d17}, [r3:128]!
; CHECK-NEXT:    vst1.64 {d18, d19}, [r3:128]
; CHECK-NEXT:    pop {r4, r5, r6, lr}
; CHECK-NEXT:    mov pc, lr
; COST: func_blend18
; COST: cost of 0 {{.*}} icmp
; COST: cost of 21 {{.*}} select
  %v0 = load %T0_18, ptr %loadaddr
  %v1 = load %T0_18, ptr %loadaddr2
  %c = icmp slt %T0_18 %v0, %v1
  %r = select %T1_18 %c, %T0_18 %v0, %T0_18 %v1
  store %T0_18 %r, ptr %storeaddr
  ret void
}

; <8 x i64> signed min (icmp slt + select). Same shape of expected output as
; the <4 x i64> case, repeated for eight lanes: scalar subs/sbcs compares,
; masks built with vdup.32, blends done with vbit. The cost-model assertions
; pin the select at 54. %blend is never referenced in the body.
%T0_19 = type <8 x i64>
%T1_19 = type <8 x i1>
define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storeaddr) {
; CHECK-LABEL: func_blend19:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    vld1.64 {d28, d29}, [r1:128]!
; CHECK-NEXT:    mov lr, #0
; CHECK-NEXT:    vld1.64 {d30, d31}, [r0:128]!
; CHECK-NEXT:    vld1.64 {d20, d21}, [r1:128]!
; CHECK-NEXT:    vld1.64 {d24, d25}, [r0:128]!
; CHECK-NEXT:    vld1.64 {d22, d23}, [r1:128]!
; CHECK-NEXT:    vld1.64 {d26, d27}, [r0:128]!
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1:128]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0:128]
; CHECK-NEXT:    vmov r0, r12, d16
; CHECK-NEXT:    vmov r1, r2, d18
; CHECK-NEXT:    subs r0, r1, r0
; CHECK-NEXT:    vmov r1, r4, d25
; CHECK-NEXT:    sbcs r0, r2, r12
; CHECK-NEXT:    mov r12, #0
; CHECK-NEXT:    vmov r2, r0, d21
; CHECK-NEXT:    movlt r12, #1
; CHECK-NEXT:    cmp r12, #0
; CHECK-NEXT:    mvnne r12, #0
; CHECK-NEXT:    subs r1, r1, r2
; CHECK-NEXT:    sbcs r0, r4, r0
; CHECK-NEXT:    vmov r2, r4, d24
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    movlt r0, #1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    mvnne r0, #0
; CHECK-NEXT:    vdup.32 d1, r0
; CHECK-NEXT:    vmov r0, r1, d20
; CHECK-NEXT:    subs r0, r2, r0
; CHECK-NEXT:    sbcs r0, r4, r1
; CHECK-NEXT:    vmov r2, r4, d26
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    movlt r0, #1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    mvnne r0, #0
; CHECK-NEXT:    vdup.32 d0, r0
; CHECK-NEXT:    vmov r0, r1, d22
; CHECK-NEXT:    subs r0, r2, r0
; CHECK-NEXT:    mov r2, #0
; CHECK-NEXT:    sbcs r0, r4, r1
; CHECK-NEXT:    vmov r4, r5, d31
; CHECK-NEXT:    vmov r0, r1, d29
; CHECK-NEXT:    movlt r2, #1
; CHECK-NEXT:    cmp r2, #0
; CHECK-NEXT:    mvnne r2, #0
; CHECK-NEXT:    subs r0, r4, r0
; CHECK-NEXT:    sbcs r0, r5, r1
; CHECK-NEXT:    vmov r4, r5, d30
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    movlt r0, #1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    mvnne r0, #0
; CHECK-NEXT:    vdup.32 d3, r0
; CHECK-NEXT:    vmov r0, r1, d28
; CHECK-NEXT:    subs r0, r4, r0
; CHECK-NEXT:    sbcs r0, r5, r1
; CHECK-NEXT:    vmov r4, r5, d27
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    movlt r0, #1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    mvnne r0, #0
; CHECK-NEXT:    vdup.32 d2, r0
; CHECK-NEXT:    vmov r0, r1, d23
; CHECK-NEXT:    vbit q14, q15, q1
; CHECK-NEXT:    vbit q10, q12, q0
; CHECK-NEXT:    subs r0, r4, r0
; CHECK-NEXT:    sbcs r0, r5, r1
; CHECK-NEXT:    vmov r1, r4, d17
; CHECK-NEXT:    vmov r5, r6, d19
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    movlt r0, #1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    mvnne r0, #0
; CHECK-NEXT:    vdup.32 d31, r0
; CHECK-NEXT:    vdup.32 d30, r2
; CHECK-NEXT:    vbit q11, q13, q15
; CHECK-NEXT:    vst1.64 {d28, d29}, [r3:128]!
; CHECK-NEXT:    subs r1, r5, r1
; CHECK-NEXT:    sbcs r1, r6, r4
; CHECK-NEXT:    movlt lr, #1
; CHECK-NEXT:    cmp lr, #0
; CHECK-NEXT:    mvnne lr, #0
; CHECK-NEXT:    vdup.32 d3, lr
; CHECK-NEXT:    vdup.32 d2, r12
; CHECK-NEXT:    vbit q8, q9, q1
; CHECK-NEXT:    vst1.64 {d20, d21}, [r3:128]!
; CHECK-NEXT:    vst1.64 {d22, d23}, [r3:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r3:128]
; CHECK-NEXT:    pop {r4, r5, r6, lr}
; CHECK-NEXT:    mov pc, lr
; COST: func_blend19
; COST: cost of 0 {{.*}} icmp
; COST: cost of 54 {{.*}} select
  %v0 = load %T0_19, ptr %loadaddr
  %v1 = load %T0_19, ptr %loadaddr2
  %c = icmp slt %T0_19 %v0, %v1
  %r = select %T1_19 %c, %T0_19 %v0, %T0_19 %v1
  store %T0_19 %r, ptr %storeaddr
  ret void
}

; <16 x i64> signed min (icmp slt + select). Sixteen lanes are compared with
; scalar subs/sbcs pairs and blended with vbit/vbif/vbsl. The expected output
; additionally saves and restores d8/d9 (vpush/vpop) and walks both inputs and
; the output through two pointers 64 bytes apart. The cost-model assertions
; pin the select at 108. %blend is never referenced in the body.
%T0_20 = type <16 x i64>
%T1_20 = type <16 x i1>
define void @func_blend20(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storeaddr) {
; CHECK-LABEL: func_blend20:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    add r8, r1, #64
; CHECK-NEXT:    add lr, r0, #64
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1:128]!
; CHECK-NEXT:    mov r12, #0
; CHECK-NEXT:    vld1.64 {d24, d25}, [r0:128]!
; CHECK-NEXT:    vmov r4, r5, d17
; CHECK-NEXT:    vmov r6, r7, d25
; CHECK-NEXT:    vld1.64 {d18, d19}, [lr:128]!
; CHECK-NEXT:    vld1.64 {d20, d21}, [r8:128]!
; CHECK-NEXT:    vld1.64 {d22, d23}, [r8:128]!
; CHECK-NEXT:    vld1.64 {d0, d1}, [lr:128]!
; CHECK-NEXT:    subs r4, r6, r4
; CHECK-NEXT:    sbcs r4, r7, r5
; CHECK-NEXT:    vmov r5, r6, d16
; CHECK-NEXT:    vmov r7, r2, d24
; CHECK-NEXT:    mov r4, #0
; CHECK-NEXT:    movlt r4, #1
; CHECK-NEXT:    cmp r4, #0
; CHECK-NEXT:    mvnne r4, #0
; CHECK-NEXT:    vdup.32 d27, r4
; CHECK-NEXT:    subs r5, r7, r5
; CHECK-NEXT:    sbcs r2, r2, r6
; CHECK-NEXT:    vmov r5, r6, d1
; CHECK-NEXT:    mov r2, #0
; CHECK-NEXT:    movlt r2, #1
; CHECK-NEXT:    cmp r2, #0
; CHECK-NEXT:    mvnne r2, #0
; CHECK-NEXT:    vdup.32 d26, r2
; CHECK-NEXT:    vmov r2, r4, d23
; CHECK-NEXT:    vbit q8, q12, q13
; CHECK-NEXT:    vld1.64 {d24, d25}, [r0:128]!
; CHECK-NEXT:    vld1.64 {d26, d27}, [r1:128]!
; CHECK-NEXT:    vld1.64 {d28, d29}, [lr:128]!
; CHECK-NEXT:    subs r2, r5, r2
; CHECK-NEXT:    sbcs r2, r6, r4
; CHECK-NEXT:    vmov r4, r5, d22
; CHECK-NEXT:    vmov r6, r7, d0
; CHECK-NEXT:    mov r2, #0
; CHECK-NEXT:    movlt r2, #1
; CHECK-NEXT:    cmp r2, #0
; CHECK-NEXT:    mvnne r2, #0
; CHECK-NEXT:    vdup.32 d3, r2
; CHECK-NEXT:    subs r4, r6, r4
; CHECK-NEXT:    sbcs r4, r7, r5
; CHECK-NEXT:    vmov r2, r5, d27
; CHECK-NEXT:    vmov r6, r7, d25
; CHECK-NEXT:    mov r4, #0
; CHECK-NEXT:    movlt r4, #1
; CHECK-NEXT:    cmp r4, #0
; CHECK-NEXT:    mvnne r4, #0
; CHECK-NEXT:    vdup.32 d2, r4
; CHECK-NEXT:    subs r2, r6, r2
; CHECK-NEXT:    sbcs r2, r7, r5
; CHECK-NEXT:    vmov r6, r7, d24
; CHECK-NEXT:    mov r2, #0
; CHECK-NEXT:    movlt r2, #1
; CHECK-NEXT:    cmp r2, #0
; CHECK-NEXT:    mvnne r2, #0
; CHECK-NEXT:    vdup.32 d5, r2
; CHECK-NEXT:    vmov r2, r5, d26
; CHECK-NEXT:    subs r2, r6, r2
; CHECK-NEXT:    sbcs r2, r7, r5
; CHECK-NEXT:    vmov r6, r7, d19
; CHECK-NEXT:    mov r2, #0
; CHECK-NEXT:    movlt r2, #1
; CHECK-NEXT:    cmp r2, #0
; CHECK-NEXT:    mvnne r2, #0
; CHECK-NEXT:    vdup.32 d4, r2
; CHECK-NEXT:    vmov r2, r5, d21
; CHECK-NEXT:    subs r2, r6, r2
; CHECK-NEXT:    sbcs r2, r7, r5
; CHECK-NEXT:    vmov r6, r7, d18
; CHECK-NEXT:    mov r2, #0
; CHECK-NEXT:    movlt r2, #1
; CHECK-NEXT:    cmp r2, #0
; CHECK-NEXT:    mvnne r2, #0
; CHECK-NEXT:    vdup.32 d31, r2
; CHECK-NEXT:    vmov r2, r5, d20
; CHECK-NEXT:    subs r2, r6, r2
; CHECK-NEXT:    sbcs r2, r7, r5
; CHECK-NEXT:    mov r2, #0
; CHECK-NEXT:    movlt r2, #1
; CHECK-NEXT:    cmp r2, #0
; CHECK-NEXT:    mvnne r2, #0
; CHECK-NEXT:    vdup.32 d30, r2
; CHECK-NEXT:    vbif q9, q10, q15
; CHECK-NEXT:    vld1.64 {d30, d31}, [r8:128]!
; CHECK-NEXT:    vld1.64 {d20, d21}, [r8:128]
; CHECK-NEXT:    vbit q13, q12, q2
; CHECK-NEXT:    vld1.64 {d24, d25}, [lr:128]
; CHECK-NEXT:    vmov r2, r7, d21
; CHECK-NEXT:    vbit q11, q0, q1
; CHECK-NEXT:    mov lr, #0
; CHECK-NEXT:    vmov r6, r5, d25
; CHECK-NEXT:    vld1.64 {d4, d5}, [r1:128]!
; CHECK-NEXT:    vld1.64 {d6, d7}, [r0:128]!
; CHECK-NEXT:    vld1.64 {d0, d1}, [r1:128]
; CHECK-NEXT:    vld1.64 {d2, d3}, [r0:128]
; CHECK-NEXT:    subs r1, r6, r2
; CHECK-NEXT:    vmov r0, r6, d2
; CHECK-NEXT:    sbcs r1, r5, r7
; CHECK-NEXT:    vmov r2, r7, d0
; CHECK-NEXT:    movlt lr, #1
; CHECK-NEXT:    cmp lr, #0
; CHECK-NEXT:    mvnne lr, #0
; CHECK-NEXT:    subs r0, r0, r2
; CHECK-NEXT:    sbcs r0, r6, r7
; CHECK-NEXT:    vmov r2, r7, d30
; CHECK-NEXT:    vmov r6, r5, d28
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    movlt r0, #1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    mvnne r0, #0
; CHECK-NEXT:    subs r2, r6, r2
; CHECK-NEXT:    sbcs r2, r5, r7
; CHECK-NEXT:    vmov r7, r6, d31
; CHECK-NEXT:    vmov r5, r4, d29
; CHECK-NEXT:    mov r2, #0
; CHECK-NEXT:    movlt r2, #1
; CHECK-NEXT:    cmp r2, #0
; CHECK-NEXT:    mvnne r2, #0
; CHECK-NEXT:    subs r7, r5, r7
; CHECK-NEXT:    vmov r5, r1, d7
; CHECK-NEXT:    sbcs r7, r4, r6
; CHECK-NEXT:    mov r4, #0
; CHECK-NEXT:    vmov r7, r6, d5
; CHECK-NEXT:    movlt r4, #1
; CHECK-NEXT:    cmp r4, #0
; CHECK-NEXT:    mvnne r4, #0
; CHECK-NEXT:    subs r5, r5, r7
; CHECK-NEXT:    sbcs r1, r1, r6
; CHECK-NEXT:    vmov r6, r7, d6
; CHECK-NEXT:    mov r1, #0
; CHECK-NEXT:    movlt r1, #1
; CHECK-NEXT:    cmp r1, #0
; CHECK-NEXT:    mvnne r1, #0
; CHECK-NEXT:    vdup.32 d9, r1
; CHECK-NEXT:    vmov r1, r5, d4
; CHECK-NEXT:    subs r1, r6, r1
; CHECK-NEXT:    sbcs r1, r7, r5
; CHECK-NEXT:    vmov r6, r7, d3
; CHECK-NEXT:    mov r1, #0
; CHECK-NEXT:    movlt r1, #1
; CHECK-NEXT:    cmp r1, #0
; CHECK-NEXT:    mvnne r1, #0
; CHECK-NEXT:    vdup.32 d8, r1
; CHECK-NEXT:    vmov r1, r5, d1
; CHECK-NEXT:    vbit q2, q3, q4
; CHECK-NEXT:    vdup.32 d9, r4
; CHECK-NEXT:    vdup.32 d8, r2
; CHECK-NEXT:    subs r1, r6, r1
; CHECK-NEXT:    sbcs r1, r7, r5
; CHECK-NEXT:    vmov r5, r6, d24
; CHECK-NEXT:    mov r1, #0
; CHECK-NEXT:    movlt r1, #1
; CHECK-NEXT:    cmp r1, #0
; CHECK-NEXT:    mvnne r1, #0
; CHECK-NEXT:    vdup.32 d7, r1
; CHECK-NEXT:    vmov r1, r4, d20
; CHECK-NEXT:    vdup.32 d6, r0
; CHECK-NEXT:    subs r1, r5, r1
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    sbcs r0, r6, r4
; CHECK-NEXT:    vst1.64 {d16, d17}, [r1:128]!
; CHECK-NEXT:    vorr q8, q4, q4
; CHECK-NEXT:    movlt r12, #1
; CHECK-NEXT:    cmp r12, #0
; CHECK-NEXT:    vbsl q8, q14, q15
; CHECK-NEXT:    vdup.32 d29, lr
; CHECK-NEXT:    vorr q15, q3, q3
; CHECK-NEXT:    mvnne r12, #0
; CHECK-NEXT:    vdup.32 d28, r12
; CHECK-NEXT:    add r0, r3, #64
; CHECK-NEXT:    vbsl q15, q1, q0
; CHECK-NEXT:    vst1.64 {d26, d27}, [r1:128]!
; CHECK-NEXT:    vbit q10, q12, q14
; CHECK-NEXT:    vst1.64 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d22, d23}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d4, d5}, [r1:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d30, d31}, [r1:128]
; CHECK-NEXT:    vst1.64 {d20, d21}, [r0:128]
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT:    mov pc, lr
; COST: func_blend20
; COST: cost of 0 {{.*}} icmp
; COST: cost of 108 {{.*}} select
  %v0 = load %T0_20, ptr %loadaddr
  %v1 = load %T0_20, ptr %loadaddr2
  %c = icmp slt %T0_20 %v0, %v1
  %r = select %T1_20 %c, %T0_20 %v0, %T0_20 %v1
  store %T0_20 %r, ptr %storeaddr
  ret void
}