; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s

; Codegen tests for the unsigned three-way comparison intrinsic llvm.ucmp on
; fixed-width vectors, compiled for Thumb v8.1-M Mainline with MVE (+mve.fp).
; In the expected assembly every result lane is built from two selects: 1 where
; a > b (unsigned "hi"/"lo" conditions), then all-ones (0xff bytes, i.e. -1)
; where b > a, and 0 otherwise.
; The assertions are tool-generated (see the NOTE on the first line); regenerate
; them with utils/update_llc_test_checks.py instead of editing them by hand.

; <8 x i8>: expected assembly widens both operands with vmovlb.u8 and performs
; the two compares on 16-bit lanes.
define arm_aapcs_vfpcc <8 x i8> @u_v8i8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: u_v8i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u8 q1, q1
; CHECK-NEXT:    vmovlb.u8 q0, q0
; CHECK-NEXT:    vmov.i32 q2, #0x0
; CHECK-NEXT:    vmov.i16 q3, #0x1
; CHECK-NEXT:    vcmp.u16 hi, q0, q1
; CHECK-NEXT:    vpsel q2, q3, q2
; CHECK-NEXT:    vmov.i8 q3, #0xff
; CHECK-NEXT:    vcmp.u16 hi, q1, q0
; CHECK-NEXT:    vpsel q0, q3, q2
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i8> @llvm.ucmp(<8 x i8> %a, <8 x i8> %b)
  ret <8 x i8> %c
}

; <16 x i8>: fills one q register; compares directly on 8-bit lanes with no
; widening step.
define arm_aapcs_vfpcc <16 x i8> @u_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: u_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i32 q2, #0x0
; CHECK-NEXT:    vmov.i8 q3, #0x1
; CHECK-NEXT:    vcmp.u8 hi, q0, q1
; CHECK-NEXT:    vpsel q2, q3, q2
; CHECK-NEXT:    vmov.i8 q3, #0xff
; CHECK-NEXT:    vcmp.u8 hi, q1, q0
; CHECK-NEXT:    vpsel q0, q3, q2
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i8> @llvm.ucmp(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %c
}

; <4 x i16>: expected assembly widens both operands with vmovlb.u16 and
; performs the two compares on 32-bit lanes.
define arm_aapcs_vfpcc <4 x i16> @u_v4i16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: u_v4i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u16 q1, q1
; CHECK-NEXT:    vmovlb.u16 q0, q0
; CHECK-NEXT:    vmov.i32 q2, #0x0
; CHECK-NEXT:    vmov.i32 q3, #0x1
; CHECK-NEXT:    vcmp.u32 hi, q0, q1
; CHECK-NEXT:    vpsel q2, q3, q2
; CHECK-NEXT:    vmov.i8 q3, #0xff
; CHECK-NEXT:    vcmp.u32 hi, q1, q0
; CHECK-NEXT:    vpsel q0, q3, q2
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i16> @llvm.ucmp(<4 x i16> %a, <4 x i16> %b)
  ret <4 x i16> %c
}

; <8 x i16>: fills one q register; compares directly on 16-bit lanes.
define arm_aapcs_vfpcc <8 x i16> @u_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: u_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i32 q2, #0x0
; CHECK-NEXT:    vmov.i16 q3, #0x1
; CHECK-NEXT:    vcmp.u16 hi, q0, q1
; CHECK-NEXT:    vpsel q2, q3, q2
; CHECK-NEXT:    vmov.i8 q3, #0xff
; CHECK-NEXT:    vcmp.u16 hi, q1, q0
; CHECK-NEXT:    vpsel q0, q3, q2
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i16> @llvm.ucmp(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %c
}

; <16 x i16>: each operand occupies two q registers (q0/q1 vs q2/q3). The
; expected assembly repeats the compare/select pair once per register pair and
; shares the 0, 1 and all-ones constants held in q4, q5 and q7, which is why
; d8-d15 are saved and restored.
define arm_aapcs_vfpcc <16 x i16> @u_v16i16(<16 x i16> %a, <16 x i16> %b) {
; CHECK-LABEL: u_v16i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    vmov.i32 q4, #0x0
; CHECK-NEXT:    vmov.i16 q5, #0x1
; CHECK-NEXT:    vcmp.u16 hi, q0, q2
; CHECK-NEXT:    vmov.i8 q7, #0xff
; CHECK-NEXT:    vpsel q6, q5, q4
; CHECK-NEXT:    vcmp.u16 hi, q2, q0
; CHECK-NEXT:    vpsel q0, q7, q6
; CHECK-NEXT:    vcmp.u16 hi, q1, q3
; CHECK-NEXT:    vpsel q2, q5, q4
; CHECK-NEXT:    vcmp.u16 hi, q3, q1
; CHECK-NEXT:    vpsel q1, q7, q2
; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i16> @llvm.ucmp(<16 x i16> %a, <16 x i16> %b)
  ret <16 x i16> %c
}

; <2 x i32>: expected assembly first masks both operands with 0xffffffff per
; 64-bit lane (vand), then compares each lane in scalar registers with
; subs/sbcs pairs. The per-lane results are packed with bfi into a predicate
; that is written to p0 (vmsr) before each vpsel. The "1" vector is loaded
; from the constant pool entry .LCPI5_0.
define arm_aapcs_vfpcc <2 x i32> @u_v2i32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: u_v2i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT:    vmov.i64 q2, #0xffffffff
; CHECK-NEXT:    movs r4, #0
; CHECK-NEXT:    vand q1, q1, q2
; CHECK-NEXT:    vand q0, q0, q2
; CHECK-NEXT:    vmov lr, r12, d0
; CHECK-NEXT:    movs r0, #0
; CHECK-NEXT:    vmov r3, r8, d2
; CHECK-NEXT:    vmov r6, r7, d3
; CHECK-NEXT:    vmov.i32 q1, #0x0
; CHECK-NEXT:    subs.w r1, r3, lr
; CHECK-NEXT:    sbcs.w r1, r8, r12
; CHECK-NEXT:    csetm r1, lo
; CHECK-NEXT:    bfi r4, r1, #0, #8
; CHECK-NEXT:    vmov r1, r5, d1
; CHECK-NEXT:    subs r2, r6, r1
; CHECK-NEXT:    sbcs.w r2, r7, r5
; CHECK-NEXT:    csetm r2, lo
; CHECK-NEXT:    bfi r4, r2, #8, #8
; CHECK-NEXT:    adr r2, .LCPI5_0
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    subs.w r2, lr, r3
; CHECK-NEXT:    sbcs.w r2, r12, r8
; CHECK-NEXT:    vmsr p0, r4
; CHECK-NEXT:    csetm r2, lo
; CHECK-NEXT:    subs r1, r1, r6
; CHECK-NEXT:    sbcs.w r1, r5, r7
; CHECK-NEXT:    bfi r0, r2, #0, #8
; CHECK-NEXT:    csetm r1, lo
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bfi r0, r1, #8, #8
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI5_0:
; CHECK-NEXT:    .long 1 @ 0x1
; CHECK-NEXT:    .long 0 @ 0x0
; CHECK-NEXT:    .long 1 @ 0x1
; CHECK-NEXT:    .long 0 @ 0x0
entry:
  %c = call <2 x i32> @llvm.ucmp(<2 x i32> %a, <2 x i32> %b)
  ret <2 x i32> %c
}

; <4 x i32>: fills one q register; compares directly on 32-bit lanes.
define arm_aapcs_vfpcc <4 x i32> @u_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: u_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i32 q2, #0x0
; CHECK-NEXT:    vmov.i32 q3, #0x1
; CHECK-NEXT:    vcmp.u32 hi, q0, q1
; CHECK-NEXT:    vpsel q2, q3, q2
; CHECK-NEXT:    vmov.i8 q3, #0xff
; CHECK-NEXT:    vcmp.u32 hi, q1, q0
; CHECK-NEXT:    vpsel q0, q3, q2
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i32> @llvm.ucmp(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %c
}

; <8 x i32>: each operand occupies two q registers; same two-pass shape as
; u_v16i16 but with 32-bit lane compares.
define arm_aapcs_vfpcc <8 x i32> @u_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: u_v8i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    vmov.i32 q4, #0x0
; CHECK-NEXT:    vmov.i32 q5, #0x1
; CHECK-NEXT:    vcmp.u32 hi, q0, q2
; CHECK-NEXT:    vmov.i8 q7, #0xff
; CHECK-NEXT:    vpsel q6, q5, q4
; CHECK-NEXT:    vcmp.u32 hi, q2, q0
; CHECK-NEXT:    vpsel q0, q7, q6
; CHECK-NEXT:    vcmp.u32 hi, q1, q3
; CHECK-NEXT:    vpsel q2, q5, q4
; CHECK-NEXT:    vcmp.u32 hi, q3, q1
; CHECK-NEXT:    vpsel q1, q7, q2
; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i32> @llvm.ucmp(<8 x i32> %a, <8 x i32> %b)
  ret <8 x i32> %c
}

; <2 x i64>: no vector compare appears in the expected assembly; each 64-bit
; lane is moved to scalar registers and compared with subs/sbcs pairs, and the
; results are packed with bfi into predicates written to p0. Same sequence as
; u_v2i32 minus the initial masking.
define arm_aapcs_vfpcc <2 x i64> @u_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: u_v2i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT:    vmov lr, r12, d0
; CHECK-NEXT:    movs r4, #0
; CHECK-NEXT:    vmov r3, r8, d2
; CHECK-NEXT:    movs r0, #0
; CHECK-NEXT:    vmov r6, r7, d3
; CHECK-NEXT:    vmov.i32 q1, #0x0
; CHECK-NEXT:    subs.w r1, r3, lr
; CHECK-NEXT:    sbcs.w r1, r8, r12
; CHECK-NEXT:    csetm r1, lo
; CHECK-NEXT:    bfi r4, r1, #0, #8
; CHECK-NEXT:    vmov r1, r5, d1
; CHECK-NEXT:    subs r2, r6, r1
; CHECK-NEXT:    sbcs.w r2, r7, r5
; CHECK-NEXT:    csetm r2, lo
; CHECK-NEXT:    bfi r4, r2, #8, #8
; CHECK-NEXT:    adr r2, .LCPI8_0
; CHECK-NEXT:    vldrw.u32 q0, [r2]
; CHECK-NEXT:    subs.w r2, lr, r3
; CHECK-NEXT:    sbcs.w r2, r12, r8
; CHECK-NEXT:    vmsr p0, r4
; CHECK-NEXT:    csetm r2, lo
; CHECK-NEXT:    subs r1, r1, r6
; CHECK-NEXT:    sbcs.w r1, r5, r7
; CHECK-NEXT:    bfi r0, r2, #0, #8
; CHECK-NEXT:    csetm r1, lo
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bfi r0, r1, #8, #8
; CHECK-NEXT:    vmov.i8 q1, #0xff
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpsel q0, q1, q0
; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, pc}
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI8_0:
; CHECK-NEXT:    .long 1 @ 0x1
; CHECK-NEXT:    .long 0 @ 0x0
; CHECK-NEXT:    .long 1 @ 0x1
; CHECK-NEXT:    .long 0 @ 0x0
entry:
  %c = call <2 x i64> @llvm.ucmp(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %c
}

; <4 x i64>: each operand occupies two q registers. The expected assembly runs
; the scalar subs/sbcs + bfi + vmsr sequence of u_v2i64 twice (q0 vs q2, then
; q1 vs q3), reusing the constants kept in q4 (loaded from .LCPI9_0), q5 (zero)
; and q2 (all ones).
define arm_aapcs_vfpcc <4 x i64> @u_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: u_v4i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
; CHECK-NEXT:    .pad #4
; CHECK-NEXT:    sub sp, #4
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov lr, r12, d0
; CHECK-NEXT:    movs r4, #0
; CHECK-NEXT:    vmov r3, r8, d4
; CHECK-NEXT:    vmov.i32 q5, #0x0
; CHECK-NEXT:    vmov r6, r7, d5
; CHECK-NEXT:    mov.w r9, #0
; CHECK-NEXT:    vmov.i8 q2, #0xff
; CHECK-NEXT:    subs.w r1, r3, lr
; CHECK-NEXT:    sbcs.w r1, r8, r12
; CHECK-NEXT:    csetm r1, lo
; CHECK-NEXT:    bfi r4, r1, #0, #8
; CHECK-NEXT:    vmov r1, r5, d1
; CHECK-NEXT:    subs r2, r6, r1
; CHECK-NEXT:    sbcs.w r2, r7, r5
; CHECK-NEXT:    csetm r2, lo
; CHECK-NEXT:    bfi r4, r2, #8, #8
; CHECK-NEXT:    adr r2, .LCPI9_0
; CHECK-NEXT:    vldrw.u32 q4, [r2]
; CHECK-NEXT:    subs.w r2, lr, r3
; CHECK-NEXT:    sbcs.w r2, r12, r8
; CHECK-NEXT:    mov.w r3, #0
; CHECK-NEXT:    csetm r2, lo
; CHECK-NEXT:    subs r1, r1, r6
; CHECK-NEXT:    sbcs.w r1, r5, r7
; CHECK-NEXT:    bfi r3, r2, #0, #8
; CHECK-NEXT:    csetm r1, lo
; CHECK-NEXT:    vmsr p0, r4
; CHECK-NEXT:    bfi r3, r1, #8, #8
; CHECK-NEXT:    vpsel q0, q4, q5
; CHECK-NEXT:    vmsr p0, r3
; CHECK-NEXT:    vmov lr, r12, d2
; CHECK-NEXT:    vmov r3, r7, d6
; CHECK-NEXT:    movs r5, #0
; CHECK-NEXT:    vmov r2, r1, d7
; CHECK-NEXT:    vpsel q0, q2, q0
; CHECK-NEXT:    subs.w r6, r3, lr
; CHECK-NEXT:    sbcs.w r6, r7, r12
; CHECK-NEXT:    csetm r6, lo
; CHECK-NEXT:    bfi r5, r6, #0, #8
; CHECK-NEXT:    vmov r6, r4, d3
; CHECK-NEXT:    subs r0, r2, r6
; CHECK-NEXT:    sbcs.w r0, r1, r4
; CHECK-NEXT:    csetm r0, lo
; CHECK-NEXT:    bfi r5, r0, #8, #8
; CHECK-NEXT:    subs.w r0, lr, r3
; CHECK-NEXT:    sbcs.w r0, r12, r7
; CHECK-NEXT:    vmsr p0, r5
; CHECK-NEXT:    csetm r0, lo
; CHECK-NEXT:    vpsel q1, q4, q5
; CHECK-NEXT:    bfi r9, r0, #0, #8
; CHECK-NEXT:    subs r0, r6, r2
; CHECK-NEXT:    sbcs.w r0, r4, r1
; CHECK-NEXT:    csetm r0, lo
; CHECK-NEXT:    bfi r9, r0, #8, #8
; CHECK-NEXT:    vmsr p0, r9
; CHECK-NEXT:    vpsel q1, q2, q1
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    add sp, #4
; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI9_0:
; CHECK-NEXT:    .long 1 @ 0x1
; CHECK-NEXT:    .long 0 @ 0x0
; CHECK-NEXT:    .long 1 @ 0x1
; CHECK-NEXT:    .long 0 @ 0x0
entry:
  %c = call <4 x i64> @llvm.ucmp(<4 x i64> %a, <4 x i64> %b)
  ret <4 x i64> %c
}

; Mixed-width case: the lo/hi <8 x i16> halves of each operand are concatenated
; with shufflevector into <16 x i16>, and llvm.ucmp is called with a narrower
; <16 x i8> result type (explicitly mangled as .v16i8.v16i16). The expected
; assembly compares each half on 16-bit lanes, writes both halves into a
; 16-byte stack slot with vstrb.16 stores (offsets 8 and 0), and reloads the
; combined vector with a single vldrw.u32.
; NOTE(review): the function name mentions NEON although the run line targets
; MVE - presumably inherited from the original reproducer; confirm.
define arm_aapcs_vfpcc <16 x i8> @signOf_neon(<8 x i16> %s0_lo, <8 x i16> %s0_hi, <8 x i16> %s1_lo, <8 x i16> %s1_hi) {
; CHECK-LABEL: signOf_neon:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    vmov.i32 q4, #0x0
; CHECK-NEXT:    vmov.i16 q5, #0x1
; CHECK-NEXT:    vcmp.u16 hi, q1, q3
; CHECK-NEXT:    vmov.i8 q7, #0xff
; CHECK-NEXT:    vpsel q6, q5, q4
; CHECK-NEXT:    vcmp.u16 hi, q3, q1
; CHECK-NEXT:    vpsel q1, q7, q6
; CHECK-NEXT:    vcmp.u16 hi, q0, q2
; CHECK-NEXT:    vpsel q3, q5, q4
; CHECK-NEXT:    vcmp.u16 hi, q2, q0
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vpsel q0, q7, q3
; CHECK-NEXT:    vstrb.16 q1, [r0, #8]
; CHECK-NEXT:    vstrb.16 q0, [r0]
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    bx lr
entry:
  %0 = shufflevector <8 x i16> %s0_lo, <8 x i16> %s0_hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = shufflevector <8 x i16> %s1_lo, <8 x i16> %s1_hi, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %or.i = tail call <16 x i8> @llvm.ucmp.v16i8.v16i16(<16 x i16> %0, <16 x i16> %1)
  ret <16 x i8> %or.i
}