; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve --verify-machineinstrs %s -o - | FileCheck %s

declare i8 @llvm.smax.i8(i8 %a, i8 %b) readnone

define arm_aapcs_vfpcc i8 @smaxi8(i8 %a, i8 %b) {
; CHECK-LABEL: smaxi8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    sxtb r1, r1
; CHECK-NEXT:    sxtb r0, r0
; CHECK-NEXT:    cmp r0, r1
; CHECK-NEXT:    csel r0, r0, r1, gt
; CHECK-NEXT:    bx lr
  %c = call i8 @llvm.smax.i8(i8 %a, i8 %b)
  ret i8 %c
}

declare i16 @llvm.smax.i16(i16 %a, i16 %b) readnone

define arm_aapcs_vfpcc i16 @smaxi16(i16 %a, i16 %b) {
; CHECK-LABEL: smaxi16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    sxth r1, r1
; CHECK-NEXT:    sxth r0, r0
; CHECK-NEXT:    cmp r0, r1
; CHECK-NEXT:    csel r0, r0, r1, gt
; CHECK-NEXT:    bx lr
  %c = call i16 @llvm.smax.i16(i16 %a, i16 %b)
  ret i16 %c
}

declare i32 @llvm.smax.i32(i32 %a, i32 %b) readnone

define arm_aapcs_vfpcc i32 @smaxi32(i32 %a, i32 %b) {
; CHECK-LABEL: smaxi32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    cmp r0, r1
; CHECK-NEXT:    csel r0, r0, r1, gt
; CHECK-NEXT:    bx lr
  %c = call i32 @llvm.smax.i32(i32 %a, i32 %b)
  ret i32 %c
}

declare i64 @llvm.smax.i64(i64 %a, i64 %b) readnone

define arm_aapcs_vfpcc i64 @smaxi64(i64 %a, i64 %b) {
; CHECK-LABEL: smaxi64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    subs.w r12, r2, r0
; CHECK-NEXT:    sbcs.w r12, r3, r1
; CHECK-NEXT:    csel r0, r0, r2, lt
; CHECK-NEXT:    csel r1, r1, r3, lt
; CHECK-NEXT:    bx lr
  %c = call i64 @llvm.smax.i64(i64 %a, i64 %b)
  ret i64 %c
}

declare <8 x i8> @llvm.smax.v8i8(<8 x i8> %a, <8 x i8> %b) readnone

define arm_aapcs_vfpcc <8 x i8> @smax8i8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: smax8i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmovlb.s8 q1, q1
; CHECK-NEXT:    vmovlb.s8 q0, q0
; CHECK-NEXT:    vmax.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %c = call <8 x i8> @llvm.smax.v8i8(<8 x i8> %a, <8 x i8> %b)
  ret <8 x i8> %c
}

declare <16 x i8> @llvm.smax.v16i8(<16 x i8> %a, <16 x i8> %b) readnone

define arm_aapcs_vfpcc <16 x i8> @smax16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: smax16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmax.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %c = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %c
}

declare <32 x i8> @llvm.smax.v32i8(<32 x i8> %a, <32 x i8> %b) readnone

define arm_aapcs_vfpcc void @smax32i8(<32 x i8> %a, <32 x i8> %b, ptr %p) {
; CHECK-LABEL: smax32i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmax.s8 q1, q1, q3
; CHECK-NEXT:    vmax.s8 q0, q0, q2
; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
  %c = call <32 x i8> @llvm.smax.v32i8(<32 x i8> %a, <32 x i8> %b)
  store <32 x i8> %c, ptr %p
  ret void
}

declare <4 x i16> @llvm.smax.v4i16(<4 x i16> %a, <4 x i16> %b) readnone

define arm_aapcs_vfpcc <4 x i16> @smax4i16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: smax4i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmovlb.s16 q1, q1
; CHECK-NEXT:    vmovlb.s16 q0, q0
; CHECK-NEXT:    vmax.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %c = call <4 x i16> @llvm.smax.v4i16(<4 x i16> %a, <4 x i16> %b)
  ret <4 x i16> %c
}

declare <8 x i16> @llvm.smax.v8i16(<8 x i16> %a, <8 x i16> %b) readnone

define arm_aapcs_vfpcc <8 x i16> @smax8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: smax8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmax.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %c = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %c
}

declare <16 x i16> @llvm.smax.v16i16(<16 x i16> %a, <16 x i16> %b) readnone

define arm_aapcs_vfpcc void @smax16i16(<16 x i16> %a, <16 x i16> %b, ptr %p) {
; CHECK-LABEL: smax16i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmax.s16 q1, q1, q3
; CHECK-NEXT:    vmax.s16 q0, q0, q2
; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
  %c = call <16 x i16> @llvm.smax.v16i16(<16 x i16> %a, <16 x i16> %b)
  store <16 x i16> %c, ptr %p
  ret void
}

declare <2 x i32> @llvm.smax.v2i32(<2 x i32> %a, <2 x i32> %b) readnone

define arm_aapcs_vfpcc <2 x i32> @smax2i32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: smax2i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vmov r1, s4
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    vmov r0, s6
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov q1[2], q1[0], r1, r0
; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
; CHECK-NEXT:    asr.w lr, r1, #31
; CHECK-NEXT:    subs r1, r1, r3
; CHECK-NEXT:    sbcs.w r1, lr, r3, asr #31
; CHECK-NEXT:    asr.w r5, r3, #31
; CHECK-NEXT:    asr.w r12, r0, #31
; CHECK-NEXT:    csetm r1, lt
; CHECK-NEXT:    subs r0, r0, r2
; CHECK-NEXT:    mov.w r3, #0
; CHECK-NEXT:    sbcs.w r0, r12, r2, asr #31
; CHECK-NEXT:    bfi r3, r1, #0, #8
; CHECK-NEXT:    csetm r0, lt
; CHECK-NEXT:    asrs r4, r2, #31
; CHECK-NEXT:    bfi r3, r0, #8, #8
; CHECK-NEXT:    vmov q1[3], q1[1], lr, r12
; CHECK-NEXT:    vmov q0[3], q0[1], r5, r4
; CHECK-NEXT:    vmsr p0, r3
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    pop {r4, r5, r7, pc}
  %c = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %a, <2 x i32> %b)
  ret <2 x i32> %c
}

declare <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> %b) readnone

define arm_aapcs_vfpcc <4 x i32> @smax4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: smax4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmax.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %c = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %c
}

declare <8 x i32> @llvm.smax.v8i32(<8 x i32> %a, <8 x i32> %b) readnone

define arm_aapcs_vfpcc void @smax8i32(<8 x i32> %a, <8 x i32> %b, ptr %p) {
; CHECK-LABEL: smax8i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmax.s32 q1, q1, q3
; CHECK-NEXT:    vmax.s32 q0, q0, q2
; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
  %c = call <8 x i32> @llvm.smax.v8i32(<8 x i32> %a, <8 x i32> %b)
  store <8 x i32> %c, ptr %p
  ret void
}

declare <1 x i64> @llvm.smax.v1i64(<1 x i64> %a, <1 x i64> %b) readnone

define arm_aapcs_vfpcc <1 x i64> @smax1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: smax1i64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .pad #8
; CHECK-NEXT:    sub sp, #8
; CHECK-NEXT:    subs.w r12, r2, r0
; CHECK-NEXT:    sbcs.w r12, r3, r1
; CHECK-NEXT:    csel r0, r0, r2, lt
; CHECK-NEXT:    csel r1, r1, r3, lt
; CHECK-NEXT:    add sp, #8
; CHECK-NEXT:    bx lr
  %c = call <1 x i64> @llvm.smax.v1i64(<1 x i64> %a, <1 x i64> %b)
  ret <1 x i64> %c
}

declare <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b) readnone

define arm_aapcs_vfpcc <2 x i64> @smax2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: smax2i64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d2
; CHECK-NEXT:    subs r0, r2, r0
; CHECK-NEXT:    sbcs.w r0, r3, r1
; CHECK-NEXT:    mov.w r1, #0
; CHECK-NEXT:    csetm r0, lt
; CHECK-NEXT:    vmov r3, r2, d3
; CHECK-NEXT:    bfi r1, r0, #0, #8
; CHECK-NEXT:    vmov r0, r12, d1
; CHECK-NEXT:    subs r0, r3, r0
; CHECK-NEXT:    sbcs.w r0, r2, r12
; CHECK-NEXT:    csetm r0, lt
; CHECK-NEXT:    bfi r1, r0, #8, #8
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
  %c = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %c
}

declare <4 x i64> @llvm.smax.v4i64(<4 x i64> %a, <4 x i64> %b) readnone

define arm_aapcs_vfpcc void @smax4i64(<4 x i64> %a, <4 x i64> %b, ptr %p) {
; CHECK-LABEL: smax4i64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, lr}
; CHECK-NEXT:    push {r4, lr}
; CHECK-NEXT:    vmov r1, r12, d2
; CHECK-NEXT:    vmov r3, r2, d6
; CHECK-NEXT:    subs r1, r3, r1
; CHECK-NEXT:    mov.w r3, #0
; CHECK-NEXT:    sbcs.w r1, r2, r12
; CHECK-NEXT:    vmov lr, r12, d3
; CHECK-NEXT:    csetm r2, lt
; CHECK-NEXT:    movs r1, #0
; CHECK-NEXT:    bfi r3, r2, #0, #8
; CHECK-NEXT:    vmov r2, r4, d7
; CHECK-NEXT:    subs.w r2, r2, lr
; CHECK-NEXT:    sbcs.w r2, r4, r12
; CHECK-NEXT:    csetm r2, lt
; CHECK-NEXT:    bfi r3, r2, #8, #8
; CHECK-NEXT:    vmov r2, r12, d0
; CHECK-NEXT:    vmsr p0, r3
; CHECK-NEXT:    vmov r4, r3, d4
; CHECK-NEXT:    vpsel q1, q1, q3
; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
; CHECK-NEXT:    subs r2, r4, r2
; CHECK-NEXT:    sbcs.w r2, r3, r12
; CHECK-NEXT:    vmov r4, r3, d5
; CHECK-NEXT:    csetm r2, lt
; CHECK-NEXT:    bfi r1, r2, #0, #8
; CHECK-NEXT:    vmov r2, r12, d1
; CHECK-NEXT:    subs r2, r4, r2
; CHECK-NEXT:    sbcs.w r2, r3, r12
; CHECK-NEXT:    csetm r2, lt
; CHECK-NEXT:    bfi r1, r2, #8, #8
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpsel q0, q0, q2
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    pop {r4, pc}
  %c = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %a, <4 x i64> %b)
  store <4 x i64> %c, ptr %p
  ret void
}

declare i8 @llvm.umax.i8(i8 %a, i8 %b) readnone

define arm_aapcs_vfpcc i8 @umaxi8(i8 %a, i8 %b) {
; CHECK-LABEL: umaxi8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    uxtb r1, r1
; CHECK-NEXT:    uxtb r0, r0
; CHECK-NEXT:    cmp r0, r1
; CHECK-NEXT:    csel r0, r0, r1, hi
; CHECK-NEXT:    bx lr
  %c = call i8 @llvm.umax.i8(i8 %a, i8 %b)
  ret i8 %c
}

declare i16 @llvm.umax.i16(i16 %a, i16 %b) readnone

define arm_aapcs_vfpcc i16 @umaxi16(i16 %a, i16 %b) {
; CHECK-LABEL: umaxi16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    uxth r1, r1
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    cmp r0, r1
; CHECK-NEXT:    csel r0, r0, r1, hi
; CHECK-NEXT:    bx lr
  %c = call i16 @llvm.umax.i16(i16 %a, i16 %b)
  ret i16 %c
}

declare i32 @llvm.umax.i32(i32 %a, i32 %b) readnone

define arm_aapcs_vfpcc i32 @umaxi32(i32 %a, i32 %b) {
; CHECK-LABEL: umaxi32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    cmp r0, r1
; CHECK-NEXT:    csel r0, r0, r1, hi
; CHECK-NEXT:    bx lr
  %c = call i32 @llvm.umax.i32(i32 %a, i32 %b)
  ret i32 %c
}

declare i64 @llvm.umax.i64(i64 %a, i64 %b) readnone

define arm_aapcs_vfpcc i64 @umaxi64(i64 %a, i64 %b) {
; CHECK-LABEL: umaxi64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    subs.w r12, r2, r0
; CHECK-NEXT:    sbcs.w r12, r3, r1
; CHECK-NEXT:    csel r0, r0, r2, lo
; CHECK-NEXT:    csel r1, r1, r3, lo
; CHECK-NEXT:    bx lr
  %c = call i64 @llvm.umax.i64(i64 %a, i64 %b)
  ret i64 %c
}

declare <8 x i8> @llvm.umax.v8i8(<8 x i8> %a, <8 x i8> %b) readnone

define arm_aapcs_vfpcc <8 x i8> @umax8i8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: umax8i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmovlb.u8 q1, q1
; CHECK-NEXT:    vmovlb.u8 q0, q0
; CHECK-NEXT:    vmax.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %c = call <8 x i8> @llvm.umax.v8i8(<8 x i8> %a, <8 x i8> %b)
  ret <8 x i8> %c
}

declare <16 x i8> @llvm.umax.v16i8(<16 x i8> %a, <16 x i8> %b) readnone

define arm_aapcs_vfpcc <16 x i8> @umax16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: umax16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmax.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %c = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %c
}

declare <32 x i8> @llvm.umax.v32i8(<32 x i8> %a, <32 x i8> %b) readnone

define arm_aapcs_vfpcc void @umax32i8(<32 x i8> %a, <32 x i8> %b, ptr %p) {
; CHECK-LABEL: umax32i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmax.u8 q1, q1, q3
; CHECK-NEXT:    vmax.u8 q0, q0, q2
; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
  %c = call <32 x i8> @llvm.umax.v32i8(<32 x i8> %a, <32 x i8> %b)
  store <32 x i8> %c, ptr %p
  ret void
}

declare <4 x i16> @llvm.umax.v4i16(<4 x i16> %a, <4 x i16> %b) readnone

define arm_aapcs_vfpcc <4 x i16> @umax4i16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: umax4i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmovlb.u16 q1, q1
; CHECK-NEXT:    vmovlb.u16 q0, q0
; CHECK-NEXT:    vmax.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %c = call <4 x i16> @llvm.umax.v4i16(<4 x i16> %a, <4 x i16> %b)
  ret <4 x i16> %c
}

declare <8 x i16> @llvm.umax.v8i16(<8 x i16> %a, <8 x i16> %b) readnone

define arm_aapcs_vfpcc <8 x i16> @umax8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: umax8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmax.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %c = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %c
}

declare <16 x i16> @llvm.umax.v16i16(<16 x i16> %a, <16 x i16> %b) readnone

define arm_aapcs_vfpcc void @umax16i16(<16 x i16> %a, <16 x i16> %b, ptr %p) {
; CHECK-LABEL: umax16i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmax.u16 q1, q1, q3
; CHECK-NEXT:    vmax.u16 q0, q0, q2
; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
  %c = call <16 x i16> @llvm.umax.v16i16(<16 x i16> %a, <16 x i16> %b)
  store <16 x i16> %c, ptr %p
  ret void
}

declare <2 x i32> @llvm.umax.v2i32(<2 x i32> %a, <2 x i32> %b) readnone

define arm_aapcs_vfpcc <2 x i32> @umax2i32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: umax2i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i64 q2, #0xffffffff
; CHECK-NEXT:    vand q0, q0, q2
; CHECK-NEXT:    vand q1, q1, q2
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d2
; CHECK-NEXT:    subs r0, r2, r0
; CHECK-NEXT:    sbcs.w r0, r3, r1
; CHECK-NEXT:    mov.w r1, #0
; CHECK-NEXT:    csetm r0, lo
; CHECK-NEXT:    vmov r3, r2, d3
; CHECK-NEXT:    bfi r1, r0, #0, #8
; CHECK-NEXT:    vmov r0, r12, d1
; CHECK-NEXT:    subs r0, r3, r0
; CHECK-NEXT:    sbcs.w r0, r2, r12
; CHECK-NEXT:    csetm r0, lo
; CHECK-NEXT:    bfi r1, r0, #8, #8
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
  %c = call <2 x i32> @llvm.umax.v2i32(<2 x i32> %a, <2 x i32> %b)
  ret <2 x i32> %c
}

declare <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> %b) readnone

define arm_aapcs_vfpcc <4 x i32> @umax4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: umax4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmax.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %c = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %c
}

declare <8 x i32> @llvm.umax.v8i32(<8 x i32> %a, <8 x i32> %b) readnone

define arm_aapcs_vfpcc void @umax8i32(<8 x i32> %a, <8 x i32> %b, ptr %p) {
; CHECK-LABEL: umax8i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmax.u32 q1, q1, q3
; CHECK-NEXT:    vmax.u32 q0, q0, q2
; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
  %c = call <8 x i32> @llvm.umax.v8i32(<8 x i32> %a, <8 x i32> %b)
  store <8 x i32> %c, ptr %p
  ret void
}

declare <1 x i64> @llvm.umax.v1i64(<1 x i64> %a, <1 x i64> %b) readnone

define arm_aapcs_vfpcc <1 x i64> @umax1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: umax1i64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .pad #8
; CHECK-NEXT:    sub sp, #8
; CHECK-NEXT:    subs.w r12, r2, r0
; CHECK-NEXT:    sbcs.w r12, r3, r1
; CHECK-NEXT:    csel r0, r0, r2, lo
; CHECK-NEXT:    csel r1, r1, r3, lo
; CHECK-NEXT:    add sp, #8
; CHECK-NEXT:    bx lr
  %c = call <1 x i64> @llvm.umax.v1i64(<1 x i64> %a, <1 x i64> %b)
  ret <1 x i64> %c
}

declare <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b) readnone

define arm_aapcs_vfpcc <2 x i64> @umax2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: umax2i64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov r0, r1, d0
; CHECK-NEXT:    vmov r2, r3, d2
; CHECK-NEXT:    subs r0, r2, r0
; CHECK-NEXT:    sbcs.w r0, r3, r1
; CHECK-NEXT:    mov.w r1, #0
; CHECK-NEXT:    csetm r0, lo
; CHECK-NEXT:    vmov r3, r2, d3
; CHECK-NEXT:    bfi r1, r0, #0, #8
; CHECK-NEXT:    vmov r0, r12, d1
; CHECK-NEXT:    subs r0, r3, r0
; CHECK-NEXT:    sbcs.w r0, r2, r12
; CHECK-NEXT:    csetm r0, lo
; CHECK-NEXT:    bfi r1, r0, #8, #8
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
  %c = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %c
}

declare <4 x i64> @llvm.umax.v4i64(<4 x i64> %a, <4 x i64> %b) readnone

define arm_aapcs_vfpcc void @umax4i64(<4 x i64> %a, <4 x i64> %b, ptr %p) {
; CHECK-LABEL: umax4i64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, lr}
; CHECK-NEXT:    push {r4, lr}
; CHECK-NEXT:    vmov r1, r12, d2
; CHECK-NEXT:    vmov r3, r2, d6
; CHECK-NEXT:    subs r1, r3, r1
; CHECK-NEXT:    mov.w r3, #0
; CHECK-NEXT:    sbcs.w r1, r2, r12
; CHECK-NEXT:    vmov lr, r12, d3
; CHECK-NEXT:    csetm r2, lo
; CHECK-NEXT:    movs r1, #0
; CHECK-NEXT:    bfi r3, r2, #0, #8
; CHECK-NEXT:    vmov r2, r4, d7
; CHECK-NEXT:    subs.w r2, r2, lr
; CHECK-NEXT:    sbcs.w r2, r4, r12
; CHECK-NEXT:    csetm r2, lo
; CHECK-NEXT:    bfi r3, r2, #8, #8
; CHECK-NEXT:    vmov r2, r12, d0
; CHECK-NEXT:    vmsr p0, r3
; CHECK-NEXT:    vmov r4, r3, d4
; CHECK-NEXT:    vpsel q1, q1, q3
; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
; CHECK-NEXT:    subs r2, r4, r2
; CHECK-NEXT:    sbcs.w r2, r3, r12
; CHECK-NEXT:    vmov r4, r3, d5
; CHECK-NEXT:    csetm r2, lo
; CHECK-NEXT:    bfi r1, r2, #0, #8
; CHECK-NEXT:    vmov r2, r12, d1
; CHECK-NEXT:    subs r2, r4, r2
; CHECK-NEXT:    sbcs.w r2, r3, r12
; CHECK-NEXT:    csetm r2, lo
; CHECK-NEXT:    bfi r1, r2, #8, #8
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpsel q0, q0, q2
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    pop {r4, pc}
  %c = call <4 x i64> @llvm.umax.v4i64(<4 x i64> %a, <4 x i64> %b)
  store <4 x i64> %c, ptr %p
  ret void
}

declare i8 @llvm.smin.i8(i8 %a, i8 %b) readnone

define arm_aapcs_vfpcc i8 @smini8(i8 %a, i8 %b) {
; CHECK-LABEL: smini8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    sxtb r1, r1
; CHECK-NEXT:    sxtb r0, r0
; CHECK-NEXT:    cmp r0, r1
; CHECK-NEXT:    csel r0, r0, r1, lt
; CHECK-NEXT:    bx lr
  %c = call i8 @llvm.smin.i8(i8 %a, i8 %b)
  ret i8 %c
}

declare i16 @llvm.smin.i16(i16 %a, i16 %b) readnone

define arm_aapcs_vfpcc i16 @smini16(i16 %a, i16 %b) {
; CHECK-LABEL: smini16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    sxth r1, r1
; CHECK-NEXT:    sxth r0, r0
; CHECK-NEXT:    cmp r0, r1
; CHECK-NEXT:    csel r0, r0, r1, lt
; CHECK-NEXT:    bx lr
  %c = call i16 @llvm.smin.i16(i16 %a, i16 %b)
  ret i16 %c
}

declare i32 @llvm.smin.i32(i32 %a, i32 %b) readnone

define arm_aapcs_vfpcc i32 @smini32(i32 %a, i32 %b) {
; CHECK-LABEL: smini32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    cmp r0, r1
; CHECK-NEXT:    csel r0, r0, r1, lt
; CHECK-NEXT:    bx lr
  %c = call i32 @llvm.smin.i32(i32 %a, i32 %b)
  ret i32 %c
}

declare i64 @llvm.smin.i64(i64 %a, i64 %b) readnone

define arm_aapcs_vfpcc i64 @smini64(i64 %a, i64 %b) {
; CHECK-LABEL: smini64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    subs.w r12, r0, r2
; CHECK-NEXT:    sbcs.w r12, r1, r3
; CHECK-NEXT:    csel r0, r0, r2, lt
; CHECK-NEXT:    csel r1, r1, r3, lt
; CHECK-NEXT:    bx lr
  %c = call i64 @llvm.smin.i64(i64 %a, i64 %b)
  ret i64 %c
}

declare <8 x i8> @llvm.smin.v8i8(<8 x i8> %a, <8 x i8> %b) readnone

define arm_aapcs_vfpcc <8 x i8> @smin8i8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: smin8i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmovlb.s8 q1, q1
; CHECK-NEXT:    vmovlb.s8 q0, q0
; CHECK-NEXT:    vmin.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %c = call <8 x i8> @llvm.smin.v8i8(<8 x i8> %a, <8 x i8> %b)
  ret <8 x i8> %c
}

declare <16 x i8> @llvm.smin.v16i8(<16 x i8> %a, <16 x i8> %b) readnone

define arm_aapcs_vfpcc <16 x i8> @smin16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: smin16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmin.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %c = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %c
}

declare <32 x i8> @llvm.smin.v32i8(<32 x i8> %a, <32 x i8> %b) readnone

define arm_aapcs_vfpcc void @smin32i8(<32 x i8> %a, <32 x i8> %b, ptr %p) {
; CHECK-LABEL: smin32i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmin.s8 q1, q1, q3
; CHECK-NEXT:    vmin.s8 q0, q0, q2
; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
  %c = call <32 x i8> @llvm.smin.v32i8(<32 x i8> %a, <32 x i8> %b)
  store <32 x i8> %c, ptr %p
  ret void
}

declare <4 x i16> @llvm.smin.v4i16(<4 x i16> %a, <4 x i16> %b) readnone

define arm_aapcs_vfpcc <4 x i16> @smin4i16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: smin4i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmovlb.s16 q1, q1
; CHECK-NEXT:    vmovlb.s16 q0, q0
; CHECK-NEXT:    vmin.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %c = call <4 x i16> @llvm.smin.v4i16(<4 x i16> %a, <4 x i16> %b)
  ret <4 x i16> %c
}

declare <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> %b) readnone

define arm_aapcs_vfpcc <8 x i16> @smin8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: smin8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmin.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %c = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %c
}

declare <16 x i16> @llvm.smin.v16i16(<16 x i16> %a, <16 x i16> %b) readnone

define arm_aapcs_vfpcc void @smin16i16(<16 x i16> %a, <16 x i16> %b, ptr %p) {
; CHECK-LABEL: smin16i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmin.s16 q1, q1, q3
; CHECK-NEXT:    vmin.s16 q0, q0, q2
; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
  %c = call <16 x i16> @llvm.smin.v16i16(<16 x i16> %a, <16 x i16> %b)
  store <16 x i16> %c, ptr %p
  ret void
}

declare <2 x i32> @llvm.smin.v2i32(<2 x i32> %a, <2 x i32> %b) readnone

define arm_aapcs_vfpcc <2 x i32> @smin2i32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: smin2i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    vmov r0, s6
; CHECK-NEXT:    vmov r1, s4
; CHECK-NEXT:    vmov q1[2], q1[0], r1, r0
; CHECK-NEXT:    asrs r2, r0, #31
; CHECK-NEXT:    asrs r3, r1, #31
; CHECK-NEXT:    vmov q1[3], q1[1], r3, r2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov q0[2], q0[0], r3, r2
; CHECK-NEXT:    asr.w lr, r3, #31
; CHECK-NEXT:    subs r3, r3, r1
; CHECK-NEXT:    sbcs.w r1, lr, r1, asr #31
; CHECK-NEXT:    mov.w r3, #0
; CHECK-NEXT:    csetm r1, lt
; CHECK-NEXT:    asr.w r12, r2, #31
; CHECK-NEXT:    bfi r3, r1, #0, #8
; CHECK-NEXT:    subs r1, r2, r0
; CHECK-NEXT:    sbcs.w r0, r12, r0, asr #31
; CHECK-NEXT:    vmov q0[3], q0[1], lr, r12
; CHECK-NEXT:    csetm r0, lt
; CHECK-NEXT:    bfi r3, r0, #8, #8
; CHECK-NEXT:    vmsr p0, r3
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    pop {r7, pc}
  %c = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %a, <2 x i32> %b)
  ret <2 x i32> %c
}

declare <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> %b) readnone

define arm_aapcs_vfpcc <4 x i32> @smin4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: smin4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmin.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %c = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %c
}

declare <8 x i32> @llvm.smin.v8i32(<8 x i32> %a, <8 x i32> %b) readnone

define arm_aapcs_vfpcc void @smin8i32(<8 x i32> %a, <8 x i32> %b, ptr %p) {
; CHECK-LABEL: smin8i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmin.s32 q1, q1, q3
; CHECK-NEXT:    vmin.s32 q0, q0, q2
; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
  %c = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %a, <8 x i32> %b)
  store <8 x i32> %c, ptr %p
  ret void
}

declare <1 x i64> @llvm.smin.v1i64(<1 x i64> %a, <1 x i64> %b) readnone

define arm_aapcs_vfpcc <1 x i64> @smin1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: smin1i64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .pad #8
; CHECK-NEXT:    sub sp, #8
; CHECK-NEXT:    subs.w r12, r0, r2
; CHECK-NEXT:    sbcs.w r12, r1, r3
; CHECK-NEXT:    csel r0, r0, r2, lt
; CHECK-NEXT:    csel r1, r1, r3, lt
; CHECK-NEXT:    add sp, #8
; CHECK-NEXT:    bx lr
  %c = call <1 x i64> @llvm.smin.v1i64(<1 x i64> %a, <1 x i64> %b)
  ret <1 x i64> %c
}

declare <2 x i64> @llvm.smin.v2i64(<2 x i64> %a, <2 x i64> %b) readnone

define arm_aapcs_vfpcc <2 x i64> @smin2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: smin2i64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov r0, r1, d2
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    subs r0, r2, r0
; CHECK-NEXT:    sbcs.w r0, r3, r1
; CHECK-NEXT:    mov.w r1, #0
; CHECK-NEXT:    csetm r0, lt
; CHECK-NEXT:    vmov r3, r2, d1
; CHECK-NEXT:    bfi r1, r0, #0, #8
; CHECK-NEXT:    vmov r0, r12, d3
; CHECK-NEXT:    subs r0, r3, r0
; CHECK-NEXT:    sbcs.w r0, r2, r12
; CHECK-NEXT:    csetm r0, lt
; CHECK-NEXT:    bfi r1, r0, #8, #8
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
  %c = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %c
}

declare <4 x i64> @llvm.smin.v4i64(<4 x i64> %a, <4 x i64> %b) readnone

define arm_aapcs_vfpcc void @smin4i64(<4 x i64> %a, <4 x i64> %b, ptr %p) {
; CHECK-LABEL: smin4i64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, lr}
; CHECK-NEXT:    push {r4, lr}
; CHECK-NEXT:    vmov r1, r12, d6
; CHECK-NEXT:    vmov r3, r2, d2
; CHECK-NEXT:    subs r1, r3, r1
; CHECK-NEXT:    mov.w r3, #0
; CHECK-NEXT:    sbcs.w r1, r2, r12
; CHECK-NEXT:    vmov lr, r12, d7
; CHECK-NEXT:    csetm r2, lt
; CHECK-NEXT:    movs r1, #0
; CHECK-NEXT:    bfi r3, r2, #0, #8
; CHECK-NEXT:    vmov r2, r4, d3
; CHECK-NEXT:    subs.w r2, r2, lr
; CHECK-NEXT:    sbcs.w r2, r4, r12
; CHECK-NEXT:    csetm r2, lt
; CHECK-NEXT:    bfi r3, r2, #8, #8
; CHECK-NEXT:    vmov r2, r12, d4
; CHECK-NEXT:    vmsr p0, r3
; CHECK-NEXT:    vmov r4, r3, d0
; CHECK-NEXT:    vpsel q1, q1, q3
; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
; CHECK-NEXT:    subs r2, r4, r2
; CHECK-NEXT:    sbcs.w r2, r3, r12
; CHECK-NEXT:    vmov r4, r3, d1
; CHECK-NEXT:    csetm r2, lt
; CHECK-NEXT:    bfi r1, r2, #0, #8
; CHECK-NEXT:    vmov r2, r12, d5
; CHECK-NEXT:    subs r2, r4, r2
; CHECK-NEXT:    sbcs.w r2, r3, r12
; CHECK-NEXT:    csetm r2, lt
; CHECK-NEXT:    bfi r1, r2, #8, #8
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpsel q0, q0, q2
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    pop {r4, pc}
  %c = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %a, <4 x i64> %b)
  store <4 x i64> %c, ptr %p
  ret void
}

declare i8 @llvm.umin.i8(i8 %a, i8 %b) readnone

define arm_aapcs_vfpcc i8 @umini8(i8 %a, i8 %b) {
; CHECK-LABEL: umini8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    uxtb r1, r1
; CHECK-NEXT:    uxtb r0, r0
; CHECK-NEXT:    cmp r0, r1
; CHECK-NEXT:    csel r0, r0, r1, lo
; CHECK-NEXT:    bx lr
  %c = call i8 @llvm.umin.i8(i8 %a, i8 %b)
  ret i8 %c
}

declare i16 @llvm.umin.i16(i16 %a, i16 %b) readnone

define arm_aapcs_vfpcc i16 @umini16(i16 %a, i16 %b) {
; CHECK-LABEL: umini16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    uxth r1, r1
; CHECK-NEXT:    uxth r0, r0
; CHECK-NEXT:    cmp r0, r1
; CHECK-NEXT:    csel r0, r0, r1, lo
; CHECK-NEXT:    bx lr
  %c = call i16 @llvm.umin.i16(i16 %a, i16 %b)
  ret i16 %c
}

declare i32 @llvm.umin.i32(i32 %a, i32 %b) readnone

define arm_aapcs_vfpcc i32 @umini32(i32 %a, i32 %b) {
; CHECK-LABEL: umini32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    cmp r0, r1
; CHECK-NEXT:    csel r0, r0, r1, lo
; CHECK-NEXT:    bx lr
  %c = call i32 @llvm.umin.i32(i32 %a, i32 %b)
  ret i32 %c
}

declare i64 @llvm.umin.i64(i64 %a, i64 %b) readnone

define arm_aapcs_vfpcc i64 @umini64(i64 %a, i64 %b) {
; CHECK-LABEL: umini64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    subs.w r12, r0, r2
; CHECK-NEXT:    sbcs.w r12, r1, r3
; CHECK-NEXT:    csel r0, r0, r2, lo
; CHECK-NEXT:    csel r1, r1, r3, lo
; CHECK-NEXT:    bx lr
  %c = call i64 @llvm.umin.i64(i64 %a, i64 %b)
  ret i64 %c
}

declare <8 x i8> @llvm.umin.v8i8(<8 x i8> %a, <8 x i8> %b) readnone

define arm_aapcs_vfpcc <8 x i8> @umin8i8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: umin8i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmovlb.u8 q1, q1
; CHECK-NEXT:    vmovlb.u8 q0, q0
; CHECK-NEXT:    vmin.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %c = call <8 x i8> @llvm.umin.v8i8(<8 x i8> %a, <8 x i8> %b)
  ret <8 x i8> %c
}

declare <16 x i8> @llvm.umin.v16i8(<16 x i8> %a, <16 x i8> %b) readnone

define arm_aapcs_vfpcc <16 x i8> @umin16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: umin16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmin.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %c = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %c
}

declare <32 x i8> @llvm.umin.v32i8(<32 x i8> %a, <32 x i8> %b) readnone

define arm_aapcs_vfpcc void @umin32i8(<32 x i8> %a, <32 x i8> %b, ptr %p) {
; CHECK-LABEL: umin32i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmin.u8 q1, q1, q3
; CHECK-NEXT:    vmin.u8 q0, q0, q2
; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
  %c = call <32 x i8> @llvm.umin.v32i8(<32 x i8> %a, <32 x i8> %b)
  store <32 x i8> %c, ptr %p
  ret void
}

declare <4 x i16> @llvm.umin.v4i16(<4 x i16> %a, <4 x i16> %b) readnone

define arm_aapcs_vfpcc <4 x i16> @umin4i16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: umin4i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmovlb.u16 q1, q1
; CHECK-NEXT:    vmovlb.u16 q0, q0
; CHECK-NEXT:    vmin.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %c = call <4 x i16> @llvm.umin.v4i16(<4 x i16> %a, <4 x i16> %b)
  ret <4 x i16> %c
}

declare <8 x i16> @llvm.umin.v8i16(<8 x i16> %a, <8 x i16> %b) readnone

define arm_aapcs_vfpcc <8 x i16> @umin8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: umin8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmin.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %c = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %c
}

declare <16 x i16> @llvm.umin.v16i16(<16 x i16> %a, <16 x i16> %b) readnone

define arm_aapcs_vfpcc void @umin16i16(<16 x i16> %a, <16 x i16> %b, ptr %p) {
; CHECK-LABEL: umin16i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmin.u16 q1, q1, q3
; CHECK-NEXT:    vmin.u16 q0, q0, q2
; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
  %c = call <16 x i16> @llvm.umin.v16i16(<16 x i16> %a, <16 x i16> %b)
  store <16 x i16> %c, ptr %p
  ret void
}

declare <2 x i32> @llvm.umin.v2i32(<2 x i32> %a, <2 x i32> %b) readnone

define arm_aapcs_vfpcc <2 x i32> @umin2i32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: umin2i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i64 q2, #0xffffffff
; CHECK-NEXT:    vand q1, q1, q2
; CHECK-NEXT:    vand q0, q0, q2
; CHECK-NEXT:    vmov r0, r1, d2
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    subs r0, r2, r0
; CHECK-NEXT:    sbcs.w r0, r3, r1
; CHECK-NEXT:    mov.w r1, #0
; CHECK-NEXT:    csetm r0, lo
; CHECK-NEXT:    vmov r3, r2, d1
; CHECK-NEXT:    bfi r1, r0, #0, #8
; CHECK-NEXT:    vmov r0, r12, d3
; CHECK-NEXT:    subs r0, r3, r0
; CHECK-NEXT:    sbcs.w r0, r2, r12
; CHECK-NEXT:    csetm r0, lo
; CHECK-NEXT:    bfi r1, r0, #8, #8
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
  %c = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %a, <2 x i32> %b)
  ret <2 x i32> %c
}

declare <4 x i32> @llvm.umin.v4i32(<4 x i32> %a, <4 x i32> %b) readnone

define arm_aapcs_vfpcc <4 x i32> @umin4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: umin4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmin.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %c = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %c
}

declare <8 x i32> @llvm.umin.v8i32(<8 x i32> %a, <8 x i32> %b) readnone

define arm_aapcs_vfpcc void @umin8i32(<8 x i32> %a, <8 x i32> %b, ptr %p) {
; CHECK-LABEL: umin8i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmin.u32 q1, q1, q3
; CHECK-NEXT:    vmin.u32 q0, q0, q2
; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
  %c = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %a, <8 x i32> %b)
  store <8 x i32> %c, ptr %p
  ret void
}

declare <1 x i64> @llvm.umin.v1i64(<1 x i64> %a, <1 x i64> %b) readnone

define arm_aapcs_vfpcc <1 x i64> @umin1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-LABEL: umin1i64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .pad #8
; CHECK-NEXT:    sub sp, #8
; CHECK-NEXT:    subs.w r12, r0, r2
; CHECK-NEXT:    sbcs.w r12, r1, r3
; CHECK-NEXT:    csel r0, r0, r2, lo
; CHECK-NEXT:    csel r1, r1, r3, lo
; CHECK-NEXT:    add sp, #8
; CHECK-NEXT:    bx lr
  %c = call <1 x i64> @llvm.umin.v1i64(<1 x i64> %a, <1 x i64> %b)
  ret <1 x i64> %c
}

declare <2 x i64> @llvm.umin.v2i64(<2 x i64> %a, <2 x i64> %b) readnone

define arm_aapcs_vfpcc <2 x i64> @umin2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: umin2i64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov r0, r1, d2
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    subs r0, r2, r0
; CHECK-NEXT:    sbcs.w r0, r3, r1
; CHECK-NEXT:    mov.w r1, #0
; CHECK-NEXT:    csetm r0, lo
; CHECK-NEXT:    vmov r3, r2, d1
; CHECK-NEXT:    bfi r1, r0, #0, #8
; CHECK-NEXT:    vmov r0, r12, d3
; CHECK-NEXT:    subs r0, r3, r0
; CHECK-NEXT:    sbcs.w r0, r2, r12
; CHECK-NEXT:    csetm r0, lo
; CHECK-NEXT:    bfi r1, r0, #8, #8
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpsel q0, q0, q1
; CHECK-NEXT:    bx lr
  %c = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %c
}

declare <4 x i64> @llvm.umin.v4i64(<4 x i64> %a, <4 x i64> %b) readnone

define arm_aapcs_vfpcc void @umin4i64(<4 x i64> %a, <4 x i64> %b, ptr %p) {
; CHECK-LABEL: umin4i64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, lr}
; CHECK-NEXT:    push {r4, lr}
; CHECK-NEXT:    vmov r1, r12, d6
; CHECK-NEXT:    vmov r3, r2, d2
; CHECK-NEXT:    subs r1, r3, r1
; CHECK-NEXT:    mov.w r3, #0
; CHECK-NEXT:    sbcs.w r1, r2, r12
; CHECK-NEXT:    vmov lr, r12, d7
; CHECK-NEXT:    csetm r2, lo
; CHECK-NEXT:    movs r1, #0
; CHECK-NEXT:    bfi r3, r2, #0, #8
; CHECK-NEXT:    vmov r2, r4, d3
; CHECK-NEXT:    subs.w r2, r2, lr
; CHECK-NEXT:    sbcs.w r2, r4, r12
; CHECK-NEXT:    csetm r2, lo
; CHECK-NEXT:    bfi r3, r2, #8, #8
; CHECK-NEXT:    vmov r2, r12, d4
; CHECK-NEXT:    vmsr p0, r3
; CHECK-NEXT:    vmov r4, r3, d0
; CHECK-NEXT:    vpsel q1, q1, q3
; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
; CHECK-NEXT:    subs r2, r4, r2
; CHECK-NEXT:    sbcs.w r2, r3, r12
; CHECK-NEXT:    vmov r4, r3, d1
; CHECK-NEXT:    csetm r2, lo
; CHECK-NEXT:    bfi r1, r2, #0, #8
; CHECK-NEXT:    vmov r2, r12, d5
; CHECK-NEXT:    subs r2, r4, r2
; CHECK-NEXT:    sbcs.w r2, r3, r12
; CHECK-NEXT:    csetm r2, lo
; CHECK-NEXT:    bfi r1, r2, #8, #8
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpsel q0, q0, q2
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    pop {r4, pc}
  %c = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %a, <4 x i64> %b)
  store <4 x i64> %c, ptr %p
  ret void
}