1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 2; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD 3; RUN: llc -mtriple=aarch64-none-eabi -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI 4 5define i8 @si8(i8 %a, i8 %b) { 6; CHECK-SD-LABEL: si8: 7; CHECK-SD: // %bb.0: // %entry 8; CHECK-SD-NEXT: sxtb w8, w1 9; CHECK-SD-NEXT: sxtb w9, w0 10; CHECK-SD-NEXT: sdiv w10, w9, w8 11; CHECK-SD-NEXT: msub w0, w10, w8, w9 12; CHECK-SD-NEXT: ret 13; 14; CHECK-GI-LABEL: si8: 15; CHECK-GI: // %bb.0: // %entry 16; CHECK-GI-NEXT: sxtb w8, w0 17; CHECK-GI-NEXT: sxtb w9, w1 18; CHECK-GI-NEXT: sdiv w8, w8, w9 19; CHECK-GI-NEXT: msub w0, w8, w1, w0 20; CHECK-GI-NEXT: ret 21entry: 22 %s = srem i8 %a, %b 23 ret i8 %s 24} 25 26define i8 @ui8(i8 %a, i8 %b) { 27; CHECK-SD-LABEL: ui8: 28; CHECK-SD: // %bb.0: // %entry 29; CHECK-SD-NEXT: and w8, w1, #0xff 30; CHECK-SD-NEXT: and w9, w0, #0xff 31; CHECK-SD-NEXT: udiv w10, w9, w8 32; CHECK-SD-NEXT: msub w0, w10, w8, w9 33; CHECK-SD-NEXT: ret 34; 35; CHECK-GI-LABEL: ui8: 36; CHECK-GI: // %bb.0: // %entry 37; CHECK-GI-NEXT: and w8, w0, #0xff 38; CHECK-GI-NEXT: and w9, w1, #0xff 39; CHECK-GI-NEXT: udiv w8, w8, w9 40; CHECK-GI-NEXT: msub w0, w8, w1, w0 41; CHECK-GI-NEXT: ret 42entry: 43 %s = urem i8 %a, %b 44 ret i8 %s 45} 46 47define i16 @si16(i16 %a, i16 %b) { 48; CHECK-SD-LABEL: si16: 49; CHECK-SD: // %bb.0: // %entry 50; CHECK-SD-NEXT: sxth w8, w1 51; CHECK-SD-NEXT: sxth w9, w0 52; CHECK-SD-NEXT: sdiv w10, w9, w8 53; CHECK-SD-NEXT: msub w0, w10, w8, w9 54; CHECK-SD-NEXT: ret 55; 56; CHECK-GI-LABEL: si16: 57; CHECK-GI: // %bb.0: // %entry 58; CHECK-GI-NEXT: sxth w8, w0 59; CHECK-GI-NEXT: sxth w9, w1 60; CHECK-GI-NEXT: sdiv w8, w8, w9 61; CHECK-GI-NEXT: msub w0, w8, w1, w0 62; CHECK-GI-NEXT: ret 63entry: 64 %s = srem i16 %a, %b 65 ret i16 %s 66} 67 68define i16 @ui16(i16 %a, i16 %b) { 
69; CHECK-SD-LABEL: ui16: 70; CHECK-SD: // %bb.0: // %entry 71; CHECK-SD-NEXT: and w8, w1, #0xffff 72; CHECK-SD-NEXT: and w9, w0, #0xffff 73; CHECK-SD-NEXT: udiv w10, w9, w8 74; CHECK-SD-NEXT: msub w0, w10, w8, w9 75; CHECK-SD-NEXT: ret 76; 77; CHECK-GI-LABEL: ui16: 78; CHECK-GI: // %bb.0: // %entry 79; CHECK-GI-NEXT: and w8, w0, #0xffff 80; CHECK-GI-NEXT: and w9, w1, #0xffff 81; CHECK-GI-NEXT: udiv w8, w8, w9 82; CHECK-GI-NEXT: msub w0, w8, w1, w0 83; CHECK-GI-NEXT: ret 84entry: 85 %s = urem i16 %a, %b 86 ret i16 %s 87} 88 89define i32 @si32(i32 %a, i32 %b) { 90; CHECK-LABEL: si32: 91; CHECK: // %bb.0: // %entry 92; CHECK-NEXT: sdiv w8, w0, w1 93; CHECK-NEXT: msub w0, w8, w1, w0 94; CHECK-NEXT: ret 95entry: 96 %s = srem i32 %a, %b 97 ret i32 %s 98} 99 100define i32 @ui32(i32 %a, i32 %b) { 101; CHECK-LABEL: ui32: 102; CHECK: // %bb.0: // %entry 103; CHECK-NEXT: udiv w8, w0, w1 104; CHECK-NEXT: msub w0, w8, w1, w0 105; CHECK-NEXT: ret 106entry: 107 %s = urem i32 %a, %b 108 ret i32 %s 109} 110 111define i64 @si64(i64 %a, i64 %b) { 112; CHECK-LABEL: si64: 113; CHECK: // %bb.0: // %entry 114; CHECK-NEXT: sdiv x8, x0, x1 115; CHECK-NEXT: msub x0, x8, x1, x0 116; CHECK-NEXT: ret 117entry: 118 %s = srem i64 %a, %b 119 ret i64 %s 120} 121 122define i64 @ui64(i64 %a, i64 %b) { 123; CHECK-LABEL: ui64: 124; CHECK: // %bb.0: // %entry 125; CHECK-NEXT: udiv x8, x0, x1 126; CHECK-NEXT: msub x0, x8, x1, x0 127; CHECK-NEXT: ret 128entry: 129 %s = urem i64 %a, %b 130 ret i64 %s 131} 132 133define i128 @si128(i128 %a, i128 %b) { 134; CHECK-LABEL: si128: 135; CHECK: // %bb.0: // %entry 136; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill 137; CHECK-NEXT: .cfi_def_cfa_offset 16 138; CHECK-NEXT: .cfi_offset w30, -16 139; CHECK-NEXT: bl __modti3 140; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload 141; CHECK-NEXT: ret 142entry: 143 %s = srem i128 %a, %b 144 ret i128 %s 145} 146 147define i128 @ui128(i128 %a, i128 %b) { 148; CHECK-LABEL: ui128: 149; CHECK: // %bb.0: // %entry 150; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill 151; CHECK-NEXT: .cfi_def_cfa_offset 16 152; CHECK-NEXT: .cfi_offset w30, -16 153; CHECK-NEXT: bl __umodti3 154; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload 155; CHECK-NEXT: ret 156entry: 157 %s = urem i128 %a, %b 158 ret i128 %s 159} 160 161define <2 x i8> @sv2i8(<2 x i8> %d, <2 x i8> %e) { 162; CHECK-SD-LABEL: sv2i8: 163; CHECK-SD: // %bb.0: // %entry 164; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24 165; CHECK-SD-NEXT: shl v1.2s, v1.2s, #24 166; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #24 167; CHECK-SD-NEXT: sshr v1.2s, v1.2s, #24 168; CHECK-SD-NEXT: fmov w8, s1 169; CHECK-SD-NEXT: fmov w9, s0 170; CHECK-SD-NEXT: mov w11, v1.s[1] 171; CHECK-SD-NEXT: mov w12, v0.s[1] 172; CHECK-SD-NEXT: sdiv w10, w9, w8 173; CHECK-SD-NEXT: sdiv w13, w12, w11 174; CHECK-SD-NEXT: msub w8, w10, w8, w9 175; CHECK-SD-NEXT: fmov s0, w8 176; CHECK-SD-NEXT: msub w9, w13, w11, w12 177; CHECK-SD-NEXT: mov v0.s[1], w9 178; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 179; CHECK-SD-NEXT: ret 180; 181; CHECK-GI-LABEL: sv2i8: 182; CHECK-GI: // %bb.0: // %entry 183; CHECK-GI-NEXT: shl v0.2s, v0.2s, #24 184; CHECK-GI-NEXT: shl v1.2s, v1.2s, #24 185; CHECK-GI-NEXT: sshr v0.2s, v0.2s, #24 186; CHECK-GI-NEXT: sshr v1.2s, v1.2s, #24 187; CHECK-GI-NEXT: fmov w8, s0 188; CHECK-GI-NEXT: fmov w9, s1 189; CHECK-GI-NEXT: mov w10, v1.s[1] 190; CHECK-GI-NEXT: sdiv w8, w8, w9 191; CHECK-GI-NEXT: mov w9, v0.s[1] 192; CHECK-GI-NEXT: sdiv w9, w9, w10 193; CHECK-GI-NEXT: mov v2.s[0], w8 194; CHECK-GI-NEXT: mov v2.s[1], w9 195; CHECK-GI-NEXT: mls v0.2s, v2.2s, v1.2s 196; 
CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 197; CHECK-GI-NEXT: ret 198entry: 199 %s = srem <2 x i8> %d, %e 200 ret <2 x i8> %s 201} 202 203define <3 x i8> @sv3i8(<3 x i8> %d, <3 x i8> %e) { 204; CHECK-SD-LABEL: sv3i8: 205; CHECK-SD: // %bb.0: // %entry 206; CHECK-SD-NEXT: sxtb w8, w3 207; CHECK-SD-NEXT: sxtb w9, w0 208; CHECK-SD-NEXT: sxtb w11, w4 209; CHECK-SD-NEXT: sxtb w12, w1 210; CHECK-SD-NEXT: sxtb w14, w5 211; CHECK-SD-NEXT: sxtb w15, w2 212; CHECK-SD-NEXT: sdiv w10, w9, w8 213; CHECK-SD-NEXT: sdiv w13, w12, w11 214; CHECK-SD-NEXT: msub w0, w10, w8, w9 215; CHECK-SD-NEXT: sdiv w16, w15, w14 216; CHECK-SD-NEXT: msub w1, w13, w11, w12 217; CHECK-SD-NEXT: msub w2, w16, w14, w15 218; CHECK-SD-NEXT: ret 219; 220; CHECK-GI-LABEL: sv3i8: 221; CHECK-GI: // %bb.0: // %entry 222; CHECK-GI-NEXT: sxtb w8, w0 223; CHECK-GI-NEXT: sxtb w9, w3 224; CHECK-GI-NEXT: sxtb w11, w1 225; CHECK-GI-NEXT: sxtb w12, w4 226; CHECK-GI-NEXT: sxtb w14, w2 227; CHECK-GI-NEXT: sxtb w15, w5 228; CHECK-GI-NEXT: sdiv w10, w8, w9 229; CHECK-GI-NEXT: sdiv w13, w11, w12 230; CHECK-GI-NEXT: msub w0, w10, w9, w8 231; CHECK-GI-NEXT: sdiv w16, w14, w15 232; CHECK-GI-NEXT: msub w1, w13, w12, w11 233; CHECK-GI-NEXT: msub w2, w16, w15, w14 234; CHECK-GI-NEXT: ret 235entry: 236 %s = srem <3 x i8> %d, %e 237 ret <3 x i8> %s 238} 239 240define <4 x i8> @sv4i8(<4 x i8> %d, <4 x i8> %e) { 241; CHECK-SD-LABEL: sv4i8: 242; CHECK-SD: // %bb.0: // %entry 243; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8 244; CHECK-SD-NEXT: shl v1.4h, v1.4h, #8 245; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8 246; CHECK-SD-NEXT: sshr v1.4h, v1.4h, #8 247; CHECK-SD-NEXT: smov w11, v1.h[0] 248; CHECK-SD-NEXT: smov w12, v0.h[0] 249; CHECK-SD-NEXT: smov w8, v1.h[1] 250; CHECK-SD-NEXT: smov w9, v0.h[1] 251; CHECK-SD-NEXT: smov w14, v1.h[2] 252; CHECK-SD-NEXT: smov w15, v0.h[2] 253; CHECK-SD-NEXT: smov w17, v1.h[3] 254; CHECK-SD-NEXT: smov w18, v0.h[3] 255; CHECK-SD-NEXT: sdiv w13, w12, w11 256; CHECK-SD-NEXT: sdiv w10, w9, w8 257; 
CHECK-SD-NEXT: msub w11, w13, w11, w12 258; CHECK-SD-NEXT: fmov s0, w11 259; CHECK-SD-NEXT: sdiv w16, w15, w14 260; CHECK-SD-NEXT: msub w8, w10, w8, w9 261; CHECK-SD-NEXT: mov v0.h[1], w8 262; CHECK-SD-NEXT: sdiv w9, w18, w17 263; CHECK-SD-NEXT: msub w8, w16, w14, w15 264; CHECK-SD-NEXT: mov v0.h[2], w8 265; CHECK-SD-NEXT: msub w8, w9, w17, w18 266; CHECK-SD-NEXT: mov v0.h[3], w8 267; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 268; CHECK-SD-NEXT: ret 269; 270; CHECK-GI-LABEL: sv4i8: 271; CHECK-GI: // %bb.0: // %entry 272; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 273; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 274; CHECK-GI-NEXT: shl v0.4s, v0.4s, #24 275; CHECK-GI-NEXT: shl v1.4s, v1.4s, #24 276; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #24 277; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #24 278; CHECK-GI-NEXT: fmov w8, s0 279; CHECK-GI-NEXT: fmov w9, s1 280; CHECK-GI-NEXT: mov w10, v1.s[1] 281; CHECK-GI-NEXT: mov w11, v1.s[2] 282; CHECK-GI-NEXT: mov w12, v1.s[3] 283; CHECK-GI-NEXT: sdiv w8, w8, w9 284; CHECK-GI-NEXT: mov w9, v0.s[1] 285; CHECK-GI-NEXT: sdiv w9, w9, w10 286; CHECK-GI-NEXT: mov w10, v0.s[2] 287; CHECK-GI-NEXT: mov v2.s[0], w8 288; CHECK-GI-NEXT: sdiv w10, w10, w11 289; CHECK-GI-NEXT: mov w11, v0.s[3] 290; CHECK-GI-NEXT: mov v2.s[1], w9 291; CHECK-GI-NEXT: sdiv w8, w11, w12 292; CHECK-GI-NEXT: mov v2.s[2], w10 293; CHECK-GI-NEXT: mov v2.s[3], w8 294; CHECK-GI-NEXT: mls v0.4s, v2.4s, v1.4s 295; CHECK-GI-NEXT: xtn v0.4h, v0.4s 296; CHECK-GI-NEXT: ret 297entry: 298 %s = srem <4 x i8> %d, %e 299 ret <4 x i8> %s 300} 301 302define <8 x i8> @sv8i8(<8 x i8> %d, <8 x i8> %e) { 303; CHECK-SD-LABEL: sv8i8: 304; CHECK-SD: // %bb.0: // %entry 305; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 306; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 307; CHECK-SD-NEXT: smov w11, v1.b[0] 308; CHECK-SD-NEXT: smov w12, v0.b[0] 309; CHECK-SD-NEXT: smov w8, v1.b[1] 310; CHECK-SD-NEXT: smov w9, v0.b[1] 311; CHECK-SD-NEXT: smov w14, v1.b[2] 312; CHECK-SD-NEXT: smov w15, v0.b[2] 
313; CHECK-SD-NEXT: smov w17, v1.b[3] 314; CHECK-SD-NEXT: smov w18, v0.b[3] 315; CHECK-SD-NEXT: smov w1, v1.b[4] 316; CHECK-SD-NEXT: smov w2, v0.b[4] 317; CHECK-SD-NEXT: smov w4, v1.b[5] 318; CHECK-SD-NEXT: smov w5, v0.b[5] 319; CHECK-SD-NEXT: sdiv w13, w12, w11 320; CHECK-SD-NEXT: sdiv w10, w9, w8 321; CHECK-SD-NEXT: msub w11, w13, w11, w12 322; CHECK-SD-NEXT: smov w13, v1.b[7] 323; CHECK-SD-NEXT: fmov s2, w11 324; CHECK-SD-NEXT: smov w11, v0.b[6] 325; CHECK-SD-NEXT: sdiv w16, w15, w14 326; CHECK-SD-NEXT: msub w8, w10, w8, w9 327; CHECK-SD-NEXT: smov w10, v1.b[6] 328; CHECK-SD-NEXT: mov v2.b[1], w8 329; CHECK-SD-NEXT: sdiv w0, w18, w17 330; CHECK-SD-NEXT: msub w8, w16, w14, w15 331; CHECK-SD-NEXT: smov w14, v0.b[7] 332; CHECK-SD-NEXT: mov v2.b[2], w8 333; CHECK-SD-NEXT: sdiv w3, w2, w1 334; CHECK-SD-NEXT: msub w8, w0, w17, w18 335; CHECK-SD-NEXT: mov v2.b[3], w8 336; CHECK-SD-NEXT: sdiv w9, w5, w4 337; CHECK-SD-NEXT: msub w8, w3, w1, w2 338; CHECK-SD-NEXT: mov v2.b[4], w8 339; CHECK-SD-NEXT: sdiv w12, w11, w10 340; CHECK-SD-NEXT: msub w8, w9, w4, w5 341; CHECK-SD-NEXT: mov v2.b[5], w8 342; CHECK-SD-NEXT: sdiv w9, w14, w13 343; CHECK-SD-NEXT: msub w8, w12, w10, w11 344; CHECK-SD-NEXT: mov v2.b[6], w8 345; CHECK-SD-NEXT: msub w8, w9, w13, w14 346; CHECK-SD-NEXT: mov v2.b[7], w8 347; CHECK-SD-NEXT: fmov d0, d2 348; CHECK-SD-NEXT: ret 349; 350; CHECK-GI-LABEL: sv8i8: 351; CHECK-GI: // %bb.0: // %entry 352; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0 353; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0 354; CHECK-GI-NEXT: sshll v2.4s, v0.4h, #0 355; CHECK-GI-NEXT: sshll v3.4s, v1.4h, #0 356; CHECK-GI-NEXT: sshll2 v0.4s, v0.8h, #0 357; CHECK-GI-NEXT: sshll2 v1.4s, v1.8h, #0 358; CHECK-GI-NEXT: fmov w8, s2 359; CHECK-GI-NEXT: fmov w9, s3 360; CHECK-GI-NEXT: mov w10, v3.s[1] 361; CHECK-GI-NEXT: mov w11, v3.s[2] 362; CHECK-GI-NEXT: mov w12, v3.s[3] 363; CHECK-GI-NEXT: fmov w13, s1 364; CHECK-GI-NEXT: mov w14, v1.s[1] 365; CHECK-GI-NEXT: mov w15, v1.s[2] 366; CHECK-GI-NEXT: sdiv w8, w8, w9 
367; CHECK-GI-NEXT: mov w9, v2.s[1] 368; CHECK-GI-NEXT: sdiv w9, w9, w10 369; CHECK-GI-NEXT: mov w10, v2.s[2] 370; CHECK-GI-NEXT: mov v4.s[0], w8 371; CHECK-GI-NEXT: mov w8, v0.s[3] 372; CHECK-GI-NEXT: sdiv w10, w10, w11 373; CHECK-GI-NEXT: mov w11, v2.s[3] 374; CHECK-GI-NEXT: mov v4.s[1], w9 375; CHECK-GI-NEXT: sdiv w11, w11, w12 376; CHECK-GI-NEXT: fmov w12, s0 377; CHECK-GI-NEXT: mov v4.s[2], w10 378; CHECK-GI-NEXT: sdiv w12, w12, w13 379; CHECK-GI-NEXT: mov w13, v0.s[1] 380; CHECK-GI-NEXT: mov v4.s[3], w11 381; CHECK-GI-NEXT: mls v2.4s, v4.4s, v3.4s 382; CHECK-GI-NEXT: sdiv w13, w13, w14 383; CHECK-GI-NEXT: mov w14, v0.s[2] 384; CHECK-GI-NEXT: mov v5.s[0], w12 385; CHECK-GI-NEXT: mov w12, v1.s[3] 386; CHECK-GI-NEXT: sdiv w14, w14, w15 387; CHECK-GI-NEXT: mov v5.s[1], w13 388; CHECK-GI-NEXT: sdiv w8, w8, w12 389; CHECK-GI-NEXT: mov v5.s[2], w14 390; CHECK-GI-NEXT: mov v5.s[3], w8 391; CHECK-GI-NEXT: mls v0.4s, v5.4s, v1.4s 392; CHECK-GI-NEXT: uzp1 v0.8h, v2.8h, v0.8h 393; CHECK-GI-NEXT: xtn v0.8b, v0.8h 394; CHECK-GI-NEXT: ret 395entry: 396 %s = srem <8 x i8> %d, %e 397 ret <8 x i8> %s 398} 399 400define <16 x i8> @sv16i8(<16 x i8> %d, <16 x i8> %e) { 401; CHECK-SD-LABEL: sv16i8: 402; CHECK-SD: // %bb.0: // %entry 403; CHECK-SD-NEXT: stp x28, x27, [sp, #-80]! 
// 16-byte Folded Spill 404; CHECK-SD-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill 405; CHECK-SD-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill 406; CHECK-SD-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill 407; CHECK-SD-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill 408; CHECK-SD-NEXT: .cfi_def_cfa_offset 80 409; CHECK-SD-NEXT: .cfi_offset w19, -8 410; CHECK-SD-NEXT: .cfi_offset w20, -16 411; CHECK-SD-NEXT: .cfi_offset w21, -24 412; CHECK-SD-NEXT: .cfi_offset w22, -32 413; CHECK-SD-NEXT: .cfi_offset w23, -40 414; CHECK-SD-NEXT: .cfi_offset w24, -48 415; CHECK-SD-NEXT: .cfi_offset w25, -56 416; CHECK-SD-NEXT: .cfi_offset w26, -64 417; CHECK-SD-NEXT: .cfi_offset w27, -72 418; CHECK-SD-NEXT: .cfi_offset w28, -80 419; CHECK-SD-NEXT: smov w11, v1.b[0] 420; CHECK-SD-NEXT: smov w12, v0.b[0] 421; CHECK-SD-NEXT: smov w8, v1.b[1] 422; CHECK-SD-NEXT: smov w9, v0.b[1] 423; CHECK-SD-NEXT: smov w14, v1.b[2] 424; CHECK-SD-NEXT: smov w15, v0.b[2] 425; CHECK-SD-NEXT: smov w17, v1.b[3] 426; CHECK-SD-NEXT: smov w18, v0.b[3] 427; CHECK-SD-NEXT: smov w1, v1.b[4] 428; CHECK-SD-NEXT: smov w2, v0.b[4] 429; CHECK-SD-NEXT: smov w4, v1.b[5] 430; CHECK-SD-NEXT: smov w5, v0.b[5] 431; CHECK-SD-NEXT: sdiv w13, w12, w11 432; CHECK-SD-NEXT: smov w7, v1.b[6] 433; CHECK-SD-NEXT: smov w19, v0.b[6] 434; CHECK-SD-NEXT: smov w21, v1.b[7] 435; CHECK-SD-NEXT: smov w22, v0.b[7] 436; CHECK-SD-NEXT: smov w24, v1.b[8] 437; CHECK-SD-NEXT: smov w25, v0.b[8] 438; CHECK-SD-NEXT: smov w27, v1.b[9] 439; CHECK-SD-NEXT: smov w28, v0.b[9] 440; CHECK-SD-NEXT: sdiv w10, w9, w8 441; CHECK-SD-NEXT: msub w11, w13, w11, w12 442; CHECK-SD-NEXT: smov w13, v1.b[11] 443; CHECK-SD-NEXT: fmov s2, w11 444; CHECK-SD-NEXT: smov w11, v0.b[10] 445; CHECK-SD-NEXT: sdiv w16, w15, w14 446; CHECK-SD-NEXT: msub w8, w10, w8, w9 447; CHECK-SD-NEXT: smov w10, v1.b[10] 448; CHECK-SD-NEXT: mov v2.b[1], w8 449; CHECK-SD-NEXT: sdiv w0, w18, w17 450; CHECK-SD-NEXT: msub w8, w16, w14, w15 451; CHECK-SD-NEXT: smov w14, 
v0.b[11] 452; CHECK-SD-NEXT: smov w16, v1.b[12] 453; CHECK-SD-NEXT: mov v2.b[2], w8 454; CHECK-SD-NEXT: sdiv w3, w2, w1 455; CHECK-SD-NEXT: msub w8, w0, w17, w18 456; CHECK-SD-NEXT: smov w17, v0.b[12] 457; CHECK-SD-NEXT: smov w0, v1.b[13] 458; CHECK-SD-NEXT: mov v2.b[3], w8 459; CHECK-SD-NEXT: sdiv w6, w5, w4 460; CHECK-SD-NEXT: msub w8, w3, w1, w2 461; CHECK-SD-NEXT: smov w1, v0.b[13] 462; CHECK-SD-NEXT: mov v2.b[4], w8 463; CHECK-SD-NEXT: sdiv w20, w19, w7 464; CHECK-SD-NEXT: msub w8, w6, w4, w5 465; CHECK-SD-NEXT: mov v2.b[5], w8 466; CHECK-SD-NEXT: sdiv w23, w22, w21 467; CHECK-SD-NEXT: msub w8, w20, w7, w19 468; CHECK-SD-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload 469; CHECK-SD-NEXT: mov v2.b[6], w8 470; CHECK-SD-NEXT: sdiv w26, w25, w24 471; CHECK-SD-NEXT: msub w8, w23, w21, w22 472; CHECK-SD-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload 473; CHECK-SD-NEXT: mov v2.b[7], w8 474; CHECK-SD-NEXT: sdiv w9, w28, w27 475; CHECK-SD-NEXT: msub w8, w26, w24, w25 476; CHECK-SD-NEXT: ldp x24, x23, [sp, #32] // 16-byte Folded Reload 477; CHECK-SD-NEXT: ldp x26, x25, [sp, #16] // 16-byte Folded Reload 478; CHECK-SD-NEXT: mov v2.b[8], w8 479; CHECK-SD-NEXT: sdiv w12, w11, w10 480; CHECK-SD-NEXT: msub w8, w9, w27, w28 481; CHECK-SD-NEXT: mov v2.b[9], w8 482; CHECK-SD-NEXT: sdiv w15, w14, w13 483; CHECK-SD-NEXT: msub w8, w12, w10, w11 484; CHECK-SD-NEXT: smov w10, v1.b[14] 485; CHECK-SD-NEXT: smov w11, v0.b[14] 486; CHECK-SD-NEXT: mov v2.b[10], w8 487; CHECK-SD-NEXT: sdiv w18, w17, w16 488; CHECK-SD-NEXT: msub w8, w15, w13, w14 489; CHECK-SD-NEXT: smov w13, v1.b[15] 490; CHECK-SD-NEXT: smov w14, v0.b[15] 491; CHECK-SD-NEXT: mov v2.b[11], w8 492; CHECK-SD-NEXT: sdiv w9, w1, w0 493; CHECK-SD-NEXT: msub w8, w18, w16, w17 494; CHECK-SD-NEXT: mov v2.b[12], w8 495; CHECK-SD-NEXT: sdiv w12, w11, w10 496; CHECK-SD-NEXT: msub w8, w9, w0, w1 497; CHECK-SD-NEXT: mov v2.b[13], w8 498; CHECK-SD-NEXT: sdiv w9, w14, w13 499; CHECK-SD-NEXT: msub w8, w12, w10, w11 500; 
CHECK-SD-NEXT: mov v2.b[14], w8 501; CHECK-SD-NEXT: msub w8, w9, w13, w14 502; CHECK-SD-NEXT: mov v2.b[15], w8 503; CHECK-SD-NEXT: mov v0.16b, v2.16b 504; CHECK-SD-NEXT: ldp x28, x27, [sp], #80 // 16-byte Folded Reload 505; CHECK-SD-NEXT: ret 506; 507; CHECK-GI-LABEL: sv16i8: 508; CHECK-GI: // %bb.0: // %entry 509; CHECK-GI-NEXT: sshll v4.8h, v0.8b, #0 510; CHECK-GI-NEXT: sshll v5.8h, v1.8b, #0 511; CHECK-GI-NEXT: sshll2 v6.8h, v0.16b, #0 512; CHECK-GI-NEXT: sshll2 v7.8h, v1.16b, #0 513; CHECK-GI-NEXT: sshll v2.4s, v4.4h, #0 514; CHECK-GI-NEXT: sshll v3.4s, v5.4h, #0 515; CHECK-GI-NEXT: sshll2 v4.4s, v4.8h, #0 516; CHECK-GI-NEXT: sshll2 v5.4s, v5.8h, #0 517; CHECK-GI-NEXT: sshll v0.4s, v6.4h, #0 518; CHECK-GI-NEXT: sshll v1.4s, v7.4h, #0 519; CHECK-GI-NEXT: sshll2 v6.4s, v6.8h, #0 520; CHECK-GI-NEXT: sshll2 v7.4s, v7.8h, #0 521; CHECK-GI-NEXT: fmov w8, s2 522; CHECK-GI-NEXT: fmov w9, s3 523; CHECK-GI-NEXT: mov w12, v3.s[3] 524; CHECK-GI-NEXT: fmov w13, s5 525; CHECK-GI-NEXT: mov w16, v5.s[3] 526; CHECK-GI-NEXT: fmov w17, s1 527; CHECK-GI-NEXT: mov w18, v1.s[1] 528; CHECK-GI-NEXT: mov w0, v1.s[2] 529; CHECK-GI-NEXT: mov w1, v1.s[3] 530; CHECK-GI-NEXT: sdiv w11, w8, w9 531; CHECK-GI-NEXT: mov w8, v2.s[1] 532; CHECK-GI-NEXT: mov w9, v3.s[1] 533; CHECK-GI-NEXT: fmov w2, s7 534; CHECK-GI-NEXT: mov w3, v7.s[1] 535; CHECK-GI-NEXT: mov w4, v7.s[2] 536; CHECK-GI-NEXT: sdiv w10, w8, w9 537; CHECK-GI-NEXT: mov w8, v2.s[2] 538; CHECK-GI-NEXT: mov w9, v3.s[2] 539; CHECK-GI-NEXT: mov v16.s[0], w11 540; CHECK-GI-NEXT: mov w11, v6.s[3] 541; CHECK-GI-NEXT: sdiv w9, w8, w9 542; CHECK-GI-NEXT: mov w8, v2.s[3] 543; CHECK-GI-NEXT: mov v16.s[1], w10 544; CHECK-GI-NEXT: sdiv w8, w8, w12 545; CHECK-GI-NEXT: fmov w12, s4 546; CHECK-GI-NEXT: mov v16.s[2], w9 547; CHECK-GI-NEXT: sdiv w14, w12, w13 548; CHECK-GI-NEXT: mov w12, v4.s[1] 549; CHECK-GI-NEXT: mov w13, v5.s[1] 550; CHECK-GI-NEXT: mov v16.s[3], w8 551; CHECK-GI-NEXT: mls v2.4s, v16.4s, v3.4s 552; CHECK-GI-NEXT: sdiv w15, w12, w13 
553; CHECK-GI-NEXT: mov w12, v4.s[2] 554; CHECK-GI-NEXT: mov w13, v5.s[2] 555; CHECK-GI-NEXT: mov v17.s[0], w14 556; CHECK-GI-NEXT: mov w14, v7.s[3] 557; CHECK-GI-NEXT: sdiv w13, w12, w13 558; CHECK-GI-NEXT: mov w12, v4.s[3] 559; CHECK-GI-NEXT: mov v17.s[1], w15 560; CHECK-GI-NEXT: sdiv w12, w12, w16 561; CHECK-GI-NEXT: fmov w16, s0 562; CHECK-GI-NEXT: mov v17.s[2], w13 563; CHECK-GI-NEXT: sdiv w16, w16, w17 564; CHECK-GI-NEXT: mov w17, v0.s[1] 565; CHECK-GI-NEXT: mov v17.s[3], w12 566; CHECK-GI-NEXT: mls v4.4s, v17.4s, v5.4s 567; CHECK-GI-NEXT: sdiv w17, w17, w18 568; CHECK-GI-NEXT: mov w18, v0.s[2] 569; CHECK-GI-NEXT: mov v18.s[0], w16 570; CHECK-GI-NEXT: sdiv w18, w18, w0 571; CHECK-GI-NEXT: mov w0, v0.s[3] 572; CHECK-GI-NEXT: mov v18.s[1], w17 573; CHECK-GI-NEXT: sdiv w0, w0, w1 574; CHECK-GI-NEXT: fmov w1, s6 575; CHECK-GI-NEXT: mov v18.s[2], w18 576; CHECK-GI-NEXT: sdiv w1, w1, w2 577; CHECK-GI-NEXT: mov w2, v6.s[1] 578; CHECK-GI-NEXT: mov v18.s[3], w0 579; CHECK-GI-NEXT: mls v0.4s, v18.4s, v1.4s 580; CHECK-GI-NEXT: uzp1 v1.8h, v2.8h, v4.8h 581; CHECK-GI-NEXT: sdiv w2, w2, w3 582; CHECK-GI-NEXT: mov w3, v6.s[2] 583; CHECK-GI-NEXT: mov v19.s[0], w1 584; CHECK-GI-NEXT: sdiv w3, w3, w4 585; CHECK-GI-NEXT: mov v19.s[1], w2 586; CHECK-GI-NEXT: sdiv w10, w11, w14 587; CHECK-GI-NEXT: mov v19.s[2], w3 588; CHECK-GI-NEXT: mov v19.s[3], w10 589; CHECK-GI-NEXT: mls v6.4s, v19.4s, v7.4s 590; CHECK-GI-NEXT: uzp1 v0.8h, v0.8h, v6.8h 591; CHECK-GI-NEXT: uzp1 v0.16b, v1.16b, v0.16b 592; CHECK-GI-NEXT: ret 593entry: 594 %s = srem <16 x i8> %d, %e 595 ret <16 x i8> %s 596} 597 598define <32 x i8> @sv32i8(<32 x i8> %d, <32 x i8> %e) { 599; CHECK-SD-LABEL: sv32i8: 600; CHECK-SD: // %bb.0: // %entry 601; CHECK-SD-NEXT: sub sp, sp, #304 602; CHECK-SD-NEXT: stp x29, x30, [sp, #208] // 16-byte Folded Spill 603; CHECK-SD-NEXT: stp x28, x27, [sp, #224] // 16-byte Folded Spill 604; CHECK-SD-NEXT: stp x26, x25, [sp, #240] // 16-byte Folded Spill 605; CHECK-SD-NEXT: stp x24, x23, [sp, 
#256] // 16-byte Folded Spill 606; CHECK-SD-NEXT: stp x22, x21, [sp, #272] // 16-byte Folded Spill 607; CHECK-SD-NEXT: stp x20, x19, [sp, #288] // 16-byte Folded Spill 608; CHECK-SD-NEXT: .cfi_def_cfa_offset 304 609; CHECK-SD-NEXT: .cfi_offset w19, -8 610; CHECK-SD-NEXT: .cfi_offset w20, -16 611; CHECK-SD-NEXT: .cfi_offset w21, -24 612; CHECK-SD-NEXT: .cfi_offset w22, -32 613; CHECK-SD-NEXT: .cfi_offset w23, -40 614; CHECK-SD-NEXT: .cfi_offset w24, -48 615; CHECK-SD-NEXT: .cfi_offset w25, -56 616; CHECK-SD-NEXT: .cfi_offset w26, -64 617; CHECK-SD-NEXT: .cfi_offset w27, -72 618; CHECK-SD-NEXT: .cfi_offset w28, -80 619; CHECK-SD-NEXT: .cfi_offset w30, -88 620; CHECK-SD-NEXT: .cfi_offset w29, -96 621; CHECK-SD-NEXT: smov w8, v2.b[1] 622; CHECK-SD-NEXT: smov w9, v0.b[1] 623; CHECK-SD-NEXT: smov w19, v3.b[7] 624; CHECK-SD-NEXT: smov w7, v1.b[7] 625; CHECK-SD-NEXT: smov w6, v3.b[8] 626; CHECK-SD-NEXT: smov w3, v1.b[8] 627; CHECK-SD-NEXT: smov w13, v3.b[0] 628; CHECK-SD-NEXT: smov w5, v3.b[1] 629; CHECK-SD-NEXT: smov w0, v1.b[1] 630; CHECK-SD-NEXT: smov w12, v3.b[2] 631; CHECK-SD-NEXT: smov w17, v3.b[3] 632; CHECK-SD-NEXT: smov w16, v1.b[3] 633; CHECK-SD-NEXT: str w8, [sp, #80] // 4-byte Folded Spill 634; CHECK-SD-NEXT: sdiv w10, w9, w8 635; CHECK-SD-NEXT: smov w8, v2.b[0] 636; CHECK-SD-NEXT: str w9, [sp, #88] // 4-byte Folded Spill 637; CHECK-SD-NEXT: smov w9, v0.b[0] 638; CHECK-SD-NEXT: ldr w30, [sp, #80] // 4-byte Folded Reload 639; CHECK-SD-NEXT: smov w15, v3.b[4] 640; CHECK-SD-NEXT: smov w14, v1.b[4] 641; CHECK-SD-NEXT: smov w4, v3.b[5] 642; CHECK-SD-NEXT: smov w1, v1.b[5] 643; CHECK-SD-NEXT: smov w2, v3.b[6] 644; CHECK-SD-NEXT: smov w18, v1.b[6] 645; CHECK-SD-NEXT: str w8, [sp, #32] // 4-byte Folded Spill 646; CHECK-SD-NEXT: smov w21, v3.b[9] 647; CHECK-SD-NEXT: smov w20, v1.b[9] 648; CHECK-SD-NEXT: str w9, [sp, #40] // 4-byte Folded Spill 649; CHECK-SD-NEXT: ldr w29, [sp, #32] // 4-byte Folded Reload 650; CHECK-SD-NEXT: sdiv w11, w9, w8 651; CHECK-SD-NEXT: smov w8, 
v2.b[2] 652; CHECK-SD-NEXT: smov w9, v0.b[2] 653; CHECK-SD-NEXT: str w10, [sp, #96] // 4-byte Folded Spill 654; CHECK-SD-NEXT: stp w8, w9, [sp, #16] // 8-byte Folded Spill 655; CHECK-SD-NEXT: sdiv w10, w9, w8 656; CHECK-SD-NEXT: smov w8, v2.b[3] 657; CHECK-SD-NEXT: smov w9, v0.b[3] 658; CHECK-SD-NEXT: stp w11, w8, [sp, #48] // 8-byte Folded Spill 659; CHECK-SD-NEXT: str w10, [sp, #24] // 4-byte Folded Spill 660; CHECK-SD-NEXT: sdiv w10, w9, w8 661; CHECK-SD-NEXT: smov w8, v2.b[4] 662; CHECK-SD-NEXT: str w8, [sp, #28] // 4-byte Folded Spill 663; CHECK-SD-NEXT: stp w9, w10, [sp, #56] // 8-byte Folded Spill 664; CHECK-SD-NEXT: smov w9, v0.b[4] 665; CHECK-SD-NEXT: sdiv w27, w0, w5 666; CHECK-SD-NEXT: str w9, [sp, #36] // 4-byte Folded Spill 667; CHECK-SD-NEXT: sdiv w10, w9, w8 668; CHECK-SD-NEXT: smov w8, v2.b[5] 669; CHECK-SD-NEXT: smov w9, v0.b[5] 670; CHECK-SD-NEXT: str w8, [sp, #76] // 4-byte Folded Spill 671; CHECK-SD-NEXT: str w9, [sp, #84] // 4-byte Folded Spill 672; CHECK-SD-NEXT: str w10, [sp, #44] // 4-byte Folded Spill 673; CHECK-SD-NEXT: sdiv w10, w9, w8 674; CHECK-SD-NEXT: smov w8, v2.b[6] 675; CHECK-SD-NEXT: smov w9, v0.b[6] 676; CHECK-SD-NEXT: stp w8, w9, [sp, #64] // 8-byte Folded Spill 677; CHECK-SD-NEXT: str w10, [sp, #92] // 4-byte Folded Spill 678; CHECK-SD-NEXT: sdiv w10, w9, w8 679; CHECK-SD-NEXT: smov w8, v2.b[7] 680; CHECK-SD-NEXT: smov w9, v0.b[7] 681; CHECK-SD-NEXT: stp w8, w9, [sp, #112] // 8-byte Folded Spill 682; CHECK-SD-NEXT: sdiv w11, w9, w8 683; CHECK-SD-NEXT: smov w8, v2.b[8] 684; CHECK-SD-NEXT: smov w9, v0.b[8] 685; CHECK-SD-NEXT: str w10, [sp, #72] // 4-byte Folded Spill 686; CHECK-SD-NEXT: stp w8, w9, [sp, #100] // 8-byte Folded Spill 687; CHECK-SD-NEXT: sdiv w10, w9, w8 688; CHECK-SD-NEXT: smov w8, v2.b[9] 689; CHECK-SD-NEXT: smov w9, v0.b[9] 690; CHECK-SD-NEXT: stp w8, w9, [sp, #136] // 8-byte Folded Spill 691; CHECK-SD-NEXT: str w10, [sp, #108] // 4-byte Folded Spill 692; CHECK-SD-NEXT: sdiv w10, w9, w8 693; CHECK-SD-NEXT: smov 
w8, v2.b[10] 694; CHECK-SD-NEXT: smov w9, v0.b[10] 695; CHECK-SD-NEXT: stp w11, w8, [sp, #120] // 8-byte Folded Spill 696; CHECK-SD-NEXT: str w10, [sp, #144] // 4-byte Folded Spill 697; CHECK-SD-NEXT: sdiv w10, w9, w8 698; CHECK-SD-NEXT: smov w8, v2.b[11] 699; CHECK-SD-NEXT: stp w9, w10, [sp, #128] // 8-byte Folded Spill 700; CHECK-SD-NEXT: smov w9, v0.b[11] 701; CHECK-SD-NEXT: sdiv w25, w16, w17 702; CHECK-SD-NEXT: stp w8, w9, [sp, #172] // 8-byte Folded Spill 703; CHECK-SD-NEXT: sdiv w11, w9, w8 704; CHECK-SD-NEXT: smov w8, v2.b[12] 705; CHECK-SD-NEXT: smov w9, v0.b[12] 706; CHECK-SD-NEXT: str w8, [sp, #152] // 4-byte Folded Spill 707; CHECK-SD-NEXT: str w9, [sp, #160] // 4-byte Folded Spill 708; CHECK-SD-NEXT: sdiv w10, w9, w8 709; CHECK-SD-NEXT: smov w8, v2.b[13] 710; CHECK-SD-NEXT: smov w9, v0.b[13] 711; CHECK-SD-NEXT: stp w8, w9, [sp, #196] // 8-byte Folded Spill 712; CHECK-SD-NEXT: str w10, [sp, #168] // 4-byte Folded Spill 713; CHECK-SD-NEXT: sdiv w10, w9, w8 714; CHECK-SD-NEXT: smov w8, v2.b[14] 715; CHECK-SD-NEXT: smov w9, v0.b[14] 716; CHECK-SD-NEXT: stp w11, w8, [sp, #180] // 8-byte Folded Spill 717; CHECK-SD-NEXT: smov w11, v1.b[2] 718; CHECK-SD-NEXT: str w10, [sp, #204] // 4-byte Folded Spill 719; CHECK-SD-NEXT: sdiv w10, w9, w8 720; CHECK-SD-NEXT: smov w8, v2.b[15] 721; CHECK-SD-NEXT: str w8, [sp, #148] // 4-byte Folded Spill 722; CHECK-SD-NEXT: stp w9, w10, [sp, #188] // 8-byte Folded Spill 723; CHECK-SD-NEXT: smov w9, v0.b[15] 724; CHECK-SD-NEXT: sdiv w22, w11, w12 725; CHECK-SD-NEXT: str w9, [sp, #156] // 4-byte Folded Spill 726; CHECK-SD-NEXT: sdiv w10, w9, w8 727; CHECK-SD-NEXT: str w10, [sp, #164] // 4-byte Folded Spill 728; CHECK-SD-NEXT: smov w10, v1.b[0] 729; CHECK-SD-NEXT: sdiv w9, w7, w19 730; CHECK-SD-NEXT: sdiv w8, w3, w6 731; CHECK-SD-NEXT: sdiv w23, w10, w13 732; CHECK-SD-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill 733; CHECK-SD-NEXT: ldr w8, [sp, #96] // 4-byte Folded Reload 734; CHECK-SD-NEXT: ldr w9, [sp, #88] // 4-byte Folded 
Reload 735; CHECK-SD-NEXT: msub w9, w8, w30, w9 736; CHECK-SD-NEXT: ldr w8, [sp, #48] // 4-byte Folded Reload 737; CHECK-SD-NEXT: ldr w30, [sp, #40] // 4-byte Folded Reload 738; CHECK-SD-NEXT: msub w8, w8, w29, w30 739; CHECK-SD-NEXT: ldp x29, x30, [sp, #208] // 16-byte Folded Reload 740; CHECK-SD-NEXT: fmov s0, w8 741; CHECK-SD-NEXT: msub w10, w23, w13, w10 742; CHECK-SD-NEXT: sdiv w24, w14, w15 743; CHECK-SD-NEXT: msub w13, w27, w5, w0 744; CHECK-SD-NEXT: ldr w5, [sp, #16] // 4-byte Folded Reload 745; CHECK-SD-NEXT: mov v0.b[1], w9 746; CHECK-SD-NEXT: msub w9, w22, w12, w11 747; CHECK-SD-NEXT: smov w11, v1.b[10] 748; CHECK-SD-NEXT: fmov s2, w10 749; CHECK-SD-NEXT: ldp w10, w8, [sp, #20] // 8-byte Folded Reload 750; CHECK-SD-NEXT: mov v2.b[1], w13 751; CHECK-SD-NEXT: msub w8, w8, w5, w10 752; CHECK-SD-NEXT: ldr w5, [sp, #52] // 4-byte Folded Reload 753; CHECK-SD-NEXT: smov w10, v3.b[10] 754; CHECK-SD-NEXT: sdiv w28, w1, w4 755; CHECK-SD-NEXT: ldp w13, w12, [sp, #56] // 8-byte Folded Reload 756; CHECK-SD-NEXT: mov v2.b[2], w9 757; CHECK-SD-NEXT: mov v0.b[2], w8 758; CHECK-SD-NEXT: msub w8, w25, w17, w16 759; CHECK-SD-NEXT: ldr w17, [sp, #28] // 4-byte Folded Reload 760; CHECK-SD-NEXT: ldr w16, [sp, #36] // 4-byte Folded Reload 761; CHECK-SD-NEXT: msub w12, w12, w5, w13 762; CHECK-SD-NEXT: ldr w13, [sp, #44] // 4-byte Folded Reload 763; CHECK-SD-NEXT: ldr w5, [sp, #136] // 4-byte Folded Reload 764; CHECK-SD-NEXT: mov v2.b[3], w8 765; CHECK-SD-NEXT: msub w8, w24, w15, w14 766; CHECK-SD-NEXT: ldr w15, [sp, #92] // 4-byte Folded Reload 767; CHECK-SD-NEXT: mov v0.b[3], w12 768; CHECK-SD-NEXT: msub w13, w13, w17, w16 769; CHECK-SD-NEXT: ldr w17, [sp, #76] // 4-byte Folded Reload 770; CHECK-SD-NEXT: sdiv w26, w18, w2 771; CHECK-SD-NEXT: ldr w16, [sp, #84] // 4-byte Folded Reload 772; CHECK-SD-NEXT: smov w12, v3.b[11] 773; CHECK-SD-NEXT: msub w15, w15, w17, w16 774; CHECK-SD-NEXT: smov w14, v1.b[11] 775; CHECK-SD-NEXT: mov v2.b[4], w8 776; CHECK-SD-NEXT: msub w8, w28, w4, 
w1 777; CHECK-SD-NEXT: ldr w1, [sp, #64] // 4-byte Folded Reload 778; CHECK-SD-NEXT: mov v0.b[4], w13 779; CHECK-SD-NEXT: ldr w4, [sp, #100] // 4-byte Folded Reload 780; CHECK-SD-NEXT: ldp w17, w16, [sp, #68] // 8-byte Folded Reload 781; CHECK-SD-NEXT: ldp x24, x23, [sp, #256] // 16-byte Folded Reload 782; CHECK-SD-NEXT: mov v2.b[5], w8 783; CHECK-SD-NEXT: ldp x28, x27, [sp, #224] // 16-byte Folded Reload 784; CHECK-SD-NEXT: mov v0.b[5], w15 785; CHECK-SD-NEXT: msub w16, w16, w1, w17 786; CHECK-SD-NEXT: smov w15, v3.b[12] 787; CHECK-SD-NEXT: msub w8, w26, w2, w18 788; CHECK-SD-NEXT: ldr w2, [sp, #112] // 4-byte Folded Reload 789; CHECK-SD-NEXT: sdiv w0, w20, w21 790; CHECK-SD-NEXT: ldp w1, w18, [sp, #116] // 8-byte Folded Reload 791; CHECK-SD-NEXT: smov w17, v1.b[12] 792; CHECK-SD-NEXT: ldp x26, x25, [sp, #240] // 16-byte Folded Reload 793; CHECK-SD-NEXT: mov v2.b[6], w8 794; CHECK-SD-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload 795; CHECK-SD-NEXT: mov v0.b[6], w16 796; CHECK-SD-NEXT: msub w18, w18, w2, w1 797; CHECK-SD-NEXT: msub w8, w8, w19, w7 798; CHECK-SD-NEXT: ldp w2, w1, [sp, #104] // 8-byte Folded Reload 799; CHECK-SD-NEXT: mov v0.b[7], w18 800; CHECK-SD-NEXT: smov w18, v3.b[13] 801; CHECK-SD-NEXT: mov v2.b[7], w8 802; CHECK-SD-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload 803; CHECK-SD-NEXT: sdiv w9, w11, w10 804; CHECK-SD-NEXT: msub w1, w1, w4, w2 805; CHECK-SD-NEXT: smov w2, v1.b[13] 806; CHECK-SD-NEXT: msub w8, w8, w6, w3 807; CHECK-SD-NEXT: ldp w4, w3, [sp, #140] // 8-byte Folded Reload 808; CHECK-SD-NEXT: mov v0.b[8], w1 809; CHECK-SD-NEXT: mov v2.b[8], w8 810; CHECK-SD-NEXT: msub w8, w0, w21, w20 811; CHECK-SD-NEXT: msub w3, w3, w5, w4 812; CHECK-SD-NEXT: ldr w5, [sp, #124] // 4-byte Folded Reload 813; CHECK-SD-NEXT: ldp w4, w1, [sp, #128] // 8-byte Folded Reload 814; CHECK-SD-NEXT: sdiv w13, w14, w12 815; CHECK-SD-NEXT: ldp x20, x19, [sp, #288] // 16-byte Folded Reload 816; CHECK-SD-NEXT: mov v2.b[9], w8 817; CHECK-SD-NEXT: mov v0.b[9], w3 818; 
CHECK-SD-NEXT: msub w8, w9, w10, w11 819; CHECK-SD-NEXT: msub w1, w1, w5, w4 820; CHECK-SD-NEXT: ldr w4, [sp, #172] // 4-byte Folded Reload 821; CHECK-SD-NEXT: smov w9, v3.b[14] 822; CHECK-SD-NEXT: ldp w3, w11, [sp, #176] // 8-byte Folded Reload 823; CHECK-SD-NEXT: smov w10, v1.b[14] 824; CHECK-SD-NEXT: ldp x22, x21, [sp, #272] // 16-byte Folded Reload 825; CHECK-SD-NEXT: mov v2.b[10], w8 826; CHECK-SD-NEXT: mov v0.b[10], w1 827; CHECK-SD-NEXT: ldr w1, [sp, #152] // 4-byte Folded Reload 828; CHECK-SD-NEXT: msub w11, w11, w4, w3 829; CHECK-SD-NEXT: sdiv w16, w17, w15 830; CHECK-SD-NEXT: msub w8, w13, w12, w14 831; CHECK-SD-NEXT: ldr w13, [sp, #168] // 4-byte Folded Reload 832; CHECK-SD-NEXT: ldr w14, [sp, #160] // 4-byte Folded Reload 833; CHECK-SD-NEXT: mov v0.b[11], w11 834; CHECK-SD-NEXT: smov w11, v3.b[15] 835; CHECK-SD-NEXT: msub w13, w13, w1, w14 836; CHECK-SD-NEXT: smov w14, v1.b[15] 837; CHECK-SD-NEXT: mov v2.b[11], w8 838; CHECK-SD-NEXT: mov v0.b[12], w13 839; CHECK-SD-NEXT: sdiv w0, w2, w18 840; CHECK-SD-NEXT: msub w8, w16, w15, w17 841; CHECK-SD-NEXT: ldr w17, [sp, #196] // 4-byte Folded Reload 842; CHECK-SD-NEXT: ldp w16, w15, [sp, #200] // 8-byte Folded Reload 843; CHECK-SD-NEXT: mov v2.b[12], w8 844; CHECK-SD-NEXT: msub w15, w15, w17, w16 845; CHECK-SD-NEXT: ldp w17, w16, [sp, #188] // 8-byte Folded Reload 846; CHECK-SD-NEXT: mov v0.b[13], w15 847; CHECK-SD-NEXT: sdiv w12, w10, w9 848; CHECK-SD-NEXT: msub w8, w0, w18, w2 849; CHECK-SD-NEXT: ldr w18, [sp, #184] // 4-byte Folded Reload 850; CHECK-SD-NEXT: msub w16, w16, w18, w17 851; CHECK-SD-NEXT: mov v2.b[13], w8 852; CHECK-SD-NEXT: mov v0.b[14], w16 853; CHECK-SD-NEXT: sdiv w13, w14, w11 854; CHECK-SD-NEXT: msub w8, w12, w9, w10 855; CHECK-SD-NEXT: ldr w9, [sp, #164] // 4-byte Folded Reload 856; CHECK-SD-NEXT: ldr w12, [sp, #148] // 4-byte Folded Reload 857; CHECK-SD-NEXT: ldr w10, [sp, #156] // 4-byte Folded Reload 858; CHECK-SD-NEXT: mov v2.b[14], w8 859; CHECK-SD-NEXT: msub w9, w9, w12, w10 860; 
CHECK-SD-NEXT: mov v0.b[15], w9 861; CHECK-SD-NEXT: msub w8, w13, w11, w14 862; CHECK-SD-NEXT: mov v2.b[15], w8 863; CHECK-SD-NEXT: mov v1.16b, v2.16b 864; CHECK-SD-NEXT: add sp, sp, #304 865; CHECK-SD-NEXT: ret 866; 867; CHECK-GI-LABEL: sv32i8: 868; CHECK-GI: // %bb.0: // %entry 869; CHECK-GI-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill 870; CHECK-GI-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill 871; CHECK-GI-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill 872; CHECK-GI-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill 873; CHECK-GI-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill 874; CHECK-GI-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill 875; CHECK-GI-NEXT: .cfi_def_cfa_offset 96 876; CHECK-GI-NEXT: .cfi_offset w19, -8 877; CHECK-GI-NEXT: .cfi_offset w20, -16 878; CHECK-GI-NEXT: .cfi_offset w21, -24 879; CHECK-GI-NEXT: .cfi_offset w22, -32 880; CHECK-GI-NEXT: .cfi_offset w23, -40 881; CHECK-GI-NEXT: .cfi_offset w24, -48 882; CHECK-GI-NEXT: .cfi_offset w25, -56 883; CHECK-GI-NEXT: .cfi_offset w26, -64 884; CHECK-GI-NEXT: .cfi_offset w27, -72 885; CHECK-GI-NEXT: .cfi_offset w28, -80 886; CHECK-GI-NEXT: .cfi_offset w30, -88 887; CHECK-GI-NEXT: .cfi_offset w29, -96 888; CHECK-GI-NEXT: sshll v4.8h, v0.8b, #0 889; CHECK-GI-NEXT: sshll v5.8h, v2.8b, #0 890; CHECK-GI-NEXT: sshll v16.8h, v1.8b, #0 891; CHECK-GI-NEXT: sshll v17.8h, v3.8b, #0 892; CHECK-GI-NEXT: sshll v6.4s, v4.4h, #0 893; CHECK-GI-NEXT: sshll v7.4s, v5.4h, #0 894; CHECK-GI-NEXT: sshll2 v4.4s, v4.8h, #0 895; CHECK-GI-NEXT: sshll2 v5.4s, v5.8h, #0 896; CHECK-GI-NEXT: sshll v18.4s, v16.4h, #0 897; CHECK-GI-NEXT: sshll v19.4s, v17.4h, #0 898; CHECK-GI-NEXT: sshll2 v16.4s, v16.8h, #0 899; CHECK-GI-NEXT: sshll2 v17.4s, v17.8h, #0 900; CHECK-GI-NEXT: fmov w8, s6 901; CHECK-GI-NEXT: fmov w9, s7 902; CHECK-GI-NEXT: mov w12, v7.s[3] 903; CHECK-GI-NEXT: fmov w13, s5 904; CHECK-GI-NEXT: mov w16, v5.s[3] 905; CHECK-GI-NEXT: fmov w6, s19 906; CHECK-GI-NEXT: mov w7, v19.s[3] 
907; CHECK-GI-NEXT: fmov w21, s17 908; CHECK-GI-NEXT: mov w23, v17.s[3] 909; CHECK-GI-NEXT: sdiv w11, w8, w9 910; CHECK-GI-NEXT: mov w8, v6.s[1] 911; CHECK-GI-NEXT: mov w9, v7.s[1] 912; CHECK-GI-NEXT: sdiv w10, w8, w9 913; CHECK-GI-NEXT: mov w8, v6.s[2] 914; CHECK-GI-NEXT: mov w9, v7.s[2] 915; CHECK-GI-NEXT: mov v20.s[0], w11 916; CHECK-GI-NEXT: sdiv w9, w8, w9 917; CHECK-GI-NEXT: mov w8, v6.s[3] 918; CHECK-GI-NEXT: sshll2 v6.8h, v0.16b, #0 919; CHECK-GI-NEXT: mov v20.s[1], w10 920; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0 921; CHECK-GI-NEXT: sshll v28.4s, v0.4h, #0 922; CHECK-GI-NEXT: sshll2 v0.4s, v0.8h, #0 923; CHECK-GI-NEXT: sdiv w8, w8, w12 924; CHECK-GI-NEXT: fmov w12, s4 925; CHECK-GI-NEXT: mov v20.s[2], w9 926; CHECK-GI-NEXT: sdiv w15, w12, w13 927; CHECK-GI-NEXT: mov w12, v4.s[1] 928; CHECK-GI-NEXT: mov w13, v5.s[1] 929; CHECK-GI-NEXT: mov v20.s[3], w8 930; CHECK-GI-NEXT: sdiv w14, w12, w13 931; CHECK-GI-NEXT: mov w12, v4.s[2] 932; CHECK-GI-NEXT: mov w13, v5.s[2] 933; CHECK-GI-NEXT: sshll v5.4s, v6.4h, #0 934; CHECK-GI-NEXT: mov v21.s[0], w15 935; CHECK-GI-NEXT: sdiv w13, w12, w13 936; CHECK-GI-NEXT: mov w12, v4.s[3] 937; CHECK-GI-NEXT: sshll2 v4.8h, v2.16b, #0 938; CHECK-GI-NEXT: mov v21.s[1], w14 939; CHECK-GI-NEXT: sshll v2.8h, v2.8b, #0 940; CHECK-GI-NEXT: sshll v7.4s, v4.4h, #0 941; CHECK-GI-NEXT: sshll v30.4s, v2.4h, #0 942; CHECK-GI-NEXT: sshll2 v2.4s, v2.8h, #0 943; CHECK-GI-NEXT: fmov w17, s7 944; CHECK-GI-NEXT: mls v28.4s, v20.4s, v30.4s 945; CHECK-GI-NEXT: sdiv w12, w12, w16 946; CHECK-GI-NEXT: fmov w16, s5 947; CHECK-GI-NEXT: mov v21.s[2], w13 948; CHECK-GI-NEXT: sdiv w1, w16, w17 949; CHECK-GI-NEXT: mov w16, v5.s[1] 950; CHECK-GI-NEXT: mov w17, v7.s[1] 951; CHECK-GI-NEXT: mov v21.s[3], w12 952; CHECK-GI-NEXT: mls v0.4s, v21.4s, v2.4s 953; CHECK-GI-NEXT: sdiv w0, w16, w17 954; CHECK-GI-NEXT: mov w16, v5.s[2] 955; CHECK-GI-NEXT: mov w17, v7.s[2] 956; CHECK-GI-NEXT: mov v22.s[0], w1 957; CHECK-GI-NEXT: uzp1 v0.8h, v28.8h, v0.8h 958; CHECK-GI-NEXT: 
sdiv w18, w16, w17 959; CHECK-GI-NEXT: mov w16, v5.s[3] 960; CHECK-GI-NEXT: mov w17, v7.s[3] 961; CHECK-GI-NEXT: sshll2 v5.4s, v6.8h, #0 962; CHECK-GI-NEXT: sshll2 v7.4s, v4.8h, #0 963; CHECK-GI-NEXT: mov v22.s[1], w0 964; CHECK-GI-NEXT: sshll v6.4s, v6.4h, #0 965; CHECK-GI-NEXT: sshll v4.4s, v4.4h, #0 966; CHECK-GI-NEXT: fmov w2, s7 967; CHECK-GI-NEXT: mov w4, v7.s[3] 968; CHECK-GI-NEXT: sdiv w16, w16, w17 969; CHECK-GI-NEXT: fmov w17, s5 970; CHECK-GI-NEXT: mov v22.s[2], w18 971; CHECK-GI-NEXT: sdiv w5, w17, w2 972; CHECK-GI-NEXT: mov w17, v5.s[1] 973; CHECK-GI-NEXT: mov w2, v7.s[1] 974; CHECK-GI-NEXT: mov v22.s[3], w16 975; CHECK-GI-NEXT: mls v6.4s, v22.4s, v4.4s 976; CHECK-GI-NEXT: sdiv w3, w17, w2 977; CHECK-GI-NEXT: mov w17, v5.s[2] 978; CHECK-GI-NEXT: mov w2, v7.s[2] 979; CHECK-GI-NEXT: mov v23.s[0], w5 980; CHECK-GI-NEXT: sdiv w2, w17, w2 981; CHECK-GI-NEXT: mov w17, v5.s[3] 982; CHECK-GI-NEXT: mov v23.s[1], w3 983; CHECK-GI-NEXT: sdiv w17, w17, w4 984; CHECK-GI-NEXT: fmov w4, s18 985; CHECK-GI-NEXT: mov v23.s[2], w2 986; CHECK-GI-NEXT: sdiv w20, w4, w6 987; CHECK-GI-NEXT: mov w4, v18.s[1] 988; CHECK-GI-NEXT: mov w6, v19.s[1] 989; CHECK-GI-NEXT: mov v23.s[3], w17 990; CHECK-GI-NEXT: mls v5.4s, v23.4s, v7.4s 991; CHECK-GI-NEXT: sdiv w19, w4, w6 992; CHECK-GI-NEXT: mov w4, v18.s[2] 993; CHECK-GI-NEXT: mov w6, v19.s[2] 994; CHECK-GI-NEXT: mov v24.s[0], w20 995; CHECK-GI-NEXT: uzp1 v2.8h, v6.8h, v5.8h 996; CHECK-GI-NEXT: uzp1 v0.16b, v0.16b, v2.16b 997; CHECK-GI-NEXT: sdiv w6, w4, w6 998; CHECK-GI-NEXT: mov w4, v18.s[3] 999; CHECK-GI-NEXT: mov v24.s[1], w19 1000; CHECK-GI-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload 1001; CHECK-GI-NEXT: sdiv w4, w4, w7 1002; CHECK-GI-NEXT: fmov w7, s16 1003; CHECK-GI-NEXT: mov v24.s[2], w6 1004; CHECK-GI-NEXT: sdiv w24, w7, w21 1005; CHECK-GI-NEXT: mov w7, v16.s[1] 1006; CHECK-GI-NEXT: mov w21, v17.s[1] 1007; CHECK-GI-NEXT: mov v24.s[3], w4 1008; CHECK-GI-NEXT: sdiv w22, w7, w21 1009; CHECK-GI-NEXT: mov w7, v16.s[2] 
1010; CHECK-GI-NEXT: mov w21, v17.s[2] 1011; CHECK-GI-NEXT: sshll2 v17.8h, v1.16b, #0 1012; CHECK-GI-NEXT: mov v25.s[0], w24 1013; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0 1014; CHECK-GI-NEXT: sshll v18.4s, v17.4h, #0 1015; CHECK-GI-NEXT: sshll v29.4s, v1.4h, #0 1016; CHECK-GI-NEXT: sshll2 v1.4s, v1.8h, #0 1017; CHECK-GI-NEXT: sdiv w21, w7, w21 1018; CHECK-GI-NEXT: mov w7, v16.s[3] 1019; CHECK-GI-NEXT: sshll2 v16.8h, v3.16b, #0 1020; CHECK-GI-NEXT: mov v25.s[1], w22 1021; CHECK-GI-NEXT: sshll v3.8h, v3.8b, #0 1022; CHECK-GI-NEXT: sshll v19.4s, v16.4h, #0 1023; CHECK-GI-NEXT: sshll v31.4s, v3.4h, #0 1024; CHECK-GI-NEXT: sshll2 v3.4s, v3.8h, #0 1025; CHECK-GI-NEXT: fmov w25, s19 1026; CHECK-GI-NEXT: mov w26, v19.s[1] 1027; CHECK-GI-NEXT: mov w27, v19.s[2] 1028; CHECK-GI-NEXT: mov w28, v19.s[3] 1029; CHECK-GI-NEXT: sshll2 v19.4s, v16.8h, #0 1030; CHECK-GI-NEXT: sshll v16.4s, v16.4h, #0 1031; CHECK-GI-NEXT: sdiv w7, w7, w23 1032; CHECK-GI-NEXT: fmov w23, s18 1033; CHECK-GI-NEXT: mov v25.s[2], w21 1034; CHECK-GI-NEXT: mls v29.4s, v24.4s, v31.4s 1035; CHECK-GI-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload 1036; CHECK-GI-NEXT: fmov w29, s19 1037; CHECK-GI-NEXT: mov w30, v19.s[1] 1038; CHECK-GI-NEXT: mov w15, v19.s[2] 1039; CHECK-GI-NEXT: sdiv w25, w23, w25 1040; CHECK-GI-NEXT: mov w23, v18.s[1] 1041; CHECK-GI-NEXT: mov v25.s[3], w7 1042; CHECK-GI-NEXT: mls v1.4s, v25.4s, v3.4s 1043; CHECK-GI-NEXT: sdiv w26, w23, w26 1044; CHECK-GI-NEXT: mov w23, v18.s[2] 1045; CHECK-GI-NEXT: mov v26.s[0], w25 1046; CHECK-GI-NEXT: uzp1 v1.8h, v29.8h, v1.8h 1047; CHECK-GI-NEXT: sdiv w27, w23, w27 1048; CHECK-GI-NEXT: mov w23, v18.s[3] 1049; CHECK-GI-NEXT: sshll2 v18.4s, v17.8h, #0 1050; CHECK-GI-NEXT: mov v26.s[1], w26 1051; CHECK-GI-NEXT: sshll v17.4s, v17.4h, #0 1052; CHECK-GI-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload 1053; CHECK-GI-NEXT: mov w11, v18.s[2] 1054; CHECK-GI-NEXT: mov w9, v18.s[3] 1055; CHECK-GI-NEXT: sdiv w23, w23, w28 1056; CHECK-GI-NEXT: fmov w28, s18 
; CHECK-GI-NEXT: mov v26.s[2], w27
; CHECK-GI-NEXT: sdiv w28, w28, w29
; CHECK-GI-NEXT: mov w29, v18.s[1]
; CHECK-GI-NEXT: mov v26.s[3], w23
; CHECK-GI-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT: mls v17.4s, v26.4s, v16.4s
; CHECK-GI-NEXT: sdiv w29, w29, w30
; CHECK-GI-NEXT: mov v27.s[0], w28
; CHECK-GI-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: sdiv w10, w11, w15
; CHECK-GI-NEXT: mov w11, v19.s[3]
; CHECK-GI-NEXT: mov v27.s[1], w29
; CHECK-GI-NEXT: sdiv w8, w9, w11
; CHECK-GI-NEXT: mov v27.s[2], w10
; CHECK-GI-NEXT: mov v27.s[3], w8
; CHECK-GI-NEXT: mls v18.4s, v27.4s, v19.4s
; CHECK-GI-NEXT: uzp1 v3.8h, v17.8h, v18.8h
; CHECK-GI-NEXT: uzp1 v1.16b, v1.16b, v3.16b
; CHECK-GI-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload
; CHECK-GI-NEXT: ret
entry:
  %s = srem <32 x i8> %d, %e
  ret <32 x i8> %s
}

; uv2i8: urem on <2 x i8>.
; Both llc runs are expected to mask every 32-bit lane down to its low byte
; (movi + and with 0xff) and then divide lane by lane with scalar udiv.
; Without -global-isel the remainder is formed per lane with msub and the
; lanes are re-inserted into v0; with -global-isel the quotients are gathered
; into a vector and a single mls produces the remainder.
define <2 x i8> @uv2i8(<2 x i8> %d, <2 x i8> %e) {
; CHECK-SD-LABEL: uv2i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: movi d2, #0x0000ff000000ff
; CHECK-SD-NEXT: and v0.8b, v0.8b, v2.8b
; CHECK-SD-NEXT: and v1.8b, v1.8b, v2.8b
; CHECK-SD-NEXT: fmov w8, s1
; CHECK-SD-NEXT: fmov w9, s0
; CHECK-SD-NEXT: mov w11, v1.s[1]
; CHECK-SD-NEXT: mov w12, v0.s[1]
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: udiv w13, w12, w11
; CHECK-SD-NEXT: msub w8, w10, w8, w9
; CHECK-SD-NEXT: fmov s0, w8
; CHECK-SD-NEXT: msub w9, w13, w11, w12
; CHECK-SD-NEXT: mov v0.s[1], w9
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv2i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: movi d2, #0x0000ff000000ff
; CHECK-GI-NEXT: and v0.8b, v0.8b, v2.8b
; CHECK-GI-NEXT: and v1.8b, v1.8b, v2.8b
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: mov w10, v1.s[1]
; CHECK-GI-NEXT: udiv w8, w8, w9
; CHECK-GI-NEXT: mov w9, v0.s[1]
; CHECK-GI-NEXT: udiv w9, w9, w10
; CHECK-GI-NEXT: mov v2.s[0], w8
; CHECK-GI-NEXT: mov v2.s[1], w9
; CHECK-GI-NEXT: mls v0.2s, v2.2s, v1.2s
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
entry:
  %s = urem <2 x i8> %d, %e
  ret <2 x i8> %s
}

; uv3i8: urem on <3 x i8>.
; Both llc runs expect fully scalar code: each of the six inputs in w0-w5 is
; masked with 0xff, three udiv/msub pairs compute the remainders, and the
; results land in w0-w2. The two runs differ only in register assignment.
define <3 x i8> @uv3i8(<3 x i8> %d, <3 x i8> %e) {
; CHECK-SD-LABEL: uv3i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: and w8, w3, #0xff
; CHECK-SD-NEXT: and w9, w0, #0xff
; CHECK-SD-NEXT: and w11, w4, #0xff
; CHECK-SD-NEXT: and w12, w1, #0xff
; CHECK-SD-NEXT: and w14, w5, #0xff
; CHECK-SD-NEXT: and w15, w2, #0xff
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: udiv w13, w12, w11
; CHECK-SD-NEXT: msub w0, w10, w8, w9
; CHECK-SD-NEXT: udiv w16, w15, w14
; CHECK-SD-NEXT: msub w1, w13, w11, w12
; CHECK-SD-NEXT: msub w2, w16, w14, w15
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv3i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: and w8, w0, #0xff
; CHECK-GI-NEXT: and w9, w3, #0xff
; CHECK-GI-NEXT: and w11, w1, #0xff
; CHECK-GI-NEXT: and w12, w4, #0xff
; CHECK-GI-NEXT: and w14, w2, #0xff
; CHECK-GI-NEXT: and w15, w5, #0xff
; CHECK-GI-NEXT: udiv w10, w8, w9
; CHECK-GI-NEXT: udiv w13, w11, w12
; CHECK-GI-NEXT: msub w0, w10, w9, w8
; CHECK-GI-NEXT: udiv w16, w14, w15
; CHECK-GI-NEXT: msub w1, w13, w12, w11
; CHECK-GI-NEXT: msub w2, w16, w15, w14
; CHECK-GI-NEXT: ret
entry:
  %s = urem <3 x i8> %d, %e
  ret <3 x i8> %s
}

; uv4i8: urem on <4 x i8>.
; Without -global-isel: bic clears the upper byte of each 16-bit lane, the
; lanes are moved out with umov, and four udiv/msub pairs rebuild the result
; in v0.
; With -global-isel: the operands are widened to 4 x 32-bit with ushll #0 and
; masked with 0xff, divided lane by lane, then one mls forms the remainder
; and xtn narrows it back to 16-bit lanes.
define <4 x i8> @uv4i8(<4 x i8> %d, <4 x i8> %e) {
; CHECK-SD-LABEL: uv4i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
; CHECK-SD-NEXT: bic v1.4h, #255, lsl #8
; CHECK-SD-NEXT: umov w11, v1.h[0]
; CHECK-SD-NEXT: umov w12, v0.h[0]
; CHECK-SD-NEXT: umov w8, v1.h[1]
; CHECK-SD-NEXT: umov w9, v0.h[1]
; CHECK-SD-NEXT: umov w14, v1.h[2]
; CHECK-SD-NEXT: umov w15, v0.h[2]
; CHECK-SD-NEXT: umov w17, v1.h[3]
; CHECK-SD-NEXT: umov w18, v0.h[3]
; CHECK-SD-NEXT: udiv w13, w12, w11
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: msub w11, w13, w11, w12
; CHECK-SD-NEXT: fmov s0, w11
; CHECK-SD-NEXT: udiv w16, w15, w14
; CHECK-SD-NEXT: msub w8, w10, w8, w9
; CHECK-SD-NEXT: mov v0.h[1], w8
; CHECK-SD-NEXT: udiv w9, w18, w17
; CHECK-SD-NEXT: msub w8, w16, w14, w15
; CHECK-SD-NEXT: mov v0.h[2], w8
; CHECK-SD-NEXT: msub w8, w9, w17, w18
; CHECK-SD-NEXT: mov v0.h[3], w8
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv4i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: movi v2.2d, #0x0000ff000000ff
; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: fmov w9, s1
; CHECK-GI-NEXT: mov w10, v1.s[1]
; CHECK-GI-NEXT: mov w11, v1.s[2]
; CHECK-GI-NEXT: mov w12, v1.s[3]
; CHECK-GI-NEXT: udiv w8, w8, w9
; CHECK-GI-NEXT: mov w9, v0.s[1]
; CHECK-GI-NEXT: udiv w9, w9, w10
; CHECK-GI-NEXT: mov w10, v0.s[2]
; CHECK-GI-NEXT: mov v2.s[0], w8
; CHECK-GI-NEXT: udiv w10, w10, w11
; CHECK-GI-NEXT: mov w11, v0.s[3]
; CHECK-GI-NEXT: mov v2.s[1], w9
; CHECK-GI-NEXT: udiv w8, w11, w12
; CHECK-GI-NEXT: mov v2.s[2], w10
; CHECK-GI-NEXT: mov v2.s[3], w8
; CHECK-GI-NEXT: mls v0.4s, v2.4s, v1.4s
; CHECK-GI-NEXT: xtn v0.4h, v0.4s
; CHECK-GI-NEXT: ret
entry:
  %s = urem <4 x i8> %d, %e
  ret <4 x i8> %s
}

; uv8i8: urem on <8 x i8>.
; Without -global-isel: eight umov/udiv/msub sequences, with the results
; inserted byte by byte into v2 and then copied to d0.
; With -global-isel: the operands are widened with ushll/ushll2 #0 into two
; 4 x 32-bit halves, eight scalar udivs feed two mls, and uzp1 + xtn pack the
; remainders back to bytes.
define <8 x i8> @uv8i8(<8 x i8> %d, <8 x i8> %e) {
; CHECK-SD-LABEL: uv8i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: umov w11, v1.b[0]
; CHECK-SD-NEXT: umov w12, v0.b[0]
; CHECK-SD-NEXT: umov w8, v1.b[1]
; CHECK-SD-NEXT: umov w9, v0.b[1]
; CHECK-SD-NEXT: umov w14, v1.b[2]
; CHECK-SD-NEXT: umov w15, v0.b[2]
; CHECK-SD-NEXT: umov w17, v1.b[3]
; CHECK-SD-NEXT: umov w18, v0.b[3]
; CHECK-SD-NEXT: umov w1, v1.b[4]
; CHECK-SD-NEXT: umov w2, v0.b[4]
; CHECK-SD-NEXT: umov w4, v1.b[5]
; CHECK-SD-NEXT: umov w5, v0.b[5]
; CHECK-SD-NEXT: udiv w13, w12, w11
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: msub w11, w13, w11, w12
; CHECK-SD-NEXT: umov w13, v1.b[7]
; CHECK-SD-NEXT: fmov s2, w11
; CHECK-SD-NEXT: umov w11, v0.b[6]
; CHECK-SD-NEXT: udiv w16, w15, w14
; CHECK-SD-NEXT: msub w8, w10, w8, w9
; CHECK-SD-NEXT: umov w10, v1.b[6]
; CHECK-SD-NEXT: mov v2.b[1], w8
; CHECK-SD-NEXT: udiv w0, w18, w17
; CHECK-SD-NEXT: msub w8, w16, w14, w15
; CHECK-SD-NEXT: umov w14, v0.b[7]
; CHECK-SD-NEXT: mov v2.b[2], w8
; CHECK-SD-NEXT: udiv w3, w2, w1
; CHECK-SD-NEXT: msub w8, w0, w17, w18
; CHECK-SD-NEXT: mov v2.b[3], w8
; CHECK-SD-NEXT: udiv w9, w5, w4
; CHECK-SD-NEXT: msub w8, w3, w1, w2
; CHECK-SD-NEXT: mov v2.b[4], w8
; CHECK-SD-NEXT: udiv w12, w11, w10
; CHECK-SD-NEXT: msub w8, w9, w4, w5
; CHECK-SD-NEXT: mov v2.b[5], w8
; CHECK-SD-NEXT: udiv w9, w14, w13
; CHECK-SD-NEXT: msub w8, w12, w10, w11
; CHECK-SD-NEXT: mov v2.b[6], w8
; CHECK-SD-NEXT: msub w8, w9, w13, w14
; CHECK-SD-NEXT: mov v2.b[7], w8
; CHECK-SD-NEXT: fmov d0, d2
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv8i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT: ushll v2.4s, v0.4h, #0
; CHECK-GI-NEXT: ushll v3.4s, v1.4h, #0
; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0
; CHECK-GI-NEXT: fmov w8, s2
; CHECK-GI-NEXT: fmov w9, s3
; CHECK-GI-NEXT: mov w10, v3.s[1]
; CHECK-GI-NEXT: mov w11, v3.s[2]
; CHECK-GI-NEXT: mov w12, v3.s[3]
; CHECK-GI-NEXT: fmov w13, s1
; CHECK-GI-NEXT: mov w14, v1.s[1]
; CHECK-GI-NEXT: mov w15, v1.s[2]
; CHECK-GI-NEXT: udiv w8, w8, w9
; CHECK-GI-NEXT: mov w9, v2.s[1]
; CHECK-GI-NEXT: udiv w9, w9, w10
; CHECK-GI-NEXT: mov w10, v2.s[2]
; CHECK-GI-NEXT: mov v4.s[0], w8
; CHECK-GI-NEXT: mov w8, v0.s[3]
; CHECK-GI-NEXT: udiv w10, w10, w11
; CHECK-GI-NEXT: mov w11, v2.s[3]
; CHECK-GI-NEXT: mov v4.s[1], w9
; CHECK-GI-NEXT: udiv w11, w11, w12
; CHECK-GI-NEXT: fmov w12, s0
; CHECK-GI-NEXT: mov v4.s[2], w10
; CHECK-GI-NEXT: udiv w12, w12, w13
; CHECK-GI-NEXT: mov w13, v0.s[1]
; CHECK-GI-NEXT: mov v4.s[3], w11
; CHECK-GI-NEXT: mls v2.4s, v4.4s, v3.4s
; CHECK-GI-NEXT: udiv w13, w13, w14
; CHECK-GI-NEXT: mov w14, v0.s[2]
; CHECK-GI-NEXT: mov v5.s[0], w12
; CHECK-GI-NEXT: mov w12, v1.s[3]
; CHECK-GI-NEXT: udiv w14, w14, w15
; CHECK-GI-NEXT: mov v5.s[1], w13
; CHECK-GI-NEXT: udiv w8, w8, w12
; CHECK-GI-NEXT: mov v5.s[2], w14
; CHECK-GI-NEXT: mov v5.s[3], w8
; CHECK-GI-NEXT: mls v0.4s, v5.4s, v1.4s
; CHECK-GI-NEXT: uzp1 v0.8h, v2.8h, v0.8h
; CHECK-GI-NEXT: xtn v0.8b, v0.8h
; CHECK-GI-NEXT: ret
entry:
  %s = urem <8 x i8> %d, %e
  ret <8 x i8> %s
}

; uv16i8: urem on <16 x i8>.
; Without -global-isel: sixteen scalar udiv/msub pairs; the sequence uses
; w19-w28, so x19-x28 are spilled on entry and reloaded before the return.
; With -global-isel: the operands are widened with ushll/ushll2 #0 into four
; 4 x 32-bit quarters, sixteen scalar udivs feed four mls, and three uzp1
; pack the result; no spill or reload is expected in this run.
define <16 x i8> @uv16i8(<16 x i8> %d, <16 x i8> %e) {
; CHECK-SD-LABEL: uv16i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: stp x28, x27, [sp, #-80]! // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 80
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w21, -24
; CHECK-SD-NEXT: .cfi_offset w22, -32
; CHECK-SD-NEXT: .cfi_offset w23, -40
; CHECK-SD-NEXT: .cfi_offset w24, -48
; CHECK-SD-NEXT: .cfi_offset w25, -56
; CHECK-SD-NEXT: .cfi_offset w26, -64
; CHECK-SD-NEXT: .cfi_offset w27, -72
; CHECK-SD-NEXT: .cfi_offset w28, -80
; CHECK-SD-NEXT: umov w11, v1.b[0]
; CHECK-SD-NEXT: umov w12, v0.b[0]
; CHECK-SD-NEXT: umov w8, v1.b[1]
; CHECK-SD-NEXT: umov w9, v0.b[1]
; CHECK-SD-NEXT: umov w14, v1.b[2]
; CHECK-SD-NEXT: umov w15, v0.b[2]
; CHECK-SD-NEXT: umov w17, v1.b[3]
; CHECK-SD-NEXT: umov w18, v0.b[3]
; CHECK-SD-NEXT: umov w1, v1.b[4]
; CHECK-SD-NEXT: umov w2, v0.b[4]
; CHECK-SD-NEXT: umov w4, v1.b[5]
; CHECK-SD-NEXT: umov w5, v0.b[5]
; CHECK-SD-NEXT: udiv w13, w12, w11
; CHECK-SD-NEXT: umov w7, v1.b[6]
; CHECK-SD-NEXT: umov w19, v0.b[6]
; CHECK-SD-NEXT: umov w21, v1.b[7]
; CHECK-SD-NEXT: umov w22, v0.b[7]
; CHECK-SD-NEXT: umov w24, v1.b[8]
; CHECK-SD-NEXT: umov w25, v0.b[8]
; CHECK-SD-NEXT: umov w27, v1.b[9]
; CHECK-SD-NEXT: umov w28, v0.b[9]
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: msub w11, w13, w11, w12
; CHECK-SD-NEXT: umov w13, v1.b[11]
; CHECK-SD-NEXT: fmov s2, w11
; CHECK-SD-NEXT: umov w11, v0.b[10]
; CHECK-SD-NEXT: udiv w16, w15, w14
; CHECK-SD-NEXT: msub w8, w10, w8, w9
; CHECK-SD-NEXT: umov w10, v1.b[10]
; CHECK-SD-NEXT: mov v2.b[1], w8
; CHECK-SD-NEXT: udiv w0, w18, w17
; CHECK-SD-NEXT: msub w8, w16, w14, w15
; CHECK-SD-NEXT: umov w14, v0.b[11]
; CHECK-SD-NEXT: umov w16, v1.b[12]
; CHECK-SD-NEXT: mov v2.b[2], w8
; CHECK-SD-NEXT: udiv w3, w2, w1
; CHECK-SD-NEXT: msub w8, w0, w17, w18
; CHECK-SD-NEXT: umov w17, v0.b[12]
; CHECK-SD-NEXT: umov w0, v1.b[13]
; CHECK-SD-NEXT: mov v2.b[3], w8
; CHECK-SD-NEXT: udiv w6, w5, w4
; CHECK-SD-NEXT: msub w8, w3, w1, w2
; CHECK-SD-NEXT: umov w1, v0.b[13]
; CHECK-SD-NEXT: mov v2.b[4], w8
; CHECK-SD-NEXT: udiv w20, w19, w7
; CHECK-SD-NEXT: msub w8, w6, w4, w5
; CHECK-SD-NEXT: mov v2.b[5], w8
; CHECK-SD-NEXT: udiv w23, w22, w21
; CHECK-SD-NEXT: msub w8, w20, w7, w19
; CHECK-SD-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v2.b[6], w8
; CHECK-SD-NEXT: udiv w26, w25, w24
; CHECK-SD-NEXT: msub w8, w23, w21, w22
; CHECK-SD-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v2.b[7], w8
; CHECK-SD-NEXT: udiv w9, w28, w27
; CHECK-SD-NEXT: msub w8, w26, w24, w25
; CHECK-SD-NEXT: ldp x24, x23, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldp x26, x25, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov v2.b[8], w8
; CHECK-SD-NEXT: udiv w12, w11, w10
; CHECK-SD-NEXT: msub w8, w9, w27, w28
; CHECK-SD-NEXT: mov v2.b[9], w8
; CHECK-SD-NEXT: udiv w15, w14, w13
; CHECK-SD-NEXT: msub w8, w12, w10, w11
; CHECK-SD-NEXT: umov w10, v1.b[14]
; CHECK-SD-NEXT: umov w11, v0.b[14]
; CHECK-SD-NEXT: mov v2.b[10], w8
; CHECK-SD-NEXT: udiv w18, w17, w16
; CHECK-SD-NEXT: msub w8, w15, w13, w14
; CHECK-SD-NEXT: umov w13, v1.b[15]
; CHECK-SD-NEXT: umov w14, v0.b[15]
; CHECK-SD-NEXT: mov v2.b[11], w8
; CHECK-SD-NEXT: udiv w9, w1, w0
; CHECK-SD-NEXT: msub w8, w18, w16, w17
; CHECK-SD-NEXT: mov v2.b[12], w8
; CHECK-SD-NEXT: udiv w12, w11, w10
; CHECK-SD-NEXT: msub w8, w9, w0, w1
; CHECK-SD-NEXT: mov v2.b[13], w8
; CHECK-SD-NEXT: udiv w9, w14, w13
; CHECK-SD-NEXT: msub w8, w12, w10, w11
; CHECK-SD-NEXT: mov v2.b[14], w8
; CHECK-SD-NEXT: msub w8, w9, w13, w14
; CHECK-SD-NEXT: mov v2.b[15], w8
; CHECK-SD-NEXT: mov v0.16b, v2.16b
; CHECK-SD-NEXT: ldp x28, x27, [sp], #80 // 16-byte Folded Reload
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: uv16i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ushll v4.8h, v0.8b, #0
; CHECK-GI-NEXT: ushll v5.8h, v1.8b, #0
; CHECK-GI-NEXT: ushll2 v6.8h, v0.16b, #0
; CHECK-GI-NEXT: ushll2 v7.8h, v1.16b, #0
; CHECK-GI-NEXT: ushll v2.4s, v4.4h, #0
; CHECK-GI-NEXT: ushll v3.4s, v5.4h, #0
; CHECK-GI-NEXT: ushll2 v4.4s, v4.8h, #0
; CHECK-GI-NEXT: ushll2 v5.4s, v5.8h, #0
; CHECK-GI-NEXT: ushll v0.4s, v6.4h, #0
; CHECK-GI-NEXT: ushll v1.4s, v7.4h, #0
; CHECK-GI-NEXT: ushll2 v6.4s, v6.8h, #0
; CHECK-GI-NEXT: ushll2 v7.4s, v7.8h, #0
; CHECK-GI-NEXT: fmov w8, s2
; CHECK-GI-NEXT: fmov w9, s3
; CHECK-GI-NEXT: mov w12, v3.s[3]
; CHECK-GI-NEXT: fmov w13, s5
; CHECK-GI-NEXT: mov w16, v5.s[3]
; CHECK-GI-NEXT: fmov w17, s1
; CHECK-GI-NEXT: mov w18, v1.s[1]
; CHECK-GI-NEXT: mov w0, v1.s[2]
; CHECK-GI-NEXT: mov w1, v1.s[3]
; CHECK-GI-NEXT: udiv w11, w8, w9
; CHECK-GI-NEXT: mov w8, v2.s[1]
; CHECK-GI-NEXT: mov w9, v3.s[1]
; CHECK-GI-NEXT: fmov w2, s7
; CHECK-GI-NEXT: mov w3, v7.s[1]
; CHECK-GI-NEXT: mov w4, v7.s[2]
; CHECK-GI-NEXT: udiv w10, w8, w9
; CHECK-GI-NEXT: mov w8, v2.s[2]
; CHECK-GI-NEXT: mov w9, v3.s[2]
; CHECK-GI-NEXT: mov v16.s[0], w11
; CHECK-GI-NEXT: mov w11, v6.s[3]
; CHECK-GI-NEXT: udiv w9, w8, w9
; CHECK-GI-NEXT: mov w8, v2.s[3]
; CHECK-GI-NEXT: mov v16.s[1], w10
; CHECK-GI-NEXT: udiv w8, w8, w12
; CHECK-GI-NEXT: fmov w12, s4
; CHECK-GI-NEXT: mov v16.s[2], w9
; CHECK-GI-NEXT: udiv w14, w12, w13
; CHECK-GI-NEXT: mov w12, v4.s[1]
; CHECK-GI-NEXT: mov w13, v5.s[1]
; CHECK-GI-NEXT: mov v16.s[3], w8
; CHECK-GI-NEXT: mls v2.4s, v16.4s, v3.4s
; CHECK-GI-NEXT: udiv w15, w12, w13
; CHECK-GI-NEXT: mov w12, v4.s[2]
; CHECK-GI-NEXT: mov w13, v5.s[2]
; CHECK-GI-NEXT: mov v17.s[0], w14
; CHECK-GI-NEXT: mov w14, v7.s[3]
; CHECK-GI-NEXT: udiv w13, w12, w13
; CHECK-GI-NEXT: mov w12, v4.s[3]
; CHECK-GI-NEXT: mov v17.s[1], w15
; CHECK-GI-NEXT: udiv w12, w12, w16
; CHECK-GI-NEXT: fmov w16, s0
; CHECK-GI-NEXT: mov v17.s[2], w13
; CHECK-GI-NEXT: udiv w16, w16, w17
; CHECK-GI-NEXT: mov w17, v0.s[1]
; CHECK-GI-NEXT: mov v17.s[3], w12
; CHECK-GI-NEXT: mls v4.4s, v17.4s, v5.4s
; CHECK-GI-NEXT: udiv w17, w17, w18
; CHECK-GI-NEXT: mov w18, v0.s[2]
; CHECK-GI-NEXT: mov v18.s[0], w16
; CHECK-GI-NEXT: udiv w18, w18, w0
; CHECK-GI-NEXT: mov w0, v0.s[3]
; CHECK-GI-NEXT: mov v18.s[1], w17
; CHECK-GI-NEXT: udiv w0, w0, w1
; CHECK-GI-NEXT: fmov w1, s6
; CHECK-GI-NEXT: mov v18.s[2], w18
; CHECK-GI-NEXT: udiv w1, w1, w2
; CHECK-GI-NEXT: mov w2, v6.s[1]
; CHECK-GI-NEXT: mov v18.s[3], w0
; CHECK-GI-NEXT: mls v0.4s, v18.4s, v1.4s
; CHECK-GI-NEXT: uzp1 v1.8h, v2.8h, v4.8h
; CHECK-GI-NEXT: udiv w2, w2, w3
; CHECK-GI-NEXT: mov w3, v6.s[2]
; CHECK-GI-NEXT: mov v19.s[0], w1
; CHECK-GI-NEXT: udiv w3, w3, w4
; CHECK-GI-NEXT: mov v19.s[1], w2
; CHECK-GI-NEXT: udiv w10, w11, w14
; CHECK-GI-NEXT: mov v19.s[2], w3
; CHECK-GI-NEXT: mov v19.s[3], w10
; CHECK-GI-NEXT: mls v6.4s, v19.4s, v7.4s
; CHECK-GI-NEXT: uzp1 v0.8h, v0.8h, v6.8h
; CHECK-GI-NEXT: uzp1 v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT: ret
entry:
  %s = urem <16 x i8> %d, %e
  ret <16 x i8> %s
}

define <32 x i8> @uv32i8(<32 x i8> %d, <32 x i8> %e) {
; CHECK-SD-LABEL: uv32i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: sub sp, sp, #304
; CHECK-SD-NEXT: stp x29, x30, [sp, #208] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x28, x27, [sp, #224] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x26, x25, [sp, #240] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x24, x23, [sp, #256] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x22, x21, [sp, #272] // 16-byte Folded Spill
; CHECK-SD-NEXT: stp x20, x19, [sp, #288] // 16-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 304
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w21, -24
; CHECK-SD-NEXT: .cfi_offset w22, -32
; CHECK-SD-NEXT: .cfi_offset w23, -40
; CHECK-SD-NEXT: .cfi_offset w24, -48
; CHECK-SD-NEXT: .cfi_offset w25, -56
; CHECK-SD-NEXT: .cfi_offset w26, -64
; CHECK-SD-NEXT: .cfi_offset w27, -72
; CHECK-SD-NEXT: .cfi_offset w28, -80
; CHECK-SD-NEXT: .cfi_offset w30, -88
; CHECK-SD-NEXT: .cfi_offset w29, -96
; CHECK-SD-NEXT: umov w8, v2.b[1]
; CHECK-SD-NEXT: umov w9, v0.b[1]
; CHECK-SD-NEXT: umov w19, v3.b[7]
; CHECK-SD-NEXT: umov w7, v1.b[7]
; CHECK-SD-NEXT: umov w6, v3.b[8]
; CHECK-SD-NEXT: umov w3, v1.b[8]
; CHECK-SD-NEXT: umov w13, v3.b[0]
; CHECK-SD-NEXT: umov w5, v3.b[1]
; CHECK-SD-NEXT: umov w0, v1.b[1]
; CHECK-SD-NEXT: umov w12, v3.b[2]
; CHECK-SD-NEXT: umov w17, v3.b[3]
; CHECK-SD-NEXT: umov w16, v1.b[3]
; CHECK-SD-NEXT: str w8, [sp, #80] // 4-byte Folded Spill
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: umov w8, v2.b[0]
; CHECK-SD-NEXT: str w9, [sp, #88] // 4-byte Folded Spill
; CHECK-SD-NEXT: umov w9, v0.b[0]
; CHECK-SD-NEXT: ldr w30, [sp, #80] // 4-byte Folded Reload
; CHECK-SD-NEXT: umov w15, v3.b[4]
; CHECK-SD-NEXT: umov w14, v1.b[4]
; CHECK-SD-NEXT: umov w4, v3.b[5]
; CHECK-SD-NEXT: umov w1, v1.b[5]
; CHECK-SD-NEXT: umov w2, v3.b[6]
; CHECK-SD-NEXT: umov w18, v1.b[6]
; CHECK-SD-NEXT: str w8, [sp, #32] // 4-byte Folded Spill
; CHECK-SD-NEXT: umov w21, v3.b[9]
; CHECK-SD-NEXT: umov w20, v1.b[9]
; CHECK-SD-NEXT: str w9, [sp, #40] // 4-byte Folded Spill
; CHECK-SD-NEXT: ldr w29, [sp, #32] // 4-byte Folded Reload
; CHECK-SD-NEXT: udiv w11, w9, w8
; CHECK-SD-NEXT: umov w8, v2.b[2]
; CHECK-SD-NEXT: umov w9, v0.b[2]
; CHECK-SD-NEXT: str w10, [sp, #96] // 4-byte Folded Spill
; CHECK-SD-NEXT: stp w8, w9, [sp, #16] // 8-byte Folded Spill
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: umov w8, v2.b[3]
; CHECK-SD-NEXT: umov w9, v0.b[3]
; CHECK-SD-NEXT: stp w11, w8, [sp, #48] // 8-byte Folded Spill
; CHECK-SD-NEXT: str w10, [sp, #24] // 4-byte Folded Spill
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: umov w8, v2.b[4]
; CHECK-SD-NEXT: str w8, [sp, #28] // 4-byte Folded Spill
; CHECK-SD-NEXT: stp w9, w10, [sp, #56] // 8-byte Folded Spill
; CHECK-SD-NEXT: umov w9, v0.b[4]
; CHECK-SD-NEXT: udiv w27, w0, w5
; CHECK-SD-NEXT: str w9, [sp, #36] // 4-byte Folded Spill
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: umov w8, v2.b[5]
; CHECK-SD-NEXT: umov w9, v0.b[5]
; CHECK-SD-NEXT: str w8, [sp, #76] // 4-byte Folded Spill
; CHECK-SD-NEXT: str w9, [sp, #84] // 4-byte Folded Spill
; CHECK-SD-NEXT: str w10, [sp, #44] // 4-byte Folded Spill
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: umov w8, v2.b[6]
; CHECK-SD-NEXT: umov w9, v0.b[6]
; CHECK-SD-NEXT: stp w8, w9, [sp, #64] // 8-byte Folded Spill
; CHECK-SD-NEXT: str w10, [sp, #92] // 4-byte Folded Spill
; CHECK-SD-NEXT: udiv w10, w9, w8
; CHECK-SD-NEXT: umov w8, v2.b[7]
; CHECK-SD-NEXT: umov w9, v0.b[7]
; CHECK-SD-NEXT: stp w8, w9, [sp, #112] // 8-byte Folded Spill
; CHECK-SD-NEXT: udiv w11, w9, w8
; CHECK-SD-NEXT: umov w8, v2.b[8]
; CHECK-SD-NEXT: umov w9, v0.b[8]
; CHECK-SD-NEXT: str w10, [sp, #72] // 4-byte Folded Spill
; CHECK-SD-NEXT: stp w8, w9, [sp, #100] // 8-byte Folded Spill
1605; CHECK-SD-NEXT: udiv w10, w9, w8 1606; CHECK-SD-NEXT: umov w8, v2.b[9] 1607; CHECK-SD-NEXT: umov w9, v0.b[9] 1608; CHECK-SD-NEXT: stp w8, w9, [sp, #136] // 8-byte Folded Spill 1609; CHECK-SD-NEXT: str w10, [sp, #108] // 4-byte Folded Spill 1610; CHECK-SD-NEXT: udiv w10, w9, w8 1611; CHECK-SD-NEXT: umov w8, v2.b[10] 1612; CHECK-SD-NEXT: umov w9, v0.b[10] 1613; CHECK-SD-NEXT: stp w11, w8, [sp, #120] // 8-byte Folded Spill 1614; CHECK-SD-NEXT: str w10, [sp, #144] // 4-byte Folded Spill 1615; CHECK-SD-NEXT: udiv w10, w9, w8 1616; CHECK-SD-NEXT: umov w8, v2.b[11] 1617; CHECK-SD-NEXT: stp w9, w10, [sp, #128] // 8-byte Folded Spill 1618; CHECK-SD-NEXT: umov w9, v0.b[11] 1619; CHECK-SD-NEXT: udiv w25, w16, w17 1620; CHECK-SD-NEXT: stp w8, w9, [sp, #172] // 8-byte Folded Spill 1621; CHECK-SD-NEXT: udiv w11, w9, w8 1622; CHECK-SD-NEXT: umov w8, v2.b[12] 1623; CHECK-SD-NEXT: umov w9, v0.b[12] 1624; CHECK-SD-NEXT: str w8, [sp, #152] // 4-byte Folded Spill 1625; CHECK-SD-NEXT: str w9, [sp, #160] // 4-byte Folded Spill 1626; CHECK-SD-NEXT: udiv w10, w9, w8 1627; CHECK-SD-NEXT: umov w8, v2.b[13] 1628; CHECK-SD-NEXT: umov w9, v0.b[13] 1629; CHECK-SD-NEXT: stp w8, w9, [sp, #196] // 8-byte Folded Spill 1630; CHECK-SD-NEXT: str w10, [sp, #168] // 4-byte Folded Spill 1631; CHECK-SD-NEXT: udiv w10, w9, w8 1632; CHECK-SD-NEXT: umov w8, v2.b[14] 1633; CHECK-SD-NEXT: umov w9, v0.b[14] 1634; CHECK-SD-NEXT: stp w11, w8, [sp, #180] // 8-byte Folded Spill 1635; CHECK-SD-NEXT: umov w11, v1.b[2] 1636; CHECK-SD-NEXT: str w10, [sp, #204] // 4-byte Folded Spill 1637; CHECK-SD-NEXT: udiv w10, w9, w8 1638; CHECK-SD-NEXT: umov w8, v2.b[15] 1639; CHECK-SD-NEXT: str w8, [sp, #148] // 4-byte Folded Spill 1640; CHECK-SD-NEXT: stp w9, w10, [sp, #188] // 8-byte Folded Spill 1641; CHECK-SD-NEXT: umov w9, v0.b[15] 1642; CHECK-SD-NEXT: udiv w22, w11, w12 1643; CHECK-SD-NEXT: str w9, [sp, #156] // 4-byte Folded Spill 1644; CHECK-SD-NEXT: udiv w10, w9, w8 1645; CHECK-SD-NEXT: str w10, [sp, #164] // 4-byte 
Folded Spill 1646; CHECK-SD-NEXT: umov w10, v1.b[0] 1647; CHECK-SD-NEXT: udiv w9, w7, w19 1648; CHECK-SD-NEXT: udiv w8, w3, w6 1649; CHECK-SD-NEXT: udiv w23, w10, w13 1650; CHECK-SD-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill 1651; CHECK-SD-NEXT: ldr w8, [sp, #96] // 4-byte Folded Reload 1652; CHECK-SD-NEXT: ldr w9, [sp, #88] // 4-byte Folded Reload 1653; CHECK-SD-NEXT: msub w9, w8, w30, w9 1654; CHECK-SD-NEXT: ldr w8, [sp, #48] // 4-byte Folded Reload 1655; CHECK-SD-NEXT: ldr w30, [sp, #40] // 4-byte Folded Reload 1656; CHECK-SD-NEXT: msub w8, w8, w29, w30 1657; CHECK-SD-NEXT: ldp x29, x30, [sp, #208] // 16-byte Folded Reload 1658; CHECK-SD-NEXT: fmov s0, w8 1659; CHECK-SD-NEXT: msub w10, w23, w13, w10 1660; CHECK-SD-NEXT: udiv w24, w14, w15 1661; CHECK-SD-NEXT: msub w13, w27, w5, w0 1662; CHECK-SD-NEXT: ldr w5, [sp, #16] // 4-byte Folded Reload 1663; CHECK-SD-NEXT: mov v0.b[1], w9 1664; CHECK-SD-NEXT: msub w9, w22, w12, w11 1665; CHECK-SD-NEXT: umov w11, v1.b[10] 1666; CHECK-SD-NEXT: fmov s2, w10 1667; CHECK-SD-NEXT: ldp w10, w8, [sp, #20] // 8-byte Folded Reload 1668; CHECK-SD-NEXT: mov v2.b[1], w13 1669; CHECK-SD-NEXT: msub w8, w8, w5, w10 1670; CHECK-SD-NEXT: ldr w5, [sp, #52] // 4-byte Folded Reload 1671; CHECK-SD-NEXT: umov w10, v3.b[10] 1672; CHECK-SD-NEXT: udiv w28, w1, w4 1673; CHECK-SD-NEXT: ldp w13, w12, [sp, #56] // 8-byte Folded Reload 1674; CHECK-SD-NEXT: mov v2.b[2], w9 1675; CHECK-SD-NEXT: mov v0.b[2], w8 1676; CHECK-SD-NEXT: msub w8, w25, w17, w16 1677; CHECK-SD-NEXT: ldr w17, [sp, #28] // 4-byte Folded Reload 1678; CHECK-SD-NEXT: ldr w16, [sp, #36] // 4-byte Folded Reload 1679; CHECK-SD-NEXT: msub w12, w12, w5, w13 1680; CHECK-SD-NEXT: ldr w13, [sp, #44] // 4-byte Folded Reload 1681; CHECK-SD-NEXT: ldr w5, [sp, #136] // 4-byte Folded Reload 1682; CHECK-SD-NEXT: mov v2.b[3], w8 1683; CHECK-SD-NEXT: msub w8, w24, w15, w14 1684; CHECK-SD-NEXT: ldr w15, [sp, #92] // 4-byte Folded Reload 1685; CHECK-SD-NEXT: mov v0.b[3], w12 1686; CHECK-SD-NEXT: 
msub w13, w13, w17, w16 1687; CHECK-SD-NEXT: ldr w17, [sp, #76] // 4-byte Folded Reload 1688; CHECK-SD-NEXT: udiv w26, w18, w2 1689; CHECK-SD-NEXT: ldr w16, [sp, #84] // 4-byte Folded Reload 1690; CHECK-SD-NEXT: umov w12, v3.b[11] 1691; CHECK-SD-NEXT: msub w15, w15, w17, w16 1692; CHECK-SD-NEXT: umov w14, v1.b[11] 1693; CHECK-SD-NEXT: mov v2.b[4], w8 1694; CHECK-SD-NEXT: msub w8, w28, w4, w1 1695; CHECK-SD-NEXT: ldr w1, [sp, #64] // 4-byte Folded Reload 1696; CHECK-SD-NEXT: mov v0.b[4], w13 1697; CHECK-SD-NEXT: ldr w4, [sp, #100] // 4-byte Folded Reload 1698; CHECK-SD-NEXT: ldp w17, w16, [sp, #68] // 8-byte Folded Reload 1699; CHECK-SD-NEXT: ldp x24, x23, [sp, #256] // 16-byte Folded Reload 1700; CHECK-SD-NEXT: mov v2.b[5], w8 1701; CHECK-SD-NEXT: ldp x28, x27, [sp, #224] // 16-byte Folded Reload 1702; CHECK-SD-NEXT: mov v0.b[5], w15 1703; CHECK-SD-NEXT: msub w16, w16, w1, w17 1704; CHECK-SD-NEXT: umov w15, v3.b[12] 1705; CHECK-SD-NEXT: msub w8, w26, w2, w18 1706; CHECK-SD-NEXT: ldr w2, [sp, #112] // 4-byte Folded Reload 1707; CHECK-SD-NEXT: udiv w0, w20, w21 1708; CHECK-SD-NEXT: ldp w1, w18, [sp, #116] // 8-byte Folded Reload 1709; CHECK-SD-NEXT: umov w17, v1.b[12] 1710; CHECK-SD-NEXT: ldp x26, x25, [sp, #240] // 16-byte Folded Reload 1711; CHECK-SD-NEXT: mov v2.b[6], w8 1712; CHECK-SD-NEXT: ldr w8, [sp, #12] // 4-byte Folded Reload 1713; CHECK-SD-NEXT: mov v0.b[6], w16 1714; CHECK-SD-NEXT: msub w18, w18, w2, w1 1715; CHECK-SD-NEXT: msub w8, w8, w19, w7 1716; CHECK-SD-NEXT: ldp w2, w1, [sp, #104] // 8-byte Folded Reload 1717; CHECK-SD-NEXT: mov v0.b[7], w18 1718; CHECK-SD-NEXT: umov w18, v3.b[13] 1719; CHECK-SD-NEXT: mov v2.b[7], w8 1720; CHECK-SD-NEXT: ldr w8, [sp, #8] // 4-byte Folded Reload 1721; CHECK-SD-NEXT: udiv w9, w11, w10 1722; CHECK-SD-NEXT: msub w1, w1, w4, w2 1723; CHECK-SD-NEXT: umov w2, v1.b[13] 1724; CHECK-SD-NEXT: msub w8, w8, w6, w3 1725; CHECK-SD-NEXT: ldp w4, w3, [sp, #140] // 8-byte Folded Reload 1726; CHECK-SD-NEXT: mov v0.b[8], w1 1727; 
CHECK-SD-NEXT: mov v2.b[8], w8 1728; CHECK-SD-NEXT: msub w8, w0, w21, w20 1729; CHECK-SD-NEXT: msub w3, w3, w5, w4 1730; CHECK-SD-NEXT: ldr w5, [sp, #124] // 4-byte Folded Reload 1731; CHECK-SD-NEXT: ldp w4, w1, [sp, #128] // 8-byte Folded Reload 1732; CHECK-SD-NEXT: udiv w13, w14, w12 1733; CHECK-SD-NEXT: ldp x20, x19, [sp, #288] // 16-byte Folded Reload 1734; CHECK-SD-NEXT: mov v2.b[9], w8 1735; CHECK-SD-NEXT: mov v0.b[9], w3 1736; CHECK-SD-NEXT: msub w8, w9, w10, w11 1737; CHECK-SD-NEXT: msub w1, w1, w5, w4 1738; CHECK-SD-NEXT: ldr w4, [sp, #172] // 4-byte Folded Reload 1739; CHECK-SD-NEXT: umov w9, v3.b[14] 1740; CHECK-SD-NEXT: ldp w3, w11, [sp, #176] // 8-byte Folded Reload 1741; CHECK-SD-NEXT: umov w10, v1.b[14] 1742; CHECK-SD-NEXT: ldp x22, x21, [sp, #272] // 16-byte Folded Reload 1743; CHECK-SD-NEXT: mov v2.b[10], w8 1744; CHECK-SD-NEXT: mov v0.b[10], w1 1745; CHECK-SD-NEXT: ldr w1, [sp, #152] // 4-byte Folded Reload 1746; CHECK-SD-NEXT: msub w11, w11, w4, w3 1747; CHECK-SD-NEXT: udiv w16, w17, w15 1748; CHECK-SD-NEXT: msub w8, w13, w12, w14 1749; CHECK-SD-NEXT: ldr w13, [sp, #168] // 4-byte Folded Reload 1750; CHECK-SD-NEXT: ldr w14, [sp, #160] // 4-byte Folded Reload 1751; CHECK-SD-NEXT: mov v0.b[11], w11 1752; CHECK-SD-NEXT: umov w11, v3.b[15] 1753; CHECK-SD-NEXT: msub w13, w13, w1, w14 1754; CHECK-SD-NEXT: umov w14, v1.b[15] 1755; CHECK-SD-NEXT: mov v2.b[11], w8 1756; CHECK-SD-NEXT: mov v0.b[12], w13 1757; CHECK-SD-NEXT: udiv w0, w2, w18 1758; CHECK-SD-NEXT: msub w8, w16, w15, w17 1759; CHECK-SD-NEXT: ldr w17, [sp, #196] // 4-byte Folded Reload 1760; CHECK-SD-NEXT: ldp w16, w15, [sp, #200] // 8-byte Folded Reload 1761; CHECK-SD-NEXT: mov v2.b[12], w8 1762; CHECK-SD-NEXT: msub w15, w15, w17, w16 1763; CHECK-SD-NEXT: ldp w17, w16, [sp, #188] // 8-byte Folded Reload 1764; CHECK-SD-NEXT: mov v0.b[13], w15 1765; CHECK-SD-NEXT: udiv w12, w10, w9 1766; CHECK-SD-NEXT: msub w8, w0, w18, w2 1767; CHECK-SD-NEXT: ldr w18, [sp, #184] // 4-byte Folded Reload 1768; 
CHECK-SD-NEXT: msub w16, w16, w18, w17 1769; CHECK-SD-NEXT: mov v2.b[13], w8 1770; CHECK-SD-NEXT: mov v0.b[14], w16 1771; CHECK-SD-NEXT: udiv w13, w14, w11 1772; CHECK-SD-NEXT: msub w8, w12, w9, w10 1773; CHECK-SD-NEXT: ldr w9, [sp, #164] // 4-byte Folded Reload 1774; CHECK-SD-NEXT: ldr w12, [sp, #148] // 4-byte Folded Reload 1775; CHECK-SD-NEXT: ldr w10, [sp, #156] // 4-byte Folded Reload 1776; CHECK-SD-NEXT: mov v2.b[14], w8 1777; CHECK-SD-NEXT: msub w9, w9, w12, w10 1778; CHECK-SD-NEXT: mov v0.b[15], w9 1779; CHECK-SD-NEXT: msub w8, w13, w11, w14 1780; CHECK-SD-NEXT: mov v2.b[15], w8 1781; CHECK-SD-NEXT: mov v1.16b, v2.16b 1782; CHECK-SD-NEXT: add sp, sp, #304 1783; CHECK-SD-NEXT: ret 1784; 1785; CHECK-GI-LABEL: uv32i8: 1786; CHECK-GI: // %bb.0: // %entry 1787; CHECK-GI-NEXT: stp x29, x30, [sp, #-96]! // 16-byte Folded Spill 1788; CHECK-GI-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill 1789; CHECK-GI-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill 1790; CHECK-GI-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill 1791; CHECK-GI-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill 1792; CHECK-GI-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill 1793; CHECK-GI-NEXT: .cfi_def_cfa_offset 96 1794; CHECK-GI-NEXT: .cfi_offset w19, -8 1795; CHECK-GI-NEXT: .cfi_offset w20, -16 1796; CHECK-GI-NEXT: .cfi_offset w21, -24 1797; CHECK-GI-NEXT: .cfi_offset w22, -32 1798; CHECK-GI-NEXT: .cfi_offset w23, -40 1799; CHECK-GI-NEXT: .cfi_offset w24, -48 1800; CHECK-GI-NEXT: .cfi_offset w25, -56 1801; CHECK-GI-NEXT: .cfi_offset w26, -64 1802; CHECK-GI-NEXT: .cfi_offset w27, -72 1803; CHECK-GI-NEXT: .cfi_offset w28, -80 1804; CHECK-GI-NEXT: .cfi_offset w30, -88 1805; CHECK-GI-NEXT: .cfi_offset w29, -96 1806; CHECK-GI-NEXT: ushll v4.8h, v0.8b, #0 1807; CHECK-GI-NEXT: ushll v5.8h, v2.8b, #0 1808; CHECK-GI-NEXT: ushll v16.8h, v1.8b, #0 1809; CHECK-GI-NEXT: ushll v17.8h, v3.8b, #0 1810; CHECK-GI-NEXT: ushll v6.4s, v4.4h, #0 1811; CHECK-GI-NEXT: ushll v7.4s, v5.4h, #0 
1812; CHECK-GI-NEXT: ushll2 v4.4s, v4.8h, #0 1813; CHECK-GI-NEXT: ushll2 v5.4s, v5.8h, #0 1814; CHECK-GI-NEXT: ushll v18.4s, v16.4h, #0 1815; CHECK-GI-NEXT: ushll v19.4s, v17.4h, #0 1816; CHECK-GI-NEXT: ushll2 v16.4s, v16.8h, #0 1817; CHECK-GI-NEXT: ushll2 v17.4s, v17.8h, #0 1818; CHECK-GI-NEXT: fmov w8, s6 1819; CHECK-GI-NEXT: fmov w9, s7 1820; CHECK-GI-NEXT: mov w12, v7.s[3] 1821; CHECK-GI-NEXT: fmov w13, s5 1822; CHECK-GI-NEXT: mov w16, v5.s[3] 1823; CHECK-GI-NEXT: fmov w6, s19 1824; CHECK-GI-NEXT: mov w7, v19.s[3] 1825; CHECK-GI-NEXT: fmov w21, s17 1826; CHECK-GI-NEXT: mov w23, v17.s[3] 1827; CHECK-GI-NEXT: udiv w11, w8, w9 1828; CHECK-GI-NEXT: mov w8, v6.s[1] 1829; CHECK-GI-NEXT: mov w9, v7.s[1] 1830; CHECK-GI-NEXT: udiv w10, w8, w9 1831; CHECK-GI-NEXT: mov w8, v6.s[2] 1832; CHECK-GI-NEXT: mov w9, v7.s[2] 1833; CHECK-GI-NEXT: mov v20.s[0], w11 1834; CHECK-GI-NEXT: udiv w9, w8, w9 1835; CHECK-GI-NEXT: mov w8, v6.s[3] 1836; CHECK-GI-NEXT: ushll2 v6.8h, v0.16b, #0 1837; CHECK-GI-NEXT: mov v20.s[1], w10 1838; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 1839; CHECK-GI-NEXT: ushll v28.4s, v0.4h, #0 1840; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0 1841; CHECK-GI-NEXT: udiv w8, w8, w12 1842; CHECK-GI-NEXT: fmov w12, s4 1843; CHECK-GI-NEXT: mov v20.s[2], w9 1844; CHECK-GI-NEXT: udiv w15, w12, w13 1845; CHECK-GI-NEXT: mov w12, v4.s[1] 1846; CHECK-GI-NEXT: mov w13, v5.s[1] 1847; CHECK-GI-NEXT: mov v20.s[3], w8 1848; CHECK-GI-NEXT: udiv w14, w12, w13 1849; CHECK-GI-NEXT: mov w12, v4.s[2] 1850; CHECK-GI-NEXT: mov w13, v5.s[2] 1851; CHECK-GI-NEXT: ushll v5.4s, v6.4h, #0 1852; CHECK-GI-NEXT: mov v21.s[0], w15 1853; CHECK-GI-NEXT: udiv w13, w12, w13 1854; CHECK-GI-NEXT: mov w12, v4.s[3] 1855; CHECK-GI-NEXT: ushll2 v4.8h, v2.16b, #0 1856; CHECK-GI-NEXT: mov v21.s[1], w14 1857; CHECK-GI-NEXT: ushll v2.8h, v2.8b, #0 1858; CHECK-GI-NEXT: ushll v7.4s, v4.4h, #0 1859; CHECK-GI-NEXT: ushll v30.4s, v2.4h, #0 1860; CHECK-GI-NEXT: ushll2 v2.4s, v2.8h, #0 1861; CHECK-GI-NEXT: fmov w17, s7 1862; 
CHECK-GI-NEXT: mls v28.4s, v20.4s, v30.4s 1863; CHECK-GI-NEXT: udiv w12, w12, w16 1864; CHECK-GI-NEXT: fmov w16, s5 1865; CHECK-GI-NEXT: mov v21.s[2], w13 1866; CHECK-GI-NEXT: udiv w1, w16, w17 1867; CHECK-GI-NEXT: mov w16, v5.s[1] 1868; CHECK-GI-NEXT: mov w17, v7.s[1] 1869; CHECK-GI-NEXT: mov v21.s[3], w12 1870; CHECK-GI-NEXT: mls v0.4s, v21.4s, v2.4s 1871; CHECK-GI-NEXT: udiv w0, w16, w17 1872; CHECK-GI-NEXT: mov w16, v5.s[2] 1873; CHECK-GI-NEXT: mov w17, v7.s[2] 1874; CHECK-GI-NEXT: mov v22.s[0], w1 1875; CHECK-GI-NEXT: uzp1 v0.8h, v28.8h, v0.8h 1876; CHECK-GI-NEXT: udiv w18, w16, w17 1877; CHECK-GI-NEXT: mov w16, v5.s[3] 1878; CHECK-GI-NEXT: mov w17, v7.s[3] 1879; CHECK-GI-NEXT: ushll2 v5.4s, v6.8h, #0 1880; CHECK-GI-NEXT: ushll2 v7.4s, v4.8h, #0 1881; CHECK-GI-NEXT: mov v22.s[1], w0 1882; CHECK-GI-NEXT: ushll v6.4s, v6.4h, #0 1883; CHECK-GI-NEXT: ushll v4.4s, v4.4h, #0 1884; CHECK-GI-NEXT: fmov w2, s7 1885; CHECK-GI-NEXT: mov w4, v7.s[3] 1886; CHECK-GI-NEXT: udiv w16, w16, w17 1887; CHECK-GI-NEXT: fmov w17, s5 1888; CHECK-GI-NEXT: mov v22.s[2], w18 1889; CHECK-GI-NEXT: udiv w5, w17, w2 1890; CHECK-GI-NEXT: mov w17, v5.s[1] 1891; CHECK-GI-NEXT: mov w2, v7.s[1] 1892; CHECK-GI-NEXT: mov v22.s[3], w16 1893; CHECK-GI-NEXT: mls v6.4s, v22.4s, v4.4s 1894; CHECK-GI-NEXT: udiv w3, w17, w2 1895; CHECK-GI-NEXT: mov w17, v5.s[2] 1896; CHECK-GI-NEXT: mov w2, v7.s[2] 1897; CHECK-GI-NEXT: mov v23.s[0], w5 1898; CHECK-GI-NEXT: udiv w2, w17, w2 1899; CHECK-GI-NEXT: mov w17, v5.s[3] 1900; CHECK-GI-NEXT: mov v23.s[1], w3 1901; CHECK-GI-NEXT: udiv w17, w17, w4 1902; CHECK-GI-NEXT: fmov w4, s18 1903; CHECK-GI-NEXT: mov v23.s[2], w2 1904; CHECK-GI-NEXT: udiv w20, w4, w6 1905; CHECK-GI-NEXT: mov w4, v18.s[1] 1906; CHECK-GI-NEXT: mov w6, v19.s[1] 1907; CHECK-GI-NEXT: mov v23.s[3], w17 1908; CHECK-GI-NEXT: mls v5.4s, v23.4s, v7.4s 1909; CHECK-GI-NEXT: udiv w19, w4, w6 1910; CHECK-GI-NEXT: mov w4, v18.s[2] 1911; CHECK-GI-NEXT: mov w6, v19.s[2] 1912; CHECK-GI-NEXT: mov v24.s[0], w20 
1913; CHECK-GI-NEXT: uzp1 v2.8h, v6.8h, v5.8h 1914; CHECK-GI-NEXT: uzp1 v0.16b, v0.16b, v2.16b 1915; CHECK-GI-NEXT: udiv w6, w4, w6 1916; CHECK-GI-NEXT: mov w4, v18.s[3] 1917; CHECK-GI-NEXT: mov v24.s[1], w19 1918; CHECK-GI-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload 1919; CHECK-GI-NEXT: udiv w4, w4, w7 1920; CHECK-GI-NEXT: fmov w7, s16 1921; CHECK-GI-NEXT: mov v24.s[2], w6 1922; CHECK-GI-NEXT: udiv w24, w7, w21 1923; CHECK-GI-NEXT: mov w7, v16.s[1] 1924; CHECK-GI-NEXT: mov w21, v17.s[1] 1925; CHECK-GI-NEXT: mov v24.s[3], w4 1926; CHECK-GI-NEXT: udiv w22, w7, w21 1927; CHECK-GI-NEXT: mov w7, v16.s[2] 1928; CHECK-GI-NEXT: mov w21, v17.s[2] 1929; CHECK-GI-NEXT: ushll2 v17.8h, v1.16b, #0 1930; CHECK-GI-NEXT: mov v25.s[0], w24 1931; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0 1932; CHECK-GI-NEXT: ushll v18.4s, v17.4h, #0 1933; CHECK-GI-NEXT: ushll v29.4s, v1.4h, #0 1934; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0 1935; CHECK-GI-NEXT: udiv w21, w7, w21 1936; CHECK-GI-NEXT: mov w7, v16.s[3] 1937; CHECK-GI-NEXT: ushll2 v16.8h, v3.16b, #0 1938; CHECK-GI-NEXT: mov v25.s[1], w22 1939; CHECK-GI-NEXT: ushll v3.8h, v3.8b, #0 1940; CHECK-GI-NEXT: ushll v19.4s, v16.4h, #0 1941; CHECK-GI-NEXT: ushll v31.4s, v3.4h, #0 1942; CHECK-GI-NEXT: ushll2 v3.4s, v3.8h, #0 1943; CHECK-GI-NEXT: fmov w25, s19 1944; CHECK-GI-NEXT: mov w26, v19.s[1] 1945; CHECK-GI-NEXT: mov w27, v19.s[2] 1946; CHECK-GI-NEXT: mov w28, v19.s[3] 1947; CHECK-GI-NEXT: ushll2 v19.4s, v16.8h, #0 1948; CHECK-GI-NEXT: ushll v16.4s, v16.4h, #0 1949; CHECK-GI-NEXT: udiv w7, w7, w23 1950; CHECK-GI-NEXT: fmov w23, s18 1951; CHECK-GI-NEXT: mov v25.s[2], w21 1952; CHECK-GI-NEXT: mls v29.4s, v24.4s, v31.4s 1953; CHECK-GI-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload 1954; CHECK-GI-NEXT: fmov w29, s19 1955; CHECK-GI-NEXT: mov w30, v19.s[1] 1956; CHECK-GI-NEXT: mov w15, v19.s[2] 1957; CHECK-GI-NEXT: udiv w25, w23, w25 1958; CHECK-GI-NEXT: mov w23, v18.s[1] 1959; CHECK-GI-NEXT: mov v25.s[3], w7 1960; CHECK-GI-NEXT: mls 
v1.4s, v25.4s, v3.4s 1961; CHECK-GI-NEXT: udiv w26, w23, w26 1962; CHECK-GI-NEXT: mov w23, v18.s[2] 1963; CHECK-GI-NEXT: mov v26.s[0], w25 1964; CHECK-GI-NEXT: uzp1 v1.8h, v29.8h, v1.8h 1965; CHECK-GI-NEXT: udiv w27, w23, w27 1966; CHECK-GI-NEXT: mov w23, v18.s[3] 1967; CHECK-GI-NEXT: ushll2 v18.4s, v17.8h, #0 1968; CHECK-GI-NEXT: mov v26.s[1], w26 1969; CHECK-GI-NEXT: ushll v17.4s, v17.4h, #0 1970; CHECK-GI-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload 1971; CHECK-GI-NEXT: mov w11, v18.s[2] 1972; CHECK-GI-NEXT: mov w9, v18.s[3] 1973; CHECK-GI-NEXT: udiv w23, w23, w28 1974; CHECK-GI-NEXT: fmov w28, s18 1975; CHECK-GI-NEXT: mov v26.s[2], w27 1976; CHECK-GI-NEXT: udiv w28, w28, w29 1977; CHECK-GI-NEXT: mov w29, v18.s[1] 1978; CHECK-GI-NEXT: mov v26.s[3], w23 1979; CHECK-GI-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload 1980; CHECK-GI-NEXT: mls v17.4s, v26.4s, v16.4s 1981; CHECK-GI-NEXT: udiv w29, w29, w30 1982; CHECK-GI-NEXT: mov v27.s[0], w28 1983; CHECK-GI-NEXT: ldp x28, x27, [sp, #16] // 16-byte Folded Reload 1984; CHECK-GI-NEXT: udiv w10, w11, w15 1985; CHECK-GI-NEXT: mov w11, v19.s[3] 1986; CHECK-GI-NEXT: mov v27.s[1], w29 1987; CHECK-GI-NEXT: udiv w8, w9, w11 1988; CHECK-GI-NEXT: mov v27.s[2], w10 1989; CHECK-GI-NEXT: mov v27.s[3], w8 1990; CHECK-GI-NEXT: mls v18.4s, v27.4s, v19.4s 1991; CHECK-GI-NEXT: uzp1 v3.8h, v17.8h, v18.8h 1992; CHECK-GI-NEXT: uzp1 v1.16b, v1.16b, v3.16b 1993; CHECK-GI-NEXT: ldp x29, x30, [sp], #96 // 16-byte Folded Reload 1994; CHECK-GI-NEXT: ret 1995entry: 1996 %s = urem <32 x i8> %d, %e 1997 ret <32 x i8> %s 1998} 1999 2000define <2 x i16> @sv2i16(<2 x i16> %d, <2 x i16> %e) { 2001; CHECK-SD-LABEL: sv2i16: 2002; CHECK-SD: // %bb.0: // %entry 2003; CHECK-SD-NEXT: shl v0.2s, v0.2s, #16 2004; CHECK-SD-NEXT: shl v1.2s, v1.2s, #16 2005; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #16 2006; CHECK-SD-NEXT: sshr v1.2s, v1.2s, #16 2007; CHECK-SD-NEXT: fmov w8, s1 2008; CHECK-SD-NEXT: fmov w9, s0 2009; CHECK-SD-NEXT: mov w11, v1.s[1] 
2010; CHECK-SD-NEXT: mov w12, v0.s[1] 2011; CHECK-SD-NEXT: sdiv w10, w9, w8 2012; CHECK-SD-NEXT: sdiv w13, w12, w11 2013; CHECK-SD-NEXT: msub w8, w10, w8, w9 2014; CHECK-SD-NEXT: fmov s0, w8 2015; CHECK-SD-NEXT: msub w9, w13, w11, w12 2016; CHECK-SD-NEXT: mov v0.s[1], w9 2017; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 2018; CHECK-SD-NEXT: ret 2019; 2020; CHECK-GI-LABEL: sv2i16: 2021; CHECK-GI: // %bb.0: // %entry 2022; CHECK-GI-NEXT: shl v0.2s, v0.2s, #16 2023; CHECK-GI-NEXT: shl v1.2s, v1.2s, #16 2024; CHECK-GI-NEXT: sshr v0.2s, v0.2s, #16 2025; CHECK-GI-NEXT: sshr v1.2s, v1.2s, #16 2026; CHECK-GI-NEXT: fmov w8, s0 2027; CHECK-GI-NEXT: fmov w9, s1 2028; CHECK-GI-NEXT: mov w10, v1.s[1] 2029; CHECK-GI-NEXT: sdiv w8, w8, w9 2030; CHECK-GI-NEXT: mov w9, v0.s[1] 2031; CHECK-GI-NEXT: sdiv w9, w9, w10 2032; CHECK-GI-NEXT: mov v2.s[0], w8 2033; CHECK-GI-NEXT: mov v2.s[1], w9 2034; CHECK-GI-NEXT: mls v0.2s, v2.2s, v1.2s 2035; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 2036; CHECK-GI-NEXT: ret 2037entry: 2038 %s = srem <2 x i16> %d, %e 2039 ret <2 x i16> %s 2040} 2041 2042define <3 x i16> @sv3i16(<3 x i16> %d, <3 x i16> %e) { 2043; CHECK-SD-LABEL: sv3i16: 2044; CHECK-SD: // %bb.0: // %entry 2045; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 2046; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 2047; CHECK-SD-NEXT: smov w11, v1.h[0] 2048; CHECK-SD-NEXT: smov w12, v0.h[0] 2049; CHECK-SD-NEXT: smov w8, v1.h[1] 2050; CHECK-SD-NEXT: smov w9, v0.h[1] 2051; CHECK-SD-NEXT: smov w14, v1.h[2] 2052; CHECK-SD-NEXT: smov w15, v0.h[2] 2053; CHECK-SD-NEXT: sdiv w13, w12, w11 2054; CHECK-SD-NEXT: sdiv w10, w9, w8 2055; CHECK-SD-NEXT: msub w11, w13, w11, w12 2056; CHECK-SD-NEXT: fmov s0, w11 2057; CHECK-SD-NEXT: sdiv w16, w15, w14 2058; CHECK-SD-NEXT: msub w8, w10, w8, w9 2059; CHECK-SD-NEXT: mov v0.h[1], w8 2060; CHECK-SD-NEXT: msub w8, w16, w14, w15 2061; CHECK-SD-NEXT: mov v0.h[2], w8 2062; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 2063; 
CHECK-SD-NEXT: ret 2064; 2065; CHECK-GI-LABEL: sv3i16: 2066; CHECK-GI: // %bb.0: // %entry 2067; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 2068; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 2069; CHECK-GI-NEXT: smov w8, v0.h[0] 2070; CHECK-GI-NEXT: smov w9, v1.h[0] 2071; CHECK-GI-NEXT: smov w11, v0.h[1] 2072; CHECK-GI-NEXT: smov w12, v1.h[1] 2073; CHECK-GI-NEXT: smov w14, v0.h[2] 2074; CHECK-GI-NEXT: smov w15, v1.h[2] 2075; CHECK-GI-NEXT: sdiv w10, w8, w9 2076; CHECK-GI-NEXT: sdiv w13, w11, w12 2077; CHECK-GI-NEXT: msub w8, w10, w9, w8 2078; CHECK-GI-NEXT: fmov s0, w8 2079; CHECK-GI-NEXT: sdiv w16, w14, w15 2080; CHECK-GI-NEXT: msub w9, w13, w12, w11 2081; CHECK-GI-NEXT: mov v0.h[1], w9 2082; CHECK-GI-NEXT: msub w8, w16, w15, w14 2083; CHECK-GI-NEXT: mov v0.h[2], w8 2084; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 2085; CHECK-GI-NEXT: ret 2086entry: 2087 %s = srem <3 x i16> %d, %e 2088 ret <3 x i16> %s 2089} 2090 2091define <4 x i16> @sv4i16(<4 x i16> %d, <4 x i16> %e) { 2092; CHECK-SD-LABEL: sv4i16: 2093; CHECK-SD: // %bb.0: // %entry 2094; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 2095; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 2096; CHECK-SD-NEXT: smov w11, v1.h[0] 2097; CHECK-SD-NEXT: smov w12, v0.h[0] 2098; CHECK-SD-NEXT: smov w8, v1.h[1] 2099; CHECK-SD-NEXT: smov w9, v0.h[1] 2100; CHECK-SD-NEXT: smov w14, v1.h[2] 2101; CHECK-SD-NEXT: smov w15, v0.h[2] 2102; CHECK-SD-NEXT: smov w17, v1.h[3] 2103; CHECK-SD-NEXT: smov w18, v0.h[3] 2104; CHECK-SD-NEXT: sdiv w13, w12, w11 2105; CHECK-SD-NEXT: sdiv w10, w9, w8 2106; CHECK-SD-NEXT: msub w11, w13, w11, w12 2107; CHECK-SD-NEXT: fmov s0, w11 2108; CHECK-SD-NEXT: sdiv w16, w15, w14 2109; CHECK-SD-NEXT: msub w8, w10, w8, w9 2110; CHECK-SD-NEXT: mov v0.h[1], w8 2111; CHECK-SD-NEXT: sdiv w9, w18, w17 2112; CHECK-SD-NEXT: msub w8, w16, w14, w15 2113; CHECK-SD-NEXT: mov v0.h[2], w8 2114; CHECK-SD-NEXT: msub w8, w9, w17, w18 2115; CHECK-SD-NEXT: mov v0.h[3], w8 2116; CHECK-SD-NEXT: 
// kill: def $d0 killed $d0 killed $q0 2117; CHECK-SD-NEXT: ret 2118; 2119; CHECK-GI-LABEL: sv4i16: 2120; CHECK-GI: // %bb.0: // %entry 2121; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 2122; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0 2123; CHECK-GI-NEXT: fmov w8, s0 2124; CHECK-GI-NEXT: fmov w9, s1 2125; CHECK-GI-NEXT: mov w10, v1.s[1] 2126; CHECK-GI-NEXT: mov w11, v1.s[2] 2127; CHECK-GI-NEXT: mov w12, v1.s[3] 2128; CHECK-GI-NEXT: sdiv w8, w8, w9 2129; CHECK-GI-NEXT: mov w9, v0.s[1] 2130; CHECK-GI-NEXT: sdiv w9, w9, w10 2131; CHECK-GI-NEXT: mov w10, v0.s[2] 2132; CHECK-GI-NEXT: mov v2.s[0], w8 2133; CHECK-GI-NEXT: sdiv w10, w10, w11 2134; CHECK-GI-NEXT: mov w11, v0.s[3] 2135; CHECK-GI-NEXT: mov v2.s[1], w9 2136; CHECK-GI-NEXT: sdiv w8, w11, w12 2137; CHECK-GI-NEXT: mov v2.s[2], w10 2138; CHECK-GI-NEXT: mov v2.s[3], w8 2139; CHECK-GI-NEXT: mls v0.4s, v2.4s, v1.4s 2140; CHECK-GI-NEXT: xtn v0.4h, v0.4s 2141; CHECK-GI-NEXT: ret 2142entry: 2143 %s = srem <4 x i16> %d, %e 2144 ret <4 x i16> %s 2145} 2146 2147define <8 x i16> @sv8i16(<8 x i16> %d, <8 x i16> %e) { 2148; CHECK-SD-LABEL: sv8i16: 2149; CHECK-SD: // %bb.0: // %entry 2150; CHECK-SD-NEXT: smov w11, v1.h[0] 2151; CHECK-SD-NEXT: smov w12, v0.h[0] 2152; CHECK-SD-NEXT: smov w8, v1.h[1] 2153; CHECK-SD-NEXT: smov w9, v0.h[1] 2154; CHECK-SD-NEXT: smov w14, v1.h[2] 2155; CHECK-SD-NEXT: smov w15, v0.h[2] 2156; CHECK-SD-NEXT: smov w17, v1.h[3] 2157; CHECK-SD-NEXT: smov w18, v0.h[3] 2158; CHECK-SD-NEXT: smov w1, v1.h[4] 2159; CHECK-SD-NEXT: smov w2, v0.h[4] 2160; CHECK-SD-NEXT: smov w4, v1.h[5] 2161; CHECK-SD-NEXT: smov w5, v0.h[5] 2162; CHECK-SD-NEXT: sdiv w13, w12, w11 2163; CHECK-SD-NEXT: sdiv w10, w9, w8 2164; CHECK-SD-NEXT: msub w11, w13, w11, w12 2165; CHECK-SD-NEXT: smov w13, v1.h[7] 2166; CHECK-SD-NEXT: fmov s2, w11 2167; CHECK-SD-NEXT: smov w11, v0.h[6] 2168; CHECK-SD-NEXT: sdiv w16, w15, w14 2169; CHECK-SD-NEXT: msub w8, w10, w8, w9 2170; CHECK-SD-NEXT: smov w10, v1.h[6] 2171; CHECK-SD-NEXT: mov v2.h[1], w8 2172; 
CHECK-SD-NEXT: sdiv w0, w18, w17 2173; CHECK-SD-NEXT: msub w8, w16, w14, w15 2174; CHECK-SD-NEXT: smov w14, v0.h[7] 2175; CHECK-SD-NEXT: mov v2.h[2], w8 2176; CHECK-SD-NEXT: sdiv w3, w2, w1 2177; CHECK-SD-NEXT: msub w8, w0, w17, w18 2178; CHECK-SD-NEXT: mov v2.h[3], w8 2179; CHECK-SD-NEXT: sdiv w9, w5, w4 2180; CHECK-SD-NEXT: msub w8, w3, w1, w2 2181; CHECK-SD-NEXT: mov v2.h[4], w8 2182; CHECK-SD-NEXT: sdiv w12, w11, w10 2183; CHECK-SD-NEXT: msub w8, w9, w4, w5 2184; CHECK-SD-NEXT: mov v2.h[5], w8 2185; CHECK-SD-NEXT: sdiv w9, w14, w13 2186; CHECK-SD-NEXT: msub w8, w12, w10, w11 2187; CHECK-SD-NEXT: mov v2.h[6], w8 2188; CHECK-SD-NEXT: msub w8, w9, w13, w14 2189; CHECK-SD-NEXT: mov v2.h[7], w8 2190; CHECK-SD-NEXT: mov v0.16b, v2.16b 2191; CHECK-SD-NEXT: ret 2192; 2193; CHECK-GI-LABEL: sv8i16: 2194; CHECK-GI: // %bb.0: // %entry 2195; CHECK-GI-NEXT: sshll v2.4s, v0.4h, #0 2196; CHECK-GI-NEXT: sshll v3.4s, v1.4h, #0 2197; CHECK-GI-NEXT: sshll2 v0.4s, v0.8h, #0 2198; CHECK-GI-NEXT: sshll2 v1.4s, v1.8h, #0 2199; CHECK-GI-NEXT: fmov w8, s2 2200; CHECK-GI-NEXT: fmov w9, s3 2201; CHECK-GI-NEXT: mov w10, v3.s[1] 2202; CHECK-GI-NEXT: mov w11, v3.s[2] 2203; CHECK-GI-NEXT: mov w12, v3.s[3] 2204; CHECK-GI-NEXT: fmov w13, s1 2205; CHECK-GI-NEXT: mov w14, v1.s[1] 2206; CHECK-GI-NEXT: mov w15, v1.s[2] 2207; CHECK-GI-NEXT: sdiv w8, w8, w9 2208; CHECK-GI-NEXT: mov w9, v2.s[1] 2209; CHECK-GI-NEXT: sdiv w9, w9, w10 2210; CHECK-GI-NEXT: mov w10, v2.s[2] 2211; CHECK-GI-NEXT: mov v4.s[0], w8 2212; CHECK-GI-NEXT: mov w8, v0.s[3] 2213; CHECK-GI-NEXT: sdiv w10, w10, w11 2214; CHECK-GI-NEXT: mov w11, v2.s[3] 2215; CHECK-GI-NEXT: mov v4.s[1], w9 2216; CHECK-GI-NEXT: sdiv w11, w11, w12 2217; CHECK-GI-NEXT: fmov w12, s0 2218; CHECK-GI-NEXT: mov v4.s[2], w10 2219; CHECK-GI-NEXT: sdiv w12, w12, w13 2220; CHECK-GI-NEXT: mov w13, v0.s[1] 2221; CHECK-GI-NEXT: mov v4.s[3], w11 2222; CHECK-GI-NEXT: mls v2.4s, v4.4s, v3.4s 2223; CHECK-GI-NEXT: sdiv w13, w13, w14 2224; CHECK-GI-NEXT: mov w14, v0.s[2] 
2225; CHECK-GI-NEXT: mov v5.s[0], w12 2226; CHECK-GI-NEXT: mov w12, v1.s[3] 2227; CHECK-GI-NEXT: sdiv w14, w14, w15 2228; CHECK-GI-NEXT: mov v5.s[1], w13 2229; CHECK-GI-NEXT: sdiv w8, w8, w12 2230; CHECK-GI-NEXT: mov v5.s[2], w14 2231; CHECK-GI-NEXT: mov v5.s[3], w8 2232; CHECK-GI-NEXT: mls v0.4s, v5.4s, v1.4s 2233; CHECK-GI-NEXT: uzp1 v0.8h, v2.8h, v0.8h 2234; CHECK-GI-NEXT: ret 2235entry: 2236 %s = srem <8 x i16> %d, %e 2237 ret <8 x i16> %s 2238} 2239 2240define <16 x i16> @sv16i16(<16 x i16> %d, <16 x i16> %e) { 2241; CHECK-SD-LABEL: sv16i16: 2242; CHECK-SD: // %bb.0: // %entry 2243; CHECK-SD-NEXT: sub sp, sp, #160 2244; CHECK-SD-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill 2245; CHECK-SD-NEXT: stp x28, x27, [sp, #80] // 16-byte Folded Spill 2246; CHECK-SD-NEXT: stp x26, x25, [sp, #96] // 16-byte Folded Spill 2247; CHECK-SD-NEXT: stp x24, x23, [sp, #112] // 16-byte Folded Spill 2248; CHECK-SD-NEXT: stp x22, x21, [sp, #128] // 16-byte Folded Spill 2249; CHECK-SD-NEXT: stp x20, x19, [sp, #144] // 16-byte Folded Spill 2250; CHECK-SD-NEXT: .cfi_def_cfa_offset 160 2251; CHECK-SD-NEXT: .cfi_offset w19, -8 2252; CHECK-SD-NEXT: .cfi_offset w20, -16 2253; CHECK-SD-NEXT: .cfi_offset w21, -24 2254; CHECK-SD-NEXT: .cfi_offset w22, -32 2255; CHECK-SD-NEXT: .cfi_offset w23, -40 2256; CHECK-SD-NEXT: .cfi_offset w24, -48 2257; CHECK-SD-NEXT: .cfi_offset w25, -56 2258; CHECK-SD-NEXT: .cfi_offset w26, -64 2259; CHECK-SD-NEXT: .cfi_offset w27, -72 2260; CHECK-SD-NEXT: .cfi_offset w28, -80 2261; CHECK-SD-NEXT: .cfi_offset w30, -88 2262; CHECK-SD-NEXT: .cfi_offset w29, -96 2263; CHECK-SD-NEXT: smov w8, v2.h[1] 2264; CHECK-SD-NEXT: smov w9, v0.h[1] 2265; CHECK-SD-NEXT: smov w19, v2.h[2] 2266; CHECK-SD-NEXT: smov w22, v0.h[2] 2267; CHECK-SD-NEXT: smov w1, v2.h[0] 2268; CHECK-SD-NEXT: smov w3, v0.h[0] 2269; CHECK-SD-NEXT: smov w7, v2.h[3] 2270; CHECK-SD-NEXT: smov w18, v0.h[3] 2271; CHECK-SD-NEXT: smov w4, v0.h[6] 2272; CHECK-SD-NEXT: smov w0, v2.h[4] 2273; CHECK-SD-NEXT: 
smov w5, v0.h[4] 2274; CHECK-SD-NEXT: smov w2, v2.h[7] 2275; CHECK-SD-NEXT: str w8, [sp, #52] // 4-byte Folded Spill 2276; CHECK-SD-NEXT: smov w6, v0.h[7] 2277; CHECK-SD-NEXT: smov w27, v3.h[0] 2278; CHECK-SD-NEXT: str w9, [sp, #44] // 4-byte Folded Spill 2279; CHECK-SD-NEXT: sdiv w9, w9, w8 2280; CHECK-SD-NEXT: smov w28, v1.h[0] 2281; CHECK-SD-NEXT: smov w24, v3.h[1] 2282; CHECK-SD-NEXT: smov w25, v1.h[1] 2283; CHECK-SD-NEXT: ldr w21, [sp, #52] // 4-byte Folded Reload 2284; CHECK-SD-NEXT: ldr w23, [sp, #44] // 4-byte Folded Reload 2285; CHECK-SD-NEXT: smov w30, v3.h[2] 2286; CHECK-SD-NEXT: smov w12, v3.h[3] 2287; CHECK-SD-NEXT: smov w11, v1.h[3] 2288; CHECK-SD-NEXT: smov w14, v3.h[5] 2289; CHECK-SD-NEXT: smov w13, v1.h[5] 2290; CHECK-SD-NEXT: sdiv w8, w22, w19 2291; CHECK-SD-NEXT: str w9, [sp, #60] // 4-byte Folded Spill 2292; CHECK-SD-NEXT: ldr w20, [sp, #60] // 4-byte Folded Reload 2293; CHECK-SD-NEXT: msub w21, w20, w21, w23 2294; CHECK-SD-NEXT: sdiv w9, w3, w1 2295; CHECK-SD-NEXT: str w8, [sp, #12] // 4-byte Folded Spill 2296; CHECK-SD-NEXT: sdiv w8, w18, w7 2297; CHECK-SD-NEXT: stp w9, w8, [sp, #24] // 8-byte Folded Spill 2298; CHECK-SD-NEXT: smov w8, v2.h[5] 2299; CHECK-SD-NEXT: smov w9, v0.h[5] 2300; CHECK-SD-NEXT: sdiv w10, w5, w0 2301; CHECK-SD-NEXT: ldr w20, [sp, #24] // 4-byte Folded Reload 2302; CHECK-SD-NEXT: msub w1, w20, w1, w3 2303; CHECK-SD-NEXT: str w9, [sp, #40] // 4-byte Folded Spill 2304; CHECK-SD-NEXT: str w8, [sp, #48] // 4-byte Folded Spill 2305; CHECK-SD-NEXT: fmov s0, w1 2306; CHECK-SD-NEXT: ldr w1, [sp, #12] // 4-byte Folded Reload 2307; CHECK-SD-NEXT: msub w1, w1, w19, w22 2308; CHECK-SD-NEXT: ldr w19, [sp, #28] // 4-byte Folded Reload 2309; CHECK-SD-NEXT: sdiv w9, w9, w8 2310; CHECK-SD-NEXT: smov w8, v2.h[6] 2311; CHECK-SD-NEXT: mov v0.h[1], w21 2312; CHECK-SD-NEXT: msub w18, w19, w7, w18 2313; CHECK-SD-NEXT: ldp x20, x19, [sp, #144] // 16-byte Folded Reload 2314; CHECK-SD-NEXT: ldp x22, x21, [sp, #128] // 16-byte Folded Reload 2315; 
CHECK-SD-NEXT: mov v0.h[2], w1 2316; CHECK-SD-NEXT: str w9, [sp, #56] // 4-byte Folded Spill 2317; CHECK-SD-NEXT: sdiv w9, w4, w8 2318; CHECK-SD-NEXT: mov v0.h[3], w18 2319; CHECK-SD-NEXT: ldr w18, [sp, #40] // 4-byte Folded Reload 2320; CHECK-SD-NEXT: stp w8, w9, [sp, #32] // 8-byte Folded Spill 2321; CHECK-SD-NEXT: sdiv w8, w6, w2 2322; CHECK-SD-NEXT: smov w9, v1.h[4] 2323; CHECK-SD-NEXT: sdiv w29, w28, w27 2324; CHECK-SD-NEXT: stp w8, w10, [sp, #16] // 8-byte Folded Spill 2325; CHECK-SD-NEXT: smov w8, v1.h[2] 2326; CHECK-SD-NEXT: smov w10, v3.h[4] 2327; CHECK-SD-NEXT: sdiv w26, w25, w24 2328; CHECK-SD-NEXT: msub w3, w29, w27, w28 2329; CHECK-SD-NEXT: ldp x28, x27, [sp, #80] // 16-byte Folded Reload 2330; CHECK-SD-NEXT: fmov s2, w3 2331; CHECK-SD-NEXT: smov w3, v1.h[6] 2332; CHECK-SD-NEXT: sdiv w15, w8, w30 2333; CHECK-SD-NEXT: msub w24, w26, w24, w25 2334; CHECK-SD-NEXT: mov v2.h[1], w24 2335; CHECK-SD-NEXT: ldp x24, x23, [sp, #112] // 16-byte Folded Reload 2336; CHECK-SD-NEXT: sdiv w17, w11, w12 2337; CHECK-SD-NEXT: msub w8, w15, w30, w8 2338; CHECK-SD-NEXT: smov w15, v3.h[6] 2339; CHECK-SD-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload 2340; CHECK-SD-NEXT: mov v2.h[2], w8 2341; CHECK-SD-NEXT: sdiv w16, w9, w10 2342; CHECK-SD-NEXT: msub w8, w17, w12, w11 2343; CHECK-SD-NEXT: ldr w12, [sp, #20] // 4-byte Folded Reload 2344; CHECK-SD-NEXT: ldr w17, [sp, #48] // 4-byte Folded Reload 2345; CHECK-SD-NEXT: msub w12, w12, w0, w5 2346; CHECK-SD-NEXT: mov v2.h[3], w8 2347; CHECK-SD-NEXT: mov v0.h[4], w12 2348; CHECK-SD-NEXT: sdiv w25, w13, w14 2349; CHECK-SD-NEXT: msub w8, w16, w10, w9 2350; CHECK-SD-NEXT: smov w9, v3.h[7] 2351; CHECK-SD-NEXT: smov w10, v1.h[7] 2352; CHECK-SD-NEXT: ldr w16, [sp, #56] // 4-byte Folded Reload 2353; CHECK-SD-NEXT: mov v2.h[4], w8 2354; CHECK-SD-NEXT: msub w16, w16, w17, w18 2355; CHECK-SD-NEXT: mov v0.h[5], w16 2356; CHECK-SD-NEXT: sdiv w11, w3, w15 2357; CHECK-SD-NEXT: msub w8, w25, w14, w13 2358; CHECK-SD-NEXT: ldp w14, w13, [sp, 
#32] // 8-byte Folded Reload 2359; CHECK-SD-NEXT: ldp x26, x25, [sp, #96] // 16-byte Folded Reload 2360; CHECK-SD-NEXT: mov v2.h[5], w8 2361; CHECK-SD-NEXT: msub w13, w13, w14, w4 2362; CHECK-SD-NEXT: mov v0.h[6], w13 2363; CHECK-SD-NEXT: sdiv w12, w10, w9 2364; CHECK-SD-NEXT: msub w8, w11, w15, w3 2365; CHECK-SD-NEXT: ldr w11, [sp, #16] // 4-byte Folded Reload 2366; CHECK-SD-NEXT: msub w11, w11, w2, w6 2367; CHECK-SD-NEXT: mov v2.h[6], w8 2368; CHECK-SD-NEXT: mov v0.h[7], w11 2369; CHECK-SD-NEXT: msub w8, w12, w9, w10 2370; CHECK-SD-NEXT: mov v2.h[7], w8 2371; CHECK-SD-NEXT: mov v1.16b, v2.16b 2372; CHECK-SD-NEXT: add sp, sp, #160 2373; CHECK-SD-NEXT: ret 2374; 2375; CHECK-GI-LABEL: sv16i16: 2376; CHECK-GI: // %bb.0: // %entry 2377; CHECK-GI-NEXT: sshll v4.4s, v0.4h, #0 2378; CHECK-GI-NEXT: sshll v5.4s, v2.4h, #0 2379; CHECK-GI-NEXT: sshll v6.4s, v1.4h, #0 2380; CHECK-GI-NEXT: sshll v7.4s, v3.4h, #0 2381; CHECK-GI-NEXT: fmov w8, s4 2382; CHECK-GI-NEXT: fmov w9, s5 2383; CHECK-GI-NEXT: mov w12, v5.s[3] 2384; CHECK-GI-NEXT: fmov w17, s7 2385; CHECK-GI-NEXT: mov w18, v7.s[1] 2386; CHECK-GI-NEXT: mov w0, v7.s[2] 2387; CHECK-GI-NEXT: mov w1, v7.s[3] 2388; CHECK-GI-NEXT: sshll2 v7.4s, v3.8h, #0 2389; CHECK-GI-NEXT: sshll v3.4s, v3.4h, #0 2390; CHECK-GI-NEXT: sdiv w11, w8, w9 2391; CHECK-GI-NEXT: mov w8, v4.s[1] 2392; CHECK-GI-NEXT: mov w9, v5.s[1] 2393; CHECK-GI-NEXT: fmov w2, s7 2394; CHECK-GI-NEXT: mov w3, v7.s[1] 2395; CHECK-GI-NEXT: mov w4, v7.s[2] 2396; CHECK-GI-NEXT: sdiv w10, w8, w9 2397; CHECK-GI-NEXT: mov w8, v4.s[2] 2398; CHECK-GI-NEXT: mov w9, v5.s[2] 2399; CHECK-GI-NEXT: sshll2 v5.4s, v2.8h, #0 2400; CHECK-GI-NEXT: mov v16.s[0], w11 2401; CHECK-GI-NEXT: sshll v2.4s, v2.4h, #0 2402; CHECK-GI-NEXT: fmov w13, s5 2403; CHECK-GI-NEXT: mov w14, v5.s[1] 2404; CHECK-GI-NEXT: mov w15, v5.s[2] 2405; CHECK-GI-NEXT: mov w16, v5.s[3] 2406; CHECK-GI-NEXT: sdiv w9, w8, w9 2407; CHECK-GI-NEXT: mov w8, v4.s[3] 2408; CHECK-GI-NEXT: sshll2 v4.4s, v0.8h, #0 2409; CHECK-GI-NEXT: 
mov v16.s[1], w10 2410; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 2411; CHECK-GI-NEXT: sdiv w8, w8, w12 2412; CHECK-GI-NEXT: fmov w12, s4 2413; CHECK-GI-NEXT: mov v16.s[2], w9 2414; CHECK-GI-NEXT: sdiv w13, w12, w13 2415; CHECK-GI-NEXT: mov w12, v4.s[1] 2416; CHECK-GI-NEXT: mov v16.s[3], w8 2417; CHECK-GI-NEXT: mls v0.4s, v16.4s, v2.4s 2418; CHECK-GI-NEXT: sdiv w14, w12, w14 2419; CHECK-GI-NEXT: mov w12, v4.s[2] 2420; CHECK-GI-NEXT: mov v17.s[0], w13 2421; CHECK-GI-NEXT: mov w13, v7.s[3] 2422; CHECK-GI-NEXT: sdiv w15, w12, w15 2423; CHECK-GI-NEXT: mov w12, v4.s[3] 2424; CHECK-GI-NEXT: mov v17.s[1], w14 2425; CHECK-GI-NEXT: sdiv w12, w12, w16 2426; CHECK-GI-NEXT: fmov w16, s6 2427; CHECK-GI-NEXT: mov v17.s[2], w15 2428; CHECK-GI-NEXT: sdiv w16, w16, w17 2429; CHECK-GI-NEXT: mov w17, v6.s[1] 2430; CHECK-GI-NEXT: mov v17.s[3], w12 2431; CHECK-GI-NEXT: mls v4.4s, v17.4s, v5.4s 2432; CHECK-GI-NEXT: sdiv w17, w17, w18 2433; CHECK-GI-NEXT: mov w18, v6.s[2] 2434; CHECK-GI-NEXT: mov v18.s[0], w16 2435; CHECK-GI-NEXT: uzp1 v0.8h, v0.8h, v4.8h 2436; CHECK-GI-NEXT: sdiv w18, w18, w0 2437; CHECK-GI-NEXT: mov w0, v6.s[3] 2438; CHECK-GI-NEXT: sshll2 v6.4s, v1.8h, #0 2439; CHECK-GI-NEXT: mov v18.s[1], w17 2440; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0 2441; CHECK-GI-NEXT: mov w11, v6.s[3] 2442; CHECK-GI-NEXT: sdiv w0, w0, w1 2443; CHECK-GI-NEXT: fmov w1, s6 2444; CHECK-GI-NEXT: mov v18.s[2], w18 2445; CHECK-GI-NEXT: sdiv w1, w1, w2 2446; CHECK-GI-NEXT: mov w2, v6.s[1] 2447; CHECK-GI-NEXT: mov v18.s[3], w0 2448; CHECK-GI-NEXT: mls v1.4s, v18.4s, v3.4s 2449; CHECK-GI-NEXT: sdiv w2, w2, w3 2450; CHECK-GI-NEXT: mov w3, v6.s[2] 2451; CHECK-GI-NEXT: mov v19.s[0], w1 2452; CHECK-GI-NEXT: sdiv w3, w3, w4 2453; CHECK-GI-NEXT: mov v19.s[1], w2 2454; CHECK-GI-NEXT: sdiv w10, w11, w13 2455; CHECK-GI-NEXT: mov v19.s[2], w3 2456; CHECK-GI-NEXT: mov v19.s[3], w10 2457; CHECK-GI-NEXT: mls v6.4s, v19.4s, v7.4s 2458; CHECK-GI-NEXT: uzp1 v1.8h, v1.8h, v6.8h 2459; CHECK-GI-NEXT: ret 2460entry: 2461 %s = 
srem <16 x i16> %d, %e 2462 ret <16 x i16> %s 2463} 2464 2465define <2 x i16> @uv2i16(<2 x i16> %d, <2 x i16> %e) { 2466; CHECK-SD-LABEL: uv2i16: 2467; CHECK-SD: // %bb.0: // %entry 2468; CHECK-SD-NEXT: movi d2, #0x00ffff0000ffff 2469; CHECK-SD-NEXT: and v0.8b, v0.8b, v2.8b 2470; CHECK-SD-NEXT: and v1.8b, v1.8b, v2.8b 2471; CHECK-SD-NEXT: fmov w8, s1 2472; CHECK-SD-NEXT: fmov w9, s0 2473; CHECK-SD-NEXT: mov w11, v1.s[1] 2474; CHECK-SD-NEXT: mov w12, v0.s[1] 2475; CHECK-SD-NEXT: udiv w10, w9, w8 2476; CHECK-SD-NEXT: udiv w13, w12, w11 2477; CHECK-SD-NEXT: msub w8, w10, w8, w9 2478; CHECK-SD-NEXT: fmov s0, w8 2479; CHECK-SD-NEXT: msub w9, w13, w11, w12 2480; CHECK-SD-NEXT: mov v0.s[1], w9 2481; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 2482; CHECK-SD-NEXT: ret 2483; 2484; CHECK-GI-LABEL: uv2i16: 2485; CHECK-GI: // %bb.0: // %entry 2486; CHECK-GI-NEXT: movi d2, #0x00ffff0000ffff 2487; CHECK-GI-NEXT: and v0.8b, v0.8b, v2.8b 2488; CHECK-GI-NEXT: and v1.8b, v1.8b, v2.8b 2489; CHECK-GI-NEXT: fmov w8, s0 2490; CHECK-GI-NEXT: fmov w9, s1 2491; CHECK-GI-NEXT: mov w10, v1.s[1] 2492; CHECK-GI-NEXT: udiv w8, w8, w9 2493; CHECK-GI-NEXT: mov w9, v0.s[1] 2494; CHECK-GI-NEXT: udiv w9, w9, w10 2495; CHECK-GI-NEXT: mov v2.s[0], w8 2496; CHECK-GI-NEXT: mov v2.s[1], w9 2497; CHECK-GI-NEXT: mls v0.2s, v2.2s, v1.2s 2498; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 2499; CHECK-GI-NEXT: ret 2500entry: 2501 %s = urem <2 x i16> %d, %e 2502 ret <2 x i16> %s 2503} 2504 2505define <3 x i16> @uv3i16(<3 x i16> %d, <3 x i16> %e) { 2506; CHECK-SD-LABEL: uv3i16: 2507; CHECK-SD: // %bb.0: // %entry 2508; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 2509; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 2510; CHECK-SD-NEXT: umov w11, v1.h[0] 2511; CHECK-SD-NEXT: umov w12, v0.h[0] 2512; CHECK-SD-NEXT: umov w8, v1.h[1] 2513; CHECK-SD-NEXT: umov w9, v0.h[1] 2514; CHECK-SD-NEXT: umov w13, v0.h[2] 2515; CHECK-SD-NEXT: umov w14, v1.h[0] 2516; CHECK-SD-NEXT: umov w16, v0.h[0] 
2517; CHECK-SD-NEXT: udiv w11, w12, w11 2518; CHECK-SD-NEXT: umov w12, v1.h[2] 2519; CHECK-SD-NEXT: udiv w10, w9, w8 2520; CHECK-SD-NEXT: msub w11, w11, w14, w16 2521; CHECK-SD-NEXT: udiv w15, w13, w12 2522; CHECK-SD-NEXT: msub w8, w10, w8, w9 2523; CHECK-SD-NEXT: sxth w9, w11 2524; CHECK-SD-NEXT: fmov s0, w9 2525; CHECK-SD-NEXT: sxth w8, w8 2526; CHECK-SD-NEXT: mov v0.h[1], w8 2527; CHECK-SD-NEXT: msub w10, w15, w12, w13 2528; CHECK-SD-NEXT: sxth w8, w10 2529; CHECK-SD-NEXT: mov v0.h[2], w8 2530; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 2531; CHECK-SD-NEXT: ret 2532; 2533; CHECK-GI-LABEL: uv3i16: 2534; CHECK-GI: // %bb.0: // %entry 2535; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 2536; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 2537; CHECK-GI-NEXT: umov w8, v0.h[0] 2538; CHECK-GI-NEXT: umov w9, v1.h[0] 2539; CHECK-GI-NEXT: umov w11, v0.h[1] 2540; CHECK-GI-NEXT: umov w12, v1.h[1] 2541; CHECK-GI-NEXT: umov w14, v0.h[2] 2542; CHECK-GI-NEXT: umov w15, v1.h[2] 2543; CHECK-GI-NEXT: udiv w10, w8, w9 2544; CHECK-GI-NEXT: udiv w13, w11, w12 2545; CHECK-GI-NEXT: msub w8, w10, w9, w8 2546; CHECK-GI-NEXT: fmov s0, w8 2547; CHECK-GI-NEXT: udiv w16, w14, w15 2548; CHECK-GI-NEXT: msub w9, w13, w12, w11 2549; CHECK-GI-NEXT: mov v0.h[1], w9 2550; CHECK-GI-NEXT: msub w8, w16, w15, w14 2551; CHECK-GI-NEXT: mov v0.h[2], w8 2552; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 2553; CHECK-GI-NEXT: ret 2554entry: 2555 %s = urem <3 x i16> %d, %e 2556 ret <3 x i16> %s 2557} 2558 2559define <4 x i16> @uv4i16(<4 x i16> %d, <4 x i16> %e) { 2560; CHECK-SD-LABEL: uv4i16: 2561; CHECK-SD: // %bb.0: // %entry 2562; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 2563; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 2564; CHECK-SD-NEXT: umov w11, v1.h[0] 2565; CHECK-SD-NEXT: umov w12, v0.h[0] 2566; CHECK-SD-NEXT: umov w8, v1.h[1] 2567; CHECK-SD-NEXT: umov w9, v0.h[1] 2568; CHECK-SD-NEXT: umov w14, v1.h[2] 2569; CHECK-SD-NEXT: umov w15, v0.h[2] 2570; 
CHECK-SD-NEXT: umov w17, v1.h[3] 2571; CHECK-SD-NEXT: umov w18, v0.h[3] 2572; CHECK-SD-NEXT: udiv w13, w12, w11 2573; CHECK-SD-NEXT: udiv w10, w9, w8 2574; CHECK-SD-NEXT: msub w11, w13, w11, w12 2575; CHECK-SD-NEXT: fmov s0, w11 2576; CHECK-SD-NEXT: udiv w16, w15, w14 2577; CHECK-SD-NEXT: msub w8, w10, w8, w9 2578; CHECK-SD-NEXT: mov v0.h[1], w8 2579; CHECK-SD-NEXT: udiv w9, w18, w17 2580; CHECK-SD-NEXT: msub w8, w16, w14, w15 2581; CHECK-SD-NEXT: mov v0.h[2], w8 2582; CHECK-SD-NEXT: msub w8, w9, w17, w18 2583; CHECK-SD-NEXT: mov v0.h[3], w8 2584; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 2585; CHECK-SD-NEXT: ret 2586; 2587; CHECK-GI-LABEL: uv4i16: 2588; CHECK-GI: // %bb.0: // %entry 2589; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 2590; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 2591; CHECK-GI-NEXT: fmov w8, s0 2592; CHECK-GI-NEXT: fmov w9, s1 2593; CHECK-GI-NEXT: mov w10, v1.s[1] 2594; CHECK-GI-NEXT: mov w11, v1.s[2] 2595; CHECK-GI-NEXT: mov w12, v1.s[3] 2596; CHECK-GI-NEXT: udiv w8, w8, w9 2597; CHECK-GI-NEXT: mov w9, v0.s[1] 2598; CHECK-GI-NEXT: udiv w9, w9, w10 2599; CHECK-GI-NEXT: mov w10, v0.s[2] 2600; CHECK-GI-NEXT: mov v2.s[0], w8 2601; CHECK-GI-NEXT: udiv w10, w10, w11 2602; CHECK-GI-NEXT: mov w11, v0.s[3] 2603; CHECK-GI-NEXT: mov v2.s[1], w9 2604; CHECK-GI-NEXT: udiv w8, w11, w12 2605; CHECK-GI-NEXT: mov v2.s[2], w10 2606; CHECK-GI-NEXT: mov v2.s[3], w8 2607; CHECK-GI-NEXT: mls v0.4s, v2.4s, v1.4s 2608; CHECK-GI-NEXT: xtn v0.4h, v0.4s 2609; CHECK-GI-NEXT: ret 2610entry: 2611 %s = urem <4 x i16> %d, %e 2612 ret <4 x i16> %s 2613} 2614 2615define <8 x i16> @uv8i16(<8 x i16> %d, <8 x i16> %e) { 2616; CHECK-SD-LABEL: uv8i16: 2617; CHECK-SD: // %bb.0: // %entry 2618; CHECK-SD-NEXT: umov w11, v1.h[0] 2619; CHECK-SD-NEXT: umov w12, v0.h[0] 2620; CHECK-SD-NEXT: umov w8, v1.h[1] 2621; CHECK-SD-NEXT: umov w9, v0.h[1] 2622; CHECK-SD-NEXT: umov w14, v1.h[2] 2623; CHECK-SD-NEXT: umov w15, v0.h[2] 2624; CHECK-SD-NEXT: umov w17, v1.h[3] 2625; CHECK-SD-NEXT: umov w18, 
v0.h[3] 2626; CHECK-SD-NEXT: umov w1, v1.h[4] 2627; CHECK-SD-NEXT: umov w2, v0.h[4] 2628; CHECK-SD-NEXT: umov w4, v1.h[5] 2629; CHECK-SD-NEXT: umov w5, v0.h[5] 2630; CHECK-SD-NEXT: udiv w13, w12, w11 2631; CHECK-SD-NEXT: udiv w10, w9, w8 2632; CHECK-SD-NEXT: msub w11, w13, w11, w12 2633; CHECK-SD-NEXT: umov w13, v1.h[7] 2634; CHECK-SD-NEXT: fmov s2, w11 2635; CHECK-SD-NEXT: umov w11, v0.h[6] 2636; CHECK-SD-NEXT: udiv w16, w15, w14 2637; CHECK-SD-NEXT: msub w8, w10, w8, w9 2638; CHECK-SD-NEXT: umov w10, v1.h[6] 2639; CHECK-SD-NEXT: mov v2.h[1], w8 2640; CHECK-SD-NEXT: udiv w0, w18, w17 2641; CHECK-SD-NEXT: msub w8, w16, w14, w15 2642; CHECK-SD-NEXT: umov w14, v0.h[7] 2643; CHECK-SD-NEXT: mov v2.h[2], w8 2644; CHECK-SD-NEXT: udiv w3, w2, w1 2645; CHECK-SD-NEXT: msub w8, w0, w17, w18 2646; CHECK-SD-NEXT: mov v2.h[3], w8 2647; CHECK-SD-NEXT: udiv w9, w5, w4 2648; CHECK-SD-NEXT: msub w8, w3, w1, w2 2649; CHECK-SD-NEXT: mov v2.h[4], w8 2650; CHECK-SD-NEXT: udiv w12, w11, w10 2651; CHECK-SD-NEXT: msub w8, w9, w4, w5 2652; CHECK-SD-NEXT: mov v2.h[5], w8 2653; CHECK-SD-NEXT: udiv w9, w14, w13 2654; CHECK-SD-NEXT: msub w8, w12, w10, w11 2655; CHECK-SD-NEXT: mov v2.h[6], w8 2656; CHECK-SD-NEXT: msub w8, w9, w13, w14 2657; CHECK-SD-NEXT: mov v2.h[7], w8 2658; CHECK-SD-NEXT: mov v0.16b, v2.16b 2659; CHECK-SD-NEXT: ret 2660; 2661; CHECK-GI-LABEL: uv8i16: 2662; CHECK-GI: // %bb.0: // %entry 2663; CHECK-GI-NEXT: ushll v2.4s, v0.4h, #0 2664; CHECK-GI-NEXT: ushll v3.4s, v1.4h, #0 2665; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0 2666; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0 2667; CHECK-GI-NEXT: fmov w8, s2 2668; CHECK-GI-NEXT: fmov w9, s3 2669; CHECK-GI-NEXT: mov w10, v3.s[1] 2670; CHECK-GI-NEXT: mov w11, v3.s[2] 2671; CHECK-GI-NEXT: mov w12, v3.s[3] 2672; CHECK-GI-NEXT: fmov w13, s1 2673; CHECK-GI-NEXT: mov w14, v1.s[1] 2674; CHECK-GI-NEXT: mov w15, v1.s[2] 2675; CHECK-GI-NEXT: udiv w8, w8, w9 2676; CHECK-GI-NEXT: mov w9, v2.s[1] 2677; CHECK-GI-NEXT: udiv w9, w9, w10 2678; CHECK-GI-NEXT: 
mov w10, v2.s[2] 2679; CHECK-GI-NEXT: mov v4.s[0], w8 2680; CHECK-GI-NEXT: mov w8, v0.s[3] 2681; CHECK-GI-NEXT: udiv w10, w10, w11 2682; CHECK-GI-NEXT: mov w11, v2.s[3] 2683; CHECK-GI-NEXT: mov v4.s[1], w9 2684; CHECK-GI-NEXT: udiv w11, w11, w12 2685; CHECK-GI-NEXT: fmov w12, s0 2686; CHECK-GI-NEXT: mov v4.s[2], w10 2687; CHECK-GI-NEXT: udiv w12, w12, w13 2688; CHECK-GI-NEXT: mov w13, v0.s[1] 2689; CHECK-GI-NEXT: mov v4.s[3], w11 2690; CHECK-GI-NEXT: mls v2.4s, v4.4s, v3.4s 2691; CHECK-GI-NEXT: udiv w13, w13, w14 2692; CHECK-GI-NEXT: mov w14, v0.s[2] 2693; CHECK-GI-NEXT: mov v5.s[0], w12 2694; CHECK-GI-NEXT: mov w12, v1.s[3] 2695; CHECK-GI-NEXT: udiv w14, w14, w15 2696; CHECK-GI-NEXT: mov v5.s[1], w13 2697; CHECK-GI-NEXT: udiv w8, w8, w12 2698; CHECK-GI-NEXT: mov v5.s[2], w14 2699; CHECK-GI-NEXT: mov v5.s[3], w8 2700; CHECK-GI-NEXT: mls v0.4s, v5.4s, v1.4s 2701; CHECK-GI-NEXT: uzp1 v0.8h, v2.8h, v0.8h 2702; CHECK-GI-NEXT: ret 2703entry: 2704 %s = urem <8 x i16> %d, %e 2705 ret <8 x i16> %s 2706} 2707 2708define <16 x i16> @uv16i16(<16 x i16> %d, <16 x i16> %e) { 2709; CHECK-SD-LABEL: uv16i16: 2710; CHECK-SD: // %bb.0: // %entry 2711; CHECK-SD-NEXT: sub sp, sp, #160 2712; CHECK-SD-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill 2713; CHECK-SD-NEXT: stp x28, x27, [sp, #80] // 16-byte Folded Spill 2714; CHECK-SD-NEXT: stp x26, x25, [sp, #96] // 16-byte Folded Spill 2715; CHECK-SD-NEXT: stp x24, x23, [sp, #112] // 16-byte Folded Spill 2716; CHECK-SD-NEXT: stp x22, x21, [sp, #128] // 16-byte Folded Spill 2717; CHECK-SD-NEXT: stp x20, x19, [sp, #144] // 16-byte Folded Spill 2718; CHECK-SD-NEXT: .cfi_def_cfa_offset 160 2719; CHECK-SD-NEXT: .cfi_offset w19, -8 2720; CHECK-SD-NEXT: .cfi_offset w20, -16 2721; CHECK-SD-NEXT: .cfi_offset w21, -24 2722; CHECK-SD-NEXT: .cfi_offset w22, -32 2723; CHECK-SD-NEXT: .cfi_offset w23, -40 2724; CHECK-SD-NEXT: .cfi_offset w24, -48 2725; CHECK-SD-NEXT: .cfi_offset w25, -56 2726; CHECK-SD-NEXT: .cfi_offset w26, -64 2727; 
CHECK-SD-NEXT: .cfi_offset w27, -72 2728; CHECK-SD-NEXT: .cfi_offset w28, -80 2729; CHECK-SD-NEXT: .cfi_offset w30, -88 2730; CHECK-SD-NEXT: .cfi_offset w29, -96 2731; CHECK-SD-NEXT: umov w8, v2.h[1] 2732; CHECK-SD-NEXT: umov w9, v0.h[1] 2733; CHECK-SD-NEXT: umov w19, v2.h[2] 2734; CHECK-SD-NEXT: umov w22, v0.h[2] 2735; CHECK-SD-NEXT: umov w1, v2.h[0] 2736; CHECK-SD-NEXT: umov w3, v0.h[0] 2737; CHECK-SD-NEXT: umov w7, v2.h[3] 2738; CHECK-SD-NEXT: umov w18, v0.h[3] 2739; CHECK-SD-NEXT: umov w4, v0.h[6] 2740; CHECK-SD-NEXT: umov w0, v2.h[4] 2741; CHECK-SD-NEXT: umov w5, v0.h[4] 2742; CHECK-SD-NEXT: umov w2, v2.h[7] 2743; CHECK-SD-NEXT: str w8, [sp, #52] // 4-byte Folded Spill 2744; CHECK-SD-NEXT: umov w6, v0.h[7] 2745; CHECK-SD-NEXT: umov w27, v3.h[0] 2746; CHECK-SD-NEXT: str w9, [sp, #44] // 4-byte Folded Spill 2747; CHECK-SD-NEXT: udiv w9, w9, w8 2748; CHECK-SD-NEXT: umov w28, v1.h[0] 2749; CHECK-SD-NEXT: umov w24, v3.h[1] 2750; CHECK-SD-NEXT: umov w25, v1.h[1] 2751; CHECK-SD-NEXT: ldr w21, [sp, #52] // 4-byte Folded Reload 2752; CHECK-SD-NEXT: ldr w23, [sp, #44] // 4-byte Folded Reload 2753; CHECK-SD-NEXT: umov w30, v3.h[2] 2754; CHECK-SD-NEXT: umov w12, v3.h[3] 2755; CHECK-SD-NEXT: umov w11, v1.h[3] 2756; CHECK-SD-NEXT: umov w14, v3.h[5] 2757; CHECK-SD-NEXT: umov w13, v1.h[5] 2758; CHECK-SD-NEXT: udiv w8, w22, w19 2759; CHECK-SD-NEXT: str w9, [sp, #60] // 4-byte Folded Spill 2760; CHECK-SD-NEXT: ldr w20, [sp, #60] // 4-byte Folded Reload 2761; CHECK-SD-NEXT: msub w21, w20, w21, w23 2762; CHECK-SD-NEXT: udiv w9, w3, w1 2763; CHECK-SD-NEXT: str w8, [sp, #12] // 4-byte Folded Spill 2764; CHECK-SD-NEXT: udiv w8, w18, w7 2765; CHECK-SD-NEXT: stp w9, w8, [sp, #24] // 8-byte Folded Spill 2766; CHECK-SD-NEXT: umov w8, v2.h[5] 2767; CHECK-SD-NEXT: umov w9, v0.h[5] 2768; CHECK-SD-NEXT: udiv w10, w5, w0 2769; CHECK-SD-NEXT: ldr w20, [sp, #24] // 4-byte Folded Reload 2770; CHECK-SD-NEXT: msub w1, w20, w1, w3 2771; CHECK-SD-NEXT: str w9, [sp, #40] // 4-byte Folded Spill 2772; 
CHECK-SD-NEXT: str w8, [sp, #48] // 4-byte Folded Spill 2773; CHECK-SD-NEXT: fmov s0, w1 2774; CHECK-SD-NEXT: ldr w1, [sp, #12] // 4-byte Folded Reload 2775; CHECK-SD-NEXT: msub w1, w1, w19, w22 2776; CHECK-SD-NEXT: ldr w19, [sp, #28] // 4-byte Folded Reload 2777; CHECK-SD-NEXT: udiv w9, w9, w8 2778; CHECK-SD-NEXT: umov w8, v2.h[6] 2779; CHECK-SD-NEXT: mov v0.h[1], w21 2780; CHECK-SD-NEXT: msub w18, w19, w7, w18 2781; CHECK-SD-NEXT: ldp x20, x19, [sp, #144] // 16-byte Folded Reload 2782; CHECK-SD-NEXT: ldp x22, x21, [sp, #128] // 16-byte Folded Reload 2783; CHECK-SD-NEXT: mov v0.h[2], w1 2784; CHECK-SD-NEXT: str w9, [sp, #56] // 4-byte Folded Spill 2785; CHECK-SD-NEXT: udiv w9, w4, w8 2786; CHECK-SD-NEXT: mov v0.h[3], w18 2787; CHECK-SD-NEXT: ldr w18, [sp, #40] // 4-byte Folded Reload 2788; CHECK-SD-NEXT: stp w8, w9, [sp, #32] // 8-byte Folded Spill 2789; CHECK-SD-NEXT: udiv w8, w6, w2 2790; CHECK-SD-NEXT: umov w9, v1.h[4] 2791; CHECK-SD-NEXT: udiv w29, w28, w27 2792; CHECK-SD-NEXT: stp w8, w10, [sp, #16] // 8-byte Folded Spill 2793; CHECK-SD-NEXT: umov w8, v1.h[2] 2794; CHECK-SD-NEXT: umov w10, v3.h[4] 2795; CHECK-SD-NEXT: udiv w26, w25, w24 2796; CHECK-SD-NEXT: msub w3, w29, w27, w28 2797; CHECK-SD-NEXT: ldp x28, x27, [sp, #80] // 16-byte Folded Reload 2798; CHECK-SD-NEXT: fmov s2, w3 2799; CHECK-SD-NEXT: umov w3, v1.h[6] 2800; CHECK-SD-NEXT: udiv w15, w8, w30 2801; CHECK-SD-NEXT: msub w24, w26, w24, w25 2802; CHECK-SD-NEXT: mov v2.h[1], w24 2803; CHECK-SD-NEXT: ldp x24, x23, [sp, #112] // 16-byte Folded Reload 2804; CHECK-SD-NEXT: udiv w17, w11, w12 2805; CHECK-SD-NEXT: msub w8, w15, w30, w8 2806; CHECK-SD-NEXT: umov w15, v3.h[6] 2807; CHECK-SD-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload 2808; CHECK-SD-NEXT: mov v2.h[2], w8 2809; CHECK-SD-NEXT: udiv w16, w9, w10 2810; CHECK-SD-NEXT: msub w8, w17, w12, w11 2811; CHECK-SD-NEXT: ldr w12, [sp, #20] // 4-byte Folded Reload 2812; CHECK-SD-NEXT: ldr w17, [sp, #48] // 4-byte Folded Reload 2813; CHECK-SD-NEXT: 
msub w12, w12, w0, w5 2814; CHECK-SD-NEXT: mov v2.h[3], w8 2815; CHECK-SD-NEXT: mov v0.h[4], w12 2816; CHECK-SD-NEXT: udiv w25, w13, w14 2817; CHECK-SD-NEXT: msub w8, w16, w10, w9 2818; CHECK-SD-NEXT: umov w9, v3.h[7] 2819; CHECK-SD-NEXT: umov w10, v1.h[7] 2820; CHECK-SD-NEXT: ldr w16, [sp, #56] // 4-byte Folded Reload 2821; CHECK-SD-NEXT: mov v2.h[4], w8 2822; CHECK-SD-NEXT: msub w16, w16, w17, w18 2823; CHECK-SD-NEXT: mov v0.h[5], w16 2824; CHECK-SD-NEXT: udiv w11, w3, w15 2825; CHECK-SD-NEXT: msub w8, w25, w14, w13 2826; CHECK-SD-NEXT: ldp w14, w13, [sp, #32] // 8-byte Folded Reload 2827; CHECK-SD-NEXT: ldp x26, x25, [sp, #96] // 16-byte Folded Reload 2828; CHECK-SD-NEXT: mov v2.h[5], w8 2829; CHECK-SD-NEXT: msub w13, w13, w14, w4 2830; CHECK-SD-NEXT: mov v0.h[6], w13 2831; CHECK-SD-NEXT: udiv w12, w10, w9 2832; CHECK-SD-NEXT: msub w8, w11, w15, w3 2833; CHECK-SD-NEXT: ldr w11, [sp, #16] // 4-byte Folded Reload 2834; CHECK-SD-NEXT: msub w11, w11, w2, w6 2835; CHECK-SD-NEXT: mov v2.h[6], w8 2836; CHECK-SD-NEXT: mov v0.h[7], w11 2837; CHECK-SD-NEXT: msub w8, w12, w9, w10 2838; CHECK-SD-NEXT: mov v2.h[7], w8 2839; CHECK-SD-NEXT: mov v1.16b, v2.16b 2840; CHECK-SD-NEXT: add sp, sp, #160 2841; CHECK-SD-NEXT: ret 2842; 2843; CHECK-GI-LABEL: uv16i16: 2844; CHECK-GI: // %bb.0: // %entry 2845; CHECK-GI-NEXT: ushll v4.4s, v0.4h, #0 2846; CHECK-GI-NEXT: ushll v5.4s, v2.4h, #0 2847; CHECK-GI-NEXT: ushll v6.4s, v1.4h, #0 2848; CHECK-GI-NEXT: ushll v7.4s, v3.4h, #0 2849; CHECK-GI-NEXT: fmov w8, s4 2850; CHECK-GI-NEXT: fmov w9, s5 2851; CHECK-GI-NEXT: mov w12, v5.s[3] 2852; CHECK-GI-NEXT: fmov w17, s7 2853; CHECK-GI-NEXT: mov w18, v7.s[1] 2854; CHECK-GI-NEXT: mov w0, v7.s[2] 2855; CHECK-GI-NEXT: mov w1, v7.s[3] 2856; CHECK-GI-NEXT: ushll2 v7.4s, v3.8h, #0 2857; CHECK-GI-NEXT: ushll v3.4s, v3.4h, #0 2858; CHECK-GI-NEXT: udiv w11, w8, w9 2859; CHECK-GI-NEXT: mov w8, v4.s[1] 2860; CHECK-GI-NEXT: mov w9, v5.s[1] 2861; CHECK-GI-NEXT: fmov w2, s7 2862; CHECK-GI-NEXT: mov w3, v7.s[1] 
2863; CHECK-GI-NEXT: mov w4, v7.s[2] 2864; CHECK-GI-NEXT: udiv w10, w8, w9 2865; CHECK-GI-NEXT: mov w8, v4.s[2] 2866; CHECK-GI-NEXT: mov w9, v5.s[2] 2867; CHECK-GI-NEXT: ushll2 v5.4s, v2.8h, #0 2868; CHECK-GI-NEXT: mov v16.s[0], w11 2869; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0 2870; CHECK-GI-NEXT: fmov w13, s5 2871; CHECK-GI-NEXT: mov w14, v5.s[1] 2872; CHECK-GI-NEXT: mov w15, v5.s[2] 2873; CHECK-GI-NEXT: mov w16, v5.s[3] 2874; CHECK-GI-NEXT: udiv w9, w8, w9 2875; CHECK-GI-NEXT: mov w8, v4.s[3] 2876; CHECK-GI-NEXT: ushll2 v4.4s, v0.8h, #0 2877; CHECK-GI-NEXT: mov v16.s[1], w10 2878; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 2879; CHECK-GI-NEXT: udiv w8, w8, w12 2880; CHECK-GI-NEXT: fmov w12, s4 2881; CHECK-GI-NEXT: mov v16.s[2], w9 2882; CHECK-GI-NEXT: udiv w13, w12, w13 2883; CHECK-GI-NEXT: mov w12, v4.s[1] 2884; CHECK-GI-NEXT: mov v16.s[3], w8 2885; CHECK-GI-NEXT: mls v0.4s, v16.4s, v2.4s 2886; CHECK-GI-NEXT: udiv w14, w12, w14 2887; CHECK-GI-NEXT: mov w12, v4.s[2] 2888; CHECK-GI-NEXT: mov v17.s[0], w13 2889; CHECK-GI-NEXT: mov w13, v7.s[3] 2890; CHECK-GI-NEXT: udiv w15, w12, w15 2891; CHECK-GI-NEXT: mov w12, v4.s[3] 2892; CHECK-GI-NEXT: mov v17.s[1], w14 2893; CHECK-GI-NEXT: udiv w12, w12, w16 2894; CHECK-GI-NEXT: fmov w16, s6 2895; CHECK-GI-NEXT: mov v17.s[2], w15 2896; CHECK-GI-NEXT: udiv w16, w16, w17 2897; CHECK-GI-NEXT: mov w17, v6.s[1] 2898; CHECK-GI-NEXT: mov v17.s[3], w12 2899; CHECK-GI-NEXT: mls v4.4s, v17.4s, v5.4s 2900; CHECK-GI-NEXT: udiv w17, w17, w18 2901; CHECK-GI-NEXT: mov w18, v6.s[2] 2902; CHECK-GI-NEXT: mov v18.s[0], w16 2903; CHECK-GI-NEXT: uzp1 v0.8h, v0.8h, v4.8h 2904; CHECK-GI-NEXT: udiv w18, w18, w0 2905; CHECK-GI-NEXT: mov w0, v6.s[3] 2906; CHECK-GI-NEXT: ushll2 v6.4s, v1.8h, #0 2907; CHECK-GI-NEXT: mov v18.s[1], w17 2908; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 2909; CHECK-GI-NEXT: mov w11, v6.s[3] 2910; CHECK-GI-NEXT: udiv w0, w0, w1 2911; CHECK-GI-NEXT: fmov w1, s6 2912; CHECK-GI-NEXT: mov v18.s[2], w18 2913; CHECK-GI-NEXT: udiv w1, w1, w2 
2914; CHECK-GI-NEXT: mov w2, v6.s[1] 2915; CHECK-GI-NEXT: mov v18.s[3], w0 2916; CHECK-GI-NEXT: mls v1.4s, v18.4s, v3.4s 2917; CHECK-GI-NEXT: udiv w2, w2, w3 2918; CHECK-GI-NEXT: mov w3, v6.s[2] 2919; CHECK-GI-NEXT: mov v19.s[0], w1 2920; CHECK-GI-NEXT: udiv w3, w3, w4 2921; CHECK-GI-NEXT: mov v19.s[1], w2 2922; CHECK-GI-NEXT: udiv w10, w11, w13 2923; CHECK-GI-NEXT: mov v19.s[2], w3 2924; CHECK-GI-NEXT: mov v19.s[3], w10 2925; CHECK-GI-NEXT: mls v6.4s, v19.4s, v7.4s 2926; CHECK-GI-NEXT: uzp1 v1.8h, v1.8h, v6.8h 2927; CHECK-GI-NEXT: ret 2928entry: 2929 %s = urem <16 x i16> %d, %e 2930 ret <16 x i16> %s 2931} 2932 2933define <2 x i32> @sv2i32(<2 x i32> %d, <2 x i32> %e) { 2934; CHECK-SD-LABEL: sv2i32: 2935; CHECK-SD: // %bb.0: // %entry 2936; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 2937; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 2938; CHECK-SD-NEXT: fmov w8, s1 2939; CHECK-SD-NEXT: fmov w9, s0 2940; CHECK-SD-NEXT: mov w11, v1.s[1] 2941; CHECK-SD-NEXT: mov w12, v0.s[1] 2942; CHECK-SD-NEXT: sdiv w10, w9, w8 2943; CHECK-SD-NEXT: sdiv w13, w12, w11 2944; CHECK-SD-NEXT: msub w8, w10, w8, w9 2945; CHECK-SD-NEXT: fmov s0, w8 2946; CHECK-SD-NEXT: msub w9, w13, w11, w12 2947; CHECK-SD-NEXT: mov v0.s[1], w9 2948; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 2949; CHECK-SD-NEXT: ret 2950; 2951; CHECK-GI-LABEL: sv2i32: 2952; CHECK-GI: // %bb.0: // %entry 2953; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 2954; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 2955; CHECK-GI-NEXT: fmov w8, s0 2956; CHECK-GI-NEXT: fmov w9, s1 2957; CHECK-GI-NEXT: mov w10, v1.s[1] 2958; CHECK-GI-NEXT: sdiv w8, w8, w9 2959; CHECK-GI-NEXT: mov w9, v0.s[1] 2960; CHECK-GI-NEXT: sdiv w9, w9, w10 2961; CHECK-GI-NEXT: mov v2.s[0], w8 2962; CHECK-GI-NEXT: mov v2.s[1], w9 2963; CHECK-GI-NEXT: mls v0.2s, v2.2s, v1.2s 2964; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 2965; CHECK-GI-NEXT: ret 2966entry: 2967 %s = srem <2 x i32> %d, %e 2968 ret <2 x i32> %s 2969} 
2970 2971define <3 x i32> @sv3i32(<3 x i32> %d, <3 x i32> %e) { 2972; CHECK-SD-LABEL: sv3i32: 2973; CHECK-SD: // %bb.0: // %entry 2974; CHECK-SD-NEXT: fmov w11, s1 2975; CHECK-SD-NEXT: fmov w12, s0 2976; CHECK-SD-NEXT: mov w8, v1.s[1] 2977; CHECK-SD-NEXT: mov w9, v0.s[1] 2978; CHECK-SD-NEXT: mov w14, v1.s[2] 2979; CHECK-SD-NEXT: mov w15, v0.s[2] 2980; CHECK-SD-NEXT: sdiv w13, w12, w11 2981; CHECK-SD-NEXT: sdiv w10, w9, w8 2982; CHECK-SD-NEXT: msub w11, w13, w11, w12 2983; CHECK-SD-NEXT: fmov s0, w11 2984; CHECK-SD-NEXT: sdiv w16, w15, w14 2985; CHECK-SD-NEXT: msub w8, w10, w8, w9 2986; CHECK-SD-NEXT: mov v0.s[1], w8 2987; CHECK-SD-NEXT: msub w8, w16, w14, w15 2988; CHECK-SD-NEXT: mov v0.s[2], w8 2989; CHECK-SD-NEXT: ret 2990; 2991; CHECK-GI-LABEL: sv3i32: 2992; CHECK-GI: // %bb.0: // %entry 2993; CHECK-GI-NEXT: fmov w8, s0 2994; CHECK-GI-NEXT: fmov w9, s1 2995; CHECK-GI-NEXT: mov s2, v0.s[1] 2996; CHECK-GI-NEXT: mov s3, v1.s[1] 2997; CHECK-GI-NEXT: mov s0, v0.s[2] 2998; CHECK-GI-NEXT: mov s1, v1.s[2] 2999; CHECK-GI-NEXT: sdiv w10, w8, w9 3000; CHECK-GI-NEXT: fmov w11, s2 3001; CHECK-GI-NEXT: fmov w12, s3 3002; CHECK-GI-NEXT: fmov w14, s0 3003; CHECK-GI-NEXT: fmov w15, s1 3004; CHECK-GI-NEXT: sdiv w13, w11, w12 3005; CHECK-GI-NEXT: msub w8, w10, w9, w8 3006; CHECK-GI-NEXT: mov v0.s[0], w8 3007; CHECK-GI-NEXT: sdiv w9, w14, w15 3008; CHECK-GI-NEXT: msub w8, w13, w12, w11 3009; CHECK-GI-NEXT: mov v0.s[1], w8 3010; CHECK-GI-NEXT: msub w8, w9, w15, w14 3011; CHECK-GI-NEXT: mov v0.s[2], w8 3012; CHECK-GI-NEXT: ret 3013entry: 3014 %s = srem <3 x i32> %d, %e 3015 ret <3 x i32> %s 3016} 3017 3018define <4 x i32> @sv4i32(<4 x i32> %d, <4 x i32> %e) { 3019; CHECK-SD-LABEL: sv4i32: 3020; CHECK-SD: // %bb.0: // %entry 3021; CHECK-SD-NEXT: fmov w11, s1 3022; CHECK-SD-NEXT: fmov w12, s0 3023; CHECK-SD-NEXT: mov w8, v1.s[1] 3024; CHECK-SD-NEXT: mov w9, v0.s[1] 3025; CHECK-SD-NEXT: mov w14, v1.s[2] 3026; CHECK-SD-NEXT: mov w15, v0.s[2] 3027; CHECK-SD-NEXT: mov w17, v1.s[3] 3028; 
CHECK-SD-NEXT: mov w18, v0.s[3] 3029; CHECK-SD-NEXT: sdiv w13, w12, w11 3030; CHECK-SD-NEXT: sdiv w10, w9, w8 3031; CHECK-SD-NEXT: msub w11, w13, w11, w12 3032; CHECK-SD-NEXT: fmov s0, w11 3033; CHECK-SD-NEXT: sdiv w16, w15, w14 3034; CHECK-SD-NEXT: msub w8, w10, w8, w9 3035; CHECK-SD-NEXT: mov v0.s[1], w8 3036; CHECK-SD-NEXT: sdiv w9, w18, w17 3037; CHECK-SD-NEXT: msub w8, w16, w14, w15 3038; CHECK-SD-NEXT: mov v0.s[2], w8 3039; CHECK-SD-NEXT: msub w8, w9, w17, w18 3040; CHECK-SD-NEXT: mov v0.s[3], w8 3041; CHECK-SD-NEXT: ret 3042; 3043; CHECK-GI-LABEL: sv4i32: 3044; CHECK-GI: // %bb.0: // %entry 3045; CHECK-GI-NEXT: fmov w8, s0 3046; CHECK-GI-NEXT: fmov w9, s1 3047; CHECK-GI-NEXT: mov w10, v1.s[1] 3048; CHECK-GI-NEXT: mov w11, v1.s[2] 3049; CHECK-GI-NEXT: mov w12, v1.s[3] 3050; CHECK-GI-NEXT: sdiv w8, w8, w9 3051; CHECK-GI-NEXT: mov w9, v0.s[1] 3052; CHECK-GI-NEXT: sdiv w9, w9, w10 3053; CHECK-GI-NEXT: mov w10, v0.s[2] 3054; CHECK-GI-NEXT: mov v2.s[0], w8 3055; CHECK-GI-NEXT: sdiv w10, w10, w11 3056; CHECK-GI-NEXT: mov w11, v0.s[3] 3057; CHECK-GI-NEXT: mov v2.s[1], w9 3058; CHECK-GI-NEXT: sdiv w8, w11, w12 3059; CHECK-GI-NEXT: mov v2.s[2], w10 3060; CHECK-GI-NEXT: mov v2.s[3], w8 3061; CHECK-GI-NEXT: mls v0.4s, v2.4s, v1.4s 3062; CHECK-GI-NEXT: ret 3063entry: 3064 %s = srem <4 x i32> %d, %e 3065 ret <4 x i32> %s 3066} 3067 3068define <8 x i32> @sv8i32(<8 x i32> %d, <8 x i32> %e) { 3069; CHECK-SD-LABEL: sv8i32: 3070; CHECK-SD: // %bb.0: // %entry 3071; CHECK-SD-NEXT: stp x22, x21, [sp, #-32]! 
// 16-byte Folded Spill 3072; CHECK-SD-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill 3073; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 3074; CHECK-SD-NEXT: .cfi_offset w19, -8 3075; CHECK-SD-NEXT: .cfi_offset w20, -16 3076; CHECK-SD-NEXT: .cfi_offset w21, -24 3077; CHECK-SD-NEXT: .cfi_offset w22, -32 3078; CHECK-SD-NEXT: mov w8, v2.s[1] 3079; CHECK-SD-NEXT: mov w9, v0.s[1] 3080; CHECK-SD-NEXT: fmov w11, s2 3081; CHECK-SD-NEXT: fmov w12, s0 3082; CHECK-SD-NEXT: fmov w4, s3 3083; CHECK-SD-NEXT: fmov w5, s1 3084; CHECK-SD-NEXT: mov w1, v3.s[1] 3085; CHECK-SD-NEXT: mov w2, v1.s[1] 3086; CHECK-SD-NEXT: mov w14, v2.s[2] 3087; CHECK-SD-NEXT: mov w15, v0.s[2] 3088; CHECK-SD-NEXT: mov w7, v3.s[2] 3089; CHECK-SD-NEXT: mov w19, v1.s[2] 3090; CHECK-SD-NEXT: sdiv w10, w9, w8 3091; CHECK-SD-NEXT: mov w17, v2.s[3] 3092; CHECK-SD-NEXT: mov w18, v0.s[3] 3093; CHECK-SD-NEXT: mov w21, v3.s[3] 3094; CHECK-SD-NEXT: mov w22, v1.s[3] 3095; CHECK-SD-NEXT: sdiv w13, w12, w11 3096; CHECK-SD-NEXT: msub w8, w10, w8, w9 3097; CHECK-SD-NEXT: sdiv w6, w5, w4 3098; CHECK-SD-NEXT: msub w9, w13, w11, w12 3099; CHECK-SD-NEXT: fmov s0, w9 3100; CHECK-SD-NEXT: mov v0.s[1], w8 3101; CHECK-SD-NEXT: sdiv w3, w2, w1 3102; CHECK-SD-NEXT: msub w10, w6, w4, w5 3103; CHECK-SD-NEXT: fmov s1, w10 3104; CHECK-SD-NEXT: sdiv w16, w15, w14 3105; CHECK-SD-NEXT: msub w11, w3, w1, w2 3106; CHECK-SD-NEXT: mov v1.s[1], w11 3107; CHECK-SD-NEXT: sdiv w20, w19, w7 3108; CHECK-SD-NEXT: msub w9, w16, w14, w15 3109; CHECK-SD-NEXT: mov v0.s[2], w9 3110; CHECK-SD-NEXT: sdiv w0, w18, w17 3111; CHECK-SD-NEXT: msub w8, w20, w7, w19 3112; CHECK-SD-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload 3113; CHECK-SD-NEXT: mov v1.s[2], w8 3114; CHECK-SD-NEXT: sdiv w12, w22, w21 3115; CHECK-SD-NEXT: msub w10, w0, w17, w18 3116; CHECK-SD-NEXT: mov v0.s[3], w10 3117; CHECK-SD-NEXT: msub w8, w12, w21, w22 3118; CHECK-SD-NEXT: mov v1.s[3], w8 3119; CHECK-SD-NEXT: ldp x22, x21, [sp], #32 // 16-byte Folded Reload 3120; CHECK-SD-NEXT: ret 
3121; 3122; CHECK-GI-LABEL: sv8i32: 3123; CHECK-GI: // %bb.0: // %entry 3124; CHECK-GI-NEXT: fmov w8, s0 3125; CHECK-GI-NEXT: fmov w9, s2 3126; CHECK-GI-NEXT: mov w10, v2.s[1] 3127; CHECK-GI-NEXT: mov w11, v2.s[2] 3128; CHECK-GI-NEXT: mov w12, v2.s[3] 3129; CHECK-GI-NEXT: fmov w13, s3 3130; CHECK-GI-NEXT: mov w14, v3.s[1] 3131; CHECK-GI-NEXT: mov w15, v3.s[2] 3132; CHECK-GI-NEXT: sdiv w8, w8, w9 3133; CHECK-GI-NEXT: mov w9, v0.s[1] 3134; CHECK-GI-NEXT: sdiv w9, w9, w10 3135; CHECK-GI-NEXT: mov w10, v0.s[2] 3136; CHECK-GI-NEXT: mov v4.s[0], w8 3137; CHECK-GI-NEXT: mov w8, v1.s[3] 3138; CHECK-GI-NEXT: sdiv w10, w10, w11 3139; CHECK-GI-NEXT: mov w11, v0.s[3] 3140; CHECK-GI-NEXT: mov v4.s[1], w9 3141; CHECK-GI-NEXT: sdiv w11, w11, w12 3142; CHECK-GI-NEXT: fmov w12, s1 3143; CHECK-GI-NEXT: mov v4.s[2], w10 3144; CHECK-GI-NEXT: sdiv w12, w12, w13 3145; CHECK-GI-NEXT: mov w13, v1.s[1] 3146; CHECK-GI-NEXT: mov v4.s[3], w11 3147; CHECK-GI-NEXT: mls v0.4s, v4.4s, v2.4s 3148; CHECK-GI-NEXT: sdiv w13, w13, w14 3149; CHECK-GI-NEXT: mov w14, v1.s[2] 3150; CHECK-GI-NEXT: mov v5.s[0], w12 3151; CHECK-GI-NEXT: mov w12, v3.s[3] 3152; CHECK-GI-NEXT: sdiv w14, w14, w15 3153; CHECK-GI-NEXT: mov v5.s[1], w13 3154; CHECK-GI-NEXT: sdiv w8, w8, w12 3155; CHECK-GI-NEXT: mov v5.s[2], w14 3156; CHECK-GI-NEXT: mov v5.s[3], w8 3157; CHECK-GI-NEXT: mls v1.4s, v5.4s, v3.4s 3158; CHECK-GI-NEXT: ret 3159entry: 3160 %s = srem <8 x i32> %d, %e 3161 ret <8 x i32> %s 3162} 3163 3164define <2 x i32> @uv2i32(<2 x i32> %d, <2 x i32> %e) { 3165; CHECK-SD-LABEL: uv2i32: 3166; CHECK-SD: // %bb.0: // %entry 3167; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 3168; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 3169; CHECK-SD-NEXT: fmov w8, s1 3170; CHECK-SD-NEXT: fmov w9, s0 3171; CHECK-SD-NEXT: mov w11, v1.s[1] 3172; CHECK-SD-NEXT: mov w12, v0.s[1] 3173; CHECK-SD-NEXT: udiv w10, w9, w8 3174; CHECK-SD-NEXT: udiv w13, w12, w11 3175; CHECK-SD-NEXT: msub w8, w10, w8, w9 3176; CHECK-SD-NEXT: fmov s0, w8 
3177; CHECK-SD-NEXT: msub w9, w13, w11, w12 3178; CHECK-SD-NEXT: mov v0.s[1], w9 3179; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 3180; CHECK-SD-NEXT: ret 3181; 3182; CHECK-GI-LABEL: uv2i32: 3183; CHECK-GI: // %bb.0: // %entry 3184; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 3185; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 3186; CHECK-GI-NEXT: fmov w8, s0 3187; CHECK-GI-NEXT: fmov w9, s1 3188; CHECK-GI-NEXT: mov w10, v1.s[1] 3189; CHECK-GI-NEXT: udiv w8, w8, w9 3190; CHECK-GI-NEXT: mov w9, v0.s[1] 3191; CHECK-GI-NEXT: udiv w9, w9, w10 3192; CHECK-GI-NEXT: mov v2.s[0], w8 3193; CHECK-GI-NEXT: mov v2.s[1], w9 3194; CHECK-GI-NEXT: mls v0.2s, v2.2s, v1.2s 3195; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 3196; CHECK-GI-NEXT: ret 3197entry: 3198 %s = urem <2 x i32> %d, %e 3199 ret <2 x i32> %s 3200} 3201 3202define <3 x i32> @uv3i32(<3 x i32> %d, <3 x i32> %e) { 3203; CHECK-SD-LABEL: uv3i32: 3204; CHECK-SD: // %bb.0: // %entry 3205; CHECK-SD-NEXT: fmov w11, s1 3206; CHECK-SD-NEXT: fmov w12, s0 3207; CHECK-SD-NEXT: mov w8, v1.s[1] 3208; CHECK-SD-NEXT: mov w9, v0.s[1] 3209; CHECK-SD-NEXT: mov w14, v1.s[2] 3210; CHECK-SD-NEXT: mov w15, v0.s[2] 3211; CHECK-SD-NEXT: udiv w13, w12, w11 3212; CHECK-SD-NEXT: udiv w10, w9, w8 3213; CHECK-SD-NEXT: msub w11, w13, w11, w12 3214; CHECK-SD-NEXT: fmov s0, w11 3215; CHECK-SD-NEXT: udiv w16, w15, w14 3216; CHECK-SD-NEXT: msub w8, w10, w8, w9 3217; CHECK-SD-NEXT: mov v0.s[1], w8 3218; CHECK-SD-NEXT: msub w8, w16, w14, w15 3219; CHECK-SD-NEXT: mov v0.s[2], w8 3220; CHECK-SD-NEXT: ret 3221; 3222; CHECK-GI-LABEL: uv3i32: 3223; CHECK-GI: // %bb.0: // %entry 3224; CHECK-GI-NEXT: fmov w8, s0 3225; CHECK-GI-NEXT: fmov w9, s1 3226; CHECK-GI-NEXT: mov s2, v0.s[1] 3227; CHECK-GI-NEXT: mov s3, v1.s[1] 3228; CHECK-GI-NEXT: mov s0, v0.s[2] 3229; CHECK-GI-NEXT: mov s1, v1.s[2] 3230; CHECK-GI-NEXT: udiv w10, w8, w9 3231; CHECK-GI-NEXT: fmov w11, s2 3232; CHECK-GI-NEXT: fmov w12, s3 3233; CHECK-GI-NEXT: fmov w14, s0 
3234; CHECK-GI-NEXT: fmov w15, s1 3235; CHECK-GI-NEXT: udiv w13, w11, w12 3236; CHECK-GI-NEXT: msub w8, w10, w9, w8 3237; CHECK-GI-NEXT: mov v0.s[0], w8 3238; CHECK-GI-NEXT: udiv w9, w14, w15 3239; CHECK-GI-NEXT: msub w8, w13, w12, w11 3240; CHECK-GI-NEXT: mov v0.s[1], w8 3241; CHECK-GI-NEXT: msub w8, w9, w15, w14 3242; CHECK-GI-NEXT: mov v0.s[2], w8 3243; CHECK-GI-NEXT: ret 3244entry: 3245 %s = urem <3 x i32> %d, %e 3246 ret <3 x i32> %s 3247} 3248 3249define <4 x i32> @uv4i32(<4 x i32> %d, <4 x i32> %e) { 3250; CHECK-SD-LABEL: uv4i32: 3251; CHECK-SD: // %bb.0: // %entry 3252; CHECK-SD-NEXT: fmov w11, s1 3253; CHECK-SD-NEXT: fmov w12, s0 3254; CHECK-SD-NEXT: mov w8, v1.s[1] 3255; CHECK-SD-NEXT: mov w9, v0.s[1] 3256; CHECK-SD-NEXT: mov w14, v1.s[2] 3257; CHECK-SD-NEXT: mov w15, v0.s[2] 3258; CHECK-SD-NEXT: mov w17, v1.s[3] 3259; CHECK-SD-NEXT: mov w18, v0.s[3] 3260; CHECK-SD-NEXT: udiv w13, w12, w11 3261; CHECK-SD-NEXT: udiv w10, w9, w8 3262; CHECK-SD-NEXT: msub w11, w13, w11, w12 3263; CHECK-SD-NEXT: fmov s0, w11 3264; CHECK-SD-NEXT: udiv w16, w15, w14 3265; CHECK-SD-NEXT: msub w8, w10, w8, w9 3266; CHECK-SD-NEXT: mov v0.s[1], w8 3267; CHECK-SD-NEXT: udiv w9, w18, w17 3268; CHECK-SD-NEXT: msub w8, w16, w14, w15 3269; CHECK-SD-NEXT: mov v0.s[2], w8 3270; CHECK-SD-NEXT: msub w8, w9, w17, w18 3271; CHECK-SD-NEXT: mov v0.s[3], w8 3272; CHECK-SD-NEXT: ret 3273; 3274; CHECK-GI-LABEL: uv4i32: 3275; CHECK-GI: // %bb.0: // %entry 3276; CHECK-GI-NEXT: fmov w8, s0 3277; CHECK-GI-NEXT: fmov w9, s1 3278; CHECK-GI-NEXT: mov w10, v1.s[1] 3279; CHECK-GI-NEXT: mov w11, v1.s[2] 3280; CHECK-GI-NEXT: mov w12, v1.s[3] 3281; CHECK-GI-NEXT: udiv w8, w8, w9 3282; CHECK-GI-NEXT: mov w9, v0.s[1] 3283; CHECK-GI-NEXT: udiv w9, w9, w10 3284; CHECK-GI-NEXT: mov w10, v0.s[2] 3285; CHECK-GI-NEXT: mov v2.s[0], w8 3286; CHECK-GI-NEXT: udiv w10, w10, w11 3287; CHECK-GI-NEXT: mov w11, v0.s[3] 3288; CHECK-GI-NEXT: mov v2.s[1], w9 3289; CHECK-GI-NEXT: udiv w8, w11, w12 3290; CHECK-GI-NEXT: mov 
v2.s[2], w10 3291; CHECK-GI-NEXT: mov v2.s[3], w8 3292; CHECK-GI-NEXT: mls v0.4s, v2.4s, v1.4s 3293; CHECK-GI-NEXT: ret 3294entry: 3295 %s = urem <4 x i32> %d, %e 3296 ret <4 x i32> %s 3297} 3298 3299define <8 x i32> @uv8i32(<8 x i32> %d, <8 x i32> %e) { 3300; CHECK-SD-LABEL: uv8i32: 3301; CHECK-SD: // %bb.0: // %entry 3302; CHECK-SD-NEXT: stp x22, x21, [sp, #-32]! // 16-byte Folded Spill 3303; CHECK-SD-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill 3304; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 3305; CHECK-SD-NEXT: .cfi_offset w19, -8 3306; CHECK-SD-NEXT: .cfi_offset w20, -16 3307; CHECK-SD-NEXT: .cfi_offset w21, -24 3308; CHECK-SD-NEXT: .cfi_offset w22, -32 3309; CHECK-SD-NEXT: mov w8, v2.s[1] 3310; CHECK-SD-NEXT: mov w9, v0.s[1] 3311; CHECK-SD-NEXT: fmov w11, s2 3312; CHECK-SD-NEXT: fmov w12, s0 3313; CHECK-SD-NEXT: fmov w4, s3 3314; CHECK-SD-NEXT: fmov w5, s1 3315; CHECK-SD-NEXT: mov w1, v3.s[1] 3316; CHECK-SD-NEXT: mov w2, v1.s[1] 3317; CHECK-SD-NEXT: mov w14, v2.s[2] 3318; CHECK-SD-NEXT: mov w15, v0.s[2] 3319; CHECK-SD-NEXT: mov w7, v3.s[2] 3320; CHECK-SD-NEXT: mov w19, v1.s[2] 3321; CHECK-SD-NEXT: udiv w10, w9, w8 3322; CHECK-SD-NEXT: mov w17, v2.s[3] 3323; CHECK-SD-NEXT: mov w18, v0.s[3] 3324; CHECK-SD-NEXT: mov w21, v3.s[3] 3325; CHECK-SD-NEXT: mov w22, v1.s[3] 3326; CHECK-SD-NEXT: udiv w13, w12, w11 3327; CHECK-SD-NEXT: msub w8, w10, w8, w9 3328; CHECK-SD-NEXT: udiv w6, w5, w4 3329; CHECK-SD-NEXT: msub w9, w13, w11, w12 3330; CHECK-SD-NEXT: fmov s0, w9 3331; CHECK-SD-NEXT: mov v0.s[1], w8 3332; CHECK-SD-NEXT: udiv w3, w2, w1 3333; CHECK-SD-NEXT: msub w10, w6, w4, w5 3334; CHECK-SD-NEXT: fmov s1, w10 3335; CHECK-SD-NEXT: udiv w16, w15, w14 3336; CHECK-SD-NEXT: msub w11, w3, w1, w2 3337; CHECK-SD-NEXT: mov v1.s[1], w11 3338; CHECK-SD-NEXT: udiv w20, w19, w7 3339; CHECK-SD-NEXT: msub w9, w16, w14, w15 3340; CHECK-SD-NEXT: mov v0.s[2], w9 3341; CHECK-SD-NEXT: udiv w0, w18, w17 3342; CHECK-SD-NEXT: msub w8, w20, w7, w19 3343; CHECK-SD-NEXT: ldp x20, x19, 
[sp, #16] // 16-byte Folded Reload 3344; CHECK-SD-NEXT: mov v1.s[2], w8 3345; CHECK-SD-NEXT: udiv w12, w22, w21 3346; CHECK-SD-NEXT: msub w10, w0, w17, w18 3347; CHECK-SD-NEXT: mov v0.s[3], w10 3348; CHECK-SD-NEXT: msub w8, w12, w21, w22 3349; CHECK-SD-NEXT: mov v1.s[3], w8 3350; CHECK-SD-NEXT: ldp x22, x21, [sp], #32 // 16-byte Folded Reload 3351; CHECK-SD-NEXT: ret 3352; 3353; CHECK-GI-LABEL: uv8i32: 3354; CHECK-GI: // %bb.0: // %entry 3355; CHECK-GI-NEXT: fmov w8, s0 3356; CHECK-GI-NEXT: fmov w9, s2 3357; CHECK-GI-NEXT: mov w10, v2.s[1] 3358; CHECK-GI-NEXT: mov w11, v2.s[2] 3359; CHECK-GI-NEXT: mov w12, v2.s[3] 3360; CHECK-GI-NEXT: fmov w13, s3 3361; CHECK-GI-NEXT: mov w14, v3.s[1] 3362; CHECK-GI-NEXT: mov w15, v3.s[2] 3363; CHECK-GI-NEXT: udiv w8, w8, w9 3364; CHECK-GI-NEXT: mov w9, v0.s[1] 3365; CHECK-GI-NEXT: udiv w9, w9, w10 3366; CHECK-GI-NEXT: mov w10, v0.s[2] 3367; CHECK-GI-NEXT: mov v4.s[0], w8 3368; CHECK-GI-NEXT: mov w8, v1.s[3] 3369; CHECK-GI-NEXT: udiv w10, w10, w11 3370; CHECK-GI-NEXT: mov w11, v0.s[3] 3371; CHECK-GI-NEXT: mov v4.s[1], w9 3372; CHECK-GI-NEXT: udiv w11, w11, w12 3373; CHECK-GI-NEXT: fmov w12, s1 3374; CHECK-GI-NEXT: mov v4.s[2], w10 3375; CHECK-GI-NEXT: udiv w12, w12, w13 3376; CHECK-GI-NEXT: mov w13, v1.s[1] 3377; CHECK-GI-NEXT: mov v4.s[3], w11 3378; CHECK-GI-NEXT: mls v0.4s, v4.4s, v2.4s 3379; CHECK-GI-NEXT: udiv w13, w13, w14 3380; CHECK-GI-NEXT: mov w14, v1.s[2] 3381; CHECK-GI-NEXT: mov v5.s[0], w12 3382; CHECK-GI-NEXT: mov w12, v3.s[3] 3383; CHECK-GI-NEXT: udiv w14, w14, w15 3384; CHECK-GI-NEXT: mov v5.s[1], w13 3385; CHECK-GI-NEXT: udiv w8, w8, w12 3386; CHECK-GI-NEXT: mov v5.s[2], w14 3387; CHECK-GI-NEXT: mov v5.s[3], w8 3388; CHECK-GI-NEXT: mls v1.4s, v5.4s, v3.4s 3389; CHECK-GI-NEXT: ret 3390entry: 3391 %s = urem <8 x i32> %d, %e 3392 ret <8 x i32> %s 3393} 3394 3395define <2 x i64> @sv2i64(<2 x i64> %d, <2 x i64> %e) { 3396; CHECK-SD-LABEL: sv2i64: 3397; CHECK-SD: // %bb.0: // %entry 3398; CHECK-SD-NEXT: fmov x8, d1 3399; 
CHECK-SD-NEXT: fmov x9, d0 3400; CHECK-SD-NEXT: mov x11, v1.d[1] 3401; CHECK-SD-NEXT: mov x12, v0.d[1] 3402; CHECK-SD-NEXT: sdiv x10, x9, x8 3403; CHECK-SD-NEXT: sdiv x13, x12, x11 3404; CHECK-SD-NEXT: msub x8, x10, x8, x9 3405; CHECK-SD-NEXT: fmov d0, x8 3406; CHECK-SD-NEXT: msub x9, x13, x11, x12 3407; CHECK-SD-NEXT: mov v0.d[1], x9 3408; CHECK-SD-NEXT: ret 3409; 3410; CHECK-GI-LABEL: sv2i64: 3411; CHECK-GI: // %bb.0: // %entry 3412; CHECK-GI-NEXT: fmov x8, d0 3413; CHECK-GI-NEXT: fmov x9, d1 3414; CHECK-GI-NEXT: mov x10, v1.d[1] 3415; CHECK-GI-NEXT: mov x11, v0.d[1] 3416; CHECK-GI-NEXT: sdiv x8, x8, x9 3417; CHECK-GI-NEXT: sdiv x11, x11, x10 3418; CHECK-GI-NEXT: mov v1.d[0], x8 3419; CHECK-GI-NEXT: mov v1.d[1], x11 3420; CHECK-GI-NEXT: fmov x8, d1 3421; CHECK-GI-NEXT: mov x11, v1.d[1] 3422; CHECK-GI-NEXT: mul x8, x8, x9 3423; CHECK-GI-NEXT: mul x9, x11, x10 3424; CHECK-GI-NEXT: mov v1.d[0], x8 3425; CHECK-GI-NEXT: mov v1.d[1], x9 3426; CHECK-GI-NEXT: sub v0.2d, v0.2d, v1.2d 3427; CHECK-GI-NEXT: ret 3428entry: 3429 %s = srem <2 x i64> %d, %e 3430 ret <2 x i64> %s 3431} 3432 3433define <3 x i64> @sv3i64(<3 x i64> %d, <3 x i64> %e) { 3434; CHECK-SD-LABEL: sv3i64: 3435; CHECK-SD: // %bb.0: // %entry 3436; CHECK-SD-NEXT: // kill: def $d5 killed $d5 def $q5 3437; CHECK-SD-NEXT: // kill: def $d4 killed $d4 def $q4 3438; CHECK-SD-NEXT: // kill: def $d3 killed $d3 def $q3 3439; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 3440; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 3441; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 3442; CHECK-SD-NEXT: fmov x8, d3 3443; CHECK-SD-NEXT: fmov x9, d0 3444; CHECK-SD-NEXT: fmov x11, d4 3445; CHECK-SD-NEXT: fmov x12, d1 3446; CHECK-SD-NEXT: fmov x14, d5 3447; CHECK-SD-NEXT: fmov x15, d2 3448; CHECK-SD-NEXT: sdiv x10, x9, x8 3449; CHECK-SD-NEXT: sdiv x13, x12, x11 3450; CHECK-SD-NEXT: msub x8, x10, x8, x9 3451; CHECK-SD-NEXT: fmov d0, x8 3452; CHECK-SD-NEXT: sdiv x16, x15, x14 3453; CHECK-SD-NEXT: msub x9, x13, x11, x12 
3454; CHECK-SD-NEXT: fmov d1, x9 3455; CHECK-SD-NEXT: msub x10, x16, x14, x15 3456; CHECK-SD-NEXT: fmov d2, x10 3457; CHECK-SD-NEXT: ret 3458; 3459; CHECK-GI-LABEL: sv3i64: 3460; CHECK-GI: // %bb.0: // %entry 3461; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 3462; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3 3463; CHECK-GI-NEXT: fmov x8, d0 3464; CHECK-GI-NEXT: fmov x9, d3 3465; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 3466; CHECK-GI-NEXT: // kill: def $d4 killed $d4 def $q4 3467; CHECK-GI-NEXT: fmov x10, d4 3468; CHECK-GI-NEXT: mov v3.d[1], v4.d[0] 3469; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] 3470; CHECK-GI-NEXT: sdiv x8, x8, x9 3471; CHECK-GI-NEXT: fmov x9, d1 3472; CHECK-GI-NEXT: fmov x11, d3 3473; CHECK-GI-NEXT: mov x14, v3.d[1] 3474; CHECK-GI-NEXT: sdiv x9, x9, x10 3475; CHECK-GI-NEXT: mov v6.d[0], x8 3476; CHECK-GI-NEXT: fmov x8, d2 3477; CHECK-GI-NEXT: mov v6.d[1], x9 3478; CHECK-GI-NEXT: fmov x9, d5 3479; CHECK-GI-NEXT: sdiv x12, x8, x9 3480; CHECK-GI-NEXT: fmov x10, d6 3481; CHECK-GI-NEXT: mov x13, v6.d[1] 3482; CHECK-GI-NEXT: mul x10, x10, x11 3483; CHECK-GI-NEXT: mul x11, x13, x14 3484; CHECK-GI-NEXT: mov v2.d[0], x10 3485; CHECK-GI-NEXT: mov v2.d[1], x11 3486; CHECK-GI-NEXT: msub x8, x12, x9, x8 3487; CHECK-GI-NEXT: sub v0.2d, v0.2d, v2.2d 3488; CHECK-GI-NEXT: mov d1, v0.d[1] 3489; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 3490; CHECK-GI-NEXT: fmov d2, x8 3491; CHECK-GI-NEXT: ret 3492entry: 3493 %s = srem <3 x i64> %d, %e 3494 ret <3 x i64> %s 3495} 3496 3497define <4 x i64> @sv4i64(<4 x i64> %d, <4 x i64> %e) { 3498; CHECK-SD-LABEL: sv4i64: 3499; CHECK-SD: // %bb.0: // %entry 3500; CHECK-SD-NEXT: mov x8, v2.d[1] 3501; CHECK-SD-NEXT: mov x9, v0.d[1] 3502; CHECK-SD-NEXT: fmov x11, d2 3503; CHECK-SD-NEXT: fmov x12, d0 3504; CHECK-SD-NEXT: fmov x14, d3 3505; CHECK-SD-NEXT: fmov x15, d1 3506; CHECK-SD-NEXT: mov x17, v3.d[1] 3507; CHECK-SD-NEXT: mov x18, v1.d[1] 3508; CHECK-SD-NEXT: sdiv x10, x9, x8 3509; CHECK-SD-NEXT: sdiv 
x13, x12, x11 3510; CHECK-SD-NEXT: msub x8, x10, x8, x9 3511; CHECK-SD-NEXT: sdiv x16, x15, x14 3512; CHECK-SD-NEXT: msub x9, x13, x11, x12 3513; CHECK-SD-NEXT: fmov d0, x9 3514; CHECK-SD-NEXT: mov v0.d[1], x8 3515; CHECK-SD-NEXT: sdiv x0, x18, x17 3516; CHECK-SD-NEXT: msub x10, x16, x14, x15 3517; CHECK-SD-NEXT: fmov d1, x10 3518; CHECK-SD-NEXT: msub x11, x0, x17, x18 3519; CHECK-SD-NEXT: mov v1.d[1], x11 3520; CHECK-SD-NEXT: ret 3521; 3522; CHECK-GI-LABEL: sv4i64: 3523; CHECK-GI: // %bb.0: // %entry 3524; CHECK-GI-NEXT: fmov x8, d0 3525; CHECK-GI-NEXT: fmov x9, d2 3526; CHECK-GI-NEXT: mov x10, v2.d[1] 3527; CHECK-GI-NEXT: mov x11, v0.d[1] 3528; CHECK-GI-NEXT: fmov x12, d1 3529; CHECK-GI-NEXT: fmov x13, d3 3530; CHECK-GI-NEXT: mov x14, v3.d[1] 3531; CHECK-GI-NEXT: mov x15, v1.d[1] 3532; CHECK-GI-NEXT: sdiv x8, x8, x9 3533; CHECK-GI-NEXT: sdiv x12, x12, x13 3534; CHECK-GI-NEXT: mov v2.d[0], x8 3535; CHECK-GI-NEXT: sdiv x11, x11, x10 3536; CHECK-GI-NEXT: mov v3.d[0], x12 3537; CHECK-GI-NEXT: sdiv x15, x15, x14 3538; CHECK-GI-NEXT: mov v2.d[1], x11 3539; CHECK-GI-NEXT: fmov x8, d2 3540; CHECK-GI-NEXT: mov x11, v2.d[1] 3541; CHECK-GI-NEXT: mul x8, x8, x9 3542; CHECK-GI-NEXT: mul x10, x11, x10 3543; CHECK-GI-NEXT: mov v2.d[0], x8 3544; CHECK-GI-NEXT: mov v3.d[1], x15 3545; CHECK-GI-NEXT: mov v2.d[1], x10 3546; CHECK-GI-NEXT: fmov x9, d3 3547; CHECK-GI-NEXT: mov x12, v3.d[1] 3548; CHECK-GI-NEXT: sub v0.2d, v0.2d, v2.2d 3549; CHECK-GI-NEXT: mul x9, x9, x13 3550; CHECK-GI-NEXT: mul x11, x12, x14 3551; CHECK-GI-NEXT: mov v3.d[0], x9 3552; CHECK-GI-NEXT: mov v3.d[1], x11 3553; CHECK-GI-NEXT: sub v1.2d, v1.2d, v3.2d 3554; CHECK-GI-NEXT: ret 3555entry: 3556 %s = srem <4 x i64> %d, %e 3557 ret <4 x i64> %s 3558} 3559 3560define <2 x i64> @uv2i64(<2 x i64> %d, <2 x i64> %e) { 3561; CHECK-SD-LABEL: uv2i64: 3562; CHECK-SD: // %bb.0: // %entry 3563; CHECK-SD-NEXT: fmov x8, d1 3564; CHECK-SD-NEXT: fmov x9, d0 3565; CHECK-SD-NEXT: mov x11, v1.d[1] 3566; CHECK-SD-NEXT: mov x12, 
v0.d[1] 3567; CHECK-SD-NEXT: udiv x10, x9, x8 3568; CHECK-SD-NEXT: udiv x13, x12, x11 3569; CHECK-SD-NEXT: msub x8, x10, x8, x9 3570; CHECK-SD-NEXT: fmov d0, x8 3571; CHECK-SD-NEXT: msub x9, x13, x11, x12 3572; CHECK-SD-NEXT: mov v0.d[1], x9 3573; CHECK-SD-NEXT: ret 3574; 3575; CHECK-GI-LABEL: uv2i64: 3576; CHECK-GI: // %bb.0: // %entry 3577; CHECK-GI-NEXT: fmov x8, d0 3578; CHECK-GI-NEXT: fmov x9, d1 3579; CHECK-GI-NEXT: mov x10, v1.d[1] 3580; CHECK-GI-NEXT: mov x11, v0.d[1] 3581; CHECK-GI-NEXT: udiv x8, x8, x9 3582; CHECK-GI-NEXT: udiv x11, x11, x10 3583; CHECK-GI-NEXT: mov v1.d[0], x8 3584; CHECK-GI-NEXT: mov v1.d[1], x11 3585; CHECK-GI-NEXT: fmov x8, d1 3586; CHECK-GI-NEXT: mov x11, v1.d[1] 3587; CHECK-GI-NEXT: mul x8, x8, x9 3588; CHECK-GI-NEXT: mul x9, x11, x10 3589; CHECK-GI-NEXT: mov v1.d[0], x8 3590; CHECK-GI-NEXT: mov v1.d[1], x9 3591; CHECK-GI-NEXT: sub v0.2d, v0.2d, v1.2d 3592; CHECK-GI-NEXT: ret 3593entry: 3594 %s = urem <2 x i64> %d, %e 3595 ret <2 x i64> %s 3596} 3597 3598define <3 x i64> @uv3i64(<3 x i64> %d, <3 x i64> %e) { 3599; CHECK-SD-LABEL: uv3i64: 3600; CHECK-SD: // %bb.0: // %entry 3601; CHECK-SD-NEXT: // kill: def $d5 killed $d5 def $q5 3602; CHECK-SD-NEXT: // kill: def $d4 killed $d4 def $q4 3603; CHECK-SD-NEXT: // kill: def $d3 killed $d3 def $q3 3604; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 3605; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 3606; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 3607; CHECK-SD-NEXT: fmov x8, d3 3608; CHECK-SD-NEXT: fmov x9, d0 3609; CHECK-SD-NEXT: fmov x11, d4 3610; CHECK-SD-NEXT: fmov x12, d1 3611; CHECK-SD-NEXT: fmov x14, d5 3612; CHECK-SD-NEXT: fmov x15, d2 3613; CHECK-SD-NEXT: udiv x10, x9, x8 3614; CHECK-SD-NEXT: udiv x13, x12, x11 3615; CHECK-SD-NEXT: msub x8, x10, x8, x9 3616; CHECK-SD-NEXT: fmov d0, x8 3617; CHECK-SD-NEXT: udiv x16, x15, x14 3618; CHECK-SD-NEXT: msub x9, x13, x11, x12 3619; CHECK-SD-NEXT: fmov d1, x9 3620; CHECK-SD-NEXT: msub x10, x16, x14, x15 3621; 
CHECK-SD-NEXT: fmov d2, x10 3622; CHECK-SD-NEXT: ret 3623; 3624; CHECK-GI-LABEL: uv3i64: 3625; CHECK-GI: // %bb.0: // %entry 3626; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 3627; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3 3628; CHECK-GI-NEXT: fmov x8, d0 3629; CHECK-GI-NEXT: fmov x9, d3 3630; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 3631; CHECK-GI-NEXT: // kill: def $d4 killed $d4 def $q4 3632; CHECK-GI-NEXT: fmov x10, d4 3633; CHECK-GI-NEXT: mov v3.d[1], v4.d[0] 3634; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] 3635; CHECK-GI-NEXT: udiv x8, x8, x9 3636; CHECK-GI-NEXT: fmov x9, d1 3637; CHECK-GI-NEXT: fmov x11, d3 3638; CHECK-GI-NEXT: mov x14, v3.d[1] 3639; CHECK-GI-NEXT: udiv x9, x9, x10 3640; CHECK-GI-NEXT: mov v6.d[0], x8 3641; CHECK-GI-NEXT: fmov x8, d2 3642; CHECK-GI-NEXT: mov v6.d[1], x9 3643; CHECK-GI-NEXT: fmov x9, d5 3644; CHECK-GI-NEXT: udiv x12, x8, x9 3645; CHECK-GI-NEXT: fmov x10, d6 3646; CHECK-GI-NEXT: mov x13, v6.d[1] 3647; CHECK-GI-NEXT: mul x10, x10, x11 3648; CHECK-GI-NEXT: mul x11, x13, x14 3649; CHECK-GI-NEXT: mov v2.d[0], x10 3650; CHECK-GI-NEXT: mov v2.d[1], x11 3651; CHECK-GI-NEXT: msub x8, x12, x9, x8 3652; CHECK-GI-NEXT: sub v0.2d, v0.2d, v2.2d 3653; CHECK-GI-NEXT: mov d1, v0.d[1] 3654; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 3655; CHECK-GI-NEXT: fmov d2, x8 3656; CHECK-GI-NEXT: ret 3657entry: 3658 %s = urem <3 x i64> %d, %e 3659 ret <3 x i64> %s 3660} 3661 3662define <4 x i64> @uv4i64(<4 x i64> %d, <4 x i64> %e) { 3663; CHECK-SD-LABEL: uv4i64: 3664; CHECK-SD: // %bb.0: // %entry 3665; CHECK-SD-NEXT: mov x8, v2.d[1] 3666; CHECK-SD-NEXT: mov x9, v0.d[1] 3667; CHECK-SD-NEXT: fmov x11, d2 3668; CHECK-SD-NEXT: fmov x12, d0 3669; CHECK-SD-NEXT: fmov x14, d3 3670; CHECK-SD-NEXT: fmov x15, d1 3671; CHECK-SD-NEXT: mov x17, v3.d[1] 3672; CHECK-SD-NEXT: mov x18, v1.d[1] 3673; CHECK-SD-NEXT: udiv x10, x9, x8 3674; CHECK-SD-NEXT: udiv x13, x12, x11 3675; CHECK-SD-NEXT: msub x8, x10, x8, x9 3676; CHECK-SD-NEXT: udiv 
x16, x15, x14 3677; CHECK-SD-NEXT: msub x9, x13, x11, x12 3678; CHECK-SD-NEXT: fmov d0, x9 3679; CHECK-SD-NEXT: mov v0.d[1], x8 3680; CHECK-SD-NEXT: udiv x0, x18, x17 3681; CHECK-SD-NEXT: msub x10, x16, x14, x15 3682; CHECK-SD-NEXT: fmov d1, x10 3683; CHECK-SD-NEXT: msub x11, x0, x17, x18 3684; CHECK-SD-NEXT: mov v1.d[1], x11 3685; CHECK-SD-NEXT: ret 3686; 3687; CHECK-GI-LABEL: uv4i64: 3688; CHECK-GI: // %bb.0: // %entry 3689; CHECK-GI-NEXT: fmov x8, d0 3690; CHECK-GI-NEXT: fmov x9, d2 3691; CHECK-GI-NEXT: mov x10, v2.d[1] 3692; CHECK-GI-NEXT: mov x11, v0.d[1] 3693; CHECK-GI-NEXT: fmov x12, d1 3694; CHECK-GI-NEXT: fmov x13, d3 3695; CHECK-GI-NEXT: mov x14, v3.d[1] 3696; CHECK-GI-NEXT: mov x15, v1.d[1] 3697; CHECK-GI-NEXT: udiv x8, x8, x9 3698; CHECK-GI-NEXT: udiv x12, x12, x13 3699; CHECK-GI-NEXT: mov v2.d[0], x8 3700; CHECK-GI-NEXT: udiv x11, x11, x10 3701; CHECK-GI-NEXT: mov v3.d[0], x12 3702; CHECK-GI-NEXT: udiv x15, x15, x14 3703; CHECK-GI-NEXT: mov v2.d[1], x11 3704; CHECK-GI-NEXT: fmov x8, d2 3705; CHECK-GI-NEXT: mov x11, v2.d[1] 3706; CHECK-GI-NEXT: mul x8, x8, x9 3707; CHECK-GI-NEXT: mul x10, x11, x10 3708; CHECK-GI-NEXT: mov v2.d[0], x8 3709; CHECK-GI-NEXT: mov v3.d[1], x15 3710; CHECK-GI-NEXT: mov v2.d[1], x10 3711; CHECK-GI-NEXT: fmov x9, d3 3712; CHECK-GI-NEXT: mov x12, v3.d[1] 3713; CHECK-GI-NEXT: sub v0.2d, v0.2d, v2.2d 3714; CHECK-GI-NEXT: mul x9, x9, x13 3715; CHECK-GI-NEXT: mul x11, x12, x14 3716; CHECK-GI-NEXT: mov v3.d[0], x9 3717; CHECK-GI-NEXT: mov v3.d[1], x11 3718; CHECK-GI-NEXT: sub v1.2d, v1.2d, v3.2d 3719; CHECK-GI-NEXT: ret 3720entry: 3721 %s = urem <4 x i64> %d, %e 3722 ret <4 x i64> %s 3723} 3724 3725define <2 x i128> @sv2i128(<2 x i128> %d, <2 x i128> %e) { 3726; CHECK-SD-LABEL: sv2i128: 3727; CHECK-SD: // %bb.0: // %entry 3728; CHECK-SD-NEXT: str x30, [sp, #-64]! 
// 8-byte Folded Spill 3729; CHECK-SD-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill 3730; CHECK-SD-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill 3731; CHECK-SD-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill 3732; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 3733; CHECK-SD-NEXT: .cfi_offset w19, -8 3734; CHECK-SD-NEXT: .cfi_offset w20, -16 3735; CHECK-SD-NEXT: .cfi_offset w21, -24 3736; CHECK-SD-NEXT: .cfi_offset w22, -32 3737; CHECK-SD-NEXT: .cfi_offset w23, -40 3738; CHECK-SD-NEXT: .cfi_offset w24, -48 3739; CHECK-SD-NEXT: .cfi_offset w30, -64 3740; CHECK-SD-NEXT: mov x21, x3 3741; CHECK-SD-NEXT: mov x22, x2 3742; CHECK-SD-NEXT: mov x2, x4 3743; CHECK-SD-NEXT: mov x3, x5 3744; CHECK-SD-NEXT: mov x19, x7 3745; CHECK-SD-NEXT: mov x20, x6 3746; CHECK-SD-NEXT: bl __modti3 3747; CHECK-SD-NEXT: mov x23, x0 3748; CHECK-SD-NEXT: mov x24, x1 3749; CHECK-SD-NEXT: mov x0, x22 3750; CHECK-SD-NEXT: mov x1, x21 3751; CHECK-SD-NEXT: mov x2, x20 3752; CHECK-SD-NEXT: mov x3, x19 3753; CHECK-SD-NEXT: bl __modti3 3754; CHECK-SD-NEXT: mov x2, x0 3755; CHECK-SD-NEXT: mov x3, x1 3756; CHECK-SD-NEXT: mov x0, x23 3757; CHECK-SD-NEXT: mov x1, x24 3758; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload 3759; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload 3760; CHECK-SD-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload 3761; CHECK-SD-NEXT: ldr x30, [sp], #64 // 8-byte Folded Reload 3762; CHECK-SD-NEXT: ret 3763; 3764; CHECK-GI-LABEL: sv2i128: 3765; CHECK-GI: // %bb.0: // %entry 3766; CHECK-GI-NEXT: str x30, [sp, #-64]! 
// 8-byte Folded Spill
; CHECK-GI-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 64
; CHECK-GI-NEXT:    .cfi_offset w19, -8
; CHECK-GI-NEXT:    .cfi_offset w20, -16
; CHECK-GI-NEXT:    .cfi_offset w21, -24
; CHECK-GI-NEXT:    .cfi_offset w22, -32
; CHECK-GI-NEXT:    .cfi_offset w23, -40
; CHECK-GI-NEXT:    .cfi_offset w24, -48
; CHECK-GI-NEXT:    .cfi_offset w30, -64
; CHECK-GI-NEXT:    mov x19, x2
; CHECK-GI-NEXT:    mov x20, x3
; CHECK-GI-NEXT:    mov x2, x4
; CHECK-GI-NEXT:    mov x3, x5
; CHECK-GI-NEXT:    mov x21, x6
; CHECK-GI-NEXT:    mov x22, x7
; CHECK-GI-NEXT:    bl __modti3
; CHECK-GI-NEXT:    mov x23, x0
; CHECK-GI-NEXT:    mov x24, x1
; CHECK-GI-NEXT:    mov x0, x19
; CHECK-GI-NEXT:    mov x1, x20
; CHECK-GI-NEXT:    mov x2, x21
; CHECK-GI-NEXT:    mov x3, x22
; CHECK-GI-NEXT:    bl __modti3
; CHECK-GI-NEXT:    mov x2, x0
; CHECK-GI-NEXT:    mov x3, x1
; CHECK-GI-NEXT:    mov x0, x23
; CHECK-GI-NEXT:    mov x1, x24
; CHECK-GI-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldp x24, x23, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldr x30, [sp], #64 // 8-byte Folded Reload
; CHECK-GI-NEXT:    ret
entry:
  %s = srem <2 x i128> %d, %e
  ret <2 x i128> %s
}

; Signed remainder of two <3 x i128> vectors. Both llc runs (with and without
; -global-isel) are expected to emit three __modti3 calls, one per element.
; The first call reuses x0/x1 with the divisor taken from x6/x7; the other two
; divisors are loaded from the incoming stack area above the 96-byte frame.
; Intermediate results are parked in registers saved by the prologue and moved
; into x0-x5 just before the epilogue.
define <3 x i128> @sv3i128(<3 x i128> %d, <3 x i128> %e) {
; CHECK-SD-LABEL: sv3i128:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    str x30, [sp, #-96]! // 8-byte Folded Spill
; CHECK-SD-NEXT:    stp x28, x27, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    stp x26, x25, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
; CHECK-SD-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
; CHECK-SD-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 96
; CHECK-SD-NEXT:    .cfi_offset w19, -8
; CHECK-SD-NEXT:    .cfi_offset w20, -16
; CHECK-SD-NEXT:    .cfi_offset w21, -24
; CHECK-SD-NEXT:    .cfi_offset w22, -32
; CHECK-SD-NEXT:    .cfi_offset w23, -40
; CHECK-SD-NEXT:    .cfi_offset w24, -48
; CHECK-SD-NEXT:    .cfi_offset w25, -56
; CHECK-SD-NEXT:    .cfi_offset w26, -64
; CHECK-SD-NEXT:    .cfi_offset w27, -72
; CHECK-SD-NEXT:    .cfi_offset w28, -80
; CHECK-SD-NEXT:    .cfi_offset w30, -96
; CHECK-SD-NEXT:    ldp x23, x24, [sp, #112]
; CHECK-SD-NEXT:    mov x21, x3
; CHECK-SD-NEXT:    ldp x25, x26, [sp, #96]
; CHECK-SD-NEXT:    mov x22, x2
; CHECK-SD-NEXT:    mov x2, x6
; CHECK-SD-NEXT:    mov x3, x7
; CHECK-SD-NEXT:    mov x19, x5
; CHECK-SD-NEXT:    mov x20, x4
; CHECK-SD-NEXT:    bl __modti3
; CHECK-SD-NEXT:    mov x27, x0
; CHECK-SD-NEXT:    mov x28, x1
; CHECK-SD-NEXT:    mov x0, x22
; CHECK-SD-NEXT:    mov x1, x21
; CHECK-SD-NEXT:    mov x2, x25
; CHECK-SD-NEXT:    mov x3, x26
; CHECK-SD-NEXT:    bl __modti3
; CHECK-SD-NEXT:    mov x21, x0
; CHECK-SD-NEXT:    mov x22, x1
; CHECK-SD-NEXT:    mov x0, x20
; CHECK-SD-NEXT:    mov x1, x19
; CHECK-SD-NEXT:    mov x2, x23
; CHECK-SD-NEXT:    mov x3, x24
; CHECK-SD-NEXT:    bl __modti3
; CHECK-SD-NEXT:    mov x4, x0
; CHECK-SD-NEXT:    mov x5, x1
; CHECK-SD-NEXT:    mov x0, x27
; CHECK-SD-NEXT:    mov x1, x28
; CHECK-SD-NEXT:    mov x2, x21
; CHECK-SD-NEXT:    mov x3, x22
; CHECK-SD-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldp x24, x23, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldp x26, x25, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldp x28, x27, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr x30, [sp], #96 // 8-byte Folded Reload
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sv3i128:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    str x30, [sp, #-96]! // 8-byte Folded Spill
; CHECK-GI-NEXT:    stp x28, x27, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT:    stp x26, x25, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
; CHECK-GI-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
; CHECK-GI-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 96
; CHECK-GI-NEXT:    .cfi_offset w19, -8
; CHECK-GI-NEXT:    .cfi_offset w20, -16
; CHECK-GI-NEXT:    .cfi_offset w21, -24
; CHECK-GI-NEXT:    .cfi_offset w22, -32
; CHECK-GI-NEXT:    .cfi_offset w23, -40
; CHECK-GI-NEXT:    .cfi_offset w24, -48
; CHECK-GI-NEXT:    .cfi_offset w25, -56
; CHECK-GI-NEXT:    .cfi_offset w26, -64
; CHECK-GI-NEXT:    .cfi_offset w27, -72
; CHECK-GI-NEXT:    .cfi_offset w28, -80
; CHECK-GI-NEXT:    .cfi_offset w30, -96
; CHECK-GI-NEXT:    ldp x23, x24, [sp, #96]
; CHECK-GI-NEXT:    mov x19, x2
; CHECK-GI-NEXT:    ldp x25, x26, [sp, #112]
; CHECK-GI-NEXT:    mov x20, x3
; CHECK-GI-NEXT:    mov x2, x6
; CHECK-GI-NEXT:    mov x3, x7
; CHECK-GI-NEXT:    mov x21, x4
; CHECK-GI-NEXT:    mov x22, x5
; CHECK-GI-NEXT:    bl __modti3
; CHECK-GI-NEXT:    mov x27, x0
; CHECK-GI-NEXT:    mov x28, x1
; CHECK-GI-NEXT:    mov x0, x19
; CHECK-GI-NEXT:    mov x1, x20
; CHECK-GI-NEXT:    mov x2, x23
; CHECK-GI-NEXT:    mov x3, x24
; CHECK-GI-NEXT:    bl __modti3
; CHECK-GI-NEXT:    mov x19, x0
; CHECK-GI-NEXT:    mov x20, x1
; CHECK-GI-NEXT:    mov x0, x21
; CHECK-GI-NEXT:    mov x1, x22
; CHECK-GI-NEXT:    mov x2, x25
; CHECK-GI-NEXT:    mov x3, x26
; CHECK-GI-NEXT:    bl __modti3
; CHECK-GI-NEXT:    mov x4, x0
; CHECK-GI-NEXT:    mov x5, x1
; CHECK-GI-NEXT:    mov x0, x27
; CHECK-GI-NEXT:    mov x1, x28
; CHECK-GI-NEXT:    mov x2, x19
; CHECK-GI-NEXT:    mov x3, x20
; CHECK-GI-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldp x24, x23, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldp x26, x25, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldp x28, x27, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldr x30, [sp], #96 // 8-byte Folded Reload
; CHECK-GI-NEXT:    ret
entry:
  %s = srem <3 x i128> %d, %e
  ret <3 x i128> %s
}

; Signed remainder of two <4 x i128> vectors. Both llc runs are expected to
; emit four __modti3 calls, one per element. All four divisors are loaded from
; the incoming stack area above the 128-byte frame, and the values that do not
; fit in saved registers are spilled to the local slots at [sp, #8] and
; [sp, #16]. The four results are returned in x0-x7.
define <4 x i128> @sv4i128(<4 x i128> %d, <4 x i128> %e) {
; CHECK-SD-LABEL: sv4i128:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #128
; CHECK-SD-NEXT:    stp x29, x30, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT:    stp x28, x27, [sp, #48] // 16-byte Folded Spill
; CHECK-SD-NEXT:    stp x26, x25, [sp, #64] // 16-byte Folded Spill
; CHECK-SD-NEXT:    stp x24, x23, [sp, #80] // 16-byte Folded Spill
; CHECK-SD-NEXT:    stp x22, x21, [sp, #96] // 16-byte Folded Spill
; CHECK-SD-NEXT:    stp x20, x19, [sp, #112] // 16-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 128
; CHECK-SD-NEXT:    .cfi_offset w19, -8
; CHECK-SD-NEXT:    .cfi_offset w20, -16
; CHECK-SD-NEXT:    .cfi_offset w21, -24
; CHECK-SD-NEXT:    .cfi_offset w22, -32
; CHECK-SD-NEXT:    .cfi_offset w23, -40
; CHECK-SD-NEXT:    .cfi_offset w24, -48
; CHECK-SD-NEXT:    .cfi_offset w25, -56
; CHECK-SD-NEXT:    .cfi_offset w26, -64
; CHECK-SD-NEXT:    .cfi_offset w27, -72
; CHECK-SD-NEXT:    .cfi_offset w28, -80
; CHECK-SD-NEXT:    .cfi_offset w30, -88
; CHECK-SD-NEXT:    .cfi_offset w29, -96
; CHECK-SD-NEXT:    mov x23, x3
; CHECK-SD-NEXT:    mov x24, x2
; CHECK-SD-NEXT:    stp x6, x7, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldp x8, x26, [sp, #176]
; CHECK-SD-NEXT:    mov x21, x5
; CHECK-SD-NEXT:    ldp x2, x3, [sp, #128]
; CHECK-SD-NEXT:    mov x22, x4
; CHECK-SD-NEXT:    ldp x27, x28, [sp, #160]
; CHECK-SD-NEXT:    ldp x29, x19, [sp, #144]
; CHECK-SD-NEXT:    str x8, [sp, #8] // 8-byte Folded Spill
; CHECK-SD-NEXT:    bl __modti3
; CHECK-SD-NEXT:    mov x20, x0
; CHECK-SD-NEXT:    mov x25, x1
; CHECK-SD-NEXT:    mov x0, x24
; CHECK-SD-NEXT:    mov x1, x23
; CHECK-SD-NEXT:    mov x2, x29
; CHECK-SD-NEXT:    mov x3, x19
; CHECK-SD-NEXT:    bl __modti3
; CHECK-SD-NEXT:    mov x19, x0
; CHECK-SD-NEXT:    mov x23, x1
; CHECK-SD-NEXT:    mov x0, x22
; CHECK-SD-NEXT:    mov x1, x21
; CHECK-SD-NEXT:    mov x2, x27
; CHECK-SD-NEXT:    mov x3, x28
; CHECK-SD-NEXT:    bl __modti3
; CHECK-SD-NEXT:    mov x21, x0
; CHECK-SD-NEXT:    mov x22, x1
; CHECK-SD-NEXT:    ldr x2, [sp, #8] // 8-byte Folded Reload
; CHECK-SD-NEXT:    ldp x0, x1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov x3, x26
; CHECK-SD-NEXT:    bl __modti3
; CHECK-SD-NEXT:    mov x6, x0
; CHECK-SD-NEXT:    mov x7, x1
; CHECK-SD-NEXT:    mov x0, x20
; CHECK-SD-NEXT:    mov x1, x25
; CHECK-SD-NEXT:    mov x2, x19
; CHECK-SD-NEXT:    mov x3, x23
; CHECK-SD-NEXT:    mov x4, x21
; CHECK-SD-NEXT:    mov x5, x22
; CHECK-SD-NEXT:    ldp x20, x19, [sp, #112] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldp x22, x21, [sp, #96] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldp x24, x23, [sp, #80] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldp x26, x25, [sp, #64] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldp x28, x27, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldp x29, x30, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT:    add sp, sp, #128
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sv4i128:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #128
; CHECK-GI-NEXT:    stp x29, x30, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT:    stp x28, x27, [sp, #48] // 16-byte Folded Spill
; CHECK-GI-NEXT:    stp x26, x25, [sp, #64] // 16-byte Folded Spill
; CHECK-GI-NEXT:    stp x24, x23, [sp, #80] // 16-byte Folded Spill
; CHECK-GI-NEXT:    stp x22, x21, [sp, #96] // 16-byte Folded Spill
; CHECK-GI-NEXT:    stp x20, x19, [sp, #112] // 16-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 128
; CHECK-GI-NEXT:    .cfi_offset w19, -8
; CHECK-GI-NEXT:    .cfi_offset w20, -16
; CHECK-GI-NEXT:    .cfi_offset w21, -24
; CHECK-GI-NEXT:    .cfi_offset w22, -32
; CHECK-GI-NEXT:    .cfi_offset w23, -40
; CHECK-GI-NEXT:    .cfi_offset w24, -48
; CHECK-GI-NEXT:    .cfi_offset w25, -56
; CHECK-GI-NEXT:    .cfi_offset w26, -64
; CHECK-GI-NEXT:    .cfi_offset w27, -72
; CHECK-GI-NEXT:    .cfi_offset w28, -80
; CHECK-GI-NEXT:    .cfi_offset w30, -88
; CHECK-GI-NEXT:    .cfi_offset w29, -96
; CHECK-GI-NEXT:    mov x19, x2
; CHECK-GI-NEXT:    mov x20, x3
; CHECK-GI-NEXT:    mov x21, x4
; CHECK-GI-NEXT:    ldp x2, x3, [sp, #128]
; CHECK-GI-NEXT:    mov x22, x5
; CHECK-GI-NEXT:    ldp x9, x8, [sp, #176]
; CHECK-GI-NEXT:    mov x23, x7
; CHECK-GI-NEXT:    ldp x24, x25, [sp, #144]
; CHECK-GI-NEXT:    ldp x26, x27, [sp, #160]
; CHECK-GI-NEXT:    stp x9, x6, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT:    str x8, [sp, #8] // 8-byte Folded Spill
; CHECK-GI-NEXT:    bl __modti3
; CHECK-GI-NEXT:    mov x28, x0
; CHECK-GI-NEXT:    mov x29, x1
; CHECK-GI-NEXT:    mov x0, x19
; CHECK-GI-NEXT:    mov x1, x20
; CHECK-GI-NEXT:    mov x2, x24
; CHECK-GI-NEXT:    mov x3, x25
; CHECK-GI-NEXT:    bl __modti3
; CHECK-GI-NEXT:    mov x19, x0
; CHECK-GI-NEXT:    mov x20, x1
; CHECK-GI-NEXT:    mov x0, x21
; CHECK-GI-NEXT:    mov x1, x22
; CHECK-GI-NEXT:    mov x2, x26
; CHECK-GI-NEXT:    mov x3, x27
; CHECK-GI-NEXT:    bl __modti3
; CHECK-GI-NEXT:    mov x21, x0
; CHECK-GI-NEXT:    ldp x2, x0, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldr x3, [sp, #8] // 8-byte Folded Reload
; CHECK-GI-NEXT:    mov x22, x1
; CHECK-GI-NEXT:    mov x1, x23
; CHECK-GI-NEXT:    bl __modti3
; CHECK-GI-NEXT:    mov x6, x0
; CHECK-GI-NEXT:    mov x7, x1
; CHECK-GI-NEXT:    mov x0, x28
; CHECK-GI-NEXT:    mov x1, x29
; CHECK-GI-NEXT:    mov x2, x19
; CHECK-GI-NEXT:    mov x3, x20
; CHECK-GI-NEXT:    mov x4, x21
; CHECK-GI-NEXT:    mov x5, x22
; CHECK-GI-NEXT:    ldp x20, x19, [sp, #112] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldp x22, x21, [sp, #96] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldp x24, x23, [sp, #80] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldp x26, x25, [sp, #64] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldp x28, x27, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldp x29, x30, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT:    add sp, sp, #128
; CHECK-GI-NEXT:    ret
entry:
  %s = srem <4 x i128> %d, %e
  ret <4 x i128> %s
}

; Unsigned remainder of two <2 x i128> vectors. Both llc runs are expected to
; emit two __umodti3 calls, one per element, with all operands arriving in
; x0-x7 and the two results returned in x0-x3.
define <2 x i128> @uv2i128(<2 x i128> %d, <2 x i128> %e) {
; CHECK-SD-LABEL: uv2i128:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    str x30, [sp, #-64]! // 8-byte Folded Spill
; CHECK-SD-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
; CHECK-SD-NEXT:    .cfi_offset w19, -8
; CHECK-SD-NEXT:    .cfi_offset w20, -16
; CHECK-SD-NEXT:    .cfi_offset w21, -24
; CHECK-SD-NEXT:    .cfi_offset w22, -32
; CHECK-SD-NEXT:    .cfi_offset w23, -40
; CHECK-SD-NEXT:    .cfi_offset w24, -48
; CHECK-SD-NEXT:    .cfi_offset w30, -64
; CHECK-SD-NEXT:    mov x21, x3
; CHECK-SD-NEXT:    mov x22, x2
; CHECK-SD-NEXT:    mov x2, x4
; CHECK-SD-NEXT:    mov x3, x5
; CHECK-SD-NEXT:    mov x19, x7
; CHECK-SD-NEXT:    mov x20, x6
; CHECK-SD-NEXT:    bl __umodti3
; CHECK-SD-NEXT:    mov x23, x0
; CHECK-SD-NEXT:    mov x24, x1
; CHECK-SD-NEXT:    mov x0, x22
; CHECK-SD-NEXT:    mov x1, x21
; CHECK-SD-NEXT:    mov x2, x20
; CHECK-SD-NEXT:    mov x3, x19
; CHECK-SD-NEXT:    bl __umodti3
; CHECK-SD-NEXT:    mov x2, x0
; CHECK-SD-NEXT:    mov x3, x1
; CHECK-SD-NEXT:    mov x0, x23
; CHECK-SD-NEXT:    mov x1, x24
; CHECK-SD-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldp x24, x23, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr x30, [sp], #64 // 8-byte Folded Reload
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: uv2i128:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    str x30, [sp, #-64]! // 8-byte Folded Spill
; CHECK-GI-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 64
; CHECK-GI-NEXT:    .cfi_offset w19, -8
; CHECK-GI-NEXT:    .cfi_offset w20, -16
; CHECK-GI-NEXT:    .cfi_offset w21, -24
; CHECK-GI-NEXT:    .cfi_offset w22, -32
; CHECK-GI-NEXT:    .cfi_offset w23, -40
; CHECK-GI-NEXT:    .cfi_offset w24, -48
; CHECK-GI-NEXT:    .cfi_offset w30, -64
; CHECK-GI-NEXT:    mov x19, x2
; CHECK-GI-NEXT:    mov x20, x3
; CHECK-GI-NEXT:    mov x2, x4
; CHECK-GI-NEXT:    mov x3, x5
; CHECK-GI-NEXT:    mov x21, x6
; CHECK-GI-NEXT:    mov x22, x7
; CHECK-GI-NEXT:    bl __umodti3
; CHECK-GI-NEXT:    mov x23, x0
; CHECK-GI-NEXT:    mov x24, x1
; CHECK-GI-NEXT:    mov x0, x19
; CHECK-GI-NEXT:    mov x1, x20
; CHECK-GI-NEXT:    mov x2, x21
; CHECK-GI-NEXT:    mov x3, x22
; CHECK-GI-NEXT:    bl __umodti3
; CHECK-GI-NEXT:    mov x2, x0
; CHECK-GI-NEXT:    mov x3, x1
; CHECK-GI-NEXT:    mov x0, x23
; CHECK-GI-NEXT:    mov x1, x24
; CHECK-GI-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldp x24, x23, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldr x30, [sp], #64 // 8-byte Folded Reload
; CHECK-GI-NEXT:    ret
entry:
  %s = urem <2 x i128> %d, %e
  ret <2 x i128> %s
}

; Unsigned remainder of two <3 x i128> vectors. Both llc runs are expected to
; emit three __umodti3 calls, one per element; the last two divisors are loaded
; from the incoming stack area above the 96-byte frame and the results are
; returned in x0-x5.
define <3 x i128> @uv3i128(<3 x i128> %d, <3 x i128> %e) {
; CHECK-SD-LABEL: uv3i128:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    str x30, [sp, #-96]! // 8-byte Folded Spill
; CHECK-SD-NEXT:    stp x28, x27, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    stp x26, x25, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
; CHECK-SD-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
; CHECK-SD-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 96
; CHECK-SD-NEXT:    .cfi_offset w19, -8
; CHECK-SD-NEXT:    .cfi_offset w20, -16
; CHECK-SD-NEXT:    .cfi_offset w21, -24
; CHECK-SD-NEXT:    .cfi_offset w22, -32
; CHECK-SD-NEXT:    .cfi_offset w23, -40
; CHECK-SD-NEXT:    .cfi_offset w24, -48
; CHECK-SD-NEXT:    .cfi_offset w25, -56
; CHECK-SD-NEXT:    .cfi_offset w26, -64
; CHECK-SD-NEXT:    .cfi_offset w27, -72
; CHECK-SD-NEXT:    .cfi_offset w28, -80
; CHECK-SD-NEXT:    .cfi_offset w30, -96
; CHECK-SD-NEXT:    ldp x23, x24, [sp, #112]
; CHECK-SD-NEXT:    mov x21, x3
; CHECK-SD-NEXT:    ldp x25, x26, [sp, #96]
; CHECK-SD-NEXT:    mov x22, x2
; CHECK-SD-NEXT:    mov x2, x6
; CHECK-SD-NEXT:    mov x3, x7
; CHECK-SD-NEXT:    mov x19, x5
; CHECK-SD-NEXT:    mov x20, x4
; CHECK-SD-NEXT:    bl __umodti3
; CHECK-SD-NEXT:    mov x27, x0
; CHECK-SD-NEXT:    mov x28, x1
; CHECK-SD-NEXT:    mov x0, x22
; CHECK-SD-NEXT:    mov x1, x21
; CHECK-SD-NEXT:    mov x2, x25
; CHECK-SD-NEXT:    mov x3, x26
; CHECK-SD-NEXT:    bl __umodti3
; CHECK-SD-NEXT:    mov x21, x0
; CHECK-SD-NEXT:    mov x22, x1
; CHECK-SD-NEXT:    mov x0, x20
; CHECK-SD-NEXT:    mov x1, x19
; CHECK-SD-NEXT:    mov x2, x23
; CHECK-SD-NEXT:    mov x3, x24
; CHECK-SD-NEXT:    bl __umodti3
; CHECK-SD-NEXT:    mov x4, x0
; CHECK-SD-NEXT:    mov x5, x1
; CHECK-SD-NEXT:    mov x0, x27
; CHECK-SD-NEXT:    mov x1, x28
; CHECK-SD-NEXT:    mov x2, x21
; CHECK-SD-NEXT:    mov x3, x22
; CHECK-SD-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldp x24, x23, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldp x26, x25, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldp x28, x27, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr x30, [sp], #96 // 8-byte Folded Reload
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: uv3i128:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    str x30, [sp, #-96]! // 8-byte Folded Spill
; CHECK-GI-NEXT:    stp x28, x27, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT:    stp x26, x25, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
; CHECK-GI-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
; CHECK-GI-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 96
; CHECK-GI-NEXT:    .cfi_offset w19, -8
; CHECK-GI-NEXT:    .cfi_offset w20, -16
; CHECK-GI-NEXT:    .cfi_offset w21, -24
; CHECK-GI-NEXT:    .cfi_offset w22, -32
; CHECK-GI-NEXT:    .cfi_offset w23, -40
; CHECK-GI-NEXT:    .cfi_offset w24, -48
; CHECK-GI-NEXT:    .cfi_offset w25, -56
; CHECK-GI-NEXT:    .cfi_offset w26, -64
; CHECK-GI-NEXT:    .cfi_offset w27, -72
; CHECK-GI-NEXT:    .cfi_offset w28, -80
; CHECK-GI-NEXT:    .cfi_offset w30, -96
; CHECK-GI-NEXT:    ldp x23, x24, [sp, #96]
; CHECK-GI-NEXT:    mov x19, x2
; CHECK-GI-NEXT:    ldp x25, x26, [sp, #112]
; CHECK-GI-NEXT:    mov x20, x3
; CHECK-GI-NEXT:    mov x2, x6
; CHECK-GI-NEXT:    mov x3, x7
; CHECK-GI-NEXT:    mov x21, x4
; CHECK-GI-NEXT:    mov x22, x5
; CHECK-GI-NEXT:    bl __umodti3
; CHECK-GI-NEXT:    mov x27, x0
; CHECK-GI-NEXT:    mov x28, x1
; CHECK-GI-NEXT:    mov x0, x19
; CHECK-GI-NEXT:    mov x1, x20
; CHECK-GI-NEXT:    mov x2, x23
; CHECK-GI-NEXT:    mov x3, x24
; CHECK-GI-NEXT:    bl __umodti3
; CHECK-GI-NEXT:    mov x19, x0
; CHECK-GI-NEXT:    mov x20, x1
; CHECK-GI-NEXT:    mov x0, x21
; CHECK-GI-NEXT:    mov x1, x22
; CHECK-GI-NEXT:    mov x2, x25
; CHECK-GI-NEXT:    mov x3, x26
; CHECK-GI-NEXT:    bl __umodti3
; CHECK-GI-NEXT:    mov x4, x0
; CHECK-GI-NEXT:    mov x5, x1
; CHECK-GI-NEXT:    mov x0, x27
; CHECK-GI-NEXT:    mov x1, x28
; CHECK-GI-NEXT:    mov x2, x19
; CHECK-GI-NEXT:    mov x3, x20
; CHECK-GI-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldp x24, x23, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldp x26, x25, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldp x28, x27, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldr x30, [sp], #96 // 8-byte Folded Reload
; CHECK-GI-NEXT:    ret
entry:
  %s = urem <3 x i128> %d, %e
  ret <3 x i128> %s
}

; Unsigned remainder of two <4 x i128> vectors. Both llc runs are expected to
; emit four __umodti3 calls, one per element. All four divisors are loaded from
; the incoming stack area above the 128-byte frame, values that do not fit in
; saved registers are spilled to [sp, #8] and [sp, #16], and the results are
; returned in x0-x7.
define <4 x i128> @uv4i128(<4 x i128> %d, <4 x i128> %e) {
; CHECK-SD-LABEL: uv4i128:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #128
; CHECK-SD-NEXT:    stp x29, x30, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT:    stp x28, x27, [sp, #48] // 16-byte Folded Spill
; CHECK-SD-NEXT:    stp x26, x25, [sp, #64] // 16-byte Folded Spill
; CHECK-SD-NEXT:    stp x24, x23, [sp, #80] // 16-byte Folded Spill
; CHECK-SD-NEXT:    stp x22, x21, [sp, #96] // 16-byte Folded Spill
; CHECK-SD-NEXT:    stp x20, x19, [sp, #112] // 16-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 128
; CHECK-SD-NEXT:    .cfi_offset w19, -8
; CHECK-SD-NEXT:    .cfi_offset w20, -16
; CHECK-SD-NEXT:    .cfi_offset w21, -24
; CHECK-SD-NEXT:    .cfi_offset w22, -32
; CHECK-SD-NEXT:    .cfi_offset w23, -40
; CHECK-SD-NEXT:    .cfi_offset w24, -48
; CHECK-SD-NEXT:    .cfi_offset w25, -56
; CHECK-SD-NEXT:    .cfi_offset w26, -64
; CHECK-SD-NEXT:    .cfi_offset w27, -72
; CHECK-SD-NEXT:    .cfi_offset w28, -80
; CHECK-SD-NEXT:    .cfi_offset w30, -88
; CHECK-SD-NEXT:    .cfi_offset w29, -96
; CHECK-SD-NEXT:    mov x23, x3
; CHECK-SD-NEXT:    mov x24, x2
; CHECK-SD-NEXT:    stp x6, x7, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldp x8, x26, [sp, #176]
; CHECK-SD-NEXT:    mov x21, x5
; CHECK-SD-NEXT:    ldp x2, x3, [sp, #128]
; CHECK-SD-NEXT:    mov x22, x4
; CHECK-SD-NEXT:    ldp x27, x28, [sp, #160]
; CHECK-SD-NEXT:    ldp x29, x19, [sp, #144]
; CHECK-SD-NEXT:    str x8, [sp, #8] // 8-byte Folded Spill
; CHECK-SD-NEXT:    bl __umodti3
; CHECK-SD-NEXT:    mov x20, x0
; CHECK-SD-NEXT:    mov x25, x1
; CHECK-SD-NEXT:    mov x0, x24
; CHECK-SD-NEXT:    mov x1, x23
; CHECK-SD-NEXT:    mov x2, x29
; CHECK-SD-NEXT:    mov x3, x19
; CHECK-SD-NEXT:    bl __umodti3
; CHECK-SD-NEXT:    mov x19, x0
; CHECK-SD-NEXT:    mov x23, x1
; CHECK-SD-NEXT:    mov x0, x22
; CHECK-SD-NEXT:    mov x1, x21
; CHECK-SD-NEXT:    mov x2, x27
; CHECK-SD-NEXT:    mov x3, x28
; CHECK-SD-NEXT:    bl __umodti3
; CHECK-SD-NEXT:    mov x21, x0
; CHECK-SD-NEXT:    mov x22, x1
; CHECK-SD-NEXT:    ldr x2, [sp, #8] // 8-byte Folded Reload
; CHECK-SD-NEXT:    ldp x0, x1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov x3, x26
; CHECK-SD-NEXT:    bl __umodti3
; CHECK-SD-NEXT:    mov x6, x0
; CHECK-SD-NEXT:    mov x7, x1
; CHECK-SD-NEXT:    mov x0, x20
; CHECK-SD-NEXT:    mov x1, x25
; CHECK-SD-NEXT:    mov x2, x19
; CHECK-SD-NEXT:    mov x3, x23
; CHECK-SD-NEXT:    mov x4, x21
; CHECK-SD-NEXT:    mov x5, x22
; CHECK-SD-NEXT:    ldp x20, x19, [sp, #112] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldp x22, x21, [sp, #96] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldp x24, x23, [sp, #80] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldp x26, x25, [sp, #64] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldp x28, x27, [sp, #48] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldp x29, x30, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT:    add sp, sp, #128
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: uv4i128:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #128
; CHECK-GI-NEXT:    stp x29, x30, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT:    stp x28, x27, [sp, #48] // 16-byte Folded Spill
; CHECK-GI-NEXT:    stp x26, x25, [sp, #64] // 16-byte Folded Spill
; CHECK-GI-NEXT:    stp x24, x23, [sp, #80] // 16-byte Folded Spill
; CHECK-GI-NEXT:    stp x22, x21, [sp, #96] // 16-byte Folded Spill
; CHECK-GI-NEXT:    stp x20, x19, [sp, #112] // 16-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 128
; CHECK-GI-NEXT:    .cfi_offset w19, -8
; CHECK-GI-NEXT:    .cfi_offset w20, -16
; CHECK-GI-NEXT:    .cfi_offset w21, -24
; CHECK-GI-NEXT:    .cfi_offset w22, -32
; CHECK-GI-NEXT:    .cfi_offset w23, -40
; CHECK-GI-NEXT:    .cfi_offset w24, -48
; CHECK-GI-NEXT:    .cfi_offset w25, -56
; CHECK-GI-NEXT:    .cfi_offset w26, -64
; CHECK-GI-NEXT:    .cfi_offset w27, -72
; CHECK-GI-NEXT:    .cfi_offset w28, -80
; CHECK-GI-NEXT:    .cfi_offset w30, -88
; CHECK-GI-NEXT:    .cfi_offset w29, -96
; CHECK-GI-NEXT:    mov x19, x2
; CHECK-GI-NEXT:    mov x20, x3
; CHECK-GI-NEXT:    mov x21, x4
; CHECK-GI-NEXT:    ldp x2, x3, [sp, #128]
; CHECK-GI-NEXT:    mov x22, x5
; CHECK-GI-NEXT:    ldp x9, x8, [sp, #176]
; CHECK-GI-NEXT:    mov x23, x7
; CHECK-GI-NEXT:    ldp x24, x25, [sp, #144]
; CHECK-GI-NEXT:    ldp x26, x27, [sp, #160]
; CHECK-GI-NEXT:    stp x9, x6, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT:    str x8, [sp, #8] // 8-byte Folded Spill
; CHECK-GI-NEXT:    bl __umodti3
; CHECK-GI-NEXT:    mov x28, x0
; CHECK-GI-NEXT:    mov x29, x1
; CHECK-GI-NEXT:    mov x0, x19
; CHECK-GI-NEXT:    mov x1, x20
; CHECK-GI-NEXT:    mov x2, x24
; CHECK-GI-NEXT:    mov x3, x25
; CHECK-GI-NEXT:    bl __umodti3
; CHECK-GI-NEXT:    mov x19, x0
; CHECK-GI-NEXT:    mov x20, x1
; CHECK-GI-NEXT:    mov x0, x21
; CHECK-GI-NEXT:    mov x1, x22
; CHECK-GI-NEXT:    mov x2, x26
; CHECK-GI-NEXT:    mov x3, x27
; CHECK-GI-NEXT:    bl __umodti3
; CHECK-GI-NEXT:    mov x21, x0
; CHECK-GI-NEXT:    ldp x2, x0, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldr x3, [sp, #8] // 8-byte Folded Reload
; CHECK-GI-NEXT:    mov x22, x1
; CHECK-GI-NEXT:    mov x1, x23
; CHECK-GI-NEXT:    bl __umodti3
; CHECK-GI-NEXT:    mov x6, x0
; CHECK-GI-NEXT:    mov x7, x1
; CHECK-GI-NEXT:    mov x0, x28
; CHECK-GI-NEXT:    mov x1, x29
; CHECK-GI-NEXT:    mov x2, x19
; CHECK-GI-NEXT:    mov x3, x20
; CHECK-GI-NEXT:    mov x4, x21
; CHECK-GI-NEXT:    mov x5, x22
; CHECK-GI-NEXT:    ldp x20, x19, [sp, #112] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldp x22, x21, [sp, #96] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldp x24, x23, [sp, #80] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldp x26, x25, [sp, #64] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldp x28, x27, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldp x29, x30, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT:    add sp, sp, #128
; CHECK-GI-NEXT:    ret
entry:
  %s = urem <4 x i128> %d, %e
  ret <4 x i128> %s
}