; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
; arm64 has its own copy of this because of the intrinsics
;
; Covers vector mul / fmul / fdiv / sdiv / udiv / srem on NEON vector types.
; The assertion lines below are generated output: refresh them by re-running
; utils/update_llc_test_checks.py instead of editing them by hand.

; Integer multiply. The expected output is a single vector MUL for 8-, 16- and
; 32-bit lanes.

define <8 x i8> @mul8xi8(<8 x i8> %A, <8 x i8> %B) {
; CHECK-LABEL: mul8xi8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %tmp3 = mul <8 x i8> %A, %B
  ret <8 x i8> %tmp3
}

define <16 x i8> @mul16xi8(<16 x i8> %A, <16 x i8> %B) {
; CHECK-LABEL: mul16xi8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp3 = mul <16 x i8> %A, %B
  ret <16 x i8> %tmp3
}

define <4 x i16> @mul4xi16(<4 x i16> %A, <4 x i16> %B) {
; CHECK-LABEL: mul4xi16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %tmp3 = mul <4 x i16> %A, %B
  ret <4 x i16> %tmp3
}

define <8 x i16> @mul8xi16(<8 x i16> %A, <8 x i16> %B) {
; CHECK-LABEL: mul8xi16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %tmp3 = mul <8 x i16> %A, %B
  ret <8 x i16> %tmp3
}

define <2 x i32> @mul2xi32(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: mul2xi32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %tmp3 = mul <2 x i32> %A, %B
  ret <2 x i32> %tmp3
}

define <4 x i32> @mul4x32(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: mul4x32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %tmp3 = mul <4 x i32> %A, %B
  ret <4 x i32> %tmp3
}

; 64-bit lanes: the expected output moves each lane to a general-purpose
; register, multiplies with the scalar MUL and moves the result back.

define <1 x i64> @mul1xi64(<1 x i64> %A, <1 x i64> %B) {
; CHECK-LABEL: mul1xi64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    fmov x9, d0
; CHECK-NEXT:    mul x8, x9, x8
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %tmp3 = mul <1 x i64> %A, %B
  ret <1 x i64> %tmp3
}

define <2 x i64> @mul2xi64(<2 x i64> %A, <2 x i64> %B) {
; CHECK-LABEL: mul2xi64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov x10, d1
; CHECK-NEXT:    fmov x11, d0
; CHECK-NEXT:    mov x8, v1.d[1]
; CHECK-NEXT:    mov x9, v0.d[1]
; CHECK-NEXT:    mul x10, x11, x10
; CHECK-NEXT:    mul x8, x9, x8
; CHECK-NEXT:    fmov d0, x10
; CHECK-NEXT:    mov v0.d[1], x8
; CHECK-NEXT:    ret
  %tmp3 = mul <2 x i64> %A, %B
  ret <2 x i64> %tmp3
}

; Floating-point multiply: expected as a single vector FMUL.

define <2 x float> @mul2xfloat(<2 x float> %A, <2 x float> %B) {
; CHECK-LABEL: mul2xfloat:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %tmp3 = fmul <2 x float> %A, %B
  ret <2 x float> %tmp3
}

define <4 x float> @mul4xfloat(<4 x float> %A, <4 x float> %B) {
; CHECK-LABEL: mul4xfloat:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %tmp3 = fmul <4 x float> %A, %B
  ret <4 x float> %tmp3
}

define <2 x double> @mul2xdouble(<2 x double> %A, <2 x double> %B) {
; CHECK-LABEL: mul2xdouble:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %tmp3 = fmul <2 x double> %A, %B
  ret <2 x double> %tmp3
}

; Floating-point divide: expected as a single vector FDIV.

define <2 x float> @div2xfloat(<2 x float> %A, <2 x float> %B) {
; CHECK-LABEL: div2xfloat:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fdiv v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %tmp3 = fdiv <2 x float> %A, %B
  ret <2 x float> %tmp3
}

define <4 x float> @div4xfloat(<4 x float> %A, <4 x float> %B) {
; CHECK-LABEL: div4xfloat:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fdiv v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %tmp3 = fdiv <4 x float> %A, %B
  ret <4 x float> %tmp3
}

define <2 x double> @div2xdouble(<2 x double> %A, <2 x double> %B) {
; CHECK-LABEL: div2xdouble:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fdiv v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %tmp3 = fdiv <2 x double> %A, %B
  ret <2 x double> %tmp3
}

; Signed integer divide. The expected output is fully scalarized: each lane is
; extracted into a general-purpose register (SMOV for 8/16-bit lanes), divided
; with the scalar SDIV and inserted back into the result vector.

define <1 x i8> @sdiv1x8(<1 x i8> %A, <1 x i8> %B) {
; CHECK-LABEL: sdiv1x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v1.b[0]
; CHECK-NEXT:    smov w9, v0.b[0]
; CHECK-NEXT:    sdiv w8, w9, w8
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %tmp3 = sdiv <1 x i8> %A, %B
  ret <1 x i8> %tmp3
}

define <8 x i8> @sdiv8x8(<8 x i8> %A, <8 x i8> %B) {
; CHECK-LABEL: sdiv8x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v1.b[1]
; CHECK-NEXT:    smov w9, v0.b[1]
; CHECK-NEXT:    smov w10, v0.b[0]
; CHECK-NEXT:    smov w11, v0.b[2]
; CHECK-NEXT:    smov w12, v0.b[3]
; CHECK-NEXT:    smov w13, v0.b[4]
; CHECK-NEXT:    smov w14, v0.b[5]
; CHECK-NEXT:    sdiv w8, w9, w8
; CHECK-NEXT:    smov w9, v1.b[0]
; CHECK-NEXT:    sdiv w9, w10, w9
; CHECK-NEXT:    smov w10, v1.b[2]
; CHECK-NEXT:    sdiv w10, w11, w10
; CHECK-NEXT:    smov w11, v1.b[3]
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    smov w9, v1.b[6]
; CHECK-NEXT:    mov v2.b[1], w8
; CHECK-NEXT:    sdiv w11, w12, w11
; CHECK-NEXT:    smov w12, v1.b[4]
; CHECK-NEXT:    mov v2.b[2], w10
; CHECK-NEXT:    smov w10, v0.b[6]
; CHECK-NEXT:    sdiv w12, w13, w12
; CHECK-NEXT:    smov w13, v1.b[5]
; CHECK-NEXT:    mov v2.b[3], w11
; CHECK-NEXT:    smov w11, v0.b[7]
; CHECK-NEXT:    sdiv w8, w14, w13
; CHECK-NEXT:    mov v2.b[4], w12
; CHECK-NEXT:    sdiv w9, w10, w9
; CHECK-NEXT:    smov w10, v1.b[7]
; CHECK-NEXT:    mov v2.b[5], w8
; CHECK-NEXT:    sdiv w8, w11, w10
; CHECK-NEXT:    mov v2.b[6], w9
; CHECK-NEXT:    mov v2.b[7], w8
; CHECK-NEXT:    fmov d0, d2
; CHECK-NEXT:    ret
  %tmp3 = sdiv <8 x i8> %A, %B
  ret <8 x i8> %tmp3
}

define <16 x i8> @sdiv16x8(<16 x i8> %A, <16 x i8> %B) {
; CHECK-LABEL: sdiv16x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smov w8, v1.b[1]
; CHECK-NEXT:    smov w9, v0.b[1]
; CHECK-NEXT:    smov w10, v0.b[0]
; CHECK-NEXT:    smov w11, v0.b[2]
; CHECK-NEXT:    smov w12, v0.b[3]
; CHECK-NEXT:    smov w13, v0.b[4]
; CHECK-NEXT:    smov w14, v0.b[5]
; CHECK-NEXT:    smov w15, v0.b[6]
; CHECK-NEXT:    smov w16, v0.b[7]
; CHECK-NEXT:    smov w17, v0.b[8]
; CHECK-NEXT:    smov w18, v0.b[9]
; CHECK-NEXT:    sdiv w8, w9, w8
; CHECK-NEXT:    smov w9, v1.b[0]
; CHECK-NEXT:    sdiv w9, w10, w9
; CHECK-NEXT:    smov w10, v1.b[2]
; CHECK-NEXT:    sdiv w10, w11, w10
; CHECK-NEXT:    smov w11, v1.b[3]
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    smov w9, v1.b[10]
; CHECK-NEXT:    mov v2.b[1], w8
; CHECK-NEXT:    sdiv w11, w12, w11
; CHECK-NEXT:    smov w12, v1.b[4]
; CHECK-NEXT:    mov v2.b[2], w10
; CHECK-NEXT:    smov w10, v0.b[10]
; CHECK-NEXT:    sdiv w12, w13, w12
; CHECK-NEXT:    smov w13, v1.b[5]
; CHECK-NEXT:    mov v2.b[3], w11
; CHECK-NEXT:    smov w11, v0.b[11]
; CHECK-NEXT:    sdiv w13, w14, w13
; CHECK-NEXT:    smov w14, v1.b[6]
; CHECK-NEXT:    mov v2.b[4], w12
; CHECK-NEXT:    smov w12, v0.b[12]
; CHECK-NEXT:    sdiv w14, w15, w14
; CHECK-NEXT:    smov w15, v1.b[7]
; CHECK-NEXT:    mov v2.b[5], w13
; CHECK-NEXT:    smov w13, v0.b[13]
; CHECK-NEXT:    sdiv w15, w16, w15
; CHECK-NEXT:    smov w16, v1.b[8]
; CHECK-NEXT:    mov v2.b[6], w14
; CHECK-NEXT:    sdiv w16, w17, w16
; CHECK-NEXT:    smov w17, v1.b[9]
; CHECK-NEXT:    mov v2.b[7], w15
; CHECK-NEXT:    sdiv w8, w18, w17
; CHECK-NEXT:    mov v2.b[8], w16
; CHECK-NEXT:    sdiv w9, w10, w9
; CHECK-NEXT:    smov w10, v1.b[11]
; CHECK-NEXT:    mov v2.b[9], w8
; CHECK-NEXT:    sdiv w10, w11, w10
; CHECK-NEXT:    smov w11, v1.b[12]
; CHECK-NEXT:    mov v2.b[10], w9
; CHECK-NEXT:    smov w9, v1.b[14]
; CHECK-NEXT:    sdiv w11, w12, w11
; CHECK-NEXT:    smov w12, v1.b[13]
; CHECK-NEXT:    mov v2.b[11], w10
; CHECK-NEXT:    smov w10, v1.b[15]
; CHECK-NEXT:    sdiv w8, w13, w12
; CHECK-NEXT:    smov w12, v0.b[14]
; CHECK-NEXT:    mov v2.b[12], w11
; CHECK-NEXT:    smov w11, v0.b[15]
; CHECK-NEXT:    sdiv w9, w12, w9
; CHECK-NEXT:    mov v2.b[13], w8
; CHECK-NEXT:    sdiv w8, w11, w10
; CHECK-NEXT:    mov v2.b[14], w9
; CHECK-NEXT:    mov v2.b[15], w8
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %tmp3 = sdiv <16 x i8> %A, %B
  ret <16 x i8> %tmp3
}

define <1 x i16> @sdiv1x16(<1 x i16> %A, <1 x i16> %B) {
; CHECK-LABEL: sdiv1x16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v1.h[0]
; CHECK-NEXT:    smov w9, v0.h[0]
; CHECK-NEXT:    sdiv w8, w9, w8
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %tmp3 = sdiv <1 x i16> %A, %B
  ret <1 x i16> %tmp3
}

define <4 x i16> @sdiv4x16(<4 x i16> %A, <4 x i16> %B) {
; CHECK-LABEL: sdiv4x16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v1.h[1]
; CHECK-NEXT:    smov w9, v0.h[1]
; CHECK-NEXT:    smov w10, v0.h[0]
; CHECK-NEXT:    smov w11, v0.h[2]
; CHECK-NEXT:    smov w12, v0.h[3]
; CHECK-NEXT:    sdiv w8, w9, w8
; CHECK-NEXT:    smov w9, v1.h[0]
; CHECK-NEXT:    sdiv w9, w10, w9
; CHECK-NEXT:    smov w10, v1.h[2]
; CHECK-NEXT:    sdiv w10, w11, w10
; CHECK-NEXT:    smov w11, v1.h[3]
; CHECK-NEXT:    fmov s0, w9
; CHECK-NEXT:    mov v0.h[1], w8
; CHECK-NEXT:    sdiv w8, w12, w11
; CHECK-NEXT:    mov v0.h[2], w10
; CHECK-NEXT:    mov v0.h[3], w8
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %tmp3 = sdiv <4 x i16> %A, %B
  ret <4 x i16> %tmp3
}

define <8 x i16> @sdiv8x16(<8 x i16> %A, <8 x i16> %B) {
; CHECK-LABEL: sdiv8x16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smov w8, v1.h[1]
; CHECK-NEXT:    smov w9, v0.h[1]
; CHECK-NEXT:    smov w10, v0.h[0]
; CHECK-NEXT:    smov w11, v0.h[2]
; CHECK-NEXT:    smov w12, v0.h[3]
; CHECK-NEXT:    smov w13, v0.h[4]
; CHECK-NEXT:    smov w14, v0.h[5]
; CHECK-NEXT:    sdiv w8, w9, w8
; CHECK-NEXT:    smov w9, v1.h[0]
; CHECK-NEXT:    sdiv w9, w10, w9
; CHECK-NEXT:    smov w10, v1.h[2]
; CHECK-NEXT:    sdiv w10, w11, w10
; CHECK-NEXT:    smov w11, v1.h[3]
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    smov w9, v1.h[6]
; CHECK-NEXT:    mov v2.h[1], w8
; CHECK-NEXT:    sdiv w11, w12, w11
; CHECK-NEXT:    smov w12, v1.h[4]
; CHECK-NEXT:    mov v2.h[2], w10
; CHECK-NEXT:    smov w10, v0.h[6]
; CHECK-NEXT:    sdiv w12, w13, w12
; CHECK-NEXT:    smov w13, v1.h[5]
; CHECK-NEXT:    mov v2.h[3], w11
; CHECK-NEXT:    smov w11, v0.h[7]
; CHECK-NEXT:    sdiv w8, w14, w13
; CHECK-NEXT:    mov v2.h[4], w12
; CHECK-NEXT:    sdiv w9, w10, w9
; CHECK-NEXT:    smov w10, v1.h[7]
; CHECK-NEXT:    mov v2.h[5], w8
; CHECK-NEXT:    sdiv w8, w11, w10
; CHECK-NEXT:    mov v2.h[6], w9
; CHECK-NEXT:    mov v2.h[7], w8
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %tmp3 = sdiv <8 x i16> %A, %B
  ret <8 x i16> %tmp3
}

define <1 x i32> @sdiv1x32(<1 x i32> %A, <1 x i32> %B) {
; CHECK-LABEL: sdiv1x32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    sdiv w8, w9, w8
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %tmp3 = sdiv <1 x i32> %A, %B
  ret <1 x i32> %tmp3
}

define <2 x i32> @sdiv2x32(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: sdiv2x32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    mov w10, v0.s[1]
; CHECK-NEXT:    sdiv w8, w9, w8
; CHECK-NEXT:    mov w9, v1.s[1]
; CHECK-NEXT:    sdiv w9, w10, w9
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    mov v0.s[1], w9
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %tmp3 = sdiv <2 x i32> %A, %B
  ret <2 x i32> %tmp3
}

define <4 x i32> @sdiv4x32(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: sdiv4x32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, v1.s[1]
; CHECK-NEXT:    mov w9, v0.s[1]
; CHECK-NEXT:    fmov w10, s0
; CHECK-NEXT:    mov w11, v0.s[2]
; CHECK-NEXT:    mov w12, v0.s[3]
; CHECK-NEXT:    sdiv w8, w9, w8
; CHECK-NEXT:    fmov w9, s1
; CHECK-NEXT:    sdiv w9, w10, w9
; CHECK-NEXT:    mov w10, v1.s[2]
; CHECK-NEXT:    sdiv w10, w11, w10
; CHECK-NEXT:    mov w11, v1.s[3]
; CHECK-NEXT:    fmov s0, w9
; CHECK-NEXT:    mov v0.s[1], w8
; CHECK-NEXT:    sdiv w8, w12, w11
; CHECK-NEXT:    mov v0.s[2], w10
; CHECK-NEXT:    mov v0.s[3], w8
; CHECK-NEXT:    ret
  %tmp3 = sdiv <4 x i32> %A, %B
  ret <4 x i32> %tmp3
}

define <1 x i64> @sdiv1x64(<1 x i64> %A, <1 x i64> %B) {
; CHECK-LABEL: sdiv1x64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    fmov x9, d0
; CHECK-NEXT:    sdiv x8, x9, x8
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %tmp3 = sdiv <1 x i64> %A, %B
  ret <1 x i64> %tmp3
}

define <2 x i64> @sdiv2x64(<2 x i64> %A, <2 x i64> %B) {
; CHECK-LABEL: sdiv2x64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    fmov x9, d0
; CHECK-NEXT:    mov x10, v0.d[1]
; CHECK-NEXT:    sdiv x8, x9, x8
; CHECK-NEXT:    mov x9, v1.d[1]
; CHECK-NEXT:    sdiv x9, x10, x9
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    mov v0.d[1], x9
; CHECK-NEXT:    ret
  %tmp3 = sdiv <2 x i64> %A, %B
  ret <2 x i64> %tmp3
}

; Unsigned integer divide. Same scalarized shape as the signed cases above,
; with UMOV for 8/16-bit lane extraction and the scalar UDIV.

define <1 x i8> @udiv1x8(<1 x i8> %A, <1 x i8> %B) {
; CHECK-LABEL: udiv1x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    umov w8, v1.b[0]
; CHECK-NEXT:    umov w9, v0.b[0]
; CHECK-NEXT:    udiv w8, w9, w8
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %tmp3 = udiv <1 x i8> %A, %B
  ret <1 x i8> %tmp3
}

define <8 x i8> @udiv8x8(<8 x i8> %A, <8 x i8> %B) {
; CHECK-LABEL: udiv8x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    umov w8, v1.b[1]
; CHECK-NEXT:    umov w9, v0.b[1]
; CHECK-NEXT:    umov w10, v0.b[0]
; CHECK-NEXT:    umov w11, v0.b[2]
; CHECK-NEXT:    umov w12, v0.b[3]
; CHECK-NEXT:    umov w13, v0.b[4]
; CHECK-NEXT:    umov w14, v0.b[5]
; CHECK-NEXT:    udiv w8, w9, w8
; CHECK-NEXT:    umov w9, v1.b[0]
; CHECK-NEXT:    udiv w9, w10, w9
; CHECK-NEXT:    umov w10, v1.b[2]
; CHECK-NEXT:    udiv w10, w11, w10
; CHECK-NEXT:    umov w11, v1.b[3]
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    umov w9, v1.b[6]
; CHECK-NEXT:    mov v2.b[1], w8
; CHECK-NEXT:    udiv w11, w12, w11
; CHECK-NEXT:    umov w12, v1.b[4]
; CHECK-NEXT:    mov v2.b[2], w10
; CHECK-NEXT:    umov w10, v0.b[6]
; CHECK-NEXT:    udiv w12, w13, w12
; CHECK-NEXT:    umov w13, v1.b[5]
; CHECK-NEXT:    mov v2.b[3], w11
; CHECK-NEXT:    umov w11, v0.b[7]
; CHECK-NEXT:    udiv w8, w14, w13
; CHECK-NEXT:    mov v2.b[4], w12
; CHECK-NEXT:    udiv w9, w10, w9
; CHECK-NEXT:    umov w10, v1.b[7]
; CHECK-NEXT:    mov v2.b[5], w8
; CHECK-NEXT:    udiv w8, w11, w10
; CHECK-NEXT:    mov v2.b[6], w9
; CHECK-NEXT:    mov v2.b[7], w8
; CHECK-NEXT:    fmov d0, d2
; CHECK-NEXT:    ret
  %tmp3 = udiv <8 x i8> %A, %B
  ret <8 x i8> %tmp3
}

define <16 x i8> @udiv16x8(<16 x i8> %A, <16 x i8> %B) {
; CHECK-LABEL: udiv16x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v1.b[1]
; CHECK-NEXT:    umov w9, v0.b[1]
; CHECK-NEXT:    umov w10, v0.b[0]
; CHECK-NEXT:    umov w11, v0.b[2]
; CHECK-NEXT:    umov w12, v0.b[3]
; CHECK-NEXT:    umov w13, v0.b[4]
; CHECK-NEXT:    umov w14, v0.b[5]
; CHECK-NEXT:    umov w15, v0.b[6]
; CHECK-NEXT:    umov w16, v0.b[7]
; CHECK-NEXT:    umov w17, v0.b[8]
; CHECK-NEXT:    umov w18, v0.b[9]
; CHECK-NEXT:    udiv w8, w9, w8
; CHECK-NEXT:    umov w9, v1.b[0]
; CHECK-NEXT:    udiv w9, w10, w9
; CHECK-NEXT:    umov w10, v1.b[2]
; CHECK-NEXT:    udiv w10, w11, w10
; CHECK-NEXT:    umov w11, v1.b[3]
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    umov w9, v1.b[10]
; CHECK-NEXT:    mov v2.b[1], w8
; CHECK-NEXT:    udiv w11, w12, w11
; CHECK-NEXT:    umov w12, v1.b[4]
; CHECK-NEXT:    mov v2.b[2], w10
; CHECK-NEXT:    umov w10, v0.b[10]
; CHECK-NEXT:    udiv w12, w13, w12
; CHECK-NEXT:    umov w13, v1.b[5]
; CHECK-NEXT:    mov v2.b[3], w11
; CHECK-NEXT:    umov w11, v0.b[11]
; CHECK-NEXT:    udiv w13, w14, w13
; CHECK-NEXT:    umov w14, v1.b[6]
; CHECK-NEXT:    mov v2.b[4], w12
; CHECK-NEXT:    umov w12, v0.b[12]
; CHECK-NEXT:    udiv w14, w15, w14
; CHECK-NEXT:    umov w15, v1.b[7]
; CHECK-NEXT:    mov v2.b[5], w13
; CHECK-NEXT:    umov w13, v0.b[13]
; CHECK-NEXT:    udiv w15, w16, w15
; CHECK-NEXT:    umov w16, v1.b[8]
; CHECK-NEXT:    mov v2.b[6], w14
; CHECK-NEXT:    udiv w16, w17, w16
; CHECK-NEXT:    umov w17, v1.b[9]
; CHECK-NEXT:    mov v2.b[7], w15
; CHECK-NEXT:    udiv w8, w18, w17
; CHECK-NEXT:    mov v2.b[8], w16
; CHECK-NEXT:    udiv w9, w10, w9
; CHECK-NEXT:    umov w10, v1.b[11]
; CHECK-NEXT:    mov v2.b[9], w8
; CHECK-NEXT:    udiv w10, w11, w10
; CHECK-NEXT:    umov w11, v1.b[12]
; CHECK-NEXT:    mov v2.b[10], w9
; CHECK-NEXT:    umov w9, v1.b[14]
; CHECK-NEXT:    udiv w11, w12, w11
; CHECK-NEXT:    umov w12, v1.b[13]
; CHECK-NEXT:    mov v2.b[11], w10
; CHECK-NEXT:    umov w10, v1.b[15]
; CHECK-NEXT:    udiv w8, w13, w12
; CHECK-NEXT:    umov w12, v0.b[14]
; CHECK-NEXT:    mov v2.b[12], w11
; CHECK-NEXT:    umov w11, v0.b[15]
; CHECK-NEXT:    udiv w9, w12, w9
; CHECK-NEXT:    mov v2.b[13], w8
; CHECK-NEXT:    udiv w8, w11, w10
; CHECK-NEXT:    mov v2.b[14], w9
; CHECK-NEXT:    mov v2.b[15], w8
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %tmp3 = udiv <16 x i8> %A, %B
  ret <16 x i8> %tmp3
}

define <1 x i16> @udiv1x16(<1 x i16> %A, <1 x i16> %B) {
; CHECK-LABEL: udiv1x16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    umov w8, v1.h[0]
; CHECK-NEXT:    umov w9, v0.h[0]
; CHECK-NEXT:    udiv w8, w9, w8
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %tmp3 = udiv <1 x i16> %A, %B
  ret <1 x i16> %tmp3
}

define <4 x i16> @udiv4x16(<4 x i16> %A, <4 x i16> %B) {
; CHECK-LABEL: udiv4x16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    umov w8, v1.h[1]
; CHECK-NEXT:    umov w9, v0.h[1]
; CHECK-NEXT:    umov w10, v0.h[0]
; CHECK-NEXT:    umov w11, v0.h[2]
; CHECK-NEXT:    umov w12, v0.h[3]
; CHECK-NEXT:    udiv w8, w9, w8
; CHECK-NEXT:    umov w9, v1.h[0]
; CHECK-NEXT:    udiv w9, w10, w9
; CHECK-NEXT:    umov w10, v1.h[2]
; CHECK-NEXT:    udiv w10, w11, w10
; CHECK-NEXT:    umov w11, v1.h[3]
; CHECK-NEXT:    fmov s0, w9
; CHECK-NEXT:    mov v0.h[1], w8
; CHECK-NEXT:    udiv w8, w12, w11
; CHECK-NEXT:    mov v0.h[2], w10
; CHECK-NEXT:    mov v0.h[3], w8
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %tmp3 = udiv <4 x i16> %A, %B
  ret <4 x i16> %tmp3
}

define <8 x i16> @udiv8x16(<8 x i16> %A, <8 x i16> %B) {
; CHECK-LABEL: udiv8x16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v1.h[1]
; CHECK-NEXT:    umov w9, v0.h[1]
; CHECK-NEXT:    umov w10, v0.h[0]
; CHECK-NEXT:    umov w11, v0.h[2]
; CHECK-NEXT:    umov w12, v0.h[3]
; CHECK-NEXT:    umov w13, v0.h[4]
; CHECK-NEXT:    umov w14, v0.h[5]
; CHECK-NEXT:    udiv w8, w9, w8
; CHECK-NEXT:    umov w9, v1.h[0]
; CHECK-NEXT:    udiv w9, w10, w9
; CHECK-NEXT:    umov w10, v1.h[2]
; CHECK-NEXT:    udiv w10, w11, w10
; CHECK-NEXT:    umov w11, v1.h[3]
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    umov w9, v1.h[6]
; CHECK-NEXT:    mov v2.h[1], w8
; CHECK-NEXT:    udiv w11, w12, w11
; CHECK-NEXT:    umov w12, v1.h[4]
; CHECK-NEXT:    mov v2.h[2], w10
; CHECK-NEXT:    umov w10, v0.h[6]
; CHECK-NEXT:    udiv w12, w13, w12
; CHECK-NEXT:    umov w13, v1.h[5]
; CHECK-NEXT:    mov v2.h[3], w11
; CHECK-NEXT:    umov w11, v0.h[7]
; CHECK-NEXT:    udiv w8, w14, w13
; CHECK-NEXT:    mov v2.h[4], w12
; CHECK-NEXT:    udiv w9, w10, w9
; CHECK-NEXT:    umov w10, v1.h[7]
; CHECK-NEXT:    mov v2.h[5], w8
; CHECK-NEXT:    udiv w8, w11, w10
; CHECK-NEXT:    mov v2.h[6], w9
; CHECK-NEXT:    mov v2.h[7], w8
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %tmp3 = udiv <8 x i16> %A, %B
  ret <8 x i16> %tmp3
}

define <1 x i32> @udiv1x32(<1 x i32> %A, <1 x i32> %B) {
; CHECK-LABEL: udiv1x32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    udiv w8, w9, w8
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %tmp3 = udiv <1 x i32> %A, %B
  ret <1 x i32> %tmp3
}

define <2 x i32> @udiv2x32(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: udiv2x32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    mov w10, v0.s[1]
; CHECK-NEXT:    udiv w8, w9, w8
; CHECK-NEXT:    mov w9, v1.s[1]
; CHECK-NEXT:    udiv w9, w10, w9
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    mov v0.s[1], w9
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %tmp3 = udiv <2 x i32> %A, %B
  ret <2 x i32> %tmp3
}

define <4 x i32> @udiv4x32(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: udiv4x32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, v1.s[1]
; CHECK-NEXT:    mov w9, v0.s[1]
; CHECK-NEXT:    fmov w10, s0
; CHECK-NEXT:    mov w11, v0.s[2]
; CHECK-NEXT:    mov w12, v0.s[3]
; CHECK-NEXT:    udiv w8, w9, w8
; CHECK-NEXT:    fmov w9, s1
; CHECK-NEXT:    udiv w9, w10, w9
; CHECK-NEXT:    mov w10, v1.s[2]
; CHECK-NEXT:    udiv w10, w11, w10
; CHECK-NEXT:    mov w11, v1.s[3]
; CHECK-NEXT:    fmov s0, w9
; CHECK-NEXT:    mov v0.s[1], w8
; CHECK-NEXT:    udiv w8, w12, w11
; CHECK-NEXT:    mov v0.s[2], w10
; CHECK-NEXT:    mov v0.s[3], w8
; CHECK-NEXT:    ret
  %tmp3 = udiv <4 x i32> %A, %B
  ret <4 x i32> %tmp3
}

define <1 x i64> @udiv1x64(<1 x i64> %A, <1 x i64> %B) {
; CHECK-LABEL: udiv1x64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    fmov x9, d0
; CHECK-NEXT:    udiv x8, x9, x8
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %tmp3 = udiv <1 x i64> %A, %B
  ret <1 x i64> %tmp3
}

define <2 x i64> @udiv2x64(<2 x i64> %A, <2 x i64> %B) {
; CHECK-LABEL: udiv2x64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    fmov x9, d0
; CHECK-NEXT:    mov x10, v0.d[1]
; CHECK-NEXT:    udiv x8, x9, x8
; CHECK-NEXT:    mov x9, v1.d[1]
; CHECK-NEXT:    udiv x9, x10, x9
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    mov v0.d[1], x9
; CHECK-NEXT:    ret
  %tmp3 = udiv <2 x i64> %A, %B
  ret <2 x i64> %tmp3
}

; Signed remainder. The expected output computes each lane as a scalar SDIV
; followed by MSUB, i.e. a - (a / b) * b, then re-inserts the lane.

define <1 x i8> @srem1x8(<1 x i8> %A, <1 x i8> %B) {
; CHECK-LABEL: srem1x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v1.b[0]
; CHECK-NEXT:    smov w9, v0.b[0]
; CHECK-NEXT:    sdiv w10, w9, w8
; CHECK-NEXT:    msub w8, w10, w8, w9
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %tmp3 = srem <1 x i8> %A, %B
  ret <1 x i8> %tmp3
}

define <8 x i8> @srem8x8(<8 x i8> %A, <8 x i8> %B) {
; CHECK-LABEL: srem8x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w11, v1.b[0]
; CHECK-NEXT:    smov w12, v0.b[0]
; CHECK-NEXT:    smov w8, v1.b[1]
; CHECK-NEXT:    smov w9, v0.b[1]
; CHECK-NEXT:    smov w14, v1.b[2]
; CHECK-NEXT:    smov w15, v0.b[2]
; CHECK-NEXT:    smov w17, v1.b[3]
; CHECK-NEXT:    smov w18, v0.b[3]
; CHECK-NEXT:    smov w1, v1.b[4]
; CHECK-NEXT:    smov w2, v0.b[4]
; CHECK-NEXT:    smov w4, v1.b[5]
; CHECK-NEXT:    smov w5, v0.b[5]
; CHECK-NEXT:    sdiv w13, w12, w11
; CHECK-NEXT:    sdiv w10, w9, w8
; CHECK-NEXT:    msub w11, w13, w11, w12
; CHECK-NEXT:    smov w13, v1.b[7]
; CHECK-NEXT:    fmov s2, w11
; CHECK-NEXT:    smov w11, v0.b[6]
; CHECK-NEXT:    sdiv w16, w15, w14
; CHECK-NEXT:    msub w8, w10, w8, w9
; CHECK-NEXT:    smov w10, v1.b[6]
; CHECK-NEXT:    mov v2.b[1], w8
; CHECK-NEXT:    sdiv w0, w18, w17
; CHECK-NEXT:    msub w8, w16, w14, w15
; CHECK-NEXT:    smov w14, v0.b[7]
; CHECK-NEXT:    mov v2.b[2], w8
; CHECK-NEXT:    sdiv w3, w2, w1
; CHECK-NEXT:    msub w8, w0, w17, w18
; CHECK-NEXT:    mov v2.b[3], w8
; CHECK-NEXT:    sdiv w9, w5, w4
; CHECK-NEXT:    msub w8, w3, w1, w2
; CHECK-NEXT:    mov v2.b[4], w8
; CHECK-NEXT:    sdiv w12, w11, w10
; CHECK-NEXT:    msub w8, w9, w4, w5
; CHECK-NEXT:    mov v2.b[5], w8
; CHECK-NEXT:    sdiv w9, w14, w13
; CHECK-NEXT:    msub w8, w12, w10, w11
; CHECK-NEXT:    mov v2.b[6], w8
; CHECK-NEXT:    msub w8, w9, w13, w14
; CHECK-NEXT:    mov v2.b[7], w8
; CHECK-NEXT:    fmov d0, d2
; CHECK-NEXT:    ret
  %tmp3 = srem <8 x i8> %A, %B
  ret <8 x i8> %tmp3
}

; The 16-lane case is additionally expected to save and restore x19-x28
; around the body, which is why its output carries CFI directives.
define <16 x i8> @srem16x8(<16 x i8> %A, <16 x i8> %B) {
; CHECK-LABEL: srem16x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x28, x27, [sp, #-80]! // 16-byte Folded Spill
; CHECK-NEXT:    stp x26, x25, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp x24, x23, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 80
; CHECK-NEXT:    .cfi_offset w19, -8
; CHECK-NEXT:    .cfi_offset w20, -16
; CHECK-NEXT:    .cfi_offset w21, -24
; CHECK-NEXT:    .cfi_offset w22, -32
; CHECK-NEXT:    .cfi_offset w23, -40
; CHECK-NEXT:    .cfi_offset w24, -48
; CHECK-NEXT:    .cfi_offset w25, -56
; CHECK-NEXT:    .cfi_offset w26, -64
; CHECK-NEXT:    .cfi_offset w27, -72
; CHECK-NEXT:    .cfi_offset w28, -80
; CHECK-NEXT:    smov w11, v1.b[0]
; CHECK-NEXT:    smov w12, v0.b[0]
; CHECK-NEXT:    smov w8, v1.b[1]
; CHECK-NEXT:    smov w9, v0.b[1]
; CHECK-NEXT:    smov w14, v1.b[2]
; CHECK-NEXT:    smov w15, v0.b[2]
; CHECK-NEXT:    smov w17, v1.b[3]
; CHECK-NEXT:    smov w18, v0.b[3]
; CHECK-NEXT:    smov w1, v1.b[4]
; CHECK-NEXT:    smov w2, v0.b[4]
; CHECK-NEXT:    smov w4, v1.b[5]
; CHECK-NEXT:    smov w5, v0.b[5]
; CHECK-NEXT:    sdiv w13, w12, w11
; CHECK-NEXT:    smov w7, v1.b[6]
; CHECK-NEXT:    smov w19, v0.b[6]
; CHECK-NEXT:    smov w21, v1.b[7]
; CHECK-NEXT:    smov w22, v0.b[7]
; CHECK-NEXT:    smov w24, v1.b[8]
; CHECK-NEXT:    smov w25, v0.b[8]
; CHECK-NEXT:    smov w27, v1.b[9]
; CHECK-NEXT:    smov w28, v0.b[9]
; CHECK-NEXT:    sdiv w10, w9, w8
; CHECK-NEXT:    msub w11, w13, w11, w12
; CHECK-NEXT:    smov w13, v1.b[11]
; CHECK-NEXT:    fmov s2, w11
; CHECK-NEXT:    smov w11, v0.b[10]
; CHECK-NEXT:    sdiv w16, w15, w14
; CHECK-NEXT:    msub w8, w10, w8, w9
; CHECK-NEXT:    smov w10, v1.b[10]
; CHECK-NEXT:    mov v2.b[1], w8
; CHECK-NEXT:    sdiv w0, w18, w17
; CHECK-NEXT:    msub w8, w16, w14, w15
; CHECK-NEXT:    smov w14, v0.b[11]
; CHECK-NEXT:    smov w16, v1.b[12]
; CHECK-NEXT:    mov v2.b[2], w8
; CHECK-NEXT:    sdiv w3, w2, w1
; CHECK-NEXT:    msub w8, w0, w17, w18
; CHECK-NEXT:    smov w17, v0.b[12]
; CHECK-NEXT:    smov w0, v1.b[13]
; CHECK-NEXT:    mov v2.b[3], w8
; CHECK-NEXT:    sdiv w6, w5, w4
; CHECK-NEXT:    msub w8, w3, w1, w2
; CHECK-NEXT:    smov w1, v0.b[13]
; CHECK-NEXT:    mov v2.b[4], w8
; CHECK-NEXT:    sdiv w20, w19, w7
; CHECK-NEXT:    msub w8, w6, w4, w5
; CHECK-NEXT:    mov v2.b[5], w8
; CHECK-NEXT:    sdiv w23, w22, w21
; CHECK-NEXT:    msub w8, w20, w7, w19
; CHECK-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    mov v2.b[6], w8
; CHECK-NEXT:    sdiv w26, w25, w24
; CHECK-NEXT:    msub w8, w23, w21, w22
; CHECK-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    mov v2.b[7], w8
; CHECK-NEXT:    sdiv w9, w28, w27
; CHECK-NEXT:    msub w8, w26, w24, w25
; CHECK-NEXT:    ldp x24, x23, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp x26, x25, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    mov v2.b[8], w8
; CHECK-NEXT:    sdiv w12, w11, w10
; CHECK-NEXT:    msub w8, w9, w27, w28
; CHECK-NEXT:    mov v2.b[9], w8
; CHECK-NEXT:    sdiv w15, w14, w13
; CHECK-NEXT:    msub w8, w12, w10, w11
; CHECK-NEXT:    smov w10, v1.b[14]
; CHECK-NEXT:    smov w11, v0.b[14]
; CHECK-NEXT:    mov v2.b[10], w8
; CHECK-NEXT:    sdiv w18, w17, w16
; CHECK-NEXT:    msub w8, w15, w13, w14
; CHECK-NEXT:    smov w13, v1.b[15]
; CHECK-NEXT:    smov w14, v0.b[15]
; CHECK-NEXT:    mov v2.b[11], w8
; CHECK-NEXT:    sdiv w9, w1, w0
; CHECK-NEXT:    msub w8, w18, w16, w17
; CHECK-NEXT:    mov v2.b[12], w8
; CHECK-NEXT:    sdiv w12, w11, w10
; CHECK-NEXT:    msub w8, w9, w0, w1
; CHECK-NEXT:    mov v2.b[13], w8
; CHECK-NEXT:    sdiv w9, w14, w13
; CHECK-NEXT:    msub w8, w12, w10, w11
; CHECK-NEXT:    mov v2.b[14], w8
; CHECK-NEXT:    msub w8, w9, w13, w14
; CHECK-NEXT:    mov v2.b[15], w8
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ldp x28, x27, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  %tmp3 = srem <16 x i8> %A, %B
  ret <16 x i8> %tmp3
}

define <1 x i16> @srem1x16(<1 x i16> %A, <1 x i16> %B) {
; CHECK-LABEL: srem1x16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v1.h[0]
; CHECK-NEXT:    smov w9, v0.h[0]
; CHECK-NEXT:    sdiv w10, w9, w8
; CHECK-NEXT:    msub w8, w10, w8, w9
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %tmp3 = srem <1 x i16> %A, %B
  ret <1 x i16> %tmp3
}

define <4 x i16> @srem4x16(<4 x i16> %A, <4 x i16> %B) {
; CHECK-LABEL: srem4x16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w11, v1.h[0]
; CHECK-NEXT:    smov w12, v0.h[0]
; CHECK-NEXT:    smov w8, v1.h[1]
; CHECK-NEXT:    smov w9, v0.h[1]
; CHECK-NEXT:    smov w14, v1.h[2]
; CHECK-NEXT:    smov w15, v0.h[2]
; CHECK-NEXT:    smov w17, v1.h[3]
; CHECK-NEXT:    smov w18, v0.h[3]
; CHECK-NEXT:    sdiv w13, w12, w11
; CHECK-NEXT:    sdiv w10, w9, w8
; CHECK-NEXT:    msub w11, w13, w11, w12
; CHECK-NEXT:    fmov s0, w11
; CHECK-NEXT:    sdiv w16, w15, w14
; CHECK-NEXT:    msub w8, w10, w8, w9
; CHECK-NEXT:    mov v0.h[1], w8
; CHECK-NEXT:    sdiv w9, w18, w17
; CHECK-NEXT:    msub w8, w16, w14, w15
; CHECK-NEXT:    mov v0.h[2], w8
; CHECK-NEXT:    msub w8, w9, w17, w18
; CHECK-NEXT:    mov v0.h[3], w8
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %tmp3 = srem <4 x i16> %A, %B
  ret <4 x i16> %tmp3
}

define <8 x i16> @srem8x16(<8 x i16> %A, <8 x i16> %B) {
; CHECK-LABEL: srem8x16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smov w11, v1.h[0]
; CHECK-NEXT:    smov w12, v0.h[0]
; CHECK-NEXT:    smov w8, v1.h[1]
; CHECK-NEXT:    smov w9, v0.h[1]
; CHECK-NEXT:    smov w14, v1.h[2]
; CHECK-NEXT:    smov w15, v0.h[2]
; CHECK-NEXT:    smov w17, v1.h[3]
; CHECK-NEXT:    smov w18, v0.h[3]
; CHECK-NEXT:    smov w1, v1.h[4]
; CHECK-NEXT:    smov w2, v0.h[4]
; CHECK-NEXT:    smov w4, v1.h[5]
; CHECK-NEXT:    smov w5, v0.h[5]
; CHECK-NEXT:    sdiv w13, w12, w11
; CHECK-NEXT:    sdiv w10, w9, w8
; CHECK-NEXT:    msub w11, w13, w11, w12
; CHECK-NEXT:    smov w13, v1.h[7]
; CHECK-NEXT:    fmov s2, w11
; CHECK-NEXT:    smov w11, v0.h[6]
; CHECK-NEXT:    sdiv w16, w15, w14
; CHECK-NEXT:    msub w8, w10, w8, w9
; CHECK-NEXT:    smov w10, v1.h[6]
; CHECK-NEXT:    mov v2.h[1], w8
; CHECK-NEXT:    sdiv w0, w18, w17
; CHECK-NEXT:    msub w8, w16, w14, w15
; CHECK-NEXT:    smov w14, v0.h[7]
; CHECK-NEXT:    mov v2.h[2], w8
; CHECK-NEXT:    sdiv w3, w2, w1
; CHECK-NEXT:    msub w8, w0, w17, w18
; CHECK-NEXT:    mov v2.h[3], w8
; CHECK-NEXT:    sdiv w9, w5, w4
; CHECK-NEXT:    msub w8, w3, w1, w2
; CHECK-NEXT:    mov v2.h[4], w8
; CHECK-NEXT:    sdiv w12, w11, w10
; CHECK-NEXT:    msub w8, w9, w4, w5
; CHECK-NEXT:    mov v2.h[5], w8
; CHECK-NEXT:    sdiv w9, w14, w13
; CHECK-NEXT:    msub w8, w12, w10, w11
; CHECK-NEXT:    mov v2.h[6], w8
; CHECK-NEXT:    msub w8, w9, w13, w14
; CHECK-NEXT:    mov v2.h[7], w8
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %tmp3 = srem <8 x i16> %A, %B
  ret <8 x i16> %tmp3
}

define <1 x i32> @srem1x32(<1 x i32> %A, <1 x i32> %B) {
; CHECK-LABEL: srem1x32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    sdiv w10, w9, w8
; CHECK-NEXT:    msub w8, w10, w8, w9
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %tmp3 = srem <1 x i32> %A, %B
  ret <1 x i32> %tmp3
}

define <2 x i32> @srem2x32(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: srem2x32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    mov w11, v1.s[1]
; CHECK-NEXT:    mov w12, v0.s[1]
; CHECK-NEXT:    sdiv w10, w9, w8
; CHECK-NEXT:    sdiv w13, w12, w11
; CHECK-NEXT:    msub w8, w10, w8, w9
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    msub w9, w13, w11, w12
; CHECK-NEXT:    mov v0.s[1], w9
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %tmp3 = srem <2 x i32> %A, %B
  ret <2 x i32> %tmp3
}

define <4 x i32> @srem4x32(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: srem4x32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov w11, s1
; CHECK-NEXT:    fmov w12, s0
; CHECK-NEXT:    mov w8, v1.s[1]
; CHECK-NEXT:    mov w9, v0.s[1]
; CHECK-NEXT:    mov w14, v1.s[2]
; CHECK-NEXT:    mov w15, v0.s[2]
; CHECK-NEXT:    mov w17, v1.s[3]
; CHECK-NEXT:    mov w18, v0.s[3]
; CHECK-NEXT:    sdiv w13, w12, w11
; CHECK-NEXT:    sdiv w10, w9, w8
; CHECK-NEXT:    msub w11, w13, w11, w12
; CHECK-NEXT:    fmov s0, w11
; CHECK-NEXT:    sdiv w16, w15, w14
; CHECK-NEXT:    msub w8, w10, w8, w9
; CHECK-NEXT:    mov v0.s[1], w8
; CHECK-NEXT:    sdiv w9, w18, w17
; CHECK-NEXT:    msub w8, w16, w14, w15
; CHECK-NEXT:    mov v0.s[2], w8
; CHECK-NEXT:    msub w8, w9, w17, w18
; CHECK-NEXT:    mov v0.s[3], w8
; CHECK-NEXT:    ret
  %tmp3 = srem <4 x i32> %A, %B
  ret <4 x i32> %tmp3
}

define <1 x i64> @srem1x64(<1 x i64> %A, <1 x i64> %B) {
; CHECK-LABEL: srem1x64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    fmov x9, d0
; CHECK-NEXT:    sdiv x10, x9, x8
; CHECK-NEXT:    msub x8, x10, x8, x9
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %tmp3 = srem <1 x i64> %A, %B
  ret <1 x i64> %tmp3
}

define <2 x i64> @srem2x64(<2 x i64> %A, <2 x i64> %B) {
; CHECK-LABEL: srem2x64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    fmov x9, d0
; CHECK-NEXT:    mov x11, v1.d[1]
1096; CHECK-NEXT: mov x12, v0.d[1] 1097; CHECK-NEXT: sdiv x10, x9, x8 1098; CHECK-NEXT: sdiv x13, x12, x11 1099; CHECK-NEXT: msub x8, x10, x8, x9 1100; CHECK-NEXT: fmov d0, x8 1101; CHECK-NEXT: msub x9, x13, x11, x12 1102; CHECK-NEXT: mov v0.d[1], x9 1103; CHECK-NEXT: ret 1104 %tmp3 = srem <2 x i64> %A, %B; 1105 ret <2 x i64> %tmp3 1106} 1107 1108define <1 x i8> @urem1x8(<1 x i8> %A, <1 x i8> %B) { 1109; CHECK-LABEL: urem1x8: 1110; CHECK: // %bb.0: 1111; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 1112; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1113; CHECK-NEXT: umov w8, v1.b[0] 1114; CHECK-NEXT: umov w9, v0.b[0] 1115; CHECK-NEXT: udiv w10, w9, w8 1116; CHECK-NEXT: msub w8, w10, w8, w9 1117; CHECK-NEXT: fmov s0, w8 1118; CHECK-NEXT: ret 1119 %tmp3 = urem <1 x i8> %A, %B; 1120 ret <1 x i8> %tmp3 1121} 1122 1123define <8 x i8> @urem8x8(<8 x i8> %A, <8 x i8> %B) { 1124; CHECK-LABEL: urem8x8: 1125; CHECK: // %bb.0: 1126; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 1127; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1128; CHECK-NEXT: umov w11, v1.b[0] 1129; CHECK-NEXT: umov w12, v0.b[0] 1130; CHECK-NEXT: umov w8, v1.b[1] 1131; CHECK-NEXT: umov w9, v0.b[1] 1132; CHECK-NEXT: umov w14, v1.b[2] 1133; CHECK-NEXT: umov w15, v0.b[2] 1134; CHECK-NEXT: umov w17, v1.b[3] 1135; CHECK-NEXT: umov w18, v0.b[3] 1136; CHECK-NEXT: umov w1, v1.b[4] 1137; CHECK-NEXT: umov w2, v0.b[4] 1138; CHECK-NEXT: umov w4, v1.b[5] 1139; CHECK-NEXT: umov w5, v0.b[5] 1140; CHECK-NEXT: udiv w13, w12, w11 1141; CHECK-NEXT: udiv w10, w9, w8 1142; CHECK-NEXT: msub w11, w13, w11, w12 1143; CHECK-NEXT: umov w13, v1.b[7] 1144; CHECK-NEXT: fmov s2, w11 1145; CHECK-NEXT: umov w11, v0.b[6] 1146; CHECK-NEXT: udiv w16, w15, w14 1147; CHECK-NEXT: msub w8, w10, w8, w9 1148; CHECK-NEXT: umov w10, v1.b[6] 1149; CHECK-NEXT: mov v2.b[1], w8 1150; CHECK-NEXT: udiv w0, w18, w17 1151; CHECK-NEXT: msub w8, w16, w14, w15 1152; CHECK-NEXT: umov w14, v0.b[7] 1153; CHECK-NEXT: mov v2.b[2], w8 1154; CHECK-NEXT: 
udiv w3, w2, w1 1155; CHECK-NEXT: msub w8, w0, w17, w18 1156; CHECK-NEXT: mov v2.b[3], w8 1157; CHECK-NEXT: udiv w9, w5, w4 1158; CHECK-NEXT: msub w8, w3, w1, w2 1159; CHECK-NEXT: mov v2.b[4], w8 1160; CHECK-NEXT: udiv w12, w11, w10 1161; CHECK-NEXT: msub w8, w9, w4, w5 1162; CHECK-NEXT: mov v2.b[5], w8 1163; CHECK-NEXT: udiv w9, w14, w13 1164; CHECK-NEXT: msub w8, w12, w10, w11 1165; CHECK-NEXT: mov v2.b[6], w8 1166; CHECK-NEXT: msub w8, w9, w13, w14 1167; CHECK-NEXT: mov v2.b[7], w8 1168; CHECK-NEXT: fmov d0, d2 1169; CHECK-NEXT: ret 1170 %tmp3 = urem <8 x i8> %A, %B; 1171 ret <8 x i8> %tmp3 1172} 1173 1174define <16 x i8> @urem16x8(<16 x i8> %A, <16 x i8> %B) { 1175; CHECK-LABEL: urem16x8: 1176; CHECK: // %bb.0: 1177; CHECK-NEXT: stp x28, x27, [sp, #-80]! // 16-byte Folded Spill 1178; CHECK-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill 1179; CHECK-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill 1180; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill 1181; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill 1182; CHECK-NEXT: .cfi_def_cfa_offset 80 1183; CHECK-NEXT: .cfi_offset w19, -8 1184; CHECK-NEXT: .cfi_offset w20, -16 1185; CHECK-NEXT: .cfi_offset w21, -24 1186; CHECK-NEXT: .cfi_offset w22, -32 1187; CHECK-NEXT: .cfi_offset w23, -40 1188; CHECK-NEXT: .cfi_offset w24, -48 1189; CHECK-NEXT: .cfi_offset w25, -56 1190; CHECK-NEXT: .cfi_offset w26, -64 1191; CHECK-NEXT: .cfi_offset w27, -72 1192; CHECK-NEXT: .cfi_offset w28, -80 1193; CHECK-NEXT: umov w11, v1.b[0] 1194; CHECK-NEXT: umov w12, v0.b[0] 1195; CHECK-NEXT: umov w8, v1.b[1] 1196; CHECK-NEXT: umov w9, v0.b[1] 1197; CHECK-NEXT: umov w14, v1.b[2] 1198; CHECK-NEXT: umov w15, v0.b[2] 1199; CHECK-NEXT: umov w17, v1.b[3] 1200; CHECK-NEXT: umov w18, v0.b[3] 1201; CHECK-NEXT: umov w1, v1.b[4] 1202; CHECK-NEXT: umov w2, v0.b[4] 1203; CHECK-NEXT: umov w4, v1.b[5] 1204; CHECK-NEXT: umov w5, v0.b[5] 1205; CHECK-NEXT: udiv w13, w12, w11 1206; CHECK-NEXT: umov w7, v1.b[6] 1207; 
CHECK-NEXT: umov w19, v0.b[6] 1208; CHECK-NEXT: umov w21, v1.b[7] 1209; CHECK-NEXT: umov w22, v0.b[7] 1210; CHECK-NEXT: umov w24, v1.b[8] 1211; CHECK-NEXT: umov w25, v0.b[8] 1212; CHECK-NEXT: umov w27, v1.b[9] 1213; CHECK-NEXT: umov w28, v0.b[9] 1214; CHECK-NEXT: udiv w10, w9, w8 1215; CHECK-NEXT: msub w11, w13, w11, w12 1216; CHECK-NEXT: umov w13, v1.b[11] 1217; CHECK-NEXT: fmov s2, w11 1218; CHECK-NEXT: umov w11, v0.b[10] 1219; CHECK-NEXT: udiv w16, w15, w14 1220; CHECK-NEXT: msub w8, w10, w8, w9 1221; CHECK-NEXT: umov w10, v1.b[10] 1222; CHECK-NEXT: mov v2.b[1], w8 1223; CHECK-NEXT: udiv w0, w18, w17 1224; CHECK-NEXT: msub w8, w16, w14, w15 1225; CHECK-NEXT: umov w14, v0.b[11] 1226; CHECK-NEXT: umov w16, v1.b[12] 1227; CHECK-NEXT: mov v2.b[2], w8 1228; CHECK-NEXT: udiv w3, w2, w1 1229; CHECK-NEXT: msub w8, w0, w17, w18 1230; CHECK-NEXT: umov w17, v0.b[12] 1231; CHECK-NEXT: umov w0, v1.b[13] 1232; CHECK-NEXT: mov v2.b[3], w8 1233; CHECK-NEXT: udiv w6, w5, w4 1234; CHECK-NEXT: msub w8, w3, w1, w2 1235; CHECK-NEXT: umov w1, v0.b[13] 1236; CHECK-NEXT: mov v2.b[4], w8 1237; CHECK-NEXT: udiv w20, w19, w7 1238; CHECK-NEXT: msub w8, w6, w4, w5 1239; CHECK-NEXT: mov v2.b[5], w8 1240; CHECK-NEXT: udiv w23, w22, w21 1241; CHECK-NEXT: msub w8, w20, w7, w19 1242; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload 1243; CHECK-NEXT: mov v2.b[6], w8 1244; CHECK-NEXT: udiv w26, w25, w24 1245; CHECK-NEXT: msub w8, w23, w21, w22 1246; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload 1247; CHECK-NEXT: mov v2.b[7], w8 1248; CHECK-NEXT: udiv w9, w28, w27 1249; CHECK-NEXT: msub w8, w26, w24, w25 1250; CHECK-NEXT: ldp x24, x23, [sp, #32] // 16-byte Folded Reload 1251; CHECK-NEXT: ldp x26, x25, [sp, #16] // 16-byte Folded Reload 1252; CHECK-NEXT: mov v2.b[8], w8 1253; CHECK-NEXT: udiv w12, w11, w10 1254; CHECK-NEXT: msub w8, w9, w27, w28 1255; CHECK-NEXT: mov v2.b[9], w8 1256; CHECK-NEXT: udiv w15, w14, w13 1257; CHECK-NEXT: msub w8, w12, w10, w11 1258; CHECK-NEXT: 
umov w10, v1.b[14] 1259; CHECK-NEXT: umov w11, v0.b[14] 1260; CHECK-NEXT: mov v2.b[10], w8 1261; CHECK-NEXT: udiv w18, w17, w16 1262; CHECK-NEXT: msub w8, w15, w13, w14 1263; CHECK-NEXT: umov w13, v1.b[15] 1264; CHECK-NEXT: umov w14, v0.b[15] 1265; CHECK-NEXT: mov v2.b[11], w8 1266; CHECK-NEXT: udiv w9, w1, w0 1267; CHECK-NEXT: msub w8, w18, w16, w17 1268; CHECK-NEXT: mov v2.b[12], w8 1269; CHECK-NEXT: udiv w12, w11, w10 1270; CHECK-NEXT: msub w8, w9, w0, w1 1271; CHECK-NEXT: mov v2.b[13], w8 1272; CHECK-NEXT: udiv w9, w14, w13 1273; CHECK-NEXT: msub w8, w12, w10, w11 1274; CHECK-NEXT: mov v2.b[14], w8 1275; CHECK-NEXT: msub w8, w9, w13, w14 1276; CHECK-NEXT: mov v2.b[15], w8 1277; CHECK-NEXT: mov v0.16b, v2.16b 1278; CHECK-NEXT: ldp x28, x27, [sp], #80 // 16-byte Folded Reload 1279; CHECK-NEXT: ret 1280 %tmp3 = urem <16 x i8> %A, %B; 1281 ret <16 x i8> %tmp3 1282} 1283 1284define <1 x i16> @urem1x16(<1 x i16> %A, <1 x i16> %B) { 1285; CHECK-LABEL: urem1x16: 1286; CHECK: // %bb.0: 1287; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 1288; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1289; CHECK-NEXT: umov w8, v1.h[0] 1290; CHECK-NEXT: umov w9, v0.h[0] 1291; CHECK-NEXT: udiv w10, w9, w8 1292; CHECK-NEXT: msub w8, w10, w8, w9 1293; CHECK-NEXT: fmov s0, w8 1294; CHECK-NEXT: ret 1295 %tmp3 = urem <1 x i16> %A, %B; 1296 ret <1 x i16> %tmp3 1297} 1298 1299define <4 x i16> @urem4x16(<4 x i16> %A, <4 x i16> %B) { 1300; CHECK-LABEL: urem4x16: 1301; CHECK: // %bb.0: 1302; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 1303; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1304; CHECK-NEXT: umov w11, v1.h[0] 1305; CHECK-NEXT: umov w12, v0.h[0] 1306; CHECK-NEXT: umov w8, v1.h[1] 1307; CHECK-NEXT: umov w9, v0.h[1] 1308; CHECK-NEXT: umov w14, v1.h[2] 1309; CHECK-NEXT: umov w15, v0.h[2] 1310; CHECK-NEXT: umov w17, v1.h[3] 1311; CHECK-NEXT: umov w18, v0.h[3] 1312; CHECK-NEXT: udiv w13, w12, w11 1313; CHECK-NEXT: udiv w10, w9, w8 1314; CHECK-NEXT: msub w11, w13, w11, w12 1315; 
CHECK-NEXT: fmov s0, w11 1316; CHECK-NEXT: udiv w16, w15, w14 1317; CHECK-NEXT: msub w8, w10, w8, w9 1318; CHECK-NEXT: mov v0.h[1], w8 1319; CHECK-NEXT: udiv w9, w18, w17 1320; CHECK-NEXT: msub w8, w16, w14, w15 1321; CHECK-NEXT: mov v0.h[2], w8 1322; CHECK-NEXT: msub w8, w9, w17, w18 1323; CHECK-NEXT: mov v0.h[3], w8 1324; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 1325; CHECK-NEXT: ret 1326 %tmp3 = urem <4 x i16> %A, %B; 1327 ret <4 x i16> %tmp3 1328} 1329 1330define <8 x i16> @urem8x16(<8 x i16> %A, <8 x i16> %B) { 1331; CHECK-LABEL: urem8x16: 1332; CHECK: // %bb.0: 1333; CHECK-NEXT: umov w11, v1.h[0] 1334; CHECK-NEXT: umov w12, v0.h[0] 1335; CHECK-NEXT: umov w8, v1.h[1] 1336; CHECK-NEXT: umov w9, v0.h[1] 1337; CHECK-NEXT: umov w14, v1.h[2] 1338; CHECK-NEXT: umov w15, v0.h[2] 1339; CHECK-NEXT: umov w17, v1.h[3] 1340; CHECK-NEXT: umov w18, v0.h[3] 1341; CHECK-NEXT: umov w1, v1.h[4] 1342; CHECK-NEXT: umov w2, v0.h[4] 1343; CHECK-NEXT: umov w4, v1.h[5] 1344; CHECK-NEXT: umov w5, v0.h[5] 1345; CHECK-NEXT: udiv w13, w12, w11 1346; CHECK-NEXT: udiv w10, w9, w8 1347; CHECK-NEXT: msub w11, w13, w11, w12 1348; CHECK-NEXT: umov w13, v1.h[7] 1349; CHECK-NEXT: fmov s2, w11 1350; CHECK-NEXT: umov w11, v0.h[6] 1351; CHECK-NEXT: udiv w16, w15, w14 1352; CHECK-NEXT: msub w8, w10, w8, w9 1353; CHECK-NEXT: umov w10, v1.h[6] 1354; CHECK-NEXT: mov v2.h[1], w8 1355; CHECK-NEXT: udiv w0, w18, w17 1356; CHECK-NEXT: msub w8, w16, w14, w15 1357; CHECK-NEXT: umov w14, v0.h[7] 1358; CHECK-NEXT: mov v2.h[2], w8 1359; CHECK-NEXT: udiv w3, w2, w1 1360; CHECK-NEXT: msub w8, w0, w17, w18 1361; CHECK-NEXT: mov v2.h[3], w8 1362; CHECK-NEXT: udiv w9, w5, w4 1363; CHECK-NEXT: msub w8, w3, w1, w2 1364; CHECK-NEXT: mov v2.h[4], w8 1365; CHECK-NEXT: udiv w12, w11, w10 1366; CHECK-NEXT: msub w8, w9, w4, w5 1367; CHECK-NEXT: mov v2.h[5], w8 1368; CHECK-NEXT: udiv w9, w14, w13 1369; CHECK-NEXT: msub w8, w12, w10, w11 1370; CHECK-NEXT: mov v2.h[6], w8 1371; CHECK-NEXT: msub w8, w9, w13, w14 1372; 
CHECK-NEXT: mov v2.h[7], w8 1373; CHECK-NEXT: mov v0.16b, v2.16b 1374; CHECK-NEXT: ret 1375 %tmp3 = urem <8 x i16> %A, %B; 1376 ret <8 x i16> %tmp3 1377} 1378 1379define <1 x i32> @urem1x32(<1 x i32> %A, <1 x i32> %B) { 1380; CHECK-LABEL: urem1x32: 1381; CHECK: // %bb.0: 1382; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 1383; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1384; CHECK-NEXT: fmov w8, s1 1385; CHECK-NEXT: fmov w9, s0 1386; CHECK-NEXT: udiv w10, w9, w8 1387; CHECK-NEXT: msub w8, w10, w8, w9 1388; CHECK-NEXT: fmov s0, w8 1389; CHECK-NEXT: ret 1390 %tmp3 = urem <1 x i32> %A, %B; 1391 ret <1 x i32> %tmp3 1392} 1393 1394define <2 x i32> @urem2x32(<2 x i32> %A, <2 x i32> %B) { 1395; CHECK-LABEL: urem2x32: 1396; CHECK: // %bb.0: 1397; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 1398; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1399; CHECK-NEXT: fmov w8, s1 1400; CHECK-NEXT: fmov w9, s0 1401; CHECK-NEXT: mov w11, v1.s[1] 1402; CHECK-NEXT: mov w12, v0.s[1] 1403; CHECK-NEXT: udiv w10, w9, w8 1404; CHECK-NEXT: udiv w13, w12, w11 1405; CHECK-NEXT: msub w8, w10, w8, w9 1406; CHECK-NEXT: fmov s0, w8 1407; CHECK-NEXT: msub w9, w13, w11, w12 1408; CHECK-NEXT: mov v0.s[1], w9 1409; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 1410; CHECK-NEXT: ret 1411 %tmp3 = urem <2 x i32> %A, %B; 1412 ret <2 x i32> %tmp3 1413} 1414 1415define <4 x i32> @urem4x32(<4 x i32> %A, <4 x i32> %B) { 1416; CHECK-LABEL: urem4x32: 1417; CHECK: // %bb.0: 1418; CHECK-NEXT: fmov w11, s1 1419; CHECK-NEXT: fmov w12, s0 1420; CHECK-NEXT: mov w8, v1.s[1] 1421; CHECK-NEXT: mov w9, v0.s[1] 1422; CHECK-NEXT: mov w14, v1.s[2] 1423; CHECK-NEXT: mov w15, v0.s[2] 1424; CHECK-NEXT: mov w17, v1.s[3] 1425; CHECK-NEXT: mov w18, v0.s[3] 1426; CHECK-NEXT: udiv w13, w12, w11 1427; CHECK-NEXT: udiv w10, w9, w8 1428; CHECK-NEXT: msub w11, w13, w11, w12 1429; CHECK-NEXT: fmov s0, w11 1430; CHECK-NEXT: udiv w16, w15, w14 1431; CHECK-NEXT: msub w8, w10, w8, w9 1432; CHECK-NEXT: mov v0.s[1], w8 1433; 
CHECK-NEXT: udiv w9, w18, w17 1434; CHECK-NEXT: msub w8, w16, w14, w15 1435; CHECK-NEXT: mov v0.s[2], w8 1436; CHECK-NEXT: msub w8, w9, w17, w18 1437; CHECK-NEXT: mov v0.s[3], w8 1438; CHECK-NEXT: ret 1439 %tmp3 = urem <4 x i32> %A, %B; 1440 ret <4 x i32> %tmp3 1441} 1442 1443define <1 x i64> @urem1x64(<1 x i64> %A, <1 x i64> %B) { 1444; CHECK-LABEL: urem1x64: 1445; CHECK: // %bb.0: 1446; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 1447; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1448; CHECK-NEXT: fmov x8, d1 1449; CHECK-NEXT: fmov x9, d0 1450; CHECK-NEXT: udiv x10, x9, x8 1451; CHECK-NEXT: msub x8, x10, x8, x9 1452; CHECK-NEXT: fmov d0, x8 1453; CHECK-NEXT: ret 1454 %tmp3 = urem <1 x i64> %A, %B; 1455 ret <1 x i64> %tmp3 1456} 1457 1458define <2 x i64> @urem2x64(<2 x i64> %A, <2 x i64> %B) { 1459; CHECK-LABEL: urem2x64: 1460; CHECK: // %bb.0: 1461; CHECK-NEXT: fmov x8, d1 1462; CHECK-NEXT: fmov x9, d0 1463; CHECK-NEXT: mov x11, v1.d[1] 1464; CHECK-NEXT: mov x12, v0.d[1] 1465; CHECK-NEXT: udiv x10, x9, x8 1466; CHECK-NEXT: udiv x13, x12, x11 1467; CHECK-NEXT: msub x8, x10, x8, x9 1468; CHECK-NEXT: fmov d0, x8 1469; CHECK-NEXT: msub x9, x13, x11, x12 1470; CHECK-NEXT: mov v0.d[1], x9 1471; CHECK-NEXT: ret 1472 %tmp3 = urem <2 x i64> %A, %B; 1473 ret <2 x i64> %tmp3 1474} 1475 1476define <2 x float> @frem2f32(<2 x float> %A, <2 x float> %B) { 1477; CHECK-LABEL: frem2f32: 1478; CHECK: // %bb.0: 1479; CHECK-NEXT: sub sp, sp, #64 1480; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill 1481; CHECK-NEXT: .cfi_def_cfa_offset 64 1482; CHECK-NEXT: .cfi_offset w30, -16 1483; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 1484; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1485; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill 1486; CHECK-NEXT: mov s0, v0.s[1] 1487; CHECK-NEXT: mov s1, v1.s[1] 1488; CHECK-NEXT: bl fmodf 1489; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 1490; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 1491; CHECK-NEXT: ldp q0, 
q1, [sp] // 32-byte Folded Reload 1492; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 1493; CHECK-NEXT: // kill: def $s1 killed $s1 killed $q1 1494; CHECK-NEXT: bl fmodf 1495; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 1496; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 1497; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload 1498; CHECK-NEXT: mov v0.s[1], v1.s[0] 1499; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 1500; CHECK-NEXT: add sp, sp, #64 1501; CHECK-NEXT: ret 1502 %tmp3 = frem <2 x float> %A, %B; 1503 ret <2 x float> %tmp3 1504} 1505 1506define <4 x float> @frem4f32(<4 x float> %A, <4 x float> %B) { 1507; CHECK-LABEL: frem4f32: 1508; CHECK: // %bb.0: 1509; CHECK-NEXT: sub sp, sp, #64 1510; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill 1511; CHECK-NEXT: .cfi_def_cfa_offset 64 1512; CHECK-NEXT: .cfi_offset w30, -16 1513; CHECK-NEXT: stp q0, q1, [sp, #16] // 32-byte Folded Spill 1514; CHECK-NEXT: mov s0, v0.s[1] 1515; CHECK-NEXT: mov s1, v1.s[1] 1516; CHECK-NEXT: bl fmodf 1517; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 1518; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill 1519; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload 1520; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 1521; CHECK-NEXT: // kill: def $s1 killed $s1 killed $q1 1522; CHECK-NEXT: bl fmodf 1523; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload 1524; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 1525; CHECK-NEXT: mov v0.s[1], v1.s[0] 1526; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill 1527; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload 1528; CHECK-NEXT: mov s0, v0.s[2] 1529; CHECK-NEXT: mov s1, v1.s[2] 1530; CHECK-NEXT: bl fmodf 1531; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload 1532; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 1533; CHECK-NEXT: mov v1.s[2], v0.s[0] 1534; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill 1535; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload 1536; CHECK-NEXT: 
mov s0, v0.s[3] 1537; CHECK-NEXT: mov s1, v1.s[3] 1538; CHECK-NEXT: bl fmodf 1539; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload 1540; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 1541; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload 1542; CHECK-NEXT: mov v1.s[3], v0.s[0] 1543; CHECK-NEXT: mov v0.16b, v1.16b 1544; CHECK-NEXT: add sp, sp, #64 1545; CHECK-NEXT: ret 1546 %tmp3 = frem <4 x float> %A, %B; 1547 ret <4 x float> %tmp3 1548} 1549 1550define <1 x double> @frem1d64(<1 x double> %A, <1 x double> %B) { 1551; CHECK-LABEL: frem1d64: 1552; CHECK: // %bb.0: 1553; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill 1554; CHECK-NEXT: .cfi_def_cfa_offset 16 1555; CHECK-NEXT: .cfi_offset w30, -16 1556; CHECK-NEXT: bl fmod 1557; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload 1558; CHECK-NEXT: ret 1559 %tmp3 = frem <1 x double> %A, %B; 1560 ret <1 x double> %tmp3 1561} 1562 1563define <2 x double> @frem2d64(<2 x double> %A, <2 x double> %B) { 1564; CHECK-LABEL: frem2d64: 1565; CHECK: // %bb.0: 1566; CHECK-NEXT: sub sp, sp, #64 1567; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill 1568; CHECK-NEXT: .cfi_def_cfa_offset 64 1569; CHECK-NEXT: .cfi_offset w30, -16 1570; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill 1571; CHECK-NEXT: mov d0, v0.d[1] 1572; CHECK-NEXT: mov d1, v1.d[1] 1573; CHECK-NEXT: bl fmod 1574; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1575; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 1576; CHECK-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload 1577; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 1578; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 1579; CHECK-NEXT: bl fmod 1580; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 1581; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 1582; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload 1583; CHECK-NEXT: mov v0.d[1], v1.d[0] 1584; CHECK-NEXT: add sp, sp, #64 1585; CHECK-NEXT: ret 1586 %tmp3 = frem <2 x double> %A, %B; 1587 ret <2 x 
double> %tmp3 1588} 1589 1590declare <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8>, <8 x i8>) 1591declare <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8>, <16 x i8>) 1592 1593define <8 x i8> @poly_mulv8i8(<8 x i8> %lhs, <8 x i8> %rhs) { 1594; CHECK-LABEL: poly_mulv8i8: 1595; CHECK: // %bb.0: 1596; CHECK-NEXT: pmul v0.8b, v0.8b, v1.8b 1597; CHECK-NEXT: ret 1598 %prod = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) 1599 ret <8 x i8> %prod 1600} 1601 1602define <16 x i8> @poly_mulv16i8(<16 x i8> %lhs, <16 x i8> %rhs) { 1603; CHECK-LABEL: poly_mulv16i8: 1604; CHECK: // %bb.0: 1605; CHECK-NEXT: pmul v0.16b, v0.16b, v1.16b 1606; CHECK-NEXT: ret 1607 %prod = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) 1608 ret <16 x i8> %prod 1609} 1610 1611declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>) 1612declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>) 1613declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>) 1614declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>) 1615 1616define <4 x i16> @test_sqdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { 1617; CHECK-LABEL: test_sqdmulh_v4i16: 1618; CHECK: // %bb.0: 1619; CHECK-NEXT: sqdmulh v0.4h, v0.4h, v1.4h 1620; CHECK-NEXT: ret 1621 %prod = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) 1622 ret <4 x i16> %prod 1623} 1624 1625define <8 x i16> @test_sqdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { 1626; CHECK-LABEL: test_sqdmulh_v8i16: 1627; CHECK: // %bb.0: 1628; CHECK-NEXT: sqdmulh v0.8h, v0.8h, v1.8h 1629; CHECK-NEXT: ret 1630 %prod = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) 1631 ret <8 x i16> %prod 1632} 1633 1634define <2 x i32> @test_sqdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { 1635; CHECK-LABEL: test_sqdmulh_v2i32: 1636; CHECK: // %bb.0: 1637; CHECK-NEXT: sqdmulh v0.2s, v0.2s, v1.2s 1638; CHECK-NEXT: ret 1639 %prod 
= call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) 1640 ret <2 x i32> %prod 1641} 1642 1643define <4 x i32> @test_sqdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { 1644; CHECK-LABEL: test_sqdmulh_v4i32: 1645; CHECK: // %bb.0: 1646; CHECK-NEXT: sqdmulh v0.4s, v0.4s, v1.4s 1647; CHECK-NEXT: ret 1648 %prod = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) 1649 ret <4 x i32> %prod 1650} 1651 1652declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>) 1653declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>) 1654declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>) 1655declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) 1656 1657define <4 x i16> @test_sqrdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { 1658; CHECK-LABEL: test_sqrdmulh_v4i16: 1659; CHECK: // %bb.0: 1660; CHECK-NEXT: sqrdmulh v0.4h, v0.4h, v1.4h 1661; CHECK-NEXT: ret 1662 %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) 1663 ret <4 x i16> %prod 1664} 1665 1666define <8 x i16> @test_sqrdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { 1667; CHECK-LABEL: test_sqrdmulh_v8i16: 1668; CHECK: // %bb.0: 1669; CHECK-NEXT: sqrdmulh v0.8h, v0.8h, v1.8h 1670; CHECK-NEXT: ret 1671 %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) 1672 ret <8 x i16> %prod 1673} 1674 1675define <2 x i32> @test_sqrdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { 1676; CHECK-LABEL: test_sqrdmulh_v2i32: 1677; CHECK: // %bb.0: 1678; CHECK-NEXT: sqrdmulh v0.2s, v0.2s, v1.2s 1679; CHECK-NEXT: ret 1680 %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) 1681 ret <2 x i32> %prod 1682} 1683 1684define <4 x i32> @test_sqrdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { 1685; CHECK-LABEL: test_sqrdmulh_v4i32: 1686; CHECK: // %bb.0: 1687; CHECK-NEXT: sqrdmulh v0.4s, v0.4s, v1.4s 1688; CHECK-NEXT: ret 1689 
%prod = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) 1690 ret <4 x i32> %prod 1691} 1692 1693declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>) 1694declare <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float>, <4 x float>) 1695declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>) 1696 1697define <2 x float> @fmulx_v2f32(<2 x float> %lhs, <2 x float> %rhs) { 1698; CHECK-LABEL: fmulx_v2f32: 1699; CHECK: // %bb.0: 1700; CHECK-NEXT: fmulx v0.2s, v0.2s, v1.2s 1701; CHECK-NEXT: ret 1702; Using registers other than v0, v1 and v2 are possible, but would be odd. 1703 %val = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %lhs, <2 x float> %rhs) 1704 ret <2 x float> %val 1705} 1706 1707define <4 x float> @fmulx_v4f32(<4 x float> %lhs, <4 x float> %rhs) { 1708; CHECK-LABEL: fmulx_v4f32: 1709; CHECK: // %bb.0: 1710; CHECK-NEXT: fmulx v0.4s, v0.4s, v1.4s 1711; CHECK-NEXT: ret 1712; Using registers other than v0, v1 and v2 are possible, but would be odd. 1713 %val = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %lhs, <4 x float> %rhs) 1714 ret <4 x float> %val 1715} 1716 1717define <2 x double> @fmulx_v2f64(<2 x double> %lhs, <2 x double> %rhs) { 1718; CHECK-LABEL: fmulx_v2f64: 1719; CHECK: // %bb.0: 1720; CHECK-NEXT: fmulx v0.2d, v0.2d, v1.2d 1721; CHECK-NEXT: ret 1722; Using registers other than v0, v1 and v2 are possible, but would be odd. 1723 %val = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %lhs, <2 x double> %rhs) 1724 ret <2 x double> %val 1725} 1726