; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s

; Check that expensive divides are expanded into a more performant sequence

;
; SDIV
;

define <vscale x 16 x i8> @sdiv_i8(<vscale x 16 x i8> %a) #0 {
; CHECK-LABEL: sdiv_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.b, #86 // =0x56
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    smulh z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    lsr z1.b, z0.b, #7
; CHECK-NEXT:    add z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %div = sdiv <vscale x 16 x i8> %a, splat (i8 3)
  ret <vscale x 16 x i8> %div
}

define <vscale x 8 x i16> @sdiv_i16(<vscale x 8 x i16> %a) #0 {
; CHECK-LABEL: sdiv_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #21846 // =0x5556
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov z1.h, w8
; CHECK-NEXT:    smulh z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    lsr z1.h, z0.h, #15
; CHECK-NEXT:    add z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %div = sdiv <vscale x 8 x i16> %a, splat (i16 3)
  ret <vscale x 8 x i16> %div
}

define <vscale x 4 x i32> @sdiv_i32(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: sdiv_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #21846 // =0x5556
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    movk w8, #21845, lsl #16
; CHECK-NEXT:    mov z1.s, w8
; CHECK-NEXT:    smulh z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    lsr z1.s, z0.s, #31
; CHECK-NEXT:    add z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %div = sdiv <vscale x 4 x i32> %a, splat (i32 3)
  ret <vscale x 4 x i32> %div
}

define <vscale x 2 x i64> @sdiv_i64(<vscale x 2 x i64> %a) #0 {
; CHECK-LABEL: sdiv_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x8, #6148914691236517205 // =0x5555555555555555
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    movk x8, #21846
; CHECK-NEXT:    mov z1.d, x8
; CHECK-NEXT:    smulh z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    lsr z1.d, z0.d, #63
; CHECK-NEXT:    add z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %div = sdiv <vscale x 2 x i64> %a, splat (i64 3)
  ret <vscale x 2 x i64> %div
}

;
; UDIV
;

define <vscale x 16 x i8> @udiv_i8(<vscale x 16 x i8> %a) #0 {
; CHECK-LABEL: udiv_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z1.b, #-85 // =0xffffffffffffffab
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    umulh z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    lsr z0.b, z0.b, #1
; CHECK-NEXT:    ret
  %div = udiv <vscale x 16 x i8> %a, splat (i8 3)
  ret <vscale x 16 x i8> %div
}

define <vscale x 8 x i16> @udiv_i16(<vscale x 8 x i16> %a) #0 {
; CHECK-LABEL: udiv_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #-21845 // =0xffffaaab
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov z1.h, w8
; CHECK-NEXT:    umulh z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    lsr z0.h, z0.h, #1
; CHECK-NEXT:    ret
  %div = udiv <vscale x 8 x i16> %a, splat (i16 3)
  ret <vscale x 8 x i16> %div
}

define <vscale x 4 x i32> @udiv_i32(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: udiv_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #43691 // =0xaaab
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    movk w8, #43690, lsl #16
; CHECK-NEXT:    mov z1.s, w8
; CHECK-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    lsr z0.s, z0.s, #1
; CHECK-NEXT:    ret
  %div = udiv <vscale x 4 x i32> %a, splat (i32 3)
  ret <vscale x 4 x i32> %div
}

define <vscale x 2 x i64> @udiv_i64(<vscale x 2 x i64> %a) #0 {
; CHECK-LABEL: udiv_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x8, #-6148914691236517206 // =0xaaaaaaaaaaaaaaaa
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    movk x8, #43691
; CHECK-NEXT:    mov z1.d, x8
; CHECK-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    lsr z0.d, z0.d, #1
; CHECK-NEXT:    ret
  %div = udiv <vscale x 2 x i64> %a, splat (i64 3)
  ret <vscale x 2 x i64> %div
}

attributes #0 = { "target-features"="+sve" }
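
; Note on the expansion checked above (an illustrative summary; the constants
; come from the CHECK lines): the divide by 3 is replaced with a multiply by a
; fixed-point reciprocal of 3. For example, in the unsigned i16 case the
; multiplier is 0xaaab, and since 0xaaab * 3 == 0x20001,
;   x udiv 3 == (x * 0xaaab) >> 17   for any 16-bit x,
; which is lowered as umulh (the high half of the widened product, i.e. >> 16)
; followed by lsr #1. The signed cases use smulh and additionally add the sign
; bit of the intermediate result (lsr #<bits-1> + add) so that the quotient
; rounds toward zero, matching sdiv semantics.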