; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s

; PR48683 'Quadratic Reciprocity' - and(mul(x,x),2) -> 0

; Scalar case: bit 1 of x*x is tested against zero; the expected output is a
; constant-false result with no multiply emitted.
define i1 @PR48683(i32 %x) {
; CHECK-LABEL: PR48683:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    ret
  %a = mul i32 %x, %x
  %b = and i32 %a, 2
  %c = icmp ne i32 %b, 0
  ret i1 %c
}

; Splat-vector form of the test above; the expected output is an all-zero vector.
define <4 x i1> @PR48683_vec(<4 x i32> %x) {
; CHECK-LABEL: PR48683_vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    ret
  %a = mul <4 x i32> %x, %x
  %b = and <4 x i32> %a, <i32 2, i32 2, i32 2, i32 2>
  %c = icmp ne <4 x i32> %b, zeroinitializer
  ret <4 x i1> %c
}

; Same as the splat test, but the last mask lane is undef. The expected output
; keeps the multiply and the bit test, i.e. no fold is performed here.
define <4 x i1> @PR48683_vec_undef(<4 x i32> %x) {
; CHECK-LABEL: PR48683_vec_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.4s, #2
; CHECK-NEXT:    mul v0.4s, v0.4s, v0.4s
; CHECK-NEXT:    cmtst v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    xtn v0.4h, v0.4s
; CHECK-NEXT:    ret
  %a = mul <4 x i32> %x, %x
  %b = and <4 x i32> %a, <i32 2, i32 2, i32 2, i32 undef>
  %c = icmp ne <4 x i32> %b, zeroinitializer
  ret <4 x i1> %c
}

; mul(x,x) - bit[1] is 0, but if demanding the other bits the source must not be undef

; Negative case: %x is not frozen, so it may be undef. The mask -3 demands every
; bit except bit 1, therefore the 'and' has to survive in the output.
define i64 @combine_mul_self_demandedbits(i64 %x) {
; CHECK-LABEL: combine_mul_self_demandedbits:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul x8, x0, x0
; CHECK-NEXT:    and x0, x8, #0xfffffffffffffffd
; CHECK-NEXT:    ret
  %1 = mul i64 %x, %x
  %2 = and i64 %1, -3
  ret i64 %2
}

; Positive case: the operand is frozen before being squared, so bit 1 of the
; product is known zero and the 'and' with -3 is expected to disappear.
define <4 x i32> @combine_mul_self_demandedbits_vector(<4 x i32> %x) {
; CHECK-LABEL: combine_mul_self_demandedbits_vector:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul v0.4s, v0.4s, v0.4s
; CHECK-NEXT:    ret
  %1 = freeze <4 x i32> %x
  %2 = mul <4 x i32> %1, %1
  %3 = and <4 x i32> %2, <i32 -3, i32 -3, i32 -3, i32 -3>
  ret <4 x i32> %3
}

; The 'or' forces every bit except bit 6, so only bit 6 of the product is
; demanded; the multiply by 192 is expected to become a left shift by 6.
define i8 @one_demanded_bit(i8 %x) {
; CHECK-LABEL: one_demanded_bit:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl w8, w0, #6
; CHECK-NEXT:    orr w0, w8, #0xffffffbf
; CHECK-NEXT:    ret
  %m = mul i8 %x, 192  ; 0b1100_0000
  %r = or i8 %m, 191   ; 0b1011_1111
  ret i8 %r
}

; Splat-vector case: the 'and' keeps only bit 5, and the multiply by 160 is
; expected to become a left shift by 5.
define <2 x i64> @one_demanded_bit_splat(<2 x i64> %x) {
; CHECK-LABEL: one_demanded_bit_splat:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #32 // =0x20
; CHECK-NEXT:    shl v0.2d, v0.2d, #5
; CHECK-NEXT:    dup v1.2d, x8
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %m = mul <2 x i64> %x, <i64 160, i64 160> ; 0b1010_0000
  %r = and <2 x i64> %m, <i64 32, i64 32>   ; 0b0010_0000
  ret <2 x i64> %r
}

; Only bit 0 is demanded and the multiplier is odd, so the multiply is expected
; to be dropped entirely.
define i32 @one_demanded_low_bit(i32 %x) {
; CHECK-LABEL: one_demanded_low_bit:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w0, w0, #0x1
; CHECK-NEXT:    ret
  %m = mul i32 %x, -63 ; any odd number will do
  %r = and i32 %m, 1
  ret i32 %r
}

; Only bit 0 of x*x is demanded; the expected output masks x directly without
; performing the multiply.
define i16 @squared_one_demanded_low_bit(i16 %x) {
; CHECK-LABEL: squared_one_demanded_low_bit:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w0, w0, #0x1
; CHECK-NEXT:    ret
  %mul = mul i16 %x, %x
  %and = and i16 %mul, 1
  ret i16 %and
}

; Splat-vector case using 'or' with -2 (every bit forced except bit 0); the
; expected output ORs x directly and emits no multiply.
define <4 x i32> @squared_one_demanded_low_bit_splat(<4 x i32> %x) {
; CHECK-LABEL: squared_one_demanded_low_bit_splat:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvni v1.4s, #1
; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %mul = mul <4 x i32> %x, %x
  %and = or <4 x i32> %mul, <i32 -2, i32 -2, i32 -2, i32 -2>
  ret <4 x i32> %and
}

; The two low bits of x*x are demanded; the expected output is just x & 1, with
; no multiply.
define i32 @squared_demanded_2_low_bits(i32 %x) {
; CHECK-LABEL: squared_demanded_2_low_bits:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w0, w0, #0x1
; CHECK-NEXT:    ret
  %mul = mul i32 %x, %x
  %and = and i32 %mul, 3
  ret i32 %and
}

; <2 x i64> splat case using 'or' with -2; the expected output ORs x directly
; and emits no multiply.
define <2 x i64> @squared_demanded_2_low_bits_splat(<2 x i64> %x) {
; CHECK-LABEL: squared_demanded_2_low_bits_splat:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x8, #-2 // =0xfffffffffffffffe
; CHECK-NEXT:    dup v1.2d, x8
; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %mul = mul <2 x i64> %x, %x
  %and = or <2 x i64> %mul, <i64 -2, i64 -2>
  ret <2 x i64> %and
}