; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 -mattr=+sve < %s -o - | FileCheck --check-prefixes=CHECK,SVE %s
; RUN: llc -mtriple=aarch64 -mattr=+sve2 < %s -o - | FileCheck --check-prefixes=CHECK,SVE2 %s
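; Check that a rotate of (xor x, y) by a constant amount is selected as the
; single SVE2 XAR (exclusive OR and rotate right by immediate) instruction,
; while plain SVE, which lacks XAR, falls back to an eor plus a shift pair.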

define <vscale x 2 x i64> @xar_nxv2i64_l(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
; SVE-LABEL: xar_nxv2i64_l:
; SVE:       // %bb.0:
; SVE-NEXT:    eor z0.d, z0.d, z1.d
; SVE-NEXT:    lsr z1.d, z0.d, #4
; SVE-NEXT:    lsl z0.d, z0.d, #60
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    ret
;
; SVE2-LABEL: xar_nxv2i64_l:
; SVE2:       // %bb.0:
; SVE2-NEXT:    xar z0.d, z0.d, z1.d, #4
; SVE2-NEXT:    ret
  %a = xor <vscale x 2 x i64> %x, %y
  %b = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> splat (i64 60))
  ret <vscale x 2 x i64> %b
}

define <vscale x 2 x i64> @xar_nxv2i64_r(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
; SVE-LABEL: xar_nxv2i64_r:
; SVE:       // %bb.0:
; SVE-NEXT:    eor z0.d, z0.d, z1.d
; SVE-NEXT:    lsl z1.d, z0.d, #60
; SVE-NEXT:    lsr z0.d, z0.d, #4
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    ret
;
; SVE2-LABEL: xar_nxv2i64_r:
; SVE2:       // %bb.0:
; SVE2-NEXT:    xar z0.d, z0.d, z1.d, #4
; SVE2-NEXT:    ret
  %a = xor <vscale x 2 x i64> %x, %y
  %b = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> splat (i64 4))
  ret <vscale x 2 x i64> %b
}


define <vscale x 4 x i32> @xar_nxv4i32_l(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; SVE-LABEL: xar_nxv4i32_l:
; SVE:       // %bb.0:
; SVE-NEXT:    eor z0.d, z0.d, z1.d
; SVE-NEXT:    lsr z1.s, z0.s, #4
; SVE-NEXT:    lsl z0.s, z0.s, #28
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    ret
;
; SVE2-LABEL: xar_nxv4i32_l:
; SVE2:       // %bb.0:
; SVE2-NEXT:    xar z0.s, z0.s, z1.s, #4
; SVE2-NEXT:    ret
  %a = xor <vscale x 4 x i32> %x, %y
  %b = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> splat (i32 28))
  ret <vscale x 4 x i32> %b
}

define <vscale x 4 x i32> @xar_nxv4i32_r(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; SVE-LABEL: xar_nxv4i32_r:
; SVE:       // %bb.0:
; SVE-NEXT:    eor z0.d, z0.d, z1.d
; SVE-NEXT:    lsl z1.s, z0.s, #28
; SVE-NEXT:    lsr z0.s, z0.s, #4
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    ret
;
; SVE2-LABEL: xar_nxv4i32_r:
; SVE2:       // %bb.0:
; SVE2-NEXT:    xar z0.s, z0.s, z1.s, #4
; SVE2-NEXT:    ret
  %a = xor <vscale x 4 x i32> %x, %y
  %b = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> splat (i32 4))
  ret <vscale x 4 x i32> %b
}

define <vscale x 8 x i16> @xar_nxv8i16_l(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
; SVE-LABEL: xar_nxv8i16_l:
; SVE:       // %bb.0:
; SVE-NEXT:    eor z0.d, z0.d, z1.d
; SVE-NEXT:    lsr z1.h, z0.h, #4
; SVE-NEXT:    lsl z0.h, z0.h, #12
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    ret
;
; SVE2-LABEL: xar_nxv8i16_l:
; SVE2:       // %bb.0:
; SVE2-NEXT:    xar z0.h, z0.h, z1.h, #4
; SVE2-NEXT:    ret
  %a = xor <vscale x 8 x i16> %x, %y
  %b = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %a, <vscale x 8 x i16> splat (i16 12))
  ret <vscale x 8 x i16> %b
}

define <vscale x 8 x i16> @xar_nxv8i16_r(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
; SVE-LABEL: xar_nxv8i16_r:
; SVE:       // %bb.0:
; SVE-NEXT:    eor z0.d, z0.d, z1.d
; SVE-NEXT:    lsl z1.h, z0.h, #12
; SVE-NEXT:    lsr z0.h, z0.h, #4
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    ret
;
; SVE2-LABEL: xar_nxv8i16_r:
; SVE2:       // %bb.0:
; SVE2-NEXT:    xar z0.h, z0.h, z1.h, #4
; SVE2-NEXT:    ret
  %a = xor <vscale x 8 x i16> %x, %y
  %b = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %a, <vscale x 8 x i16> splat (i16 4))
  ret <vscale x 8 x i16> %b
}

define <vscale x 16 x i8> @xar_nxv16i8_l(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
; SVE-LABEL: xar_nxv16i8_l:
; SVE:       // %bb.0:
; SVE-NEXT:    eor z0.d, z0.d, z1.d
; SVE-NEXT:    lsr z1.b, z0.b, #4
; SVE-NEXT:    lsl z0.b, z0.b, #4
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    ret
;
; SVE2-LABEL: xar_nxv16i8_l:
; SVE2:       // %bb.0:
; SVE2-NEXT:    xar z0.b, z0.b, z1.b, #4
; SVE2-NEXT:    ret
  %a = xor <vscale x 16 x i8> %x, %y
  %b = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %a, <vscale x 16 x i8> splat (i8 4))
  ret <vscale x 16 x i8> %b
}

define <vscale x 16 x i8> @xar_nxv16i8_r(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
; SVE-LABEL: xar_nxv16i8_r:
; SVE:       // %bb.0:
; SVE-NEXT:    eor z0.d, z0.d, z1.d
; SVE-NEXT:    lsl z1.b, z0.b, #4
; SVE-NEXT:    lsr z0.b, z0.b, #4
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    ret
;
; SVE2-LABEL: xar_nxv16i8_r:
; SVE2:       // %bb.0:
; SVE2-NEXT:    xar z0.b, z0.b, z1.b, #4
; SVE2-NEXT:    ret
  %a = xor <vscale x 16 x i8> %x, %y
  %b = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %a, <vscale x 16 x i8> splat (i8 4))
  ret <vscale x 16 x i8> %b
}

; Shift is not a constant.
define <vscale x 2 x i64> @xar_nxv2i64_l_neg1(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i64> %z) {
; CHECK-LABEL: xar_nxv2i64_l_neg1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    subr z2.d, z2.d, #0 // =0x0
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    and z3.d, z3.d, #0x3f
; CHECK-NEXT:    and z2.d, z2.d, #0x3f
; CHECK-NEXT:    movprfx z1, z0
; CHECK-NEXT:    lsl z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT:    lsr z0.d, p0/m, z0.d, z2.d
; CHECK-NEXT:    orr z0.d, z1.d, z0.d
; CHECK-NEXT:    ret
  %a = xor <vscale x 2 x i64> %x, %y
  %b = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> %z)
  ret <vscale x 2 x i64> %b
}

; OR instead of an XOR.
; TODO: We could use the usra instruction here for SVE2.
define <vscale x 2 x i64> @xar_nxv2i64_l_neg2(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
; CHECK-LABEL: xar_nxv2i64_l_neg2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    lsr z1.d, z0.d, #4
; CHECK-NEXT:    lsl z0.d, z0.d, #60
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %a = or <vscale x 2 x i64> %x, %y
  %b = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> splat (i64 60))
  ret <vscale x 2 x i64> %b
}

; Rotate amount is 0.
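; (The funnel-shift amount is taken modulo the element width, so a shift by
; the full 64 bits is a rotate by 0 and only the eor survives.)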
define <vscale x 2 x i64> @xar_nxv2i64_l_neg3(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
; CHECK-LABEL: xar_nxv2i64_l_neg3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %a = xor <vscale x 2 x i64> %x, %y
  %b = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> splat (i64 64))
  ret <vscale x 2 x i64> %b
}

; Uses individual shifts instead of funnel shifts; just one test.
define <vscale x 2 x i64> @xar_nxv2i64_shifts(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
; SVE-LABEL: xar_nxv2i64_shifts:
; SVE:       // %bb.0:
; SVE-NEXT:    eor z0.d, z0.d, z1.d
; SVE-NEXT:    lsr z1.d, z0.d, #4
; SVE-NEXT:    lsl z0.d, z0.d, #60
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    ret
;
; SVE2-LABEL: xar_nxv2i64_shifts:
; SVE2:       // %bb.0:
; SVE2-NEXT:    xar z0.d, z0.d, z1.d, #4
; SVE2-NEXT:    ret
  %xor = xor <vscale x 2 x i64> %x, %y
  %shl = shl <vscale x 2 x i64> %xor, splat (i64 60)
  %shr = lshr <vscale x 2 x i64> %xor, splat (i64 4)
  %or = or <vscale x 2 x i64> %shl, %shr
  ret <vscale x 2 x i64> %or
}

; Not a rotate operation, as 60 + 3 != 64.
define <vscale x 2 x i64> @xar_nxv2i64_shifts_neg(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
; CHECK-LABEL: xar_nxv2i64_shifts_neg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    lsl z1.d, z0.d, #60
; CHECK-NEXT:    lsr z0.d, z0.d, #3
; CHECK-NEXT:    orr z0.d, z1.d, z0.d
; CHECK-NEXT:    ret
  %xor = xor <vscale x 2 x i64> %x, %y
  %shl = shl <vscale x 2 x i64> %xor, splat (i64 60)
  %shr = lshr <vscale x 2 x i64> %xor, splat (i64 3)
  %or = or <vscale x 2 x i64> %shl, %shr
  ret <vscale x 2 x i64> %or
}

declare <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)