; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512

target triple = "aarch64-unknown-linux-gnu"

;
; AND
;
; Bitwise logic is element-size agnostic, so when SVE is used the generated
; code always uses the .d (64-bit element) register form, regardless of the
; IR vector's element type.

; Don't use SVE for 64-bit vectors.
define <8 x i8> @and_v8i8(<8 x i8> %op1, <8 x i8> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: and_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %res = and <8 x i8> %op1, %op2
  ret <8 x i8> %res
}

; Don't use SVE for 128-bit vectors.
define <16 x i8> @and_v16i8(<16 x i8> %op1, <16 x i8> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: and_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %res = and <16 x i8> %op1, %op2
  ret <16 x i8> %res
}

; vscale_range(2,0) guarantees at least 256-bit SVE registers, so a 256-bit
; fixed-length vector is handled with one predicated load/op/store sequence.
define void @and_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: and_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl32
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i8>, ptr %a
  %op2 = load <32 x i8>, ptr %b
  %res = and <32 x i8> %op1, %op2
  store <32 x i8> %res, ptr %a
  ret void
}

; No vscale_range attribute here: with only the 256-bit minimum guaranteed
; (VBITS_GE_256) the 512-bit operation is split into two 256-bit halves, the
; second addressed via the byte offset in x8; with VBITS_GE_512 it fits in a
; single register.
define void @and_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: and_v64i8:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.b, vl32
; VBITS_GE_256-NEXT:    mov w8, #32 // =0x20
; VBITS_GE_256-NEXT:    ld1b { z0.b }, p0/z, [x0, x8]
; VBITS_GE_256-NEXT:    ld1b { z1.b }, p0/z, [x1, x8]
; VBITS_GE_256-NEXT:    ld1b { z2.b }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1b { z3.b }, p0/z, [x1]
; VBITS_GE_256-NEXT:    and z0.d, z0.d, z1.d
; VBITS_GE_256-NEXT:    and z1.d, z2.d, z3.d
; VBITS_GE_256-NEXT:    st1b { z0.b }, p0, [x0, x8]
; VBITS_GE_256-NEXT:    st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: and_v64i8:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.b, vl64
; VBITS_GE_512-NEXT:    ld1b { z0.b }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1b { z1.b }, p0/z, [x1]
; VBITS_GE_512-NEXT:    and z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <64 x i8>, ptr %a
  %op2 = load <64 x i8>, ptr %b
  %res = and <64 x i8> %op1, %op2
  store <64 x i8> %res, ptr %a
  ret void
}

define void @and_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: and_v128i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl128
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x i8>, ptr %a
  %op2 = load <128 x i8>, ptr %b
  %res = and <128 x i8> %op1, %op2
  store <128 x i8> %res, ptr %a
  ret void
}

define void @and_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: and_v256i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl256
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <256 x i8>, ptr %a
  %op2 = load <256 x i8>, ptr %b
  %res = and <256 x i8> %op1, %op2
  store <256 x i8> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <4 x i16> @and_v4i16(<4 x i16> %op1, <4 x i16> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: and_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %res = and <4 x i16> %op1, %op2
  ret <4 x i16> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x i16> @and_v8i16(<8 x i16> %op1, <8 x i16> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: and_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %res = and <8 x i16> %op1, %op2
  ret <8 x i16> %res
}

define void @and_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: and_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i16>, ptr %a
  %op2 = load <16 x i16>, ptr %b
  %res = and <16 x i16> %op1, %op2
  store <16 x i16> %res, ptr %a
  ret void
}

; No vscale_range: at the 256-bit minimum the 512-bit AND is split into two
; halves, the second addressed with an element index in x8 scaled by the
; halfword size (lsl #1).
define void @and_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: and_v32i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z2.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1h { z3.h }, p0/z, [x1]
; VBITS_GE_256-NEXT:    and z0.d, z0.d, z1.d
; VBITS_GE_256-NEXT:    and z1.d, z2.d, z3.d
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: and_v32i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT:    and z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <32 x i16>, ptr %a
  %op2 = load <32 x i16>, ptr %b
  %res = and <32 x i16> %op1, %op2
  store <32 x i16> %res, ptr %a
  ret void
}

define void @and_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: and_v64i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x i16>, ptr %a
  %op2 = load <64 x i16>, ptr %b
  %res = and <64 x i16> %op1, %op2
  store <64 x i16> %res, ptr %a
  ret void
}

define void @and_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: and_v128i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x i16>, ptr %a
  %op2 = load <128 x i16>, ptr %b
  %res = and <128 x i16> %op1, %op2
  store <128 x i16> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x i32> @and_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: and_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %res = and <2 x i32> %op1, %op2
  ret <2 x i32> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x i32> @and_v4i32(<4 x i32> %op1, <4 x i32> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: and_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %res = and <4 x i32> %op1, %op2
  ret <4 x i32> %res
}

define void @and_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: and_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %op2 = load <8 x i32>, ptr %b
  %res = and <8 x i32> %op1, %op2
  store <8 x i32> %res, ptr %a
  ret void
}

; No vscale_range: at the 256-bit minimum the 512-bit AND is split into two
; halves, the second addressed with an element index in x8 scaled by the
; word size (lsl #2).
define void @and_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: and_v16i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z2.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1w { z3.s }, p0/z, [x1]
; VBITS_GE_256-NEXT:    and z0.d, z0.d, z1.d
; VBITS_GE_256-NEXT:    and z1.d, z2.d, z3.d
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: and_v16i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT:    and z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x i32>, ptr %a
  %op2 = load <16 x i32>, ptr %b
  %res = and <16 x i32> %op1, %op2
  store <16 x i32> %res, ptr %a
  ret void
}

define void @and_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: and_v32i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i32>, ptr %a
  %op2 = load <32 x i32>, ptr %b
  %res = and <32 x i32> %op1, %op2
  store <32 x i32> %res, ptr %a
  ret void
}

define void @and_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: and_v64i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x i32>, ptr %a
  %op2 = load <64 x i32>, ptr %b
  %res = and <64 x i32> %op1, %op2
  store <64 x i32> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x i64> @and_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: and_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %res = and <1 x i64> %op1, %op2
  ret <1 x i64> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x i64> @and_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: and_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %res = and <2 x i64> %op1, %op2
  ret <2 x i64> %res
}

define void @and_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: and_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <4 x i64>, ptr %a
  %op2 = load <4 x i64>, ptr %b
  %res = and <4 x i64> %op1, %op2
  store <4 x i64> %res, ptr %a
  ret void
}

; No vscale_range: at the 256-bit minimum the 512-bit AND is split into two
; halves, the second addressed with an element index in x8 scaled by the
; doubleword size (lsl #3).
define void @and_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: and_v8i64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
; VBITS_GE_256-NEXT:    and z0.d, z0.d, z1.d
; VBITS_GE_256-NEXT:    and z1.d, z2.d, z3.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: and_v8i64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    and z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x i64>, ptr %a
  %op2 = load <8 x i64>, ptr %b
  %res = and <8 x i64> %op1, %op2
  store <8 x i64> %res, ptr %a
  ret void
}

define void @and_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: and_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i64>, ptr %a
  %op2 = load <16 x i64>, ptr %b
  %res = and <16 x i64> %op1, %op2
  store <16 x i64> %res, ptr %a
  ret void
}

define void @and_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: and_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i64>, ptr %a
  %op2 = load <32 x i64>, ptr %b
  %res = and <32 x i64> %op1, %op2
  store <32 x i64> %res, ptr %a
  ret void
}

;
; OR
;

; Don't use SVE for 64-bit vectors.
define <8 x i8> @or_v8i8(<8 x i8> %op1, <8 x i8> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: or_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %res = or <8 x i8> %op1, %op2
  ret <8 x i8> %res
}

; Don't use SVE for 128-bit vectors.
define <16 x i8> @or_v16i8(<16 x i8> %op1, <16 x i8> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: or_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %res = or <16 x i8> %op1, %op2
  ret <16 x i8> %res
}

define void @or_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: or_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl32
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i8>, ptr %a
  %op2 = load <32 x i8>, ptr %b
  %res = or <32 x i8> %op1, %op2
  store <32 x i8> %res, ptr %a
  ret void
}

; No vscale_range: at the 256-bit minimum the 512-bit OR is split into two
; 256-bit halves, the second addressed via the byte offset in x8.
define void @or_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: or_v64i8:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.b, vl32
; VBITS_GE_256-NEXT:    mov w8, #32 // =0x20
; VBITS_GE_256-NEXT:    ld1b { z0.b }, p0/z, [x0, x8]
; VBITS_GE_256-NEXT:    ld1b { z1.b }, p0/z, [x1, x8]
; VBITS_GE_256-NEXT:    ld1b { z2.b }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1b { z3.b }, p0/z, [x1]
; VBITS_GE_256-NEXT:    orr z0.d, z0.d, z1.d
; VBITS_GE_256-NEXT:    orr z1.d, z2.d, z3.d
; VBITS_GE_256-NEXT:    st1b { z0.b }, p0, [x0, x8]
; VBITS_GE_256-NEXT:    st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: or_v64i8:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.b, vl64
; VBITS_GE_512-NEXT:    ld1b { z0.b }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1b { z1.b }, p0/z, [x1]
; VBITS_GE_512-NEXT:    orr z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <64 x i8>, ptr %a
  %op2 = load <64 x i8>, ptr %b
  %res = or <64 x i8> %op1, %op2
  store <64 x i8> %res, ptr %a
  ret void
}

define void @or_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: or_v128i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl128
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x i8>, ptr %a
  %op2 = load <128 x i8>, ptr %b
  %res = or <128 x i8> %op1, %op2
  store <128 x i8> %res, ptr %a
  ret void
}

define void @or_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: or_v256i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl256
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <256 x i8>, ptr %a
  %op2 = load <256 x i8>, ptr %b
  %res = or <256 x i8> %op1, %op2
  store <256 x i8> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <4 x i16> @or_v4i16(<4 x i16> %op1, <4 x i16> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: or_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %res = or <4 x i16> %op1, %op2
  ret <4 x i16> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x i16> @or_v8i16(<8 x i16> %op1, <8 x i16> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: or_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %res = or <8 x i16> %op1, %op2
  ret <8 x i16> %res
}

define void @or_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: or_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i16>, ptr %a
  %op2 = load <16 x i16>, ptr %b
  %res = or <16 x i16> %op1, %op2
  store <16 x i16> %res, ptr %a
  ret void
}

; No vscale_range: at the 256-bit minimum the 512-bit OR is split into two
; halves, the second addressed with a halfword-scaled (lsl #1) index in x8.
define void @or_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: or_v32i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z2.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1h { z3.h }, p0/z, [x1]
; VBITS_GE_256-NEXT:    orr z0.d, z0.d, z1.d
; VBITS_GE_256-NEXT:    orr z1.d, z2.d, z3.d
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: or_v32i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT:    orr z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <32 x i16>, ptr %a
  %op2 = load <32 x i16>, ptr %b
  %res = or <32 x i16> %op1, %op2
  store <32 x i16> %res, ptr %a
  ret void
}

define void @or_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: or_v64i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x i16>, ptr %a
  %op2 = load <64 x i16>, ptr %b
  %res = or <64 x i16> %op1, %op2
  store <64 x i16> %res, ptr %a
  ret void
}

define void @or_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: or_v128i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x i16>, ptr %a
  %op2 = load <128 x i16>, ptr %b
  %res = or <128 x i16> %op1, %op2
  store <128 x i16> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x i32> @or_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: or_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %res = or <2 x i32> %op1, %op2
  ret <2 x i32> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x i32> @or_v4i32(<4 x i32> %op1, <4 x i32> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: or_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %res = or <4 x i32> %op1, %op2
  ret <4 x i32> %res
}

define void @or_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: or_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %op2 = load <8 x i32>, ptr %b
  %res = or <8 x i32> %op1, %op2
  store <8 x i32> %res, ptr %a
  ret void
}

; No vscale_range: at the 256-bit minimum the 512-bit OR is split into two
; halves, the second addressed with a word-scaled (lsl #2) index in x8.
define void @or_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: or_v16i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z2.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1w { z3.s }, p0/z, [x1]
; VBITS_GE_256-NEXT:    orr z0.d, z0.d, z1.d
; VBITS_GE_256-NEXT:    orr z1.d, z2.d, z3.d
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: or_v16i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT:    orr z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x i32>, ptr %a
  %op2 = load <16 x i32>, ptr %b
  %res = or <16 x i32> %op1, %op2
  store <16 x i32> %res, ptr %a
  ret void
}

define void @or_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: or_v32i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i32>, ptr %a
  %op2 = load <32 x i32>, ptr %b
  %res = or <32 x i32> %op1, %op2
  store <32 x i32> %res, ptr %a
  ret void
}

define void @or_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: or_v64i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x i32>, ptr %a
  %op2 = load <64 x i32>, ptr %b
  %res = or <64 x i32> %op1, %op2
  store <64 x i32> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x i64> @or_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: or_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %res = or <1 x i64> %op1, %op2
  ret <1 x i64> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x i64> @or_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: or_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %res = or <2 x i64> %op1, %op2
  ret <2 x i64> %res
}

define void @or_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: or_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <4 x i64>, ptr %a
  %op2 = load <4 x i64>, ptr %b
  %res = or <4 x i64> %op1, %op2
  store <4 x i64> %res, ptr %a
  ret void
}

; No vscale_range: at the 256-bit minimum the 512-bit OR is split into two
; halves, the second addressed with a doubleword-scaled (lsl #3) index in x8.
define void @or_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: or_v8i64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
; VBITS_GE_256-NEXT:    orr z0.d, z0.d, z1.d
; VBITS_GE_256-NEXT:    orr z1.d, z2.d, z3.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: or_v8i64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    orr z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x i64>, ptr %a
  %op2 = load <8 x i64>, ptr %b
  %res = or <8 x i64> %op1, %op2
  store <8 x i64> %res, ptr %a
  ret void
}

define void @or_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: or_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i64>, ptr %a
  %op2 = load <16 x i64>, ptr %b
  %res = or <16 x i64> %op1, %op2
  store <16 x i64> %res, ptr %a
  ret void
}

define void @or_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: or_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i64>, ptr %a
  %op2 = load <32 x i64>, ptr %b
  %res = or <32 x i64> %op1, %op2
  store <32 x i64> %res, ptr %a
  ret void
}

;
; XOR
;

; Don't use SVE for 64-bit vectors.
define <8 x i8> @xor_v8i8(<8 x i8> %op1, <8 x i8> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: xor_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %res = xor <8 x i8> %op1, %op2
  ret <8 x i8> %res
}

; Don't use SVE for 128-bit vectors.
define <16 x i8> @xor_v16i8(<16 x i8> %op1, <16 x i8> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: xor_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %res = xor <16 x i8> %op1, %op2
  ret <16 x i8> %res
}

define void @xor_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: xor_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl32
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i8>, ptr %a
  %op2 = load <32 x i8>, ptr %b
  %res = xor <32 x i8> %op1, %op2
  store <32 x i8> %res, ptr %a
  ret void
}

; No vscale_range: at the 256-bit minimum the 512-bit XOR is split into two
; 256-bit halves, the second addressed via the byte offset in x8.
define void @xor_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: xor_v64i8:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.b, vl32
; VBITS_GE_256-NEXT:    mov w8, #32 // =0x20
; VBITS_GE_256-NEXT:    ld1b { z0.b }, p0/z, [x0, x8]
; VBITS_GE_256-NEXT:    ld1b { z1.b }, p0/z, [x1, x8]
; VBITS_GE_256-NEXT:    ld1b { z2.b }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1b { z3.b }, p0/z, [x1]
; VBITS_GE_256-NEXT:    eor z0.d, z0.d, z1.d
; VBITS_GE_256-NEXT:    eor z1.d, z2.d, z3.d
; VBITS_GE_256-NEXT:    st1b { z0.b }, p0, [x0, x8]
; VBITS_GE_256-NEXT:    st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: xor_v64i8:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.b, vl64
; VBITS_GE_512-NEXT:    ld1b { z0.b }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1b { z1.b }, p0/z, [x1]
; VBITS_GE_512-NEXT:    eor z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <64 x i8>, ptr %a
  %op2 = load <64 x i8>, ptr %b
  %res = xor <64 x i8> %op1, %op2
  store <64 x i8> %res, ptr %a
  ret void
}

define void @xor_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: xor_v128i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl128
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x i8>, ptr %a
  %op2 = load <128 x i8>, ptr %b
  %res = xor <128 x i8> %op1, %op2
  store <128 x i8> %res, ptr %a
  ret void
}

define void @xor_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: xor_v256i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl256
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <256 x i8>, ptr %a
  %op2 = load <256 x i8>, ptr %b
  %res = xor <256 x i8> %op1, %op2
  store <256 x i8> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <4 x i16> @xor_v4i16(<4 x i16> %op1, <4 x i16> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: xor_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %res = xor <4 x i16> %op1, %op2
  ret <4 x i16> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x i16> @xor_v8i16(<8 x i16> %op1, <8 x i16> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: xor_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %res = xor <8 x i16> %op1, %op2
  ret <8 x i16> %res
}

define void @xor_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: xor_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i16>, ptr %a
  %op2 = load <16 x i16>, ptr %b
  %res = xor <16 x i16> %op1, %op2
  store <16 x i16> %res, ptr %a
  ret void
}

; No vscale_range: at the 256-bit minimum the 512-bit XOR is split into two
; halves, the second addressed with a halfword-scaled (lsl #1) index in x8.
define void @xor_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: xor_v32i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z2.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1h { z3.h }, p0/z, [x1]
; VBITS_GE_256-NEXT:    eor z0.d, z0.d, z1.d
; VBITS_GE_256-NEXT:    eor z1.d, z2.d, z3.d
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: xor_v32i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT:    eor z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <32 x i16>, ptr %a
  %op2 = load <32 x i16>, ptr %b
  %res = xor <32 x i16> %op1, %op2
  store <32 x i16> %res, ptr %a
  ret void
}

define void @xor_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: xor_v64i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x i16>, ptr %a
  %op2 = load <64 x i16>, ptr %b
  %res = xor <64 x i16> %op1, %op2
  store <64 x i16> %res, ptr %a
  ret void
}

define void @xor_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: xor_v128i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x i16>, ptr %a
  %op2 = load <128 x i16>, ptr %b
  %res = xor <128 x i16> %op1, %op2
  store <128 x i16> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x i32> @xor_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: xor_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %res = xor <2 x i32> %op1, %op2
  ret <2 x i32> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x i32> @xor_v4i32(<4 x i32> %op1, <4 x i32> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: xor_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %res = xor <4 x i32> %op1, %op2
  ret <4 x i32> %res
}

define void @xor_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: xor_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %op2 = load <8 x i32>, ptr %b
  %res = xor <8 x i32> %op1, %op2
  store <8 x i32> %res, ptr %a
  ret void
}

define void @xor_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: xor_v16i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z2.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1w { z3.s }, p0/z, [x1]
; VBITS_GE_256-NEXT:    eor z0.d, z0.d, z1.d
; VBITS_GE_256-NEXT:    eor z1.d, z2.d, z3.d
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: xor_v16i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT:    eor z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x i32>, ptr %a
  %op2 = load <16 x i32>, ptr %b
  %res = xor <16 x i32> %op1, %op2
  store <16 x i32> %res, ptr %a
  ret void
}

define void @xor_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: xor_v32i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i32>, ptr %a
  %op2 = load <32 x i32>, ptr %b
  %res = xor <32 x i32> %op1, %op2
  store <32 x i32> %res, ptr %a
  ret void
}

define void @xor_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: xor_v64i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x i32>, ptr %a
  %op2 = load <64 x i32>, ptr %b
  %res = xor <64 x i32> %op1, %op2
  store <64 x i32> %res, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x i64> @xor_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: xor_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %res = xor <1 x i64> %op1, %op2
  ret <1 x i64> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x i64> @xor_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: xor_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %res = xor <2 x i64> %op1, %op2
  ret <2 x i64> %res
}

define void @xor_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: xor_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <4 x i64>, ptr %a
  %op2 = load <4 x i64>, ptr %b
  %res = xor <4 x i64> %op1, %op2
  store <4 x i64> %res, ptr %a
  ret void
}

define void @xor_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: xor_v8i64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
; VBITS_GE_256-NEXT:    eor z0.d, z0.d, z1.d
; VBITS_GE_256-NEXT:    eor z1.d, z2.d, z3.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: xor_v8i64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    eor z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x i64>, ptr %a
  %op2 = load <8 x i64>, ptr %b
  %res = xor <8 x i64> %op1, %op2
  store <8 x i64> %res, ptr %a
  ret void
}

define void @xor_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: xor_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i64>, ptr %a
  %op2 = load <16 x i64>, ptr %b
  %res = xor <16 x i64> %op1, %op2
  store <16 x i64> %res, ptr %a
  ret void
}

define void @xor_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: xor_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i64>, ptr %a
  %op2 = load <32 x i64>, ptr %b
  %res = xor <32 x i64> %op1, %op2
  store <32 x i64> %res, ptr %a
  ret void
}

attributes #0 = { "target-features"="+sve" }