; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512

; Tests that fixed-length vector integer compares (icmp + sext of the i1
; result) are lowered to SVE predicated compares (cmpeq/cmpne/cmpge/...)
; when the vector is wider than 128 bits, and to NEON cmeq for 64/128-bit
; vectors. CHECK lines are managed by update_llc_test_checks.py.

target triple = "aarch64-unknown-linux-gnu"

;
; ICMP EQ
;

; Don't use SVE for 64-bit vectors.
define <8 x i8> @icmp_eq_v8i8(<8 x i8> %op1, <8 x i8> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_eq_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmeq v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %cmp = icmp eq <8 x i8> %op1, %op2
  %sext = sext <8 x i1> %cmp to <8 x i8>
  ret <8 x i8> %sext
}

; Don't use SVE for 128-bit vectors.
define <16 x i8> @icmp_eq_v16i8(<16 x i8> %op1, <16 x i8> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_eq_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmeq v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %cmp = icmp eq <16 x i8> %op1, %op2
  %sext = sext <16 x i1> %cmp to <16 x i8>
  ret <16 x i8> %sext
}

define void @icmp_eq_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_eq_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl32
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    mov z0.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i8>, ptr %a
  %op2 = load <32 x i8>, ptr %b
  %cmp = icmp eq <32 x i8> %op1, %op2
  %sext = sext <32 x i1> %cmp to <32 x i8>
  store <32 x i8> %sext, ptr %a
  ret void
}

define void @icmp_eq_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: icmp_eq_v64i8:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.b, vl32
; VBITS_GE_256-NEXT:    mov w8, #32 // =0x20
; VBITS_GE_256-NEXT:    ld1b { z0.b }, p0/z, [x0, x8]
; VBITS_GE_256-NEXT:    ld1b { z1.b }, p0/z, [x1, x8]
; VBITS_GE_256-NEXT:    ld1b { z2.b }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1b { z3.b }, p0/z, [x1]
; VBITS_GE_256-NEXT:    cmpeq p1.b, p0/z, z0.b, z1.b
; VBITS_GE_256-NEXT:    cmpeq p2.b, p0/z, z2.b, z3.b
; VBITS_GE_256-NEXT:    mov z0.b, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT:    mov z1.b, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT:    st1b { z0.b }, p0, [x0, x8]
; VBITS_GE_256-NEXT:    st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: icmp_eq_v64i8:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.b, vl64
; VBITS_GE_512-NEXT:    ld1b { z0.b }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1b { z1.b }, p0/z, [x1]
; VBITS_GE_512-NEXT:    cmpeq p1.b, p0/z, z0.b, z1.b
; VBITS_GE_512-NEXT:    mov z0.b, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT:    st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <64 x i8>, ptr %a
  %op2 = load <64 x i8>, ptr %b
  %cmp = icmp eq <64 x i8> %op1, %op2
  %sext = sext <64 x i1> %cmp to <64 x i8>
  store <64 x i8> %sext, ptr %a
  ret void
}

define void @icmp_eq_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: icmp_eq_v128i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl128
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    mov z0.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x i8>, ptr %a
  %op2 = load <128 x i8>, ptr %b
  %cmp = icmp eq <128 x i8> %op1, %op2
  %sext = sext <128 x i1> %cmp to <128 x i8>
  store <128 x i8> %sext, ptr %a
  ret void
}

define void @icmp_eq_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: icmp_eq_v256i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl256
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    mov z0.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <256 x i8>, ptr %a
  %op2 = load <256 x i8>, ptr %b
  %cmp = icmp eq <256 x i8> %op1, %op2
  %sext = sext <256 x i1> %cmp to <256 x i8>
  store <256 x i8> %sext, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <4 x i16> @icmp_eq_v4i16(<4 x i16> %op1, <4 x i16> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_eq_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmeq v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %cmp = icmp eq <4 x i16> %op1, %op2
  %sext = sext <4 x i1> %cmp to <4 x i16>
  ret <4 x i16> %sext
}

; Don't use SVE for 128-bit vectors.
define <8 x i16> @icmp_eq_v8i16(<8 x i16> %op1, <8 x i16> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_eq_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmeq v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %cmp = icmp eq <8 x i16> %op1, %op2
  %sext = sext <8 x i1> %cmp to <8 x i16>
  ret <8 x i16> %sext
}

define void @icmp_eq_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_eq_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i16>, ptr %a
  %op2 = load <16 x i16>, ptr %b
  %cmp = icmp eq <16 x i16> %op1, %op2
  %sext = sext <16 x i1> %cmp to <16 x i16>
  store <16 x i16> %sext, ptr %a
  ret void
}

define void @icmp_eq_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: icmp_eq_v32i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z2.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1h { z3.h }, p0/z, [x1]
; VBITS_GE_256-NEXT:    cmpeq p1.h, p0/z, z0.h, z1.h
; VBITS_GE_256-NEXT:    cmpeq p2.h, p0/z, z2.h, z3.h
; VBITS_GE_256-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT:    mov z1.h, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: icmp_eq_v32i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT:    cmpeq p1.h, p0/z, z0.h, z1.h
; VBITS_GE_512-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <32 x i16>, ptr %a
  %op2 = load <32 x i16>, ptr %b
  %cmp = icmp eq <32 x i16> %op1, %op2
  %sext = sext <32 x i1> %cmp to <32 x i16>
  store <32 x i16> %sext, ptr %a
  ret void
}

define void @icmp_eq_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: icmp_eq_v64i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x i16>, ptr %a
  %op2 = load <64 x i16>, ptr %b
  %cmp = icmp eq <64 x i16> %op1, %op2
  %sext = sext <64 x i1> %cmp to <64 x i16>
  store <64 x i16> %sext, ptr %a
  ret void
}

define void @icmp_eq_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: icmp_eq_v128i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x i16>, ptr %a
  %op2 = load <128 x i16>, ptr %b
  %cmp = icmp eq <128 x i16> %op1, %op2
  %sext = sext <128 x i1> %cmp to <128 x i16>
  store <128 x i16> %sext, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x i32> @icmp_eq_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_eq_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmeq v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %cmp = icmp eq <2 x i32> %op1, %op2
  %sext = sext <2 x i1> %cmp to <2 x i32>
  ret <2 x i32> %sext
}

; Don't use SVE for 128-bit vectors.
define <4 x i32> @icmp_eq_v4i32(<4 x i32> %op1, <4 x i32> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_eq_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %cmp = icmp eq <4 x i32> %op1, %op2
  %sext = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %sext
}

define void @icmp_eq_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_eq_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %op2 = load <8 x i32>, ptr %b
  %cmp = icmp eq <8 x i32> %op1, %op2
  %sext = sext <8 x i1> %cmp to <8 x i32>
  store <8 x i32> %sext, ptr %a
  ret void
}

define void @icmp_eq_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: icmp_eq_v16i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z2.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1w { z3.s }, p0/z, [x1]
; VBITS_GE_256-NEXT:    cmpeq p1.s, p0/z, z0.s, z1.s
; VBITS_GE_256-NEXT:    cmpeq p2.s, p0/z, z2.s, z3.s
; VBITS_GE_256-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT:    mov z1.s, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: icmp_eq_v16i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT:    cmpeq p1.s, p0/z, z0.s, z1.s
; VBITS_GE_512-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x i32>, ptr %a
  %op2 = load <16 x i32>, ptr %b
  %cmp = icmp eq <16 x i32> %op1, %op2
  %sext = sext <16 x i1> %cmp to <16 x i32>
  store <16 x i32> %sext, ptr %a
  ret void
}

define void @icmp_eq_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: icmp_eq_v32i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i32>, ptr %a
  %op2 = load <32 x i32>, ptr %b
  %cmp = icmp eq <32 x i32> %op1, %op2
  %sext = sext <32 x i1> %cmp to <32 x i32>
  store <32 x i32> %sext, ptr %a
  ret void
}

define void @icmp_eq_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: icmp_eq_v64i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x i32>, ptr %a
  %op2 = load <64 x i32>, ptr %b
  %cmp = icmp eq <64 x i32> %op1, %op2
  %sext = sext <64 x i1> %cmp to <64 x i32>
  store <64 x i32> %sext, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x i64> @icmp_eq_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_eq_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmeq d0, d0, d1
; CHECK-NEXT:    ret
  %cmp = icmp eq <1 x i64> %op1, %op2
  %sext = sext <1 x i1> %cmp to <1 x i64>
  ret <1 x i64> %sext
}

; Don't use SVE for 128-bit vectors.
define <2 x i64> @icmp_eq_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_eq_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmeq v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %cmp = icmp eq <2 x i64> %op1, %op2
  %sext = sext <2 x i1> %cmp to <2 x i64>
  ret <2 x i64> %sext
}

define void @icmp_eq_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_eq_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <4 x i64>, ptr %a
  %op2 = load <4 x i64>, ptr %b
  %cmp = icmp eq <4 x i64> %op1, %op2
  %sext = sext <4 x i1> %cmp to <4 x i64>
  store <4 x i64> %sext, ptr %a
  ret void
}

define void @icmp_eq_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: icmp_eq_v8i64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
; VBITS_GE_256-NEXT:    cmpeq p1.d, p0/z, z0.d, z1.d
; VBITS_GE_256-NEXT:    cmpeq p2.d, p0/z, z2.d, z3.d
; VBITS_GE_256-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT:    mov z1.d, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: icmp_eq_v8i64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    cmpeq p1.d, p0/z, z0.d, z1.d
; VBITS_GE_512-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x i64>, ptr %a
  %op2 = load <8 x i64>, ptr %b
  %cmp = icmp eq <8 x i64> %op1, %op2
  %sext = sext <8 x i1> %cmp to <8 x i64>
  store <8 x i64> %sext, ptr %a
  ret void
}

define void @icmp_eq_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: icmp_eq_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i64>, ptr %a
  %op2 = load <16 x i64>, ptr %b
  %cmp = icmp eq <16 x i64> %op1, %op2
  %sext = sext <16 x i1> %cmp to <16 x i64>
  store <16 x i64> %sext, ptr %a
  ret void
}

define void @icmp_eq_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: icmp_eq_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i64>, ptr %a
  %op2 = load <32 x i64>, ptr %b
  %cmp = icmp eq <32 x i64> %op1, %op2
  %sext = sext <32 x i1> %cmp to <32 x i64>
  store <32 x i64> %sext, ptr %a
  ret void
}

;
; ICMP NE
;

define void @icmp_ne_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_ne_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl32
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    cmpne p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    mov z0.b, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i8>, ptr %a
  %op2 = load <32 x i8>, ptr %b
  %cmp = icmp ne <32 x i8> %op1, %op2
  %sext = sext <32 x i1> %cmp to <32 x i8>
  store <32 x i8> %sext, ptr %a
  ret void
}

;
; ICMP SGE
;

define void @icmp_sge_v32i16(ptr %a, ptr %b) vscale_range(4,0) #0 {
; CHECK-LABEL: icmp_sge_v32i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl32
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    cmpge p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i16>, ptr %a
  %op2 = load <32 x i16>, ptr %b
  %cmp = icmp sge <32 x i16> %op1, %op2
  %sext = sext <32 x i1> %cmp to <32 x i16>
  store <32 x i16> %sext, ptr %a
  ret void
}

;
; ICMP SGT
;

define void @icmp_sgt_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_sgt_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    cmpgt p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i16>, ptr %a
  %op2 = load <16 x i16>, ptr %b
  %cmp = icmp sgt <16 x i16> %op1, %op2
  %sext = sext <16 x i1> %cmp to <16 x i16>
  store <16 x i16> %sext, ptr %a
  ret void
}

;
; ICMP SLE
;

; Note: SLE has no direct SVE compare; operands are swapped and cmpge used.
define void @icmp_sle_v16i32(ptr %a, ptr %b) vscale_range(4,0) #0 {
; CHECK-LABEL: icmp_sle_v16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl16
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    cmpge p1.s, p0/z, z1.s, z0.s
; CHECK-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i32>, ptr %a
  %op2 = load <16 x i32>, ptr %b
  %cmp = icmp sle <16 x i32> %op1, %op2
  %sext = sext <16 x i1> %cmp to <16 x i32>
  store <16 x i32> %sext, ptr %a
  ret void
}

;
; ICMP SLT
;

; Note: SLT is lowered as cmpgt with the operands swapped.
define void @icmp_slt_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_slt_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    cmpgt p1.s, p0/z, z1.s, z0.s
; CHECK-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %op2 = load <8 x i32>, ptr %b
  %cmp = icmp slt <8 x i32> %op1, %op2
  %sext = sext <8 x i1> %cmp to <8 x i32>
  store <8 x i32> %sext, ptr %a
  ret void
}

;
; ICMP UGE
;

define void @icmp_uge_v8i64(ptr %a, ptr %b) vscale_range(4,0) #0 {
; CHECK-LABEL: icmp_uge_v8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl8
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    cmphs p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <8 x i64>, ptr %a
  %op2 = load <8 x i64>, ptr %b
  %cmp = icmp uge <8 x i64> %op1, %op2
  %sext = sext <8 x i1> %cmp to <8 x i64>
  store <8 x i64> %sext, ptr %a
  ret void
}

;
; ICMP UGT
;

define void @icmp_ugt_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: icmp_ugt_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    cmphi p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <4 x i64>, ptr %a
  %op2 = load <4 x i64>, ptr %b
  %cmp = icmp ugt <4 x i64> %op1, %op2
  %sext = sext <4 x i1> %cmp to <4 x i64>
  store <4 x i64> %sext, ptr %a
  ret void
}

;
; ICMP ULE
;

; Note: ULE is lowered as cmphs with the operands swapped.
define void @icmp_ule_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: icmp_ule_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    cmphs p1.d, p0/z, z1.d, z0.d
; CHECK-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i64>, ptr %a
  %op2 = load <16 x i64>, ptr %b
  %cmp = icmp ule <16 x i64> %op1, %op2
  %sext = sext <16 x i1> %cmp to <16 x i64>
  store <16 x i64> %sext, ptr %a
  ret void
}

;
; ICMP ULT
;

; Note: ULT is lowered as cmphi with the operands swapped.
define void @icmp_ult_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: icmp_ult_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    cmphi p1.d, p0/z, z1.d, z0.d
; CHECK-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i64>, ptr %a
  %op2 = load <32 x i64>, ptr %b
  %cmp = icmp ult <32 x i64> %op1, %op2
  %sext = sext <32 x i1> %cmp to <32 x i64>
  store <32 x i64> %sext, ptr %a
  ret void
}

attributes #0 = { "target-features"="+sve" }