; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512

target triple = "aarch64-unknown-linux-gnu"

;
; FCMP OEQ
;

; 64-bit vector: the expected output is a NEON fcmeq, not an SVE sequence.
define <4 x i16> @fcmp_oeq_v4f16(<4 x half> %op1, <4 x half> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_oeq_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcmeq v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %mask = fcmp oeq <4 x half> %op1, %op2
  %wide = sext <4 x i1> %mask to <4 x i16>
  ret <4 x i16> %wide
}

; 128-bit vector: the expected output is a NEON fcmeq, not an SVE sequence.
define <8 x i16> @fcmp_oeq_v8f16(<8 x half> %op1, <8 x half> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_oeq_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcmeq v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %mask = fcmp oeq <8 x half> %op1, %op2
  %wide = sext <8 x i1> %mask to <8 x i16>
  ret <8 x i16> %wide
}

; 256-bit vector: one predicated SVE compare, result sign-extended and stored to %c.
define void @fcmp_oeq_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_oeq_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmeq p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %mask = fcmp oeq <16 x half> %lhs, %rhs
  %wide = sext <16 x i1> %mask to <16 x i16>
  store <16 x i16> %wide, ptr %c
  ret void
}

; No vscale_range attribute here: the 256-bit run expects the compare split
; into two halves, the 512-bit and 2048-bit runs expect a single compare.
define void @fcmp_oeq_v32f16(ptr %a, ptr %b, ptr %c) #0 {
; VBITS_GE_256-LABEL: fcmp_oeq_v32f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z2.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1h { z3.h }, p0/z, [x1]
; VBITS_GE_256-NEXT:    fcmeq p1.h, p0/z, z0.h, z1.h
; VBITS_GE_256-NEXT:    fcmeq p2.h, p0/z, z2.h, z3.h
; VBITS_GE_256-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT:    mov z1.h, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x2, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x2]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcmp_oeq_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fcmeq p1.h, p0/z, z0.h, z1.h
; VBITS_GE_512-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x2]
; VBITS_GE_512-NEXT:    ret
  %lhs = load <32 x half>, ptr %a
  %rhs = load <32 x half>, ptr %b
  %mask = fcmp oeq <32 x half> %lhs, %rhs
  %wide = sext <32 x i1> %mask to <32 x i16>
  store <32 x i16> %wide, ptr %c
  ret void
}

; 1024-bit vector, vscale_range(8,0): a single compare under a vl64 predicate.
define void @fcmp_oeq_v64f16(ptr %a, ptr %b, ptr %c) vscale_range(8,0) #0 {
; CHECK-LABEL: fcmp_oeq_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmeq p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <64 x half>, ptr %a
  %rhs = load <64 x half>, ptr %b
  %mask = fcmp oeq <64 x half> %lhs, %rhs
  %wide = sext <64 x i1> %mask to <64 x i16>
  store <64 x i16> %wide, ptr %c
  ret void
}

; 2048-bit vector, vscale_range(16,0): a single compare under a vl128 predicate.
define void @fcmp_oeq_v128f16(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: fcmp_oeq_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmeq p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <128 x half>, ptr %a
  %rhs = load <128 x half>, ptr %b
  %mask = fcmp oeq <128 x half> %lhs, %rhs
  %wide = sext <128 x i1> %mask to <128 x i16>
  store <128 x i16> %wide, ptr %c
  ret void
}

; 64-bit vector: the expected output is a NEON fcmeq, not an SVE sequence.
define <2 x i32> @fcmp_oeq_v2f32(<2 x float> %op1, <2 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_oeq_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcmeq v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %mask = fcmp oeq <2 x float> %op1, %op2
  %wide = sext <2 x i1> %mask to <2 x i32>
  ret <2 x i32> %wide
}

; 128-bit vector: the expected output is a NEON fcmeq, not an SVE sequence.
define <4 x i32> @fcmp_oeq_v4f32(<4 x float> %op1, <4 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_oeq_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %mask = fcmp oeq <4 x float> %op1, %op2
  %wide = sext <4 x i1> %mask to <4 x i32>
  ret <4 x i32> %wide
}

; 256-bit vector: one predicated SVE compare, result sign-extended and stored to %c.
define void @fcmp_oeq_v8f32(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_oeq_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fcmeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1w { z0.s }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <8 x float>, ptr %a
  %rhs = load <8 x float>, ptr %b
  %mask = fcmp oeq <8 x float> %lhs, %rhs
  %wide = sext <8 x i1> %mask to <8 x i32>
  store <8 x i32> %wide, ptr %c
  ret void
}

; No vscale_range attribute here: the 256-bit run expects the compare split
; into two halves, the 512-bit and 2048-bit runs expect a single compare.
define void @fcmp_oeq_v16f32(ptr %a, ptr %b, ptr %c) #0 {
; VBITS_GE_256-LABEL: fcmp_oeq_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z2.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1w { z3.s }, p0/z, [x1]
; VBITS_GE_256-NEXT:    fcmeq p1.s, p0/z, z0.s, z1.s
; VBITS_GE_256-NEXT:    fcmeq p2.s, p0/z, z2.s, z3.s
; VBITS_GE_256-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT:    mov z1.s, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x2, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x2]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcmp_oeq_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fcmeq p1.s, p0/z, z0.s, z1.s
; VBITS_GE_512-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x2]
; VBITS_GE_512-NEXT:    ret
  %lhs = load <16 x float>, ptr %a
  %rhs = load <16 x float>, ptr %b
  %mask = fcmp oeq <16 x float> %lhs, %rhs
  %wide = sext <16 x i1> %mask to <16 x i32>
  store <16 x i32> %wide, ptr %c
  ret void
}

; 1024-bit vector, vscale_range(8,0): a single compare under a vl32 predicate.
define void @fcmp_oeq_v32f32(ptr %a, ptr %b, ptr %c) vscale_range(8,0) #0 {
; CHECK-LABEL: fcmp_oeq_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fcmeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1w { z0.s }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <32 x float>, ptr %a
  %rhs = load <32 x float>, ptr %b
  %mask = fcmp oeq <32 x float> %lhs, %rhs
  %wide = sext <32 x i1> %mask to <32 x i32>
  store <32 x i32> %wide, ptr %c
  ret void
}

; 2048-bit vector, vscale_range(16,0): a single compare under a vl64 predicate.
define void @fcmp_oeq_v64f32(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: fcmp_oeq_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fcmeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1w { z0.s }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <64 x float>, ptr %a
  %rhs = load <64 x float>, ptr %b
  %mask = fcmp oeq <64 x float> %lhs, %rhs
  %wide = sext <64 x i1> %mask to <64 x i32>
  store <64 x i32> %wide, ptr %c
  ret void
}

; 64-bit vector: the expected output is a scalar fcmeq on d registers, not SVE.
define <1 x i64> @fcmp_oeq_v1f64(<1 x double> %op1, <1 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_oeq_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcmeq d0, d0, d1
; CHECK-NEXT:    ret
  %mask = fcmp oeq <1 x double> %op1, %op2
  %wide = sext <1 x i1> %mask to <1 x i64>
  ret <1 x i64> %wide
}

; Don't use SVE for 128-bit vectors.
define <2 x i64> @fcmp_oeq_v2f64(<2 x double> %op1, <2 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_oeq_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcmeq v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %mask = fcmp oeq <2 x double> %op1, %op2
  %wide = sext <2 x i1> %mask to <2 x i64>
  ret <2 x i64> %wide
}

; 256-bit vector: one predicated SVE compare, result sign-extended and stored to %c.
define void @fcmp_oeq_v4f64(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_oeq_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fcmeq p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1d { z0.d }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <4 x double>, ptr %a
  %rhs = load <4 x double>, ptr %b
  %mask = fcmp oeq <4 x double> %lhs, %rhs
  %wide = sext <4 x i1> %mask to <4 x i64>
  store <4 x i64> %wide, ptr %c
  ret void
}

; No vscale_range attribute here: the 256-bit run expects the compare split
; into two halves, the 512-bit and 2048-bit runs expect a single compare.
define void @fcmp_oeq_v8f64(ptr %a, ptr %b, ptr %c) #0 {
; VBITS_GE_256-LABEL: fcmp_oeq_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
; VBITS_GE_256-NEXT:    fcmeq p1.d, p0/z, z0.d, z1.d
; VBITS_GE_256-NEXT:    fcmeq p2.d, p0/z, z2.d, z3.d
; VBITS_GE_256-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT:    mov z1.d, p2/z, #-1 // =0xffffffffffffffff
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x2, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x2]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: fcmp_oeq_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    fcmeq p1.d, p0/z, z0.d, z1.d
; VBITS_GE_512-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x2]
; VBITS_GE_512-NEXT:    ret
  %lhs = load <8 x double>, ptr %a
  %rhs = load <8 x double>, ptr %b
  %mask = fcmp oeq <8 x double> %lhs, %rhs
  %wide = sext <8 x i1> %mask to <8 x i64>
  store <8 x i64> %wide, ptr %c
  ret void
}

; 1024-bit vector, vscale_range(8,0): a single compare under a vl16 predicate.
define void @fcmp_oeq_v16f64(ptr %a, ptr %b, ptr %c) vscale_range(8,0) #0 {
; CHECK-LABEL: fcmp_oeq_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fcmeq p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1d { z0.d }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x double>, ptr %a
  %rhs = load <16 x double>, ptr %b
  %mask = fcmp oeq <16 x double> %lhs, %rhs
  %wide = sext <16 x i1> %mask to <16 x i64>
  store <16 x i64> %wide, ptr %c
  ret void
}

; 2048-bit vector, vscale_range(16,0): a single compare under a vl32 predicate.
define void @fcmp_oeq_v32f64(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
; CHECK-LABEL: fcmp_oeq_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    fcmeq p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT:    mov z0.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1d { z0.d }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <32 x double>, ptr %a
  %rhs = load <32 x double>, ptr %b
  %mask = fcmp oeq <32 x double> %lhs, %rhs
  %wide = sext <32 x i1> %mask to <32 x i64>
  store <32 x i64> %wide, ptr %c
  ret void
}

;
; FCMP UEQ
;

; Expected as two compares (fcmuo and fcmeq) merged into one predicate.
define void @fcmp_ueq_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_ueq_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmuo p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    fcmeq p2.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov p1.b, p2/m, p2.b
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %mask = fcmp ueq <16 x half> %lhs, %rhs
  %wide = sext <16 x i1> %mask to <16 x i16>
  store <16 x i16> %wide, ptr %c
  ret void
}

;
; FCMP ONE
;

; Expected as two fcmgt compares (one per operand order) merged into one predicate.
define void @fcmp_one_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_one_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmgt p1.h, p0/z, z1.h, z0.h
; CHECK-NEXT:    fcmgt p2.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov p1.b, p2/m, p2.b
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %mask = fcmp one <16 x half> %lhs, %rhs
  %wide = sext <16 x i1> %mask to <16 x i16>
  store <16 x i16> %wide, ptr %c
  ret void
}

;
; FCMP UNE
;

; Expected as a single fcmne.
define void @fcmp_une_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_une_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmne p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %mask = fcmp une <16 x half> %lhs, %rhs
  %wide = sext <16 x i1> %mask to <16 x i16>
  store <16 x i16> %wide, ptr %c
  ret void
}

;
; FCMP OGT
;

; Expected as a single fcmgt with the operands in source order.
define void @fcmp_ogt_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_ogt_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmgt p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %mask = fcmp ogt <16 x half> %lhs, %rhs
  %wide = sext <16 x i1> %mask to <16 x i16>
  store <16 x i16> %wide, ptr %c
  ret void
}

;
; FCMP UGT
;

; Expected as fcmge with swapped operands, then inverted by EOR with all-ones.
define void @fcmp_ugt_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_ugt_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmge p1.h, p0/z, z1.h, z0.h
; CHECK-NEXT:    mov z1.h, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %mask = fcmp ugt <16 x half> %lhs, %rhs
  %wide = sext <16 x i1> %mask to <16 x i16>
  store <16 x i16> %wide, ptr %c
  ret void
}

;
; FCMP OLT
;

; Expected as a single fcmgt with the operands swapped.
define void @fcmp_olt_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_olt_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmgt p1.h, p0/z, z1.h, z0.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %mask = fcmp olt <16 x half> %lhs, %rhs
  %wide = sext <16 x i1> %mask to <16 x i16>
  store <16 x i16> %wide, ptr %c
  ret void
}

;
; FCMP ULT
;

; Expected as fcmge with the operands in source order, then inverted by EOR with all-ones.
define void @fcmp_ult_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_ult_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z1.h, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %mask = fcmp ult <16 x half> %lhs, %rhs
  %wide = sext <16 x i1> %mask to <16 x i16>
  store <16 x i16> %wide, ptr %c
  ret void
}

;
; FCMP OGE
;

; Expected as a single fcmge with the operands in source order.
define void @fcmp_oge_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_oge_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %mask = fcmp oge <16 x half> %lhs, %rhs
  %wide = sext <16 x i1> %mask to <16 x i16>
  store <16 x i16> %wide, ptr %c
  ret void
}

;
; FCMP UGE
;

; Expected as fcmgt with swapped operands, then inverted by EOR with all-ones.
define void @fcmp_uge_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_uge_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmgt p1.h, p0/z, z1.h, z0.h
; CHECK-NEXT:    mov z1.h, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %mask = fcmp uge <16 x half> %lhs, %rhs
  %wide = sext <16 x i1> %mask to <16 x i16>
  store <16 x i16> %wide, ptr %c
  ret void
}

;
; FCMP OLE
;

; Expected as a single fcmge with the operands swapped.
define void @fcmp_ole_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_ole_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmge p1.h, p0/z, z1.h, z0.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %mask = fcmp ole <16 x half> %lhs, %rhs
  %wide = sext <16 x i1> %mask to <16 x i16>
  store <16 x i16> %wide, ptr %c
  ret void
}

;
; FCMP ULE
;

; Expected as fcmgt with the operands in source order, then inverted by EOR with all-ones.
define void @fcmp_ule_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_ule_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmgt p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z1.h, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %mask = fcmp ule <16 x half> %lhs, %rhs
  %wide = sext <16 x i1> %mask to <16 x i16>
  store <16 x i16> %wide, ptr %c
  ret void
}

;
; FCMP UNO
;

; Expected as a single fcmuo.
define void @fcmp_uno_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_uno_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmuo p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %mask = fcmp uno <16 x half> %lhs, %rhs
  %wide = sext <16 x i1> %mask to <16 x i16>
  store <16 x i16> %wide, ptr %c
  ret void
}

;
; FCMP ORD
;

; Expected as fcmuo, then inverted by EOR with all-ones.
define void @fcmp_ord_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_ord_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmuo p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z1.h, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %mask = fcmp ord <16 x half> %lhs, %rhs
  %wide = sext <16 x i1> %mask to <16 x i16>
  store <16 x i16> %wide, ptr %c
  ret void
}

;
; FCMP EQ
;

; "fast" oeq: expected as a single fcmeq.
define void @fcmp_eq_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_eq_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmeq p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %mask = fcmp fast oeq <16 x half> %lhs, %rhs
  %wide = sext <16 x i1> %mask to <16 x i16>
  store <16 x i16> %wide, ptr %c
  ret void
}

;
; FCMP NE
;

; "fast" one: expected as a single fcmne (contrast with the two-compare
; sequence expected for the non-fast one test).
define void @fcmp_ne_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_ne_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmne p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %mask = fcmp fast one <16 x half> %lhs, %rhs
  %wide = sext <16 x i1> %mask to <16 x i16>
  store <16 x i16> %wide, ptr %c
  ret void
}

;
; FCMP GT
;

; "fast" ogt: expected as a single fcmgt with the operands in source order.
define void @fcmp_gt_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_gt_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmgt p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %mask = fcmp fast ogt <16 x half> %lhs, %rhs
  %wide = sext <16 x i1> %mask to <16 x i16>
  store <16 x i16> %wide, ptr %c
  ret void
}

;
; FCMP LT
;

; "fast" olt: expected as a single fcmgt with the operands swapped.
define void @fcmp_lt_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_lt_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmgt p1.h, p0/z, z1.h, z0.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %mask = fcmp fast olt <16 x half> %lhs, %rhs
  %wide = sext <16 x i1> %mask to <16 x i16>
  store <16 x i16> %wide, ptr %c
  ret void
}

;
; FCMP GE
;

; "fast" oge: expected as a single fcmge with the operands in source order.
define void @fcmp_ge_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_ge_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmge p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %mask = fcmp fast oge <16 x half> %lhs, %rhs
  %wide = sext <16 x i1> %mask to <16 x i16>
  store <16 x i16> %wide, ptr %c
  ret void
}

;
; FCMP LE
;

; "fast" ole: expected as a single fcmge with the operands swapped.
define void @fcmp_le_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
; CHECK-LABEL: fcmp_le_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    fcmge p1.h, p0/z, z1.h, z0.h
; CHECK-NEXT:    mov z0.h, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    ret
  %lhs = load <16 x half>, ptr %a
  %rhs = load <16 x half>, ptr %b
  %mask = fcmp fast ole <16 x half> %lhs, %rhs
  %wide = sext <16 x i1> %mask to <16 x i16>
  store <16 x i16> %wide, ptr %c
  ret void
}

; Attribute group shared by every function in this file: enables SVE.
attributes #0 = { "target-features"="+sve" }