; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

; FADD

define <vscale x 8 x half> @fadd_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fadd_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = fadd <vscale x 8 x half> %a, %b
  ret <vscale x 8 x half> %res
}

define <vscale x 4 x half> @fadd_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: fadd_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = fadd <vscale x 4 x half> %a, %b
  ret <vscale x 4 x half> %res
}

define <vscale x 2 x half> @fadd_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: fadd_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = fadd <vscale x 2 x half> %a, %b
  ret <vscale x 2 x half> %res
}

define <vscale x 4 x float> @fadd_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fadd_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fadd <vscale x 4 x float> %a, %b
  ret <vscale x 4 x float> %res
}

define <vscale x 2 x float> @fadd_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: fadd_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fadd <vscale x 2 x float> %a, %b
  ret <vscale x 2 x float> %res
}

define <vscale x 2 x double> @fadd_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fadd_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = fadd <vscale x 2 x double> %a, %b
  ret <vscale x 2 x double> %res
}

; FDIV

define <vscale x 8 x half> @fdiv_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fdiv_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fdiv z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = fdiv <vscale x 8 x half> %a, %b
  ret <vscale x 8 x half> %res
}

define <vscale x 4 x half> @fdiv_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: fdiv_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fdiv z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = fdiv <vscale x 4 x half> %a, %b
  ret <vscale x 4 x half> %res
}

define <vscale x 2 x half> @fdiv_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: fdiv_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fdiv z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = fdiv <vscale x 2 x half> %a, %b
  ret <vscale x 2 x half> %res
}

define <vscale x 4 x float> @fdiv_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fdiv_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fdiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fdiv <vscale x 4 x float> %a, %b
  ret <vscale x 4 x float> %res
}

define <vscale x 2 x float> @fdiv_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: fdiv_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fdiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fdiv <vscale x 2 x float> %a, %b
  ret <vscale x 2 x float> %res
}

define <vscale x 2 x double> @fdiv_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fdiv_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fdiv z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = fdiv <vscale x 2 x double> %a, %b
  ret <vscale x 2 x double> %res
}

; FSUB

define <vscale x 8 x half> @fsub_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fsub_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fsub z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = fsub <vscale x 8 x half> %a, %b
  ret <vscale x 8 x half> %res
}

define <vscale x 4 x half> @fsub_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: fsub_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = fsub <vscale x 4 x half> %a, %b
  ret <vscale x 4 x half> %res
}

define <vscale x 2 x half> @fsub_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: fsub_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = fsub <vscale x 2 x half> %a, %b
  ret <vscale x 2 x half> %res
}

define <vscale x 4 x float> @fsub_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fsub_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fsub z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fsub <vscale x 4 x float> %a, %b
  ret <vscale x 4 x float> %res
}

define <vscale x 2 x float> @fsub_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: fsub_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fsub <vscale x 2 x float> %a, %b
  ret <vscale x 2 x float> %res
}

define <vscale x 2 x double> @fsub_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fsub_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fsub z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = fsub <vscale x 2 x double> %a, %b
  ret <vscale x 2 x double> %res
}

; FMUL

define <vscale x 8 x half> @fmul_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fmul_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = fmul <vscale x 8 x half> %a, %b
  ret <vscale x 8 x half> %res
}

define <vscale x 4 x half> @fmul_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: fmul_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmul z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = fmul <vscale x 4 x half> %a, %b
  ret <vscale x 4 x half> %res
}

define <vscale x 2 x half> @fmul_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: fmul_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmul z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = fmul <vscale x 2 x half> %a, %b
  ret <vscale x 2 x half> %res
}

define <vscale x 4 x float> @fmul_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fmul_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fmul <vscale x 4 x float> %a, %b
  ret <vscale x 4 x float> %res
}

define <vscale x 2 x float> @fmul_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: fmul_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fmul <vscale x 2 x float> %a, %b
  ret <vscale x 2 x float> %res
}

define <vscale x 2 x double> @fmul_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fmul_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = fmul <vscale x 2 x double> %a, %b
  ret <vscale x 2 x double> %res
}

; FMA (the nxv2f64 _2 and _3 variants pass the operands in a different order)

define <vscale x 8 x half> @fma_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
; CHECK-LABEL: fma_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %r = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c)
  ret <vscale x 8 x half> %r
}

define <vscale x 4 x half> @fma_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x half> %c) {
; CHECK-LABEL: fma_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %r = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x half> %c)
  ret <vscale x 4 x half> %r
}

define <vscale x 2 x half> @fma_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x half> %c) {
; CHECK-LABEL: fma_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %r = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x half> %c)
  ret <vscale x 2 x half> %r
}

define <vscale x 4 x float> @fma_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
; CHECK-LABEL: fma_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmad z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %r = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
  ret <vscale x 4 x float> %r
}

define <vscale x 2 x float> @fma_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, <vscale x 2 x float> %c) {
; CHECK-LABEL: fma_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmad z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %r = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, <vscale x 2 x float> %c)
  ret <vscale x 2 x float> %r
}

define <vscale x 2 x double> @fma_nxv2f64_1(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
; CHECK-LABEL: fma_nxv2f64_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmad z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    ret
  %r = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
  ret <vscale x 2 x double> %r
}

define <vscale x 2 x double> @fma_nxv2f64_2(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
; CHECK-LABEL: fma_nxv2f64_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmad z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    ret
  %r = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> %b, <vscale x 2 x double> %a, <vscale x 2 x double> %c)
  ret <vscale x 2 x double> %r
}

define <vscale x 2 x double> @fma_nxv2f64_3(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
; CHECK-LABEL: fma_nxv2f64_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmla z0.d, p0/m, z2.d, z1.d
; CHECK-NEXT:    ret
  %r = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> %c, <vscale x 2 x double> %b, <vscale x 2 x double> %a)
  ret <vscale x 2 x double> %r
}

; FMLS (fma whose second multiplicand is negated with fneg)

define <vscale x 8 x half> @fmls_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
; CHECK-LABEL: fmls_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fmls z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %neg = fneg <vscale x 8 x half> %b
  %r = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> %c, <vscale x 8 x half> %neg, <vscale x 8 x half> %a)
  ret <vscale x 8 x half> %r
}

define <vscale x 4 x half> @fmls_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x half> %c) {
; CHECK-LABEL: fmls_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmls z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %neg = fneg <vscale x 4 x half> %b
  %r = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> %c, <vscale x 4 x half> %neg, <vscale x 4 x half> %a)
  ret <vscale x 4 x half> %r
}

define <vscale x 2 x half> @fmls_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x half> %c) {
; CHECK-LABEL: fmls_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmls z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %neg = fneg <vscale x 2 x half> %b
  %r = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> %c, <vscale x 2 x half> %neg, <vscale x 2 x half> %a)
  ret <vscale x 2 x half> %r
}

define <vscale x 4 x float> @fmls_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
; CHECK-LABEL: fmls_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmls z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %neg = fneg <vscale x 4 x float> %b
  %r = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> %c, <vscale x 4 x float> %neg, <vscale x 4 x float> %a)
  ret <vscale x 4 x float> %r
}

define <vscale x 2 x float> @fmls_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, <vscale x 2 x float> %c) {
; CHECK-LABEL: fmls_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmls z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %neg = fneg <vscale x 2 x float> %b
  %r = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> %c, <vscale x 2 x float> %neg, <vscale x 2 x float> %a)
  ret <vscale x 2 x float> %r
}

define <vscale x 2 x double> @fmls_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
; CHECK-LABEL: fmls_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmls z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    ret
  %neg = fneg <vscale x 2 x double> %b
  %r = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> %c, <vscale x 2 x double> %neg, <vscale x 2 x double> %a)
  ret <vscale x 2 x double> %r
}

; FNEG

define <vscale x 8 x half> @fneg_nxv8f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: fneg_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fneg z0.h, p0/m, z0.h
; CHECK-NEXT:    ret
  %res = fneg <vscale x 8 x half> %a
  ret <vscale x 8 x half> %res
}

define <vscale x 4 x half> @fneg_nxv4f16(<vscale x 4 x half> %a) {
; CHECK-LABEL: fneg_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fneg z0.h, p0/m, z0.h
; CHECK-NEXT:    ret
  %res = fneg <vscale x 4 x half> %a
  ret <vscale x 4 x half> %res
}

define <vscale x 2 x half> @fneg_nxv2f16(<vscale x 2 x half> %a) {
; CHECK-LABEL: fneg_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fneg z0.h, p0/m, z0.h
; CHECK-NEXT:    ret
  %res = fneg <vscale x 2 x half> %a
  ret <vscale x 2 x half> %res
}

define <vscale x 4 x float> @fneg_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fneg_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fneg z0.s, p0/m, z0.s
; CHECK-NEXT:    ret
  %res = fneg <vscale x 4 x float> %a
  ret <vscale x 4 x float> %res
}

define <vscale x 2 x float> @fneg_nxv2f32(<vscale x 2 x float> %a) {
; CHECK-LABEL: fneg_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fneg z0.s, p0/m, z0.s
; CHECK-NEXT:    ret
  %res = fneg <vscale x 2 x float> %a
  ret <vscale x 2 x float> %res
}

define <vscale x 2 x double> @fneg_nxv2f64(<vscale x 2 x double> %a) {
; CHECK-LABEL: fneg_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fneg z0.d, p0/m, z0.d
; CHECK-NEXT:    ret
  %res = fneg <vscale x 2 x double> %a
  ret <vscale x 2 x double> %res
}

; FRECPS (llvm.aarch64.sve.frecps.x intrinsic)

define <vscale x 8 x half> @frecps_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: frecps_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frecps z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.aarch64.sve.frecps.x.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %res
}

define <vscale x 4 x float> @frecps_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: frecps_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frecps z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.aarch64.sve.frecps.x.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %res
}

define <vscale x 2 x double> @frecps_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: frecps_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frecps z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x double> @llvm.aarch64.sve.frecps.x.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %res
}

; FRSQRTS (llvm.aarch64.sve.frsqrts.x intrinsic)

define <vscale x 8 x half> @frsqrts_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: frsqrts_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrts z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.aarch64.sve.frsqrts.x.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %res
}

define <vscale x 4 x float> @frsqrts_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: frsqrts_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrts z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.aarch64.sve.frsqrts.x.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %res
}

define <vscale x 2 x double> @frsqrts_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: frsqrts_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrts z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x double> @llvm.aarch64.sve.frsqrts.x.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %res
}

%complex = type { { double, double } }

; Two faddv reduction results are stored to the two adjacent double fields of
; %complex; the expected code inserts the second into lane 1 and emits a single
; q-register store.
define void @scalar_to_vector(ptr %outval, <vscale x 2 x i1> %pred, <vscale x 2 x double> %in1, <vscale x 2 x double> %in2) {
; CHECK-LABEL: scalar_to_vector:
; CHECK:       // %bb.0:
; CHECK-NEXT:    faddv d0, p0, z0.d
; CHECK-NEXT:    faddv d1, p0, z1.d
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %imagp = getelementptr inbounds %complex, ptr %outval, i64 0, i32 0, i32 1
  %1 = call double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1> %pred, <vscale x 2 x double> %in1)
  %2 = call double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1> %pred, <vscale x 2 x double> %in2)
  store double %1, ptr %outval, align 8
  store double %2, ptr %imagp, align 8
  ret void
}

; Load a <vscale x 4 x float> from %P1 and store it to %P2.
define void @float_copy(ptr %P1, ptr %P2) {
; CHECK-LABEL: float_copy:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %A = load <vscale x 4 x float>, ptr %P1, align 16
  store <vscale x 4 x float> %A, ptr %P2, align 16
  ret void
}

; FSQRT

define <vscale x 8 x half> @fsqrt_nxv8f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: fsqrt_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fsqrt z0.h, p0/m, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half> %a)
  ret <vscale x 8 x half> %res
}

define <vscale x 4 x half> @fsqrt_nxv4f16(<vscale x 4 x half> %a) {
; CHECK-LABEL: fsqrt_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fsqrt z0.h, p0/m, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x half> @llvm.sqrt.nxv4f16(<vscale x 4 x half> %a)
  ret <vscale x 4 x half> %res
}

define <vscale x 2 x half> @fsqrt_nxv2f16(<vscale x 2 x half> %a) {
; CHECK-LABEL: fsqrt_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fsqrt z0.h, p0/m, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x half> @llvm.sqrt.nxv2f16(<vscale x 2 x half> %a)
  ret <vscale x 2 x half> %res
}

define <vscale x 4 x float> @fsqrt_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fsqrt_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fsqrt z0.s, p0/m, z0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> %a)
  ret <vscale x 4 x float> %res
}

define <vscale x 2 x float> @fsqrt_nxv2f32(<vscale x 2 x float> %a) {
; CHECK-LABEL: fsqrt_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fsqrt z0.s, p0/m, z0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x float> @llvm.sqrt.nxv2f32(<vscale x 2 x float> %a)
  ret <vscale x 2 x float> %res
}

define <vscale x 2 x double> @fsqrt_nxv2f64(<vscale x 2 x double> %a) {
; CHECK-LABEL: fsqrt_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fsqrt z0.d, p0/m, z0.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> %a)
  ret <vscale x 2 x double> %res
}

; FABS

define <vscale x 8 x half> @fabs_nxv8f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: fabs_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fabs z0.h, p0/m, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %a)
  ret <vscale x 8 x half> %res
}

define <vscale x 4 x half> @fabs_nxv4f16(<vscale x 4 x half> %a) {
; CHECK-LABEL: fabs_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fabs z0.h, p0/m, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %a)
  ret <vscale x 4 x half> %res
}

define <vscale x 2 x half> @fabs_nxv2f16(<vscale x 2 x half> %a) {
; CHECK-LABEL: fabs_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fabs z0.h, p0/m, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> %a)
  ret <vscale x 2 x half> %res
}

define <vscale x 4 x float> @fabs_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fabs_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fabs z0.s, p0/m, z0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> %a)
  ret <vscale x 4 x float> %res
}

define <vscale x 2 x float> @fabs_nxv2f32(<vscale x 2 x float> %a) {
; CHECK-LABEL: fabs_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fabs z0.s, p0/m, z0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x float> @llvm.fabs.nxv2f32(<vscale x 2 x float> %a)
  ret <vscale x 2 x float> %res
}

define <vscale x 2 x double> @fabs_nxv2f64(<vscale x 2 x double> %a) {
; CHECK-LABEL: fabs_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fabs z0.d, p0/m, z0.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> %a)
  ret <vscale x 2 x double> %res
}

; FABD

define <vscale x 8 x half> @fabd_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fabd_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fabd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %sub = fsub <vscale x 8 x half> %a, %b
  %res = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %sub)
  ret <vscale x 8 x half> %res
}

define <vscale x 4 x half> @fabd_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: fabd_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fabd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %sub = fsub <vscale x 4 x half> %a, %b
  %res = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %sub)
  ret <vscale x 4 x half> %res
}

define <vscale x 2 x half> @fabd_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: fabd_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fabd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %sub = fsub <vscale x 2 x half> %a, %b
  %res = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> %sub)
  ret <vscale x 2 x half> %res
}

define <vscale x 4 x float> @fabd_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fabd_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fabd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %sub = fsub <vscale x 4 x float> %a, %b
  %res = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> %sub)
  ret <vscale x 4 x float> %res
}

define <vscale x 2 x float> @fabd_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: fabd_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fabd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %sub = fsub <vscale x 2 x float> %a, %b
  %res = call <vscale x 2 x float> @llvm.fabs.nxv2f32(<vscale x 2 x float> %sub)
  ret <vscale x 2 x float> %res
}

define <vscale x 2 x double> @fabd_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fabd_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fabd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %sub = fsub <vscale x 2 x double> %a, %b
  %res = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> %sub)
  ret <vscale x 2 x double> %res
}

; maxnum minnum

define <vscale x 16 x half> @maxnum_nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b) {
; CHECK-LABEL: maxnum_nxv16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z2.h
; CHECK-NEXT:    fmaxnm z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x half> @llvm.maxnum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b)
  ret <vscale x 16 x half> %res
}

define <vscale x 8 x half> @maxnum_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: maxnum_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.maxnum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %res
}

define <vscale x 4 x half> @maxnum_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: maxnum_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x half> @llvm.maxnum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %res
}

define <vscale x 2 x half> @maxnum_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: maxnum_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x half> @llvm.maxnum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
  ret <vscale x 2 x half> %res
}

define <vscale x 8 x float> @maxnum_nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x float> %b) {
; CHECK-LABEL: maxnum_nxv8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z2.s
; CHECK-NEXT:    fmaxnm z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x float> @llvm.maxnum.nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x float> %b)
  ret <vscale x 8 x float> %res
}

define <vscale x 4 x float> @maxnum_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: maxnum_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %res
}

define <vscale x 2 x float> @maxnum_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: maxnum_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x float> @llvm.maxnum.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b)
  ret <vscale x 2 x float> %res
}

define <vscale x 4 x double> @maxnum_nxv4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b) {
; CHECK-LABEL: maxnum_nxv4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, z2.d
; CHECK-NEXT:    fmaxnm z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x double> @llvm.maxnum.nxv4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b)
  ret <vscale x 4 x double> %res
}

define <vscale x 2 x double> @maxnum_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: maxnum_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x double> @llvm.maxnum.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %res
}

define <vscale x 16 x half> @minnum_nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b) {
; CHECK-LABEL: minnum_nxv16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z2.h
; CHECK-NEXT:    fminnm z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x half> @llvm.minnum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b)
  ret <vscale x 16 x half> %res
}

define <vscale x 8 x half> @minnum_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: minnum_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.minnum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %res
}

define <vscale x 4 x half> @minnum_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: minnum_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x half> @llvm.minnum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %res
}

define <vscale x 2 x half> @minnum_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: minnum_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x half> @llvm.minnum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
  ret <vscale x 2 x half> %res
}

define <vscale x 8 x float> @minnum_nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x float> %b) {
; CHECK-LABEL: minnum_nxv8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z2.s
; CHECK-NEXT:    fminnm z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x float> @llvm.minnum.nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x float> %b)
  ret <vscale x 8 x float> %res
}

define <vscale x 4 x float> @minnum_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: minnum_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
870 %res = call <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) 871 ret <vscale x 4 x float> %res 872} 873 874define <vscale x 2 x float> @minnum_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) { 875; CHECK-LABEL: minnum_nxv2f32: 876; CHECK: // %bb.0: 877; CHECK-NEXT: ptrue p0.d 878; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s 879; CHECK-NEXT: ret 880 %res = call <vscale x 2 x float> @llvm.minnum.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) 881 ret <vscale x 2 x float> %res 882} 883 884define <vscale x 4 x double> @minnum_nxv4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b) { 885; CHECK-LABEL: minnum_nxv4f64: 886; CHECK: // %bb.0: 887; CHECK-NEXT: ptrue p0.d 888; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z2.d 889; CHECK-NEXT: fminnm z1.d, p0/m, z1.d, z3.d 890; CHECK-NEXT: ret 891 %res = call <vscale x 4 x double> @llvm.minnum.nxv4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b) 892 ret <vscale x 4 x double> %res 893} 894 895define <vscale x 2 x double> @minnum_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) { 896; CHECK-LABEL: minnum_nxv2f64: 897; CHECK: // %bb.0: 898; CHECK-NEXT: ptrue p0.d 899; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z1.d 900; CHECK-NEXT: ret 901 %res = call <vscale x 2 x double> @llvm.minnum.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) 902 ret <vscale x 2 x double> %res 903} 904 905; maximum minimum 906 907define <vscale x 16 x half> @maximum_nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b) { 908; CHECK-LABEL: maximum_nxv16f16: 909; CHECK: // %bb.0: 910; CHECK-NEXT: ptrue p0.h 911; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z2.h 912; CHECK-NEXT: fmax z1.h, p0/m, z1.h, z3.h 913; CHECK-NEXT: ret 914 %res = call <vscale x 16 x half> @llvm.maximum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b) 915 ret <vscale x 16 x half> %res 916} 917 918define <vscale x 8 x half> @maximum_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) { 919; 
CHECK-LABEL: maximum_nxv8f16: 920; CHECK: // %bb.0: 921; CHECK-NEXT: ptrue p0.h 922; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h 923; CHECK-NEXT: ret 924 %res = call <vscale x 8 x half> @llvm.maximum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) 925 ret <vscale x 8 x half> %res 926} 927 928define <vscale x 4 x half> @maximum_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) { 929; CHECK-LABEL: maximum_nxv4f16: 930; CHECK: // %bb.0: 931; CHECK-NEXT: ptrue p0.s 932; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h 933; CHECK-NEXT: ret 934 %res = call <vscale x 4 x half> @llvm.maximum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) 935 ret <vscale x 4 x half> %res 936} 937 938define <vscale x 2 x half> @maximum_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) { 939; CHECK-LABEL: maximum_nxv2f16: 940; CHECK: // %bb.0: 941; CHECK-NEXT: ptrue p0.d 942; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h 943; CHECK-NEXT: ret 944 %res = call <vscale x 2 x half> @llvm.maximum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) 945 ret <vscale x 2 x half> %res 946} 947 948define <vscale x 8 x float> @maximum_nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x float> %b) { 949; CHECK-LABEL: maximum_nxv8f32: 950; CHECK: // %bb.0: 951; CHECK-NEXT: ptrue p0.s 952; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z2.s 953; CHECK-NEXT: fmax z1.s, p0/m, z1.s, z3.s 954; CHECK-NEXT: ret 955 %res = call <vscale x 8 x float> @llvm.maximum.nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x float> %b) 956 ret <vscale x 8 x float> %res 957} 958 959define <vscale x 4 x float> @maximum_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) { 960; CHECK-LABEL: maximum_nxv4f32: 961; CHECK: // %bb.0: 962; CHECK-NEXT: ptrue p0.s 963; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s 964; CHECK-NEXT: ret 965 %res = call <vscale x 4 x float> @llvm.maximum.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) 966 ret <vscale x 4 x float> %res 967} 968 969define <vscale x 2 x float> @maximum_nxv2f32(<vscale x 2 x 
float> %a, <vscale x 2 x float> %b) { 970; CHECK-LABEL: maximum_nxv2f32: 971; CHECK: // %bb.0: 972; CHECK-NEXT: ptrue p0.d 973; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s 974; CHECK-NEXT: ret 975 %res = call <vscale x 2 x float> @llvm.maximum.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) 976 ret <vscale x 2 x float> %res 977} 978 979define <vscale x 4 x double> @maximum_nxv4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b) { 980; CHECK-LABEL: maximum_nxv4f64: 981; CHECK: // %bb.0: 982; CHECK-NEXT: ptrue p0.d 983; CHECK-NEXT: fmax z0.d, p0/m, z0.d, z2.d 984; CHECK-NEXT: fmax z1.d, p0/m, z1.d, z3.d 985; CHECK-NEXT: ret 986 %res = call <vscale x 4 x double> @llvm.maximum.nxv4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b) 987 ret <vscale x 4 x double> %res 988} 989 990define <vscale x 2 x double> @maximum_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) { 991; CHECK-LABEL: maximum_nxv2f64: 992; CHECK: // %bb.0: 993; CHECK-NEXT: ptrue p0.d 994; CHECK-NEXT: fmax z0.d, p0/m, z0.d, z1.d 995; CHECK-NEXT: ret 996 %res = call <vscale x 2 x double> @llvm.maximum.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) 997 ret <vscale x 2 x double> %res 998} 999 1000define <vscale x 16 x half> @minimum_nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b) { 1001; CHECK-LABEL: minimum_nxv16f16: 1002; CHECK: // %bb.0: 1003; CHECK-NEXT: ptrue p0.h 1004; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z2.h 1005; CHECK-NEXT: fmin z1.h, p0/m, z1.h, z3.h 1006; CHECK-NEXT: ret 1007 %res = call <vscale x 16 x half> @llvm.minimum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b) 1008 ret <vscale x 16 x half> %res 1009} 1010 1011define <vscale x 8 x half> @minimum_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) { 1012; CHECK-LABEL: minimum_nxv8f16: 1013; CHECK: // %bb.0: 1014; CHECK-NEXT: ptrue p0.h 1015; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h 1016; CHECK-NEXT: ret 1017 %res = call <vscale x 8 x half> @llvm.minimum.nxv8f16(<vscale x 8 x 
half> %a, <vscale x 8 x half> %b) 1018 ret <vscale x 8 x half> %res 1019} 1020 1021define <vscale x 4 x half> @minimum_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) { 1022; CHECK-LABEL: minimum_nxv4f16: 1023; CHECK: // %bb.0: 1024; CHECK-NEXT: ptrue p0.s 1025; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h 1026; CHECK-NEXT: ret 1027 %res = call <vscale x 4 x half> @llvm.minimum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) 1028 ret <vscale x 4 x half> %res 1029} 1030 1031define <vscale x 2 x half> @minimum_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) { 1032; CHECK-LABEL: minimum_nxv2f16: 1033; CHECK: // %bb.0: 1034; CHECK-NEXT: ptrue p0.d 1035; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h 1036; CHECK-NEXT: ret 1037 %res = call <vscale x 2 x half> @llvm.minimum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) 1038 ret <vscale x 2 x half> %res 1039} 1040 1041define <vscale x 8 x float> @minimum_nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x float> %b) { 1042; CHECK-LABEL: minimum_nxv8f32: 1043; CHECK: // %bb.0: 1044; CHECK-NEXT: ptrue p0.s 1045; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z2.s 1046; CHECK-NEXT: fmin z1.s, p0/m, z1.s, z3.s 1047; CHECK-NEXT: ret 1048 %res = call <vscale x 8 x float> @llvm.minimum.nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x float> %b) 1049 ret <vscale x 8 x float> %res 1050} 1051 1052define <vscale x 4 x float> @minimum_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) { 1053; CHECK-LABEL: minimum_nxv4f32: 1054; CHECK: // %bb.0: 1055; CHECK-NEXT: ptrue p0.s 1056; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s 1057; CHECK-NEXT: ret 1058 %res = call <vscale x 4 x float> @llvm.minimum.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) 1059 ret <vscale x 4 x float> %res 1060} 1061 1062define <vscale x 2 x float> @minimum_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) { 1063; CHECK-LABEL: minimum_nxv2f32: 1064; CHECK: // %bb.0: 1065; CHECK-NEXT: ptrue p0.d 1066; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s 
1067; CHECK-NEXT: ret 1068 %res = call <vscale x 2 x float> @llvm.minimum.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) 1069 ret <vscale x 2 x float> %res 1070} 1071 1072define <vscale x 4 x double> @minimum_nxv4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b) { 1073; CHECK-LABEL: minimum_nxv4f64: 1074; CHECK: // %bb.0: 1075; CHECK-NEXT: ptrue p0.d 1076; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z2.d 1077; CHECK-NEXT: fmin z1.d, p0/m, z1.d, z3.d 1078; CHECK-NEXT: ret 1079 %res = call <vscale x 4 x double> @llvm.minimum.nxv4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b) 1080 ret <vscale x 4 x double> %res 1081} 1082 1083define <vscale x 2 x double> @minimum_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) { 1084; CHECK-LABEL: minimum_nxv2f64: 1085; CHECK: // %bb.0: 1086; CHECK-NEXT: ptrue p0.d 1087; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d 1088; CHECK-NEXT: ret 1089 %res = call <vscale x 2 x double> @llvm.minimum.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) 1090 ret <vscale x 2 x double> %res 1091} 1092 1093declare <vscale x 8 x half> @llvm.aarch64.sve.frecps.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>) 1094declare <vscale x 4 x float> @llvm.aarch64.sve.frecps.x.nxv4f32(<vscale x 4 x float> , <vscale x 4 x float>) 1095declare <vscale x 2 x double> @llvm.aarch64.sve.frecps.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>) 1096 1097declare <vscale x 8 x half> @llvm.aarch64.sve.frsqrts.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>) 1098declare <vscale x 4 x float> @llvm.aarch64.sve.frsqrts.x.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>) 1099declare <vscale x 2 x double> @llvm.aarch64.sve.frsqrts.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>) 1100 1101declare <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>) 1102declare <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>) 1103declare 
<vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>) 1104declare <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>) 1105declare <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x half>) 1106declare <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x half>) 1107 1108declare <vscale x 8 x half> @llvm.sqrt.nxv8f16( <vscale x 8 x half>) 1109declare <vscale x 4 x half> @llvm.sqrt.nxv4f16( <vscale x 4 x half>) 1110declare <vscale x 2 x half> @llvm.sqrt.nxv2f16( <vscale x 2 x half>) 1111declare <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float>) 1112declare <vscale x 2 x float> @llvm.sqrt.nxv2f32(<vscale x 2 x float>) 1113declare <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double>) 1114 1115declare <vscale x 8 x half> @llvm.fabs.nxv8f16( <vscale x 8 x half>) 1116declare <vscale x 4 x half> @llvm.fabs.nxv4f16( <vscale x 4 x half>) 1117declare <vscale x 2 x half> @llvm.fabs.nxv2f16( <vscale x 2 x half>) 1118declare <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float>) 1119declare <vscale x 2 x float> @llvm.fabs.nxv2f32(<vscale x 2 x float>) 1120declare <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double>) 1121 1122declare <vscale x 16 x half> @llvm.maxnum.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>) 1123declare <vscale x 8 x half> @llvm.maxnum.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>) 1124declare <vscale x 4 x half> @llvm.maxnum.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>) 1125declare <vscale x 2 x half> @llvm.maxnum.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>) 1126declare <vscale x 8 x float> @llvm.maxnum.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>) 1127declare <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>) 1128declare <vscale x 2 x float> @llvm.maxnum.nxv2f32(<vscale x 2 x 
float>, <vscale x 2 x float>) 1129declare <vscale x 4 x double> @llvm.maxnum.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>) 1130declare <vscale x 2 x double> @llvm.maxnum.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>) 1131declare <vscale x 16 x half> @llvm.minnum.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>) 1132declare <vscale x 8 x half> @llvm.minnum.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>) 1133declare <vscale x 4 x half> @llvm.minnum.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>) 1134declare <vscale x 2 x half> @llvm.minnum.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>) 1135declare <vscale x 8 x float> @llvm.minnum.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>) 1136declare <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>) 1137declare <vscale x 2 x float> @llvm.minnum.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>) 1138declare <vscale x 4 x double> @llvm.minnum.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>) 1139declare <vscale x 2 x double> @llvm.minnum.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>) 1140 1141declare <vscale x 16 x half> @llvm.maximum.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>) 1142declare <vscale x 8 x half> @llvm.maximum.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>) 1143declare <vscale x 4 x half> @llvm.maximum.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>) 1144declare <vscale x 2 x half> @llvm.maximum.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>) 1145declare <vscale x 8 x float> @llvm.maximum.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>) 1146declare <vscale x 4 x float> @llvm.maximum.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>) 1147declare <vscale x 2 x float> @llvm.maximum.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>) 1148declare <vscale x 4 x double> @llvm.maximum.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>) 1149declare <vscale x 2 x double> @llvm.maximum.nxv2f64(<vscale x 2 x double>, <vscale x 2 x 
double>) 1150declare <vscale x 16 x half> @llvm.minimum.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>) 1151declare <vscale x 8 x half> @llvm.minimum.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>) 1152declare <vscale x 4 x half> @llvm.minimum.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>) 1153declare <vscale x 2 x half> @llvm.minimum.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>) 1154declare <vscale x 8 x float> @llvm.minimum.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>) 1155declare <vscale x 4 x float> @llvm.minimum.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>) 1156declare <vscale x 2 x float> @llvm.minimum.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>) 1157declare <vscale x 4 x double> @llvm.minimum.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>) 1158declare <vscale x 2 x double> @llvm.minimum.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>) 1159 1160; Function Attrs: nounwind readnone 1161declare double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>) #2 1162