; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN

; Codegen tests for floating-point operations on fixed-length vectors with
; bfloat, half, float and double elements, compiled for both riscv32 and
; riscv64.
;
; Check prefixes used in this file:
;   - the CHECK prefix is shared by all four llc invocations above;
;   - the ZVFH prefix covers the two invocations that pass +zvfh;
;   - the ZVFHMIN prefix covers the two invocations that pass +zvfhmin.
;
; Every test loads its vector operand(s) from memory, applies one operation and
; stores the result back through %x. The 6-element variants differ from the
; 8-element ones only in the AVL given to vsetivli.

; fadd: bfloat operands (and half operands under the ZVFHMIN prefix) are
; widened to f32, added at e32/m2 and narrowed back to 16 bits. half under the
; ZVFH prefix, float and double use a single vfadd.vv.

define void @fadd_v8bf16(ptr %x, ptr %y) {
; CHECK-LABEL: fadd_v8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v12, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT:    vse16.v v10, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x bfloat>, ptr %x
  %b = load <8 x bfloat>, ptr %y
  %c = fadd <8 x bfloat> %a, %b
  store <8 x bfloat> %c, ptr %x
  ret void
}

define void @fadd_v6bf16(ptr %x, ptr %y) {
; CHECK-LABEL: fadd_v6bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfadd.vv v8, v12, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT:    vse16.v v10, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x bfloat>, ptr %x
  %b = load <6 x bfloat>, ptr %y
  %c = fadd <6 x bfloat> %a, %b
  store <6 x bfloat> %c, ptr %x
  ret void
}

define void @fadd_v8f16(ptr %x, ptr %y) {
; ZVFH-LABEL: fadd_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vfadd.vv v8, v8, v9
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fadd_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT:    vse16.v v10, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = fadd <8 x half> %a, %b
  store <8 x half> %c, ptr %x
  ret void
}

define void @fadd_v6f16(ptr %x, ptr %y) {
; ZVFH-LABEL: fadd_v6f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vfadd.vv v8, v8, v9
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fadd_v6f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfadd.vv v8, v12, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT:    vse16.v v10, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <6 x half>, ptr %x
  %b = load <6 x half>, ptr %y
  %c = fadd <6 x half> %a, %b
  store <6 x half> %c, ptr %x
  ret void
}

define void @fadd_v4f32(ptr %x, ptr %y) {
; CHECK-LABEL: fadd_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = fadd <4 x float> %a, %b
  store <4 x float> %c, ptr %x
  ret void
}

define void @fadd_v2f64(ptr %x, ptr %y) {
; CHECK-LABEL: fadd_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vfadd.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = load <2 x double>, ptr %y
  %c = fadd <2 x double> %a, %b
  store <2 x double> %c, ptr %x
  ret void
}

; fsub: same structure as the fadd tests, with vfsub.vv as the arithmetic
; instruction.

define void @fsub_v8bf16(ptr %x, ptr %y) {
; CHECK-LABEL: fsub_v8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfsub.vv v8, v12, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT:    vse16.v v10, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x bfloat>, ptr %x
  %b = load <8 x bfloat>, ptr %y
  %c = fsub <8 x bfloat> %a, %b
  store <8 x bfloat> %c, ptr %x
  ret void
}

define void @fsub_v6bf16(ptr %x, ptr %y) {
; CHECK-LABEL: fsub_v6bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfsub.vv v8, v12, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT:    vse16.v v10, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x bfloat>, ptr %x
  %b = load <6 x bfloat>, ptr %y
  %c = fsub <6 x bfloat> %a, %b
  store <6 x bfloat> %c, ptr %x
  ret void
}

define void @fsub_v8f16(ptr %x, ptr %y) {
; ZVFH-LABEL: fsub_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vfsub.vv v8, v8, v9
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fsub_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfsub.vv v8, v12, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT:    vse16.v v10, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = fsub <8 x half> %a, %b
  store <8 x half> %c, ptr %x
  ret void
}

define void @fsub_v6f16(ptr %x, ptr %y) {
; ZVFH-LABEL: fsub_v6f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vfsub.vv v8, v8, v9
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fsub_v6f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfsub.vv v8, v12, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT:    vse16.v v10, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <6 x half>, ptr %x
  %b = load <6 x half>, ptr %y
  %c = fsub <6 x half> %a, %b
  store <6 x half> %c, ptr %x
  ret void
}

define void @fsub_v4f32(ptr %x, ptr %y) {
; CHECK-LABEL: fsub_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vfsub.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = fsub <4 x float> %a, %b
  store <4 x float> %c, ptr %x
  ret void
}

define void @fsub_v2f64(ptr %x, ptr %y) {
; CHECK-LABEL: fsub_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vfsub.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = load <2 x double>, ptr %y
  %c = fsub <2 x double> %a, %b
  store <2 x double> %c, ptr %x
  ret void
}

; fmul: same structure as the fadd tests, with vfmul.vv as the arithmetic
; instruction.

define void @fmul_v8bf16(ptr %x, ptr %y) {
; CHECK-LABEL: fmul_v8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfmul.vv v8, v12, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT:    vse16.v v10, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x bfloat>, ptr %x
  %b = load <8 x bfloat>, ptr %y
  %c = fmul <8 x bfloat> %a, %b
  store <8 x bfloat> %c, ptr %x
  ret void
}

define void @fmul_v6bf16(ptr %x, ptr %y) {
; CHECK-LABEL: fmul_v6bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfmul.vv v8, v12, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT:    vse16.v v10, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x bfloat>, ptr %x
  %b = load <6 x bfloat>, ptr %y
  %c = fmul <6 x bfloat> %a, %b
  store <6 x bfloat> %c, ptr %x
  ret void
}

define void @fmul_v8f16(ptr %x, ptr %y) {
; ZVFH-LABEL: fmul_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vfmul.vv v8, v8, v9
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fmul_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmul.vv v8, v12, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT:    vse16.v v10, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = fmul <8 x half> %a, %b
  store <8 x half> %c, ptr %x
  ret void
}

define void @fmul_v6f16(ptr %x, ptr %y) {
; ZVFH-LABEL: fmul_v6f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vfmul.vv v8, v8, v9
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fmul_v6f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfmul.vv v8, v12, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT:    vse16.v v10, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <6 x half>, ptr %x
  %b = load <6 x half>, ptr %y
  %c = fmul <6 x half> %a, %b
  store <6 x half> %c, ptr %x
  ret void
}

define void @fmul_v4f32(ptr %x, ptr %y) {
; CHECK-LABEL: fmul_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vfmul.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = fmul <4 x float> %a, %b
  store <4 x float> %c, ptr %x
  ret void
}

define void @fmul_v2f64(ptr %x, ptr %y) {
; CHECK-LABEL: fmul_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vfmul.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = load <2 x double>, ptr %y
  %c = fmul <2 x double> %a, %b
  store <2 x double> %c, ptr %x
  ret void
}

; fdiv: same structure as the fadd tests, with vfdiv.vv as the arithmetic
; instruction.

define void @fdiv_v8bf16(ptr %x, ptr %y) {
; CHECK-LABEL: fdiv_v8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfdiv.vv v8, v12, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT:    vse16.v v10, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x bfloat>, ptr %x
  %b = load <8 x bfloat>, ptr %y
  %c = fdiv <8 x bfloat> %a, %b
  store <8 x bfloat> %c, ptr %x
  ret void
}

define void @fdiv_v6bf16(ptr %x, ptr %y) {
; CHECK-LABEL: fdiv_v6bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vfwcvtbf16.f.f.v v12, v9
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfdiv.vv v8, v12, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT:    vse16.v v10, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x bfloat>, ptr %x
  %b = load <6 x bfloat>, ptr %y
  %c = fdiv <6 x bfloat> %a, %b
  store <6 x bfloat> %c, ptr %x
  ret void
}

define void @fdiv_v8f16(ptr %x, ptr %y) {
; ZVFH-LABEL: fdiv_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vfdiv.vv v8, v8, v9
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fdiv_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfdiv.vv v8, v12, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT:    vse16.v v10, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = fdiv <8 x half> %a, %b
  store <8 x half> %c, ptr %x
  ret void
}

define void @fdiv_v6f16(ptr %x, ptr %y) {
; ZVFH-LABEL: fdiv_v6f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vfdiv.vv v8, v8, v9
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fdiv_v6f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT:    vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT:    vfdiv.vv v8, v12, v10
; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT:    vse16.v v10, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <6 x half>, ptr %x
  %b = load <6 x half>, ptr %y
  %c = fdiv <6 x half> %a, %b
  store <6 x half> %c, ptr %x
  ret void
}

define void @fdiv_v4f32(ptr %x, ptr %y) {
; CHECK-LABEL: fdiv_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vfdiv.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = fdiv <4 x float> %a, %b
  store <4 x float> %c, ptr %x
  ret void
}

define void @fdiv_v2f64(ptr %x, ptr %y) {
; CHECK-LABEL: fdiv_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vfdiv.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = load <2 x double>, ptr %y
  %c = fdiv <2 x double> %a, %b
  store <2 x double> %c, ptr %x
  ret void
}

; fneg: bfloat vectors (and half vectors under the ZVFHMIN prefix) flip the
; sign bit with an integer vxor.vx against 0x8000 (materialized by "lui a1, 8").
; half under the ZVFH prefix, float and double use vfneg.v.

define void @fneg_v8bf16(ptr %x) {
; CHECK-LABEL: fneg_v8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    lui a1, 8
; CHECK-NEXT:    vxor.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x bfloat>, ptr %x
  %b = fneg <8 x bfloat> %a
  store <8 x bfloat> %b, ptr %x
  ret void
}

define void @fneg_v6bf16(ptr %x) {
; CHECK-LABEL: fneg_v6bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    lui a1, 8
; CHECK-NEXT:    vxor.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x bfloat>, ptr %x
  %b = fneg <6 x bfloat> %a
  store <6 x bfloat> %b, ptr %x
  ret void
}

define void @fneg_v8f16(ptr %x) {
; ZVFH-LABEL: fneg_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vfneg.v v8, v8
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fneg_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-NEXT:    lui a1, 8
; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
; ZVFHMIN-NEXT:    vse16.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = fneg <8 x half> %a
  store <8 x half> %b, ptr %x
  ret void
}

define void @fneg_v6f16(ptr %x) {
; ZVFH-LABEL: fneg_v6f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vfneg.v v8, v8
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fneg_v6f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-NEXT:    lui a1, 8
; ZVFHMIN-NEXT:    vxor.vx v8, v8, a1
; ZVFHMIN-NEXT:    vse16.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <6 x half>, ptr %x
  %b = fneg <6 x half> %a
  store <6 x half> %b, ptr %x
  ret void
}

define void @fneg_v4f32(ptr %x) {
; CHECK-LABEL: fneg_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfneg.v v8, v8
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = fneg <4 x float> %a
  store <4 x float> %b, ptr %x
  ret void
}

define void @fneg_v2f64(ptr %x) {
; CHECK-LABEL: fneg_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfneg.v v8, v8
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = fneg <2 x double> %a
  store <2 x double> %b, ptr %x
  ret void
}

; fabs: bfloat vectors (and half vectors under the ZVFHMIN prefix) clear the
; sign bit with an integer vand.vx against 0x7fff (lui 8 followed by addi -1).
; half under the ZVFH prefix, float and double use vfabs.v.

define void @fabs_v8bf16(ptr %x) {
; CHECK-LABEL: fabs_v8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    lui a1, 8
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x bfloat>, ptr %x
  %b = call <8 x bfloat> @llvm.fabs.v8bf16(<8 x bfloat> %a)
  store <8 x bfloat> %b, ptr %x
  ret void
}

define void @fabs_v6bf16(ptr %x) {
; CHECK-LABEL: fabs_v6bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    lui a1, 8
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x bfloat>, ptr %x
  %b = call <6 x bfloat> @llvm.fabs.v6bf16(<6 x bfloat> %a)
  store <6 x bfloat> %b, ptr %x
  ret void
}

define void @fabs_v8f16(ptr %x) {
; ZVFH-LABEL: fabs_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vfabs.v v8, v8
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fabs_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-NEXT:    lui a1, 8
; ZVFHMIN-NEXT:    addi a1, a1, -1
; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
; ZVFHMIN-NEXT:    vse16.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
  store <8 x half> %b, ptr %x
  ret void
}

define void @fabs_v6f16(ptr %x) {
; ZVFH-LABEL: fabs_v6f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vfabs.v v8, v8
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: fabs_v6f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-NEXT:    lui a1, 8
; ZVFHMIN-NEXT:    addi a1, a1, -1
; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
; ZVFHMIN-NEXT:    vse16.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <6 x half>, ptr %x
  %b = call <6 x half> @llvm.fabs.v6f16(<6 x half> %a)
  store <6 x half> %b, ptr %x
  ret void
}

define void @fabs_v4f32(ptr %x) {
; CHECK-LABEL: fabs_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfabs.v v8, v8
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
  store <4 x float> %b, ptr %x
  ret void
}

define void @fabs_v2f64(ptr %x) {
; CHECK-LABEL: fabs_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vfabs.v v8, v8
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = call <2 x double> @llvm.fabs.v2f64(<2 x double> %a)
  store <2 x double> %b, ptr %x
  ret void
}

; copysign (vector, vector): for bfloat (and half under the ZVFHMIN prefix) the
; sign bit of the vector loaded from %y (mask 0x8000) is OR-ed with the
; magnitude bits of the vector loaded from %x (mask 0x7fff) using integer
; instructions. half under the ZVFH prefix, float and double use vfsgnj.vv.

define void @copysign_v8bf16(ptr %x, ptr %y) {
; CHECK-LABEL: copysign_v8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    lui a1, 8
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vand.vx v9, v9, a1
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x bfloat>, ptr %x
  %b = load <8 x bfloat>, ptr %y
  %c = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
  store <8 x bfloat> %c, ptr %x
  ret void
}

define void @copysign_v6bf16(ptr %x, ptr %y) {
; CHECK-LABEL: copysign_v6bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    lui a1, 8
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    vand.vx v9, v9, a1
; CHECK-NEXT:    vor.vv v8, v9, v8
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x bfloat>, ptr %x
  %b = load <6 x bfloat>, ptr %y
  %c = call <6 x bfloat> @llvm.copysign.v6bf16(<6 x bfloat> %a, <6 x bfloat> %b)
  store <6 x bfloat> %c, ptr %x
  ret void
}

define void @copysign_v8f16(ptr %x, ptr %y) {
; ZVFH-LABEL: copysign_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vfsgnj.vv v8, v8, v9
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: copysign_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-NEXT:    lui a1, 8
; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
; ZVFHMIN-NEXT:    addi a1, a1, -1
; ZVFHMIN-NEXT:    vand.vx v9, v9, a1
; ZVFHMIN-NEXT:    vor.vv v8, v9, v8
; ZVFHMIN-NEXT:    vse16.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b)
  store <8 x half> %c, ptr %x
  ret void
}

define void @copysign_v6f16(ptr %x, ptr %y) {
; ZVFH-LABEL: copysign_v6f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vle16.v v9, (a1)
; ZVFH-NEXT:    vfsgnj.vv v8, v8, v9
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: copysign_v6f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a1)
; ZVFHMIN-NEXT:    vle16.v v9, (a0)
; ZVFHMIN-NEXT:    lui a1, 8
; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
; ZVFHMIN-NEXT:    addi a1, a1, -1
; ZVFHMIN-NEXT:    vand.vx v9, v9, a1
; ZVFHMIN-NEXT:    vor.vv v8, v9, v8
; ZVFHMIN-NEXT:    vse16.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <6 x half>, ptr %x
  %b = load <6 x half>, ptr %y
  %c = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %b)
  store <6 x half> %c, ptr %x
  ret void
}

define void @copysign_v4f32(ptr %x, ptr %y) {
; CHECK-LABEL: copysign_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vfsgnj.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
  store <4 x float> %c, ptr %x
  ret void
}

define void @copysign_v2f64(ptr %x, ptr %y) {
; CHECK-LABEL: copysign_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vfsgnj.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %x
  %b = load <2 x double>, ptr %y
  %c = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b)
  store <2 x double> %c, ptr %x
  ret void
}

; copysign (vector, splatted scalar): the sign operand is a scalar %y splatted
; with insertelement + shufflevector. For bfloat (and half under the ZVFHMIN
; prefix) the scalar is moved to a GPR with fmv.x.w, splatted with vmv.v.x and
; combined using the same integer masking as above. half under the ZVFH prefix
; and float use vfsgnj.vf directly on fa0.

define void @copysign_vf_v8bf16(ptr %x, bfloat %y) {
; CHECK-LABEL: copysign_vf_v8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.w a1, fa0
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    lui a2, 8
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    addi a1, a2, -1
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    vand.vx v9, v9, a2
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x bfloat>, ptr %x
  %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
  %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
  %d = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> %a, <8 x bfloat> %c)
  store <8 x bfloat> %d, ptr %x
  ret void
}

define void @copysign_vf_v6bf16(ptr %x, bfloat %y) {
; CHECK-LABEL: copysign_vf_v6bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    fmv.x.w a1, fa0
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    lui a2, 8
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    addi a1, a2, -1
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    vand.vx v9, v9, a2
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x bfloat>, ptr %x
  %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
  %c = shufflevector <6 x bfloat> %b, <6 x bfloat> poison, <6 x i32> zeroinitializer
  %d = call <6 x bfloat> @llvm.copysign.v6bf16(<6 x bfloat> %a, <6 x bfloat> %c)
  store <6 x bfloat> %d, ptr %x
  ret void
}

define void @copysign_vf_v8f16(ptr %x, half %y) {
; ZVFH-LABEL: copysign_vf_v8f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vfsgnj.vf v8, v8, fa0
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: copysign_vf_v8f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
; ZVFHMIN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-NEXT:    lui a2, 8
; ZVFHMIN-NEXT:    vmv.v.x v9, a1
; ZVFHMIN-NEXT:    addi a1, a2, -1
; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
; ZVFHMIN-NEXT:    vand.vx v9, v9, a2
; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
; ZVFHMIN-NEXT:    vse16.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <8 x half>, ptr %x
  %b = insertelement <8 x half> poison, half %y, i32 0
  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
  %d = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %c)
  store <8 x half> %d, ptr %x
  ret void
}

define void @copysign_vf_v6f16(ptr %x, half %y) {
; ZVFH-LABEL: copysign_vf_v6f16:
; ZVFH:       # %bb.0:
; ZVFH-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT:    vle16.v v8, (a0)
; ZVFH-NEXT:    vfsgnj.vf v8, v8, fa0
; ZVFH-NEXT:    vse16.v v8, (a0)
; ZVFH-NEXT:    ret
;
; ZVFHMIN-LABEL: copysign_vf_v6f16:
; ZVFHMIN:       # %bb.0:
; ZVFHMIN-NEXT:    fmv.x.w a1, fa0
; ZVFHMIN-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT:    vle16.v v8, (a0)
; ZVFHMIN-NEXT:    lui a2, 8
; ZVFHMIN-NEXT:    vmv.v.x v9, a1
; ZVFHMIN-NEXT:    addi a1, a2, -1
; ZVFHMIN-NEXT:    vand.vx v8, v8, a1
; ZVFHMIN-NEXT:    vand.vx v9, v9, a2
; ZVFHMIN-NEXT:    vor.vv v8, v8, v9
; ZVFHMIN-NEXT:    vse16.v v8, (a0)
; ZVFHMIN-NEXT:    ret
  %a = load <6 x half>, ptr %x
  %b = insertelement <6 x half> poison, half %y, i32 0
  %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
  %d = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %c)
  store <6 x half> %d, ptr %x
  ret void
}

define void @copysign_vf_v4f32(ptr %x, float %y) {
; CHECK-LABEL: copysign_vf_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
%a = load <4 x float>, ptr %x 999 %b = insertelement <4 x float> poison, float %y, i32 0 1000 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer 1001 %d = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %c) 1002 store <4 x float> %d, ptr %x 1003 ret void 1004} 1005 1006define void @copysign_vf_v2f64(ptr %x, double %y) { 1007; CHECK-LABEL: copysign_vf_v2f64: 1008; CHECK: # %bb.0: 1009; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma 1010; CHECK-NEXT: vle64.v v8, (a0) 1011; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 1012; CHECK-NEXT: vse64.v v8, (a0) 1013; CHECK-NEXT: ret 1014 %a = load <2 x double>, ptr %x 1015 %b = insertelement <2 x double> poison, double %y, i32 0 1016 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer 1017 %d = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %c) 1018 store <2 x double> %d, ptr %x 1019 ret void 1020} 1021 1022define void @copysign_neg_v8bf16(ptr %x, ptr %y) { 1023; CHECK-LABEL: copysign_neg_v8bf16: 1024; CHECK: # %bb.0: 1025; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma 1026; CHECK-NEXT: vle16.v v8, (a1) 1027; CHECK-NEXT: vle16.v v9, (a0) 1028; CHECK-NEXT: lui a1, 8 1029; CHECK-NEXT: addi a2, a1, -1 1030; CHECK-NEXT: vxor.vx v8, v8, a1 1031; CHECK-NEXT: vand.vx v9, v9, a2 1032; CHECK-NEXT: vand.vx v8, v8, a1 1033; CHECK-NEXT: vor.vv v8, v9, v8 1034; CHECK-NEXT: vse16.v v8, (a0) 1035; CHECK-NEXT: ret 1036 %a = load <8 x bfloat>, ptr %x 1037 %b = load <8 x bfloat>, ptr %y 1038 %c = fneg <8 x bfloat> %b 1039 %d = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> %a, <8 x bfloat> %c) 1040 store <8 x bfloat> %d, ptr %x 1041 ret void 1042} 1043 1044define void @copysign_neg_v6bf16(ptr %x, ptr %y) { 1045; CHECK-LABEL: copysign_neg_v6bf16: 1046; CHECK: # %bb.0: 1047; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma 1048; CHECK-NEXT: vle16.v v8, (a1) 1049; CHECK-NEXT: vle16.v v9, (a0) 1050; CHECK-NEXT: lui a1, 8 1051; CHECK-NEXT: addi a2, a1, -1 
1052; CHECK-NEXT: vxor.vx v8, v8, a1 1053; CHECK-NEXT: vand.vx v9, v9, a2 1054; CHECK-NEXT: vand.vx v8, v8, a1 1055; CHECK-NEXT: vor.vv v8, v9, v8 1056; CHECK-NEXT: vse16.v v8, (a0) 1057; CHECK-NEXT: ret 1058 %a = load <6 x bfloat>, ptr %x 1059 %b = load <6 x bfloat>, ptr %y 1060 %c = fneg <6 x bfloat> %b 1061 %d = call <6 x bfloat> @llvm.copysign.v6bf16(<6 x bfloat> %a, <6 x bfloat> %c) 1062 store <6 x bfloat> %d, ptr %x 1063 ret void 1064} 1065 1066define void @copysign_neg_v8f16(ptr %x, ptr %y) { 1067; ZVFH-LABEL: copysign_neg_v8f16: 1068; ZVFH: # %bb.0: 1069; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma 1070; ZVFH-NEXT: vle16.v v8, (a0) 1071; ZVFH-NEXT: vle16.v v9, (a1) 1072; ZVFH-NEXT: vfsgnjn.vv v8, v8, v9 1073; ZVFH-NEXT: vse16.v v8, (a0) 1074; ZVFH-NEXT: ret 1075; 1076; ZVFHMIN-LABEL: copysign_neg_v8f16: 1077; ZVFHMIN: # %bb.0: 1078; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma 1079; ZVFHMIN-NEXT: vle16.v v8, (a1) 1080; ZVFHMIN-NEXT: vle16.v v9, (a0) 1081; ZVFHMIN-NEXT: lui a1, 8 1082; ZVFHMIN-NEXT: addi a2, a1, -1 1083; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 1084; ZVFHMIN-NEXT: vand.vx v9, v9, a2 1085; ZVFHMIN-NEXT: vand.vx v8, v8, a1 1086; ZVFHMIN-NEXT: vor.vv v8, v9, v8 1087; ZVFHMIN-NEXT: vse16.v v8, (a0) 1088; ZVFHMIN-NEXT: ret 1089 %a = load <8 x half>, ptr %x 1090 %b = load <8 x half>, ptr %y 1091 %c = fneg <8 x half> %b 1092 %d = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %c) 1093 store <8 x half> %d, ptr %x 1094 ret void 1095} 1096 1097define void @copysign_neg_v6f16(ptr %x, ptr %y) { 1098; ZVFH-LABEL: copysign_neg_v6f16: 1099; ZVFH: # %bb.0: 1100; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma 1101; ZVFH-NEXT: vle16.v v8, (a0) 1102; ZVFH-NEXT: vle16.v v9, (a1) 1103; ZVFH-NEXT: vfsgnjn.vv v8, v8, v9 1104; ZVFH-NEXT: vse16.v v8, (a0) 1105; ZVFH-NEXT: ret 1106; 1107; ZVFHMIN-LABEL: copysign_neg_v6f16: 1108; ZVFHMIN: # %bb.0: 1109; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma 1110; ZVFHMIN-NEXT: vle16.v v8, (a1) 1111; ZVFHMIN-NEXT: 
vle16.v v9, (a0) 1112; ZVFHMIN-NEXT: lui a1, 8 1113; ZVFHMIN-NEXT: addi a2, a1, -1 1114; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 1115; ZVFHMIN-NEXT: vand.vx v9, v9, a2 1116; ZVFHMIN-NEXT: vand.vx v8, v8, a1 1117; ZVFHMIN-NEXT: vor.vv v8, v9, v8 1118; ZVFHMIN-NEXT: vse16.v v8, (a0) 1119; ZVFHMIN-NEXT: ret 1120 %a = load <6 x half>, ptr %x 1121 %b = load <6 x half>, ptr %y 1122 %c = fneg <6 x half> %b 1123 %d = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %c) 1124 store <6 x half> %d, ptr %x 1125 ret void 1126} 1127 1128define void @copysign_neg_v4f32(ptr %x, ptr %y) { 1129; CHECK-LABEL: copysign_neg_v4f32: 1130; CHECK: # %bb.0: 1131; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 1132; CHECK-NEXT: vle32.v v8, (a0) 1133; CHECK-NEXT: vle32.v v9, (a1) 1134; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 1135; CHECK-NEXT: vse32.v v8, (a0) 1136; CHECK-NEXT: ret 1137 %a = load <4 x float>, ptr %x 1138 %b = load <4 x float>, ptr %y 1139 %c = fneg <4 x float> %b 1140 %d = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %c) 1141 store <4 x float> %d, ptr %x 1142 ret void 1143} 1144 1145define void @copysign_neg_v2f64(ptr %x, ptr %y) { 1146; CHECK-LABEL: copysign_neg_v2f64: 1147; CHECK: # %bb.0: 1148; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma 1149; CHECK-NEXT: vle64.v v8, (a0) 1150; CHECK-NEXT: vle64.v v9, (a1) 1151; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 1152; CHECK-NEXT: vse64.v v8, (a0) 1153; CHECK-NEXT: ret 1154 %a = load <2 x double>, ptr %x 1155 %b = load <2 x double>, ptr %y 1156 %c = fneg <2 x double> %b 1157 %d = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %c) 1158 store <2 x double> %d, ptr %x 1159 ret void 1160} 1161 1162define void @copysign_neg_trunc_v4bf16_v4f32(ptr %x, ptr %y) { 1163; CHECK-LABEL: copysign_neg_trunc_v4bf16_v4f32: 1164; CHECK: # %bb.0: 1165; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma 1166; CHECK-NEXT: vle16.v v8, (a0) 1167; CHECK-NEXT: vle32.v v9, (a1) 1168; CHECK-NEXT: lui a1, 8 1169; CHECK-NEXT: addi 
a2, a1, -1 1170; CHECK-NEXT: vand.vx v8, v8, a2 1171; CHECK-NEXT: vfncvtbf16.f.f.w v10, v9 1172; CHECK-NEXT: vxor.vx v9, v10, a1 1173; CHECK-NEXT: vand.vx v9, v9, a1 1174; CHECK-NEXT: vor.vv v8, v8, v9 1175; CHECK-NEXT: vse16.v v8, (a0) 1176; CHECK-NEXT: ret 1177 %a = load <4 x bfloat>, ptr %x 1178 %b = load <4 x float>, ptr %y 1179 %c = fneg <4 x float> %b 1180 %d = fptrunc <4 x float> %c to <4 x bfloat> 1181 %e = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> %a, <4 x bfloat> %d) 1182 store <4 x bfloat> %e, ptr %x 1183 ret void 1184} 1185 1186define void @copysign_neg_trunc_v3bf16_v3f32(ptr %x, ptr %y) { 1187; CHECK-LABEL: copysign_neg_trunc_v3bf16_v3f32: 1188; CHECK: # %bb.0: 1189; CHECK-NEXT: vsetivli zero, 3, e16, mf2, ta, ma 1190; CHECK-NEXT: vle16.v v8, (a0) 1191; CHECK-NEXT: vle32.v v9, (a1) 1192; CHECK-NEXT: lui a1, 8 1193; CHECK-NEXT: addi a2, a1, -1 1194; CHECK-NEXT: vand.vx v8, v8, a2 1195; CHECK-NEXT: vfncvtbf16.f.f.w v10, v9 1196; CHECK-NEXT: vxor.vx v9, v10, a1 1197; CHECK-NEXT: vand.vx v9, v9, a1 1198; CHECK-NEXT: vor.vv v8, v8, v9 1199; CHECK-NEXT: vse16.v v8, (a0) 1200; CHECK-NEXT: ret 1201 %a = load <3 x bfloat>, ptr %x 1202 %b = load <3 x float>, ptr %y 1203 %c = fneg <3 x float> %b 1204 %d = fptrunc <3 x float> %c to <3 x bfloat> 1205 %e = call <3 x bfloat> @llvm.copysign.v3bf16(<3 x bfloat> %a, <3 x bfloat> %d) 1206 store <3 x bfloat> %e, ptr %x 1207 ret void 1208} 1209 1210define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) { 1211; ZVFH-LABEL: copysign_neg_trunc_v4f16_v4f32: 1212; ZVFH: # %bb.0: 1213; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma 1214; ZVFH-NEXT: vle32.v v8, (a1) 1215; ZVFH-NEXT: vle16.v v9, (a0) 1216; ZVFH-NEXT: vfncvt.f.f.w v10, v8 1217; ZVFH-NEXT: vfsgnjn.vv v8, v9, v10 1218; ZVFH-NEXT: vse16.v v8, (a0) 1219; ZVFH-NEXT: ret 1220; 1221; ZVFHMIN-LABEL: copysign_neg_trunc_v4f16_v4f32: 1222; ZVFHMIN: # %bb.0: 1223; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma 1224; ZVFHMIN-NEXT: vle16.v v8, (a0) 1225; 
ZVFHMIN-NEXT: vle32.v v9, (a1) 1226; ZVFHMIN-NEXT: lui a1, 8 1227; ZVFHMIN-NEXT: addi a2, a1, -1 1228; ZVFHMIN-NEXT: vand.vx v8, v8, a2 1229; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 1230; ZVFHMIN-NEXT: vxor.vx v9, v10, a1 1231; ZVFHMIN-NEXT: vand.vx v9, v9, a1 1232; ZVFHMIN-NEXT: vor.vv v8, v8, v9 1233; ZVFHMIN-NEXT: vse16.v v8, (a0) 1234; ZVFHMIN-NEXT: ret 1235 %a = load <4 x half>, ptr %x 1236 %b = load <4 x float>, ptr %y 1237 %c = fneg <4 x float> %b 1238 %d = fptrunc <4 x float> %c to <4 x half> 1239 %e = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %d) 1240 store <4 x half> %e, ptr %x 1241 ret void 1242} 1243 1244define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) { 1245; ZVFH-LABEL: copysign_neg_trunc_v3f16_v3f32: 1246; ZVFH: # %bb.0: 1247; ZVFH-NEXT: vsetivli zero, 3, e16, mf2, ta, ma 1248; ZVFH-NEXT: vle32.v v8, (a1) 1249; ZVFH-NEXT: vle16.v v9, (a0) 1250; ZVFH-NEXT: vfncvt.f.f.w v10, v8 1251; ZVFH-NEXT: vfsgnjn.vv v8, v9, v10 1252; ZVFH-NEXT: vse16.v v8, (a0) 1253; ZVFH-NEXT: ret 1254; 1255; ZVFHMIN-LABEL: copysign_neg_trunc_v3f16_v3f32: 1256; ZVFHMIN: # %bb.0: 1257; ZVFHMIN-NEXT: vsetivli zero, 3, e16, mf2, ta, ma 1258; ZVFHMIN-NEXT: vle16.v v8, (a0) 1259; ZVFHMIN-NEXT: vle32.v v9, (a1) 1260; ZVFHMIN-NEXT: lui a1, 8 1261; ZVFHMIN-NEXT: addi a2, a1, -1 1262; ZVFHMIN-NEXT: vand.vx v8, v8, a2 1263; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v9 1264; ZVFHMIN-NEXT: vxor.vx v9, v10, a1 1265; ZVFHMIN-NEXT: vand.vx v9, v9, a1 1266; ZVFHMIN-NEXT: vor.vv v8, v8, v9 1267; ZVFHMIN-NEXT: vse16.v v8, (a0) 1268; ZVFHMIN-NEXT: ret 1269 %a = load <3 x half>, ptr %x 1270 %b = load <3 x float>, ptr %y 1271 %c = fneg <3 x float> %b 1272 %d = fptrunc <3 x float> %c to <3 x half> 1273 %e = call <3 x half> @llvm.copysign.v3f16(<3 x half> %a, <3 x half> %d) 1274 store <3 x half> %e, ptr %x 1275 ret void 1276} 1277 1278define void @copysign_neg_ext_v2f64_v2f32(ptr %x, ptr %y) { 1279; CHECK-LABEL: copysign_neg_ext_v2f64_v2f32: 1280; CHECK: # %bb.0: 1281; CHECK-NEXT: 
vsetivli zero, 2, e32, mf2, ta, ma 1282; CHECK-NEXT: vle32.v v8, (a1) 1283; CHECK-NEXT: vle64.v v9, (a0) 1284; CHECK-NEXT: vfwcvt.f.f.v v10, v8 1285; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma 1286; CHECK-NEXT: vfsgnjn.vv v8, v9, v10 1287; CHECK-NEXT: vse64.v v8, (a0) 1288; CHECK-NEXT: ret 1289 %a = load <2 x double>, ptr %x 1290 %b = load <2 x float>, ptr %y 1291 %c = fneg <2 x float> %b 1292 %d = fpext <2 x float> %c to <2 x double> 1293 %e = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %d) 1294 store <2 x double> %e, ptr %x 1295 ret void 1296} 1297 1298define void @sqrt_v8bf16(ptr %x) { 1299; CHECK-LABEL: sqrt_v8bf16: 1300; CHECK: # %bb.0: 1301; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma 1302; CHECK-NEXT: vle16.v v8, (a0) 1303; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 1304; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 1305; CHECK-NEXT: vfsqrt.v v8, v10 1306; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma 1307; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8 1308; CHECK-NEXT: vse16.v v10, (a0) 1309; CHECK-NEXT: ret 1310 %a = load <8 x bfloat>, ptr %x 1311 %b = call <8 x bfloat> @llvm.sqrt.v8bf16(<8 x bfloat> %a) 1312 store <8 x bfloat> %b, ptr %x 1313 ret void 1314} 1315 1316define void @sqrt_v6bf16(ptr %x) { 1317; CHECK-LABEL: sqrt_v6bf16: 1318; CHECK: # %bb.0: 1319; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma 1320; CHECK-NEXT: vle16.v v8, (a0) 1321; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma 1322; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 1323; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 1324; CHECK-NEXT: vfsqrt.v v8, v10 1325; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma 1326; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8 1327; CHECK-NEXT: vse16.v v10, (a0) 1328; CHECK-NEXT: ret 1329 %a = load <6 x bfloat>, ptr %x 1330 %b = call <6 x bfloat> @llvm.sqrt.v6bf16(<6 x bfloat> %a) 1331 store <6 x bfloat> %b, ptr %x 1332 ret void 1333} 1334 1335define void @sqrt_v8f16(ptr %x) { 1336; ZVFH-LABEL: sqrt_v8f16: 1337; ZVFH: # %bb.0: 1338; ZVFH-NEXT: 
vsetivli zero, 8, e16, m1, ta, ma 1339; ZVFH-NEXT: vle16.v v8, (a0) 1340; ZVFH-NEXT: vfsqrt.v v8, v8 1341; ZVFH-NEXT: vse16.v v8, (a0) 1342; ZVFH-NEXT: ret 1343; 1344; ZVFHMIN-LABEL: sqrt_v8f16: 1345; ZVFHMIN: # %bb.0: 1346; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma 1347; ZVFHMIN-NEXT: vle16.v v8, (a0) 1348; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 1349; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 1350; ZVFHMIN-NEXT: vfsqrt.v v8, v10 1351; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma 1352; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 1353; ZVFHMIN-NEXT: vse16.v v10, (a0) 1354; ZVFHMIN-NEXT: ret 1355 %a = load <8 x half>, ptr %x 1356 %b = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %a) 1357 store <8 x half> %b, ptr %x 1358 ret void 1359} 1360 1361define void @sqrt_v6f16(ptr %x) { 1362; ZVFH-LABEL: sqrt_v6f16: 1363; ZVFH: # %bb.0: 1364; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma 1365; ZVFH-NEXT: vle16.v v8, (a0) 1366; ZVFH-NEXT: vfsqrt.v v8, v8 1367; ZVFH-NEXT: vse16.v v8, (a0) 1368; ZVFH-NEXT: ret 1369; 1370; ZVFHMIN-LABEL: sqrt_v6f16: 1371; ZVFHMIN: # %bb.0: 1372; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma 1373; ZVFHMIN-NEXT: vle16.v v8, (a0) 1374; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma 1375; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 1376; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 1377; ZVFHMIN-NEXT: vfsqrt.v v8, v10 1378; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma 1379; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 1380; ZVFHMIN-NEXT: vse16.v v10, (a0) 1381; ZVFHMIN-NEXT: ret 1382 %a = load <6 x half>, ptr %x 1383 %b = call <6 x half> @llvm.sqrt.v6f16(<6 x half> %a) 1384 store <6 x half> %b, ptr %x 1385 ret void 1386} 1387 1388define void @sqrt_v4f32(ptr %x) { 1389; CHECK-LABEL: sqrt_v4f32: 1390; CHECK: # %bb.0: 1391; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 1392; CHECK-NEXT: vle32.v v8, (a0) 1393; CHECK-NEXT: vfsqrt.v v8, v8 1394; CHECK-NEXT: vse32.v v8, (a0) 1395; CHECK-NEXT: ret 1396 %a = load <4 x float>, ptr %x 1397 %b = call <4 x float> 
@llvm.sqrt.v4f32(<4 x float> %a) 1398 store <4 x float> %b, ptr %x 1399 ret void 1400} 1401 1402define void @sqrt_v2f64(ptr %x) { 1403; CHECK-LABEL: sqrt_v2f64: 1404; CHECK: # %bb.0: 1405; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma 1406; CHECK-NEXT: vle64.v v8, (a0) 1407; CHECK-NEXT: vfsqrt.v v8, v8 1408; CHECK-NEXT: vse64.v v8, (a0) 1409; CHECK-NEXT: ret 1410 %a = load <2 x double>, ptr %x 1411 %b = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) 1412 store <2 x double> %b, ptr %x 1413 ret void 1414} 1415 1416define void @fma_v8bf16(ptr %x, ptr %y, ptr %z) { 1417; CHECK-LABEL: fma_v8bf16: 1418; CHECK: # %bb.0: 1419; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma 1420; CHECK-NEXT: vle16.v v8, (a2) 1421; CHECK-NEXT: vle16.v v9, (a0) 1422; CHECK-NEXT: vle16.v v10, (a1) 1423; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 1424; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9 1425; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 1426; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 1427; CHECK-NEXT: vfmadd.vv v8, v14, v12 1428; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma 1429; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8 1430; CHECK-NEXT: vse16.v v10, (a0) 1431; CHECK-NEXT: ret 1432 %a = load <8 x bfloat>, ptr %x 1433 %b = load <8 x bfloat>, ptr %y 1434 %c = load <8 x bfloat>, ptr %z 1435 %d = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) 1436 store <8 x bfloat> %d, ptr %x 1437 ret void 1438} 1439 1440define void @fma_v6bf16(ptr %x, ptr %y, ptr %z) { 1441; CHECK-LABEL: fma_v6bf16: 1442; CHECK: # %bb.0: 1443; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma 1444; CHECK-NEXT: vle16.v v8, (a2) 1445; CHECK-NEXT: vle16.v v9, (a0) 1446; CHECK-NEXT: vle16.v v10, (a1) 1447; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma 1448; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 1449; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9 1450; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 1451; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 1452; CHECK-NEXT: vfmadd.vv v8, v14, v12 1453; CHECK-NEXT: vsetivli 
zero, 6, e16, m1, ta, ma 1454; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8 1455; CHECK-NEXT: vse16.v v10, (a0) 1456; CHECK-NEXT: ret 1457 %a = load <6 x bfloat>, ptr %x 1458 %b = load <6 x bfloat>, ptr %y 1459 %c = load <6 x bfloat>, ptr %z 1460 %d = call <6 x bfloat> @llvm.fma.v6bf16(<6 x bfloat> %a, <6 x bfloat> %b, <6 x bfloat> %c) 1461 store <6 x bfloat> %d, ptr %x 1462 ret void 1463} 1464 1465define void @fma_v8f16(ptr %x, ptr %y, ptr %z) { 1466; ZVFH-LABEL: fma_v8f16: 1467; ZVFH: # %bb.0: 1468; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma 1469; ZVFH-NEXT: vle16.v v8, (a0) 1470; ZVFH-NEXT: vle16.v v9, (a1) 1471; ZVFH-NEXT: vle16.v v10, (a2) 1472; ZVFH-NEXT: vfmacc.vv v10, v8, v9 1473; ZVFH-NEXT: vse16.v v10, (a0) 1474; ZVFH-NEXT: ret 1475; 1476; ZVFHMIN-LABEL: fma_v8f16: 1477; ZVFHMIN: # %bb.0: 1478; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma 1479; ZVFHMIN-NEXT: vle16.v v8, (a2) 1480; ZVFHMIN-NEXT: vle16.v v9, (a0) 1481; ZVFHMIN-NEXT: vle16.v v10, (a1) 1482; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 1483; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 1484; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 1485; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 1486; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v12 1487; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma 1488; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 1489; ZVFHMIN-NEXT: vse16.v v10, (a0) 1490; ZVFHMIN-NEXT: ret 1491 %a = load <8 x half>, ptr %x 1492 %b = load <8 x half>, ptr %y 1493 %c = load <8 x half>, ptr %z 1494 %d = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) 1495 store <8 x half> %d, ptr %x 1496 ret void 1497} 1498 1499define void @fma_v6f16(ptr %x, ptr %y, ptr %z) { 1500; ZVFH-LABEL: fma_v6f16: 1501; ZVFH: # %bb.0: 1502; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma 1503; ZVFH-NEXT: vle16.v v8, (a0) 1504; ZVFH-NEXT: vle16.v v9, (a1) 1505; ZVFH-NEXT: vle16.v v10, (a2) 1506; ZVFH-NEXT: vfmacc.vv v10, v8, v9 1507; ZVFH-NEXT: vse16.v v10, (a0) 1508; ZVFH-NEXT: ret 1509; 1510; ZVFHMIN-LABEL: fma_v6f16: 1511; 
ZVFHMIN: # %bb.0: 1512; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma 1513; ZVFHMIN-NEXT: vle16.v v8, (a2) 1514; ZVFHMIN-NEXT: vle16.v v9, (a0) 1515; ZVFHMIN-NEXT: vle16.v v10, (a1) 1516; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma 1517; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 1518; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 1519; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 1520; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 1521; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v12 1522; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma 1523; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 1524; ZVFHMIN-NEXT: vse16.v v10, (a0) 1525; ZVFHMIN-NEXT: ret 1526 %a = load <6 x half>, ptr %x 1527 %b = load <6 x half>, ptr %y 1528 %c = load <6 x half>, ptr %z 1529 %d = call <6 x half> @llvm.fma.v6f16(<6 x half> %a, <6 x half> %b, <6 x half> %c) 1530 store <6 x half> %d, ptr %x 1531 ret void 1532} 1533 1534define void @fma_v4f32(ptr %x, ptr %y, ptr %z) { 1535; CHECK-LABEL: fma_v4f32: 1536; CHECK: # %bb.0: 1537; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 1538; CHECK-NEXT: vle32.v v8, (a0) 1539; CHECK-NEXT: vle32.v v9, (a1) 1540; CHECK-NEXT: vle32.v v10, (a2) 1541; CHECK-NEXT: vfmacc.vv v10, v8, v9 1542; CHECK-NEXT: vse32.v v10, (a0) 1543; CHECK-NEXT: ret 1544 %a = load <4 x float>, ptr %x 1545 %b = load <4 x float>, ptr %y 1546 %c = load <4 x float>, ptr %z 1547 %d = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) 1548 store <4 x float> %d, ptr %x 1549 ret void 1550} 1551 1552define void @fma_v2f64(ptr %x, ptr %y, ptr %z) { 1553; CHECK-LABEL: fma_v2f64: 1554; CHECK: # %bb.0: 1555; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma 1556; CHECK-NEXT: vle64.v v8, (a0) 1557; CHECK-NEXT: vle64.v v9, (a1) 1558; CHECK-NEXT: vle64.v v10, (a2) 1559; CHECK-NEXT: vfmacc.vv v10, v8, v9 1560; CHECK-NEXT: vse64.v v10, (a0) 1561; CHECK-NEXT: ret 1562 %a = load <2 x double>, ptr %x 1563 %b = load <2 x double>, ptr %y 1564 %c = load <2 x double>, ptr %z 1565 %d = call <2 x double> @llvm.fma.v2f64(<2 x 
double> %a, <2 x double> %b, <2 x double> %c) 1566 store <2 x double> %d, ptr %x 1567 ret void 1568} 1569 1570define void @fmsub_v8bf16(ptr %x, ptr %y, ptr %z) { 1571; CHECK-LABEL: fmsub_v8bf16: 1572; CHECK: # %bb.0: 1573; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma 1574; CHECK-NEXT: vle16.v v8, (a2) 1575; CHECK-NEXT: vle16.v v9, (a0) 1576; CHECK-NEXT: vle16.v v10, (a1) 1577; CHECK-NEXT: lui a1, 8 1578; CHECK-NEXT: vxor.vx v8, v8, a1 1579; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 1580; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8 1581; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 1582; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 1583; CHECK-NEXT: vfmadd.vv v8, v12, v14 1584; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma 1585; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8 1586; CHECK-NEXT: vse16.v v10, (a0) 1587; CHECK-NEXT: ret 1588 %a = load <8 x bfloat>, ptr %x 1589 %b = load <8 x bfloat>, ptr %y 1590 %c = load <8 x bfloat>, ptr %z 1591 %neg = fneg <8 x bfloat> %c 1592 %d = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %neg) 1593 store <8 x bfloat> %d, ptr %x 1594 ret void 1595} 1596 1597define void @fmsub_v6bf16(ptr %x, ptr %y, ptr %z) { 1598; CHECK-LABEL: fmsub_v6bf16: 1599; CHECK: # %bb.0: 1600; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma 1601; CHECK-NEXT: vle16.v v8, (a2) 1602; CHECK-NEXT: vle16.v v9, (a0) 1603; CHECK-NEXT: vle16.v v10, (a1) 1604; CHECK-NEXT: lui a1, 8 1605; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma 1606; CHECK-NEXT: vxor.vx v8, v8, a1 1607; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 1608; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8 1609; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 1610; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 1611; CHECK-NEXT: vfmadd.vv v8, v12, v14 1612; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma 1613; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8 1614; CHECK-NEXT: vse16.v v10, (a0) 1615; CHECK-NEXT: ret 1616 %a = load <6 x bfloat>, ptr %x 1617 %b = load <6 x bfloat>, ptr %y 1618 %c = load <6 x bfloat>, ptr %z 1619 %neg 
= fneg <6 x bfloat> %c 1620 %d = call <6 x bfloat> @llvm.fma.v6bf16(<6 x bfloat> %a, <6 x bfloat> %b, <6 x bfloat> %neg) 1621 store <6 x bfloat> %d, ptr %x 1622 ret void 1623} 1624 1625define void @fmsub_v8f16(ptr %x, ptr %y, ptr %z) { 1626; ZVFH-LABEL: fmsub_v8f16: 1627; ZVFH: # %bb.0: 1628; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma 1629; ZVFH-NEXT: vle16.v v8, (a0) 1630; ZVFH-NEXT: vle16.v v9, (a1) 1631; ZVFH-NEXT: vle16.v v10, (a2) 1632; ZVFH-NEXT: vfmsac.vv v10, v8, v9 1633; ZVFH-NEXT: vse16.v v10, (a0) 1634; ZVFH-NEXT: ret 1635; 1636; ZVFHMIN-LABEL: fmsub_v8f16: 1637; ZVFHMIN: # %bb.0: 1638; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma 1639; ZVFHMIN-NEXT: vle16.v v8, (a2) 1640; ZVFHMIN-NEXT: vle16.v v9, (a0) 1641; ZVFHMIN-NEXT: vle16.v v10, (a1) 1642; ZVFHMIN-NEXT: lui a1, 8 1643; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 1644; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 1645; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 1646; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 1647; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 1648; ZVFHMIN-NEXT: vfmadd.vv v8, v12, v14 1649; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma 1650; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 1651; ZVFHMIN-NEXT: vse16.v v10, (a0) 1652; ZVFHMIN-NEXT: ret 1653 %a = load <8 x half>, ptr %x 1654 %b = load <8 x half>, ptr %y 1655 %c = load <8 x half>, ptr %z 1656 %neg = fneg <8 x half> %c 1657 %d = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %neg) 1658 store <8 x half> %d, ptr %x 1659 ret void 1660} 1661 1662define void @fmsub_v6f16(ptr %x, ptr %y, ptr %z) { 1663; ZVFH-LABEL: fmsub_v6f16: 1664; ZVFH: # %bb.0: 1665; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma 1666; ZVFH-NEXT: vle16.v v8, (a0) 1667; ZVFH-NEXT: vle16.v v9, (a1) 1668; ZVFH-NEXT: vle16.v v10, (a2) 1669; ZVFH-NEXT: vfmsac.vv v10, v8, v9 1670; ZVFH-NEXT: vse16.v v10, (a0) 1671; ZVFH-NEXT: ret 1672; 1673; ZVFHMIN-LABEL: fmsub_v6f16: 1674; ZVFHMIN: # %bb.0: 1675; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma 1676; ZVFHMIN-NEXT: vle16.v 
v8, (a2) 1677; ZVFHMIN-NEXT: vle16.v v9, (a0) 1678; ZVFHMIN-NEXT: vle16.v v10, (a1) 1679; ZVFHMIN-NEXT: lui a1, 8 1680; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma 1681; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 1682; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 1683; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 1684; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 1685; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 1686; ZVFHMIN-NEXT: vfmadd.vv v8, v12, v14 1687; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma 1688; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 1689; ZVFHMIN-NEXT: vse16.v v10, (a0) 1690; ZVFHMIN-NEXT: ret 1691 %a = load <6 x half>, ptr %x 1692 %b = load <6 x half>, ptr %y 1693 %c = load <6 x half>, ptr %z 1694 %neg = fneg <6 x half> %c 1695 %d = call <6 x half> @llvm.fma.v6f16(<6 x half> %a, <6 x half> %b, <6 x half> %neg) 1696 store <6 x half> %d, ptr %x 1697 ret void 1698} 1699 1700define void @fnmsub_v4f32(ptr %x, ptr %y, ptr %z) { 1701; CHECK-LABEL: fnmsub_v4f32: 1702; CHECK: # %bb.0: 1703; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 1704; CHECK-NEXT: vle32.v v8, (a0) 1705; CHECK-NEXT: vle32.v v9, (a1) 1706; CHECK-NEXT: vle32.v v10, (a2) 1707; CHECK-NEXT: vfnmsac.vv v10, v8, v9 1708; CHECK-NEXT: vse32.v v10, (a0) 1709; CHECK-NEXT: ret 1710 %a = load <4 x float>, ptr %x 1711 %b = load <4 x float>, ptr %y 1712 %c = load <4 x float>, ptr %z 1713 %neg = fneg <4 x float> %a 1714 %d = call <4 x float> @llvm.fma.v4f32(<4 x float> %neg, <4 x float> %b, <4 x float> %c) 1715 store <4 x float> %d, ptr %x 1716 ret void 1717} 1718 1719define void @fnmadd_v2f64(ptr %x, ptr %y, ptr %z) { 1720; CHECK-LABEL: fnmadd_v2f64: 1721; CHECK: # %bb.0: 1722; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma 1723; CHECK-NEXT: vle64.v v8, (a0) 1724; CHECK-NEXT: vle64.v v9, (a1) 1725; CHECK-NEXT: vle64.v v10, (a2) 1726; CHECK-NEXT: vfnmacc.vv v10, v8, v9 1727; CHECK-NEXT: vse64.v v10, (a0) 1728; CHECK-NEXT: ret 1729 %a = load <2 x double>, ptr %x 1730 %b = load <2 x double>, ptr %y 1731 %c = load <2 x double>, ptr %z 
1732 %neg = fneg <2 x double> %b 1733 %neg2 = fneg <2 x double> %c 1734 %d = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %neg, <2 x double> %neg2) 1735 store <2 x double> %d, ptr %x 1736 ret void 1737} 1738 1739define void @fadd_v16bf16(ptr %x, ptr %y) { 1740; CHECK-LABEL: fadd_v16bf16: 1741; CHECK: # %bb.0: 1742; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma 1743; CHECK-NEXT: vle16.v v8, (a1) 1744; CHECK-NEXT: vle16.v v10, (a0) 1745; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 1746; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 1747; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma 1748; CHECK-NEXT: vfadd.vv v8, v16, v12 1749; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma 1750; CHECK-NEXT: vfncvtbf16.f.f.w v12, v8 1751; CHECK-NEXT: vse16.v v12, (a0) 1752; CHECK-NEXT: ret 1753 %a = load <16 x bfloat>, ptr %x 1754 %b = load <16 x bfloat>, ptr %y 1755 %c = fadd <16 x bfloat> %a, %b 1756 store <16 x bfloat> %c, ptr %x 1757 ret void 1758} 1759 1760define void @fadd_v16f16(ptr %x, ptr %y) { 1761; ZVFH-LABEL: fadd_v16f16: 1762; ZVFH: # %bb.0: 1763; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma 1764; ZVFH-NEXT: vle16.v v8, (a0) 1765; ZVFH-NEXT: vle16.v v10, (a1) 1766; ZVFH-NEXT: vfadd.vv v8, v8, v10 1767; ZVFH-NEXT: vse16.v v8, (a0) 1768; ZVFH-NEXT: ret 1769; 1770; ZVFHMIN-LABEL: fadd_v16f16: 1771; ZVFHMIN: # %bb.0: 1772; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma 1773; ZVFHMIN-NEXT: vle16.v v8, (a1) 1774; ZVFHMIN-NEXT: vle16.v v10, (a0) 1775; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 1776; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 1777; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma 1778; ZVFHMIN-NEXT: vfadd.vv v8, v16, v12 1779; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma 1780; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v8 1781; ZVFHMIN-NEXT: vse16.v v12, (a0) 1782; ZVFHMIN-NEXT: ret 1783 %a = load <16 x half>, ptr %x 1784 %b = load <16 x half>, ptr %y 1785 %c = fadd <16 x half> %a, %b 1786 store <16 x half> %c, ptr %x 1787 ret void 1788} 1789 1790define void 
@fadd_v8f32(ptr %x, ptr %y) { 1791; CHECK-LABEL: fadd_v8f32: 1792; CHECK: # %bb.0: 1793; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1794; CHECK-NEXT: vle32.v v8, (a0) 1795; CHECK-NEXT: vle32.v v10, (a1) 1796; CHECK-NEXT: vfadd.vv v8, v8, v10 1797; CHECK-NEXT: vse32.v v8, (a0) 1798; CHECK-NEXT: ret 1799 %a = load <8 x float>, ptr %x 1800 %b = load <8 x float>, ptr %y 1801 %c = fadd <8 x float> %a, %b 1802 store <8 x float> %c, ptr %x 1803 ret void 1804} 1805 1806define void @fadd_v4f64(ptr %x, ptr %y) { 1807; CHECK-LABEL: fadd_v4f64: 1808; CHECK: # %bb.0: 1809; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma 1810; CHECK-NEXT: vle64.v v8, (a0) 1811; CHECK-NEXT: vle64.v v10, (a1) 1812; CHECK-NEXT: vfadd.vv v8, v8, v10 1813; CHECK-NEXT: vse64.v v8, (a0) 1814; CHECK-NEXT: ret 1815 %a = load <4 x double>, ptr %x 1816 %b = load <4 x double>, ptr %y 1817 %c = fadd <4 x double> %a, %b 1818 store <4 x double> %c, ptr %x 1819 ret void 1820} 1821 1822define void @fsub_v16bf16(ptr %x, ptr %y) { 1823; CHECK-LABEL: fsub_v16bf16: 1824; CHECK: # %bb.0: 1825; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma 1826; CHECK-NEXT: vle16.v v8, (a1) 1827; CHECK-NEXT: vle16.v v10, (a0) 1828; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 1829; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 1830; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma 1831; CHECK-NEXT: vfsub.vv v8, v16, v12 1832; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma 1833; CHECK-NEXT: vfncvtbf16.f.f.w v12, v8 1834; CHECK-NEXT: vse16.v v12, (a0) 1835; CHECK-NEXT: ret 1836 %a = load <16 x bfloat>, ptr %x 1837 %b = load <16 x bfloat>, ptr %y 1838 %c = fsub <16 x bfloat> %a, %b 1839 store <16 x bfloat> %c, ptr %x 1840 ret void 1841} 1842 1843define void @fsub_v16f16(ptr %x, ptr %y) { 1844; ZVFH-LABEL: fsub_v16f16: 1845; ZVFH: # %bb.0: 1846; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma 1847; ZVFH-NEXT: vle16.v v8, (a0) 1848; ZVFH-NEXT: vle16.v v10, (a1) 1849; ZVFH-NEXT: vfsub.vv v8, v8, v10 1850; ZVFH-NEXT: vse16.v v8, (a0) 1851; ZVFH-NEXT: ret 
1852; 1853; ZVFHMIN-LABEL: fsub_v16f16: 1854; ZVFHMIN: # %bb.0: 1855; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma 1856; ZVFHMIN-NEXT: vle16.v v8, (a1) 1857; ZVFHMIN-NEXT: vle16.v v10, (a0) 1858; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 1859; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 1860; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma 1861; ZVFHMIN-NEXT: vfsub.vv v8, v16, v12 1862; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma 1863; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v8 1864; ZVFHMIN-NEXT: vse16.v v12, (a0) 1865; ZVFHMIN-NEXT: ret 1866 %a = load <16 x half>, ptr %x 1867 %b = load <16 x half>, ptr %y 1868 %c = fsub <16 x half> %a, %b 1869 store <16 x half> %c, ptr %x 1870 ret void 1871} 1872 1873define void @fsub_v8f32(ptr %x, ptr %y) { 1874; CHECK-LABEL: fsub_v8f32: 1875; CHECK: # %bb.0: 1876; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1877; CHECK-NEXT: vle32.v v8, (a0) 1878; CHECK-NEXT: vle32.v v10, (a1) 1879; CHECK-NEXT: vfsub.vv v8, v8, v10 1880; CHECK-NEXT: vse32.v v8, (a0) 1881; CHECK-NEXT: ret 1882 %a = load <8 x float>, ptr %x 1883 %b = load <8 x float>, ptr %y 1884 %c = fsub <8 x float> %a, %b 1885 store <8 x float> %c, ptr %x 1886 ret void 1887} 1888 1889define void @fsub_v4f64(ptr %x, ptr %y) { 1890; CHECK-LABEL: fsub_v4f64: 1891; CHECK: # %bb.0: 1892; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma 1893; CHECK-NEXT: vle64.v v8, (a0) 1894; CHECK-NEXT: vle64.v v10, (a1) 1895; CHECK-NEXT: vfsub.vv v8, v8, v10 1896; CHECK-NEXT: vse64.v v8, (a0) 1897; CHECK-NEXT: ret 1898 %a = load <4 x double>, ptr %x 1899 %b = load <4 x double>, ptr %y 1900 %c = fsub <4 x double> %a, %b 1901 store <4 x double> %c, ptr %x 1902 ret void 1903} 1904 1905define void @fmul_v16bf16(ptr %x, ptr %y) { 1906; CHECK-LABEL: fmul_v16bf16: 1907; CHECK: # %bb.0: 1908; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma 1909; CHECK-NEXT: vle16.v v8, (a1) 1910; CHECK-NEXT: vle16.v v10, (a0) 1911; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 1912; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 1913; CHECK-NEXT: 
vsetvli zero, zero, e32, m4, ta, ma 1914; CHECK-NEXT: vfmul.vv v8, v16, v12 1915; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma 1916; CHECK-NEXT: vfncvtbf16.f.f.w v12, v8 1917; CHECK-NEXT: vse16.v v12, (a0) 1918; CHECK-NEXT: ret 1919 %a = load <16 x bfloat>, ptr %x 1920 %b = load <16 x bfloat>, ptr %y 1921 %c = fmul <16 x bfloat> %a, %b 1922 store <16 x bfloat> %c, ptr %x 1923 ret void 1924} 1925 1926define void @fmul_v16f16(ptr %x, ptr %y) { 1927; ZVFH-LABEL: fmul_v16f16: 1928; ZVFH: # %bb.0: 1929; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma 1930; ZVFH-NEXT: vle16.v v8, (a0) 1931; ZVFH-NEXT: vle16.v v10, (a1) 1932; ZVFH-NEXT: vfmul.vv v8, v8, v10 1933; ZVFH-NEXT: vse16.v v8, (a0) 1934; ZVFH-NEXT: ret 1935; 1936; ZVFHMIN-LABEL: fmul_v16f16: 1937; ZVFHMIN: # %bb.0: 1938; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma 1939; ZVFHMIN-NEXT: vle16.v v8, (a1) 1940; ZVFHMIN-NEXT: vle16.v v10, (a0) 1941; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 1942; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 1943; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma 1944; ZVFHMIN-NEXT: vfmul.vv v8, v16, v12 1945; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma 1946; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v8 1947; ZVFHMIN-NEXT: vse16.v v12, (a0) 1948; ZVFHMIN-NEXT: ret 1949 %a = load <16 x half>, ptr %x 1950 %b = load <16 x half>, ptr %y 1951 %c = fmul <16 x half> %a, %b 1952 store <16 x half> %c, ptr %x 1953 ret void 1954} 1955 1956define void @fmul_v8f32(ptr %x, ptr %y) { 1957; CHECK-LABEL: fmul_v8f32: 1958; CHECK: # %bb.0: 1959; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma 1960; CHECK-NEXT: vle32.v v8, (a0) 1961; CHECK-NEXT: vle32.v v10, (a1) 1962; CHECK-NEXT: vfmul.vv v8, v8, v10 1963; CHECK-NEXT: vse32.v v8, (a0) 1964; CHECK-NEXT: ret 1965 %a = load <8 x float>, ptr %x 1966 %b = load <8 x float>, ptr %y 1967 %c = fmul <8 x float> %a, %b 1968 store <8 x float> %c, ptr %x 1969 ret void 1970} 1971 1972define void @fmul_v4f64(ptr %x, ptr %y) { 1973; CHECK-LABEL: fmul_v4f64: 1974; CHECK: # %bb.0: 1975; 
CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma 1976; CHECK-NEXT: vle64.v v8, (a0) 1977; CHECK-NEXT: vle64.v v10, (a1) 1978; CHECK-NEXT: vfmul.vv v8, v8, v10 1979; CHECK-NEXT: vse64.v v8, (a0) 1980; CHECK-NEXT: ret 1981 %a = load <4 x double>, ptr %x 1982 %b = load <4 x double>, ptr %y 1983 %c = fmul <4 x double> %a, %b 1984 store <4 x double> %c, ptr %x 1985 ret void 1986} 1987 1988define void @fdiv_v16bf16(ptr %x, ptr %y) { 1989; CHECK-LABEL: fdiv_v16bf16: 1990; CHECK: # %bb.0: 1991; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma 1992; CHECK-NEXT: vle16.v v8, (a1) 1993; CHECK-NEXT: vle16.v v10, (a0) 1994; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 1995; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 1996; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma 1997; CHECK-NEXT: vfdiv.vv v8, v16, v12 1998; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma 1999; CHECK-NEXT: vfncvtbf16.f.f.w v12, v8 2000; CHECK-NEXT: vse16.v v12, (a0) 2001; CHECK-NEXT: ret 2002 %a = load <16 x bfloat>, ptr %x 2003 %b = load <16 x bfloat>, ptr %y 2004 %c = fdiv <16 x bfloat> %a, %b 2005 store <16 x bfloat> %c, ptr %x 2006 ret void 2007} 2008 2009define void @fdiv_v16f16(ptr %x, ptr %y) { 2010; ZVFH-LABEL: fdiv_v16f16: 2011; ZVFH: # %bb.0: 2012; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma 2013; ZVFH-NEXT: vle16.v v8, (a0) 2014; ZVFH-NEXT: vle16.v v10, (a1) 2015; ZVFH-NEXT: vfdiv.vv v8, v8, v10 2016; ZVFH-NEXT: vse16.v v8, (a0) 2017; ZVFH-NEXT: ret 2018; 2019; ZVFHMIN-LABEL: fdiv_v16f16: 2020; ZVFHMIN: # %bb.0: 2021; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma 2022; ZVFHMIN-NEXT: vle16.v v8, (a1) 2023; ZVFHMIN-NEXT: vle16.v v10, (a0) 2024; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 2025; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 2026; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma 2027; ZVFHMIN-NEXT: vfdiv.vv v8, v16, v12 2028; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma 2029; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v8 2030; ZVFHMIN-NEXT: vse16.v v12, (a0) 2031; ZVFHMIN-NEXT: ret 2032 %a = load <16 x half>, ptr %x 
2033 %b = load <16 x half>, ptr %y 2034 %c = fdiv <16 x half> %a, %b 2035 store <16 x half> %c, ptr %x 2036 ret void 2037} 2038 2039define void @fdiv_v8f32(ptr %x, ptr %y) { 2040; CHECK-LABEL: fdiv_v8f32: 2041; CHECK: # %bb.0: 2042; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma 2043; CHECK-NEXT: vle32.v v8, (a0) 2044; CHECK-NEXT: vle32.v v10, (a1) 2045; CHECK-NEXT: vfdiv.vv v8, v8, v10 2046; CHECK-NEXT: vse32.v v8, (a0) 2047; CHECK-NEXT: ret 2048 %a = load <8 x float>, ptr %x 2049 %b = load <8 x float>, ptr %y 2050 %c = fdiv <8 x float> %a, %b 2051 store <8 x float> %c, ptr %x 2052 ret void 2053} 2054 2055define void @fdiv_v4f64(ptr %x, ptr %y) { 2056; CHECK-LABEL: fdiv_v4f64: 2057; CHECK: # %bb.0: 2058; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma 2059; CHECK-NEXT: vle64.v v8, (a0) 2060; CHECK-NEXT: vle64.v v10, (a1) 2061; CHECK-NEXT: vfdiv.vv v8, v8, v10 2062; CHECK-NEXT: vse64.v v8, (a0) 2063; CHECK-NEXT: ret 2064 %a = load <4 x double>, ptr %x 2065 %b = load <4 x double>, ptr %y 2066 %c = fdiv <4 x double> %a, %b 2067 store <4 x double> %c, ptr %x 2068 ret void 2069} 2070 2071define void @fneg_v16bf16(ptr %x) { 2072; CHECK-LABEL: fneg_v16bf16: 2073; CHECK: # %bb.0: 2074; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma 2075; CHECK-NEXT: vle16.v v8, (a0) 2076; CHECK-NEXT: lui a1, 8 2077; CHECK-NEXT: vxor.vx v8, v8, a1 2078; CHECK-NEXT: vse16.v v8, (a0) 2079; CHECK-NEXT: ret 2080 %a = load <16 x bfloat>, ptr %x 2081 %b = fneg <16 x bfloat> %a 2082 store <16 x bfloat> %b, ptr %x 2083 ret void 2084} 2085 2086define void @fneg_v16f16(ptr %x) { 2087; ZVFH-LABEL: fneg_v16f16: 2088; ZVFH: # %bb.0: 2089; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma 2090; ZVFH-NEXT: vle16.v v8, (a0) 2091; ZVFH-NEXT: vfneg.v v8, v8 2092; ZVFH-NEXT: vse16.v v8, (a0) 2093; ZVFH-NEXT: ret 2094; 2095; ZVFHMIN-LABEL: fneg_v16f16: 2096; ZVFHMIN: # %bb.0: 2097; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma 2098; ZVFHMIN-NEXT: vle16.v v8, (a0) 2099; ZVFHMIN-NEXT: lui a1, 8 2100; ZVFHMIN-NEXT: 
vxor.vx v8, v8, a1 2101; ZVFHMIN-NEXT: vse16.v v8, (a0) 2102; ZVFHMIN-NEXT: ret 2103 %a = load <16 x half>, ptr %x 2104 %b = fneg <16 x half> %a 2105 store <16 x half> %b, ptr %x 2106 ret void 2107} 2108 2109define void @fneg_v8f32(ptr %x) { 2110; CHECK-LABEL: fneg_v8f32: 2111; CHECK: # %bb.0: 2112; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma 2113; CHECK-NEXT: vle32.v v8, (a0) 2114; CHECK-NEXT: vfneg.v v8, v8 2115; CHECK-NEXT: vse32.v v8, (a0) 2116; CHECK-NEXT: ret 2117 %a = load <8 x float>, ptr %x 2118 %b = fneg <8 x float> %a 2119 store <8 x float> %b, ptr %x 2120 ret void 2121} 2122 2123define void @fneg_v4f64(ptr %x) { 2124; CHECK-LABEL: fneg_v4f64: 2125; CHECK: # %bb.0: 2126; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma 2127; CHECK-NEXT: vle64.v v8, (a0) 2128; CHECK-NEXT: vfneg.v v8, v8 2129; CHECK-NEXT: vse64.v v8, (a0) 2130; CHECK-NEXT: ret 2131 %a = load <4 x double>, ptr %x 2132 %b = fneg <4 x double> %a 2133 store <4 x double> %b, ptr %x 2134 ret void 2135} 2136 2137define void @fma_v16bf16(ptr %x, ptr %y, ptr %z) { 2138; CHECK-LABEL: fma_v16bf16: 2139; CHECK: # %bb.0: 2140; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma 2141; CHECK-NEXT: vle16.v v8, (a2) 2142; CHECK-NEXT: vle16.v v10, (a0) 2143; CHECK-NEXT: vle16.v v12, (a1) 2144; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 2145; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v10 2146; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v12 2147; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma 2148; CHECK-NEXT: vfmadd.vv v8, v20, v16 2149; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma 2150; CHECK-NEXT: vfncvtbf16.f.f.w v12, v8 2151; CHECK-NEXT: vse16.v v12, (a0) 2152; CHECK-NEXT: ret 2153 %a = load <16 x bfloat>, ptr %x 2154 %b = load <16 x bfloat>, ptr %y 2155 %c = load <16 x bfloat>, ptr %z 2156 %d = call <16 x bfloat> @llvm.fma.v16bf16(<16 x bfloat> %a, <16 x bfloat> %b, <16 x bfloat> %c) 2157 store <16 x bfloat> %d, ptr %x 2158 ret void 2159} 2160 2161define void @fma_v16f16(ptr %x, ptr %y, ptr %z) { 2162; ZVFH-LABEL: fma_v16f16: 
2163; ZVFH: # %bb.0: 2164; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma 2165; ZVFH-NEXT: vle16.v v8, (a0) 2166; ZVFH-NEXT: vle16.v v10, (a1) 2167; ZVFH-NEXT: vle16.v v12, (a2) 2168; ZVFH-NEXT: vfmacc.vv v12, v8, v10 2169; ZVFH-NEXT: vse16.v v12, (a0) 2170; ZVFH-NEXT: ret 2171; 2172; ZVFHMIN-LABEL: fma_v16f16: 2173; ZVFHMIN: # %bb.0: 2174; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma 2175; ZVFHMIN-NEXT: vle16.v v8, (a2) 2176; ZVFHMIN-NEXT: vle16.v v10, (a0) 2177; ZVFHMIN-NEXT: vle16.v v12, (a1) 2178; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 2179; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 2180; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12 2181; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma 2182; ZVFHMIN-NEXT: vfmadd.vv v8, v20, v16 2183; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma 2184; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v8 2185; ZVFHMIN-NEXT: vse16.v v12, (a0) 2186; ZVFHMIN-NEXT: ret 2187 %a = load <16 x half>, ptr %x 2188 %b = load <16 x half>, ptr %y 2189 %c = load <16 x half>, ptr %z 2190 %d = call <16 x half> @llvm.fma.v16f16(<16 x half> %a, <16 x half> %b, <16 x half> %c) 2191 store <16 x half> %d, ptr %x 2192 ret void 2193} 2194 2195define void @fma_v8f32(ptr %x, ptr %y, ptr %z) { 2196; CHECK-LABEL: fma_v8f32: 2197; CHECK: # %bb.0: 2198; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma 2199; CHECK-NEXT: vle32.v v8, (a0) 2200; CHECK-NEXT: vle32.v v10, (a1) 2201; CHECK-NEXT: vle32.v v12, (a2) 2202; CHECK-NEXT: vfmacc.vv v12, v8, v10 2203; CHECK-NEXT: vse32.v v12, (a0) 2204; CHECK-NEXT: ret 2205 %a = load <8 x float>, ptr %x 2206 %b = load <8 x float>, ptr %y 2207 %c = load <8 x float>, ptr %z 2208 %d = call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) 2209 store <8 x float> %d, ptr %x 2210 ret void 2211} 2212 2213define void @fma_v4f64(ptr %x, ptr %y, ptr %z) { 2214; CHECK-LABEL: fma_v4f64: 2215; CHECK: # %bb.0: 2216; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma 2217; CHECK-NEXT: vle64.v v8, (a0) 2218; CHECK-NEXT: vle64.v v10, (a1) 2219; 
CHECK-NEXT: vle64.v v12, (a2) 2220; CHECK-NEXT: vfmacc.vv v12, v8, v10 2221; CHECK-NEXT: vse64.v v12, (a0) 2222; CHECK-NEXT: ret 2223 %a = load <4 x double>, ptr %x 2224 %b = load <4 x double>, ptr %y 2225 %c = load <4 x double>, ptr %z 2226 %d = call <4 x double> @llvm.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c) 2227 store <4 x double> %d, ptr %x 2228 ret void 2229} 2230 2231define void @fadd_vf_v8bf16(ptr %x, bfloat %y) { 2232; CHECK-LABEL: fadd_vf_v8bf16: 2233; CHECK: # %bb.0: 2234; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma 2235; CHECK-NEXT: vle16.v v8, (a0) 2236; CHECK-NEXT: fmv.x.w a1, fa0 2237; CHECK-NEXT: vmv.v.x v9, a1 2238; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 2239; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 2240; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 2241; CHECK-NEXT: vfadd.vv v8, v10, v12 2242; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma 2243; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8 2244; CHECK-NEXT: vse16.v v10, (a0) 2245; CHECK-NEXT: ret 2246 %a = load <8 x bfloat>, ptr %x 2247 %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0 2248 %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer 2249 %d = fadd <8 x bfloat> %a, %c 2250 store <8 x bfloat> %d, ptr %x 2251 ret void 2252} 2253 2254define void @fadd_vf_v6bf16(ptr %x, bfloat %y) { 2255; CHECK-LABEL: fadd_vf_v6bf16: 2256; CHECK: # %bb.0: 2257; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma 2258; CHECK-NEXT: vle16.v v8, (a0) 2259; CHECK-NEXT: fmv.x.w a1, fa0 2260; CHECK-NEXT: vmv.v.x v9, a1 2261; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 2262; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 2263; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 2264; CHECK-NEXT: vfadd.vv v8, v10, v12 2265; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma 2266; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8 2267; CHECK-NEXT: vse16.v v10, (a0) 2268; CHECK-NEXT: ret 2269 %a = load <6 x bfloat>, ptr %x 2270 %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0 2271 %c = shufflevector <6 x 
bfloat> %b, <6 x bfloat> poison, <6 x i32> zeroinitializer 2272 %d = fadd <6 x bfloat> %a, %c 2273 store <6 x bfloat> %d, ptr %x 2274 ret void 2275} 2276 2277define void @fadd_vf_v8f16(ptr %x, half %y) { 2278; ZVFH-LABEL: fadd_vf_v8f16: 2279; ZVFH: # %bb.0: 2280; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma 2281; ZVFH-NEXT: vle16.v v8, (a0) 2282; ZVFH-NEXT: vfadd.vf v8, v8, fa0 2283; ZVFH-NEXT: vse16.v v8, (a0) 2284; ZVFH-NEXT: ret 2285; 2286; ZVFHMIN-LABEL: fadd_vf_v8f16: 2287; ZVFHMIN: # %bb.0: 2288; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma 2289; ZVFHMIN-NEXT: vle16.v v8, (a0) 2290; ZVFHMIN-NEXT: fmv.x.w a1, fa0 2291; ZVFHMIN-NEXT: vmv.v.x v9, a1 2292; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 2293; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 2294; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 2295; ZVFHMIN-NEXT: vfadd.vv v8, v10, v12 2296; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma 2297; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 2298; ZVFHMIN-NEXT: vse16.v v10, (a0) 2299; ZVFHMIN-NEXT: ret 2300 %a = load <8 x half>, ptr %x 2301 %b = insertelement <8 x half> poison, half %y, i32 0 2302 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer 2303 %d = fadd <8 x half> %a, %c 2304 store <8 x half> %d, ptr %x 2305 ret void 2306} 2307 2308define void @fadd_vf_v6f16(ptr %x, half %y) { 2309; ZVFH-LABEL: fadd_vf_v6f16: 2310; ZVFH: # %bb.0: 2311; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma 2312; ZVFH-NEXT: vle16.v v8, (a0) 2313; ZVFH-NEXT: vfadd.vf v8, v8, fa0 2314; ZVFH-NEXT: vse16.v v8, (a0) 2315; ZVFH-NEXT: ret 2316; 2317; ZVFHMIN-LABEL: fadd_vf_v6f16: 2318; ZVFHMIN: # %bb.0: 2319; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma 2320; ZVFHMIN-NEXT: vle16.v v8, (a0) 2321; ZVFHMIN-NEXT: fmv.x.w a1, fa0 2322; ZVFHMIN-NEXT: vmv.v.x v9, a1 2323; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 2324; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 2325; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 2326; ZVFHMIN-NEXT: vfadd.vv v8, v10, v12 2327; ZVFHMIN-NEXT: vsetvli zero, zero, 
e16, m1, ta, ma 2328; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 2329; ZVFHMIN-NEXT: vse16.v v10, (a0) 2330; ZVFHMIN-NEXT: ret 2331 %a = load <6 x half>, ptr %x 2332 %b = insertelement <6 x half> poison, half %y, i32 0 2333 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer 2334 %d = fadd <6 x half> %a, %c 2335 store <6 x half> %d, ptr %x 2336 ret void 2337} 2338 2339define void @fadd_vf_v4f32(ptr %x, float %y) { 2340; CHECK-LABEL: fadd_vf_v4f32: 2341; CHECK: # %bb.0: 2342; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 2343; CHECK-NEXT: vle32.v v8, (a0) 2344; CHECK-NEXT: vfadd.vf v8, v8, fa0 2345; CHECK-NEXT: vse32.v v8, (a0) 2346; CHECK-NEXT: ret 2347 %a = load <4 x float>, ptr %x 2348 %b = insertelement <4 x float> poison, float %y, i32 0 2349 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer 2350 %d = fadd <4 x float> %a, %c 2351 store <4 x float> %d, ptr %x 2352 ret void 2353} 2354 2355define void @fadd_vf_v2f64(ptr %x, double %y) { 2356; CHECK-LABEL: fadd_vf_v2f64: 2357; CHECK: # %bb.0: 2358; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma 2359; CHECK-NEXT: vle64.v v8, (a0) 2360; CHECK-NEXT: vfadd.vf v8, v8, fa0 2361; CHECK-NEXT: vse64.v v8, (a0) 2362; CHECK-NEXT: ret 2363 %a = load <2 x double>, ptr %x 2364 %b = insertelement <2 x double> poison, double %y, i32 0 2365 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer 2366 %d = fadd <2 x double> %a, %c 2367 store <2 x double> %d, ptr %x 2368 ret void 2369} 2370 2371define void @fadd_fv_v8bf16(ptr %x, bfloat %y) { 2372; CHECK-LABEL: fadd_fv_v8bf16: 2373; CHECK: # %bb.0: 2374; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma 2375; CHECK-NEXT: vle16.v v8, (a0) 2376; CHECK-NEXT: fmv.x.w a1, fa0 2377; CHECK-NEXT: vmv.v.x v9, a1 2378; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 2379; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 2380; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 2381; CHECK-NEXT: vfadd.vv v8, v12, v10 2382; CHECK-NEXT: vsetvli zero, 
zero, e16, m1, ta, ma 2383; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8 2384; CHECK-NEXT: vse16.v v10, (a0) 2385; CHECK-NEXT: ret 2386 %a = load <8 x bfloat>, ptr %x 2387 %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0 2388 %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer 2389 %d = fadd <8 x bfloat> %c, %a 2390 store <8 x bfloat> %d, ptr %x 2391 ret void 2392} 2393 2394define void @fadd_fv_v6bf16(ptr %x, bfloat %y) { 2395; CHECK-LABEL: fadd_fv_v6bf16: 2396; CHECK: # %bb.0: 2397; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma 2398; CHECK-NEXT: vle16.v v8, (a0) 2399; CHECK-NEXT: fmv.x.w a1, fa0 2400; CHECK-NEXT: vmv.v.x v9, a1 2401; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 2402; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 2403; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 2404; CHECK-NEXT: vfadd.vv v8, v12, v10 2405; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma 2406; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8 2407; CHECK-NEXT: vse16.v v10, (a0) 2408; CHECK-NEXT: ret 2409 %a = load <6 x bfloat>, ptr %x 2410 %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0 2411 %c = shufflevector <6 x bfloat> %b, <6 x bfloat> poison, <6 x i32> zeroinitializer 2412 %d = fadd <6 x bfloat> %c, %a 2413 store <6 x bfloat> %d, ptr %x 2414 ret void 2415} 2416 2417define void @fadd_fv_v8f16(ptr %x, half %y) { 2418; ZVFH-LABEL: fadd_fv_v8f16: 2419; ZVFH: # %bb.0: 2420; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma 2421; ZVFH-NEXT: vle16.v v8, (a0) 2422; ZVFH-NEXT: vfadd.vf v8, v8, fa0 2423; ZVFH-NEXT: vse16.v v8, (a0) 2424; ZVFH-NEXT: ret 2425; 2426; ZVFHMIN-LABEL: fadd_fv_v8f16: 2427; ZVFHMIN: # %bb.0: 2428; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma 2429; ZVFHMIN-NEXT: vle16.v v8, (a0) 2430; ZVFHMIN-NEXT: fmv.x.w a1, fa0 2431; ZVFHMIN-NEXT: vmv.v.x v9, a1 2432; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 2433; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 2434; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 2435; ZVFHMIN-NEXT: vfadd.vv v8, v12, v10 2436; ZVFHMIN-NEXT: 
vsetvli zero, zero, e16, m1, ta, ma 2437; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 2438; ZVFHMIN-NEXT: vse16.v v10, (a0) 2439; ZVFHMIN-NEXT: ret 2440 %a = load <8 x half>, ptr %x 2441 %b = insertelement <8 x half> poison, half %y, i32 0 2442 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer 2443 %d = fadd <8 x half> %c, %a 2444 store <8 x half> %d, ptr %x 2445 ret void 2446} 2447 2448define void @fadd_fv_v6f16(ptr %x, half %y) { 2449; ZVFH-LABEL: fadd_fv_v6f16: 2450; ZVFH: # %bb.0: 2451; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma 2452; ZVFH-NEXT: vle16.v v8, (a0) 2453; ZVFH-NEXT: vfadd.vf v8, v8, fa0 2454; ZVFH-NEXT: vse16.v v8, (a0) 2455; ZVFH-NEXT: ret 2456; 2457; ZVFHMIN-LABEL: fadd_fv_v6f16: 2458; ZVFHMIN: # %bb.0: 2459; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma 2460; ZVFHMIN-NEXT: vle16.v v8, (a0) 2461; ZVFHMIN-NEXT: fmv.x.w a1, fa0 2462; ZVFHMIN-NEXT: vmv.v.x v9, a1 2463; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 2464; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 2465; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 2466; ZVFHMIN-NEXT: vfadd.vv v8, v12, v10 2467; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma 2468; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 2469; ZVFHMIN-NEXT: vse16.v v10, (a0) 2470; ZVFHMIN-NEXT: ret 2471 %a = load <6 x half>, ptr %x 2472 %b = insertelement <6 x half> poison, half %y, i32 0 2473 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer 2474 %d = fadd <6 x half> %c, %a 2475 store <6 x half> %d, ptr %x 2476 ret void 2477} 2478 2479define void @fadd_fv_v4f32(ptr %x, float %y) { 2480; CHECK-LABEL: fadd_fv_v4f32: 2481; CHECK: # %bb.0: 2482; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 2483; CHECK-NEXT: vle32.v v8, (a0) 2484; CHECK-NEXT: vfadd.vf v8, v8, fa0 2485; CHECK-NEXT: vse32.v v8, (a0) 2486; CHECK-NEXT: ret 2487 %a = load <4 x float>, ptr %x 2488 %b = insertelement <4 x float> poison, float %y, i32 0 2489 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer 2490 
%d = fadd <4 x float> %c, %a 2491 store <4 x float> %d, ptr %x 2492 ret void 2493} 2494 2495define void @fadd_fv_v2f64(ptr %x, double %y) { 2496; CHECK-LABEL: fadd_fv_v2f64: 2497; CHECK: # %bb.0: 2498; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma 2499; CHECK-NEXT: vle64.v v8, (a0) 2500; CHECK-NEXT: vfadd.vf v8, v8, fa0 2501; CHECK-NEXT: vse64.v v8, (a0) 2502; CHECK-NEXT: ret 2503 %a = load <2 x double>, ptr %x 2504 %b = insertelement <2 x double> poison, double %y, i32 0 2505 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer 2506 %d = fadd <2 x double> %c, %a 2507 store <2 x double> %d, ptr %x 2508 ret void 2509} 2510 2511define void @fsub_vf_v8bf16(ptr %x, bfloat %y) { 2512; CHECK-LABEL: fsub_vf_v8bf16: 2513; CHECK: # %bb.0: 2514; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma 2515; CHECK-NEXT: vle16.v v8, (a0) 2516; CHECK-NEXT: fmv.x.w a1, fa0 2517; CHECK-NEXT: vmv.v.x v9, a1 2518; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 2519; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 2520; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 2521; CHECK-NEXT: vfsub.vv v8, v10, v12 2522; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma 2523; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8 2524; CHECK-NEXT: vse16.v v10, (a0) 2525; CHECK-NEXT: ret 2526 %a = load <8 x bfloat>, ptr %x 2527 %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0 2528 %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer 2529 %d = fsub <8 x bfloat> %a, %c 2530 store <8 x bfloat> %d, ptr %x 2531 ret void 2532} 2533 2534define void @fsub_vf_v6bf16(ptr %x, bfloat %y) { 2535; CHECK-LABEL: fsub_vf_v6bf16: 2536; CHECK: # %bb.0: 2537; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma 2538; CHECK-NEXT: vle16.v v8, (a0) 2539; CHECK-NEXT: fmv.x.w a1, fa0 2540; CHECK-NEXT: vmv.v.x v9, a1 2541; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 2542; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 2543; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 2544; CHECK-NEXT: vfsub.vv v8, v10, v12 2545; 
CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma 2546; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8 2547; CHECK-NEXT: vse16.v v10, (a0) 2548; CHECK-NEXT: ret 2549 %a = load <6 x bfloat>, ptr %x 2550 %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0 2551 %c = shufflevector <6 x bfloat> %b, <6 x bfloat> poison, <6 x i32> zeroinitializer 2552 %d = fsub <6 x bfloat> %a, %c 2553 store <6 x bfloat> %d, ptr %x 2554 ret void 2555} 2556 2557define void @fsub_vf_v8f16(ptr %x, half %y) { 2558; ZVFH-LABEL: fsub_vf_v8f16: 2559; ZVFH: # %bb.0: 2560; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma 2561; ZVFH-NEXT: vle16.v v8, (a0) 2562; ZVFH-NEXT: vfsub.vf v8, v8, fa0 2563; ZVFH-NEXT: vse16.v v8, (a0) 2564; ZVFH-NEXT: ret 2565; 2566; ZVFHMIN-LABEL: fsub_vf_v8f16: 2567; ZVFHMIN: # %bb.0: 2568; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma 2569; ZVFHMIN-NEXT: vle16.v v8, (a0) 2570; ZVFHMIN-NEXT: fmv.x.w a1, fa0 2571; ZVFHMIN-NEXT: vmv.v.x v9, a1 2572; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 2573; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 2574; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 2575; ZVFHMIN-NEXT: vfsub.vv v8, v10, v12 2576; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma 2577; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 2578; ZVFHMIN-NEXT: vse16.v v10, (a0) 2579; ZVFHMIN-NEXT: ret 2580 %a = load <8 x half>, ptr %x 2581 %b = insertelement <8 x half> poison, half %y, i32 0 2582 %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer 2583 %d = fsub <8 x half> %a, %c 2584 store <8 x half> %d, ptr %x 2585 ret void 2586} 2587 2588define void @fsub_vf_v6f16(ptr %x, half %y) { 2589; ZVFH-LABEL: fsub_vf_v6f16: 2590; ZVFH: # %bb.0: 2591; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma 2592; ZVFH-NEXT: vle16.v v8, (a0) 2593; ZVFH-NEXT: vfsub.vf v8, v8, fa0 2594; ZVFH-NEXT: vse16.v v8, (a0) 2595; ZVFH-NEXT: ret 2596; 2597; ZVFHMIN-LABEL: fsub_vf_v6f16: 2598; ZVFHMIN: # %bb.0: 2599; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma 2600; ZVFHMIN-NEXT: vle16.v v8, (a0) 2601; 
ZVFHMIN-NEXT: fmv.x.w a1, fa0 2602; ZVFHMIN-NEXT: vmv.v.x v9, a1 2603; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 2604; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 2605; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 2606; ZVFHMIN-NEXT: vfsub.vv v8, v10, v12 2607; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma 2608; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 2609; ZVFHMIN-NEXT: vse16.v v10, (a0) 2610; ZVFHMIN-NEXT: ret 2611 %a = load <6 x half>, ptr %x 2612 %b = insertelement <6 x half> poison, half %y, i32 0 2613 %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer 2614 %d = fsub <6 x half> %a, %c 2615 store <6 x half> %d, ptr %x 2616 ret void 2617} 2618 2619define void @fsub_vf_v4f32(ptr %x, float %y) { 2620; CHECK-LABEL: fsub_vf_v4f32: 2621; CHECK: # %bb.0: 2622; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 2623; CHECK-NEXT: vle32.v v8, (a0) 2624; CHECK-NEXT: vfsub.vf v8, v8, fa0 2625; CHECK-NEXT: vse32.v v8, (a0) 2626; CHECK-NEXT: ret 2627 %a = load <4 x float>, ptr %x 2628 %b = insertelement <4 x float> poison, float %y, i32 0 2629 %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer 2630 %d = fsub <4 x float> %a, %c 2631 store <4 x float> %d, ptr %x 2632 ret void 2633} 2634 2635define void @fsub_vf_v2f64(ptr %x, double %y) { 2636; CHECK-LABEL: fsub_vf_v2f64: 2637; CHECK: # %bb.0: 2638; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma 2639; CHECK-NEXT: vle64.v v8, (a0) 2640; CHECK-NEXT: vfsub.vf v8, v8, fa0 2641; CHECK-NEXT: vse64.v v8, (a0) 2642; CHECK-NEXT: ret 2643 %a = load <2 x double>, ptr %x 2644 %b = insertelement <2 x double> poison, double %y, i32 0 2645 %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer 2646 %d = fsub <2 x double> %a, %c 2647 store <2 x double> %d, ptr %x 2648 ret void 2649} 2650 2651define void @fsub_fv_v8bf16(ptr %x, bfloat %y) { 2652; CHECK-LABEL: fsub_fv_v8bf16: 2653; CHECK: # %bb.0: 2654; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma 2655; CHECK-NEXT: vle16.v v8, 
(a0)
; CHECK-NEXT: fmv.x.w a1, fa0
; CHECK-NEXT: vmv.v.x v9, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfsub.vv v8, v12, v10
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
; CHECK-NEXT: ret
  %a = load <8 x bfloat>, ptr %x
  %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
  %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
  %d = fsub <8 x bfloat> %c, %a
  store <8 x bfloat> %d, ptr %x
  ret void
}

; <6 x bfloat>: stores splat(%y) - load(%x) back to %x. The expected code
; widens both operands to e32, subtracts with vfsub.vv, then narrows.
define void @fsub_fv_v6bf16(ptr %x, bfloat %y) {
; CHECK-LABEL: fsub_fv_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
; CHECK-NEXT: vmv.v.x v9, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfsub.vv v8, v12, v10
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
; CHECK-NEXT: ret
  %a = load <6 x bfloat>, ptr %x
  %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
  %c = shufflevector <6 x bfloat> %b, <6 x bfloat> poison, <6 x i32> zeroinitializer
  %d = fsub <6 x bfloat> %c, %a
  store <6 x bfloat> %d, ptr %x
  ret void
}

; <8 x half>: stores splat(%y) - load(%x) back to %x. The +zvfh runs expect a
; single vfrsub.vf; the +zvfhmin runs expect widening to e32, vfsub.vv, and a
; narrowing convert.
define void @fsub_fv_v8f16(ptr %x, half %y) {
; ZVFH-LABEL: fsub_fv_v8f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vfrsub.vf v8, v8, fa0
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: fsub_fv_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfsub.vv v8, v12, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
  %a = load <8 x half>, ptr %x
  %b = insertelement <8 x half> poison, half %y, i32 0
  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
  %d = fsub <8 x half> %c, %a
  store <8 x half> %d, ptr %x
  ret void
}

; <6 x half> variant of the splat(%y) - load(%x) test; same expected shapes as
; the 8-element case but with VL = 6.
define void @fsub_fv_v6f16(ptr %x, half %y) {
; ZVFH-LABEL: fsub_fv_v6f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vfrsub.vf v8, v8, fa0
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: fsub_fv_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfsub.vv v8, v12, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
  %a = load <6 x half>, ptr %x
  %b = insertelement <6 x half> poison, half %y, i32 0
  %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
  %d = fsub <6 x half> %c, %a
  store <6 x half> %d, ptr %x
  ret void
}

; <4 x float>: splat(%y) - load(%x); expected to select vfrsub.vf directly.
define void @fsub_fv_v4f32(ptr %x, float %y) {
; CHECK-LABEL: fsub_fv_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vfrsub.vf v8, v8, fa0
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
  %a = load <4 x float>, ptr %x
  %b = insertelement <4 x float> poison, float %y, i32 0
  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
  %d = fsub <4 x float> %c, %a
  store <4 x float> %d, ptr %x
  ret void
}

; <2 x double>: splat(%y) - load(%x); expected to select vfrsub.vf directly.
define void @fsub_fv_v2f64(ptr %x, double %y) {
; CHECK-LABEL: fsub_fv_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vfrsub.vf v8, v8, fa0
; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
  %a = load <2 x double>, ptr %x
  %b = insertelement <2 x double> poison, double %y, i32 0
  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
  %d = fsub <2 x double> %c, %a
  store <2 x double> %d, ptr %x
  ret void
}

; <8 x bfloat>: stores load(%x) * splat(%y) back to %x (vector operand first).
; The expected code widens to e32, multiplies with vfmul.vv, then narrows.
define void @fmul_vf_v8bf16(ptr %x, bfloat %y) {
; CHECK-LABEL: fmul_vf_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
; CHECK-NEXT: vmv.v.x v9, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfmul.vv v8, v10, v12
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
; CHECK-NEXT: ret
  %a = load <8 x bfloat>, ptr %x
  %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
  %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
  %d = fmul <8 x bfloat> %a, %c
  store <8 x bfloat> %d, ptr %x
  ret void
}

; <6 x bfloat> variant of load(%x) * splat(%y), with VL = 6.
define void @fmul_vf_v6bf16(ptr %x, bfloat %y) {
; CHECK-LABEL: fmul_vf_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
; CHECK-NEXT: vmv.v.x v9, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfmul.vv v8, v10, v12
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
; CHECK-NEXT: ret
  %a = load <6 x bfloat>, ptr %x
  %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
  %c = shufflevector <6 x bfloat> %b, <6 x bfloat> poison, <6 x i32> zeroinitializer
  %d = fmul <6 x bfloat> %a, %c
  store <6 x bfloat> %d, ptr %x
  ret void
}

; <8 x half>: load(%x) * splat(%y). The +zvfh runs expect vfmul.vf; the
; +zvfhmin runs expect widening to e32, vfmul.vv, and a narrowing convert.
define void @fmul_vf_v8f16(ptr %x, half %y) {
; ZVFH-LABEL: fmul_vf_v8f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vfmul.vf v8, v8, fa0
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: fmul_vf_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfmul.vv v8, v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
  %a = load <8 x half>, ptr %x
  %b = insertelement <8 x half> poison, half %y, i32 0
  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
  %d = fmul <8 x half> %a, %c
  store <8 x half> %d, ptr %x
  ret void
}

; <6 x half> variant of load(%x) * splat(%y), with VL = 6.
define void @fmul_vf_v6f16(ptr %x, half %y) {
; ZVFH-LABEL: fmul_vf_v6f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vfmul.vf v8, v8, fa0
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: fmul_vf_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfmul.vv v8, v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
  %a = load <6 x half>, ptr %x
  %b = insertelement <6 x half> poison, half %y, i32 0
  %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
  %d = fmul <6 x half> %a, %c
  store <6 x half> %d, ptr %x
  ret void
}

; <4 x float>: load(%x) * splat(%y); expected to select vfmul.vf directly.
define void @fmul_vf_v4f32(ptr %x, float %y) {
; CHECK-LABEL: fmul_vf_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vfmul.vf v8, v8, fa0
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
  %a = load <4 x float>, ptr %x
  %b = insertelement <4 x float> poison, float %y, i32 0
  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
  %d = fmul <4 x float> %a, %c
  store <4 x float> %d, ptr %x
  ret void
}

; <2 x double>: load(%x) * splat(%y); expected to select vfmul.vf directly.
define void @fmul_vf_v2f64(ptr %x, double %y) {
; CHECK-LABEL: fmul_vf_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vfmul.vf v8, v8, fa0
; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
  %a = load <2 x double>, ptr %x
  %b = insertelement <2 x double> poison, double %y, i32 0
  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
  %d = fmul <2 x double> %a, %c
  store <2 x double> %d, ptr %x
  ret void
}

; <8 x bfloat>: stores splat(%y) * load(%x) back to %x (splat operand first).
; The expected code widens to e32 and multiplies with the operands swapped
; relative to the _vf_ form (vfmul.vv v8, v12, v10).
define void @fmul_fv_v8bf16(ptr %x, bfloat %y) {
; CHECK-LABEL: fmul_fv_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
; CHECK-NEXT: vmv.v.x v9, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfmul.vv v8, v12, v10
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
; CHECK-NEXT: ret
  %a = load <8 x bfloat>, ptr %x
  %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
  %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
  %d = fmul <8 x bfloat> %c, %a
  store <8 x bfloat> %d, ptr %x
  ret void
}

; <6 x bfloat> variant of splat(%y) * load(%x), with VL = 6.
define void @fmul_fv_v6bf16(ptr %x, bfloat %y) {
; CHECK-LABEL: fmul_fv_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
; CHECK-NEXT: vmv.v.x v9, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfmul.vv v8, v12, v10
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
; CHECK-NEXT: ret
  %a = load <6 x bfloat>, ptr %x
  %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
  %c = shufflevector <6 x bfloat> %b, <6 x bfloat> poison, <6 x i32> zeroinitializer
  %d = fmul <6 x bfloat> %c, %a
  store <6 x bfloat> %d, ptr %x
  ret void
}

; <8 x half>: splat(%y) * load(%x). The +zvfh runs expect the same vfmul.vf as
; the _vf_ form; the +zvfhmin runs expect widening to e32 and vfmul.vv.
define void @fmul_fv_v8f16(ptr %x, half %y) {
; ZVFH-LABEL: fmul_fv_v8f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vfmul.vf v8, v8, fa0
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: fmul_fv_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfmul.vv v8, v12, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
  %a = load <8 x half>, ptr %x
  %b = insertelement <8 x half> poison, half %y, i32 0
  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
  %d = fmul <8 x half> %c, %a
  store <8 x half> %d, ptr %x
  ret void
}

; <6 x half> variant of splat(%y) * load(%x), with VL = 6.
define void @fmul_fv_v6f16(ptr %x, half %y) {
; ZVFH-LABEL: fmul_fv_v6f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vfmul.vf v8, v8, fa0
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: fmul_fv_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfmul.vv v8, v12, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
  %a = load <6 x half>, ptr %x
  %b = insertelement <6 x half> poison, half %y, i32 0
  %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
  %d = fmul <6 x half> %c, %a
  store <6 x half> %d, ptr %x
  ret void
}

; <4 x float>: splat(%y) * load(%x); expected to select vfmul.vf directly.
define void @fmul_fv_v4f32(ptr %x, float %y) {
; CHECK-LABEL: fmul_fv_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vfmul.vf v8, v8, fa0
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
  %a = load <4 x float>, ptr %x
  %b = insertelement <4 x float> poison, float %y, i32 0
  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
  %d = fmul <4 x float> %c, %a
  store <4 x float> %d, ptr %x
  ret void
}

; <2 x double>: splat(%y) * load(%x); expected to select vfmul.vf directly.
define void @fmul_fv_v2f64(ptr %x, double %y) {
; CHECK-LABEL: fmul_fv_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vfmul.vf v8, v8, fa0
; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
  %a = load <2 x double>, ptr %x
  %b = insertelement <2 x double> poison, double %y, i32 0
  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
  %d = fmul <2 x double> %c, %a
  store <2 x double> %d, ptr %x
  ret void
}

; <8 x bfloat>: stores load(%x) / splat(%y) back to %x. The expected code
; widens to e32, divides with vfdiv.vv (vector as dividend), then narrows.
define void @fdiv_vf_v8bf16(ptr %x, bfloat %y) {
; CHECK-LABEL: fdiv_vf_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
; CHECK-NEXT: vmv.v.x v9, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfdiv.vv v8, v10, v12
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
; CHECK-NEXT: ret
  %a = load <8 x bfloat>, ptr %x
  %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
  %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
  %d = fdiv <8 x bfloat> %a, %c
  store <8 x bfloat> %d, ptr %x
  ret void
}

; <6 x bfloat> variant of load(%x) / splat(%y), with VL = 6.
define void @fdiv_vf_v6bf16(ptr %x, bfloat %y) {
; CHECK-LABEL: fdiv_vf_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
; CHECK-NEXT: vmv.v.x v9, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfdiv.vv v8, v10, v12
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
; CHECK-NEXT: ret
  %a = load <6 x bfloat>, ptr %x
  %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
  %c = shufflevector <6 x bfloat> %b, <6 x bfloat> poison, <6 x i32> zeroinitializer
  %d = fdiv <6 x bfloat> %a, %c
  store <6 x bfloat> %d, ptr %x
  ret void
}

; <8 x half>: load(%x) / splat(%y). The +zvfh runs expect vfdiv.vf; the
; +zvfhmin runs expect widening to e32, vfdiv.vv, and a narrowing convert.
define void @fdiv_vf_v8f16(ptr %x, half %y) {
; ZVFH-LABEL: fdiv_vf_v8f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vfdiv.vf v8, v8, fa0
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: fdiv_vf_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfdiv.vv v8, v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
  %a = load <8 x half>, ptr %x
  %b = insertelement <8 x half> poison, half %y, i32 0
  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
  %d = fdiv <8 x half> %a, %c
  store <8 x half> %d, ptr %x
  ret void
}

; <6 x half> variant of load(%x) / splat(%y), with VL = 6.
define void @fdiv_vf_v6f16(ptr %x, half %y) {
; ZVFH-LABEL: fdiv_vf_v6f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vfdiv.vf v8, v8, fa0
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: fdiv_vf_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfdiv.vv v8, v10, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
  %a = load <6 x half>, ptr %x
  %b = insertelement <6 x half> poison, half %y, i32 0
  %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
  %d = fdiv <6 x half> %a, %c
  store <6 x half> %d, ptr %x
  ret void
}

; <4 x float>: load(%x) / splat(%y); expected to select vfdiv.vf directly.
define void @fdiv_vf_v4f32(ptr %x, float %y) {
; CHECK-LABEL: fdiv_vf_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vfdiv.vf v8, v8, fa0
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
  %a = load <4 x float>, ptr %x
  %b = insertelement <4 x float> poison, float %y, i32 0
  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
  %d = fdiv <4 x float> %a, %c
  store <4 x float> %d, ptr %x
  ret void
}

; <2 x double>: load(%x) / splat(%y); expected to select vfdiv.vf directly.
define void @fdiv_vf_v2f64(ptr %x, double %y) {
; CHECK-LABEL: fdiv_vf_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vfdiv.vf v8, v8, fa0
; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
  %a = load <2 x double>, ptr %x
  %b = insertelement <2 x double> poison, double %y, i32 0
  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
  %d = fdiv <2 x double> %a, %c
  store <2 x double> %d, ptr %x
  ret void
}

; <8 x bfloat>: stores splat(%y) / load(%x) back to %x. The expected code
; widens to e32 and divides with the splat as dividend (vfdiv.vv v8, v12, v10).
define void @fdiv_fv_v8bf16(ptr %x, bfloat %y) {
; CHECK-LABEL: fdiv_fv_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
; CHECK-NEXT: vmv.v.x v9, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfdiv.vv v8, v12, v10
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
; CHECK-NEXT: ret
  %a = load <8 x bfloat>, ptr %x
  %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
  %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
  %d = fdiv <8 x bfloat> %c, %a
  store <8 x bfloat> %d, ptr %x
  ret void
}

; <6 x bfloat> variant of splat(%y) / load(%x), with VL = 6.
define void @fdiv_fv_v6bf16(ptr %x, bfloat %y) {
; CHECK-LABEL: fdiv_fv_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
; CHECK-NEXT: vmv.v.x v9, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfdiv.vv v8, v12, v10
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
; CHECK-NEXT: ret
  %a = load <6 x bfloat>, ptr %x
  %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
  %c = shufflevector <6 x bfloat> %b, <6 x bfloat> poison, <6 x i32> zeroinitializer
  %d = fdiv <6 x bfloat> %c, %a
  store <6 x bfloat> %d, ptr %x
  ret void
}

; <8 x half>: splat(%y) / load(%x). The +zvfh runs expect the reversed-operand
; vfrdiv.vf; the +zvfhmin runs expect widening to e32 and vfdiv.vv.
define void @fdiv_fv_v8f16(ptr %x, half %y) {
; ZVFH-LABEL: fdiv_fv_v8f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vfrdiv.vf v8, v8, fa0
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: fdiv_fv_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfdiv.vv v8, v12, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
  %a = load <8 x half>, ptr %x
  %b = insertelement <8 x half> poison, half %y, i32 0
  %c = shufflevector <8 x half> %b, <8 x half> poison, <8 x i32> zeroinitializer
  %d = fdiv <8 x half> %c, %a
  store <8 x half> %d, ptr %x
  ret void
}

; <6 x half> variant of splat(%y) / load(%x), with VL = 6.
define void @fdiv_fv_v6f16(ptr %x, half %y) {
; ZVFH-LABEL: fdiv_fv_v6f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vfrdiv.vf v8, v8, fa0
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: fdiv_fv_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
; ZVFHMIN-NEXT: vmv.v.x v9, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfdiv.vv v8, v12, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
  %a = load <6 x half>, ptr %x
  %b = insertelement <6 x half> poison, half %y, i32 0
  %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
  %d = fdiv <6 x half> %c, %a
  store <6 x half> %d, ptr %x
  ret void
}

; <4 x float>: splat(%y) / load(%x); expected to select vfrdiv.vf directly.
define void @fdiv_fv_v4f32(ptr %x, float %y) {
; CHECK-LABEL: fdiv_fv_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vfrdiv.vf v8, v8, fa0
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
  %a = load <4 x float>, ptr %x
  %b = insertelement <4 x float> poison, float %y, i32 0
  %c = shufflevector <4 x float> %b, <4 x float> poison, <4 x i32> zeroinitializer
  %d = fdiv <4 x float> %c, %a
  store <4 x float> %d, ptr %x
  ret void
}

; <2 x double>: splat(%y) / load(%x); expected to select vfrdiv.vf directly.
define void @fdiv_fv_v2f64(ptr %x, double %y) {
; CHECK-LABEL: fdiv_fv_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vfrdiv.vf v8, v8, fa0
; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
  %a = load <2 x double>, ptr %x
  %b = insertelement <2 x double> poison, double %y, i32 0
  %c = shufflevector <2 x double> %b, <2 x double> poison, <2 x i32> zeroinitializer
  %d = fdiv <2 x double> %c, %a
  store <2 x double> %d, ptr %x
  ret void
}

; <8 x bfloat>: stores fma(load(%x), splat(%z), load(%y)) back to %x. The
; expected code widens all three operands to e32, uses vfmadd.vv, then narrows.
define void @fma_vf_v8bf16(ptr %x, ptr %y, bfloat %z) {
; CHECK-LABEL: fma_vf_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a1)
; CHECK-NEXT: vle16.v v9, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
; CHECK-NEXT: vmv.v.x v10, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfmadd.vv v8, v14, v12
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
; CHECK-NEXT: ret
  %a = load <8 x bfloat>, ptr %x
  %b = load <8 x bfloat>, ptr %y
  %c = insertelement <8 x bfloat> poison, bfloat %z, i32 0
  %d = shufflevector <8 x bfloat> %c, <8 x bfloat> poison, <8 x i32> zeroinitializer
  %e = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %a, <8 x bfloat> %d, <8 x bfloat> %b)
  store <8 x bfloat> %e, ptr %x
  ret void
}

; <6 x bfloat> variant of fma(load(%x), splat(%z), load(%y)). Unlike the
; 8-element case, the expected code sets VL to 8 before the scalar splat and
; back to 6 before the narrowing convert.
define void @fma_vf_v6bf16(ptr %x, ptr %y, bfloat %z) {
; CHECK-LABEL: fma_vf_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a1)
; CHECK-NEXT: vle16.v v9, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.x v10, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfmadd.vv v8, v14, v12
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
; CHECK-NEXT: ret
  %a = load <6 x bfloat>, ptr %x
  %b = load <6 x bfloat>, ptr %y
  %c = insertelement <6 x bfloat> poison, bfloat %z, i32 0
  %d = shufflevector <6 x bfloat> %c, <6 x bfloat> poison, <6 x i32> zeroinitializer
  %e = call <6 x bfloat> @llvm.fma.v6bf16(<6 x bfloat> %a, <6 x bfloat> %d, <6 x bfloat> %b)
  store <6 x bfloat> %e, ptr %x
  ret void
}

; <8 x half>: fma(load(%x), splat(%z), load(%y)). The +zvfh runs expect a
; single vfmacc.vf; the +zvfhmin runs expect widening to e32 and vfmadd.vv.
define void @fma_vf_v8f16(ptr %x, ptr %y, half %z) {
; ZVFH-LABEL: fma_vf_v8f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vle16.v v9, (a1)
; ZVFH-NEXT: vfmacc.vf v9, fa0, v8
; ZVFH-NEXT: vse16.v v9, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: fma_vf_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a1)
; ZVFHMIN-NEXT: vle16.v v9, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
  %a = load <8 x half>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = insertelement <8 x half> poison, half %z, i32 0
  %d = shufflevector <8 x half> %c, <8 x half> poison, <8 x i32> zeroinitializer
  %e = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %d, <8 x half> %b)
  store <8 x half> %e, ptr %x
  ret void
}

; <6 x half> variant of fma(load(%x), splat(%z), load(%y)). In the +zvfhmin
; expectation VL is set to 8 before the scalar splat and back to 6 before the
; narrowing convert.
define void @fma_vf_v6f16(ptr %x, ptr %y, half %z) {
; ZVFH-LABEL: fma_vf_v6f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vle16.v v9, (a1)
; ZVFH-NEXT: vfmacc.vf v9, fa0, v8
; ZVFH-NEXT: vse16.v v9, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: fma_vf_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a1)
; ZVFHMIN-NEXT: vle16.v v9, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v12
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
  %a = load <6 x half>, ptr %x
  %b = load <6 x half>, ptr %y
  %c = insertelement <6 x half> poison, half %z, i32 0
  %d = shufflevector <6 x half> %c, <6 x half> poison, <6 x i32> zeroinitializer
  %e = call <6 x half> @llvm.fma.v6f16(<6 x half> %a, <6 x half> %d, <6 x half> %b)
  store <6 x half> %e, ptr %x
  ret void
}

; <4 x float>: fma(load(%x), splat(%z), load(%y)); expected to select
; vfmacc.vf directly.
define void @fma_vf_v4f32(ptr %x, ptr %y, float %z) {
; CHECK-LABEL: fma_vf_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vle32.v v9, (a1)
; CHECK-NEXT: vfmacc.vf v9, fa0, v8
; CHECK-NEXT: vse32.v v9, (a0)
; CHECK-NEXT: ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = insertelement <4 x float> poison, float %z, i32 0
  %d = shufflevector <4 x float> %c, <4 x float> poison, <4 x i32> zeroinitializer
  %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %d, <4 x float> %b)
  store <4 x float> %e, ptr %x
  ret void
}

; <2 x double>: fma(load(%x), splat(%z), load(%y)); expected to select
; vfmacc.vf directly.
define void @fma_vf_v2f64(ptr %x, ptr %y, double %z) {
; CHECK-LABEL: fma_vf_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vle64.v v9, (a1)
; CHECK-NEXT: vfmacc.vf v9, fa0, v8
; CHECK-NEXT: vse64.v v9, (a0)
; CHECK-NEXT: ret
  %a = load <2 x double>, ptr %x
  %b = load <2 x double>, ptr %y
  %c = insertelement <2 x double> poison, double %z, i32 0
  %d = shufflevector <2 x double> %c, <2 x double> poison, <2 x i32> zeroinitializer
  %e = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %d, <2 x double> %b)
  store <2 x double> %e, ptr %x
  ret void
}

; <8 x bfloat>: stores fma(splat(%z), load(%x), load(%y)) back to %x, i.e. the
; multiplicands are swapped relative to the _vf_ form. The expected assembly
; is the same widen / vfmadd.vv / narrow sequence.
define void @fma_fv_v8bf16(ptr %x, ptr %y, bfloat %z) {
; CHECK-LABEL: fma_fv_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a1)
; CHECK-NEXT: vle16.v v9, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
; CHECK-NEXT: vmv.v.x v10, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfmadd.vv v8, v14, v12
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
; CHECK-NEXT: ret
  %a = load <8 x bfloat>, ptr %x
  %b = load <8 x bfloat>, ptr %y
  %c = insertelement <8 x bfloat> poison, bfloat %z, i32 0
  %d = shufflevector <8 x bfloat> %c, <8 x bfloat> poison, <8 x i32> zeroinitializer
  %e = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %d, <8 x bfloat> %a, <8 x bfloat> %b)
  store <8 x bfloat> %e, ptr %x
  ret void
}

; <6 x bfloat> variant of fma(splat(%z), load(%x), load(%y)). The expected
; code sets VL to 8 before the scalar splat and back to 6 before the narrowing
; convert.
define void @fma_fv_v6bf16(ptr %x, ptr %y, bfloat %z) {
; CHECK-LABEL: fma_fv_v6bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a1)
; CHECK-NEXT: vle16.v v9, (a0)
; CHECK-NEXT: fmv.x.w a1, fa0
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT: vmv.v.x v10, a1
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT: vfmadd.vv v8, v14, v12
; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
; CHECK-NEXT: vse16.v v10, (a0)
; CHECK-NEXT: ret
  %a = load <6 x bfloat>, ptr %x
  %b = load <6 x bfloat>, ptr %y
  %c = insertelement <6 x bfloat> poison, bfloat %z, i32 0
  %d = shufflevector <6 x bfloat> %c, <6 x bfloat> poison, <6 x i32> zeroinitializer
  %e = call <6 x bfloat> @llvm.fma.v6bf16(<6 x bfloat> %d, <6 x bfloat> %a, <6 x bfloat> %b)
  store <6 x bfloat> %e, ptr %x
  ret void
}

; <8 x half>: fma(splat(%z), load(%x), load(%y)). The +zvfh runs expect
; vfmacc.vf; the +zvfhmin runs expect widening to e32 and vfmadd.vv.
define void @fma_fv_v8f16(ptr %x, ptr %y, half %z) {
; ZVFH-LABEL: fma_fv_v8f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vle16.v v9, (a1)
; ZVFH-NEXT: vfmacc.vf v9, fa0, v8
; ZVFH-NEXT: vse16.v v9, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: fma_fv_v8f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a1)
; ZVFHMIN-NEXT: vle16.v v9, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v12
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
  %a = load <8 x half>, ptr %x
  %b = load <8 x half>, ptr %y
  %c = insertelement <8 x half> poison, half %z, i32 0
  %d = shufflevector <8 x half> %c, <8 x half> poison, <8 x i32> zeroinitializer
  %e = call <8 x half> @llvm.fma.v8f16(<8 x half> %d, <8 x half> %a, <8 x half> %b)
  store <8 x half> %e, ptr %x
  ret void
}

; <6 x half> variant of fma(splat(%z), load(%x), load(%y)). In the +zvfhmin
; expectation VL is set to 8 before the scalar splat and back to 6 before the
; narrowing convert.
define void @fma_fv_v6f16(ptr %x, ptr %y, half %z) {
; ZVFH-LABEL: fma_fv_v6f16:
; ZVFH: # %bb.0:
; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFH-NEXT: vle16.v v8, (a0)
; ZVFH-NEXT: vle16.v v9, (a1)
; ZVFH-NEXT: vfmacc.vf v9, fa0, v8
; ZVFH-NEXT: vse16.v v9, (a0)
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: fma_fv_v6f16:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT: vle16.v v8, (a1)
; ZVFHMIN-NEXT: vle16.v v9, (a0)
; ZVFHMIN-NEXT: fmv.x.w a1, fa0
; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; ZVFHMIN-NEXT: vmv.v.x v10, a1
; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v12
; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
; ZVFHMIN-NEXT: vse16.v v10, (a0)
; ZVFHMIN-NEXT: ret
  %a = load <6 x half>, ptr %x
  %b = load <6 x half>, ptr %y
  %c = insertelement <6 x half> poison, half %z, i32 0
  %d = shufflevector <6 x half> %c, <6 x half> poison, <6 x i32> zeroinitializer
  %e = call <6 x half> @llvm.fma.v6f16(<6 x half> %d, <6 x half> %a, <6 x half> %b)
  store <6 x half> %e, ptr %x
  ret void
}

; <4 x float>: fma(splat(%z), load(%x), load(%y)); expected to select
; vfmacc.vf directly.
define void @fma_fv_v4f32(ptr %x, ptr %y, float %z) {
; CHECK-LABEL: fma_fv_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vle32.v v9, (a1)
; CHECK-NEXT: vfmacc.vf v9, fa0, v8
; CHECK-NEXT: vse32.v v9, (a0)
; CHECK-NEXT: ret
  %a = load <4 x float>, ptr %x
  %b = load <4 x float>, ptr %y
  %c = insertelement <4 x float> poison, float %z, i32 0
  %d = shufflevector <4 x float> %c, <4 x float> poison, <4 x i32> zeroinitializer
  %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %d, <4 x float> %a, <4 x float> %b)
  store <4 x float> %e, ptr %x
  ret void
}

define void @fma_fv_v2f64(ptr %x, ptr %y, double %z) {
; CHECK-LABEL: fma_fv_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vle64.v v9, (a1)
; CHECK-NEXT: vfmacc.vf v9, fa0, v8
; CHECK-NEXT: vse64.v v9, (a0)
; CHECK-NEXT: ret
  %a = load <2 x double>, ptr %x
  %b = load <2 x double>, ptr %y
  %c = insertelement <2 x double> poison, double %z, i32 0
  %d = shufflevector <2 x double> %c, <2 x double> poison, <2 x i32> zeroinitializer
  %e = call <2 x double> @llvm.fma.v2f64(<2 x double>
%d, <2 x double> %a, <2 x double> %b) 3667 store <2 x double> %e, ptr %x 3668 ret void 3669} 3670 3671define void @fmsub_vf_v8bf16(ptr %x, ptr %y, bfloat %z) { 3672; CHECK-LABEL: fmsub_vf_v8bf16: 3673; CHECK: # %bb.0: 3674; CHECK-NEXT: fmv.x.w a2, fa0 3675; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma 3676; CHECK-NEXT: vle16.v v8, (a1) 3677; CHECK-NEXT: vle16.v v9, (a0) 3678; CHECK-NEXT: lui a1, 8 3679; CHECK-NEXT: vmv.v.x v10, a2 3680; CHECK-NEXT: vxor.vx v8, v8, a1 3681; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 3682; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8 3683; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 3684; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 3685; CHECK-NEXT: vfmadd.vv v8, v12, v14 3686; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma 3687; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8 3688; CHECK-NEXT: vse16.v v10, (a0) 3689; CHECK-NEXT: ret 3690 %a = load <8 x bfloat>, ptr %x 3691 %b = load <8 x bfloat>, ptr %y 3692 %c = insertelement <8 x bfloat> poison, bfloat %z, i32 0 3693 %d = shufflevector <8 x bfloat> %c, <8 x bfloat> poison, <8 x i32> zeroinitializer 3694 %neg = fneg <8 x bfloat> %b 3695 %e = call <8 x bfloat> @llvm.fma.v8bf16(<8 x bfloat> %a, <8 x bfloat> %d, <8 x bfloat> %neg) 3696 store <8 x bfloat> %e, ptr %x 3697 ret void 3698} 3699 3700define void @fmsub_vf_v6bf16(ptr %x, ptr %y, bfloat %z) { 3701; CHECK-LABEL: fmsub_vf_v6bf16: 3702; CHECK: # %bb.0: 3703; CHECK-NEXT: fmv.x.w a2, fa0 3704; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma 3705; CHECK-NEXT: vle16.v v8, (a1) 3706; CHECK-NEXT: vle16.v v9, (a0) 3707; CHECK-NEXT: lui a1, 8 3708; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma 3709; CHECK-NEXT: vmv.v.x v10, a2 3710; CHECK-NEXT: vxor.vx v8, v8, a1 3711; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 3712; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8 3713; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 3714; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 3715; CHECK-NEXT: vfmadd.vv v8, v12, v14 3716; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma 3717; CHECK-NEXT: 
vfncvtbf16.f.f.w v10, v8 3718; CHECK-NEXT: vse16.v v10, (a0) 3719; CHECK-NEXT: ret 3720 %a = load <6 x bfloat>, ptr %x 3721 %b = load <6 x bfloat>, ptr %y 3722 %c = insertelement <6 x bfloat> poison, bfloat %z, i32 0 3723 %d = shufflevector <6 x bfloat> %c, <6 x bfloat> poison, <6 x i32> zeroinitializer 3724 %neg = fneg <6 x bfloat> %b 3725 %e = call <6 x bfloat> @llvm.fma.v6bf16(<6 x bfloat> %a, <6 x bfloat> %d, <6 x bfloat> %neg) 3726 store <6 x bfloat> %e, ptr %x 3727 ret void 3728} 3729 3730define void @fmsub_vf_v8f16(ptr %x, ptr %y, half %z) { 3731; ZVFH-LABEL: fmsub_vf_v8f16: 3732; ZVFH: # %bb.0: 3733; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma 3734; ZVFH-NEXT: vle16.v v8, (a0) 3735; ZVFH-NEXT: vle16.v v9, (a1) 3736; ZVFH-NEXT: vfmsac.vf v9, fa0, v8 3737; ZVFH-NEXT: vse16.v v9, (a0) 3738; ZVFH-NEXT: ret 3739; 3740; ZVFHMIN-LABEL: fmsub_vf_v8f16: 3741; ZVFHMIN: # %bb.0: 3742; ZVFHMIN-NEXT: fmv.x.w a2, fa0 3743; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma 3744; ZVFHMIN-NEXT: vle16.v v8, (a1) 3745; ZVFHMIN-NEXT: vle16.v v9, (a0) 3746; ZVFHMIN-NEXT: lui a1, 8 3747; ZVFHMIN-NEXT: vmv.v.x v10, a2 3748; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 3749; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 3750; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 3751; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 3752; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 3753; ZVFHMIN-NEXT: vfmadd.vv v8, v12, v14 3754; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma 3755; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 3756; ZVFHMIN-NEXT: vse16.v v10, (a0) 3757; ZVFHMIN-NEXT: ret 3758 %a = load <8 x half>, ptr %x 3759 %b = load <8 x half>, ptr %y 3760 %c = insertelement <8 x half> poison, half %z, i32 0 3761 %d = shufflevector <8 x half> %c, <8 x half> poison, <8 x i32> zeroinitializer 3762 %neg = fneg <8 x half> %b 3763 %e = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %d, <8 x half> %neg) 3764 store <8 x half> %e, ptr %x 3765 ret void 3766} 3767 3768define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) { 3769; 
ZVFH-LABEL: fmsub_vf_v6f16: 3770; ZVFH: # %bb.0: 3771; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma 3772; ZVFH-NEXT: vle16.v v8, (a0) 3773; ZVFH-NEXT: vle16.v v9, (a1) 3774; ZVFH-NEXT: vfmsac.vf v9, fa0, v8 3775; ZVFH-NEXT: vse16.v v9, (a0) 3776; ZVFH-NEXT: ret 3777; 3778; ZVFHMIN-LABEL: fmsub_vf_v6f16: 3779; ZVFHMIN: # %bb.0: 3780; ZVFHMIN-NEXT: fmv.x.w a2, fa0 3781; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma 3782; ZVFHMIN-NEXT: vle16.v v8, (a1) 3783; ZVFHMIN-NEXT: vle16.v v9, (a0) 3784; ZVFHMIN-NEXT: lui a1, 8 3785; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma 3786; ZVFHMIN-NEXT: vmv.v.x v10, a2 3787; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 3788; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 3789; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 3790; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 3791; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 3792; ZVFHMIN-NEXT: vfmadd.vv v8, v12, v14 3793; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma 3794; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 3795; ZVFHMIN-NEXT: vse16.v v10, (a0) 3796; ZVFHMIN-NEXT: ret 3797 %a = load <6 x half>, ptr %x 3798 %b = load <6 x half>, ptr %y 3799 %c = insertelement <6 x half> poison, half %z, i32 0 3800 %d = shufflevector <6 x half> %c, <6 x half> poison, <6 x i32> zeroinitializer 3801 %neg = fneg <6 x half> %b 3802 %e = call <6 x half> @llvm.fma.v6f16(<6 x half> %a, <6 x half> %d, <6 x half> %neg) 3803 store <6 x half> %e, ptr %x 3804 ret void 3805} 3806 3807define void @fnmsub_vf_v4f32(ptr %x, ptr %y, float %z) { 3808; CHECK-LABEL: fnmsub_vf_v4f32: 3809; CHECK: # %bb.0: 3810; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 3811; CHECK-NEXT: vle32.v v8, (a0) 3812; CHECK-NEXT: vle32.v v9, (a1) 3813; CHECK-NEXT: vfnmsac.vf v9, fa0, v8 3814; CHECK-NEXT: vse32.v v9, (a0) 3815; CHECK-NEXT: ret 3816 %a = load <4 x float>, ptr %x 3817 %b = load <4 x float>, ptr %y 3818 %c = insertelement <4 x float> poison, float %z, i32 0 3819 %d = shufflevector <4 x float> %c, <4 x float> poison, <4 x i32> zeroinitializer 3820 %neg = fneg <4 x 
float> %a 3821 %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %neg, <4 x float> %d, <4 x float> %b) 3822 store <4 x float> %e, ptr %x 3823 ret void 3824} 3825 3826define void @fnmadd_vf_v2f64(ptr %x, ptr %y, double %z) { 3827; CHECK-LABEL: fnmadd_vf_v2f64: 3828; CHECK: # %bb.0: 3829; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma 3830; CHECK-NEXT: vle64.v v8, (a0) 3831; CHECK-NEXT: vle64.v v9, (a1) 3832; CHECK-NEXT: vfnmacc.vf v9, fa0, v8 3833; CHECK-NEXT: vse64.v v9, (a0) 3834; CHECK-NEXT: ret 3835 %a = load <2 x double>, ptr %x 3836 %b = load <2 x double>, ptr %y 3837 %c = insertelement <2 x double> poison, double %z, i32 0 3838 %d = shufflevector <2 x double> %c, <2 x double> poison, <2 x i32> zeroinitializer 3839 %neg = fneg <2 x double> %a 3840 %neg2 = fneg <2 x double> %b 3841 %e = call <2 x double> @llvm.fma.v2f64(<2 x double> %neg, <2 x double> %d, <2 x double> %neg2) 3842 store <2 x double> %e, ptr %x 3843 ret void 3844} 3845 3846define void @fnmsub_fv_v4f32(ptr %x, ptr %y, float %z) { 3847; CHECK-LABEL: fnmsub_fv_v4f32: 3848; CHECK: # %bb.0: 3849; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 3850; CHECK-NEXT: vle32.v v8, (a0) 3851; CHECK-NEXT: vle32.v v9, (a1) 3852; CHECK-NEXT: vfnmsac.vf v9, fa0, v8 3853; CHECK-NEXT: vse32.v v9, (a0) 3854; CHECK-NEXT: ret 3855 %a = load <4 x float>, ptr %x 3856 %b = load <4 x float>, ptr %y 3857 %c = insertelement <4 x float> poison, float %z, i32 0 3858 %d = shufflevector <4 x float> %c, <4 x float> poison, <4 x i32> zeroinitializer 3859 %neg = fneg <4 x float> %d 3860 %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %neg, <4 x float> %a, <4 x float> %b) 3861 store <4 x float> %e, ptr %x 3862 ret void 3863} 3864 3865define void @fnmadd_fv_v2f64(ptr %x, ptr %y, double %z) { 3866; CHECK-LABEL: fnmadd_fv_v2f64: 3867; CHECK: # %bb.0: 3868; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma 3869; CHECK-NEXT: vle64.v v8, (a0) 3870; CHECK-NEXT: vle64.v v9, (a1) 3871; CHECK-NEXT: vfnmacc.vf v9, fa0, v8 3872; CHECK-NEXT: vse64.v 
v9, (a0) 3873; CHECK-NEXT: ret 3874 %a = load <2 x double>, ptr %x 3875 %b = load <2 x double>, ptr %y 3876 %c = insertelement <2 x double> poison, double %z, i32 0 3877 %d = shufflevector <2 x double> %c, <2 x double> poison, <2 x i32> zeroinitializer 3878 %neg = fneg <2 x double> %d 3879 %neg2 = fneg <2 x double> %b 3880 %e = call <2 x double> @llvm.fma.v2f64(<2 x double> %neg, <2 x double> %a, <2 x double> %neg2) 3881 store <2 x double> %e, ptr %x 3882 ret void 3883} 3884 3885define void @trunc_v8bf16(ptr %x) { 3886; CHECK-LABEL: trunc_v8bf16: 3887; CHECK: # %bb.0: 3888; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma 3889; CHECK-NEXT: vle16.v v8, (a0) 3890; CHECK-NEXT: lui a1, 307200 3891; CHECK-NEXT: fmv.w.x fa5, a1 3892; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 3893; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 3894; CHECK-NEXT: vfabs.v v8, v10 3895; CHECK-NEXT: vmflt.vf v0, v8, fa5 3896; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t 3897; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t 3898; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu 3899; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t 3900; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma 3901; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 3902; CHECK-NEXT: vse16.v v8, (a0) 3903; CHECK-NEXT: ret 3904 %a = load <8 x bfloat>, ptr %x 3905 %b = call <8 x bfloat> @llvm.trunc.v8bf16(<8 x bfloat> %a) 3906 store <8 x bfloat> %b, ptr %x 3907 ret void 3908} 3909 3910define void @trunc_v6bf16(ptr %x) { 3911; CHECK-LABEL: trunc_v6bf16: 3912; CHECK: # %bb.0: 3913; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma 3914; CHECK-NEXT: vle16.v v8, (a0) 3915; CHECK-NEXT: lui a1, 307200 3916; CHECK-NEXT: fmv.w.x fa5, a1 3917; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma 3918; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 3919; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 3920; CHECK-NEXT: vfabs.v v8, v10 3921; CHECK-NEXT: vmflt.vf v0, v8, fa5 3922; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma 3923; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t 3924; CHECK-NEXT: 
vfcvt.f.x.v v8, v8, v0.t 3925; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu 3926; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t 3927; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma 3928; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 3929; CHECK-NEXT: vse16.v v8, (a0) 3930; CHECK-NEXT: ret 3931 %a = load <6 x bfloat>, ptr %x 3932 %b = call <6 x bfloat> @llvm.trunc.v6bf16(<6 x bfloat> %a) 3933 store <6 x bfloat> %b, ptr %x 3934 ret void 3935} 3936 3937define void @trunc_v8f16(ptr %x) { 3938; ZVFH-LABEL: trunc_v8f16: 3939; ZVFH: # %bb.0: 3940; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma 3941; ZVFH-NEXT: vle16.v v8, (a0) 3942; ZVFH-NEXT: lui a1, %hi(.LCPI171_0) 3943; ZVFH-NEXT: flh fa5, %lo(.LCPI171_0)(a1) 3944; ZVFH-NEXT: vfabs.v v9, v8 3945; ZVFH-NEXT: vmflt.vf v0, v9, fa5 3946; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t 3947; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t 3948; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu 3949; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t 3950; ZVFH-NEXT: vse16.v v8, (a0) 3951; ZVFH-NEXT: ret 3952; 3953; ZVFHMIN-LABEL: trunc_v8f16: 3954; ZVFHMIN: # %bb.0: 3955; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma 3956; ZVFHMIN-NEXT: vle16.v v8, (a0) 3957; ZVFHMIN-NEXT: lui a1, 307200 3958; ZVFHMIN-NEXT: fmv.w.x fa5, a1 3959; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 3960; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 3961; ZVFHMIN-NEXT: vfabs.v v8, v10 3962; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 3963; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t 3964; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t 3965; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu 3966; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t 3967; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma 3968; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 3969; ZVFHMIN-NEXT: vse16.v v8, (a0) 3970; ZVFHMIN-NEXT: ret 3971 %a = load <8 x half>, ptr %x 3972 %b = call <8 x half> @llvm.trunc.v8f16(<8 x half> %a) 3973 store <8 x half> %b, ptr %x 3974 ret void 3975} 3976 3977define void @trunc_v6f16(ptr %x) { 3978; ZVFH-LABEL: trunc_v6f16: 3979; 
ZVFH: # %bb.0: 3980; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma 3981; ZVFH-NEXT: vle16.v v8, (a0) 3982; ZVFH-NEXT: lui a1, %hi(.LCPI172_0) 3983; ZVFH-NEXT: flh fa5, %lo(.LCPI172_0)(a1) 3984; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma 3985; ZVFH-NEXT: vfabs.v v9, v8 3986; ZVFH-NEXT: vmflt.vf v0, v9, fa5 3987; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma 3988; ZVFH-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t 3989; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t 3990; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu 3991; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t 3992; ZVFH-NEXT: vse16.v v8, (a0) 3993; ZVFH-NEXT: ret 3994; 3995; ZVFHMIN-LABEL: trunc_v6f16: 3996; ZVFHMIN: # %bb.0: 3997; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma 3998; ZVFHMIN-NEXT: vle16.v v8, (a0) 3999; ZVFHMIN-NEXT: lui a1, 307200 4000; ZVFHMIN-NEXT: fmv.w.x fa5, a1 4001; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma 4002; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 4003; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 4004; ZVFHMIN-NEXT: vfabs.v v8, v10 4005; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 4006; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma 4007; ZVFHMIN-NEXT: vfcvt.rtz.x.f.v v8, v10, v0.t 4008; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t 4009; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu 4010; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t 4011; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma 4012; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 4013; ZVFHMIN-NEXT: vse16.v v8, (a0) 4014; ZVFHMIN-NEXT: ret 4015 %a = load <6 x half>, ptr %x 4016 %b = call <6 x half> @llvm.trunc.v6f16(<6 x half> %a) 4017 store <6 x half> %b, ptr %x 4018 ret void 4019} 4020 4021define void @trunc_v4f32(ptr %x) { 4022; CHECK-LABEL: trunc_v4f32: 4023; CHECK: # %bb.0: 4024; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 4025; CHECK-NEXT: vle32.v v8, (a0) 4026; CHECK-NEXT: lui a1, 307200 4027; CHECK-NEXT: fmv.w.x fa5, a1 4028; CHECK-NEXT: vfabs.v v9, v8 4029; CHECK-NEXT: vmflt.vf v0, v9, fa5 4030; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t 4031; CHECK-NEXT: 
vfcvt.f.x.v v9, v9, v0.t 4032; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu 4033; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t 4034; CHECK-NEXT: vse32.v v8, (a0) 4035; CHECK-NEXT: ret 4036 %a = load <4 x float>, ptr %x 4037 %b = call <4 x float> @llvm.trunc.v4f32(<4 x float> %a) 4038 store <4 x float> %b, ptr %x 4039 ret void 4040} 4041 4042define void @trunc_v2f64(ptr %x) { 4043; CHECK-LABEL: trunc_v2f64: 4044; CHECK: # %bb.0: 4045; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma 4046; CHECK-NEXT: vle64.v v8, (a0) 4047; CHECK-NEXT: lui a1, %hi(.LCPI174_0) 4048; CHECK-NEXT: fld fa5, %lo(.LCPI174_0)(a1) 4049; CHECK-NEXT: vfabs.v v9, v8 4050; CHECK-NEXT: vmflt.vf v0, v9, fa5 4051; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t 4052; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t 4053; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu 4054; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t 4055; CHECK-NEXT: vse64.v v8, (a0) 4056; CHECK-NEXT: ret 4057 %a = load <2 x double>, ptr %x 4058 %b = call <2 x double> @llvm.trunc.v2f64(<2 x double> %a) 4059 store <2 x double> %b, ptr %x 4060 ret void 4061} 4062 4063define void @ceil_v8bf16(ptr %x) { 4064; CHECK-LABEL: ceil_v8bf16: 4065; CHECK: # %bb.0: 4066; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma 4067; CHECK-NEXT: vle16.v v8, (a0) 4068; CHECK-NEXT: lui a1, 307200 4069; CHECK-NEXT: fmv.w.x fa5, a1 4070; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 4071; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 4072; CHECK-NEXT: vfabs.v v8, v10 4073; CHECK-NEXT: vmflt.vf v0, v8, fa5 4074; CHECK-NEXT: fsrmi a1, 3 4075; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t 4076; CHECK-NEXT: fsrm a1 4077; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t 4078; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu 4079; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t 4080; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma 4081; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 4082; CHECK-NEXT: vse16.v v8, (a0) 4083; CHECK-NEXT: ret 4084 %a = load <8 x bfloat>, ptr %x 4085 %b = call <8 x bfloat> @llvm.ceil.v8bf16(<8 x bfloat> %a) 
4086 store <8 x bfloat> %b, ptr %x 4087 ret void 4088} 4089 4090define void @ceil_v6bf16(ptr %x) { 4091; CHECK-LABEL: ceil_v6bf16: 4092; CHECK: # %bb.0: 4093; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma 4094; CHECK-NEXT: vle16.v v8, (a0) 4095; CHECK-NEXT: lui a1, 307200 4096; CHECK-NEXT: fmv.w.x fa5, a1 4097; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma 4098; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 4099; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 4100; CHECK-NEXT: vfabs.v v8, v10 4101; CHECK-NEXT: vmflt.vf v0, v8, fa5 4102; CHECK-NEXT: fsrmi a1, 3 4103; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma 4104; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t 4105; CHECK-NEXT: fsrm a1 4106; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t 4107; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu 4108; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t 4109; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma 4110; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 4111; CHECK-NEXT: vse16.v v8, (a0) 4112; CHECK-NEXT: ret 4113 %a = load <6 x bfloat>, ptr %x 4114 %b = call <6 x bfloat> @llvm.ceil.v6bf16(<6 x bfloat> %a) 4115 store <6 x bfloat> %b, ptr %x 4116 ret void 4117} 4118 4119define void @ceil_v8f16(ptr %x) { 4120; ZVFH-LABEL: ceil_v8f16: 4121; ZVFH: # %bb.0: 4122; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma 4123; ZVFH-NEXT: vle16.v v8, (a0) 4124; ZVFH-NEXT: lui a1, %hi(.LCPI177_0) 4125; ZVFH-NEXT: flh fa5, %lo(.LCPI177_0)(a1) 4126; ZVFH-NEXT: vfabs.v v9, v8 4127; ZVFH-NEXT: vmflt.vf v0, v9, fa5 4128; ZVFH-NEXT: fsrmi a1, 3 4129; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t 4130; ZVFH-NEXT: fsrm a1 4131; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t 4132; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu 4133; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t 4134; ZVFH-NEXT: vse16.v v8, (a0) 4135; ZVFH-NEXT: ret 4136; 4137; ZVFHMIN-LABEL: ceil_v8f16: 4138; ZVFHMIN: # %bb.0: 4139; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma 4140; ZVFHMIN-NEXT: vle16.v v8, (a0) 4141; ZVFHMIN-NEXT: lui a1, 307200 4142; ZVFHMIN-NEXT: fmv.w.x fa5, a1 4143; 
ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 4144; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 4145; ZVFHMIN-NEXT: vfabs.v v8, v10 4146; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 4147; ZVFHMIN-NEXT: fsrmi a1, 3 4148; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t 4149; ZVFHMIN-NEXT: fsrm a1 4150; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t 4151; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu 4152; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t 4153; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma 4154; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 4155; ZVFHMIN-NEXT: vse16.v v8, (a0) 4156; ZVFHMIN-NEXT: ret 4157 %a = load <8 x half>, ptr %x 4158 %b = call <8 x half> @llvm.ceil.v8f16(<8 x half> %a) 4159 store <8 x half> %b, ptr %x 4160 ret void 4161} 4162 4163define void @ceil_v6f16(ptr %x) { 4164; ZVFH-LABEL: ceil_v6f16: 4165; ZVFH: # %bb.0: 4166; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma 4167; ZVFH-NEXT: vle16.v v8, (a0) 4168; ZVFH-NEXT: lui a1, %hi(.LCPI178_0) 4169; ZVFH-NEXT: flh fa5, %lo(.LCPI178_0)(a1) 4170; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma 4171; ZVFH-NEXT: vfabs.v v9, v8 4172; ZVFH-NEXT: vmflt.vf v0, v9, fa5 4173; ZVFH-NEXT: fsrmi a1, 3 4174; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma 4175; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t 4176; ZVFH-NEXT: fsrm a1 4177; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t 4178; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu 4179; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t 4180; ZVFH-NEXT: vse16.v v8, (a0) 4181; ZVFH-NEXT: ret 4182; 4183; ZVFHMIN-LABEL: ceil_v6f16: 4184; ZVFHMIN: # %bb.0: 4185; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma 4186; ZVFHMIN-NEXT: vle16.v v8, (a0) 4187; ZVFHMIN-NEXT: lui a1, 307200 4188; ZVFHMIN-NEXT: fmv.w.x fa5, a1 4189; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma 4190; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 4191; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 4192; ZVFHMIN-NEXT: vfabs.v v8, v10 4193; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 4194; ZVFHMIN-NEXT: fsrmi a1, 3 4195; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma 4196; 
ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t 4197; ZVFHMIN-NEXT: fsrm a1 4198; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t 4199; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu 4200; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t 4201; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma 4202; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 4203; ZVFHMIN-NEXT: vse16.v v8, (a0) 4204; ZVFHMIN-NEXT: ret 4205 %a = load <6 x half>, ptr %x 4206 %b = call <6 x half> @llvm.ceil.v6f16(<6 x half> %a) 4207 store <6 x half> %b, ptr %x 4208 ret void 4209} 4210 4211define void @ceil_v4f32(ptr %x) { 4212; CHECK-LABEL: ceil_v4f32: 4213; CHECK: # %bb.0: 4214; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma 4215; CHECK-NEXT: vle32.v v8, (a0) 4216; CHECK-NEXT: lui a1, 307200 4217; CHECK-NEXT: fmv.w.x fa5, a1 4218; CHECK-NEXT: vfabs.v v9, v8 4219; CHECK-NEXT: vmflt.vf v0, v9, fa5 4220; CHECK-NEXT: fsrmi a1, 3 4221; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t 4222; CHECK-NEXT: fsrm a1 4223; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t 4224; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu 4225; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t 4226; CHECK-NEXT: vse32.v v8, (a0) 4227; CHECK-NEXT: ret 4228 %a = load <4 x float>, ptr %x 4229 %b = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a) 4230 store <4 x float> %b, ptr %x 4231 ret void 4232} 4233 4234define void @ceil_v2f64(ptr %x) { 4235; CHECK-LABEL: ceil_v2f64: 4236; CHECK: # %bb.0: 4237; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma 4238; CHECK-NEXT: vle64.v v8, (a0) 4239; CHECK-NEXT: lui a1, %hi(.LCPI180_0) 4240; CHECK-NEXT: fld fa5, %lo(.LCPI180_0)(a1) 4241; CHECK-NEXT: vfabs.v v9, v8 4242; CHECK-NEXT: vmflt.vf v0, v9, fa5 4243; CHECK-NEXT: fsrmi a1, 3 4244; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t 4245; CHECK-NEXT: fsrm a1 4246; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t 4247; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu 4248; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t 4249; CHECK-NEXT: vse64.v v8, (a0) 4250; CHECK-NEXT: ret 4251 %a = load <2 x double>, ptr %x 4252 %b = call <2 x double> 
@llvm.ceil.v2f64(<2 x double> %a) 4253 store <2 x double> %b, ptr %x 4254 ret void 4255} 4256 4257define void @floor_v8bf16(ptr %x) { 4258; CHECK-LABEL: floor_v8bf16: 4259; CHECK: # %bb.0: 4260; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma 4261; CHECK-NEXT: vle16.v v8, (a0) 4262; CHECK-NEXT: lui a1, 307200 4263; CHECK-NEXT: fmv.w.x fa5, a1 4264; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 4265; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 4266; CHECK-NEXT: vfabs.v v8, v10 4267; CHECK-NEXT: vmflt.vf v0, v8, fa5 4268; CHECK-NEXT: fsrmi a1, 2 4269; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t 4270; CHECK-NEXT: fsrm a1 4271; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t 4272; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu 4273; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t 4274; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma 4275; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 4276; CHECK-NEXT: vse16.v v8, (a0) 4277; CHECK-NEXT: ret 4278 %a = load <8 x bfloat>, ptr %x 4279 %b = call <8 x bfloat> @llvm.floor.v8bf16(<8 x bfloat> %a) 4280 store <8 x bfloat> %b, ptr %x 4281 ret void 4282} 4283 4284define void @floor_v6bf16(ptr %x) { 4285; CHECK-LABEL: floor_v6bf16: 4286; CHECK: # %bb.0: 4287; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma 4288; CHECK-NEXT: vle16.v v8, (a0) 4289; CHECK-NEXT: lui a1, 307200 4290; CHECK-NEXT: fmv.w.x fa5, a1 4291; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma 4292; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 4293; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma 4294; CHECK-NEXT: vfabs.v v8, v10 4295; CHECK-NEXT: vmflt.vf v0, v8, fa5 4296; CHECK-NEXT: fsrmi a1, 2 4297; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma 4298; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t 4299; CHECK-NEXT: fsrm a1 4300; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t 4301; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu 4302; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t 4303; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma 4304; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 4305; CHECK-NEXT: vse16.v v8, (a0) 4306; CHECK-NEXT: ret 4307 %a 
= load <6 x bfloat>, ptr %x 4308 %b = call <6 x bfloat> @llvm.floor.v6bf16(<6 x bfloat> %a) 4309 store <6 x bfloat> %b, ptr %x 4310 ret void 4311} 4312 4313define void @floor_v8f16(ptr %x) { 4314; ZVFH-LABEL: floor_v8f16: 4315; ZVFH: # %bb.0: 4316; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma 4317; ZVFH-NEXT: vle16.v v8, (a0) 4318; ZVFH-NEXT: lui a1, %hi(.LCPI183_0) 4319; ZVFH-NEXT: flh fa5, %lo(.LCPI183_0)(a1) 4320; ZVFH-NEXT: vfabs.v v9, v8 4321; ZVFH-NEXT: vmflt.vf v0, v9, fa5 4322; ZVFH-NEXT: fsrmi a1, 2 4323; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t 4324; ZVFH-NEXT: fsrm a1 4325; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t 4326; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu 4327; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t 4328; ZVFH-NEXT: vse16.v v8, (a0) 4329; ZVFH-NEXT: ret 4330; 4331; ZVFHMIN-LABEL: floor_v8f16: 4332; ZVFHMIN: # %bb.0: 4333; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma 4334; ZVFHMIN-NEXT: vle16.v v8, (a0) 4335; ZVFHMIN-NEXT: lui a1, 307200 4336; ZVFHMIN-NEXT: fmv.w.x fa5, a1 4337; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 4338; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma 4339; ZVFHMIN-NEXT: vfabs.v v8, v10 4340; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 4341; ZVFHMIN-NEXT: fsrmi a1, 2 4342; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t 4343; ZVFHMIN-NEXT: fsrm a1 4344; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t 4345; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu 4346; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t 4347; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma 4348; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 4349; ZVFHMIN-NEXT: vse16.v v8, (a0) 4350; ZVFHMIN-NEXT: ret 4351 %a = load <8 x half>, ptr %x 4352 %b = call <8 x half> @llvm.floor.v8f16(<8 x half> %a) 4353 store <8 x half> %b, ptr %x 4354 ret void 4355} 4356 4357define void @floor_v6f16(ptr %x) { 4358; ZVFH-LABEL: floor_v6f16: 4359; ZVFH: # %bb.0: 4360; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma 4361; ZVFH-NEXT: vle16.v v8, (a0) 4362; ZVFH-NEXT: lui a1, %hi(.LCPI184_0) 4363; ZVFH-NEXT: flh fa5, 
%lo(.LCPI184_0)(a1)
4364; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4365; ZVFH-NEXT: vfabs.v v9, v8
4366; ZVFH-NEXT: vmflt.vf v0, v9, fa5
4367; ZVFH-NEXT: fsrmi a1, 2
4368; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4369; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
4370; ZVFH-NEXT: fsrm a1
4371; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
4372; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
4373; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4374; ZVFH-NEXT: vse16.v v8, (a0)
4375; ZVFH-NEXT: ret
4376;
4377; ZVFHMIN-LABEL: floor_v6f16:
4378; ZVFHMIN: # %bb.0:
4379; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4380; ZVFHMIN-NEXT: vle16.v v8, (a0)
4381; ZVFHMIN-NEXT: lui a1, 307200
4382; ZVFHMIN-NEXT: fmv.w.x fa5, a1
4383; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4384; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
4385; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4386; ZVFHMIN-NEXT: vfabs.v v8, v10
4387; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
4388; ZVFHMIN-NEXT: fsrmi a1, 2
4389; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma
4390; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
4391; ZVFHMIN-NEXT: fsrm a1
4392; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
4393; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
4394; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4395; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4396; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
4397; ZVFHMIN-NEXT: vse16.v v8, (a0)
4398; ZVFHMIN-NEXT: ret
4399 %a = load <6 x half>, ptr %x
4400 %b = call <6 x half> @llvm.floor.v6f16(<6 x half> %a)
4401 store <6 x half> %b, ptr %x
4402 ret void
4403}
4404
; llvm.floor on <4 x float>, loaded from and stored back to %x.
; Expected assembly: lanes with |x| below the bound built by `lui a1, 307200`
; (0x4B000000, the f32 bit pattern of 2^23) form the mask v0; those lanes are
; converted to integer and back with frm set to 2 (round-down in the RISC-V F
; encoding) via fsrmi/fsrm, then a masked vfsgnj copies the sign of the input.
4405define void @floor_v4f32(ptr %x) {
4406; CHECK-LABEL: floor_v4f32:
4407; CHECK: # %bb.0:
4408; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4409; CHECK-NEXT: vle32.v v8, (a0)
4410; CHECK-NEXT: lui a1, 307200
4411; CHECK-NEXT: fmv.w.x fa5, a1
4412; CHECK-NEXT: vfabs.v v9, v8
4413; CHECK-NEXT: vmflt.vf v0, v9, fa5
4414; CHECK-NEXT: fsrmi a1, 2
4415; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
4416; CHECK-NEXT: fsrm a1
4417; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
4418; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
4419; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4420; CHECK-NEXT: vse32.v v8, (a0)
4421; CHECK-NEXT: ret
4422 %a = load <4 x float>, ptr %x
4423 %b = call <4 x float> @llvm.floor.v4f32(<4 x float> %a)
4424 store <4 x float> %b, ptr %x
4425 ret void
4426}
4427
; llvm.floor on <2 x double>. Same expected sequence as the f32 case at e64,
; except the comparison bound is loaded with fld from constant pool entry
; .LCPI186_0 instead of being built with lui.
4428define void @floor_v2f64(ptr %x) {
4429; CHECK-LABEL: floor_v2f64:
4430; CHECK: # %bb.0:
4431; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
4432; CHECK-NEXT: vle64.v v8, (a0)
4433; CHECK-NEXT: lui a1, %hi(.LCPI186_0)
4434; CHECK-NEXT: fld fa5, %lo(.LCPI186_0)(a1)
4435; CHECK-NEXT: vfabs.v v9, v8
4436; CHECK-NEXT: vmflt.vf v0, v9, fa5
4437; CHECK-NEXT: fsrmi a1, 2
4438; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
4439; CHECK-NEXT: fsrm a1
4440; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
4441; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
4442; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4443; CHECK-NEXT: vse64.v v8, (a0)
4444; CHECK-NEXT: ret
4445 %a = load <2 x double>, ptr %x
4446 %b = call <2 x double> @llvm.floor.v2f64(<2 x double> %a)
4447 store <2 x double> %b, ptr %x
4448 ret void
4449}
4450
; llvm.round on <8 x bfloat>. Expected assembly widens the input to e32/m2
; with vfwcvtbf16.f.f.v, performs the masked integer round trip with frm set
; to 4 (round-to-nearest, ties to max magnitude in the RISC-V F encoding), and
; narrows the result with vfncvtbf16.f.f.w before the store.
4451define void @round_v8bf16(ptr %x) {
4452; CHECK-LABEL: round_v8bf16:
4453; CHECK: # %bb.0:
4454; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4455; CHECK-NEXT: vle16.v v8, (a0)
4456; CHECK-NEXT: lui a1, 307200
4457; CHECK-NEXT: fmv.w.x fa5, a1
4458; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
4459; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4460; CHECK-NEXT: vfabs.v v8, v10
4461; CHECK-NEXT: vmflt.vf v0, v8, fa5
4462; CHECK-NEXT: fsrmi a1, 4
4463; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
4464; CHECK-NEXT: fsrm a1
4465; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
4466; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
4467; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4468; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4469; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
4470; CHECK-NEXT: vse16.v v8, (a0)
4471; CHECK-NEXT: ret
4472 %a = load <8 x bfloat>, ptr %x
4473 %b = call <8 x bfloat> @llvm.round.v8bf16(<8 x bfloat> %a)
4474 store <8 x bfloat> %b, ptr %x
4475 ret void
4476}
4477
; llvm.round on the non-power-of-two <6 x bfloat>. The load and store use a
; vector length of 6; the expected assembly switches to a vector length of 8
; for the widening convert and the compare, then back to 6 for the
; rounding-mode-dependent conversions.
4478define void @round_v6bf16(ptr %x) {
4479; CHECK-LABEL: round_v6bf16:
4480; CHECK: # %bb.0:
4481; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4482; CHECK-NEXT: vle16.v v8, (a0)
4483; CHECK-NEXT: lui a1, 307200
4484; CHECK-NEXT: fmv.w.x fa5, a1
4485; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4486; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
4487; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4488; CHECK-NEXT: vfabs.v v8, v10
4489; CHECK-NEXT: vmflt.vf v0, v8, fa5
4490; CHECK-NEXT: fsrmi a1, 4
4491; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
4492; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
4493; CHECK-NEXT: fsrm a1
4494; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
4495; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
4496; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4497; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4498; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
4499; CHECK-NEXT: vse16.v v8, (a0)
4500; CHECK-NEXT: ret
4501 %a = load <6 x bfloat>, ptr %x
4502 %b = call <6 x bfloat> @llvm.round.v6bf16(<6 x bfloat> %a)
4503 store <6 x bfloat> %b, ptr %x
4504 ret void
4505}
4506
; llvm.round on <8 x half>. With +zvfh the expected sequence stays at e16 and
; loads the bound with flh from the constant pool; with +zvfhmin the input is
; widened to f32 (vfwcvt.f.f.v), rounded there, and narrowed (vfncvt.f.f.w).
4507define void @round_v8f16(ptr %x) {
4508; ZVFH-LABEL: round_v8f16:
4509; ZVFH: # %bb.0:
4510; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4511; ZVFH-NEXT: vle16.v v8, (a0)
4512; ZVFH-NEXT: lui a1, %hi(.LCPI189_0)
4513; ZVFH-NEXT: flh fa5, %lo(.LCPI189_0)(a1)
4514; ZVFH-NEXT: vfabs.v v9, v8
4515; ZVFH-NEXT: vmflt.vf v0, v9, fa5
4516; ZVFH-NEXT: fsrmi a1, 4
4517; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
4518; ZVFH-NEXT: fsrm a1
4519; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
4520; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
4521; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4522; ZVFH-NEXT: vse16.v v8, (a0)
4523; ZVFH-NEXT: ret
4524;
4525; ZVFHMIN-LABEL: round_v8f16:
4526; ZVFHMIN: # %bb.0:
4527; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4528; ZVFHMIN-NEXT: vle16.v v8, (a0)
4529; ZVFHMIN-NEXT: lui a1, 307200
4530; ZVFHMIN-NEXT: fmv.w.x fa5, a1
4531; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
4532; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4533; ZVFHMIN-NEXT: vfabs.v v8, v10
4534; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
4535; ZVFHMIN-NEXT: fsrmi a1, 4
4536; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
4537; ZVFHMIN-NEXT: fsrm a1
4538; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
4539; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
4540; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4541; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4542; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
4543; ZVFHMIN-NEXT: vse16.v v8, (a0)
4544; ZVFHMIN-NEXT: ret
4545 %a = load <8 x half>, ptr %x
4546 %b = call <8 x half> @llvm.round.v8f16(<8 x half> %a)
4547 store <8 x half> %b, ptr %x
4548 ret void
4549}
4550
; llvm.round on the non-power-of-two <6 x half>. In both configurations the
; expected assembly uses a vector length of 8 for the abs/compare step and a
; vector length of 6 for the load, the conversions and the store.
4551define void @round_v6f16(ptr %x) {
4552; ZVFH-LABEL: round_v6f16:
4553; ZVFH: # %bb.0:
4554; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4555; ZVFH-NEXT: vle16.v v8, (a0)
4556; ZVFH-NEXT: lui a1, %hi(.LCPI190_0)
4557; ZVFH-NEXT: flh fa5, %lo(.LCPI190_0)(a1)
4558; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4559; ZVFH-NEXT: vfabs.v v9, v8
4560; ZVFH-NEXT: vmflt.vf v0, v9, fa5
4561; ZVFH-NEXT: fsrmi a1, 4
4562; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4563; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
4564; ZVFH-NEXT: fsrm a1
4565; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
4566; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
4567; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4568; ZVFH-NEXT: vse16.v v8, (a0)
4569; ZVFH-NEXT: ret
4570;
4571; ZVFHMIN-LABEL: round_v6f16:
4572; ZVFHMIN: # %bb.0:
4573; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4574; ZVFHMIN-NEXT: vle16.v v8, (a0)
4575; ZVFHMIN-NEXT: lui a1, 307200
4576; ZVFHMIN-NEXT: fmv.w.x fa5, a1
4577; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4578; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
4579; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4580; ZVFHMIN-NEXT: vfabs.v v8, v10
4581; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
4582; ZVFHMIN-NEXT: fsrmi a1, 4
4583; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma
4584; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
4585; ZVFHMIN-NEXT: fsrm a1
4586; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
4587; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
4588; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4589; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4590; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
4591; ZVFHMIN-NEXT: vse16.v v8, (a0)
4592; ZVFHMIN-NEXT: ret
4593 %a = load <6 x half>, ptr %x
4594 %b = call <6 x half> @llvm.round.v6f16(<6 x half> %a)
4595 store <6 x half> %b, ptr %x
4596 ret void
4597}
4598
; llvm.round on <4 x float>: masked integer round trip at e32 with frm set
; to 4 around the float-to-integer conversion.
4599define void @round_v4f32(ptr %x) {
4600; CHECK-LABEL: round_v4f32:
4601; CHECK: # %bb.0:
4602; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4603; CHECK-NEXT: vle32.v v8, (a0)
4604; CHECK-NEXT: lui a1, 307200
4605; CHECK-NEXT: fmv.w.x fa5, a1
4606; CHECK-NEXT: vfabs.v v9, v8
4607; CHECK-NEXT: vmflt.vf v0, v9, fa5
4608; CHECK-NEXT: fsrmi a1, 4
4609; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
4610; CHECK-NEXT: fsrm a1
4611; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
4612; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
4613; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4614; CHECK-NEXT: vse32.v v8, (a0)
4615; CHECK-NEXT: ret
4616 %a = load <4 x float>, ptr %x
4617 %b = call <4 x float> @llvm.round.v4f32(<4 x float> %a)
4618 store <4 x float> %b, ptr %x
4619 ret void
4620}
4621
; llvm.round on <2 x double>: same expected sequence at e64, with the bound
; loaded by fld from constant pool entry .LCPI192_0.
4622define void @round_v2f64(ptr %x) {
4623; CHECK-LABEL: round_v2f64:
4624; CHECK: # %bb.0:
4625; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
4626; CHECK-NEXT: vle64.v v8, (a0)
4627; CHECK-NEXT: lui a1, %hi(.LCPI192_0)
4628; CHECK-NEXT: fld fa5, %lo(.LCPI192_0)(a1)
4629; CHECK-NEXT: vfabs.v v9, v8
4630; CHECK-NEXT: vmflt.vf v0, v9, fa5
4631; CHECK-NEXT: fsrmi a1, 4
4632; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
4633; CHECK-NEXT: fsrm a1
4634; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
4635; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
4636; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4637; CHECK-NEXT: vse64.v v8, (a0)
4638; CHECK-NEXT: ret
4639 %a = load <2 x double>, ptr %x
4640 %b = call <2 x double> @llvm.round.v2f64(<2 x double> %a)
4641 store <2 x double> %b, ptr %x
4642 ret void
4643}
4644
; llvm.rint on <8 x bfloat>. Unlike the floor/round tests, the expected
; assembly contains no fsrmi/fsrm pair: the conversions run under whatever
; rounding mode is already in frm. The bf16 input is widened to f32 and the
; result narrowed back.
4645define void @rint_v8bf16(ptr %x) {
4646; CHECK-LABEL: rint_v8bf16:
4647; CHECK: # %bb.0:
4648; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4649; CHECK-NEXT: vle16.v v8, (a0)
4650; CHECK-NEXT: lui a1, 307200
4651; CHECK-NEXT: fmv.w.x fa5, a1
4652; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
4653; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4654; CHECK-NEXT: vfabs.v v8, v10
4655; CHECK-NEXT: vmflt.vf v0, v8, fa5
4656; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
4657; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
4658; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
4659; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4660; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4661; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
4662; CHECK-NEXT: vse16.v v8, (a0)
4663; CHECK-NEXT: ret
4664 %a = load <8 x bfloat>, ptr %x
4665 %b = call <8 x bfloat> @llvm.rint.v8bf16(<8 x bfloat> %a)
4666 store <8 x bfloat> %b, ptr %x
4667 ret void
4668}
4669
; llvm.rint on <8 x half>: native e16 sequence with +zvfh, widen-to-f32
; sequence with +zvfhmin; no frm write is expected in either.
4670define void @rint_v8f16(ptr %x) {
4671; ZVFH-LABEL: rint_v8f16:
4672; ZVFH: # %bb.0:
4673; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4674; ZVFH-NEXT: vle16.v v8, (a0)
4675; ZVFH-NEXT: lui a1, %hi(.LCPI194_0)
4676; ZVFH-NEXT: flh fa5, %lo(.LCPI194_0)(a1)
4677; ZVFH-NEXT: vfabs.v v9, v8
4678; ZVFH-NEXT: vmflt.vf v0, v9, fa5
4679; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
4680; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
4681; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
4682; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4683; ZVFH-NEXT: vse16.v v8, (a0)
4684; ZVFH-NEXT: ret
4685;
4686; ZVFHMIN-LABEL: rint_v8f16:
4687; ZVFHMIN: # %bb.0:
4688; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4689; ZVFHMIN-NEXT: vle16.v v8, (a0)
4690; ZVFHMIN-NEXT: lui a1, 307200
4691; ZVFHMIN-NEXT: fmv.w.x fa5, a1
4692; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
4693; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4694; ZVFHMIN-NEXT: vfabs.v v8, v10
4695; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
4696; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
4697; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
4698; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
4699; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4700; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4701; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
4702; ZVFHMIN-NEXT: vse16.v v8, (a0)
4703; ZVFHMIN-NEXT: ret
4704 %a = load <8 x half>, ptr %x
4705 %b = call <8 x half> @llvm.rint.v8f16(<8 x half> %a)
4706 store <8 x half> %b, ptr %x
4707 ret void
4708}
4709
; llvm.rint on <4 x float>: masked integer round trip at e32 with no frm write.
4710define void @rint_v4f32(ptr %x) {
4711; CHECK-LABEL: rint_v4f32:
4712; CHECK: # %bb.0:
4713; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4714; CHECK-NEXT: vle32.v v8, (a0)
4715; CHECK-NEXT: lui a1, 307200
4716; CHECK-NEXT: fmv.w.x fa5, a1
4717; CHECK-NEXT: vfabs.v v9, v8
4718; CHECK-NEXT: vmflt.vf v0, v9, fa5
4719; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
4720; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
4721; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
4722; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4723; CHECK-NEXT: vse32.v v8, (a0)
4724; CHECK-NEXT: ret
4725 %a = load <4 x float>, ptr %x
4726 %b = call <4 x float> @llvm.rint.v4f32(<4 x float> %a)
4727 store <4 x float> %b, ptr %x
4728 ret void
4729}
4730
; llvm.rint on <2 x double>: same expected sequence at e64, with the bound
; loaded by fld from constant pool entry .LCPI196_0.
4731define void @rint_v2f64(ptr %x) {
4732; CHECK-LABEL: rint_v2f64:
4733; CHECK: # %bb.0:
4734; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
4735; CHECK-NEXT: vle64.v v8, (a0)
4736; CHECK-NEXT: lui a1, %hi(.LCPI196_0)
4737; CHECK-NEXT: fld fa5, %lo(.LCPI196_0)(a1)
4738; CHECK-NEXT: vfabs.v v9, v8
4739; CHECK-NEXT: vmflt.vf v0, v9, fa5
4740; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
4741; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
4742; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
4743; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4744; CHECK-NEXT: vse64.v v8, (a0)
4745; CHECK-NEXT: ret
4746 %a = load <2 x double>, ptr %x
4747 %b = call <2 x double> @llvm.rint.v2f64(<2 x double> %a)
4748 store <2 x double> %b, ptr %x
4749 ret void
4750}
4751
; llvm.nearbyint on <8 x bfloat>. The expected assembly matches the rint
; sequence but brackets the two conversions with frflags/fsflags, so the
; floating-point exception flags read before the conversions are written back
; afterwards.
4752define void @nearbyint_v8bf16(ptr %x) {
4753; CHECK-LABEL: nearbyint_v8bf16:
4754; CHECK: # %bb.0:
4755; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4756; CHECK-NEXT: vle16.v v8, (a0)
4757; CHECK-NEXT: lui a1, 307200
4758; CHECK-NEXT: fmv.w.x fa5, a1
4759; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
4760; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4761; CHECK-NEXT: vfabs.v v8, v10
4762; CHECK-NEXT: vmflt.vf v0, v8, fa5
4763; CHECK-NEXT: frflags a1
4764; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t
4765; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
4766; CHECK-NEXT: fsflags a1
4767; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
4768; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4769; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4770; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
4771; CHECK-NEXT: vse16.v v8, (a0)
4772; CHECK-NEXT: ret
4773 %a = load <8 x bfloat>, ptr %x
4774 %b = call <8 x bfloat> @llvm.nearbyint.v8bf16(<8 x bfloat> %a)
4775 store <8 x bfloat> %b, ptr %x
4776 ret void
4777}
4778
; llvm.nearbyint on <8 x half>: native e16 sequence with +zvfh, widen-to-f32
; sequence with +zvfhmin; both bracket the conversions with frflags/fsflags.
4779define void @nearbyint_v8f16(ptr %x) {
4780; ZVFH-LABEL: nearbyint_v8f16:
4781; ZVFH: # %bb.0:
4782; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4783; ZVFH-NEXT: vle16.v v8, (a0)
4784; ZVFH-NEXT: lui a1, %hi(.LCPI198_0)
4785; ZVFH-NEXT: flh fa5, %lo(.LCPI198_0)(a1)
4786; ZVFH-NEXT: vfabs.v v9, v8
4787; ZVFH-NEXT: vmflt.vf v0, v9, fa5
4788; ZVFH-NEXT: frflags a1
4789; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t
4790; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t
4791; ZVFH-NEXT: fsflags a1
4792; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu
4793; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4794; ZVFH-NEXT: vse16.v v8, (a0)
4795; ZVFH-NEXT: ret
4796;
4797; ZVFHMIN-LABEL: nearbyint_v8f16:
4798; ZVFHMIN: # %bb.0:
4799; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4800; ZVFHMIN-NEXT: vle16.v v8, (a0)
4801; ZVFHMIN-NEXT: lui a1, 307200
4802; ZVFHMIN-NEXT: fmv.w.x fa5, a1
4803; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8
4804; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4805; ZVFHMIN-NEXT: vfabs.v v8, v10
4806; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5
4807; ZVFHMIN-NEXT: frflags a1
4808; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t
4809; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t
4810; ZVFHMIN-NEXT: fsflags a1
4811; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu
4812; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t
4813; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4814; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10
4815; ZVFHMIN-NEXT: vse16.v v8, (a0)
4816; ZVFHMIN-NEXT: ret
4817 %a = load <8 x half>, ptr %x
4818 %b = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %a)
4819 store <8 x half> %b, ptr %x
4820 ret void
4821}
4822
; llvm.nearbyint on <4 x float>: masked integer round trip at e32 bracketed
; by frflags/fsflags.
4823define void @nearbyint_v4f32(ptr %x) {
4824; CHECK-LABEL: nearbyint_v4f32:
4825; CHECK: # %bb.0:
4826; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
4827; CHECK-NEXT: vle32.v v8, (a0)
4828; CHECK-NEXT: lui a1, 307200
4829; CHECK-NEXT: fmv.w.x fa5, a1
4830; CHECK-NEXT: vfabs.v v9, v8
4831; CHECK-NEXT: vmflt.vf v0, v9, fa5
4832; CHECK-NEXT: frflags a1
4833; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
4834; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
4835; CHECK-NEXT: fsflags a1
4836; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
4837; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4838; CHECK-NEXT: vse32.v v8, (a0)
4839; CHECK-NEXT: ret
4840 %a = load <4 x float>, ptr %x
4841 %b = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a)
4842 store <4 x float> %b, ptr %x
4843 ret void
4844}
4845
; llvm.nearbyint on <2 x double>: same expected sequence at e64, with the
; bound loaded by fld from constant pool entry .LCPI200_0.
4846define void @nearbyint_v2f64(ptr %x) {
4847; CHECK-LABEL: nearbyint_v2f64:
4848; CHECK: # %bb.0:
4849; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
4850; CHECK-NEXT: vle64.v v8, (a0)
4851; CHECK-NEXT: lui a1, %hi(.LCPI200_0)
4852; CHECK-NEXT: fld fa5, %lo(.LCPI200_0)(a1)
4853; CHECK-NEXT: vfabs.v v9, v8
4854; CHECK-NEXT: vmflt.vf v0, v9, fa5
4855; CHECK-NEXT: frflags a1
4856; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
4857; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
4858; CHECK-NEXT: fsflags a1
4859; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
4860; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
4861; CHECK-NEXT: vse64.v v8, (a0)
4862; CHECK-NEXT: ret
4863 %a = load <2 x double>, ptr %x
4864 %b = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a)
4865 store <2 x double> %b, ptr %x
4866 ret void
4867}
4868
; llvm.fmuladd on <8 x bfloat> (%x * %y + %z, result stored to %x). The
; expected assembly is unfused: operands are widened to f32, multiplied with
; vfmul.vv, the product is narrowed to bf16 and widened again, then added to
; the widened %z with vfadd.vv and narrowed for the store.
4869define void @fmuladd_v8bf16(ptr %x, ptr %y, ptr %z) {
4870; CHECK-LABEL: fmuladd_v8bf16:
4871; CHECK: # %bb.0:
4872; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4873; CHECK-NEXT: vle16.v v8, (a1)
4874; CHECK-NEXT: vle16.v v9, (a0)
4875; CHECK-NEXT: vle16.v v10, (a2)
4876; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
4877; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
4878; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4879; CHECK-NEXT: vfmul.vv v8, v14, v12
4880; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4881; CHECK-NEXT: vfncvtbf16.f.f.w v11, v8
4882; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v11
4883; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
4884; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4885; CHECK-NEXT: vfadd.vv v8, v8, v12
4886; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4887; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
4888; CHECK-NEXT: vse16.v v10, (a0)
4889; CHECK-NEXT: ret
4890 %a = load <8 x bfloat>, ptr %x
4891 %b = load <8 x bfloat>, ptr %y
4892 %c = load <8 x bfloat>, ptr %z
4893 %d = call <8 x bfloat> @llvm.fmuladd.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c)
4894 store <8 x bfloat> %d, ptr %x
4895 ret void
4896}
4897
; llvm.fmuladd on the non-power-of-two <6 x bfloat>: same unfused widen /
; multiply / narrow / widen / add / narrow sequence with a vector length of 6.
4898define void @fmuladd_v6bf16(ptr %x, ptr %y, ptr %z) {
4899; CHECK-LABEL: fmuladd_v6bf16:
4900; CHECK: # %bb.0:
4901; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4902; CHECK-NEXT: vle16.v v8, (a1)
4903; CHECK-NEXT: vle16.v v9, (a0)
4904; CHECK-NEXT: vle16.v v10, (a2)
4905; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
4906; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
4907; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4908; CHECK-NEXT: vfmul.vv v8, v14, v12
4909; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4910; CHECK-NEXT: vfncvtbf16.f.f.w v11, v8
4911; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v11
4912; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
4913; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4914; CHECK-NEXT: vfadd.vv v8, v8, v12
4915; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4916; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
4917; CHECK-NEXT: vse16.v v10, (a0)
4918; CHECK-NEXT: ret
4919 %a = load <6 x bfloat>, ptr %x
4920 %b = load <6 x bfloat>, ptr %y
4921 %c = load <6 x bfloat>, ptr %z
4922 %d = call <6 x bfloat> @llvm.fmuladd.v6bf16(<6 x bfloat> %a, <6 x bfloat> %b, <6 x bfloat> %c)
4923 store <6 x bfloat> %d, ptr %x
4924 ret void
4925}
4926
; llvm.fmuladd on <8 x half>. With +zvfh a single vfmacc.vv is expected; with
; +zvfhmin the operation is split into vfmul.vv and vfadd.vv in f32 with a
; narrowing to f16 between them.
4927define void @fmuladd_v8f16(ptr %x, ptr %y, ptr %z) {
4928; ZVFH-LABEL: fmuladd_v8f16:
4929; ZVFH: # %bb.0:
4930; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4931; ZVFH-NEXT: vle16.v v8, (a0)
4932; ZVFH-NEXT: vle16.v v9, (a1)
4933; ZVFH-NEXT: vle16.v v10, (a2)
4934; ZVFH-NEXT: vfmacc.vv v10, v8, v9
4935; ZVFH-NEXT: vse16.v v10, (a0)
4936; ZVFH-NEXT: ret
4937;
4938; ZVFHMIN-LABEL: fmuladd_v8f16:
4939; ZVFHMIN: # %bb.0:
4940; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
4941; ZVFHMIN-NEXT: vle16.v v8, (a1)
4942; ZVFHMIN-NEXT: vle16.v v9, (a0)
4943; ZVFHMIN-NEXT: vle16.v v10, (a2)
4944; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
4945; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
4946; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4947; ZVFHMIN-NEXT: vfmul.vv v8, v14, v12
4948; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4949; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
4950; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
4951; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
4952; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4953; ZVFHMIN-NEXT: vfadd.vv v8, v8, v12
4954; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4955; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
4956; ZVFHMIN-NEXT: vse16.v v10, (a0)
4957; ZVFHMIN-NEXT: ret
4958 %a = load <8 x half>, ptr %x
4959 %b = load <8 x half>, ptr %y
4960 %c = load <8 x half>, ptr %z
4961 %d = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c)
4962 store <8 x half> %d, ptr %x
4963 ret void
4964}
4965
; llvm.fmuladd on the non-power-of-two <6 x half>: vfmacc.vv with +zvfh, the
; split f32 multiply/add sequence with +zvfhmin, both at a vector length of 6.
4966define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
4967; ZVFH-LABEL: fmuladd_v6f16:
4968; ZVFH: # %bb.0:
4969; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4970; ZVFH-NEXT: vle16.v v8, (a0)
4971; ZVFH-NEXT: vle16.v v9, (a1)
4972; ZVFH-NEXT: vle16.v v10, (a2)
4973; ZVFH-NEXT: vfmacc.vv v10, v8, v9
4974; ZVFH-NEXT: vse16.v v10, (a0)
4975; ZVFH-NEXT: ret
4976;
4977; ZVFHMIN-LABEL: fmuladd_v6f16:
4978; ZVFHMIN: # %bb.0:
4979; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
4980; ZVFHMIN-NEXT: vle16.v v8, (a1)
4981; ZVFHMIN-NEXT: vle16.v v9, (a0)
4982; ZVFHMIN-NEXT: vle16.v v10, (a2)
4983; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
4984; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
4985; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4986; ZVFHMIN-NEXT: vfmul.vv v8, v14, v12
4987; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4988; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
4989; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
4990; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
4991; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
4992; ZVFHMIN-NEXT: vfadd.vv v8, v8, v12
4993; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4994; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
4995; ZVFHMIN-NEXT: vse16.v v10, (a0)
4996; ZVFHMIN-NEXT: ret
4997 %a = load <6 x half>, ptr %x
4998 %b = load <6 x half>, ptr %y
4999 %c = load <6 x half>, ptr %z
5000 %d = call <6 x half> @llvm.fmuladd.v6f16(<6 x half> %a, <6 x half> %b, <6 x half> %c)
5001 store <6 x half> %d, ptr %x
5002 ret void
5003}
5004
; llvm.fmuladd on <4 x float>: expected to select a single vfmacc.vv at e32.
5005define void @fmuladd_v4f32(ptr %x, ptr %y, ptr %z) {
5006; CHECK-LABEL: fmuladd_v4f32:
5007; CHECK: # %bb.0:
5008; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5009; CHECK-NEXT: vle32.v v8, (a0)
5010; CHECK-NEXT: vle32.v v9, (a1)
5011; CHECK-NEXT: vle32.v v10, (a2)
5012; CHECK-NEXT: vfmacc.vv v10, v8, v9
5013; CHECK-NEXT: vse32.v v10, (a0)
5014; CHECK-NEXT: ret
5015 %a = load <4 x float>, ptr %x
5016 %b = load <4 x float>, ptr %y
5017 %c = load <4 x float>, ptr %z
5018 %d = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
5019 store <4 x float> %d, ptr %x
5020 ret void
5021}
5022
; llvm.fmuladd on <2 x double>: expected to select a single vfmacc.vv at e64.
5023define void @fmuladd_v2f64(ptr %x, ptr %y, ptr %z) {
5024; CHECK-LABEL: fmuladd_v2f64:
5025; CHECK: # %bb.0:
5026; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
5027; CHECK-NEXT: vle64.v v8, (a0)
5028; CHECK-NEXT: vle64.v v9, (a1)
5029; CHECK-NEXT: vle64.v v10, (a2)
5030; CHECK-NEXT: vfmacc.vv v10, v8, v9
5031; CHECK-NEXT: vse64.v v10, (a0)
5032; CHECK-NEXT: ret
5033 %a = load <2 x double>, ptr %x
5034 %b = load <2 x double>, ptr %y
5035 %c = load <2 x double>, ptr %z
5036 %d = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
5037 store <2 x double> %d, ptr %x
5038 ret void
5039}
5040
; llvm.fmuladd on <8 x bfloat> with the addend negated (fneg %c). In the
; expected assembly the negation is absorbed by using vfsub.vv in place of
; vfadd.vv in the unfused f32 sequence; no separate negate instruction appears.
5041define void @fmsub_fmuladd_v8bf16(ptr %x, ptr %y, ptr %z) {
5042; CHECK-LABEL: fmsub_fmuladd_v8bf16:
5043; CHECK: # %bb.0:
5044; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
5045; CHECK-NEXT: vle16.v v8, (a1)
5046; CHECK-NEXT: vle16.v v9, (a0)
5047; CHECK-NEXT: vle16.v v10, (a2)
5048; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
5049; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
5050; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
5051; CHECK-NEXT: vfmul.vv v8, v14, v12
5052; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
5053; CHECK-NEXT: vfncvtbf16.f.f.w v11, v8
5054; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v11
5055; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
5056; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
5057; CHECK-NEXT: vfsub.vv v8, v8, v12
5058; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
5059; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
5060; CHECK-NEXT: vse16.v v10, (a0)
5061; CHECK-NEXT: ret
5062 %a = load <8 x bfloat>, ptr %x
5063 %b = load <8 x bfloat>, ptr %y
5064 %c = load <8 x bfloat>, ptr %z
5065 %neg = fneg <8 x bfloat> %c
5066 %d = call <8 x bfloat> @llvm.fmuladd.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %neg)
5067 store <8 x bfloat> %d, ptr %x
5068 ret void
5069}
5070
; Same negated-addend fmuladd on the non-power-of-two <6 x bfloat>: unfused
; f32 sequence ending in vfsub.vv, at a vector length of 6.
5071define void @fmsub_fmuladd_v6bf16(ptr %x, ptr %y, ptr %z) {
5072; CHECK-LABEL: fmsub_fmuladd_v6bf16:
5073; CHECK: # %bb.0:
5074; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
5075; CHECK-NEXT: vle16.v v8, (a1)
5076; CHECK-NEXT: vle16.v v9, (a0)
5077; CHECK-NEXT: vle16.v v10, (a2)
5078; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
5079; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9
5080; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
5081; CHECK-NEXT: vfmul.vv v8, v14, v12
5082; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
5083; CHECK-NEXT: vfncvtbf16.f.f.w v11, v8
5084; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v11
5085; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10
5086; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
5087; CHECK-NEXT: vfsub.vv v8, v8, v12
5088; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
5089; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
5090; CHECK-NEXT: vse16.v v10, (a0)
5091; CHECK-NEXT: ret
5092 %a = load <6 x bfloat>, ptr %x
5093 %b = load <6 x bfloat>, ptr %y
5094 %c = load <6 x bfloat>, ptr %z
5095 %neg = fneg <6 x bfloat> %c
5096 %d = call <6 x bfloat> @llvm.fmuladd.v6bf16(<6 x bfloat> %a, <6 x bfloat> %b, <6 x bfloat> %neg)
5097 store <6 x bfloat> %d, ptr %x
5098 ret void
5099}
5100
; Negated-addend fmuladd on <8 x half>. With +zvfh the expected selection is
; a single vfmsac.vv; with +zvfhmin it is the unfused f32 sequence ending in
; vfsub.vv.
5101define void @fmsub_fmuladd_v8f16(ptr %x, ptr %y, ptr %z) {
5102; ZVFH-LABEL: fmsub_fmuladd_v8f16:
5103; ZVFH: # %bb.0:
5104; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
5105; ZVFH-NEXT: vle16.v v8, (a0)
5106; ZVFH-NEXT: vle16.v v9, (a1)
5107; ZVFH-NEXT: vle16.v v10, (a2)
5108; ZVFH-NEXT: vfmsac.vv v10, v8, v9
5109; ZVFH-NEXT: vse16.v v10, (a0)
5110; ZVFH-NEXT: ret
5111;
5112; ZVFHMIN-LABEL: fmsub_fmuladd_v8f16:
5113; ZVFHMIN: # %bb.0:
5114; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
5115; ZVFHMIN-NEXT: vle16.v v8, (a1)
5116; ZVFHMIN-NEXT: vle16.v v9, (a0)
5117; ZVFHMIN-NEXT: vle16.v v10, (a2)
5118; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
5119; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
5120; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
5121; ZVFHMIN-NEXT: vfmul.vv v8, v14, v12
5122; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
5123; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
5124; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
5125; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
5126; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
5127; ZVFHMIN-NEXT: vfsub.vv v8, v8, v12
5128; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
5129; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
5130; ZVFHMIN-NEXT: vse16.v v10, (a0)
5131; ZVFHMIN-NEXT: ret
5132 %a = load <8 x half>, ptr %x
5133 %b = load <8 x half>, ptr %y
5134 %c = load <8 x half>, ptr %z
5135 %neg = fneg <8 x half> %c
5136 %d = call <8 x half> @llvm.fmuladd.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %neg)
5137 store <8 x half> %d, ptr %x
5138 ret void
5139}
5140
; Negated-addend fmuladd on the non-power-of-two <6 x half>: vfmsac.vv with
; +zvfh, unfused f32 sequence ending in vfsub.vv with +zvfhmin, at a vector
; length of 6.
5141define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
5142; ZVFH-LABEL: fmsub_fmuladd_v6f16:
5143; ZVFH: # %bb.0:
5144; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
5145; ZVFH-NEXT: vle16.v v8, (a0)
5146; ZVFH-NEXT: vle16.v v9, (a1)
5147; ZVFH-NEXT: vle16.v v10, (a2)
5148; ZVFH-NEXT: vfmsac.vv v10, v8, v9
5149; ZVFH-NEXT: vse16.v v10, (a0)
5150; ZVFH-NEXT: ret
5151;
5152; ZVFHMIN-LABEL: fmsub_fmuladd_v6f16:
5153; ZVFHMIN: # %bb.0:
5154; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma
5155; ZVFHMIN-NEXT: vle16.v v8, (a1)
5156; ZVFHMIN-NEXT: vle16.v v9, (a0)
5157; ZVFHMIN-NEXT: vle16.v v10, (a2)
5158; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8
5159; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9
5160; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
5161; ZVFHMIN-NEXT: vfmul.vv v8, v14, v12
5162; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
5163; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v8
5164; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
5165; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10
5166; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma
5167; ZVFHMIN-NEXT: vfsub.vv v8, v8, v12
5168; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
5169; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8
5170; ZVFHMIN-NEXT: vse16.v v10, (a0)
5171; ZVFHMIN-NEXT: ret
5172 %a = load <6 x half>, ptr %x
5173 %b = load <6 x half>, ptr %y
5174 %c = load <6 x half>, ptr %z
5175 %neg = fneg <6 x half> %c
5176 %d = call <6 x half> @llvm.fmuladd.v6f16(<6 x half> %a, <6 x half> %b, <6 x half> %neg)
5177 store <6 x half> %d, ptr %x
5178 ret void
5179}
5180
; llvm.fmuladd on <4 x float> with the first multiplicand negated (fneg %a):
; expected to select a single vfnmsac.vv.
5181define void @fnmsub_fmuladd_v4f32(ptr %x, ptr %y, ptr %z) {
5182; CHECK-LABEL: fnmsub_fmuladd_v4f32:
5183; CHECK: # %bb.0:
5184; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
5185; CHECK-NEXT: vle32.v v8, (a0)
5186; CHECK-NEXT: vle32.v v9, (a1)
5187; CHECK-NEXT: vle32.v v10, (a2)
5188; CHECK-NEXT: vfnmsac.vv v10, v8, v9
5189; CHECK-NEXT: vse32.v v10, (a0)
5190; CHECK-NEXT: ret
5191 %a = load <4 x float>, ptr %x
5192 %b = load <4 x float>, ptr %y
5193 %c = load <4 x float>, ptr %z
5194 %neg = fneg <4 x float> %a
5195 %d = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %neg, <4 x float> %b, <4 x float> %c)
5196 store <4 x float> %d, ptr %x
5197 ret void
5198}
5199
; llvm.fmuladd on <2 x double> with both the second multiplicand and the
; addend negated (fneg %b, fneg %c): expected to select a single vfnmacc.vv.
5200define void @fnmadd_fmuladd_v2f64(ptr %x, ptr %y, ptr %z) {
5201; CHECK-LABEL: fnmadd_fmuladd_v2f64:
5202; CHECK: # %bb.0:
5203; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
5204; CHECK-NEXT: vle64.v v8, (a0)
5205; CHECK-NEXT: vle64.v v9, (a1)
5206; CHECK-NEXT: vle64.v v10, (a2)
5207; CHECK-NEXT: vfnmacc.vv v10, v8, v9
5208; CHECK-NEXT: vse64.v v10, (a0)
5209; CHECK-NEXT: ret
5210 %a = load <2 x double>, ptr %x
5211 %b = load <2 x double>, ptr %y
5212 %c = load <2 x double>, ptr %z
5213 %neg = fneg <2 x double> %b
5214 %neg2 = fneg <2 x double> %c
5215 %d = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> %neg, <2 x double> %neg2)
5216 store <2 x double> %d, ptr %x
5217 ret void
5218}
5219