; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32I-ILP32
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64I-LP64
; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi=ilp32 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32ID-ILP32
; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi=lp64 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64ID-LP64
; RUN: llc -mtriple=riscv32 -mattr=+d -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32ID-ILP32D
; RUN: llc -mtriple=riscv64 -mattr=+d -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64ID-LP64D

; Exercises bfloat conversions, bitcasts, arithmetic, loads and stores on
; riscv32 and riscv64, both without any FP extension and with +d under the
; ilp32/lp64 and ilp32d/lp64d ABIs (see the llc invocations above).
; The expected-output lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; fptrunc float -> bfloat. Every configuration expects a call to
; __truncsfbf2. With +d the returned bits are additionally OR'ed with the
; constant built by `lui 1048560` (0xffff0000 in the low 32 bits); on the
; ilp32d/lp64d ABIs the value is moved out of and back into fa0 around that.
; NOTE(review): the OR looks like NaN-boxing of the 16-bit payload - confirm.
define bfloat @float_to_bfloat(float %a) nounwind {
; RV32I-ILP32-LABEL: float_to_bfloat:
; RV32I-ILP32:       # %bb.0:
; RV32I-ILP32-NEXT:    addi sp, sp, -16
; RV32I-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-ILP32-NEXT:    call __truncsfbf2
; RV32I-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-ILP32-NEXT:    addi sp, sp, 16
; RV32I-ILP32-NEXT:    ret
;
; RV64I-LP64-LABEL: float_to_bfloat:
; RV64I-LP64:       # %bb.0:
; RV64I-LP64-NEXT:    addi sp, sp, -16
; RV64I-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-LP64-NEXT:    call __truncsfbf2
; RV64I-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-LP64-NEXT:    addi sp, sp, 16
; RV64I-LP64-NEXT:    ret
;
; RV32ID-ILP32-LABEL: float_to_bfloat:
; RV32ID-ILP32:       # %bb.0:
; RV32ID-ILP32-NEXT:    addi sp, sp, -16
; RV32ID-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32-NEXT:    call __truncsfbf2
; RV32ID-ILP32-NEXT:    lui a1, 1048560
; RV32ID-ILP32-NEXT:    or a0, a0, a1
; RV32ID-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32-NEXT:    addi sp, sp, 16
; RV32ID-ILP32-NEXT:    ret
;
; RV64ID-LP64-LABEL: float_to_bfloat:
; RV64ID-LP64:       # %bb.0:
; RV64ID-LP64-NEXT:    addi sp, sp, -16
; RV64ID-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64-NEXT:    call __truncsfbf2
; RV64ID-LP64-NEXT:    lui a1, 1048560
; RV64ID-LP64-NEXT:    or a0, a0, a1
; RV64ID-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64-NEXT:    addi sp, sp, 16
; RV64ID-LP64-NEXT:    ret
;
; RV32ID-ILP32D-LABEL: float_to_bfloat:
; RV32ID-ILP32D:       # %bb.0:
; RV32ID-ILP32D-NEXT:    addi sp, sp, -16
; RV32ID-ILP32D-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32D-NEXT:    call __truncsfbf2
; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT:    lui a1, 1048560
; RV32ID-ILP32D-NEXT:    or a0, a0, a1
; RV32ID-ILP32D-NEXT:    fmv.w.x fa0, a0
; RV32ID-ILP32D-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32D-NEXT:    addi sp, sp, 16
; RV32ID-ILP32D-NEXT:    ret
;
; RV64ID-LP64D-LABEL: float_to_bfloat:
; RV64ID-LP64D:       # %bb.0:
; RV64ID-LP64D-NEXT:    addi sp, sp, -16
; RV64ID-LP64D-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64D-NEXT:    call __truncsfbf2
; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT:    lui a1, 1048560
; RV64ID-LP64D-NEXT:    or a0, a0, a1
; RV64ID-LP64D-NEXT:    fmv.w.x fa0, a0
; RV64ID-LP64D-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64D-NEXT:    addi sp, sp, 16
; RV64ID-LP64D-NEXT:    ret
  %1 = fptrunc float %a to bfloat
  ret bfloat %1
}

; fptrunc double -> bfloat. The expected output has the same shape as for
; float_to_bfloat, except that the libcall is __truncdfbf2.
define bfloat @double_to_bfloat(double %a) nounwind {
; RV32I-ILP32-LABEL: double_to_bfloat:
; RV32I-ILP32:       # %bb.0:
; RV32I-ILP32-NEXT:    addi sp, sp, -16
; RV32I-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-ILP32-NEXT:    call __truncdfbf2
; RV32I-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-ILP32-NEXT:    addi sp, sp, 16
; RV32I-ILP32-NEXT:    ret
;
; RV64I-LP64-LABEL: double_to_bfloat:
; RV64I-LP64:       # %bb.0:
; RV64I-LP64-NEXT:    addi sp, sp, -16
; RV64I-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-LP64-NEXT:    call __truncdfbf2
; RV64I-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-LP64-NEXT:    addi sp, sp, 16
; RV64I-LP64-NEXT:    ret
;
; RV32ID-ILP32-LABEL: double_to_bfloat:
; RV32ID-ILP32:       # %bb.0:
; RV32ID-ILP32-NEXT:    addi sp, sp, -16
; RV32ID-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32-NEXT:    call __truncdfbf2
; RV32ID-ILP32-NEXT:    lui a1, 1048560
; RV32ID-ILP32-NEXT:    or a0, a0, a1
; RV32ID-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32-NEXT:    addi sp, sp, 16
; RV32ID-ILP32-NEXT:    ret
;
; RV64ID-LP64-LABEL: double_to_bfloat:
; RV64ID-LP64:       # %bb.0:
; RV64ID-LP64-NEXT:    addi sp, sp, -16
; RV64ID-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64-NEXT:    call __truncdfbf2
; RV64ID-LP64-NEXT:    lui a1, 1048560
; RV64ID-LP64-NEXT:    or a0, a0, a1
; RV64ID-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64-NEXT:    addi sp, sp, 16
; RV64ID-LP64-NEXT:    ret
;
; RV32ID-ILP32D-LABEL: double_to_bfloat:
; RV32ID-ILP32D:       # %bb.0:
; RV32ID-ILP32D-NEXT:    addi sp, sp, -16
; RV32ID-ILP32D-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32D-NEXT:    call __truncdfbf2
; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT:    lui a1, 1048560
; RV32ID-ILP32D-NEXT:    or a0, a0, a1
; RV32ID-ILP32D-NEXT:    fmv.w.x fa0, a0
; RV32ID-ILP32D-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32D-NEXT:    addi sp, sp, 16
; RV32ID-ILP32D-NEXT:    ret
;
; RV64ID-LP64D-LABEL: double_to_bfloat:
; RV64ID-LP64D:       # %bb.0:
; RV64ID-LP64D-NEXT:    addi sp, sp, -16
; RV64ID-LP64D-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64D-NEXT:    call __truncdfbf2
; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT:    lui a1, 1048560
; RV64ID-LP64D-NEXT:    or a0, a0, a1
; RV64ID-LP64D-NEXT:    fmv.w.x fa0, a0
; RV64ID-LP64D-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64D-NEXT:    addi sp, sp, 16
; RV64ID-LP64D-NEXT:    ret
  %1 = fptrunc double %a to bfloat
  ret bfloat %1
}

; fpext bfloat -> float. No libcall is expected: the value is shifted left by
; 16 in an integer register (slli, or slliw on riscv64 without +d). On the
; ilp32d/lp64d ABIs it is moved from fa0 to a0 first and back afterwards.
define float @bfloat_to_float(bfloat %a) nounwind {
; RV32I-ILP32-LABEL: bfloat_to_float:
; RV32I-ILP32:       # %bb.0:
; RV32I-ILP32-NEXT:    slli a0, a0, 16
; RV32I-ILP32-NEXT:    ret
;
; RV64I-LP64-LABEL: bfloat_to_float:
; RV64I-LP64:       # %bb.0:
; RV64I-LP64-NEXT:    slliw a0, a0, 16
; RV64I-LP64-NEXT:    ret
;
; RV32ID-ILP32-LABEL: bfloat_to_float:
; RV32ID-ILP32:       # %bb.0:
; RV32ID-ILP32-NEXT:    slli a0, a0, 16
; RV32ID-ILP32-NEXT:    ret
;
; RV64ID-LP64-LABEL: bfloat_to_float:
; RV64ID-LP64:       # %bb.0:
; RV64ID-LP64-NEXT:    slli a0, a0, 16
; RV64ID-LP64-NEXT:    ret
;
; RV32ID-ILP32D-LABEL: bfloat_to_float:
; RV32ID-ILP32D:       # %bb.0:
; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT:    slli a0, a0, 16
; RV32ID-ILP32D-NEXT:    fmv.w.x fa0, a0
; RV32ID-ILP32D-NEXT:    ret
;
; RV64ID-LP64D-LABEL: bfloat_to_float:
; RV64ID-LP64D:       # %bb.0:
; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT:    slli a0, a0, 16
; RV64ID-LP64D-NEXT:    fmv.w.x fa0, a0
; RV64ID-LP64D-NEXT:    ret
  %1 = fpext bfloat %a to float
  ret float %1
}

; fpext bfloat -> double. The value is first widened to float with the same
; 16-bit left shift, then converted to double via __extendsfdf2 (no +d) or
; fcvt.d.s (+d).
define double @bfloat_to_double(bfloat %a) nounwind {
; RV32I-ILP32-LABEL: bfloat_to_double:
; RV32I-ILP32:       # %bb.0:
; RV32I-ILP32-NEXT:    addi sp, sp, -16
; RV32I-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-ILP32-NEXT:    slli a0, a0, 16
; RV32I-ILP32-NEXT:    call __extendsfdf2
; RV32I-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-ILP32-NEXT:    addi sp, sp, 16
; RV32I-ILP32-NEXT:    ret
;
; RV64I-LP64-LABEL: bfloat_to_double:
; RV64I-LP64:       # %bb.0:
; RV64I-LP64-NEXT:    addi sp, sp, -16
; RV64I-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-LP64-NEXT:    slliw a0, a0, 16
; RV64I-LP64-NEXT:    call __extendsfdf2
; RV64I-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-LP64-NEXT:    addi sp, sp, 16
; RV64I-LP64-NEXT:    ret
;
; RV32ID-ILP32-LABEL: bfloat_to_double:
; RV32ID-ILP32:       # %bb.0:
; RV32ID-ILP32-NEXT:    addi sp, sp, -16
; RV32ID-ILP32-NEXT:    slli a0, a0, 16
; RV32ID-ILP32-NEXT:    fmv.w.x fa5, a0
; RV32ID-ILP32-NEXT:    fcvt.d.s fa5, fa5
; RV32ID-ILP32-NEXT:    fsd fa5, 8(sp)
; RV32ID-ILP32-NEXT:    lw a0, 8(sp)
; RV32ID-ILP32-NEXT:    lw a1, 12(sp)
; RV32ID-ILP32-NEXT:    addi sp, sp, 16
; RV32ID-ILP32-NEXT:    ret
;
; RV64ID-LP64-LABEL: bfloat_to_double:
; RV64ID-LP64:       # %bb.0:
; RV64ID-LP64-NEXT:    slli a0, a0, 16
; RV64ID-LP64-NEXT:    fmv.w.x fa5, a0
; RV64ID-LP64-NEXT:    fcvt.d.s fa5, fa5
; RV64ID-LP64-NEXT:    fmv.x.d a0, fa5
; RV64ID-LP64-NEXT:    ret
;
; RV32ID-ILP32D-LABEL: bfloat_to_double:
; RV32ID-ILP32D:       # %bb.0:
; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT:    slli a0, a0, 16
; RV32ID-ILP32D-NEXT:    fmv.w.x fa5, a0
; RV32ID-ILP32D-NEXT:    fcvt.d.s fa0, fa5
; RV32ID-ILP32D-NEXT:    ret
;
; RV64ID-LP64D-LABEL: bfloat_to_double:
; RV64ID-LP64D:       # %bb.0:
; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT:    slli a0, a0, 16
; RV64ID-LP64D-NEXT:    fmv.w.x fa5, a0
; RV64ID-LP64D-NEXT:    fcvt.d.s fa0, fa5
; RV64ID-LP64D-NEXT:    ret
  %1 = fpext bfloat %a to double
  ret double %1
}

; bitcast i16 -> bfloat. Without +d the expected body is a bare `ret`; with
; +d the upper bits are OR'ed in via `lui 1048560`, followed by an fmv.w.x
; into fa0 on the ilp32d/lp64d ABIs.
define bfloat @i16_to_bfloat(i16 %a) nounwind {
; RV32I-ILP32-LABEL: i16_to_bfloat:
; RV32I-ILP32:       # %bb.0:
; RV32I-ILP32-NEXT:    ret
;
; RV64I-LP64-LABEL: i16_to_bfloat:
; RV64I-LP64:       # %bb.0:
; RV64I-LP64-NEXT:    ret
;
; RV32ID-ILP32-LABEL: i16_to_bfloat:
; RV32ID-ILP32:       # %bb.0:
; RV32ID-ILP32-NEXT:    lui a1, 1048560
; RV32ID-ILP32-NEXT:    or a0, a0, a1
; RV32ID-ILP32-NEXT:    ret
;
; RV64ID-LP64-LABEL: i16_to_bfloat:
; RV64ID-LP64:       # %bb.0:
; RV64ID-LP64-NEXT:    lui a1, 1048560
; RV64ID-LP64-NEXT:    or a0, a0, a1
; RV64ID-LP64-NEXT:    ret
;
; RV32ID-ILP32D-LABEL: i16_to_bfloat:
; RV32ID-ILP32D:       # %bb.0:
; RV32ID-ILP32D-NEXT:    lui a1, 1048560
; RV32ID-ILP32D-NEXT:    or a0, a0, a1
; RV32ID-ILP32D-NEXT:    fmv.w.x fa0, a0
; RV32ID-ILP32D-NEXT:    ret
;
; RV64ID-LP64D-LABEL: i16_to_bfloat:
; RV64ID-LP64D:       # %bb.0:
; RV64ID-LP64D-NEXT:    lui a1, 1048560
; RV64ID-LP64D-NEXT:    or a0, a0, a1
; RV64ID-LP64D-NEXT:    fmv.w.x fa0, a0
; RV64ID-LP64D-NEXT:    ret
  %1 = bitcast i16 %a to bfloat
  ret bfloat %1
}

; bitcast bfloat -> i16. Expected to be a bare `ret`, preceded by
; `fmv.x.w a0, fa0` on the ilp32d/lp64d ABIs.
define i16 @bfloat_to_i16(bfloat %a) nounwind {
; RV32I-ILP32-LABEL: bfloat_to_i16:
; RV32I-ILP32:       # %bb.0:
; RV32I-ILP32-NEXT:    ret
;
; RV64I-LP64-LABEL: bfloat_to_i16:
; RV64I-LP64:       # %bb.0:
; RV64I-LP64-NEXT:    ret
;
; RV32ID-ILP32-LABEL: bfloat_to_i16:
; RV32ID-ILP32:       # %bb.0:
; RV32ID-ILP32-NEXT:    ret
;
; RV64ID-LP64-LABEL: bfloat_to_i16:
; RV64ID-LP64:       # %bb.0:
; RV64ID-LP64-NEXT:    ret
;
; RV32ID-ILP32D-LABEL: bfloat_to_i16:
; RV32ID-ILP32D:       # %bb.0:
; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT:    ret
;
; RV64ID-LP64D-LABEL: bfloat_to_i16:
; RV64ID-LP64D:       # %bb.0:
; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT:    ret
  %1 = bitcast bfloat %a to i16
  ret i16 %1
}

; fadd on two bfloat values. Both operands are shifted left by 16, added as
; float (__addsf3 without +d, fadd.s with +d), and the sum is converted back
; with a call to __truncsfbf2.
define bfloat @bfloat_add(bfloat %a, bfloat %b) nounwind {
; RV32I-ILP32-LABEL: bfloat_add:
; RV32I-ILP32:       # %bb.0:
; RV32I-ILP32-NEXT:    addi sp, sp, -16
; RV32I-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-ILP32-NEXT:    slli a0, a0, 16
; RV32I-ILP32-NEXT:    slli a1, a1, 16
; RV32I-ILP32-NEXT:    call __addsf3
; RV32I-ILP32-NEXT:    call __truncsfbf2
; RV32I-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-ILP32-NEXT:    addi sp, sp, 16
; RV32I-ILP32-NEXT:    ret
;
; RV64I-LP64-LABEL: bfloat_add:
; RV64I-LP64:       # %bb.0:
; RV64I-LP64-NEXT:    addi sp, sp, -16
; RV64I-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-LP64-NEXT:    slliw a0, a0, 16
; RV64I-LP64-NEXT:    slliw a1, a1, 16
; RV64I-LP64-NEXT:    call __addsf3
; RV64I-LP64-NEXT:    call __truncsfbf2
; RV64I-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-LP64-NEXT:    addi sp, sp, 16
; RV64I-LP64-NEXT:    ret
;
; RV32ID-ILP32-LABEL: bfloat_add:
; RV32ID-ILP32:       # %bb.0:
; RV32ID-ILP32-NEXT:    addi sp, sp, -16
; RV32ID-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32-NEXT:    slli a1, a1, 16
; RV32ID-ILP32-NEXT:    slli a0, a0, 16
; RV32ID-ILP32-NEXT:    fmv.w.x fa5, a1
; RV32ID-ILP32-NEXT:    fmv.w.x fa4, a0
; RV32ID-ILP32-NEXT:    fadd.s fa5, fa4, fa5
; RV32ID-ILP32-NEXT:    fmv.x.w a0, fa5
; RV32ID-ILP32-NEXT:    call __truncsfbf2
; RV32ID-ILP32-NEXT:    lui a1, 1048560
; RV32ID-ILP32-NEXT:    or a0, a0, a1
; RV32ID-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32-NEXT:    addi sp, sp, 16
; RV32ID-ILP32-NEXT:    ret
;
; RV64ID-LP64-LABEL: bfloat_add:
; RV64ID-LP64:       # %bb.0:
; RV64ID-LP64-NEXT:    addi sp, sp, -16
; RV64ID-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64-NEXT:    slli a1, a1, 16
; RV64ID-LP64-NEXT:    slli a0, a0, 16
; RV64ID-LP64-NEXT:    fmv.w.x fa5, a1
; RV64ID-LP64-NEXT:    fmv.w.x fa4, a0
; RV64ID-LP64-NEXT:    fadd.s fa5, fa4, fa5
; RV64ID-LP64-NEXT:    fmv.x.w a0, fa5
; RV64ID-LP64-NEXT:    call __truncsfbf2
; RV64ID-LP64-NEXT:    lui a1, 1048560
; RV64ID-LP64-NEXT:    or a0, a0, a1
; RV64ID-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64-NEXT:    addi sp, sp, 16
; RV64ID-LP64-NEXT:    ret
;
; RV32ID-ILP32D-LABEL: bfloat_add:
; RV32ID-ILP32D:       # %bb.0:
; RV32ID-ILP32D-NEXT:    addi sp, sp, -16
; RV32ID-ILP32D-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT:    fmv.x.w a1, fa1
; RV32ID-ILP32D-NEXT:    slli a1, a1, 16
; RV32ID-ILP32D-NEXT:    slli a0, a0, 16
; RV32ID-ILP32D-NEXT:    fmv.w.x fa5, a1
; RV32ID-ILP32D-NEXT:    fmv.w.x fa4, a0
; RV32ID-ILP32D-NEXT:    fadd.s fa0, fa4, fa5
; RV32ID-ILP32D-NEXT:    call __truncsfbf2
; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT:    lui a1, 1048560
; RV32ID-ILP32D-NEXT:    or a0, a0, a1
; RV32ID-ILP32D-NEXT:    fmv.w.x fa0, a0
; RV32ID-ILP32D-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32D-NEXT:    addi sp, sp, 16
; RV32ID-ILP32D-NEXT:    ret
;
; RV64ID-LP64D-LABEL: bfloat_add:
; RV64ID-LP64D:       # %bb.0:
; RV64ID-LP64D-NEXT:    addi sp, sp, -16
; RV64ID-LP64D-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT:    fmv.x.w a1, fa1
; RV64ID-LP64D-NEXT:    slli a1, a1, 16
; RV64ID-LP64D-NEXT:    slli a0, a0, 16
; RV64ID-LP64D-NEXT:    fmv.w.x fa5, a1
; RV64ID-LP64D-NEXT:    fmv.w.x fa4, a0
; RV64ID-LP64D-NEXT:    fadd.s fa0, fa4, fa5
; RV64ID-LP64D-NEXT:    call __truncsfbf2
; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT:    lui a1, 1048560
; RV64ID-LP64D-NEXT:    or a0, a0, a1
; RV64ID-LP64D-NEXT:    fmv.w.x fa0, a0
; RV64ID-LP64D-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64D-NEXT:    addi sp, sp, 16
; RV64ID-LP64D-NEXT:    ret
  %1 = fadd bfloat %a, %b
  ret bfloat %1
}

; Loads bfloat elements 0 and 3 of %a (byte offsets 0 and 6 in the expected
; output), adds them and returns the sum. The loads are expected as lh
; without +d and lhu with +d.
define bfloat @bfloat_load(ptr %a) nounwind {
; RV32I-ILP32-LABEL: bfloat_load:
; RV32I-ILP32:       # %bb.0:
; RV32I-ILP32-NEXT:    addi sp, sp, -16
; RV32I-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-ILP32-NEXT:    lh a1, 0(a0)
; RV32I-ILP32-NEXT:    lh a2, 6(a0)
; RV32I-ILP32-NEXT:    slli a0, a1, 16
; RV32I-ILP32-NEXT:    slli a1, a2, 16
; RV32I-ILP32-NEXT:    call __addsf3
; RV32I-ILP32-NEXT:    call __truncsfbf2
; RV32I-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-ILP32-NEXT:    addi sp, sp, 16
; RV32I-ILP32-NEXT:    ret
;
; RV64I-LP64-LABEL: bfloat_load:
; RV64I-LP64:       # %bb.0:
; RV64I-LP64-NEXT:    addi sp, sp, -16
; RV64I-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-LP64-NEXT:    lh a1, 0(a0)
; RV64I-LP64-NEXT:    lh a2, 6(a0)
; RV64I-LP64-NEXT:    slliw a0, a1, 16
; RV64I-LP64-NEXT:    slliw a1, a2, 16
; RV64I-LP64-NEXT:    call __addsf3
; RV64I-LP64-NEXT:    call __truncsfbf2
; RV64I-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-LP64-NEXT:    addi sp, sp, 16
; RV64I-LP64-NEXT:    ret
;
; RV32ID-ILP32-LABEL: bfloat_load:
; RV32ID-ILP32:       # %bb.0:
; RV32ID-ILP32-NEXT:    addi sp, sp, -16
; RV32ID-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32-NEXT:    lhu a1, 6(a0)
; RV32ID-ILP32-NEXT:    lhu a0, 0(a0)
; RV32ID-ILP32-NEXT:    slli a1, a1, 16
; RV32ID-ILP32-NEXT:    slli a0, a0, 16
; RV32ID-ILP32-NEXT:    fmv.w.x fa5, a1
; RV32ID-ILP32-NEXT:    fmv.w.x fa4, a0
; RV32ID-ILP32-NEXT:    fadd.s fa5, fa4, fa5
; RV32ID-ILP32-NEXT:    fmv.x.w a0, fa5
; RV32ID-ILP32-NEXT:    call __truncsfbf2
; RV32ID-ILP32-NEXT:    lui a1, 1048560
; RV32ID-ILP32-NEXT:    or a0, a0, a1
; RV32ID-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32-NEXT:    addi sp, sp, 16
; RV32ID-ILP32-NEXT:    ret
;
; RV64ID-LP64-LABEL: bfloat_load:
; RV64ID-LP64:       # %bb.0:
; RV64ID-LP64-NEXT:    addi sp, sp, -16
; RV64ID-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64-NEXT:    lhu a1, 6(a0)
; RV64ID-LP64-NEXT:    lhu a0, 0(a0)
; RV64ID-LP64-NEXT:    slli a1, a1, 16
; RV64ID-LP64-NEXT:    slli a0, a0, 16
; RV64ID-LP64-NEXT:    fmv.w.x fa5, a1
; RV64ID-LP64-NEXT:    fmv.w.x fa4, a0
; RV64ID-LP64-NEXT:    fadd.s fa5, fa4, fa5
; RV64ID-LP64-NEXT:    fmv.x.w a0, fa5
; RV64ID-LP64-NEXT:    call __truncsfbf2
; RV64ID-LP64-NEXT:    lui a1, 1048560
; RV64ID-LP64-NEXT:    or a0, a0, a1
; RV64ID-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64-NEXT:    addi sp, sp, 16
; RV64ID-LP64-NEXT:    ret
;
; RV32ID-ILP32D-LABEL: bfloat_load:
; RV32ID-ILP32D:       # %bb.0:
; RV32ID-ILP32D-NEXT:    addi sp, sp, -16
; RV32ID-ILP32D-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32D-NEXT:    lhu a1, 6(a0)
; RV32ID-ILP32D-NEXT:    lhu a0, 0(a0)
; RV32ID-ILP32D-NEXT:    slli a1, a1, 16
; RV32ID-ILP32D-NEXT:    slli a0, a0, 16
; RV32ID-ILP32D-NEXT:    fmv.w.x fa5, a1
; RV32ID-ILP32D-NEXT:    fmv.w.x fa4, a0
; RV32ID-ILP32D-NEXT:    fadd.s fa0, fa4, fa5
; RV32ID-ILP32D-NEXT:    call __truncsfbf2
; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT:    lui a1, 1048560
; RV32ID-ILP32D-NEXT:    or a0, a0, a1
; RV32ID-ILP32D-NEXT:    fmv.w.x fa0, a0
; RV32ID-ILP32D-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32D-NEXT:    addi sp, sp, 16
; RV32ID-ILP32D-NEXT:    ret
;
; RV64ID-LP64D-LABEL: bfloat_load:
; RV64ID-LP64D:       # %bb.0:
; RV64ID-LP64D-NEXT:    addi sp, sp, -16
; RV64ID-LP64D-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64D-NEXT:    lhu a1, 6(a0)
; RV64ID-LP64D-NEXT:    lhu a0, 0(a0)
; RV64ID-LP64D-NEXT:    slli a1, a1, 16
; RV64ID-LP64D-NEXT:    slli a0, a0, 16
; RV64ID-LP64D-NEXT:    fmv.w.x fa5, a1
; RV64ID-LP64D-NEXT:    fmv.w.x fa4, a0
; RV64ID-LP64D-NEXT:    fadd.s fa0, fa4, fa5
; RV64ID-LP64D-NEXT:    call __truncsfbf2
; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT:    lui a1, 1048560
; RV64ID-LP64D-NEXT:    or a0, a0, a1
; RV64ID-LP64D-NEXT:    fmv.w.x fa0, a0
; RV64ID-LP64D-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64D-NEXT:    addi sp, sp, 16
; RV64ID-LP64D-NEXT:    ret
  %1 = load bfloat, ptr %a
  %2 = getelementptr bfloat, ptr %a, i32 3
  %3 = load bfloat, ptr %2
  %4 = fadd bfloat %1, %3
  ret bfloat %4
}

; Adds %b and %c and stores the sum to bfloat elements 0 and 8 of %a (byte
; offsets 0 and 16 in the expected output, both written with sh). %a is kept
; in s0 across the libcalls in every configuration.
define void @bfloat_store(ptr %a, bfloat %b, bfloat %c) nounwind {
; RV32I-ILP32-LABEL: bfloat_store:
; RV32I-ILP32:       # %bb.0:
; RV32I-ILP32-NEXT:    addi sp, sp, -16
; RV32I-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-ILP32-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-ILP32-NEXT:    mv s0, a0
; RV32I-ILP32-NEXT:    slli a0, a1, 16
; RV32I-ILP32-NEXT:    slli a1, a2, 16
; RV32I-ILP32-NEXT:    call __addsf3
; RV32I-ILP32-NEXT:    call __truncsfbf2
; RV32I-ILP32-NEXT:    sh a0, 0(s0)
; RV32I-ILP32-NEXT:    sh a0, 16(s0)
; RV32I-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-ILP32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-ILP32-NEXT:    addi sp, sp, 16
; RV32I-ILP32-NEXT:    ret
;
; RV64I-LP64-LABEL: bfloat_store:
; RV64I-LP64:       # %bb.0:
; RV64I-LP64-NEXT:    addi sp, sp, -16
; RV64I-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-LP64-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-LP64-NEXT:    mv s0, a0
; RV64I-LP64-NEXT:    slliw a0, a1, 16
; RV64I-LP64-NEXT:    slliw a1, a2, 16
; RV64I-LP64-NEXT:    call __addsf3
; RV64I-LP64-NEXT:    call __truncsfbf2
; RV64I-LP64-NEXT:    sh a0, 0(s0)
; RV64I-LP64-NEXT:    sh a0, 16(s0)
; RV64I-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-LP64-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-LP64-NEXT:    addi sp, sp, 16
; RV64I-LP64-NEXT:    ret
;
; RV32ID-ILP32-LABEL: bfloat_store:
; RV32ID-ILP32:       # %bb.0:
; RV32ID-ILP32-NEXT:    addi sp, sp, -16
; RV32ID-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32ID-ILP32-NEXT:    mv s0, a0
; RV32ID-ILP32-NEXT:    slli a2, a2, 16
; RV32ID-ILP32-NEXT:    slli a1, a1, 16
; RV32ID-ILP32-NEXT:    fmv.w.x fa5, a2
; RV32ID-ILP32-NEXT:    fmv.w.x fa4, a1
; RV32ID-ILP32-NEXT:    fadd.s fa5, fa4, fa5
; RV32ID-ILP32-NEXT:    fmv.x.w a0, fa5
; RV32ID-ILP32-NEXT:    call __truncsfbf2
; RV32ID-ILP32-NEXT:    sh a0, 0(s0)
; RV32ID-ILP32-NEXT:    sh a0, 16(s0)
; RV32ID-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32ID-ILP32-NEXT:    addi sp, sp, 16
; RV32ID-ILP32-NEXT:    ret
;
; RV64ID-LP64-LABEL: bfloat_store:
; RV64ID-LP64:       # %bb.0:
; RV64ID-LP64-NEXT:    addi sp, sp, -16
; RV64ID-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
; RV64ID-LP64-NEXT:    mv s0, a0
; RV64ID-LP64-NEXT:    slli a2, a2, 16
; RV64ID-LP64-NEXT:    slli a1, a1, 16
; RV64ID-LP64-NEXT:    fmv.w.x fa5, a2
; RV64ID-LP64-NEXT:    fmv.w.x fa4, a1
; RV64ID-LP64-NEXT:    fadd.s fa5, fa4, fa5
; RV64ID-LP64-NEXT:    fmv.x.w a0, fa5
; RV64ID-LP64-NEXT:    call __truncsfbf2
; RV64ID-LP64-NEXT:    sh a0, 0(s0)
; RV64ID-LP64-NEXT:    sh a0, 16(s0)
; RV64ID-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
; RV64ID-LP64-NEXT:    addi sp, sp, 16
; RV64ID-LP64-NEXT:    ret
;
; RV32ID-ILP32D-LABEL: bfloat_store:
; RV32ID-ILP32D:       # %bb.0:
; RV32ID-ILP32D-NEXT:    addi sp, sp, -16
; RV32ID-ILP32D-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32ID-ILP32D-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32ID-ILP32D-NEXT:    mv s0, a0
; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT:    fmv.x.w a1, fa1
; RV32ID-ILP32D-NEXT:    slli a1, a1, 16
; RV32ID-ILP32D-NEXT:    slli a0, a0, 16
; RV32ID-ILP32D-NEXT:    fmv.w.x fa5, a1
; RV32ID-ILP32D-NEXT:    fmv.w.x fa4, a0
; RV32ID-ILP32D-NEXT:    fadd.s fa0, fa4, fa5
; RV32ID-ILP32D-NEXT:    call __truncsfbf2
; RV32ID-ILP32D-NEXT:    fmv.x.w a0, fa0
; RV32ID-ILP32D-NEXT:    sh a0, 0(s0)
; RV32ID-ILP32D-NEXT:    sh a0, 16(s0)
; RV32ID-ILP32D-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32ID-ILP32D-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32ID-ILP32D-NEXT:    addi sp, sp, 16
; RV32ID-ILP32D-NEXT:    ret
;
; RV64ID-LP64D-LABEL: bfloat_store:
; RV64ID-LP64D:       # %bb.0:
; RV64ID-LP64D-NEXT:    addi sp, sp, -16
; RV64ID-LP64D-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64ID-LP64D-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
; RV64ID-LP64D-NEXT:    mv s0, a0
; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT:    fmv.x.w a1, fa1
; RV64ID-LP64D-NEXT:    slli a1, a1, 16
; RV64ID-LP64D-NEXT:    slli a0, a0, 16
; RV64ID-LP64D-NEXT:    fmv.w.x fa5, a1
; RV64ID-LP64D-NEXT:    fmv.w.x fa4, a0
; RV64ID-LP64D-NEXT:    fadd.s fa0, fa4, fa5
; RV64ID-LP64D-NEXT:    call __truncsfbf2
; RV64ID-LP64D-NEXT:    fmv.x.w a0, fa0
; RV64ID-LP64D-NEXT:    sh a0, 0(s0)
; RV64ID-LP64D-NEXT:    sh a0, 16(s0)
; RV64ID-LP64D-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64ID-LP64D-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
; RV64ID-LP64D-NEXT:    addi sp, sp, 16
; RV64ID-LP64D-NEXT:    ret
  %1 = fadd bfloat %b, %c
  store bfloat %1, ptr %a
  %2 = getelementptr bfloat, ptr %a, i32 8
  store bfloat %1, ptr %2
  ret void
}