; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mcpu=sm_50 -mattr=+ptx32 | FileCheck --check-prefixes=CHECK %s
; RUN: llc < %s -mcpu=sm_75 -mattr=+ptx70 | FileCheck --check-prefixes=CHECK-FP16 %s
; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx78 | FileCheck --check-prefixes=CHECK-BF16 %s
; RUN: %if ptxas-12.0 %{ llc < %s -mcpu=sm_50 -mattr=+ptx32 | %ptxas-verify -arch=sm_50 %}
; RUN: %if ptxas-12.0 %{ llc < %s -mcpu=sm_75 -mattr=+ptx70 | %ptxas-verify -arch=sm_75 %}
; RUN: %if ptxas-12.0 %{ llc < %s -mcpu=sm_90 -mattr=+ptx78 | %ptxas-verify -arch=sm_90 %}

; Overview of this test.
; It pins the PTX that llc emits for the llvm.exp2 intrinsic on f32, f16 and
; bf16 operands, in scalar and <2 x ...> vector form, for three configurations
; taken from the llc invocations at the top of the file.
;   sm_50 with ptx32 is verified by the assertions using the "CHECK" prefix.
;   sm_75 with ptx70 is verified by the assertions using the "CHECK-FP16" prefix.
;   sm_90 with ptx78 is verified by the assertions using the "CHECK-BF16" prefix.
; The last three invocations additionally pipe the same llc output through
; the ptxas-verify substitution, guarded by the ptxas-12.0 condition.
; The assertion blocks are autogenerated (see the NOTE on the first line), so
; regenerate them with the script rather than editing them by hand.
target triple = "nvptx64-nvidia-cuda"

; --- f32 ---

; Scalar f32 without attribute #0. All three configurations expect a single
; ex2.approx.f32 between the parameter load and the return-value store.
; CHECK-LABEL: exp2_test
define float @exp2_test(float %in) {
; CHECK-LABEL: exp2_test(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: ld.param.f32 %f1, [exp2_test_param_0];
; CHECK-NEXT: ex2.approx.f32 %f2, %f1;
; CHECK-NEXT: st.param.f32 [func_retval0], %f2;
; CHECK-NEXT: ret;
;
; CHECK-FP16-LABEL: exp2_test(
; CHECK-FP16: {
; CHECK-FP16-NEXT: .reg .f32 %f<3>;
; CHECK-FP16-EMPTY:
; CHECK-FP16-NEXT: // %bb.0: // %entry
; CHECK-FP16-NEXT: ld.param.f32 %f1, [exp2_test_param_0];
; CHECK-FP16-NEXT: ex2.approx.f32 %f2, %f1;
; CHECK-FP16-NEXT: st.param.f32 [func_retval0], %f2;
; CHECK-FP16-NEXT: ret;
;
; CHECK-BF16-LABEL: exp2_test(
; CHECK-BF16: {
; CHECK-BF16-NEXT: .reg .f32 %f<3>;
; CHECK-BF16-EMPTY:
; CHECK-BF16-NEXT: // %bb.0: // %entry
; CHECK-BF16-NEXT: ld.param.f32 %f1, [exp2_test_param_0];
; CHECK-BF16-NEXT: ex2.approx.f32 %f2, %f1;
; CHECK-BF16-NEXT: st.param.f32 [func_retval0], %f2;
; CHECK-BF16-NEXT: ret;
entry:
  %exp2 = call float @llvm.exp2.f32(float %in)
  ret float %exp2
}

; Scalar f32 with attribute #0 ("denormal-fp-math"="preserve-sign"). All three
; configurations expect the flush-to-zero form ex2.approx.ftz.f32.
; CHECK-LABEL: exp2_ftz_test
define float @exp2_ftz_test(float %in) #0 {
; CHECK-LABEL: exp2_ftz_test(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: ld.param.f32 %f1, [exp2_ftz_test_param_0];
; CHECK-NEXT: ex2.approx.ftz.f32 %f2, %f1;
; CHECK-NEXT: st.param.f32 [func_retval0], %f2;
; CHECK-NEXT: ret;
;
; CHECK-FP16-LABEL: exp2_ftz_test(
; CHECK-FP16: {
; CHECK-FP16-NEXT: .reg .f32 %f<3>;
; CHECK-FP16-EMPTY:
; CHECK-FP16-NEXT: // %bb.0: // %entry
; CHECK-FP16-NEXT: ld.param.f32 %f1, [exp2_ftz_test_param_0];
; CHECK-FP16-NEXT: ex2.approx.ftz.f32 %f2, %f1;
; CHECK-FP16-NEXT: st.param.f32 [func_retval0], %f2;
; CHECK-FP16-NEXT: ret;
;
; CHECK-BF16-LABEL: exp2_ftz_test(
; CHECK-BF16: {
; CHECK-BF16-NEXT: .reg .f32 %f<3>;
; CHECK-BF16-EMPTY:
; CHECK-BF16-NEXT: // %bb.0: // %entry
; CHECK-BF16-NEXT: ld.param.f32 %f1, [exp2_ftz_test_param_0];
; CHECK-BF16-NEXT: ex2.approx.ftz.f32 %f2, %f1;
; CHECK-BF16-NEXT: st.param.f32 [func_retval0], %f2;
; CHECK-BF16-NEXT: ret;
entry:
  %exp2 = call float @llvm.exp2.f32(float %in)
  ret float %exp2
}

; <2 x float> vector. All three configurations expect the operation to be
; split into two scalar ex2.approx.f32 instructions, one per lane.
; CHECK-LABEL: exp2_test_v
define <2 x float> @exp2_test_v(<2 x float> %in) {
; CHECK-LABEL: exp2_test_v(
; CHECK: {
; CHECK-NEXT: .reg .f32 %f<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: ld.param.v2.f32 {%f1, %f2}, [exp2_test_v_param_0];
; CHECK-NEXT: ex2.approx.f32 %f3, %f2;
; CHECK-NEXT: ex2.approx.f32 %f4, %f1;
; CHECK-NEXT: st.param.v2.f32 [func_retval0], {%f4, %f3};
; CHECK-NEXT: ret;
;
; CHECK-FP16-LABEL: exp2_test_v(
; CHECK-FP16: {
; CHECK-FP16-NEXT: .reg .f32 %f<5>;
; CHECK-FP16-EMPTY:
; CHECK-FP16-NEXT: // %bb.0: // %entry
; CHECK-FP16-NEXT: ld.param.v2.f32 {%f1, %f2}, [exp2_test_v_param_0];
; CHECK-FP16-NEXT: ex2.approx.f32 %f3, %f2;
; CHECK-FP16-NEXT: ex2.approx.f32 %f4, %f1;
; CHECK-FP16-NEXT: st.param.v2.f32 [func_retval0], {%f4, %f3};
; CHECK-FP16-NEXT: ret;
;
; CHECK-BF16-LABEL: exp2_test_v(
; CHECK-BF16: {
; CHECK-BF16-NEXT: .reg .f32 %f<5>;
; CHECK-BF16-EMPTY:
; CHECK-BF16-NEXT: // %bb.0: // %entry
; CHECK-BF16-NEXT: ld.param.v2.f32 {%f1, %f2}, [exp2_test_v_param_0];
; CHECK-BF16-NEXT: ex2.approx.f32 %f3, %f2;
; CHECK-BF16-NEXT: ex2.approx.f32 %f4, %f1;
; CHECK-BF16-NEXT: st.param.v2.f32 [func_retval0], {%f4, %f3};
; CHECK-BF16-NEXT: ret;
entry:
  %exp2 = call <2 x float> @llvm.exp2.v2f32(<2 x float> %in)
  ret <2 x float> %exp2
}

; --- f16 ---

; Scalar f16. The sm_50 assertions expect a round trip through f32
; (cvt.f32.f16, ex2.approx.f32, cvt.rn.f16.f32); the sm_75 and sm_90
; assertions expect a single ex2.approx.f16.
; CHECK-LABEL: exp2_f16_test
define half @exp2_f16_test(half %in) {
; CHECK-LABEL: exp2_f16_test(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: ld.param.b16 %rs1, [exp2_f16_test_param_0];
; CHECK-NEXT: cvt.f32.f16 %f1, %rs1;
; CHECK-NEXT: ex2.approx.f32 %f2, %f1;
; CHECK-NEXT: cvt.rn.f16.f32 %rs2, %f2;
; CHECK-NEXT: st.param.b16 [func_retval0], %rs2;
; CHECK-NEXT: ret;
;
; CHECK-FP16-LABEL: exp2_f16_test(
; CHECK-FP16: {
; CHECK-FP16-NEXT: .reg .b16 %rs<3>;
; CHECK-FP16-EMPTY:
; CHECK-FP16-NEXT: // %bb.0: // %entry
; CHECK-FP16-NEXT: ld.param.b16 %rs1, [exp2_f16_test_param_0];
; CHECK-FP16-NEXT: ex2.approx.f16 %rs2, %rs1;
; CHECK-FP16-NEXT: st.param.b16 [func_retval0], %rs2;
; CHECK-FP16-NEXT: ret;
;
; CHECK-BF16-LABEL: exp2_f16_test(
; CHECK-BF16: {
; CHECK-BF16-NEXT: .reg .b16 %rs<3>;
; CHECK-BF16-EMPTY:
; CHECK-BF16-NEXT: // %bb.0: // %entry
; CHECK-BF16-NEXT: ld.param.b16 %rs1, [exp2_f16_test_param_0];
; CHECK-BF16-NEXT: ex2.approx.f16 %rs2, %rs1;
; CHECK-BF16-NEXT: st.param.b16 [func_retval0], %rs2;
; CHECK-BF16-NEXT: ret;
entry:
  %exp2 = call half @llvm.exp2.f16(half %in)
  ret half %exp2
}

; Scalar f16 with attribute #0. The native f16 instruction expected on sm_75
; and sm_90 carries no .ftz modifier; only the sm_50 expansion through f32
; uses .ftz (on the widening cvt and on ex2.approx).
; COM: we should never have .ftz for f16
; CHECK-LABEL: exp2_f16_ftz_test
define half @exp2_f16_ftz_test(half %in) #0 {
; CHECK-LABEL: exp2_f16_ftz_test(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: ld.param.b16 %rs1, [exp2_f16_ftz_test_param_0];
; CHECK-NEXT: cvt.ftz.f32.f16 %f1, %rs1;
; CHECK-NEXT: ex2.approx.ftz.f32 %f2, %f1;
; CHECK-NEXT: cvt.rn.f16.f32 %rs2, %f2;
; CHECK-NEXT: st.param.b16 [func_retval0], %rs2;
; CHECK-NEXT: ret;
;
; CHECK-FP16-LABEL: exp2_f16_ftz_test(
; CHECK-FP16: {
; CHECK-FP16-NEXT: .reg .b16 %rs<3>;
; CHECK-FP16-EMPTY:
; CHECK-FP16-NEXT: // %bb.0: // %entry
; CHECK-FP16-NEXT: ld.param.b16 %rs1, [exp2_f16_ftz_test_param_0];
; CHECK-FP16-NEXT: ex2.approx.f16 %rs2, %rs1;
; CHECK-FP16-NEXT: st.param.b16 [func_retval0], %rs2;
; CHECK-FP16-NEXT: ret;
;
; CHECK-BF16-LABEL: exp2_f16_ftz_test(
; CHECK-BF16: {
; CHECK-BF16-NEXT: .reg .b16 %rs<3>;
; CHECK-BF16-EMPTY:
; CHECK-BF16-NEXT: // %bb.0: // %entry
; CHECK-BF16-NEXT: ld.param.b16 %rs1, [exp2_f16_ftz_test_param_0];
; CHECK-BF16-NEXT: ex2.approx.f16 %rs2, %rs1;
; CHECK-BF16-NEXT: st.param.b16 [func_retval0], %rs2;
; CHECK-BF16-NEXT: ret;
entry:
  %exp2 = call half @llvm.exp2.f16(half %in)
  ret half %exp2
}

; <2 x half> vector. The sm_50 assertions expect the packed value to be
; unpacked and each lane computed through f32; the sm_75 and sm_90 assertions
; expect a single packed ex2.approx.f16x2.
; CHECK-LABEL: exp2_f16_test_v
define <2 x half> @exp2_f16_test_v(<2 x half> %in) {
; CHECK-LABEL: exp2_f16_test_v(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<5>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-NEXT: .reg .f32 %f<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: ld.param.b32 %r1, [exp2_f16_test_v_param_0];
; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1;
; CHECK-NEXT: cvt.f32.f16 %f1, %rs2;
; CHECK-NEXT: ex2.approx.f32 %f2, %f1;
; CHECK-NEXT: cvt.rn.f16.f32 %rs3, %f2;
; CHECK-NEXT: cvt.f32.f16 %f3, %rs1;
; CHECK-NEXT: ex2.approx.f32 %f4, %f3;
; CHECK-NEXT: cvt.rn.f16.f32 %rs4, %f4;
; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3};
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
;
; CHECK-FP16-LABEL: exp2_f16_test_v(
; CHECK-FP16: {
; CHECK-FP16-NEXT: .reg .b32 %r<3>;
; CHECK-FP16-EMPTY:
; CHECK-FP16-NEXT: // %bb.0: // %entry
; CHECK-FP16-NEXT: ld.param.b32 %r1, [exp2_f16_test_v_param_0];
; CHECK-FP16-NEXT: ex2.approx.f16x2 %r2, %r1;
; CHECK-FP16-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-FP16-NEXT: ret;
;
; CHECK-BF16-LABEL: exp2_f16_test_v(
; CHECK-BF16: {
; CHECK-BF16-NEXT: .reg .b32 %r<3>;
; CHECK-BF16-EMPTY:
; CHECK-BF16-NEXT: // %bb.0: // %entry
; CHECK-BF16-NEXT: ld.param.b32 %r1, [exp2_f16_test_v_param_0];
; CHECK-BF16-NEXT: ex2.approx.f16x2 %r2, %r1;
; CHECK-BF16-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-BF16-NEXT: ret;
entry:
  %exp2 = call <2 x half> @llvm.exp2.v2f16(<2 x half> %in)
  ret <2 x half> %exp2
}

; --- bf16 ---

; Scalar bf16 without attribute #0. The sm_50 and sm_75 assertions expect the
; bf16 bits to be shifted into the upper half of a 32-bit register, evaluated
; with ex2.approx.f32, and narrowed back with an integer sequence
; (bfe / add / setp.nan / or / selp). The sm_90 assertions expect a single
; ex2.approx.ftz.bf16, i.e. the native instruction carries .ftz even though
; this function does not use attribute #0.
; COM: we should always have .ftz for bf16
; CHECK-LABEL: exp2_bf16_test
define bfloat @exp2_bf16_test(bfloat %in) {
; CHECK-LABEL: exp2_bf16_test(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<2>;
; CHECK-NEXT: .reg .b32 %r<9>;
; CHECK-NEXT: .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: ld.param.u16 %r1, [exp2_bf16_test_param_0];
; CHECK-NEXT: shl.b32 %r2, %r1, 16;
; CHECK-NEXT: mov.b32 %f1, %r2;
; CHECK-NEXT: ex2.approx.f32 %f2, %f1;
; CHECK-NEXT: mov.b32 %r3, %f2;
; CHECK-NEXT: bfe.u32 %r4, %r3, 16, 1;
; CHECK-NEXT: add.s32 %r5, %r4, %r3;
; CHECK-NEXT: add.s32 %r6, %r5, 32767;
; CHECK-NEXT: setp.nan.f32 %p1, %f2, %f2;
; CHECK-NEXT: or.b32 %r7, %r3, 4194304;
; CHECK-NEXT: selp.b32 %r8, %r7, %r6, %p1;
; CHECK-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r8; }
; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
; CHECK-NEXT: ret;
;
; CHECK-FP16-LABEL: exp2_bf16_test(
; CHECK-FP16: {
; CHECK-FP16-NEXT: .reg .pred %p<2>;
; CHECK-FP16-NEXT: .reg .b16 %rs<2>;
; CHECK-FP16-NEXT: .reg .b32 %r<9>;
; CHECK-FP16-NEXT: .reg .f32 %f<3>;
; CHECK-FP16-EMPTY:
; CHECK-FP16-NEXT: // %bb.0: // %entry
; CHECK-FP16-NEXT: ld.param.u16 %r1, [exp2_bf16_test_param_0];
; CHECK-FP16-NEXT: shl.b32 %r2, %r1, 16;
; CHECK-FP16-NEXT: mov.b32 %f1, %r2;
; CHECK-FP16-NEXT: ex2.approx.f32 %f2, %f1;
; CHECK-FP16-NEXT: mov.b32 %r3, %f2;
; CHECK-FP16-NEXT: bfe.u32 %r4, %r3, 16, 1;
; CHECK-FP16-NEXT: add.s32 %r5, %r4, %r3;
; CHECK-FP16-NEXT: add.s32 %r6, %r5, 32767;
; CHECK-FP16-NEXT: setp.nan.f32 %p1, %f2, %f2;
; CHECK-FP16-NEXT: or.b32 %r7, %r3, 4194304;
; CHECK-FP16-NEXT: selp.b32 %r8, %r7, %r6, %p1;
; CHECK-FP16-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r8; }
; CHECK-FP16-NEXT: st.param.b16 [func_retval0], %rs1;
; CHECK-FP16-NEXT: ret;
;
; CHECK-BF16-LABEL: exp2_bf16_test(
; CHECK-BF16: {
; CHECK-BF16-NEXT: .reg .b16 %rs<3>;
; CHECK-BF16-EMPTY:
; CHECK-BF16-NEXT: // %bb.0: // %entry
; CHECK-BF16-NEXT: ld.param.b16 %rs1, [exp2_bf16_test_param_0];
; CHECK-BF16-NEXT: ex2.approx.ftz.bf16 %rs2, %rs1;
; CHECK-BF16-NEXT: st.param.b16 [func_retval0], %rs2;
; CHECK-BF16-NEXT: ret;
entry:
  %exp2 = call bfloat @llvm.exp2.bf16(bfloat %in)
  ret bfloat %exp2
}

; <2 x bfloat> vector. The sm_50 and sm_75 assertions expect each lane to go
; through the same f32 expansion as the scalar bf16 case, with the two results
; recombined by prmt.b32. The sm_90 assertions expect a single packed
; ex2.approx.ftz.bf16x2.
; CHECK-LABEL: exp2_bf16_test_v
define <2 x bfloat> @exp2_bf16_test_v(<2 x bfloat> %in) {
; CHECK-LABEL: exp2_bf16_test_v(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<3>;
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b32 %r<19>;
; CHECK-NEXT: .reg .f32 %f<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: ld.param.b32 %r1, [exp2_bf16_test_v_param_0];
; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1;
; CHECK-NEXT: cvt.u32.u16 %r2, %rs2;
; CHECK-NEXT: shl.b32 %r3, %r2, 16;
; CHECK-NEXT: mov.b32 %f1, %r3;
; CHECK-NEXT: ex2.approx.f32 %f2, %f1;
; CHECK-NEXT: mov.b32 %r4, %f2;
; CHECK-NEXT: bfe.u32 %r5, %r4, 16, 1;
; CHECK-NEXT: add.s32 %r6, %r5, %r4;
; CHECK-NEXT: add.s32 %r7, %r6, 32767;
; CHECK-NEXT: setp.nan.f32 %p1, %f2, %f2;
; CHECK-NEXT: or.b32 %r8, %r4, 4194304;
; CHECK-NEXT: selp.b32 %r9, %r8, %r7, %p1;
; CHECK-NEXT: cvt.u32.u16 %r10, %rs1;
; CHECK-NEXT: shl.b32 %r11, %r10, 16;
; CHECK-NEXT: mov.b32 %f3, %r11;
; CHECK-NEXT: ex2.approx.f32 %f4, %f3;
; CHECK-NEXT: mov.b32 %r12, %f4;
; CHECK-NEXT: bfe.u32 %r13, %r12, 16, 1;
; CHECK-NEXT: add.s32 %r14, %r13, %r12;
; CHECK-NEXT: add.s32 %r15, %r14, 32767;
; CHECK-NEXT: setp.nan.f32 %p2, %f4, %f4;
; CHECK-NEXT: or.b32 %r16, %r12, 4194304;
; CHECK-NEXT: selp.b32 %r17, %r16, %r15, %p2;
; CHECK-NEXT: prmt.b32 %r18, %r17, %r9, 0x7632U;
; CHECK-NEXT: st.param.b32 [func_retval0], %r18;
; CHECK-NEXT: ret;
;
; CHECK-FP16-LABEL: exp2_bf16_test_v(
; CHECK-FP16: {
; CHECK-FP16-NEXT: .reg .pred %p<3>;
; CHECK-FP16-NEXT: .reg .b16 %rs<3>;
; CHECK-FP16-NEXT: .reg .b32 %r<19>;
; CHECK-FP16-NEXT: .reg .f32 %f<5>;
; CHECK-FP16-EMPTY:
; CHECK-FP16-NEXT: // %bb.0: // %entry
; CHECK-FP16-NEXT: ld.param.b32 %r1, [exp2_bf16_test_v_param_0];
; CHECK-FP16-NEXT: mov.b32 {%rs1, %rs2}, %r1;
; CHECK-FP16-NEXT: cvt.u32.u16 %r2, %rs2;
; CHECK-FP16-NEXT: shl.b32 %r3, %r2, 16;
; CHECK-FP16-NEXT: mov.b32 %f1, %r3;
; CHECK-FP16-NEXT: ex2.approx.f32 %f2, %f1;
; CHECK-FP16-NEXT: mov.b32 %r4, %f2;
; CHECK-FP16-NEXT: bfe.u32 %r5, %r4, 16, 1;
; CHECK-FP16-NEXT: add.s32 %r6, %r5, %r4;
; CHECK-FP16-NEXT: add.s32 %r7, %r6, 32767;
; CHECK-FP16-NEXT: setp.nan.f32 %p1, %f2, %f2;
; CHECK-FP16-NEXT: or.b32 %r8, %r4, 4194304;
; CHECK-FP16-NEXT: selp.b32 %r9, %r8, %r7, %p1;
; CHECK-FP16-NEXT: cvt.u32.u16 %r10, %rs1;
; CHECK-FP16-NEXT: shl.b32 %r11, %r10, 16;
; CHECK-FP16-NEXT: mov.b32 %f3, %r11;
; CHECK-FP16-NEXT: ex2.approx.f32 %f4, %f3;
; CHECK-FP16-NEXT: mov.b32 %r12, %f4;
; CHECK-FP16-NEXT: bfe.u32 %r13, %r12, 16, 1;
; CHECK-FP16-NEXT: add.s32 %r14, %r13, %r12;
; CHECK-FP16-NEXT: add.s32 %r15, %r14, 32767;
; CHECK-FP16-NEXT: setp.nan.f32 %p2, %f4, %f4;
; CHECK-FP16-NEXT: or.b32 %r16, %r12, 4194304;
; CHECK-FP16-NEXT: selp.b32 %r17, %r16, %r15, %p2;
; CHECK-FP16-NEXT: prmt.b32 %r18, %r17, %r9, 0x7632U;
; CHECK-FP16-NEXT: st.param.b32 [func_retval0], %r18;
; CHECK-FP16-NEXT: ret;
;
; CHECK-BF16-LABEL: exp2_bf16_test_v(
; CHECK-BF16: {
; CHECK-BF16-NEXT: .reg .b32 %r<3>;
; CHECK-BF16-EMPTY:
; CHECK-BF16-NEXT: // %bb.0: // %entry
; CHECK-BF16-NEXT: ld.param.b32 %r1, [exp2_bf16_test_v_param_0];
; CHECK-BF16-NEXT: ex2.approx.ftz.bf16x2 %r2, %r1;
; CHECK-BF16-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-BF16-NEXT: ret;
entry:
  %exp2 = call <2 x bfloat> @llvm.exp2.v2bf16(<2 x bfloat> %in)
  ret <2 x bfloat> %exp2
}

; Declarations of the llvm.exp2 overloads called by the functions in this file.
declare float @llvm.exp2.f32(float %val)

declare <2 x float> @llvm.exp2.v2f32(<2 x float> %val)

declare half @llvm.exp2.f16(half %val)

declare <2 x half> @llvm.exp2.v2f16(<2 x half> %val)

declare bfloat @llvm.exp2.bf16(bfloat %val)

declare <2 x bfloat> @llvm.exp2.v2bf16(<2 x bfloat> %val)

; Attribute group used by the *_ftz_* functions in this file.
attributes #0 = {"denormal-fp-math"="preserve-sign"}