; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | FileCheck %s
; RUN: %if ptxas-11.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | %ptxas-verify -arch=sm_80 %}

; Intrinsic conversion tests. Every function in this section forwards its
; float argument(s) to exactly one NVVM conversion intrinsic and returns the
; result; the expected PTX is the parameter load(s), one cvt instruction, the
; store of the return value, and ret.

; ---------------------------------------------------------------------------
; Two f32 values -> <2 x bfloat> (llvm.nvvm.ff2bf16x2.*)
; ---------------------------------------------------------------------------

declare <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn(float, float)
declare <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.relu(float, float)
declare <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz(float, float)
declare <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.relu(float, float)

; .rn variant: expects cvt.rn.bf16x2.f32.
define <2 x bfloat> @cvt_rn_bf16x2_f32(float %f1, float %f2) {
; CHECK-LABEL: cvt_rn_bf16x2_f32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<2>;
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [cvt_rn_bf16x2_f32_param_0];
; CHECK-NEXT:    ld.param.f32 %f2, [cvt_rn_bf16x2_f32_param_1];
; CHECK-NEXT:    cvt.rn.bf16x2.f32 %r1, %f1, %f2;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
; CHECK-NEXT:    ret;
  %pair = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn(float %f1, float %f2)
  ret <2 x bfloat> %pair
}

; .rn.relu variant: expects cvt.rn.relu.bf16x2.f32.
define <2 x bfloat> @cvt_rn_relu_bf16x2_f32(float %f1, float %f2) {
; CHECK-LABEL: cvt_rn_relu_bf16x2_f32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<2>;
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [cvt_rn_relu_bf16x2_f32_param_0];
; CHECK-NEXT:    ld.param.f32 %f2, [cvt_rn_relu_bf16x2_f32_param_1];
; CHECK-NEXT:    cvt.rn.relu.bf16x2.f32 %r1, %f1, %f2;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
; CHECK-NEXT:    ret;
  %pair = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.relu(float %f1, float %f2)
  ret <2 x bfloat> %pair
}

; .rz variant: expects cvt.rz.bf16x2.f32.
define <2 x bfloat> @cvt_rz_bf16x2_f32(float %f1, float %f2) {
; CHECK-LABEL: cvt_rz_bf16x2_f32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<2>;
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [cvt_rz_bf16x2_f32_param_0];
; CHECK-NEXT:    ld.param.f32 %f2, [cvt_rz_bf16x2_f32_param_1];
; CHECK-NEXT:    cvt.rz.bf16x2.f32 %r1, %f1, %f2;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
; CHECK-NEXT:    ret;
  %pair = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz(float %f1, float %f2)
  ret <2 x bfloat> %pair
}

; .rz.relu variant: expects cvt.rz.relu.bf16x2.f32.
define <2 x bfloat> @cvt_rz_relu_bf16x2_f32(float %f1, float %f2) {
; CHECK-LABEL: cvt_rz_relu_bf16x2_f32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<2>;
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [cvt_rz_relu_bf16x2_f32_param_0];
; CHECK-NEXT:    ld.param.f32 %f2, [cvt_rz_relu_bf16x2_f32_param_1];
; CHECK-NEXT:    cvt.rz.relu.bf16x2.f32 %r1, %f1, %f2;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
; CHECK-NEXT:    ret;
  %pair = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.relu(float %f1, float %f2)
  ret <2 x bfloat> %pair
}

; ---------------------------------------------------------------------------
; Two f32 values -> <2 x half> (llvm.nvvm.ff2f16x2.*)
; ---------------------------------------------------------------------------

declare <2 x half> @llvm.nvvm.ff2f16x2.rn(float, float)
declare <2 x half> @llvm.nvvm.ff2f16x2.rn.relu(float, float)
declare <2 x half> @llvm.nvvm.ff2f16x2.rz(float, float)
declare <2 x half> @llvm.nvvm.ff2f16x2.rz.relu(float, float)

; .rn variant: expects cvt.rn.f16x2.f32.
define <2 x half> @cvt_rn_f16x2_f32(float %f1, float %f2) {
; CHECK-LABEL: cvt_rn_f16x2_f32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<2>;
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [cvt_rn_f16x2_f32_param_0];
; CHECK-NEXT:    ld.param.f32 %f2, [cvt_rn_f16x2_f32_param_1];
; CHECK-NEXT:    cvt.rn.f16x2.f32 %r1, %f1, %f2;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
; CHECK-NEXT:    ret;
  %pair = call <2 x half> @llvm.nvvm.ff2f16x2.rn(float %f1, float %f2)
  ret <2 x half> %pair
}

; .rn.relu variant: expects cvt.rn.relu.f16x2.f32.
define <2 x half> @cvt_rn_relu_f16x2_f32(float %f1, float %f2) {
; CHECK-LABEL: cvt_rn_relu_f16x2_f32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<2>;
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [cvt_rn_relu_f16x2_f32_param_0];
; CHECK-NEXT:    ld.param.f32 %f2, [cvt_rn_relu_f16x2_f32_param_1];
; CHECK-NEXT:    cvt.rn.relu.f16x2.f32 %r1, %f1, %f2;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
; CHECK-NEXT:    ret;
  %pair = call <2 x half> @llvm.nvvm.ff2f16x2.rn.relu(float %f1, float %f2)
  ret <2 x half> %pair
}

; .rz variant: expects cvt.rz.f16x2.f32.
define <2 x half> @cvt_rz_f16x2_f32(float %f1, float %f2) {
; CHECK-LABEL: cvt_rz_f16x2_f32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<2>;
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [cvt_rz_f16x2_f32_param_0];
; CHECK-NEXT:    ld.param.f32 %f2, [cvt_rz_f16x2_f32_param_1];
; CHECK-NEXT:    cvt.rz.f16x2.f32 %r1, %f1, %f2;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
; CHECK-NEXT:    ret;
  %pair = call <2 x half> @llvm.nvvm.ff2f16x2.rz(float %f1, float %f2)
  ret <2 x half> %pair
}

; .rz.relu variant: expects cvt.rz.relu.f16x2.f32.
define <2 x half> @cvt_rz_relu_f16x2_f32(float %f1, float %f2) {
; CHECK-LABEL: cvt_rz_relu_f16x2_f32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<2>;
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [cvt_rz_relu_f16x2_f32_param_0];
; CHECK-NEXT:    ld.param.f32 %f2, [cvt_rz_relu_f16x2_f32_param_1];
; CHECK-NEXT:    cvt.rz.relu.f16x2.f32 %r1, %f1, %f2;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
; CHECK-NEXT:    ret;
  %pair = call <2 x half> @llvm.nvvm.ff2f16x2.rz.relu(float %f1, float %f2)
  ret <2 x half> %pair
}

; ---------------------------------------------------------------------------
; One f32 value -> scalar bfloat (llvm.nvvm.f2bf16.*)
; The result lives in a 16-bit register and is stored with st.param.b16.
; ---------------------------------------------------------------------------

; .rn variant: expects cvt.rn.bf16.f32.
define bfloat @cvt_rn_bf16_f32(float %f1) {
; CHECK-LABEL: cvt_rn_bf16_f32(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<2>;
; CHECK-NEXT:    .reg .f32 %f<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [cvt_rn_bf16_f32_param_0];
; CHECK-NEXT:    cvt.rn.bf16.f32 %rs1, %f1;
; CHECK-NEXT:    st.param.b16 [func_retval0], %rs1;
; CHECK-NEXT:    ret;
  %cvt = call bfloat @llvm.nvvm.f2bf16.rn(float %f1)
  ret bfloat %cvt
}

; Scalar f32 -> bfloat intrinsics (llvm.nvvm.f2bf16.*). Each function below
; that calls one of them is expected to lower to a single cvt.*.bf16.f32 whose
; result is held in a 16-bit register and stored with st.param.b16.
declare bfloat @llvm.nvvm.f2bf16.rn(float)
declare bfloat @llvm.nvvm.f2bf16.rn.relu(float)
declare bfloat @llvm.nvvm.f2bf16.rz(float)
declare bfloat @llvm.nvvm.f2bf16.rz.relu(float)

; .rn.relu variant: expects cvt.rn.relu.bf16.f32.
define bfloat @cvt_rn_relu_bf16_f32(float %f1) {
; CHECK-LABEL: cvt_rn_relu_bf16_f32(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<2>;
; CHECK-NEXT:    .reg .f32 %f<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [cvt_rn_relu_bf16_f32_param_0];
; CHECK-NEXT:    cvt.rn.relu.bf16.f32 %rs1, %f1;
; CHECK-NEXT:    st.param.b16 [func_retval0], %rs1;
; CHECK-NEXT:    ret;
  %cvt = call bfloat @llvm.nvvm.f2bf16.rn.relu(float %f1)
  ret bfloat %cvt
}

; .rz variant: expects cvt.rz.bf16.f32.
define bfloat @cvt_rz_bf16_f32(float %f1) {
; CHECK-LABEL: cvt_rz_bf16_f32(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<2>;
; CHECK-NEXT:    .reg .f32 %f<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [cvt_rz_bf16_f32_param_0];
; CHECK-NEXT:    cvt.rz.bf16.f32 %rs1, %f1;
; CHECK-NEXT:    st.param.b16 [func_retval0], %rs1;
; CHECK-NEXT:    ret;
  %cvt = call bfloat @llvm.nvvm.f2bf16.rz(float %f1)
  ret bfloat %cvt
}

; .rz.relu variant: expects cvt.rz.relu.bf16.f32.
define bfloat @cvt_rz_relu_bf16_f32(float %f1) {
; CHECK-LABEL: cvt_rz_relu_bf16_f32(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<2>;
; CHECK-NEXT:    .reg .f32 %f<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [cvt_rz_relu_bf16_f32_param_0];
; CHECK-NEXT:    cvt.rz.relu.bf16.f32 %rs1, %f1;
; CHECK-NEXT:    st.param.b16 [func_retval0], %rs1;
; CHECK-NEXT:    ret;
  %cvt = call bfloat @llvm.nvvm.f2bf16.rz.relu(float %f1)
  ret bfloat %cvt
}

; ---------------------------------------------------------------------------
; One f32 value -> tf32, returned as i32 (llvm.nvvm.f2tf32.rna)
; ---------------------------------------------------------------------------

declare i32 @llvm.nvvm.f2tf32.rna(float)

; Expects cvt.rna.tf32.f32 writing a 32-bit register.
define i32 @cvt_rna_tf32_f32(float %f1) {
; CHECK-LABEL: cvt_rna_tf32_f32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<2>;
; CHECK-NEXT:    .reg .f32 %f<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [cvt_rna_tf32_f32_param_0];
; CHECK-NEXT:    cvt.rna.tf32.f32 %r1, %f1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
; CHECK-NEXT:    ret;
  %bits = call i32 @llvm.nvvm.f2tf32.rna(float %f1)
  ret i32 %bits
}

; ---------------------------------------------------------------------------
; Folding of generic IR into the packed conversions.
; These functions use no intrinsic: two scalar fptrunc results are inserted
; into a 2-element vector, and the expected PTX is a single packed cvt.rn.
; In the expected output the cvt's first source is the register loaded from
; parameter 1 (%hi, vector element 1) and its second source is the register
; loaded from parameter 0 (%lo, vector element 0).
; ---------------------------------------------------------------------------

; fptrunc to bfloat x2 + insertelement: expects cvt.rn.bf16x2.f32.
define <2 x bfloat> @fold_ff2bf16x2(float %lo, float %hi) {
; CHECK-LABEL: fold_ff2bf16x2(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<2>;
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [fold_ff2bf16x2_param_0];
; CHECK-NEXT:    ld.param.f32 %f2, [fold_ff2bf16x2_param_1];
; CHECK-NEXT:    cvt.rn.bf16x2.f32 %r1, %f2, %f1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
; CHECK-NEXT:    ret;
  %lo.bf = fptrunc float %lo to bfloat
  %hi.bf = fptrunc float %hi to bfloat
  %vec.lo = insertelement <2 x bfloat> poison, bfloat %lo.bf, i64 0
  %vec = insertelement <2 x bfloat> %vec.lo, bfloat %hi.bf, i64 1
  ret <2 x bfloat> %vec
}

; fptrunc to half x2 + insertelement: expects cvt.rn.f16x2.f32.
define <2 x half> @fold_ff2f16x2(float %lo, float %hi) {
; CHECK-LABEL: fold_ff2f16x2(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<2>;
; CHECK-NEXT:    .reg .f32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.f32 %f1, [fold_ff2f16x2_param_0];
; CHECK-NEXT:    ld.param.f32 %f2, [fold_ff2f16x2_param_1];
; CHECK-NEXT:    cvt.rn.f16x2.f32 %r1, %f2, %f1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
; CHECK-NEXT:    ret;
  %lo.h = fptrunc float %lo to half
  %hi.h = fptrunc float %hi to half
  %vec.lo = insertelement <2 x half> poison, half %lo.h, i64 0
  %vec = insertelement <2 x half> %vec.lo, half %hi.h, i64 1
  ret <2 x half> %vec
}