; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -o - %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK-NO_FP16
; RUN: llc -o - %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 -mattr=+avx512fp16 | FileCheck %s --check-prefixes=CHECK-WITH_FP16

; Note: Only two configurations are checked (x86-64-v4 with and without
; avx512fp16). More could be covered, but anything that relies on software
; emulation of fp16 generates a ton of assembly code and is not particularly
; interesting.

;----------------------------------------
; i8 input
;----------------------------------------

; Scalar uint8_t to float.
; The expected lowering is the same with and without avx512fp16:
; - zero-extend the i8 argument to 32 bits (movzbl)
; - convert the 32-bit integer to float (vcvtsi2ss)
define float @uint8ToFloat(i8 %int8) {
; CHECK-NO_FP16-LABEL: uint8ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movzbl %dil, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: uint8ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movzbl %dil, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
  %conv = uitofp i8 %int8 to float
  ret float %conv
}

; vector uint8_t to float.
; Same as @uint8ToFloat but with vector types.
define <16 x float> @vector_uint8ToFloat(<16 x i8> %int8) {
; CHECK-NO_FP16-LABEL: vector_uint8ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_uint8ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; CHECK-WITH_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-WITH_FP16-NEXT:    retq
  ; Lane-wise unsigned conversion: each byte is widened to a 32-bit lane
  ; (vpmovzxbd) and the lanes are then converted to float (vcvtdq2ps).
  %conv = uitofp <16 x i8> %int8 to <16 x float>
  ret <16 x float> %conv
}


; uint8_t to half.
;
; If no half support:
; - Go from i8 to i32: zext
; - Convert i32 to float
; - Trunc from float to half (vcvtps2ph), then place the 16-bit result back
;   into the low element of xmm0 (vmovd + vpinsrw)
;
; Else if half support:
; - Go from i8 to i32: zext
; - Convert i32 to half
define half @uint8ToHalf(i8 %int8) {
; CHECK-NO_FP16-LABEL: uint8ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movzbl %dil, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    vmovd %xmm0, %eax
; CHECK-NO_FP16-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: uint8ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movzbl %dil, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
  ; The result is a 16-bit half, so it is named %fp16 (not %fp32).
  %fp16 = uitofp i8 %int8 to half
  ret half %fp16
}

; vector uint8_t to half.
;
; If no half support:
; - Go from i8 to i32: zext
; - Convert i32 to float
; - Trunc from float to half
;
; Else if half support:
; - Go from i8 to i16: zext
; - Convert i16 to half
;
; The difference with the scalar version (uint8ToHalf) is that we use i16
; for the intermediate type when we have half support.
define <16 x half> @vector_uint8ToHalf(<16 x i8> %int8) {
; CHECK-NO_FP16-LABEL: vector_uint8ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %zmm0, %ymm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_uint8ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; CHECK-WITH_FP16-NEXT:    vcvtw2ph %ymm0, %ymm0
; CHECK-WITH_FP16-NEXT:    retq
  ; The result is a vector of 16-bit halves, so it is named %fp16 (not %fp32).
  %fp16 = uitofp <16 x i8> %int8 to <16 x half>
  ret <16 x half> %fp16
}

; Same as uint8_t but with the signed variant.
; I.e., use sext instead of zext.
define float @sint8ToFloat(i8 %int8) {
; CHECK-NO_FP16-LABEL: sint8ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movsbl %dil, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: sint8ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movsbl %dil, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
  %fp32 = sitofp i8 %int8 to float
  ret float %fp32
}

; vector int8_t to float: sign-extend each lane to i32, then convert.
define <16 x float> @vector_sint8ToFloat(<16 x i8> %int8) {
; CHECK-NO_FP16-LABEL: vector_sint8ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovsxbd %xmm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_sint8ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vpmovsxbd %xmm0, %zmm0
; CHECK-WITH_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-WITH_FP16-NEXT:    retq
  %fp32 = sitofp <16 x i8> %int8 to <16 x float>
  ret <16 x float> %fp32
}

; int8_t to half.
; Without half support the conversion goes through float and is then
; truncated; with half support the sign-extended i32 is converted directly.
define half @sint8ToHalf(i8 %int8) {
; CHECK-NO_FP16-LABEL: sint8ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movsbl %dil, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    vmovd %xmm0, %eax
; CHECK-NO_FP16-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: sint8ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movsbl %dil, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
  %fp16 = sitofp i8 %int8 to half
  ret half %fp16
}

; vector int8_t to half.
; Without half support: sext to i32, convert to float, truncate to half.
; With half support: sext to i16, convert i16 to half.
define <16 x half> @vector_sint8ToHalf(<16 x i8> %int8) {
; CHECK-NO_FP16-LABEL: vector_sint8ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovsxbd %xmm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %zmm0, %ymm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_sint8ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vpmovsxbw %xmm0, %ymm0
; CHECK-WITH_FP16-NEXT:    vcvtw2ph %ymm0, %ymm0
; CHECK-WITH_FP16-NEXT:    retq
  %fp16 = sitofp <16 x i8> %int8 to <16 x half>
  ret <16 x half> %fp16
}


;----------------------------------------
; i16 input
;----------------------------------------

; Similar lowering as i8, but with i16 as the input type.

; uint16_t to float: zext to i32, then convert.
define float @uint16ToFloat(i16 %int16) {
; CHECK-NO_FP16-LABEL: uint16ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movzwl %di, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: uint16ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movzwl %di, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
  %fp32 = uitofp i16 %int16 to float
  ret float %fp32
}

; vector uint16_t to float: zero-extend each lane to i32, then convert.
define <16 x float> @vector_uint16ToFloat(<16 x i16> %int16) {
; CHECK-NO_FP16-LABEL: vector_uint16ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_uint16ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; CHECK-WITH_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-WITH_FP16-NEXT:    retq
  %fp32 = uitofp <16 x i16> %int16 to <16 x float>
  ret <16 x float> %fp32
}

; uint16_t to half.
; Without half support the conversion goes through float and is then
; truncated; with half support the zero-extended i32 is converted directly.
define half @uint16ToHalf(i16 %int16) {
; CHECK-NO_FP16-LABEL: uint16ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movzwl %di, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    vmovd %xmm0, %eax
; CHECK-NO_FP16-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: uint16ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movzwl %di, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
  %fp16 = uitofp i16 %int16 to half
  ret half %fp16
}

; vector uint16_t to half.
; Without half support: zext to i32, convert to float, truncate to half.
; With half support: the i16 lanes are converted directly (vcvtuw2ph), with
; no extension step.
define <16 x half> @vector_uint16ToHalf(<16 x i16> %int16) {
; CHECK-NO_FP16-LABEL: vector_uint16ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %zmm0, %ymm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_uint16ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vcvtuw2ph %ymm0, %ymm0
; CHECK-WITH_FP16-NEXT:    retq
  %fp16 = uitofp <16 x i16> %int16 to <16 x half>
  ret <16 x half> %fp16
}

; int16_t to float: sext to i32, then convert.
define float @sint16ToFloat(i16 %int16) {
; CHECK-NO_FP16-LABEL: sint16ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movswl %di, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: sint16ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movswl %di, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
  %fp32 = sitofp i16 %int16 to float
  ret float %fp32
}

; vector int16_t to float: sign-extend each lane to i32, then convert.
define <16 x float> @vector_sint16ToFloat(<16 x i16> %int16) {
; CHECK-NO_FP16-LABEL: vector_sint16ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovsxwd %ymm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_sint16ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vpmovsxwd %ymm0, %zmm0
; CHECK-WITH_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-WITH_FP16-NEXT:    retq
  %fp32 = sitofp <16 x i16> %int16 to <16 x float>
  ret <16 x float> %fp32
}

; int16_t to half.
; Without half support the conversion goes through float and is then
; truncated; with half support the sign-extended i32 is converted directly.
define half @sint16ToHalf(i16 %int16) {
; CHECK-NO_FP16-LABEL: sint16ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movswl %di, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    vmovd %xmm0, %eax
; CHECK-NO_FP16-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: sint16ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movswl %di, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2sh %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
  %fp16 = sitofp i16 %int16 to half
  ret half %fp16
}

; vector int16_t to half.
; Without half support: sext to i32, convert to float, truncate to half.
; With half support: the i16 lanes are converted directly (vcvtw2ph), with
; no extension step.
define <16 x half> @vector_sint16ToHalf(<16 x i16> %int16) {
; CHECK-NO_FP16-LABEL: vector_sint16ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovsxwd %ymm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %zmm0, %ymm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_sint16ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vcvtw2ph %ymm0, %ymm0
; CHECK-WITH_FP16-NEXT:    retq
  %fp16 = sitofp <16 x i16> %int16 to <16 x half>
  ret <16 x half> %fp16
}