; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s | FileCheck %s

target triple = "aarch64"

; This file exercises two AArch64 int-to-fp lowerings:
;   * an arithmetic shift right feeding a vector sitofp, which is expected to
;     become a single fixed-point scvtf when the shift carries the 'exact' flag;
;   * sitofp/uitofp of an element extracted from a vector, which is expected to
;     convert directly on the FP/SIMD register when the lane is the constant 0.

; ---- Corner cases for the shift + convert combination ----

; An exact shift by zero: expected output is a plain scvtf with no immediate.
define <4 x float> @f_v4_s0(<4 x i32> %u) {
; CHECK-LABEL: f_v4_s0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    scvtf v0.4s, v0.4s
; CHECK-NEXT:    ret
  %shifted = ashr exact <4 x i32> %u, <i32 0, i32 0, i32 0, i32 0>
  %conv = sitofp <4 x i32> %shifted to <4 x float>
  ret <4 x float> %conv
}

; Smallest non-zero exact shift: expected to appear as the #1 immediate of scvtf.
define <4 x float> @f_v4_s1(<4 x i32> %u) {
; CHECK-LABEL: f_v4_s1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    scvtf v0.4s, v0.4s, #1
; CHECK-NEXT:    ret
  %shifted = ashr exact <4 x i32> %u, <i32 1, i32 1, i32 1, i32 1>
  %conv = sitofp <4 x i32> %shifted to <4 x float>
  ret <4 x float> %conv
}

; Same shift amount as f_v4_s24 but without the 'exact' flag: the shift is
; expected to stay a separate sshr ahead of the conversion.
define <4 x float> @f_v4_s24_inexact(<4 x i32> %u) {
; CHECK-LABEL: f_v4_s24_inexact:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sshr v0.4s, v0.4s, #24
; CHECK-NEXT:    scvtf v0.4s, v0.4s
; CHECK-NEXT:    ret
  %shifted = ashr <4 x i32> %u, <i32 24, i32 24, i32 24, i32 24>
  %conv = sitofp <4 x i32> %shifted to <4 x float>
  ret <4 x float> %conv
}

; Non-exact shift by 31 (element width minus one): expected output is a
; compare-less-than-zero followed by a plain scvtf.
define <4 x float> @f_v4_s31(<4 x i32> %u) {
; CHECK-LABEL: f_v4_s31:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.4s, v0.4s, #0
; CHECK-NEXT:    scvtf v0.4s, v0.4s
; CHECK-NEXT:    ret
  %shifted = ashr <4 x i32> %u, <i32 31, i32 31, i32 31, i32 31>
  %conv = sitofp <4 x i32> %shifted to <4 x float>
  ret <4 x float> %conv
}

; ---- Common cases: signed integer to floating point with an exact shift ----

; 64-bit vector of two i32 elements.
define <2 x float> @f_v2_s24(<2 x i32> %u) {
; CHECK-LABEL: f_v2_s24:
; CHECK:       // %bb.0:
; CHECK-NEXT:    scvtf v0.2s, v0.2s, #24
; CHECK-NEXT:    ret
  %shifted = ashr exact <2 x i32> %u, <i32 24, i32 24>
  %conv = sitofp <2 x i32> %shifted to <2 x float>
  ret <2 x float> %conv
}

; 128-bit vector of four i32 elements.
define <4 x float> @f_v4_s24(<4 x i32> %u) {
; CHECK-LABEL: f_v4_s24:
; CHECK:       // %bb.0:
; CHECK-NEXT:    scvtf v0.4s, v0.4s, #24
; CHECK-NEXT:    ret
  %shifted = ashr exact <4 x i32> %u, <i32 24, i32 24, i32 24, i32 24>
  %conv = sitofp <4 x i32> %shifted to <4 x float>
  ret <4 x float> %conv
}

; Legalisation of the <8 x i64> operation into <2 x f64> pieces must not get in
; the way: one fixed-point scvtf is expected per 128-bit register.
define <8 x double> @d_v8_s64(<8 x i64> %u) {
; CHECK-LABEL: d_v8_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    scvtf v0.2d, v0.2d, #56
; CHECK-NEXT:    scvtf v1.2d, v1.2d, #56
; CHECK-NEXT:    scvtf v2.2d, v2.2d, #56
; CHECK-NEXT:    scvtf v3.2d, v3.2d, #56
; CHECK-NEXT:    ret
  %shifted = ashr exact <8 x i64> %u, <i64 56, i64 56, i64 56, i64 56, i64 56, i64 56, i64 56, i64 56>
  %conv = sitofp <8 x i64> %shifted to <8 x double>
  ret <8 x double> %conv
}

; Half-precision result from four i16 elements (function carries +fullfp16).
define <4 x half> @h_v4_s8(<4 x i16> %u) #0 {
; CHECK-LABEL: h_v4_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    scvtf v0.4h, v0.4h, #8
; CHECK-NEXT:    ret
  %shifted = ashr exact <4 x i16> %u, <i16 8, i16 8, i16 8, i16 8>
  %conv = sitofp <4 x i16> %shifted to <4 x half>
  ret <4 x half> %conv
}

; Half-precision result from eight i16 elements (function carries +fullfp16).
define <8 x half> @h_v8_s8(<8 x i16> %u) #0 {
; CHECK-LABEL: h_v8_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    scvtf v0.8h, v0.8h, #8
; CHECK-NEXT:    ret
  %shifted = ashr exact <8 x i16> %u, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %conv = sitofp <8 x i16> %shifted to <8 x half>
  ret <8 x half> %conv
}

; ---- Conversion of an extracted vector element ----
;
; When the element comes from lane 0, the int-to-fp conversion should be applied
; to the vector subregister directly, avoiding an fpr->gpr trip.

; Signed conversion using ssub for the bottom 32 bits of a v2i32.
define float @l0_extract_f_v2s(<2 x i32> %u) {
; CHECK-LABEL: l0_extract_f_v2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    scvtf s0, s0
; CHECK-NEXT:    ret
  %elt = extractelement <2 x i32> %u, i64 0
  %fp = sitofp i32 %elt to float
  ret float %fp
}

; Unsigned conversion using ssub for the bottom 32 bits of a v2i32.
define float @l0_extract_f_v2u(<2 x i32> %u) {
; CHECK-LABEL: l0_extract_f_v2u:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    ucvtf s0, s0
; CHECK-NEXT:    ret
  %elt = extractelement <2 x i32> %u, i64 0
  %fp = uitofp i32 %elt to float
  ret float %fp
}

; The pattern must only fire when the lane is known to be 0. With a variable
; index the expected code stores the vector to the stack and reloads the
; selected element before converting it.
define float @ln_extract_f_v2s(<2 x i32> %u, i64 %n) {
; CHECK-LABEL: ln_extract_f_v2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    add x8, sp, #8
; CHECK-NEXT:    str d0, [sp, #8]
; CHECK-NEXT:    bfi x8, x0, #2, #1
; CHECK-NEXT:    ldr s0, [x8]
; CHECK-NEXT:    scvtf s0, s0
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
  %elt = extractelement <2 x i32> %u, i64 %n
  %fp = sitofp i32 %elt to float
  ret float %fp
}

; Signed conversion using ssub for the bottom 32 bits of a v4i32.
define float @l0_extract_f_v4s(<4 x i32> %u) {
; CHECK-LABEL: l0_extract_f_v4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    scvtf s0, s0
; CHECK-NEXT:    ret
  %elt = extractelement <4 x i32> %u, i64 0
  %fp = sitofp i32 %elt to float
  ret float %fp
}

; Unsigned conversion using ssub for the bottom 32 bits of a v4i32.
define float @l0_extract_f_v4u(<4 x i32> %u) {
; CHECK-LABEL: l0_extract_f_v4u:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ucvtf s0, s0
; CHECK-NEXT:    ret
  %elt = extractelement <4 x i32> %u, i64 0
  %fp = uitofp i32 %elt to float
  ret float %fp
}

; Variable lane of a v4i32: expected to go through a stack store and reload.
define float @ln_extract_f_v4s(<4 x i32> %u, i64 %n) {
; CHECK-LABEL: ln_extract_f_v4s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str q0, [sp]
; CHECK-NEXT:    bfi x8, x0, #2, #2
; CHECK-NEXT:    ldr s0, [x8]
; CHECK-NEXT:    scvtf s0, s0
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
  %elt = extractelement <4 x i32> %u, i64 %n
  %fp = sitofp i32 %elt to float
  ret float %fp
}

; Signed conversion using dsub for the bottom 64 bits of a v2i64.
define double @l0_extract_d_v2s(<2 x i64> %u) {
; CHECK-LABEL: l0_extract_d_v2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    scvtf d0, d0
; CHECK-NEXT:    ret
  %elt = extractelement <2 x i64> %u, i64 0
  %fp = sitofp i64 %elt to double
  ret double %fp
}

; Unsigned conversion using dsub for the bottom 64 bits of a v2i64.
define double @l0_extract_d_v2u(<2 x i64> %u) {
; CHECK-LABEL: l0_extract_d_v2u:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ucvtf d0, d0
; CHECK-NEXT:    ret
  %elt = extractelement <2 x i64> %u, i64 0
  %fp = uitofp i64 %elt to double
  ret double %fp
}

; Variable lane of a v2i64: expected to go through a stack store and reload.
define double @ln_extract_d_v2s(<2 x i64> %u, i64 %n) {
; CHECK-LABEL: ln_extract_d_v2s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str q0, [sp]
; CHECK-NEXT:    bfi x8, x0, #3, #1
; CHECK-NEXT:    ldr d0, [x8]
; CHECK-NEXT:    scvtf d0, d0
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
  %elt = extractelement <2 x i64> %u, i64 %n
  %fp = sitofp i64 %elt to double
  ret double %fp
}

; (fullfp16) Signed conversion using hsub for the bottom 16 bits of a v8i16.
define half @l0_extract_h_v8s(<8 x i16> %u) #0 {
; CHECK-LABEL: l0_extract_h_v8s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    scvtf h0, h0
; CHECK-NEXT:    ret
  %elt = extractelement <8 x i16> %u, i32 0
  %fp = sitofp i16 %elt to half
  ret half %fp
}

; (fullfp16) Unsigned conversion using hsub for the bottom 16 bits of a v8i16.
define half @l0_extract_h_v8u(<8 x i16> %u) #0 {
; CHECK-LABEL: l0_extract_h_v8u:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ucvtf h0, h0
; CHECK-NEXT:    ret
  %elt = extractelement <8 x i16> %u, i32 0
  %fp = uitofp i16 %elt to half
  ret half %fp
}

; (fullfp16) Variable lane of a v8i16: the element is expected to be reloaded
; into a general-purpose register (ldrh) and converted from there.
define half @ln_extract_h_v8u(<8 x i16> %u, i32 %n) #0 {
; CHECK-LABEL: ln_extract_h_v8u:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    str q0, [sp]
; CHECK-NEXT:    bfi x8, x0, #1, #3
; CHECK-NEXT:    ldrh w8, [x8]
; CHECK-NEXT:    ucvtf h0, w8
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
  %elt = extractelement <8 x i16> %u, i32 %n
  %fp = uitofp i16 %elt to half
  ret half %fp
}

; Functions tagged #0 are compiled with the fullfp16 target feature enabled.
attributes #0 = { "target-features"="+fullfp16"}