// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 \
// RUN:   -triple aarch64 -target-feature +neon -target-feature +bf16 \
// RUN:   -disable-O0-optnone -emit-llvm -o - %s \
// RUN:   | opt -S -passes=mem2reg \
// RUN:   | FileCheck --check-prefixes=CHECK,CHECK-A64 %s
// RUN: %clang_cc1 \
// RUN:   -triple armv8.6a-arm-none-eabi -target-feature +neon \
// RUN:   -target-feature +bf16 -mfloat-abi hard \
// RUN:   -disable-O0-optnone -emit-llvm -o - %s \
// RUN:   | opt -S -passes=mem2reg \
// RUN:   | FileCheck --check-prefixes=CHECK,CHECK-A32-HARDFP %s
// RUN: %clang_cc1 \
// RUN:   -triple armv8.6a-arm-none-eabi -target-feature +neon \
// RUN:   -target-feature +bf16 -mfloat-abi softfp \
// RUN:   -disable-O0-optnone -emit-llvm -o - %s \
// RUN:   | opt -S -passes=mem2reg \
// RUN:   | FileCheck --check-prefixes=CHECK,CHECK-A32-SOFTFP %s

// REQUIRES: arm-registered-target
// REQUIRES: aarch64-registered-target

#include <arm_neon.h>

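// vcvt_f32_bf16 widens each bf16 lane to f32. A bf16 value is the high half
// of the corresponding f32 bit pattern, so on all three targets the lowering
// zero-extends the raw 16-bit lanes to i32 and shifts them left by 16 before
// reinterpreting the result as <4 x float>.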
// CHECK-A64-LABEL: @test_vcvt_f32_bf16(
// CHECK-A64-NEXT:  entry:
// CHECK-A64-NEXT:    [[__REINT_808_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A64-NEXT:    [[__REINT1_808_I:%.*]] = alloca <4 x i32>, align 16
// CHECK-A64-NEXT:    store <4 x bfloat> [[A:%.*]], ptr [[__REINT_808_I]], align 8
// CHECK-A64-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_808_I]], align 8
// CHECK-A64-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK-A64-NEXT:    [[TMP2:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
// CHECK-A64-NEXT:    [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
// CHECK-A64-NEXT:    store <4 x i32> [[VSHLL_N_I]], ptr [[__REINT1_808_I]], align 16
// CHECK-A64-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_808_I]], align 16
// CHECK-A64-NEXT:    ret <4 x float> [[TMP3]]
//
// CHECK-A32-HARDFP-LABEL: @test_vcvt_f32_bf16(
// CHECK-A32-HARDFP-NEXT:  entry:
// CHECK-A32-HARDFP-NEXT:    [[__REINT_808_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-HARDFP-NEXT:    [[__REINT1_808_I:%.*]] = alloca <4 x i32>, align 8
// CHECK-A32-HARDFP-NEXT:    store <4 x bfloat> [[A:%.*]], ptr [[__REINT_808_I]], align 8
// CHECK-A32-HARDFP-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_808_I]], align 8
// CHECK-A32-HARDFP-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK-A32-HARDFP-NEXT:    [[TMP2:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
// CHECK-A32-HARDFP-NEXT:    [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
// CHECK-A32-HARDFP-NEXT:    store <4 x i32> [[VSHLL_N_I]], ptr [[__REINT1_808_I]], align 8
// CHECK-A32-HARDFP-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_808_I]], align 8
// CHECK-A32-HARDFP-NEXT:    ret <4 x float> [[TMP3]]
//
// CHECK-A32-SOFTFP-LABEL: @test_vcvt_f32_bf16(
// CHECK-A32-SOFTFP-NEXT:  entry:
// CHECK-A32-SOFTFP-NEXT:    [[__P0_808_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[__REINT_808_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[__REINT1_808_I:%.*]] = alloca <4 x i32>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[A:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[COERCE:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    store <2 x i32> [[A_COERCE:%.*]], ptr [[A]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[A1:%.*]] = load <4 x bfloat>, ptr [[A]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <4 x bfloat> [[A1]], ptr [[COERCE]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[COERCE]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <2 x i32> [[TMP0]], ptr [[__P0_808_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[__P0_8081_I:%.*]] = load <4 x bfloat>, ptr [[__P0_808_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <4 x bfloat> [[__P0_8081_I]], ptr [[__REINT_808_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[__REINT_808_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
// CHECK-A32-SOFTFP-NEXT:    [[TMP3:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK-A32-SOFTFP-NEXT:    [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 16)
// CHECK-A32-SOFTFP-NEXT:    store <4 x i32> [[VSHLL_N_I]], ptr [[__REINT1_808_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP4:%.*]] = load <4 x float>, ptr [[__REINT1_808_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    ret <4 x float> [[TMP4]]
//
float32x4_t test_vcvt_f32_bf16(bfloat16x4_t a) {
  return vcvt_f32_bf16(a);
}

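// vcvtq_low_f32_bf16 first extracts lanes 0-3 of the 128-bit source (the
// shufflevector below) and then applies the same zext-and-shl-by-16 widening.
// Under the A32 softfp ABI bf16 vectors are passed and returned as integer
// vectors, hence the extra coercion stores and loads.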
// CHECK-A64-LABEL: @test_vcvtq_low_f32_bf16(
// CHECK-A64-NEXT:  entry:
// CHECK-A64-NEXT:    [[__REINT_808_I_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A64-NEXT:    [[__REINT1_808_I_I:%.*]] = alloca <4 x i32>, align 16
// CHECK-A64-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-A64-NEXT:    store <4 x bfloat> [[SHUFFLE_I]], ptr [[__REINT_808_I_I]], align 8
// CHECK-A64-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_808_I_I]], align 8
// CHECK-A64-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK-A64-NEXT:    [[TMP2:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
// CHECK-A64-NEXT:    [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
// CHECK-A64-NEXT:    store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_808_I_I]], align 16
// CHECK-A64-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_808_I_I]], align 16
// CHECK-A64-NEXT:    ret <4 x float> [[TMP3]]
//
// CHECK-A32-HARDFP-LABEL: @test_vcvtq_low_f32_bf16(
// CHECK-A32-HARDFP-NEXT:  entry:
// CHECK-A32-HARDFP-NEXT:    [[__REINT_808_I_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-HARDFP-NEXT:    [[__REINT1_808_I_I:%.*]] = alloca <4 x i32>, align 8
// CHECK-A32-HARDFP-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-A32-HARDFP-NEXT:    store <4 x bfloat> [[SHUFFLE_I]], ptr [[__REINT_808_I_I]], align 8
// CHECK-A32-HARDFP-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_808_I_I]], align 8
// CHECK-A32-HARDFP-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK-A32-HARDFP-NEXT:    [[TMP2:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
// CHECK-A32-HARDFP-NEXT:    [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
// CHECK-A32-HARDFP-NEXT:    store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_808_I_I]], align 8
// CHECK-A32-HARDFP-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_808_I_I]], align 8
// CHECK-A32-HARDFP-NEXT:    ret <4 x float> [[TMP3]]
//
// CHECK-A32-SOFTFP-LABEL: @test_vcvtq_low_f32_bf16(
// CHECK-A32-SOFTFP-NEXT:  entry:
// CHECK-A32-SOFTFP-NEXT:    [[RETVAL_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[__P0_I2:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[__P0_808_I_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[__REINT_808_I_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[__REINT1_808_I_I:%.*]] = alloca <4 x i32>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[__P0_I:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[COERCE_I:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[COERCE2_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[COERCE3_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[A:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[COERCE:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    store <4 x i32> [[A_COERCE:%.*]], ptr [[A]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[A1:%.*]] = load <8 x bfloat>, ptr [[A]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <8 x bfloat> [[A1]], ptr [[COERCE]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[COERCE]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <4 x i32> [[TMP0]], ptr [[__P0_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[__P01_I:%.*]] = load <8 x bfloat>, ptr [[__P0_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <8 x bfloat> [[__P01_I]], ptr [[COERCE_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[COERCE_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <4 x i32> [[TMP1]], ptr [[__P0_I2]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[__P01_I5:%.*]] = load <8 x bfloat>, ptr [[__P0_I2]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[__P01_I5]], <8 x bfloat> [[__P01_I5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-A32-SOFTFP-NEXT:    store <4 x bfloat> [[SHUFFLE_I]], ptr [[RETVAL_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[RETVAL_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <2 x i32> [[TMP2]], ptr [[COERCE2_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP3:%.*]] = load <4 x bfloat>, ptr [[COERCE2_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <4 x bfloat> [[TMP3]], ptr [[COERCE3_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP4:%.*]] = load <2 x i32>, ptr [[COERCE3_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <2 x i32> [[TMP4]], ptr [[__P0_808_I_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[__P0_8081_I_I:%.*]] = load <4 x bfloat>, ptr [[__P0_808_I_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <4 x bfloat> [[__P0_8081_I_I]], ptr [[__REINT_808_I_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP5:%.*]] = load <4 x i16>, ptr [[__REINT_808_I_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK-A32-SOFTFP-NEXT:    [[TMP7:%.*]] = zext <4 x i16> [[TMP5]] to <4 x i32>
// CHECK-A32-SOFTFP-NEXT:    [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP7]], splat (i32 16)
// CHECK-A32-SOFTFP-NEXT:    store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_808_I_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP8:%.*]] = load <4 x float>, ptr [[__REINT1_808_I_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    ret <4 x float> [[TMP8]]
//
float32x4_t test_vcvtq_low_f32_bf16(bfloat16x8_t a) {
  return vcvtq_low_f32_bf16(a);
}

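// vcvtq_high_f32_bf16 is the same as the low variant except that the shuffle
// selects lanes 4-7.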
// CHECK-A64-LABEL: @test_vcvtq_high_f32_bf16(
// CHECK-A64-NEXT:  entry:
// CHECK-A64-NEXT:    [[__REINT_808_I_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A64-NEXT:    [[__REINT1_808_I_I:%.*]] = alloca <4 x i32>, align 16
// CHECK-A64-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK-A64-NEXT:    store <4 x bfloat> [[SHUFFLE_I]], ptr [[__REINT_808_I_I]], align 8
// CHECK-A64-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_808_I_I]], align 8
// CHECK-A64-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK-A64-NEXT:    [[TMP2:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
// CHECK-A64-NEXT:    [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
// CHECK-A64-NEXT:    store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_808_I_I]], align 16
// CHECK-A64-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_808_I_I]], align 16
// CHECK-A64-NEXT:    ret <4 x float> [[TMP3]]
//
// CHECK-A32-HARDFP-LABEL: @test_vcvtq_high_f32_bf16(
// CHECK-A32-HARDFP-NEXT:  entry:
// CHECK-A32-HARDFP-NEXT:    [[__REINT_808_I_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-HARDFP-NEXT:    [[__REINT1_808_I_I:%.*]] = alloca <4 x i32>, align 8
// CHECK-A32-HARDFP-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[A:%.*]], <8 x bfloat> [[A]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK-A32-HARDFP-NEXT:    store <4 x bfloat> [[SHUFFLE_I]], ptr [[__REINT_808_I_I]], align 8
// CHECK-A32-HARDFP-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_808_I_I]], align 8
// CHECK-A32-HARDFP-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK-A32-HARDFP-NEXT:    [[TMP2:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
// CHECK-A32-HARDFP-NEXT:    [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
// CHECK-A32-HARDFP-NEXT:    store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_808_I_I]], align 8
// CHECK-A32-HARDFP-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_808_I_I]], align 8
// CHECK-A32-HARDFP-NEXT:    ret <4 x float> [[TMP3]]
//
// CHECK-A32-SOFTFP-LABEL: @test_vcvtq_high_f32_bf16(
// CHECK-A32-SOFTFP-NEXT:  entry:
// CHECK-A32-SOFTFP-NEXT:    [[RETVAL_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[__P0_I2:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[__P0_808_I_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[__REINT_808_I_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[__REINT1_808_I_I:%.*]] = alloca <4 x i32>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[__P0_I:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[COERCE_I:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[COERCE2_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[COERCE3_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[A:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[COERCE:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    store <4 x i32> [[A_COERCE:%.*]], ptr [[A]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[A1:%.*]] = load <8 x bfloat>, ptr [[A]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <8 x bfloat> [[A1]], ptr [[COERCE]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[COERCE]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <4 x i32> [[TMP0]], ptr [[__P0_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[__P01_I:%.*]] = load <8 x bfloat>, ptr [[__P0_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <8 x bfloat> [[__P01_I]], ptr [[COERCE_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[COERCE_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <4 x i32> [[TMP1]], ptr [[__P0_I2]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[__P01_I5:%.*]] = load <8 x bfloat>, ptr [[__P0_I2]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[__P01_I5]], <8 x bfloat> [[__P01_I5]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK-A32-SOFTFP-NEXT:    store <4 x bfloat> [[SHUFFLE_I]], ptr [[RETVAL_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[RETVAL_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <2 x i32> [[TMP2]], ptr [[COERCE2_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP3:%.*]] = load <4 x bfloat>, ptr [[COERCE2_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <4 x bfloat> [[TMP3]], ptr [[COERCE3_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP4:%.*]] = load <2 x i32>, ptr [[COERCE3_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <2 x i32> [[TMP4]], ptr [[__P0_808_I_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[__P0_8081_I_I:%.*]] = load <4 x bfloat>, ptr [[__P0_808_I_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <4 x bfloat> [[__P0_8081_I_I]], ptr [[__REINT_808_I_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP5:%.*]] = load <4 x i16>, ptr [[__REINT_808_I_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK-A32-SOFTFP-NEXT:    [[TMP7:%.*]] = zext <4 x i16> [[TMP5]] to <4 x i32>
// CHECK-A32-SOFTFP-NEXT:    [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP7]], splat (i32 16)
// CHECK-A32-SOFTFP-NEXT:    store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_808_I_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP8:%.*]] = load <4 x float>, ptr [[__REINT1_808_I_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    ret <4 x float> [[TMP8]]
//
float32x4_t test_vcvtq_high_f32_bf16(bfloat16x8_t a) {
  return vcvtq_high_f32_bf16(a);
}

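// Narrowing direction: on AArch64 vcvt_bf16_f32 lowers to a plain fptrunc,
// while AArch32 goes through the llvm.arm.neon.vcvtfp2bf intrinsic, which
// returns <4 x bfloat> under the hard-float ABI and <4 x i16> under softfp.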
// CHECK-A64-LABEL: @test_vcvt_bf16_f32(
// CHECK-A64-NEXT:  entry:
// CHECK-A64-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
// CHECK-A64-NEXT:    [[TMP1:%.*]] = fptrunc <4 x float> [[A]] to <4 x bfloat>
// CHECK-A64-NEXT:    ret <4 x bfloat> [[TMP1]]
//
// CHECK-A32-HARDFP-LABEL: @test_vcvt_bf16_f32(
// CHECK-A32-HARDFP-NEXT:  entry:
// CHECK-A32-HARDFP-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
// CHECK-A32-HARDFP-NEXT:    [[VCVTFP2BF1_I:%.*]] = call <4 x bfloat> @llvm.arm.neon.vcvtfp2bf.v4bf16(<4 x float> [[A]])
// CHECK-A32-HARDFP-NEXT:    ret <4 x bfloat> [[VCVTFP2BF1_I]]
//
// CHECK-A32-SOFTFP-LABEL: @test_vcvt_bf16_f32(
// CHECK-A32-SOFTFP-NEXT:  entry:
// CHECK-A32-SOFTFP-NEXT:    [[RETVAL_I1:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[RETVAL_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[COERCE_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[RETVAL:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[COERCE:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
// CHECK-A32-SOFTFP-NEXT:    [[VCVTFP2BF1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2bf.v4i16(<4 x float> [[A]])
// CHECK-A32-SOFTFP-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[VCVTFP2BF1_I]] to <4 x bfloat>
// CHECK-A32-SOFTFP-NEXT:    store <4 x bfloat> [[TMP1]], ptr [[RETVAL_I1]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[RETVAL_I1]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <2 x i32> [[TMP2]], ptr [[COERCE_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP3:%.*]] = load <4 x bfloat>, ptr [[COERCE_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <4 x bfloat> [[TMP3]], ptr [[RETVAL_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP4:%.*]] = load <2 x i32>, ptr [[RETVAL_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <2 x i32> [[TMP4]], ptr [[COERCE]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP5:%.*]] = load <4 x bfloat>, ptr [[COERCE]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <4 x bfloat> [[TMP5]], ptr [[RETVAL]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP6:%.*]] = load <2 x i32>, ptr [[RETVAL]], align 8
// CHECK-A32-SOFTFP-NEXT:    ret <2 x i32> [[TMP6]]
//
bfloat16x4_t test_vcvt_bf16_f32(float32x4_t a) {
  return vcvt_bf16_f32(a);
}

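// vcvtq_low_bf16_f32 narrows the four f32 lanes and combines the result with
// a zero vector to form the 128-bit return value.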
// CHECK-A64-LABEL: @test_vcvtq_low_bf16_f32(
// CHECK-A64-NEXT:  entry:
// CHECK-A64-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
// CHECK-A64-NEXT:    [[TMP1:%.*]] = fptrunc <4 x float> [[A]] to <4 x bfloat>
// CHECK-A64-NEXT:    [[TMP2:%.*]] = shufflevector <4 x bfloat> [[TMP1]], <4 x bfloat> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK-A64-NEXT:    ret <8 x bfloat> [[TMP2]]
//
// CHECK-A32-HARDFP-LABEL: @test_vcvtq_low_bf16_f32(
// CHECK-A32-HARDFP-NEXT:  entry:
// CHECK-A32-HARDFP-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
// CHECK-A32-HARDFP-NEXT:    [[VCVTFP2BF1_I:%.*]] = call <4 x bfloat> @llvm.arm.neon.vcvtfp2bf.v4bf16(<4 x float> [[A]])
// CHECK-A32-HARDFP-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x bfloat> zeroinitializer, <4 x bfloat> [[VCVTFP2BF1_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK-A32-HARDFP-NEXT:    ret <8 x bfloat> [[SHUFFLE_I]]
//
// CHECK-A32-SOFTFP-LABEL: @test_vcvtq_low_bf16_f32(
// CHECK-A32-SOFTFP-NEXT:  entry:
// CHECK-A32-SOFTFP-NEXT:    [[RETVAL_I4:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[__P0_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[__P1_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[RETVAL_I1:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[RETVAL_I:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[COERCE_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[COERCE1_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[COERCE2_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[COERCE4_I:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[RETVAL:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[COERCE:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
// CHECK-A32-SOFTFP-NEXT:    [[VCVTFP2BF1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2bf.v4i16(<4 x float> [[A]])
// CHECK-A32-SOFTFP-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[VCVTFP2BF1_I]] to <4 x bfloat>
// CHECK-A32-SOFTFP-NEXT:    store <4 x bfloat> [[TMP1]], ptr [[RETVAL_I1]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[RETVAL_I1]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <2 x i32> [[TMP2]], ptr [[COERCE_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP3:%.*]] = load <4 x bfloat>, ptr [[COERCE_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <4 x bfloat> zeroinitializer, ptr [[COERCE1_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP4:%.*]] = load <2 x i32>, ptr [[COERCE1_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <4 x bfloat> [[TMP3]], ptr [[COERCE2_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP5:%.*]] = load <2 x i32>, ptr [[COERCE2_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <2 x i32> [[TMP4]], ptr [[__P0_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[__P01_I:%.*]] = load <4 x bfloat>, ptr [[__P0_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <2 x i32> [[TMP5]], ptr [[__P1_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[__P12_I:%.*]] = load <4 x bfloat>, ptr [[__P1_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x bfloat> [[__P01_I]], <4 x bfloat> [[__P12_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK-A32-SOFTFP-NEXT:    store <8 x bfloat> [[SHUFFLE_I]], ptr [[RETVAL_I4]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP6:%.*]] = load <4 x i32>, ptr [[RETVAL_I4]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <4 x i32> [[TMP6]], ptr [[COERCE4_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP7:%.*]] = load <8 x bfloat>, ptr [[COERCE4_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <8 x bfloat> [[TMP7]], ptr [[RETVAL_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr [[RETVAL_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <4 x i32> [[TMP8]], ptr [[COERCE]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP9:%.*]] = load <8 x bfloat>, ptr [[COERCE]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <8 x bfloat> [[TMP9]], ptr [[RETVAL]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr [[RETVAL]], align 8
// CHECK-A32-SOFTFP-NEXT:    ret <4 x i32> [[TMP10]]
//
bfloat16x8_t test_vcvtq_low_bf16_f32(float32x4_t a) {
  return vcvtq_low_bf16_f32(a);
}

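// vcvtq_high_bf16_f32 narrows the four f32 lanes and combines them with the
// low half of the existing (inactive) vector; under softfp this also involves
// coercing both the 128-bit argument and the result through integer vectors.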
// CHECK-A64-LABEL: @test_vcvtq_high_bf16_f32(
// CHECK-A64-NEXT:  entry:
// CHECK-A64-NEXT:    [[TMP0:%.*]] = bitcast <8 x bfloat> [[INACTIVE:%.*]] to <16 x i8>
// CHECK-A64-NEXT:    [[TMP1:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
// CHECK-A64-NEXT:    [[TMP2:%.*]] = shufflevector <8 x bfloat> [[INACTIVE]], <8 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-A64-NEXT:    [[TMP3:%.*]] = fptrunc <4 x float> [[A]] to <4 x bfloat>
// CHECK-A64-NEXT:    [[TMP4:%.*]] = shufflevector <4 x bfloat> [[TMP2]], <4 x bfloat> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK-A64-NEXT:    ret <8 x bfloat> [[TMP4]]
//
// CHECK-A32-HARDFP-LABEL: @test_vcvtq_high_bf16_f32(
// CHECK-A32-HARDFP-NEXT:  entry:
// CHECK-A32-HARDFP-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
// CHECK-A32-HARDFP-NEXT:    [[VCVTFP2BF1_I:%.*]] = call <4 x bfloat> @llvm.arm.neon.vcvtfp2bf.v4bf16(<4 x float> [[A]])
// CHECK-A32-HARDFP-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[INACTIVE:%.*]], <8 x bfloat> [[INACTIVE]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-A32-HARDFP-NEXT:    [[SHUFFLE_I8:%.*]] = shufflevector <4 x bfloat> [[VCVTFP2BF1_I]], <4 x bfloat> [[SHUFFLE_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK-A32-HARDFP-NEXT:    ret <8 x bfloat> [[SHUFFLE_I8]]
//
// CHECK-A32-SOFTFP-LABEL: @test_vcvtq_high_bf16_f32(
// CHECK-A32-SOFTFP-NEXT:  entry:
// CHECK-A32-SOFTFP-NEXT:    [[RETVAL_I11:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[__P0_I12:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[__P1_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[RETVAL_I8:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[RETVAL_I3:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[__P0_I4:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[RETVAL_I:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[__P0_I:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[COERCE_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[COERCE2_I:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[COERCE4_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[COERCE5_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[COERCE6_I:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[COERCE8_I:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[RETVAL:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[INACTIVE:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[COERCE:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    [[COERCE2:%.*]] = alloca <8 x bfloat>, align 8
// CHECK-A32-SOFTFP-NEXT:    store <4 x i32> [[INACTIVE_COERCE:%.*]], ptr [[INACTIVE]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[INACTIVE1:%.*]] = load <8 x bfloat>, ptr [[INACTIVE]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <8 x bfloat> [[INACTIVE1]], ptr [[COERCE]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[COERCE]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <4 x i32> [[TMP0]], ptr [[__P0_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[__P01_I:%.*]] = load <8 x bfloat>, ptr [[__P0_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP1:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
// CHECK-A32-SOFTFP-NEXT:    [[VCVTFP2BF1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2bf.v4i16(<4 x float> [[A]])
// CHECK-A32-SOFTFP-NEXT:    [[TMP2:%.*]] = bitcast <4 x i16> [[VCVTFP2BF1_I]] to <4 x bfloat>
// CHECK-A32-SOFTFP-NEXT:    store <4 x bfloat> [[TMP2]], ptr [[RETVAL_I8]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr [[RETVAL_I8]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <2 x i32> [[TMP3]], ptr [[COERCE_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP4:%.*]] = load <4 x bfloat>, ptr [[COERCE_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <8 x bfloat> [[__P01_I]], ptr [[COERCE2_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[COERCE2_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <4 x i32> [[TMP5]], ptr [[__P0_I4]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[__P01_I7:%.*]] = load <8 x bfloat>, ptr [[__P0_I4]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <8 x bfloat> [[__P01_I7]], <8 x bfloat> [[__P01_I7]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-A32-SOFTFP-NEXT:    store <4 x bfloat> [[SHUFFLE_I]], ptr [[RETVAL_I3]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP6:%.*]] = load <2 x i32>, ptr [[RETVAL_I3]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <2 x i32> [[TMP6]], ptr [[COERCE4_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP7:%.*]] = load <4 x bfloat>, ptr [[COERCE4_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <4 x bfloat> [[TMP4]], ptr [[COERCE5_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP8:%.*]] = load <2 x i32>, ptr [[COERCE5_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <4 x bfloat> [[TMP7]], ptr [[COERCE6_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP9:%.*]] = load <2 x i32>, ptr [[COERCE6_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <2 x i32> [[TMP8]], ptr [[__P0_I12]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[__P01_I16:%.*]] = load <4 x bfloat>, ptr [[__P0_I12]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <2 x i32> [[TMP9]], ptr [[__P1_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[__P12_I:%.*]] = load <4 x bfloat>, ptr [[__P1_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[SHUFFLE_I17:%.*]] = shufflevector <4 x bfloat> [[__P01_I16]], <4 x bfloat> [[__P12_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK-A32-SOFTFP-NEXT:    store <8 x bfloat> [[SHUFFLE_I17]], ptr [[RETVAL_I11]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr [[RETVAL_I11]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <4 x i32> [[TMP10]], ptr [[COERCE8_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP11:%.*]] = load <8 x bfloat>, ptr [[COERCE8_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <8 x bfloat> [[TMP11]], ptr [[RETVAL_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP12:%.*]] = load <4 x i32>, ptr [[RETVAL_I]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <4 x i32> [[TMP12]], ptr [[COERCE2]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP13:%.*]] = load <8 x bfloat>, ptr [[COERCE2]], align 8
// CHECK-A32-SOFTFP-NEXT:    store <8 x bfloat> [[TMP13]], ptr [[RETVAL]], align 8
// CHECK-A32-SOFTFP-NEXT:    [[TMP14:%.*]] = load <4 x i32>, ptr [[RETVAL]], align 8
// CHECK-A32-SOFTFP-NEXT:    ret <4 x i32> [[TMP14]]
//
bfloat16x8_t test_vcvtq_high_bf16_f32(bfloat16x8_t inactive, float32x4_t a) {
  return vcvtq_high_bf16_f32(inactive, a);
}

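// Scalar narrowing conversion: fptrunc on AArch64 and
// llvm.arm.neon.vcvtbfp2bf on AArch32; the hard-float and softfp lowerings
// coincide because the operand and result are scalars.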
// CHECK-A64-LABEL: @test_vcvth_bf16_f32(
// CHECK-A64-NEXT:  entry:
// CHECK-A64-NEXT:    [[TMP0:%.*]] = fptrunc float [[A:%.*]] to bfloat
// CHECK-A64-NEXT:    ret bfloat [[TMP0]]
//
// CHECK-A32-HARDFP-LABEL: @test_vcvth_bf16_f32(
// CHECK-A32-HARDFP-NEXT:  entry:
// CHECK-A32-HARDFP-NEXT:    [[VCVTBFP2BF_I:%.*]] = call bfloat @llvm.arm.neon.vcvtbfp2bf(float [[A:%.*]])
// CHECK-A32-HARDFP-NEXT:    ret bfloat [[VCVTBFP2BF_I]]
//
// CHECK-A32-SOFTFP-LABEL: @test_vcvth_bf16_f32(
// CHECK-A32-SOFTFP-NEXT:  entry:
// CHECK-A32-SOFTFP-NEXT:    [[VCVTBFP2BF_I:%.*]] = call bfloat @llvm.arm.neon.vcvtbfp2bf(float [[A:%.*]])
// CHECK-A32-SOFTFP-NEXT:    ret bfloat [[VCVTBFP2BF_I]]
//
bfloat16_t test_vcvth_bf16_f32(float32_t a) {
  return vcvth_bf16_f32(a);
}

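// The scalar widening conversion uses the same bit trick as the vector forms:
// zero-extend the 16-bit pattern, shift it into the top half of an i32, and
// reinterpret the result as float. The IR is identical for all three targets,
// hence the shared CHECK prefix.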
// CHECK-LABEL: @test_vcvtah_f32_bf16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[__REINT_I:%.*]] = alloca bfloat, align 2
// CHECK-NEXT:    [[__REINT1_I:%.*]] = alloca i32, align 4
// CHECK-NEXT:    store bfloat [[A:%.*]], ptr [[__REINT_I]], align 2
// CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[__REINT_I]], align 2
// CHECK-NEXT:    [[CONV_I:%.*]] = zext i16 [[TMP0]] to i32
// CHECK-NEXT:    [[SHL_I:%.*]] = shl i32 [[CONV_I]], 16
// CHECK-NEXT:    store i32 [[SHL_I]], ptr [[__REINT1_I]], align 4
// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[__REINT1_I]], align 4
// CHECK-NEXT:    ret float [[TMP1]]
//
float32_t test_vcvtah_f32_bf16(bfloat16_t a) {
  return vcvtah_f32_bf16(a);
}