1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 2 3 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8dot2 -target-feature +fp8dot4 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | FileCheck %s 4 // RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8dot2 -target-feature +fp8dot4 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | FileCheck %s -check-prefix CHECK-CXX 5 6 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8dot2 -target-feature +fp8dot4 -O3 -Werror -Wall -S -o /dev/null %s 7 8 // REQUIRES: aarch64-registered-target 9 10 #include <arm_neon.h> 11 12 // CHECK-LABEL: define dso_local <4 x half> @test_vdot_f16( 13 // CHECK-SAME: <4 x half> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] { 14 // CHECK-NEXT: [[ENTRY:.*:]] 15 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VD]] to <8 x i8> 16 // CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) 17 // CHECK-NEXT: [[FDOT21_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fp8.fdot2.v4f16.v8i8(<4 x half> [[VD]], <8 x i8> [[VN]], <8 x i8> [[VM]]) 18 // CHECK-NEXT: ret <4 x half> [[FDOT21_I]] 19 // 20 // CHECK-CXX-LABEL: define dso_local noundef <4 x half> @_Z13test_vdot_f1613__Float16x4_t13__Mfloat8x8_tS0_m( 21 // CHECK-CXX-SAME: <4 x half> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] { 22 // CHECK-CXX-NEXT: [[ENTRY:.*:]] 23 // CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VD]] to <8 x i8> 24 // CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) 25 // CHECK-CXX-NEXT: [[FDOT21_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fp8.fdot2.v4f16.v8i8(<4 x half> [[VD]], <8 x i8> [[VN]], <8 x i8> [[VM]]) 26 // CHECK-CXX-NEXT: ret <4 x half> [[FDOT21_I]] 27 // 28 float16x4_t test_vdot_f16(float16x4_t vd, mfloat8x8_t vn, mfloat8x8_t vm, fpm_t fpmr) { 29 return vdot_f16_mf8_fpm(vd, vn, vm, fpmr); 30 } 31 32 // CHECK-LABEL: define dso_local <8 x half> @test_vdotq_f16( 33 // CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { 34 // CHECK-NEXT: [[ENTRY:.*:]] 35 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8> 36 // CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) 37 // CHECK-NEXT: [[FDOT21_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fdot2.v8f16.v16i8(<8 x half> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]]) 38 // CHECK-NEXT: ret <8 x half> [[FDOT21_I]] 39 // 40 // CHECK-CXX-LABEL: define dso_local noundef <8 x half> @_Z14test_vdotq_f1613__Float16x8_t14__Mfloat8x16_tS0_m( 41 // CHECK-CXX-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { 42 // CHECK-CXX-NEXT: [[ENTRY:.*:]] 43 // CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8> 44 // CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) 45 // CHECK-CXX-NEXT: [[FDOT21_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fdot2.v8f16.v16i8(<8 x half> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]]) 46 // CHECK-CXX-NEXT: ret <8 x half> [[FDOT21_I]] 47 // 48 float16x8_t test_vdotq_f16(float16x8_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_t fpmr) { 49 return vdotq_f16_mf8_fpm(vd, vn, vm, fpmr); 50 } 51 52 // CHECK-LABEL: define dso_local <4 x half> @test_vdot_lane_f16( 53 // CHECK-SAME: <4 x half> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { 54 // CHECK-NEXT: [[ENTRY:.*:]] 55 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VD]] to <8 x i8> 56 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[VM]], i64 0) 57 // CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) 58 // CHECK-NEXT: [[FDOT2_LANE:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> 59 // CHECK-NEXT: [[FDOT2_LANE1:%.*]] = call <4 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v4f16.v8i8(<4 x half> [[FDOT2_LANE]], <8 x i8> [[VN]], <16 x i8> [[TMP1]], i32 3) 60 // CHECK-NEXT: ret <4 x half> [[FDOT2_LANE1]] 61 // 62 // CHECK-CXX-LABEL: define dso_local noundef <4 x half> @_Z18test_vdot_lane_f1613__Float16x4_t13__Mfloat8x8_tS0_m( 63 // CHECK-CXX-SAME: <4 x half> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { 64 // CHECK-CXX-NEXT: [[ENTRY:.*:]] 65 // CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VD]] to <8 x i8> 66 // CHECK-CXX-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[VM]], i64 0) 67 // CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) 68 // CHECK-CXX-NEXT: [[FDOT2_LANE:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> 69 // CHECK-CXX-NEXT: [[FDOT2_LANE1:%.*]] = call <4 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v4f16.v8i8(<4 x half> [[FDOT2_LANE]], <8 x i8> [[VN]], <16 x i8> [[TMP1]], i32 3) 70 // CHECK-CXX-NEXT: ret <4 x half> [[FDOT2_LANE1]] 71 // 72 float16x4_t test_vdot_lane_f16(float16x4_t vd, mfloat8x8_t vn, mfloat8x8_t vm, fpm_t fpmr) { 73 return vdot_lane_f16_mf8_fpm(vd, vn, vm, 3, fpmr); 74 } 75 76 // CHECK-LABEL: define dso_local <4 x half> @test_vdot_laneq_f16( 77 // CHECK-SAME: <4 x half> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { 78 // CHECK-NEXT: [[ENTRY:.*:]] 79 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VD]] to <8 x i8> 80 // CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) 81 // CHECK-NEXT: [[FDOT2_LANE:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> 82 // CHECK-NEXT: [[FDOT2_LANE1:%.*]] = call <4 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v4f16.v8i8(<4 x half> [[FDOT2_LANE]], <8 x i8> [[VN]], <16 x i8> [[VM]], i32 7) 83 // CHECK-NEXT: ret <4 x half> [[FDOT2_LANE1]] 84 // 85 // CHECK-CXX-LABEL: define dso_local noundef <4 x half> @_Z19test_vdot_laneq_f1613__Float16x4_t13__Mfloat8x8_t14__Mfloat8x16_tm( 86 // CHECK-CXX-SAME: <4 x half> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { 87 // CHECK-CXX-NEXT: [[ENTRY:.*:]] 88 // CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VD]] to <8 x i8> 89 // CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) 90 // CHECK-CXX-NEXT: [[FDOT2_LANE:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> 91 // CHECK-CXX-NEXT: [[FDOT2_LANE1:%.*]] = call <4 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v4f16.v8i8(<4 x half> [[FDOT2_LANE]], <8 x i8> [[VN]], <16 x i8> [[VM]], i32 7) 92 // CHECK-CXX-NEXT: ret <4 x half> [[FDOT2_LANE1]] 93 // 94 float16x4_t test_vdot_laneq_f16(float16x4_t vd, mfloat8x8_t vn, mfloat8x16_t vm, fpm_t fpmr) { 95 return vdot_laneq_f16_mf8_fpm(vd, vn, vm, 7, fpmr); 96 } 97 98 // CHECK-LABEL: define dso_local <8 x half> @test_vdotq_lane_f16( 99 // CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { 100 // CHECK-NEXT: [[ENTRY:.*:]] 101 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8> 102 // CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[VM]], i64 0) 103 // CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) 104 // CHECK-NEXT: [[FDOT2_LANE:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> 105 // CHECK-NEXT: [[FDOT2_LANE1:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v8f16.v16i8(<8 x half> [[FDOT2_LANE]], <16 x i8> [[VN]], <16 x i8> [[TMP1]], i32 3) 106 // CHECK-NEXT: ret <8 x half> [[FDOT2_LANE1]] 107 // 108 // CHECK-CXX-LABEL: define dso_local noundef <8 x half> @_Z19test_vdotq_lane_f1613__Float16x8_t14__Mfloat8x16_t13__Mfloat8x8_tm( 109 // CHECK-CXX-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { 110 // CHECK-CXX-NEXT: [[ENTRY:.*:]] 111 // CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8> 112 // CHECK-CXX-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[VM]], i64 0) 113 // CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) 114 // CHECK-CXX-NEXT: [[FDOT2_LANE:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> 115 // CHECK-CXX-NEXT: [[FDOT2_LANE1:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v8f16.v16i8(<8 x half> [[FDOT2_LANE]], <16 x i8> [[VN]], <16 x i8> [[TMP1]], i32 3) 116 // CHECK-CXX-NEXT: ret <8 x half> [[FDOT2_LANE1]] 117 // 118 float16x8_t test_vdotq_lane_f16(float16x8_t vd, mfloat8x16_t vn, mfloat8x8_t vm, fpm_t fpmr) { 119 return vdotq_lane_f16_mf8_fpm(vd, vn, vm, 3, fpmr); 120 } 121 122 // CHECK-LABEL: define dso_local <8 x half> @test_vdotq_laneq_f16( 123 // CHECK-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { 124 // CHECK-NEXT: [[ENTRY:.*:]] 125 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8> 126 // CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) 127 // CHECK-NEXT: [[FDOT2_LANE:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> 128 // CHECK-NEXT: [[FDOT2_LANE1:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v8f16.v16i8(<8 x half> [[FDOT2_LANE]], <16 x i8> [[VN]], <16 x i8> [[VM]], i32 7) 129 // CHECK-NEXT: ret <8 x half> [[FDOT2_LANE1]] 130 // 131 // CHECK-CXX-LABEL: define dso_local noundef <8 x half> @_Z20test_vdotq_laneq_f1613__Float16x8_t14__Mfloat8x16_tS0_m( 132 // CHECK-CXX-SAME: <8 x half> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { 133 // CHECK-CXX-NEXT: [[ENTRY:.*:]] 134 // CHECK-CXX-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VD]] to <16 x i8> 135 // CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) 136 // CHECK-CXX-NEXT: [[FDOT2_LANE:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> 137 // CHECK-CXX-NEXT: [[FDOT2_LANE1:%.*]] = call <8 x half> @llvm.aarch64.neon.fp8.fdot2.lane.v8f16.v16i8(<8 x half> [[FDOT2_LANE]], <16 x i8> [[VN]], <16 x i8> [[VM]], i32 7) 138 // CHECK-CXX-NEXT: ret <8 x half> [[FDOT2_LANE1]] 139 // 140 float16x8_t test_vdotq_laneq_f16(float16x8_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_t fpmr) { 141 return vdotq_laneq_f16_mf8_fpm(vd, vn, vm, 7, fpmr); 142 } 143 144 // CHECK-LABEL: define dso_local <2 x float> @test_vdot_f32( 145 // CHECK-SAME: <2 x float> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { 146 // CHECK-NEXT: [[ENTRY:.*:]] 147 // CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) 148 // CHECK-NEXT: [[FDOT4_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fp8.fdot4.v2f32.v8i8(<2 x float> [[VD]], <8 x i8> [[VN]], <8 x i8> [[VM]]) 149 // CHECK-NEXT: ret <2 x float> [[FDOT4_I]] 150 // 151 // CHECK-CXX-LABEL: define dso_local noundef <2 x float> @_Z13test_vdot_f3213__Float32x2_t13__Mfloat8x8_tS0_m( 152 // CHECK-CXX-SAME: <2 x float> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { 153 // CHECK-CXX-NEXT: [[ENTRY:.*:]] 154 // CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) 155 // CHECK-CXX-NEXT: [[FDOT4_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fp8.fdot4.v2f32.v8i8(<2 x float> [[VD]], <8 x i8> [[VN]], <8 x i8> [[VM]]) 156 // CHECK-CXX-NEXT: ret <2 x float> [[FDOT4_I]] 157 // 158 float32x2_t test_vdot_f32(float32x2_t vd, mfloat8x8_t vn, mfloat8x8_t vm, fpm_t fpmr) { 159 return vdot_f32_mf8_fpm(vd, vn, vm, fpmr); 160 } 161 162 // CHECK-LABEL: define dso_local <4 x float> @test_vdotq_f32( 163 // CHECK-SAME: <4 x float> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { 164 // CHECK-NEXT: [[ENTRY:.*:]] 165 // CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) 166 // CHECK-NEXT: [[FDOT4_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fp8.fdot4.v4f32.v16i8(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]]) 167 // CHECK-NEXT: ret <4 x float> [[FDOT4_I]] 168 // 169 // CHECK-CXX-LABEL: define dso_local noundef <4 x float> @_Z14test_vdotq_f3213__Float32x4_t14__Mfloat8x16_tS0_m( 170 // CHECK-CXX-SAME: <4 x float> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { 171 // CHECK-CXX-NEXT: [[ENTRY:.*:]] 172 // CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) 173 // CHECK-CXX-NEXT: [[FDOT4_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fp8.fdot4.v4f32.v16i8(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]]) 174 // CHECK-CXX-NEXT: ret <4 x float> [[FDOT4_I]] 175 // 176 float32x4_t test_vdotq_f32(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_t fpmr) { 177 return vdotq_f32_mf8_fpm(vd, vn, vm, fpmr); 178 } 179 180 // CHECK-LABEL: define dso_local <2 x float> @test_vdot_lane_f32( 181 // CHECK-SAME: <2 x float> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { 182 // CHECK-NEXT: [[ENTRY:.*:]] 183 // CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[VM]], i64 0) 184 // CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) 185 // CHECK-NEXT: [[FDOT4_LANE:%.*]] = call <2 x float> @llvm.aarch64.neon.fp8.fdot4.lane.v2f32.v8i8(<2 x float> [[VD]], <8 x i8> [[VN]], <16 x i8> [[TMP0]], i32 1) 186 // CHECK-NEXT: ret <2 x float> [[FDOT4_LANE]] 187 // 188 // CHECK-CXX-LABEL: define dso_local noundef <2 x float> @_Z18test_vdot_lane_f3213__Float32x2_t13__Mfloat8x8_tS0_m( 189 // CHECK-CXX-SAME: <2 x float> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { 190 // CHECK-CXX-NEXT: [[ENTRY:.*:]] 191 // CHECK-CXX-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[VM]], i64 0) 192 // CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) 193 // CHECK-CXX-NEXT: [[FDOT4_LANE:%.*]] = call <2 x float> @llvm.aarch64.neon.fp8.fdot4.lane.v2f32.v8i8(<2 x float> [[VD]], <8 x i8> [[VN]], <16 x i8> [[TMP0]], i32 1) 194 // CHECK-CXX-NEXT: ret <2 x float> [[FDOT4_LANE]] 195 // 196 float32x2_t test_vdot_lane_f32(float32x2_t vd, mfloat8x8_t vn, mfloat8x8_t vm, fpm_t fpmr) { 197 return vdot_lane_f32_mf8_fpm(vd, vn, vm, 1, fpmr); 198 } 199 200 // CHECK-LABEL: define dso_local <2 x float> @test_vdot_laneq_f32( 201 // CHECK-SAME: <2 x float> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { 202 // CHECK-NEXT: [[ENTRY:.*:]] 203 // CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) 204 // CHECK-NEXT: [[FDOT4_LANE:%.*]] = call <2 x float> @llvm.aarch64.neon.fp8.fdot4.lane.v2f32.v8i8(<2 x float> [[VD]], <8 x i8> [[VN]], <16 x i8> [[VM]], i32 3) 205 // CHECK-NEXT: ret <2 x float> [[FDOT4_LANE]] 206 // 207 // CHECK-CXX-LABEL: define dso_local noundef <2 x float> @_Z19test_vdot_laneq_f3213__Float32x2_t13__Mfloat8x8_t14__Mfloat8x16_tm( 208 // CHECK-CXX-SAME: <2 x float> noundef [[VD:%.*]], <8 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { 209 // CHECK-CXX-NEXT: [[ENTRY:.*:]] 210 // CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) 211 // CHECK-CXX-NEXT: [[FDOT4_LANE:%.*]] = call <2 x float> @llvm.aarch64.neon.fp8.fdot4.lane.v2f32.v8i8(<2 x float> [[VD]], <8 x i8> [[VN]], <16 x i8> [[VM]], i32 3) 212 // CHECK-CXX-NEXT: ret <2 x float> [[FDOT4_LANE]] 213 // 214 float32x2_t test_vdot_laneq_f32(float32x2_t vd, mfloat8x8_t vn, mfloat8x16_t vm, fpm_t fpmr) { 215 return vdot_laneq_f32_mf8_fpm(vd, vn, vm, 3, fpmr); 216 } 217 218 // CHECK-LABEL: define dso_local <4 x float> @test_vdotq_lane_f32( 219 // CHECK-SAME: <4 x float> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { 220 // CHECK-NEXT: [[ENTRY:.*:]] 221 // CHECK-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[VM]], i64 0) 222 // CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) 223 // CHECK-NEXT: [[FDOT4_LANE:%.*]] = call <4 x float> @llvm.aarch64.neon.fp8.fdot4.lane.v4f32.v16i8(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[TMP0]], i32 1) 224 // CHECK-NEXT: ret <4 x float> [[FDOT4_LANE]] 225 // 226 // CHECK-CXX-LABEL: define dso_local noundef <4 x float> @_Z19test_vdotq_lane_f3213__Float32x4_t14__Mfloat8x16_t13__Mfloat8x8_tm( 227 // CHECK-CXX-SAME: <4 x float> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <8 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { 228 // CHECK-CXX-NEXT: [[ENTRY:.*:]] 229 // CHECK-CXX-NEXT: [[TMP0:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> poison, <8 x i8> [[VM]], i64 0) 230 // CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) 231 // CHECK-CXX-NEXT: [[FDOT4_LANE:%.*]] = call <4 x float> @llvm.aarch64.neon.fp8.fdot4.lane.v4f32.v16i8(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[TMP0]], i32 1) 232 // CHECK-CXX-NEXT: ret <4 x float> [[FDOT4_LANE]] 233 // 234 float32x4_t test_vdotq_lane_f32(float32x4_t vd, mfloat8x16_t vn, mfloat8x8_t vm, fpm_t fpmr) { 235 return vdotq_lane_f32_mf8_fpm(vd, vn, vm, 1, fpmr); 236 } 237 238 // CHECK-LABEL: define dso_local <4 x float> @test_vdotq_laneq_f32( 239 // CHECK-SAME: <4 x float> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { 240 // CHECK-NEXT: [[ENTRY:.*:]] 241 // CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) 242 // CHECK-NEXT: [[FDOT4_LANE:%.*]] = call <4 x float> @llvm.aarch64.neon.fp8.fdot4.lane.v4f32.v16i8(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]], i32 3) 243 // CHECK-NEXT: ret <4 x float> [[FDOT4_LANE]] 244 // 245 // CHECK-CXX-LABEL: define dso_local noundef <4 x float> @_Z20test_vdotq_laneq_f3213__Float32x4_t14__Mfloat8x16_tS0_m( 246 // CHECK-CXX-SAME: <4 x float> noundef [[VD:%.*]], <16 x i8> [[VN:%.*]], <16 x i8> [[VM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { 247 // CHECK-CXX-NEXT: [[ENTRY:.*:]] 248 // CHECK-CXX-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) 249 // CHECK-CXX-NEXT: [[FDOT4_LANE:%.*]] = call <4 x float> @llvm.aarch64.neon.fp8.fdot4.lane.v4f32.v16i8(<4 x float> [[VD]], <16 x i8> [[VN]], <16 x i8> [[VM]], i32 3) 250 // CHECK-CXX-NEXT: ret <4 x float> [[FDOT4_LANE]] 251 // 252 float32x4_t test_vdotq_laneq_f32(float32x4_t vd, mfloat8x16_t vn, mfloat8x16_t vm, fpm_t fpmr) { 253 return vdotq_laneq_f32_mf8_fpm(vd, vn, vm, 3, fpmr); 254 } 255