1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 2 // RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \ 3 // RUN: -target-feature +v8.1a -emit-llvm -disable-O0-optnone -o - %s | opt -passes=mem2reg,dce -S | FileCheck %s 4 5 // REQUIRES: aarch64-registered-target 6 7 #include <arm_neon.h> 8 9 // CHECK-LABEL: @test_vqrdmlah_laneq_s16( 10 // CHECK-NEXT: entry: 11 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 12 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 13 // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> 14 // CHECK-NEXT: [[VQRDMLAH_V3_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[LANE]]) 15 // CHECK-NEXT: ret <4 x i16> [[VQRDMLAH_V3_I]] 16 // 17 int16x4_t test_vqrdmlah_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) { 18 return vqrdmlah_laneq_s16(a, b, v, 7); 19 } 20 21 // CHECK-LABEL: @test_vqrdmlah_laneq_s32( 22 // CHECK-NEXT: entry: 23 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 24 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 25 // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3> 26 // CHECK-NEXT: [[VQRDMLAH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmlah.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> [[LANE]]) 27 // CHECK-NEXT: ret <2 x i32> [[VQRDMLAH_V3_I]] 28 // 29 int32x2_t test_vqrdmlah_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) { 30 return vqrdmlah_laneq_s32(a, b, v, 3); 31 } 32 33 // CHECK-LABEL: @test_vqrdmlahq_laneq_s16( 34 // CHECK-NEXT: entry: 35 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 36 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 37 // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 38 // CHECK-NEXT: [[VQRDMLAHQ_V3_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmlah.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[LANE]]) 39 // CHECK-NEXT: ret <8 x i16> [[VQRDMLAHQ_V3_I]] 40 // 41 int16x8_t test_vqrdmlahq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) { 42 return vqrdmlahq_laneq_s16(a, b, v, 7); 43 } 44 45 // CHECK-LABEL: @test_vqrdmlahq_laneq_s32( 46 // CHECK-NEXT: entry: 47 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 48 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 49 // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 50 // CHECK-NEXT: [[VQRDMLAHQ_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmlah.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[LANE]]) 51 // CHECK-NEXT: ret <4 x i32> [[VQRDMLAHQ_V3_I]] 52 // 53 int32x4_t test_vqrdmlahq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) { 54 return vqrdmlahq_laneq_s32(a, b, v, 3); 55 } 56 57 // CHECK-LABEL: @test_vqrdmlahh_s16( 58 // CHECK-NEXT: entry: 59 // CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A:%.*]], i64 0 60 // CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i64 0 61 // CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[C:%.*]], i64 0 62 // CHECK-NEXT: [[VQRDMLAHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) 63 // CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i16> [[VQRDMLAHH_S16_I]], i64 0 64 // CHECK-NEXT: ret i16 [[TMP3]] 65 // 66 int16_t test_vqrdmlahh_s16(int16_t a, int16_t b, int16_t c) { 67 return vqrdmlahh_s16(a, b, c); 68 } 69 70 // CHECK-LABEL: @test_vqrdmlahs_s32( 71 // CHECK-NEXT: entry: 72 // CHECK-NEXT: [[VQRDMLAHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmlah.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) 73 // CHECK-NEXT: ret i32 [[VQRDMLAHS_S32_I]] 74 // 75 int32_t test_vqrdmlahs_s32(int32_t a, int32_t b, int32_t c) { 76 return vqrdmlahs_s32(a, b, c); 77 } 78 79 // CHECK-LABEL: @test_vqrdmlahh_lane_s16( 80 // CHECK-NEXT: entry: 81 // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[C:%.*]], i32 3 82 // CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A:%.*]], i64 0 83 // CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i64 0 84 // CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[VGET_LANE]], i64 0 85 // CHECK-NEXT: [[VQRDMLAHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) 86 // CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i16> [[VQRDMLAHH_S16_I]], i64 0 87 // CHECK-NEXT: ret i16 [[TMP3]] 88 // 89 int16_t test_vqrdmlahh_lane_s16(int16_t a, int16_t b, int16x4_t c) { 90 return vqrdmlahh_lane_s16(a, b, c, 3); 91 } 92 93 // CHECK-LABEL: @test_vqrdmlahs_lane_s32( 94 // CHECK-NEXT: entry: 95 // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[C:%.*]], i32 1 96 // CHECK-NEXT: [[VQRDMLAHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmlah.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[VGET_LANE]]) 97 // CHECK-NEXT: ret i32 [[VQRDMLAHS_S32_I]] 98 // 99 int32_t test_vqrdmlahs_lane_s32(int32_t a, int32_t b, int32x2_t c) { 100 return vqrdmlahs_lane_s32(a, b, c, 1); 101 } 102 103 // CHECK-LABEL: @test_vqrdmlahh_laneq_s16( 104 // CHECK-NEXT: entry: 105 // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[C:%.*]], i32 7 106 // CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A:%.*]], i64 0 107 // CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i64 0 108 // CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[VGETQ_LANE]], i64 0 109 // CHECK-NEXT: [[VQRDMLAHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) 110 // CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i16> [[VQRDMLAHH_S16_I]], i64 0 111 // CHECK-NEXT: ret i16 [[TMP3]] 112 // 113 int16_t test_vqrdmlahh_laneq_s16(int16_t a, int16_t b, int16x8_t c) { 114 return vqrdmlahh_laneq_s16(a, b, c, 7); 115 } 116 117 // CHECK-LABEL: @test_vqrdmlahs_laneq_s32( 118 // CHECK-NEXT: entry: 119 // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 3 120 // CHECK-NEXT: [[VQRDMLAHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmlah.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[VGETQ_LANE]]) 121 // CHECK-NEXT: ret i32 [[VQRDMLAHS_S32_I]] 122 // 123 int32_t test_vqrdmlahs_laneq_s32(int32_t a, int32_t b, int32x4_t c) { 124 return vqrdmlahs_laneq_s32(a, b, c, 3); 125 } 126 127 // CHECK-LABEL: @test_vqrdmlsh_laneq_s16( 128 // CHECK-NEXT: entry: 129 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 130 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 131 // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> 132 // CHECK-NEXT: [[VQRDMLSH_V3_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[LANE]]) 133 // CHECK-NEXT: ret <4 x i16> [[VQRDMLSH_V3_I]] 134 // 135 int16x4_t test_vqrdmlsh_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) { 136 return vqrdmlsh_laneq_s16(a, b, v, 7); 137 } 138 139 // CHECK-LABEL: @test_vqrdmlsh_laneq_s32( 140 // CHECK-NEXT: entry: 141 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 142 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 143 // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <2 x i32> <i32 3, i32 3> 144 // CHECK-NEXT: [[VQRDMLSH_V3_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmlsh.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> [[LANE]]) 145 // CHECK-NEXT: ret <2 x i32> [[VQRDMLSH_V3_I]] 146 // 147 int32x2_t test_vqrdmlsh_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) { 148 return vqrdmlsh_laneq_s32(a, b, v, 3); 149 } 150 151 // CHECK-LABEL: @test_vqrdmlshq_laneq_s16( 152 // CHECK-NEXT: entry: 153 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[V:%.*]] to <16 x i8> 154 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 155 // CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP1]], <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7> 156 // CHECK-NEXT: [[VQRDMLSHQ_V3_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmlsh.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[LANE]]) 157 // CHECK-NEXT: ret <8 x i16> [[VQRDMLSHQ_V3_I]] 158 // 159 int16x8_t test_vqrdmlshq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) { 160 return vqrdmlshq_laneq_s16(a, b, v, 7); 161 } 162 163 // CHECK-LABEL: @test_vqrdmlshq_laneq_s32( 164 // CHECK-NEXT: entry: 165 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> 166 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 167 // CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3> 168 // CHECK-NEXT: [[VQRDMLSHQ_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmlsh.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[LANE]]) 169 // CHECK-NEXT: ret <4 x i32> [[VQRDMLSHQ_V3_I]] 170 // 171 int32x4_t test_vqrdmlshq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) { 172 return vqrdmlshq_laneq_s32(a, b, v, 3); 173 } 174 175 // CHECK-LABEL: @test_vqrdmlshh_s16( 176 // CHECK-NEXT: entry: 177 // CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A:%.*]], i64 0 178 // CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i64 0 179 // CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[C:%.*]], i64 0 180 // CHECK-NEXT: [[VQRDMLSHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) 181 // CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i16> [[VQRDMLSHH_S16_I]], i64 0 182 // CHECK-NEXT: ret i16 [[TMP3]] 183 // 184 int16_t test_vqrdmlshh_s16(int16_t a, int16_t b, int16_t c) { 185 return vqrdmlshh_s16(a, b, c); 186 } 187 188 // CHECK-LABEL: @test_vqrdmlshs_s32( 189 // CHECK-NEXT: entry: 190 // CHECK-NEXT: [[VQRDMLSHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmlsh.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) 191 // CHECK-NEXT: ret i32 [[VQRDMLSHS_S32_I]] 192 // 193 int32_t test_vqrdmlshs_s32(int32_t a, int32_t b, int32_t c) { 194 return vqrdmlshs_s32(a, b, c); 195 } 196 197 // CHECK-LABEL: @test_vqrdmlshh_lane_s16( 198 // CHECK-NEXT: entry: 199 // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[C:%.*]], i32 3 200 // CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A:%.*]], i64 0 201 // CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i64 0 202 // CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[VGET_LANE]], i64 0 203 // CHECK-NEXT: [[VQRDMLSHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) 204 // CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i16> [[VQRDMLSHH_S16_I]], i64 0 205 // CHECK-NEXT: ret i16 [[TMP3]] 206 // 207 int16_t test_vqrdmlshh_lane_s16(int16_t a, int16_t b, int16x4_t c) { 208 return vqrdmlshh_lane_s16(a, b, c, 3); 209 } 210 211 // CHECK-LABEL: @test_vqrdmlshs_lane_s32( 212 // CHECK-NEXT: entry: 213 // CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[C:%.*]], i32 1 214 // CHECK-NEXT: [[VQRDMLSHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmlsh.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[VGET_LANE]]) 215 // CHECK-NEXT: ret i32 [[VQRDMLSHS_S32_I]] 216 // 217 int32_t test_vqrdmlshs_lane_s32(int32_t a, int32_t b, int32x2_t c) { 218 return vqrdmlshs_lane_s32(a, b, c, 1); 219 } 220 221 // CHECK-LABEL: @test_vqrdmlshh_laneq_s16( 222 // CHECK-NEXT: entry: 223 // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[C:%.*]], i32 7 224 // CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 [[A:%.*]], i64 0 225 // CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[B:%.*]], i64 0 226 // CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[VGETQ_LANE]], i64 0 227 // CHECK-NEXT: [[VQRDMLSHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) 228 // CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i16> [[VQRDMLSHH_S16_I]], i64 0 229 // CHECK-NEXT: ret i16 [[TMP3]] 230 // 231 int16_t test_vqrdmlshh_laneq_s16(int16_t a, int16_t b, int16x8_t c) { 232 return vqrdmlshh_laneq_s16(a, b, c, 7); 233 } 234 235 // CHECK-LABEL: @test_vqrdmlshs_laneq_s32( 236 // CHECK-NEXT: entry: 237 // CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 3 238 // CHECK-NEXT: [[VQRDMLSHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmlsh.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 [[VGETQ_LANE]]) 239 // CHECK-NEXT: ret i32 [[VQRDMLSHS_S32_I]] 240 // 241 int32_t test_vqrdmlshs_laneq_s32(int32_t a, int32_t b, int32x4_t c) { 242 return vqrdmlshs_laneq_s32(a, b, c, 3); 243 } 244