// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple aarch64 -target-feature +neon \
// RUN:   -target-feature +rcpc3 -disable-O0-optnone -emit-llvm -o - %s \
// RUN:   | opt -S -passes=mem2reg | FileCheck %s

// REQUIRES: aarch64-registered-target

#include <arm_neon.h>


// CHECK-LABEL: @test_vldap1q_lane_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8
// CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1
// CHECK-NEXT: ret <2 x i64> [[VLDAP1_LANE]]
//
uint64x2_t test_vldap1q_lane_u64(uint64_t *a, uint64x2_t b) {
  return vldap1q_lane_u64(a, b, 1);
}

// CHECK-LABEL: @test_vldap1q_lane_s64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8
// CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1
// CHECK-NEXT: ret <2 x i64> [[VLDAP1_LANE]]
//
int64x2_t test_vldap1q_lane_s64(int64_t *a, int64x2_t b) {
  return vldap1q_lane_s64(a, b, 1);
}

// CHECK-LABEL: @test_vldap1q_lane_f64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK-NEXT: [[TMP2:%.*]] = load atomic double, ptr [[A:%.*]] acquire, align 8
// CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <2 x double> [[TMP1]], double [[TMP2]], i32 1
// CHECK-NEXT: ret <2 x double> [[VLDAP1_LANE]]
//
float64x2_t test_vldap1q_lane_f64(float64_t *a, float64x2_t b) {
  return vldap1q_lane_f64(a, b, 1);
}

// CHECK-LABEL: @test_vldap1q_lane_p64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8
// CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[TMP2]], i32 1
// CHECK-NEXT: ret <2 x i64> [[VLDAP1_LANE]]
//
poly64x2_t test_vldap1q_lane_p64(poly64_t *a, poly64x2_t b) {
  return vldap1q_lane_p64(a, b, 1);
}

// CHECK-LABEL: @test_vldap1_lane_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8
// CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0
// CHECK-NEXT: ret <1 x i64> [[VLDAP1_LANE]]
//
uint64x1_t test_vldap1_lane_u64(uint64_t *a, uint64x1_t b) {
  return vldap1_lane_u64(a, b, 0);
}

// CHECK-LABEL: @test_vldap1_lane_s64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8
// CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0
// CHECK-NEXT: ret <1 x i64> [[VLDAP1_LANE]]
//
int64x1_t test_vldap1_lane_s64(int64_t *a, int64x1_t b) {
  return vldap1_lane_s64(a, b, 0);
}

// CHECK-LABEL: @test_vldap1_lane_f64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK-NEXT: [[TMP2:%.*]] = load atomic double, ptr [[A:%.*]] acquire, align 8
// CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <1 x double> [[TMP1]], double [[TMP2]], i32 0
// CHECK-NEXT: ret <1 x double> [[VLDAP1_LANE]]
//
float64x1_t test_vldap1_lane_f64(float64_t *a, float64x1_t b) {
  return vldap1_lane_f64(a, b, 0);
}

// CHECK-LABEL: @test_vldap1_lane_p64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = load atomic i64, ptr [[A:%.*]] acquire, align 8
// CHECK-NEXT: [[VLDAP1_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 [[TMP2]], i32 0
// CHECK-NEXT: ret <1 x i64> [[VLDAP1_LANE]]
//
poly64x1_t test_vldap1_lane_p64(poly64_t *a, poly64x1_t b) {
  return vldap1_lane_p64(a, b, 0);
}

// CHECK-LABEL: @test_vstl1q_lane_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK-NEXT: store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8
// CHECK-NEXT: ret void
//
void test_vstl1q_lane_u64(uint64_t *a, uint64x2_t b) {
  vstl1q_lane_u64(a, b, 1);
}

// CHECK-LABEL: @test_vstl1q_lane_s64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK-NEXT: store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8
// CHECK-NEXT: ret void
//
void test_vstl1q_lane_s64(int64_t *a, int64x2_t b) {
  vstl1q_lane_s64(a, b, 1);
}

// CHECK-LABEL: @test_vstl1q_lane_f64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x double> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
// CHECK-NEXT: store atomic double [[TMP2]], ptr [[A:%.*]] release, align 8
// CHECK-NEXT: ret void
//
void test_vstl1q_lane_f64(float64_t *a, float64x2_t b) {
  vstl1q_lane_f64(a, b, 1);
}

// CHECK-LABEL: @test_vstl1q_lane_p64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
// CHECK-NEXT: store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8
// CHECK-NEXT: ret void
//
void test_vstl1q_lane_p64(poly64_t *a, poly64x2_t b) {
  vstl1q_lane_p64(a, b, 1);
}

// CHECK-LABEL: @test_vstl1_lane_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK-NEXT: store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8
// CHECK-NEXT: ret void
//
void test_vstl1_lane_u64(uint64_t *a, uint64x1_t b) {
  vstl1_lane_u64(a, b, 0);
}

// CHECK-LABEL: @test_vstl1_lane_s64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK-NEXT: store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8
// CHECK-NEXT: ret void
//
void test_vstl1_lane_s64(int64_t *a, int64x1_t b) {
  vstl1_lane_s64(a, b, 0);
}

// CHECK-LABEL: @test_vstl1_lane_f64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x double> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK-NEXT: [[TMP2:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
// CHECK-NEXT: store atomic double [[TMP2]], ptr [[A:%.*]] release, align 8
// CHECK-NEXT: ret void
//
void test_vstl1_lane_f64(float64_t *a, float64x1_t b) {
  vstl1_lane_f64(a, b, 0);
}

// CHECK-LABEL: @test_vstl1_lane_p64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8>
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK-NEXT: [[TMP2:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
// CHECK-NEXT: store atomic i64 [[TMP2]], ptr [[A:%.*]] release, align 8
// CHECK-NEXT: ret void
//
void test_vstl1_lane_p64(poly64_t *a, poly64x1_t b) {
  vstl1_lane_p64(a, b, 0);
}
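
// A minimal usage sketch (illustrative only; not part of the autogenerated
// checks above, and it assumes a target with the rcpc3 feature enabled as in
// the RUN line). As the checks show, the vldap1(q)_lane_* intrinsics perform
// an acquire atomic load of a single lane, and the vstl1(q)_lane_* intrinsics
// a release atomic store of a single lane. The helper name below is
// hypothetical:
//
//   uint64x2_t load_then_publish(uint64_t *src, uint64_t *dst, uint64x2_t v) {
//     v = vldap1q_lane_u64(src, v, 1);   // acquire-load *src into lane 1
//     vstl1q_lane_u64(dst, v, 1);        // release-store lane 1 to *dst
//     return v;
//   }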