// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve \
// RUN: -disable-O0-optnone -mvscale-min=4 -mvscale-max=4 \
// RUN:  -emit-llvm -o - %s | opt -S -passes=sroa | FileCheck %s
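//
// The functions are compiled at -O0 with optnone disabled and the resulting IR
// is run through SROA, so the CHECK lines below match SSA values rather than
// the alloca/load/store sequences clang emits at -O0.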

// REQUIRES: aarch64-registered-target

#include <arm_sve.h>

#define N 512

typedef svint8_t fixed_int8_t __attribute__((arm_sve_vector_bits(N)));
typedef svint16_t fixed_int16_t __attribute__((arm_sve_vector_bits(N)));
typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N)));
typedef svint64_t fixed_int64_t __attribute__((arm_sve_vector_bits(N)));

typedef svuint8_t fixed_uint8_t __attribute__((arm_sve_vector_bits(N)));
typedef svuint16_t fixed_uint16_t __attribute__((arm_sve_vector_bits(N)));
typedef svuint32_t fixed_uint32_t __attribute__((arm_sve_vector_bits(N)));
typedef svuint64_t fixed_uint64_t __attribute__((arm_sve_vector_bits(N)));

typedef svfloat16_t fixed_float16_t __attribute__((arm_sve_vector_bits(N)));
typedef svfloat32_t fixed_float32_t __attribute__((arm_sve_vector_bits(N)));
typedef svfloat64_t fixed_float64_t __attribute__((arm_sve_vector_bits(N)));

typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(N)));
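// With vscale pinned to 4 (-mvscale-min=4 -mvscale-max=4) the SVE vectors are
// N = 512 bits wide, so e.g. fixed_int8_t lowers to <64 x i8>. fixed_bool_t
// holds one predicate bit per byte lane (64 bits total); it is passed as a
// <vscale x 16 x i1> and coerced through <vscale x 2 x i8> / <8 x i8> in the
// checks below.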

// AND

// CHECK-LABEL: @and_bool(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP0:%.*]] to <vscale x 2 x i8>
// CHECK-NEXT:    [[A:%.*]] = call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[A_COERCE]], i64 0)
// CHECK-NEXT:    [[B_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP1:%.*]] to <vscale x 2 x i8>
// CHECK-NEXT:    [[B:%.*]] = call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[B_COERCE]], i64 0)
// CHECK-NEXT:    [[AND:%.*]] = and <8 x i8> [[A]], [[B]]
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> poison, <8 x i8> [[AND]], i64 0)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 2 x i8> [[CAST_SCALABLE]] to <vscale x 16 x i1>
// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
//
fixed_bool_t and_bool(fixed_bool_t a, fixed_bool_t b) {
  return a & b;
}

// CHECK-LABEL: @and_i8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[AND:%.*]] = and <64 x i8> [[A]], [[B]]
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[AND]], i64 0)
// CHECK-NEXT:    ret <vscale x 16 x i8> [[CAST_SCALABLE]]
//
fixed_int8_t and_i8(fixed_int8_t a, fixed_int8_t b) {
  return a & b;
}

// CHECK-LABEL: @and_i16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[AND:%.*]] = and <32 x i16> [[A]], [[B]]
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[AND]], i64 0)
// CHECK-NEXT:    ret <vscale x 8 x i16> [[CAST_SCALABLE]]
//
fixed_int16_t and_i16(fixed_int16_t a, fixed_int16_t b) {
  return a & b;
}

// CHECK-LABEL: @and_i32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[AND:%.*]] = and <16 x i32> [[A]], [[B]]
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[AND]], i64 0)
// CHECK-NEXT:    ret <vscale x 4 x i32> [[CAST_SCALABLE]]
//
fixed_int32_t and_i32(fixed_int32_t a, fixed_int32_t b) {
  return a & b;
}

// CHECK-LABEL: @and_i64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[AND:%.*]] = and <8 x i64> [[A]], [[B]]
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[AND]], i64 0)
// CHECK-NEXT:    ret <vscale x 2 x i64> [[CAST_SCALABLE]]
//
fixed_int64_t and_i64(fixed_int64_t a, fixed_int64_t b) {
  return a & b;
}

// CHECK-LABEL: @and_u8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[AND:%.*]] = and <64 x i8> [[A]], [[B]]
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[AND]], i64 0)
// CHECK-NEXT:    ret <vscale x 16 x i8> [[CAST_SCALABLE]]
//
fixed_uint8_t and_u8(fixed_uint8_t a, fixed_uint8_t b) {
  return a & b;
}

// CHECK-LABEL: @and_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[AND:%.*]] = and <32 x i16> [[A]], [[B]]
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[AND]], i64 0)
// CHECK-NEXT:    ret <vscale x 8 x i16> [[CAST_SCALABLE]]
//
fixed_uint16_t and_u16(fixed_uint16_t a, fixed_uint16_t b) {
  return a & b;
}

// CHECK-LABEL: @and_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[AND:%.*]] = and <16 x i32> [[A]], [[B]]
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[AND]], i64 0)
// CHECK-NEXT:    ret <vscale x 4 x i32> [[CAST_SCALABLE]]
//
fixed_uint32_t and_u32(fixed_uint32_t a, fixed_uint32_t b) {
  return a & b;
}

// CHECK-LABEL: @and_u64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[AND:%.*]] = and <8 x i64> [[A]], [[B]]
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[AND]], i64 0)
// CHECK-NEXT:    ret <vscale x 2 x i64> [[CAST_SCALABLE]]
//
fixed_uint64_t and_u64(fixed_uint64_t a, fixed_uint64_t b) {
  return a & b;
}

// OR

// CHECK-LABEL: @or_bool(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP0:%.*]] to <vscale x 2 x i8>
// CHECK-NEXT:    [[A:%.*]] = call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[A_COERCE]], i64 0)
// CHECK-NEXT:    [[B_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP1:%.*]] to <vscale x 2 x i8>
// CHECK-NEXT:    [[B:%.*]] = call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[B_COERCE]], i64 0)
// CHECK-NEXT:    [[OR:%.*]] = or <8 x i8> [[A]], [[B]]
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> poison, <8 x i8> [[OR]], i64 0)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 2 x i8> [[CAST_SCALABLE]] to <vscale x 16 x i1>
// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
//
fixed_bool_t or_bool(fixed_bool_t a, fixed_bool_t b) {
  return a | b;
}

// CHECK-LABEL: @or_i8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[OR:%.*]] = or <64 x i8> [[A]], [[B]]
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[OR]], i64 0)
// CHECK-NEXT:    ret <vscale x 16 x i8> [[CAST_SCALABLE]]
//
fixed_int8_t or_i8(fixed_int8_t a, fixed_int8_t b) {
  return a | b;
}

// CHECK-LABEL: @or_i16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[OR:%.*]] = or <32 x i16> [[A]], [[B]]
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[OR]], i64 0)
// CHECK-NEXT:    ret <vscale x 8 x i16> [[CAST_SCALABLE]]
//
fixed_int16_t or_i16(fixed_int16_t a, fixed_int16_t b) {
  return a | b;
}

// CHECK-LABEL: @or_i32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[OR:%.*]] = or <16 x i32> [[A]], [[B]]
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[OR]], i64 0)
// CHECK-NEXT:    ret <vscale x 4 x i32> [[CAST_SCALABLE]]
//
fixed_int32_t or_i32(fixed_int32_t a, fixed_int32_t b) {
  return a | b;
}

// CHECK-LABEL: @or_i64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[OR:%.*]] = or <8 x i64> [[A]], [[B]]
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[OR]], i64 0)
// CHECK-NEXT:    ret <vscale x 2 x i64> [[CAST_SCALABLE]]
//
fixed_int64_t or_i64(fixed_int64_t a, fixed_int64_t b) {
  return a | b;
}

// CHECK-LABEL: @or_u8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[OR:%.*]] = or <64 x i8> [[A]], [[B]]
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[OR]], i64 0)
// CHECK-NEXT:    ret <vscale x 16 x i8> [[CAST_SCALABLE]]
//
fixed_uint8_t or_u8(fixed_uint8_t a, fixed_uint8_t b) {
  return a | b;
}

// CHECK-LABEL: @or_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[OR:%.*]] = or <32 x i16> [[A]], [[B]]
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[OR]], i64 0)
// CHECK-NEXT:    ret <vscale x 8 x i16> [[CAST_SCALABLE]]
//
fixed_uint16_t or_u16(fixed_uint16_t a, fixed_uint16_t b) {
  return a | b;
}

// CHECK-LABEL: @or_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[OR:%.*]] = or <16 x i32> [[A]], [[B]]
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[OR]], i64 0)
// CHECK-NEXT:    ret <vscale x 4 x i32> [[CAST_SCALABLE]]
//
fixed_uint32_t or_u32(fixed_uint32_t a, fixed_uint32_t b) {
  return a | b;
}

// CHECK-LABEL: @or_u64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[OR:%.*]] = or <8 x i64> [[A]], [[B]]
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[OR]], i64 0)
// CHECK-NEXT:    ret <vscale x 2 x i64> [[CAST_SCALABLE]]
//
fixed_uint64_t or_u64(fixed_uint64_t a, fixed_uint64_t b) {
  return a | b;
}

// XOR

// CHECK-LABEL: @xor_bool(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP0:%.*]] to <vscale x 2 x i8>
// CHECK-NEXT:    [[A:%.*]] = call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[A_COERCE]], i64 0)
// CHECK-NEXT:    [[B_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP1:%.*]] to <vscale x 2 x i8>
// CHECK-NEXT:    [[B:%.*]] = call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[B_COERCE]], i64 0)
// CHECK-NEXT:    [[XOR:%.*]] = xor <8 x i8> [[A]], [[B]]
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> poison, <8 x i8> [[XOR]], i64 0)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <vscale x 2 x i8> [[CAST_SCALABLE]] to <vscale x 16 x i1>
// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
//
fixed_bool_t xor_bool(fixed_bool_t a, fixed_bool_t b) {
  return a ^ b;
}

// CHECK-LABEL: @xor_i8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[XOR:%.*]] = xor <64 x i8> [[A]], [[B]]
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[XOR]], i64 0)
// CHECK-NEXT:    ret <vscale x 16 x i8> [[CAST_SCALABLE]]
//
fixed_int8_t xor_i8(fixed_int8_t a, fixed_int8_t b) {
  return a ^ b;
}

// CHECK-LABEL: @xor_i16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[XOR:%.*]] = xor <32 x i16> [[A]], [[B]]
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[XOR]], i64 0)
// CHECK-NEXT:    ret <vscale x 8 x i16> [[CAST_SCALABLE]]
//
fixed_int16_t xor_i16(fixed_int16_t a, fixed_int16_t b) {
  return a ^ b;
}

// CHECK-LABEL: @xor_i32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[XOR:%.*]] = xor <16 x i32> [[A]], [[B]]
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[XOR]], i64 0)
// CHECK-NEXT:    ret <vscale x 4 x i32> [[CAST_SCALABLE]]
//
fixed_int32_t xor_i32(fixed_int32_t a, fixed_int32_t b) {
  return a ^ b;
}

// CHECK-LABEL: @xor_i64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[XOR:%.*]] = xor <8 x i64> [[A]], [[B]]
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[XOR]], i64 0)
// CHECK-NEXT:    ret <vscale x 2 x i64> [[CAST_SCALABLE]]
//
fixed_int64_t xor_i64(fixed_int64_t a, fixed_int64_t b) {
  return a ^ b;
}

// CHECK-LABEL: @xor_u8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[B:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[B_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[XOR:%.*]] = xor <64 x i8> [[A]], [[B]]
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[XOR]], i64 0)
// CHECK-NEXT:    ret <vscale x 16 x i8> [[CAST_SCALABLE]]
//
fixed_uint8_t xor_u8(fixed_uint8_t a, fixed_uint8_t b) {
  return a ^ b;
}

// CHECK-LABEL: @xor_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[B:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[B_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[XOR:%.*]] = xor <32 x i16> [[A]], [[B]]
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[XOR]], i64 0)
// CHECK-NEXT:    ret <vscale x 8 x i16> [[CAST_SCALABLE]]
//
fixed_uint16_t xor_u16(fixed_uint16_t a, fixed_uint16_t b) {
  return a ^ b;
}

// CHECK-LABEL: @xor_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[B:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[B_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[XOR:%.*]] = xor <16 x i32> [[A]], [[B]]
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[XOR]], i64 0)
// CHECK-NEXT:    ret <vscale x 4 x i32> [[CAST_SCALABLE]]
//
fixed_uint32_t xor_u32(fixed_uint32_t a, fixed_uint32_t b) {
  return a ^ b;
}

// CHECK-LABEL: @xor_u64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[B:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[B_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[XOR:%.*]] = xor <8 x i64> [[A]], [[B]]
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[XOR]], i64 0)
// CHECK-NEXT:    ret <vscale x 2 x i64> [[CAST_SCALABLE]]
//
fixed_uint64_t xor_u64(fixed_uint64_t a, fixed_uint64_t b) {
  return a ^ b;
}

// NEG
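// The neg_* functions exercise operator ~ (bitwise complement), which is
// lowered as an xor with an all-ones splat in the checks below.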

// CHECK-LABEL: @neg_bool(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP0:%.*]] to <vscale x 2 x i8>
// CHECK-NEXT:    [[A:%.*]] = call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[A_COERCE]], i64 0)
// CHECK-NEXT:    [[NOT:%.*]] = xor <8 x i8> [[A]], splat (i8 -1)
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> poison, <8 x i8> [[NOT]], i64 0)
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <vscale x 2 x i8> [[CAST_SCALABLE]] to <vscale x 16 x i1>
// CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
//
fixed_bool_t neg_bool(fixed_bool_t a) {
  return ~a;
}

// CHECK-LABEL: @neg_i8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[NOT:%.*]] = xor <64 x i8> [[A]], splat (i8 -1)
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[NOT]], i64 0)
// CHECK-NEXT:    ret <vscale x 16 x i8> [[CAST_SCALABLE]]
//
fixed_int8_t neg_i8(fixed_int8_t a) {
  return ~a;
}

// CHECK-LABEL: @neg_i16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[NOT:%.*]] = xor <32 x i16> [[A]], splat (i16 -1)
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[NOT]], i64 0)
// CHECK-NEXT:    ret <vscale x 8 x i16> [[CAST_SCALABLE]]
//
fixed_int16_t neg_i16(fixed_int16_t a) {
  return ~a;
}

// CHECK-LABEL: @neg_i32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[NOT:%.*]] = xor <16 x i32> [[A]], splat (i32 -1)
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[NOT]], i64 0)
// CHECK-NEXT:    ret <vscale x 4 x i32> [[CAST_SCALABLE]]
//
fixed_int32_t neg_i32(fixed_int32_t a) {
  return ~a;
}

// CHECK-LABEL: @neg_i64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[NOT:%.*]] = xor <8 x i64> [[A]], splat (i64 -1)
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[NOT]], i64 0)
// CHECK-NEXT:    ret <vscale x 2 x i64> [[CAST_SCALABLE]]
//
fixed_int64_t neg_i64(fixed_int64_t a) {
  return ~a;
}

// CHECK-LABEL: @neg_u8(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[NOT:%.*]] = xor <64 x i8> [[A]], splat (i8 -1)
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> poison, <64 x i8> [[NOT]], i64 0)
// CHECK-NEXT:    ret <vscale x 16 x i8> [[CAST_SCALABLE]]
//
fixed_uint8_t neg_u8(fixed_uint8_t a) {
  return ~a;
}

// CHECK-LABEL: @neg_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[NOT:%.*]] = xor <32 x i16> [[A]], splat (i16 -1)
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> poison, <32 x i16> [[NOT]], i64 0)
// CHECK-NEXT:    ret <vscale x 8 x i16> [[CAST_SCALABLE]]
//
fixed_uint16_t neg_u16(fixed_uint16_t a) {
  return ~a;
}

// CHECK-LABEL: @neg_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[NOT:%.*]] = xor <16 x i32> [[A]], splat (i32 -1)
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> poison, <16 x i32> [[NOT]], i64 0)
// CHECK-NEXT:    ret <vscale x 4 x i32> [[CAST_SCALABLE]]
//
fixed_uint32_t neg_u32(fixed_uint32_t a) {
  return ~a;
}

// CHECK-LABEL: @neg_u64(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
// CHECK-NEXT:    [[NOT:%.*]] = xor <8 x i64> [[A]], splat (i64 -1)
// CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> poison, <8 x i64> [[NOT]], i64 0)
// CHECK-NEXT:    ret <vscale x 2 x i64> [[CAST_SCALABLE]]
//
fixed_uint64_t neg_u64(fixed_uint64_t a) {
  return ~a;
}