; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,SVE
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s --check-prefixes=CHECK,SVE2

; This file checks code generation for the SVE permute/select intrinsics
; (clasta/clastb, dupq lane, ext, lasta/lastb, compact, rev) under both +sve
; and +sve2. Do not hand-edit the CHECK lines; regenerate them with
; utils/update_llc_test_checks.py.
; NOTE(review): functions marked #0 operate on bfloat; the attribute group #0
; definition is outside this chunk (presumably enabling +bf16) — confirm at
; the end of the file.

;
; CLASTA (Vectors)
;

define <vscale x 16 x i8> @clasta_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: clasta_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.clasta.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @clasta_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: clasta_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.clasta.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @clasta_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: clasta_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.clasta.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @clasta_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: clasta_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.clasta.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @clasta_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: clasta_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.clasta.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @clasta_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: clasta_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @clasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clasta_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.clasta.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @clasta_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: clasta_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.clasta.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; CLASTA (Scalar)
;

define i8 @clasta_n_i8(<vscale x 16 x i1> %pg, i8 %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: clasta_n_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta w0, p0, w0, z0.b
; CHECK-NEXT:    ret
  %out = call i8 @llvm.aarch64.sve.clasta.n.nxv16i8(<vscale x 16 x i1> %pg, i8 %a, <vscale x 16 x i8> %b)
  ret i8 %out
}

define i16 @clasta_n_i16(<vscale x 8 x i1> %pg, i16 %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: clasta_n_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta w0, p0, w0, z0.h
; CHECK-NEXT:    ret
  %out = call i16 @llvm.aarch64.sve.clasta.n.nxv8i16(<vscale x 8 x i1> %pg, i16 %a, <vscale x 8 x i16> %b)
  ret i16 %out
}

define i32 @clasta_n_i32(<vscale x 4 x i1> %pg, i32 %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: clasta_n_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta w0, p0, w0, z0.s
; CHECK-NEXT:    ret
  %out = call i32 @llvm.aarch64.sve.clasta.n.nxv4i32(<vscale x 4 x i1> %pg, i32 %a, <vscale x 4 x i32> %b)
  ret i32 %out
}

define i64 @clasta_n_i64(<vscale x 2 x i1> %pg, i64 %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: clasta_n_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta x0, p0, x0, z0.d
; CHECK-NEXT:    ret
  %out = call i64 @llvm.aarch64.sve.clasta.n.nxv2i64(<vscale x 2 x i1> %pg, i64 %a, <vscale x 2 x i64> %b)
  ret i64 %out
}

define half @clasta_n_f16(<vscale x 8 x i1> %pg, half %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: clasta_n_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta h0, p0, h0, z1.h
; CHECK-NEXT:    ret
  %out = call half @llvm.aarch64.sve.clasta.n.nxv8f16(<vscale x 8 x i1> %pg, half %a, <vscale x 8 x half> %b)
  ret half %out
}

define bfloat @clasta_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: clasta_n_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta h0, p0, h0, z1.h
; CHECK-NEXT:    ret
  %out = call bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b)
  ret bfloat %out
}

define float @clasta_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clasta_n_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta s0, p0, s0, z1.s
; CHECK-NEXT:    ret
  %out = call float @llvm.aarch64.sve.clasta.n.nxv4f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b)
  ret float %out
}

define double @clasta_n_f64(<vscale x 2 x i1> %pg, double %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: clasta_n_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clasta d0, p0, d0, z1.d
; CHECK-NEXT:    ret
  %out = call double @llvm.aarch64.sve.clasta.n.nxv2f64(<vscale x 2 x i1> %pg, double %a, <vscale x 2 x double> %b)
  ret double %out
}

;
; CLASTB (Vectors)
;

define <vscale x 16 x i8> @clastb_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: clastb_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.clastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @clastb_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: clastb_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.clastb.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @clastb_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: clastb_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.clastb.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @clastb_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: clastb_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.clastb.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @clastb_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: clastb_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.clastb.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @clastb_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: clastb_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @clastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clastb_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.clastb.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @clastb_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: clastb_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.clastb.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; CLASTB (Scalar)
;

define i8 @clastb_n_i8(<vscale x 16 x i1> %pg, i8 %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: clastb_n_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb w0, p0, w0, z0.b
; CHECK-NEXT:    ret
  %out = call i8 @llvm.aarch64.sve.clastb.n.nxv16i8(<vscale x 16 x i1> %pg, i8 %a, <vscale x 16 x i8> %b)
  ret i8 %out
}

define i16 @clastb_n_i16(<vscale x 8 x i1> %pg, i16 %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: clastb_n_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb w0, p0, w0, z0.h
; CHECK-NEXT:    ret
  %out = call i16 @llvm.aarch64.sve.clastb.n.nxv8i16(<vscale x 8 x i1> %pg, i16 %a, <vscale x 8 x i16> %b)
  ret i16 %out
}

define i32 @clastb_n_i32(<vscale x 4 x i1> %pg, i32 %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: clastb_n_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb w0, p0, w0, z0.s
; CHECK-NEXT:    ret
  %out = call i32 @llvm.aarch64.sve.clastb.n.nxv4i32(<vscale x 4 x i1> %pg, i32 %a, <vscale x 4 x i32> %b)
  ret i32 %out
}

define i64 @clastb_n_i64(<vscale x 2 x i1> %pg, i64 %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: clastb_n_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb x0, p0, x0, z0.d
; CHECK-NEXT:    ret
  %out = call i64 @llvm.aarch64.sve.clastb.n.nxv2i64(<vscale x 2 x i1> %pg, i64 %a, <vscale x 2 x i64> %b)
  ret i64 %out
}

define half @clastb_n_f16(<vscale x 8 x i1> %pg, half %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: clastb_n_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb h0, p0, h0, z1.h
; CHECK-NEXT:    ret
  %out = call half @llvm.aarch64.sve.clastb.n.nxv8f16(<vscale x 8 x i1> %pg, half %a, <vscale x 8 x half> %b)
  ret half %out
}

define bfloat @clastb_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: clastb_n_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb h0, p0, h0, z1.h
; CHECK-NEXT:    ret
  %out = call bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b)
  ret bfloat %out
}

define float @clastb_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clastb_n_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb s0, p0, s0, z1.s
; CHECK-NEXT:    ret
  %out = call float @llvm.aarch64.sve.clastb.n.nxv4f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b)
  ret float %out
}

define double @clastb_n_f64(<vscale x 2 x i1> %pg, double %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: clastb_n_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    clastb d0, p0, d0, z1.d
; CHECK-NEXT:    ret
  %out = call double @llvm.aarch64.sve.clastb.n.nxv2f64(<vscale x 2 x i1> %pg, double %a, <vscale x 2 x double> %b)
  ret double %out
}

;
; DUPQ
;

define <vscale x 16 x i8> @dupq_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: dupq_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.q, q0
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> %a, i64 0)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @dupq_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: dupq_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.q, z0.q[1]
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> %a, i64 1)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @dupq_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: dupq_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.q, z0.q[2]
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> %a, i64 2)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @dupq_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: dupq_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.q, z0.q[3]
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %a, i64 3)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @dupq_f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: dupq_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.q, q0
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> %a, i64 0)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @dupq_bf16(<vscale x 8 x bfloat> %a) #0 {
; CHECK-LABEL: dupq_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.q, q0
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %a, i64 0)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @dupq_f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: dupq_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.q, z0.q[1]
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> %a, i64 1)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @dupq_f64(<vscale x 2 x double> %a) {
; CHECK-LABEL: dupq_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.q, z0.q[2]
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> %a, i64 2)
  ret <vscale x 2 x double> %out
}

;
; DUPQ_LANE
;

define <vscale x 16 x i8> @dupq_lane_i8(<vscale x 16 x i8> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    add x8, x0, x0
; CHECK-NEXT:    mov z2.d, x8
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    add z1.d, z1.d, z2.d
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> %a, i64 %idx)
  ret <vscale x 16 x i8> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 8 x i16> @dupq_lane_i16(<vscale x 8 x i16> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    add x8, x0, x0
; CHECK-NEXT:    mov z2.d, x8
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    add z1.d, z1.d, z2.d
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> %a, i64 %idx)
  ret <vscale x 8 x i16> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 4 x i32> @dupq_lane_i32(<vscale x 4 x i32> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    add x8, x0, x0
; CHECK-NEXT:    mov z2.d, x8
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    add z1.d, z1.d, z2.d
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> %a, i64 %idx)
  ret <vscale x 4 x i32> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 2 x i64> @dupq_lane_i64(<vscale x 2 x i64> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    add x8, x0, x0
; CHECK-NEXT:    mov z2.d, x8
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    add z1.d, z1.d, z2.d
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %a, i64 %idx)
  ret <vscale x 2 x i64> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 8 x half> @dupq_lane_f16(<vscale x 8 x half> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    add x8, x0, x0
; CHECK-NEXT:    mov z2.d, x8
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    add z1.d, z1.d, z2.d
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> %a, i64 %idx)
  ret <vscale x 8 x half> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 8 x bfloat> @dupq_lane_bf16(<vscale x 8 x bfloat> %a, i64 %idx) #0 {
; CHECK-LABEL: dupq_lane_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    add x8, x0, x0
; CHECK-NEXT:    mov z2.d, x8
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    add z1.d, z1.d, z2.d
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %a, i64 %idx)
  ret <vscale x 8 x bfloat> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 4 x float> @dupq_lane_f32(<vscale x 4 x float> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    add x8, x0, x0
; CHECK-NEXT:    mov z2.d, x8
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    add z1.d, z1.d, z2.d
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> %a, i64 %idx)
  ret <vscale x 4 x float> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 2 x double> @dupq_lane_f64(<vscale x 2 x double> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    add x8, x0, x0
; CHECK-NEXT:    mov z2.d, x8
; CHECK-NEXT:    and z1.d, z1.d, #0x1
; CHECK-NEXT:    add z1.d, z1.d, z2.d
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> %a, i64 %idx)
  ret <vscale x 2 x double> %out
}

; NOTE: Index out of range (0-3)
define <vscale x 2 x i64> @dupq_i64_range(<vscale x 2 x i64> %a) {
; SVE-LABEL: dupq_i64_range:
; SVE:       // %bb.0:
; SVE-NEXT:    index z1.d, #0, #1
; SVE-NEXT:    and z1.d, z1.d, #0x1
; SVE-NEXT:    orr z1.d, z1.d, #0x8
; SVE-NEXT:    tbl z0.d, { z0.d }, z1.d
; SVE-NEXT:    ret
;
; SVE2-LABEL: dupq_i64_range:
; SVE2:       // %bb.0:
; SVE2-NEXT:    index z1.d, #0, #1
; SVE2-NEXT:    and z1.d, z1.d, #0x1
; SVE2-NEXT:    add z1.d, z1.d, #8 // =0x8
; SVE2-NEXT:    tbl z0.d, { z0.d }, z1.d
; SVE2-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %a, i64 4)
  ret <vscale x 2 x i64> %out
}
;
; EXT
;

define dso_local <vscale x 4 x float> @dupq_f32_repeat_complex(float %x, float %y) {
; CHECK-LABEL: dupq_f32_repeat_complex:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
; CHECK-NEXT:    mov v0.s[1], v1.s[0]
; CHECK-NEXT:    mov z0.d, d0
; CHECK-NEXT:    ret
  %1 = insertelement <4 x float> undef, float %x, i64 0
  %2 = insertelement <4 x float> %1, float %y, i64 1
  %3 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> %2, i64 0)
  %4 = bitcast <vscale x 4 x float> %3 to <vscale x 2 x double>
  %5 = shufflevector <vscale x 2 x double> %4, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
  %6 = bitcast <vscale x 2 x double> %5 to <vscale x 4 x float>
  ret <vscale x 4 x float> %6
}

define dso_local <vscale x 8 x half> @dupq_f16_repeat_complex(half %x, half %y) {
; CHECK-LABEL: dupq_f16_repeat_complex:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    // kill: def $h1 killed $h1 def $q1
; CHECK-NEXT:    mov v0.h[1], v1.h[0]
; CHECK-NEXT:    mov z0.s, s0
; CHECK-NEXT:    ret
  %1 = insertelement <8 x half> undef, half %x, i64 0
  %2 = insertelement <8 x half> %1, half %y, i64 1
  %3 = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> undef, <8 x half> %2, i64 0)
  %4 = bitcast <vscale x 8 x half> %3 to <vscale x 4 x float>
  %5 = shufflevector <vscale x 4 x float> %4, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
  %6 = bitcast <vscale x 4 x float> %5 to <vscale x 8 x half>
  ret <vscale x 8 x half> %6
}

define <vscale x 16 x i8> @ext_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; SVE-LABEL: ext_i8:
; SVE:       // %bb.0:
; SVE-NEXT:    ext z0.b, z0.b, z1.b, #255
; SVE-NEXT:    ret
;
; SVE2-LABEL: ext_i8:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #255
; SVE2-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.ext.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 255)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @ext_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; SVE-LABEL: ext_i16:
; SVE:       // %bb.0:
; SVE-NEXT:    ext z0.b, z0.b, z1.b, #0
; SVE-NEXT:    ret
;
; SVE2-LABEL: ext_i16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #0
; SVE2-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ext.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 0)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @ext_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; SVE-LABEL: ext_i32:
; SVE:       // %bb.0:
; SVE-NEXT:    ext z0.b, z0.b, z1.b, #4
; SVE-NEXT:    ret
;
; SVE2-LABEL: ext_i32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #4
; SVE2-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ext.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 1)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @ext_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; SVE-LABEL: ext_i64:
; SVE:       // %bb.0:
; SVE-NEXT:    ext z0.b, z0.b, z1.b, #16
; SVE-NEXT:    ret
;
; SVE2-LABEL: ext_i64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #16
; SVE2-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ext.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i32 2)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x bfloat> @ext_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; SVE-LABEL: ext_bf16:
; SVE:       // %bb.0:
; SVE-NEXT:    ext z0.b, z0.b, z1.b, #6
; SVE-NEXT:    ret
;
; SVE2-LABEL: ext_bf16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #6
; SVE2-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ext.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, i32 3)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @ext_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; SVE-LABEL: ext_f16:
; SVE:       // %bb.0:
; SVE-NEXT:    ext z0.b, z0.b, z1.b, #6
; SVE-NEXT:    ret
;
; SVE2-LABEL: ext_f16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #6
; SVE2-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.ext.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i32 3)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @ext_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; SVE-LABEL: ext_f32:
; SVE:       // %bb.0:
; SVE-NEXT:    ext z0.b, z0.b, z1.b, #16
; SVE-NEXT:    ret
;
; SVE2-LABEL: ext_f32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #16
; SVE2-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.ext.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i32 4)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @ext_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; SVE-LABEL: ext_f64:
; SVE:       // %bb.0:
; SVE-NEXT:    ext z0.b, z0.b, z1.b, #40
; SVE-NEXT:    ret
;
; SVE2-LABEL: ext_f64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #40
; SVE2-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.ext.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i32 5)
  ret <vscale x 2 x double> %out
}

;
; LASTA
;

define i8 @lasta_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: lasta_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lasta w0, p0, z0.b
; CHECK-NEXT:    ret
  %res = call i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a)
  ret i8 %res
}

define i16 @lasta_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: lasta_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lasta w0, p0, z0.h
; CHECK-NEXT:    ret
  %res = call i16 @llvm.aarch64.sve.lasta.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
  ret i16 %res
}

define i32 @lasta_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: lasta_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lasta w0, p0, z0.s
; CHECK-NEXT:    ret
  %res = call i32 @llvm.aarch64.sve.lasta.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a)
  ret i32 %res
}

define i64 @lasta_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: lasta_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lasta x0, p0, z0.d
; CHECK-NEXT:    ret
  %res = call i64 @llvm.aarch64.sve.lasta.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a)
  ret i64 %res
}

define half @lasta_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
; CHECK-LABEL: lasta_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lasta h0, p0, z0.h
; CHECK-NEXT:    ret
  %res = call half @llvm.aarch64.sve.lasta.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a)
  ret half %res
}

define bfloat @lasta_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) #0 {
; CHECK-LABEL: lasta_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lasta h0, p0, z0.h
; CHECK-NEXT:    ret
  %res = call bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a)
  ret bfloat %res
}

define float @lasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: lasta_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lasta s0, p0, z0.s
; CHECK-NEXT:    ret
  %res = call float @llvm.aarch64.sve.lasta.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a)
  ret float %res
}

define float @lasta_f32_v2(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a) {
; CHECK-LABEL: lasta_f32_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lasta s0, p0, z0.s
; CHECK-NEXT:    ret
  %res = call float @llvm.aarch64.sve.lasta.nxv2f32(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a)
  ret float %res
}

define double @lasta_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
; CHECK-LABEL: lasta_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lasta d0, p0, z0.d
; CHECK-NEXT:    ret
  %res = call double @llvm.aarch64.sve.lasta.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a)
  ret double %res
}

;
; LASTB
;

define i8 @lastb_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: lastb_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lastb w0, p0, z0.b
; CHECK-NEXT:    ret
  %res = call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a)
  ret i8 %res
}

define i16 @lastb_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: lastb_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lastb w0, p0, z0.h
; CHECK-NEXT:    ret
  %res = call i16 @llvm.aarch64.sve.lastb.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
  ret i16 %res
}

define i32 @lastb_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: lastb_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lastb w0, p0, z0.s
; CHECK-NEXT:    ret
  %res = call i32 @llvm.aarch64.sve.lastb.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a)
  ret i32 %res
}

define i64 @lastb_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: lastb_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lastb x0, p0, z0.d
; CHECK-NEXT:    ret
  %res = call i64 @llvm.aarch64.sve.lastb.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a)
  ret i64 %res
}

define half @lastb_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
; CHECK-LABEL: lastb_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lastb h0, p0, z0.h
; CHECK-NEXT:    ret
  %res = call half @llvm.aarch64.sve.lastb.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a)
  ret half %res
}

define bfloat @lastb_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) #0 {
; CHECK-LABEL: lastb_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lastb h0, p0, z0.h
; CHECK-NEXT:    ret
  %res = call bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a)
  ret bfloat %res
}

define float @lastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: lastb_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lastb s0, p0, z0.s
; CHECK-NEXT:    ret
  %res = call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a)
  ret float %res
}

define float @lastb_f32_v2(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a) {
; CHECK-LABEL: lastb_f32_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lastb s0, p0, z0.s
; CHECK-NEXT:    ret
  %res = call float @llvm.aarch64.sve.lastb.nxv2f32(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a)
  ret float %res
}

define double @lastb_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
; CHECK-LABEL: lastb_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lastb d0, p0, z0.d
; CHECK-NEXT:    ret
  %res = call double @llvm.aarch64.sve.lastb.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a)
  ret double %res
}

;
; COMPACT
;

define <vscale x 4 x i32> @compact_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: compact_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    compact z0.s, p0, z0.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.compact.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @compact_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: compact_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    compact z0.d, p0, z0.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.compact.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

define <vscale x 4 x float> @compact_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: compact_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    compact z0.s, p0, z0.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.compact.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @compact_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
; CHECK-LABEL: compact_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    compact z0.d, p0, z0.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.compact.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a)
  ret <vscale x 2 x double> %out
}

;
; REV
;

define <vscale x 16 x i1> @rev_nxv16i1(<vscale x 16 x i1> %a) {
; CHECK-LABEL: rev_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev p0.b, p0.b
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1> %a)
  ret <vscale x 16 x i1> %res
}

define <vscale x 8 x i1> @rev_nxv8i1(<vscale x 8 x i1> %a) {
; CHECK-LABEL: rev_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev p0.h, p0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i1> @llvm.aarch64.sve.rev.nxv8i1(<vscale x 8 x i1> %a)
  ret <vscale x 8 x i1> %res
}

define <vscale x 4 x i1> @rev_nxv4i1(<vscale x 4 x i1> %a) {
; CHECK-LABEL: rev_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev p0.s, p0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i1> @llvm.aarch64.sve.rev.nxv4i1(<vscale x 4 x i1> %a)
  ret <vscale x 4 x i1> %res
}

define <vscale x 2 x i1> @rev_nxv2i1(<vscale x 2 x i1> %a) {
; CHECK-LABEL: rev_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i1> @llvm.aarch64.sve.rev.nxv2i1(<vscale x 2 x i1> %a)
  ret <vscale x 2 x i1> %res
}

define <vscale x 16 x i1> @rev_b16(<vscale x 16 x i1> %a) {
; CHECK-LABEL: rev_b16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev p0.h, p0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> %a)
  ret <vscale x 16 x i1> %res
}

define <vscale x 16 x i1> @rev_b32(<vscale x 16 x i1> %a) {
; CHECK-LABEL: rev_b32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev p0.s, p0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1> %a)
  ret <vscale x 16 x i1> %res
}

define <vscale x 16 x i1> @rev_b64(<vscale x 16 x i1> %a) {
; CHECK-LABEL: rev_b64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev p0.d, p0.d
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1> %a)
  ret <vscale x 16 x i1> %res
}

define <vscale x 16 x i8> @rev_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: rev_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev z0.b, z0.b
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8> %a)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @rev_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: rev_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev z0.h, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16> %a)
  ret <vscale x 8 x i16> %res
}
define <vscale x 4 x i32> @rev_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: rev_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev z0.s, z0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @rev_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: rev_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev z0.d, z0.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %res
}

define <vscale x 8 x bfloat> @rev_bf16(<vscale x 8 x bfloat> %a) #0 {
; CHECK-LABEL: rev_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev z0.h, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.rev.nxv8bf16(<vscale x 8 x bfloat> %a)
  ret <vscale x 8 x bfloat> %res
}

define <vscale x 8 x half> @rev_f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: rev_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev z0.h, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.aarch64.sve.rev.nxv8f16(<vscale x 8 x half> %a)
  ret <vscale x 8 x half> %res
}

define <vscale x 4 x float> @rev_f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: rev_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev z0.s, z0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.aarch64.sve.rev.nxv4f32(<vscale x 4 x float> %a)
  ret <vscale x 4 x float> %res
}

define <vscale x 2 x double> @rev_f64(<vscale x 2 x double> %a) {
; CHECK-LABEL: rev_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rev z0.d, z0.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x double> @llvm.aarch64.sve.rev.nxv2f64(<vscale x 2 x double> %a)
  ret <vscale x 2 x double> %res
}

;
; SPLICE
;
; SVE lowers to the destructive SPLICE; SVE2 selects the two-register
; (multi-vector operand) SPLICE form instead.

define <vscale x 16 x i8> @splice_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; SVE-LABEL: splice_i8:
; SVE:       // %bb.0:
; SVE-NEXT:    splice z0.b, p0, z0.b, z1.b
; SVE-NEXT:    ret
;
; SVE2-LABEL: splice_i8:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    splice z0.b, p0, { z0.b, z1.b }
; SVE2-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.splice.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @splice_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; SVE-LABEL: splice_i16:
; SVE:       // %bb.0:
; SVE-NEXT:    splice z0.h, p0, z0.h, z1.h
; SVE-NEXT:    ret
;
; SVE2-LABEL: splice_i16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    splice z0.h, p0, { z0.h, z1.h }
; SVE2-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.splice.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @splice_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; SVE-LABEL: splice_i32:
; SVE:       // %bb.0:
; SVE-NEXT:    splice z0.s, p0, z0.s, z1.s
; SVE-NEXT:    ret
;
; SVE2-LABEL: splice_i32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    splice z0.s, p0, { z0.s, z1.s }
; SVE2-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.splice.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @splice_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; SVE-LABEL: splice_i64:
; SVE:       // %bb.0:
; SVE-NEXT:    splice z0.d, p0, z0.d, z1.d
; SVE-NEXT:    ret
;
; SVE2-LABEL: splice_i64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    splice z0.d, p0, { z0.d, z1.d }
; SVE2-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.splice.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x bfloat> @splice_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; SVE-LABEL: splice_bf16:
; SVE:       // %bb.0:
; SVE-NEXT:    splice z0.h, p0, z0.h, z1.h
; SVE-NEXT:    ret
;
; SVE2-LABEL: splice_bf16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    splice z0.h, p0, { z0.h, z1.h }
; SVE2-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.splice.nxv8bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @splice_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; SVE-LABEL: splice_f16:
; SVE:       // %bb.0:
; SVE-NEXT:    splice z0.h, p0, z0.h, z1.h
; SVE-NEXT:    ret
;
; SVE2-LABEL: splice_f16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    splice z0.h, p0, { z0.h, z1.h }
; SVE2-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.splice.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a,
<vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @splice_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; SVE-LABEL: splice_f32:
; SVE:       // %bb.0:
; SVE-NEXT:    splice z0.s, p0, z0.s, z1.s
; SVE-NEXT:    ret
;
; SVE2-LABEL: splice_f32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    splice z0.s, p0, { z0.s, z1.s }
; SVE2-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.splice.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @splice_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; SVE-LABEL: splice_f64:
; SVE:       // %bb.0:
; SVE-NEXT:    splice z0.d, p0, z0.d, z1.d
; SVE-NEXT:    ret
;
; SVE2-LABEL: splice_f64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; SVE2-NEXT:    splice z0.d, p0, { z0.d, z1.d }
; SVE2-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.splice.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; SUNPKHI
;
; The intrinsic should lower to a single SUNPKHI widening the high half.

define <vscale x 8 x i16> @sunpkhi_i16(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sunpkhi_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sunpkhi z0.h, z0.b
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sunpkhi.nxv8i16(<vscale x 16 x i8> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @sunpkhi_i32(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sunpkhi_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sunpkhi z0.s, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sunpkhi.nxv4i32(<vscale x 8 x i16> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @sunpkhi_i64(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sunpkhi_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sunpkhi z0.d, z0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.sunpkhi.nxv2i64(<vscale x 4 x i32> %a)
  ret <vscale x 2 x i64> %res
}

;
; SUNPKLO
;
; The intrinsic should lower to a single SUNPKLO widening the low half.

define <vscale x 8 x i16> @sunpklo_i16(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sunpklo_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sunpklo z0.h, z0.b
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sunpklo.nxv8i16(<vscale x 16 x i8> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @sunpklo_i32(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sunpklo_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sunpklo z0.s, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sunpklo.nxv4i32(<vscale x 8 x i16> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @sunpklo_i64(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sunpklo_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sunpklo z0.d, z0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.sunpklo.nxv2i64(<vscale x 4 x i32> %a)
  ret <vscale x 2 x i64> %res
}

;
; TBL
;
; The intrinsic should lower to the single-source TBL form.

define <vscale x 16 x i8> @tbl_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: tbl_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbl z0.b, { z0.b }, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.tbl.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @tbl_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: tbl_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbl z0.h, { z0.h }, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.tbl.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @tbl_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: tbl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbl z0.s, { z0.s }, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.tbl.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @tbl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: tbl_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.tbl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @tbl_f16(<vscale x 8 x half> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: tbl_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbl z0.h, { z0.h }, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.tbl.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @tbl_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: tbl_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbl z0.h, { z0.h }, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @tbl_f32(<vscale x 4 x float> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: tbl_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbl z0.s, { z0.s }, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.tbl.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @tbl_f64(<vscale x 2 x double> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: tbl_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.tbl.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x double> %out
}

;
; UUNPKHI
;
; The intrinsic should lower to a single UUNPKHI widening the high half.

define <vscale x 8 x i16> @uunpkhi_i16(<vscale x 16 x i8> %a) {
; CHECK-LABEL: uunpkhi_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpkhi z0.h, z0.b
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uunpkhi.nxv8i16(<vscale x 16 x i8> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @uunpkhi_i32(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uunpkhi_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpkhi z0.s, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.uunpkhi.nxv4i32(<vscale x 8 x i16> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @uunpkhi_i64(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uunpkhi_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpkhi z0.d, z0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.uunpkhi.nxv2i64(<vscale x 4 x i32> %a)
  ret <vscale x 2 x i64> %res
}

;
; UUNPKLO
;
; The intrinsic should lower to a single UUNPKLO widening the low half.

define <vscale x 8 x i16> @uunpklo_i16(<vscale x 16 x i8> %a) {
; CHECK-LABEL: uunpklo_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z0.h, z0.b
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uunpklo.nxv8i16(<vscale x 16 x i8> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @uunpklo_i32(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uunpklo_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z0.s, z0.h
; CHECK-NEXT:    ret
  %res = call
<vscale x 4 x i32> @llvm.aarch64.sve.uunpklo.nxv4i32(<vscale x 8 x i16> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @uunpklo_i64(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uunpklo_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z0.d, z0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.uunpklo.nxv2i64(<vscale x 4 x i32> %a)
  ret <vscale x 2 x i64> %res
}

;
; TRN1
;
; Predicate and vector forms should each lower to a single TRN1 of the
; matching element size.

define <vscale x 16 x i1> @trn1_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn1_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 p0.b, p0.b, p1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @trn1_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: trn1_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 p0.h, p0.h, p1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.trn1.nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @trn1_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: trn1_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 p0.s, p0.s, p1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.trn1.nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @trn1_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: trn1_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 p0.d, p0.d, p1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.trn1.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 16 x i1> @trn1_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn1_b16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 p0.h, p0.h, p1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i1> @trn1_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn1_b32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 p0.s, p0.s, p1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i1> @trn1_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn1_b64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 p0.d, p0.d, p1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i8> @trn1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: trn1_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.trn1.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @trn1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: trn1_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.trn1.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @trn1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: trn1_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.trn1.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @trn1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: trn1_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.trn1.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x half> @trn1_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: trn1_f16_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x half> @llvm.aarch64.sve.trn1.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
  ret <vscale x 2 x half> %out
}

define <vscale x 4 x half> @trn1_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: trn1_f16_v4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x half> @llvm.aarch64.sve.trn1.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %out
}

define <vscale x 8 x bfloat> @trn1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: trn1_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.trn1.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @trn1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: trn1_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.trn1.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 2 x float> @trn1_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: trn1_f32_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x float> @llvm.aarch64.sve.trn1.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b)
  ret <vscale x 2 x float> %out
}

define <vscale x 4 x float> @trn1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: trn1_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.trn1.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @trn1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: trn1_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.trn1.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; TRN2
;
; Predicate and vector forms should each lower to a single TRN2 of the
; matching element size.

define <vscale x 16 x i1> @trn2_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn2_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 p0.b, p0.b, p1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @trn2_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: trn2_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 p0.h, p0.h, p1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.trn2.nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @trn2_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x
i1> %b) {
; CHECK-LABEL: trn2_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 p0.s, p0.s, p1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.trn2.nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @trn2_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: trn2_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 p0.d, p0.d, p1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.trn2.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 16 x i1> @trn2_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn2_b16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 p0.h, p0.h, p1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i1> @trn2_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn2_b32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 p0.s, p0.s, p1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i1> @trn2_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn2_b64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 p0.d, p0.d, p1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i8> @trn2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: trn2_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.trn2.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @trn2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: trn2_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.trn2.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @trn2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: trn2_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.trn2.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @trn2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: trn2_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.trn2.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x half> @trn2_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: trn2_f16_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x half> @llvm.aarch64.sve.trn2.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
  ret <vscale x 2 x half> %out
}

define <vscale x 4 x half> @trn2_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: trn2_f16_v4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x half> @llvm.aarch64.sve.trn2.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %out
}

define <vscale x 8 x bfloat> @trn2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: trn2_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.trn2.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @trn2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: trn2_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.trn2.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 2 x float> @trn2_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: trn2_f32_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x float> @llvm.aarch64.sve.trn2.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b)
  ret <vscale x 2 x float> %out
}

define <vscale x 4 x float> @trn2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: trn2_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.trn2.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @trn2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: trn2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.trn2.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; UZP1
;
; Predicate and vector forms should each lower to a single UZP1 of the
; matching element size.

define <vscale x 16 x i1> @uzp1_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: uzp1_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 p0.b, p0.b, p1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @uzp1_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: uzp1_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 p0.h, p0.h, p1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.uzp1.nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @uzp1_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: uzp1_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 p0.s, p0.s, p1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.uzp1.nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @uzp1_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: uzp1_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 p0.d, p0.d, p1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.uzp1.nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 16 x i1> @uzp1_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: uzp1_b16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 p0.h, p0.h, p1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i1> @uzp1_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: uzp1_b32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 p0.s, p0.s, p1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i1> @uzp1_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: uzp1_b64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 p0.d, p0.d, p1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 16 x i8> @uzp1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uzp1_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uzp1.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uzp1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uzp1_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uzp1.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uzp1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uzp1_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp1.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uzp1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uzp1_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uzp1.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x half> @uzp1_f16_v2(<vscale x 2 x half>
%a, <vscale x 2 x half> %b) { 1984; CHECK-LABEL: uzp1_f16_v2: 1985; CHECK: // %bb.0: 1986; CHECK-NEXT: uzp1 z0.d, z0.d, z1.d 1987; CHECK-NEXT: ret 1988 %out = call <vscale x 2 x half> @llvm.aarch64.sve.uzp1.nxv2f16(<vscale x 2 x half> %a, 1989 <vscale x 2 x half> %b) 1990 ret <vscale x 2 x half> %out 1991} 1992 1993define <vscale x 4 x half> @uzp1_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) { 1994; CHECK-LABEL: uzp1_f16_v4: 1995; CHECK: // %bb.0: 1996; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s 1997; CHECK-NEXT: ret 1998 %out = call <vscale x 4 x half> @llvm.aarch64.sve.uzp1.nxv4f16(<vscale x 4 x half> %a, 1999 <vscale x 4 x half> %b) 2000 ret <vscale x 4 x half> %out 2001} 2002 2003define <vscale x 8 x bfloat> @uzp1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 { 2004; CHECK-LABEL: uzp1_bf16: 2005; CHECK: // %bb.0: 2006; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h 2007; CHECK-NEXT: ret 2008 %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp1.nxv8bf16(<vscale x 8 x bfloat> %a, 2009 <vscale x 8 x bfloat> %b) 2010 ret <vscale x 8 x bfloat> %out 2011} 2012 2013define <vscale x 8 x half> @uzp1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) { 2014; CHECK-LABEL: uzp1_f16: 2015; CHECK: // %bb.0: 2016; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h 2017; CHECK-NEXT: ret 2018 %out = call <vscale x 8 x half> @llvm.aarch64.sve.uzp1.nxv8f16(<vscale x 8 x half> %a, 2019 <vscale x 8 x half> %b) 2020 ret <vscale x 8 x half> %out 2021} 2022 2023define <vscale x 2 x float> @uzp1_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) { 2024; CHECK-LABEL: uzp1_f32_v2: 2025; CHECK: // %bb.0: 2026; CHECK-NEXT: uzp1 z0.d, z0.d, z1.d 2027; CHECK-NEXT: ret 2028 %out = call <vscale x 2 x float> @llvm.aarch64.sve.uzp1.nxv2f32(<vscale x 2 x float> %a, 2029 <vscale x 2 x float> %b) 2030 ret <vscale x 2 x float> %out 2031} 2032 2033define <vscale x 4 x float> @uzp1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) { 2034; CHECK-LABEL: uzp1_f32: 2035; CHECK: // %bb.0: 2036; CHECK-NEXT: 
uzp1 z0.s, z0.s, z1.s 2037; CHECK-NEXT: ret 2038 %out = call <vscale x 4 x float> @llvm.aarch64.sve.uzp1.nxv4f32(<vscale x 4 x float> %a, 2039 <vscale x 4 x float> %b) 2040 ret <vscale x 4 x float> %out 2041} 2042 2043define <vscale x 2 x double> @uzp1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) { 2044; CHECK-LABEL: uzp1_f64: 2045; CHECK: // %bb.0: 2046; CHECK-NEXT: uzp1 z0.d, z0.d, z1.d 2047; CHECK-NEXT: ret 2048 %out = call <vscale x 2 x double> @llvm.aarch64.sve.uzp1.nxv2f64(<vscale x 2 x double> %a, 2049 <vscale x 2 x double> %b) 2050 ret <vscale x 2 x double> %out 2051} 2052 2053; 2054; UZP2 2055; 2056 2057define <vscale x 16 x i1> @uzp2_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) { 2058; CHECK-LABEL: uzp2_nxv16i1: 2059; CHECK: // %bb.0: 2060; CHECK-NEXT: uzp2 p0.b, p0.b, p1.b 2061; CHECK-NEXT: ret 2062 %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.nxv16i1(<vscale x 16 x i1> %a, 2063 <vscale x 16 x i1> %b) 2064 ret <vscale x 16 x i1> %out 2065} 2066 2067define <vscale x 8 x i1> @uzp2_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) { 2068; CHECK-LABEL: uzp2_nxv8i1: 2069; CHECK: // %bb.0: 2070; CHECK-NEXT: uzp2 p0.h, p0.h, p1.h 2071; CHECK-NEXT: ret 2072 %out = call <vscale x 8 x i1> @llvm.aarch64.sve.uzp2.nxv8i1(<vscale x 8 x i1> %a, 2073 <vscale x 8 x i1> %b) 2074 ret <vscale x 8 x i1> %out 2075} 2076 2077define <vscale x 4 x i1> @uzp2_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) { 2078; CHECK-LABEL: uzp2_nxv4i1: 2079; CHECK: // %bb.0: 2080; CHECK-NEXT: uzp2 p0.s, p0.s, p1.s 2081; CHECK-NEXT: ret 2082 %out = call <vscale x 4 x i1> @llvm.aarch64.sve.uzp2.nxv4i1(<vscale x 4 x i1> %a, 2083 <vscale x 4 x i1> %b) 2084 ret <vscale x 4 x i1> %out 2085} 2086 2087define <vscale x 2 x i1> @uzp2_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) { 2088; CHECK-LABEL: uzp2_nxv2i1: 2089; CHECK: // %bb.0: 2090; CHECK-NEXT: uzp2 p0.d, p0.d, p1.d 2091; CHECK-NEXT: ret 2092 %out = call <vscale x 2 x i1> 
@llvm.aarch64.sve.uzp2.nxv2i1(<vscale x 2 x i1> %a, 2093 <vscale x 2 x i1> %b) 2094 ret <vscale x 2 x i1> %out 2095} 2096 2097define <vscale x 16 x i1> @uzp2_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) { 2098; CHECK-LABEL: uzp2_b16: 2099; CHECK: // %bb.0: 2100; CHECK-NEXT: uzp2 p0.h, p0.h, p1.h 2101; CHECK-NEXT: ret 2102 %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b16(<vscale x 16 x i1> %a, 2103 <vscale x 16 x i1> %b) 2104 ret <vscale x 16 x i1> %out 2105} 2106 2107define <vscale x 16 x i1> @uzp2_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) { 2108; CHECK-LABEL: uzp2_b32: 2109; CHECK: // %bb.0: 2110; CHECK-NEXT: uzp2 p0.s, p0.s, p1.s 2111; CHECK-NEXT: ret 2112 %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b32(<vscale x 16 x i1> %a, 2113 <vscale x 16 x i1> %b) 2114 ret <vscale x 16 x i1> %out 2115} 2116 2117define <vscale x 16 x i1> @uzp2_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) { 2118; CHECK-LABEL: uzp2_b64: 2119; CHECK: // %bb.0: 2120; CHECK-NEXT: uzp2 p0.d, p0.d, p1.d 2121; CHECK-NEXT: ret 2122 %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b64(<vscale x 16 x i1> %a, 2123 <vscale x 16 x i1> %b) 2124 ret <vscale x 16 x i1> %out 2125} 2126 2127define <vscale x 16 x i8> @uzp2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { 2128; CHECK-LABEL: uzp2_i8: 2129; CHECK: // %bb.0: 2130; CHECK-NEXT: uzp2 z0.b, z0.b, z1.b 2131; CHECK-NEXT: ret 2132 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uzp2.nxv16i8(<vscale x 16 x i8> %a, 2133 <vscale x 16 x i8> %b) 2134 ret <vscale x 16 x i8> %out 2135} 2136 2137define <vscale x 8 x i16> @uzp2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { 2138; CHECK-LABEL: uzp2_i16: 2139; CHECK: // %bb.0: 2140; CHECK-NEXT: uzp2 z0.h, z0.h, z1.h 2141; CHECK-NEXT: ret 2142 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uzp2.nxv8i16(<vscale x 8 x i16> %a, 2143 <vscale x 8 x i16> %b) 2144 ret <vscale x 8 x i16> %out 2145} 2146 2147define <vscale x 4 x i32> @uzp2_i32(<vscale x 4 x i32> %a, <vscale 
x 4 x i32> %b) { 2148; CHECK-LABEL: uzp2_i32: 2149; CHECK: // %bb.0: 2150; CHECK-NEXT: uzp2 z0.s, z0.s, z1.s 2151; CHECK-NEXT: ret 2152 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp2.nxv4i32(<vscale x 4 x i32> %a, 2153 <vscale x 4 x i32> %b) 2154 ret <vscale x 4 x i32> %out 2155} 2156 2157define <vscale x 2 x i64> @uzp2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { 2158; CHECK-LABEL: uzp2_i64: 2159; CHECK: // %bb.0: 2160; CHECK-NEXT: uzp2 z0.d, z0.d, z1.d 2161; CHECK-NEXT: ret 2162 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uzp2.nxv2i64(<vscale x 2 x i64> %a, 2163 <vscale x 2 x i64> %b) 2164 ret <vscale x 2 x i64> %out 2165} 2166 2167define <vscale x 2 x half> @uzp2_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) { 2168; CHECK-LABEL: uzp2_f16_v2: 2169; CHECK: // %bb.0: 2170; CHECK-NEXT: uzp2 z0.d, z0.d, z1.d 2171; CHECK-NEXT: ret 2172 %out = call <vscale x 2 x half> @llvm.aarch64.sve.uzp2.nxv2f16(<vscale x 2 x half> %a, 2173 <vscale x 2 x half> %b) 2174 ret <vscale x 2 x half> %out 2175} 2176 2177define <vscale x 4 x half> @uzp2_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) { 2178; CHECK-LABEL: uzp2_f16_v4: 2179; CHECK: // %bb.0: 2180; CHECK-NEXT: uzp2 z0.s, z0.s, z1.s 2181; CHECK-NEXT: ret 2182 %out = call <vscale x 4 x half> @llvm.aarch64.sve.uzp2.nxv4f16(<vscale x 4 x half> %a, 2183 <vscale x 4 x half> %b) 2184 ret <vscale x 4 x half> %out 2185} 2186 2187define <vscale x 8 x bfloat> @uzp2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 { 2188; CHECK-LABEL: uzp2_bf16: 2189; CHECK: // %bb.0: 2190; CHECK-NEXT: uzp2 z0.h, z0.h, z1.h 2191; CHECK-NEXT: ret 2192 %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp2.nxv8bf16(<vscale x 8 x bfloat> %a, 2193 <vscale x 8 x bfloat> %b) 2194 ret <vscale x 8 x bfloat> %out 2195} 2196 2197define <vscale x 8 x half> @uzp2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) { 2198; CHECK-LABEL: uzp2_f16: 2199; CHECK: // %bb.0: 2200; CHECK-NEXT: uzp2 z0.h, z0.h, z1.h 2201; 
CHECK-NEXT: ret 2202 %out = call <vscale x 8 x half> @llvm.aarch64.sve.uzp2.nxv8f16(<vscale x 8 x half> %a, 2203 <vscale x 8 x half> %b) 2204 ret <vscale x 8 x half> %out 2205} 2206 2207define <vscale x 2 x float> @uzp2_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) { 2208; CHECK-LABEL: uzp2_f32_v2: 2209; CHECK: // %bb.0: 2210; CHECK-NEXT: uzp2 z0.d, z0.d, z1.d 2211; CHECK-NEXT: ret 2212 %out = call <vscale x 2 x float> @llvm.aarch64.sve.uzp2.nxv2f32(<vscale x 2 x float> %a, 2213 <vscale x 2 x float> %b) 2214 ret <vscale x 2 x float> %out 2215} 2216 2217define <vscale x 4 x float> @uzp2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) { 2218; CHECK-LABEL: uzp2_f32: 2219; CHECK: // %bb.0: 2220; CHECK-NEXT: uzp2 z0.s, z0.s, z1.s 2221; CHECK-NEXT: ret 2222 %out = call <vscale x 4 x float> @llvm.aarch64.sve.uzp2.nxv4f32(<vscale x 4 x float> %a, 2223 <vscale x 4 x float> %b) 2224 ret <vscale x 4 x float> %out 2225} 2226 2227define <vscale x 2 x double> @uzp2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) { 2228; CHECK-LABEL: uzp2_f64: 2229; CHECK: // %bb.0: 2230; CHECK-NEXT: uzp2 z0.d, z0.d, z1.d 2231; CHECK-NEXT: ret 2232 %out = call <vscale x 2 x double> @llvm.aarch64.sve.uzp2.nxv2f64(<vscale x 2 x double> %a, 2233 <vscale x 2 x double> %b) 2234 ret <vscale x 2 x double> %out 2235} 2236 2237; 2238; ZIP1 2239; 2240 2241define <vscale x 16 x i1> @zip1_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) { 2242; CHECK-LABEL: zip1_nxv16i1: 2243; CHECK: // %bb.0: 2244; CHECK-NEXT: zip1 p0.b, p0.b, p1.b 2245; CHECK-NEXT: ret 2246 %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.nxv16i1(<vscale x 16 x i1> %a, 2247 <vscale x 16 x i1> %b) 2248 ret <vscale x 16 x i1> %out 2249} 2250 2251define <vscale x 8 x i1> @zip1_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) { 2252; CHECK-LABEL: zip1_nxv8i1: 2253; CHECK: // %bb.0: 2254; CHECK-NEXT: zip1 p0.h, p0.h, p1.h 2255; CHECK-NEXT: ret 2256 %out = call <vscale x 8 x i1> 
@llvm.aarch64.sve.zip1.nxv8i1(<vscale x 8 x i1> %a, 2257 <vscale x 8 x i1> %b) 2258 ret <vscale x 8 x i1> %out 2259} 2260 2261define <vscale x 4 x i1> @zip1_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) { 2262; CHECK-LABEL: zip1_nxv4i1: 2263; CHECK: // %bb.0: 2264; CHECK-NEXT: zip1 p0.s, p0.s, p1.s 2265; CHECK-NEXT: ret 2266 %out = call <vscale x 4 x i1> @llvm.aarch64.sve.zip1.nxv4i1(<vscale x 4 x i1> %a, 2267 <vscale x 4 x i1> %b) 2268 ret <vscale x 4 x i1> %out 2269} 2270 2271define <vscale x 2 x i1> @zip1_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) { 2272; CHECK-LABEL: zip1_nxv2i1: 2273; CHECK: // %bb.0: 2274; CHECK-NEXT: zip1 p0.d, p0.d, p1.d 2275; CHECK-NEXT: ret 2276 %out = call <vscale x 2 x i1> @llvm.aarch64.sve.zip1.nxv2i1(<vscale x 2 x i1> %a, 2277 <vscale x 2 x i1> %b) 2278 ret <vscale x 2 x i1> %out 2279} 2280 2281define <vscale x 16 x i1> @zip1_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) { 2282; CHECK-LABEL: zip1_b16: 2283; CHECK: // %bb.0: 2284; CHECK-NEXT: zip1 p0.h, p0.h, p1.h 2285; CHECK-NEXT: ret 2286 %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b16(<vscale x 16 x i1> %a, 2287 <vscale x 16 x i1> %b) 2288 ret <vscale x 16 x i1> %out 2289} 2290 2291define <vscale x 16 x i1> @zip1_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) { 2292; CHECK-LABEL: zip1_b32: 2293; CHECK: // %bb.0: 2294; CHECK-NEXT: zip1 p0.s, p0.s, p1.s 2295; CHECK-NEXT: ret 2296 %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b32(<vscale x 16 x i1> %a, 2297 <vscale x 16 x i1> %b) 2298 ret <vscale x 16 x i1> %out 2299} 2300 2301define <vscale x 16 x i1> @zip1_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) { 2302; CHECK-LABEL: zip1_b64: 2303; CHECK: // %bb.0: 2304; CHECK-NEXT: zip1 p0.d, p0.d, p1.d 2305; CHECK-NEXT: ret 2306 %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b64(<vscale x 16 x i1> %a, 2307 <vscale x 16 x i1> %b) 2308 ret <vscale x 16 x i1> %out 2309} 2310 2311define <vscale x 16 x i8> @zip1_i8(<vscale x 16 x i8> %a, <vscale x 
16 x i8> %b) { 2312; CHECK-LABEL: zip1_i8: 2313; CHECK: // %bb.0: 2314; CHECK-NEXT: zip1 z0.b, z0.b, z1.b 2315; CHECK-NEXT: ret 2316 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.zip1.nxv16i8(<vscale x 16 x i8> %a, 2317 <vscale x 16 x i8> %b) 2318 ret <vscale x 16 x i8> %out 2319} 2320 2321define <vscale x 8 x i16> @zip1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { 2322; CHECK-LABEL: zip1_i16: 2323; CHECK: // %bb.0: 2324; CHECK-NEXT: zip1 z0.h, z0.h, z1.h 2325; CHECK-NEXT: ret 2326 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.zip1.nxv8i16(<vscale x 8 x i16> %a, 2327 <vscale x 8 x i16> %b) 2328 ret <vscale x 8 x i16> %out 2329} 2330 2331define <vscale x 4 x i32> @zip1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { 2332; CHECK-LABEL: zip1_i32: 2333; CHECK: // %bb.0: 2334; CHECK-NEXT: zip1 z0.s, z0.s, z1.s 2335; CHECK-NEXT: ret 2336 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.zip1.nxv4i32(<vscale x 4 x i32> %a, 2337 <vscale x 4 x i32> %b) 2338 ret <vscale x 4 x i32> %out 2339} 2340 2341define <vscale x 2 x i64> @zip1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { 2342; CHECK-LABEL: zip1_i64: 2343; CHECK: // %bb.0: 2344; CHECK-NEXT: zip1 z0.d, z0.d, z1.d 2345; CHECK-NEXT: ret 2346 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.zip1.nxv2i64(<vscale x 2 x i64> %a, 2347 <vscale x 2 x i64> %b) 2348 ret <vscale x 2 x i64> %out 2349} 2350 2351define <vscale x 2 x half> @zip1_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) { 2352; CHECK-LABEL: zip1_f16_v2: 2353; CHECK: // %bb.0: 2354; CHECK-NEXT: zip1 z0.d, z0.d, z1.d 2355; CHECK-NEXT: ret 2356 %out = call <vscale x 2 x half> @llvm.aarch64.sve.zip1.nxv2f16(<vscale x 2 x half> %a, 2357 <vscale x 2 x half> %b) 2358 ret <vscale x 2 x half> %out 2359} 2360 2361define <vscale x 4 x half> @zip1_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) { 2362; CHECK-LABEL: zip1_f16_v4: 2363; CHECK: // %bb.0: 2364; CHECK-NEXT: zip1 z0.s, z0.s, z1.s 2365; CHECK-NEXT: ret 2366 %out = call <vscale x 4 
x half> @llvm.aarch64.sve.zip1.nxv4f16(<vscale x 4 x half> %a, 2367 <vscale x 4 x half> %b) 2368 ret <vscale x 4 x half> %out 2369} 2370 2371define <vscale x 8 x bfloat> @zip1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 { 2372; CHECK-LABEL: zip1_bf16: 2373; CHECK: // %bb.0: 2374; CHECK-NEXT: zip1 z0.h, z0.h, z1.h 2375; CHECK-NEXT: ret 2376 %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.zip1.nxv8bf16(<vscale x 8 x bfloat> %a, 2377 <vscale x 8 x bfloat> %b) 2378 ret <vscale x 8 x bfloat> %out 2379} 2380 2381define <vscale x 8 x half> @zip1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) { 2382; CHECK-LABEL: zip1_f16: 2383; CHECK: // %bb.0: 2384; CHECK-NEXT: zip1 z0.h, z0.h, z1.h 2385; CHECK-NEXT: ret 2386 %out = call <vscale x 8 x half> @llvm.aarch64.sve.zip1.nxv8f16(<vscale x 8 x half> %a, 2387 <vscale x 8 x half> %b) 2388 ret <vscale x 8 x half> %out 2389} 2390 2391define <vscale x 2 x float> @zip1_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) { 2392; CHECK-LABEL: zip1_f32_v2: 2393; CHECK: // %bb.0: 2394; CHECK-NEXT: zip1 z0.d, z0.d, z1.d 2395; CHECK-NEXT: ret 2396 %out = call <vscale x 2 x float> @llvm.aarch64.sve.zip1.nxv2f32(<vscale x 2 x float> %a, 2397 <vscale x 2 x float> %b) 2398 ret <vscale x 2 x float> %out 2399} 2400 2401define <vscale x 4 x float> @zip1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) { 2402; CHECK-LABEL: zip1_f32: 2403; CHECK: // %bb.0: 2404; CHECK-NEXT: zip1 z0.s, z0.s, z1.s 2405; CHECK-NEXT: ret 2406 %out = call <vscale x 4 x float> @llvm.aarch64.sve.zip1.nxv4f32(<vscale x 4 x float> %a, 2407 <vscale x 4 x float> %b) 2408 ret <vscale x 4 x float> %out 2409} 2410 2411define <vscale x 2 x double> @zip1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) { 2412; CHECK-LABEL: zip1_f64: 2413; CHECK: // %bb.0: 2414; CHECK-NEXT: zip1 z0.d, z0.d, z1.d 2415; CHECK-NEXT: ret 2416 %out = call <vscale x 2 x double> @llvm.aarch64.sve.zip1.nxv2f64(<vscale x 2 x double> %a, 2417 <vscale x 2 x double> %b) 
2418 ret <vscale x 2 x double> %out 2419} 2420 2421; 2422; ZIP2 2423; 2424 2425define <vscale x 16 x i1> @zip2_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) { 2426; CHECK-LABEL: zip2_nxv16i1: 2427; CHECK: // %bb.0: 2428; CHECK-NEXT: zip2 p0.b, p0.b, p1.b 2429; CHECK-NEXT: ret 2430 %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.nxv16i1(<vscale x 16 x i1> %a, 2431 <vscale x 16 x i1> %b) 2432 ret <vscale x 16 x i1> %out 2433} 2434 2435define <vscale x 8 x i1> @zip2_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) { 2436; CHECK-LABEL: zip2_nxv8i1: 2437; CHECK: // %bb.0: 2438; CHECK-NEXT: zip2 p0.h, p0.h, p1.h 2439; CHECK-NEXT: ret 2440 %out = call <vscale x 8 x i1> @llvm.aarch64.sve.zip2.nxv8i1(<vscale x 8 x i1> %a, 2441 <vscale x 8 x i1> %b) 2442 ret <vscale x 8 x i1> %out 2443} 2444 2445define <vscale x 4 x i1> @zip2_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) { 2446; CHECK-LABEL: zip2_nxv4i1: 2447; CHECK: // %bb.0: 2448; CHECK-NEXT: zip2 p0.s, p0.s, p1.s 2449; CHECK-NEXT: ret 2450 %out = call <vscale x 4 x i1> @llvm.aarch64.sve.zip2.nxv4i1(<vscale x 4 x i1> %a, 2451 <vscale x 4 x i1> %b) 2452 ret <vscale x 4 x i1> %out 2453} 2454 2455define <vscale x 2 x i1> @zip2_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) { 2456; CHECK-LABEL: zip2_nxv2i1: 2457; CHECK: // %bb.0: 2458; CHECK-NEXT: zip2 p0.d, p0.d, p1.d 2459; CHECK-NEXT: ret 2460 %out = call <vscale x 2 x i1> @llvm.aarch64.sve.zip2.nxv2i1(<vscale x 2 x i1> %a, 2461 <vscale x 2 x i1> %b) 2462 ret <vscale x 2 x i1> %out 2463} 2464 2465define <vscale x 16 x i1> @zip2_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) { 2466; CHECK-LABEL: zip2_b16: 2467; CHECK: // %bb.0: 2468; CHECK-NEXT: zip2 p0.h, p0.h, p1.h 2469; CHECK-NEXT: ret 2470 %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b16(<vscale x 16 x i1> %a, 2471 <vscale x 16 x i1> %b) 2472 ret <vscale x 16 x i1> %out 2473} 2474 2475define <vscale x 16 x i1> @zip2_b32(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) { 2476; CHECK-LABEL: 
zip2_b32: 2477; CHECK: // %bb.0: 2478; CHECK-NEXT: zip2 p0.s, p0.s, p1.s 2479; CHECK-NEXT: ret 2480 %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b32(<vscale x 16 x i1> %a, 2481 <vscale x 16 x i1> %b) 2482 ret <vscale x 16 x i1> %out 2483} 2484 2485define <vscale x 16 x i1> @zip2_b64(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) { 2486; CHECK-LABEL: zip2_b64: 2487; CHECK: // %bb.0: 2488; CHECK-NEXT: zip2 p0.d, p0.d, p1.d 2489; CHECK-NEXT: ret 2490 %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b64(<vscale x 16 x i1> %a, 2491 <vscale x 16 x i1> %b) 2492 ret <vscale x 16 x i1> %out 2493} 2494 2495define <vscale x 16 x i8> @zip2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { 2496; CHECK-LABEL: zip2_i8: 2497; CHECK: // %bb.0: 2498; CHECK-NEXT: zip2 z0.b, z0.b, z1.b 2499; CHECK-NEXT: ret 2500 %out = call <vscale x 16 x i8> @llvm.aarch64.sve.zip2.nxv16i8(<vscale x 16 x i8> %a, 2501 <vscale x 16 x i8> %b) 2502 ret <vscale x 16 x i8> %out 2503} 2504 2505define <vscale x 8 x i16> @zip2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { 2506; CHECK-LABEL: zip2_i16: 2507; CHECK: // %bb.0: 2508; CHECK-NEXT: zip2 z0.h, z0.h, z1.h 2509; CHECK-NEXT: ret 2510 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.zip2.nxv8i16(<vscale x 8 x i16> %a, 2511 <vscale x 8 x i16> %b) 2512 ret <vscale x 8 x i16> %out 2513} 2514 2515define <vscale x 4 x i32> @zip2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { 2516; CHECK-LABEL: zip2_i32: 2517; CHECK: // %bb.0: 2518; CHECK-NEXT: zip2 z0.s, z0.s, z1.s 2519; CHECK-NEXT: ret 2520 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.zip2.nxv4i32(<vscale x 4 x i32> %a, 2521 <vscale x 4 x i32> %b) 2522 ret <vscale x 4 x i32> %out 2523} 2524 2525define <vscale x 2 x i64> @zip2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { 2526; CHECK-LABEL: zip2_i64: 2527; CHECK: // %bb.0: 2528; CHECK-NEXT: zip2 z0.d, z0.d, z1.d 2529; CHECK-NEXT: ret 2530 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.zip2.nxv2i64(<vscale x 2 x i64> %a, 2531 
<vscale x 2 x i64> %b) 2532 ret <vscale x 2 x i64> %out 2533} 2534 2535define <vscale x 2 x half> @zip2_f16_v2(<vscale x 2 x half> %a, <vscale x 2 x half> %b) { 2536; CHECK-LABEL: zip2_f16_v2: 2537; CHECK: // %bb.0: 2538; CHECK-NEXT: zip2 z0.d, z0.d, z1.d 2539; CHECK-NEXT: ret 2540 %out = call <vscale x 2 x half> @llvm.aarch64.sve.zip2.nxv2f16(<vscale x 2 x half> %a, 2541 <vscale x 2 x half> %b) 2542 ret <vscale x 2 x half> %out 2543} 2544 2545define <vscale x 4 x half> @zip2_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) { 2546; CHECK-LABEL: zip2_f16_v4: 2547; CHECK: // %bb.0: 2548; CHECK-NEXT: zip2 z0.s, z0.s, z1.s 2549; CHECK-NEXT: ret 2550 %out = call <vscale x 4 x half> @llvm.aarch64.sve.zip2.nxv4f16(<vscale x 4 x half> %a, 2551 <vscale x 4 x half> %b) 2552 ret <vscale x 4 x half> %out 2553} 2554 2555define <vscale x 8 x bfloat> @zip2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 { 2556; CHECK-LABEL: zip2_bf16: 2557; CHECK: // %bb.0: 2558; CHECK-NEXT: zip2 z0.h, z0.h, z1.h 2559; CHECK-NEXT: ret 2560 %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.zip2.nxv8bf16(<vscale x 8 x bfloat> %a, 2561 <vscale x 8 x bfloat> %b) 2562 ret <vscale x 8 x bfloat> %out 2563} 2564 2565define <vscale x 8 x half> @zip2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) { 2566; CHECK-LABEL: zip2_f16: 2567; CHECK: // %bb.0: 2568; CHECK-NEXT: zip2 z0.h, z0.h, z1.h 2569; CHECK-NEXT: ret 2570 %out = call <vscale x 8 x half> @llvm.aarch64.sve.zip2.nxv8f16(<vscale x 8 x half> %a, 2571 <vscale x 8 x half> %b) 2572 ret <vscale x 8 x half> %out 2573} 2574 2575define <vscale x 2 x float> @zip2_f32_v2(<vscale x 2 x float> %a, <vscale x 2 x float> %b) { 2576; CHECK-LABEL: zip2_f32_v2: 2577; CHECK: // %bb.0: 2578; CHECK-NEXT: zip2 z0.d, z0.d, z1.d 2579; CHECK-NEXT: ret 2580 %out = call <vscale x 2 x float> @llvm.aarch64.sve.zip2.nxv2f32(<vscale x 2 x float> %a, 2581 <vscale x 2 x float> %b) 2582 ret <vscale x 2 x float> %out 2583} 2584 2585define <vscale x 4 x float> 
@zip2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) { 2586; CHECK-LABEL: zip2_f32: 2587; CHECK: // %bb.0: 2588; CHECK-NEXT: zip2 z0.s, z0.s, z1.s 2589; CHECK-NEXT: ret 2590 %out = call <vscale x 4 x float> @llvm.aarch64.sve.zip2.nxv4f32(<vscale x 4 x float> %a, 2591 <vscale x 4 x float> %b) 2592 ret <vscale x 4 x float> %out 2593} 2594 2595define <vscale x 2 x double> @zip2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) { 2596; CHECK-LABEL: zip2_f64: 2597; CHECK: // %bb.0: 2598; CHECK-NEXT: zip2 z0.d, z0.d, z1.d 2599; CHECK-NEXT: ret 2600 %out = call <vscale x 2 x double> @llvm.aarch64.sve.zip2.nxv2f64(<vscale x 2 x double> %a, 2601 <vscale x 2 x double> %b) 2602 ret <vscale x 2 x double> %out 2603} 2604 2605declare <vscale x 16 x i8> @llvm.aarch64.sve.clasta.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) 2606declare <vscale x 8 x i16> @llvm.aarch64.sve.clasta.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) 2607declare <vscale x 4 x i32> @llvm.aarch64.sve.clasta.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) 2608declare <vscale x 2 x i64> @llvm.aarch64.sve.clasta.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) 2609declare <vscale x 8 x half> @llvm.aarch64.sve.clasta.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>) 2610declare <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>) 2611declare <vscale x 4 x float> @llvm.aarch64.sve.clasta.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>) 2612declare <vscale x 2 x double> @llvm.aarch64.sve.clasta.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>) 2613 2614declare i8 @llvm.aarch64.sve.clasta.n.nxv16i8(<vscale x 16 x i1>, i8, <vscale x 16 x i8>) 2615declare i16 @llvm.aarch64.sve.clasta.n.nxv8i16(<vscale x 8 x i1>, i16, <vscale x 8 x i16>) 2616declare i32 @llvm.aarch64.sve.clasta.n.nxv4i32(<vscale x 
4 x i1>, i32, <vscale x 4 x i32>) 2617declare i64 @llvm.aarch64.sve.clasta.n.nxv2i64(<vscale x 2 x i1>, i64, <vscale x 2 x i64>) 2618declare half @llvm.aarch64.sve.clasta.n.nxv8f16(<vscale x 8 x i1>, half, <vscale x 8 x half>) 2619declare bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1>, bfloat, <vscale x 8 x bfloat>) 2620declare float @llvm.aarch64.sve.clasta.n.nxv4f32(<vscale x 4 x i1>, float, <vscale x 4 x float>) 2621declare double @llvm.aarch64.sve.clasta.n.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>) 2622 2623declare <vscale x 16 x i8> @llvm.aarch64.sve.clastb.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) 2624declare <vscale x 8 x i16> @llvm.aarch64.sve.clastb.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) 2625declare <vscale x 4 x i32> @llvm.aarch64.sve.clastb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) 2626declare <vscale x 2 x i64> @llvm.aarch64.sve.clastb.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) 2627declare <vscale x 8 x half> @llvm.aarch64.sve.clastb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>) 2628declare <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>) 2629declare <vscale x 4 x float> @llvm.aarch64.sve.clastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>) 2630declare <vscale x 2 x double> @llvm.aarch64.sve.clastb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>) 2631 2632declare i8 @llvm.aarch64.sve.clastb.n.nxv16i8(<vscale x 16 x i1>, i8, <vscale x 16 x i8>) 2633declare i16 @llvm.aarch64.sve.clastb.n.nxv8i16(<vscale x 8 x i1>, i16, <vscale x 8 x i16>) 2634declare i32 @llvm.aarch64.sve.clastb.n.nxv4i32(<vscale x 4 x i1>, i32, <vscale x 4 x i32>) 2635declare i64 @llvm.aarch64.sve.clastb.n.nxv2i64(<vscale x 2 x i1>, i64, <vscale x 2 x i64>) 2636declare half @llvm.aarch64.sve.clastb.n.nxv8f16(<vscale 
x 8 x i1>, half, <vscale x 8 x half>) 2637declare bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1>, bfloat, <vscale x 8 x bfloat>) 2638declare float @llvm.aarch64.sve.clastb.n.nxv4f32(<vscale x 4 x i1>, float, <vscale x 4 x float>) 2639declare double @llvm.aarch64.sve.clastb.n.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>) 2640 2641declare <vscale x 4 x i32> @llvm.aarch64.sve.compact.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>) 2642declare <vscale x 2 x i64> @llvm.aarch64.sve.compact.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>) 2643declare <vscale x 4 x float> @llvm.aarch64.sve.compact.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>) 2644declare <vscale x 2 x double> @llvm.aarch64.sve.compact.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>) 2645 2646declare <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8>, i64) 2647declare <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16>, i64) 2648declare <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32>, i64) 2649declare <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64>, i64) 2650declare <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half>, i64) 2651declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat>, i64) 2652declare <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float>, i64) 2653declare <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double>, i64) 2654 2655declare <vscale x 16 x i8> @llvm.aarch64.sve.ext.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32) 2656declare <vscale x 8 x i16> @llvm.aarch64.sve.ext.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32) 2657declare <vscale x 4 x i32> @llvm.aarch64.sve.ext.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32) 2658declare <vscale x 2 x i64> @llvm.aarch64.sve.ext.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32) 2659declare <vscale x 8 x 
bfloat> @llvm.aarch64.sve.ext.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32) 2660declare <vscale x 8 x half> @llvm.aarch64.sve.ext.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32) 2661declare <vscale x 4 x float> @llvm.aarch64.sve.ext.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32) 2662declare <vscale x 2 x double> @llvm.aarch64.sve.ext.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32) 2663 2664declare i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>) 2665declare i16 @llvm.aarch64.sve.lasta.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>) 2666declare i32 @llvm.aarch64.sve.lasta.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>) 2667declare i64 @llvm.aarch64.sve.lasta.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>) 2668declare half @llvm.aarch64.sve.lasta.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>) 2669declare bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>) 2670declare float @llvm.aarch64.sve.lasta.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>) 2671declare float @llvm.aarch64.sve.lasta.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>) 2672declare double @llvm.aarch64.sve.lasta.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>) 2673 2674declare i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>) 2675declare i16 @llvm.aarch64.sve.lastb.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>) 2676declare i32 @llvm.aarch64.sve.lastb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>) 2677declare i64 @llvm.aarch64.sve.lastb.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>) 2678declare half @llvm.aarch64.sve.lastb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>) 2679declare bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>) 2680declare float @llvm.aarch64.sve.lastb.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>) 2681declare float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>) 2682declare double 
@llvm.aarch64.sve.lastb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>) 2683 2684declare <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1>) 2685declare <vscale x 8 x i1> @llvm.aarch64.sve.rev.nxv8i1(<vscale x 8 x i1>) 2686declare <vscale x 4 x i1> @llvm.aarch64.sve.rev.nxv4i1(<vscale x 4 x i1>) 2687declare <vscale x 2 x i1> @llvm.aarch64.sve.rev.nxv2i1(<vscale x 2 x i1>) 2688declare <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8>) 2689declare <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16>) 2690declare <vscale x 4 x i32> @llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32>) 2691declare <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64>) 2692declare <vscale x 8 x bfloat> @llvm.aarch64.sve.rev.nxv8bf16(<vscale x 8 x bfloat>) 2693declare <vscale x 8 x half> @llvm.aarch64.sve.rev.nxv8f16(<vscale x 8 x half>) 2694declare <vscale x 4 x float> @llvm.aarch64.sve.rev.nxv4f32(<vscale x 4 x float>) 2695declare <vscale x 2 x double> @llvm.aarch64.sve.rev.nxv2f64(<vscale x 2 x double>) 2696 2697declare <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1>) 2698declare <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1>) 2699declare <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1>) 2700 2701declare <vscale x 16 x i8> @llvm.aarch64.sve.splice.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) 2702declare <vscale x 8 x i16> @llvm.aarch64.sve.splice.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>) 2703declare <vscale x 4 x i32> @llvm.aarch64.sve.splice.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) 2704declare <vscale x 2 x i64> @llvm.aarch64.sve.splice.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>) 2705declare <vscale x 8 x bfloat> @llvm.aarch64.sve.splice.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>) 2706declare <vscale x 8 x half> 
@llvm.aarch64.sve.splice.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>) 2707declare <vscale x 4 x float> @llvm.aarch64.sve.splice.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>) 2708declare <vscale x 2 x double> @llvm.aarch64.sve.splice.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>) 2709 2710declare <vscale x 8 x i16> @llvm.aarch64.sve.sunpkhi.nxv8i16(<vscale x 16 x i8>) 2711declare <vscale x 4 x i32> @llvm.aarch64.sve.sunpkhi.nxv4i32(<vscale x 8 x i16>) 2712declare <vscale x 2 x i64> @llvm.aarch64.sve.sunpkhi.nxv2i64(<vscale x 4 x i32>) 2713 2714declare <vscale x 8 x i16> @llvm.aarch64.sve.sunpklo.nxv8i16(<vscale x 16 x i8>) 2715declare <vscale x 4 x i32> @llvm.aarch64.sve.sunpklo.nxv4i32(<vscale x 8 x i16>) 2716declare <vscale x 2 x i64> @llvm.aarch64.sve.sunpklo.nxv2i64(<vscale x 4 x i32>) 2717 2718declare <vscale x 16 x i8> @llvm.aarch64.sve.tbl.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>) 2719declare <vscale x 8 x i16> @llvm.aarch64.sve.tbl.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>) 2720declare <vscale x 4 x i32> @llvm.aarch64.sve.tbl.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>) 2721declare <vscale x 2 x i64> @llvm.aarch64.sve.tbl.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>) 2722declare <vscale x 8 x half> @llvm.aarch64.sve.tbl.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i16>) 2723declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i16>) 2724declare <vscale x 4 x float> @llvm.aarch64.sve.tbl.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i32>) 2725declare <vscale x 2 x double> @llvm.aarch64.sve.tbl.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i64>) 2726 2727declare <vscale x 8 x i16> @llvm.aarch64.sve.uunpkhi.nxv8i16(<vscale x 16 x i8>) 2728declare <vscale x 4 x i32> @llvm.aarch64.sve.uunpkhi.nxv4i32(<vscale x 8 x i16>) 2729declare <vscale x 2 x i64> @llvm.aarch64.sve.uunpkhi.nxv2i64(<vscale x 4 x i32>) 2730 2731declare <vscale x 8 x 
i16> @llvm.aarch64.sve.uunpklo.nxv8i16(<vscale x 16 x i8>) 2732declare <vscale x 4 x i32> @llvm.aarch64.sve.uunpklo.nxv4i32(<vscale x 8 x i16>) 2733declare <vscale x 2 x i64> @llvm.aarch64.sve.uunpklo.nxv2i64(<vscale x 4 x i32>) 2734 2735declare <vscale x 16 x i1> @llvm.aarch64.sve.trn1.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>) 2736declare <vscale x 8 x i1> @llvm.aarch64.sve.trn1.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>) 2737declare <vscale x 4 x i1> @llvm.aarch64.sve.trn1.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>) 2738declare <vscale x 2 x i1> @llvm.aarch64.sve.trn1.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>) 2739declare <vscale x 16 x i8> @llvm.aarch64.sve.trn1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>) 2740declare <vscale x 8 x i16> @llvm.aarch64.sve.trn1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>) 2741declare <vscale x 4 x i32> @llvm.aarch64.sve.trn1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>) 2742declare <vscale x 2 x i64> @llvm.aarch64.sve.trn1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>) 2743declare <vscale x 2 x half> @llvm.aarch64.sve.trn1.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>) 2744declare <vscale x 4 x half> @llvm.aarch64.sve.trn1.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>) 2745declare <vscale x 8 x bfloat> @llvm.aarch64.sve.trn1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>) 2746declare <vscale x 8 x half> @llvm.aarch64.sve.trn1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>) 2747declare <vscale x 2 x float> @llvm.aarch64.sve.trn1.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>) 2748declare <vscale x 4 x float> @llvm.aarch64.sve.trn1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>) 2749declare <vscale x 2 x double> @llvm.aarch64.sve.trn1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>) 2750 2751declare <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b16(<vscale x 16 x i1>, <vscale x 16 x i1>) 2752declare <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b32(<vscale x 16 x i1>, <vscale x 16 x i1>) 
2753declare <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b64(<vscale x 16 x i1>, <vscale x 16 x i1>) 2754 2755declare <vscale x 16 x i1> @llvm.aarch64.sve.trn2.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>) 2756declare <vscale x 8 x i1> @llvm.aarch64.sve.trn2.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>) 2757declare <vscale x 4 x i1> @llvm.aarch64.sve.trn2.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>) 2758declare <vscale x 2 x i1> @llvm.aarch64.sve.trn2.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>) 2759declare <vscale x 16 x i8> @llvm.aarch64.sve.trn2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>) 2760declare <vscale x 8 x i16> @llvm.aarch64.sve.trn2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>) 2761declare <vscale x 4 x i32> @llvm.aarch64.sve.trn2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>) 2762declare <vscale x 2 x i64> @llvm.aarch64.sve.trn2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>) 2763declare <vscale x 2 x half> @llvm.aarch64.sve.trn2.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>) 2764declare <vscale x 4 x half> @llvm.aarch64.sve.trn2.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>) 2765declare <vscale x 8 x bfloat> @llvm.aarch64.sve.trn2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>) 2766declare <vscale x 8 x half> @llvm.aarch64.sve.trn2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>) 2767declare <vscale x 2 x float> @llvm.aarch64.sve.trn2.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>) 2768declare <vscale x 4 x float> @llvm.aarch64.sve.trn2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>) 2769declare <vscale x 2 x double> @llvm.aarch64.sve.trn2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>) 2770 2771declare <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b16(<vscale x 16 x i1>, <vscale x 16 x i1>) 2772declare <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b32(<vscale x 16 x i1>, <vscale x 16 x i1>) 2773declare <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b64(<vscale x 16 x i1>, <vscale x 16 x i1>) 2774 2775declare <vscale x 16 x i1> 
@llvm.aarch64.sve.uzp1.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>) 2776declare <vscale x 8 x i1> @llvm.aarch64.sve.uzp1.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>) 2777declare <vscale x 4 x i1> @llvm.aarch64.sve.uzp1.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>) 2778declare <vscale x 2 x i1> @llvm.aarch64.sve.uzp1.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>) 2779declare <vscale x 16 x i8> @llvm.aarch64.sve.uzp1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>) 2780declare <vscale x 8 x i16> @llvm.aarch64.sve.uzp1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>) 2781declare <vscale x 4 x i32> @llvm.aarch64.sve.uzp1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>) 2782declare <vscale x 2 x i64> @llvm.aarch64.sve.uzp1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>) 2783declare <vscale x 2 x half> @llvm.aarch64.sve.uzp1.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>) 2784declare <vscale x 4 x half> @llvm.aarch64.sve.uzp1.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>) 2785declare <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>) 2786declare <vscale x 8 x half> @llvm.aarch64.sve.uzp1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>) 2787declare <vscale x 2 x float> @llvm.aarch64.sve.uzp1.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>) 2788declare <vscale x 4 x float> @llvm.aarch64.sve.uzp1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>) 2789declare <vscale x 2 x double> @llvm.aarch64.sve.uzp1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>) 2790 2791declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b16(<vscale x 16 x i1>, <vscale x 16 x i1>) 2792declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b32(<vscale x 16 x i1>, <vscale x 16 x i1>) 2793declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b64(<vscale x 16 x i1>, <vscale x 16 x i1>) 2794 2795declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>) 2796declare <vscale x 8 x i1> 
@llvm.aarch64.sve.uzp2.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>) 2797declare <vscale x 4 x i1> @llvm.aarch64.sve.uzp2.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>) 2798declare <vscale x 2 x i1> @llvm.aarch64.sve.uzp2.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>) 2799declare <vscale x 16 x i8> @llvm.aarch64.sve.uzp2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>) 2800declare <vscale x 8 x i16> @llvm.aarch64.sve.uzp2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>) 2801declare <vscale x 4 x i32> @llvm.aarch64.sve.uzp2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>) 2802declare <vscale x 2 x i64> @llvm.aarch64.sve.uzp2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>) 2803declare <vscale x 2 x half> @llvm.aarch64.sve.uzp2.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>) 2804declare <vscale x 4 x half> @llvm.aarch64.sve.uzp2.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>) 2805declare <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>) 2806declare <vscale x 8 x half> @llvm.aarch64.sve.uzp2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>) 2807declare <vscale x 2 x float> @llvm.aarch64.sve.uzp2.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>) 2808declare <vscale x 4 x float> @llvm.aarch64.sve.uzp2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>) 2809declare <vscale x 2 x double> @llvm.aarch64.sve.uzp2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>) 2810 2811declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b16(<vscale x 16 x i1>, <vscale x 16 x i1>) 2812declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b32(<vscale x 16 x i1>, <vscale x 16 x i1>) 2813declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b64(<vscale x 16 x i1>, <vscale x 16 x i1>) 2814 2815declare <vscale x 16 x i1> @llvm.aarch64.sve.zip1.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>) 2816declare <vscale x 8 x i1> @llvm.aarch64.sve.zip1.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>) 2817declare <vscale x 4 x i1> @llvm.aarch64.sve.zip1.nxv4i1(<vscale x 
4 x i1>, <vscale x 4 x i1>) 2818declare <vscale x 2 x i1> @llvm.aarch64.sve.zip1.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>) 2819declare <vscale x 16 x i8> @llvm.aarch64.sve.zip1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>) 2820declare <vscale x 8 x i16> @llvm.aarch64.sve.zip1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>) 2821declare <vscale x 4 x i32> @llvm.aarch64.sve.zip1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>) 2822declare <vscale x 2 x i64> @llvm.aarch64.sve.zip1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>) 2823declare <vscale x 2 x half> @llvm.aarch64.sve.zip1.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>) 2824declare <vscale x 4 x half> @llvm.aarch64.sve.zip1.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>) 2825declare <vscale x 8 x bfloat> @llvm.aarch64.sve.zip1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>) 2826declare <vscale x 8 x half> @llvm.aarch64.sve.zip1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>) 2827declare <vscale x 2 x float> @llvm.aarch64.sve.zip1.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>) 2828declare <vscale x 4 x float> @llvm.aarch64.sve.zip1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>) 2829declare <vscale x 2 x double> @llvm.aarch64.sve.zip1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>) 2830 2831declare <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b16(<vscale x 16 x i1>, <vscale x 16 x i1>) 2832declare <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b32(<vscale x 16 x i1>, <vscale x 16 x i1>) 2833declare <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b64(<vscale x 16 x i1>, <vscale x 16 x i1>) 2834 2835declare <vscale x 16 x i1> @llvm.aarch64.sve.zip2.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>) 2836declare <vscale x 8 x i1> @llvm.aarch64.sve.zip2.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>) 2837declare <vscale x 4 x i1> @llvm.aarch64.sve.zip2.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>) 2838declare <vscale x 2 x i1> @llvm.aarch64.sve.zip2.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>) 2839declare 
<vscale x 16 x i8> @llvm.aarch64.sve.zip2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>) 2840declare <vscale x 8 x i16> @llvm.aarch64.sve.zip2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>) 2841declare <vscale x 4 x i32> @llvm.aarch64.sve.zip2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>) 2842declare <vscale x 2 x i64> @llvm.aarch64.sve.zip2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>) 2843declare <vscale x 2 x half> @llvm.aarch64.sve.zip2.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>) 2844declare <vscale x 4 x half> @llvm.aarch64.sve.zip2.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>) 2845declare <vscale x 8 x bfloat> @llvm.aarch64.sve.zip2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>) 2846declare <vscale x 8 x half> @llvm.aarch64.sve.zip2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>) 2847declare <vscale x 2 x float> @llvm.aarch64.sve.zip2.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>) 2848declare <vscale x 4 x float> @llvm.aarch64.sve.zip2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>) 2849declare <vscale x 2 x double> @llvm.aarch64.sve.zip2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>) 2850 2851declare <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b16(<vscale x 16 x i1>, <vscale x 16 x i1>) 2852declare <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b32(<vscale x 16 x i1>, <vscale x 16 x i1>) 2853declare <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b64(<vscale x 16 x i1>, <vscale x 16 x i1>) 2854 2855declare <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double>, <2 x double>, i64) 2856declare <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float>, <4 x float>, i64) 2857declare <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half>, <8 x half>, i64) 2858declare <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64>, <2 x i64>, i64) 2859declare <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32>, <4 x i32>, i64) 2860declare <vscale x 
8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16>, <8 x i16>, i64) 2861declare <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8>, <16 x i8>, i64) 2862declare <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat>, <8 x bfloat>, i64) 2863 2864; +bf16 is required for the bfloat version. 2865attributes #0 = { "target-features"="+sve,+bf16" } 2866