; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve,+f64mm < %s | FileCheck %s

;
; TRN1Q
;
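; The .q forms below operate on 128-bit (quadword) elements of an SVE vector
; and are provided by the F64MM extension. trn1 interleaves the even-numbered
; quadwords of the two source vectors.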

define <vscale x 16 x i8> @trn1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
; CHECK-LABEL: trn1_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.trn1q.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @trn1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) nounwind {
; CHECK-LABEL: trn1_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.trn1q.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @trn1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) nounwind {
; CHECK-LABEL: trn1_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.trn1q.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @trn1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) nounwind {
; CHECK-LABEL: trn1_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.trn1q.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @trn1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) nounwind {
; CHECK-LABEL: trn1_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.trn1q.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @trn1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) nounwind #0 {
; CHECK-LABEL: trn1_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.trn1q.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @trn1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) nounwind {
; CHECK-LABEL: trn1_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.trn1q.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @trn1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) nounwind {
; CHECK-LABEL: trn1_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.trn1q.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; TRN2Q
;
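; trn2 interleaves the odd-numbered quadwords of the two source vectors.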

define <vscale x 16 x i8> @trn2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
; CHECK-LABEL: trn2_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.trn2q.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @trn2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) nounwind {
; CHECK-LABEL: trn2_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.trn2q.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @trn2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) nounwind {
; CHECK-LABEL: trn2_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.trn2q.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @trn2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) nounwind {
; CHECK-LABEL: trn2_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.trn2q.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @trn2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) nounwind {
; CHECK-LABEL: trn2_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.trn2q.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @trn2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) nounwind #0 {
; CHECK-LABEL: trn2_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.trn2q.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @trn2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) nounwind {
; CHECK-LABEL: trn2_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.trn2q.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @trn2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) nounwind {
; CHECK-LABEL: trn2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    trn2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.trn2q.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; UZP1Q
;
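; uzp1 concatenates the even-numbered quadwords of the first source vector
; with those of the second.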

define <vscale x 16 x i8> @uzp1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
; CHECK-LABEL: uzp1_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uzp1q.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uzp1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) nounwind {
; CHECK-LABEL: uzp1_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uzp1q.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uzp1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) nounwind {
; CHECK-LABEL: uzp1_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp1q.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uzp1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) nounwind {
; CHECK-LABEL: uzp1_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uzp1q.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @uzp1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) nounwind {
; CHECK-LABEL: uzp1_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.uzp1q.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @uzp1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) nounwind #0 {
; CHECK-LABEL: uzp1_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp1q.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @uzp1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) nounwind {
; CHECK-LABEL: uzp1_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.uzp1q.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @uzp1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) nounwind {
; CHECK-LABEL: uzp1_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.uzp1q.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; UZP2Q
;
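; uzp2 concatenates the odd-numbered quadwords of the two source vectors.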

define <vscale x 16 x i8> @uzp2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
; CHECK-LABEL: uzp2_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uzp2q.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uzp2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) nounwind {
; CHECK-LABEL: uzp2_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uzp2q.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uzp2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) nounwind {
; CHECK-LABEL: uzp2_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp2q.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uzp2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) nounwind {
; CHECK-LABEL: uzp2_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uzp2q.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @uzp2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) nounwind {
; CHECK-LABEL: uzp2_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.uzp2q.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @uzp2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) nounwind #0 {
; CHECK-LABEL: uzp2_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp2q.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @uzp2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) nounwind {
; CHECK-LABEL: uzp2_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.uzp2q.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @uzp2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) nounwind {
; CHECK-LABEL: uzp2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.uzp2q.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; ZIP1Q
;
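; zip1 interleaves the quadwords from the low halves of the two source
; vectors.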

define <vscale x 16 x i8> @zip1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
; CHECK-LABEL: zip1_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.zip1q.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @zip1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) nounwind {
; CHECK-LABEL: zip1_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.zip1q.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @zip1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) nounwind {
; CHECK-LABEL: zip1_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.zip1q.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @zip1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) nounwind {
; CHECK-LABEL: zip1_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.zip1q.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @zip1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) nounwind {
; CHECK-LABEL: zip1_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.zip1q.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @zip1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) nounwind #0 {
; CHECK-LABEL: zip1_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.zip1q.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @zip1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) nounwind {
; CHECK-LABEL: zip1_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.zip1q.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @zip1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) nounwind {
; CHECK-LABEL: zip1_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.zip1q.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; ZIP2Q
;
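; zip2 interleaves the quadwords from the high halves of the two source
; vectors.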

define <vscale x 16 x i8> @zip2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) nounwind {
; CHECK-LABEL: zip2_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.zip2q.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @zip2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) nounwind {
; CHECK-LABEL: zip2_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.zip2q.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @zip2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) nounwind {
; CHECK-LABEL: zip2_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.zip2q.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @zip2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) nounwind {
; CHECK-LABEL: zip2_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.zip2q.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @zip2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) nounwind {
; CHECK-LABEL: zip2_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.zip2q.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @zip2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) nounwind #0 {
; CHECK-LABEL: zip2_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.zip2q.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @zip2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) nounwind {
; CHECK-LABEL: zip2_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.zip2q.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @zip2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) nounwind {
; CHECK-LABEL: zip2_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z0.q, z0.q, z1.q
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.zip2q.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

declare <vscale x 2 x double> @llvm.aarch64.sve.trn1q.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.trn1q.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x float> @llvm.aarch64.sve.trn1q.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.trn1q.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.trn1q.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.trn1q.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.trn1q.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.trn1q.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)

declare <vscale x 2 x double> @llvm.aarch64.sve.trn2q.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.trn2q.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x float> @llvm.aarch64.sve.trn2q.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.trn2q.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.trn2q.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.trn2q.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.trn2q.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.trn2q.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)

declare <vscale x 2 x double> @llvm.aarch64.sve.uzp1q.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uzp1q.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x float> @llvm.aarch64.sve.uzp1q.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uzp1q.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp1q.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.uzp1q.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uzp1q.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.uzp1q.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)

declare <vscale x 2 x double> @llvm.aarch64.sve.uzp2q.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uzp2q.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x float> @llvm.aarch64.sve.uzp2q.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uzp2q.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp2q.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.uzp2q.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uzp2q.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.uzp2q.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)

declare <vscale x 2 x double> @llvm.aarch64.sve.zip1q.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.zip1q.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x float> @llvm.aarch64.sve.zip1q.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.zip1q.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.zip1q.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.zip1q.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.zip1q.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.zip1q.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)

declare <vscale x 2 x double> @llvm.aarch64.sve.zip2q.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.zip2q.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x float> @llvm.aarch64.sve.zip2q.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.zip2q.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.zip2q.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.zip2q.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.zip2q.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.zip2q.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)

; +bf16 is required for the bfloat versions.
attributes #0 = { "target-features"="+sve,+f64mm,+bf16" }