; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+sve | FileCheck %s

; Code generation test for @llvm.vector.interleave2 on scalable vectors with
; SVE enabled. Two shapes of expected output appear in the assertions below:
;   * zip1 + zip2 at the element size, followed by a mov that places the
;     zip1 result in z0/p0 (results left in register pairs);
;   * zip2 + zip1 at a wider element size, followed by a uzp1 at the narrower
;     element size that combines both halves into a single register.
; The assertion lines are tool-generated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.

; Floats

define <vscale x 4 x half> @interleave2_nxv4f16(<vscale x 2 x half> %vec0, <vscale x 2 x half> %vec1) {
; CHECK-LABEL: interleave2_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z2.d, z0.d, z1.d
; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z2.s
; CHECK-NEXT:    ret
  %retval = call <vscale x 4 x half> @llvm.vector.interleave2.nxv4f16(<vscale x 2 x half> %vec0, <vscale x 2 x half> %vec1)
  ret <vscale x 4 x half> %retval
}

define <vscale x 8 x half> @interleave2_nxv8f16(<vscale x 4 x half> %vec0, <vscale x 4 x half> %vec1) {
; CHECK-LABEL: interleave2_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z2.s, z0.s, z1.s
; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
; CHECK-NEXT:    ret
  %retval = call <vscale x 8 x half> @llvm.vector.interleave2.nxv8f16(<vscale x 4 x half> %vec0, <vscale x 4 x half> %vec1)
  ret <vscale x 8 x half> %retval
}

define <vscale x 16 x half> @interleave2_nxv16f16(<vscale x 8 x half> %vec0, <vscale x 8 x half> %vec1) {
; CHECK-LABEL: interleave2_nxv16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z2.h, z0.h, z1.h
; CHECK-NEXT:    zip2 z1.h, z0.h, z1.h
; CHECK-NEXT:    mov z0.d, z2.d
; CHECK-NEXT:    ret
  %retval = call <vscale x 16 x half> @llvm.vector.interleave2.nxv16f16(<vscale x 8 x half> %vec0, <vscale x 8 x half> %vec1)
  ret <vscale x 16 x half> %retval
}

define <vscale x 4 x float> @interleave2_nxv4f32(<vscale x 2 x float> %vec0, <vscale x 2 x float> %vec1) {
; CHECK-LABEL: interleave2_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z2.d, z0.d, z1.d
; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z2.s
; CHECK-NEXT:    ret
  %retval = call <vscale x 4 x float> @llvm.vector.interleave2.nxv4f32(<vscale x 2 x float> %vec0, <vscale x 2 x float> %vec1)
  ret <vscale x 4 x float> %retval
}

define <vscale x 8 x float> @interleave2_nxv8f32(<vscale x 4 x float> %vec0, <vscale x 4 x float> %vec1) {
; CHECK-LABEL: interleave2_nxv8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z2.s, z0.s, z1.s
; CHECK-NEXT:    zip2 z1.s, z0.s, z1.s
; CHECK-NEXT:    mov z0.d, z2.d
; CHECK-NEXT:    ret
  %retval = call <vscale x 8 x float> @llvm.vector.interleave2.nxv8f32(<vscale x 4 x float> %vec0, <vscale x 4 x float> %vec1)
  ret <vscale x 8 x float> %retval
}

define <vscale x 4 x double> @interleave2_nxv4f64(<vscale x 2 x double> %vec0, <vscale x 2 x double> %vec1) {
; CHECK-LABEL: interleave2_nxv4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z2.d, z0.d, z1.d
; CHECK-NEXT:    zip2 z1.d, z0.d, z1.d
; CHECK-NEXT:    mov z0.d, z2.d
; CHECK-NEXT:    ret
  %retval = call <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double> %vec0, <vscale x 2 x double> %vec1)
  ret <vscale x 4 x double> %retval
}

; Integers

define <vscale x 32 x i8> @interleave2_nxv32i8(<vscale x 16 x i8> %vec0, <vscale x 16 x i8> %vec1) {
; CHECK-LABEL: interleave2_nxv32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z2.b, z0.b, z1.b
; CHECK-NEXT:    zip2 z1.b, z0.b, z1.b
; CHECK-NEXT:    mov z0.d, z2.d
; CHECK-NEXT:    ret
  %retval = call <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8> %vec0, <vscale x 16 x i8> %vec1)
  ret <vscale x 32 x i8> %retval
}

define <vscale x 16 x i16> @interleave2_nxv16i16(<vscale x 8 x i16> %vec0, <vscale x 8 x i16> %vec1) {
; CHECK-LABEL: interleave2_nxv16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z2.h, z0.h, z1.h
; CHECK-NEXT:    zip2 z1.h, z0.h, z1.h
; CHECK-NEXT:    mov z0.d, z2.d
; CHECK-NEXT:    ret
  %retval = call <vscale x 16 x i16> @llvm.vector.interleave2.nxv16i16(<vscale x 8 x i16> %vec0, <vscale x 8 x i16> %vec1)
  ret <vscale x 16 x i16> %retval
}

define <vscale x 8 x i32> @interleave2_nxv8i32(<vscale x 4 x i32> %vec0, <vscale x 4 x i32> %vec1) {
; CHECK-LABEL: interleave2_nxv8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z2.s, z0.s, z1.s
; CHECK-NEXT:    zip2 z1.s, z0.s, z1.s
; CHECK-NEXT:    mov z0.d, z2.d
; CHECK-NEXT:    ret
  %retval = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32> %vec0, <vscale x 4 x i32> %vec1)
  ret <vscale x 8 x i32> %retval
}

define <vscale x 4 x i64> @interleave2_nxv4i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1) {
; CHECK-LABEL: interleave2_nxv4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z2.d, z0.d, z1.d
; CHECK-NEXT:    zip2 z1.d, z0.d, z1.d
; CHECK-NEXT:    mov z0.d, z2.d
; CHECK-NEXT:    ret
  %retval = call <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1)
  ret <vscale x 4 x i64> %retval
}

; Predicates
; Same intrinsic on i1 element vectors; the expected instructions operate on
; p registers instead of z registers.

define <vscale x 32 x i1> @interleave2_nxv32i1(<vscale x 16 x i1> %vec0, <vscale x 16 x i1> %vec1) {
; CHECK-LABEL: interleave2_nxv32i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 p2.b, p0.b, p1.b
; CHECK-NEXT:    zip2 p1.b, p0.b, p1.b
; CHECK-NEXT:    mov p0.b, p2.b
; CHECK-NEXT:    ret
  %retval = call <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1> %vec0, <vscale x 16 x i1> %vec1)
  ret <vscale x 32 x i1> %retval
}

define <vscale x 16 x i1> @interleave2_nxv16i1(<vscale x 8 x i1> %vec0, <vscale x 8 x i1> %vec1) {
; CHECK-LABEL: interleave2_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 p2.h, p0.h, p1.h
; CHECK-NEXT:    zip1 p0.h, p0.h, p1.h
; CHECK-NEXT:    uzp1 p0.b, p0.b, p2.b
; CHECK-NEXT:    ret
  %retval = call <vscale x 16 x i1> @llvm.vector.interleave2.nxv16i1(<vscale x 8 x i1> %vec0, <vscale x 8 x i1> %vec1)
  ret <vscale x 16 x i1> %retval
}

define <vscale x 8 x i1> @interleave2_nxv8i1(<vscale x 4 x i1> %vec0, <vscale x 4 x i1> %vec1) {
; CHECK-LABEL: interleave2_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 p2.s, p0.s, p1.s
; CHECK-NEXT:    zip1 p0.s, p0.s, p1.s
; CHECK-NEXT:    uzp1 p0.h, p0.h, p2.h
; CHECK-NEXT:    ret
  %retval = call <vscale x 8 x i1> @llvm.vector.interleave2.nxv8i1(<vscale x 4 x i1> %vec0, <vscale x 4 x i1> %vec1)
  ret <vscale x 8 x i1> %retval
}

define <vscale x 4 x i1> @interleave2_nxv4i1(<vscale x 2 x i1> %vec0, <vscale x 2 x i1> %vec1) {
; CHECK-LABEL: interleave2_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 p2.d, p0.d, p1.d
; CHECK-NEXT:    zip1 p0.d, p0.d, p1.d
; CHECK-NEXT:    uzp1 p0.s, p0.s, p2.s
; CHECK-NEXT:    ret
  %retval = call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %vec0, <vscale x 2 x i1> %vec1)
  ret <vscale x 4 x i1> %retval
}

; Split illegal type size
; Each operand arrives in two z registers (z0/z1 and z2/z3 in the expected
; code); the expected output is two zip1/zip2 pairs plus register moves.

define <vscale x 16 x i32> @interleave2_nxv16i32(<vscale x 8 x i32> %vec0, <vscale x 8 x i32> %vec1) {
; CHECK-LABEL: interleave2_nxv16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z4.s, z1.s, z3.s
; CHECK-NEXT:    zip1 z5.s, z0.s, z2.s
; CHECK-NEXT:    zip2 z2.s, z0.s, z2.s
; CHECK-NEXT:    zip2 z3.s, z1.s, z3.s
; CHECK-NEXT:    mov z0.d, z5.d
; CHECK-NEXT:    mov z1.d, z2.d
; CHECK-NEXT:    mov z2.d, z4.d
; CHECK-NEXT:    ret
  %retval = call <vscale x 16 x i32> @llvm.vector.interleave2.nxv16i32(<vscale x 8 x i32> %vec0, <vscale x 8 x i32> %vec1)
  ret <vscale x 16 x i32> %retval
}

define <vscale x 8 x i64> @interleave2_nxv8i64(<vscale x 4 x i64> %vec0, <vscale x 4 x i64> %vec1) {
; CHECK-LABEL: interleave2_nxv8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 z4.d, z1.d, z3.d
; CHECK-NEXT:    zip1 z5.d, z0.d, z2.d
; CHECK-NEXT:    zip2 z2.d, z0.d, z2.d
; CHECK-NEXT:    zip2 z3.d, z1.d, z3.d
; CHECK-NEXT:    mov z0.d, z5.d
; CHECK-NEXT:    mov z1.d, z2.d
; CHECK-NEXT:    mov z2.d, z4.d
; CHECK-NEXT:    ret
  %retval = call <vscale x 8 x i64> @llvm.vector.interleave2.nxv8i64(<vscale x 4 x i64> %vec0, <vscale x 4 x i64> %vec1)
  ret <vscale x 8 x i64> %retval
}

; Promote illegal type size
; NOTE(review): these three functions are named after their operand type
; (nxv8i8, nxv4i16, nxv2i32) while the intrinsic suffix names the result type
; (nxv16i8, nxv8i16, nxv4i32); the names are kept as-is because the label
; assertions depend on them.

define <vscale x 16 x i8> @interleave2_nxv8i8(<vscale x 8 x i8> %vec0, <vscale x 8 x i8> %vec1) {
; CHECK-LABEL: interleave2_nxv8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z2.h, z0.h, z1.h
; CHECK-NEXT:    zip1 z0.h, z0.h, z1.h
; CHECK-NEXT:    uzp1 z0.b, z0.b, z2.b
; CHECK-NEXT:    ret
  %retval = call <vscale x 16 x i8> @llvm.vector.interleave2.nxv16i8(<vscale x 8 x i8> %vec0, <vscale x 8 x i8> %vec1)
  ret <vscale x 16 x i8> %retval
}

define <vscale x 8 x i16> @interleave2_nxv4i16(<vscale x 4 x i16> %vec0, <vscale x 4 x i16> %vec1) {
; CHECK-LABEL: interleave2_nxv4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z2.s, z0.s, z1.s
; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
; CHECK-NEXT:    ret
  %retval = call <vscale x 8 x i16> @llvm.vector.interleave2.nxv8i16(<vscale x 4 x i16> %vec0, <vscale x 4 x i16> %vec1)
  ret <vscale x 8 x i16> %retval
}

define <vscale x 4 x i32> @interleave2_nxv2i32(<vscale x 2 x i32> %vec0, <vscale x 2 x i32> %vec1) {
; CHECK-LABEL: interleave2_nxv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip2 z2.d, z0.d, z1.d
; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z2.s
; CHECK-NEXT:    ret
  %retval = call <vscale x 4 x i32> @llvm.vector.interleave2.nxv4i32(<vscale x 2 x i32> %vec0, <vscale x 2 x i32> %vec1)
  ret <vscale x 4 x i32> %retval
}

; Float declarations
declare <vscale x 4 x half> @llvm.vector.interleave2.nxv4f16(<vscale x 2 x half>, <vscale x 2 x half>)
declare <vscale x 8 x half> @llvm.vector.interleave2.nxv8f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 16 x half> @llvm.vector.interleave2.nxv16f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.vector.interleave2.nxv4f32(<vscale x 2 x float>, <vscale x 2 x float>)
declare <vscale x 8 x float> @llvm.vector.interleave2.nxv8f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double>, <vscale x 2 x double>)

; Integer declarations
declare <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 16 x i16> @llvm.vector.interleave2.nxv16i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

; Predicate declarations
declare <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 16 x i1> @llvm.vector.interleave2.nxv16i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 8 x i1> @llvm.vector.interleave2.nxv8i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1>, <vscale x 2 x i1>)

; Split illegal type size declarations
declare <vscale x 16 x i32> @llvm.vector.interleave2.nxv16i32(<vscale x 8 x i32>, <vscale x 8 x i32>)
declare <vscale x 8 x i64> @llvm.vector.interleave2.nxv8i64(<vscale x 4 x i64>, <vscale x 4 x i64>)

; Promote illegal type size declarations
declare <vscale x 16 x i8> @llvm.vector.interleave2.nxv16i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
declare <vscale x 8 x i16> @llvm.vector.interleave2.nxv8i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
declare <vscale x 4 x i32> @llvm.vector.interleave2.nxv4i32(<vscale x 2 x i32>, <vscale x 2 x i32>)