; RUN: opt < %s -mtriple=aarch64--linux-gnu -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=COST
; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CODE

; Transpose-style two-source shuffles on AArch64.
;
; Every function below performs one shufflevector whose mask interleaves the
; even-numbered lanes (trn1.*) or the odd-numbered lanes (trn2.*) of %v0 and
; %v1. Each function is checked twice:
;   * the COST prefix verifies that the cost model reports a cost of 1 for
;     the shuffle;
;   * the CODE prefix verifies the instruction printed by llc.
;
; For two-lane vectors the expected instruction is zip1/zip2 instead of
; trn1/trn2: with only two lanes the masks <0, 2> and <1, 3> are both the
; transpose mask and the zip mask.

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"

; --- i8 element vectors -----------------------------------------------------

; COST-LABEL: trn1.v8i8
; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; CODE-LABEL: trn1.v8i8
; CODE:       trn1 v0.8b, v0.8b, v1.8b
define <8 x i8> @trn1.v8i8(<8 x i8> %v0, <8 x i8> %v1) {
  %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  ret <8 x i8> %tmp0
}

; COST-LABEL: trn2.v8i8
; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
; CODE-LABEL: trn2.v8i8
; CODE:       trn2 v0.8b, v0.8b, v1.8b
define <8 x i8> @trn2.v8i8(<8 x i8> %v0, <8 x i8> %v1) {
  %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <8 x i8> %tmp0
}

; COST-LABEL: trn1.v16i8
; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
; CODE-LABEL: trn1.v16i8
; CODE:       trn1 v0.16b, v0.16b, v1.16b
define <16 x i8> @trn1.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
  %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
  ret <16 x i8> %tmp0
}

; COST-LABEL: trn2.v16i8
; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
; CODE-LABEL: trn2.v16i8
; CODE:       trn2 v0.16b, v0.16b, v1.16b
define <16 x i8> @trn2.v16i8(<16 x i8> %v0, <16 x i8> %v1) {
  %tmp0 = shufflevector <16 x i8> %v0, <16 x i8> %v1, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
  ret <16 x i8> %tmp0
}

; --- i16 element vectors ----------------------------------------------------

; COST-LABEL: trn1.v4i16
; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CODE-LABEL: trn1.v4i16
; CODE:       trn1 v0.4h, v0.4h, v1.4h
define <4 x i16> @trn1.v4i16(<4 x i16> %v0, <4 x i16> %v1) {
  %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x i16> %tmp0
}

; COST-LABEL: trn2.v4i16
; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CODE-LABEL: trn2.v4i16
; CODE:       trn2 v0.4h, v0.4h, v1.4h
define <4 x i16> @trn2.v4i16(<4 x i16> %v0, <4 x i16> %v1) {
  %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x i16> %tmp0
}

; COST-LABEL: trn1.v8i16
; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; CODE-LABEL: trn1.v8i16
; CODE:       trn1 v0.8h, v0.8h, v1.8h
define <8 x i16> @trn1.v8i16(<8 x i16> %v0, <8 x i16> %v1) {
  %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  ret <8 x i16> %tmp0
}

; COST-LABEL: trn2.v8i16
; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
; CODE-LABEL: trn2.v8i16
; CODE:       trn2 v0.8h, v0.8h, v1.8h
define <8 x i16> @trn2.v8i16(<8 x i16> %v0, <8 x i16> %v1) {
  %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <8 x i16> %tmp0
}

; --- i32 element vectors ----------------------------------------------------

; Two lanes: the expected instruction is zip1 (mask <0, 2>).
; COST-LABEL: trn1.v2i32
; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 0, i32 2>
; CODE-LABEL: trn1.v2i32
; CODE:       zip1 v0.2s, v0.2s, v1.2s
define <2 x i32> @trn1.v2i32(<2 x i32> %v0, <2 x i32> %v1) {
  %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 0, i32 2>
  ret <2 x i32> %tmp0
}

; Two lanes: the expected instruction is zip2 (mask <1, 3>).
; COST-LABEL: trn2.v2i32
; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 1, i32 3>
; CODE-LABEL: trn2.v2i32
; CODE:       zip2 v0.2s, v0.2s, v1.2s
define <2 x i32> @trn2.v2i32(<2 x i32> %v0, <2 x i32> %v1) {
  %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> <i32 1, i32 3>
  ret <2 x i32> %tmp0
}

; COST-LABEL: trn1.v4i32
; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CODE-LABEL: trn1.v4i32
; CODE:       trn1 v0.4s, v0.4s, v1.4s
define <4 x i32> @trn1.v4i32(<4 x i32> %v0, <4 x i32> %v1) {
  %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x i32> %tmp0
}

; COST-LABEL: trn2.v4i32
; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CODE-LABEL: trn2.v4i32
; CODE:       trn2 v0.4s, v0.4s, v1.4s
define <4 x i32> @trn2.v4i32(<4 x i32> %v0, <4 x i32> %v1) {
  %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x i32> %tmp0
}

; --- i64 element vectors ----------------------------------------------------

; Two lanes: the expected instruction is zip1 (mask <0, 2>).
; COST-LABEL: trn1.v2i64
; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 0, i32 2>
; CODE-LABEL: trn1.v2i64
; CODE:       zip1 v0.2d, v0.2d, v1.2d
define <2 x i64> @trn1.v2i64(<2 x i64> %v0, <2 x i64> %v1) {
  %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %tmp0
}

; Two lanes: the expected instruction is zip2 (mask <1, 3>).
; COST-LABEL: trn2.v2i64
; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 1, i32 3>
; CODE-LABEL: trn2.v2i64
; CODE:       zip2 v0.2d, v0.2d, v1.2d
define <2 x i64> @trn2.v2i64(<2 x i64> %v0, <2 x i64> %v1) {
  %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %tmp0
}

; --- float element vectors --------------------------------------------------

; Two lanes: the expected instruction is zip1 (mask <0, 2>).
; COST-LABEL: trn1.v2f32
; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 0, i32 2>
; CODE-LABEL: trn1.v2f32
; CODE:       zip1 v0.2s, v0.2s, v1.2s
define <2 x float> @trn1.v2f32(<2 x float> %v0, <2 x float> %v1) {
  %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 0, i32 2>
  ret <2 x float> %tmp0
}

; Two lanes: the expected instruction is zip2 (mask <1, 3>).
; COST-LABEL: trn2.v2f32
; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 1, i32 3>
; CODE-LABEL: trn2.v2f32
; CODE:       zip2 v0.2s, v0.2s, v1.2s
define <2 x float> @trn2.v2f32(<2 x float> %v0, <2 x float> %v1) {
  %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> <i32 1, i32 3>
  ret <2 x float> %tmp0
}

; COST-LABEL: trn1.v4f32
; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CODE-LABEL: trn1.v4f32
; CODE:       trn1 v0.4s, v0.4s, v1.4s
define <4 x float> @trn1.v4f32(<4 x float> %v0, <4 x float> %v1) {
  %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x float> %tmp0
}

; COST-LABEL: trn2.v4f32
; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CODE-LABEL: trn2.v4f32
; CODE:       trn2 v0.4s, v0.4s, v1.4s
define <4 x float> @trn2.v4f32(<4 x float> %v0, <4 x float> %v1) {
  %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x float> %tmp0
}

; --- double element vectors -------------------------------------------------

; Two lanes: the expected instruction is zip1 (mask <0, 2>).
; COST-LABEL: trn1.v2f64
; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 0, i32 2>
; CODE-LABEL: trn1.v2f64
; CODE:       zip1 v0.2d, v0.2d, v1.2d
define <2 x double> @trn1.v2f64(<2 x double> %v0, <2 x double> %v1) {
  %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 0, i32 2>
  ret <2 x double> %tmp0
}

; Two lanes: the expected instruction is zip2 (mask <1, 3>).
; COST-LABEL: trn2.v2f64
; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 1, i32 3>
; CODE-LABEL: trn2.v2f64
; CODE:       zip2 v0.2d, v0.2d, v1.2d
define <2 x double> @trn2.v2f64(<2 x double> %v0, <2 x double> %v1) {
  %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> <i32 1, i32 3>
  ret <2 x double> %tmp0
}

; --- half element vectors ---------------------------------------------------

; COST-LABEL: trn1.v4f16
; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
; CODE-LABEL: trn1.v4f16
; CODE:       trn1 v0.4h, v0.4h, v1.4h
define <4 x half> @trn1.v4f16(<4 x half> %v0, <4 x half> %v1) {
  %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x half> %tmp0
}

; COST-LABEL: trn2.v4f16
; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
; CODE-LABEL: trn2.v4f16
; CODE:       trn2 v0.4h, v0.4h, v1.4h
define <4 x half> @trn2.v4f16(<4 x half> %v0, <4 x half> %v1) {
  %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x half> %tmp0
}

; COST-LABEL: trn1.v8f16
; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
; CODE-LABEL: trn1.v8f16
; CODE:       trn1 v0.8h, v0.8h, v1.8h
define <8 x half> @trn1.v8f16(<8 x half> %v0, <8 x half> %v1) {
  %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  ret <8 x half> %tmp0
}

; COST-LABEL: trn2.v8f16
; COST:       Found an estimated cost of 1 for instruction: %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
; CODE-LABEL: trn2.v8f16
; CODE:       trn2 v0.8h, v0.8h, v1.8h
define <8 x half> @trn2.v8f16(<8 x half> %v0, <8 x half> %v1) {
  %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
  ret <8 x half> %tmp0
}