; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s

; Every function below follows the same shape: extract each lane of one or
; more source vectors, truncate the scalar to i8, and insert the results lane
; by lane into an i8 result vector. The expected assembly recorded under each
; function shows which of these patterns are matched as vector permutes.

; Two <4 x i16> sources, lanes taken in order (a0..a3, b0..b3) and packed into
; an <8 x i8>. The expected output is a single uzp1 on the two inputs.
define <8 x i8> @extract_2_v4i16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: extract_2_v4i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uzp1 v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
entry:
  %a0 = extractelement <4 x i16> %a, i32 0
  %a1 = extractelement <4 x i16> %a, i32 1
  %a2 = extractelement <4 x i16> %a, i32 2
  %a3 = extractelement <4 x i16> %a, i32 3
  %b0 = extractelement <4 x i16> %b, i32 0
  %b1 = extractelement <4 x i16> %b, i32 1
  %b2 = extractelement <4 x i16> %b, i32 2
  %b3 = extractelement <4 x i16> %b, i32 3
  %t0 = trunc i16 %a0 to i8
  %t1 = trunc i16 %a1 to i8
  %t2 = trunc i16 %a2 to i8
  %t3 = trunc i16 %a3 to i8
  %t4 = trunc i16 %b0 to i8
  %t5 = trunc i16 %b1 to i8
  %t6 = trunc i16 %b2 to i8
  %t7 = trunc i16 %b3 to i8
  %i0 = insertelement <8 x i8> undef, i8 %t0, i32 0
  %i1 = insertelement <8 x i8> %i0, i8 %t1, i32 1
  %i2 = insertelement <8 x i8> %i1, i8 %t2, i32 2
  %i3 = insertelement <8 x i8> %i2, i8 %t3, i32 3
  %i4 = insertelement <8 x i8> %i3, i8 %t4, i32 4
  %i5 = insertelement <8 x i8> %i4, i8 %t5, i32 5
  %i6 = insertelement <8 x i8> %i5, i8 %t6, i32 6
  %i7 = insertelement <8 x i8> %i6, i8 %t7, i32 7
  ret <8 x i8> %i7
}

; Two <4 x i32> sources packed into an <8 x i8>. Unlike the i16 variant above,
; the expected output here is a lane-by-lane sequence of mov instructions
; rather than a single permute.
define <8 x i8> @extract_2_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: extract_2_v4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov w8, v0.s[1]
; CHECK-NEXT: mov w9, v0.s[2]
; CHECK-NEXT: mov w10, v0.s[3]
; CHECK-NEXT: mov v0.b[1], w8
; CHECK-NEXT: mov w8, v1.s[1]
; CHECK-NEXT: mov v0.b[2], w9
; CHECK-NEXT: mov w9, v1.s[2]
; CHECK-NEXT: mov v0.b[3], w10
; CHECK-NEXT: mov v0.b[4], v1.b[0]
; CHECK-NEXT: mov v0.b[5], w8
; CHECK-NEXT: mov w8, v1.s[3]
; CHECK-NEXT: mov v0.b[6], w9
; CHECK-NEXT: mov v0.b[7], w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
entry:
  %a0 = extractelement <4 x i32> %a, i32 0
  %a1 = extractelement <4 x i32> %a, i32 1
  %a2 = extractelement <4 x i32> %a, i32 2
  %a3 = extractelement <4 x i32> %a, i32 3
  %b0 = extractelement <4 x i32> %b, i32 0
  %b1 = extractelement <4 x i32> %b, i32 1
  %b2 = extractelement <4 x i32> %b, i32 2
  %b3 = extractelement <4 x i32> %b, i32 3
  %t0 = trunc i32 %a0 to i8
  %t1 = trunc i32 %a1 to i8
  %t2 = trunc i32 %a2 to i8
  %t3 = trunc i32 %a3 to i8
  %t4 = trunc i32 %b0 to i8
  %t5 = trunc i32 %b1 to i8
  %t6 = trunc i32 %b2 to i8
  %t7 = trunc i32 %b3 to i8
  %i0 = insertelement <8 x i8> undef, i8 %t0, i32 0
  %i1 = insertelement <8 x i8> %i0, i8 %t1, i32 1
  %i2 = insertelement <8 x i8> %i1, i8 %t2, i32 2
  %i3 = insertelement <8 x i8> %i2, i8 %t3, i32 3
  %i4 = insertelement <8 x i8> %i3, i8 %t4, i32 4
  %i5 = insertelement <8 x i8> %i4, i8 %t5, i32 5
  %i6 = insertelement <8 x i8> %i5, i8 %t6, i32 6
  %i7 = insertelement <8 x i8> %i6, i8 %t7, i32 7
  ret <8 x i8> %i7
}

; Four <4 x i16> sources packed in order into a <16 x i8>. The expected output
; first joins the inputs pairwise with two 64-bit lane moves and then issues a
; single uzp1 on the two joined registers.
define <16 x i8> @extract_4_v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
; CHECK-LABEL: extract_4_v4i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: mov v2.d[1], v3.d[0]
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
entry:
  %a0 = extractelement <4 x i16> %a, i32 0
  %a1 = extractelement <4 x i16> %a, i32 1
  %a2 = extractelement <4 x i16> %a, i32 2
  %a3 = extractelement <4 x i16> %a, i32 3
  %b0 = extractelement <4 x i16> %b, i32 0
  %b1 = extractelement <4 x i16> %b, i32 1
  %b2 = extractelement <4 x i16> %b, i32 2
  %b3 = extractelement <4 x i16> %b, i32 3
  %c0 = extractelement <4 x i16> %c, i32 0
  %c1 = extractelement <4 x i16> %c, i32 1
  %c2 = extractelement <4 x i16> %c, i32 2
  %c3 = extractelement <4 x i16> %c, i32 3
  %d0 = extractelement <4 x i16> %d, i32 0
  %d1 = extractelement <4 x i16> %d, i32 1
  %d2 = extractelement <4 x i16> %d, i32 2
  %d3 = extractelement <4 x i16> %d, i32 3
  %t0 = trunc i16 %a0 to i8
  %t1 = trunc i16 %a1 to i8
  %t2 = trunc i16 %a2 to i8
  %t3 = trunc i16 %a3 to i8
  %t4 = trunc i16 %b0 to i8
  %t5 = trunc i16 %b1 to i8
  %t6 = trunc i16 %b2 to i8
  %t7 = trunc i16 %b3 to i8
  %t8 = trunc i16 %c0 to i8
  %t9 = trunc i16 %c1 to i8
  %t10 = trunc i16 %c2 to i8
  %t11 = trunc i16 %c3 to i8
  %t12 = trunc i16 %d0 to i8
  %t13 = trunc i16 %d1 to i8
  %t14 = trunc i16 %d2 to i8
  %t15 = trunc i16 %d3 to i8
  %i0 = insertelement <16 x i8> undef, i8 %t0, i32 0
  %i1 = insertelement <16 x i8> %i0, i8 %t1, i32 1
  %i2 = insertelement <16 x i8> %i1, i8 %t2, i32 2
  %i3 = insertelement <16 x i8> %i2, i8 %t3, i32 3
  %i4 = insertelement <16 x i8> %i3, i8 %t4, i32 4
  %i5 = insertelement <16 x i8> %i4, i8 %t5, i32 5
  %i6 = insertelement <16 x i8> %i5, i8 %t6, i32 6
  %i7 = insertelement <16 x i8> %i6, i8 %t7, i32 7
  %i8 = insertelement <16 x i8> %i7, i8 %t8, i32 8
  %i9 = insertelement <16 x i8> %i8, i8 %t9, i32 9
  %i10 = insertelement <16 x i8> %i9, i8 %t10, i32 10
  %i11 = insertelement <16 x i8> %i10, i8 %t11, i32 11
  %i12 = insertelement <16 x i8> %i11, i8 %t12, i32 12
  %i13 = insertelement <16 x i8> %i12, i8 %t13, i32 13
  %i14 = insertelement <16 x i8> %i13, i8 %t14, i32 14
  %i15 = insertelement <16 x i8> %i14, i8 %t15, i32 15
  ret <16 x i8> %i15
}

; Four <4 x i32> sources packed in order into a <16 x i8>. The expected output
; is three uzp1 instructions: two at halfword granularity on the input pairs,
; then one at byte granularity on their results.
define <16 x i8> @extract_4_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
; CHECK-LABEL: extract_4_v4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uzp1 v2.8h, v2.8h, v3.8h
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
entry:
  %a0 = extractelement <4 x i32> %a, i32 0
  %a1 = extractelement <4 x i32> %a, i32 1
  %a2 = extractelement <4 x i32> %a, i32 2
  %a3 = extractelement <4 x i32> %a, i32 3
  %b0 = extractelement <4 x i32> %b, i32 0
  %b1 = extractelement <4 x i32> %b, i32 1
  %b2 = extractelement <4 x i32> %b, i32 2
  %b3 = extractelement <4 x i32> %b, i32 3
  %c0 = extractelement <4 x i32> %c, i32 0
  %c1 = extractelement <4 x i32> %c, i32 1
  %c2 = extractelement <4 x i32> %c, i32 2
  %c3 = extractelement <4 x i32> %c, i32 3
  %d0 = extractelement <4 x i32> %d, i32 0
  %d1 = extractelement <4 x i32> %d, i32 1
  %d2 = extractelement <4 x i32> %d, i32 2
  %d3 = extractelement <4 x i32> %d, i32 3
  %t0 = trunc i32 %a0 to i8
  %t1 = trunc i32 %a1 to i8
  %t2 = trunc i32 %a2 to i8
  %t3 = trunc i32 %a3 to i8
  %t4 = trunc i32 %b0 to i8
  %t5 = trunc i32 %b1 to i8
  %t6 = trunc i32 %b2 to i8
  %t7 = trunc i32 %b3 to i8
  %t8 = trunc i32 %c0 to i8
  %t9 = trunc i32 %c1 to i8
  %t10 = trunc i32 %c2 to i8
  %t11 = trunc i32 %c3 to i8
  %t12 = trunc i32 %d0 to i8
  %t13 = trunc i32 %d1 to i8
  %t14 = trunc i32 %d2 to i8
  %t15 = trunc i32 %d3 to i8
  %i0 = insertelement <16 x i8> undef, i8 %t0, i32 0
  %i1 = insertelement <16 x i8> %i0, i8 %t1, i32 1
  %i2 = insertelement <16 x i8> %i1, i8 %t2, i32 2
  %i3 = insertelement <16 x i8> %i2, i8 %t3, i32 3
  %i4 = insertelement <16 x i8> %i3, i8 %t4, i32 4
  %i5 = insertelement <16 x i8> %i4, i8 %t5, i32 5
  %i6 = insertelement <16 x i8> %i5, i8 %t6, i32 6
  %i7 = insertelement <16 x i8> %i6, i8 %t7, i32 7
  %i8 = insertelement <16 x i8> %i7, i8 %t8, i32 8
  %i9 = insertelement <16 x i8> %i8, i8 %t9, i32 9
  %i10 = insertelement <16 x i8> %i9, i8 %t10, i32 10
  %i11 = insertelement <16 x i8> %i10, i8 %t11, i32 11
  %i12 = insertelement <16 x i8> %i11, i8 %t12, i32 12
  %i13 = insertelement <16 x i8> %i12, i8 %t13, i32 13
  %i14 = insertelement <16 x i8> %i13, i8 %t14, i32 14
  %i15 = insertelement <16 x i8> %i14, i8 %t15, i32 15
  ret <16 x i8> %i15
}

; Mixed element widths: %a and %d are <4 x i16>, %b and %c are <4 x i32>.
; The expected output narrows the i32 inputs with xtn/xtn2, joins %c's
; narrowed half with %d via a 64-bit lane move, and finishes with one uzp1.
define <16 x i8> @extract_4_mixed(<4 x i16> %a, <4 x i32> %b, <4 x i32> %c, <4 x i16> %d) {
; CHECK-LABEL: extract_4_mixed:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: xtn v2.4h, v2.4s
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-NEXT: xtn2 v0.8h, v1.4s
; CHECK-NEXT: mov v2.d[1], v3.d[0]
; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
entry:
  %a0 = extractelement <4 x i16> %a, i32 0
  %a1 = extractelement <4 x i16> %a, i32 1
  %a2 = extractelement <4 x i16> %a, i32 2
  %a3 = extractelement <4 x i16> %a, i32 3
  %b0 = extractelement <4 x i32> %b, i32 0
  %b1 = extractelement <4 x i32> %b, i32 1
  %b2 = extractelement <4 x i32> %b, i32 2
  %b3 = extractelement <4 x i32> %b, i32 3
  %c0 = extractelement <4 x i32> %c, i32 0
  %c1 = extractelement <4 x i32> %c, i32 1
  %c2 = extractelement <4 x i32> %c, i32 2
  %c3 = extractelement <4 x i32> %c, i32 3
  %d0 = extractelement <4 x i16> %d, i32 0
  %d1 = extractelement <4 x i16> %d, i32 1
  %d2 = extractelement <4 x i16> %d, i32 2
  %d3 = extractelement <4 x i16> %d, i32 3
  %t0 = trunc i16 %a0 to i8
  %t1 = trunc i16 %a1 to i8
  %t2 = trunc i16 %a2 to i8
  %t3 = trunc i16 %a3 to i8
  %t4 = trunc i32 %b0 to i8
  %t5 = trunc i32 %b1 to i8
  %t6 = trunc i32 %b2 to i8
  %t7 = trunc i32 %b3 to i8
  %t8 = trunc i32 %c0 to i8
  %t9 = trunc i32 %c1 to i8
  %t10 = trunc i32 %c2 to i8
  %t11 = trunc i32 %c3 to i8
  %t12 = trunc i16 %d0 to i8
  %t13 = trunc i16 %d1 to i8
  %t14 = trunc i16 %d2 to i8
  %t15 = trunc i16 %d3 to i8
  %i0 = insertelement <16 x i8> undef, i8 %t0, i32 0
  %i1 = insertelement <16 x i8> %i0, i8 %t1, i32 1
  %i2 = insertelement <16 x i8> %i1, i8 %t2, i32 2
  %i3 = insertelement <16 x i8> %i2, i8 %t3, i32 3
  %i4 = insertelement <16 x i8> %i3, i8 %t4, i32 4
  %i5 = insertelement <16 x i8> %i4, i8 %t5, i32 5
  %i6 = insertelement <16 x i8> %i5, i8 %t6, i32 6
  %i7 = insertelement <16 x i8> %i6, i8 %t7, i32 7
  %i8 = insertelement <16 x i8> %i7, i8 %t8, i32 8
  %i9 = insertelement <16 x i8> %i8, i8 %t9, i32 9
  %i10 = insertelement <16 x i8> %i9, i8 %t10, i32 10
  %i11 = insertelement <16 x i8> %i10, i8 %t11, i32 11
  %i12 = insertelement <16 x i8> %i11, i8 %t12, i32 12
  %i13 = insertelement <16 x i8> %i12, i8 %t13, i32 13
  %i14 = insertelement <16 x i8> %i13, i8 %t14, i32 14
  %i15 = insertelement <16 x i8> %i14, i8 %t15, i32 15
  ret <16 x i8> %i15
}

; Same as @extract_4_v4i32 except that lanes 1 and 2 of %b are read in swapped
; order (%b1 reads index 2, %b2 reads index 1), so the lanes are no longer
; taken in sequence. The expected output is a four-register tbl driven by a
; mask loaded from the constant pool instead of the uzp1 chain.
define <16 x i8> @extract_4_v4i32_badindex(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
; CHECK-LABEL: extract_4_v4i32_badindex:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: adrp x8, .LCPI5_0
; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI5_0]
; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
; CHECK-NEXT: ret
entry:
  %a0 = extractelement <4 x i32> %a, i32 0
  %a1 = extractelement <4 x i32> %a, i32 1
  %a2 = extractelement <4 x i32> %a, i32 2
  %a3 = extractelement <4 x i32> %a, i32 3
  %b0 = extractelement <4 x i32> %b, i32 0
  %b1 = extractelement <4 x i32> %b, i32 2
  %b2 = extractelement <4 x i32> %b, i32 1
  %b3 = extractelement <4 x i32> %b, i32 3
  %c0 = extractelement <4 x i32> %c, i32 0
  %c1 = extractelement <4 x i32> %c, i32 1
  %c2 = extractelement <4 x i32> %c, i32 2
  %c3 = extractelement <4 x i32> %c, i32 3
  %d0 = extractelement <4 x i32> %d, i32 0
  %d1 = extractelement <4 x i32> %d, i32 1
  %d2 = extractelement <4 x i32> %d, i32 2
  %d3 = extractelement <4 x i32> %d, i32 3
  %t0 = trunc i32 %a0 to i8
  %t1 = trunc i32 %a1 to i8
  %t2 = trunc i32 %a2 to i8
  %t3 = trunc i32 %a3 to i8
  %t4 = trunc i32 %b0 to i8
  %t5 = trunc i32 %b1 to i8
  %t6 = trunc i32 %b2 to i8
  %t7 = trunc i32 %b3 to i8
  %t8 = trunc i32 %c0 to i8
  %t9 = trunc i32 %c1 to i8
  %t10 = trunc i32 %c2 to i8
  %t11 = trunc i32 %c3 to i8
  %t12 = trunc i32 %d0 to i8
  %t13 = trunc i32 %d1 to i8
  %t14 = trunc i32 %d2 to i8
  %t15 = trunc i32 %d3 to i8
  %i0 = insertelement <16 x i8> undef, i8 %t0, i32 0
  %i1 = insertelement <16 x i8> %i0, i8 %t1, i32 1
  %i2 = insertelement <16 x i8> %i1, i8 %t2, i32 2
  %i3 = insertelement <16 x i8> %i2, i8 %t3, i32 3
  %i4 = insertelement <16 x i8> %i3, i8 %t4, i32 4
  %i5 = insertelement <16 x i8> %i4, i8 %t5, i32 5
  %i6 = insertelement <16 x i8> %i5, i8 %t6, i32 6
  %i7 = insertelement <16 x i8> %i6, i8 %t7, i32 7
  %i8 = insertelement <16 x i8> %i7, i8 %t8, i32 8
  %i9 = insertelement <16 x i8> %i8, i8 %t9, i32 9
  %i10 = insertelement <16 x i8> %i9, i8 %t10, i32 10
  %i11 = insertelement <16 x i8> %i10, i8 %t11, i32 11
  %i12 = insertelement <16 x i8> %i11, i8 %t12, i32 12
  %i13 = insertelement <16 x i8> %i12, i8 %t13, i32 13
  %i14 = insertelement <16 x i8> %i13, i8 %t14, i32 14
  %i15 = insertelement <16 x i8> %i14, i8 %t15, i32 15
  ret <16 x i8> %i15
}

; A single <4 x i32> source whose four truncated lanes are repeated four times
; to fill the <16 x i8> result. The expected output is two uzp1 instructions,
; each using the same register for both operands.
define <16 x i8> @extract_4_v4i32_one(<4 x i32> %a) {
; CHECK-LABEL: extract_4_v4i32_one:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h
; CHECK-NEXT: uzp1 v0.16b, v0.16b, v0.16b
; CHECK-NEXT: ret
entry:
  %a0 = extractelement <4 x i32> %a, i32 0
  %a1 = extractelement <4 x i32> %a, i32 1
  %a2 = extractelement <4 x i32> %a, i32 2
  %a3 = extractelement <4 x i32> %a, i32 3
  %t0 = trunc i32 %a0 to i8
  %t1 = trunc i32 %a1 to i8
  %t2 = trunc i32 %a2 to i8
  %t3 = trunc i32 %a3 to i8
  %i0 = insertelement <16 x i8> undef, i8 %t0, i32 0
  %i1 = insertelement <16 x i8> %i0, i8 %t1, i32 1
  %i2 = insertelement <16 x i8> %i1, i8 %t2, i32 2
  %i3 = insertelement <16 x i8> %i2, i8 %t3, i32 3
  %i4 = insertelement <16 x i8> %i3, i8 %t0, i32 4
  %i5 = insertelement <16 x i8> %i4, i8 %t1, i32 5
  %i6 = insertelement <16 x i8> %i5, i8 %t2, i32 6
  %i7 = insertelement <16 x i8> %i6, i8 %t3, i32 7
  %i8 = insertelement <16 x i8> %i7, i8 %t0, i32 8
  %i9 = insertelement <16 x i8> %i8, i8 %t1, i32 9
  %i10 = insertelement <16 x i8> %i9, i8 %t2, i32 10
  %i11 = insertelement <16 x i8> %i10, i8 %t3, i32 11
  %i12 = insertelement <16 x i8> %i11, i8 %t0, i32 12
  %i13 = insertelement <16 x i8> %i12, i8 %t1, i32 13
  %i14 = insertelement <16 x i8> %i13, i8 %t2, i32 14
  %i15 = insertelement <16 x i8> %i14, i8 %t3, i32 15
  ret <16 x i8> %i15
}
