; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; The first llc invocation above uses the default instruction selector and the
; second passes -global-isel; functions whose expected output differs between
; the two carry separate -SD and -GI check blocks.

; PR23065: SCALAR_TO_VECTOR implies the top elements 1 to N-1 of the N-element vector are undefined.

; Builds <58712, %a[0]> with a shufflevector, reinterprets it as <4 x i16>, and
; moves i16 lane 1 of that value into lane 0 of the result (other lanes undef).
define <4 x i16> @foo1(<2 x i32> %a) {
; CHECK-SD-LABEL: foo1:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: foo1:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #58712 // =0xe558
; CHECK-GI-NEXT: mov v1.s[0], w8
; CHECK-GI-NEXT: zip1 v0.2s, v1.2s, v0.2s
; CHECK-GI-NEXT: rev32 v0.4h, v0.4h
; CHECK-GI-NEXT: ret
  %1 = shufflevector <2 x i32> <i32 58712, i32 undef>, <2 x i32> %a, <2 x i32> <i32 0, i32 2>
; Can't optimize the following bitcast to scalar_to_vector.
  %2 = bitcast <2 x i32> %1 to <4 x i16>
  %3 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ret <4 x i16> %3
}

; Same shape as foo1, but with the constant 712 (0x2c8) in lane 0 of the first
; shufflevector operand.
define <4 x i16> @foo2(<2 x i32> %a) {
; CHECK-SD-LABEL: foo2:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: foo2:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: mov w8, #712 // =0x2c8
; CHECK-GI-NEXT: mov v1.s[0], w8
; CHECK-GI-NEXT: zip1 v0.2s, v1.2s, v0.2s
; CHECK-GI-NEXT: rev32 v0.4h, v0.4h
; CHECK-GI-NEXT: ret
  %1 = shufflevector <2 x i32> <i32 712, i32 undef>, <2 x i32> %a, <2 x i32> <i32 0, i32 2>
; Can't optimize the following bitcast to scalar_to_vector.
  %2 = bitcast <2 x i32> %1 to <4 x i16>
  %3 = shufflevector <4 x i16> %2, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  ret <4 x i16> %3
}

; ===== To and From Scalar Types =====

; Adds two <4 x i8> vectors and bitcasts the sum to i32.
define i32 @bitcast_v4i8_i32(<4 x i8> %a, <4 x i8> %b){
; CHECK-SD-LABEL: bitcast_v4i8_i32:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sub sp, sp, #16
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
; CHECK-SD-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-SD-NEXT: fmov w0, s0
; CHECK-SD-NEXT: add sp, sp, #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bitcast_v4i8_i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
  %c = add <4 x i8> %a, %b
  %d = bitcast <4 x i8> %c to i32
  ret i32 %d
}

; Adds two i32 scalars and bitcasts the sum to <4 x i8>.
define <4 x i8> @bitcast_i32_v4i8(i32 %a, i32 %b){
; CHECK-SD-LABEL: bitcast_i32_v4i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: add w8, w0, w1
; CHECK-SD-NEXT: fmov s0, w8
; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bitcast_i32_v4i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: add w8, w0, w1
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: mov b1, v0.b[1]
; CHECK-GI-NEXT: mov b2, v0.b[2]
; CHECK-GI-NEXT: fmov w8, s1
; CHECK-GI-NEXT: mov b1, v0.b[3]
; CHECK-GI-NEXT: mov v0.h[1], w8
; CHECK-GI-NEXT: fmov w8, s2
; CHECK-GI-NEXT: mov v0.h[2], w8
; CHECK-GI-NEXT: fmov w8, s1
; CHECK-GI-NEXT: mov v0.h[3], w8
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
  %c = add i32 %a, %b
  %d = bitcast i32 %c to <4 x i8>
  ret <4 x i8> %d
}

; Adds two <2 x i16> vectors and bitcasts the sum to i32.
define i32 @bitcast_v2i16_i32(<2 x i16> %a, <2 x i16> %b){
; CHECK-SD-LABEL: bitcast_v2i16_i32:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sub sp, sp, #16
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
; CHECK-SD-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-SD-NEXT: mov w8, v0.s[1]
; CHECK-SD-NEXT: fmov w9, s0
; CHECK-SD-NEXT: strh w9, [sp, #12]
; CHECK-SD-NEXT: strh w8, [sp, #14]
; CHECK-SD-NEXT: ldr w0, [sp, #12]
; CHECK-SD-NEXT: add sp, sp, #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bitcast_v2i16_i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
  %c = add <2 x i16> %a, %b
  %d = bitcast <2 x i16> %c to i32
  ret i32 %d
}

; Adds two i32 scalars and bitcasts the sum to <2 x i16>.
define <2 x i16> @bitcast_i32_v2i16(i32 %a, i32 %b){
; CHECK-SD-LABEL: bitcast_i32_v2i16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: add w8, w0, w1
; CHECK-SD-NEXT: fmov s0, w8
; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bitcast_i32_v2i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: add w8, w0, w1
; CHECK-GI-NEXT: fmov s0, w8
; CHECK-GI-NEXT: mov h1, v0.h[1]
; CHECK-GI-NEXT: mov v0.s[0], w8
; CHECK-GI-NEXT: fmov w8, s1
; CHECK-GI-NEXT: mov v0.s[1], w8
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
  %c = add i32 %a, %b
  %d = bitcast i32 %c to <2 x i16>
  ret <2 x i16> %d
}

; Adds two <8 x i8> vectors and bitcasts the sum to i64.
define i64 @bitcast_v8i8_i64(<8 x i8> %a, <8 x i8> %b){
; CHECK-LABEL: bitcast_v8i8_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.8b, v0.8b, v1.8b
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %c = add <8 x i8> %a, %b
  %d = bitcast <8 x i8> %c to i64
  ret i64 %d
}

; Adds two i64 scalars and bitcasts the sum to <8 x i8>.
define <8 x i8> @bitcast_i64_v8i8(i64 %a, i64 %b){
; CHECK-LABEL: bitcast_i64_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: add x8, x0, x1
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: ret
  %c = add i64 %a, %b
  %d = bitcast i64 %c to <8 x i8>
  ret <8 x i8> %d
}

; Adds two <4 x i16> vectors and bitcasts the sum to i64.
define i64 @bitcast_v4i16_i64(<4 x i16> %a, <4 x i16> %b){
; CHECK-LABEL: bitcast_v4i16_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %c = add <4 x i16> %a, %b
  %d = bitcast <4 x i16> %c to i64
  ret i64 %d
}

; Adds two i64 scalars and bitcasts the sum to <4 x i16>.
define <4 x i16> @bitcast_i64_v4i16(i64 %a, i64 %b){
; CHECK-LABEL: bitcast_i64_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: add x8, x0, x1
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: ret
  %c = add i64 %a, %b
  %d = bitcast i64 %c to <4 x i16>
  ret <4 x i16> %d
}

; Adds two <2 x i32> vectors and bitcasts the sum to i64.
define i64 @bitcast_v2i32_i64(<2 x i32> %a, <2 x i32> %b){
; CHECK-LABEL: bitcast_v2i32_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %c = add <2 x i32> %a, %b
  %d = bitcast <2 x i32> %c to i64
  ret i64 %d
}

; Adds two i64 scalars and bitcasts the sum to <2 x i32>.
define <2 x i32> @bitcast_i64_v2i32(i64 %a, i64 %b){
; CHECK-LABEL: bitcast_i64_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: add x8, x0, x1
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: ret
  %c = add i64 %a, %b
  %d = bitcast i64 %c to <2 x i32>
  ret <2 x i32> %d
}

; ===== Legal Vector Types =====
; Each function in this section adds two 64-bit or 128-bit vectors and bitcasts
; the sum to another vector type of the same total width. The expected output
; for both selectors is the single vector add followed by ret.

; <2 x i32> sum reinterpreted as <4 x i16>.
define <4 x i16> @bitcast_v2i32_v4i16(<2 x i32> %a, <2 x i32> %b){
; CHECK-LABEL: bitcast_v2i32_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
  %c = add <2 x i32> %a, %b
  %d = bitcast <2 x i32> %c to <4 x i16>
  ret <4 x i16> %d
}

; <2 x i64> sum reinterpreted as <4 x i32>.
define <4 x i32> @bitcast_v2i64_v4i32(<2 x i64> %a, <2 x i64> %b){
; CHECK-LABEL: bitcast_v2i64_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
  %c = add <2 x i64> %a, %b
  %d = bitcast <2 x i64> %c to <4 x i32>
  ret <4 x i32> %d
}

; <2 x i32> sum reinterpreted as <8 x i8>.
define <8 x i8> @bitcast_v2i32_v8i8(<2 x i32> %a, <2 x i32> %b){
; CHECK-LABEL: bitcast_v2i32_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
  %c = add <2 x i32> %a, %b
  %d = bitcast <2 x i32> %c to <8 x i8>
  ret <8 x i8> %d
}

; <2 x i64> sum reinterpreted as <8 x i16>.
define <8 x i16> @bitcast_v2i64_v8i16(<2 x i64> %a, <2 x i64> %b){
; CHECK-LABEL: bitcast_v2i64_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
  %c = add <2 x i64> %a, %b
  %d = bitcast <2 x i64> %c to <8 x i16>
  ret <8 x i16> %d
}

; <2 x i64> sum reinterpreted as <16 x i8>.
define <16 x i8> @bitcast_v2i64_v16i8(<2 x i64> %a, <2 x i64> %b){
; CHECK-LABEL: bitcast_v2i64_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
  %c = add <2 x i64> %a, %b
  %d = bitcast <2 x i64> %c to <16 x i8>
  ret <16 x i8> %d
}

; <4 x i16> sum reinterpreted as <2 x i32>.
define <2 x i32> @bitcast_v4i16_v2i32(<4 x i16> %a, <4 x i16> %b){
; CHECK-LABEL: bitcast_v4i16_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
  %c = add <4 x i16> %a, %b
  %d = bitcast <4 x i16> %c to <2 x i32>
  ret <2 x i32> %d
}

; <4 x i32> sum reinterpreted as <2 x i64>.
define <2 x i64> @bitcast_v4i32_v2i64(<4 x i32> %a, <4 x i32> %b){
; CHECK-LABEL: bitcast_v4i32_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %c = add <4 x i32> %a, %b
  %d = bitcast <4 x i32> %c to <2 x i64>
  ret <2 x i64> %d
}

; <4 x i16> sum reinterpreted as <8 x i8>.
define <8 x i8> @bitcast_v4i16_v8i8(<4 x i16> %a, <4 x i16> %b){
; CHECK-LABEL: bitcast_v4i16_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
  %c = add <4 x i16> %a, %b
  %d = bitcast <4 x i16> %c to <8 x i8>
  ret <8 x i8> %d
}

; <4 x i32> sum reinterpreted as <8 x i16>.
define <8 x i16> @bitcast_v4i32_v8i16(<4 x i32> %a, <4 x i32> %b){
; CHECK-LABEL: bitcast_v4i32_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %c = add <4 x i32> %a, %b
  %d = bitcast <4 x i32> %c to <8 x i16>
  ret <8 x i16> %d
}

; <4 x i32> sum reinterpreted as <16 x i8>.
define <16 x i8> @bitcast_v4i32_v16i8(<4 x i32> %a, <4 x i32> %b){
; CHECK-LABEL: bitcast_v4i32_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %c = add <4 x i32> %a, %b
  %d = bitcast <4 x i32> %c to <16 x i8>
  ret <16 x i8> %d
}

; <8 x i8> sum reinterpreted as <2 x i32>.
define <2 x i32> @bitcast_v8i8_v2i32(<8 x i8> %a, <8 x i8> %b){
; CHECK-LABEL: bitcast_v8i8_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
  %c = add <8 x i8> %a, %b
  %d = bitcast <8 x i8> %c to <2 x i32>
  ret <2 x i32> %d
}

; <8 x i16> sum reinterpreted as <2 x i64>.
define <2 x i64> @bitcast_v8i16_v2i64(<8 x i16> %a, <8 x i16> %b){
; CHECK-LABEL: bitcast_v8i16_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
  %c = add <8 x i16> %a, %b
  %d = bitcast <8 x i16> %c to <2 x i64>
  ret <2 x i64> %d
}

; <8 x i8> sum reinterpreted as <4 x i16>.
define <4 x i16> @bitcast_v8i8_v4i16(<8 x i8> %a, <8 x i8> %b){
; CHECK-LABEL: bitcast_v8i8_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
  %c = add <8 x i8> %a, %b
  %d = bitcast <8 x i8> %c to <4 x i16>
  ret <4 x i16> %d
}

; <8 x i16> sum reinterpreted as <4 x i32>.
define <4 x i32> @bitcast_v8i16_v4i32(<8 x i16> %a, <8 x i16> %b){
; CHECK-LABEL: bitcast_v8i16_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
  %c = add <8 x i16> %a, %b
  %d = bitcast <8 x i16> %c to <4 x i32>
  ret <4 x i32> %d
}

; <8 x i16> sum reinterpreted as <16 x i8>.
define <16 x i8> @bitcast_v8i16_v16i8(<8 x i16> %a, <8 x i16> %b){
; CHECK-LABEL: bitcast_v8i16_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
  %c = add <8 x i16> %a, %b
  %d = bitcast <8 x i16> %c to <16 x i8>
  ret <16 x i8> %d
}

; <16 x i8> sum reinterpreted as <2 x i64>.
define <2 x i64> @bitcast_v16i8_v2i64(<16 x i8> %a, <16 x i8> %b){
; CHECK-LABEL: bitcast_v16i8_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
  %c = add <16 x i8> %a, %b
  %d = bitcast <16 x i8> %c to <2 x i64>
  ret <2 x i64> %d
}

; <16 x i8> sum reinterpreted as <4 x i32>.
define <4 x i32> @bitcast_v16i8_v4i32(<16 x i8> %a, <16 x i8> %b){
; CHECK-LABEL: bitcast_v16i8_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
  %c = add <16 x i8> %a, %b
  %d = bitcast <16 x i8> %c to <4 x i32>
  ret <4 x i32> %d
}

; <16 x i8> sum reinterpreted as <8 x i16>.
define <8 x i16> @bitcast_v16i8_v8i16(<16 x i8> %a, <16 x i8> %b){
; CHECK-LABEL: bitcast_v16i8_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
  %c = add <16 x i8> %a, %b
  %d = bitcast <16 x i8> %c to <8 x i16>
  ret <8 x i16> %d
}

; ===== Smaller/Larger Width Vectors with Legal Element Sizes =====

; Adds two <2 x i16> vectors (32 bits total) and bitcasts the sum to <4 x i8>.
define <4 x i8> @bitcast_v2i16_v4i8(<2 x i16> %a, <2 x i16> %b){
; CHECK-SD-LABEL: bitcast_v2i16_v4i8:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sub sp, sp, #16
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
; CHECK-SD-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-SD-NEXT: mov w8, v0.s[1]
; CHECK-SD-NEXT: fmov w9, s0
; CHECK-SD-NEXT: strh w9, [sp, #12]
; CHECK-SD-NEXT: strh w8, [sp, #14]
; CHECK-SD-NEXT: ldr s0, [sp, #12]
; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: add sp, sp, #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bitcast_v2i16_v4i8:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-GI-NEXT: uzp1 v0.4h, v0.4h, v0.4h
; CHECK-GI-NEXT: mov b1, v0.b[1]
; CHECK-GI-NEXT: mov b2, v0.b[2]
; CHECK-GI-NEXT: fmov w8, s1
; CHECK-GI-NEXT: mov b1, v0.b[3]
; CHECK-GI-NEXT: mov v0.h[1], w8
; CHECK-GI-NEXT: fmov w8, s2
; CHECK-GI-NEXT: mov v0.h[2], w8
; CHECK-GI-NEXT: fmov w8, s1
; CHECK-GI-NEXT: mov v0.h[3], w8
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
  %c = add <2 x i16> %a, %b
  %d = bitcast <2 x i16> %c to <4 x i8>
  ret <4 x i8> %d
}

; Adds two <4 x i8> vectors (32 bits total) and bitcasts the sum to <2 x i16>.
define <2 x i16> @bitcast_v4i8_v2i16(<4 x i8> %a, <4 x i8> %b){
; CHECK-SD-LABEL: bitcast_v4i8_v2i16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: sub sp, sp, #16
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
; CHECK-SD-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT: add x8, sp, #12
; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-SD-NEXT: str s0, [sp, #12]
; CHECK-SD-NEXT: ld1 { v0.h }[0], [x8]
; CHECK-SD-NEXT: orr x8, x8, #0x2
; CHECK-SD-NEXT: ld1 { v0.h }[2], [x8]
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: add sp, sp, #16
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bitcast_v4i8_v2i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-GI-NEXT: uzp1 v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT: mov h1, v0.h[1]
; CHECK-GI-NEXT: fmov w8, s0
; CHECK-GI-NEXT: mov v0.s[0], w8
; CHECK-GI-NEXT: fmov w8, s1
; CHECK-GI-NEXT: mov v0.s[1], w8
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
  %c = add <4 x i8> %a, %b
  %d = bitcast <4 x i8> %c to <2 x i16>
  ret <2 x i16> %d
}

; Adds two <4 x i64> vectors (256 bits total) and bitcasts the sum to <8 x i32>.
define <8 x i32> @bitcast_v4i64_v8i32(<4 x i64> %a, <4 x i64> %b){
; CHECK-SD-LABEL: bitcast_v4i64_v8i32:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: add v1.2d, v1.2d, v3.2d
; CHECK-SD-NEXT: add v0.2d, v0.2d, v2.2d
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bitcast_v4i64_v8i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: add v0.2d, v0.2d, v2.2d
; CHECK-GI-NEXT: add v1.2d, v1.2d, v3.2d
; CHECK-GI-NEXT: ret
  %c = add <4 x i64> %a, %b
  %d = bitcast <4 x i64> %c to <8 x i32>
  ret <8 x i32> %d
}

; Adds two <4 x i64> vectors (256 bits total) and bitcasts the sum to <16 x i16>.
define <16 x i16> @bitcast_v4i64_v16i16(<4 x i64> %a, <4 x i64> %b){
; CHECK-SD-LABEL: bitcast_v4i64_v16i16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: add v1.2d, v1.2d, v3.2d
; CHECK-SD-NEXT: add v0.2d, v0.2d, v2.2d
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bitcast_v4i64_v16i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: add v0.2d, v0.2d, v2.2d
; CHECK-GI-NEXT: add v1.2d, v1.2d, v3.2d
; CHECK-GI-NEXT: ret
  %c = add <4 x i64> %a, %b
  %d = bitcast <4 x i64> %c to <16 x i16>
  ret <16 x i16> %d
}

; Adds two <8 x i32> vectors (256 bits total) and bitcasts the sum to <4 x i64>.
define <4 x i64> @bitcast_v8i32_v4i64(<8 x i32> %a, <8 x i32> %b){
; CHECK-SD-LABEL: bitcast_v8i32_v4i64:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: add v1.4s, v1.4s, v3.4s
; CHECK-SD-NEXT: add v0.4s, v0.4s, v2.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bitcast_v8i32_v4i64:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: add v2.4s, v0.4s, v2.4s
; CHECK-GI-NEXT: add v3.4s, v1.4s, v3.4s
; CHECK-GI-NEXT: mov x8, v2.d[1]
; CHECK-GI-NEXT: mov x9, v3.d[1]
; CHECK-GI-NEXT: mov v0.d[0], v2.d[0]
; CHECK-GI-NEXT: mov v1.d[0], v3.d[0]
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: mov v1.d[1], x9
; CHECK-GI-NEXT: ret
  %c = add <8 x i32> %a, %b
  %d = bitcast <8 x i32> %c to <4 x i64>
  ret <4 x i64> %d
}

; Adds two <8 x i32> vectors (256 bits total) and bitcasts the sum to <16 x i16>.
define <16 x i16> @bitcast_v8i32_v16i16(<8 x i32> %a, <8 x i32> %b){
; CHECK-SD-LABEL: bitcast_v8i32_v16i16:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: add v1.4s, v1.4s, v3.4s
; CHECK-SD-NEXT: add v0.4s, v0.4s, v2.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bitcast_v8i32_v16i16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: add v0.4s, v0.4s, v2.4s
; CHECK-GI-NEXT: add v1.4s, v1.4s, v3.4s
; CHECK-GI-NEXT: ret
  %c = add <8 x i32> %a, %b
  %d = bitcast <8 x i32> %c to <16 x i16>
  ret <16 x i16> %d
}

; Adds two <8 x i64> vectors (512 bits total) and bitcasts the sum to <16 x i32>.
define <16 x i32> @bitcast_v8i64_v16i32(<8 x i64> %a, <8 x i64> %b){
; CHECK-SD-LABEL: bitcast_v8i64_v16i32:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: add v2.2d, v2.2d, v6.2d
; CHECK-SD-NEXT: add v0.2d, v0.2d, v4.2d
; CHECK-SD-NEXT: add v1.2d, v1.2d, v5.2d
; CHECK-SD-NEXT: add v3.2d, v3.2d, v7.2d
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bitcast_v8i64_v16i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: add v0.2d, v0.2d, v4.2d
; CHECK-GI-NEXT: add v1.2d, v1.2d, v5.2d
; CHECK-GI-NEXT: add v2.2d, v2.2d, v6.2d
; CHECK-GI-NEXT: add v3.2d, v3.2d, v7.2d
; CHECK-GI-NEXT: ret
  %c = add <8 x i64> %a, %b
  %d = bitcast <8 x i64> %c to <16 x i32>
  ret <16 x i32> %d
}

; Adds two <16 x i16> vectors (256 bits total) and bitcasts the sum to <4 x i64>.
define <4 x i64> @bitcast_v16i16_v4i64(<16 x i16> %a, <16 x i16> %b){
; CHECK-SD-LABEL: bitcast_v16i16_v4i64:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: add v1.8h, v1.8h, v3.8h
; CHECK-SD-NEXT: add v0.8h, v0.8h, v2.8h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bitcast_v16i16_v4i64:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: add v2.8h, v0.8h, v2.8h
; CHECK-GI-NEXT: add v3.8h, v1.8h, v3.8h
; CHECK-GI-NEXT: mov x8, v2.d[1]
; CHECK-GI-NEXT: mov x9, v3.d[1]
; CHECK-GI-NEXT: mov v0.d[0], v2.d[0]
; CHECK-GI-NEXT: mov v1.d[0], v3.d[0]
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: mov v1.d[1], x9
; CHECK-GI-NEXT: ret
  %c = add <16 x i16> %a, %b
  %d = bitcast <16 x i16> %c to <4 x i64>
  ret <4 x i64> %d
}

; Adds two <16 x i16> vectors (256 bits total) and bitcasts the sum to <8 x i32>.
define <8 x i32> @bitcast_v16i16_v8i32(<16 x i16> %a, <16 x i16> %b){
; CHECK-SD-LABEL: bitcast_v16i16_v8i32:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: add v1.8h, v1.8h, v3.8h
; CHECK-SD-NEXT: add v0.8h, v0.8h, v2.8h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bitcast_v16i16_v8i32:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: add v0.8h, v0.8h, v2.8h
; CHECK-GI-NEXT: add v1.8h, v1.8h, v3.8h
; CHECK-GI-NEXT: ret
  %c = add <16 x i16> %a, %b
  %d = bitcast <16 x i16> %c to <8 x i32>
  ret <8 x i32> %d
}

; Adds two <16 x i32> vectors (512 bits total) and bitcasts the sum to <8 x i64>.
define <8 x i64> @bitcast_v16i32_v8i64(<16 x i32> %a, <16 x i32> %b){
; CHECK-SD-LABEL: bitcast_v16i32_v8i64:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: add v2.4s, v2.4s, v6.4s
; CHECK-SD-NEXT: add v0.4s, v0.4s, v4.4s
; CHECK-SD-NEXT: add v1.4s, v1.4s, v5.4s
; CHECK-SD-NEXT: add v3.4s, v3.4s, v7.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: bitcast_v16i32_v8i64:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: add v4.4s, v0.4s, v4.4s
; CHECK-GI-NEXT: add v5.4s, v1.4s, v5.4s
; CHECK-GI-NEXT: add v6.4s, v2.4s, v6.4s
; CHECK-GI-NEXT: add v7.4s, v3.4s, v7.4s
; CHECK-GI-NEXT: mov x8, v4.d[1]
; CHECK-GI-NEXT: mov x9, v5.d[1]
; CHECK-GI-NEXT: mov x10, v6.d[1]
; CHECK-GI-NEXT: mov x11, v7.d[1]
; CHECK-GI-NEXT: mov v0.d[0], v4.d[0]
; CHECK-GI-NEXT: mov v1.d[0], v5.d[0]
; CHECK-GI-NEXT: mov v2.d[0], v6.d[0]
; CHECK-GI-NEXT: mov v3.d[0], v7.d[0]
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: mov v1.d[1], x9
; CHECK-GI-NEXT: mov v2.d[1], x10
; CHECK-GI-NEXT: mov v3.d[1], x11
; CHECK-GI-NEXT: ret
  %c = add <16 x i32> %a, %b
  %d = bitcast <16 x i32> %c to <8 x i64>
  ret <8 x i64> %d
}

; ===== Vectors with Non-Pow 2 Widths =====

; Adds two <3 x i32> vectors (96 bits total) and bitcasts the sum to <6 x i16>.
; Both selectors are expected to emit a single 4-lane .4s add.
define <6 x i16> @bitcast_v3i32_v6i16(<3 x i32> %a, <3 x i32> %b){
; CHECK-LABEL: bitcast_v3i32_v6i16:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %c = add <3 x i32> %a, %b
  %d = bitcast <3 x i32> %c to <6 x i16>
  ret <6 x i16> %d
}