; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s

; This file exercises AArch64 lowering of two-level shuffles (a shuffle whose
; operands are themselves shuffles of up to four source vectors) and of
; equivalent extractelement/insertelement chains. Where a function loads a
; byte mask from the constant pool, the expected pool contents are listed
; immediately before that function.

; Four <4 x i8> sources: %x = a ++ b, %y = c ++ d, and the outer mask picks
; a0,b0,c0,d0,a1,b1,c1,d1,... into 16 lanes. The expected assembly is a single
; four-register tbl driven by one 16-byte constant-pool mask.
; CHECK: .LCPI0_0:
; CHECK: .byte 0 // 0x0
; CHECK: .byte 16 // 0x10
; CHECK: .byte 32 // 0x20
; CHECK: .byte 48 // 0x30
; CHECK: .byte 2 // 0x2
; CHECK: .byte 18 // 0x12
; CHECK: .byte 34 // 0x22
; CHECK: .byte 50 // 0x32
; CHECK: .byte 4 // 0x4
; CHECK: .byte 20 // 0x14
; CHECK: .byte 36 // 0x24
; CHECK: .byte 52 // 0x34
; CHECK: .byte 6 // 0x6
; CHECK: .byte 22 // 0x16
; CHECK: .byte 38 // 0x26
; CHECK: .byte 54 // 0x36
define <16 x i8> @shuffle4_v4i8_16(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) {
; CHECK-LABEL: shuffle4_v4i8_16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: adrp x8, .LCPI0_0
; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI0_0]
; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
; CHECK-NEXT: ret
  %x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %y = shufflevector <4 x i8> %c, <4 x i8> %d, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %z = shufflevector <8 x i8> %x, <8 x i8> %y, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
  ret <16 x i8> %z
}

; Same four-source pattern as above but with an 8-lane result, so only the
; first eight interleaved lanes are produced. The expected assembly is a
; four-register tbl with a 64-bit index vector.
; CHECK: .LCPI1_0:
; CHECK: .byte 0 // 0x0
; CHECK: .byte 16 // 0x10
; CHECK: .byte 32 // 0x20
; CHECK: .byte 48 // 0x30
; CHECK: .byte 2 // 0x2
; CHECK: .byte 18 // 0x12
; CHECK: .byte 34 // 0x22
; CHECK: .byte 50 // 0x32
define <8 x i8> @shuffle4_v4i8_8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) {
; CHECK-LABEL: shuffle4_v4i8_8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: adrp x8, .LCPI1_0
; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: ldr d4, [x8, :lo12:.LCPI1_0]
; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.8b
; CHECK-NEXT: ret
  %x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %y = shufflevector <4 x i8> %c, <4 x i8> %d, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %z = shufflevector <8 x i8> %x, <8 x i8> %y, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13>
  ret <8 x i8> %z
}

; Four <8 x i8> sources whose first-level shuffles use non-identity masks.
; The expected assembly keeps the two levels separate: one single-register
; tbl per first-level shuffle, then a two-register tbl for the outer shuffle.
; CHECK: .LCPI2_0:
; CHECK: .byte 0 // 0x0
; CHECK: .byte 3 // 0x3
; CHECK: .byte 2 // 0x2
; CHECK: .byte 1 // 0x1
; CHECK: .byte 12 // 0xc
; CHECK: .byte 15 // 0xf
; CHECK: .byte 14 // 0xe
; CHECK: .byte 12 // 0xc
; CHECK: .LCPI2_1:
; CHECK: .byte 4 // 0x4
; CHECK: .byte 7 // 0x7
; CHECK: .byte 6 // 0x6
; CHECK: .byte 7 // 0x7
; CHECK: .byte 8 // 0x8
; CHECK: .byte 10 // 0xa
; CHECK: .byte 9 // 0x9
; CHECK: .byte 11 // 0xb
; CHECK: .section .rodata.cst16,"aM",@progbits,16
; CHECK: .p2align 4
; CHECK: .LCPI2_2:
; CHECK: .byte 0 // 0x0
; CHECK: .byte 4 // 0x4
; CHECK: .byte 16 // 0x10
; CHECK: .byte 20 // 0x14
; CHECK: .byte 1 // 0x1
; CHECK: .byte 5 // 0x5
; CHECK: .byte 17 // 0x11
; CHECK: .byte 21 // 0x15
; CHECK: .byte 2 // 0x2
; CHECK: .byte 6 // 0x6
; CHECK: .byte 18 // 0x12
; CHECK: .byte 22 // 0x16
; CHECK: .byte 3 // 0x3
; CHECK: .byte 7 // 0x7
; CHECK: .byte 19 // 0x13
; CHECK: .byte 23 // 0x17
define <16 x i8> @shuffle4_v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
; CHECK-LABEL: shuffle4_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-NEXT: adrp x8, .LCPI2_0
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: mov v2.d[1], v3.d[0]
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI2_0]
; CHECK-NEXT: adrp x8, .LCPI2_1
; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI2_1]
; CHECK-NEXT: adrp x8, .LCPI2_2
; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b
; CHECK-NEXT: tbl v1.8b, { v2.16b }, v3.8b
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI2_2]
; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-NEXT: ret
  %x = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 3, i32 2, i32 1, i32 12, i32 15, i32 14, i32 12>
  %y = shufflevector <8 x i8> %c, <8 x i8> %d, <8 x i32> <i32 4, i32 7, i32 6, i32 7, i32 8, i32 10, i32 9, i32 11>
  %z = shufflevector <8 x i8> %x, <8 x i8> %y, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
  ret <16 x i8> %z
}

; <16 x i8> sources narrowed to 8 lanes by the first-level shuffles. Every
; first-level mask index is below 16, so only %a and %c contribute; the
; expected assembly reads v0 and v2 with single-register tbls and then
; combines them with a two-register tbl.
; CHECK: .LCPI3_0:
; CHECK: .byte 0 // 0x0
; CHECK: .byte 3 // 0x3
; CHECK: .byte 2 // 0x2
; CHECK: .byte 1 // 0x1
; CHECK: .byte 12 // 0xc
; CHECK: .byte 15 // 0xf
; CHECK: .byte 14 // 0xe
; CHECK: .byte 12 // 0xc
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .LCPI3_1:
; CHECK: .byte 4 // 0x4
; CHECK: .byte 7 // 0x7
; CHECK: .byte 6 // 0x6
; CHECK: .byte 7 // 0x7
; CHECK: .byte 8 // 0x8
; CHECK: .byte 10 // 0xa
; CHECK: .byte 9 // 0x9
; CHECK: .byte 11 // 0xb
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .LCPI3_2:
; CHECK: .byte 16 // 0x10
; CHECK: .byte 20 // 0x14
; CHECK: .byte 0 // 0x0
; CHECK: .byte 4 // 0x4
; CHECK: .byte 17 // 0x11
; CHECK: .byte 21 // 0x15
; CHECK: .byte 1 // 0x1
; CHECK: .byte 5 // 0x5
; CHECK: .byte 18 // 0x12
; CHECK: .byte 22 // 0x16
; CHECK: .byte 2 // 0x2
; CHECK: .byte 6 // 0x6
; CHECK: .byte 19 // 0x13
; CHECK: .byte 23 // 0x17
; CHECK: .byte 3 // 0x3
; CHECK: .byte 7 // 0x7
define <16 x i8> @shuffle4_v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-LABEL: shuffle4_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI3_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
; CHECK-NEXT: adrp x8, .LCPI3_1
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI3_1]
; CHECK-NEXT: adrp x8, .LCPI3_2
; CHECK-NEXT: tbl v1.16b, { v0.16b }, v1.16b
; CHECK-NEXT: tbl v0.16b, { v2.16b }, v3.16b
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_2]
; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-NEXT: ret
  %x = shufflevector <16 x i8> %a, <16 x i8> %b, <8 x i32> <i32 0, i32 3, i32 2, i32 1, i32 12, i32 15, i32 14, i32 12>
  %y = shufflevector <16 x i8> %c, <16 x i8> %d, <8 x i32> <i32 4, i32 7, i32 6, i32 7, i32 8, i32 10, i32 9, i32 11>
  %z = shufflevector <8 x i8> %x, <8 x i8> %y, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
  ret <16 x i8> %z
}

; Four <4 x i16> sources concatenated pairwise and interleaved into an
; <8 x i16> result. The expected assembly builds two 128-bit registers with
; lane moves and uses a two-register tbl with a byte-granular mask.
; CHECK: .LCPI4_0:
; CHECK: .byte 0 // 0x0
; CHECK: .byte 1 // 0x1
; CHECK: .byte 8 // 0x8
; CHECK: .byte 9 // 0x9
; CHECK: .byte 16 // 0x10
; CHECK: .byte 17 // 0x11
; CHECK: .byte 24 // 0x18
; CHECK: .byte 25 // 0x19
; CHECK: .byte 2 // 0x2
; CHECK: .byte 3 // 0x3
; CHECK: .byte 10 // 0xa
; CHECK: .byte 11 // 0xb
; CHECK: .byte 18 // 0x12
; CHECK: .byte 19 // 0x13
; CHECK: .byte 26 // 0x1a
; CHECK: .byte 27 // 0x1b
define <8 x i16> @shuffle4_v8i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
; CHECK-LABEL: shuffle4_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d5, d2
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-NEXT: adrp x8, .LCPI4_0
; CHECK-NEXT: fmov d4, d0
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI4_0]
; CHECK-NEXT: mov v4.d[1], v1.d[0]
; CHECK-NEXT: mov v5.d[1], v3.d[0]
; CHECK-NEXT: tbl v0.16b, { v4.16b, v5.16b }, v0.16b
; CHECK-NEXT: ret
  %x = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %y = shufflevector <4 x i16> %c, <4 x i16> %d, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %z = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13>
  ret <8 x i16> %z
}

; Four <4 x i32> sources; the outer mask <15, 10, 5, 0> picks d3, c2, b1, a0.
; The expected assembly uses zip/rev64/ext/mov permutes and no tbl.
define <4 x i32> @shuffle4_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
; CHECK-LABEL: shuffle4_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 v1.4s, v1.4s, v1.4s
; CHECK-NEXT: rev64 v3.4s, v3.4s
; CHECK-NEXT: ext v1.16b, v1.16b, v0.16b, #4
; CHECK-NEXT: zip2 v0.4s, v3.4s, v2.4s
; CHECK-NEXT: mov v0.d[1], v1.d[1]
; CHECK-NEXT: ret
  %x = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %y = shufflevector <4 x i32> %c, <4 x i32> %d, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %z = shufflevector <8 x i32> %x, <8 x i32> %y, <4 x i32> <i32 15, i32 10, i32 5, i32 0>
  ret <4 x i32> %z
}

; First-level shuffles narrow <8 x i8> pairs to <4 x i8> using the mask
; <0, 7, 5, 1>, which only references the first operand (%a, %c). The outer
; shuffle widens to 16 lanes with a mask repeated twice. The expected assembly
; reuses one 8-byte mask for both first-level tbls, then a two-register tbl.
; CHECK: .LCPI6_0:
; CHECK: .byte 0 // 0x0
; CHECK: .byte 7 // 0x7
; CHECK: .byte 255 // 0xff
; CHECK: .byte 1 // 0x1
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .section .rodata.cst16,"aM",@progbits,16
; CHECK: .p2align 4
; CHECK: .LCPI6_1:
; CHECK: .byte 0 // 0x0
; CHECK: .byte 16 // 0x10
; CHECK: .byte 19 // 0x13
; CHECK: .byte 3 // 0x3
; CHECK: .byte 1 // 0x1
; CHECK: .byte 17 // 0x11
; CHECK: .byte 0 // 0x0
; CHECK: .byte 1 // 0x1
; CHECK: .byte 0 // 0x0
; CHECK: .byte 16 // 0x10
; CHECK: .byte 19 // 0x13
; CHECK: .byte 3 // 0x3
; CHECK: .byte 1 // 0x1
; CHECK: .byte 17 // 0x11
; CHECK: .byte 0 // 0x0
; CHECK: .byte 1 // 0x1
define <16 x i8> @shuffle4_v8i8_v16i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
; CHECK-LABEL: shuffle4_v8i8_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: adrp x8, .LCPI6_0
; CHECK-NEXT: mov v2.d[1], v2.d[0]
; CHECK-NEXT: mov v0.d[1], v0.d[0]
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI6_0]
; CHECK-NEXT: adrp x8, .LCPI6_1
; CHECK-NEXT: tbl v3.8b, { v2.16b }, v1.8b
; CHECK-NEXT: tbl v2.8b, { v0.16b }, v1.8b
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI6_1]
; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
; CHECK-NEXT: ret
  %x = shufflevector <8 x i8> %a, <8 x i8> %b, <4 x i32> <i32 0, i32 7, i32 5, i32 1>
  %y = shufflevector <8 x i8> %c, <8 x i8> %d, <4 x i32> <i32 0, i32 7, i32 5, i32 1>
  %z = shufflevector <4 x i8> %x, <4 x i8> %y, <16 x i32> <i32 0, i32 4, i32 7, i32 3, i32 1, i32 5, i32 0, i32 1, i32 0, i32 4, i32 7, i32 3, i32 1, i32 5, i32 0, i32 1>
  ret <16 x i8> %z
}

; Same narrowing pattern as shuffle4_v8i8_v16i8 but with an 8-lane result.
; The expected assembly merges the two first-level results into one register
; and finishes with a single-register tbl.
; CHECK: .LCPI7_0:
; CHECK: .byte 0 // 0x0
; CHECK: .byte 7 // 0x7
; CHECK: .byte 255 // 0xff
; CHECK: .byte 1 // 0x1
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .LCPI7_1:
; CHECK: .byte 0 // 0x0
; CHECK: .byte 8 // 0x8
; CHECK: .byte 11 // 0xb
; CHECK: .byte 3 // 0x3
; CHECK: .byte 1 // 0x1
; CHECK: .byte 9 // 0x9
; CHECK: .byte 0 // 0x0
; CHECK: .byte 1 // 0x1
define <8 x i8> @shuffle4_v8i8_v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
; CHECK-LABEL: shuffle4_v8i8_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: adrp x8, .LCPI7_0
; CHECK-NEXT: mov v2.d[1], v2.d[0]
; CHECK-NEXT: mov v0.d[1], v0.d[0]
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI7_0]
; CHECK-NEXT: adrp x8, .LCPI7_1
; CHECK-NEXT: tbl v2.8b, { v2.16b }, v1.8b
; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI7_1]
; CHECK-NEXT: mov v0.d[1], v2.d[0]
; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b
; CHECK-NEXT: ret
  %x = shufflevector <8 x i8> %a, <8 x i8> %b, <4 x i32> <i32 0, i32 7, i32 5, i32 1>
  %y = shufflevector <8 x i8> %c, <8 x i8> %d, <4 x i32> <i32 0, i32 7, i32 5, i32 1>
  %z = shufflevector <4 x i8> %x, <4 x i8> %y, <8 x i32> <i32 0, i32 4, i32 7, i32 3, i32 1, i32 5, i32 0, i32 1>
  ret <8 x i8> %z
}

; The concatenated <8 x i8> halves are zero-extended to i16 before the outer
; interleaving shuffle. The expected assembly clears the high byte of each
; 16-bit lane with bic and then uses a two-register tbl.
; CHECK: .LCPI8_0:
; CHECK: .byte 0 // 0x0
; CHECK: .byte 1 // 0x1
; CHECK: .byte 8 // 0x8
; CHECK: .byte 9 // 0x9
; CHECK: .byte 16 // 0x10
; CHECK: .byte 17 // 0x11
; CHECK: .byte 24 // 0x18
; CHECK: .byte 25 // 0x19
; CHECK: .byte 2 // 0x2
; CHECK: .byte 3 // 0x3
; CHECK: .byte 10 // 0xa
; CHECK: .byte 11 // 0xb
; CHECK: .byte 18 // 0x12
; CHECK: .byte 19 // 0x13
; CHECK: .byte 26 // 0x1a
; CHECK: .byte 27 // 0x1b
define <8 x i16> @shuffle4_v4i8_zext(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) {
; CHECK-LABEL: shuffle4_v4i8_zext:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d5, d2
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
; CHECK-NEXT: adrp x8, .LCPI8_0
; CHECK-NEXT: fmov d4, d0
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI8_0]
; CHECK-NEXT: mov v4.d[1], v1.d[0]
; CHECK-NEXT: mov v5.d[1], v3.d[0]
; CHECK-NEXT: bic v4.8h, #255, lsl #8
; CHECK-NEXT: bic v5.8h, #255, lsl #8
; CHECK-NEXT: tbl v0.16b, { v4.16b, v5.16b }, v0.16b
; CHECK-NEXT: ret
  %x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %y = shufflevector <4 x i8> %c, <4 x i8> %d, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %xe = zext <8 x i8> %x to <8 x i16>
  %ye = zext <8 x i8> %y to <8 x i16>
  %z = shufflevector <8 x i16> %xe, <8 x i16> %ye, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13>
  ret <8 x i16> %z
}

; The four sources are truncated from <4 x i16> to <4 x i8> before the
; four-source interleave. The expected assembly contains no separate narrowing
; instruction, only a four-register tbl.
; CHECK: .LCPI9_0:
; CHECK: .byte 0 // 0x0
; CHECK: .byte 16 // 0x10
; CHECK: .byte 32 // 0x20
; CHECK: .byte 48 // 0x30
; CHECK: .byte 2 // 0x2
; CHECK: .byte 18 // 0x12
; CHECK: .byte 34 // 0x22
; CHECK: .byte 50 // 0x32
; CHECK: .byte 4 // 0x4
; CHECK: .byte 20 // 0x14
; CHECK: .byte 36 // 0x24
; CHECK: .byte 52 // 0x34
; CHECK: .byte 6 // 0x6
; CHECK: .byte 22 // 0x16
; CHECK: .byte 38 // 0x26
; CHECK: .byte 54 // 0x36
define <16 x i8> @shuffle4_v4i16_trunc(<4 x i16> %ae, <4 x i16> %be, <4 x i16> %ce, <4 x i16> %de) {
; CHECK-LABEL: shuffle4_v4i16_trunc:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: adrp x8, .LCPI9_0
; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI9_0]
; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
; CHECK-NEXT: ret
  %a = trunc <4 x i16> %ae to <4 x i8>
  %b = trunc <4 x i16> %be to <4 x i8>
  %c = trunc <4 x i16> %ce to <4 x i8>
  %d = trunc <4 x i16> %de to <4 x i8>
  %x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %y = shufflevector <4 x i8> %c, <4 x i8> %d, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %z = shufflevector <8 x i8> %x, <8 x i8> %y, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
  ret <16 x i8> %z
}

; As shuffle4_v4i16_trunc, but truncating from <4 x i32>. The expected
; assembly narrows each source once with xtn and then uses a four-register
; tbl.
; CHECK: .LCPI10_0:
; CHECK: .byte 0 // 0x0
; CHECK: .byte 16 // 0x10
; CHECK: .byte 32 // 0x20
; CHECK: .byte 48 // 0x30
; CHECK: .byte 2 // 0x2
; CHECK: .byte 18 // 0x12
; CHECK: .byte 34 // 0x22
; CHECK: .byte 50 // 0x32
; CHECK: .byte 4 // 0x4
; CHECK: .byte 20 // 0x14
; CHECK: .byte 36 // 0x24
; CHECK: .byte 52 // 0x34
; CHECK: .byte 6 // 0x6
; CHECK: .byte 22 // 0x16
; CHECK: .byte 38 // 0x26
; CHECK: .byte 54 // 0x36
; CHECK: .text
define <16 x i8> @shuffle4_v4i32_trunc(<4 x i32> %ae, <4 x i32> %be, <4 x i32> %ce, <4 x i32> %de) {
; CHECK-LABEL: shuffle4_v4i32_trunc:
; CHECK: // %bb.0:
; CHECK-NEXT: xtn v4.4h, v0.4s
; CHECK-NEXT: adrp x8, .LCPI10_0
; CHECK-NEXT: xtn v5.4h, v1.4s
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI10_0]
; CHECK-NEXT: xtn v6.4h, v2.4s
; CHECK-NEXT: xtn v7.4h, v3.4s
; CHECK-NEXT: tbl v0.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v0.16b
; CHECK-NEXT: ret
  %a = trunc <4 x i32> %ae to <4 x i8>
  %b = trunc <4 x i32> %be to <4 x i8>
  %c = trunc <4 x i32> %ce to <4 x i8>
  %d = trunc <4 x i32> %de to <4 x i8>
  %x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %y = shufflevector <4 x i8> %c, <4 x i8> %d, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %z = shufflevector <8 x i8> %x, <8 x i8> %y, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
  ret <16 x i8> %z
}

; Three <4 x i8> sources (%y pairs %c with undef) interleaved into a
; <12 x i8> result. The expected assembly is a three-register tbl whose mask
; is padded with 0xff in the last four bytes.
; CHECK: .LCPI11_0:
; CHECK: .byte 0 // 0x0
; CHECK: .byte 16 // 0x10
; CHECK: .byte 32 // 0x20
; CHECK: .byte 2 // 0x2
; CHECK: .byte 18 // 0x12
; CHECK: .byte 34 // 0x22
; CHECK: .byte 4 // 0x4
; CHECK: .byte 20 // 0x14
; CHECK: .byte 36 // 0x24
; CHECK: .byte 6 // 0x6
; CHECK: .byte 22 // 0x16
; CHECK: .byte 38 // 0x26
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
define <12 x i8> @shuffle3_v4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c) {
; CHECK-LABEL: shuffle3_v4i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-NEXT: adrp x8, .LCPI11_0
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI11_0]
; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v3.16b
; CHECK-NEXT: ret
  %x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %y = shufflevector <4 x i8> %c, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %z = shufflevector <8 x i8> %x, <8 x i8> %y, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
  ret <12 x i8> %z
}

; Three <4 x i16> sources interleaved into an <8 x i16> result. The expected
; assembly concatenates %a and %b into one register and uses a two-register
; tbl with %c as the second table register.
; CHECK: .LCPI12_0:
; CHECK: .byte 0 // 0x0
; CHECK: .byte 1 // 0x1
; CHECK: .byte 8 // 0x8
; CHECK: .byte 9 // 0x9
; CHECK: .byte 16 // 0x10
; CHECK: .byte 17 // 0x11
; CHECK: .byte 2 // 0x2
; CHECK: .byte 3 // 0x3
; CHECK: .byte 10 // 0xa
; CHECK: .byte 11 // 0xb
; CHECK: .byte 18 // 0x12
; CHECK: .byte 19 // 0x13
; CHECK: .byte 4 // 0x4
; CHECK: .byte 5 // 0x5
; CHECK: .byte 12 // 0xc
; CHECK: .byte 13 // 0xd
define <8 x i16> @shuffle3_v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: shuffle3_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d3, d2
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: adrp x8, .LCPI12_0
; CHECK-NEXT: fmov d2, d0
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI12_0]
; CHECK-NEXT: mov v2.d[1], v1.d[0]
; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
; CHECK-NEXT: ret
  %x = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %y = shufflevector <4 x i16> %c, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %z = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6>
  ret <8 x i16> %z
}

; Three <4 x i32> sources; the outer mask <0, 4, 8, 1> picks a0, b0, c0, a1.
; The expected assembly uses trn1 and lane moves and no tbl.
define <4 x i32> @shuffle3_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: shuffle3_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: trn1 v1.4s, v0.4s, v1.4s
; CHECK-NEXT: mov v1.d[1], v0.d[0]
; CHECK-NEXT: mov v1.s[2], v2.s[0]
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: ret
  %x = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %y = shufflevector <4 x i32> %c, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %z = shufflevector <8 x i32> %x, <8 x i32> %y, <4 x i32> <i32 0, i32 4, i32 8, i32 1>
  ret <4 x i32> %z
}

; Builds an <8 x i8> result lane by lane with extractelement/insertelement
; from two <8 x i8> sources (%a, %c) and two <16 x i8> sources (%b, %d).
; The expected assembly gathers the %a/%c lanes with a single-register tbl,
; the %b/%d lanes with a two-register tbl, and merges them with trn1/trn2.
; CHECK: .LCPI14_0:
; CHECK: .byte 4 // 0x4
; CHECK: .byte 8 // 0x8
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 14 // 0xe
; CHECK: .byte 3 // 0x3
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .section .rodata.cst16,"aM",@progbits,16
; CHECK: .p2align 4
; CHECK: .LCPI14_1:
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 15 // 0xf
; CHECK: .byte 27 // 0x1b
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 24 // 0x18
; CHECK: .byte 12 // 0xc
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
define <8 x i8> @insert4_v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8> %d) {
; CHECK-LABEL: insert4_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov v4.16b, v3.16b
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: adrp x8, .LCPI14_0
; CHECK-NEXT: adrp x9, .LCPI14_1
; CHECK-NEXT: mov v0.d[1], v2.d[0]
; CHECK-NEXT: mov v3.16b, v1.16b
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI14_0]
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI14_1]
; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b
; CHECK-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v2.16b
; CHECK-NEXT: trn1 v0.4h, v1.4h, v0.4h
; CHECK-NEXT: trn2 v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
  %e1 = extractelement <8 x i8> %a, i32 4
  %e2 = extractelement <8 x i8> %c, i32 0
  %e3 = extractelement <16 x i8> %b, i32 15
  %e4 = extractelement <16 x i8> %d, i32 11
  %e5 = extractelement <8 x i8> %c, i32 6
  %e6 = extractelement <8 x i8> %a, i32 3
  %e7 = extractelement <16 x i8> %d, i32 8
  %e8 = extractelement <16 x i8> %b, i32 12
  %i1 = insertelement <8 x i8> undef, i8 %e1, i32 0
  %i2 = insertelement <8 x i8> %i1, i8 %e2, i32 1
  %i3 = insertelement <8 x i8> %i2, i8 %e3, i32 2
  %i4 = insertelement <8 x i8> %i3, i8 %e4, i32 3
  %i5 = insertelement <8 x i8> %i4, i8 %e5, i32 4
  %i6 = insertelement <8 x i8> %i5, i8 %e6, i32 5
  %i7 = insertelement <8 x i8> %i6, i8 %e7, i32 6
  %i8 = insertelement <8 x i8> %i7, i8 %e8, i32 7
  ret <8 x i8> %i8
}

; 16-lane variant of insert4_v8i8: the same eight extracted lanes are
; inserted twice (lanes 0-7 and 8-15). The expected assembly gathers the
; %b/%d lanes with a two-register tbl and then combines that result with the
; concatenated %a/%c register in a second two-register tbl.
; CHECK: .LCPI15_0:
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 15 // 0xf
; CHECK: .byte 27 // 0x1b
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 24 // 0x18
; CHECK: .byte 12 // 0xc
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 15 // 0xf
; CHECK: .byte 27 // 0x1b
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 24 // 0x18
; CHECK: .byte 12 // 0xc
; CHECK: .LCPI15_1:
; CHECK: .byte 20 // 0x14
; CHECK: .byte 24 // 0x18
; CHECK: .byte 2 // 0x2
; CHECK: .byte 3 // 0x3
; CHECK: .byte 30 // 0x1e
; CHECK: .byte 19 // 0x13
; CHECK: .byte 6 // 0x6
; CHECK: .byte 7 // 0x7
; CHECK: .byte 20 // 0x14
; CHECK: .byte 24 // 0x18
; CHECK: .byte 10 // 0xa
; CHECK: .byte 11 // 0xb
; CHECK: .byte 30 // 0x1e
; CHECK: .byte 19 // 0x13
; CHECK: .byte 14 // 0xe
; CHECK: .byte 15 // 0xf
define <16 x i8> @insert4_v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8> %d) {
; CHECK-LABEL: insert4_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov v4.16b, v3.16b
; CHECK-NEXT: adrp x8, .LCPI15_0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q31_q0
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: mov v3.16b, v1.16b
; CHECK-NEXT: ldr q5, [x8, :lo12:.LCPI15_0]
; CHECK-NEXT: mov v0.d[1], v2.d[0]
; CHECK-NEXT: adrp x8, .LCPI15_1
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_1]
; CHECK-NEXT: tbl v31.16b, { v3.16b, v4.16b }, v5.16b
; CHECK-NEXT: tbl v0.16b, { v31.16b, v0.16b }, v1.16b
; CHECK-NEXT: ret
  %e1 = extractelement <8 x i8> %a, i32 4
  %e2 = extractelement <8 x i8> %c, i32 0
  %e3 = extractelement <16 x i8> %b, i32 15
  %e4 = extractelement <16 x i8> %d, i32 11
  %e5 = extractelement <8 x i8> %c, i32 6
  %e6 = extractelement <8 x i8> %a, i32 3
  %e7 = extractelement <16 x i8> %d, i32 8
  %e8 = extractelement <16 x i8> %b, i32 12
  %e9 = extractelement <8 x i8> %a, i32 4
  %e10 = extractelement <8 x i8> %c, i32 0
  %e11 = extractelement <16 x i8> %b, i32 15
  %e12 = extractelement <16 x i8> %d, i32 11
  %e13 = extractelement <8 x i8> %c, i32 6
  %e14 = extractelement <8 x i8> %a, i32 3
  %e15 = extractelement <16 x i8> %d, i32 8
  %e16 = extractelement <16 x i8> %b, i32 12
  %i1 = insertelement <16 x i8> undef, i8 %e1, i32 0
  %i2 = insertelement <16 x i8> %i1, i8 %e2, i32 1
  %i3 = insertelement <16 x i8> %i2, i8 %e3, i32 2
  %i4 = insertelement <16 x i8> %i3, i8 %e4, i32 3
  %i5 = insertelement <16 x i8> %i4, i8 %e5, i32 4
  %i6 = insertelement <16 x i8> %i5, i8 %e6, i32 5
  %i7 = insertelement <16 x i8> %i6, i8 %e7, i32 6
  %i8 = insertelement <16 x i8> %i7, i8 %e8, i32 7
  %i9 = insertelement <16 x i8> %i8, i8 %e9, i32 8
  %i10 = insertelement <16 x i8> %i9, i8 %e10, i32 9
  %i11 = insertelement <16 x i8> %i10, i8 %e11, i32 10
  %i12 = insertelement <16 x i8> %i11, i8 %e12, i32 11
  %i13 = insertelement <16 x i8> %i12, i8 %e13, i32 12
  %i14 = insertelement <16 x i8> %i13, i8 %e14, i32 13
  %i15 = insertelement <16 x i8> %i14, i8 %e15, i32 14
  %i16 = insertelement <16 x i8> %i15, i8 %e16, i32 15
  ret <16 x i8> %i16
}


; Eight <2 x double> inputs are floored, converted to <2 x i16> with fptosi,
; concatenated pairwise up to two <8 x i16> vectors, and finally shuffled so
; the even-indexed lanes come first followed by the odd-indexed lanes. The
; expected assembly narrows each input once with xtn, uses the same 16-byte
; mask for two four-register tbls, and finishes with uzp1/uzp2.
; CHECK: .LCPI16_0:
; CHECK: .byte 0
; CHECK: .byte 1
; CHECK: .byte 4
; CHECK: .byte 5
; CHECK: .byte 16
; CHECK: .byte 17
; CHECK: .byte 20
; CHECK: .byte 21
; CHECK: .byte 32
; CHECK: .byte 33
; CHECK: .byte 36
; CHECK: .byte 37
; CHECK: .byte 48
; CHECK: .byte 49
; CHECK: .byte 52
; CHECK: .byte 53
define <16 x i16> @test(<2 x double> %l213, <2 x double> %l231, <2 x double> %l249, <2 x double> %l267, <2 x double> %l285, <2 x double> %l303, <2 x double> %l321, <2 x double> %l339) {
; CHECK-LABEL: test:
; CHECK: // %bb.0:
; CHECK-NEXT: frintm v0.2d, v0.2d
; CHECK-NEXT: frintm v4.2d, v4.2d
; CHECK-NEXT: adrp x8, .LCPI16_0
; CHECK-NEXT: frintm v1.2d, v1.2d
; CHECK-NEXT: frintm v5.2d, v5.2d
; CHECK-NEXT: frintm v2.2d, v2.2d
; CHECK-NEXT: frintm v6.2d, v6.2d
; CHECK-NEXT: frintm v3.2d, v3.2d
; CHECK-NEXT: frintm v7.2d, v7.2d
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: fcvtzs v4.2d, v4.2d
; CHECK-NEXT: fcvtzs v1.2d, v1.2d
; CHECK-NEXT: fcvtzs v5.2d, v5.2d
; CHECK-NEXT: fcvtzs v2.2d, v2.2d
; CHECK-NEXT: fcvtzs v6.2d, v6.2d
; CHECK-NEXT: fcvtzs v3.2d, v3.2d
; CHECK-NEXT: fcvtzs v7.2d, v7.2d
; CHECK-NEXT: xtn v16.2s, v0.2d
; CHECK-NEXT: xtn v20.2s, v4.2d
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI16_0]
; CHECK-NEXT: xtn v17.2s, v1.2d
; CHECK-NEXT: xtn v21.2s, v5.2d
; CHECK-NEXT: xtn v18.2s, v2.2d
; CHECK-NEXT: xtn v22.2s, v6.2d
; CHECK-NEXT: xtn v19.2s, v3.2d
; CHECK-NEXT: xtn v23.2s, v7.2d
; CHECK-NEXT: tbl v1.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v0.16b
; CHECK-NEXT: tbl v2.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v0.16b
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v2.8h
; CHECK-NEXT: uzp2 v1.8h, v1.8h, v2.8h
; CHECK-NEXT: ret
  %l214 = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %l213)
  %l215 = fptosi <2 x double> %l214 to <2 x i16>
  %l232 = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %l231)
  %l233 = fptosi <2 x double> %l232 to <2 x i16>
  %l250 = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %l249)
  %l251 = fptosi <2 x double> %l250 to <2 x i16>
  %l268 = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %l267)
  %l269 = fptosi <2 x double> %l268 to <2 x i16>
  %l286 = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %l285)
  %l287 = fptosi <2 x double> %l286 to <2 x i16>
  %l304 = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %l303)
  %l305 = fptosi <2 x double> %l304 to <2 x i16>
  %l322 = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %l321)
  %l323 = fptosi <2 x double> %l322 to <2 x i16>
  %l340 = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %l339)
  %l341 = fptosi <2 x double> %l340 to <2 x i16>
  %l342 = shufflevector <2 x i16> %l215, <2 x i16> %l233, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %l343 = shufflevector <2 x i16> %l251, <2 x i16> %l269, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %l344 = shufflevector <2 x i16> %l287, <2 x i16> %l305, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %l345 = shufflevector <2 x i16> %l323, <2 x i16> %l341, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %l346 = shufflevector <4 x i16> %l342, <4 x i16> %l343, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %l347 = shufflevector <4 x i16> %l344, <4 x i16> %l345, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %interleaved.vec = shufflevector <8 x i16> %l346, <8 x i16> %l347, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  ret <16 x i16> %interleaved.vec
}

; Declaration of the floor intrinsic called by @test.
declare <2 x double> @llvm.floor.v2f64(<2 x double> %l213)