; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; Code generation tests for the AArch64 NEON table-lookup intrinsics
; (llvm.aarch64.neon.tbl1 .. tbl4). Every function is compiled twice by the
; llc invocations at the top of the file: once with the default selector (SD
; prefix) and once with -global-isel (GI prefix).

; tbl1 with one 16-byte table register and an 8-byte index vector: both
; selectors are expected to emit a single tbl.8b.
define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind {
; CHECK-LABEL: tbl1_8b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbl.8b v0, { v0 }, v1
; CHECK-NEXT:    ret
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %A, <8 x i8> %B)
  ret <8 x i8> %tmp3
}

; tbl1 with a 16-byte index vector: a single tbl.16b is expected.
define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind {
; CHECK-LABEL: tbl1_16b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tbl.16b v0, { v0 }, v1
; CHECK-NEXT:    ret
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %A, <16 x i8> %B)
  ret <16 x i8> %tmp3
}

; tbl2 with two table registers and an 8-byte index vector. The two selectors
; differ only in the order of the register-tuple kill annotations.
define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) {
; CHECK-SD-LABEL: tbl2_8b:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    tbl.8b v0, { v0, v1 }, v2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: tbl2_8b:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    tbl.8b v0, { v0, v1 }, v2
; CHECK-GI-NEXT:    ret
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C)
  ret <8 x i8> %tmp3
}

; tbl2 with a 16-byte index vector.
define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
; CHECK-SD-LABEL: tbl2_16b:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1 }, v2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: tbl2_16b:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT:    ret
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
  ret <16 x i8> %tmp3
}

; tbl3 with three table registers and an 8-byte index vector.
define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
; CHECK-SD-LABEL: tbl3_8b:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    tbl.8b v0, { v0, v1, v2 }, v3
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: tbl3_8b:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    tbl.8b v0, { v0, v1, v2 }, v3
; CHECK-GI-NEXT:    ret
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
  ret <8 x i8> %tmp3
}

; tbl3 with a 16-byte index vector.
define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
; CHECK-SD-LABEL: tbl3_16b:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1, v2 }, v3
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: tbl3_16b:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1, v2 }, v3
; CHECK-GI-NEXT:    ret
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
  ret <16 x i8> %tmp3
}

; tbl4 with four table registers and an 8-byte index vector.
define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
; CHECK-SD-LABEL: tbl4_8b:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    tbl.8b v0, { v0, v1, v2, v3 }, v4
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: tbl4_8b:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    tbl.8b v0, { v0, v1, v2, v3 }, v4
; CHECK-GI-NEXT:    ret
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
  ret <8 x i8> %tmp3
}

; tbl4 with a 16-byte index vector.
define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
; CHECK-SD-LABEL: tbl4_16b:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: tbl4_16b:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
; CHECK-GI-NEXT:    ret
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
  ret <16 x i8> %tmp3
}

; Constant-pool contents expected for shuffled_tbl2_to_tbl4_v8i8. The default
; selector needs only the tbl2 index mask; GlobalISel additionally materialises
; the shufflevector mask as a second constant.
; CHECK-SD-LABEL: .LCPI8_0:
; CHECK-SD: .byte 0 // 0x0
; CHECK-SD-NEXT: .byte 4 // 0x4
; CHECK-SD-NEXT: .byte 8 // 0x8
; CHECK-SD-NEXT: .byte 12 // 0xc
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff

; CHECK-GI-LABEL: .LCPI8_0:
; CHECK-GI: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 1 // 0x1
; CHECK-GI-NEXT: .byte 2 // 0x2
; CHECK-GI-NEXT: .byte 3 // 0x3
; CHECK-GI-NEXT: .byte 12 // 0xc
; CHECK-GI-NEXT: .byte 13 // 0xd
; CHECK-GI-NEXT: .byte 14 // 0xe
; CHECK-GI-NEXT: .byte 15 // 0xf
; CHECK-GI-LABEL: .LCPI8_1:
; CHECK-GI: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 4 // 0x4
; CHECK-GI-NEXT: .byte 8 // 0x8
; CHECK-GI-NEXT: .byte 12 // 0xc
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff

; Two 8-byte tbl2 lookups that share one constant index mask, combined by a
; shufflevector. Neither selector is expected to merge them into a tbl4 here:
; both keep two tbl.8b lookups, and GlobalISel performs the final shuffle with
; an extra single-register tbl.16b.
define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_v8i8:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    adrp x8, .LCPI8_0
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-SD-NEXT:    ldr d4, [x8, :lo12:.LCPI8_0]
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-SD-NEXT:    tbl.8b v0, { v0, v1 }, v4
; CHECK-SD-NEXT:    tbl.8b v1, { v2, v3 }, v4
; CHECK-SD-NEXT:    mov.s v0[1], v1[1]
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_v8i8:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    adrp x8, .LCPI8_1
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    ldr d4, [x8, :lo12:.LCPI8_1]
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    adrp x8, .LCPI8_0
; CHECK-GI-NEXT:    tbl.8b v0, { v0, v1 }, v4
; CHECK-GI-NEXT:    tbl.8b v1, { v2, v3 }, v4
; CHECK-GI-NEXT:    mov.d v0[1], v1[0]
; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI8_0]
; CHECK-GI-NEXT:    tbl.16b v0, { v0 }, v1
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    ret
  %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
  %t2 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %c, <16 x i8> %d, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
  %s = shufflevector <8 x i8> %t1, <8 x i8> %t2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i8> %s
}

; Constant-pool contents expected for shuffled_tbl2_to_tbl4. The default
; selector uses one merged 16-byte mask that indexes across all four table
; registers (second-table indices are offset by 32).
; CHECK-SD-LABEL: .LCPI9_0:
; CHECK-SD-NEXT: .byte 0 // 0x0
; CHECK-SD-NEXT: .byte 4 // 0x4
; CHECK-SD-NEXT: .byte 8 // 0x8
; CHECK-SD-NEXT: .byte 12 // 0xc
; CHECK-SD-NEXT: .byte 16 // 0x10
; CHECK-SD-NEXT: .byte 20 // 0x14
; CHECK-SD-NEXT: .byte 24 // 0x18
; CHECK-SD-NEXT: .byte 28 // 0x1c
; CHECK-SD-NEXT: .byte 32 // 0x20
; CHECK-SD-NEXT: .byte 36 // 0x24
; CHECK-SD-NEXT: .byte 40 // 0x28
; CHECK-SD-NEXT: .byte 44 // 0x2c
; CHECK-SD-NEXT: .byte 48 // 0x30
; CHECK-SD-NEXT: .byte 52 // 0x34
; CHECK-SD-NEXT: .byte 56 // 0x38
; CHECK-SD-NEXT: .byte 60 // 0x3c

;CHECK-GI-LABEL: .LCPI9_0:
;CHECK-GI: .byte 0 // 0x0
;CHECK-GI-NEXT: .byte 1 // 0x1
;CHECK-GI-NEXT: .byte 2 // 0x2
;CHECK-GI-NEXT: .byte 3 // 0x3
;CHECK-GI-NEXT: .byte 4 // 0x4
;CHECK-GI-NEXT: .byte 5 // 0x5
;CHECK-GI-NEXT: .byte 6 // 0x6
;CHECK-GI-NEXT: .byte 7 // 0x7
;CHECK-GI-NEXT: .byte 16 // 0x10
;CHECK-GI-NEXT: .byte 17 // 0x11
;CHECK-GI-NEXT: .byte 18 // 0x12
;CHECK-GI-NEXT: .byte 19 // 0x13
;CHECK-GI-NEXT: .byte 20 // 0x14
;CHECK-GI-NEXT: .byte 21 // 0x15
;CHECK-GI-NEXT: .byte 22 // 0x16
;CHECK-GI-NEXT: .byte 23 // 0x17
;CHECK-GI-LABEL: .LCPI9_1:
;CHECK-GI: .byte 0 // 0x0
;CHECK-GI-NEXT: .byte 4 // 0x4
;CHECK-GI-NEXT: .byte 8 // 0x8
;CHECK-GI-NEXT: .byte 12 // 0xc
;CHECK-GI-NEXT: .byte 16 // 0x10
;CHECK-GI-NEXT: .byte 20 // 0x14
;CHECK-GI-NEXT: .byte 24 // 0x18
;CHECK-GI-NEXT: .byte 28 // 0x1c
;CHECK-GI-NEXT: .byte 255 // 0xff
;CHECK-GI-NEXT: .byte 255 // 0xff
;CHECK-GI-NEXT: .byte 255 // 0xff
;CHECK-GI-NEXT: .byte 255 // 0xff
;CHECK-GI-NEXT: .byte 255 // 0xff
;CHECK-GI-NEXT: .byte 255 // 0xff
;CHECK-GI-NEXT: .byte 255 // 0xff
;CHECK-GI-NEXT: .byte 255 // 0xff

; Two 16-byte tbl2 lookups with identical constant masks whose low halves are
; concatenated by a shufflevector. The default selector is expected to fold
; everything into one four-register tbl.16b; GlobalISel keeps the two tbl2
; lookups and implements the shuffle with a third tbl.16b.
define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    adrp x8, .LCPI9_0
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI9_0]
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    adrp x8, .LCPI9_1
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI9_1]
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    adrp x8, .LCPI9_0
; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v4
; CHECK-GI-NEXT:    tbl.16b v1, { v2, v3 }, v4
; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI9_0]
; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT:    ret
  %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <16 x i8> %s
}

; Constant-pool contents expected from GlobalISel for
; shuffled_tbl2_to_tbl4_nonconst_first_mask (shuffle mask, then the constant
; tbl2 index mask).
; CHECK-GI-LABEL: .LCPI10_0:
; CHECK-GI: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 1 // 0x1
; CHECK-GI-NEXT: .byte 2 // 0x2
; CHECK-GI-NEXT: .byte 3 // 0x3
; CHECK-GI-NEXT: .byte 4 // 0x4
; CHECK-GI-NEXT: .byte 5 // 0x5
; CHECK-GI-NEXT: .byte 6 // 0x6
; CHECK-GI-NEXT: .byte 7 // 0x7
; CHECK-GI-NEXT: .byte 16 // 0x10
; CHECK-GI-NEXT: .byte 17 // 0x11
; CHECK-GI-NEXT: .byte 18 // 0x12
; CHECK-GI-NEXT: .byte 19 // 0x13
; CHECK-GI-NEXT: .byte 20 // 0x14
; CHECK-GI-NEXT: .byte 21 // 0x15
; CHECK-GI-NEXT: .byte 22 // 0x16
; CHECK-GI-NEXT: .byte 23 // 0x17
; CHECK-GI-LABEL: .LCPI10_1:
; CHECK-GI: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 4 // 0x4
; CHECK-GI-NEXT: .byte 8 // 0x8
; CHECK-GI-NEXT: .byte 12 // 0xc
; CHECK-GI-NEXT: .byte 16 // 0x10
; CHECK-GI-NEXT: .byte 20 // 0x14
; CHECK-GI-NEXT: .byte 24 // 0x18
; CHECK-GI-NEXT: .byte 28 // 0x1c
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff

; The mask of the first tbl2 is built at run time: lanes 0-7 hold %v and lanes
; 8-15 hold -1. The second tbl2 keeps a constant mask. The default selector is
; still expected to produce a single four-register tbl.16b, assembling the
; merged mask lane by lane in v4; GlobalISel emits three tbl.16b instructions.
define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    fmov s4, w0
; CHECK-SD-NEXT:    mov w8, #32 // =0x20
; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    mov.b v4[1], w0
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    mov.b v4[2], w0
; CHECK-SD-NEXT:    mov.b v4[3], w0
; CHECK-SD-NEXT:    mov.b v4[4], w0
; CHECK-SD-NEXT:    mov.b v4[5], w0
; CHECK-SD-NEXT:    mov.b v4[6], w0
; CHECK-SD-NEXT:    mov.b v4[7], w0
; CHECK-SD-NEXT:    mov.b v4[8], w8
; CHECK-SD-NEXT:    mov w8, #36 // =0x24
; CHECK-SD-NEXT:    mov.b v4[9], w8
; CHECK-SD-NEXT:    mov w8, #40 // =0x28
; CHECK-SD-NEXT:    mov.b v4[10], w8
; CHECK-SD-NEXT:    mov w8, #44 // =0x2c
; CHECK-SD-NEXT:    mov.b v4[11], w8
; CHECK-SD-NEXT:    mov w8, #48 // =0x30
; CHECK-SD-NEXT:    mov.b v4[12], w8
; CHECK-SD-NEXT:    mov w8, #52 // =0x34
; CHECK-SD-NEXT:    mov.b v4[13], w8
; CHECK-SD-NEXT:    mov w8, #56 // =0x38
; CHECK-SD-NEXT:    mov.b v4[14], w8
; CHECK-SD-NEXT:    mov w8, #60 // =0x3c
; CHECK-SD-NEXT:    mov.b v4[15], w8
; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    fmov s4, w0
; CHECK-GI-NEXT:    mov w8, #255 // =0xff
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    mov.b v4[1], w0
; CHECK-GI-NEXT:    mov.b v4[2], w0
; CHECK-GI-NEXT:    mov.b v4[3], w0
; CHECK-GI-NEXT:    mov.b v4[4], w0
; CHECK-GI-NEXT:    mov.b v4[5], w0
; CHECK-GI-NEXT:    mov.b v4[6], w0
; CHECK-GI-NEXT:    mov.b v4[7], w0
; CHECK-GI-NEXT:    mov.b v4[8], w8
; CHECK-GI-NEXT:    mov.b v4[9], w8
; CHECK-GI-NEXT:    mov.b v4[10], w8
; CHECK-GI-NEXT:    mov.b v4[11], w8
; CHECK-GI-NEXT:    mov.b v4[12], w8
; CHECK-GI-NEXT:    mov.b v4[13], w8
; CHECK-GI-NEXT:    mov.b v4[14], w8
; CHECK-GI-NEXT:    mov.b v4[15], w8
; CHECK-GI-NEXT:    adrp x8, .LCPI10_1
; CHECK-GI-NEXT:    ldr q5, [x8, :lo12:.LCPI10_1]
; CHECK-GI-NEXT:    adrp x8, .LCPI10_0
; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v4
; CHECK-GI-NEXT:    tbl.16b v1, { v2, v3 }, v5
; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI10_0]
; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT:    ret
  %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
  %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
  %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
  %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
  %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
  %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
  %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
  %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
  %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
  %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
  %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
  %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
  %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
  %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
  %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
  %ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15
  %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
  %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <16 x i8> %s
}

; Constant-pool contents expected from GlobalISel for
; shuffled_tbl2_to_tbl4_nonconst_first_mask2 (shuffle mask, then the constant
; tbl2 index mask).
; CHECK-GI-LABEL: .LCPI11_0:
; CHECK-GI: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 1 // 0x1
; CHECK-GI-NEXT: .byte 2 // 0x2
; CHECK-GI-NEXT: .byte 3 // 0x3
; CHECK-GI-NEXT: .byte 4 // 0x4
; CHECK-GI-NEXT: .byte 5 // 0x5
; CHECK-GI-NEXT: .byte 6 // 0x6
; CHECK-GI-NEXT: .byte 15 // 0xf
; CHECK-GI-NEXT: .byte 16 // 0x10
; CHECK-GI-NEXT: .byte 17 // 0x11
; CHECK-GI-NEXT: .byte 18 // 0x12
; CHECK-GI-NEXT: .byte 19 // 0x13
; CHECK-GI-NEXT: .byte 20 // 0x14
; CHECK-GI-NEXT: .byte 21 // 0x15
; CHECK-GI-NEXT: .byte 22 // 0x16
; CHECK-GI-NEXT: .byte 31 // 0x1f
; CHECK-GI-LABEL: .LCPI11_1:
; CHECK-GI: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 4 // 0x4
; CHECK-GI-NEXT: .byte 8 // 0x8
; CHECK-GI-NEXT: .byte 12 // 0xc
; CHECK-GI-NEXT: .byte 16 // 0x10
; CHECK-GI-NEXT: .byte 20 // 0x14
; CHECK-GI-NEXT: .byte 24 // 0x18
; CHECK-GI-NEXT: .byte 28 // 0x1c
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff

; Variant of the previous test: the first tbl2 mask holds the constant 1 in
; lanes 0-7, -1 in lanes 8-11 and 14, and %v in lanes 12, 13 and 15. The
; shufflevector takes lanes 0-6 and 15 from each lookup, so exactly one
; run-time lane (lane 15 of the first mask) reaches the result.
define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask2:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    mov w8, #1 // =0x1
; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    fmov s4, w8
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-SD-NEXT:    mov.b v4[1], w8
; CHECK-SD-NEXT:    mov.b v4[2], w8
; CHECK-SD-NEXT:    mov.b v4[3], w8
; CHECK-SD-NEXT:    mov.b v4[4], w8
; CHECK-SD-NEXT:    mov.b v4[5], w8
; CHECK-SD-NEXT:    mov.b v4[6], w8
; CHECK-SD-NEXT:    mov w8, #32 // =0x20
; CHECK-SD-NEXT:    mov.b v4[7], w0
; CHECK-SD-NEXT:    mov.b v4[8], w8
; CHECK-SD-NEXT:    mov w8, #36 // =0x24
; CHECK-SD-NEXT:    mov.b v4[9], w8
; CHECK-SD-NEXT:    mov w8, #40 // =0x28
; CHECK-SD-NEXT:    mov.b v4[10], w8
; CHECK-SD-NEXT:    mov w8, #44 // =0x2c
; CHECK-SD-NEXT:    mov.b v4[11], w8
; CHECK-SD-NEXT:    mov w8, #48 // =0x30
; CHECK-SD-NEXT:    mov.b v4[12], w8
; CHECK-SD-NEXT:    mov w8, #52 // =0x34
; CHECK-SD-NEXT:    mov.b v4[13], w8
; CHECK-SD-NEXT:    mov w8, #56 // =0x38
; CHECK-SD-NEXT:    mov.b v4[14], w8
; CHECK-SD-NEXT:    mov w8, #31 // =0x1f
; CHECK-SD-NEXT:    mov.b v4[15], w8
; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask2:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    mov w8, #1 // =0x1
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    fmov s4, w8
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    mov.b v4[1], w8
; CHECK-GI-NEXT:    mov.b v4[2], w8
; CHECK-GI-NEXT:    mov.b v4[3], w8
; CHECK-GI-NEXT:    mov.b v4[4], w8
; CHECK-GI-NEXT:    mov.b v4[5], w8
; CHECK-GI-NEXT:    mov.b v4[6], w8
; CHECK-GI-NEXT:    mov.b v4[7], w8
; CHECK-GI-NEXT:    mov w8, #255 // =0xff
; CHECK-GI-NEXT:    mov.b v4[8], w8
; CHECK-GI-NEXT:    mov.b v4[9], w8
; CHECK-GI-NEXT:    mov.b v4[10], w8
; CHECK-GI-NEXT:    mov.b v4[11], w8
; CHECK-GI-NEXT:    mov.b v4[12], w0
; CHECK-GI-NEXT:    mov.b v4[13], w0
; CHECK-GI-NEXT:    mov.b v4[14], w8
; CHECK-GI-NEXT:    adrp x8, .LCPI11_1
; CHECK-GI-NEXT:    ldr q5, [x8, :lo12:.LCPI11_1]
; CHECK-GI-NEXT:    adrp x8, .LCPI11_0
; CHECK-GI-NEXT:    mov.b v4[15], w0
; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v4
; CHECK-GI-NEXT:    tbl.16b v1, { v2, v3 }, v5
; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI11_0]
; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v2
; CHECK-GI-NEXT:    ret
  %ins.0 = insertelement <16 x i8> poison, i8 1, i32 0
  %ins.1 = insertelement <16 x i8> %ins.0, i8 1, i32 1
  %ins.2 = insertelement <16 x i8> %ins.1, i8 1, i32 2
  %ins.3 = insertelement <16 x i8> %ins.2, i8 1, i32 3
  %ins.4 = insertelement <16 x i8> %ins.3, i8 1, i32 4
  %ins.5 = insertelement <16 x i8> %ins.4, i8 1, i32 5
  %ins.6 = insertelement <16 x i8> %ins.5, i8 1, i32 6
  %ins.7 = insertelement <16 x i8> %ins.6, i8 1, i32 7
  %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
  %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
  %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
  %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
  %ins.12 = insertelement <16 x i8> %ins.11, i8 %v, i32 12
  %ins.13 = insertelement <16 x i8> %ins.12, i8 %v, i32 13
  %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
  %ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15
  %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
  %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 31>
  ret <16 x i8> %s
}

; Constant-pool contents expected for
; shuffled_tbl2_to_tbl4_nonconst_second_mask.
; CHECK-SD-LABEL: .LCPI12_0:
; CHECK-SD: .byte 0 // 0x0
; CHECK-SD-NEXT: .byte 4 // 0x4
; CHECK-SD-NEXT: .byte 8 // 0x8
; CHECK-SD-NEXT: .byte 12 // 0xc
; CHECK-SD-NEXT: .byte 16 // 0x10
; CHECK-SD-NEXT: .byte 20 // 0x14
; CHECK-SD-NEXT: .byte 24 // 0x18
; CHECK-SD-NEXT: .byte 28 // 0x1c
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff

; CHECK-GI-LABEL: .LCPI12_0:
; CHECK-GI: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 1 // 0x1
; CHECK-GI-NEXT: .byte 2 // 0x2
; CHECK-GI-NEXT: .byte 3 // 0x3
; CHECK-GI-NEXT: .byte 4 // 0x4
; CHECK-GI-NEXT: .byte 5 // 0x5
; CHECK-GI-NEXT: .byte 6 // 0x6
; CHECK-GI-NEXT: .byte 7 // 0x7
; CHECK-GI-NEXT: .byte 16 // 0x10
; CHECK-GI-NEXT: .byte 17 // 0x11
; CHECK-GI-NEXT: .byte 18 // 0x12
; CHECK-GI-NEXT: .byte 19 // 0x13
; CHECK-GI-NEXT: .byte 20 // 0x14
; CHECK-GI-NEXT: .byte 21 // 0x15
; CHECK-GI-NEXT: .byte 22 // 0x16
; CHECK-GI-NEXT: .byte 23 // 0x17
; CHECK-GI-LABEL: .LCPI12_1:
; CHECK-GI: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 4 // 0x4
; CHECK-GI-NEXT: .byte 8 // 0x8
; CHECK-GI-NEXT: .byte 12 // 0xc
; CHECK-GI-NEXT: .byte 16 // 0x10
; CHECK-GI-NEXT: .byte 20 // 0x14
; CHECK-GI-NEXT: .byte 24 // 0x18
; CHECK-GI-NEXT: .byte 28 // 0x1c
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff
; CHECK-GI-NEXT: .byte 255 // 0xff

; Here the run-time mask feeds the tbl2 that becomes the SECOND shuffle operand
; (%t2, looking up %a/%b), while the first operand (%t1, looking up %c/%d) uses
; the constant mask. The default selector is not expected to form a tbl4: it
; keeps two tbl2 lookups and joins them with a 64-bit lane move. GlobalISel
; emits three tbl.16b instructions.
define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    movi.2d v4, #0xffffffffffffffff
; CHECK-SD-NEXT:    adrp x8, .LCPI12_0
; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    ldr q5, [x8, :lo12:.LCPI12_0]
; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT:    tbl.16b v2, { v2, v3 }, v5
; CHECK-SD-NEXT:    mov.b v4[0], w0
; CHECK-SD-NEXT:    mov.b v4[1], w0
; CHECK-SD-NEXT:    mov.b v4[2], w0
; CHECK-SD-NEXT:    mov.b v4[3], w0
; CHECK-SD-NEXT:    mov.b v4[4], w0
; CHECK-SD-NEXT:    mov.b v4[5], w0
; CHECK-SD-NEXT:    mov.b v4[6], w0
; CHECK-SD-NEXT:    mov.b v4[7], w0
; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1 }, v4
; CHECK-SD-NEXT:    mov.d v2[1], v0[0]
; CHECK-SD-NEXT:    mov.16b v0, v2
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    fmov s4, w0
; CHECK-GI-NEXT:    mov w8, #255 // =0xff
; CHECK-GI-NEXT:    adrp x9, .LCPI12_1
; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    ldr q5, [x9, :lo12:.LCPI12_1]
; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT:    mov.b v4[1], w0
; CHECK-GI-NEXT:    tbl.16b v2, { v2, v3 }, v5
; CHECK-GI-NEXT:    mov.b v4[2], w0
; CHECK-GI-NEXT:    mov.b v4[3], w0
; CHECK-GI-NEXT:    mov.b v4[4], w0
; CHECK-GI-NEXT:    mov.b v4[5], w0
; CHECK-GI-NEXT:    mov.b v4[6], w0
; CHECK-GI-NEXT:    mov.b v4[7], w0
; CHECK-GI-NEXT:    mov.b v4[8], w8
; CHECK-GI-NEXT:    mov.b v4[9], w8
; CHECK-GI-NEXT:    mov.b v4[10], w8
; CHECK-GI-NEXT:    mov.b v4[11], w8
; CHECK-GI-NEXT:    mov.b v4[12], w8
; CHECK-GI-NEXT:    mov.b v4[13], w8
; CHECK-GI-NEXT:    mov.b v4[14], w8
; CHECK-GI-NEXT:    mov.b v4[15], w8
; CHECK-GI-NEXT:    adrp x8, .LCPI12_0
; CHECK-GI-NEXT:    tbl.16b v3, { v0, v1 }, v4
; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI12_0]
; CHECK-GI-NEXT:    tbl.16b v0, { v2, v3 }, v0
; CHECK-GI-NEXT:    ret
  %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
  %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
  %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
  %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
  %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
  %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
  %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
  %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
  %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
  %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
  %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
  %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
  %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
  %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
  %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
  %ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15
  %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
  %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <16 x i8> %s
}

; CHECK-SD-LABEL: .LCPI13_0:
; CHECK-SD: .byte 0 // 0x0
; CHECK-SD-NEXT: .byte 4 // 0x4
; CHECK-SD-NEXT: .byte 8 // 0x8
; CHECK-SD-NEXT: .byte 12 // 0xc
; CHECK-SD-NEXT: .byte 16 // 0x10
; CHECK-SD-NEXT: .byte 20 // 0x14
; CHECK-SD-NEXT: .byte 24 // 0x18
; CHECK-SD-NEXT: .byte 28 // 0x1c
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-NEXT: .byte 255 // 0xff
; CHECK-SD-LABEL: .LCPI13_1:
; CHECK-SD: .byte 0 // 0x0
; CHECK-SD-NEXT: .byte 1 // 0x1
; CHECK-SD-NEXT: .byte 2 // 0x2
; CHECK-SD-NEXT: .byte 3 // 0x3
; CHECK-SD-NEXT: .byte 4 // 0x4
; CHECK-SD-NEXT: .byte 5 // 0x5
; CHECK-SD-NEXT: .byte 6 // 0x6
; CHECK-SD-NEXT: .byte 7 // 0x7
; CHECK-SD-NEXT: .byte 16 // 0x10
; CHECK-SD-NEXT: .byte 17 // 0x11
; CHECK-SD-NEXT: .byte 18 // 0x12
; CHECK-SD-NEXT: .byte 19 // 0x13
; CHECK-SD-NEXT: .byte 20 // 0x14
; CHECK-SD-NEXT: .byte 21 // 0x15
; CHECK-SD-NEXT: .byte 30 // 0x1e
; CHECK-SD-NEXT: .byte 31 // 0x1f

; CHECK-GI-LABEL: .LCPI13_0:
; CHECK-GI: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 1 // 0x1
; CHECK-GI-NEXT: .byte 2 // 0x2
; CHECK-GI-NEXT: .byte 3 // 0x3
; CHECK-GI-NEXT: .byte 4 // 0x4
; CHECK-GI-NEXT: .byte 5 // 0x5
712; CHECK-GI-NEXT: .byte 6 // 0x6 713; CHECK-GI-NEXT: .byte 7 // 0x7 714; CHECK-GI-NEXT: .byte 16 // 0x10 715; CHECK-GI-NEXT: .byte 17 // 0x11 716; CHECK-GI-NEXT: .byte 18 // 0x12 717; CHECK-GI-NEXT: .byte 19 // 0x13 718; CHECK-GI-NEXT: .byte 20 // 0x14 719; CHECK-GI-NEXT: .byte 21 // 0x15 720; CHECK-GI-NEXT: .byte 30 // 0x1e 721; CHECK-GI-NEXT: .byte 31 // 0x1f 722; CHECK-GI-LABEL: .LCPI13_1: 723; CHECK-GI: .byte 0 // 0x0 724; CHECK-GI-NEXT: .byte 4 // 0x4 725; CHECK-GI-NEXT: .byte 8 // 0x8 726; CHECK-GI-NEXT: .byte 12 // 0xc 727; CHECK-GI-NEXT: .byte 16 // 0x10 728; CHECK-GI-NEXT: .byte 20 // 0x14 729; CHECK-GI-NEXT: .byte 24 // 0x18 730; CHECK-GI-NEXT: .byte 28 // 0x1c 731; CHECK-GI-NEXT: .byte 255 // 0xff 732; CHECK-GI-NEXT: .byte 255 // 0xff 733; CHECK-GI-NEXT: .byte 255 // 0xff 734; CHECK-GI-NEXT: .byte 255 // 0xff 735; CHECK-GI-NEXT: .byte 255 // 0xff 736; CHECK-GI-NEXT: .byte 255 // 0xff 737; CHECK-GI-NEXT: .byte 255 // 0xff 738; CHECK-GI-NEXT: .byte 255 // 0xff 739 740define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) { 741; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask2: 742; CHECK-SD: // %bb.0: 743; CHECK-SD-NEXT: dup.16b v4, w0 744; CHECK-SD-NEXT: mov w8, #255 // =0xff 745; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 746; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 747; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 748; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 749; CHECK-SD-NEXT: mov.b v4[8], w8 750; CHECK-SD-NEXT: mov.b v4[9], w8 751; CHECK-SD-NEXT: mov.b v4[10], w8 752; CHECK-SD-NEXT: mov.b v4[11], w8 753; CHECK-SD-NEXT: mov.b v4[12], w8 754; CHECK-SD-NEXT: mov.b v4[13], w8 755; CHECK-SD-NEXT: adrp x8, .LCPI13_0 756; CHECK-SD-NEXT: ldr q5, [x8, :lo12:.LCPI13_0] 757; CHECK-SD-NEXT: adrp x8, .LCPI13_1 758; CHECK-SD-NEXT: tbl.16b v2, { v2, v3 }, v5 759; CHECK-SD-NEXT: 
tbl.16b v3, { v0, v1 }, v4 760; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI13_1] 761; CHECK-SD-NEXT: tbl.16b v0, { v2, v3 }, v0 762; CHECK-SD-NEXT: ret 763; 764; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask2: 765; CHECK-GI: // %bb.0: 766; CHECK-GI-NEXT: fmov s4, w0 767; CHECK-GI-NEXT: mov w8, #255 // =0xff 768; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 769; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 770; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 771; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 772; CHECK-GI-NEXT: mov.b v4[1], w0 773; CHECK-GI-NEXT: mov.b v4[2], w0 774; CHECK-GI-NEXT: mov.b v4[3], w0 775; CHECK-GI-NEXT: mov.b v4[4], w0 776; CHECK-GI-NEXT: mov.b v4[5], w0 777; CHECK-GI-NEXT: mov.b v4[6], w0 778; CHECK-GI-NEXT: mov.b v4[7], w0 779; CHECK-GI-NEXT: mov.b v4[8], w8 780; CHECK-GI-NEXT: mov.b v4[9], w8 781; CHECK-GI-NEXT: mov.b v4[10], w8 782; CHECK-GI-NEXT: mov.b v4[11], w8 783; CHECK-GI-NEXT: mov.b v4[12], w8 784; CHECK-GI-NEXT: mov.b v4[13], w8 785; CHECK-GI-NEXT: adrp x8, .LCPI13_1 786; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI13_1] 787; CHECK-GI-NEXT: adrp x8, .LCPI13_0 788; CHECK-GI-NEXT: tbl.16b v2, { v2, v3 }, v5 789; CHECK-GI-NEXT: mov.b v4[14], w0 790; CHECK-GI-NEXT: mov.b v4[15], w0 791; CHECK-GI-NEXT: tbl.16b v3, { v0, v1 }, v4 792; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI13_0] 793; CHECK-GI-NEXT: tbl.16b v0, { v2, v3 }, v0 794; CHECK-GI-NEXT: ret 795 %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0 796 %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1 797 %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2 798 %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3 799 %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4 800 %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5 801 %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6 802 %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7 803 %ins.8 = insertelement <16 x i8> 
%ins.7, i8 -1, i32 8 804 %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9 805 %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10 806 %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11 807 %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12 808 %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13 809 %ins.14 = insertelement <16 x i8> %ins.13, i8 %v, i32 14 810 %ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15 811 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 812 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15) 813 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 30, i32 31> 814 ret <16 x i8> %s 815} 816 817; CHECK-SD-LABEL: .LCPI14_0: 818; CHECK-SD: .byte 0 // 0x0 819; CHECK-SD-NEXT: .byte 4 // 0x4 820; CHECK-SD-NEXT: .byte 52 // 0x34 821; CHECK-SD-NEXT: .byte 12 // 0xc 822; CHECK-SD-NEXT: .byte 16 // 0x10 823; CHECK-SD-NEXT: .byte 20 // 0x14 824; CHECK-SD-NEXT: .byte 24 // 0x18 825; CHECK-SD-NEXT: .byte 28 // 0x1c 826; CHECK-SD-NEXT: .byte 32 // 0x20 827; CHECK-SD-NEXT: .byte 36 // 0x24 828; CHECK-SD-NEXT: .byte 40 // 0x28 829; CHECK-SD-NEXT: .byte 44 // 0x2c 830; CHECK-SD-NEXT: .byte 48 // 0x30 831; CHECK-SD-NEXT: .byte 52 // 0x34 832; CHECK-SD-NEXT: .byte 56 // 0x38 833; CHECK-SD-NEXT: .byte 60 // 0x3c 834 835; CHECK-GI-LABEL: .LCPI14_0: 836; CHECK-GI: .byte 0 // 0x0 837; CHECK-GI-NEXT: .byte 1 // 0x1 838; CHECK-GI-NEXT: .byte 21 // 0x15 839; CHECK-GI-NEXT: .byte 3 // 0x3 840; CHECK-GI-NEXT: .byte 4 // 0x4 841; CHECK-GI-NEXT: .byte 5 // 0x5 842; CHECK-GI-NEXT: .byte 6 // 0x6 843; CHECK-GI-NEXT: .byte 7 // 0x7 844; CHECK-GI-NEXT: .byte 16 // 0x10 845; CHECK-GI-NEXT: .byte 17 // 0x11 846; CHECK-GI-NEXT: .byte 18 // 
0x12 847; CHECK-GI-NEXT: .byte 19 // 0x13 848; CHECK-GI-NEXT: .byte 20 // 0x14 849; CHECK-GI-NEXT: .byte 21 // 0x15 850; CHECK-GI-NEXT: .byte 22 // 0x16 851; CHECK-GI-NEXT: .byte 23 // 0x17 852; CHECK-GI-LABEL: .LCPI14_1: 853; CHECK-GI: .byte 0 // 0x0 854; CHECK-GI-NEXT: .byte 4 // 0x4 855; CHECK-GI-NEXT: .byte 8 // 0x8 856; CHECK-GI-NEXT: .byte 12 // 0xc 857; CHECK-GI-NEXT: .byte 16 // 0x10 858; CHECK-GI-NEXT: .byte 20 // 0x14 859; CHECK-GI-NEXT: .byte 24 // 0x18 860; CHECK-GI-NEXT: .byte 28 // 0x1c 861; CHECK-GI-NEXT: .byte 255 // 0xff 862; CHECK-GI-NEXT: .byte 255 // 0xff 863; CHECK-GI-NEXT: .byte 255 // 0xff 864; CHECK-GI-NEXT: .byte 255 // 0xff 865; CHECK-GI-NEXT: .byte 255 // 0xff 866; CHECK-GI-NEXT: .byte 255 // 0xff 867; CHECK-GI-NEXT: .byte 255 // 0xff 868; CHECK-GI-NEXT: .byte 255 // 0xff 869 870define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { 871; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle: 872; CHECK-SD: // %bb.0: 873; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 874; CHECK-SD-NEXT: adrp x8, .LCPI14_0 875; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 876; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI14_0] 877; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 878; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 879; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 880; CHECK-SD-NEXT: ret 881; 882; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle: 883; CHECK-GI: // %bb.0: 884; CHECK-GI-NEXT: adrp x8, .LCPI14_1 885; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 886; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 887; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI14_1] 888; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 889; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 890; 
CHECK-GI-NEXT: adrp x8, .LCPI14_0 891; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4 892; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v4 893; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI14_0] 894; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 895; CHECK-GI-NEXT: ret 896 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 897 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 898 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 899 ret <16 x i8> %s 900} 901 902; CHECK-SD-LABEL: .LCPI15_0: 903; CHECK-SD: .byte 0 // 0x0 904; CHECK-SD-NEXT: .byte 4 // 0x4 905; CHECK-SD-NEXT: .byte 52 // 0x34 906; CHECK-SD-NEXT: .byte 12 // 0xc 907; CHECK-SD-NEXT: .byte 16 // 0x10 908; CHECK-SD-NEXT: .byte 20 // 0x14 909; CHECK-SD-NEXT: .byte 24 // 0x18 910; CHECK-SD-NEXT: .byte 28 // 0x1c 911; CHECK-SD-NEXT: .byte 32 // 0x20 912; CHECK-SD-NEXT: .byte 36 // 0x24 913; CHECK-SD-NEXT: .byte 40 // 0x28 914; CHECK-SD-NEXT: .byte 44 // 0x2c 915; CHECK-SD-NEXT: .byte 48 // 0x30 916; CHECK-SD-NEXT: .byte 52 // 0x34 917; CHECK-SD-NEXT: .byte 56 // 0x38 918; CHECK-SD-NEXT: .byte 60 // 0x3c 919 920; CHECK-GI-LABEL: .LCPI15_0: 921; CHECK-GI: .byte 0 // 0x0 922; CHECK-GI-NEXT: .byte 1 // 0x1 923; CHECK-GI-NEXT: .byte 21 // 0x15 924; CHECK-GI-NEXT: .byte 3 // 0x3 925; CHECK-GI-NEXT: .byte 4 // 0x4 926; CHECK-GI-NEXT: .byte 5 // 0x5 927; CHECK-GI-NEXT: .byte 6 // 0x6 928; CHECK-GI-NEXT: .byte 7 // 0x7 929; CHECK-GI-NEXT: .byte 16 // 0x10 930; CHECK-GI-NEXT: .byte 17 // 0x11 931; CHECK-GI-NEXT: .byte 18 // 0x12 932; CHECK-GI-NEXT: .byte 19 // 0x13 933; CHECK-GI-NEXT: .byte 20 // 0x14 934; 
CHECK-GI-NEXT: .byte 21 // 0x15 935; CHECK-GI-NEXT: .byte 22 // 0x16 936; CHECK-GI-NEXT: .byte 23 // 0x17 937; CHECK-GI-LABEL: .LCPI15_1: 938; CHECK-GI: .byte 0 // 0x0 939; CHECK-GI-NEXT: .byte 4 // 0x4 940; CHECK-GI-NEXT: .byte 8 // 0x8 941; CHECK-GI-NEXT: .byte 12 // 0xc 942; CHECK-GI-NEXT: .byte 16 // 0x10 943; CHECK-GI-NEXT: .byte 20 // 0x14 944; CHECK-GI-NEXT: .byte 24 // 0x18 945; CHECK-GI-NEXT: .byte 28 // 0x1c 946; CHECK-GI-NEXT: .byte 255 // 0xff 947; CHECK-GI-NEXT: .byte 255 // 0xff 948; CHECK-GI-NEXT: .byte 255 // 0xff 949; CHECK-GI-NEXT: .byte 255 // 0xff 950; CHECK-GI-NEXT: .byte 255 // 0xff 951; CHECK-GI-NEXT: .byte 255 // 0xff 952; CHECK-GI-NEXT: .byte 255 // 0xff 953; CHECK-GI-NEXT: .byte 255 // 0xff 954; CHECK-GI-LABEL: .LCPI15_2: 955; CHECK-GI: .byte 0 // 0x0 956; CHECK-GI-NEXT: .byte 4 // 0x4 957; CHECK-GI-NEXT: .byte 8 // 0x8 958; CHECK-GI-NEXT: .byte 12 // 0xc 959; CHECK-GI-NEXT: .byte 16 // 0x10 960; CHECK-GI-NEXT: .byte 20 // 0x14 961; CHECK-GI-NEXT: .byte 24 // 0x18 962; CHECK-GI-NEXT: .byte 28 // 0x1c 963; CHECK-GI-NEXT: .byte 0 // 0x0 964; CHECK-GI-NEXT: .byte 255 // 0xff 965; CHECK-GI-NEXT: .byte 255 // 0xff 966; CHECK-GI-NEXT: .byte 255 // 0xff 967; CHECK-GI-NEXT: .byte 255 // 0xff 968; CHECK-GI-NEXT: .byte 255 // 0xff 969; CHECK-GI-NEXT: .byte 255 // 0xff 970; CHECK-GI-NEXT: .byte 255 // 0xff 971 972define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { 973; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask1: 974; CHECK-SD: // %bb.0: 975; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 976; CHECK-SD-NEXT: adrp x8, .LCPI15_0 977; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 978; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI15_0] 979; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 980; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 981; CHECK-SD-NEXT: 
tbl.16b v0, { v0, v1, v2, v3 }, v4 982; CHECK-SD-NEXT: ret 983; 984; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask1: 985; CHECK-GI: // %bb.0: 986; CHECK-GI-NEXT: adrp x8, .LCPI15_2 987; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 988; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 989; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI15_2] 990; CHECK-GI-NEXT: adrp x8, .LCPI15_1 991; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 992; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 993; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI15_1] 994; CHECK-GI-NEXT: adrp x8, .LCPI15_0 995; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4 996; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v5 997; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI15_0] 998; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 999; CHECK-GI-NEXT: ret 1000 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 1001 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 1002 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 1003 ret <16 x i8> %s 1004} 1005 1006; CHECK-SD-LABEL: .LCPI16_0: 1007; CHECK-SD: .byte 0 // 0x0 1008; CHECK-SD-NEXT: .byte 4 // 0x4 1009; CHECK-SD-NEXT: .byte 52 // 0x34 1010; CHECK-SD-NEXT: .byte 12 // 0xc 1011; CHECK-SD-NEXT: .byte 16 // 0x10 1012; CHECK-SD-NEXT: .byte 20 // 0x14 1013; CHECK-SD-NEXT: .byte 24 // 0x18 1014; CHECK-SD-NEXT: .byte 28 // 0x1c 1015; CHECK-SD-NEXT: .byte 32 // 0x20 1016; CHECK-SD-NEXT: .byte 36 // 0x24 1017; CHECK-SD-NEXT: .byte 40 // 0x28 1018; CHECK-SD-NEXT: .byte 44 // 0x2c 1019; 
CHECK-SD-NEXT: .byte 48 // 0x30 1020; CHECK-SD-NEXT: .byte 52 // 0x34 1021; CHECK-SD-NEXT: .byte 56 // 0x38 1022; CHECK-SD-NEXT: .byte 60 // 0x3c 1023 1024; CHECK-GI-LABEL: .LCPI16_0: 1025; CHECK-GI: .byte 0 // 0x0 1026; CHECK-GI-NEXT: .byte 1 // 0x1 1027; CHECK-GI-NEXT: .byte 21 // 0x15 1028; CHECK-GI-NEXT: .byte 3 // 0x3 1029; CHECK-GI-NEXT: .byte 4 // 0x4 1030; CHECK-GI-NEXT: .byte 5 // 0x5 1031; CHECK-GI-NEXT: .byte 6 // 0x6 1032; CHECK-GI-NEXT: .byte 7 // 0x7 1033; CHECK-GI-NEXT: .byte 16 // 0x10 1034; CHECK-GI-NEXT: .byte 17 // 0x11 1035; CHECK-GI-NEXT: .byte 18 // 0x12 1036; CHECK-GI-NEXT: .byte 19 // 0x13 1037; CHECK-GI-NEXT: .byte 20 // 0x14 1038; CHECK-GI-NEXT: .byte 21 // 0x15 1039; CHECK-GI-NEXT: .byte 22 // 0x16 1040; CHECK-GI-NEXT: .byte 23 // 0x17 1041; CHECK-GI-LABEL: .LCPI16_1: 1042; CHECK-GI: .byte 0 // 0x0 1043; CHECK-GI-NEXT: .byte 4 // 0x4 1044; CHECK-GI-NEXT: .byte 8 // 0x8 1045; CHECK-GI-NEXT: .byte 12 // 0xc 1046; CHECK-GI-NEXT: .byte 16 // 0x10 1047; CHECK-GI-NEXT: .byte 20 // 0x14 1048; CHECK-GI-NEXT: .byte 24 // 0x18 1049; CHECK-GI-NEXT: .byte 28 // 0x1c 1050; CHECK-GI-NEXT: .byte 0 // 0x0 1051; CHECK-GI-NEXT: .byte 255 // 0xff 1052; CHECK-GI-NEXT: .byte 255 // 0xff 1053; CHECK-GI-NEXT: .byte 255 // 0xff 1054; CHECK-GI-NEXT: .byte 255 // 0xff 1055; CHECK-GI-NEXT: .byte 255 // 0xff 1056; CHECK-GI-NEXT: .byte 255 // 0xff 1057; CHECK-GI-NEXT: .byte 255 // 0xff 1058; CHECK-GI-LABEL: .LCPI16_2: 1059; CHECK-GI: .byte 0 // 0x0 1060; CHECK-GI-NEXT: .byte 4 // 0x4 1061; CHECK-GI-NEXT: .byte 8 // 0x8 1062; CHECK-GI-NEXT: .byte 12 // 0xc 1063; CHECK-GI-NEXT: .byte 16 // 0x10 1064; CHECK-GI-NEXT: .byte 20 // 0x14 1065; CHECK-GI-NEXT: .byte 24 // 0x18 1066; CHECK-GI-NEXT: .byte 28 // 0x1c 1067; CHECK-GI-NEXT: .byte 255 // 0xff 1068; CHECK-GI-NEXT: .byte 255 // 0xff 1069; CHECK-GI-NEXT: .byte 255 // 0xff 1070; CHECK-GI-NEXT: .byte 255 // 0xff 1071; CHECK-GI-NEXT: .byte 255 // 0xff 1072; CHECK-GI-NEXT: .byte 255 // 0xff 1073; CHECK-GI-NEXT: .byte 255 // 
0xff 1074; CHECK-GI-NEXT: .byte 255 // 0xff 1075 1076define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { 1077; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2: 1078; CHECK-SD: // %bb.0: 1079; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 1080; CHECK-SD-NEXT: adrp x8, .LCPI16_0 1081; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 1082; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI16_0] 1083; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 1084; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 1085; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 1086; CHECK-SD-NEXT: ret 1087; 1088; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2: 1089; CHECK-GI: // %bb.0: 1090; CHECK-GI-NEXT: adrp x8, .LCPI16_2 1091; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 1092; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 1093; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI16_2] 1094; CHECK-GI-NEXT: adrp x8, .LCPI16_1 1095; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 1096; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 1097; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI16_1] 1098; CHECK-GI-NEXT: adrp x8, .LCPI16_0 1099; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4 1100; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v5 1101; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI16_0] 1102; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 1103; CHECK-GI-NEXT: ret 1104 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 1105 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, 
i8 -1, i8 -1>) 1106 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 1107 ret <16 x i8> %s 1108} 1109 1110declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) nounwind readnone 1111declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>) nounwind readnone 1112declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone 1113declare <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone 1114declare <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone 1115declare <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone 1116declare <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone 1117declare <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone 1118 1119define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind { 1120; CHECK-LABEL: tbx1_8b: 1121; CHECK: // %bb.0: 1122; CHECK-NEXT: tbx.8b v0, { v1 }, v2 1123; CHECK-NEXT: ret 1124 %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) 1125 ret <8 x i8> %tmp3 1126} 1127 1128define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind { 1129; CHECK-LABEL: tbx1_16b: 1130; CHECK: // %bb.0: 1131; CHECK-NEXT: tbx.16b v0, { v1 }, v2 1132; CHECK-NEXT: ret 1133 %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) 1134 ret <16 x i8> %tmp3 1135} 1136 1137define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) { 1138; CHECK-SD-LABEL: tbx2_8b: 1139; CHECK-SD: // %bb.0: 1140; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2 1141; CHECK-SD-NEXT: // 
kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2 1142; CHECK-SD-NEXT: tbx.8b v0, { v1, v2 }, v3 1143; CHECK-SD-NEXT: ret 1144; 1145; CHECK-GI-LABEL: tbx2_8b: 1146; CHECK-GI: // %bb.0: 1147; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2 1148; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2 1149; CHECK-GI-NEXT: tbx.8b v0, { v1, v2 }, v3 1150; CHECK-GI-NEXT: ret 1151 %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) 1152 ret <8 x i8> %tmp3 1153} 1154 1155define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) { 1156; CHECK-SD-LABEL: tbx2_16b: 1157; CHECK-SD: // %bb.0: 1158; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2 1159; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2 1160; CHECK-SD-NEXT: tbx.16b v0, { v1, v2 }, v3 1161; CHECK-SD-NEXT: ret 1162; 1163; CHECK-GI-LABEL: tbx2_16b: 1164; CHECK-GI: // %bb.0: 1165; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2 1166; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2 1167; CHECK-GI-NEXT: tbx.16b v0, { v1, v2 }, v3 1168; CHECK-GI-NEXT: ret 1169 %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) 1170 ret <16 x i8> %tmp3 1171} 1172 1173define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) { 1174; CHECK-SD-LABEL: tbx3_8b: 1175; CHECK-SD: // %bb.0: 1176; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3 1177; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3 1178; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3 1179; CHECK-SD-NEXT: tbx.8b v0, { v1, v2, v3 }, v4 1180; CHECK-SD-NEXT: ret 1181; 1182; CHECK-GI-LABEL: tbx3_8b: 1183; CHECK-GI: // %bb.0: 1184; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3 1185; CHECK-GI-NEXT: // kill: 
def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3 1186; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3 1187; CHECK-GI-NEXT: tbx.8b v0, { v1, v2, v3 }, v4 1188; CHECK-GI-NEXT: ret 1189 %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(< 8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) 1190 ret <8 x i8> %tmp3 1191} 1192 1193define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) { 1194; CHECK-SD-LABEL: tbx3_16b: 1195; CHECK-SD: // %bb.0: 1196; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3 1197; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3 1198; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3 1199; CHECK-SD-NEXT: tbx.16b v0, { v1, v2, v3 }, v4 1200; CHECK-SD-NEXT: ret 1201; 1202; CHECK-GI-LABEL: tbx3_16b: 1203; CHECK-GI: // %bb.0: 1204; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3 1205; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3 1206; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3 1207; CHECK-GI-NEXT: tbx.16b v0, { v1, v2, v3 }, v4 1208; CHECK-GI-NEXT: ret 1209 %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) 1210 ret <16 x i8> %tmp3 1211} 1212 1213define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) { 1214; CHECK-SD-LABEL: tbx4_8b: 1215; CHECK-SD: // %bb.0: 1216; CHECK-SD-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 1217; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 1218; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 1219; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 1220; CHECK-SD-NEXT: tbx.8b v0, { v1, v2, v3, v4 }, v5 1221; CHECK-SD-NEXT: ret 1222; 1223; 
CHECK-GI-LABEL: tbx4_8b: 1224; CHECK-GI: // %bb.0: 1225; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 1226; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 1227; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 1228; CHECK-GI-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 1229; CHECK-GI-NEXT: tbx.8b v0, { v1, v2, v3, v4 }, v5 1230; CHECK-GI-NEXT: ret 1231 %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) 1232 ret <8 x i8> %tmp3 1233} 1234 1235define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) { 1236; CHECK-SD-LABEL: tbx4_16b: 1237; CHECK-SD: // %bb.0: 1238; CHECK-SD-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 1239; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 1240; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 1241; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 1242; CHECK-SD-NEXT: tbx.16b v0, { v1, v2, v3, v4 }, v5 1243; CHECK-SD-NEXT: ret 1244; 1245; CHECK-GI-LABEL: tbx4_16b: 1246; CHECK-GI: // %bb.0: 1247; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 1248; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 1249; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 1250; CHECK-GI-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 1251; CHECK-GI-NEXT: tbx.16b v0, { v1, v2, v3, v4 }, v5 1252; CHECK-GI-NEXT: ret 1253 %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) 1254 ret <16 x i8> %tmp3 1255} 1256 1257declare <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) 
nounwind readnone 1258declare <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone 1259declare <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone 1260declare <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone 1261declare <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone 1262declare <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone 1263declare <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone 1264declare <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone 1265 1266