xref: /llvm-project/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl.ll (revision 38fffa630ee80163dc65e759392ad29798905679)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool build/bin/opt --version 2
2; Test memory sanitizer instrumentation for Arm NEON tbl instructions.
3;
4; RUN: opt < %s -passes=msan -S | FileCheck %s
5;
6; Forked from llvm/test/CodeGen/AArch64/arm64-tbl.ll
7
8target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
9target triple = "aarch64--linux-android9001"
10
11; -----------------------------------------------------------------------------------------------------------------------------------------------
12
13define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind sanitize_memory {
14; CHECK-LABEL: define <8 x i8> @tbl1_8b
15; CHECK-SAME: (<16 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0:[0-9]+]] {
16; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
17; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
18; CHECK-NEXT:    call void @llvm.donothing()
19; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[TMP1]], <8 x i8> [[B]])
20; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP2]], [[TMP3]]
21; CHECK-NEXT:    [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[A]], <8 x i8> [[B]])
22; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
23; CHECK-NEXT:    ret <8 x i8> [[OUT]]
24;
25  %out = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %A, <8 x i8> %B)
26  ret <8 x i8> %out
27}
28
29define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind sanitize_memory {
30; CHECK-LABEL: define <16 x i8> @tbl1_16b
31; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] {
32; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
33; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
34; CHECK-NEXT:    call void @llvm.donothing()
35; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[B]])
36; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP2]], [[TMP3]]
37; CHECK-NEXT:    [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
38; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
39; CHECK-NEXT:    ret <16 x i8> [[OUT]]
40;
41  %out = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %A, <16 x i8> %B)
42  ret <16 x i8> %out
43}
44
45define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) sanitize_memory {
46; CHECK-LABEL: define <8 x i8> @tbl2_8b
47; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <8 x i8> [[C:%.*]]) #[[ATTR1:[0-9]+]] {
48; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
49; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
50; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
51; CHECK-NEXT:    call void @llvm.donothing()
52; CHECK-NEXT:    [[TMP4:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[C]])
53; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP3]], [[TMP4]]
54; CHECK-NEXT:    [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]])
55; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
56; CHECK-NEXT:    ret <8 x i8> [[OUT]]
57;
58  %out = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C)
59  ret <8 x i8> %out
60}
61
62define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) sanitize_memory {
63; CHECK-LABEL: define <16 x i8> @tbl2_16b
64; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]]) #[[ATTR1]] {
65; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
66; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
67; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
68; CHECK-NEXT:    call void @llvm.donothing()
69; CHECK-NEXT:    [[TMP4:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[C]])
70; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP3]], [[TMP4]]
71; CHECK-NEXT:    [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]])
72; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
73; CHECK-NEXT:    ret <16 x i8> [[OUT]]
74;
75  %out = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
76  ret <16 x i8> %out
77}
78
79define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) sanitize_memory {
80; CHECK-LABEL: define <8 x i8> @tbl3_8b
81; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <8 x i8> [[D:%.*]]) #[[ATTR1]] {
82; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
83; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
84; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
85; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
86; CHECK-NEXT:    call void @llvm.donothing()
87; CHECK-NEXT:    [[TMP5:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[D]])
88; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP4]], [[TMP5]]
89; CHECK-NEXT:    [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <8 x i8> [[D]])
90; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
91; CHECK-NEXT:    ret <8 x i8> [[OUT]]
92;
93  %out = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
94  ret <8 x i8> %out
95}
96
97define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) sanitize_memory {
98; CHECK-LABEL: define <16 x i8> @tbl3_16b
99; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] {
100; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
101; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
102; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
103; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
104; CHECK-NEXT:    call void @llvm.donothing()
105; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[D]])
106; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP4]], [[TMP5]]
107; CHECK-NEXT:    [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]])
108; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
109; CHECK-NEXT:    ret <16 x i8> [[OUT]]
110;
111  %out = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
112  ret <16 x i8> %out
113}
114
115define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) sanitize_memory {
116; CHECK-LABEL: define <8 x i8> @tbl4_8b
117; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <8 x i8> [[E:%.*]]) #[[ATTR1]] {
118; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
119; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
120; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
121; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
122; CHECK-NEXT:    [[TMP5:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
123; CHECK-NEXT:    call void @llvm.donothing()
124; CHECK-NEXT:    [[TMP6:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[E]])
125; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP5]], [[TMP6]]
126; CHECK-NEXT:    [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> [[E]])
127; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
128; CHECK-NEXT:    ret <8 x i8> [[OUT]]
129;
130  %out = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
131  ret <8 x i8> %out
132}
133
134define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) sanitize_memory {
135; CHECK-LABEL: define <16 x i8> @tbl4_16b
136; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]]) #[[ATTR1]] {
137; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
138; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
139; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
140; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
141; CHECK-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
142; CHECK-NEXT:    call void @llvm.donothing()
143; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[E]])
144; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP5]], [[TMP6]]
145; CHECK-NEXT:    [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]])
146; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
147; CHECK-NEXT:    ret <16 x i8> [[OUT]]
148;
149  %out = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
150  ret <16 x i8> %out
151}
152
153
154
155define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory {
156; CHECK-LABEL: define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8
157; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] {
158; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
159; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
160; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
161; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
162; CHECK-NEXT:    call void @llvm.donothing()
163; CHECK-NEXT:    [[TMP5:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
164; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> zeroinitializer, [[TMP5]]
165; CHECK-NEXT:    [[T1:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
166; CHECK-NEXT:    [[TMP6:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
167; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <8 x i8> zeroinitializer, [[TMP6]]
168; CHECK-NEXT:    [[T2:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
169; CHECK-NEXT:    [[_MSPROP2:%.*]] = shufflevector <8 x i8> [[_MSPROP]], <8 x i8> [[_MSPROP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
170; CHECK-NEXT:    [[S:%.*]] = shufflevector <8 x i8> [[T1]], <8 x i8> [[T2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
171; CHECK-NEXT:    store <8 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
172; CHECK-NEXT:    ret <8 x i8> [[S]]
173;
174  %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
175  %t2 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %c, <16 x i8> %d, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
176  %s = shufflevector <8 x i8> %t1, <8 x i8> %t2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
177  ret <8 x i8> %s
178}
179
180
181
182define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory {
183; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4
184; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] {
185; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
186; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
187; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
188; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
189; CHECK-NEXT:    call void @llvm.donothing()
190; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
191; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> zeroinitializer, [[TMP5]]
192; CHECK-NEXT:    [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
193; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
194; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <16 x i8> zeroinitializer, [[TMP6]]
195; CHECK-NEXT:    [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
196; CHECK-NEXT:    [[_MSPROP2:%.*]] = shufflevector <16 x i8> [[_MSPROP]], <16 x i8> [[_MSPROP1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
197; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
198; CHECK-NEXT:    store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
199; CHECK-NEXT:    ret <16 x i8> [[S]]
200;
201  %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
202  %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
203  %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
204  ret <16 x i8> %s
205}
206
207
208define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) sanitize_memory {
209; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask
210; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) #[[ATTR1]] {
211; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
212; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
213; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
214; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
215; CHECK-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
216; CHECK-NEXT:    call void @llvm.donothing()
217; CHECK-NEXT:    [[_MSPROP:%.*]] = insertelement <16 x i8> splat (i8 -1), i8 [[TMP1]], i32 0
218; CHECK-NEXT:    [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0
219; CHECK-NEXT:    [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 1
220; CHECK-NEXT:    [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1
221; CHECK-NEXT:    [[_MSPROP2:%.*]] = insertelement <16 x i8> [[_MSPROP1]], i8 [[TMP1]], i32 2
222; CHECK-NEXT:    [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2
223; CHECK-NEXT:    [[_MSPROP3:%.*]] = insertelement <16 x i8> [[_MSPROP2]], i8 [[TMP1]], i32 3
224; CHECK-NEXT:    [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3
225; CHECK-NEXT:    [[_MSPROP4:%.*]] = insertelement <16 x i8> [[_MSPROP3]], i8 [[TMP1]], i32 4
226; CHECK-NEXT:    [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4
227; CHECK-NEXT:    [[_MSPROP5:%.*]] = insertelement <16 x i8> [[_MSPROP4]], i8 [[TMP1]], i32 5
228; CHECK-NEXT:    [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5
229; CHECK-NEXT:    [[_MSPROP6:%.*]] = insertelement <16 x i8> [[_MSPROP5]], i8 [[TMP1]], i32 6
230; CHECK-NEXT:    [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6
231; CHECK-NEXT:    [[_MSPROP7:%.*]] = insertelement <16 x i8> [[_MSPROP6]], i8 [[TMP1]], i32 7
232; CHECK-NEXT:    [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7
233; CHECK-NEXT:    [[_MSPROP8:%.*]] = insertelement <16 x i8> [[_MSPROP7]], i8 0, i32 8
234; CHECK-NEXT:    [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8
235; CHECK-NEXT:    [[_MSPROP9:%.*]] = insertelement <16 x i8> [[_MSPROP8]], i8 0, i32 9
236; CHECK-NEXT:    [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9
237; CHECK-NEXT:    [[_MSPROP10:%.*]] = insertelement <16 x i8> [[_MSPROP9]], i8 0, i32 10
238; CHECK-NEXT:    [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10
239; CHECK-NEXT:    [[_MSPROP11:%.*]] = insertelement <16 x i8> [[_MSPROP10]], i8 0, i32 11
240; CHECK-NEXT:    [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11
241; CHECK-NEXT:    [[_MSPROP12:%.*]] = insertelement <16 x i8> [[_MSPROP11]], i8 0, i32 12
242; CHECK-NEXT:    [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12
243; CHECK-NEXT:    [[_MSPROP13:%.*]] = insertelement <16 x i8> [[_MSPROP12]], i8 0, i32 13
244; CHECK-NEXT:    [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13
245; CHECK-NEXT:    [[_MSPROP14:%.*]] = insertelement <16 x i8> [[_MSPROP13]], i8 0, i32 14
246; CHECK-NEXT:    [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14
247; CHECK-NEXT:    [[_MSPROP15:%.*]] = insertelement <16 x i8> [[_MSPROP14]], i8 0, i32 15
248; CHECK-NEXT:    [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 -1, i32 15
249; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[INS_15]])
250; CHECK-NEXT:    [[_MSPROP16:%.*]] = or <16 x i8> [[_MSPROP15]], [[TMP6]]
251; CHECK-NEXT:    [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]])
252; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
253; CHECK-NEXT:    [[_MSPROP17:%.*]] = or <16 x i8> zeroinitializer, [[TMP7]]
254; CHECK-NEXT:    [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
255; CHECK-NEXT:    [[_MSPROP18:%.*]] = shufflevector <16 x i8> [[_MSPROP16]], <16 x i8> [[_MSPROP17]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
256; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
257; CHECK-NEXT:    store <16 x i8> [[_MSPROP18]], ptr @__msan_retval_tls, align 8
258; CHECK-NEXT:    ret <16 x i8> [[S]]
259;
260  %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
261  %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
262  %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
263  %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
264  %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
265  %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
266  %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
267  %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
268  %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
269  %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
270  %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
271  %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
272  %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
273  %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
274  %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
275  %ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15
276  %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
277  %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
278  %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
279  ret <16 x i8> %s
280}
281
282
283define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) sanitize_memory {
284; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2
285; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) #[[ATTR1]] {
286; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
287; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
288; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
289; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
290; CHECK-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
291; CHECK-NEXT:    call void @llvm.donothing()
292; CHECK-NEXT:    [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 1, i32 0
293; CHECK-NEXT:    [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 1, i32 1
294; CHECK-NEXT:    [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 1, i32 2
295; CHECK-NEXT:    [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 1, i32 3
296; CHECK-NEXT:    [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 1, i32 4
297; CHECK-NEXT:    [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 1, i32 5
298; CHECK-NEXT:    [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 1, i32 6
299; CHECK-NEXT:    [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 1, i32 7
300; CHECK-NEXT:    [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8
301; CHECK-NEXT:    [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9
302; CHECK-NEXT:    [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10
303; CHECK-NEXT:    [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11
304; CHECK-NEXT:    [[_MSPROP:%.*]] = insertelement <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1>, i8 [[TMP1]], i32 12
305; CHECK-NEXT:    [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 [[V]], i32 12
306; CHECK-NEXT:    [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 13
307; CHECK-NEXT:    [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 [[V]], i32 13
308; CHECK-NEXT:    [[_MSPROP2:%.*]] = insertelement <16 x i8> [[_MSPROP1]], i8 0, i32 14
309; CHECK-NEXT:    [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14
310; CHECK-NEXT:    [[_MSPROP3:%.*]] = insertelement <16 x i8> [[_MSPROP2]], i8 [[TMP1]], i32 15
311; CHECK-NEXT:    [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 [[V]], i32 15
312; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[INS_15]])
313; CHECK-NEXT:    [[_MSPROP4:%.*]] = or <16 x i8> [[_MSPROP3]], [[TMP6]]
314; CHECK-NEXT:    [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]])
315; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
316; CHECK-NEXT:    [[_MSPROP5:%.*]] = or <16 x i8> zeroinitializer, [[TMP7]]
317; CHECK-NEXT:    [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
318; CHECK-NEXT:    [[_MSPROP6:%.*]] = shufflevector <16 x i8> [[_MSPROP4]], <16 x i8> [[_MSPROP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 31>
319; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 31>
320; CHECK-NEXT:    store <16 x i8> [[_MSPROP6]], ptr @__msan_retval_tls, align 8
321; CHECK-NEXT:    ret <16 x i8> [[S]]
322;
323  %ins.0 = insertelement <16 x i8> poison, i8 1, i32 0
324  %ins.1 = insertelement <16 x i8> %ins.0, i8 1, i32 1
325  %ins.2 = insertelement <16 x i8> %ins.1, i8 1, i32 2
326  %ins.3 = insertelement <16 x i8> %ins.2, i8 1, i32 3
327  %ins.4 = insertelement <16 x i8> %ins.3, i8 1, i32 4
328  %ins.5 = insertelement <16 x i8> %ins.4, i8 1, i32 5
329  %ins.6 = insertelement <16 x i8> %ins.5, i8 1, i32 6
330  %ins.7 = insertelement <16 x i8> %ins.6, i8 1, i32 7
331  %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
332  %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
333  %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
334  %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
335  %ins.12 = insertelement <16 x i8> %ins.11, i8 %v, i32 12
336  %ins.13 = insertelement <16 x i8> %ins.12, i8 %v, i32 13
337  %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
338  %ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15
339  %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
340  %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
341  %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 31>
342  ret <16 x i8> %s
343}
344
345
346
347define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) sanitize_memory {
348; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask
349; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) #[[ATTR1]] {
350; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
351; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
352; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
353; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
354; CHECK-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
355; CHECK-NEXT:    call void @llvm.donothing()
356; CHECK-NEXT:    [[_MSPROP:%.*]] = insertelement <16 x i8> splat (i8 -1), i8 [[TMP1]], i32 0
357; CHECK-NEXT:    [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0
358; CHECK-NEXT:    [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 1
359; CHECK-NEXT:    [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1
360; CHECK-NEXT:    [[_MSPROP2:%.*]] = insertelement <16 x i8> [[_MSPROP1]], i8 [[TMP1]], i32 2
361; CHECK-NEXT:    [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2
362; CHECK-NEXT:    [[_MSPROP3:%.*]] = insertelement <16 x i8> [[_MSPROP2]], i8 [[TMP1]], i32 3
363; CHECK-NEXT:    [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3
364; CHECK-NEXT:    [[_MSPROP4:%.*]] = insertelement <16 x i8> [[_MSPROP3]], i8 [[TMP1]], i32 4
365; CHECK-NEXT:    [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4
366; CHECK-NEXT:    [[_MSPROP5:%.*]] = insertelement <16 x i8> [[_MSPROP4]], i8 [[TMP1]], i32 5
367; CHECK-NEXT:    [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5
368; CHECK-NEXT:    [[_MSPROP6:%.*]] = insertelement <16 x i8> [[_MSPROP5]], i8 [[TMP1]], i32 6
369; CHECK-NEXT:    [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6
370; CHECK-NEXT:    [[_MSPROP7:%.*]] = insertelement <16 x i8> [[_MSPROP6]], i8 [[TMP1]], i32 7
371; CHECK-NEXT:    [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7
372; CHECK-NEXT:    [[_MSPROP8:%.*]] = insertelement <16 x i8> [[_MSPROP7]], i8 0, i32 8
373; CHECK-NEXT:    [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8
374; CHECK-NEXT:    [[_MSPROP9:%.*]] = insertelement <16 x i8> [[_MSPROP8]], i8 0, i32 9
375; CHECK-NEXT:    [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9
376; CHECK-NEXT:    [[_MSPROP10:%.*]] = insertelement <16 x i8> [[_MSPROP9]], i8 0, i32 10
377; CHECK-NEXT:    [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10
378; CHECK-NEXT:    [[_MSPROP11:%.*]] = insertelement <16 x i8> [[_MSPROP10]], i8 0, i32 11
379; CHECK-NEXT:    [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11
380; CHECK-NEXT:    [[_MSPROP12:%.*]] = insertelement <16 x i8> [[_MSPROP11]], i8 0, i32 12
381; CHECK-NEXT:    [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12
382; CHECK-NEXT:    [[_MSPROP13:%.*]] = insertelement <16 x i8> [[_MSPROP12]], i8 0, i32 13
383; CHECK-NEXT:    [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13
384; CHECK-NEXT:    [[_MSPROP14:%.*]] = insertelement <16 x i8> [[_MSPROP13]], i8 0, i32 14
385; CHECK-NEXT:    [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14
386; CHECK-NEXT:    [[_MSPROP15:%.*]] = insertelement <16 x i8> [[_MSPROP14]], i8 0, i32 15
387; CHECK-NEXT:    [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 -1, i32 15
388; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
389; CHECK-NEXT:    [[_MSPROP16:%.*]] = or <16 x i8> zeroinitializer, [[TMP6]]
390; CHECK-NEXT:    [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
391; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[INS_15]])
392; CHECK-NEXT:    [[_MSPROP17:%.*]] = or <16 x i8> [[_MSPROP15]], [[TMP7]]
393; CHECK-NEXT:    [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]])
394; CHECK-NEXT:    [[_MSPROP18:%.*]] = shufflevector <16 x i8> [[_MSPROP16]], <16 x i8> [[_MSPROP17]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
395; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
396; CHECK-NEXT:    store <16 x i8> [[_MSPROP18]], ptr @__msan_retval_tls, align 8
397; CHECK-NEXT:    ret <16 x i8> [[S]]
398;
399  %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
400  %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
401  %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
402  %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
403  %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
404  %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
405  %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
406  %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
407  %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
408  %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
409  %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
410  %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
411  %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
412  %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
413  %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
414  %ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15
415  %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
416  %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
417  %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
418  ret <16 x i8> %s
419}
420
421
422
423define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) sanitize_memory {
424; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2
425; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) #[[ATTR1]] {
426; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
427; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
428; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
429; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
430; CHECK-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
431; CHECK-NEXT:    call void @llvm.donothing()
432; CHECK-NEXT:    [[_MSPROP:%.*]] = insertelement <16 x i8> splat (i8 -1), i8 [[TMP1]], i32 0
433; CHECK-NEXT:    [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0
434; CHECK-NEXT:    [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 1
435; CHECK-NEXT:    [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1
436; CHECK-NEXT:    [[_MSPROP2:%.*]] = insertelement <16 x i8> [[_MSPROP1]], i8 [[TMP1]], i32 2
437; CHECK-NEXT:    [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2
438; CHECK-NEXT:    [[_MSPROP3:%.*]] = insertelement <16 x i8> [[_MSPROP2]], i8 [[TMP1]], i32 3
439; CHECK-NEXT:    [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3
440; CHECK-NEXT:    [[_MSPROP4:%.*]] = insertelement <16 x i8> [[_MSPROP3]], i8 [[TMP1]], i32 4
441; CHECK-NEXT:    [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4
442; CHECK-NEXT:    [[_MSPROP5:%.*]] = insertelement <16 x i8> [[_MSPROP4]], i8 [[TMP1]], i32 5
443; CHECK-NEXT:    [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5
444; CHECK-NEXT:    [[_MSPROP6:%.*]] = insertelement <16 x i8> [[_MSPROP5]], i8 [[TMP1]], i32 6
445; CHECK-NEXT:    [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6
446; CHECK-NEXT:    [[_MSPROP7:%.*]] = insertelement <16 x i8> [[_MSPROP6]], i8 [[TMP1]], i32 7
447; CHECK-NEXT:    [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7
448; CHECK-NEXT:    [[_MSPROP8:%.*]] = insertelement <16 x i8> [[_MSPROP7]], i8 0, i32 8
449; CHECK-NEXT:    [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8
450; CHECK-NEXT:    [[_MSPROP9:%.*]] = insertelement <16 x i8> [[_MSPROP8]], i8 0, i32 9
451; CHECK-NEXT:    [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9
452; CHECK-NEXT:    [[_MSPROP10:%.*]] = insertelement <16 x i8> [[_MSPROP9]], i8 0, i32 10
453; CHECK-NEXT:    [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10
454; CHECK-NEXT:    [[_MSPROP11:%.*]] = insertelement <16 x i8> [[_MSPROP10]], i8 0, i32 11
455; CHECK-NEXT:    [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11
456; CHECK-NEXT:    [[_MSPROP12:%.*]] = insertelement <16 x i8> [[_MSPROP11]], i8 0, i32 12
457; CHECK-NEXT:    [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12
458; CHECK-NEXT:    [[_MSPROP13:%.*]] = insertelement <16 x i8> [[_MSPROP12]], i8 0, i32 13
459; CHECK-NEXT:    [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13
460; CHECK-NEXT:    [[_MSPROP14:%.*]] = insertelement <16 x i8> [[_MSPROP13]], i8 [[TMP1]], i32 14
461; CHECK-NEXT:    [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 [[V]], i32 14
462; CHECK-NEXT:    [[_MSPROP15:%.*]] = insertelement <16 x i8> [[_MSPROP14]], i8 [[TMP1]], i32 15
463; CHECK-NEXT:    [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 [[V]], i32 15
464; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
465; CHECK-NEXT:    [[_MSPROP16:%.*]] = or <16 x i8> zeroinitializer, [[TMP6]]
466; CHECK-NEXT:    [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
467; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[INS_15]])
468; CHECK-NEXT:    [[_MSPROP17:%.*]] = or <16 x i8> [[_MSPROP15]], [[TMP7]]
469; CHECK-NEXT:    [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]])
470; CHECK-NEXT:    [[_MSPROP18:%.*]] = shufflevector <16 x i8> [[_MSPROP16]], <16 x i8> [[_MSPROP17]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 30, i32 31>
471; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 30, i32 31>
472; CHECK-NEXT:    store <16 x i8> [[_MSPROP18]], ptr @__msan_retval_tls, align 8
473; CHECK-NEXT:    ret <16 x i8> [[S]]
474;
475  %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
476  %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
477  %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
478  %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
479  %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
480  %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
481  %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
482  %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
483  %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
484  %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
485  %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
486  %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
487  %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
488  %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
489  %ins.14 = insertelement <16 x i8> %ins.13, i8 %v, i32 14
490  %ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15
491  %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
492  %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
493  %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 30, i32 31>
494  ret <16 x i8> %s
495}
496
497
498
499define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory {
500; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle
501; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] {
502; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
503; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
504; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
505; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
506; CHECK-NEXT:    call void @llvm.donothing()
507; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
508; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> zeroinitializer, [[TMP5]]
509; CHECK-NEXT:    [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
510; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
511; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <16 x i8> zeroinitializer, [[TMP6]]
512; CHECK-NEXT:    [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
513; CHECK-NEXT:    [[_MSPROP2:%.*]] = shufflevector <16 x i8> [[_MSPROP]], <16 x i8> [[_MSPROP1]], <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
514; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
515; CHECK-NEXT:    store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
516; CHECK-NEXT:    ret <16 x i8> [[S]]
517;
518  %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
519  %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
520  %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
521  ret <16 x i8> %s
522}
523
524
525
526define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory {
527; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1
528; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] {
529; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
530; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
531; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
532; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
533; CHECK-NEXT:    call void @llvm.donothing()
534; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
535; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> zeroinitializer, [[TMP5]]
536; CHECK-NEXT:    [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
537; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
538; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <16 x i8> zeroinitializer, [[TMP6]]
539; CHECK-NEXT:    [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
540; CHECK-NEXT:    [[_MSPROP2:%.*]] = shufflevector <16 x i8> [[_MSPROP]], <16 x i8> [[_MSPROP1]], <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
541; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
542; CHECK-NEXT:    store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
543; CHECK-NEXT:    ret <16 x i8> [[S]]
544;
545  %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
546  %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
547  %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
548  ret <16 x i8> %s
549}
550
551
552
553define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory {
554; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2
555; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] {
556; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
557; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
558; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
559; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
560; CHECK-NEXT:    call void @llvm.donothing()
561; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
562; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> zeroinitializer, [[TMP5]]
563; CHECK-NEXT:    [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
564; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
565; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <16 x i8> zeroinitializer, [[TMP6]]
566; CHECK-NEXT:    [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
567; CHECK-NEXT:    [[_MSPROP2:%.*]] = shufflevector <16 x i8> [[_MSPROP]], <16 x i8> [[_MSPROP1]], <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
568; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
569; CHECK-NEXT:    store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
570; CHECK-NEXT:    ret <16 x i8> [[S]]
571;
572  %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
573  %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
574  %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
575  ret <16 x i8> %s
576}
577
578declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) nounwind sanitize_memory readnone
579declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>) nounwind sanitize_memory readnone
580declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone
581declare <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone
582declare <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone
583declare <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone
584declare <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone
585declare <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone
586
587define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind sanitize_memory {
588; CHECK-LABEL: define <8 x i8> @tbx1_8b
589; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <8 x i8> [[C:%.*]]) #[[ATTR0]] {
590; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
591; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
592; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
593; CHECK-NEXT:    call void @llvm.donothing()
594; CHECK-NEXT:    [[TMP4:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[C]])
595; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP3]], [[TMP4]]
596; CHECK-NEXT:    [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]])
597; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
598; CHECK-NEXT:    ret <8 x i8> [[OUT]]
599;
600  %out = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C)
601  ret <8 x i8> %out
602}
603
604define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind sanitize_memory {
605; CHECK-LABEL: define <16 x i8> @tbx1_16b
606; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]]) #[[ATTR0]] {
607; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
608; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
609; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
610; CHECK-NEXT:    call void @llvm.donothing()
611; CHECK-NEXT:    [[TMP4:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[C]])
612; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP3]], [[TMP4]]
613; CHECK-NEXT:    [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]])
614; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
615; CHECK-NEXT:    ret <16 x i8> [[OUT]]
616;
617  %out = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
618  ret <16 x i8> %out
619}
620
621define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) sanitize_memory {
622; CHECK-LABEL: define <8 x i8> @tbx2_8b
623; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <8 x i8> [[D:%.*]]) #[[ATTR1]] {
624; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
625; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
626; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
627; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
628; CHECK-NEXT:    call void @llvm.donothing()
629; CHECK-NEXT:    [[TMP5:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[D]])
630; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP4]], [[TMP5]]
631; CHECK-NEXT:    [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <8 x i8> [[D]])
632; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
633; CHECK-NEXT:    ret <8 x i8> [[OUT]]
634;
635  %out = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
636  ret <8 x i8> %out
637}
638
639define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) sanitize_memory {
640; CHECK-LABEL: define <16 x i8> @tbx2_16b
641; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] {
642; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
643; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
644; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
645; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
646; CHECK-NEXT:    call void @llvm.donothing()
647; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[D]])
648; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP4]], [[TMP5]]
649; CHECK-NEXT:    [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]])
650; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
651; CHECK-NEXT:    ret <16 x i8> [[OUT]]
652;
653  %out = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
654  ret <16 x i8> %out
655}
656
657define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) sanitize_memory {
658; CHECK-LABEL: define <8 x i8> @tbx3_8b
659; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <8 x i8> [[E:%.*]]) #[[ATTR1]] {
660; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
661; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
662; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
663; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
664; CHECK-NEXT:    [[TMP5:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
665; CHECK-NEXT:    call void @llvm.donothing()
666; CHECK-NEXT:    [[TMP6:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[E]])
667; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP5]], [[TMP6]]
668; CHECK-NEXT:    [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> [[E]])
669; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
670; CHECK-NEXT:    ret <8 x i8> [[OUT]]
671;
672  %out = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(< 8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
673  ret <8 x i8> %out
674}
675
676define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) sanitize_memory {
677; CHECK-LABEL: define <16 x i8> @tbx3_16b
678; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]]) #[[ATTR1]] {
679; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
680; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
681; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
682; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
683; CHECK-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
684; CHECK-NEXT:    call void @llvm.donothing()
685; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[E]])
686; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP5]], [[TMP6]]
687; CHECK-NEXT:    [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]])
688; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
689; CHECK-NEXT:    ret <16 x i8> [[OUT]]
690;
691  %out = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
692  ret <16 x i8> %out
693}
694
695define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) sanitize_memory {
696; CHECK-LABEL: define <8 x i8> @tbx4_8b
697; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]], <8 x i8> [[F:%.*]]) #[[ATTR1]] {
698; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
699; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
700; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
701; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
702; CHECK-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
703; CHECK-NEXT:    [[TMP6:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
704; CHECK-NEXT:    call void @llvm.donothing()
705; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <8 x i8> [[F]])
706; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP6]], [[TMP7]]
707; CHECK-NEXT:    [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]], <8 x i8> [[F]])
708; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
709; CHECK-NEXT:    ret <8 x i8> [[OUT]]
710;
711  %out = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F)
712  ret <8 x i8> %out
713}
714
715define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) sanitize_memory {
716; CHECK-LABEL: define <16 x i8> @tbx4_16b
717; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]], <16 x i8> [[F:%.*]]) #[[ATTR1]] {
718; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
719; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
720; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
721; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
722; CHECK-NEXT:    [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
723; CHECK-NEXT:    [[TMP6:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
724; CHECK-NEXT:    call void @llvm.donothing()
725; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[F]])
726; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP6]], [[TMP7]]
727; CHECK-NEXT:    [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]], <16 x i8> [[F]])
728; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
729; CHECK-NEXT:    ret <16 x i8> [[OUT]]
730;
731  %out = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F)
732  ret <16 x i8> %out
733}
734
735declare <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone
736declare <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone
737declare <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone
738declare <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone
739declare <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone
740declare <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone
741declare <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone
742declare <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone
743