1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool build/bin/opt --version 2 2; Test memory sanitizer instrumentation for Arm NEON tbl instructions. 3; 4; RUN: opt < %s -passes=msan -S | FileCheck %s 5; 6; Forked from llvm/test/CodeGen/AArch64/arm64-tbl.ll 7 8target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" 9target triple = "aarch64--linux-android9001" 10 11; ----------------------------------------------------------------------------------------------------------------------------------------------- 12 13define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind sanitize_memory { 14; CHECK-LABEL: define <8 x i8> @tbl1_8b 15; CHECK-SAME: (<16 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0:[0-9]+]] { 16; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 17; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 18; CHECK-NEXT: call void @llvm.donothing() 19; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[TMP1]], <8 x i8> [[B]]) 20; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP2]], [[TMP3]] 21; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[A]], <8 x i8> [[B]]) 22; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 23; CHECK-NEXT: ret <8 x i8> [[OUT]] 24; 25 %out = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %A, <8 x i8> %B) 26 ret <8 x i8> %out 27} 28 29define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind sanitize_memory { 30; CHECK-LABEL: define <16 x i8> @tbl1_16b 31; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] { 32; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 33; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 34; CHECK-NEXT: call void @llvm.donothing() 35; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[B]]) 36; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP2]], [[TMP3]] 37; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]]) 38; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 39; CHECK-NEXT: ret <16 x i8> [[OUT]] 40; 41 %out = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %A, <16 x i8> %B) 42 ret <16 x i8> %out 43} 44 45define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) sanitize_memory { 46; CHECK-LABEL: define <8 x i8> @tbl2_8b 47; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <8 x i8> [[C:%.*]]) #[[ATTR1:[0-9]+]] { 48; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 49; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 50; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 51; CHECK-NEXT: call void @llvm.donothing() 52; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[C]]) 53; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP3]], [[TMP4]] 54; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]]) 55; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 56; CHECK-NEXT: ret <8 x i8> [[OUT]] 57; 58 %out = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) 59 ret <8 x i8> %out 60} 61 62define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) sanitize_memory { 63; CHECK-LABEL: define <16 x i8> @tbl2_16b 64; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]]) #[[ATTR1]] { 65; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 66; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 67; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 68; CHECK-NEXT: call void @llvm.donothing() 69; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[C]]) 70; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP3]], [[TMP4]] 71; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]]) 72; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 73; CHECK-NEXT: ret <16 x i8> [[OUT]] 74; 75 %out = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) 76 ret <16 x i8> %out 77} 78 79define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) sanitize_memory { 80; CHECK-LABEL: define <8 x i8> @tbl3_8b 81; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <8 x i8> [[D:%.*]]) #[[ATTR1]] { 82; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 83; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 84; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 85; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 86; CHECK-NEXT: call void @llvm.donothing() 87; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[D]]) 88; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP4]], [[TMP5]] 89; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <8 x i8> [[D]]) 90; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 91; CHECK-NEXT: ret <8 x i8> [[OUT]] 92; 93 %out = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) 94 ret <8 x i8> %out 95} 96 97define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) sanitize_memory { 98; CHECK-LABEL: define <16 x i8> @tbl3_16b 99; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] { 100; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 101; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 102; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 103; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 104; CHECK-NEXT: call void @llvm.donothing() 105; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[D]]) 106; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP4]], [[TMP5]] 107; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]]) 108; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 109; CHECK-NEXT: ret <16 x i8> [[OUT]] 110; 111 %out = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) 112 ret <16 x i8> %out 113} 114 115define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) sanitize_memory { 116; CHECK-LABEL: define <8 x i8> @tbl4_8b 117; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <8 x i8> [[E:%.*]]) #[[ATTR1]] { 118; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 119; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 120; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 121; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 122; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 123; CHECK-NEXT: call void @llvm.donothing() 124; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[E]]) 125; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP5]], [[TMP6]] 126; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> [[E]]) 127; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 128; CHECK-NEXT: ret <8 x i8> [[OUT]] 129; 130 %out = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) 131 ret <8 x i8> %out 132} 133 134define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) sanitize_memory { 135; CHECK-LABEL: define <16 x i8> @tbl4_16b 136; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]]) #[[ATTR1]] { 137; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 138; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 139; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 140; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 141; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 142; CHECK-NEXT: call void @llvm.donothing() 143; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[E]]) 144; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP5]], [[TMP6]] 145; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]]) 146; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 147; CHECK-NEXT: ret <16 x i8> [[OUT]] 148; 149 %out = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) 150 ret <16 x i8> %out 151} 152 153 154 155define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory { 156; CHECK-LABEL: define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8 157; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] { 158; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 159; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 160; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 161; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 162; CHECK-NEXT: call void @llvm.donothing() 163; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>) 164; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> zeroinitializer, [[TMP5]] 165; CHECK-NEXT: [[T1:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>) 166; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>) 167; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i8> zeroinitializer, [[TMP6]] 168; CHECK-NEXT: [[T2:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>) 169; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <8 x i8> [[_MSPROP]], <8 x i8> [[_MSPROP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15> 170; CHECK-NEXT: [[S:%.*]] = shufflevector <8 x i8> [[T1]], <8 x i8> [[T2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15> 171; CHECK-NEXT: store <8 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 172; CHECK-NEXT: ret <8 x i8> [[S]] 173; 174 %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>) 175 %t2 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %c, <16 x i8> %d, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>) 176 %s = shufflevector <8 x i8> %t1, <8 x i8> %t2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15> 177 ret <8 x i8> %s 178} 179 180 181 182define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory { 183; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4 184; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] { 185; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 186; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 187; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 188; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 189; CHECK-NEXT: call void @llvm.donothing() 190; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 191; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> zeroinitializer, [[TMP5]] 192; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 193; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 194; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> zeroinitializer, [[TMP6]] 195; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 196; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <16 x i8> [[_MSPROP]], <16 x i8> [[_MSPROP1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 197; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 198; CHECK-NEXT: store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 199; CHECK-NEXT: ret <16 x i8> [[S]] 200; 201 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 202 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 203 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 204 ret <16 x i8> %s 205} 206 207 208define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) sanitize_memory { 209; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask 210; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) #[[ATTR1]] { 211; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 212; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 213; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 214; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 215; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 216; CHECK-NEXT: call void @llvm.donothing() 217; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> splat (i8 -1), i8 [[TMP1]], i32 0 218; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0 219; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 1 220; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1 221; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <16 x i8> [[_MSPROP1]], i8 [[TMP1]], i32 2 222; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2 223; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <16 x i8> [[_MSPROP2]], i8 [[TMP1]], i32 3 224; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3 225; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <16 x i8> [[_MSPROP3]], i8 [[TMP1]], i32 4 226; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4 227; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <16 x i8> [[_MSPROP4]], i8 [[TMP1]], i32 5 228; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5 229; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <16 x i8> [[_MSPROP5]], i8 [[TMP1]], i32 6 230; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6 231; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <16 x i8> [[_MSPROP6]], i8 [[TMP1]], i32 7 232; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7 233; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <16 x i8> [[_MSPROP7]], i8 0, i32 8 234; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8 235; CHECK-NEXT: [[_MSPROP9:%.*]] = insertelement <16 x i8> [[_MSPROP8]], i8 0, i32 9 236; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9 237; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <16 x i8> [[_MSPROP9]], i8 0, i32 10 238; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10 239; CHECK-NEXT: [[_MSPROP11:%.*]] = insertelement <16 x i8> [[_MSPROP10]], i8 0, i32 11 240; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11 241; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <16 x i8> [[_MSPROP11]], i8 0, i32 12 242; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12 243; CHECK-NEXT: [[_MSPROP13:%.*]] = insertelement <16 x i8> [[_MSPROP12]], i8 0, i32 13 244; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13 245; CHECK-NEXT: [[_MSPROP14:%.*]] = insertelement <16 x i8> [[_MSPROP13]], i8 0, i32 14 246; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14 247; CHECK-NEXT: [[_MSPROP15:%.*]] = insertelement <16 x i8> [[_MSPROP14]], i8 0, i32 15 248; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 -1, i32 15 249; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[INS_15]]) 250; CHECK-NEXT: [[_MSPROP16:%.*]] = or <16 x i8> [[_MSPROP15]], [[TMP6]] 251; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]]) 252; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 253; CHECK-NEXT: [[_MSPROP17:%.*]] = or <16 x i8> zeroinitializer, [[TMP7]] 254; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 255; CHECK-NEXT: [[_MSPROP18:%.*]] = shufflevector <16 x i8> [[_MSPROP16]], <16 x i8> [[_MSPROP17]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 256; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 257; CHECK-NEXT: store <16 x i8> [[_MSPROP18]], ptr @__msan_retval_tls, align 8 258; CHECK-NEXT: ret <16 x i8> [[S]] 259; 260 %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0 261 %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1 262 %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2 263 %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3 264 %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4 265 %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5 266 %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6 267 %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7 268 %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8 269 %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9 270 %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10 271 %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11 272 %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12 273 %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13 274 %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14 275 %ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15 276 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15) 277 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 278 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 279 ret <16 x i8> %s 280} 281 282 283define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) sanitize_memory { 284; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2 285; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) #[[ATTR1]] { 286; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 287; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 288; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 289; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 290; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 291; CHECK-NEXT: call void @llvm.donothing() 292; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 1, i32 0 293; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 1, i32 1 294; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 1, i32 2 295; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 1, i32 3 296; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 1, i32 4 297; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 1, i32 5 298; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 1, i32 6 299; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 1, i32 7 300; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8 301; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9 302; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10 303; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11 304; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1>, i8 [[TMP1]], i32 12 305; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 [[V]], i32 12 306; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 13 307; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 [[V]], i32 13 308; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <16 x i8> [[_MSPROP1]], i8 0, i32 14 309; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14 310; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <16 x i8> [[_MSPROP2]], i8 [[TMP1]], i32 15 311; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 [[V]], i32 15 312; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[INS_15]]) 313; CHECK-NEXT: [[_MSPROP4:%.*]] = or <16 x i8> [[_MSPROP3]], [[TMP6]] 314; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]]) 315; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 316; CHECK-NEXT: [[_MSPROP5:%.*]] = or <16 x i8> zeroinitializer, [[TMP7]] 317; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 318; CHECK-NEXT: [[_MSPROP6:%.*]] = shufflevector <16 x i8> [[_MSPROP4]], <16 x i8> [[_MSPROP5]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 31> 319; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 31> 320; CHECK-NEXT: store <16 x i8> [[_MSPROP6]], ptr @__msan_retval_tls, align 8 321; CHECK-NEXT: ret <16 x i8> [[S]] 322; 323 %ins.0 = insertelement <16 x i8> poison, i8 1, i32 0 324 %ins.1 = insertelement <16 x i8> %ins.0, i8 1, i32 1 325 %ins.2 = insertelement <16 x i8> %ins.1, i8 1, i32 2 326 %ins.3 = insertelement <16 x i8> %ins.2, i8 1, i32 3 327 %ins.4 = insertelement <16 x i8> %ins.3, i8 1, i32 4 328 %ins.5 = insertelement <16 x i8> %ins.4, i8 1, i32 5 329 %ins.6 = insertelement <16 x i8> %ins.5, i8 1, i32 6 330 %ins.7 = insertelement <16 x i8> %ins.6, i8 1, i32 7 331 %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8 332 %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9 333 %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10 334 %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11 335 %ins.12 = insertelement <16 x i8> %ins.11, i8 %v, i32 12 336 %ins.13 = insertelement <16 x i8> %ins.12, i8 %v, i32 13 337 %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14 338 %ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15 339 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15) 340 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 341 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 31> 342 ret <16 x i8> %s 343} 344 345 346 347define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) sanitize_memory { 348; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask 349; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) #[[ATTR1]] { 350; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 351; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 352; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 353; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 354; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 355; CHECK-NEXT: call void @llvm.donothing() 356; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> splat (i8 -1), i8 [[TMP1]], i32 0 357; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0 358; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 1 359; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1 360; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <16 x i8> [[_MSPROP1]], i8 [[TMP1]], i32 2 361; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2 362; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <16 x i8> [[_MSPROP2]], i8 [[TMP1]], i32 3 363; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3 364; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <16 x i8> [[_MSPROP3]], i8 [[TMP1]], i32 4 365; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4 366; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <16 x i8> [[_MSPROP4]], i8 [[TMP1]], i32 5 367; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5 368; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <16 x i8> [[_MSPROP5]], i8 [[TMP1]], i32 6 369; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6 370; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <16 x i8> [[_MSPROP6]], i8 [[TMP1]], i32 7 371; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7 372; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <16 x i8> [[_MSPROP7]], i8 0, i32 8 373; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8 374; CHECK-NEXT: [[_MSPROP9:%.*]] = insertelement <16 x i8> [[_MSPROP8]], i8 0, i32 9 375; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9 376; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <16 x i8> [[_MSPROP9]], i8 0, i32 10 377; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10 378; CHECK-NEXT: [[_MSPROP11:%.*]] = insertelement <16 x i8> [[_MSPROP10]], i8 0, i32 11 379; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11 380; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <16 x i8> [[_MSPROP11]], i8 0, i32 12 381; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12 382; CHECK-NEXT: [[_MSPROP13:%.*]] = insertelement <16 x i8> [[_MSPROP12]], i8 0, i32 13 383; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13 384; CHECK-NEXT: [[_MSPROP14:%.*]] = insertelement <16 x i8> [[_MSPROP13]], i8 0, i32 14 385; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14 386; CHECK-NEXT: [[_MSPROP15:%.*]] = insertelement <16 x i8> [[_MSPROP14]], i8 0, i32 15 387; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 -1, i32 15 388; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 389; CHECK-NEXT: [[_MSPROP16:%.*]] = or <16 x i8> zeroinitializer, [[TMP6]] 390; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 391; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[INS_15]]) 392; CHECK-NEXT: [[_MSPROP17:%.*]] = or <16 x i8> [[_MSPROP15]], [[TMP7]] 393; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]]) 394; CHECK-NEXT: [[_MSPROP18:%.*]] = shufflevector <16 x i8> [[_MSPROP16]], <16 x i8> [[_MSPROP17]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 395; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 396; CHECK-NEXT: store <16 x i8> [[_MSPROP18]], ptr @__msan_retval_tls, align 8 397; CHECK-NEXT: ret <16 x i8> [[S]] 398; 399 %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0 400 %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1 401 %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2 402 %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3 403 %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4 404 %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5 405 %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6 406 %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7 407 %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8 408 %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9 409 %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10 410 %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11 411 %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12 412 %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13 413 %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14 414 %ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15 415 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 416 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15) 417 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 418 ret <16 x i8> %s 419} 420 421 422 423define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) sanitize_memory { 424; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2 425; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) #[[ATTR1]] { 426; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 427; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 428; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 429; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 430; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 431; CHECK-NEXT: call void @llvm.donothing() 432; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> splat (i8 -1), i8 [[TMP1]], i32 0 433; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0 434; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <16 x i8> [[_MSPROP]], i8 [[TMP1]], i32 1 435; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1 436; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <16 x i8> [[_MSPROP1]], i8 [[TMP1]], i32 2 437; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2 438; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <16 x i8> [[_MSPROP2]], i8 [[TMP1]], i32 3 439; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3 440; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <16 x i8> [[_MSPROP3]], i8 [[TMP1]], i32 4 441; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4 442; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <16 x i8> [[_MSPROP4]], i8 [[TMP1]], i32 5 443; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5 444; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <16 x i8> [[_MSPROP5]], i8 [[TMP1]], i32 6 445; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6 446; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <16 x i8> [[_MSPROP6]], i8 [[TMP1]], i32 7 447; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7 448; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <16 x i8> [[_MSPROP7]], i8 0, i32 8 449; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8 450; CHECK-NEXT: [[_MSPROP9:%.*]] = insertelement <16 x i8> [[_MSPROP8]], i8 0, i32 9 451; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9 452; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <16 x i8> [[_MSPROP9]], i8 0, i32 10 453; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10 454; CHECK-NEXT: [[_MSPROP11:%.*]] = insertelement <16 x i8> [[_MSPROP10]], i8 0, i32 11 455; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11 456; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <16 x i8> [[_MSPROP11]], i8 0, i32 12 457; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12 458; CHECK-NEXT: [[_MSPROP13:%.*]] = insertelement <16 x i8> [[_MSPROP12]], i8 0, i32 13 459; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13 460; CHECK-NEXT: [[_MSPROP14:%.*]] = insertelement <16 x i8> [[_MSPROP13]], i8 [[TMP1]], i32 14 461; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 [[V]], i32 14 462; CHECK-NEXT: [[_MSPROP15:%.*]] = insertelement <16 x i8> [[_MSPROP14]], i8 [[TMP1]], i32 15 463; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 [[V]], i32 15 464; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 465; CHECK-NEXT: [[_MSPROP16:%.*]] = or <16 x i8> zeroinitializer, [[TMP6]] 466; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 467; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[INS_15]]) 468; CHECK-NEXT: [[_MSPROP17:%.*]] = or <16 x i8> [[_MSPROP15]], [[TMP7]] 469; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]]) 470; CHECK-NEXT: [[_MSPROP18:%.*]] = shufflevector <16 x i8> [[_MSPROP16]], <16 x i8> [[_MSPROP17]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 30, i32 31> 471; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 30, i32 31> 472; CHECK-NEXT: store <16 x i8> [[_MSPROP18]], ptr @__msan_retval_tls, align 8 473; CHECK-NEXT: ret <16 x i8> [[S]] 474; 475 %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0 476 %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1 477 %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2 478 %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3 479 %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4 480 %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5 481 %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6 482 %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7 483 %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8 484 %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9 485 %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10 486 %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11 487 %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12 488 %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13 489 %ins.14 = insertelement <16 x i8> %ins.13, i8 %v, i32 14 490 %ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15 491 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 492 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15) 493 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 30, i32 31> 494 ret <16 x i8> %s 495} 496 497 498 499define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory { 500; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle 501; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] { 502; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 503; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 504; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 505; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 506; CHECK-NEXT: call void @llvm.donothing() 507; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 508; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> zeroinitializer, [[TMP5]] 509; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 510; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 511; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> zeroinitializer, [[TMP6]] 512; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 513; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <16 x i8> [[_MSPROP]], <16 x i8> [[_MSPROP1]], <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 514; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 515; CHECK-NEXT: store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 516; CHECK-NEXT: ret <16 x i8> [[S]] 517; 518 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 519 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 520 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 521 ret <16 x i8> %s 522} 523 524 525 526define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory { 527; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1 528; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] { 529; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 530; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 531; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 532; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 533; CHECK-NEXT: call void @llvm.donothing() 534; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 535; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> zeroinitializer, [[TMP5]] 536; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 537; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 538; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> zeroinitializer, [[TMP6]] 539; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 540; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <16 x i8> [[_MSPROP]], <16 x i8> [[_MSPROP1]], <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 541; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 542; CHECK-NEXT: store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 543; CHECK-NEXT: ret <16 x i8> [[S]] 544; 545 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 546 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 547 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 548 ret <16 x i8> %s 549} 550 551 552 553define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) sanitize_memory { 554; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2 555; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] { 556; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 557; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 558; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 559; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 560; CHECK-NEXT: call void @llvm.donothing() 561; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 562; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> zeroinitializer, [[TMP5]] 563; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 564; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 565; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> zeroinitializer, [[TMP6]] 566; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 567; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <16 x i8> [[_MSPROP]], <16 x i8> [[_MSPROP1]], <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 568; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 569; CHECK-NEXT: store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 570; CHECK-NEXT: ret <16 x i8> [[S]] 571; 572 %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 573 %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) 574 %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 575 ret <16 x i8> %s 576} 577 578declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) nounwind sanitize_memory readnone 579declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>) nounwind sanitize_memory readnone 580declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone 581declare <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone 582declare <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone 583declare <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone 584declare <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone 585declare <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone 586 587define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind sanitize_memory { 588; CHECK-LABEL: define <8 x i8> @tbx1_8b 589; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <8 x i8> [[C:%.*]]) #[[ATTR0]] { 590; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8 591; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 592; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 593; CHECK-NEXT: call void @llvm.donothing() 594; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> [[C]]) 595; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP3]], [[TMP4]] 596; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]]) 597; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 598; CHECK-NEXT: ret <8 x i8> [[OUT]] 599; 600 %out = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) 601 ret <8 x i8> %out 602} 603 604define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind sanitize_memory { 605; CHECK-LABEL: define <16 x i8> @tbx1_16b 606; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]]) #[[ATTR0]] { 607; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 608; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 609; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 610; CHECK-NEXT: call void @llvm.donothing() 611; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[C]]) 612; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP3]], [[TMP4]] 613; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]]) 614; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 615; CHECK-NEXT: ret <16 x i8> [[OUT]] 616; 617 %out = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) 618 ret <16 x i8> %out 619} 620 621define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) sanitize_memory { 622; CHECK-LABEL: define <8 x i8> @tbx2_8b 623; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <8 x i8> [[D:%.*]]) #[[ATTR1]] { 624; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8 625; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 626; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 627; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8 628; CHECK-NEXT: call void @llvm.donothing() 629; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> [[D]]) 630; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP4]], [[TMP5]] 631; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <8 x i8> [[D]]) 632; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 633; CHECK-NEXT: ret <8 x i8> [[OUT]] 634; 635 %out = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) 636 ret <8 x i8> %out 637} 638 639define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) sanitize_memory { 640; CHECK-LABEL: define <16 x i8> @tbx2_16b 641; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) #[[ATTR1]] { 642; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 643; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 644; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 645; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 646; CHECK-NEXT: call void @llvm.donothing() 647; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[D]]) 648; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP4]], [[TMP5]] 649; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]]) 650; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 651; CHECK-NEXT: ret <16 x i8> [[OUT]] 652; 653 %out = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) 654 ret <16 x i8> %out 655} 656 657define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) sanitize_memory { 658; CHECK-LABEL: define <8 x i8> @tbx3_8b 659; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <8 x i8> [[E:%.*]]) #[[ATTR1]] { 660; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8 661; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 662; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 663; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8 664; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8 665; CHECK-NEXT: call void @llvm.donothing() 666; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> [[E]]) 667; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP5]], [[TMP6]] 668; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> [[E]]) 669; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 670; CHECK-NEXT: ret <8 x i8> [[OUT]] 671; 672 %out = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(< 8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) 673 ret <8 x i8> %out 674} 675 676define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) sanitize_memory { 677; CHECK-LABEL: define <16 x i8> @tbx3_16b 678; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]]) #[[ATTR1]] { 679; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 680; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 681; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 682; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 683; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 684; CHECK-NEXT: call void @llvm.donothing() 685; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[E]]) 686; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP5]], [[TMP6]] 687; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]]) 688; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 689; CHECK-NEXT: ret <16 x i8> [[OUT]] 690; 691 %out = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) 692 ret <16 x i8> %out 693} 694 695define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) sanitize_memory { 696; CHECK-LABEL: define <8 x i8> @tbx4_8b 697; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]], <8 x i8> [[F:%.*]]) #[[ATTR1]] { 698; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8 699; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 700; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8 701; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8 702; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8 703; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8 704; CHECK-NEXT: call void @llvm.donothing() 705; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <8 x i8> [[F]]) 706; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP6]], [[TMP7]] 707; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]], <8 x i8> [[F]]) 708; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 709; CHECK-NEXT: ret <8 x i8> [[OUT]] 710; 711 %out = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) 712 ret <8 x i8> %out 713} 714 715define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) sanitize_memory { 716; CHECK-LABEL: define <16 x i8> @tbx4_16b 717; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]], <16 x i8> [[F:%.*]]) #[[ATTR1]] { 718; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 719; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 720; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 721; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8 722; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 723; CHECK-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 724; CHECK-NEXT: call void @llvm.donothing() 725; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[F]]) 726; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP6]], [[TMP7]] 727; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]], <16 x i8> [[F]]) 728; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 729; CHECK-NEXT: ret <16 x i8> [[OUT]] 730; 731 %out = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) 732 ret <16 x i8> %out 733} 734 735declare <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone 736declare <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone 737declare <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone 738declare <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone 739declare <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone 740declare <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone 741declare <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind sanitize_memory readnone 742declare <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind sanitize_memory readnone 743