1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 2; RUN: opt < %s -passes=msan -S | FileCheck %s 3; 4; Test memory sanitizer instrumentation for Arm vector multiplication 5; instructions. 6; 7; Forked from llvm/test/CodeGen/AArch64/arm64-vmul.ll 8 9target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" 10target triple = "aarch64--linux-android9001" 11 12define <8 x i16> @smull8h(ptr %A, ptr %B) nounwind sanitize_memory { 13; CHECK-LABEL: define <8 x i16> @smull8h( 14; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { 15; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8 16; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 17; CHECK-NEXT: call void @llvm.donothing() 18; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 19; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1:![0-9]+]] 20; CHECK: [[BB3]]: 21; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8:[0-9]+]] 22; CHECK-NEXT: unreachable 23; CHECK: [[BB4]]: 24; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8 25; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 26; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 27; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr 28; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 29; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP8]], 0 30; CHECK-NEXT: br i1 [[_MSCMP3]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] 31; CHECK: [[BB8]]: 32; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 33; CHECK-NEXT: unreachable 34; CHECK: [[BB9]]: 35; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8 36; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 37; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 38; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr 39; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 40; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]] 41; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i8> [[_MSPROP]], zeroinitializer 42; CHECK-NEXT: [[TMP13:%.*]] = zext <8 x i8> [[_MSPROP2]] to <8 x i16> 43; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) 44; CHECK-NEXT: store <8 x i16> [[TMP13]], ptr @__msan_retval_tls, align 8 45; CHECK-NEXT: ret <8 x i16> [[TMP3]] 46; 47 %temp1 = load <8 x i8>, ptr %A 48 %temp2 = load <8 x i8>, ptr %B 49 %temp3 = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %temp1, <8 x i8> %temp2) 50 ret <8 x i16> %temp3 51} 52 53define <4 x i32> @smull4s(ptr %A, ptr %B) nounwind sanitize_memory { 54; CHECK-LABEL: define <4 x i32> @smull4s( 55; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 56; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8 57; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 58; CHECK-NEXT: call void @llvm.donothing() 59; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 60; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] 61; CHECK: [[BB3]]: 62; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 63; CHECK-NEXT: unreachable 64; CHECK: [[BB4]]: 65; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 66; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 67; CHECK-NEXT: [[TMP6:%.*]] = 
xor i64 [[TMP5]], 193514046488576 68; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr 69; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 70; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP8]], 0 71; CHECK-NEXT: br i1 [[_MSCMP3]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] 72; CHECK: [[BB8]]: 73; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 74; CHECK-NEXT: unreachable 75; CHECK: [[BB9]]: 76; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8 77; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 78; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 79; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr 80; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 81; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]] 82; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i16> [[_MSPROP]], zeroinitializer 83; CHECK-NEXT: [[TMP13:%.*]] = zext <4 x i16> [[_MSPROP2]] to <4 x i32> 84; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) 85; CHECK-NEXT: store <4 x i32> [[TMP13]], ptr @__msan_retval_tls, align 8 86; CHECK-NEXT: ret <4 x i32> [[TMP3]] 87; 88 %temp1 = load <4 x i16>, ptr %A 89 %temp2 = load <4 x i16>, ptr %B 90 %temp3 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %temp1, <4 x i16> %temp2) 91 ret <4 x i32> %temp3 92} 93 94define <2 x i64> @smull2d(ptr %A, ptr %B) nounwind sanitize_memory { 95; CHECK-LABEL: define <2 x i64> @smull2d( 96; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 97; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8 98; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 99; CHECK-NEXT: call void @llvm.donothing() 100; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 101; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] 102; CHECK: [[BB3]]: 103; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 104; CHECK-NEXT: unreachable 105; CHECK: [[BB4]]: 106; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 107; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 108; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 109; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr 110; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 111; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP8]], 0 112; CHECK-NEXT: br i1 [[_MSCMP3]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] 113; CHECK: [[BB8]]: 114; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 115; CHECK-NEXT: unreachable 116; CHECK: [[BB9]]: 117; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8 118; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 119; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 120; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr 121; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 122; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] 123; CHECK-NEXT: [[_MSPROP2:%.*]] = or <2 x i32> [[_MSPROP]], zeroinitializer 124; CHECK-NEXT: [[TMP13:%.*]] = zext <2 x i32> [[_MSPROP2]] to <2 x i64> 125; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) 126; CHECK-NEXT: store <2 x i64> [[TMP13]], ptr @__msan_retval_tls, align 8 127; CHECK-NEXT: ret <2 x i64> [[TMP3]] 128; 129 %temp1 = load <2 x i32>, ptr 
%A 130 %temp2 = load <2 x i32>, ptr %B 131 %temp3 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %temp1, <2 x i32> %temp2) 132 ret <2 x i64> %temp3 133} 134 135declare <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone 136declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone 137declare <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone 138 139define <8 x i16> @umull8h(ptr %A, ptr %B) nounwind sanitize_memory { 140; CHECK-LABEL: define <8 x i16> @umull8h( 141; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 142; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8 143; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 144; CHECK-NEXT: call void @llvm.donothing() 145; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 146; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] 147; CHECK: [[BB3]]: 148; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 149; CHECK-NEXT: unreachable 150; CHECK: [[BB4]]: 151; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8 152; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 153; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 154; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr 155; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 156; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP8]], 0 157; CHECK-NEXT: br i1 [[_MSCMP3]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] 158; CHECK: [[BB8]]: 159; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 160; CHECK-NEXT: unreachable 161; CHECK: [[BB9]]: 162; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8 163; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 164; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 165; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr 166; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 167; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]] 168; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i8> [[_MSPROP]], zeroinitializer 169; CHECK-NEXT: [[TMP13:%.*]] = zext <8 x i8> [[_MSPROP2]] to <8 x i16> 170; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) 171; CHECK-NEXT: store <8 x i16> [[TMP13]], ptr @__msan_retval_tls, align 8 172; CHECK-NEXT: ret <8 x i16> [[TMP3]] 173; 174 %temp1 = load <8 x i8>, ptr %A 175 %temp2 = load <8 x i8>, ptr %B 176 %temp3 = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %temp1, <8 x i8> %temp2) 177 ret <8 x i16> %temp3 178} 179 180define <4 x i32> @umull4s(ptr %A, ptr %B) nounwind sanitize_memory { 181; CHECK-LABEL: define <4 x i32> @umull4s( 182; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 183; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8 184; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 185; CHECK-NEXT: call void @llvm.donothing() 186; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 187; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] 188; CHECK: [[BB3]]: 189; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 190; CHECK-NEXT: unreachable 191; CHECK: [[BB4]]: 192; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 193; CHECK-NEXT: [[TMP5:%.*]] 
= ptrtoint ptr [[A]] to i64 194; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 195; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr 196; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 197; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP8]], 0 198; CHECK-NEXT: br i1 [[_MSCMP3]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] 199; CHECK: [[BB8]]: 200; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 201; CHECK-NEXT: unreachable 202; CHECK: [[BB9]]: 203; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8 204; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 205; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 206; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr 207; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 208; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]] 209; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i16> [[_MSPROP]], zeroinitializer 210; CHECK-NEXT: [[TMP13:%.*]] = zext <4 x i16> [[_MSPROP2]] to <4 x i32> 211; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) 212; CHECK-NEXT: store <4 x i32> [[TMP13]], ptr @__msan_retval_tls, align 8 213; CHECK-NEXT: ret <4 x i32> [[TMP3]] 214; 215 %temp1 = load <4 x i16>, ptr %A 216 %temp2 = load <4 x i16>, ptr %B 217 %temp3 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %temp1, <4 x i16> %temp2) 218 ret <4 x i32> %temp3 219} 220 221define <2 x i64> @umull2d(ptr %A, ptr %B) nounwind sanitize_memory { 222; CHECK-LABEL: define <2 x i64> @umull2d( 223; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 224; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8 225; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 226; CHECK-NEXT: call void @llvm.donothing() 227; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 228; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] 229; CHECK: [[BB3]]: 230; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 231; CHECK-NEXT: unreachable 232; CHECK: [[BB4]]: 233; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 234; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 235; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 236; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr 237; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 238; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP8]], 0 239; CHECK-NEXT: br i1 [[_MSCMP3]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] 240; CHECK: [[BB8]]: 241; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 242; CHECK-NEXT: unreachable 243; CHECK: [[BB9]]: 244; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8 245; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 246; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 247; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr 248; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 249; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] 250; CHECK-NEXT: [[_MSPROP2:%.*]] = or <2 x i32> [[_MSPROP]], zeroinitializer 251; CHECK-NEXT: [[TMP13:%.*]] = zext <2 x i32> [[_MSPROP2]] to <2 x i64> 252; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) 253; CHECK-NEXT: store <2 x i64> [[TMP13]], ptr 
@__msan_retval_tls, align 8 254; CHECK-NEXT: ret <2 x i64> [[TMP3]] 255; 256 %temp1 = load <2 x i32>, ptr %A 257 %temp2 = load <2 x i32>, ptr %B 258 %temp3 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %temp1, <2 x i32> %temp2) 259 ret <2 x i64> %temp3 260} 261 262declare <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone 263declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone 264declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone 265 266define <4 x i32> @sqdmull4s(ptr %A, ptr %B) nounwind sanitize_memory { 267; CHECK-LABEL: define <4 x i32> @sqdmull4s( 268; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 269; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8 270; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 271; CHECK-NEXT: call void @llvm.donothing() 272; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 273; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] 274; CHECK: [[BB3]]: 275; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 276; CHECK-NEXT: unreachable 277; CHECK: [[BB4]]: 278; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 279; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 280; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 281; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr 282; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 283; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0 284; CHECK-NEXT: br i1 [[_MSCMP2]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] 285; CHECK: [[BB8]]: 286; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 287; CHECK-NEXT: unreachable 288; CHECK: [[BB9]]: 289; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8 290; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 291; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 292; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr 293; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 294; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i16> [[_MSLD]] to i64 295; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP13]], 0 296; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[_MSLD1]] to i64 297; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0 298; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] 299; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB15:.*]], label %[[BB16:.*]], !prof [[PROF1]] 300; CHECK: [[BB15]]: 301; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 302; CHECK-NEXT: unreachable 303; CHECK: [[BB16]]: 304; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) 305; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 306; CHECK-NEXT: ret <4 x i32> [[TMP3]] 307; 308 %temp1 = load <4 x i16>, ptr %A 309 %temp2 = load <4 x i16>, ptr %B 310 %temp3 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %temp1, <4 x i16> %temp2) 311 ret <4 x i32> %temp3 312} 313 314define <2 x i64> @sqdmull2d(ptr %A, ptr %B) nounwind sanitize_memory { 315; CHECK-LABEL: define <2 x i64> @sqdmull2d( 316; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 317; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8 318; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr 
@__msan_param_tls to i64), i64 8) to ptr), align 8 319; CHECK-NEXT: call void @llvm.donothing() 320; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 321; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] 322; CHECK: [[BB3]]: 323; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 324; CHECK-NEXT: unreachable 325; CHECK: [[BB4]]: 326; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 327; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 328; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 329; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr 330; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 331; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0 332; CHECK-NEXT: br i1 [[_MSCMP2]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] 333; CHECK: [[BB8]]: 334; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 335; CHECK-NEXT: unreachable 336; CHECK: [[BB9]]: 337; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8 338; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 339; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 340; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr 341; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 342; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i32> [[_MSLD]] to i64 343; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP13]], 0 344; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i32> [[_MSLD1]] to i64 345; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0 346; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]] 347; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB15:.*]], label %[[BB16:.*]], !prof [[PROF1]] 348; CHECK: [[BB15]]: 349; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 350; CHECK-NEXT: unreachable 351; CHECK: [[BB16]]: 352; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) 353; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 354; CHECK-NEXT: ret <2 x i64> [[TMP3]] 355; 356 %temp1 = load <2 x i32>, ptr %A 357 %temp2 = load <2 x i32>, ptr %B 358 %temp3 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %temp1, <2 x i32> %temp2) 359 ret <2 x i64> %temp3 360} 361 362define <4 x i32> @sqdmull2_4s(ptr %A, ptr %B) nounwind sanitize_memory { 363; CHECK-LABEL: define <4 x i32> @sqdmull2_4s( 364; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 365; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8 366; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 367; CHECK-NEXT: call void @llvm.donothing() 368; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 369; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] 370; CHECK: [[BB3]]: 371; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 372; CHECK-NEXT: unreachable 373; CHECK: [[BB4]]: 374; CHECK-NEXT: [[LOAD1:%.*]] = load <8 x i16>, ptr [[A]], align 16 375; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 376; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 377; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr 378; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 379; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP8]], 0 380; CHECK-NEXT: br i1 [[_MSCMP3]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] 381; CHECK: [[BB8]]: 382; CHECK-NEXT: call 
void @__msan_warning_noreturn() #[[ATTR8]] 383; CHECK-NEXT: unreachable 384; CHECK: [[BB9]]: 385; CHECK-NEXT: [[LOAD2:%.*]] = load <8 x i16>, ptr [[B]], align 16 386; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 387; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 388; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr 389; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 390; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> splat (i16 -1), <4 x i32> <i32 4, i32 5, i32 6, i32 7> 391; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[LOAD1]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 392; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <8 x i16> [[_MSLD1]], <8 x i16> splat (i16 -1), <4 x i32> <i32 4, i32 5, i32 6, i32 7> 393; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[LOAD2]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 394; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i16> [[_MSPROP]] to i64 395; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP13]], 0 396; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[_MSPROP2]] to i64 397; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP14]], 0 398; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] 399; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB15:.*]], label %[[BB16:.*]], !prof [[PROF1]] 400; CHECK: [[BB15]]: 401; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 402; CHECK-NEXT: unreachable 403; CHECK: [[BB16]]: 404; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) 405; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 406; CHECK-NEXT: ret <4 x i32> [[TMP3]] 407; 408 %load1 = load <8 x i16>, ptr %A 409 %load2 = load <8 x i16>, ptr %B 410 %temp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 411 %temp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 412 %temp3 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %temp1, <4 x i16> %temp2) 413 ret <4 x i32> %temp3 414} 415 416define <2 x i64> @sqdmull2_2d(ptr %A, ptr %B) nounwind sanitize_memory { 417; CHECK-LABEL: define <2 x i64> @sqdmull2_2d( 418; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 419; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8 420; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 421; CHECK-NEXT: call void @llvm.donothing() 422; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 423; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] 424; CHECK: [[BB3]]: 425; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 426; CHECK-NEXT: unreachable 427; CHECK: [[BB4]]: 428; CHECK-NEXT: [[LOAD1:%.*]] = load <4 x i32>, ptr [[A]], align 16 429; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 430; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 431; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr 432; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 433; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP8]], 0 434; CHECK-NEXT: br i1 [[_MSCMP3]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] 435; CHECK: [[BB8]]: 436; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 437; CHECK-NEXT: unreachable 438; CHECK: [[BB9]]: 439; CHECK-NEXT: [[LOAD2:%.*]] = load <4 x i32>, ptr [[B]], align 16 440; 
CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 441; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 442; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr 443; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 444; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> splat (i32 -1), <2 x i32> <i32 2, i32 3> 445; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[LOAD1]], <4 x i32> undef, <2 x i32> <i32 2, i32 3> 446; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <4 x i32> [[_MSLD1]], <4 x i32> splat (i32 -1), <2 x i32> <i32 2, i32 3> 447; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[LOAD2]], <4 x i32> undef, <2 x i32> <i32 2, i32 3> 448; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i32> [[_MSPROP]] to i64 449; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP13]], 0 450; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i32> [[_MSPROP2]] to i64 451; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP14]], 0 452; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]] 453; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB15:.*]], label %[[BB16:.*]], !prof [[PROF1]] 454; CHECK: [[BB15]]: 455; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 456; CHECK-NEXT: unreachable 457; CHECK: [[BB16]]: 458; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) 459; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 460; CHECK-NEXT: ret <2 x i64> [[TMP3]] 461; 462 %load1 = load <4 x i32>, ptr %A 463 %load2 = load <4 x i32>, ptr %B 464 %temp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 465 %temp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 466 %temp3 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %temp1, <2 x i32> %temp2) 467 ret <2 x i64> %temp3 468} 469 470 471declare <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone 472declare <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32>, <2 x i32>) nounwind readnone 473 474define <8 x i16> @pmull8h(ptr %A, ptr %B) nounwind sanitize_memory { 475; CHECK-LABEL: define <8 x i16> @pmull8h( 476; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 477; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8 478; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 479; CHECK-NEXT: call void @llvm.donothing() 480; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 481; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] 482; CHECK: [[BB3]]: 483; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 484; CHECK-NEXT: unreachable 485; CHECK: [[BB4]]: 486; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8 487; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 488; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 489; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr 490; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8 491; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP8]], 0 492; CHECK-NEXT: br i1 [[_MSCMP3]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] 493; CHECK: [[BB8]]: 494; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 495; CHECK-NEXT: unreachable 496; CHECK: [[BB9]]: 497; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8 498; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 499; CHECK-NEXT: 
[[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 500; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr 501; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8 502; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]] 503; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i8> [[_MSPROP]], zeroinitializer 504; CHECK-NEXT: [[TMP13:%.*]] = zext <8 x i8> [[_MSPROP2]] to <8 x i16> 505; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) 506; CHECK-NEXT: store <8 x i16> [[TMP13]], ptr @__msan_retval_tls, align 8 507; CHECK-NEXT: ret <8 x i16> [[TMP3]] 508; 509 %temp1 = load <8 x i8>, ptr %A 510 %temp2 = load <8 x i8>, ptr %B 511 %temp3 = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %temp1, <8 x i8> %temp2) 512 ret <8 x i16> %temp3 513} 514 515declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>) nounwind readnone 516 517define <4 x i16> @sqdmulh_4h(ptr %A, ptr %B) nounwind sanitize_memory { 518; CHECK-LABEL: define <4 x i16> @sqdmulh_4h( 519; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 520; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8 521; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 522; CHECK-NEXT: call void @llvm.donothing() 523; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 524; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] 525; CHECK: [[BB3]]: 526; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 527; CHECK-NEXT: unreachable 528; CHECK: [[BB4]]: 529; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 530; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 531; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 532; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr 533; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 534; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0 535; CHECK-NEXT: br i1 [[_MSCMP2]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] 536; CHECK: [[BB8]]: 537; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 538; CHECK-NEXT: unreachable 539; CHECK: [[BB9]]: 540; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8 541; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 542; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 543; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr 544; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 545; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]] 546; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) 547; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 548; CHECK-NEXT: ret <4 x i16> [[TMP3]] 549; 550 %temp1 = load <4 x i16>, ptr %A 551 %temp2 = load <4 x i16>, ptr %B 552 %temp3 = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %temp1, <4 x i16> %temp2) 553 ret <4 x i16> %temp3 554} 555 556define <8 x i16> @sqdmulh_8h(ptr %A, ptr %B) nounwind sanitize_memory { 557; CHECK-LABEL: define <8 x i16> @sqdmulh_8h( 558; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 559; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8 560; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 561; CHECK-NEXT: call void @llvm.donothing() 562; 
CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 563; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] 564; CHECK: [[BB3]]: 565; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 566; CHECK-NEXT: unreachable 567; CHECK: [[BB4]]: 568; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 569; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 570; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 571; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr 572; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 573; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0 574; CHECK-NEXT: br i1 [[_MSCMP2]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] 575; CHECK: [[BB8]]: 576; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 577; CHECK-NEXT: unreachable 578; CHECK: [[BB9]]: 579; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16 580; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 581; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 582; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr 583; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 584; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]] 585; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) 586; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 587; CHECK-NEXT: ret <8 x i16> [[TMP3]] 588; 589 %temp1 = load <8 x i16>, ptr %A 590 %temp2 = load <8 x i16>, ptr %B 591 %temp3 = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %temp1, <8 x i16> %temp2) 592 ret <8 x i16> %temp3 593} 594 595define <2 x i32> @sqdmulh_2s(ptr %A, ptr %B) nounwind sanitize_memory { 596; CHECK-LABEL: define <2 x i32> @sqdmulh_2s( 597; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 598; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8 599; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 600; CHECK-NEXT: call void @llvm.donothing() 601; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 602; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] 603; CHECK: [[BB3]]: 604; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 605; CHECK-NEXT: unreachable 606; CHECK: [[BB4]]: 607; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 608; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 609; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 610; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr 611; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 612; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0 613; CHECK-NEXT: br i1 [[_MSCMP2]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] 614; CHECK: [[BB8]]: 615; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 616; CHECK-NEXT: unreachable 617; CHECK: [[BB9]]: 618; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8 619; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 620; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 621; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr 622; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 623; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] 624; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> 
[[TMP1]], <2 x i32> [[TMP2]]) 625; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 626; CHECK-NEXT: ret <2 x i32> [[TMP3]] 627; 628 %temp1 = load <2 x i32>, ptr %A 629 %temp2 = load <2 x i32>, ptr %B 630 %temp3 = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %temp1, <2 x i32> %temp2) 631 ret <2 x i32> %temp3 632} 633 634define <4 x i32> @sqdmulh_4s(ptr %A, ptr %B) nounwind sanitize_memory { 635; CHECK-LABEL: define <4 x i32> @sqdmulh_4s( 636; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 637; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8 638; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 639; CHECK-NEXT: call void @llvm.donothing() 640; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 641; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] 642; CHECK: [[BB3]]: 643; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 644; CHECK-NEXT: unreachable 645; CHECK: [[BB4]]: 646; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 647; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 648; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 649; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr 650; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 651; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0 652; CHECK-NEXT: br i1 [[_MSCMP2]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] 653; CHECK: [[BB8]]: 654; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 655; CHECK-NEXT: unreachable 656; CHECK: [[BB9]]: 657; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16 658; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 659; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 660; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr 661; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 662; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] 663; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) 664; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 665; CHECK-NEXT: ret <4 x i32> [[TMP3]] 666; 667 %temp1 = load <4 x i32>, ptr %A 668 %temp2 = load <4 x i32>, ptr %B 669 %temp3 = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %temp1, <4 x i32> %temp2) 670 ret <4 x i32> %temp3 671} 672 673define i32 @sqdmulh_1s(ptr %A, ptr %B) nounwind sanitize_memory { 674; CHECK-LABEL: define i32 @sqdmulh_1s( 675; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 676; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8 677; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 678; CHECK-NEXT: call void @llvm.donothing() 679; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 680; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] 681; CHECK: [[BB3]]: 682; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 683; CHECK-NEXT: unreachable 684; CHECK: [[BB4]]: 685; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4 686; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 687; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 688; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr 689; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP7]], align 4 690; 
CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0 691; CHECK-NEXT: br i1 [[_MSCMP2]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] 692; CHECK: [[BB8]]: 693; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 694; CHECK-NEXT: unreachable 695; CHECK: [[BB9]]: 696; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[B]], align 4 697; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 698; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 699; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr 700; CHECK-NEXT: [[_MSLD1:%.*]] = load i32, ptr [[TMP12]], align 4 701; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[_MSLD]], [[_MSLD1]] 702; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 [[TMP1]], i32 [[TMP2]]) 703; CHECK-NEXT: store i32 [[_MSPROP]], ptr @__msan_retval_tls, align 8 704; CHECK-NEXT: ret i32 [[TMP3]] 705; 706 %temp1 = load i32, ptr %A 707 %temp2 = load i32, ptr %B 708 %temp3 = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %temp1, i32 %temp2) 709 ret i32 %temp3 710} 711 712declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone 713declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone 714declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone 715declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone 716declare i32 @llvm.aarch64.neon.sqdmulh.i32(i32, i32) nounwind readnone 717 718define <4 x i16> @sqrdmulh_4h(ptr %A, ptr %B) nounwind sanitize_memory { 719; CHECK-LABEL: define <4 x i16> @sqrdmulh_4h( 720; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 721; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8 722; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 723; CHECK-NEXT: call void @llvm.donothing() 724; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 725; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] 726; CHECK: [[BB3]]: 727; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 728; CHECK-NEXT: unreachable 729; CHECK: [[BB4]]: 730; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 731; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 732; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 733; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr 734; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8 735; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0 736; CHECK-NEXT: br i1 [[_MSCMP2]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] 737; CHECK: [[BB8]]: 738; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 739; CHECK-NEXT: unreachable 740; CHECK: [[BB9]]: 741; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8 742; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 743; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 744; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr 745; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8 746; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]] 747; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) 748; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 749; CHECK-NEXT: ret <4 x i16> [[TMP3]] 750; 751 %temp1 = load <4 x i16>, ptr %A 752 %temp2 = load <4 x i16>, ptr %B 753 %temp3 = call 
<4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %temp1, <4 x i16> %temp2) 754 ret <4 x i16> %temp3 755} 756 757define <8 x i16> @sqrdmulh_8h(ptr %A, ptr %B) nounwind sanitize_memory { 758; CHECK-LABEL: define <8 x i16> @sqrdmulh_8h( 759; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 760; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8 761; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 762; CHECK-NEXT: call void @llvm.donothing() 763; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 764; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] 765; CHECK: [[BB3]]: 766; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 767; CHECK-NEXT: unreachable 768; CHECK: [[BB4]]: 769; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16 770; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 771; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 772; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr 773; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16 774; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0 775; CHECK-NEXT: br i1 [[_MSCMP2]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] 776; CHECK: [[BB8]]: 777; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 778; CHECK-NEXT: unreachable 779; CHECK: [[BB9]]: 780; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16 781; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 782; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 783; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr 784; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16 785; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]] 786; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) 787; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 788; CHECK-NEXT: ret <8 x i16> [[TMP3]] 789; 790 %temp1 = load <8 x i16>, ptr %A 791 %temp2 = load <8 x i16>, ptr %B 792 %temp3 = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %temp1, <8 x i16> %temp2) 793 ret <8 x i16> %temp3 794} 795 796define <2 x i32> @sqrdmulh_2s(ptr %A, ptr %B) nounwind sanitize_memory { 797; CHECK-LABEL: define <2 x i32> @sqrdmulh_2s( 798; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 799; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8 800; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 801; CHECK-NEXT: call void @llvm.donothing() 802; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 803; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] 804; CHECK: [[BB3]]: 805; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 806; CHECK-NEXT: unreachable 807; CHECK: [[BB4]]: 808; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 809; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 810; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 811; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr 812; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 813; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0 814; CHECK-NEXT: br i1 [[_MSCMP2]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] 815; CHECK: [[BB8]]: 816; CHECK-NEXT: call void 
@__msan_warning_noreturn() #[[ATTR8]] 817; CHECK-NEXT: unreachable 818; CHECK: [[BB9]]: 819; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8 820; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 821; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 822; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr 823; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 824; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] 825; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) 826; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 827; CHECK-NEXT: ret <2 x i32> [[TMP3]] 828; 829 %temp1 = load <2 x i32>, ptr %A 830 %temp2 = load <2 x i32>, ptr %B 831 %temp3 = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %temp1, <2 x i32> %temp2) 832 ret <2 x i32> %temp3 833} 834 835define <4 x i32> @sqrdmulh_4s(ptr %A, ptr %B) nounwind sanitize_memory { 836; CHECK-LABEL: define <4 x i32> @sqrdmulh_4s( 837; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 838; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8 839; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 840; CHECK-NEXT: call void @llvm.donothing() 841; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 842; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] 843; CHECK: [[BB3]]: 844; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 845; CHECK-NEXT: unreachable 846; CHECK: [[BB4]]: 847; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16 848; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 849; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 850; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr 851; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 852; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0 853; CHECK-NEXT: br i1 [[_MSCMP2]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] 854; CHECK: [[BB8]]: 855; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 856; CHECK-NEXT: unreachable 857; CHECK: [[BB9]]: 858; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16 859; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 860; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 861; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr 862; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 863; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] 864; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) 865; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 866; CHECK-NEXT: ret <4 x i32> [[TMP3]] 867; 868 %temp1 = load <4 x i32>, ptr %A 869 %temp2 = load <4 x i32>, ptr %B 870 %temp3 = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %temp1, <4 x i32> %temp2) 871 ret <4 x i32> %temp3 872} 873 874define i32 @sqrdmulh_1s(ptr %A, ptr %B) nounwind sanitize_memory { 875; CHECK-LABEL: define i32 @sqrdmulh_1s( 876; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 877; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8 878; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 879; CHECK-NEXT: call void @llvm.donothing() 880; 
CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 881; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] 882; CHECK: [[BB3]]: 883; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 884; CHECK-NEXT: unreachable 885; CHECK: [[BB4]]: 886; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4 887; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 888; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 889; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr 890; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP7]], align 4 891; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0 892; CHECK-NEXT: br i1 [[_MSCMP2]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] 893; CHECK: [[BB8]]: 894; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 895; CHECK-NEXT: unreachable 896; CHECK: [[BB9]]: 897; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[B]], align 4 898; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 899; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 900; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr 901; CHECK-NEXT: [[_MSLD1:%.*]] = load i32, ptr [[TMP12]], align 4 902; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[_MSLD]], [[_MSLD1]] 903; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 [[TMP1]], i32 [[TMP2]]) 904; CHECK-NEXT: store i32 [[_MSPROP]], ptr @__msan_retval_tls, align 8 905; CHECK-NEXT: ret i32 [[TMP3]] 906; 907 %temp1 = load i32, ptr %A 908 %temp2 = load i32, ptr %B 909 %temp3 = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %temp1, i32 %temp2) 910 ret i32 %temp3 911} 912 913declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone 914declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone 915declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone 916declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone 917declare i32 @llvm.aarch64.neon.sqrdmulh.i32(i32, i32) nounwind readnone 918 919define <2 x float> @fmulx_2s(ptr %A, ptr %B) nounwind sanitize_memory { 920; CHECK-LABEL: define <2 x float> @fmulx_2s( 921; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 922; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8 923; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 924; CHECK-NEXT: call void @llvm.donothing() 925; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 926; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] 927; CHECK: [[BB3]]: 928; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 929; CHECK-NEXT: unreachable 930; CHECK: [[BB4]]: 931; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[A]], align 8 932; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 933; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 934; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr 935; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 936; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP8]], 0 937; CHECK-NEXT: br i1 [[_MSCMP3]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] 938; CHECK: [[BB8]]: 939; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 940; CHECK-NEXT: unreachable 941; CHECK: [[BB9]]: 942; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[B]], align 8 943; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 944; 
CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 945; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr 946; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8 947; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] 948; CHECK-NEXT: [[_MSPROP2:%.*]] = or <2 x i32> [[_MSPROP]], zeroinitializer 949; CHECK-NEXT: [[TMP3:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[TMP1]], <2 x float> [[TMP2]]) 950; CHECK-NEXT: store <2 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 951; CHECK-NEXT: ret <2 x float> [[TMP3]] 952; 953 %temp1 = load <2 x float>, ptr %A 954 %temp2 = load <2 x float>, ptr %B 955 %temp3 = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %temp1, <2 x float> %temp2) 956 ret <2 x float> %temp3 957} 958 959define <4 x float> @fmulx_4s(ptr %A, ptr %B) nounwind sanitize_memory { 960; CHECK-LABEL: define <4 x float> @fmulx_4s( 961; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 962; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8 963; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 964; CHECK-NEXT: call void @llvm.donothing() 965; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 966; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] 967; CHECK: [[BB3]]: 968; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 969; CHECK-NEXT: unreachable 970; CHECK: [[BB4]]: 971; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[A]], align 16 972; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 973; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 974; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr 975; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 976; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP8]], 0 977; CHECK-NEXT: br i1 [[_MSCMP3]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] 978; CHECK: [[BB8]]: 979; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 980; CHECK-NEXT: unreachable 981; CHECK: [[BB9]]: 982; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[B]], align 16 983; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 984; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 985; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr 986; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16 987; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] 988; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i32> [[_MSPROP]], zeroinitializer 989; CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[TMP1]], <4 x float> [[TMP2]]) 990; CHECK-NEXT: store <4 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 991; CHECK-NEXT: ret <4 x float> [[TMP3]] 992; 993 %temp1 = load <4 x float>, ptr %A 994 %temp2 = load <4 x float>, ptr %B 995 %temp3 = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %temp1, <4 x float> %temp2) 996 ret <4 x float> %temp3 997} 998 999define <2 x double> @fmulx_2d(ptr %A, ptr %B) nounwind sanitize_memory { 1000; CHECK-LABEL: define <2 x double> @fmulx_2d( 1001; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { 1002; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8 1003; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 1004; CHECK-NEXT: call void @llvm.donothing() 1005; CHECK-NEXT: 
[[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 1006; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]] 1007; CHECK: [[BB3]]: 1008; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1009; CHECK-NEXT: unreachable 1010; CHECK: [[BB4]]: 1011; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[A]], align 16 1012; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64 1013; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576 1014; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr 1015; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 1016; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP8]], 0 1017; CHECK-NEXT: br i1 [[_MSCMP3]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]] 1018; CHECK: [[BB8]]: 1019; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1020; CHECK-NEXT: unreachable 1021; CHECK: [[BB9]]: 1022; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[B]], align 16 1023; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64 1024; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576 1025; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr 1026; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16 1027; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]] 1028; CHECK-NEXT: [[_MSPROP2:%.*]] = or <2 x i64> [[_MSPROP]], zeroinitializer 1029; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP2]]) 1030; CHECK-NEXT: store <2 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 1031; CHECK-NEXT: ret <2 x double> [[TMP3]] 1032; 1033 %temp1 = load <2 x double>, ptr %A 1034 %temp2 = load <2 x double>, ptr %B 1035 %temp3 = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %temp1, <2 x double> %temp2) 1036 ret <2 x double> %temp3 1037} 1038 1039declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>) nounwind readnone 1040declare <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float>, <4 x float>) nounwind readnone 1041declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>) nounwind readnone 1042 1043define <4 x i32> @smlal4s(ptr %A, ptr %B, ptr %C) nounwind sanitize_memory { 1044; CHECK-LABEL: define <4 x i32> @smlal4s( 1045; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0]] { 1046; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr @__msan_param_tls, align 8 1047; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 1048; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 1049; CHECK-NEXT: call void @llvm.donothing() 1050; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 1051; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] 1052; CHECK: [[BB4]]: 1053; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1054; CHECK-NEXT: unreachable 1055; CHECK: [[BB5]]: 1056; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 1057; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 1058; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 1059; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr 1060; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP8]], align 8 1061; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP10]], 0 1062; CHECK-NEXT: br i1 [[_MSCMP5]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] 1063; CHECK: 
[[BB9]]: 1064; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1065; CHECK-NEXT: unreachable 1066; CHECK: [[BB10]]: 1067; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8 1068; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 1069; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576 1070; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr 1071; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP13]], align 8 1072; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP14]], 0 1073; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB14:.*]], label %[[BB15:.*]], !prof [[PROF1]] 1074; CHECK: [[BB14]]: 1075; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1076; CHECK-NEXT: unreachable 1077; CHECK: [[BB15]]: 1078; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[C]], align 16 1079; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[C]] to i64 1080; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 193514046488576 1081; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr 1082; CHECK-NEXT: [[_MSLD2:%.*]] = load <4 x i32>, ptr [[TMP18]], align 16 1083; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]] 1084; CHECK-NEXT: [[_MSPROP3:%.*]] = or <4 x i16> [[_MSPROP]], zeroinitializer 1085; CHECK-NEXT: [[TMP19:%.*]] = zext <4 x i16> [[_MSPROP3]] to <4 x i32> 1086; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) 1087; CHECK-NEXT: [[_MSPROP4:%.*]] = or <4 x i32> [[_MSLD2]], [[TMP19]] 1088; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP3]], [[TMP4]] 1089; CHECK-NEXT: store <4 x i32> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 1090; CHECK-NEXT: ret <4 x i32> [[TMP5]] 1091; 1092 %temp1 = load <4 x i16>, ptr %A 1093 %temp2 = load <4 x i16>, ptr %B 1094 %temp3 = load <4 x i32>, ptr %C 1095 %temp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %temp1, <4 x i16> %temp2) 1096 %temp5 = add <4 x i32> %temp3, %temp4 1097 ret <4 x i32> %temp5 1098} 1099 1100define <2 x i64> @smlal2d(ptr %A, ptr %B, ptr %C) nounwind sanitize_memory { 1101; CHECK-LABEL: define <2 x i64> @smlal2d( 1102; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0]] { 1103; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr @__msan_param_tls, align 8 1104; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 1105; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 1106; CHECK-NEXT: call void @llvm.donothing() 1107; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 1108; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] 1109; CHECK: [[BB4]]: 1110; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1111; CHECK-NEXT: unreachable 1112; CHECK: [[BB5]]: 1113; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 1114; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 1115; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 1116; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr 1117; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP8]], align 8 1118; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP10]], 0 1119; CHECK-NEXT: br i1 [[_MSCMP5]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] 1120; CHECK: [[BB9]]: 1121; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1122; CHECK-NEXT: unreachable 1123; CHECK: [[BB10]]: 1124; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr 
[[B]], align 8 1125; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 1126; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576 1127; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr 1128; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP13]], align 8 1129; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP14]], 0 1130; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB14:.*]], label %[[BB15:.*]], !prof [[PROF1]] 1131; CHECK: [[BB14]]: 1132; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1133; CHECK-NEXT: unreachable 1134; CHECK: [[BB15]]: 1135; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr [[C]], align 16 1136; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[C]] to i64 1137; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 193514046488576 1138; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr 1139; CHECK-NEXT: [[_MSLD2:%.*]] = load <2 x i64>, ptr [[TMP18]], align 16 1140; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] 1141; CHECK-NEXT: [[_MSPROP3:%.*]] = or <2 x i32> [[_MSPROP]], zeroinitializer 1142; CHECK-NEXT: [[TMP19:%.*]] = zext <2 x i32> [[_MSPROP3]] to <2 x i64> 1143; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) 1144; CHECK-NEXT: [[_MSPROP4:%.*]] = or <2 x i64> [[_MSLD2]], [[TMP19]] 1145; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]] 1146; CHECK-NEXT: store <2 x i64> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 1147; CHECK-NEXT: ret <2 x i64> [[TMP5]] 1148; 1149 %temp1 = load <2 x i32>, ptr %A 1150 %temp2 = load <2 x i32>, ptr %B 1151 %temp3 = load <2 x i64>, ptr %C 1152 %temp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %temp1, <2 x i32> %temp2) 1153 %temp5 = add <2 x i64> %temp3, %temp4 1154 ret <2 x i64> %temp5 1155} 1156 1157define void @smlal8h_chain_with_constant(ptr %dst, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) { 1158; CHECK-LABEL: define void @smlal8h_chain_with_constant( 1159; CHECK-SAME: ptr [[DST:%.*]], <8 x i8> [[V1:%.*]], <8 x i8> [[V2:%.*]], <8 x i8> [[V3:%.*]]) { 1160; CHECK-NEXT: call void @llvm.donothing() 1161; CHECK-NEXT: [[XOR:%.*]] = xor <8 x i8> [[V3]], splat (i8 -1) 1162; CHECK-NEXT: [[SMULL_1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[V1]], <8 x i8> [[V3]]) 1163; CHECK-NEXT: [[ADD_1:%.*]] = add <8 x i16> [[SMULL_1]], splat (i16 257) 1164; CHECK-NEXT: [[SMULL_2:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[V2]], <8 x i8> [[XOR]]) 1165; CHECK-NEXT: [[ADD_2:%.*]] = add <8 x i16> [[ADD_1]], [[SMULL_2]] 1166; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[DST]] to i64 1167; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576 1168; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr 1169; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr [[TMP3]], align 16 1170; CHECK-NEXT: store <8 x i16> [[ADD_2]], ptr [[DST]], align 16 1171; CHECK-NEXT: ret void 1172; 1173 %xor = xor <8 x i8> %v3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 1174 %smull.1 = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %v1, <8 x i8> %v3) 1175 %add.1 = add <8 x i16> %smull.1, <i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257> 1176 %smull.2 = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %v2, <8 x i8> %xor) 1177 %add.2 = add <8 x i16> %add.1, %smull.2 1178 store <8 x i16> %add.2, ptr %dst 1179 ret void 1180} 1181 1182define void @smlal2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { 1183; 
CHECK-LABEL: define void @smlal2d_chain_with_constant( 1184; CHECK-SAME: ptr [[DST:%.*]], <2 x i32> [[V1:%.*]], <2 x i32> [[V2:%.*]], <2 x i32> [[V3:%.*]]) { 1185; CHECK-NEXT: call void @llvm.donothing() 1186; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[V3]], splat (i32 -1) 1187; CHECK-NEXT: [[SMULL_1:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[V1]], <2 x i32> [[V3]]) 1188; CHECK-NEXT: [[ADD_1:%.*]] = add <2 x i64> [[SMULL_1]], splat (i64 257) 1189; CHECK-NEXT: [[SMULL_2:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[V2]], <2 x i32> [[XOR]]) 1190; CHECK-NEXT: [[ADD_2:%.*]] = add <2 x i64> [[ADD_1]], [[SMULL_2]] 1191; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[DST]] to i64 1192; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576 1193; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr 1194; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr [[TMP3]], align 16 1195; CHECK-NEXT: store <2 x i64> [[ADD_2]], ptr [[DST]], align 16 1196; CHECK-NEXT: ret void 1197; 1198 %xor = xor <2 x i32> %v3, <i32 -1, i32 -1> 1199 %smull.1 = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %v1, <2 x i32> %v3) 1200 %add.1 = add <2 x i64> %smull.1, <i64 257, i64 257> 1201 %smull.2 = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %v2, <2 x i32> %xor) 1202 %add.2 = add <2 x i64> %add.1, %smull.2 1203 store <2 x i64> %add.2, ptr %dst 1204 ret void 1205} 1206 1207define <4 x i32> @smlsl4s(ptr %A, ptr %B, ptr %C) nounwind sanitize_memory { 1208; CHECK-LABEL: define <4 x i32> @smlsl4s( 1209; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0]] { 1210; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr @__msan_param_tls, align 8 1211; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 1212; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 1213; CHECK-NEXT: call void @llvm.donothing() 1214; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 1215; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] 1216; CHECK: [[BB4]]: 1217; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1218; CHECK-NEXT: unreachable 1219; CHECK: [[BB5]]: 1220; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 1221; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 1222; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 1223; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr 1224; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP8]], align 8 1225; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP10]], 0 1226; CHECK-NEXT: br i1 [[_MSCMP5]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] 1227; CHECK: [[BB9]]: 1228; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1229; CHECK-NEXT: unreachable 1230; CHECK: [[BB10]]: 1231; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8 1232; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 1233; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576 1234; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr 1235; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP13]], align 8 1236; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP14]], 0 1237; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB14:.*]], label %[[BB15:.*]], !prof [[PROF1]] 1238; CHECK: [[BB14]]: 1239; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1240; CHECK-NEXT: unreachable 
1241; CHECK: [[BB15]]: 1242; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[C]], align 16 1243; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[C]] to i64 1244; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 193514046488576 1245; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr 1246; CHECK-NEXT: [[_MSLD2:%.*]] = load <4 x i32>, ptr [[TMP18]], align 16 1247; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]] 1248; CHECK-NEXT: [[_MSPROP3:%.*]] = or <4 x i16> [[_MSPROP]], zeroinitializer 1249; CHECK-NEXT: [[TMP19:%.*]] = zext <4 x i16> [[_MSPROP3]] to <4 x i32> 1250; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) 1251; CHECK-NEXT: [[_MSPROP4:%.*]] = or <4 x i32> [[_MSLD2]], [[TMP19]] 1252; CHECK-NEXT: [[TMP5:%.*]] = sub <4 x i32> [[TMP3]], [[TMP4]] 1253; CHECK-NEXT: store <4 x i32> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 1254; CHECK-NEXT: ret <4 x i32> [[TMP5]] 1255; 1256 %temp1 = load <4 x i16>, ptr %A 1257 %temp2 = load <4 x i16>, ptr %B 1258 %temp3 = load <4 x i32>, ptr %C 1259 %temp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %temp1, <4 x i16> %temp2) 1260 %temp5 = sub <4 x i32> %temp3, %temp4 1261 ret <4 x i32> %temp5 1262} 1263 1264define <2 x i64> @smlsl2d(ptr %A, ptr %B, ptr %C) nounwind sanitize_memory { 1265; CHECK-LABEL: define <2 x i64> @smlsl2d( 1266; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0]] { 1267; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr @__msan_param_tls, align 8 1268; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 1269; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 1270; CHECK-NEXT: call void @llvm.donothing() 1271; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 1272; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] 1273; CHECK: [[BB4]]: 1274; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1275; CHECK-NEXT: unreachable 1276; CHECK: [[BB5]]: 1277; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 1278; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 1279; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 1280; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr 1281; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP8]], align 8 1282; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP10]], 0 1283; CHECK-NEXT: br i1 [[_MSCMP5]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] 1284; CHECK: [[BB9]]: 1285; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1286; CHECK-NEXT: unreachable 1287; CHECK: [[BB10]]: 1288; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8 1289; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 1290; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576 1291; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr 1292; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP13]], align 8 1293; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP14]], 0 1294; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB14:.*]], label %[[BB15:.*]], !prof [[PROF1]] 1295; CHECK: [[BB14]]: 1296; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1297; CHECK-NEXT: unreachable 1298; CHECK: [[BB15]]: 1299; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr [[C]], align 16 1300; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[C]] to i64 1301; CHECK-NEXT: [[TMP17:%.*]] 
= xor i64 [[TMP16]], 193514046488576 1302; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr 1303; CHECK-NEXT: [[_MSLD2:%.*]] = load <2 x i64>, ptr [[TMP18]], align 16 1304; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] 1305; CHECK-NEXT: [[_MSPROP3:%.*]] = or <2 x i32> [[_MSPROP]], zeroinitializer 1306; CHECK-NEXT: [[TMP19:%.*]] = zext <2 x i32> [[_MSPROP3]] to <2 x i64> 1307; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) 1308; CHECK-NEXT: [[_MSPROP4:%.*]] = or <2 x i64> [[_MSLD2]], [[TMP19]] 1309; CHECK-NEXT: [[TMP5:%.*]] = sub <2 x i64> [[TMP3]], [[TMP4]] 1310; CHECK-NEXT: store <2 x i64> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 1311; CHECK-NEXT: ret <2 x i64> [[TMP5]] 1312; 1313 %temp1 = load <2 x i32>, ptr %A 1314 %temp2 = load <2 x i32>, ptr %B 1315 %temp3 = load <2 x i64>, ptr %C 1316 %temp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %temp1, <2 x i32> %temp2) 1317 %temp5 = sub <2 x i64> %temp3, %temp4 1318 ret <2 x i64> %temp5 1319} 1320 1321define void @smlsl8h_chain_with_constant(ptr %dst, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) { 1322; CHECK-LABEL: define void @smlsl8h_chain_with_constant( 1323; CHECK-SAME: ptr [[DST:%.*]], <8 x i8> [[V1:%.*]], <8 x i8> [[V2:%.*]], <8 x i8> [[V3:%.*]]) { 1324; CHECK-NEXT: call void @llvm.donothing() 1325; CHECK-NEXT: [[XOR:%.*]] = xor <8 x i8> [[V3]], splat (i8 -1) 1326; CHECK-NEXT: [[SMULL_1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[V1]], <8 x i8> [[V3]]) 1327; CHECK-NEXT: [[SUB_1:%.*]] = sub <8 x i16> splat (i16 257), [[SMULL_1]] 1328; CHECK-NEXT: [[SMULL_2:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[V2]], <8 x i8> [[XOR]]) 1329; CHECK-NEXT: [[SUB_2:%.*]] = sub <8 x i16> [[SUB_1]], [[SMULL_2]] 1330; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[DST]] to i64 1331; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576 1332; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr 1333; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr [[TMP3]], align 16 1334; CHECK-NEXT: store <8 x i16> [[SUB_2]], ptr [[DST]], align 16 1335; CHECK-NEXT: ret void 1336; 1337 %xor = xor <8 x i8> %v3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 1338 %smull.1 = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %v1, <8 x i8> %v3) 1339 %sub.1 = sub <8 x i16> <i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257>, %smull.1 1340 %smull.2 = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %v2, <8 x i8> %xor) 1341 %sub.2 = sub <8 x i16> %sub.1, %smull.2 1342 store <8 x i16> %sub.2, ptr %dst 1343 ret void 1344} 1345 1346define void @smlsl2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { 1347; CHECK-LABEL: define void @smlsl2d_chain_with_constant( 1348; CHECK-SAME: ptr [[DST:%.*]], <2 x i32> [[V1:%.*]], <2 x i32> [[V2:%.*]], <2 x i32> [[V3:%.*]]) { 1349; CHECK-NEXT: call void @llvm.donothing() 1350; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[V3]], splat (i32 -1) 1351; CHECK-NEXT: [[SMULL_1:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[V1]], <2 x i32> [[V3]]) 1352; CHECK-NEXT: [[SUB_1:%.*]] = sub <2 x i64> splat (i64 257), [[SMULL_1]] 1353; CHECK-NEXT: [[SMULL_2:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[V2]], <2 x i32> [[XOR]]) 1354; CHECK-NEXT: [[SUB_2:%.*]] = sub <2 x i64> [[SUB_1]], [[SMULL_2]] 1355; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[DST]] to i64 1356; CHECK-NEXT: 
[[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576 1357; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr 1358; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr [[TMP3]], align 16 1359; CHECK-NEXT: store <2 x i64> [[SUB_2]], ptr [[DST]], align 16 1360; CHECK-NEXT: ret void 1361; 1362 %xor = xor <2 x i32> %v3, <i32 -1, i32 -1> 1363 %smull.1 = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %v1, <2 x i32> %v3) 1364 %sub.1 = sub <2 x i64> <i64 257, i64 257>, %smull.1 1365 %smull.2 = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %v2, <2 x i32> %xor) 1366 %sub.2 = sub <2 x i64> %sub.1, %smull.2 1367 store <2 x i64> %sub.2, ptr %dst 1368 ret void 1369} 1370 1371declare <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32>, <4 x i32>) 1372declare <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64>, <2 x i64>) 1373declare <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32>, <4 x i32>) 1374declare <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64>, <2 x i64>) 1375 1376define <4 x i32> @sqdmlal4s(ptr %A, ptr %B, ptr %C) nounwind sanitize_memory { 1377; CHECK-LABEL: define <4 x i32> @sqdmlal4s( 1378; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0]] { 1379; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr @__msan_param_tls, align 8 1380; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 1381; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 1382; CHECK-NEXT: call void @llvm.donothing() 1383; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 1384; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] 1385; CHECK: [[BB4]]: 1386; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1387; CHECK-NEXT: unreachable 1388; CHECK: [[BB5]]: 1389; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 1390; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 1391; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 1392; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr 1393; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP8]], align 8 1394; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP10]], 0 1395; CHECK-NEXT: br i1 [[_MSCMP3]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] 1396; CHECK: [[BB9]]: 1397; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1398; CHECK-NEXT: unreachable 1399; CHECK: [[BB10]]: 1400; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8 1401; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 1402; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576 1403; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr 1404; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP13]], align 8 1405; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0 1406; CHECK-NEXT: br i1 [[_MSCMP4]], label %[[BB14:.*]], label %[[BB15:.*]], !prof [[PROF1]] 1407; CHECK: [[BB14]]: 1408; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1409; CHECK-NEXT: unreachable 1410; CHECK: [[BB15]]: 1411; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[C]], align 16 1412; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[C]] to i64 1413; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 193514046488576 1414; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr 1415; CHECK-NEXT: [[_MSLD2:%.*]] = load <4 x i32>, ptr [[TMP18]], align 16 1416; CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x i16> [[_MSLD]] to i64 1417; 
CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP19]], 0 1418; CHECK-NEXT: [[TMP20:%.*]] = bitcast <4 x i16> [[_MSLD1]] to i64 1419; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP20]], 0 1420; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP5]], [[_MSCMP6]] 1421; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB21:.*]], label %[[BB22:.*]], !prof [[PROF1]] 1422; CHECK: [[BB21]]: 1423; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1424; CHECK-NEXT: unreachable 1425; CHECK: [[BB22]]: 1426; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) 1427; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD2]], zeroinitializer 1428; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[TMP3]], <4 x i32> [[TMP4]]) 1429; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 1430; CHECK-NEXT: ret <4 x i32> [[TMP5]] 1431; 1432 %temp1 = load <4 x i16>, ptr %A 1433 %temp2 = load <4 x i16>, ptr %B 1434 %temp3 = load <4 x i32>, ptr %C 1435 %temp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %temp1, <4 x i16> %temp2) 1436 %temp5 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %temp3, <4 x i32> %temp4) 1437 ret <4 x i32> %temp5 1438} 1439 1440define <2 x i64> @sqdmlal2d(ptr %A, ptr %B, ptr %C) nounwind sanitize_memory { 1441; CHECK-LABEL: define <2 x i64> @sqdmlal2d( 1442; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0]] { 1443; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr @__msan_param_tls, align 8 1444; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 1445; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 1446; CHECK-NEXT: call void @llvm.donothing() 1447; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 1448; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] 1449; CHECK: [[BB4]]: 1450; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1451; CHECK-NEXT: unreachable 1452; CHECK: [[BB5]]: 1453; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 1454; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 1455; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 1456; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr 1457; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP8]], align 8 1458; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP10]], 0 1459; CHECK-NEXT: br i1 [[_MSCMP3]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] 1460; CHECK: [[BB9]]: 1461; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1462; CHECK-NEXT: unreachable 1463; CHECK: [[BB10]]: 1464; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8 1465; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 1466; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576 1467; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr 1468; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP13]], align 8 1469; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0 1470; CHECK-NEXT: br i1 [[_MSCMP4]], label %[[BB14:.*]], label %[[BB15:.*]], !prof [[PROF1]] 1471; CHECK: [[BB14]]: 1472; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1473; CHECK-NEXT: unreachable 1474; CHECK: [[BB15]]: 1475; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr [[C]], align 16 1476; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[C]] to i64 1477; 
CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 193514046488576 1478; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr 1479; CHECK-NEXT: [[_MSLD2:%.*]] = load <2 x i64>, ptr [[TMP18]], align 16 1480; CHECK-NEXT: [[TMP19:%.*]] = bitcast <2 x i32> [[_MSLD]] to i64 1481; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP19]], 0 1482; CHECK-NEXT: [[TMP20:%.*]] = bitcast <2 x i32> [[_MSLD1]] to i64 1483; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP20]], 0 1484; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP5]], [[_MSCMP6]] 1485; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB21:.*]], label %[[BB22:.*]], !prof [[PROF1]] 1486; CHECK: [[BB21]]: 1487; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1488; CHECK-NEXT: unreachable 1489; CHECK: [[BB22]]: 1490; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) 1491; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD2]], zeroinitializer 1492; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]]) 1493; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 1494; CHECK-NEXT: ret <2 x i64> [[TMP5]] 1495; 1496 %temp1 = load <2 x i32>, ptr %A 1497 %temp2 = load <2 x i32>, ptr %B 1498 %temp3 = load <2 x i64>, ptr %C 1499 %temp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %temp1, <2 x i32> %temp2) 1500 %temp5 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %temp3, <2 x i64> %temp4) 1501 ret <2 x i64> %temp5 1502} 1503 1504define <4 x i32> @sqdmlal2_4s(ptr %A, ptr %B, ptr %C) nounwind sanitize_memory { 1505; CHECK-LABEL: define <4 x i32> @sqdmlal2_4s( 1506; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0]] { 1507; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr @__msan_param_tls, align 8 1508; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 1509; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 1510; CHECK-NEXT: call void @llvm.donothing() 1511; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 1512; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] 1513; CHECK: [[BB4]]: 1514; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1515; CHECK-NEXT: unreachable 1516; CHECK: [[BB5]]: 1517; CHECK-NEXT: [[LOAD1:%.*]] = load <8 x i16>, ptr [[A]], align 16 1518; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 1519; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 1520; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr 1521; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP8]], align 16 1522; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP10]], 0 1523; CHECK-NEXT: br i1 [[_MSCMP5]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] 1524; CHECK: [[BB9]]: 1525; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1526; CHECK-NEXT: unreachable 1527; CHECK: [[BB10]]: 1528; CHECK-NEXT: [[LOAD2:%.*]] = load <8 x i16>, ptr [[B]], align 16 1529; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 1530; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576 1531; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr 1532; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP13]], align 16 1533; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP14]], 0 1534; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB14:.*]], label %[[BB15:.*]], !prof 
[[PROF1]] 1535; CHECK: [[BB14]]: 1536; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1537; CHECK-NEXT: unreachable 1538; CHECK: [[BB15]]: 1539; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[C]], align 16 1540; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[C]] to i64 1541; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 193514046488576 1542; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr 1543; CHECK-NEXT: [[_MSLD2:%.*]] = load <4 x i32>, ptr [[TMP18]], align 16 1544; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> splat (i16 -1), <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1545; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[LOAD1]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1546; CHECK-NEXT: [[_MSPROP3:%.*]] = shufflevector <8 x i16> [[_MSLD1]], <8 x i16> splat (i16 -1), <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1547; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[LOAD2]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1548; CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x i16> [[_MSPROP]] to i64 1549; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i64 [[TMP19]], 0 1550; CHECK-NEXT: [[TMP20:%.*]] = bitcast <4 x i16> [[_MSPROP3]] to i64 1551; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i64 [[TMP20]], 0 1552; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]] 1553; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB21:.*]], label %[[BB22:.*]], !prof [[PROF1]] 1554; CHECK: [[BB21]]: 1555; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1556; CHECK-NEXT: unreachable 1557; CHECK: [[BB22]]: 1558; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) 1559; CHECK-NEXT: [[_MSPROP4:%.*]] = or <4 x i32> [[_MSLD2]], zeroinitializer 1560; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[TMP3]], <4 x i32> [[TMP4]]) 1561; CHECK-NEXT: store <4 x i32> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 1562; CHECK-NEXT: ret <4 x i32> [[TMP5]] 1563; 1564 %load1 = load <8 x i16>, ptr %A 1565 %load2 = load <8 x i16>, ptr %B 1566 %temp3 = load <4 x i32>, ptr %C 1567 %temp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1568 %temp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1569 %temp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %temp1, <4 x i16> %temp2) 1570 %temp5 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %temp3, <4 x i32> %temp4) 1571 ret <4 x i32> %temp5 1572} 1573 1574define <2 x i64> @sqdmlal2_2d(ptr %A, ptr %B, ptr %C) nounwind sanitize_memory { 1575; CHECK-LABEL: define <2 x i64> @sqdmlal2_2d( 1576; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0]] { 1577; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr @__msan_param_tls, align 8 1578; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 1579; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 1580; CHECK-NEXT: call void @llvm.donothing() 1581; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 1582; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] 1583; CHECK: [[BB4]]: 1584; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1585; CHECK-NEXT: unreachable 1586; CHECK: [[BB5]]: 1587; CHECK-NEXT: [[LOAD1:%.*]] = load <4 x i32>, ptr [[A]], align 16 1588; CHECK-NEXT: 
[[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 1589; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 1590; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr 1591; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP8]], align 16 1592; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP10]], 0 1593; CHECK-NEXT: br i1 [[_MSCMP5]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] 1594; CHECK: [[BB9]]: 1595; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1596; CHECK-NEXT: unreachable 1597; CHECK: [[BB10]]: 1598; CHECK-NEXT: [[LOAD2:%.*]] = load <4 x i32>, ptr [[B]], align 16 1599; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 1600; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576 1601; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr 1602; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP13]], align 16 1603; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP14]], 0 1604; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB14:.*]], label %[[BB15:.*]], !prof [[PROF1]] 1605; CHECK: [[BB14]]: 1606; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1607; CHECK-NEXT: unreachable 1608; CHECK: [[BB15]]: 1609; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr [[C]], align 16 1610; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[C]] to i64 1611; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 193514046488576 1612; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr 1613; CHECK-NEXT: [[_MSLD2:%.*]] = load <2 x i64>, ptr [[TMP18]], align 16 1614; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> splat (i32 -1), <2 x i32> <i32 2, i32 3> 1615; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[LOAD1]], <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1616; CHECK-NEXT: [[_MSPROP3:%.*]] = shufflevector <4 x i32> [[_MSLD1]], <4 x i32> splat (i32 -1), <2 x i32> <i32 2, i32 3> 1617; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[LOAD2]], <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1618; CHECK-NEXT: [[TMP19:%.*]] = bitcast <2 x i32> [[_MSPROP]] to i64 1619; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i64 [[TMP19]], 0 1620; CHECK-NEXT: [[TMP20:%.*]] = bitcast <2 x i32> [[_MSPROP3]] to i64 1621; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i64 [[TMP20]], 0 1622; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]] 1623; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB21:.*]], label %[[BB22:.*]], !prof [[PROF1]] 1624; CHECK: [[BB21]]: 1625; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1626; CHECK-NEXT: unreachable 1627; CHECK: [[BB22]]: 1628; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) 1629; CHECK-NEXT: [[_MSPROP4:%.*]] = or <2 x i64> [[_MSLD2]], zeroinitializer 1630; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]]) 1631; CHECK-NEXT: store <2 x i64> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 1632; CHECK-NEXT: ret <2 x i64> [[TMP5]] 1633; 1634 %load1 = load <4 x i32>, ptr %A 1635 %load2 = load <4 x i32>, ptr %B 1636 %temp3 = load <2 x i64>, ptr %C 1637 %temp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1638 %temp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1639 %temp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %temp1, <2 x i32> %temp2) 1640 %temp5 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %temp3, <2 x i64> %temp4) 1641 ret <2 x i64> %temp5 1642} 1643 1644define <4 x i32> @sqdmlsl4s(ptr %A, ptr %B, ptr 
%C) nounwind sanitize_memory { 1645; CHECK-LABEL: define <4 x i32> @sqdmlsl4s( 1646; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0]] { 1647; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr @__msan_param_tls, align 8 1648; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 1649; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 1650; CHECK-NEXT: call void @llvm.donothing() 1651; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 1652; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] 1653; CHECK: [[BB4]]: 1654; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1655; CHECK-NEXT: unreachable 1656; CHECK: [[BB5]]: 1657; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 1658; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 1659; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 1660; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr 1661; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP8]], align 8 1662; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP10]], 0 1663; CHECK-NEXT: br i1 [[_MSCMP3]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] 1664; CHECK: [[BB9]]: 1665; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1666; CHECK-NEXT: unreachable 1667; CHECK: [[BB10]]: 1668; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8 1669; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 1670; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576 1671; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr 1672; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP13]], align 8 1673; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0 1674; CHECK-NEXT: br i1 [[_MSCMP4]], label %[[BB14:.*]], label %[[BB15:.*]], !prof [[PROF1]] 1675; CHECK: [[BB14]]: 1676; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1677; CHECK-NEXT: unreachable 1678; CHECK: [[BB15]]: 1679; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[C]], align 16 1680; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[C]] to i64 1681; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 193514046488576 1682; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr 1683; CHECK-NEXT: [[_MSLD2:%.*]] = load <4 x i32>, ptr [[TMP18]], align 16 1684; CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x i16> [[_MSLD]] to i64 1685; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP19]], 0 1686; CHECK-NEXT: [[TMP20:%.*]] = bitcast <4 x i16> [[_MSLD1]] to i64 1687; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP20]], 0 1688; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP5]], [[_MSCMP6]] 1689; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB21:.*]], label %[[BB22:.*]], !prof [[PROF1]] 1690; CHECK: [[BB21]]: 1691; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1692; CHECK-NEXT: unreachable 1693; CHECK: [[BB22]]: 1694; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) 1695; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD2]], zeroinitializer 1696; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[TMP3]], <4 x i32> [[TMP4]]) 1697; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8 1698; CHECK-NEXT: ret <4 x i32> [[TMP5]] 1699; 1700 %temp1 = load <4 x i16>, ptr %A 1701 %temp2 = load <4 x i16>, ptr %B 1702 %temp3 = load <4 x i32>, ptr %C 1703 %temp4 = 
call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %temp1, <4 x i16> %temp2) 1704 %temp5 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %temp3, <4 x i32> %temp4) 1705 ret <4 x i32> %temp5 1706} 1707 1708define <2 x i64> @sqdmlsl2d(ptr %A, ptr %B, ptr %C) nounwind sanitize_memory { 1709; CHECK-LABEL: define <2 x i64> @sqdmlsl2d( 1710; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0]] { 1711; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr @__msan_param_tls, align 8 1712; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 1713; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 1714; CHECK-NEXT: call void @llvm.donothing() 1715; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 1716; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] 1717; CHECK: [[BB4]]: 1718; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1719; CHECK-NEXT: unreachable 1720; CHECK: [[BB5]]: 1721; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 1722; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 1723; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 1724; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr 1725; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP8]], align 8 1726; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP10]], 0 1727; CHECK-NEXT: br i1 [[_MSCMP3]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] 1728; CHECK: [[BB9]]: 1729; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1730; CHECK-NEXT: unreachable 1731; CHECK: [[BB10]]: 1732; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8 1733; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 1734; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576 1735; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr 1736; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP13]], align 8 1737; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0 1738; CHECK-NEXT: br i1 [[_MSCMP4]], label %[[BB14:.*]], label %[[BB15:.*]], !prof [[PROF1]] 1739; CHECK: [[BB14]]: 1740; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1741; CHECK-NEXT: unreachable 1742; CHECK: [[BB15]]: 1743; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr [[C]], align 16 1744; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[C]] to i64 1745; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 193514046488576 1746; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr 1747; CHECK-NEXT: [[_MSLD2:%.*]] = load <2 x i64>, ptr [[TMP18]], align 16 1748; CHECK-NEXT: [[TMP19:%.*]] = bitcast <2 x i32> [[_MSLD]] to i64 1749; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP19]], 0 1750; CHECK-NEXT: [[TMP20:%.*]] = bitcast <2 x i32> [[_MSLD1]] to i64 1751; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP20]], 0 1752; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP5]], [[_MSCMP6]] 1753; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB21:.*]], label %[[BB22:.*]], !prof [[PROF1]] 1754; CHECK: [[BB21]]: 1755; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1756; CHECK-NEXT: unreachable 1757; CHECK: [[BB22]]: 1758; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) 1759; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD2]], zeroinitializer 1760; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[TMP3]], <2 
x i64> [[TMP4]]) 1761; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 1762; CHECK-NEXT: ret <2 x i64> [[TMP5]] 1763; 1764 %temp1 = load <2 x i32>, ptr %A 1765 %temp2 = load <2 x i32>, ptr %B 1766 %temp3 = load <2 x i64>, ptr %C 1767 %temp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %temp1, <2 x i32> %temp2) 1768 %temp5 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %temp3, <2 x i64> %temp4) 1769 ret <2 x i64> %temp5 1770} 1771 1772define <4 x i32> @sqdmlsl2_4s(ptr %A, ptr %B, ptr %C) nounwind sanitize_memory { 1773; CHECK-LABEL: define <4 x i32> @sqdmlsl2_4s( 1774; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0]] { 1775; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr @__msan_param_tls, align 8 1776; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 1777; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 1778; CHECK-NEXT: call void @llvm.donothing() 1779; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 1780; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] 1781; CHECK: [[BB4]]: 1782; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1783; CHECK-NEXT: unreachable 1784; CHECK: [[BB5]]: 1785; CHECK-NEXT: [[LOAD1:%.*]] = load <8 x i16>, ptr [[A]], align 16 1786; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 1787; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 1788; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr 1789; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP8]], align 16 1790; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP10]], 0 1791; CHECK-NEXT: br i1 [[_MSCMP5]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] 1792; CHECK: [[BB9]]: 1793; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1794; CHECK-NEXT: unreachable 1795; CHECK: [[BB10]]: 1796; CHECK-NEXT: [[LOAD2:%.*]] = load <8 x i16>, ptr [[B]], align 16 1797; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 1798; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576 1799; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr 1800; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP13]], align 16 1801; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP14]], 0 1802; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB14:.*]], label %[[BB15:.*]], !prof [[PROF1]] 1803; CHECK: [[BB14]]: 1804; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1805; CHECK-NEXT: unreachable 1806; CHECK: [[BB15]]: 1807; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[C]], align 16 1808; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[C]] to i64 1809; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 193514046488576 1810; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr 1811; CHECK-NEXT: [[_MSLD2:%.*]] = load <4 x i32>, ptr [[TMP18]], align 16 1812; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> splat (i16 -1), <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1813; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[LOAD1]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1814; CHECK-NEXT: [[_MSPROP3:%.*]] = shufflevector <8 x i16> [[_MSLD1]], <8 x i16> splat (i16 -1), <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1815; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[LOAD2]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1816; CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x i16> 
[[_MSPROP]] to i64 1817; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i64 [[TMP19]], 0 1818; CHECK-NEXT: [[TMP20:%.*]] = bitcast <4 x i16> [[_MSPROP3]] to i64 1819; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i64 [[TMP20]], 0 1820; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]] 1821; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB21:.*]], label %[[BB22:.*]], !prof [[PROF1]] 1822; CHECK: [[BB21]]: 1823; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1824; CHECK-NEXT: unreachable 1825; CHECK: [[BB22]]: 1826; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) 1827; CHECK-NEXT: [[_MSPROP4:%.*]] = or <4 x i32> [[_MSLD2]], zeroinitializer 1828; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[TMP3]], <4 x i32> [[TMP4]]) 1829; CHECK-NEXT: store <4 x i32> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 1830; CHECK-NEXT: ret <4 x i32> [[TMP5]] 1831; 1832 %load1 = load <8 x i16>, ptr %A 1833 %load2 = load <8 x i16>, ptr %B 1834 %temp3 = load <4 x i32>, ptr %C 1835 %temp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1836 %temp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 1837 %temp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %temp1, <4 x i16> %temp2) 1838 %temp5 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %temp3, <4 x i32> %temp4) 1839 ret <4 x i32> %temp5 1840} 1841 1842define <2 x i64> @sqdmlsl2_2d(ptr %A, ptr %B, ptr %C) nounwind sanitize_memory { 1843; CHECK-LABEL: define <2 x i64> @sqdmlsl2_2d( 1844; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0]] { 1845; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr @__msan_param_tls, align 8 1846; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 1847; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 1848; CHECK-NEXT: call void @llvm.donothing() 1849; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 1850; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] 1851; CHECK: [[BB4]]: 1852; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1853; CHECK-NEXT: unreachable 1854; CHECK: [[BB5]]: 1855; CHECK-NEXT: [[LOAD1:%.*]] = load <4 x i32>, ptr [[A]], align 16 1856; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 1857; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 1858; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr 1859; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP8]], align 16 1860; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP10]], 0 1861; CHECK-NEXT: br i1 [[_MSCMP5]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] 1862; CHECK: [[BB9]]: 1863; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1864; CHECK-NEXT: unreachable 1865; CHECK: [[BB10]]: 1866; CHECK-NEXT: [[LOAD2:%.*]] = load <4 x i32>, ptr [[B]], align 16 1867; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 1868; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576 1869; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr 1870; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP13]], align 16 1871; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP14]], 0 1872; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB14:.*]], label %[[BB15:.*]], !prof [[PROF1]] 1873; CHECK: [[BB14]]: 1874; CHECK-NEXT: 
call void @__msan_warning_noreturn() #[[ATTR8]] 1875; CHECK-NEXT: unreachable 1876; CHECK: [[BB15]]: 1877; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr [[C]], align 16 1878; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[C]] to i64 1879; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 193514046488576 1880; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr 1881; CHECK-NEXT: [[_MSLD2:%.*]] = load <2 x i64>, ptr [[TMP18]], align 16 1882; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> splat (i32 -1), <2 x i32> <i32 2, i32 3> 1883; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[LOAD1]], <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1884; CHECK-NEXT: [[_MSPROP3:%.*]] = shufflevector <4 x i32> [[_MSLD1]], <4 x i32> splat (i32 -1), <2 x i32> <i32 2, i32 3> 1885; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[LOAD2]], <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1886; CHECK-NEXT: [[TMP19:%.*]] = bitcast <2 x i32> [[_MSPROP]] to i64 1887; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i64 [[TMP19]], 0 1888; CHECK-NEXT: [[TMP20:%.*]] = bitcast <2 x i32> [[_MSPROP3]] to i64 1889; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i64 [[TMP20]], 0 1890; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]] 1891; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB21:.*]], label %[[BB22:.*]], !prof [[PROF1]] 1892; CHECK: [[BB21]]: 1893; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1894; CHECK-NEXT: unreachable 1895; CHECK: [[BB22]]: 1896; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) 1897; CHECK-NEXT: [[_MSPROP4:%.*]] = or <2 x i64> [[_MSLD2]], zeroinitializer 1898; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[TMP3]], <2 x i64> [[TMP4]]) 1899; CHECK-NEXT: store <2 x i64> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 1900; CHECK-NEXT: ret <2 x i64> [[TMP5]] 1901; 1902 %load1 = load <4 x i32>, ptr %A 1903 %load2 = load <4 x i32>, ptr %B 1904 %temp3 = load <2 x i64>, ptr %C 1905 %temp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1906 %temp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 1907 %temp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %temp1, <2 x i32> %temp2) 1908 %temp5 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %temp3, <2 x i64> %temp4) 1909 ret <2 x i64> %temp5 1910} 1911 1912define <4 x i32> @umlal4s(ptr %A, ptr %B, ptr %C) nounwind sanitize_memory { 1913; CHECK-LABEL: define <4 x i32> @umlal4s( 1914; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0]] { 1915; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr @__msan_param_tls, align 8 1916; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 1917; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 1918; CHECK-NEXT: call void @llvm.donothing() 1919; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 1920; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] 1921; CHECK: [[BB4]]: 1922; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1923; CHECK-NEXT: unreachable 1924; CHECK: [[BB5]]: 1925; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 1926; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 1927; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 1928; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 
[[TMP7]] to ptr 1929; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP8]], align 8 1930; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP10]], 0 1931; CHECK-NEXT: br i1 [[_MSCMP5]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] 1932; CHECK: [[BB9]]: 1933; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1934; CHECK-NEXT: unreachable 1935; CHECK: [[BB10]]: 1936; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8 1937; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 1938; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576 1939; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr 1940; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP13]], align 8 1941; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP14]], 0 1942; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB14:.*]], label %[[BB15:.*]], !prof [[PROF1]] 1943; CHECK: [[BB14]]: 1944; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1945; CHECK-NEXT: unreachable 1946; CHECK: [[BB15]]: 1947; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[C]], align 16 1948; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[C]] to i64 1949; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 193514046488576 1950; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr 1951; CHECK-NEXT: [[_MSLD2:%.*]] = load <4 x i32>, ptr [[TMP18]], align 16 1952; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]] 1953; CHECK-NEXT: [[_MSPROP3:%.*]] = or <4 x i16> [[_MSPROP]], zeroinitializer 1954; CHECK-NEXT: [[TMP19:%.*]] = zext <4 x i16> [[_MSPROP3]] to <4 x i32> 1955; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) 1956; CHECK-NEXT: [[_MSPROP4:%.*]] = or <4 x i32> [[_MSLD2]], [[TMP19]] 1957; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP3]], [[TMP4]] 1958; CHECK-NEXT: store <4 x i32> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 1959; CHECK-NEXT: ret <4 x i32> [[TMP5]] 1960; 1961 %temp1 = load <4 x i16>, ptr %A 1962 %temp2 = load <4 x i16>, ptr %B 1963 %temp3 = load <4 x i32>, ptr %C 1964 %temp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %temp1, <4 x i16> %temp2) 1965 %temp5 = add <4 x i32> %temp3, %temp4 1966 ret <4 x i32> %temp5 1967} 1968 1969define <2 x i64> @umlal2d(ptr %A, ptr %B, ptr %C) nounwind sanitize_memory { 1970; CHECK-LABEL: define <2 x i64> @umlal2d( 1971; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0]] { 1972; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr @__msan_param_tls, align 8 1973; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 1974; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 1975; CHECK-NEXT: call void @llvm.donothing() 1976; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 1977; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] 1978; CHECK: [[BB4]]: 1979; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1980; CHECK-NEXT: unreachable 1981; CHECK: [[BB5]]: 1982; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 1983; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 1984; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 1985; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr 1986; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP8]], align 8 1987; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP10]], 0 1988; CHECK-NEXT: br i1 
[[_MSCMP5]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] 1989; CHECK: [[BB9]]: 1990; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 1991; CHECK-NEXT: unreachable 1992; CHECK: [[BB10]]: 1993; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8 1994; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 1995; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576 1996; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr 1997; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP13]], align 8 1998; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP14]], 0 1999; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB14:.*]], label %[[BB15:.*]], !prof [[PROF1]] 2000; CHECK: [[BB14]]: 2001; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2002; CHECK-NEXT: unreachable 2003; CHECK: [[BB15]]: 2004; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr [[C]], align 16 2005; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[C]] to i64 2006; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 193514046488576 2007; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr 2008; CHECK-NEXT: [[_MSLD2:%.*]] = load <2 x i64>, ptr [[TMP18]], align 16 2009; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] 2010; CHECK-NEXT: [[_MSPROP3:%.*]] = or <2 x i32> [[_MSPROP]], zeroinitializer 2011; CHECK-NEXT: [[TMP19:%.*]] = zext <2 x i32> [[_MSPROP3]] to <2 x i64> 2012; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) 2013; CHECK-NEXT: [[_MSPROP4:%.*]] = or <2 x i64> [[_MSLD2]], [[TMP19]] 2014; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]] 2015; CHECK-NEXT: store <2 x i64> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 2016; CHECK-NEXT: ret <2 x i64> [[TMP5]] 2017; 2018 %temp1 = load <2 x i32>, ptr %A 2019 %temp2 = load <2 x i32>, ptr %B 2020 %temp3 = load <2 x i64>, ptr %C 2021 %temp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %temp1, <2 x i32> %temp2) 2022 %temp5 = add <2 x i64> %temp3, %temp4 2023 ret <2 x i64> %temp5 2024} 2025 2026define void @umlal8h_chain_with_constant(ptr %dst, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) { 2027; CHECK-LABEL: define void @umlal8h_chain_with_constant( 2028; CHECK-SAME: ptr [[DST:%.*]], <8 x i8> [[V1:%.*]], <8 x i8> [[V2:%.*]], <8 x i8> [[V3:%.*]]) { 2029; CHECK-NEXT: call void @llvm.donothing() 2030; CHECK-NEXT: [[XOR:%.*]] = xor <8 x i8> [[V3]], splat (i8 -1) 2031; CHECK-NEXT: [[UMULL_1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[V1]], <8 x i8> [[V3]]) 2032; CHECK-NEXT: [[ADD_1:%.*]] = add <8 x i16> [[UMULL_1]], splat (i16 257) 2033; CHECK-NEXT: [[UMULL_2:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[V2]], <8 x i8> [[XOR]]) 2034; CHECK-NEXT: [[ADD_2:%.*]] = add <8 x i16> [[ADD_1]], [[UMULL_2]] 2035; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[DST]] to i64 2036; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576 2037; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr 2038; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr [[TMP3]], align 16 2039; CHECK-NEXT: store <8 x i16> [[ADD_2]], ptr [[DST]], align 16 2040; CHECK-NEXT: ret void 2041; 2042 %xor = xor <8 x i8> %v3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 2043 %umull.1 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %v1, <8 x i8> %v3) 2044 %add.1 = add <8 x i16> %umull.1, <i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257> 2045 %umull.2 = tail call <8 x i16> 
@llvm.aarch64.neon.umull.v8i16(<8 x i8> %v2, <8 x i8> %xor) 2046 %add.2 = add <8 x i16> %add.1, %umull.2 2047 store <8 x i16> %add.2, ptr %dst 2048 ret void 2049} 2050 2051define void @umlal2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { 2052; CHECK-LABEL: define void @umlal2d_chain_with_constant( 2053; CHECK-SAME: ptr [[DST:%.*]], <2 x i32> [[V1:%.*]], <2 x i32> [[V2:%.*]], <2 x i32> [[V3:%.*]]) { 2054; CHECK-NEXT: call void @llvm.donothing() 2055; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[V3]], splat (i32 -1) 2056; CHECK-NEXT: [[UMULL_1:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[V1]], <2 x i32> [[V3]]) 2057; CHECK-NEXT: [[ADD_1:%.*]] = add <2 x i64> [[UMULL_1]], splat (i64 257) 2058; CHECK-NEXT: [[UMULL_2:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[V2]], <2 x i32> [[XOR]]) 2059; CHECK-NEXT: [[ADD_2:%.*]] = add <2 x i64> [[ADD_1]], [[UMULL_2]] 2060; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[DST]] to i64 2061; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576 2062; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr 2063; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr [[TMP3]], align 16 2064; CHECK-NEXT: store <2 x i64> [[ADD_2]], ptr [[DST]], align 16 2065; CHECK-NEXT: ret void 2066; 2067 %xor = xor <2 x i32> %v3, <i32 -1, i32 -1> 2068 %umull.1 = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %v1, <2 x i32> %v3) 2069 %add.1 = add <2 x i64> %umull.1, <i64 257, i64 257> 2070 %umull.2 = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %v2, <2 x i32> %xor) 2071 %add.2 = add <2 x i64> %add.1, %umull.2 2072 store <2 x i64> %add.2, ptr %dst 2073 ret void 2074} 2075 2076define <4 x i32> @umlsl4s(ptr %A, ptr %B, ptr %C) nounwind sanitize_memory { 2077; CHECK-LABEL: define <4 x i32> @umlsl4s( 2078; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0]] { 2079; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr @__msan_param_tls, align 8 2080; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 2081; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 2082; CHECK-NEXT: call void @llvm.donothing() 2083; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 2084; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] 2085; CHECK: [[BB4]]: 2086; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2087; CHECK-NEXT: unreachable 2088; CHECK: [[BB5]]: 2089; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8 2090; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 2091; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 2092; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr 2093; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP8]], align 8 2094; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP10]], 0 2095; CHECK-NEXT: br i1 [[_MSCMP5]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] 2096; CHECK: [[BB9]]: 2097; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2098; CHECK-NEXT: unreachable 2099; CHECK: [[BB10]]: 2100; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8 2101; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 2102; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576 2103; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr 2104; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP13]], 
align 8 2105; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP14]], 0 2106; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB14:.*]], label %[[BB15:.*]], !prof [[PROF1]] 2107; CHECK: [[BB14]]: 2108; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2109; CHECK-NEXT: unreachable 2110; CHECK: [[BB15]]: 2111; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[C]], align 16 2112; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[C]] to i64 2113; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 193514046488576 2114; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr 2115; CHECK-NEXT: [[_MSLD2:%.*]] = load <4 x i32>, ptr [[TMP18]], align 16 2116; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]] 2117; CHECK-NEXT: [[_MSPROP3:%.*]] = or <4 x i16> [[_MSPROP]], zeroinitializer 2118; CHECK-NEXT: [[TMP19:%.*]] = zext <4 x i16> [[_MSPROP3]] to <4 x i32> 2119; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) 2120; CHECK-NEXT: [[_MSPROP4:%.*]] = or <4 x i32> [[_MSLD2]], [[TMP19]] 2121; CHECK-NEXT: [[TMP5:%.*]] = sub <4 x i32> [[TMP3]], [[TMP4]] 2122; CHECK-NEXT: store <4 x i32> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 2123; CHECK-NEXT: ret <4 x i32> [[TMP5]] 2124; 2125 %temp1 = load <4 x i16>, ptr %A 2126 %temp2 = load <4 x i16>, ptr %B 2127 %temp3 = load <4 x i32>, ptr %C 2128 %temp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %temp1, <4 x i16> %temp2) 2129 %temp5 = sub <4 x i32> %temp3, %temp4 2130 ret <4 x i32> %temp5 2131} 2132 2133define <2 x i64> @umlsl2d(ptr %A, ptr %B, ptr %C) nounwind sanitize_memory { 2134; CHECK-LABEL: define <2 x i64> @umlsl2d( 2135; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0]] { 2136; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr @__msan_param_tls, align 8 2137; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 2138; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 2139; CHECK-NEXT: call void @llvm.donothing() 2140; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 2141; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] 2142; CHECK: [[BB4]]: 2143; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2144; CHECK-NEXT: unreachable 2145; CHECK: [[BB5]]: 2146; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8 2147; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 2148; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 2149; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr 2150; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP8]], align 8 2151; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP10]], 0 2152; CHECK-NEXT: br i1 [[_MSCMP5]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] 2153; CHECK: [[BB9]]: 2154; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2155; CHECK-NEXT: unreachable 2156; CHECK: [[BB10]]: 2157; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8 2158; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 2159; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576 2160; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr 2161; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP13]], align 8 2162; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP14]], 0 2163; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB14:.*]], label %[[BB15:.*]], !prof [[PROF1]] 2164; CHECK: 
[[BB14]]: 2165; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2166; CHECK-NEXT: unreachable 2167; CHECK: [[BB15]]: 2168; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr [[C]], align 16 2169; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[C]] to i64 2170; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 193514046488576 2171; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr 2172; CHECK-NEXT: [[_MSLD2:%.*]] = load <2 x i64>, ptr [[TMP18]], align 16 2173; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] 2174; CHECK-NEXT: [[_MSPROP3:%.*]] = or <2 x i32> [[_MSPROP]], zeroinitializer 2175; CHECK-NEXT: [[TMP19:%.*]] = zext <2 x i32> [[_MSPROP3]] to <2 x i64> 2176; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) 2177; CHECK-NEXT: [[_MSPROP4:%.*]] = or <2 x i64> [[_MSLD2]], [[TMP19]] 2178; CHECK-NEXT: [[TMP5:%.*]] = sub <2 x i64> [[TMP3]], [[TMP4]] 2179; CHECK-NEXT: store <2 x i64> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 2180; CHECK-NEXT: ret <2 x i64> [[TMP5]] 2181; 2182 %temp1 = load <2 x i32>, ptr %A 2183 %temp2 = load <2 x i32>, ptr %B 2184 %temp3 = load <2 x i64>, ptr %C 2185 %temp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %temp1, <2 x i32> %temp2) 2186 %temp5 = sub <2 x i64> %temp3, %temp4 2187 ret <2 x i64> %temp5 2188} 2189 2190define void @umlsl8h_chain_with_constant(ptr %dst, <8 x i8> %v1, <8 x i8> %v2, <8 x i8> %v3) { 2191; CHECK-LABEL: define void @umlsl8h_chain_with_constant( 2192; CHECK-SAME: ptr [[DST:%.*]], <8 x i8> [[V1:%.*]], <8 x i8> [[V2:%.*]], <8 x i8> [[V3:%.*]]) { 2193; CHECK-NEXT: call void @llvm.donothing() 2194; CHECK-NEXT: [[XOR:%.*]] = xor <8 x i8> [[V3]], splat (i8 -1) 2195; CHECK-NEXT: [[UMULL_1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[V1]], <8 x i8> [[V3]]) 2196; CHECK-NEXT: [[ADD_1:%.*]] = sub <8 x i16> splat (i16 257), [[UMULL_1]] 2197; CHECK-NEXT: [[UMULL_2:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[V2]], <8 x i8> [[XOR]]) 2198; CHECK-NEXT: [[ADD_2:%.*]] = sub <8 x i16> [[ADD_1]], [[UMULL_2]] 2199; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[DST]] to i64 2200; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576 2201; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr 2202; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr [[TMP3]], align 16 2203; CHECK-NEXT: store <8 x i16> [[ADD_2]], ptr [[DST]], align 16 2204; CHECK-NEXT: ret void 2205; 2206 %xor = xor <8 x i8> %v3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 2207 %umull.1 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %v1, <8 x i8> %v3) 2208 %add.1 = sub <8 x i16> <i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257>, %umull.1 2209 %umull.2 = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %v2, <8 x i8> %xor) 2210 %add.2 = sub <8 x i16> %add.1, %umull.2 2211 store <8 x i16> %add.2, ptr %dst 2212 ret void 2213} 2214 2215define void @umlsl2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { 2216; CHECK-LABEL: define void @umlsl2d_chain_with_constant( 2217; CHECK-SAME: ptr [[DST:%.*]], <2 x i32> [[V1:%.*]], <2 x i32> [[V2:%.*]], <2 x i32> [[V3:%.*]]) { 2218; CHECK-NEXT: call void @llvm.donothing() 2219; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[V3]], splat (i32 -1) 2220; CHECK-NEXT: [[UMULL_1:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[V1]], <2 x i32> [[V3]]) 2221; CHECK-NEXT: [[ADD_1:%.*]] = sub <2 x i64> splat (i64 
257), [[UMULL_1]] 2222; CHECK-NEXT: [[UMULL_2:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[V2]], <2 x i32> [[XOR]]) 2223; CHECK-NEXT: [[ADD_2:%.*]] = sub <2 x i64> [[ADD_1]], [[UMULL_2]] 2224; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[DST]] to i64 2225; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576 2226; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr 2227; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr [[TMP3]], align 16 2228; CHECK-NEXT: store <2 x i64> [[ADD_2]], ptr [[DST]], align 16 2229; CHECK-NEXT: ret void 2230; 2231 %xor = xor <2 x i32> %v3, <i32 -1, i32 -1> 2232 %umull.1 = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %v1, <2 x i32> %v3) 2233 %add.1 = sub <2 x i64> <i64 257, i64 257>, %umull.1 2234 %umull.2 = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %v2, <2 x i32> %xor) 2235 %add.2 = sub <2 x i64> %add.1, %umull.2 2236 store <2 x i64> %add.2, ptr %dst 2237 ret void 2238} 2239 2240define <2 x float> @fmla_2s(ptr %A, ptr %B, ptr %C) nounwind sanitize_memory { 2241; CHECK-LABEL: define <2 x float> @fmla_2s( 2242; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0]] { 2243; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr @__msan_param_tls, align 8 2244; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 2245; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 2246; CHECK-NEXT: call void @llvm.donothing() 2247; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP5]], 0 2248; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] 2249; CHECK: [[BB4]]: 2250; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2251; CHECK-NEXT: unreachable 2252; CHECK: [[BB5]]: 2253; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[A]], align 8 2254; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 2255; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 2256; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr 2257; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP8]], align 8 2258; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP9]], 0 2259; CHECK-NEXT: br i1 [[_MSCMP4]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] 2260; CHECK: [[BB9]]: 2261; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2262; CHECK-NEXT: unreachable 2263; CHECK: [[BB10]]: 2264; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[B]], align 8 2265; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 2266; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576 2267; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr 2268; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP13]], align 8 2269; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP10]], 0 2270; CHECK-NEXT: br i1 [[_MSCMP5]], label %[[BB14:.*]], label %[[BB15:.*]], !prof [[PROF1]] 2271; CHECK: [[BB14]]: 2272; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2273; CHECK-NEXT: unreachable 2274; CHECK: [[BB15]]: 2275; CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, ptr [[C]], align 8 2276; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[C]] to i64 2277; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 193514046488576 2278; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr 2279; CHECK-NEXT: [[_MSLD2:%.*]] = load <2 x i32>, ptr [[TMP18]], align 8 2280; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]] 2281; CHECK-NEXT: 
[[_MSPROP3:%.*]] = or <2 x i32> [[_MSPROP]], [[_MSLD2]] 2282; CHECK-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP1]], <2 x float> [[TMP2]], <2 x float> [[TMP3]]) 2283; CHECK-NEXT: store <2 x i32> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 2284; CHECK-NEXT: ret <2 x float> [[TMP4]] 2285; 2286 %temp1 = load <2 x float>, ptr %A 2287 %temp2 = load <2 x float>, ptr %B 2288 %temp3 = load <2 x float>, ptr %C 2289 %temp4 = call <2 x float> @llvm.fma.v2f32(<2 x float> %temp1, <2 x float> %temp2, <2 x float> %temp3) 2290 ret <2 x float> %temp4 2291} 2292 2293define <4 x float> @fmla_4s(ptr %A, ptr %B, ptr %C) nounwind sanitize_memory { 2294; CHECK-LABEL: define <4 x float> @fmla_4s( 2295; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0]] { 2296; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr @__msan_param_tls, align 8 2297; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 2298; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 2299; CHECK-NEXT: call void @llvm.donothing() 2300; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP5]], 0 2301; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] 2302; CHECK: [[BB4]]: 2303; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2304; CHECK-NEXT: unreachable 2305; CHECK: [[BB5]]: 2306; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[A]], align 16 2307; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 2308; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 2309; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr 2310; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP8]], align 16 2311; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP9]], 0 2312; CHECK-NEXT: br i1 [[_MSCMP4]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] 2313; CHECK: [[BB9]]: 2314; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2315; CHECK-NEXT: unreachable 2316; CHECK: [[BB10]]: 2317; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[B]], align 16 2318; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 2319; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576 2320; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr 2321; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP13]], align 16 2322; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP10]], 0 2323; CHECK-NEXT: br i1 [[_MSCMP5]], label %[[BB14:.*]], label %[[BB15:.*]], !prof [[PROF1]] 2324; CHECK: [[BB14]]: 2325; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2326; CHECK-NEXT: unreachable 2327; CHECK: [[BB15]]: 2328; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[C]], align 16 2329; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[C]] to i64 2330; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 193514046488576 2331; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr 2332; CHECK-NEXT: [[_MSLD2:%.*]] = load <4 x i32>, ptr [[TMP18]], align 16 2333; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]] 2334; CHECK-NEXT: [[_MSPROP3:%.*]] = or <4 x i32> [[_MSPROP]], [[_MSLD2]] 2335; CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]]) 2336; CHECK-NEXT: store <4 x i32> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 2337; CHECK-NEXT: ret <4 x float> [[TMP4]] 2338; 2339 %temp1 = load <4 x float>, ptr %A 2340 %temp2 = load <4 x float>, ptr %B 
2341 %temp3 = load <4 x float>, ptr %C 2342 %temp4 = call <4 x float> @llvm.fma.v4f32(<4 x float> %temp1, <4 x float> %temp2, <4 x float> %temp3) 2343 ret <4 x float> %temp4 2344} 2345 2346define <2 x double> @fmla_2d(ptr %A, ptr %B, ptr %C) nounwind sanitize_memory { 2347; CHECK-LABEL: define <2 x double> @fmla_2d( 2348; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0]] { 2349; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr @__msan_param_tls, align 8 2350; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 2351; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 2352; CHECK-NEXT: call void @llvm.donothing() 2353; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP5]], 0 2354; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] 2355; CHECK: [[BB4]]: 2356; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2357; CHECK-NEXT: unreachable 2358; CHECK: [[BB5]]: 2359; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[A]], align 16 2360; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 2361; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 2362; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr 2363; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP8]], align 16 2364; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP9]], 0 2365; CHECK-NEXT: br i1 [[_MSCMP4]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] 2366; CHECK: [[BB9]]: 2367; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2368; CHECK-NEXT: unreachable 2369; CHECK: [[BB10]]: 2370; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[B]], align 16 2371; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 2372; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576 2373; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr 2374; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP13]], align 16 2375; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP10]], 0 2376; CHECK-NEXT: br i1 [[_MSCMP5]], label %[[BB14:.*]], label %[[BB15:.*]], !prof [[PROF1]] 2377; CHECK: [[BB14]]: 2378; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2379; CHECK-NEXT: unreachable 2380; CHECK: [[BB15]]: 2381; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[C]], align 16 2382; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[C]] to i64 2383; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 193514046488576 2384; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr 2385; CHECK-NEXT: [[_MSLD2:%.*]] = load <2 x i64>, ptr [[TMP18]], align 16 2386; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]] 2387; CHECK-NEXT: [[_MSPROP3:%.*]] = or <2 x i64> [[_MSPROP]], [[_MSLD2]] 2388; CHECK-NEXT: [[TMP4:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP2]], <2 x double> [[TMP3]]) 2389; CHECK-NEXT: store <2 x i64> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 2390; CHECK-NEXT: ret <2 x double> [[TMP4]] 2391; 2392 %temp1 = load <2 x double>, ptr %A 2393 %temp2 = load <2 x double>, ptr %B 2394 %temp3 = load <2 x double>, ptr %C 2395 %temp4 = call <2 x double> @llvm.fma.v2f64(<2 x double> %temp1, <2 x double> %temp2, <2 x double> %temp3) 2396 ret <2 x double> %temp4 2397} 2398 2399declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone 2400declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone 
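; As the fmla/fmls checks above and below show, MemorySanitizer propagates shadow
; through llvm.fma (and its constrained strictfp counterpart) by OR-ing the shadows
; of all three operands into the return shadow. In the fmls variants the negated
; operand is built with an fsub from a -0.0 constant (fneg in the strictfp tests);
; the constant contributes an all-zero shadow, so the negation does not change
; which bits are reported as uninitialized.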
2401declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone 2402 2403define <2 x float> @fmls_2s(ptr %A, ptr %B, ptr %C) nounwind sanitize_memory { 2404; CHECK-LABEL: define <2 x float> @fmls_2s( 2405; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0]] { 2406; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr @__msan_param_tls, align 8 2407; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 2408; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 2409; CHECK-NEXT: call void @llvm.donothing() 2410; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 2411; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] 2412; CHECK: [[BB4]]: 2413; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2414; CHECK-NEXT: unreachable 2415; CHECK: [[BB5]]: 2416; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[A]], align 8 2417; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 2418; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 2419; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr 2420; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP8]], align 8 2421; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP10]], 0 2422; CHECK-NEXT: br i1 [[_MSCMP5]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] 2423; CHECK: [[BB9]]: 2424; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2425; CHECK-NEXT: unreachable 2426; CHECK: [[BB10]]: 2427; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[B]], align 8 2428; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 2429; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576 2430; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr 2431; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP13]], align 8 2432; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP14]], 0 2433; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB14:.*]], label %[[BB15:.*]], !prof [[PROF1]] 2434; CHECK: [[BB14]]: 2435; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2436; CHECK-NEXT: unreachable 2437; CHECK: [[BB15]]: 2438; CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, ptr [[C]], align 8 2439; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[C]] to i64 2440; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 193514046488576 2441; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr 2442; CHECK-NEXT: [[_MSLD2:%.*]] = load <2 x i32>, ptr [[TMP18]], align 8 2443; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> zeroinitializer, [[_MSLD1]] 2444; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x float> splat (float -0.000000e+00), [[TMP2]] 2445; CHECK-NEXT: [[_MSPROP3:%.*]] = or <2 x i32> [[_MSLD]], [[_MSPROP]] 2446; CHECK-NEXT: [[_MSPROP4:%.*]] = or <2 x i32> [[_MSPROP3]], [[_MSLD2]] 2447; CHECK-NEXT: [[TMP5:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP1]], <2 x float> [[TMP4]], <2 x float> [[TMP3]]) 2448; CHECK-NEXT: store <2 x i32> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 2449; CHECK-NEXT: ret <2 x float> [[TMP5]] 2450; 2451 %temp1 = load <2 x float>, ptr %A 2452 %temp2 = load <2 x float>, ptr %B 2453 %temp3 = load <2 x float>, ptr %C 2454 %temp4 = fsub <2 x float> <float -0.0, float -0.0>, %temp2 2455 %temp5 = call <2 x float> @llvm.fma.v2f32(<2 x float> %temp1, <2 x float> %temp4, <2 x float> %temp3) 2456 ret <2 x float> %temp5 2457} 2458 2459define <4 x float> @fmls_4s(ptr %A, ptr %B, ptr %C) nounwind 
sanitize_memory { 2460; CHECK-LABEL: define <4 x float> @fmls_4s( 2461; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0]] { 2462; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr @__msan_param_tls, align 8 2463; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 2464; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 2465; CHECK-NEXT: call void @llvm.donothing() 2466; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 2467; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] 2468; CHECK: [[BB4]]: 2469; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2470; CHECK-NEXT: unreachable 2471; CHECK: [[BB5]]: 2472; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[A]], align 16 2473; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 2474; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 2475; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr 2476; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP8]], align 16 2477; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP10]], 0 2478; CHECK-NEXT: br i1 [[_MSCMP5]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] 2479; CHECK: [[BB9]]: 2480; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2481; CHECK-NEXT: unreachable 2482; CHECK: [[BB10]]: 2483; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[B]], align 16 2484; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 2485; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576 2486; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr 2487; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP13]], align 16 2488; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP14]], 0 2489; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB14:.*]], label %[[BB15:.*]], !prof [[PROF1]] 2490; CHECK: [[BB14]]: 2491; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2492; CHECK-NEXT: unreachable 2493; CHECK: [[BB15]]: 2494; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[C]], align 16 2495; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[C]] to i64 2496; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 193514046488576 2497; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr 2498; CHECK-NEXT: [[_MSLD2:%.*]] = load <4 x i32>, ptr [[TMP18]], align 16 2499; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> zeroinitializer, [[_MSLD1]] 2500; CHECK-NEXT: [[TMP4:%.*]] = fsub <4 x float> splat (float -0.000000e+00), [[TMP2]] 2501; CHECK-NEXT: [[_MSPROP3:%.*]] = or <4 x i32> [[_MSLD]], [[_MSPROP]] 2502; CHECK-NEXT: [[_MSPROP4:%.*]] = or <4 x i32> [[_MSPROP3]], [[_MSLD2]] 2503; CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP1]], <4 x float> [[TMP4]], <4 x float> [[TMP3]]) 2504; CHECK-NEXT: store <4 x i32> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 2505; CHECK-NEXT: ret <4 x float> [[TMP5]] 2506; 2507 %temp1 = load <4 x float>, ptr %A 2508 %temp2 = load <4 x float>, ptr %B 2509 %temp3 = load <4 x float>, ptr %C 2510 %temp4 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %temp2 2511 %temp5 = call <4 x float> @llvm.fma.v4f32(<4 x float> %temp1, <4 x float> %temp4, <4 x float> %temp3) 2512 ret <4 x float> %temp5 2513} 2514 2515define <2 x double> @fmls_2d(ptr %A, ptr %B, ptr %C) nounwind sanitize_memory { 2516; CHECK-LABEL: define <2 x double> @fmls_2d( 2517; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) 
#[[ATTR0]] { 2518; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr @__msan_param_tls, align 8 2519; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 2520; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 2521; CHECK-NEXT: call void @llvm.donothing() 2522; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 2523; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] 2524; CHECK: [[BB4]]: 2525; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2526; CHECK-NEXT: unreachable 2527; CHECK: [[BB5]]: 2528; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[A]], align 16 2529; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 2530; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 2531; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr 2532; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP8]], align 16 2533; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP10]], 0 2534; CHECK-NEXT: br i1 [[_MSCMP5]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] 2535; CHECK: [[BB9]]: 2536; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2537; CHECK-NEXT: unreachable 2538; CHECK: [[BB10]]: 2539; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[B]], align 16 2540; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 2541; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576 2542; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr 2543; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP13]], align 16 2544; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP14]], 0 2545; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB14:.*]], label %[[BB15:.*]], !prof [[PROF1]] 2546; CHECK: [[BB14]]: 2547; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2548; CHECK-NEXT: unreachable 2549; CHECK: [[BB15]]: 2550; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[C]], align 16 2551; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[C]] to i64 2552; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 193514046488576 2553; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr 2554; CHECK-NEXT: [[_MSLD2:%.*]] = load <2 x i64>, ptr [[TMP18]], align 16 2555; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> zeroinitializer, [[_MSLD1]] 2556; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> splat (double -0.000000e+00), [[TMP2]] 2557; CHECK-NEXT: [[_MSPROP3:%.*]] = or <2 x i64> [[_MSLD]], [[_MSPROP]] 2558; CHECK-NEXT: [[_MSPROP4:%.*]] = or <2 x i64> [[_MSPROP3]], [[_MSLD2]] 2559; CHECK-NEXT: [[TMP5:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP4]], <2 x double> [[TMP3]]) 2560; CHECK-NEXT: store <2 x i64> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 2561; CHECK-NEXT: ret <2 x double> [[TMP5]] 2562; 2563 %temp1 = load <2 x double>, ptr %A 2564 %temp2 = load <2 x double>, ptr %B 2565 %temp3 = load <2 x double>, ptr %C 2566 %temp4 = fsub <2 x double> <double -0.0, double -0.0>, %temp2 2567 %temp5 = call <2 x double> @llvm.fma.v2f64(<2 x double> %temp1, <2 x double> %temp4, <2 x double> %temp3) 2568 ret <2 x double> %temp5 2569} 2570 2571define <2 x float> @fmls_commuted_neg_2s(ptr %A, ptr %B, ptr %C) nounwind sanitize_memory { 2572; CHECK-LABEL: define <2 x float> @fmls_commuted_neg_2s( 2573; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0]] { 2574; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr @__msan_param_tls, align 8 2575; CHECK-NEXT: 
[[TMP10:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 2576; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 2577; CHECK-NEXT: call void @llvm.donothing() 2578; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 2579; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] 2580; CHECK: [[BB4]]: 2581; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2582; CHECK-NEXT: unreachable 2583; CHECK: [[BB5]]: 2584; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[A]], align 8 2585; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 2586; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 2587; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr 2588; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP8]], align 8 2589; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP10]], 0 2590; CHECK-NEXT: br i1 [[_MSCMP5]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] 2591; CHECK: [[BB9]]: 2592; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2593; CHECK-NEXT: unreachable 2594; CHECK: [[BB10]]: 2595; CHECK-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[B]], align 8 2596; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 2597; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576 2598; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr 2599; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP13]], align 8 2600; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP14]], 0 2601; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB14:.*]], label %[[BB15:.*]], !prof [[PROF1]] 2602; CHECK: [[BB14]]: 2603; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2604; CHECK-NEXT: unreachable 2605; CHECK: [[BB15]]: 2606; CHECK-NEXT: [[TMP3:%.*]] = load <2 x float>, ptr [[C]], align 8 2607; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[C]] to i64 2608; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 193514046488576 2609; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr 2610; CHECK-NEXT: [[_MSLD2:%.*]] = load <2 x i32>, ptr [[TMP18]], align 8 2611; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> zeroinitializer, [[_MSLD1]] 2612; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x float> splat (float -0.000000e+00), [[TMP2]] 2613; CHECK-NEXT: [[_MSPROP3:%.*]] = or <2 x i32> [[_MSPROP]], [[_MSLD]] 2614; CHECK-NEXT: [[_MSPROP4:%.*]] = or <2 x i32> [[_MSPROP3]], [[_MSLD2]] 2615; CHECK-NEXT: [[TMP5:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP1]], <2 x float> [[TMP3]]) 2616; CHECK-NEXT: store <2 x i32> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 2617; CHECK-NEXT: ret <2 x float> [[TMP5]] 2618; 2619 %temp1 = load <2 x float>, ptr %A 2620 %temp2 = load <2 x float>, ptr %B 2621 %temp3 = load <2 x float>, ptr %C 2622 %temp4 = fsub <2 x float> <float -0.0, float -0.0>, %temp2 2623 %temp5 = call <2 x float> @llvm.fma.v2f32(<2 x float> %temp4, <2 x float> %temp1, <2 x float> %temp3) 2624 ret <2 x float> %temp5 2625} 2626 2627define <4 x float> @fmls_commuted_neg_4s(ptr %A, ptr %B, ptr %C) nounwind sanitize_memory { 2628; CHECK-LABEL: define <4 x float> @fmls_commuted_neg_4s( 2629; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0]] { 2630; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr @__msan_param_tls, align 8 2631; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 2632; CHECK-NEXT: 
[[TMP14:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 2633; CHECK-NEXT: call void @llvm.donothing() 2634; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 2635; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] 2636; CHECK: [[BB4]]: 2637; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2638; CHECK-NEXT: unreachable 2639; CHECK: [[BB5]]: 2640; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[A]], align 16 2641; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 2642; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 2643; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr 2644; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP8]], align 16 2645; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP10]], 0 2646; CHECK-NEXT: br i1 [[_MSCMP5]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] 2647; CHECK: [[BB9]]: 2648; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2649; CHECK-NEXT: unreachable 2650; CHECK: [[BB10]]: 2651; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[B]], align 16 2652; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 2653; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576 2654; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr 2655; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP13]], align 16 2656; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP14]], 0 2657; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB14:.*]], label %[[BB15:.*]], !prof [[PROF1]] 2658; CHECK: [[BB14]]: 2659; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2660; CHECK-NEXT: unreachable 2661; CHECK: [[BB15]]: 2662; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[C]], align 16 2663; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[C]] to i64 2664; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 193514046488576 2665; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr 2666; CHECK-NEXT: [[_MSLD2:%.*]] = load <4 x i32>, ptr [[TMP18]], align 16 2667; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> zeroinitializer, [[_MSLD1]] 2668; CHECK-NEXT: [[TMP4:%.*]] = fsub <4 x float> splat (float -0.000000e+00), [[TMP2]] 2669; CHECK-NEXT: [[_MSPROP3:%.*]] = or <4 x i32> [[_MSPROP]], [[_MSLD]] 2670; CHECK-NEXT: [[_MSPROP4:%.*]] = or <4 x i32> [[_MSPROP3]], [[_MSLD2]] 2671; CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP1]], <4 x float> [[TMP3]]) 2672; CHECK-NEXT: store <4 x i32> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 2673; CHECK-NEXT: ret <4 x float> [[TMP5]] 2674; 2675 %temp1 = load <4 x float>, ptr %A 2676 %temp2 = load <4 x float>, ptr %B 2677 %temp3 = load <4 x float>, ptr %C 2678 %temp4 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %temp2 2679 %temp5 = call <4 x float> @llvm.fma.v4f32(<4 x float> %temp4, <4 x float> %temp1, <4 x float> %temp3) 2680 ret <4 x float> %temp5 2681} 2682 2683define <2 x double> @fmls_commuted_neg_2d(ptr %A, ptr %B, ptr %C) nounwind sanitize_memory { 2684; CHECK-LABEL: define <2 x double> @fmls_commuted_neg_2d( 2685; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0]] { 2686; CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr @__msan_param_tls, align 8 2687; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 2688; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), 
align 8 2689; CHECK-NEXT: call void @llvm.donothing() 2690; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0 2691; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]] 2692; CHECK: [[BB4]]: 2693; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2694; CHECK-NEXT: unreachable 2695; CHECK: [[BB5]]: 2696; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[A]], align 16 2697; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64 2698; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 193514046488576 2699; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr 2700; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP8]], align 16 2701; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i64 [[TMP10]], 0 2702; CHECK-NEXT: br i1 [[_MSCMP5]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]] 2703; CHECK: [[BB9]]: 2704; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2705; CHECK-NEXT: unreachable 2706; CHECK: [[BB10]]: 2707; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[B]], align 16 2708; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64 2709; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 193514046488576 2710; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr 2711; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP13]], align 16 2712; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP14]], 0 2713; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB14:.*]], label %[[BB15:.*]], !prof [[PROF1]] 2714; CHECK: [[BB14]]: 2715; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 2716; CHECK-NEXT: unreachable 2717; CHECK: [[BB15]]: 2718; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[C]], align 16 2719; CHECK-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[C]] to i64 2720; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP16]], 193514046488576 2721; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr 2722; CHECK-NEXT: [[_MSLD2:%.*]] = load <2 x i64>, ptr [[TMP18]], align 16 2723; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> zeroinitializer, [[_MSLD1]] 2724; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> splat (double -0.000000e+00), [[TMP2]] 2725; CHECK-NEXT: [[_MSPROP3:%.*]] = or <2 x i64> [[_MSPROP]], [[_MSLD]] 2726; CHECK-NEXT: [[_MSPROP4:%.*]] = or <2 x i64> [[_MSPROP3]], [[_MSLD2]] 2727; CHECK-NEXT: [[TMP5:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP1]], <2 x double> [[TMP3]]) 2728; CHECK-NEXT: store <2 x i64> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 2729; CHECK-NEXT: ret <2 x double> [[TMP5]] 2730; 2731 %temp1 = load <2 x double>, ptr %A 2732 %temp2 = load <2 x double>, ptr %B 2733 %temp3 = load <2 x double>, ptr %C 2734 %temp4 = fsub <2 x double> <double -0.0, double -0.0>, %temp2 2735 %temp5 = call <2 x double> @llvm.fma.v2f64(<2 x double> %temp4, <2 x double> %temp1, <2 x double> %temp3) 2736 ret <2 x double> %temp5 2737} 2738 2739define <2 x float> @fmls_indexed_2s(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp { 2740; CHECK-LABEL: define <2 x float> @fmls_indexed_2s( 2741; CHECK-SAME: <2 x float> [[A:%.*]], <2 x float> [[B:%.*]], <2 x float> [[C:%.*]]) #[[ATTR3:[0-9]+]] { 2742; CHECK-NEXT: [[ENTRY:.*:]] 2743; CHECK-NEXT: call void @llvm.donothing() 2744; CHECK-NEXT: [[TMP0:%.*]] = fsub <2 x float> splat (float -0.000000e+00), [[C]] 2745; CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[B]], <2 x float> undef, <2 x i32> zeroinitializer 2746; CHECK-NEXT: [[FMLS1:%.*]] = tail call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP0]], <2 x float> [[LANE]], <2 x float> [[A]]) 2747; 
CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 2748; CHECK-NEXT: ret <2 x float> [[FMLS1]] 2749; 2750entry: 2751 %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %c 2752 %lane = shufflevector <2 x float> %b, <2 x float> undef, <2 x i32> zeroinitializer 2753 %fmls1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %0, <2 x float> %lane, <2 x float> %a) 2754 ret <2 x float> %fmls1 2755} 2756 2757define <4 x float> @fmls_indexed_4s(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone ssp { 2758; CHECK-LABEL: define <4 x float> @fmls_indexed_4s( 2759; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]]) #[[ATTR3]] { 2760; CHECK-NEXT: [[ENTRY:.*:]] 2761; CHECK-NEXT: call void @llvm.donothing() 2762; CHECK-NEXT: [[TMP0:%.*]] = fsub <4 x float> splat (float -0.000000e+00), [[C]] 2763; CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[B]], <4 x float> undef, <4 x i32> zeroinitializer 2764; CHECK-NEXT: [[FMLS1:%.*]] = tail call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP0]], <4 x float> [[LANE]], <4 x float> [[A]]) 2765; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 2766; CHECK-NEXT: ret <4 x float> [[FMLS1]] 2767; 2768entry: 2769 %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c 2770 %lane = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer 2771 %fmls1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %0, <4 x float> %lane, <4 x float> %a) 2772 ret <4 x float> %fmls1 2773} 2774 2775define <2 x double> @fmls_indexed_2d(<2 x double> %a, <2 x double> %b, <2 x double> %c) nounwind readnone ssp { 2776; CHECK-LABEL: define <2 x double> @fmls_indexed_2d( 2777; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]]) #[[ATTR3]] { 2778; CHECK-NEXT: [[ENTRY:.*:]] 2779; CHECK-NEXT: call void @llvm.donothing() 2780; CHECK-NEXT: [[TMP0:%.*]] = fsub <2 x double> splat (double -0.000000e+00), [[C]] 2781; CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x double> [[B]], <2 x double> undef, <2 x i32> zeroinitializer 2782; CHECK-NEXT: [[FMLS1:%.*]] = tail call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP0]], <2 x double> [[LANE]], <2 x double> [[A]]) 2783; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 2784; CHECK-NEXT: ret <2 x double> [[FMLS1]] 2785; 2786entry: 2787 %0 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c 2788 %lane = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer 2789 %fmls1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %0, <2 x double> %lane, <2 x double> %a) 2790 ret <2 x double> %fmls1 2791} 2792 2793define <2 x float> @fmla_indexed_scalar_2s(<2 x float> %a, <2 x float> %b, float %c) nounwind readnone ssp { 2794; CHECK-LABEL: define <2 x float> @fmla_indexed_scalar_2s( 2795; CHECK-SAME: <2 x float> [[A:%.*]], <2 x float> [[B:%.*]], float [[C:%.*]]) #[[ATTR3]] { 2796; CHECK-NEXT: [[ENTRY:.*:]] 2797; CHECK-NEXT: call void @llvm.donothing() 2798; CHECK-NEXT: [[V1:%.*]] = insertelement <2 x float> undef, float [[C]], i32 0 2799; CHECK-NEXT: [[V2:%.*]] = insertelement <2 x float> [[V1]], float [[C]], i32 1 2800; CHECK-NEXT: [[FMLA1:%.*]] = tail call <2 x float> @llvm.fma.v2f32(<2 x float> [[V1]], <2 x float> [[B]], <2 x float> [[A]]) #[[ATTR7:[0-9]+]] 2801; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 2802; CHECK-NEXT: ret <2 x float> [[FMLA1]] 
2803; 2804entry: 2805 %v1 = insertelement <2 x float> undef, float %c, i32 0 2806 %v2 = insertelement <2 x float> %v1, float %c, i32 1 2807 %fmla1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %v1, <2 x float> %b, <2 x float> %a) nounwind 2808 ret <2 x float> %fmla1 2809} 2810 2811define <4 x float> @fmla_indexed_scalar_4s(<4 x float> %a, <4 x float> %b, float %c) nounwind readnone ssp { 2812; CHECK-LABEL: define <4 x float> @fmla_indexed_scalar_4s( 2813; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], float [[C:%.*]]) #[[ATTR3]] { 2814; CHECK-NEXT: [[ENTRY:.*:]] 2815; CHECK-NEXT: call void @llvm.donothing() 2816; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x float> undef, float [[C]], i32 0 2817; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[C]], i32 1 2818; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[C]], i32 2 2819; CHECK-NEXT: [[V4:%.*]] = insertelement <4 x float> [[V3]], float [[C]], i32 3 2820; CHECK-NEXT: [[FMLA1:%.*]] = tail call <4 x float> @llvm.fma.v4f32(<4 x float> [[V4]], <4 x float> [[B]], <4 x float> [[A]]) #[[ATTR7]] 2821; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 2822; CHECK-NEXT: ret <4 x float> [[FMLA1]] 2823; 2824entry: 2825 %v1 = insertelement <4 x float> undef, float %c, i32 0 2826 %v2 = insertelement <4 x float> %v1, float %c, i32 1 2827 %v3 = insertelement <4 x float> %v2, float %c, i32 2 2828 %v4 = insertelement <4 x float> %v3, float %c, i32 3 2829 %fmla1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %v4, <4 x float> %b, <4 x float> %a) nounwind 2830 ret <4 x float> %fmla1 2831} 2832 2833define <2 x double> @fmla_indexed_scalar_2d(<2 x double> %a, <2 x double> %b, double %c) nounwind readnone ssp { 2834; CHECK-LABEL: define <2 x double> @fmla_indexed_scalar_2d( 2835; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], double [[C:%.*]]) #[[ATTR3]] { 2836; CHECK-NEXT: [[ENTRY:.*:]] 2837; CHECK-NEXT: call void @llvm.donothing() 2838; CHECK-NEXT: [[V1:%.*]] = insertelement <2 x double> undef, double [[C]], i32 0 2839; CHECK-NEXT: [[V2:%.*]] = insertelement <2 x double> [[V1]], double [[C]], i32 1 2840; CHECK-NEXT: [[FMLA1:%.*]] = tail call <2 x double> @llvm.fma.v2f64(<2 x double> [[V2]], <2 x double> [[B]], <2 x double> [[A]]) #[[ATTR7]] 2841; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 2842; CHECK-NEXT: ret <2 x double> [[FMLA1]] 2843; 2844entry: 2845 %v1 = insertelement <2 x double> undef, double %c, i32 0 2846 %v2 = insertelement <2 x double> %v1, double %c, i32 1 2847 %fmla1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %v2, <2 x double> %b, <2 x double> %a) nounwind 2848 ret <2 x double> %fmla1 2849} 2850 2851define <2 x float> @fmls_indexed_2s_strict(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp strictfp { 2852; CHECK-LABEL: define <2 x float> @fmls_indexed_2s_strict( 2853; CHECK-SAME: <2 x float> [[A:%.*]], <2 x float> [[B:%.*]], <2 x float> [[C:%.*]]) #[[ATTR4:[0-9]+]] { 2854; CHECK-NEXT: [[ENTRY:.*:]] 2855; CHECK-NEXT: call void @llvm.donothing() 2856; CHECK-NEXT: [[TMP0:%.*]] = fneg <2 x float> [[C]] 2857; CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x float> [[B]], <2 x float> undef, <2 x i32> zeroinitializer 2858; CHECK-NEXT: [[FMLS1:%.*]] = tail call <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float> [[TMP0]], <2 x float> [[LANE]], <2 x float> [[A]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR9:[0-9]+]] 2859; CHECK-NEXT: store <2 x i32> 
zeroinitializer, ptr @__msan_retval_tls, align 8 2860; CHECK-NEXT: ret <2 x float> [[FMLS1]] 2861; 2862entry: 2863 %0 = fneg <2 x float> %c 2864 %lane = shufflevector <2 x float> %b, <2 x float> undef, <2 x i32> zeroinitializer 2865 %fmls1 = tail call <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float> %0, <2 x float> %lane, <2 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 2866 ret <2 x float> %fmls1 2867} 2868 2869define <4 x float> @fmls_indexed_4s_strict(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone ssp strictfp { 2870; CHECK-LABEL: define <4 x float> @fmls_indexed_4s_strict( 2871; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]]) #[[ATTR4]] { 2872; CHECK-NEXT: [[ENTRY:.*:]] 2873; CHECK-NEXT: call void @llvm.donothing() 2874; CHECK-NEXT: [[TMP0:%.*]] = fneg <4 x float> [[C]] 2875; CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x float> [[B]], <4 x float> undef, <4 x i32> zeroinitializer 2876; CHECK-NEXT: [[FMLS1:%.*]] = tail call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> [[TMP0]], <4 x float> [[LANE]], <4 x float> [[A]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR9]] 2877; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 2878; CHECK-NEXT: ret <4 x float> [[FMLS1]] 2879; 2880entry: 2881 %0 = fneg <4 x float> %c 2882 %lane = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer 2883 %fmls1 = tail call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %0, <4 x float> %lane, <4 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 2884 ret <4 x float> %fmls1 2885} 2886 2887define <2 x double> @fmls_indexed_2d_strict(<2 x double> %a, <2 x double> %b, <2 x double> %c) nounwind readnone ssp strictfp { 2888; CHECK-LABEL: define <2 x double> @fmls_indexed_2d_strict( 2889; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]]) #[[ATTR4]] { 2890; CHECK-NEXT: [[ENTRY:.*:]] 2891; CHECK-NEXT: call void @llvm.donothing() 2892; CHECK-NEXT: [[TMP0:%.*]] = fneg <2 x double> [[C]] 2893; CHECK-NEXT: [[LANE:%.*]] = shufflevector <2 x double> [[B]], <2 x double> undef, <2 x i32> zeroinitializer 2894; CHECK-NEXT: [[FMLS1:%.*]] = tail call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> [[TMP0]], <2 x double> [[LANE]], <2 x double> [[A]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR9]] 2895; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 2896; CHECK-NEXT: ret <2 x double> [[FMLS1]] 2897; 2898entry: 2899 %0 = fneg <2 x double> %c 2900 %lane = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer 2901 %fmls1 = tail call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %0, <2 x double> %lane, <2 x double> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 2902 ret <2 x double> %fmls1 2903} 2904 2905define <2 x float> @fmla_indexed_scalar_2s_strict(<2 x float> %a, <2 x float> %b, float %c) nounwind readnone ssp strictfp { 2906; CHECK-LABEL: define <2 x float> @fmla_indexed_scalar_2s_strict( 2907; CHECK-SAME: <2 x float> [[A:%.*]], <2 x float> [[B:%.*]], float [[C:%.*]]) #[[ATTR4]] { 2908; CHECK-NEXT: [[ENTRY:.*:]] 2909; CHECK-NEXT: call void @llvm.donothing() 2910; CHECK-NEXT: [[V1:%.*]] = insertelement <2 x float> undef, float [[C]], i32 0 2911; CHECK-NEXT: [[V2:%.*]] = insertelement <2 x float> [[V1]], float [[C]], i32 1 2912; CHECK-NEXT: 
[[FMLA1:%.*]] = tail call <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float> [[V2]], <2 x float> [[B]], <2 x float> [[A]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR9]]
; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x float> [[FMLA1]]
;
entry:
  %v1 = insertelement <2 x float> undef, float %c, i32 0
  %v2 = insertelement <2 x float> %v1, float %c, i32 1
  %fmla1 = tail call <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float> %v2, <2 x float> %b, <2 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  ret <2 x float> %fmla1
}

define <4 x float> @fmla_indexed_scalar_4s_strict(<4 x float> %a, <4 x float> %b, float %c) nounwind readnone ssp strictfp {
; CHECK-LABEL: define <4 x float> @fmla_indexed_scalar_4s_strict(
; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]], float [[C:%.*]]) #[[ATTR4]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[V1:%.*]] = insertelement <4 x float> undef, float [[C]], i32 0
; CHECK-NEXT: [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[C]], i32 1
; CHECK-NEXT: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[C]], i32 2
; CHECK-NEXT: [[V4:%.*]] = insertelement <4 x float> [[V3]], float [[C]], i32 3
; CHECK-NEXT: [[FMLA1:%.*]] = tail call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> [[V4]], <4 x float> [[B]], <4 x float> [[A]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR9]]
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x float> [[FMLA1]]
;
entry:
  %v1 = insertelement <4 x float> undef, float %c, i32 0
  %v2 = insertelement <4 x float> %v1, float %c, i32 1
  %v3 = insertelement <4 x float> %v2, float %c, i32 2
  %v4 = insertelement <4 x float> %v3, float %c, i32 3
  %fmla1 = tail call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %v4, <4 x float> %b, <4 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  ret <4 x float> %fmla1
}

define <2 x double> @fmla_indexed_scalar_2d_strict(<2 x double> %a, <2 x double> %b, double %c) nounwind readnone ssp strictfp {
; CHECK-LABEL: define <2 x double> @fmla_indexed_scalar_2d_strict(
; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]], double [[C:%.*]]) #[[ATTR4]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[V1:%.*]] = insertelement <2 x double> undef, double [[C]], i32 0
; CHECK-NEXT: [[V2:%.*]] = insertelement <2 x double> [[V1]], double [[C]], i32 1
; CHECK-NEXT: [[FMLA1:%.*]] = tail call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> [[V2]], <2 x double> [[B]], <2 x double> [[A]], metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR9]]
; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x double> [[FMLA1]]
;
entry:
  %v1 = insertelement <2 x double> undef, double %c, i32 0
  %v2 = insertelement <2 x double> %v1, double %c, i32 1
  %fmla1 = tail call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %v2, <2 x double> %b, <2 x double> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  ret <2 x double> %fmla1
}

attributes #0 = { strictfp }

declare <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float>, <2 x float>, <2 x float>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)

define <4 x i16> @mul_4h(<4 x i16> %A, <4 x i16> %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @mul_4h(
; CHECK-SAME: <4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> splat (i16 -1), <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[B]], <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i16> [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i16> [[A]], [[TMP3]]
; CHECK-NEXT: store <4 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i16> [[TMP4]]
;
  %temp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %temp4 = mul <4 x i16> %A, %temp3
  ret <4 x i16> %temp4
}

define <8 x i16> @mul_8h(<8 x i16> %A, <8 x i16> %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @mul_8h(
; CHECK-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> splat (i16 -1), <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[TMP4:%.*]] = mul <8 x i16> [[A]], [[TMP3]]
; CHECK-NEXT: store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[TMP4]]
;
  %temp3 = shufflevector <8 x i16> %B, <8 x i16> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %temp4 = mul <8 x i16> %A, %temp3
  ret <8 x i16> %temp4
}

define <2 x i32> @mul_2s(<2 x i32> %A, <2 x i32> %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @mul_2s(
; CHECK-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> splat (i32 -1), <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[B]], <2 x i32> poison, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i32> [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[TMP4:%.*]] = mul <2 x i32> [[A]], [[TMP3]]
; CHECK-NEXT: store <2 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i32> [[TMP4]]
;
  %temp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
  %temp4 = mul <2 x i32> %A, %temp3
  ret <2 x i32> %temp4
}

define <4 x i32> @mul_4s(<4 x i32> %A, <4 x i32> %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @mul_4s(
; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> splat (i32 -1), <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[A]], [[TMP3]]
; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[TMP4]]
;
  %temp3 = shufflevector <4 x i32> %B, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %temp4 = mul <4 x i32> %A, %temp3
  ret <4 x i32> %temp4
}

define <2 x i64> @mul_2d(<2 x i64> %A, <2 x i64> %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @mul_2d(
; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP1:%.*]] = mul <2 x i64> [[A]], [[B]]
; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
;
  %temp1 = mul <2 x i64> %A, %B
  ret <2 x i64> %temp1
}

define <2 x float> @fmul_lane_2s(<2 x float> %A, <2 x float> %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x float> @fmul_lane_2s(
; CHECK-SAME: <2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> splat (i32 -1), <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[B]], <2 x float> poison, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i32> [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[A]], [[TMP3]]
; CHECK-NEXT: store <2 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x float> [[TMP4]]
;
  %temp3 = shufflevector <2 x float> %B, <2 x float> poison, <2 x i32> <i32 1, i32 1>
  %temp4 = fmul <2 x float> %A, %temp3
  ret <2 x float> %temp4
}

define <4 x float> @fmul_lane_4s(<4 x float> %A, <4 x float> %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x float> @fmul_lane_4s(
; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> splat (i32 -1), <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[TMP4:%.*]] = fmul <4 x float> [[A]], [[TMP3]]
; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x float> [[TMP4]]
;
  %temp3 = shufflevector <4 x float> %B, <4 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %temp4 = fmul <4 x float> %A, %temp3
  ret <4 x float> %temp4
}

define <2 x double> @fmul_lane_2d(<2 x double> %A, <2 x double> %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x double> @fmul_lane_2d(
; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> splat (i64 -1), <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[A]], [[TMP3]]
; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x double> [[TMP4]]
;
  %temp3 = shufflevector <2 x double> %B, <2 x double> poison, <2 x i32> <i32 1, i32 1>
  %temp4 = fmul <2 x double> %A, %temp3
  ret <2 x double> %temp4
}

define float @fmul_lane_s(float %A, <4 x float> %vec) nounwind sanitize_memory {
; CHECK-LABEL: define float @fmul_lane_s(
; CHECK-SAME: float [[A:%.*]], <4 x float> [[VEC:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
; CHECK-NEXT: [[B:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT: [[_MSPROP1:%.*]] = or i32 [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[RES:%.*]] = fmul float [[A]], [[B]]
; CHECK-NEXT: store i32 [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[RES]]
;
  %B = extractelement <4 x float> %vec, i32 3
  %res = fmul float %A, %B
  ret float %res
}

define double @fmul_lane_d(double %A, <2 x double> %vec) nounwind sanitize_memory {
; CHECK-LABEL: define double @fmul_lane_d(
; CHECK-SAME: double [[A:%.*]], <2 x double> [[VEC:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
; CHECK-NEXT: [[B:%.*]] = extractelement <2 x double> [[VEC]], i32 1
; CHECK-NEXT: [[_MSPROP1:%.*]] = or i64 [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[RES:%.*]] = fmul double [[A]], [[B]]
; CHECK-NEXT: store i64 [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret double [[RES]]
;
  %B = extractelement <2 x double> %vec, i32 1
  %res = fmul double %A, %B
  ret double %res
}



define <2 x float> @fmulx_lane_2s(<2 x float> %A, <2 x float> %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x float> @fmulx_lane_2s(
; CHECK-SAME: <2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> splat (i32 -1), <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[B]], <2 x float> poison, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i32> [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[_MSPROP2:%.*]] = or <2 x i32> [[_MSPROP1]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> [[A]], <2 x float> [[TMP3]])
; CHECK-NEXT: store <2 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x float> [[TMP4]]
;
  %temp3 = shufflevector <2 x float> %B, <2 x float> poison, <2 x i32> <i32 1, i32 1>
  %temp4 = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %A, <2 x float> %temp3)
  ret <2 x float> %temp4
}

define <4 x float> @fmulx_lane_4s(<4 x float> %A, <4 x float> %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x float> @fmulx_lane_4s(
; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> splat (i32 -1), <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i32> [[_MSPROP1]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> [[A]], <4 x float> [[TMP3]])
; CHECK-NEXT: store <4 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x float> [[TMP4]]
;
  %temp3 = shufflevector <4 x float> %B, <4 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %temp4 = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %A, <4 x float> %temp3)
  ret <4 x float> %temp4
}

define <2 x double> @fmulx_lane_2d(<2 x double> %A, <2 x double> %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x double> @fmulx_lane_2d(
; CHECK-SAME: <2 x double> [[A:%.*]], <2 x double> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> splat (i64 -1), <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[_MSPROP2:%.*]] = or <2 x i64> [[_MSPROP1]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> [[A]], <2 x double> [[TMP3]])
; CHECK-NEXT: store <2 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x double> [[TMP4]]
;
  %temp3 = shufflevector <2 x double> %B, <2 x double> poison, <2 x i32> <i32 1, i32 1>
  %temp4 = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %A, <2 x double> %temp3)
  ret <2 x double> %temp4
}

define <4 x i16> @sqdmulh_lane_4h(<4 x i16> %A, <4 x i16> %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @sqdmulh_lane_4h(
; CHECK-SAME: <4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> splat (i16 -1), <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[B]], <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i16> [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[A]], <4 x i16> [[TMP3]])
; CHECK-NEXT: store <4 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i16> [[TMP4]]
;
  %temp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %temp4 = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %A, <4 x i16> %temp3)
  ret <4 x i16> %temp4
}

define <8 x i16> @sqdmulh_lane_8h(<8 x i16> %A, <8 x i16> %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @sqdmulh_lane_8h(
; CHECK-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> splat (i16 -1), <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> [[A]], <8 x i16> [[TMP3]])
; CHECK-NEXT: store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[TMP4]]
;
  %temp3 = shufflevector <8 x i16> %B, <8 x i16> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %temp4 = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %A, <8 x i16> %temp3)
  ret <8 x i16> %temp4
}

define <2 x i32> @sqdmulh_lane_2s(<2 x i32> %A, <2 x i32> %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @sqdmulh_lane_2s(
; CHECK-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> splat (i32 -1), <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[B]], <2 x i32> poison, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i32> [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> [[A]], <2 x i32> [[TMP3]])
; CHECK-NEXT: store <2 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i32> [[TMP4]]
;
  %temp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
  %temp4 = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %A, <2 x i32> %temp3)
  ret <2 x i32> %temp4
}

define <4 x i32> @sqdmulh_lane_4s(<4 x i32> %A, <4 x i32> %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @sqdmulh_lane_4s(
; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> splat (i32 -1), <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> [[A]], <4 x i32> [[TMP3]])
; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[TMP4]]
;
  %temp3 = shufflevector <4 x i32> %B, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %temp4 = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %A, <4 x i32> %temp3)
  ret <4 x i32> %temp4
}

define i32 @sqdmulh_lane_1s(i32 %A, <4 x i32> %B) nounwind sanitize_memory {
; CHECK-LABEL: define i32 @sqdmulh_lane_1s(
; CHECK-SAME: i32 [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[B]], i32 1
; CHECK-NEXT: [[_MSPROP1:%.*]] = or i32 [[TMP4]], [[_MSPROP]]
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 [[A]], i32 [[TMP1]])
; CHECK-NEXT: store i32 [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[TMP2]]
;
  %temp1 = extractelement <4 x i32> %B, i32 1
  %temp2 = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %A, i32 %temp1)
  ret i32 %temp2
}

define <4 x i16> @sqrdmulh_lane_4h(<4 x i16> %A, <4 x i16> %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @sqrdmulh_lane_4h(
; CHECK-SAME: <4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> splat (i16 -1), <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[B]], <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i16> [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[A]], <4 x i16> [[TMP3]])
; CHECK-NEXT: store <4 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i16> [[TMP4]]
;
  %temp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %temp4 = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %A, <4 x i16> %temp3)
  ret <4 x i16> %temp4
}

define <8 x i16> @sqrdmulh_lane_8h(<8 x i16> %A, <8 x i16> %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @sqrdmulh_lane_8h(
; CHECK-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> splat (i16 -1), <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> [[A]], <8 x i16> [[TMP3]])
; CHECK-NEXT: store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[TMP4]]
;
  %temp3 = shufflevector <8 x i16> %B, <8 x i16> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %temp4 = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %A, <8 x i16> %temp3)
  ret <8 x i16> %temp4
}

define <2 x i32> @sqrdmulh_lane_2s(<2 x i32> %A, <2 x i32> %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @sqrdmulh_lane_2s(
; CHECK-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> splat (i32 -1), <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[B]], <2 x i32> poison, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i32> [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> [[A]], <2 x i32> [[TMP3]])
; CHECK-NEXT: store <2 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i32> [[TMP4]]
;
  %temp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
  %temp4 = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %A, <2 x i32> %temp3)
  ret <2 x i32> %temp4
}

define <4 x i32> @sqrdmulh_lane_4s(<4 x i32> %A, <4 x i32> %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @sqrdmulh_lane_4s(
; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> splat (i32 -1), <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> [[A]], <4 x i32> [[TMP3]])
; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[TMP4]]
;
  %temp3 = shufflevector <4 x i32> %B, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %temp4 = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %A, <4 x i32> %temp3)
  ret <4 x i32> %temp4
}

define i32 @sqrdmulh_lane_1s(i32 %A, <4 x i32> %B) nounwind sanitize_memory {
; CHECK-LABEL: define i32 @sqrdmulh_lane_1s(
; CHECK-SAME: i32 [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP3]], i32 1
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[B]], i32 1
; CHECK-NEXT: [[_MSPROP1:%.*]] = or i32 [[TMP4]], [[_MSPROP]]
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 [[A]], i32 [[TMP1]])
; CHECK-NEXT: store i32 [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[TMP2]]
;
  %temp1 = extractelement <4 x i32> %B, i32 1
  %temp2 = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %A, i32 %temp1)
  ret i32 %temp2
}

define <4 x i32> @sqdmull_lane_4s(<4 x i16> %A, <4 x i16> %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @sqdmull_lane_4s(
; CHECK-SAME: <4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> splat (i16 -1), <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[B]], <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP5]], 0
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[_MSPROP]] to i64
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP6]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]]
; CHECK: [[BB5]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
; CHECK: [[BB6]]:
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[A]], <4 x i16> [[TMP3]])
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[TMP4]]
;
  %temp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %temp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %A, <4 x i16> %temp3)
  ret <4 x i32> %temp4
}

define <2 x i64> @sqdmull_lane_2d(<2 x i32> %A, <2 x i32> %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @sqdmull_lane_2d(
; CHECK-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> splat (i32 -1), <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[B]], <2 x i32> poison, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP2]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP5]], 0
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i32> [[_MSPROP]] to i64
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP6]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]]
; CHECK: [[BB5]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
; CHECK: [[BB6]]:
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[A]], <2 x i32> [[TMP3]])
; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[TMP4]]
;
  %temp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
  %temp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %A, <2 x i32> %temp3)
  ret <2 x i64> %temp4
}

define <4 x i32> @sqdmull2_lane_4s(<8 x i16> %A, <8 x i16> %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @sqdmull2_lane_4s(
; CHECK-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[TMP5]], <8 x i16> splat (i16 -1), <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i16> [[TMP6]], <8 x i16> splat (i16 -1), <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[_MSPROP]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[_MSPROP1]] to i64
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP7]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP2]]
; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]]
; CHECK: [[BB5]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
; CHECK: [[BB6]]:
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[TMP4]]
;
  %temp1 = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %temp2 = shufflevector <8 x i16> %B, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %temp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %temp1, <4 x i16> %temp2)
  ret <4 x i32> %temp4
}

define <2 x i64> @sqdmull2_lane_2d(<4 x i32> %A, <4 x i32> %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @sqdmull2_lane_2d(
; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> splat (i32 -1), <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> splat (i32 -1), <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[_MSPROP]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[_MSPROP1]] to i64
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP7]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP2]]
; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]]
; CHECK: [[BB5]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
; CHECK: [[BB6]]:
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[TMP4]]
;
  %temp1 = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %temp2 = shufflevector <4 x i32> %B, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
  %temp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %temp1, <2 x i32> %temp2)
  ret <2 x i64> %temp4
}

define <4 x i32> @umull_lane_4s(<4 x i16> %A, <4 x i16> %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @umull_lane_4s(
; CHECK-SAME: <4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> splat (i16 -1), <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[B]], <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i16> [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i16> [[_MSPROP1]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i16> [[_MSPROP2]] to <4 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[A]], <4 x i16> [[TMP3]])
; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[TMP4]]
;
  %temp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %temp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %A, <4 x i16> %temp3)
  ret <4 x i32> %temp4
}

define <2 x i64> @umull_lane_2d(<2 x i32> %A, <2 x i32> %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @umull_lane_2d(
; CHECK-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> splat (i32 -1), <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[B]], <2 x i32> poison, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i32> [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[_MSPROP2:%.*]] = or <2 x i32> [[_MSPROP1]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i32> [[_MSPROP2]] to <2 x i64>
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[A]], <2 x i32> [[TMP3]])
; CHECK-NEXT: store <2 x i64> [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[TMP4]]
;
  %temp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
  %temp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %A, <2 x i32> %temp3)
  ret <2 x i64> %temp4
}

define <4 x i32> @smull_lane_4s(<4 x i16> %A, <4 x i16> %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @smull_lane_4s(
; CHECK-SAME: <4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> splat (i16 -1), <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[B]], <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i16> [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i16> [[_MSPROP1]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i16> [[_MSPROP2]] to <4 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[A]], <4 x i16> [[TMP3]])
; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[TMP4]]
;
  %temp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %temp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %A, <4 x i16> %temp3)
  ret <4 x i32> %temp4
}

define <2 x i64> @smull_lane_2d(<2 x i32> %A, <2 x i32> %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @smull_lane_2d(
; CHECK-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> splat (i32 -1), <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[B]], <2 x i32> poison, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i32> [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[_MSPROP2:%.*]] = or <2 x i32> [[_MSPROP1]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i32> [[_MSPROP2]] to <2 x i64>
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[A]], <2 x i32> [[TMP3]])
; CHECK-NEXT: store <2 x i64> [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[TMP4]]
;
  %temp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
  %temp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %A, <2 x i32> %temp3)
  ret <2 x i64> %temp4
}

define <4 x i32> @smlal_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @smlal_lane_4s(
; CHECK-SAME: <4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i32> [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> splat (i16 -1), <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[B]], <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i16> [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i16> [[_MSPROP1]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = zext <4 x i16> [[_MSPROP2]] to <4 x i32>
; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[A]], <4 x i16> [[TMP4]])
; CHECK-NEXT: [[_MSPROP3:%.*]] = or <4 x i32> [[TMP3]], [[TMP7]]
; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[C]], [[TMP5]]
; CHECK-NEXT: store <4 x i32> [[_MSPROP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[TMP6]]
;
  %temp4 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %temp5 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %A, <4 x i16> %temp4)
  %temp6 = add <4 x i32> %C, %temp5
  ret <4 x i32> %temp6
}

define <2 x i64> @smlal_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @smlal_lane_2d(
; CHECK-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i64> [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> splat (i32 -1), <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[B]], <2 x i32> poison, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i32> [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[_MSPROP2:%.*]] = or <2 x i32> [[_MSPROP1]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = zext <2 x i32> [[_MSPROP2]] to <2 x i64>
; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[A]], <2 x i32> [[TMP4]])
; CHECK-NEXT: [[_MSPROP3:%.*]] = or <2 x i64> [[TMP3]], [[TMP7]]
; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[C]], [[TMP5]]
; CHECK-NEXT: store <2 x i64> [[_MSPROP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[TMP6]]
;
  %temp4 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
  %temp5 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %A, <2 x i32> %temp4)
  %temp6 = add <2 x i64> %C, %temp5
  ret <2 x i64> %temp6
}

define <4 x i32> @sqdmlal_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @sqdmlal_lane_4s(
; CHECK-SAME: <4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i32> [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> splat (i16 -1), <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[B]], <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP7]], 0
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[_MSPROP]] to i64
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP2]]
; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
; CHECK: [[BB6]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
; CHECK: [[BB7]]:
; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[A]], <4 x i16> [[TMP4]])
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[C]], <4 x i32> [[TMP5]])
; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[TMP6]]
;
  %temp4 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %temp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %A, <4 x i16> %temp4)
  %temp6 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %C, <4 x i32> %temp5)
  ret <4 x i32> %temp6
}

define <2 x i64> @sqdmlal_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @sqdmlal_lane_2d(
; CHECK-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i64> [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> splat (i32 -1), <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[B]], <2 x i32> poison, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP2]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP7]], 0
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i32> [[_MSPROP]] to i64
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP2]]
; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
; CHECK: [[BB6]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
; CHECK: [[BB7]]:
; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[A]], <2 x i32> [[TMP4]])
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[C]], <2 x i64> [[TMP5]])
; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[TMP6]]
;
  %temp4 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
  %temp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %A, <2 x i32> %temp4)
  %temp6 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %C, <2 x i64> %temp5)
  ret <2 x i64> %temp6
}

define <4 x i32> @sqdmlal2_lane_4s(<8 x i16> %A, <8 x i16> %B, <4 x i32> %C) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @sqdmlal2_lane_4s(
; CHECK-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <4 x i32> [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP7:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[TMP7]], <8 x i16> splat (i16 -1), <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> splat (i16 -1), <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[_MSPROP]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[_MSPROP1]] to i64
; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP9]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP3]]
; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
; CHECK: [[BB6]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
; CHECK: [[BB7]]:
; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i32> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> [[C]], <4 x i32> [[TMP5]])
; CHECK-NEXT: store <4 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[TMP6]]
;
  %temp1 = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %temp2 = shufflevector <8 x i16> %B, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %temp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %temp1, <4 x i16> %temp2)
  %temp6 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %C, <4 x i32> %temp5)
  ret <4 x i32> %temp6
}

define <2 x i64> @sqdmlal2_lane_2d(<4 x i32> %A, <4 x i32> %B, <2 x i64> %C) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @sqdmlal2_lane_2d(
; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <2 x i64> [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> splat (i32 -1), <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> splat (i32 -1), <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[_MSPROP]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i32> [[_MSPROP1]] to i64
; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP9]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP3]]
; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
; CHECK: [[BB6]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
; CHECK: [[BB7]]:
; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
; CHECK-NEXT: [[_MSPROP2:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[C]], <2 x i64> [[TMP5]])
; CHECK-NEXT: store <2 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[TMP6]]
;
  %temp1 = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %temp2 = shufflevector <4 x i32> %B, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
  %temp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %temp1, <2 x i32> %temp2)
  %temp6 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %C, <2 x i64> %temp5)
  ret <2 x i64> %temp6
}

define i32 @sqdmlal_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind sanitize_memory {
; CHECK-LABEL: define i32 @sqdmlal_lane_1s(
; CHECK-SAME: i32 [[A:%.*]], i16 [[B:%.*]], <4 x i16> [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i16> splat (i16 -1), i16 [[TMP1]], i32 0
; CHECK-NEXT: [[LHS:%.*]] = insertelement <4 x i16> undef, i16 [[B]], i32 0
; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> splat (i16 -1), <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[RHS:%.*]] = shufflevector <4 x i16> [[C]], <4 x i16> undef, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[_MSPROP]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[_MSPROP1]] to i64
; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP5]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP3]]
; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
; CHECK: [[BB6]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
; CHECK: [[BB7]]:
; CHECK-NEXT: [[PROD_VEC:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[LHS]], <4 x i16> [[RHS]])
; CHECK-NEXT: [[PROD:%.*]] = extractelement <4 x i32> [[PROD_VEC]], i32 0
; CHECK-NEXT: [[_MSPROP2:%.*]] = or i32 [[TMP3]], 0
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 [[A]], i32 [[PROD]])
; CHECK-NEXT: store i32 [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
  %lhs = insertelement <4 x i16> undef, i16 %B, i32 0
  %rhs = shufflevector <4 x i16> %C, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs)
  %prod = extractelement <4 x i32> %prod.vec, i32 0
  %res = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %A, i32 %prod)
  ret i32 %res
}
declare i32 @llvm.aarch64.neon.sqadd.i32(i32, i32)

define i32 @sqdmlsl_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind sanitize_memory {
; CHECK-LABEL: define i32 @sqdmlsl_lane_1s(
; CHECK-SAME: i32 [[A:%.*]], i16 [[B:%.*]], <4 x i16> [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i16> splat (i16 -1), i16 [[TMP1]], i32 0
; CHECK-NEXT: [[LHS:%.*]] = insertelement <4 x i16> undef, i16 [[B]], i32 0
; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> splat (i16 -1), <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[RHS:%.*]] = shufflevector <4 x i16> [[C]], <4 x i16> undef, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[_MSPROP]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[_MSPROP1]] to i64
; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP5]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP3]]
; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
; CHECK: [[BB6]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
; CHECK: [[BB7]]:
; CHECK-NEXT: [[PROD_VEC:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[LHS]], <4 x i16> [[RHS]])
; CHECK-NEXT: [[PROD:%.*]] = extractelement <4 x i32> [[PROD_VEC]], i32 0
; CHECK-NEXT: [[_MSPROP2:%.*]] = or i32 [[TMP3]], 0
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 [[A]], i32 [[PROD]])
; CHECK-NEXT: store i32 [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
  %lhs = insertelement <4 x i16> undef, i16 %B, i32 0
  %rhs = shufflevector <4 x i16> %C, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs)
  %prod = extractelement <4 x i32> %prod.vec, i32 0
  %res = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %A, i32 %prod)
  ret i32 %res
}
declare i32 @llvm.aarch64.neon.sqsub.i32(i32, i32)

define i32 @sqadd_lane1_sqdmull4s(i32 %A, <4 x i16> %B, <4 x i16> %C) nounwind sanitize_memory {
; CHECK-LABEL: define i32 @sqadd_lane1_sqdmull4s(
; CHECK-SAME: i32 [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP5]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
; CHECK: [[BB6]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
; CHECK: [[BB7]]:
; CHECK-NEXT: [[PROD_VEC:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[C]])
; CHECK-NEXT: [[PROD:%.*]] = extractelement <4 x i32> [[PROD_VEC]], i32 1
; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP3]], 0
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 [[A]], i32 [[PROD]])
; CHECK-NEXT: store i32 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
  %prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %B, <4 x i16> %C)
  %prod = extractelement <4 x i32> %prod.vec, i32 1
  %res = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %A, i32 %prod)
  ret i32 %res
}

define i32 @sqsub_lane1_sqdmull4s(i32 %A, <4 x i16> %B, <4 x i16> %C) nounwind sanitize_memory {
; CHECK-LABEL: define i32 @sqsub_lane1_sqdmull4s(
; CHECK-SAME: i32 [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i16> [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP1]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP5]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
; CHECK: [[BB6]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
; CHECK: [[BB7]]:
; CHECK-NEXT: [[PROD_VEC:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[C]])
; CHECK-NEXT: [[PROD:%.*]] = extractelement <4 x i32> [[PROD_VEC]], i32 1
; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP3]], 0
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 [[A]], i32 [[PROD]])
; CHECK-NEXT: store i32 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
  %prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %B, <4 x i16> %C)
  %prod = extractelement <4 x i32> %prod.vec, i32 1
  %res = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %A, i32 %prod)
  ret i32 %res
}

define i64 @sqdmlal_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind sanitize_memory {
; CHECK-LABEL: define i64 @sqdmlal_lane_1d(
; CHECK-SAME: i64 [[A:%.*]], i32 [[B:%.*]], <2 x i32> [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
; CHECK-NEXT: [[RHS:%.*]] = extractelement <2 x i32> [[C]], i32 1
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i32 [[_MSPROP]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP2]]
; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]]
; CHECK: [[BB4]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
; CHECK: [[BB5]]:
; CHECK-NEXT: [[PROD:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 [[B]], i32 [[RHS]])
; CHECK-NEXT: [[_MSPROP1:%.*]] = or i64 [[TMP3]], 0
; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 [[A]], i64 [[PROD]])
; CHECK-NEXT: store i64 [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i64 [[RES]]
;
  %rhs = extractelement <2 x i32> %C, i32 1
  %prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %B, i32 %rhs)
  %res = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %A, i64 %prod)
  ret i64 %res
}
declare i64 @llvm.aarch64.neon.sqdmulls.scalar(i32, i32)
declare i64 @llvm.aarch64.neon.sqadd.i64(i64, i64)

define i64 @sqdmlsl_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind sanitize_memory {
; CHECK-LABEL: define i64 @sqdmlsl_lane_1d(
; CHECK-SAME: i64 [[A:%.*]], i32 [[B:%.*]], <2 x i32> [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
; CHECK-NEXT: [[RHS:%.*]] = extractelement <2 x i32> [[C]], i32 1
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i32 [[_MSPROP]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP2]]
; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]]
; CHECK: [[BB4]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
; CHECK: [[BB5]]:
; CHECK-NEXT: [[PROD:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 [[B]], i32 [[RHS]])
; CHECK-NEXT: [[_MSPROP1:%.*]] = or i64 [[TMP3]], 0
; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 [[A]], i64 [[PROD]])
; CHECK-NEXT: store i64 [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i64 [[RES]]
;
  %rhs = extractelement <2 x i32> %C, i32 1
  %prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %B, i32 %rhs)
  %res = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %A, i64 %prod)
  ret i64 %res
}
declare i64 @llvm.aarch64.neon.sqsub.i64(i64, i64)


define <4 x i32> @umlal_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @umlal_lane_4s(
; CHECK-SAME: <4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i32> [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> splat (i16 -1), <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[B]], <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i16> [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i16> [[_MSPROP1]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = zext <4 x i16> [[_MSPROP2]] to <4 x i32>
; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[A]], <4 x i16> [[TMP4]])
; CHECK-NEXT: [[_MSPROP3:%.*]] = or <4 x i32> [[TMP3]], [[TMP7]]
; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[C]], [[TMP5]]
; CHECK-NEXT: store <4 x i32> [[_MSPROP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[TMP6]]
;
  %temp4 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %temp5 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %A, <4 x i16> %temp4)
  %temp6 = add <4 x i32> %C, %temp5
  ret <4 x i32> %temp6
}

define <2 x i64> @umlal_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @umlal_lane_2d(
; CHECK-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i64> [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> splat (i32 -1), <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[B]], <2 x i32> poison, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i32> [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[_MSPROP2:%.*]] = or <2 x i32> [[_MSPROP1]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = zext <2 x i32> [[_MSPROP2]] to <2 x i64>
; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[A]], <2 x i32> [[TMP4]])
; CHECK-NEXT: [[_MSPROP3:%.*]] = or <2 x i64> [[TMP3]], [[TMP7]]
; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[C]], [[TMP5]]
; CHECK-NEXT: store <2 x i64> [[_MSPROP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[TMP6]]
;
  %temp4 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
  %temp5 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %A, <2 x i32> %temp4)
  %temp6 = add <2 x i64> %C, %temp5
  ret <2 x i64> %temp6
}


define <4 x i32> @smlsl_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @smlsl_lane_4s(
; CHECK-SAME: <4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i32> [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
;
CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 4008; CHECK-NEXT: call void @llvm.donothing() 4009; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> splat (i16 -1), <4 x i32> <i32 1, i32 1, i32 1, i32 1> 4010; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[B]], <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 4011; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i16> [[TMP2]], [[_MSPROP]] 4012; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i16> [[_MSPROP1]], zeroinitializer 4013; CHECK-NEXT: [[TMP7:%.*]] = zext <4 x i16> [[_MSPROP2]] to <4 x i32> 4014; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[A]], <4 x i16> [[TMP4]]) 4015; CHECK-NEXT: [[_MSPROP3:%.*]] = or <4 x i32> [[TMP3]], [[TMP7]] 4016; CHECK-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[C]], [[TMP5]] 4017; CHECK-NEXT: store <4 x i32> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 4018; CHECK-NEXT: ret <4 x i32> [[TMP6]] 4019; 4020 %temp4 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 4021 %temp5 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %A, <4 x i16> %temp4) 4022 %temp6 = sub <4 x i32> %C, %temp5 4023 ret <4 x i32> %temp6 4024} 4025 4026define <2 x i64> @smlsl_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind sanitize_memory { 4027; CHECK-LABEL: define <2 x i64> @smlsl_lane_2d( 4028; CHECK-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i64> [[C:%.*]]) #[[ATTR0]] { 4029; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 4030; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8 4031; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 4032; CHECK-NEXT: call void @llvm.donothing() 4033; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> splat (i32 -1), <2 x i32> <i32 1, i32 1> 4034; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[B]], <2 x i32> poison, <2 x i32> <i32 1, i32 1> 4035; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i32> [[TMP2]], [[_MSPROP]] 4036; CHECK-NEXT: [[_MSPROP2:%.*]] = or <2 x i32> [[_MSPROP1]], zeroinitializer 4037; CHECK-NEXT: [[TMP7:%.*]] = zext <2 x i32> [[_MSPROP2]] to <2 x i64> 4038; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[A]], <2 x i32> [[TMP4]]) 4039; CHECK-NEXT: [[_MSPROP3:%.*]] = or <2 x i64> [[TMP3]], [[TMP7]] 4040; CHECK-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[C]], [[TMP5]] 4041; CHECK-NEXT: store <2 x i64> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 4042; CHECK-NEXT: ret <2 x i64> [[TMP6]] 4043; 4044 %temp4 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1> 4045 %temp5 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %A, <2 x i32> %temp4) 4046 %temp6 = sub <2 x i64> %C, %temp5 4047 ret <2 x i64> %temp6 4048} 4049 4050define <4 x i32> @sqdmlsl_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind sanitize_memory { 4051; CHECK-LABEL: define <4 x i32> @sqdmlsl_lane_4s( 4052; CHECK-SAME: <4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i32> [[C:%.*]]) #[[ATTR0]] { 4053; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 4054; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8 4055; CHECK-NEXT: [[TMP3:%.*]] 
= load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 4056; CHECK-NEXT: call void @llvm.donothing() 4057; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> splat (i16 -1), <4 x i32> <i32 1, i32 1, i32 1, i32 1> 4058; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[B]], <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 4059; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 4060; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP7]], 0 4061; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i16> [[_MSPROP]] to i64 4062; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0 4063; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP2]] 4064; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] 4065; CHECK: [[BB6]]: 4066; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 4067; CHECK-NEXT: unreachable 4068; CHECK: [[BB7]]: 4069; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[A]], <4 x i16> [[TMP4]]) 4070; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[TMP3]], zeroinitializer 4071; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[C]], <4 x i32> [[TMP5]]) 4072; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 4073; CHECK-NEXT: ret <4 x i32> [[TMP6]] 4074; 4075 %temp4 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 4076 %temp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %A, <4 x i16> %temp4) 4077 %temp6 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %C, <4 x i32> %temp5) 4078 ret <4 x i32> %temp6 4079} 4080 4081define <2 x i64> @sqdmlsl_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind sanitize_memory { 4082; CHECK-LABEL: define <2 x i64> @sqdmlsl_lane_2d( 4083; CHECK-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i64> [[C:%.*]]) #[[ATTR0]] { 4084; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 4085; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8 4086; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 4087; CHECK-NEXT: call void @llvm.donothing() 4088; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> splat (i32 -1), <2 x i32> <i32 1, i32 1> 4089; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[B]], <2 x i32> poison, <2 x i32> <i32 1, i32 1> 4090; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 4091; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP7]], 0 4092; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i32> [[_MSPROP]] to i64 4093; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0 4094; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP2]] 4095; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] 4096; CHECK: [[BB6]]: 4097; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 4098; CHECK-NEXT: unreachable 4099; CHECK: [[BB7]]: 4100; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[A]], <2 x i32> [[TMP4]]) 4101; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer 4102; CHECK-NEXT: [[TMP6:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[C]], <2 x i64> [[TMP5]]) 4103; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 
4104; CHECK-NEXT: ret <2 x i64> [[TMP6]] 4105; 4106 %temp4 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1> 4107 %temp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %A, <2 x i32> %temp4) 4108 %temp6 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %C, <2 x i64> %temp5) 4109 ret <2 x i64> %temp6 4110} 4111 4112define <4 x i32> @sqdmlsl2_lane_4s(<8 x i16> %A, <8 x i16> %B, <4 x i32> %C) nounwind sanitize_memory { 4113; CHECK-LABEL: define <4 x i32> @sqdmlsl2_lane_4s( 4114; CHECK-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <4 x i32> [[C:%.*]]) #[[ATTR0]] { 4115; CHECK-NEXT: [[TMP7:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 4116; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 4117; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 4118; CHECK-NEXT: call void @llvm.donothing() 4119; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[TMP7]], <8 x i16> splat (i16 -1), <4 x i32> <i32 4, i32 5, i32 6, i32 7> 4120; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 4121; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> splat (i16 -1), <4 x i32> <i32 1, i32 1, i32 1, i32 1> 4122; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 4123; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[_MSPROP]] to i64 4124; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 4125; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[_MSPROP1]] to i64 4126; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP9]], 0 4127; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP3]] 4128; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] 4129; CHECK: [[BB6]]: 4130; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 4131; CHECK-NEXT: unreachable 4132; CHECK: [[BB7]]: 4133; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]]) 4134; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i32> [[TMP3]], zeroinitializer 4135; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> [[C]], <4 x i32> [[TMP5]]) 4136; CHECK-NEXT: store <4 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 4137; CHECK-NEXT: ret <4 x i32> [[TMP6]] 4138; 4139 %temp1 = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 4140 %temp2 = shufflevector <8 x i16> %B, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 4141 %temp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %temp1, <4 x i16> %temp2) 4142 %temp6 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %C, <4 x i32> %temp5) 4143 ret <4 x i32> %temp6 4144} 4145 4146define <2 x i64> @sqdmlsl2_lane_2d(<4 x i32> %A, <4 x i32> %B, <2 x i64> %C) nounwind sanitize_memory { 4147; CHECK-LABEL: define <2 x i64> @sqdmlsl2_lane_2d( 4148; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <2 x i64> [[C:%.*]]) #[[ATTR0]] { 4149; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 4150; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 4151; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) 
to ptr), align 8 4152; CHECK-NEXT: call void @llvm.donothing() 4153; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> splat (i32 -1), <2 x i32> <i32 2, i32 3> 4154; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> undef, <2 x i32> <i32 2, i32 3> 4155; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <4 x i32> [[TMP8]], <4 x i32> splat (i32 -1), <2 x i32> <i32 1, i32 1> 4156; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <2 x i32> <i32 1, i32 1> 4157; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[_MSPROP]] to i64 4158; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP4]], 0 4159; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i32> [[_MSPROP1]] to i64 4160; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP9]], 0 4161; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP3]] 4162; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]] 4163; CHECK: [[BB6]]: 4164; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] 4165; CHECK-NEXT: unreachable 4166; CHECK: [[BB7]]: 4167; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]]) 4168; CHECK-NEXT: [[_MSPROP2:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer 4169; CHECK-NEXT: [[TMP6:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> [[C]], <2 x i64> [[TMP5]]) 4170; CHECK-NEXT: store <2 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8 4171; CHECK-NEXT: ret <2 x i64> [[TMP6]] 4172; 4173 %temp1 = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 4174 %temp2 = shufflevector <4 x i32> %B, <4 x i32> undef, <2 x i32> <i32 1, i32 1> 4175 %temp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %temp1, <2 x i32> %temp2) 4176 %temp6 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %C, <2 x i64> %temp5) 4177 ret <2 x i64> %temp6 4178} 4179 4180define <4 x i32> @umlsl_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind sanitize_memory { 4181; CHECK-LABEL: define <4 x i32> @umlsl_lane_4s( 4182; CHECK-SAME: <4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <4 x i32> [[C:%.*]]) #[[ATTR0]] { 4183; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 4184; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8 4185; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 4186; CHECK-NEXT: call void @llvm.donothing() 4187; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> splat (i16 -1), <4 x i32> <i32 1, i32 1, i32 1, i32 1> 4188; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[B]], <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 4189; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i16> [[TMP2]], [[_MSPROP]] 4190; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i16> [[_MSPROP1]], zeroinitializer 4191; CHECK-NEXT: [[TMP7:%.*]] = zext <4 x i16> [[_MSPROP2]] to <4 x i32> 4192; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[A]], <4 x i16> [[TMP4]]) 4193; CHECK-NEXT: [[_MSPROP3:%.*]] = or <4 x i32> [[TMP3]], [[TMP7]] 4194; CHECK-NEXT: [[TMP6:%.*]] = sub <4 x i32> [[C]], [[TMP5]] 4195; CHECK-NEXT: store <4 x i32> [[_MSPROP3]], ptr @__msan_retval_tls, align 8 4196; CHECK-NEXT: ret <4 x i32> [[TMP6]] 4197; 4198 %temp4 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 4199 %temp5 = call <4 x i32> 
@llvm.aarch64.neon.umull.v4i32(<4 x i16> %A, <4 x i16> %temp4)
  %temp6 = sub <4 x i32> %C, %temp5
  ret <4 x i32> %temp6
}

define <2 x i64> @umlsl_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @umlsl_lane_2d(
; CHECK-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i64> [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> splat (i32 -1), <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[B]], <2 x i32> poison, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i32> [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[_MSPROP2:%.*]] = or <2 x i32> [[_MSPROP1]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = zext <2 x i32> [[_MSPROP2]] to <2 x i64>
; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[A]], <2 x i32> [[TMP4]])
; CHECK-NEXT: [[_MSPROP3:%.*]] = or <2 x i64> [[TMP3]], [[TMP7]]
; CHECK-NEXT: [[TMP6:%.*]] = sub <2 x i64> [[C]], [[TMP5]]
; CHECK-NEXT: store <2 x i64> [[_MSPROP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[TMP6]]
;
  %temp4 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
  %temp5 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %A, <2 x i32> %temp4)
  %temp6 = sub <2 x i64> %C, %temp5
  ret <2 x i64> %temp6
}

; Scalar FMULX
define float @fmulxs(float %a, float %b) nounwind sanitize_memory {
; CHECK-LABEL: define float @fmulxs(
; CHECK-SAME: float [[A:%.*]], float [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[_MSPROP1:%.*]] = or i32 [[_MSPROP]], 0
; CHECK-NEXT: [[FMULX_I:%.*]] = tail call float @llvm.aarch64.neon.fmulx.f32(float [[A]], float [[B]]) #[[ATTR7]]
; CHECK-NEXT: store i32 [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[FMULX_I]]
;
  %fmulx.i = tail call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b) nounwind
  ret float %fmulx.i
}

define double @fmulxd(double %a, double %b) nounwind sanitize_memory {
; CHECK-LABEL: define double @fmulxd(
; CHECK-SAME: double [[A:%.*]], double [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[_MSPROP1:%.*]] = or i64 [[_MSPROP]], 0
; CHECK-NEXT: [[FMULX_I:%.*]] = tail call double @llvm.aarch64.neon.fmulx.f64(double [[A]], double [[B]]) #[[ATTR7]]
; CHECK-NEXT: store i64 [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret double [[FMULX_I]]
;
  %fmulx.i = tail call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b) nounwind
  ret double %fmulx.i
}

define float @fmulxs_lane(float %a, <4 x float> %vec) nounwind sanitize_memory {
; CHECK-LABEL: define float @fmulxs_lane(
; CHECK-SAME: float [[A:%.*]], <4 x float> [[VEC:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
; CHECK-NEXT: [[B:%.*]] = extractelement <4 x float> [[VEC]], i32 3
; CHECK-NEXT: [[_MSPROP1:%.*]] = or i32 [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[_MSPROP2:%.*]] = or i32 [[_MSPROP1]], 0
; CHECK-NEXT: [[FMULX_I:%.*]] = tail call float @llvm.aarch64.neon.fmulx.f32(float [[A]], float [[B]]) #[[ATTR7]]
; CHECK-NEXT: store i32 [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[FMULX_I]]
;
  %b = extractelement <4 x float> %vec, i32 3
  %fmulx.i = tail call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b) nounwind
  ret float %fmulx.i
}

define double @fmulxd_lane(double %a, <2 x double> %vec) nounwind sanitize_memory {
; CHECK-LABEL: define double @fmulxd_lane(
; CHECK-SAME: double [[A:%.*]], <2 x double> [[VEC:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
; CHECK-NEXT: [[B:%.*]] = extractelement <2 x double> [[VEC]], i32 1
; CHECK-NEXT: [[_MSPROP1:%.*]] = or i64 [[TMP2]], [[_MSPROP]]
; CHECK-NEXT: [[_MSPROP2:%.*]] = or i64 [[_MSPROP1]], 0
; CHECK-NEXT: [[FMULX_I:%.*]] = tail call double @llvm.aarch64.neon.fmulx.f64(double [[A]], double [[B]]) #[[ATTR7]]
; CHECK-NEXT: store i64 [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret double [[FMULX_I]]
;
  %b = extractelement <2 x double> %vec, i32 1
  %fmulx.i = tail call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b) nounwind
  ret double %fmulx.i
}

declare double @llvm.aarch64.neon.fmulx.f64(double, double) nounwind readnone
declare float @llvm.aarch64.neon.fmulx.f32(float, float) nounwind readnone


define <8 x i16> @smull2_8h_simple(<16 x i8> %a, <16 x i8> %b) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @smull2_8h_simple(
; CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i8> [[TMP4]], <16 x i8> splat (i8 -1), <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[_MSPROP1:%.*]] =
shufflevector <16 x i8> [[TMP6]], <16 x i8> splat (i8 -1), <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 4312; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 4313; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i8> [[_MSPROP]], [[_MSPROP1]] 4314; CHECK-NEXT: [[_MSPROP3:%.*]] = or <8 x i8> [[_MSPROP2]], zeroinitializer 4315; CHECK-NEXT: [[TMP5:%.*]] = zext <8 x i8> [[_MSPROP3]] to <8 x i16> 4316; CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) 4317; CHECK-NEXT: store <8 x i16> [[TMP5]], ptr @__msan_retval_tls, align 8 4318; CHECK-NEXT: ret <8 x i16> [[TMP3]] 4319; 4320 %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 4321 %2 = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 4322 %3 = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %1, <8 x i8> %2) #2 4323 ret <8 x i16> %3 4324} 4325 4326define <8 x i16> @foo0(<16 x i8> %a, <16 x i8> %b) nounwind sanitize_memory { 4327; CHECK-LABEL: define <8 x i16> @foo0( 4328; CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] { 4329; CHECK-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 4330; CHECK-NEXT: [[TMP9:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 4331; CHECK-NEXT: call void @llvm.donothing() 4332; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> 4333; CHECK-NEXT: [[TMP:%.*]] = bitcast <16 x i8> [[A]] to <2 x i64> 4334; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP10]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1> 4335; CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> undef, <1 x i32> <i32 1> 4336; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8> 4337; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I]] to <8 x i8> 4338; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x i64> 4339; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[B]] to <2 x i64> 4340; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1> 4341; CHECK-NEXT: [[SHUFFLE_I3_I:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <1 x i32> <i32 1> 4342; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[_MSPROP1]] to <8 x i8> 4343; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[SHUFFLE_I3_I]] to <8 x i8> 4344; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i8> [[TMP4]], [[TMP6]] 4345; CHECK-NEXT: [[_MSPROP3:%.*]] = or <8 x i8> [[_MSPROP2]], zeroinitializer 4346; CHECK-NEXT: [[TMP7:%.*]] = zext <8 x i8> [[_MSPROP3]] to <8 x i16> 4347; CHECK-NEXT: [[VMULL_I_I:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[TMP1]], <8 x i8> [[TMP3]]) #[[ATTR7]] 4348; CHECK-NEXT: store <8 x i16> [[TMP7]], ptr @__msan_retval_tls, align 8 4349; CHECK-NEXT: ret <8 x i16> [[VMULL_I_I]] 4350; 4351 %temp = bitcast <16 x i8> %a to <2 x i64> 4352 %shuffle.i.i = shufflevector <2 x i64> %temp, <2 x i64> undef, <1 x i32> <i32 1> 4353 %temp1 = bitcast <1 x i64> %shuffle.i.i to <8 x i8> 4354 %temp2 = bitcast <16 x i8> %b to <2 x i64> 4355 %shuffle.i3.i = shufflevector <2 x i64> %temp2, <2 x i64> undef, <1 x i32> <i32 1> 4356 %temp3 = bitcast <1 x i64> %shuffle.i3.i to <8 x i8> 4357 %vmull.i.i = tail call <8 x i16> 
@llvm.aarch64.neon.smull.v8i16(<8 x i8> %temp1, <8 x i8> %temp3) nounwind 4358 ret <8 x i16> %vmull.i.i 4359} 4360 4361define <4 x i32> @foo1(<8 x i16> %a, <8 x i16> %b) nounwind sanitize_memory { 4362; CHECK-LABEL: define <4 x i32> @foo1( 4363; CHECK-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]]) #[[ATTR0]] { 4364; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 4365; CHECK-NEXT: [[TMP9:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 4366; CHECK-NEXT: call void @llvm.donothing() 4367; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP8]] to <2 x i64> 4368; CHECK-NEXT: [[TMP:%.*]] = bitcast <8 x i16> [[A]] to <2 x i64> 4369; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP10]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1> 4370; CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> undef, <1 x i32> <i32 1> 4371; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16> 4372; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I]] to <4 x i16> 4373; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP9]] to <2 x i64> 4374; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[B]] to <2 x i64> 4375; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1> 4376; CHECK-NEXT: [[SHUFFLE_I3_I:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <1 x i32> <i32 1> 4377; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[_MSPROP1]] to <4 x i16> 4378; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[SHUFFLE_I3_I]] to <4 x i16> 4379; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i16> [[TMP4]], [[TMP6]] 4380; CHECK-NEXT: [[_MSPROP3:%.*]] = or <4 x i16> [[_MSPROP2]], zeroinitializer 4381; CHECK-NEXT: [[TMP7:%.*]] = zext <4 x i16> [[_MSPROP3]] to <4 x i32> 4382; CHECK-NEXT: [[VMULL2_I_I:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[TMP3]]) #[[ATTR7]] 4383; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr @__msan_retval_tls, align 8 4384; CHECK-NEXT: ret <4 x i32> [[VMULL2_I_I]] 4385; 4386 %temp = bitcast <8 x i16> %a to <2 x i64> 4387 %shuffle.i.i = shufflevector <2 x i64> %temp, <2 x i64> undef, <1 x i32> <i32 1> 4388 %temp1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16> 4389 %temp2 = bitcast <8 x i16> %b to <2 x i64> 4390 %shuffle.i3.i = shufflevector <2 x i64> %temp2, <2 x i64> undef, <1 x i32> <i32 1> 4391 %temp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16> 4392 %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %temp1, <4 x i16> %temp3) nounwind 4393 ret <4 x i32> %vmull2.i.i 4394} 4395 4396define <2 x i64> @foo2(<4 x i32> %a, <4 x i32> %b) nounwind sanitize_memory { 4397; CHECK-LABEL: define <2 x i64> @foo2( 4398; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] { 4399; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 4400; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 4401; CHECK-NEXT: call void @llvm.donothing() 4402; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP8]] to <2 x i64> 4403; CHECK-NEXT: [[TMP:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64> 4404; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP10]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1> 4405; CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> undef, <1 x i32> <i32 1> 4406; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> 
[[_MSPROP]] to <2 x i32> 4407; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I]] to <2 x i32> 4408; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP9]] to <2 x i64> 4409; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64> 4410; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1> 4411; CHECK-NEXT: [[SHUFFLE_I3_I:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <1 x i32> <i32 1> 4412; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[_MSPROP1]] to <2 x i32> 4413; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[SHUFFLE_I3_I]] to <2 x i32> 4414; CHECK-NEXT: [[_MSPROP2:%.*]] = or <2 x i32> [[TMP4]], [[TMP6]] 4415; CHECK-NEXT: [[_MSPROP3:%.*]] = or <2 x i32> [[_MSPROP2]], zeroinitializer 4416; CHECK-NEXT: [[TMP7:%.*]] = zext <2 x i32> [[_MSPROP3]] to <2 x i64> 4417; CHECK-NEXT: [[VMULL2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[TMP1]], <2 x i32> [[TMP3]]) #[[ATTR7]] 4418; CHECK-NEXT: store <2 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8 4419; CHECK-NEXT: ret <2 x i64> [[VMULL2_I_I]] 4420; 4421 %temp = bitcast <4 x i32> %a to <2 x i64> 4422 %shuffle.i.i = shufflevector <2 x i64> %temp, <2 x i64> undef, <1 x i32> <i32 1> 4423 %temp1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32> 4424 %temp2 = bitcast <4 x i32> %b to <2 x i64> 4425 %shuffle.i3.i = shufflevector <2 x i64> %temp2, <2 x i64> undef, <1 x i32> <i32 1> 4426 %temp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32> 4427 %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %temp1, <2 x i32> %temp3) nounwind 4428 ret <2 x i64> %vmull2.i.i 4429} 4430 4431define <8 x i16> @foo3(<16 x i8> %a, <16 x i8> %b) nounwind sanitize_memory { 4432; CHECK-LABEL: define <8 x i16> @foo3( 4433; CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] { 4434; CHECK-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 4435; CHECK-NEXT: [[TMP9:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 4436; CHECK-NEXT: call void @llvm.donothing() 4437; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> 4438; CHECK-NEXT: [[TMP:%.*]] = bitcast <16 x i8> [[A]] to <2 x i64> 4439; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP10]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1> 4440; CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> undef, <1 x i32> <i32 1> 4441; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8> 4442; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I]] to <8 x i8> 4443; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x i64> 4444; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[B]] to <2 x i64> 4445; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1> 4446; CHECK-NEXT: [[SHUFFLE_I3_I:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <1 x i32> <i32 1> 4447; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[_MSPROP1]] to <8 x i8> 4448; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[SHUFFLE_I3_I]] to <8 x i8> 4449; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i8> [[TMP4]], [[TMP6]] 4450; CHECK-NEXT: [[_MSPROP3:%.*]] = or <8 x i8> [[_MSPROP2]], zeroinitializer 4451; CHECK-NEXT: [[TMP7:%.*]] = zext <8 x i8> [[_MSPROP3]] to <8 x i16> 4452; CHECK-NEXT: [[VMULL_I_I:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[TMP1]], <8 x i8> [[TMP3]]) #[[ATTR7]] 4453; CHECK-NEXT: 
store <8 x i16> [[TMP7]], ptr @__msan_retval_tls, align 8 4454; CHECK-NEXT: ret <8 x i16> [[VMULL_I_I]] 4455; 4456 %temp = bitcast <16 x i8> %a to <2 x i64> 4457 %shuffle.i.i = shufflevector <2 x i64> %temp, <2 x i64> undef, <1 x i32> <i32 1> 4458 %temp1 = bitcast <1 x i64> %shuffle.i.i to <8 x i8> 4459 %temp2 = bitcast <16 x i8> %b to <2 x i64> 4460 %shuffle.i3.i = shufflevector <2 x i64> %temp2, <2 x i64> undef, <1 x i32> <i32 1> 4461 %temp3 = bitcast <1 x i64> %shuffle.i3.i to <8 x i8> 4462 %vmull.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %temp1, <8 x i8> %temp3) nounwind 4463 ret <8 x i16> %vmull.i.i 4464} 4465 4466define <4 x i32> @foo4(<8 x i16> %a, <8 x i16> %b) nounwind sanitize_memory { 4467; CHECK-LABEL: define <4 x i32> @foo4( 4468; CHECK-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]]) #[[ATTR0]] { 4469; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 4470; CHECK-NEXT: [[TMP9:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 4471; CHECK-NEXT: call void @llvm.donothing() 4472; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP8]] to <2 x i64> 4473; CHECK-NEXT: [[TMP:%.*]] = bitcast <8 x i16> [[A]] to <2 x i64> 4474; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP10]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1> 4475; CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> undef, <1 x i32> <i32 1> 4476; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16> 4477; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I]] to <4 x i16> 4478; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP9]] to <2 x i64> 4479; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[B]] to <2 x i64> 4480; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1> 4481; CHECK-NEXT: [[SHUFFLE_I3_I:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <1 x i32> <i32 1> 4482; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[_MSPROP1]] to <4 x i16> 4483; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[SHUFFLE_I3_I]] to <4 x i16> 4484; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i16> [[TMP4]], [[TMP6]] 4485; CHECK-NEXT: [[_MSPROP3:%.*]] = or <4 x i16> [[_MSPROP2]], zeroinitializer 4486; CHECK-NEXT: [[TMP7:%.*]] = zext <4 x i16> [[_MSPROP3]] to <4 x i32> 4487; CHECK-NEXT: [[VMULL2_I_I:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[TMP3]]) #[[ATTR7]] 4488; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr @__msan_retval_tls, align 8 4489; CHECK-NEXT: ret <4 x i32> [[VMULL2_I_I]] 4490; 4491 %temp = bitcast <8 x i16> %a to <2 x i64> 4492 %shuffle.i.i = shufflevector <2 x i64> %temp, <2 x i64> undef, <1 x i32> <i32 1> 4493 %temp1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16> 4494 %temp2 = bitcast <8 x i16> %b to <2 x i64> 4495 %shuffle.i3.i = shufflevector <2 x i64> %temp2, <2 x i64> undef, <1 x i32> <i32 1> 4496 %temp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16> 4497 %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %temp1, <4 x i16> %temp3) nounwind 4498 ret <4 x i32> %vmull2.i.i 4499} 4500 4501define <2 x i64> @foo5(<4 x i32> %a, <4 x i32> %b) nounwind sanitize_memory { 4502; CHECK-LABEL: define <2 x i64> @foo5( 4503; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] { 4504; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 4505; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 
ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 4506; CHECK-NEXT: call void @llvm.donothing() 4507; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP8]] to <2 x i64> 4508; CHECK-NEXT: [[TMP:%.*]] = bitcast <4 x i32> [[A]] to <2 x i64> 4509; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP10]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1> 4510; CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> undef, <1 x i32> <i32 1> 4511; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32> 4512; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I]] to <2 x i32> 4513; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP9]] to <2 x i64> 4514; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64> 4515; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1> 4516; CHECK-NEXT: [[SHUFFLE_I3_I:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <1 x i32> <i32 1> 4517; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[_MSPROP1]] to <2 x i32> 4518; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[SHUFFLE_I3_I]] to <2 x i32> 4519; CHECK-NEXT: [[_MSPROP2:%.*]] = or <2 x i32> [[TMP4]], [[TMP6]] 4520; CHECK-NEXT: [[_MSPROP3:%.*]] = or <2 x i32> [[_MSPROP2]], zeroinitializer 4521; CHECK-NEXT: [[TMP7:%.*]] = zext <2 x i32> [[_MSPROP3]] to <2 x i64> 4522; CHECK-NEXT: [[VMULL2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[TMP1]], <2 x i32> [[TMP3]]) #[[ATTR7]] 4523; CHECK-NEXT: store <2 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8 4524; CHECK-NEXT: ret <2 x i64> [[VMULL2_I_I]] 4525; 4526 %temp = bitcast <4 x i32> %a to <2 x i64> 4527 %shuffle.i.i = shufflevector <2 x i64> %temp, <2 x i64> undef, <1 x i32> <i32 1> 4528 %temp1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32> 4529 %temp2 = bitcast <4 x i32> %b to <2 x i64> 4530 %shuffle.i3.i = shufflevector <2 x i64> %temp2, <2 x i64> undef, <1 x i32> <i32 1> 4531 %temp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32> 4532 %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %temp1, <2 x i32> %temp3) nounwind 4533 ret <2 x i64> %vmull2.i.i 4534} 4535 4536define <4 x i32> @foo6(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp { 4537; CHECK-LABEL: define <4 x i32> @foo6( 4538; CHECK-SAME: <4 x i32> [[A:%.*]], <8 x i16> [[B:%.*]], <4 x i16> [[C:%.*]]) #[[ATTR6:[0-9]+]] { 4539; CHECK-NEXT: [[ENTRY:.*:]] 4540; CHECK-NEXT: call void @llvm.donothing() 4541; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <2 x i64> 4542; CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> undef, <1 x i32> <i32 1> 4543; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I]] to <4 x i16> 4544; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[C]], <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 4545; CHECK-NEXT: [[VMULL2_I:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[SHUFFLE]]) #[[ATTR7]] 4546; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 4547; CHECK-NEXT: ret <4 x i32> [[VMULL2_I]] 4548; 4549entry: 4550 %0 = bitcast <8 x i16> %b to <2 x i64> 4551 %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1> 4552 %1 = bitcast <1 x i64> %shuffle.i to <4 x i16> 4553 %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 4554 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %1, <4 x i16> 
%shuffle) nounwind 4555 ret <4 x i32> %vmull2.i 4556} 4557 4558define <4 x i32> @foo6a(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp { 4559; CHECK-LABEL: define <4 x i32> @foo6a( 4560; CHECK-SAME: <4 x i32> [[A:%.*]], <8 x i16> [[B:%.*]], <4 x i16> [[C:%.*]]) #[[ATTR6]] { 4561; CHECK-NEXT: [[ENTRY:.*:]] 4562; CHECK-NEXT: call void @llvm.donothing() 4563; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <2 x i64> 4564; CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> undef, <1 x i32> zeroinitializer 4565; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I]] to <4 x i16> 4566; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[C]], <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 4567; CHECK-NEXT: [[VMULL2_I:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[SHUFFLE]]) #[[ATTR7]] 4568; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 4569; CHECK-NEXT: ret <4 x i32> [[VMULL2_I]] 4570; 4571entry: 4572 %0 = bitcast <8 x i16> %b to <2 x i64> 4573 %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 0> 4574 %1 = bitcast <1 x i64> %shuffle.i to <4 x i16> 4575 %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 4576 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %1, <4 x i16> %shuffle) nounwind 4577 ret <4 x i32> %vmull2.i 4578} 4579 4580define <2 x i64> @foo7(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp { 4581; CHECK-LABEL: define <2 x i64> @foo7( 4582; CHECK-SAME: <2 x i64> [[A:%.*]], <4 x i32> [[B:%.*]], <2 x i32> [[C:%.*]]) #[[ATTR6]] { 4583; CHECK-NEXT: [[ENTRY:.*:]] 4584; CHECK-NEXT: call void @llvm.donothing() 4585; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64> 4586; CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> undef, <1 x i32> <i32 1> 4587; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I]] to <2 x i32> 4588; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[C]], <2 x i32> undef, <2 x i32> <i32 1, i32 1> 4589; CHECK-NEXT: [[VMULL2_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[TMP1]], <2 x i32> [[SHUFFLE]]) #[[ATTR7]] 4590; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 4591; CHECK-NEXT: ret <2 x i64> [[VMULL2_I]] 4592; 4593entry: 4594 %0 = bitcast <4 x i32> %b to <2 x i64> 4595 %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1> 4596 %1 = bitcast <1 x i64> %shuffle.i to <2 x i32> 4597 %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 4598 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %1, <2 x i32> %shuffle) nounwind 4599 ret <2 x i64> %vmull2.i 4600} 4601 4602define <2 x i64> @foo7a(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp { 4603; CHECK-LABEL: define <2 x i64> @foo7a( 4604; CHECK-SAME: <2 x i64> [[A:%.*]], <4 x i32> [[B:%.*]], <2 x i32> [[C:%.*]]) #[[ATTR6]] { 4605; CHECK-NEXT: [[ENTRY:.*:]] 4606; CHECK-NEXT: call void @llvm.donothing() 4607; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64> 4608; CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> undef, <1 x i32> zeroinitializer 4609; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I]] to <2 x i32> 4610; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[C]], <2 x i32> undef, <2 x i32> <i32 1, i32 1> 4611; 
CHECK-NEXT: [[VMULL2_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[TMP1]], <2 x i32> [[SHUFFLE]]) #[[ATTR7]] 4612; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 4613; CHECK-NEXT: ret <2 x i64> [[VMULL2_I]] 4614; 4615entry: 4616 %0 = bitcast <4 x i32> %b to <2 x i64> 4617 %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 0> 4618 %1 = bitcast <1 x i64> %shuffle.i to <2 x i32> 4619 %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 4620 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %1, <2 x i32> %shuffle) nounwind 4621 ret <2 x i64> %vmull2.i 4622} 4623 4624 4625define <4 x i32> @foo8(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp { 4626; CHECK-LABEL: define <4 x i32> @foo8( 4627; CHECK-SAME: <4 x i32> [[A:%.*]], <8 x i16> [[B:%.*]], <4 x i16> [[C:%.*]]) #[[ATTR6]] { 4628; CHECK-NEXT: [[ENTRY:.*:]] 4629; CHECK-NEXT: call void @llvm.donothing() 4630; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <2 x i64> 4631; CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> undef, <1 x i32> <i32 1> 4632; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I]] to <4 x i16> 4633; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[C]], <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 4634; CHECK-NEXT: [[VMULL2_I:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[SHUFFLE]]) #[[ATTR7]] 4635; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 4636; CHECK-NEXT: ret <4 x i32> [[VMULL2_I]] 4637; 4638entry: 4639 %0 = bitcast <8 x i16> %b to <2 x i64> 4640 %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1> 4641 %1 = bitcast <1 x i64> %shuffle.i to <4 x i16> 4642 %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 4643 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %1, <4 x i16> %shuffle) nounwind 4644 ret <4 x i32> %vmull2.i 4645} 4646 4647define <4 x i32> @foo8a(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp { 4648; CHECK-LABEL: define <4 x i32> @foo8a( 4649; CHECK-SAME: <4 x i32> [[A:%.*]], <8 x i16> [[B:%.*]], <4 x i16> [[C:%.*]]) #[[ATTR6]] { 4650; CHECK-NEXT: [[ENTRY:.*:]] 4651; CHECK-NEXT: call void @llvm.donothing() 4652; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <2 x i64> 4653; CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> undef, <1 x i32> zeroinitializer 4654; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I]] to <4 x i16> 4655; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[C]], <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 4656; CHECK-NEXT: [[VMULL2_I:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[SHUFFLE]]) #[[ATTR7]] 4657; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 4658; CHECK-NEXT: ret <4 x i32> [[VMULL2_I]] 4659; 4660entry: 4661 %0 = bitcast <8 x i16> %b to <2 x i64> 4662 %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 0> 4663 %1 = bitcast <1 x i64> %shuffle.i to <4 x i16> 4664 %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 4665 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %1, <4 x i16> %shuffle) nounwind 4666 ret <4 x i32> %vmull2.i 4667} 4668 4669define <2 x i64> 
@foo9(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp { 4670; CHECK-LABEL: define <2 x i64> @foo9( 4671; CHECK-SAME: <2 x i64> [[A:%.*]], <4 x i32> [[B:%.*]], <2 x i32> [[C:%.*]]) #[[ATTR6]] { 4672; CHECK-NEXT: [[ENTRY:.*:]] 4673; CHECK-NEXT: call void @llvm.donothing() 4674; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64> 4675; CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> undef, <1 x i32> <i32 1> 4676; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I]] to <2 x i32> 4677; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[C]], <2 x i32> undef, <2 x i32> <i32 1, i32 1> 4678; CHECK-NEXT: [[VMULL2_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[TMP1]], <2 x i32> [[SHUFFLE]]) #[[ATTR7]] 4679; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 4680; CHECK-NEXT: ret <2 x i64> [[VMULL2_I]] 4681; 4682entry: 4683 %0 = bitcast <4 x i32> %b to <2 x i64> 4684 %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1> 4685 %1 = bitcast <1 x i64> %shuffle.i to <2 x i32> 4686 %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 4687 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %1, <2 x i32> %shuffle) nounwind 4688 ret <2 x i64> %vmull2.i 4689} 4690 4691define <2 x i64> @foo9a(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp { 4692; CHECK-LABEL: define <2 x i64> @foo9a( 4693; CHECK-SAME: <2 x i64> [[A:%.*]], <4 x i32> [[B:%.*]], <2 x i32> [[C:%.*]]) #[[ATTR6]] { 4694; CHECK-NEXT: [[ENTRY:.*:]] 4695; CHECK-NEXT: call void @llvm.donothing() 4696; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64> 4697; CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> undef, <1 x i32> zeroinitializer 4698; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I]] to <2 x i32> 4699; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[C]], <2 x i32> undef, <2 x i32> <i32 1, i32 1> 4700; CHECK-NEXT: [[VMULL2_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[TMP1]], <2 x i32> [[SHUFFLE]]) #[[ATTR7]] 4701; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 4702; CHECK-NEXT: ret <2 x i64> [[VMULL2_I]] 4703; 4704entry: 4705 %0 = bitcast <4 x i32> %b to <2 x i64> 4706 %shuffle.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 0> 4707 %1 = bitcast <1 x i64> %shuffle.i to <2 x i32> 4708 %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 4709 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %1, <2 x i32> %shuffle) nounwind 4710 ret <2 x i64> %vmull2.i 4711} 4712 4713define <8 x i16> @bar0(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) nounwind sanitize_memory { 4714; CHECK-LABEL: define <8 x i16> @bar0( 4715; CHECK-SAME: <8 x i16> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]]) #[[ATTR0]] { 4716; CHECK-NEXT: [[TMP9:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 4717; CHECK-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 4718; CHECK-NEXT: [[TMP11:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 4719; CHECK-NEXT: call void @llvm.donothing() 4720; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x i64> 4721; CHECK-NEXT: [[TMP:%.*]] = bitcast <16 x i8> [[B]] to <2 x i64> 4722; 
CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1> 4723; CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> undef, <1 x i32> <i32 1> 4724; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8> 4725; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I_I]] to <8 x i8> 4726; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64> 4727; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[C]] to <2 x i64> 4728; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1> 4729; CHECK-NEXT: [[SHUFFLE_I3_I_I:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <1 x i32> <i32 1> 4730; CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[_MSPROP1]] to <8 x i8> 4731; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[SHUFFLE_I3_I_I]] to <8 x i8> 4732; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i8> [[TMP5]], [[TMP7]] 4733; CHECK-NEXT: [[_MSPROP3:%.*]] = or <8 x i8> [[_MSPROP2]], zeroinitializer 4734; CHECK-NEXT: [[TMP8:%.*]] = zext <8 x i8> [[_MSPROP3]] to <8 x i16> 4735; CHECK-NEXT: [[VMULL_I_I_I:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[TMP1]], <8 x i8> [[TMP3]]) #[[ATTR7]] 4736; CHECK-NEXT: [[_MSPROP4:%.*]] = or <8 x i16> [[TMP8]], [[TMP11]] 4737; CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[VMULL_I_I_I]], [[A]] 4738; CHECK-NEXT: store <8 x i16> [[_MSPROP4]], ptr @__msan_retval_tls, align 8 4739; CHECK-NEXT: ret <8 x i16> [[ADD_I]] 4740; 4741 %temp = bitcast <16 x i8> %b to <2 x i64> 4742 %shuffle.i.i.i = shufflevector <2 x i64> %temp, <2 x i64> undef, <1 x i32> <i32 1> 4743 %temp1 = bitcast <1 x i64> %shuffle.i.i.i to <8 x i8> 4744 %temp2 = bitcast <16 x i8> %c to <2 x i64> 4745 %shuffle.i3.i.i = shufflevector <2 x i64> %temp2, <2 x i64> undef, <1 x i32> <i32 1> 4746 %temp3 = bitcast <1 x i64> %shuffle.i3.i.i to <8 x i8> 4747 %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %temp1, <8 x i8> %temp3) nounwind 4748 %add.i = add <8 x i16> %vmull.i.i.i, %a 4749 ret <8 x i16> %add.i 4750} 4751 4752define <4 x i32> @bar1(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) nounwind sanitize_memory { 4753; CHECK-LABEL: define <4 x i32> @bar1( 4754; CHECK-SAME: <4 x i32> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]]) #[[ATTR0]] { 4755; CHECK-NEXT: [[TMP9:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 4756; CHECK-NEXT: [[TMP10:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 4757; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 4758; CHECK-NEXT: call void @llvm.donothing() 4759; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP9]] to <2 x i64> 4760; CHECK-NEXT: [[TMP:%.*]] = bitcast <8 x i16> [[B]] to <2 x i64> 4761; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1> 4762; CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> undef, <1 x i32> <i32 1> 4763; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16> 4764; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I_I]] to <4 x i16> 4765; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP10]] to <2 x i64> 4766; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[C]] to <2 x i64> 4767; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1> 4768; 
define <4 x i32> @bar1(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @bar1(
; CHECK-SAME: <4 x i32> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP9:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP10:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP9]] to <2 x i64>
; CHECK-NEXT: [[TMP:%.*]] = bitcast <8 x i16> [[B]] to <2 x i64>
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1>
; CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I_I]] to <4 x i16>
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP10]] to <2 x i64>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[C]] to <2 x i64>
; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1>
; CHECK-NEXT: [[SHUFFLE_I3_I_I:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[_MSPROP1]] to <4 x i16>
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[SHUFFLE_I3_I_I]] to <4 x i16>
; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i16> [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[_MSPROP3:%.*]] = or <4 x i16> [[_MSPROP2]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i16> [[_MSPROP3]] to <4 x i32>
; CHECK-NEXT: [[VMULL2_I_I_I:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[TMP3]]) #[[ATTR7]]
; CHECK-NEXT: [[_MSPROP4:%.*]] = or <4 x i32> [[TMP8]], [[TMP11]]
; CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[VMULL2_I_I_I]], [[A]]
; CHECK-NEXT: store <4 x i32> [[_MSPROP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[ADD_I]]
;
  %temp = bitcast <8 x i16> %b to <2 x i64>
  %shuffle.i.i.i = shufflevector <2 x i64> %temp, <2 x i64> undef, <1 x i32> <i32 1>
  %temp1 = bitcast <1 x i64> %shuffle.i.i.i to <4 x i16>
  %temp2 = bitcast <8 x i16> %c to <2 x i64>
  %shuffle.i3.i.i = shufflevector <2 x i64> %temp2, <2 x i64> undef, <1 x i32> <i32 1>
  %temp3 = bitcast <1 x i64> %shuffle.i3.i.i to <4 x i16>
  %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %temp1, <4 x i16> %temp3) nounwind
  %add.i = add <4 x i32> %vmull2.i.i.i, %a
  ret <4 x i32> %add.i
}

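; Note how @bar1's argument shadows are loaded from the thread-local
; @__msan_param_tls buffer at byte offsets that track the size of the
; preceding arguments: offset 0 for %a, 16 for %b and 32 for %c (each of
; these vector arguments occupies a 16-byte slot).  A sketch of the
; corresponding loads, with hypothetical %s.* names:
;
;   %s.a = load <4 x i32>, ptr @__msan_param_tls, align 8
;   %s.b = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
;   %s.c = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8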
define <2 x i64> @bar2(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @bar2(
; CHECK-SAME: <2 x i64> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP11:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP9]] to <2 x i64>
; CHECK-NEXT: [[TMP:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64>
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1>
; CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I_I]] to <2 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP10]] to <2 x i64>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[C]] to <2 x i64>
; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1>
; CHECK-NEXT: [[SHUFFLE_I3_I_I:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[_MSPROP1]] to <2 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[SHUFFLE_I3_I_I]] to <2 x i32>
; CHECK-NEXT: [[_MSPROP2:%.*]] = or <2 x i32> [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[_MSPROP3:%.*]] = or <2 x i32> [[_MSPROP2]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = zext <2 x i32> [[_MSPROP3]] to <2 x i64>
; CHECK-NEXT: [[VMULL2_I_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[TMP1]], <2 x i32> [[TMP3]]) #[[ATTR7]]
; CHECK-NEXT: [[_MSPROP4:%.*]] = or <2 x i64> [[TMP8]], [[TMP11]]
; CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[VMULL2_I_I_I]], [[A]]
; CHECK-NEXT: store <2 x i64> [[_MSPROP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[ADD_I]]
;
  %temp = bitcast <4 x i32> %b to <2 x i64>
  %shuffle.i.i.i = shufflevector <2 x i64> %temp, <2 x i64> undef, <1 x i32> <i32 1>
  %temp1 = bitcast <1 x i64> %shuffle.i.i.i to <2 x i32>
  %temp2 = bitcast <4 x i32> %c to <2 x i64>
  %shuffle.i3.i.i = shufflevector <2 x i64> %temp2, <2 x i64> undef, <1 x i32> <i32 1>
  %temp3 = bitcast <1 x i64> %shuffle.i3.i.i to <2 x i32>
  %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %temp1, <2 x i32> %temp3) nounwind
  %add.i = add <2 x i64> %vmull2.i.i.i, %a
  ret <2 x i64> %add.i
}

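; @bar1 and @bar2 differ only in element width (smull.v4i32 vs. smull.v2i64);
; the instrumentation mirrors the source's extract-high idiom step by step,
; computing the shadow of the selected half with the same bitcast and
; shufflevector sequence.  Roughly, for @bar2's %b operand (illustrative
; names again):
;
;   %s.cast = bitcast <4 x i32> %s.b to <2 x i64>
;   %s.hi   = shufflevector <2 x i64> %s.cast, <2 x i64> splat (i64 -1), <1 x i32> <i32 1>
;   %s.half = bitcast <1 x i64> %s.hi to <2 x i32>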
define <8 x i16> @bar3(<8 x i16> %a, <16 x i8> %b, <16 x i8> %c) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @bar3(
; CHECK-SAME: <8 x i16> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP9:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP10:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP11:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP9]] to <2 x i64>
; CHECK-NEXT: [[TMP:%.*]] = bitcast <16 x i8> [[B]] to <2 x i64>
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1>
; CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I_I]] to <8 x i8>
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[C]] to <2 x i64>
; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1>
; CHECK-NEXT: [[SHUFFLE_I3_I_I:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[_MSPROP1]] to <8 x i8>
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[SHUFFLE_I3_I_I]] to <8 x i8>
; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i8> [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[_MSPROP3:%.*]] = or <8 x i8> [[_MSPROP2]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = zext <8 x i8> [[_MSPROP3]] to <8 x i16>
; CHECK-NEXT: [[VMULL_I_I_I:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[TMP1]], <8 x i8> [[TMP3]]) #[[ATTR7]]
; CHECK-NEXT: [[_MSPROP4:%.*]] = or <8 x i16> [[TMP8]], [[TMP11]]
; CHECK-NEXT: [[ADD_I:%.*]] = add <8 x i16> [[VMULL_I_I_I]], [[A]]
; CHECK-NEXT: store <8 x i16> [[_MSPROP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[ADD_I]]
;
  %temp = bitcast <16 x i8> %b to <2 x i64>
  %shuffle.i.i.i = shufflevector <2 x i64> %temp, <2 x i64> undef, <1 x i32> <i32 1>
  %temp1 = bitcast <1 x i64> %shuffle.i.i.i to <8 x i8>
  %temp2 = bitcast <16 x i8> %c to <2 x i64>
  %shuffle.i3.i.i = shufflevector <2 x i64> %temp2, <2 x i64> undef, <1 x i32> <i32 1>
  %temp3 = bitcast <1 x i64> %shuffle.i3.i.i to <8 x i8>
  %vmull.i.i.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %temp1, <8 x i8> %temp3) nounwind
  %add.i = add <8 x i16> %vmull.i.i.i, %a
  ret <8 x i16> %add.i
}

define <4 x i32> @bar4(<4 x i32> %a, <8 x i16> %b, <8 x i16> %c) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @bar4(
; CHECK-SAME: <4 x i32> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP9:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP10:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP9]] to <2 x i64>
; CHECK-NEXT: [[TMP:%.*]] = bitcast <8 x i16> [[B]] to <2 x i64>
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1>
; CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I_I]] to <4 x i16>
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP10]] to <2 x i64>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[C]] to <2 x i64>
; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1>
; CHECK-NEXT: [[SHUFFLE_I3_I_I:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[_MSPROP1]] to <4 x i16>
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[SHUFFLE_I3_I_I]] to <4 x i16>
; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i16> [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[_MSPROP3:%.*]] = or <4 x i16> [[_MSPROP2]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i16> [[_MSPROP3]] to <4 x i32>
; CHECK-NEXT: [[VMULL2_I_I_I:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[TMP3]]) #[[ATTR7]]
; CHECK-NEXT: [[_MSPROP4:%.*]] = or <4 x i32> [[TMP8]], [[TMP11]]
; CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[VMULL2_I_I_I]], [[A]]
; CHECK-NEXT: store <4 x i32> [[_MSPROP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[ADD_I]]
;
  %temp = bitcast <8 x i16> %b to <2 x i64>
  %shuffle.i.i.i = shufflevector <2 x i64> %temp, <2 x i64> undef, <1 x i32> <i32 1>
  %temp1 = bitcast <1 x i64> %shuffle.i.i.i to <4 x i16>
  %temp2 = bitcast <8 x i16> %c to <2 x i64>
  %shuffle.i3.i.i = shufflevector <2 x i64> %temp2, <2 x i64> undef, <1 x i32> <i32 1>
  %temp3 = bitcast <1 x i64> %shuffle.i3.i.i to <4 x i16>
  %vmull2.i.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %temp1, <4 x i16> %temp3) nounwind
  %add.i = add <4 x i32> %vmull2.i.i.i, %a
  ret <4 x i32> %add.i
}

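; @bar3 and @bar4 above, and @bar5 below, repeat the @bar1/@bar2 pattern with
; the unsigned @llvm.aarch64.neon.umull.* intrinsics.  Apart from the vector
; element types, the generated shadow code is identical, which suggests the
; propagation does not depend on the signedness of the widening multiply.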
define <2 x i64> @bar5(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @bar5(
; CHECK-SAME: <2 x i64> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP11:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP9]] to <2 x i64>
; CHECK-NEXT: [[TMP:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64>
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1>
; CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I_I]] to <2 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP10]] to <2 x i64>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[C]] to <2 x i64>
; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1>
; CHECK-NEXT: [[SHUFFLE_I3_I_I:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <1 x i32> <i32 1>
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[_MSPROP1]] to <2 x i32>
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[SHUFFLE_I3_I_I]] to <2 x i32>
; CHECK-NEXT: [[_MSPROP2:%.*]] = or <2 x i32> [[TMP5]], [[TMP7]]
; CHECK-NEXT: [[_MSPROP3:%.*]] = or <2 x i32> [[_MSPROP2]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = zext <2 x i32> [[_MSPROP3]] to <2 x i64>
; CHECK-NEXT: [[VMULL2_I_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[TMP1]], <2 x i32> [[TMP3]]) #[[ATTR7]]
; CHECK-NEXT: [[_MSPROP4:%.*]] = or <2 x i64> [[TMP8]], [[TMP11]]
; CHECK-NEXT: [[ADD_I:%.*]] = add <2 x i64> [[VMULL2_I_I_I]], [[A]]
; CHECK-NEXT: store <2 x i64> [[_MSPROP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[ADD_I]]
;
  %temp = bitcast <4 x i32> %b to <2 x i64>
  %shuffle.i.i.i = shufflevector <2 x i64> %temp, <2 x i64> undef, <1 x i32> <i32 1>
  %temp1 = bitcast <1 x i64> %shuffle.i.i.i to <2 x i32>
  %temp2 = bitcast <4 x i32> %c to <2 x i64>
  %shuffle.i3.i.i = shufflevector <2 x i64> %temp2, <2 x i64> undef, <1 x i32> <i32 1>
  %temp3 = bitcast <1 x i64> %shuffle.i3.i.i to <2 x i32>
  %vmull2.i.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %temp1, <2 x i32> %temp3) nounwind
  %add.i = add <2 x i64> %vmull2.i.i.i, %a
  ret <2 x i64> %add.i
}

define <4 x i32> @mlal2_1(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @mlal2_1(
; CHECK-SAME: <4 x i32> [[A:%.*]], <8 x i16> [[B:%.*]], <4 x i16> [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP10:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP9]], <4 x i16> splat (i16 -1), <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[C]], <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast
<8 x i16> [[TMP10]] to <2 x i64> 4957; CHECK-NEXT: [[TMP:%.*]] = bitcast <8 x i16> [[B]] to <2 x i64> 4958; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1> 4959; CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> undef, <1 x i32> <i32 1> 4960; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[_MSPROP1]] to <4 x i16> 4961; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I]] to <4 x i16> 4962; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[_MSPROP]] to <2 x i64> 4963; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <2 x i64> 4964; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1> 4965; CHECK-NEXT: [[SHUFFLE_I3_I:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <1 x i32> <i32 1> 4966; CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[_MSPROP2]] to <4 x i16> 4967; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[SHUFFLE_I3_I]] to <4 x i16> 4968; CHECK-NEXT: [[_MSPROP3:%.*]] = or <4 x i16> [[TMP5]], [[TMP7]] 4969; CHECK-NEXT: [[_MSPROP4:%.*]] = or <4 x i16> [[_MSPROP3]], zeroinitializer 4970; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i16> [[_MSPROP4]] to <4 x i32> 4971; CHECK-NEXT: [[VMULL2_I_I:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[TMP3]]) #[[ATTR7]] 4972; CHECK-NEXT: [[_MSPROP5:%.*]] = or <4 x i32> [[TMP8]], [[TMP11]] 4973; CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[VMULL2_I_I]], [[A]] 4974; CHECK-NEXT: store <4 x i32> [[_MSPROP5]], ptr @__msan_retval_tls, align 8 4975; CHECK-NEXT: ret <4 x i32> [[ADD]] 4976; 4977 %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 4978 %temp = bitcast <8 x i16> %b to <2 x i64> 4979 %shuffle.i.i = shufflevector <2 x i64> %temp, <2 x i64> undef, <1 x i32> <i32 1> 4980 %temp1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16> 4981 %temp2 = bitcast <8 x i16> %shuffle to <2 x i64> 4982 %shuffle.i3.i = shufflevector <2 x i64> %temp2, <2 x i64> undef, <1 x i32> <i32 1> 4983 %temp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16> 4984 %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %temp1, <4 x i16> %temp3) nounwind 4985 %add = add <4 x i32> %vmull2.i.i, %a 4986 ret <4 x i32> %add 4987} 4988 4989define <2 x i64> @mlal2_2(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind sanitize_memory { 4990; CHECK-LABEL: define <2 x i64> @mlal2_2( 4991; CHECK-SAME: <2 x i64> [[A:%.*]], <4 x i32> [[B:%.*]], <2 x i32> [[C:%.*]]) #[[ATTR0]] { 4992; CHECK-NEXT: [[TMP9:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 4993; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 4994; CHECK-NEXT: [[TMP11:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 4995; CHECK-NEXT: call void @llvm.donothing() 4996; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> splat (i32 -1), <4 x i32> <i32 1, i32 1, i32 1, i32 1> 4997; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[C]], <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 4998; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP10]] to <2 x i64> 4999; CHECK-NEXT: [[TMP:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64> 5000; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1> 5001; CHECK-NEXT: 
[[SHUFFLE_I_I:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> undef, <1 x i32> <i32 1> 5002; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[_MSPROP1]] to <2 x i32> 5003; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I]] to <2 x i32> 5004; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[_MSPROP]] to <2 x i64> 5005; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <2 x i64> 5006; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1> 5007; CHECK-NEXT: [[SHUFFLE_I3_I:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <1 x i32> <i32 1> 5008; CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[_MSPROP2]] to <2 x i32> 5009; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[SHUFFLE_I3_I]] to <2 x i32> 5010; CHECK-NEXT: [[_MSPROP3:%.*]] = or <2 x i32> [[TMP5]], [[TMP7]] 5011; CHECK-NEXT: [[_MSPROP4:%.*]] = or <2 x i32> [[_MSPROP3]], zeroinitializer 5012; CHECK-NEXT: [[TMP8:%.*]] = zext <2 x i32> [[_MSPROP4]] to <2 x i64> 5013; CHECK-NEXT: [[VMULL2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[TMP1]], <2 x i32> [[TMP3]]) #[[ATTR7]] 5014; CHECK-NEXT: [[_MSPROP5:%.*]] = or <2 x i64> [[TMP8]], [[TMP11]] 5015; CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[VMULL2_I_I]], [[A]] 5016; CHECK-NEXT: store <2 x i64> [[_MSPROP5]], ptr @__msan_retval_tls, align 8 5017; CHECK-NEXT: ret <2 x i64> [[ADD]] 5018; 5019 %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 5020 %temp = bitcast <4 x i32> %b to <2 x i64> 5021 %shuffle.i.i = shufflevector <2 x i64> %temp, <2 x i64> undef, <1 x i32> <i32 1> 5022 %temp1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32> 5023 %temp2 = bitcast <4 x i32> %shuffle to <2 x i64> 5024 %shuffle.i3.i = shufflevector <2 x i64> %temp2, <2 x i64> undef, <1 x i32> <i32 1> 5025 %temp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32> 5026 %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %temp1, <2 x i32> %temp3) nounwind 5027 %add = add <2 x i64> %vmull2.i.i, %a 5028 ret <2 x i64> %add 5029} 5030 5031define <4 x i32> @mlal2_4(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind sanitize_memory { 5032; CHECK-LABEL: define <4 x i32> @mlal2_4( 5033; CHECK-SAME: <4 x i32> [[A:%.*]], <8 x i16> [[B:%.*]], <4 x i16> [[C:%.*]]) #[[ATTR0]] { 5034; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 5035; CHECK-NEXT: [[TMP10:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 5036; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 5037; CHECK-NEXT: call void @llvm.donothing() 5038; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[TMP9]], <4 x i16> splat (i16 -1), <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> 5039; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[C]], <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> 5040; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP10]] to <2 x i64> 5041; CHECK-NEXT: [[TMP:%.*]] = bitcast <8 x i16> [[B]] to <2 x i64> 5042; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1> 5043; CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> undef, <1 x i32> <i32 1> 5044; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[_MSPROP1]] to <4 x i16> 5045; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> 
[[SHUFFLE_I_I]] to <4 x i16> 5046; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[_MSPROP]] to <2 x i64> 5047; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[SHUFFLE]] to <2 x i64> 5048; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1> 5049; CHECK-NEXT: [[SHUFFLE_I3_I:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <1 x i32> <i32 1> 5050; CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[_MSPROP2]] to <4 x i16> 5051; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[SHUFFLE_I3_I]] to <4 x i16> 5052; CHECK-NEXT: [[_MSPROP3:%.*]] = or <4 x i16> [[TMP5]], [[TMP7]] 5053; CHECK-NEXT: [[_MSPROP4:%.*]] = or <4 x i16> [[_MSPROP3]], zeroinitializer 5054; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i16> [[_MSPROP4]] to <4 x i32> 5055; CHECK-NEXT: [[VMULL2_I_I:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[TMP3]]) #[[ATTR7]] 5056; CHECK-NEXT: [[_MSPROP5:%.*]] = or <4 x i32> [[TMP8]], [[TMP11]] 5057; CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[VMULL2_I_I]], [[A]] 5058; CHECK-NEXT: store <4 x i32> [[_MSPROP5]], ptr @__msan_retval_tls, align 8 5059; CHECK-NEXT: ret <4 x i32> [[ADD]] 5060; 5061 %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> 5062 %temp = bitcast <8 x i16> %b to <2 x i64> 5063 %shuffle.i.i = shufflevector <2 x i64> %temp, <2 x i64> undef, <1 x i32> <i32 1> 5064 %temp1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16> 5065 %temp2 = bitcast <8 x i16> %shuffle to <2 x i64> 5066 %shuffle.i3.i = shufflevector <2 x i64> %temp2, <2 x i64> undef, <1 x i32> <i32 1> 5067 %temp3 = bitcast <1 x i64> %shuffle.i3.i to <4 x i16> 5068 %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %temp1, <4 x i16> %temp3) nounwind 5069 %add = add <4 x i32> %vmull2.i.i, %a 5070 ret <4 x i32> %add 5071} 5072 5073define <2 x i64> @mlal2_5(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind sanitize_memory { 5074; CHECK-LABEL: define <2 x i64> @mlal2_5( 5075; CHECK-SAME: <2 x i64> [[A:%.*]], <4 x i32> [[B:%.*]], <2 x i32> [[C:%.*]]) #[[ATTR0]] { 5076; CHECK-NEXT: [[TMP9:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 5077; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 5078; CHECK-NEXT: [[TMP11:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 5079; CHECK-NEXT: call void @llvm.donothing() 5080; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> splat (i32 -1), <4 x i32> zeroinitializer 5081; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[C]], <2 x i32> undef, <4 x i32> zeroinitializer 5082; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP10]] to <2 x i64> 5083; CHECK-NEXT: [[TMP:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64> 5084; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> splat (i64 -1), <1 x i32> <i32 1> 5085; CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i64> [[TMP]], <2 x i64> undef, <1 x i32> <i32 1> 5086; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[_MSPROP1]] to <2 x i32> 5087; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I]] to <2 x i32> 5088; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[_MSPROP]] to <2 x i64> 5089; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[SHUFFLE]] to <2 x i64> 5090; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> splat (i64 
-1), <1 x i32> <i32 1> 5091; CHECK-NEXT: [[SHUFFLE_I3_I:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <1 x i32> <i32 1> 5092; CHECK-NEXT: [[TMP7:%.*]] = bitcast <1 x i64> [[_MSPROP2]] to <2 x i32> 5093; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[SHUFFLE_I3_I]] to <2 x i32> 5094; CHECK-NEXT: [[_MSPROP3:%.*]] = or <2 x i32> [[TMP5]], [[TMP7]] 5095; CHECK-NEXT: [[_MSPROP4:%.*]] = or <2 x i32> [[_MSPROP3]], zeroinitializer 5096; CHECK-NEXT: [[TMP8:%.*]] = zext <2 x i32> [[_MSPROP4]] to <2 x i64> 5097; CHECK-NEXT: [[VMULL2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[TMP1]], <2 x i32> [[TMP3]]) #[[ATTR7]] 5098; CHECK-NEXT: [[_MSPROP5:%.*]] = or <2 x i64> [[TMP8]], [[TMP11]] 5099; CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[VMULL2_I_I]], [[A]] 5100; CHECK-NEXT: store <2 x i64> [[_MSPROP5]], ptr @__msan_retval_tls, align 8 5101; CHECK-NEXT: ret <2 x i64> [[ADD]] 5102; 5103 %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <4 x i32> zeroinitializer 5104 %temp = bitcast <4 x i32> %b to <2 x i64> 5105 %shuffle.i.i = shufflevector <2 x i64> %temp, <2 x i64> undef, <1 x i32> <i32 1> 5106 %temp1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32> 5107 %temp2 = bitcast <4 x i32> %shuffle to <2 x i64> 5108 %shuffle.i3.i = shufflevector <2 x i64> %temp2, <2 x i64> undef, <1 x i32> <i32 1> 5109 %temp3 = bitcast <1 x i64> %shuffle.i3.i to <2 x i32> 5110 %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %temp1, <2 x i32> %temp3) nounwind 5111 %add = add <2 x i64> %vmull2.i.i, %a 5112 ret <2 x i64> %add 5113} 5114 5115; rdar://12328502 5116define <2 x double> @vmulq_n_f64(<2 x double> %x, double %y) nounwind readnone ssp { 5117; CHECK-LABEL: define <2 x double> @vmulq_n_f64( 5118; CHECK-SAME: <2 x double> [[X:%.*]], double [[Y:%.*]]) #[[ATTR3]] { 5119; CHECK-NEXT: [[ENTRY:.*:]] 5120; CHECK-NEXT: call void @llvm.donothing() 5121; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double [[Y]], i32 0 5122; CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[Y]], i32 1 5123; CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x double> [[VECINIT1_I]], [[X]] 5124; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 5125; CHECK-NEXT: ret <2 x double> [[MUL_I]] 5126; 5127entry: 5128 %vecinit.i = insertelement <2 x double> undef, double %y, i32 0 5129 %vecinit1.i = insertelement <2 x double> %vecinit.i, double %y, i32 1 5130 %mul.i = fmul <2 x double> %vecinit1.i, %x 5131 ret <2 x double> %mul.i 5132} 5133 5134define <4 x float> @vmulq_n_f32(<4 x float> %x, float %y) nounwind readnone ssp { 5135; CHECK-LABEL: define <4 x float> @vmulq_n_f32( 5136; CHECK-SAME: <4 x float> [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] { 5137; CHECK-NEXT: [[ENTRY:.*:]] 5138; CHECK-NEXT: call void @llvm.donothing() 5139; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float [[Y]], i32 0 5140; CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float [[Y]], i32 1 5141; CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float [[Y]], i32 2 5142; CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[Y]], i32 3 5143; CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x float> [[VECINIT3_I]], [[X]] 5144; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 5145; CHECK-NEXT: ret <4 x float> [[MUL_I]] 5146; 5147entry: 5148 %vecinit.i = insertelement <4 x float> undef, float %y, i32 0 5149 %vecinit1.i = insertelement <4 x float> 
%vecinit.i, float %y, i32 1 5150 %vecinit2.i = insertelement <4 x float> %vecinit1.i, float %y, i32 2 5151 %vecinit3.i = insertelement <4 x float> %vecinit2.i, float %y, i32 3 5152 %mul.i = fmul <4 x float> %vecinit3.i, %x 5153 ret <4 x float> %mul.i 5154} 5155 5156define <2 x float> @vmul_n_f32(<2 x float> %x, float %y) nounwind readnone ssp { 5157; CHECK-LABEL: define <2 x float> @vmul_n_f32( 5158; CHECK-SAME: <2 x float> [[X:%.*]], float [[Y:%.*]]) #[[ATTR3]] { 5159; CHECK-NEXT: [[ENTRY:.*:]] 5160; CHECK-NEXT: call void @llvm.donothing() 5161; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float [[Y]], i32 0 5162; CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float [[Y]], i32 1 5163; CHECK-NEXT: [[MUL_I:%.*]] = fmul <2 x float> [[VECINIT1_I]], [[X]] 5164; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 5165; CHECK-NEXT: ret <2 x float> [[MUL_I]] 5166; 5167entry: 5168 %vecinit.i = insertelement <2 x float> undef, float %y, i32 0 5169 %vecinit1.i = insertelement <2 x float> %vecinit.i, float %y, i32 1 5170 %mul.i = fmul <2 x float> %vecinit1.i, %x 5171 ret <2 x float> %mul.i 5172} 5173 5174define <4 x i16> @vmla_laneq_s16_test(<4 x i16> %a, <4 x i16> %b, <8 x i16> %c) nounwind readnone ssp { 5175; CHECK-LABEL: define <4 x i16> @vmla_laneq_s16_test( 5176; CHECK-SAME: <4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]], <8 x i16> [[C:%.*]]) #[[ATTR3]] { 5177; CHECK-NEXT: [[ENTRY:.*:]] 5178; CHECK-NEXT: call void @llvm.donothing() 5179; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[C]], <8 x i16> undef, <4 x i32> <i32 6, i32 6, i32 6, i32 6> 5180; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i16> [[SHUFFLE]], [[B]] 5181; CHECK-NEXT: [[ADD:%.*]] = add <4 x i16> [[MUL]], [[A]] 5182; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 5183; CHECK-NEXT: ret <4 x i16> [[ADD]] 5184; 5185entry: 5186 %shuffle = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 6, i32 6, i32 6, i32 6> 5187 %mul = mul <4 x i16> %shuffle, %b 5188 %add = add <4 x i16> %mul, %a 5189 ret <4 x i16> %add 5190} 5191 5192define <2 x i32> @vmla_laneq_s32_test(<2 x i32> %a, <2 x i32> %b, <4 x i32> %c) nounwind readnone ssp { 5193; CHECK-LABEL: define <2 x i32> @vmla_laneq_s32_test( 5194; CHECK-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <4 x i32> [[C:%.*]]) #[[ATTR3]] { 5195; CHECK-NEXT: [[ENTRY:.*:]] 5196; CHECK-NEXT: call void @llvm.donothing() 5197; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[C]], <4 x i32> undef, <2 x i32> <i32 3, i32 3> 5198; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[SHUFFLE]], [[B]] 5199; CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[MUL]], [[A]] 5200; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 5201; CHECK-NEXT: ret <2 x i32> [[ADD]] 5202; 5203entry: 5204 %shuffle = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 3, i32 3> 5205 %mul = mul <2 x i32> %shuffle, %b 5206 %add = add <2 x i32> %mul, %a 5207 ret <2 x i32> %add 5208} 5209 5210define <8 x i16> @not_really_vmlaq_laneq_s16_test(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) nounwind readnone ssp { 5211; CHECK-LABEL: define <8 x i16> @not_really_vmlaq_laneq_s16_test( 5212; CHECK-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]]) #[[ATTR3]] { 5213; CHECK-NEXT: [[ENTRY:.*:]] 5214; CHECK-NEXT: call void @llvm.donothing() 5215; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <8 x i16> [[C]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 5216; CHECK-NEXT: [[SHUFFLE2:%.*]] = 
shufflevector <4 x i16> [[SHUFFLE1]], <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 5217; CHECK-NEXT: [[MUL:%.*]] = mul <8 x i16> [[SHUFFLE2]], [[B]] 5218; CHECK-NEXT: [[ADD:%.*]] = add <8 x i16> [[MUL]], [[A]] 5219; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 5220; CHECK-NEXT: ret <8 x i16> [[ADD]] 5221; 5222entry: 5223 %shuffle1 = shufflevector <8 x i16> %c, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 5224 %shuffle2 = shufflevector <4 x i16> %shuffle1, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 5225 %mul = mul <8 x i16> %shuffle2, %b 5226 %add = add <8 x i16> %mul, %a 5227 ret <8 x i16> %add 5228} 5229 5230define <4 x i32> @not_really_vmlaq_laneq_s32_test(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind readnone ssp { 5231; CHECK-LABEL: define <4 x i32> @not_really_vmlaq_laneq_s32_test( 5232; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]]) #[[ATTR3]] { 5233; CHECK-NEXT: [[ENTRY:.*:]] 5234; CHECK-NEXT: call void @llvm.donothing() 5235; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[C]], <4 x i32> undef, <2 x i32> <i32 2, i32 3> 5236; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <2 x i32> [[SHUFFLE1]], <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 5237; CHECK-NEXT: [[MUL:%.*]] = mul <4 x i32> [[SHUFFLE2]], [[B]] 5238; CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[MUL]], [[A]] 5239; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 5240; CHECK-NEXT: ret <4 x i32> [[ADD]] 5241; 5242entry: 5243 %shuffle1 = shufflevector <4 x i32> %c, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 5244 %shuffle2 = shufflevector <2 x i32> %shuffle1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 5245 %mul = mul <4 x i32> %shuffle2, %b 5246 %add = add <4 x i32> %mul, %a 5247 ret <4 x i32> %add 5248} 5249 5250define <4 x i32> @vmull_laneq_s16_test(<4 x i16> %a, <8 x i16> %b) nounwind readnone ssp { 5251; CHECK-LABEL: define <4 x i32> @vmull_laneq_s16_test( 5252; CHECK-SAME: <4 x i16> [[A:%.*]], <8 x i16> [[B:%.*]]) #[[ATTR3]] { 5253; CHECK-NEXT: [[ENTRY:.*:]] 5254; CHECK-NEXT: call void @llvm.donothing() 5255; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> undef, <4 x i32> <i32 6, i32 6, i32 6, i32 6> 5256; CHECK-NEXT: [[VMULL2_I:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[A]], <4 x i16> [[SHUFFLE]]) 5257; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 5258; CHECK-NEXT: ret <4 x i32> [[VMULL2_I]] 5259; 5260entry: 5261 %shuffle = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 6, i32 6, i32 6, i32 6> 5262 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) #2 5263 ret <4 x i32> %vmull2.i 5264} 5265 5266define <2 x i64> @vmull_laneq_s32_test(<2 x i32> %a, <4 x i32> %b) nounwind readnone ssp { 5267; CHECK-LABEL: define <2 x i64> @vmull_laneq_s32_test( 5268; CHECK-SAME: <2 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR3]] { 5269; CHECK-NEXT: [[ENTRY:.*:]] 5270; CHECK-NEXT: call void @llvm.donothing() 5271; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <2 x i32> <i32 2, i32 2> 5272; CHECK-NEXT: [[VMULL2_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[A]], <2 x i32> [[SHUFFLE]]) 5273; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 5274; CHECK-NEXT: ret <2 x i64> [[VMULL2_I]] 5275; 5276entry: 5277 
%shuffle = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 2> 5278 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) #2 5279 ret <2 x i64> %vmull2.i 5280} 5281define <4 x i32> @vmull_laneq_u16_test(<4 x i16> %a, <8 x i16> %b) nounwind readnone ssp { 5282; CHECK-LABEL: define <4 x i32> @vmull_laneq_u16_test( 5283; CHECK-SAME: <4 x i16> [[A:%.*]], <8 x i16> [[B:%.*]]) #[[ATTR3]] { 5284; CHECK-NEXT: [[ENTRY:.*:]] 5285; CHECK-NEXT: call void @llvm.donothing() 5286; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> undef, <4 x i32> <i32 6, i32 6, i32 6, i32 6> 5287; CHECK-NEXT: [[VMULL2_I:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[A]], <4 x i16> [[SHUFFLE]]) 5288; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 5289; CHECK-NEXT: ret <4 x i32> [[VMULL2_I]] 5290; 5291entry: 5292 %shuffle = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> <i32 6, i32 6, i32 6, i32 6> 5293 %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %shuffle) #2 5294 ret <4 x i32> %vmull2.i 5295} 5296 5297define <2 x i64> @vmull_laneq_u32_test(<2 x i32> %a, <4 x i32> %b) nounwind readnone ssp { 5298; CHECK-LABEL: define <2 x i64> @vmull_laneq_u32_test( 5299; CHECK-SAME: <2 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR3]] { 5300; CHECK-NEXT: [[ENTRY:.*:]] 5301; CHECK-NEXT: call void @llvm.donothing() 5302; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> undef, <2 x i32> <i32 2, i32 2> 5303; CHECK-NEXT: [[VMULL2_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[A]], <2 x i32> [[SHUFFLE]]) 5304; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 5305; CHECK-NEXT: ret <2 x i64> [[VMULL2_I]] 5306; 5307entry: 5308 %shuffle = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 2, i32 2> 5309 %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %shuffle) #2 5310 ret <2 x i64> %vmull2.i 5311} 5312 5313define <4 x i32> @vmull_low_n_s16_test(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c, i32 %d) nounwind readnone optsize ssp { 5314; CHECK-LABEL: define <4 x i32> @vmull_low_n_s16_test( 5315; CHECK-SAME: <4 x i32> [[A:%.*]], <8 x i16> [[B:%.*]], <4 x i16> [[C:%.*]], i32 [[D:%.*]]) #[[ATTR6]] { 5316; CHECK-NEXT: [[ENTRY:.*:]] 5317; CHECK-NEXT: call void @llvm.donothing() 5318; CHECK-NEXT: [[CONV:%.*]] = trunc i32 [[D]] to i16 5319; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <2 x i64> 5320; CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> undef, <1 x i32> zeroinitializer 5321; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I]] to <4 x i16> 5322; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 [[CONV]], i32 0 5323; CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[CONV]], i32 1 5324; CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[CONV]], i32 2 5325; CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[CONV]], i32 3 5326; CHECK-NEXT: [[VMULL2_I_I:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[VECINIT3_I]]) #[[ATTR7]] 5327; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 5328; CHECK-NEXT: ret <4 x i32> [[VMULL2_I_I]] 5329; 5330entry: 5331 %conv = trunc i32 %d to i16 5332 %0 = bitcast <8 x i16> %b to <2 x i64> 5333 %shuffle.i.i = 
shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 0> 5334 %1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16> 5335 %vecinit.i = insertelement <4 x i16> undef, i16 %conv, i32 0 5336 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %conv, i32 1 5337 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %conv, i32 2 5338 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %conv, i32 3 5339 %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %1, <4 x i16> %vecinit3.i) nounwind 5340 ret <4 x i32> %vmull2.i.i 5341} 5342 5343define <4 x i32> @vmull_high_n_s16_test(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c, i32 %d) nounwind readnone optsize ssp { 5344; CHECK-LABEL: define <4 x i32> @vmull_high_n_s16_test( 5345; CHECK-SAME: <4 x i32> [[A:%.*]], <8 x i16> [[B:%.*]], <4 x i16> [[C:%.*]], i32 [[D:%.*]]) #[[ATTR6]] { 5346; CHECK-NEXT: [[ENTRY:.*:]] 5347; CHECK-NEXT: call void @llvm.donothing() 5348; CHECK-NEXT: [[CONV:%.*]] = trunc i32 [[D]] to i16 5349; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <2 x i64> 5350; CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> undef, <1 x i32> <i32 1> 5351; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I]] to <4 x i16> 5352; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 [[CONV]], i32 0 5353; CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[CONV]], i32 1 5354; CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[CONV]], i32 2 5355; CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[CONV]], i32 3 5356; CHECK-NEXT: [[VMULL2_I_I:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[VECINIT3_I]]) #[[ATTR7]] 5357; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 5358; CHECK-NEXT: ret <4 x i32> [[VMULL2_I_I]] 5359; 5360entry: 5361 %conv = trunc i32 %d to i16 5362 %0 = bitcast <8 x i16> %b to <2 x i64> 5363 %shuffle.i.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1> 5364 %1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16> 5365 %vecinit.i = insertelement <4 x i16> undef, i16 %conv, i32 0 5366 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %conv, i32 1 5367 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %conv, i32 2 5368 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %conv, i32 3 5369 %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %1, <4 x i16> %vecinit3.i) nounwind 5370 ret <4 x i32> %vmull2.i.i 5371} 5372 5373define <2 x i64> @vmull_high_n_s32_test(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c, i32 %d) nounwind readnone optsize ssp { 5374; CHECK-LABEL: define <2 x i64> @vmull_high_n_s32_test( 5375; CHECK-SAME: <2 x i64> [[A:%.*]], <4 x i32> [[B:%.*]], <2 x i32> [[C:%.*]], i32 [[D:%.*]]) #[[ATTR6]] { 5376; CHECK-NEXT: [[ENTRY:.*:]] 5377; CHECK-NEXT: call void @llvm.donothing() 5378; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64> 5379; CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> undef, <1 x i32> <i32 1> 5380; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I]] to <2 x i32> 5381; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 [[D]], i32 0 5382; CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[D]], i32 1 5383; CHECK-NEXT: [[VMULL2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[TMP1]], <2 x i32> [[VECINIT1_I]]) #[[ATTR7]] 5384; 
CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 5385; CHECK-NEXT: ret <2 x i64> [[VMULL2_I_I]] 5386; 5387entry: 5388 %0 = bitcast <4 x i32> %b to <2 x i64> 5389 %shuffle.i.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1> 5390 %1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32> 5391 %vecinit.i = insertelement <2 x i32> undef, i32 %d, i32 0 5392 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %d, i32 1 5393 %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %1, <2 x i32> %vecinit1.i) nounwind 5394 ret <2 x i64> %vmull2.i.i 5395} 5396 5397define <4 x i32> @vmull_high_n_u16_test(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c, i32 %d) nounwind readnone optsize ssp { 5398; CHECK-LABEL: define <4 x i32> @vmull_high_n_u16_test( 5399; CHECK-SAME: <4 x i32> [[A:%.*]], <8 x i16> [[B:%.*]], <4 x i16> [[C:%.*]], i32 [[D:%.*]]) #[[ATTR6]] { 5400; CHECK-NEXT: [[ENTRY:.*:]] 5401; CHECK-NEXT: call void @llvm.donothing() 5402; CHECK-NEXT: [[CONV:%.*]] = trunc i32 [[D]] to i16 5403; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[B]] to <2 x i64> 5404; CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> undef, <1 x i32> <i32 1> 5405; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I]] to <4 x i16> 5406; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 [[CONV]], i32 0 5407; CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[CONV]], i32 1 5408; CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[CONV]], i32 2 5409; CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[CONV]], i32 3 5410; CHECK-NEXT: [[VMULL2_I_I:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[VECINIT3_I]]) #[[ATTR7]] 5411; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 5412; CHECK-NEXT: ret <4 x i32> [[VMULL2_I_I]] 5413; 5414entry: 5415 %conv = trunc i32 %d to i16 5416 %0 = bitcast <8 x i16> %b to <2 x i64> 5417 %shuffle.i.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1> 5418 %1 = bitcast <1 x i64> %shuffle.i.i to <4 x i16> 5419 %vecinit.i = insertelement <4 x i16> undef, i16 %conv, i32 0 5420 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %conv, i32 1 5421 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %conv, i32 2 5422 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %conv, i32 3 5423 %vmull2.i.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %1, <4 x i16> %vecinit3.i) nounwind 5424 ret <4 x i32> %vmull2.i.i 5425} 5426 5427define <2 x i64> @vmull_high_n_u32_test(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c, i32 %d) nounwind readnone optsize ssp { 5428; CHECK-LABEL: define <2 x i64> @vmull_high_n_u32_test( 5429; CHECK-SAME: <2 x i64> [[A:%.*]], <4 x i32> [[B:%.*]], <2 x i32> [[C:%.*]], i32 [[D:%.*]]) #[[ATTR6]] { 5430; CHECK-NEXT: [[ENTRY:.*:]] 5431; CHECK-NEXT: call void @llvm.donothing() 5432; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B]] to <2 x i64> 5433; CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> undef, <1 x i32> <i32 1> 5434; CHECK-NEXT: [[TMP1:%.*]] = bitcast <1 x i64> [[SHUFFLE_I_I]] to <2 x i32> 5435; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 [[D]], i32 0 5436; CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 [[D]], i32 1 5437; CHECK-NEXT: [[VMULL2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> 
[[TMP1]], <2 x i32> [[VECINIT1_I]]) #[[ATTR7]] 5438; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 5439; CHECK-NEXT: ret <2 x i64> [[VMULL2_I_I]] 5440; 5441entry: 5442 %0 = bitcast <4 x i32> %b to <2 x i64> 5443 %shuffle.i.i = shufflevector <2 x i64> %0, <2 x i64> undef, <1 x i32> <i32 1> 5444 %1 = bitcast <1 x i64> %shuffle.i.i to <2 x i32> 5445 %vecinit.i = insertelement <2 x i32> undef, i32 %d, i32 0 5446 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %d, i32 1 5447 %vmull2.i.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %1, <2 x i32> %vecinit1.i) nounwind 5448 ret <2 x i64> %vmull2.i.i 5449} 5450 5451define <4 x i32> @vmul_built_dup_test(<4 x i32> %a, <4 x i32> %b) { 5452; CHECK-LABEL: define <4 x i32> @vmul_built_dup_test( 5453; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) { 5454; CHECK-NEXT: call void @llvm.donothing() 5455; CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i32> [[B]], i32 1 5456; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 [[VGET_LANE]], i32 0 5457; CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 [[VGET_LANE]], i32 1 5458; CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 [[VGET_LANE]], i32 2 5459; CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 [[VGET_LANE]], i32 3 5460; CHECK-NEXT: [[PROD:%.*]] = mul <4 x i32> [[A]], [[VECINIT3_I]] 5461; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 5462; CHECK-NEXT: ret <4 x i32> [[PROD]] 5463; 5464 %vget_lane = extractelement <4 x i32> %b, i32 1 5465 %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0 5466 %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1 5467 %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2 5468 %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3 5469 %prod = mul <4 x i32> %a, %vecinit3.i 5470 ret <4 x i32> %prod 5471} 5472 5473define <4 x i16> @vmul_built_dup_fromsmall_test(<4 x i16> %a, <4 x i16> %b) { 5474; CHECK-LABEL: define <4 x i16> @vmul_built_dup_fromsmall_test( 5475; CHECK-SAME: <4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]]) { 5476; CHECK-NEXT: call void @llvm.donothing() 5477; CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[B]], i32 3 5478; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 [[VGET_LANE]], i32 0 5479; CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 [[VGET_LANE]], i32 1 5480; CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 [[VGET_LANE]], i32 2 5481; CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 [[VGET_LANE]], i32 3 5482; CHECK-NEXT: [[PROD:%.*]] = mul <4 x i16> [[A]], [[VECINIT3_I]] 5483; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 5484; CHECK-NEXT: ret <4 x i16> [[PROD]] 5485; 5486 %vget_lane = extractelement <4 x i16> %b, i32 3 5487 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 5488 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 5489 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 5490 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 5491 %prod = mul <4 x i16> %a, %vecinit3.i 5492 ret <4 x i16> %prod 5493} 5494 5495define <8 x i16> @vmulq_built_dup_fromsmall_test(<8 x i16> %a, <4 x i16> %b) { 5496; CHECK-LABEL: define <8 x i16> @vmulq_built_dup_fromsmall_test( 5497; 
CHECK-SAME: <8 x i16> [[A:%.*]], <4 x i16> [[B:%.*]]) { 5498; CHECK-NEXT: call void @llvm.donothing() 5499; CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[B]], i32 0 5500; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 [[VGET_LANE]], i32 0 5501; CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 [[VGET_LANE]], i32 1 5502; CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 [[VGET_LANE]], i32 2 5503; CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 [[VGET_LANE]], i32 3 5504; CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 [[VGET_LANE]], i32 4 5505; CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 [[VGET_LANE]], i32 5 5506; CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 [[VGET_LANE]], i32 6 5507; CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 [[VGET_LANE]], i32 7 5508; CHECK-NEXT: [[PROD:%.*]] = mul <8 x i16> [[A]], [[VECINIT7_I]] 5509; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 5510; CHECK-NEXT: ret <8 x i16> [[PROD]] 5511; 5512 %vget_lane = extractelement <4 x i16> %b, i32 0 5513 %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0 5514 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1 5515 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2 5516 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3 5517 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4 5518 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5 5519 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6 5520 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7 5521 %prod = mul <8 x i16> %a, %vecinit7.i 5522 ret <8 x i16> %prod 5523} 5524 5525define <2 x i64> @mull_from_two_extracts(<4 x i32> %lhs, <4 x i32> %rhs) { 5526; CHECK-LABEL: define <2 x i64> @mull_from_two_extracts( 5527; CHECK-SAME: <4 x i32> [[LHS:%.*]], <4 x i32> [[RHS:%.*]]) { 5528; CHECK-NEXT: call void @llvm.donothing() 5529; CHECK-NEXT: [[LHS_HIGH:%.*]] = shufflevector <4 x i32> [[LHS]], <4 x i32> undef, <2 x i32> <i32 2, i32 3> 5530; CHECK-NEXT: [[RHS_HIGH:%.*]] = shufflevector <4 x i32> [[RHS]], <4 x i32> undef, <2 x i32> <i32 2, i32 3> 5531; CHECK-NEXT: [[RES:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[LHS_HIGH]], <2 x i32> [[RHS_HIGH]]) #[[ATTR7]] 5532; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8 5533; CHECK-NEXT: ret <2 x i64> [[RES]] 5534; 5535 %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 5536 %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3> 5537 5538 %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind 5539 ret <2 x i64> %res 5540} 5541 5542define <2 x i64> @mlal_from_two_extracts(<2 x i64> %accum, <4 x i32> %lhs, <4 x i32> %rhs) { 5543; CHECK-LABEL: define <2 x i64> @mlal_from_two_extracts( 5544; CHECK-SAME: <2 x i64> [[ACCUM:%.*]], <4 x i32> [[LHS:%.*]], <4 x i32> [[RHS:%.*]]) { 5545; CHECK-NEXT: call void @llvm.donothing() 5546; CHECK-NEXT: [[LHS_HIGH:%.*]] = shufflevector <4 x i32> [[LHS]], <4 x i32> undef, <2 x i32> <i32 2, i32 3> 5547; CHECK-NEXT: [[RHS_HIGH:%.*]] = shufflevector <4 x i32> [[RHS]], <4 x i32> undef, <2 x i32> <i32 2, i32 3> 5548; 
; CHECK-NEXT: [[RES:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[LHS_HIGH]], <2 x i32> [[RHS_HIGH]]) #[[ATTR7]]
; CHECK-NEXT: [[SUM:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[ACCUM]], <2 x i64> [[RES]])
; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[SUM]]
;
  %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>

  %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
  %sum = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %accum, <2 x i64> %res)
  ret <2 x i64> %sum
}

define <2 x i64> @mull_from_extract_dup_low(<4 x i32> %lhs, i32 %rhs) {
; CHECK-LABEL: define <2 x i64> @mull_from_extract_dup_low(
; CHECK-SAME: <4 x i32> [[LHS:%.*]], i32 [[RHS:%.*]]) {
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[RHSVEC_TMP:%.*]] = insertelement <2 x i32> undef, i32 [[RHS]], i32 0
; CHECK-NEXT: [[RHSVEC:%.*]] = insertelement <2 x i32> [[RHSVEC_TMP]], i32 [[RHS]], i32 1
; CHECK-NEXT: [[LHS_HIGH:%.*]] = shufflevector <4 x i32> [[LHS]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[RES:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[LHS_HIGH]], <2 x i32> [[RHSVEC]]) #[[ATTR7]]
; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[RES]]
;
  %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
  %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1

  %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 0, i32 1>

  %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind
  ret <2 x i64> %res
}

define <2 x i64> @mull_from_extract_dup_high(<4 x i32> %lhs, i32 %rhs) {
; CHECK-LABEL: define <2 x i64> @mull_from_extract_dup_high(
; CHECK-SAME: <4 x i32> [[LHS:%.*]], i32 [[RHS:%.*]]) {
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[RHSVEC_TMP:%.*]] = insertelement <2 x i32> undef, i32 [[RHS]], i32 0
; CHECK-NEXT: [[RHSVEC:%.*]] = insertelement <2 x i32> [[RHSVEC_TMP]], i32 [[RHS]], i32 1
; CHECK-NEXT: [[LHS_HIGH:%.*]] = shufflevector <4 x i32> [[LHS]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[RES:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[LHS_HIGH]], <2 x i32> [[RHSVEC]]) #[[ATTR7]]
; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[RES]]
;
  %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
  %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1

  %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>

  %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhsvec) nounwind
  ret <2 x i64> %res
}

define <8 x i16> @pmull_from_extract_dup_low(<16 x i8> %lhs, i8 %rhs) {
; CHECK-LABEL: define <8 x i16> @pmull_from_extract_dup_low(
; CHECK-SAME: <16 x i8> [[LHS:%.*]], i8 [[RHS:%.*]]) {
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[RHSVEC_0:%.*]] = insertelement <8 x i8> undef, i8 [[RHS]], i32 0
; CHECK-NEXT: [[RHSVEC:%.*]] = shufflevector <8 x i8> [[RHSVEC_0]], <8 x i8> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[LHS_HIGH:%.*]] = shufflevector <16 x i8> [[LHS]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[RES:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[LHS_HIGH]], <8 x i8> [[RHSVEC]]) #[[ATTR7]]
; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[RES]]
;
  %rhsvec.0 = insertelement <8 x i8> undef, i8 %rhs, i32 0
  %rhsvec = shufflevector <8 x i8> %rhsvec.0, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>

  %lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>

  %res = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %lhs.high, <8 x i8> %rhsvec) nounwind
  ret <8 x i16> %res
}

define <8 x i16> @pmull_from_extract_dup_high(<16 x i8> %lhs, i8 %rhs) {
; CHECK-LABEL: define <8 x i16> @pmull_from_extract_dup_high(
; CHECK-SAME: <16 x i8> [[LHS:%.*]], i8 [[RHS:%.*]]) {
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[RHSVEC_0:%.*]] = insertelement <8 x i8> undef, i8 [[RHS]], i32 0
; CHECK-NEXT: [[RHSVEC:%.*]] = shufflevector <8 x i8> [[RHSVEC_0]], <8 x i8> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[LHS_HIGH:%.*]] = shufflevector <16 x i8> [[LHS]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[RES:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[LHS_HIGH]], <8 x i8> [[RHSVEC]]) #[[ATTR7]]
; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[RES]]
;
  %rhsvec.0 = insertelement <8 x i8> undef, i8 %rhs, i32 0
  %rhsvec = shufflevector <8 x i8> %rhsvec.0, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>

  %lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>

  %res = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %lhs.high, <8 x i8> %rhsvec) nounwind
  ret <8 x i16> %res
}

define <8 x i16> @pmull_from_extract_duplane_low(<16 x i8> %lhs, <8 x i8> %rhs) {
; CHECK-LABEL: define <8 x i16> @pmull_from_extract_duplane_low(
; CHECK-SAME: <16 x i8> [[LHS:%.*]], <8 x i8> [[RHS:%.*]]) {
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[LHS_HIGH:%.*]] = shufflevector <16 x i8> [[LHS]], <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[RHS_HIGH:%.*]] = shufflevector <8 x i8> [[RHS]], <8 x i8> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[LHS_HIGH]], <8 x i8> [[RHS_HIGH]]) #[[ATTR7]]
; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[RES]]
;
  %lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %rhs.high = shufflevector <8 x i8> %rhs, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>

  %res = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %lhs.high, <8 x i8> %rhs.high) nounwind
  ret <8 x i16> %res
}

define <8 x i16> @pmull_from_extract_duplane_high(<16 x i8> %lhs, <8 x i8> %rhs) {
; CHECK-LABEL: define <8 x i16> @pmull_from_extract_duplane_high(
; CHECK-SAME: <16 x i8> [[LHS:%.*]], <8 x i8> [[RHS:%.*]]) {
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[LHS_HIGH:%.*]] = shufflevector <16 x i8> [[LHS]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[RHS_HIGH:%.*]] = shufflevector <8 x i8> [[RHS]], <8 x i8> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[LHS_HIGH]], <8 x i8> [[RHS_HIGH]]) #[[ATTR7]]
; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[RES]]
;
  %lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %rhs.high = shufflevector <8 x i8> %rhs, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>

  %res = tail call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %lhs.high, <8 x i8> %rhs.high) nounwind
  ret <8 x i16> %res
}

define <2 x i64> @sqdmull_from_extract_duplane_low(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK-LABEL: define <2 x i64> @sqdmull_from_extract_duplane_low(
; CHECK-SAME: <4 x i32> [[LHS:%.*]], <4 x i32> [[RHS:%.*]]) {
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[LHS_HIGH:%.*]] = shufflevector <4 x i32> [[LHS]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[RHS_HIGH:%.*]] = shufflevector <4 x i32> [[RHS]], <4 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[LHS_HIGH]], <2 x i32> [[RHS_HIGH]]) #[[ATTR7]]
; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[RES]]
;
  %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0>

  %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
  ret <2 x i64> %res
}

define <2 x i64> @sqdmull_from_extract_duplane_high(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK-LABEL: define <2 x i64> @sqdmull_from_extract_duplane_high(
; CHECK-SAME: <4 x i32> [[LHS:%.*]], <4 x i32> [[RHS:%.*]]) {
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[LHS_HIGH:%.*]] = shufflevector <4 x i32> [[LHS]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[RHS_HIGH:%.*]] = shufflevector <4 x i32> [[RHS]], <4 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[LHS_HIGH]], <2 x i32> [[RHS_HIGH]]) #[[ATTR7]]
; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[RES]]
;
  %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0>

  %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
  ret <2 x i64> %res
}

define <2 x i64> @sqdmlal_from_extract_duplane_low(<2 x i64> %accum, <4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK-LABEL: define <2 x i64> @sqdmlal_from_extract_duplane_low(
; CHECK-SAME: <2 x i64> [[ACCUM:%.*]], <4 x i32> [[LHS:%.*]], <4 x i32> [[RHS:%.*]]) {
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[LHS_HIGH:%.*]] = shufflevector <4 x i32> [[LHS]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[RHS_HIGH:%.*]] = shufflevector <4 x i32> [[RHS]], <4 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[LHS_HIGH]], <2 x i32> [[RHS_HIGH]]) #[[ATTR7]]
; CHECK-NEXT: [[SUM:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[ACCUM]], <2 x i64> [[RES]])
; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[SUM]]
;
  %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0>

  %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
  %sum = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %accum, <2 x i64> %res)
  ret <2 x i64> %sum
}

define <2 x i64> @sqdmlal_from_extract_duplane_high(<2 x i64> %accum, <4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK-LABEL: define <2 x i64> @sqdmlal_from_extract_duplane_high(
; CHECK-SAME: <2 x i64> [[ACCUM:%.*]], <4 x i32> [[LHS:%.*]], <4 x i32> [[RHS:%.*]]) {
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[LHS_HIGH:%.*]] = shufflevector <4 x i32> [[LHS]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[RHS_HIGH:%.*]] = shufflevector <4 x i32> [[RHS]], <4 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[LHS_HIGH]], <2 x i32> [[RHS_HIGH]]) #[[ATTR7]]
; CHECK-NEXT: [[SUM:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> [[ACCUM]], <2 x i64> [[RES]])
; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[SUM]]
;
  %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0>

  %res = tail call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
  %sum = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %accum, <2 x i64> %res)
  ret <2 x i64> %sum
}

define <2 x i64> @umlal_from_extract_duplane_low(<2 x i64> %accum, <4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK-LABEL: define <2 x i64> @umlal_from_extract_duplane_low(
; CHECK-SAME: <2 x i64> [[ACCUM:%.*]], <4 x i32> [[LHS:%.*]], <4 x i32> [[RHS:%.*]]) {
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[LHS_HIGH:%.*]] = shufflevector <4 x i32> [[LHS]], <4 x i32> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: [[RHS_HIGH:%.*]] = shufflevector <4 x i32> [[RHS]], <4 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[LHS_HIGH]], <2 x i32> [[RHS_HIGH]]) #[[ATTR7]]
; CHECK-NEXT: [[SUM:%.*]] = add <2 x i64> [[ACCUM]], [[RES]]
; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[SUM]]
;
  %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0>

  %res = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
  %sum = add <2 x i64> %accum, %res
  ret <2 x i64> %sum
}

define <2 x i64> @umlal_from_extract_duplane_high(<2 x i64> %accum, <4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK-LABEL: define <2 x i64> @umlal_from_extract_duplane_high(
; CHECK-SAME: <2 x i64> [[ACCUM:%.*]], <4 x i32> [[LHS:%.*]], <4 x i32> [[RHS:%.*]]) {
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[LHS_HIGH:%.*]] = shufflevector <4 x i32> [[LHS]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[RHS_HIGH:%.*]] = shufflevector <4 x i32> [[RHS]], <4 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[LHS_HIGH]], <2 x i32> [[RHS_HIGH]]) #[[ATTR7]]
; CHECK-NEXT: [[SUM:%.*]] = add <2 x i64> [[ACCUM]], [[RES]]
; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[SUM]]
;
  %lhs.high = shufflevector <4 x i32> %lhs, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
  %rhs.high = shufflevector <4 x i32> %rhs, <4 x i32> undef, <2 x i32> <i32 0, i32 0>

  %res = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %lhs.high, <2 x i32> %rhs.high) nounwind
  %sum = add <2 x i64> %accum, %res
  ret <2 x i64> %sum
}

define float @scalar_fmla_from_extract_v4f32(float %accum, float %lhs, <4 x float> %rvec) {
; CHECK-LABEL: define float @scalar_fmla_from_extract_v4f32(
; CHECK-SAME: float [[ACCUM:%.*]], float [[LHS:%.*]], <4 x float> [[RVEC:%.*]]) {
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[RHS:%.*]] = extractelement <4 x float> [[RVEC]], i32 3
; CHECK-NEXT: [[RES:%.*]] = call float @llvm.fma.f32(float [[LHS]], float [[RHS]], float [[ACCUM]])
; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[RES]]
;
  %rhs = extractelement <4 x float> %rvec, i32 3
  %res = call float @llvm.fma.f32(float %lhs, float %rhs, float %accum)
  ret float %res
}

define float @scalar_fmla_from_extract_v2f32(float %accum, float %lhs, <2 x float> %rvec) {
; CHECK-LABEL: define float @scalar_fmla_from_extract_v2f32(
; CHECK-SAME: float [[ACCUM:%.*]], float [[LHS:%.*]], <2 x float> [[RVEC:%.*]]) {
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[RHS:%.*]] = extractelement <2 x float> [[RVEC]], i32 1
; CHECK-NEXT: [[RES:%.*]] = call float @llvm.fma.f32(float [[LHS]], float [[RHS]], float [[ACCUM]])
; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[RES]]
;
  %rhs = extractelement <2 x float> %rvec, i32 1
  %res = call float @llvm.fma.f32(float %lhs, float %rhs, float %accum)
  ret float %res
}

define float @scalar_fmls_from_extract_v4f32(float %accum, float %lhs, <4 x float> %rvec) {
; CHECK-LABEL: define float @scalar_fmls_from_extract_v4f32(
; CHECK-SAME: float [[ACCUM:%.*]], float [[LHS:%.*]], <4 x float> [[RVEC:%.*]]) {
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[RHS_SCAL:%.*]] = extractelement <4 x float> [[RVEC]], i32 3
; CHECK-NEXT: [[RHS:%.*]] = fsub float -0.000000e+00, [[RHS_SCAL]]
; CHECK-NEXT: [[RES:%.*]] = call float @llvm.fma.f32(float [[LHS]], float [[RHS]], float [[ACCUM]])
; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[RES]]
;
  %rhs.scal = extractelement <4 x float> %rvec, i32 3
  %rhs = fsub float -0.0, %rhs.scal
  %res = call float @llvm.fma.f32(float %lhs, float %rhs, float %accum)
  ret float %res
}

define float @scalar_fmls_from_extract_v2f32(float %accum, float %lhs, <2 x float> %rvec) {
; CHECK-LABEL: define float @scalar_fmls_from_extract_v2f32(
; CHECK-SAME: float [[ACCUM:%.*]], float [[LHS:%.*]], <2 x float> [[RVEC:%.*]]) {
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[RHS_SCAL:%.*]] = extractelement <2 x float> [[RVEC]], i32 1
; CHECK-NEXT: [[RHS:%.*]] = fsub float -0.000000e+00, [[RHS_SCAL]]
; CHECK-NEXT: [[RES:%.*]] = call float @llvm.fma.f32(float [[LHS]], float [[RHS]], float [[ACCUM]])
; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret float [[RES]]
;
  %rhs.scal = extractelement <2 x float> %rvec, i32 1
  %rhs = fsub float -0.0, %rhs.scal
  %res = call float @llvm.fma.f32(float %lhs, float %rhs, float %accum)
  ret float %res
}

declare float @llvm.fma.f32(float, float, float)

define double @scalar_fmla_from_extract_v2f64(double %accum, double %lhs, <2 x double> %rvec) {
; CHECK-LABEL: define double @scalar_fmla_from_extract_v2f64(
; CHECK-SAME: double [[ACCUM:%.*]], double [[LHS:%.*]], <2 x double> [[RVEC:%.*]]) {
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[RHS:%.*]] = extractelement <2 x double> [[RVEC]], i32 1
; CHECK-NEXT: [[RES:%.*]] = call double @llvm.fma.f64(double [[LHS]], double [[RHS]], double [[ACCUM]])
; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret double [[RES]]
;
  %rhs = extractelement <2 x double> %rvec, i32 1
  %res = call double @llvm.fma.f64(double %lhs, double %rhs, double %accum)
  ret double %res
}

define double @scalar_fmls_from_extract_v2f64(double %accum, double %lhs, <2 x double> %rvec) {
; CHECK-LABEL: define double @scalar_fmls_from_extract_v2f64(
; CHECK-SAME: double [[ACCUM:%.*]], double [[LHS:%.*]], <2 x double> [[RVEC:%.*]]) {
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[RHS_SCAL:%.*]] = extractelement <2 x double> [[RVEC]], i32 1
; CHECK-NEXT: [[RHS:%.*]] = fsub double -0.000000e+00, [[RHS_SCAL]]
; CHECK-NEXT: [[RES:%.*]] = call double @llvm.fma.f64(double [[LHS]], double [[RHS]], double [[ACCUM]])
; CHECK-NEXT: store i64 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret double [[RES]]
;
  %rhs.scal = extractelement <2 x double> %rvec, i32 1
  %rhs = fsub double -0.0, %rhs.scal
  %res = call double @llvm.fma.f64(double %lhs, double %rhs, double %accum)
  ret double %res
}

declare double @llvm.fma.f64(double, double, double)

define <2 x float> @fmls_with_fneg_before_extract_v2f32(<2 x float> %accum, <2 x float> %lhs, <4 x float> %rhs) {
; CHECK-LABEL: define <2 x float> @fmls_with_fneg_before_extract_v2f32(
; CHECK-SAME: <2 x float> [[ACCUM:%.*]], <2 x float> [[LHS:%.*]], <4 x float> [[RHS:%.*]]) {
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[RHS_NEG:%.*]] = fsub <4 x float> splat (float -0.000000e+00), [[RHS]]
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[RHS_NEG]], <4 x float> undef, <2 x i32> <i32 3, i32 3>
; CHECK-NEXT: [[RES:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[LHS]], <2 x float> [[SPLAT]], <2 x float> [[ACCUM]])
; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x float> [[RES]]
;
  %rhs_neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %rhs
  %splat = shufflevector <4 x float> %rhs_neg, <4 x float> undef, <2 x i32> <i32 3, i32 3>
  %res = call <2 x float> @llvm.fma.v2f32(<2 x float> %lhs, <2 x float> %splat, <2 x float> %accum)
  ret <2 x float> %res
}

define <2 x float> @fmls_with_fneg_before_extract_v2f32_1(<2 x float> %accum, <2 x float> %lhs, <2 x float> %rhs) {
; CHECK-LABEL: define <2 x float> @fmls_with_fneg_before_extract_v2f32_1(
; CHECK-SAME: <2 x float> [[ACCUM:%.*]], <2 x float> [[LHS:%.*]], <2 x float> [[RHS:%.*]]) {
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[RHS_NEG:%.*]] = fsub <2 x float> splat (float -0.000000e+00), [[RHS]]
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x float> [[RHS_NEG]], <2 x float> undef, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[RES:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[LHS]], <2 x float> [[SPLAT]], <2 x float> [[ACCUM]])
; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x float> [[RES]]
;
  %rhs_neg = fsub <2 x float> <float -0.0, float -0.0>, %rhs
  %splat = shufflevector <2 x float> %rhs_neg, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  %res = call <2 x float> @llvm.fma.v2f32(<2 x float> %lhs, <2 x float> %splat, <2 x float> %accum)
  ret <2 x float> %res
}

define <4 x float> @fmls_with_fneg_before_extract_v4f32(<4 x float> %accum, <4 x float> %lhs, <4 x float> %rhs) {
; CHECK-LABEL: define <4 x float> @fmls_with_fneg_before_extract_v4f32(
; CHECK-SAME: <4 x float> [[ACCUM:%.*]], <4 x float> [[LHS:%.*]], <4 x float> [[RHS:%.*]]) {
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[RHS_NEG:%.*]] = fsub <4 x float> splat (float -0.000000e+00), [[RHS]]
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[RHS_NEG]], <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[LHS]], <4 x float> [[SPLAT]], <4 x float> [[ACCUM]])
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x float> [[RES]]
;
  %rhs_neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %rhs
  %splat = shufflevector <4 x float> %rhs_neg, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %res = call <4 x float> @llvm.fma.v4f32(<4 x float> %lhs, <4 x float> %splat, <4 x float> %accum)
  ret <4 x float> %res
}

define <4 x float> @fmls_with_fneg_before_extract_v4f32_1(<4 x float> %accum, <4 x float> %lhs, <2 x float> %rhs) {
; CHECK-LABEL: define <4 x float> @fmls_with_fneg_before_extract_v4f32_1(
; CHECK-SAME: <4 x float> [[ACCUM:%.*]], <4 x float> [[LHS:%.*]], <2 x float> [[RHS:%.*]]) {
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[RHS_NEG:%.*]] = fsub <2 x float> splat (float -0.000000e+00), [[RHS]]
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x float> [[RHS_NEG]], <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[LHS]], <4 x float> [[SPLAT]], <4 x float> [[ACCUM]])
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x float> [[RES]]
;
  %rhs_neg = fsub <2 x float> <float -0.0, float -0.0>, %rhs
  %splat = shufflevector <2 x float> %rhs_neg, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %res = call <4 x float> @llvm.fma.v4f32(<4 x float> %lhs, <4 x float> %splat, <4 x float> %accum)
  ret <4 x float> %res
}

define <2 x double> @fmls_with_fneg_before_extract_v2f64(<2 x double> %accum, <2 x double> %lhs, <2 x double> %rhs) {
; CHECK-LABEL: define <2 x double> @fmls_with_fneg_before_extract_v2f64(
; CHECK-SAME: <2 x double> [[ACCUM:%.*]], <2 x double> [[LHS:%.*]], <2 x double> [[RHS:%.*]]) {
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[RHS_NEG:%.*]] = fsub <2 x double> splat (double -0.000000e+00), [[RHS]]
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <2 x double> [[RHS_NEG]], <2 x double> undef, <2 x i32> <i32 1, i32 1>
; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[LHS]], <2 x double> [[SPLAT]], <2 x double> [[ACCUM]])
; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x double> [[RES]]
;
  %rhs_neg = fsub <2 x double> <double -0.0, double -0.0>, %rhs
  %splat = shufflevector <2 x double> %rhs_neg, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  %res = call <2 x double> @llvm.fma.v2f64(<2 x double> %lhs, <2 x double> %splat, <2 x double> %accum)
  ret <2 x double> %res
}

define <1 x double> @test_fmul_v1f64(<1 x double> %L, <1 x double> %R) nounwind sanitize_memory {
; CHECK-LABEL: define <1 x double> @test_fmul_v1f64(
; CHECK-SAME: <1 x double> [[L:%.*]], <1 x double> [[R:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[PROD:%.*]] = fmul <1 x double> [[L]], [[R]]
; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <1 x double> [[PROD]]
;
  %prod = fmul <1 x double> %L, %R
  ret <1 x double> %prod
}

define <1 x double> @test_fdiv_v1f64(<1 x double> %L, <1 x double> %R) nounwind sanitize_memory {
; CHECK-LABEL: define <1 x double> @test_fdiv_v1f64(
; CHECK-SAME: <1 x double> [[L:%.*]], <1 x double> [[R:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[PROD:%.*]] = fdiv <1 x double> [[L]], [[R]]
; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <1 x double> [[PROD]]
;
  %prod = fdiv <1 x double> %L, %R
  ret <1 x double> %prod
}

define i32 @sqdmlal_s(i16 %A, i16 %B, i32 %C) nounwind sanitize_memory {
; CHECK-LABEL: define i32 @sqdmlal_s(
; CHECK-SAME: i16 [[A:%.*]], i16 [[B:%.*]], i32 [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i16> splat (i16 -1), i16 [[TMP6]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 [[A]], i64 0
; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <4 x i16> splat (i16 -1), i16 [[TMP7]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 [[B]], i64 0
; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[_MSPROP]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0
; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i16> [[_MSPROP1]] to i64
; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP10]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP3]]
; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
; CHECK: [[BB6]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
; CHECK: [[BB7]]:
; CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
; CHECK-NEXT: [[_MSPROP2:%.*]] = or i32 [[TMP8]], 0
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.sqadd.i32(i32 [[C]], i32 [[TMP4]])
; CHECK-NEXT: store i32 [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[TMP5]]
;
  %temp1 = insertelement <4 x i16> undef, i16 %A, i64 0
  %temp2 = insertelement <4 x i16> undef, i16 %B, i64 0
  %temp3 = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %temp1, <4 x i16> %temp2)
  %temp4 = extractelement <4 x i32> %temp3, i64 0
  %temp5 = tail call i32 @llvm.aarch64.neon.sqadd.i32(i32 %C, i32 %temp4)
  ret i32 %temp5
}

define i64 @sqdmlal_d(i32 %A, i32 %B, i64 %C) nounwind sanitize_memory {
; CHECK-LABEL: define i64 @sqdmlal_d(
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i64 [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]]
; CHECK: [[BB4]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
; CHECK: [[BB5]]:
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 [[A]], i32 [[B]])
; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 [[C]], i64 [[TMP4]])
; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i64 [[TMP5]]
;
  %temp4 = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %A, i32 %B)
  %temp5 = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %C, i64 %temp4)
  ret i64 %temp5
}

define i32 @sqdmlsl_s(i16 %A, i16 %B, i32 %C) nounwind sanitize_memory {
; CHECK-LABEL: define i32 @sqdmlsl_s(
; CHECK-SAME: i16 [[A:%.*]], i16 [[B:%.*]], i32 [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i16> splat (i16 -1), i16 [[TMP6]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> undef, i16 [[A]], i64 0
; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <4 x i16> splat (i16 -1), i16 [[TMP7]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 [[B]], i64 0
; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i16> [[_MSPROP]] to i64
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0
; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i16> [[_MSPROP1]] to i64
; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP10]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP3]]
; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
; CHECK: [[BB6]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
; CHECK: [[BB7]]:
; CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
; CHECK-NEXT: [[_MSPROP2:%.*]] = or i32 [[TMP8]], 0
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.sqsub.i32(i32 [[C]], i32 [[TMP4]])
; CHECK-NEXT: store i32 [[_MSPROP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[TMP5]]
;
  %temp1 = insertelement <4 x i16> undef, i16 %A, i64 0
  %temp2 = insertelement <4 x i16> undef, i16 %B, i64 0
  %temp3 = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %temp1, <4 x i16> %temp2)
  %temp4 = extractelement <4 x i32> %temp3, i64 0
  %temp5 = tail call i32 @llvm.aarch64.neon.sqsub.i32(i32 %C, i32 %temp4)
  ret i32 %temp5
}

define i64 @sqdmlsl_d(i32 %A, i32 %B, i64 %C) nounwind sanitize_memory {
; CHECK-LABEL: define i64 @sqdmlsl_d(
; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i64 [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]]
; CHECK: [[BB4]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
; CHECK: [[BB5]]:
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 [[A]], i32 [[B]])
; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP3]], 0
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 [[C]], i64 [[TMP4]])
; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i64 [[TMP5]]
;
  %temp4 = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %A, i32 %B)
  %temp5 = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %C, i64 %temp4)
  ret i64 %temp5
}

define <16 x i8> @test_pmull_64(i64 %l, i64 %r) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @test_pmull_64(
; CHECK-SAME: i64 [[L:%.*]], i64 [[R:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[_MSPROP1:%.*]] = or i64 [[_MSPROP]], 0
; CHECK-NEXT: [[TMP3:%.*]] = zext i64 [[_MSPROP1]] to i128
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i128 [[TMP3]] to <16 x i8>
; CHECK-NEXT: [[VAL:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 [[L]], i64 [[R]])
; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i8> [[VAL]]
;
  %val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r)
  ret <16 x i8> %val
}

define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @test_pmull_high_64(
; CHECK-SAME: <2 x i64> [[L:%.*]], <2 x i64> [[R:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
; CHECK-NEXT: [[L_HI:%.*]] = extractelement <2 x i64> [[L]], i32 1
; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
; CHECK-NEXT: [[R_HI:%.*]] = extractelement <2 x i64> [[R]], i32 1
; CHECK-NEXT: [[_MSPROP2:%.*]] = or i64 [[_MSPROP]], [[_MSPROP1]]
; CHECK-NEXT: [[_MSPROP3:%.*]] = or i64 [[_MSPROP2]], 0
; CHECK-NEXT: [[TMP3:%.*]] = zext i64 [[_MSPROP3]] to i128
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i128 [[TMP3]] to <16 x i8>
; CHECK-NEXT: [[VAL:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 [[L_HI]], i64 [[R_HI]])
; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i8> [[VAL]]
;
  %l_hi = extractelement <2 x i64> %l, i32 1
  %r_hi = extractelement <2 x i64> %r, i32 1
  %val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l_hi, i64 %r_hi)
  ret <16 x i8> %val
}

declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64)

define <1 x i64> @test_mul_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) nounwind sanitize_memory {
; CHECK-LABEL: define <1 x i64> @test_mul_v1i64(
; CHECK-SAME: <1 x i64> [[LHS:%.*]], <1 x i64> [[RHS:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[PROD:%.*]] = mul <1 x i64> [[LHS]], [[RHS]]
; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <1 x i64> [[PROD]]
;
  %prod = mul <1 x i64> %lhs, %rhs
  ret <1 x i64> %prod
}
;.
; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
;.