1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes 2; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s 3 4target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" 5target triple = "x86_64-unknown-linux-gnu" 6 7define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 { 8; CHECK-LABEL: @test_x86_sse41_blendvpd( 9; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 10; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 11; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 12; CHECK-NEXT: call void @llvm.donothing() 13; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[A2:%.*]] to <2 x i64> 14; CHECK-NEXT: [[TMP5:%.*]] = ashr <2 x i64> [[TMP4]], splat (i64 63) 15; CHECK-NEXT: [[TMP6:%.*]] = trunc <2 x i64> [[TMP5]] to <2 x i1> 16; CHECK-NEXT: [[TMP7:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 63) 17; CHECK-NEXT: [[TMP8:%.*]] = trunc <2 x i64> [[TMP7]] to <2 x i1> 18; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> [[TMP2]], <2 x i64> [[TMP3]] 19; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x double> [[A1:%.*]] to <2 x i64> 20; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x double> [[A0:%.*]] to <2 x i64> 21; CHECK-NEXT: [[TMP12:%.*]] = xor <2 x i64> [[TMP10]], [[TMP11]] 22; CHECK-NEXT: [[TMP13:%.*]] = or <2 x i64> [[TMP12]], [[TMP2]] 23; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i64> [[TMP13]], [[TMP3]] 24; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <2 x i1> [[TMP8]], <2 x i64> [[TMP14]], <2 x i64> [[TMP9]] 25; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> [[A0]], <2 x double> [[A1]], <2 x double> [[A2]]) 26; CHECK-NEXT: store <2 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 27; CHECK-NEXT: ret <2 x double> [[RES]] 28; 29 %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1] 30 ret <2 x double> %res 31} 32declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone 33 34 35define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { 36; CHECK-LABEL: @test_x86_sse41_blendvps( 37; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 38; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 39; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 40; CHECK-NEXT: call void @llvm.donothing() 41; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[A2:%.*]] to <4 x i32> 42; CHECK-NEXT: [[TMP5:%.*]] = ashr <4 x i32> [[TMP4]], splat (i32 31) 43; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i1> 44; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 31) 45; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i1> 46; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]] 47; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x float> [[A1:%.*]] to <4 x i32> 48; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x float> [[A0:%.*]] to <4 x i32> 49; CHECK-NEXT: [[TMP12:%.*]] = xor <4 x i32> [[TMP10]], [[TMP11]] 50; CHECK-NEXT: [[TMP13:%.*]] = or <4 x i32> [[TMP12]], [[TMP2]] 51; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i32> [[TMP13]], [[TMP3]] 52; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP14]], <4 x i32> [[TMP9]] 53; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> [[A0]], <4 x float> [[A1]], <4 x float> [[A2]]) 54; CHECK-NEXT: store <4 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 55; CHECK-NEXT: ret <4 x float> [[RES]] 56; 57 %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1] 58 ret <4 x float> %res 59} 60declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone 61 62 63define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) #0 { 64; CHECK-LABEL: @test_x86_sse41_dppd( 65; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 66; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 67; CHECK-NEXT: call void @llvm.donothing() 68; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] 69; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> <i1 false, i1 true>, <2 x i64> [[TMP3]], <2 x i64> zeroinitializer 70; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP4]]) 71; CHECK-NEXT: [[_MSDPP:%.*]] = icmp eq i64 [[TMP5]], 0 72; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[_MSDPP]], <2 x i1> zeroinitializer, <2 x i1> <i1 false, i1 true> 73; CHECK-NEXT: [[_MSDPP1:%.*]] = sext <2 x i1> [[TMP6]] to <2 x i64> 74; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i8 -18) 75; CHECK-NEXT: store <2 x i64> [[_MSDPP1]], ptr @__msan_retval_tls, align 8 76; CHECK-NEXT: ret <2 x double> [[RES]] 77; 78 %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 -18) ; <<2 x double>> [#uses=1] 79 ret <2 x double> %res 80} 81declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone 82 83 84define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) #0 { 85; CHECK-LABEL: @test_x86_sse41_dpps( 86; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 87; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 88; CHECK-NEXT: call void @llvm.donothing() 89; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]] 90; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> [[TMP3]], <4 x i32> zeroinitializer 91; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP4]]) 92; CHECK-NEXT: [[_MSDPP:%.*]] = icmp eq i32 [[TMP5]], 0 93; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[_MSDPP]], <4 x i1> zeroinitializer, <4 x i1> <i1 false, i1 true, i1 true, i1 true> 94; CHECK-NEXT: [[_MSDPP1:%.*]] = sext <4 x i1> [[TMP6]] to <4 x i32> 95; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], i8 -18) 96; CHECK-NEXT: store <4 x i32> [[_MSDPP1]], ptr @__msan_retval_tls, align 8 97; CHECK-NEXT: ret <4 x float> [[RES]] 98; 99 %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 -18) ; <<4 x float>> [#uses=1] 100 ret <4 x float> %res 101} 102declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone 103 104 105define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) #0 { 106; CHECK-LABEL: @test_x86_sse41_insertps( 107; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 108; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 109; CHECK-NEXT: call void @llvm.donothing() 110; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 111; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 112; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 113; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 114; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] 115; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1:![0-9]+]] 116; CHECK: 5: 117; CHECK-NEXT: call void @__msan_warning_noreturn() 118; CHECK-NEXT: unreachable 119; CHECK: 6: 120; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], i8 17) 121; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 122; CHECK-NEXT: ret <4 x float> [[RES]] 123; 124 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 17) ; <<4 x float>> [#uses=1] 125 ret <4 x float> %res 126} 127declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone 128 129 130 131define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) #0 { 132; CHECK-LABEL: @test_x86_sse41_mpsadbw( 133; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 134; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 135; CHECK-NEXT: call void @llvm.donothing() 136; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 137; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0 138; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 139; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0 140; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] 141; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] 142; CHECK: 5: 143; CHECK-NEXT: call void @__msan_warning_noreturn() 144; CHECK-NEXT: unreachable 145; CHECK: 6: 146; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> [[A0:%.*]], <16 x i8> [[A1:%.*]], i8 7) 147; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 148; CHECK-NEXT: ret <8 x i16> [[RES]] 149; 150 %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1] 151 ret <8 x i16> %res 152} 153declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone 154 155define <8 x i16> @test_x86_sse41_mpsadbw_load_op0(ptr %ptr, <16 x i8> %a1) #0 { 156; CHECK-LABEL: @test_x86_sse41_mpsadbw_load_op0( 157; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 158; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 159; CHECK-NEXT: call void @llvm.donothing() 160; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 161; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] 162; CHECK: 3: 163; CHECK-NEXT: call void @__msan_warning_noreturn() 164; CHECK-NEXT: unreachable 165; CHECK: 4: 166; CHECK-NEXT: [[A0:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 16 167; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR]] to i64 168; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 169; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr 170; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16 171; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[_MSLD]] to i128 172; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP8]], 0 173; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 174; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0 175; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] 176; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] 177; CHECK: 10: 178; CHECK-NEXT: call void @__msan_warning_noreturn() 179; CHECK-NEXT: unreachable 180; CHECK: 11: 181; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> [[A0]], <16 x i8> [[A1:%.*]], i8 7) 182; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 183; CHECK-NEXT: ret <8 x i16> [[RES]] 184; 185 %a0 = load <16 x i8>, ptr %ptr 186 %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1] 187 ret <8 x i16> %res 188} 189 190define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) #0 { 191; CHECK-LABEL: @test_x86_sse41_packusdw( 192; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 193; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 194; CHECK-NEXT: call void @llvm.donothing() 195; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP1]], zeroinitializer 196; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32> 197; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer 198; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32> 199; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP4]], <4 x i32> [[TMP6]]) 200; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) 201; CHECK-NEXT: store <8 x i16> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8 202; CHECK-NEXT: ret <8 x i16> [[RES]] 203; 204 %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1] 205 ret <8 x i16> %res 206} 207declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone 208 209 210define <8 x i16> @test_x86_sse41_packusdw_fold() #0 { 211; CHECK-LABEL: @test_x86_sse41_packusdw_fold( 212; CHECK-NEXT: call void @llvm.donothing() 213; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> zeroinitializer) 214; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> zeroinitializer, <4 x i32> <i32 65535, i32 65536, i32 -1, i32 -131072>) 215; CHECK-NEXT: store <8 x i16> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8 216; CHECK-NEXT: ret <8 x i16> [[RES]] 217; 218 %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> zeroinitializer, <4 x i32> <i32 65535, i32 65536, i32 -1, i32 -131072>) 219 ret <8 x i16> %res 220} 221 222 223define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) #0 { 224; CHECK-LABEL: @test_x86_sse41_pblendvb( 225; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 226; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 227; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 228; CHECK-NEXT: call void @llvm.donothing() 229; CHECK-NEXT: [[TMP4:%.*]] = ashr <16 x i8> [[A2:%.*]], splat (i8 7) 230; CHECK-NEXT: [[TMP5:%.*]] = trunc <16 x i8> [[TMP4]] to <16 x i1> 231; CHECK-NEXT: [[TMP6:%.*]] = ashr <16 x i8> [[TMP1]], splat (i8 7) 232; CHECK-NEXT: [[TMP7:%.*]] = trunc <16 x i8> [[TMP6]] to <16 x i1> 233; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP5]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]] 234; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i8> [[A1:%.*]], [[A0:%.*]] 235; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i8> [[TMP9]], [[TMP2]] 236; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i8> [[TMP10]], [[TMP3]] 237; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP7]], <16 x i8> [[TMP11]], <16 x i8> [[TMP8]] 238; CHECK-NEXT: [[RES:%.*]] = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[A0]], <16 x i8> [[A1]], <16 x i8> [[A2]]) 239; CHECK-NEXT: store <16 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 240; CHECK-NEXT: ret <16 x i8> [[RES]] 241; 242 %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1] 243 ret <16 x i8> %res 244} 245declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone 246 247 248define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) #0 { 249; CHECK-LABEL: @test_x86_sse41_phminposuw( 250; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 251; CHECK-NEXT: call void @llvm.donothing() 252; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> [[A0:%.*]]) 253; CHECK-NEXT: store <8 x i16> [[TMP1]], ptr @__msan_retval_tls, align 8 254; CHECK-NEXT: ret <8 x i16> [[RES]] 255; 256 %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1] 257 ret <8 x i16> %res 258} 259declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone 260 261 262define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) #0 { 263; CHECK-LABEL: @test_x86_sse41_ptestc( 264; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 265; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 266; CHECK-NEXT: call void @llvm.donothing() 267; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] 268; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer 269; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i1> [[TMP4]] to i2 270; CHECK-NEXT: [[TMP6:%.*]] = zext i2 [[TMP5]] to i32 271; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse41.ptestc(<2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]]) 272; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 273; CHECK-NEXT: ret i32 [[RES]] 274; 275 %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1] 276 ret i32 %res 277} 278declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone 279 280 281define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) #0 { 282; CHECK-LABEL: @test_x86_sse41_ptestnzc( 283; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 284; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 285; CHECK-NEXT: call void @llvm.donothing() 286; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] 287; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer 288; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i1> [[TMP4]] to i2 289; CHECK-NEXT: [[TMP6:%.*]] = zext i2 [[TMP5]] to i32 290; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]]) 291; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 292; CHECK-NEXT: ret i32 [[RES]] 293; 294 %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1] 295 ret i32 %res 296} 297declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone 298 299 300define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) #0 { 301; CHECK-LABEL: @test_x86_sse41_ptestz( 302; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 303; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 304; CHECK-NEXT: call void @llvm.donothing() 305; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] 306; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer 307; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i1> [[TMP4]] to i2 308; CHECK-NEXT: [[TMP6:%.*]] = zext i2 [[TMP5]] to i32 309; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse41.ptestz(<2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]]) 310; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8 311; CHECK-NEXT: ret i32 [[RES]] 312; 313 %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1] 314 ret i32 %res 315} 316declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone 317 318 319define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) #0 { 320; CHECK-LABEL: @test_x86_sse41_round_pd( 321; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 322; CHECK-NEXT: call void @llvm.donothing() 323; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> [[A0:%.*]], i32 7) 324; CHECK-NEXT: store <2 x i64> [[TMP1]], ptr @__msan_retval_tls, align 8 325; CHECK-NEXT: ret <2 x double> [[RES]] 326; 327 %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1] 328 ret <2 x double> %res 329} 330declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone 331 332 333define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) #0 { 334; CHECK-LABEL: @test_x86_sse41_round_ps( 335; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 336; CHECK-NEXT: call void @llvm.donothing() 337; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> [[A0:%.*]], i32 7) 338; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr @__msan_retval_tls, align 8 339; CHECK-NEXT: ret <4 x float> [[RES]] 340; 341 %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1] 342 ret <4 x float> %res 343} 344declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone 345 346 347define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) #0 { 348; CHECK-LABEL: @test_x86_sse41_round_sd( 349; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 350; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 351; CHECK-NEXT: call void @llvm.donothing() 352; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP2]], <2 x i32> <i32 2, i32 1> 353; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 7) 354; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8 355; CHECK-NEXT: ret <2 x double> [[RES]] 356; 357 %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1] 358 ret <2 x double> %res 359} 360declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone 361 362 363define <2 x double> @test_x86_sse41_round_sd_load(<2 x double> %a0, ptr %a1) #0 { 364; CHECK-LABEL: @test_x86_sse41_round_sd_load( 365; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 366; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 367; CHECK-NEXT: call void @llvm.donothing() 368; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 369; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] 370; CHECK: 3: 371; CHECK-NEXT: call void @__msan_warning_noreturn() 372; CHECK-NEXT: unreachable 373; CHECK: 4: 374; CHECK-NEXT: [[A1B:%.*]] = load <2 x double>, ptr [[A1:%.*]], align 16 375; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A1]] to i64 376; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 377; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr 378; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16 379; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[_MSLD]], <2 x i32> <i32 2, i32 1> 380; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1B]], i32 7) 381; CHECK-NEXT: store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8 382; CHECK-NEXT: ret <2 x double> [[RES]] 383; 384 %a1b = load <2 x double>, ptr %a1 385 %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1b, i32 7) ; <<2 x double>> [#uses=1] 386 ret <2 x double> %res 387} 388 389 390define <4 x float> @test_x86_sse41_round_ss_load(<4 x float> %a0, ptr %a1) #0 { 391; CHECK-LABEL: @test_x86_sse41_round_ss_load( 392; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 393; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 394; CHECK-NEXT: call void @llvm.donothing() 395; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 396; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] 397; CHECK: 3: 398; CHECK-NEXT: call void @__msan_warning_noreturn() 399; CHECK-NEXT: unreachable 400; CHECK: 4: 401; CHECK-NEXT: [[A1B:%.*]] = load <4 x float>, ptr [[A1:%.*]], align 16 402; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A1]] to i64 403; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 404; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr 405; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 406; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[_MSLD]], <4 x i32> <i32 4, i32 1, i32 2, i32 3> 407; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1B]], i32 7) 408; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8 409; CHECK-NEXT: ret <4 x float> [[RES]] 410; 411 %a1b = load <4 x float>, ptr %a1 412 %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1b, i32 7) ; <<4 x float>> [#uses=1] 413 ret <4 x float> %res 414} 415declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone 416 417attributes #0 = { sanitize_memory } 418