; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s
;
; MemorySanitizer instrumentation of x86 SSE2 intrinsics.
; Each function below wraps one llvm.x86.sse2.* intrinsic call (optionally
; with a preceding load or a trailing shuffle/bitcast). The FileCheck
; assertions pin the exact IR that `opt -passes=msan` emits for it: how the
; parameter shadows are read from @__msan_param_tls, whether a branch to
; @__msan_warning_noreturn is inserted, and which shadow value is stored to
; @__msan_retval_tls.
; Do not hand-edit the assertions; regenerate them with
; utils/update_test_checks.py.

target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; cmp.pd: no warning branch. The result shadow is computed per lane as
; sext(icmp ne (shadow(a0) | shadow(a1)), 0).
define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_sse2_cmp_pd(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = sext <2 x i1> [[TMP4]] to <2 x i64>
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i8 7)
; CHECK-NEXT:    store <2 x i64> [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone


; cmp.sd: only element 0 of the OR-ed operand shadows is tested; the i1 is
; sign-extended to i128 and bitcast to the <2 x i64> result shadow.
define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_sse2_cmp_sd(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i128
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <2 x i64>
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i8 7)
; CHECK-NEXT:    store <2 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone


; comieq.sd: element 0 of the OR-ed operand shadows is compared against zero
; and sign-extended to the i32 result shadow; no warning branch.
define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_sse2_comieq_sd(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone


; comige.sd: same expected shadow sequence as comieq.sd.
define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_sse2_comige_sd(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse2.comige.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone


; comigt.sd: same expected shadow sequence as comieq.sd.
define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_sse2_comigt_sd(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone


; comile.sd: same expected shadow sequence as comieq.sd.
define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_sse2_comile_sd(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse2.comile.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone


; comilt.sd: same expected shadow sequence as comieq.sd.
define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_sse2_comilt_sd(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone


; comineq.sd: same expected shadow sequence as comieq.sd.
define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_sse2_comineq_sd(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone


; cvtpd2dq: the whole operand shadow is checked (bitcast to i128, compared
; with 0); a non-zero shadow branches to __msan_warning_noreturn. The result
; shadow stored is all-zero.
define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) #0 {
; CHECK-LABEL: @test_x86_sse2_cvtpd2dq(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0:%.*]])
; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[RES]]
;
  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone


; cvtpd2dq followed by a shuffle with zeroinitializer and a bitcast to
; <2 x i64>: same operand check as above; the returned shadow is all-zero.
define <2 x i64> @test_mm_cvtpd_epi32_zext(<2 x double> %a0) nounwind #0 {
; CHECK-LABEL: @test_mm_cvtpd_epi32_zext(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0:%.*]])
; CHECK-NEXT:    [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64>
; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[BC]]
;
  %cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}


; Load variant: the pointer's shadow is checked first, then the operand
; shadow is loaded from (address xor 87960930222080, i.e. 0x500000000000)
; and checked in full before the intrinsic call.
define <2 x i64> @test_mm_cvtpd_epi32_zext_load(ptr %p0) nounwind #0 {
; CHECK-LABEL: @test_mm_cvtpd_epi32_zext_load(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[A0:%.*]] = load <2 x double>, ptr [[P0:%.*]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[P0]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0]])
; CHECK-NEXT:    [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64>
; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[BC]]
;
  %a0 = load <2 x double>, ptr %p0
  %cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
  %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}


; cvtpd2ps: full operand-shadow check with a warning branch; the result
; shadow stored is an all-zero <4 x i32>.
define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) #0 {
; CHECK-LABEL: @test_x86_sse2_cvtpd2ps(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0:%.*]])
; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone

; cvtpd2ps followed by a shuffle with zeroinitializer: same operand check;
; the returned shadow is all-zero.
define <4 x float> @test_x86_sse2_cvtpd2ps_zext(<2 x double> %a0) nounwind #0 {
; CHECK-LABEL: @test_x86_sse2_cvtpd2ps_zext(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[CVT:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0:%.*]])
; CHECK-NEXT:    [[RES:%.*]] = shufflevector <4 x float> [[CVT]], <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
  %cvt = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
  %res = shufflevector <4 x float> %cvt, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x float> %res
}

; Load variant of the cvtpd2ps zext test: pointer shadow check, then the
; loaded operand's shadow is checked in full before the intrinsic call.
define <4 x float> @test_x86_sse2_cvtpd2ps_zext_load(ptr %p0) nounwind #0 {
; CHECK-LABEL: @test_x86_sse2_cvtpd2ps_zext_load(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[A0:%.*]] = load <2 x double>, ptr [[P0:%.*]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[P0]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[CVT:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0]])
; CHECK-NEXT:    [[RES:%.*]] = shufflevector <4 x float> [[CVT]], <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
  %a0 = load <2 x double>, ptr %p0
  %cvt = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
  %res = shufflevector <4 x float> %cvt, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x float> %res
}

; cvtps2dq: the <4 x i32> operand shadow is checked in full with a warning
; branch; the result shadow stored is all-zero.
define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) #0 {
; CHECK-LABEL: @test_x86_sse2_cvtps2dq(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> [[A0:%.*]])
; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[RES]]
;
  %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone


; cvtsd2si: only element 0 of the operand shadow is checked (warning branch
; on non-zero); the result shadow stored is i32 0.
define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) #0 {
; CHECK-LABEL: @test_x86_sse2_cvtsd2si(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> [[A0:%.*]])
; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone


; cvtsd2ss: element 0 of a1's shadow is checked (warning branch on non-zero);
; the result shadow is a0's shadow with element 0 replaced by 0.
define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_sse2_cvtsd2ss(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> [[A0:%.*]], <2 x double> [[A1:%.*]])
; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone


; cvtsd2ss with a1 loaded from memory: pointer shadow check, then element 0
; of the loaded shadow is checked; the result shadow is a0's shadow with
; element 0 replaced by 0.
define <4 x float> @test_x86_sse2_cvtsd2ss_load(<4 x float> %a0, ptr %p1) #0 {
; CHECK-LABEL: @test_x86_sse2_cvtsd2ss_load(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[A1:%.*]] = load <2 x double>, ptr [[P1:%.*]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[P1]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i64> [[_MSLD]], i32 0
; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i64 [[TMP8]], 0
; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> [[A0:%.*]], <2 x double> [[A1]])
; CHECK-NEXT:    store <4 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
  %a1 = load <2 x double>, ptr %p1
  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}


; optsize variant of the cvtsd2ss load test; the expected instrumentation is
; identical to the non-optsize version.
define <4 x float> @test_x86_sse2_cvtsd2ss_load_optsize(<4 x float> %a0, ptr %p1) optsize #0 {
; CHECK-LABEL: @test_x86_sse2_cvtsd2ss_load_optsize(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[A1:%.*]] = load <2 x double>, ptr [[P1:%.*]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[P1]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i64> [[_MSLD]], i32 0
; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i64 [[TMP8]], 0
; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> [[A0:%.*]], <2 x double> [[A1]])
; CHECK-NEXT:    store <4 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
  %a1 = load <2 x double>, ptr %p1
  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}


; cvttpd2dq: full operand-shadow check with a warning branch; the result
; shadow stored is all-zero.
define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) #0 {
; CHECK-LABEL: @test_x86_sse2_cvttpd2dq(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0:%.*]])
; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[RES]]
;
  %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone


; cvttpd2dq followed by a shuffle with zeroinitializer and a bitcast to
; <2 x i64>: same operand check; the returned shadow is all-zero.
define <2 x i64> @test_mm_cvttpd_epi32_zext(<2 x double> %a0) nounwind #0 {
; CHECK-LABEL: @test_mm_cvttpd_epi32_zext(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0:%.*]])
; CHECK-NEXT:    [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64>
; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[BC]]
;
  %cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
  %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}


; Load variant of the cvttpd2dq zext test: pointer shadow check, then the
; loaded operand's shadow is checked in full before the intrinsic call.
define <2 x i64> @test_mm_cvttpd_epi32_zext_load(ptr %p0) nounwind #0 {
; CHECK-LABEL: @test_mm_cvttpd_epi32_zext_load(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[A0:%.*]] = load <2 x double>, ptr [[P0:%.*]], align 16
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[P0]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0]])
; CHECK-NEXT:    [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64>
; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[BC]]
;
  %a0 = load <2 x double>, ptr %p0
  %cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
  %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}


; cvttps2dq: the <4 x i32> operand shadow is checked in full with a warning
; branch; the result shadow stored is all-zero.
define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) #0 {
; CHECK-LABEL: @test_x86_sse2_cvttps2dq(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> [[A0:%.*]])
; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[RES]]
;
  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone


; cvttsd2si: only element 0 of the operand shadow is checked (warning branch
; on non-zero); the result shadow stored is i32 0.
define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) #0 {
; CHECK-LABEL: @test_x86_sse2_cvttsd2si(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> [[A0:%.*]])
; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone


; max.pd: no warning branch; the result shadow is the bitwise OR of both
; operand shadows.
define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_sse2_max_pd(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone


586define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) #0 { 587; CHECK-LABEL: @test_x86_sse2_max_sd( 588; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 589; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 590; CHECK-NEXT: call void @llvm.donothing() 591; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] 592; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP3]], <2 x i32> <i32 2, i32 1> 593; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) 594; CHECK-NEXT: store <2 x 
i64> [[TMP4]], ptr @__msan_retval_tls, align 8 595; CHECK-NEXT: ret <2 x double> [[RES]] 596; 597 %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] 598 ret <2 x double> %res 599} 600declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone 601 602 603define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) #0 { 604; CHECK-LABEL: @test_x86_sse2_min_pd( 605; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 606; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 607; CHECK-NEXT: call void @llvm.donothing() 608; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] 609; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) 610; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8 611; CHECK-NEXT: ret <2 x double> [[RES]] 612; 613 %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] 614 ret <2 x double> %res 615} 616declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone 617 618 619define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) #0 { 620; CHECK-LABEL: @test_x86_sse2_min_sd( 621; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 622; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 623; CHECK-NEXT: call void @llvm.donothing() 624; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] 625; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP3]], <2 x i32> <i32 2, i32 1> 626; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]]) 627; CHECK-NEXT: store 
<2 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8 628; CHECK-NEXT: ret <2 x double> [[RES]] 629; 630 %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] 631 ret <2 x double> %res 632} 633declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone 634 635 636define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) #0 { 637; CHECK-LABEL: @test_x86_sse2_movmsk_pd( 638; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 639; CHECK-NEXT: call void @llvm.donothing() 640; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 641; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 642; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] 643; CHECK: 3: 644; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] 645; CHECK-NEXT: unreachable 646; CHECK: 4: 647; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> [[A0:%.*]]) 648; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 649; CHECK-NEXT: ret i32 [[RES]] 650; 651 %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; <i32> [#uses=1] 652 ret i32 %res 653} 654declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone 655 656 657define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) #0 { 658; CHECK-LABEL: @test_x86_sse2_packssdw_128( 659; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 660; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 661; CHECK-NEXT: call void @llvm.donothing() 662; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP1]], zeroinitializer 663; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32> 664; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer 665; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32> 666; CHECK-NEXT: 
[[_MSPROP_VECTOR_PACK:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP4]], <4 x i32> [[TMP6]]) 667; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) 668; CHECK-NEXT: store <8 x i16> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8 669; CHECK-NEXT: ret <8 x i16> [[RES]] 670; 671 %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1] 672 ret <8 x i16> %res 673} 674declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone 675 676 677define <8 x i16> @test_x86_sse2_packssdw_128_fold() #0 { 678; CHECK-LABEL: @test_x86_sse2_packssdw_128_fold( 679; CHECK-NEXT: call void @llvm.donothing() 680; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> zeroinitializer) 681; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> <i32 65535, i32 65536, i32 -1, i32 -131072>) 682; CHECK-NEXT: store <8 x i16> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8 683; CHECK-NEXT: ret <8 x i16> [[RES]] 684; 685 %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> <i32 65535, i32 65536, i32 -1, i32 -131072>) 686 ret <8 x i16> %res 687} 688 689 690define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) #0 { 691; CHECK-LABEL: @test_x86_sse2_packsswb_128( 692; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 693; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 694; CHECK-NEXT: call void @llvm.donothing() 695; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP1]], zeroinitializer 696; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16> 697; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer 698; CHECK-NEXT: 
[[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16> 699; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[TMP4]], <8 x i16> [[TMP6]]) 700; CHECK-NEXT: [[RES:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) 701; CHECK-NEXT: store <16 x i8> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8 702; CHECK-NEXT: ret <16 x i8> [[RES]] 703; 704 %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1] 705 ret <16 x i8> %res 706} 707declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone 708 709 710define <16 x i8> @test_x86_sse2_packsswb_128_fold() #0 { 711; CHECK-LABEL: @test_x86_sse2_packsswb_128_fold( 712; CHECK-NEXT: call void @llvm.donothing() 713; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> zeroinitializer, <8 x i16> zeroinitializer) 714; CHECK-NEXT: [[RES:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> <i16 0, i16 255, i16 256, i16 -1, i16 -1, i16 -255, i16 -256, i16 -32678>, <8 x i16> zeroinitializer) 715; CHECK-NEXT: store <16 x i8> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8 716; CHECK-NEXT: ret <16 x i8> [[RES]] 717; 718 %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <8 x i16> zeroinitializer) 719 ret <16 x i8> %res 720} 721 722 723define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) #0 { 724; CHECK-LABEL: @test_x86_sse2_packuswb_128( 725; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 726; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 727; CHECK-NEXT: call void @llvm.donothing() 728; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP1]], zeroinitializer 729; CHECK-NEXT: [[TMP4:%.*]] = sext 
<8 x i1> [[TMP3]] to <8 x i16> 730; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer 731; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16> 732; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[TMP4]], <8 x i16> [[TMP6]]) 733; CHECK-NEXT: [[RES:%.*]] = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) 734; CHECK-NEXT: store <16 x i8> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8 735; CHECK-NEXT: ret <16 x i8> [[RES]] 736; 737 %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1] 738 ret <16 x i8> %res 739} 740declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone 741 742 743define <16 x i8> @test_x86_sse2_packuswb_128_fold() #0 { 744; CHECK-LABEL: @test_x86_sse2_packuswb_128_fold( 745; CHECK-NEXT: call void @llvm.donothing() 746; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> zeroinitializer, <8 x i16> zeroinitializer) 747; CHECK-NEXT: [[RES:%.*]] = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> <i16 0, i16 255, i16 256, i16 -1, i16 -1, i16 -255, i16 -256, i16 -32678>, <8 x i16> zeroinitializer) 748; CHECK-NEXT: store <16 x i8> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8 749; CHECK-NEXT: ret <16 x i8> [[RES]] 750; 751 %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <8 x i16> zeroinitializer) 752 ret <16 x i8> %res 753} 754 755 756define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) #0 { 757; CHECK-LABEL: @test_x86_sse2_pavg_b( 758; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 759; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 760; CHECK-NEXT: call void @llvm.donothing() 
761; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP2]] 762; CHECK-NEXT: [[RES:%.*]] = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> [[A0:%.*]], <16 x i8> [[A1:%.*]]) 763; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8 764; CHECK-NEXT: ret <16 x i8> [[RES]] 765; 766 %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] 767 ret <16 x i8> %res 768} 769declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone 770 771 772define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) #0 { 773; CHECK-LABEL: @test_x86_sse2_pavg_w( 774; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 775; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 776; CHECK-NEXT: call void @llvm.donothing() 777; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] 778; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) 779; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 780; CHECK-NEXT: ret <8 x i16> [[RES]] 781; 782 %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 783 ret <8 x i16> %res 784} 785declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone 786 787 788define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) #0 { 789; CHECK-LABEL: @test_x86_sse2_pmadd_wd( 790; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 791; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 792; CHECK-NEXT: call void @llvm.donothing() 793; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] 794; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32> 795; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> 
[[TMP4]], zeroinitializer 796; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32> 797; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) 798; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 799; CHECK-NEXT: ret <4 x i32> [[RES]] 800; 801 %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1] 802 ret <4 x i32> %res 803} 804declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone 805 806 807define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) #0 { 808; CHECK-LABEL: @test_x86_sse2_pmovmskb_128( 809; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 810; CHECK-NEXT: call void @llvm.donothing() 811; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 812; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0 813; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]] 814; CHECK: 3: 815; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]] 816; CHECK-NEXT: unreachable 817; CHECK: 4: 818; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> [[A0:%.*]]) 819; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8 820; CHECK-NEXT: ret i32 [[RES]] 821; 822 %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <i32> [#uses=1] 823 ret i32 %res 824} 825declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone 826 827 828define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) #0 { 829; CHECK-LABEL: @test_x86_sse2_pmulh_w( 830; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 831; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 832; CHECK-NEXT: call void @llvm.donothing() 833; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] 834; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> 
@llvm.x86.sse2.pmulh.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) 835; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 836; CHECK-NEXT: ret <8 x i16> [[RES]] 837; 838 %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 839 ret <8 x i16> %res 840} 841declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone 842 843 844define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) #0 { 845; CHECK-LABEL: @test_x86_sse2_pmulhu_w( 846; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 847; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 848; CHECK-NEXT: call void @llvm.donothing() 849; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] 850; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) 851; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8 852; CHECK-NEXT: ret <8 x i16> [[RES]] 853; 854 %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 855 ret <8 x i16> %res 856} 857declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone 858 859 860define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) #0 { 861; CHECK-LABEL: @test_x86_sse2_psad_bw( 862; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8 863; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 864; CHECK-NEXT: call void @llvm.donothing() 865; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i8> [[TMP1]], [[TMP2]] 866; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64> 867; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer 868; CHECK-NEXT: [[TMP6:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64> 869; 
CHECK-NEXT: [[TMP7:%.*]] = lshr <2 x i64> [[TMP6]], splat (i64 48) 870; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> [[A0:%.*]], <16 x i8> [[A1:%.*]]) 871; CHECK-NEXT: store <2 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8 872; CHECK-NEXT: ret <2 x i64> [[RES]] 873; 874 %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1] 875 ret <2 x i64> %res 876} 877declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone 878 879 880define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) #0 { 881; CHECK-LABEL: @test_x86_sse2_psll_d( 882; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 883; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 884; CHECK-NEXT: call void @llvm.donothing() 885; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 886; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 887; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 888; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128 889; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <4 x i32> 890; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) 891; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i32> [[TMP8]], [[TMP7]] 892; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1]]) 893; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8 894; CHECK-NEXT: ret <4 x i32> [[RES]] 895; 896 %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] 897 ret <4 x i32> %res 898} 899declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone 900 901 902define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) #0 { 903; CHECK-LABEL: @test_x86_sse2_psll_q( 904; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, 
ptr @__msan_param_tls, align 8 905; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 906; CHECK-NEXT: call void @llvm.donothing() 907; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 908; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 909; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 910; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128 911; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <2 x i64> 912; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[TMP1]], <2 x i64> [[A1:%.*]]) 913; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i64> [[TMP8]], [[TMP7]] 914; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[A0:%.*]], <2 x i64> [[A1]]) 915; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8 916; CHECK-NEXT: ret <2 x i64> [[RES]] 917; 918 %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] 919 ret <2 x i64> %res 920} 921declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone 922 923 924define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) #0 { 925; CHECK-LABEL: @test_x86_sse2_psll_w( 926; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 927; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 928; CHECK-NEXT: call void @llvm.donothing() 929; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 930; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 931; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 932; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128 933; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <8 x i16> 934; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> [[TMP1]], <8 x i16> [[A1:%.*]]) 935; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> 
[[TMP8]], [[TMP7]] 936; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1]]) 937; CHECK-NEXT: store <8 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 938; CHECK-NEXT: ret <8 x i16> [[RES]] 939; 940 %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 941 ret <8 x i16> %res 942} 943declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone 944 945 946define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) #0 { 947; CHECK-LABEL: @test_x86_sse2_pslli_d( 948; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 949; CHECK-NEXT: call void @llvm.donothing() 950; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> [[TMP1]], i32 7) 951; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer 952; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> [[A0:%.*]], i32 7) 953; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8 954; CHECK-NEXT: ret <4 x i32> [[RES]] 955; 956 %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] 957 ret <4 x i32> %res 958} 959declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone 960 961 962define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) #0 { 963; CHECK-LABEL: @test_x86_sse2_pslli_q( 964; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8 965; CHECK-NEXT: call void @llvm.donothing() 966; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> [[TMP1]], i32 7) 967; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP2]], zeroinitializer 968; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> [[A0:%.*]], i32 7) 969; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8 970; CHECK-NEXT: ret <2 x i64> [[RES]] 971; 972 %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] 
973 ret <2 x i64> %res 974} 975declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone 976 977 978define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) #0 { 979; CHECK-LABEL: @test_x86_sse2_pslli_w( 980; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 981; CHECK-NEXT: call void @llvm.donothing() 982; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> [[TMP1]], i32 7) 983; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP2]], zeroinitializer 984; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> [[A0:%.*]], i32 7) 985; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8 986; CHECK-NEXT: ret <8 x i16> [[RES]] 987; 988 %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] 989 ret <8 x i16> %res 990} 991declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone 992 993 994define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) #0 { 995; CHECK-LABEL: @test_x86_sse2_psra_d( 996; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 997; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 998; CHECK-NEXT: call void @llvm.donothing() 999; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 1000; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 1001; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 1002; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128 1003; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <4 x i32> 1004; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> [[TMP1]], <4 x i32> [[A1:%.*]]) 1005; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i32> [[TMP8]], [[TMP7]] 1006; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1]]) 1007; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8 1008; 
CHECK-NEXT: ret <4 x i32> [[RES]] 1009; 1010 %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] 1011 ret <4 x i32> %res 1012} 1013declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone 1014 1015 1016define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) #0 { 1017; CHECK-LABEL: @test_x86_sse2_psra_w( 1018; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8 1019; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 1020; CHECK-NEXT: call void @llvm.donothing() 1021; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 1022; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64 1023; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0 1024; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128 1025; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <8 x i16> 1026; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> [[TMP1]], <8 x i16> [[A1:%.*]]) 1027; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP8]], [[TMP7]] 1028; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1]]) 1029; CHECK-NEXT: store <8 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 1030; CHECK-NEXT: ret <8 x i16> [[RES]] 1031; 1032 %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] 1033 ret <8 x i16> %res 1034} 1035declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone 1036 1037 1038define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) #0 { 1039; CHECK-LABEL: @test_x86_sse2_psrai_d( 1040; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8 1041; CHECK-NEXT: call void @llvm.donothing() 1042; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> [[TMP1]], i32 7) 1043; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], 
zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> [[A0:%.*]], i32 7)
; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
  %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone


; Arithmetic right shift of 16-bit lanes by the immediate count 7.
; Expected instrumentation: the same intrinsic is applied to the value loaded
; from @__msan_param_tls (the shadow of %a0) with the same count, the result is
; OR'ed with zeroinitializer, and that is stored to @__msan_retval_tls.
define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) #0 {
; CHECK-LABEL: @test_x86_sse2_psrai_w(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> [[TMP1]], i32 7)
; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> [[A0:%.*]], i32 7)
; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[RES]]
;
  %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone


; Logical right shift of 32-bit lanes by a vector count (%a1).
; Expected instrumentation:
;   * the value loaded from @__msan_param_tls + 16 (the shadow of %a1) is
;     bitcast to i128, truncated to its low 64 bits, compared against zero and
;     sign-extended back to a full-width vector mask;
;   * the intrinsic is applied to the shadow of %a0 using the original %a1;
;   * the two are OR'ed and stored to @__msan_retval_tls.
define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) #0 {
; CHECK-LABEL: @test_x86_sse2_psrl_d(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <4 x i32>
; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i32> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1]])
; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
  %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone


; Logical right shift of 64-bit lanes by a vector count (%a1).
; The expected instrumentation has the same shape as for
; @test_x86_sse2_psrl_d, with <2 x i64> vectors: a mask derived from the low
; 64 bits of the count's shadow is OR'ed with the intrinsic applied to the
; shadow of %a0.
define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) #0 {
; CHECK-LABEL: @test_x86_sse2_psrl_q(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <2 x i64>
; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i64> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> [[A0:%.*]], <2 x i64> [[A1]])
; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[RES]]
;
  %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone


; Logical right shift of 16-bit lanes by a vector count (%a1).
; The expected instrumentation has the same shape as for
; @test_x86_sse2_psrl_d, with <8 x i16> vectors.
define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) #0 {
; CHECK-LABEL: @test_x86_sse2_psrl_w(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <8 x i16>
; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> [[TMP1]], <8 x i16> [[A1:%.*]])
; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1]])
; CHECK-NEXT: store <8 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[RES]]
;
  %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone


; Same shift as @test_x86_sse2_psrl_w, but the count vector is loaded from %p.
; Expected instrumentation:
;   * the i64 loaded from @__msan_param_tls + 16 (the shadow of %p) is compared
;     against zero; a non-zero value branches to @__msan_warning_noreturn;
;   * otherwise the count is loaded from %p and its shadow is loaded from the
;     address ptrtoint(%p) xor 87960930222080 (0x500000000000);
;   * from there the shift-shadow computation matches the non-load variant,
;     using the loaded shadow in place of a parameter shadow.
define <8 x i16> @test_x86_sse2_psrl_w_load(<8 x i16> %a0, ptr %p) #0 {
; CHECK-LABEL: @test_x86_sse2_psrl_w_load(
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK: 3:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT: unreachable
; CHECK: 4:
; CHECK-NEXT: [[A1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 16
; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64
; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128
; CHECK-NEXT: [[TMP9:%.*]] = trunc i128 [[TMP8]] to i64
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
; CHECK-NEXT: [[TMP11:%.*]] = sext i1 [[TMP10]] to i128
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i128 [[TMP11]] to <8 x i16>
; CHECK-NEXT: [[TMP13:%.*]] = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> [[TMP2]], <8 x i16> [[A1]])
; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i16> [[TMP13]], [[TMP12]]
; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1]])
; CHECK-NEXT: store <8 x i16> [[TMP14]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[RES]]
;
  %a1 = load <8 x i16>, ptr %p
  %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}


; Logical right shift of 32-bit lanes by the immediate count 7.
; Expected instrumentation: the intrinsic is applied to the shadow of %a0 with
; the same count, OR'ed with zeroinitializer, and stored to
; @__msan_retval_tls.
define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) #0 {
; CHECK-LABEL: @test_x86_sse2_psrli_d(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> [[TMP1]], i32 7)
; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> [[A0:%.*]], i32 7)
; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
  %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone


; Logical right shift of 64-bit lanes by the immediate count 7.
; Same expected instrumentation shape as @test_x86_sse2_psrli_d, with
; <2 x i64> vectors.
define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) #0 {
; CHECK-LABEL: @test_x86_sse2_psrli_q(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> [[TMP1]], i32 7)
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> [[A0:%.*]], i32 7)
; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[RES]]
;
  %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone


; Logical right shift of 16-bit lanes by the immediate count 7.
; Same expected instrumentation shape as @test_x86_sse2_psrli_d, with
; <8 x i16> vectors.
define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) #0 {
; CHECK-LABEL: @test_x86_sse2_psrli_w(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> [[TMP1]], i32 7)
; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> [[A0:%.*]], i32 7)
; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[RES]]
;
  %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone


; Scalar unordered compare (equal) returning i32.
; Expected instrumentation: the shadows of %a0 and %a1 are OR'ed, element 0 of
; the result is extracted and compared against zero, and the i1 is
; sign-extended to i32 and stored to @__msan_retval_tls.
define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_sse2_ucomieq_sd(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone


; Scalar unordered compare (greater-or-equal) returning i32.
; Same expected instrumentation shape as @test_x86_sse2_ucomieq_sd.
define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_sse2_ucomige_sd(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone


; Scalar unordered compare (greater-than) returning i32.
; Same expected instrumentation shape as @test_x86_sse2_ucomieq_sd.
define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_sse2_ucomigt_sd(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone


; Scalar unordered compare (less-or-equal) returning i32.
; Same expected instrumentation shape as @test_x86_sse2_ucomieq_sd.
define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_sse2_ucomile_sd(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone


; Scalar unordered compare (less-than) returning i32.
; Same expected instrumentation shape as @test_x86_sse2_ucomieq_sd.
define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_sse2_ucomilt_sd(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone


; Scalar unordered compare (not-equal) returning i32.
; Same expected instrumentation shape as @test_x86_sse2_ucomieq_sd.
define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_sse2_ucomineq_sd(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone

; No-operand, void intrinsic: the only expected addition is the
; @llvm.donothing() call ahead of the unchanged tail call.
define void @test_x86_sse2_pause() #0 {
; CHECK-LABEL: @test_x86_sse2_pause(
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: tail call void @llvm.x86.sse2.pause()
; CHECK-NEXT: ret void
;
  tail call void @llvm.x86.sse2.pause()
  ret void
}
declare void @llvm.x86.sse2.pause() nounwind

; No-operand, void intrinsic: the only expected addition is the
; @llvm.donothing() call ahead of the unchanged tail call.
define void @lfence() nounwind #0 {
; CHECK-LABEL: @lfence(
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: tail call void @llvm.x86.sse2.lfence()
; CHECK-NEXT: ret void
;
  tail call void @llvm.x86.sse2.lfence()
  ret void
}
declare void @llvm.x86.sse2.lfence() nounwind

; No-operand, void intrinsic: the only expected addition is the
; @llvm.donothing() call ahead of the unchanged tail call.
define void @mfence() nounwind #0 {
; CHECK-LABEL: @mfence(
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: tail call void @llvm.x86.sse2.mfence()
; CHECK-NEXT: ret void
;
  tail call void @llvm.x86.sse2.mfence()
  ret void
}
declare void @llvm.x86.sse2.mfence() nounwind

; Void intrinsic taking a pointer operand.
; Expected instrumentation: the i64 loaded from @__msan_param_tls (the shadow
; of %p) is compared against zero; a non-zero value branches to
; @__msan_warning_noreturn, otherwise the original call is executed unchanged.
define void @clflush(ptr %p) nounwind #0 {
; CHECK-LABEL: @clflush(
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
; CHECK: 2:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT: unreachable
; CHECK: 3:
; CHECK-NEXT: tail call void @llvm.x86.sse2.clflush(ptr [[P:%.*]])
; CHECK-NEXT: ret void
;
  tail call void @llvm.x86.sse2.clflush(ptr %p)
  ret void
}
declare void @llvm.x86.sse2.clflush(ptr) nounwind

; Attribute group #0 is referenced by the test functions in this file and marks
; them sanitize_memory.
attributes #0 = { sanitize_memory }