; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; InstCombine tests for the x86 SSE scalar ("ss") intrinsics.
;
; Most tests build a <4 x float> whose lane 0 comes from a function argument
; and whose lanes 1-3 hold constants, feed it to an intrinsic, and then either
; extract one lane of the result or return the intrinsic result directly.
; The numeric suffix in a test name is only a per-intrinsic sequence number.

;
; Unary intrinsics: rcp.ss / sqrt.ss / rsqrt.ss
;
; Extracting lane 0 keeps the rcp/rsqrt intrinsic call but drops the constant
; upper-lane inserts from its operand; for sqrt.ss the expected output is a
; scalar call to llvm.sqrt.f32. Extracting an upper lane is expected to fold
; to the constant that was inserted into that lane of the operand.
;

define float @test_rcp_ss_0(float %a) {
; CHECK-LABEL: @test_rcp_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i64 0
; CHECK-NEXT:    ret float [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
  %6 = extractelement <4 x float> %5, i32 0
  ret float %6
}

define float @test_rcp_ss_1(float %a) {
; CHECK-LABEL: @test_rcp_ss_1(
; CHECK-NEXT:    ret float 1.000000e+00
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
  %6 = extractelement <4 x float> %5, i32 1
  ret float %6
}

define float @test_sqrt_ss_0(float %a) {
; CHECK-LABEL: @test_sqrt_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = call float @llvm.sqrt.f32(float [[A:%.*]])
; CHECK-NEXT:    ret float [[TMP1]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
  %6 = extractelement <4 x float> %5, i32 0
  ret float %6
}

define float @test_sqrt_ss_2(float %a) {
; CHECK-LABEL: @test_sqrt_ss_2(
; CHECK-NEXT:    ret float 2.000000e+00
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
  %6 = extractelement <4 x float> %5, i32 2
  ret float %6
}

define float @test_rsqrt_ss_0(float %a) {
; CHECK-LABEL: @test_rsqrt_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i64 0
; CHECK-NEXT:    ret float [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
  %6 = extractelement <4 x float> %5, i32 0
  ret float %6
}

define float @test_rsqrt_ss_3(float %a) {
; CHECK-LABEL: @test_rsqrt_ss_3(
; CHECK-NEXT:    ret float 3.000000e+00
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
  %6 = extractelement <4 x float> %5, i32 3
  ret float %6
}

;
; Binary arithmetic intrinsics: add.ss / sub.ss / mul.ss / div.ss
;
; Extracting lane 0 is expected to become the plain scalar fadd/fsub/fmul/fdiv
; on the two lane-0 inputs (including the case where both operands are the
; same vector). Extracting an upper lane is expected to fold to the constant
; inserted into that lane of the first operand.
;

define float @test_add_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_add_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = fadd float [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    ret float [[TMP1]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %8)
  %r = extractelement <4 x float> %9, i32 0
  ret float %r
}

define float @test_add_ss_1(float %a, float %b) {
; CHECK-LABEL: @test_add_ss_1(
; CHECK-NEXT:    ret float 1.000000e+00
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %5)
  %7 = extractelement <4 x float> %6, i32 1
  ret float %7
}

define float @test_add_ss_2(float %a) {
; CHECK-LABEL: @test_add_ss_2(
; CHECK-NEXT:    [[TMP1:%.*]] = fadd float [[A:%.*]], [[A]]
; CHECK-NEXT:    ret float [[TMP1]]
;
  %1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
  %2 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %1, <4 x float> %1)
  %3 = extractelement <4 x float> %2, i32 0
  ret float %3
}

define float @test_sub_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_sub_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = fsub float [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    ret float [[TMP1]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %8)
  %r = extractelement <4 x float> %9, i32 0
  ret float %r
}

define float @test_sub_ss_2(float %a, float %b) {
; CHECK-LABEL: @test_sub_ss_2(
; CHECK-NEXT:    ret float 2.000000e+00
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %5)
  %7 = extractelement <4 x float> %6, i32 2
  ret float %7
}

define float @test_sub_ss_3(float %a) {
; CHECK-LABEL: @test_sub_ss_3(
; CHECK-NEXT:    [[TMP1:%.*]] = fsub float [[A:%.*]], [[A]]
; CHECK-NEXT:    ret float [[TMP1]]
;
  %1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
  %2 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %1, <4 x float> %1)
  %3 = extractelement <4 x float> %2, i32 0
  ret float %3
}

define float @test_mul_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_mul_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul float [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    ret float [[TMP1]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %8)
  %r = extractelement <4 x float> %9, i32 0
  ret float %r
}

define float @test_mul_ss_3(float %a, float %b) {
; CHECK-LABEL: @test_mul_ss_3(
; CHECK-NEXT:    ret float 3.000000e+00
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %5)
  %7 = extractelement <4 x float> %6, i32 3
  ret float %7
}

define float @test_mul_ss_4(float %a) {
; CHECK-LABEL: @test_mul_ss_4(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul float [[A:%.*]], [[A]]
; CHECK-NEXT:    ret float [[TMP1]]
;
  %1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
  %2 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %1, <4 x float> %1)
  %3 = extractelement <4 x float> %2, i32 0
  ret float %3
}

define float @test_div_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_div_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = fdiv float [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    ret float [[TMP1]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %8)
  %r = extractelement <4 x float> %9, i32 0
  ret float %r
}

define float @test_div_ss_1(float %a, float %b) {
; CHECK-LABEL: @test_div_ss_1(
; CHECK-NEXT:    ret float 1.000000e+00
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %5)
  %7 = extractelement <4 x float> %6, i32 1
  ret float %7
}

define float @test_div_ss_2(float %a) {
; CHECK-LABEL: @test_div_ss_2(
; CHECK-NEXT:    [[TMP1:%.*]] = fdiv float [[A:%.*]], [[A]]
; CHECK-NEXT:    ret float [[TMP1]]
;
  %1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
  %2 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %1, <4 x float> %1)
  %3 = extractelement <4 x float> %2, i32 0
  ret float %3
}

;
; min.ss / max.ss / cmp.ss
;
; These stay as intrinsic calls in the expected output. What is simplified is
; the operands: constant inserts into the upper lanes of the second operand
; are dropped, and when only lane 0 of the result is used the upper lanes of
; both operands are dropped as well. Extracting an upper lane is expected to
; fold to the constant inserted into that lane of the first operand.
;

define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_min_ss(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %3)
  ret <4 x float> %4
}

define float @test_min_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_min_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i64 0
; CHECK-NEXT:    ret float [[TMP4]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %8)
  %10 = extractelement <4 x float> %9, i32 0
  ret float %10
}

define float @test_min_ss_2(float %a, float %b) {
; CHECK-LABEL: @test_min_ss_2(
; CHECK-NEXT:    ret float 2.000000e+00
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %5)
  %7 = extractelement <4 x float> %6, i32 2
  ret float %7
}

define float @test_min_ss_3(float %a) {
; CHECK-LABEL: @test_min_ss_3(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP1]], <4 x float> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i64 0
; CHECK-NEXT:    ret float [[TMP3]]
;
  %1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
  %2 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %1, <4 x float> %1)
  %3 = extractelement <4 x float> %2, i32 0
  ret float %3
}

define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_max_ss(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]])
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %3)
  ret <4 x float> %4
}

define float @test_max_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_max_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i64 0
; CHECK-NEXT:    ret float [[TMP4]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %8)
  %10 = extractelement <4 x float> %9, i32 0
  ret float %10
}

define float @test_max_ss_3(float %a, float %b) {
; CHECK-LABEL: @test_max_ss_3(
; CHECK-NEXT:    ret float 3.000000e+00
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %5)
  %7 = extractelement <4 x float> %6, i32 3
  ret float %7
}

define float @test_max_ss_4(float %a) {
; CHECK-LABEL: @test_max_ss_4(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP1]], <4 x float> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i64 0
; CHECK-NEXT:    ret float [[TMP3]]
;
  %1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
  %2 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %1, <4 x float> %1)
  %3 = extractelement <4 x float> %2, i32 0
  ret float %3
}

define <4 x float> @test_cmp_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_cmp_ss(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i8 0)
; CHECK-NEXT:    ret <4 x float> [[TMP1]]
;
  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
  %4 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %3, i8 0)
  ret <4 x float> %4
}

define float @test_cmp_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_cmp_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]], i8 0)
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[TMP3]], i64 0
; CHECK-NEXT:    ret float [[R]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %8, i8 0)
  %r = extractelement <4 x float> %9, i32 0
  ret float %r
}

define float @test_cmp_ss_1(float %a, float %b) {
; CHECK-LABEL: @test_cmp_ss_1(
; CHECK-NEXT:    ret float 1.000000e+00
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %5, i8 0)
  %7 = extractelement <4 x float> %6, i32 1
  ret float %7
}

define float @test_cmp_ss_2(float %a) {
; CHECK-LABEL: @test_cmp_ss_2(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[TMP1]], <4 x float> [[TMP1]], i8 3)
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i64 0
; CHECK-NEXT:    ret float [[TMP3]]
;
  %1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
  %2 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %1, <4 x float> %1, i8 3)
  %3 = extractelement <4 x float> %2, i32 0
  ret float %3
}

;
; comi*.ss / ucomi*.ss comparisons returning i32
;
; The intrinsic call is expected to remain, with the constant upper-lane
; inserts removed from both operands so that each is a single insert of the
; scalar argument into lane 0 of a poison vector.
;

define i32 @test_comieq_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_comieq_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> %4, <4 x float> %8)
  ret i32 %9
}

define i32 @test_comige_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_comige_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> %4, <4 x float> %8)
  ret i32 %9
}

define i32 @test_comigt_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_comigt_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> %4, <4 x float> %8)
  ret i32 %9
}

define i32 @test_comile_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_comile_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> %4, <4 x float> %8)
  ret i32 %9
}

define i32 @test_comilt_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_comilt_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> %4, <4 x float> %8)
  ret i32 %9
}

define i32 @test_comineq_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_comineq_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> %4, <4 x float> %8)
  ret i32 %9
}

define i32 @test_ucomieq_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_ucomieq_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %4, <4 x float> %8)
  ret i32 %9
}

define i32 @test_ucomige_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_ucomige_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %4, <4 x float> %8)
  ret i32 %9
}

define i32 @test_ucomigt_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_ucomigt_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %4, <4 x float> %8)
  ret i32 %9
}

define i32 @test_ucomile_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_ucomile_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %4, <4 x float> %8)
  ret i32 %9
}

define i32 @test_ucomilt_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_ucomilt_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %4, <4 x float> %8)
  ret i32 %9
}

define i32 @test_ucomineq_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_ucomineq_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = insertelement <4 x float> undef, float %b, i32 0
  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
  %9 = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %4, <4 x float> %8)
  ret i32 %9
}

; Declarations of every intrinsic exercised above.

declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>)
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>)
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>)

declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8)

declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>)

declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>)