; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512dq,+avx512bw | FileCheck %s --check-prefix=AVX

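; These tests check that a binop of a loaded scalar and a constant that feeds
; element 0 of a vector result is performed as scalar code and then moved into
; the vector, rather than being widened into a vector op with a constant
; vector operand. Shifts right by a constant are an exception (see below).
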
define <4 x i32> @add_op1_constant(ptr %p) nounwind {
; SSE-LABEL: add_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movl (%rdi), %eax
; SSE-NEXT:    addl $42, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: add_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movl (%rdi), %eax
; AVX-NEXT:    addl $42, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, ptr %p
  %b = add i32 %x, 42
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

; Code and data size may increase by using more vector ops, so the transform is disabled here.

define <4 x i32> @add_op1_constant_optsize(ptr %p) nounwind optsize {
; SSE-LABEL: add_op1_constant_optsize:
; SSE:       # %bb.0:
; SSE-NEXT:    movl (%rdi), %eax
; SSE-NEXT:    addl $42, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: add_op1_constant_optsize:
; AVX:       # %bb.0:
; AVX-NEXT:    movl (%rdi), %eax
; AVX-NEXT:    addl $42, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, ptr %p
  %b = add i32 %x, 42
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

define <8 x i16> @add_op0_constant(ptr %p) nounwind {
; SSE-LABEL: add_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movzwl (%rdi), %eax
; SSE-NEXT:    addl $42, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: add_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movzwl (%rdi), %eax
; AVX-NEXT:    addl $42, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i16, ptr %p
  %b = add i16 42, %x
  %r = insertelement <8 x i16> undef, i16 %b, i32 0
  ret <8 x i16> %r
}

define <2 x i64> @sub_op0_constant(ptr %p) nounwind {
; SSE-LABEL: sub_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movl $42, %eax
; SSE-NEXT:    subq (%rdi), %rax
; SSE-NEXT:    movq %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sub_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movl $42, %eax
; AVX-NEXT:    subq (%rdi), %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    retq
  %x = load i64, ptr %p
  %b = sub i64 42, %x
  %r = insertelement <2 x i64> undef, i64 %b, i32 0
  ret <2 x i64> %r
}

define <16 x i8> @sub_op1_constant(ptr %p) nounwind {
; SSE-LABEL: sub_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movzbl (%rdi), %eax
; SSE-NEXT:    addb $-42, %al
; SSE-NEXT:    movzbl %al, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sub_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movzbl (%rdi), %eax
; AVX-NEXT:    addb $-42, %al
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i8, ptr %p
  %b = sub i8 %x, 42
  %r = insertelement <16 x i8> undef, i8 %b, i32 0
  ret <16 x i8> %r
}

define <4 x i32> @mul_op1_constant(ptr %p) nounwind {
; SSE-LABEL: mul_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    imull $42, (%rdi), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mul_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    imull $42, (%rdi), %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, ptr %p
  %b = mul i32 %x, 42
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

define <8 x i16> @mul_op0_constant(ptr %p) nounwind {
; SSE-LABEL: mul_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movzwl (%rdi), %eax
; SSE-NEXT:    imull $42, %eax, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mul_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movzwl (%rdi), %eax
; AVX-NEXT:    imull $42, %eax, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i16, ptr %p
  %b = mul i16 42, %x
  %r = insertelement <8 x i16> undef, i16 %b, i32 0
  ret <8 x i16> %r
}

define <4 x i32> @and_op1_constant(ptr %p) nounwind {
; SSE-LABEL: and_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movl (%rdi), %eax
; SSE-NEXT:    andl $42, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: and_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movl (%rdi), %eax
; AVX-NEXT:    andl $42, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, ptr %p
  %b = and i32 %x, 42
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

define <2 x i64> @or_op1_constant(ptr %p) nounwind {
; SSE-LABEL: or_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movq (%rdi), %rax
; SSE-NEXT:    orq $42, %rax
; SSE-NEXT:    movq %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: or_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movq (%rdi), %rax
; AVX-NEXT:    orq $42, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    retq
  %x = load i64, ptr %p
  %b = or i64 %x, 42
  %r = insertelement <2 x i64> undef, i64 %b, i32 0
  ret <2 x i64> %r
}

define <8 x i16> @xor_op1_constant(ptr %p) nounwind {
; SSE-LABEL: xor_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movzwl (%rdi), %eax
; SSE-NEXT:    xorl $42, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: xor_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movzwl (%rdi), %eax
; AVX-NEXT:    xorl $42, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i16, ptr %p
  %b = xor i16 %x, 42
  %r = insertelement <8 x i16> undef, i16 %b, i32 0
  ret <8 x i16> %r
}

define <4 x i32> @shl_op0_constant(ptr %p) nounwind {
; SSE-LABEL: shl_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movzbl (%rdi), %ecx
; SSE-NEXT:    movl $42, %eax
; SSE-NEXT:    shll %cl, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shl_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movzbl (%rdi), %ecx
; AVX-NEXT:    movl $42, %eax
; AVX-NEXT:    shll %cl, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, ptr %p
  %b = shl i32 42, %x
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

define <16 x i8> @shl_op1_constant(ptr %p) nounwind {
; SSE-LABEL: shl_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movzbl (%rdi), %eax
; SSE-NEXT:    shlb $5, %al
; SSE-NEXT:    movzbl %al, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shl_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movzbl (%rdi), %eax
; AVX-NEXT:    shlb $5, %al
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i8, ptr %p
  %b = shl i8 %x, 5
  %r = insertelement <16 x i8> undef, i8 %b, i32 0
  ret <16 x i8> %r
}

define <2 x i64> @lshr_op0_constant(ptr %p) nounwind {
; SSE-LABEL: lshr_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movzbl (%rdi), %ecx
; SSE-NEXT:    movl $42, %eax
; SSE-NEXT:    shrq %cl, %rax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: lshr_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movzbl (%rdi), %ecx
; AVX-NEXT:    movl $42, %eax
; AVX-NEXT:    shrq %cl, %rax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i64, ptr %p
  %b = lshr i64 42, %x
  %r = insertelement <2 x i64> undef, i64 %b, i32 0
  ret <2 x i64> %r
}

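; A shift right by a constant amount can use the vector shift-by-immediate
; instructions, so in this test (and in ashr_op1_constant below) the loaded
; value is expected to be moved into the vector first and shifted there.
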
define <4 x i32> @lshr_op1_constant(ptr %p) nounwind {
; SSE-LABEL: lshr_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    psrld $17, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: lshr_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vpsrld $17, %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load i32, ptr %p
  %b = lshr i32 %x, 17
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

define <8 x i16> @ashr_op0_constant(ptr %p) nounwind {
; SSE-LABEL: ashr_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movzbl (%rdi), %ecx
; SSE-NEXT:    movl $-42, %eax
; SSE-NEXT:    sarl %cl, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ashr_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movzbl (%rdi), %ecx
; AVX-NEXT:    movl $-42, %eax
; AVX-NEXT:    sarl %cl, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i16, ptr %p
  %b = ashr i16 -42, %x
  %r = insertelement <8 x i16> undef, i16 %b, i32 0
  ret <8 x i16> %r
}

define <8 x i16> @ashr_op1_constant(ptr %p) nounwind {
; SSE-LABEL: ashr_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movswl (%rdi), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    psrad $7, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ashr_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movswl (%rdi), %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    vpsrad $7, %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load i16, ptr %p
  %b = ashr i16 %x, 7
  %r = insertelement <8 x i16> undef, i16 %b, i32 0
  ret <8 x i16> %r
}

define <4 x i32> @sdiv_op0_constant(ptr %p) nounwind {
; SSE-LABEL: sdiv_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movl $42, %eax
; SSE-NEXT:    xorl %edx, %edx
; SSE-NEXT:    idivl (%rdi)
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movl $42, %eax
; AVX-NEXT:    xorl %edx, %edx
; AVX-NEXT:    idivl (%rdi)
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, ptr %p
  %b = sdiv i32 42, %x
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

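; Division by a constant gets expanded to a multiply by a magic constant plus
; shift/add fixups rather than a real division; 0xC30D below is presumably the
; signed reciprocal magic number for dividing an i16 by 42.
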
define <8 x i16> @sdiv_op1_constant(ptr %p) nounwind {
; SSE-LABEL: sdiv_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movswl (%rdi), %eax
; SSE-NEXT:    imull $-15603, %eax, %ecx # imm = 0xC30D
; SSE-NEXT:    shrl $16, %ecx
; SSE-NEXT:    addl %eax, %ecx
; SSE-NEXT:    movzwl %cx, %eax
; SSE-NEXT:    movswl %ax, %ecx
; SSE-NEXT:    shrl $15, %eax
; SSE-NEXT:    sarl $5, %ecx
; SSE-NEXT:    addl %eax, %ecx
; SSE-NEXT:    movd %ecx, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movswl (%rdi), %eax
; AVX-NEXT:    imull $-15603, %eax, %ecx # imm = 0xC30D
; AVX-NEXT:    shrl $16, %ecx
; AVX-NEXT:    addl %eax, %ecx
; AVX-NEXT:    movzwl %cx, %eax
; AVX-NEXT:    movswl %ax, %ecx
; AVX-NEXT:    shrl $15, %eax
; AVX-NEXT:    sarl $5, %ecx
; AVX-NEXT:    addl %eax, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm0
; AVX-NEXT:    retq
  %x = load i16, ptr %p
  %b = sdiv i16 %x, 42
  %r = insertelement <8 x i16> undef, i16 %b, i32 0
  ret <8 x i16> %r
}

define <8 x i16> @srem_op0_constant(ptr %p) nounwind {
; SSE-LABEL: srem_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movw $42, %ax
; SSE-NEXT:    xorl %edx, %edx
; SSE-NEXT:    idivw (%rdi)
; SSE-NEXT:    # kill: def $dx killed $dx def $edx
; SSE-NEXT:    movd %edx, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: srem_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movw $42, %ax
; AVX-NEXT:    xorl %edx, %edx
; AVX-NEXT:    idivw (%rdi)
; AVX-NEXT:    # kill: def $dx killed $dx def $edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    retq
  %x = load i16, ptr %p
  %b = srem i16 42, %x
  %r = insertelement <8 x i16> undef, i16 %b, i32 0
  ret <8 x i16> %r
}

define <4 x i32> @srem_op1_constant(ptr %p) nounwind {
; SSE-LABEL: srem_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movslq (%rdi), %rax
; SSE-NEXT:    imulq $818089009, %rax, %rcx # imm = 0x30C30C31
; SSE-NEXT:    movq %rcx, %rdx
; SSE-NEXT:    shrq $63, %rdx
; SSE-NEXT:    sarq $35, %rcx
; SSE-NEXT:    addl %edx, %ecx
; SSE-NEXT:    imull $42, %ecx, %ecx
; SSE-NEXT:    subl %ecx, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: srem_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movslq (%rdi), %rax
; AVX-NEXT:    imulq $818089009, %rax, %rcx # imm = 0x30C30C31
; AVX-NEXT:    movq %rcx, %rdx
; AVX-NEXT:    shrq $63, %rdx
; AVX-NEXT:    sarq $35, %rcx
; AVX-NEXT:    addl %edx, %ecx
; AVX-NEXT:    imull $42, %ecx, %ecx
; AVX-NEXT:    subl %ecx, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, ptr %p
  %b = srem i32 %x, 42
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

define <4 x i32> @udiv_op0_constant(ptr %p) nounwind {
; SSE-LABEL: udiv_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movl $42, %eax
; SSE-NEXT:    xorl %edx, %edx
; SSE-NEXT:    divl (%rdi)
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: udiv_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movl $42, %eax
; AVX-NEXT:    xorl %edx, %edx
; AVX-NEXT:    divl (%rdi)
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, ptr %p
  %b = udiv i32 42, %x
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

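; The unsigned i64 division by 42 below also avoids a divide: x is pre-shifted
; right by 1 to divide out the even factor 2, and the quotient is presumably
; the high half of the product with ceil(2^68 / 21), shifted right by 4
; (i.e. x/42 == ((x >> 1) * 0xC30C30C30C30C30D) >> 68).
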
define <2 x i64> @udiv_op1_constant(ptr %p) nounwind {
; SSE-LABEL: udiv_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movq (%rdi), %rax
; SSE-NEXT:    shrq %rax
; SSE-NEXT:    movabsq $-4392081922311798003, %rcx # imm = 0xC30C30C30C30C30D
; SSE-NEXT:    mulq %rcx
; SSE-NEXT:    movq %rdx, %xmm0
; SSE-NEXT:    psrlq $4, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: udiv_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movq (%rdi), %rax
; AVX-NEXT:    shrq %rax
; AVX-NEXT:    movabsq $-4392081922311798003, %rcx # imm = 0xC30C30C30C30C30D
; AVX-NEXT:    mulq %rcx
; AVX-NEXT:    vmovq %rdx, %xmm0
; AVX-NEXT:    vpsrlq $4, %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load i64, ptr %p
  %b = udiv i64 %x, 42
  %r = insertelement <2 x i64> undef, i64 %b, i32 0
  ret <2 x i64> %r
}

define <2 x i64> @urem_op0_constant(ptr %p) nounwind {
; SSE-LABEL: urem_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movl $42, %eax
; SSE-NEXT:    xorl %edx, %edx
; SSE-NEXT:    divq (%rdi)
; SSE-NEXT:    movq %rdx, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: urem_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movl $42, %eax
; AVX-NEXT:    xorl %edx, %edx
; AVX-NEXT:    divq (%rdi)
; AVX-NEXT:    vmovq %rdx, %xmm0
; AVX-NEXT:    retq
  %x = load i64, ptr %p
  %b = urem i64 42, %x
  %r = insertelement <2 x i64> undef, i64 %b, i32 0
  ret <2 x i64> %r
}

define <16 x i8> @urem_op1_constant(ptr %p) nounwind {
; SSE-LABEL: urem_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movzbl (%rdi), %eax
; SSE-NEXT:    movl %eax, %ecx
; SSE-NEXT:    shrb %cl
; SSE-NEXT:    movzbl %cl, %ecx
; SSE-NEXT:    imull $49, %ecx, %ecx
; SSE-NEXT:    shrl $10, %ecx
; SSE-NEXT:    imull $42, %ecx, %ecx
; SSE-NEXT:    subb %cl, %al
; SSE-NEXT:    movzbl %al, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: urem_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movzbl (%rdi), %eax
; AVX-NEXT:    movl %eax, %ecx
; AVX-NEXT:    shrb %cl
; AVX-NEXT:    movzbl %cl, %ecx
; AVX-NEXT:    imull $49, %ecx, %ecx
; AVX-NEXT:    shrl $10, %ecx
; AVX-NEXT:    imull $42, %ecx, %ecx
; AVX-NEXT:    subb %cl, %al
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i8, ptr %p
  %b = urem i8 %x, 42
  %r = insertelement <16 x i8> undef, i8 %b, i32 0
  ret <16 x i8> %r
}

define <4 x float> @fadd_op1_constant(ptr %p) nounwind {
; SSE-LABEL: fadd_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fadd_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load float, ptr %p
  %b = fadd float %x, 42.0
  %r = insertelement <4 x float> undef, float %b, i32 0
  ret <4 x float> %r
}

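; The fsub test with a constant RHS below is expected to show up as an add
; (presumably because 'x - 42.0' is folded to 'x + (-42.0)'), with the negated
; constant loaded from the constant pool.
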
define <2 x double> @fsub_op1_constant(ptr %p) nounwind {
; SSE-LABEL: fsub_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    addsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fsub_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vaddsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load double, ptr %p
  %b = fsub double %x, 42.0
  %r = insertelement <2 x double> undef, double %b, i32 0
  ret <2 x double> %r
}

define <4 x float> @fsub_op0_constant(ptr %p) nounwind {
; SSE-LABEL: fsub_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; SSE-NEXT:    subss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fsub_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    vsubss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load float, ptr %p
  %b = fsub float 42.0, %x
  %r = insertelement <4 x float> undef, float %b, i32 0
  ret <4 x float> %r
}

define <4 x float> @fmul_op1_constant(ptr %p) nounwind {
; SSE-LABEL: fmul_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fmul_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load float, ptr %p
  %b = fmul float %x, 42.0
  %r = insertelement <4 x float> undef, float %b, i32 0
  ret <4 x float> %r
}

define <2 x double> @fdiv_op1_constant(ptr %p) nounwind {
; SSE-LABEL: fdiv_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    divsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fdiv_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vdivsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load double, ptr %p
  %b = fdiv double %x, 42.0
  %r = insertelement <2 x double> undef, double %b, i32 0
  ret <2 x double> %r
}

define <4 x float> @fdiv_op0_constant(ptr %p) nounwind {
; SSE-LABEL: fdiv_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; SSE-NEXT:    divss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fdiv_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    vdivss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load float, ptr %p
  %b = fdiv float 42.0, %x
  %r = insertelement <4 x float> undef, float %b, i32 0
  ret <4 x float> %r
}

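; frem has no machine instruction, so it lowers to a libcall (fmodf/fmod) and
; stays scalar; the push/pop is presumably just realigning the stack for the
; call.
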
define <4 x float> @frem_op1_constant(ptr %p) nounwind {
; SSE-LABEL: frem_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    pushq %rax
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    movss {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; SSE-NEXT:    callq fmodf@PLT
; SSE-NEXT:    popq %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: frem_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq fmodf@PLT
; AVX-NEXT:    popq %rax
; AVX-NEXT:    retq
  %x = load float, ptr %p
  %b = frem float %x, 42.0
  %r = insertelement <4 x float> undef, float %b, i32 0
  ret <4 x float> %r
}

define <2 x double> @frem_op0_constant(ptr %p) nounwind {
; SSE-LABEL: frem_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    pushq %rax
; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; SSE-NEXT:    callq fmod@PLT
; SSE-NEXT:    popq %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: frem_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT:    callq fmod@PLT
; AVX-NEXT:    popq %rax
; AVX-NEXT:    retq
  %x = load double, ptr %p
  %b = frem double 42.0, %x
  %r = insertelement <2 x double> undef, double %b, i32 0
  ret <2 x double> %r
}

; Try again with 256-bit types.

define <8 x i32> @add_op1_constant_v8i32(ptr %p) nounwind {
; SSE-LABEL: add_op1_constant_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movl (%rdi), %eax
; SSE-NEXT:    addl $42, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: add_op1_constant_v8i32:
; AVX:       # %bb.0:
; AVX-NEXT:    movl (%rdi), %eax
; AVX-NEXT:    addl $42, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, ptr %p
  %b = add i32 %x, 42
  %r = insertelement <8 x i32> undef, i32 %b, i32 0
  ret <8 x i32> %r
}

define <4 x i64> @sub_op0_constant_v4i64(ptr %p) nounwind {
; SSE-LABEL: sub_op0_constant_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movl $42, %eax
; SSE-NEXT:    subq (%rdi), %rax
; SSE-NEXT:    movq %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sub_op0_constant_v4i64:
; AVX:       # %bb.0:
; AVX-NEXT:    movl $42, %eax
; AVX-NEXT:    subq (%rdi), %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    retq
  %x = load i64, ptr %p
  %b = sub i64 42, %x
  %r = insertelement <4 x i64> undef, i64 %b, i32 0
  ret <4 x i64> %r
}

define <8 x i32> @mul_op1_constant_v8i32(ptr %p) nounwind {
; SSE-LABEL: mul_op1_constant_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    imull $42, (%rdi), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mul_op1_constant_v8i32:
; AVX:       # %bb.0:
; AVX-NEXT:    imull $42, (%rdi), %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, ptr %p
  %b = mul i32 %x, 42
  %r = insertelement <8 x i32> undef, i32 %b, i32 0
  ret <8 x i32> %r
}

define <4 x i64> @or_op1_constant_v4i64(ptr %p) nounwind {
; SSE-LABEL: or_op1_constant_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movq (%rdi), %rax
; SSE-NEXT:    orq $42, %rax
; SSE-NEXT:    movq %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: or_op1_constant_v4i64:
; AVX:       # %bb.0:
; AVX-NEXT:    movq (%rdi), %rax
; AVX-NEXT:    orq $42, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    retq
  %x = load i64, ptr %p
  %b = or i64 %x, 42
  %r = insertelement <4 x i64> undef, i64 %b, i32 0
  ret <4 x i64> %r
}

; Try again with 512-bit types.

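; As with the 256-bit cases, the scalar code should match the 128-bit versions
; above, since only element 0 of the wider result is defined.
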
define <16 x i32> @add_op1_constant_v16i32(ptr %p) nounwind {
; SSE-LABEL: add_op1_constant_v16i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movl (%rdi), %eax
; SSE-NEXT:    addl $42, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: add_op1_constant_v16i32:
; AVX:       # %bb.0:
; AVX-NEXT:    movl (%rdi), %eax
; AVX-NEXT:    addl $42, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, ptr %p
  %b = add i32 %x, 42
  %r = insertelement <16 x i32> undef, i32 %b, i32 0
  ret <16 x i32> %r
}

define <8 x i64> @sub_op0_constant_v8i64(ptr %p) nounwind {
; SSE-LABEL: sub_op0_constant_v8i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movl $42, %eax
; SSE-NEXT:    subq (%rdi), %rax
; SSE-NEXT:    movq %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sub_op0_constant_v8i64:
; AVX:       # %bb.0:
; AVX-NEXT:    movl $42, %eax
; AVX-NEXT:    subq (%rdi), %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    retq
  %x = load i64, ptr %p
  %b = sub i64 42, %x
  %r = insertelement <8 x i64> undef, i64 %b, i32 0
  ret <8 x i64> %r
}

define <16 x i32> @mul_op1_constant_v16i32(ptr %p) nounwind {
; SSE-LABEL: mul_op1_constant_v16i32:
; SSE:       # %bb.0:
; SSE-NEXT:    imull $42, (%rdi), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mul_op1_constant_v16i32:
; AVX:       # %bb.0:
; AVX-NEXT:    imull $42, (%rdi), %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, ptr %p
  %b = mul i32 %x, 42
  %r = insertelement <16 x i32> undef, i32 %b, i32 0
  ret <16 x i32> %r
}

define <8 x i64> @or_op1_constant_v8i64(ptr %p) nounwind {
; SSE-LABEL: or_op1_constant_v8i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movq (%rdi), %rax
; SSE-NEXT:    orq $42, %rax
; SSE-NEXT:    movq %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: or_op1_constant_v8i64:
; AVX:       # %bb.0:
; AVX-NEXT:    movq (%rdi), %rax
; AVX-NEXT:    orq $42, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    retq
  %x = load i64, ptr %p
  %b = or i64 %x, 42
  %r = insertelement <8 x i64> undef, i64 %b, i32 0
  ret <8 x i64> %r
}