; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=CHECK,X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+xop | FileCheck %s --check-prefixes=CHECK,X86XOP
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512bw,+avx512vl,+gfni | FileCheck %s --check-prefixes=CHECK,GFNI,X86GFNI
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512bw,+avx512vl,+gfni | FileCheck %s --check-prefixes=CHECK,GFNI,X64GFNI

; These tests just check that the plumbing is in place for @llvm.bitreverse. The
; actual output is massive at the moment as llvm.bitreverse is not yet legal.

declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>) readnone

define <2 x i16> @test_bitreverse_v2i16(<2 x i16> %a) nounwind {
; X86-LABEL: test_bitreverse_v2i16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rolw $8, %ax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: andl $3855, %edx # imm = 0xF0F
; X86-NEXT: shll $4, %edx
; X86-NEXT: shrl $4, %eax
; X86-NEXT: andl $3855, %eax # imm = 0xF0F
; X86-NEXT: orl %edx, %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: andl $13107, %edx # imm = 0x3333
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $13107, %eax # imm = 0x3333
; X86-NEXT: leal (%eax,%edx,4), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: andl $21845, %edx # imm = 0x5555
; X86-NEXT: shrl %eax
; X86-NEXT: andl $21845, %eax # imm = 0x5555
; X86-NEXT: leal (%eax,%edx,2), %eax
; X86-NEXT: rolw $8, %cx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $3855, %edx # imm = 0xF0F
; X86-NEXT: shll $4, %edx
; X86-NEXT: shrl $4, %ecx
; X86-NEXT: andl $3855, %ecx # imm = 0xF0F
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $13107, %edx # imm = 0x3333
; X86-NEXT: shrl $2, %ecx
; X86-NEXT: andl $13107, %ecx # imm = 0x3333
; X86-NEXT: leal (%ecx,%edx,4), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $21845, %edx # imm = 0x5555
; X86-NEXT: shrl %ecx
; X86-NEXT: andl $21845, %ecx # imm = 0x5555
; X86-NEXT: leal (%ecx,%edx,2), %edx
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: # kill: def $dx killed $dx killed $edx
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_v2i16:
; X64: # %bb.0:
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psrlw $8, %xmm1
; X64-NEXT: psllw $8, %xmm0
; X64-NEXT: por %xmm1, %xmm0
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psrlw $4, %xmm1
; X64-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X64-NEXT: pand %xmm2, %xmm1
; X64-NEXT: pand %xmm2, %xmm0
; X64-NEXT: psllw $4, %xmm0
; X64-NEXT: por %xmm1, %xmm0
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psrlw $2, %xmm1
; X64-NEXT: movdqa {{.*#+}} xmm2 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; X64-NEXT: pand %xmm2, %xmm1
; X64-NEXT: pand %xmm2, %xmm0
; X64-NEXT: psllw $2, %xmm0
; X64-NEXT: por %xmm1, %xmm0
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: psrlw $1, %xmm1
; X64-NEXT: movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
; X64-NEXT: pand %xmm2, %xmm1
; X64-NEXT: pand %xmm2, %xmm0
; X64-NEXT: paddb %xmm0, %xmm0
; X64-NEXT: por %xmm1, %xmm0
; X64-NEXT: retq
;
; X86XOP-LABEL: test_bitreverse_v2i16:
; X86XOP: # %bb.0:
; X86XOP-NEXT: vpperm {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0, %xmm0
; X86XOP-NEXT: retl
;
; X86GFNI-LABEL: test_bitreverse_v2i16:
; X86GFNI: # %bb.0:
; X86GFNI-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
; X86GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
; X86GFNI-NEXT: retl
;
; X64GFNI-LABEL: test_bitreverse_v2i16:
; X64GFNI: # %bb.0:
; X64GFNI-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
; X64GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; X64GFNI-NEXT: retq
  %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
  ret <2 x i16> %b
}

declare i64 @llvm.bitreverse.i64(i64) readnone

define i64 @test_bitreverse_i64(i64 %a) nounwind {
; X86-LABEL: test_bitreverse_i64:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %edx
; X86-NEXT: shrl $4, %eax
; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X86-NEXT: orl %edx, %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: andl $858993459, %edx # imm = 0x33333333
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
; X86-NEXT: leal (%eax,%edx,4), %eax
; X86-NEXT: movl %eax, %edx
; X86-NEXT: andl $1431655765, %edx # imm = 0x55555555
; X86-NEXT: shrl %eax
; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: leal (%eax,%edx,2), %eax
; X86-NEXT: bswapl %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %edx
; X86-NEXT: shrl $4, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: orl %edx, %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $858993459, %edx # imm = 0x33333333
; X86-NEXT: shrl $2, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: leal (%ecx,%edx,4), %ecx
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: andl $1431655765, %edx # imm = 0x55555555
; X86-NEXT: shrl %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: leal (%ecx,%edx,2), %edx
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_i64:
; X64: # %bb.0:
; X64-NEXT: bswapq %rdi
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: shrq $4, %rax
; X64-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
; X64-NEXT: andq %rcx, %rax
; X64-NEXT: andq %rcx, %rdi
; X64-NEXT: shlq $4, %rdi
; X64-NEXT: orq %rax, %rdi
; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
; X64-NEXT: movq %rdi, %rcx
; X64-NEXT: andq %rax, %rcx
; X64-NEXT: shrq $2, %rdi
; X64-NEXT: andq %rax, %rdi
; X64-NEXT: leaq (%rdi,%rcx,4), %rax
; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
; X64-NEXT: movq %rax, %rdx
; X64-NEXT: andq %rcx, %rdx
; X64-NEXT: shrq %rax
; X64-NEXT: andq %rcx, %rax
; X64-NEXT: leaq (%rax,%rdx,2), %rax
; X64-NEXT: retq
;
; X86XOP-LABEL: test_bitreverse_i64:
; X86XOP: # %bb.0:
; X86XOP-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; X86XOP-NEXT: vpperm {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0, %xmm0
; X86XOP-NEXT: vmovd %xmm0, %eax
; X86XOP-NEXT: vpextrd $1, %xmm0, %edx
; X86XOP-NEXT: retl
;
; X86GFNI-LABEL: test_bitreverse_i64:
; X86GFNI: # %bb.0:
; X86GFNI-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
; X86GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
; X86GFNI-NEXT: vpextrd $1, %xmm0, %eax
; X86GFNI-NEXT: bswapl %eax
; X86GFNI-NEXT: vmovd %xmm0, %edx
; X86GFNI-NEXT: bswapl %edx
; X86GFNI-NEXT: retl
;
; X64GFNI-LABEL: test_bitreverse_i64:
; X64GFNI: # %bb.0:
; X64GFNI-NEXT: vmovq %rdi, %xmm0
; X64GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; X64GFNI-NEXT: vmovq %xmm0, %rax
; X64GFNI-NEXT: bswapq %rax
; X64GFNI-NEXT: retq
  %b = call i64 @llvm.bitreverse.i64(i64 %a)
  ret i64 %b
}

declare i32 @llvm.bitreverse.i32(i32) readnone

define i32 @test_bitreverse_i32(i32 %a) nounwind {
; X86-LABEL: test_bitreverse_i32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: shrl $4, %eax
; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X86-NEXT: shrl %eax
; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_i32:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: bswapl %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X64-NEXT: shll $4, %eax
; X64-NEXT: shrl $4, %edi
; X64-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
; X64-NEXT: orl %eax, %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andl $858993459, %eax # imm = 0x33333333
; X64-NEXT: shrl $2, %edi
; X64-NEXT: andl $858993459, %edi # imm = 0x33333333
; X64-NEXT: leal (%rdi,%rax,4), %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; X64-NEXT: shrl %eax
; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555
; X64-NEXT: leal (%rax,%rcx,2), %eax
; X64-NEXT: retq
;
; X86XOP-LABEL: test_bitreverse_i32:
; X86XOP: # %bb.0:
; X86XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86XOP-NEXT: vpperm {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0, %xmm0
; X86XOP-NEXT: vmovd %xmm0, %eax
; X86XOP-NEXT: retl
;
; X86GFNI-LABEL: test_bitreverse_i32:
; X86GFNI: # %bb.0:
; X86GFNI-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
; X86GFNI-NEXT: vmovd %xmm0, %eax
; X86GFNI-NEXT: bswapl %eax
; X86GFNI-NEXT: retl
;
; X64GFNI-LABEL: test_bitreverse_i32:
; X64GFNI: # %bb.0:
; X64GFNI-NEXT: vmovd %edi, %xmm0
; X64GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; X64GFNI-NEXT: vmovd %xmm0, %eax
; X64GFNI-NEXT: bswapl %eax
; X64GFNI-NEXT: retq
  %b = call i32 @llvm.bitreverse.i32(i32 %a)
  ret i32 %b
}

declare i24 @llvm.bitreverse.i24(i24) readnone

define i24 @test_bitreverse_i24(i24 %a) nounwind {
; X86-LABEL: test_bitreverse_i24:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: bswapl %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: shrl $4, %eax
; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $1431655680, %ecx # imm = 0x55555500
; X86-NEXT: shrl %eax
; X86-NEXT: andl $1431655680, %eax # imm = 0x55555500
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: shrl $8, %eax
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_i24:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: bswapl %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; X64-NEXT: shll $4, %eax
; X64-NEXT: shrl $4, %edi
; X64-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
; X64-NEXT: orl %eax, %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andl $858993459, %eax # imm = 0x33333333
; X64-NEXT: shrl $2, %edi
; X64-NEXT: andl $858993459, %edi # imm = 0x33333333
; X64-NEXT: leal (%rdi,%rax,4), %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: andl $1431655680, %ecx # imm = 0x55555500
; X64-NEXT: shrl %eax
; X64-NEXT: andl $1431655680, %eax # imm = 0x55555500
; X64-NEXT: leal (%rax,%rcx,2), %eax
; X64-NEXT: shrl $8, %eax
; X64-NEXT: retq
;
; X86XOP-LABEL: test_bitreverse_i24:
; X86XOP: # %bb.0:
; X86XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86XOP-NEXT: vpperm {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0, %xmm0
; X86XOP-NEXT: vmovd %xmm0, %eax
; X86XOP-NEXT: shrl $8, %eax
; X86XOP-NEXT: retl
;
; X86GFNI-LABEL: test_bitreverse_i24:
; X86GFNI: # %bb.0:
; X86GFNI-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
; X86GFNI-NEXT: vmovd %xmm0, %eax
; X86GFNI-NEXT: bswapl %eax
; X86GFNI-NEXT: shrl $8, %eax
; X86GFNI-NEXT: retl
;
; X64GFNI-LABEL: test_bitreverse_i24:
; X64GFNI: # %bb.0:
; X64GFNI-NEXT: vmovd %edi, %xmm0
; X64GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; X64GFNI-NEXT: vmovd %xmm0, %eax
; X64GFNI-NEXT: bswapl %eax
; X64GFNI-NEXT: shrl $8, %eax
; X64GFNI-NEXT: retq
  %b = call i24 @llvm.bitreverse.i24(i24 %a)
  ret i24 %b
}

declare i16 @llvm.bitreverse.i16(i16) readnone

define i16 @test_bitreverse_i16(i16 %a) nounwind {
; X86-LABEL: test_bitreverse_i16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rolw $8, %ax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $3855, %ecx # imm = 0xF0F
; X86-NEXT: shll $4, %ecx
; X86-NEXT: shrl $4, %eax
; X86-NEXT: andl $3855, %eax # imm = 0xF0F
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $13107, %ecx # imm = 0x3333
; X86-NEXT: shrl $2, %eax
; X86-NEXT: andl $13107, %eax # imm = 0x3333
; X86-NEXT: leal (%eax,%ecx,4), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $21845, %ecx # imm = 0x5555
; X86-NEXT: shrl %eax
; X86-NEXT: andl $21845, %eax # imm = 0x5555
; X86-NEXT: leal (%eax,%ecx,2), %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_i16:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: rolw $8, %di
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andl $3855, %eax # imm = 0xF0F
; X64-NEXT: shll $4, %eax
; X64-NEXT: shrl $4, %edi
; X64-NEXT: andl $3855, %edi # imm = 0xF0F
; X64-NEXT: orl %eax, %edi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andl $13107, %eax # imm = 0x3333
; X64-NEXT: shrl $2, %edi
; X64-NEXT: andl $13107, %edi # imm = 0x3333
; X64-NEXT: leal (%rdi,%rax,4), %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: andl $21845, %ecx # imm = 0x5555
; X64-NEXT: shrl %eax
; X64-NEXT: andl $21845, %eax # imm = 0x5555
; X64-NEXT: leal (%rax,%rcx,2), %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
;
; X86XOP-LABEL: test_bitreverse_i16:
; X86XOP: # %bb.0:
; X86XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86XOP-NEXT: vpperm {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0, %xmm0
; X86XOP-NEXT: vmovd %xmm0, %eax
; X86XOP-NEXT: # kill: def $ax killed $ax killed $eax
; X86XOP-NEXT: retl
;
; X86GFNI-LABEL: test_bitreverse_i16:
; X86GFNI: # %bb.0:
; X86GFNI-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
; X86GFNI-NEXT: vmovd %xmm0, %eax
; X86GFNI-NEXT: rolw $8, %ax
; X86GFNI-NEXT: # kill: def $ax killed $ax killed $eax
; X86GFNI-NEXT: retl
;
; X64GFNI-LABEL: test_bitreverse_i16:
; X64GFNI: # %bb.0:
; X64GFNI-NEXT: vmovd %edi, %xmm0
; X64GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; X64GFNI-NEXT: vmovd %xmm0, %eax
; X64GFNI-NEXT: rolw $8, %ax
; X64GFNI-NEXT: # kill: def $ax killed $ax killed $eax
; X64GFNI-NEXT: retq
  %b = call i16 @llvm.bitreverse.i16(i16 %a)
  ret i16 %b
}

declare i8 @llvm.bitreverse.i8(i8) readnone

define i8 @test_bitreverse_i8(i8 %a) {
; X86-LABEL: test_bitreverse_i8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rolb $4, %al
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andb $51, %cl
; X86-NEXT: shlb $2, %cl
; X86-NEXT: shrb $2, %al
; X86-NEXT: andb $51, %al
; X86-NEXT: orb %cl, %al
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andb $85, %cl
; X86-NEXT: addb %cl, %cl
; X86-NEXT: shrb %al
; X86-NEXT: andb $85, %al
; X86-NEXT: orb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_i8:
; X64: # %bb.0:
; X64-NEXT: rolb $4, %dil
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andb $51, %al
; X64-NEXT: shlb $2, %al
; X64-NEXT: shrb $2, %dil
; X64-NEXT: andb $51, %dil
; X64-NEXT: orb %dil, %al
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: andb $85, %cl
; X64-NEXT: addb %cl, %cl
; X64-NEXT: shrb %al
; X64-NEXT: andb $85, %al
; X64-NEXT: orb %cl, %al
; X64-NEXT: retq
;
; X86XOP-LABEL: test_bitreverse_i8:
; X86XOP: # %bb.0:
; X86XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86XOP-NEXT: vpperm {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0, %xmm0
; X86XOP-NEXT: vmovd %xmm0, %eax
; X86XOP-NEXT: # kill: def $al killed $al killed $eax
; X86XOP-NEXT: retl
;
; X86GFNI-LABEL: test_bitreverse_i8:
; X86GFNI: # %bb.0:
; X86GFNI-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
; X86GFNI-NEXT: vmovd %xmm0, %eax
; X86GFNI-NEXT: # kill: def $al killed $al killed $eax
; X86GFNI-NEXT: retl
;
; X64GFNI-LABEL: test_bitreverse_i8:
; X64GFNI: # %bb.0:
; X64GFNI-NEXT: vmovd %edi, %xmm0
; X64GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; X64GFNI-NEXT: vmovd %xmm0, %eax
; X64GFNI-NEXT: # kill: def $al killed $al killed $eax
; X64GFNI-NEXT: retq
  %b = call i8 @llvm.bitreverse.i8(i8 %a)
  ret i8 %b
}

declare i4 @llvm.bitreverse.i4(i4) readnone

define i4 @test_bitreverse_i4(i4 %a) {
; X86-LABEL: test_bitreverse_i4:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: andb $8, %al
; X86-NEXT: movl %ecx, %edx
; X86-NEXT: addb %cl, %dl
; X86-NEXT: andb $4, %dl
; X86-NEXT: movb %cl, %ah
; X86-NEXT: shlb $3, %ah
; X86-NEXT: andb $8, %ah
; X86-NEXT: orb %dl, %ah
; X86-NEXT: shrb %cl
; X86-NEXT: andb $2, %cl
; X86-NEXT: orb %ah, %cl
; X86-NEXT: shrb $3, %al
; X86-NEXT: orb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: test_bitreverse_i4:
; X64: # %bb.0:
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andb $8, %al
; X64-NEXT: leal (%rdi,%rdi), %ecx
; X64-NEXT: andb $4, %cl
; X64-NEXT: leal (,%rdi,8), %edx
; X64-NEXT: andb $8, %dl
; X64-NEXT: orb %cl, %dl
; X64-NEXT: shrb %dil
; X64-NEXT: andb $2, %dil
; X64-NEXT: orb %dil, %dl
; X64-NEXT: shrb $3, %al
; X64-NEXT: orb %dl, %al
; X64-NEXT: retq
;
; X86XOP-LABEL: test_bitreverse_i4:
; X86XOP: # %bb.0:
; X86XOP-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86XOP-NEXT: vpperm {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0, %xmm0
; X86XOP-NEXT: vmovd %xmm0, %eax
; X86XOP-NEXT: shrb $4, %al
; X86XOP-NEXT: # kill: def $al killed $al killed $eax
; X86XOP-NEXT: retl
;
; X86GFNI-LABEL: test_bitreverse_i4:
; X86GFNI: # %bb.0:
; X86GFNI-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
; X86GFNI-NEXT: vmovd %xmm0, %eax
; X86GFNI-NEXT: shrb $4, %al
; X86GFNI-NEXT: # kill: def $al killed $al killed $eax
; X86GFNI-NEXT: retl
;
; X64GFNI-LABEL: test_bitreverse_i4:
; X64GFNI: # %bb.0:
; X64GFNI-NEXT: vmovd %edi, %xmm0
; X64GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; X64GFNI-NEXT: vmovd %xmm0, %eax
; X64GFNI-NEXT: shrb $4, %al
; X64GFNI-NEXT: # kill: def $al killed $al killed $eax
; X64GFNI-NEXT: retq
  %b = call i4 @llvm.bitreverse.i4(i4 %a)
  ret i4 %b
}

; These tests check that bitreverse(constant) calls are folded

define <2 x i16> @fold_v2i16() {
; X86-LABEL: fold_v2i16:
; X86: # %bb.0:
; X86-NEXT: movw $-4096, %ax # imm = 0xF000
; X86-NEXT: movw $240, %dx
; X86-NEXT: retl
;
; X64-LABEL: fold_v2i16:
; X64: # %bb.0:
; X64-NEXT: movss {{.*#+}} xmm0 = [61440,240,0,0,0,0,0,0]
; X64-NEXT: retq
;
; X86XOP-LABEL: fold_v2i16:
; X86XOP: # %bb.0:
; X86XOP-NEXT: vmovss {{.*#+}} xmm0 = [61440,240,0,0,0,0,0,0]
; X86XOP-NEXT: retl
;
; GFNI-LABEL: fold_v2i16:
; GFNI: # %bb.0:
; GFNI-NEXT: vmovss {{.*#+}} xmm0 = [61440,240,0,0,0,0,0,0]
; GFNI-NEXT: ret{{[l|q]}}
  %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> <i16 15, i16 3840>)
  ret <2 x i16> %b
}

define i24 @fold_i24() {
; CHECK-LABEL: fold_i24:
; CHECK: # %bb.0:
; CHECK-NEXT: movl $2048, %eax # imm = 0x800
; CHECK-NEXT: ret{{[l|q]}}
  %b = call i24 @llvm.bitreverse.i24(i24 4096)
  ret i24 %b
}

define i8 @fold_i8() {
; CHECK-LABEL: fold_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: movb $-16, %al
; CHECK-NEXT: ret{{[l|q]}}
  %b = call i8 @llvm.bitreverse.i8(i8 15)
  ret i8 %b
}

define i4 @fold_i4() {
; CHECK-LABEL: fold_i4:
; CHECK: # %bb.0:
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: ret{{[l|q]}}
  %b = call i4 @llvm.bitreverse.i4(i4 8)
  ret i4 %b
}

; These tests check that bitreverse(bitreverse()) calls are removed

define i8 @identity_i8(i8 %a) {
; X86-LABEL: identity_i8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: identity_i8:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X86XOP-LABEL: identity_i8:
; X86XOP: # %bb.0:
; X86XOP-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86XOP-NEXT: retl
;
; X86GFNI-LABEL: identity_i8:
; X86GFNI: # %bb.0:
; X86GFNI-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86GFNI-NEXT: retl
;
; X64GFNI-LABEL: identity_i8:
; X64GFNI: # %bb.0:
; X64GFNI-NEXT: movl %edi, %eax
; X64GFNI-NEXT: # kill: def $al killed $al killed $eax
; X64GFNI-NEXT: retq
  %b = call i8 @llvm.bitreverse.i8(i8 %a)
  %c = call i8 @llvm.bitreverse.i8(i8 %b)
  ret i8 %c
}

define <2 x i16> @identity_v2i16(<2 x i16> %a) {
; X86-LABEL: identity_v2i16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx
; X86-NEXT: retl
;
; X64-LABEL: identity_v2i16:
; X64: # %bb.0:
; X64-NEXT: retq
;
; X86XOP-LABEL: identity_v2i16:
; X86XOP: # %bb.0:
; X86XOP-NEXT: retl
;
; GFNI-LABEL: identity_v2i16:
; GFNI: # %bb.0:
; GFNI-NEXT: ret{{[l|q]}}
  %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a)
  %c = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %b)
  ret <2 x i16> %c
}

; These tests check that bitreverse(undef) calls are removed

define i8 @undef_i8() {
; CHECK-LABEL: undef_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}}
  %b = call i8 @llvm.bitreverse.i8(i8 undef)
  ret i8 %b
}

define <2 x i16> @undef_v2i16() {
; CHECK-LABEL: undef_v2i16:
; CHECK: # %bb.0:
; CHECK-NEXT: ret{{[l|q]}}
  %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> undef)
  ret <2 x i16> %b
}

; Make sure we don't assert during type legalization promoting a large
; bitreverse due to the need for a large shift that won't fit in the i8 returned
; from getShiftAmountTy.
696define i528 @large_promotion(i528 %A) nounwind { 697; X86-LABEL: large_promotion: 698; X86: # %bb.0: 699; X86-NEXT: pushl %ebp 700; X86-NEXT: pushl %ebx 701; X86-NEXT: pushl %edi 702; X86-NEXT: pushl %esi 703; X86-NEXT: subl $60, %esp 704; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 705; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 706; X86-NEXT: movl {{[0-9]+}}(%esp), %edx 707; X86-NEXT: movl {{[0-9]+}}(%esp), %esi 708; X86-NEXT: movl {{[0-9]+}}(%esp), %edi 709; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx 710; X86-NEXT: bswapl %ebx 711; X86-NEXT: movl %ebx, %ebp 712; X86-NEXT: andl $252645135, %ebp # imm = 0xF0F0F0F 713; X86-NEXT: shll $4, %ebp 714; X86-NEXT: shrl $4, %ebx 715; X86-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F 716; X86-NEXT: orl %ebp, %ebx 717; X86-NEXT: movl %ebx, %ebp 718; X86-NEXT: andl $858993459, %ebp # imm = 0x33333333 719; X86-NEXT: shrl $2, %ebx 720; X86-NEXT: andl $858993459, %ebx # imm = 0x33333333 721; X86-NEXT: leal (%ebx,%ebp,4), %ebx 722; X86-NEXT: movl %ebx, %ebp 723; X86-NEXT: andl $1431633920, %ebp # imm = 0x55550000 724; X86-NEXT: shrl %ebx 725; X86-NEXT: andl $1431633920, %ebx # imm = 0x55550000 726; X86-NEXT: leal (%ebx,%ebp,2), %ebp 727; X86-NEXT: bswapl %edi 728; X86-NEXT: movl %edi, %ebx 729; X86-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F 730; X86-NEXT: shll $4, %ebx 731; X86-NEXT: shrl $4, %edi 732; X86-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F 733; X86-NEXT: orl %ebx, %edi 734; X86-NEXT: movl %edi, %ebx 735; X86-NEXT: andl $858993459, %ebx # imm = 0x33333333 736; X86-NEXT: shrl $2, %edi 737; X86-NEXT: andl $858993459, %edi # imm = 0x33333333 738; X86-NEXT: leal (%edi,%ebx,4), %edi 739; X86-NEXT: movl %edi, %ebx 740; X86-NEXT: andl $1431655765, %ebx # imm = 0x55555555 741; X86-NEXT: shrl %edi 742; X86-NEXT: andl $1431655765, %edi # imm = 0x55555555 743; X86-NEXT: leal (%edi,%ebx,2), %edi 744; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 745; X86-NEXT: bswapl %esi 746; X86-NEXT: movl %esi, %edi 747; X86-NEXT: andl 
$252645135, %edi # imm = 0xF0F0F0F 748; X86-NEXT: shll $4, %edi 749; X86-NEXT: shrl $4, %esi 750; X86-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F 751; X86-NEXT: orl %edi, %esi 752; X86-NEXT: movl %esi, %edi 753; X86-NEXT: andl $858993459, %edi # imm = 0x33333333 754; X86-NEXT: shrl $2, %esi 755; X86-NEXT: andl $858993459, %esi # imm = 0x33333333 756; X86-NEXT: leal (%esi,%edi,4), %esi 757; X86-NEXT: movl %esi, %edi 758; X86-NEXT: andl $1431655765, %edi # imm = 0x55555555 759; X86-NEXT: shrl %esi 760; X86-NEXT: andl $1431655765, %esi # imm = 0x55555555 761; X86-NEXT: leal (%esi,%edi,2), %ebx 762; X86-NEXT: bswapl %edx 763; X86-NEXT: movl %edx, %esi 764; X86-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F 765; X86-NEXT: shll $4, %esi 766; X86-NEXT: shrl $4, %edx 767; X86-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F 768; X86-NEXT: orl %esi, %edx 769; X86-NEXT: movl %edx, %esi 770; X86-NEXT: andl $858993459, %esi # imm = 0x33333333 771; X86-NEXT: shrl $2, %edx 772; X86-NEXT: andl $858993459, %edx # imm = 0x33333333 773; X86-NEXT: leal (%edx,%esi,4), %edx 774; X86-NEXT: movl %edx, %esi 775; X86-NEXT: andl $1431655765, %esi # imm = 0x55555555 776; X86-NEXT: shrl %edx 777; X86-NEXT: andl $1431655765, %edx # imm = 0x55555555 778; X86-NEXT: leal (%edx,%esi,2), %edx 779; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 780; X86-NEXT: bswapl %ecx 781; X86-NEXT: movl %ecx, %edx 782; X86-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F 783; X86-NEXT: shll $4, %edx 784; X86-NEXT: shrl $4, %ecx 785; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 786; X86-NEXT: orl %edx, %ecx 787; X86-NEXT: movl %ecx, %edx 788; X86-NEXT: andl $858993459, %edx # imm = 0x33333333 789; X86-NEXT: shrl $2, %ecx 790; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 791; X86-NEXT: leal (%ecx,%edx,4), %ecx 792; X86-NEXT: movl %ecx, %edx 793; X86-NEXT: andl $1431655765, %edx # imm = 0x55555555 794; X86-NEXT: shrl %ecx 795; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 796; X86-NEXT: 
leal (%ecx,%edx,2), %ecx 797; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 798; X86-NEXT: bswapl %eax 799; X86-NEXT: movl %eax, %ecx 800; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 801; X86-NEXT: shll $4, %ecx 802; X86-NEXT: shrl $4, %eax 803; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F 804; X86-NEXT: orl %ecx, %eax 805; X86-NEXT: movl %eax, %ecx 806; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 807; X86-NEXT: shrl $2, %eax 808; X86-NEXT: andl $858993459, %eax # imm = 0x33333333 809; X86-NEXT: leal (%eax,%ecx,4), %eax 810; X86-NEXT: movl %eax, %ecx 811; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 812; X86-NEXT: shrl %eax 813; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555 814; X86-NEXT: leal (%eax,%ecx,2), %eax 815; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 816; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 817; X86-NEXT: bswapl %eax 818; X86-NEXT: movl %eax, %ecx 819; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 820; X86-NEXT: shll $4, %ecx 821; X86-NEXT: shrl $4, %eax 822; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F 823; X86-NEXT: orl %ecx, %eax 824; X86-NEXT: movl %eax, %ecx 825; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 826; X86-NEXT: shrl $2, %eax 827; X86-NEXT: andl $858993459, %eax # imm = 0x33333333 828; X86-NEXT: leal (%eax,%ecx,4), %eax 829; X86-NEXT: movl %eax, %ecx 830; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 831; X86-NEXT: shrl %eax 832; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555 833; X86-NEXT: leal (%eax,%ecx,2), %eax 834; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 835; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 836; X86-NEXT: bswapl %eax 837; X86-NEXT: movl %eax, %ecx 838; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 839; X86-NEXT: shll $4, %ecx 840; X86-NEXT: shrl $4, %eax 841; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F 842; X86-NEXT: orl %ecx, %eax 843; X86-NEXT: movl %eax, %ecx 844; X86-NEXT: andl $858993459, %ecx # imm = 
0x33333333 845; X86-NEXT: shrl $2, %eax 846; X86-NEXT: andl $858993459, %eax # imm = 0x33333333 847; X86-NEXT: leal (%eax,%ecx,4), %eax 848; X86-NEXT: movl %eax, %ecx 849; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 850; X86-NEXT: shrl %eax 851; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555 852; X86-NEXT: leal (%eax,%ecx,2), %eax 853; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 854; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 855; X86-NEXT: bswapl %eax 856; X86-NEXT: movl %eax, %ecx 857; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 858; X86-NEXT: shll $4, %ecx 859; X86-NEXT: shrl $4, %eax 860; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F 861; X86-NEXT: orl %ecx, %eax 862; X86-NEXT: movl %eax, %ecx 863; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 864; X86-NEXT: shrl $2, %eax 865; X86-NEXT: andl $858993459, %eax # imm = 0x33333333 866; X86-NEXT: leal (%eax,%ecx,4), %eax 867; X86-NEXT: movl %eax, %ecx 868; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 869; X86-NEXT: shrl %eax 870; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555 871; X86-NEXT: leal (%eax,%ecx,2), %eax 872; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 873; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 874; X86-NEXT: bswapl %eax 875; X86-NEXT: movl %eax, %ecx 876; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 877; X86-NEXT: shll $4, %ecx 878; X86-NEXT: shrl $4, %eax 879; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F 880; X86-NEXT: orl %ecx, %eax 881; X86-NEXT: movl %eax, %ecx 882; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 883; X86-NEXT: shrl $2, %eax 884; X86-NEXT: andl $858993459, %eax # imm = 0x33333333 885; X86-NEXT: leal (%eax,%ecx,4), %eax 886; X86-NEXT: movl %eax, %ecx 887; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 888; X86-NEXT: shrl %eax 889; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555 890; X86-NEXT: leal (%eax,%ecx,2), %edi 891; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 892; X86-NEXT: bswapl %eax 893; 
X86-NEXT: movl %eax, %ecx 894; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 895; X86-NEXT: shll $4, %ecx 896; X86-NEXT: shrl $4, %eax 897; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F 898; X86-NEXT: orl %ecx, %eax 899; X86-NEXT: movl %eax, %ecx 900; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 901; X86-NEXT: shrl $2, %eax 902; X86-NEXT: andl $858993459, %eax # imm = 0x33333333 903; X86-NEXT: leal (%eax,%ecx,4), %eax 904; X86-NEXT: movl %eax, %ecx 905; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 906; X86-NEXT: shrl %eax 907; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555 908; X86-NEXT: leal (%eax,%ecx,2), %eax 909; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 910; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 911; X86-NEXT: bswapl %eax 912; X86-NEXT: movl %eax, %ecx 913; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 914; X86-NEXT: shll $4, %ecx 915; X86-NEXT: shrl $4, %eax 916; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F 917; X86-NEXT: orl %ecx, %eax 918; X86-NEXT: movl %eax, %ecx 919; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 920; X86-NEXT: shrl $2, %eax 921; X86-NEXT: andl $858993459, %eax # imm = 0x33333333 922; X86-NEXT: leal (%eax,%ecx,4), %eax 923; X86-NEXT: movl %eax, %ecx 924; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 925; X86-NEXT: shrl %eax 926; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555 927; X86-NEXT: leal (%eax,%ecx,2), %eax 928; X86-NEXT: movl %eax, (%esp) # 4-byte Spill 929; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 930; X86-NEXT: bswapl %eax 931; X86-NEXT: movl %eax, %ecx 932; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 933; X86-NEXT: shll $4, %ecx 934; X86-NEXT: shrl $4, %eax 935; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F 936; X86-NEXT: orl %ecx, %eax 937; X86-NEXT: movl %eax, %ecx 938; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 939; X86-NEXT: shrl $2, %eax 940; X86-NEXT: andl $858993459, %eax # imm = 0x33333333 941; X86-NEXT: leal (%eax,%ecx,4), %eax 942; 
X86-NEXT: movl %eax, %ecx 943; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 944; X86-NEXT: shrl %eax 945; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555 946; X86-NEXT: leal (%eax,%ecx,2), %eax 947; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 948; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 949; X86-NEXT: bswapl %eax 950; X86-NEXT: movl %eax, %ecx 951; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 952; X86-NEXT: shll $4, %ecx 953; X86-NEXT: shrl $4, %eax 954; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F 955; X86-NEXT: orl %ecx, %eax 956; X86-NEXT: movl %eax, %ecx 957; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 958; X86-NEXT: shrl $2, %eax 959; X86-NEXT: andl $858993459, %eax # imm = 0x33333333 960; X86-NEXT: leal (%eax,%ecx,4), %eax 961; X86-NEXT: movl %eax, %ecx 962; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 963; X86-NEXT: shrl %eax 964; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555 965; X86-NEXT: leal (%eax,%ecx,2), %eax 966; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 967; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 968; X86-NEXT: bswapl %eax 969; X86-NEXT: movl %eax, %ecx 970; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 971; X86-NEXT: shll $4, %ecx 972; X86-NEXT: shrl $4, %eax 973; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F 974; X86-NEXT: orl %ecx, %eax 975; X86-NEXT: movl %eax, %ecx 976; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 977; X86-NEXT: shrl $2, %eax 978; X86-NEXT: andl $858993459, %eax # imm = 0x33333333 979; X86-NEXT: leal (%eax,%ecx,4), %eax 980; X86-NEXT: movl %eax, %ecx 981; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 982; X86-NEXT: shrl %eax 983; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555 984; X86-NEXT: leal (%eax,%ecx,2), %eax 985; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 986; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 987; X86-NEXT: bswapl %eax 988; X86-NEXT: movl %eax, %ecx 989; X86-NEXT: andl $252645135, %ecx # imm = 
0xF0F0F0F 990; X86-NEXT: shll $4, %ecx 991; X86-NEXT: shrl $4, %eax 992; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F 993; X86-NEXT: orl %ecx, %eax 994; X86-NEXT: movl %eax, %ecx 995; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 996; X86-NEXT: shrl $2, %eax 997; X86-NEXT: andl $858993459, %eax # imm = 0x33333333 998; X86-NEXT: leal (%eax,%ecx,4), %eax 999; X86-NEXT: movl %eax, %ecx 1000; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 1001; X86-NEXT: shrl %eax 1002; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555 1003; X86-NEXT: leal (%eax,%ecx,2), %eax 1004; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1005; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 1006; X86-NEXT: bswapl %eax 1007; X86-NEXT: movl %eax, %ecx 1008; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 1009; X86-NEXT: shll $4, %ecx 1010; X86-NEXT: shrl $4, %eax 1011; X86-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F 1012; X86-NEXT: orl %ecx, %eax 1013; X86-NEXT: movl %eax, %ecx 1014; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333 1015; X86-NEXT: shrl $2, %eax 1016; X86-NEXT: andl $858993459, %eax # imm = 0x33333333 1017; X86-NEXT: leal (%eax,%ecx,4), %eax 1018; X86-NEXT: movl %eax, %ecx 1019; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555 1020; X86-NEXT: shrl %eax 1021; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555 1022; X86-NEXT: leal (%eax,%ecx,2), %edx 1023; X86-NEXT: movl %ebp, %esi 1024; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1025; X86-NEXT: shrdl $16, %ecx, %esi 1026; X86-NEXT: movl %ebx, %eax 1027; X86-NEXT: shrdl $16, %ebx, %ecx 1028; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1029; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1030; X86-NEXT: shrdl $16, %ecx, %eax 1031; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1032; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload 1033; X86-NEXT: shrdl $16, %eax, %ecx 1034; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Spill 1035; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1036; X86-NEXT: shrdl $16, %ecx, %eax 1037; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1038; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload 1039; X86-NEXT: shrdl $16, %eax, %ecx 1040; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1041; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1042; X86-NEXT: shrdl $16, %ecx, %eax 1043; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1044; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload 1045; X86-NEXT: shrdl $16, %eax, %ecx 1046; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1047; X86-NEXT: shrdl $16, %edi, %eax 1048; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1049; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload 1050; X86-NEXT: shrdl $16, %eax, %edi 1051; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1052; X86-NEXT: movl (%esp), %ecx # 4-byte Reload 1053; X86-NEXT: shrdl $16, %ecx, %eax 1054; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1055; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload 1056; X86-NEXT: shrdl $16, %ebp, %ecx 1057; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill 1058; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload 1059; X86-NEXT: shrdl $16, %ebx, %ebp 1060; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload 1061; X86-NEXT: shrdl $16, %edi, %ebx 1062; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1063; X86-NEXT: shrdl $16, %ecx, %edi 1064; X86-NEXT: shrdl $16, %edx, %ecx 1065; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 1066; X86-NEXT: movl %ecx, 60(%eax) 1067; X86-NEXT: movl %edi, 56(%eax) 1068; X86-NEXT: movl %ebx, 52(%eax) 1069; X86-NEXT: movl %ebp, 48(%eax) 1070; X86-NEXT: movl (%esp), %ecx # 4-byte Reload 1071; X86-NEXT: movl %ecx, 44(%eax) 1072; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1073; X86-NEXT: 
movl %ecx, 40(%eax) 1074; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1075; X86-NEXT: movl %ecx, 36(%eax) 1076; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1077; X86-NEXT: movl %ecx, 32(%eax) 1078; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1079; X86-NEXT: movl %ecx, 28(%eax) 1080; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1081; X86-NEXT: movl %ecx, 24(%eax) 1082; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1083; X86-NEXT: movl %ecx, 20(%eax) 1084; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1085; X86-NEXT: movl %ecx, 16(%eax) 1086; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1087; X86-NEXT: movl %ecx, 12(%eax) 1088; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1089; X86-NEXT: movl %ecx, 8(%eax) 1090; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1091; X86-NEXT: movl %ecx, 4(%eax) 1092; X86-NEXT: movl %esi, (%eax) 1093; X86-NEXT: shrl $16, %edx 1094; X86-NEXT: movw %dx, 64(%eax) 1095; X86-NEXT: addl $60, %esp 1096; X86-NEXT: popl %esi 1097; X86-NEXT: popl %edi 1098; X86-NEXT: popl %ebx 1099; X86-NEXT: popl %ebp 1100; X86-NEXT: retl $4 1101; 1102; X64-LABEL: large_promotion: 1103; X64: # %bb.0: 1104; X64-NEXT: pushq %r15 1105; X64-NEXT: pushq %r14 1106; X64-NEXT: pushq %r13 1107; X64-NEXT: pushq %r12 1108; X64-NEXT: pushq %rbx 1109; X64-NEXT: movq %rdi, %rax 1110; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12 1111; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15 1112; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx 1113; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi 1114; X64-NEXT: bswapq %rdi 1115; X64-NEXT: movq %rdi, %r10 1116; X64-NEXT: shrq $4, %r10 1117; X64-NEXT: movabsq $1085102592571150095, %r11 # imm = 0xF0F0F0F0F0F0F0F 1118; X64-NEXT: andq %r11, %r10 1119; X64-NEXT: andq %r11, %rdi 1120; X64-NEXT: shlq $4, %rdi 1121; X64-NEXT: orq %r10, %rdi 1122; X64-NEXT: movabsq $3689348814741910323, %r10 # imm = 0x3333333333333333 1123; 
X64-NEXT: movq %rdi, %r14 1124; X64-NEXT: andq %r10, %r14 1125; X64-NEXT: shrq $2, %rdi 1126; X64-NEXT: andq %r10, %rdi 1127; X64-NEXT: leaq (%rdi,%r14,4), %rdi 1128; X64-NEXT: movabsq $6148820866244280320, %r14 # imm = 0x5555000000000000 1129; X64-NEXT: movq %rdi, %r13 1130; X64-NEXT: andq %r14, %r13 1131; X64-NEXT: shrq %rdi 1132; X64-NEXT: andq %r14, %rdi 1133; X64-NEXT: leaq (%rdi,%r13,2), %rdi 1134; X64-NEXT: bswapq %rbx 1135; X64-NEXT: movq %rbx, %r14 1136; X64-NEXT: shrq $4, %r14 1137; X64-NEXT: andq %r11, %r14 1138; X64-NEXT: andq %r11, %rbx 1139; X64-NEXT: shlq $4, %rbx 1140; X64-NEXT: orq %r14, %rbx 1141; X64-NEXT: movq %rbx, %r14 1142; X64-NEXT: andq %r10, %r14 1143; X64-NEXT: shrq $2, %rbx 1144; X64-NEXT: andq %r10, %rbx 1145; X64-NEXT: leaq (%rbx,%r14,4), %rbx 1146; X64-NEXT: movabsq $6148914691236517205, %r14 # imm = 0x5555555555555555 1147; X64-NEXT: movq %rbx, %r13 1148; X64-NEXT: andq %r14, %r13 1149; X64-NEXT: shrq %rbx 1150; X64-NEXT: andq %r14, %rbx 1151; X64-NEXT: leaq (%rbx,%r13,2), %rbx 1152; X64-NEXT: shrdq $48, %rbx, %rdi 1153; X64-NEXT: bswapq %r15 1154; X64-NEXT: movq %r15, %r13 1155; X64-NEXT: shrq $4, %r13 1156; X64-NEXT: andq %r11, %r13 1157; X64-NEXT: andq %r11, %r15 1158; X64-NEXT: shlq $4, %r15 1159; X64-NEXT: orq %r13, %r15 1160; X64-NEXT: movq %r15, %r13 1161; X64-NEXT: andq %r10, %r13 1162; X64-NEXT: shrq $2, %r15 1163; X64-NEXT: andq %r10, %r15 1164; X64-NEXT: leaq (%r15,%r13,4), %r15 1165; X64-NEXT: movq %r15, %r13 1166; X64-NEXT: andq %r14, %r13 1167; X64-NEXT: shrq %r15 1168; X64-NEXT: andq %r14, %r15 1169; X64-NEXT: leaq (%r15,%r13,2), %r15 1170; X64-NEXT: shrdq $48, %r15, %rbx 1171; X64-NEXT: bswapq %r12 1172; X64-NEXT: movq %r12, %r13 1173; X64-NEXT: shrq $4, %r13 1174; X64-NEXT: andq %r11, %r13 1175; X64-NEXT: andq %r11, %r12 1176; X64-NEXT: shlq $4, %r12 1177; X64-NEXT: orq %r13, %r12 1178; X64-NEXT: movq %r12, %r13 1179; X64-NEXT: andq %r10, %r13 1180; X64-NEXT: shrq $2, %r12 1181; X64-NEXT: andq %r10, %r12 1182; 
X64-NEXT: leaq (%r12,%r13,4), %r12 1183; X64-NEXT: movq %r12, %r13 1184; X64-NEXT: andq %r14, %r13 1185; X64-NEXT: shrq %r12 1186; X64-NEXT: andq %r14, %r12 1187; X64-NEXT: leaq (%r12,%r13,2), %r12 1188; X64-NEXT: shrdq $48, %r12, %r15 1189; X64-NEXT: bswapq %r9 1190; X64-NEXT: movq %r9, %r13 1191; X64-NEXT: shrq $4, %r13 1192; X64-NEXT: andq %r11, %r13 1193; X64-NEXT: andq %r11, %r9 1194; X64-NEXT: shlq $4, %r9 1195; X64-NEXT: orq %r13, %r9 1196; X64-NEXT: movq %r9, %r13 1197; X64-NEXT: andq %r10, %r13 1198; X64-NEXT: shrq $2, %r9 1199; X64-NEXT: andq %r10, %r9 1200; X64-NEXT: leaq (%r9,%r13,4), %r9 1201; X64-NEXT: movq %r9, %r13 1202; X64-NEXT: andq %r14, %r13 1203; X64-NEXT: shrq %r9 1204; X64-NEXT: andq %r14, %r9 1205; X64-NEXT: leaq (%r9,%r13,2), %r9 1206; X64-NEXT: shrdq $48, %r9, %r12 1207; X64-NEXT: bswapq %r8 1208; X64-NEXT: movq %r8, %r13 1209; X64-NEXT: shrq $4, %r13 1210; X64-NEXT: andq %r11, %r13 1211; X64-NEXT: andq %r11, %r8 1212; X64-NEXT: shlq $4, %r8 1213; X64-NEXT: orq %r13, %r8 1214; X64-NEXT: movq %r8, %r13 1215; X64-NEXT: andq %r10, %r13 1216; X64-NEXT: shrq $2, %r8 1217; X64-NEXT: andq %r10, %r8 1218; X64-NEXT: leaq (%r8,%r13,4), %r8 1219; X64-NEXT: movq %r8, %r13 1220; X64-NEXT: andq %r14, %r13 1221; X64-NEXT: shrq %r8 1222; X64-NEXT: andq %r14, %r8 1223; X64-NEXT: leaq (%r8,%r13,2), %r8 1224; X64-NEXT: shrdq $48, %r8, %r9 1225; X64-NEXT: bswapq %rcx 1226; X64-NEXT: movq %rcx, %r13 1227; X64-NEXT: shrq $4, %r13 1228; X64-NEXT: andq %r11, %r13 1229; X64-NEXT: andq %r11, %rcx 1230; X64-NEXT: shlq $4, %rcx 1231; X64-NEXT: orq %r13, %rcx 1232; X64-NEXT: movq %rcx, %r13 1233; X64-NEXT: andq %r10, %r13 1234; X64-NEXT: shrq $2, %rcx 1235; X64-NEXT: andq %r10, %rcx 1236; X64-NEXT: leaq (%rcx,%r13,4), %rcx 1237; X64-NEXT: movq %rcx, %r13 1238; X64-NEXT: andq %r14, %r13 1239; X64-NEXT: shrq %rcx 1240; X64-NEXT: andq %r14, %rcx 1241; X64-NEXT: leaq (%rcx,%r13,2), %rcx 1242; X64-NEXT: shrdq $48, %rcx, %r8 1243; X64-NEXT: bswapq %rdx 1244; X64-NEXT: movq 
%rdx, %r13 1245; X64-NEXT: shrq $4, %r13 1246; X64-NEXT: andq %r11, %r13 1247; X64-NEXT: andq %r11, %rdx 1248; X64-NEXT: shlq $4, %rdx 1249; X64-NEXT: orq %r13, %rdx 1250; X64-NEXT: movq %rdx, %r13 1251; X64-NEXT: andq %r10, %r13 1252; X64-NEXT: shrq $2, %rdx 1253; X64-NEXT: andq %r10, %rdx 1254; X64-NEXT: leaq (%rdx,%r13,4), %rdx 1255; X64-NEXT: movq %rdx, %r13 1256; X64-NEXT: andq %r14, %r13 1257; X64-NEXT: shrq %rdx 1258; X64-NEXT: andq %r14, %rdx 1259; X64-NEXT: leaq (%rdx,%r13,2), %rdx 1260; X64-NEXT: shrdq $48, %rdx, %rcx 1261; X64-NEXT: bswapq %rsi 1262; X64-NEXT: movq %rsi, %r13 1263; X64-NEXT: shrq $4, %r13 1264; X64-NEXT: andq %r11, %r13 1265; X64-NEXT: andq %r11, %rsi 1266; X64-NEXT: shlq $4, %rsi 1267; X64-NEXT: orq %r13, %rsi 1268; X64-NEXT: movq %rsi, %r11 1269; X64-NEXT: andq %r10, %r11 1270; X64-NEXT: shrq $2, %rsi 1271; X64-NEXT: andq %r10, %rsi 1272; X64-NEXT: leaq (%rsi,%r11,4), %rsi 1273; X64-NEXT: movq %rsi, %r10 1274; X64-NEXT: andq %r14, %r10 1275; X64-NEXT: shrq %rsi 1276; X64-NEXT: andq %r14, %rsi 1277; X64-NEXT: leaq (%rsi,%r10,2), %rsi 1278; X64-NEXT: shrdq $48, %rsi, %rdx 1279; X64-NEXT: shrq $48, %rsi 1280; X64-NEXT: movq %rdx, 56(%rax) 1281; X64-NEXT: movq %rcx, 48(%rax) 1282; X64-NEXT: movq %r8, 40(%rax) 1283; X64-NEXT: movq %r9, 32(%rax) 1284; X64-NEXT: movq %r12, 24(%rax) 1285; X64-NEXT: movq %r15, 16(%rax) 1286; X64-NEXT: movq %rbx, 8(%rax) 1287; X64-NEXT: movq %rdi, (%rax) 1288; X64-NEXT: movw %si, 64(%rax) 1289; X64-NEXT: popq %rbx 1290; X64-NEXT: popq %r12 1291; X64-NEXT: popq %r13 1292; X64-NEXT: popq %r14 1293; X64-NEXT: popq %r15 1294; X64-NEXT: retq 1295; 1296; X86XOP-LABEL: large_promotion: 1297; X86XOP: # %bb.0: 1298; X86XOP-NEXT: pushl %ebp 1299; X86XOP-NEXT: pushl %ebx 1300; X86XOP-NEXT: pushl %edi 1301; X86XOP-NEXT: pushl %esi 1302; X86XOP-NEXT: subl $44, %esp 1303; X86XOP-NEXT: vmovdqa {{.*#+}} xmm0 = [87,86,85,84,83,82,81,80,95,94,93,92,91,90,89,88] 1304; X86XOP-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 1305; 
X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1 1306; X86XOP-NEXT: vpextrd $1, %xmm1, %eax 1307; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1308; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1 1309; X86XOP-NEXT: vmovd %xmm1, %ecx 1310; X86XOP-NEXT: shrdl $16, %ecx, %eax 1311; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1312; X86XOP-NEXT: vpextrd $1, %xmm1, %eax 1313; X86XOP-NEXT: shrdl $16, %eax, %ecx 1314; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1315; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1316; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1 1317; X86XOP-NEXT: vmovd %xmm1, %ecx 1318; X86XOP-NEXT: shrdl $16, %ecx, %eax 1319; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1320; X86XOP-NEXT: vpextrd $1, %xmm1, %eax 1321; X86XOP-NEXT: shrdl $16, %eax, %ecx 1322; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1323; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1324; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1 1325; X86XOP-NEXT: vmovd %xmm1, %ecx 1326; X86XOP-NEXT: shrdl $16, %ecx, %eax 1327; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1328; X86XOP-NEXT: vpextrd $1, %xmm1, %eax 1329; X86XOP-NEXT: shrdl $16, %eax, %ecx 1330; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1331; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1332; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1 1333; X86XOP-NEXT: vmovd %xmm1, %ecx 1334; X86XOP-NEXT: shrdl $16, %ecx, %eax 1335; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1336; X86XOP-NEXT: vpextrd $1, %xmm1, %eax 1337; X86XOP-NEXT: shrdl $16, %eax, %ecx 1338; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1339; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1340; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1 1341; X86XOP-NEXT: vmovd %xmm1, %ecx 1342; X86XOP-NEXT: shrdl $16, %ecx, %eax 1343; X86XOP-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1344; X86XOP-NEXT: 
vpextrd $1, %xmm1, %eax 1345; X86XOP-NEXT: shrdl $16, %eax, %ecx 1346; X86XOP-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1347; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1348; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1 1349; X86XOP-NEXT: vmovd %xmm1, %ebp 1350; X86XOP-NEXT: shrdl $16, %ebp, %eax 1351; X86XOP-NEXT: movl %eax, (%esp) # 4-byte Spill 1352; X86XOP-NEXT: vpextrd $1, %xmm1, %ebx 1353; X86XOP-NEXT: shrdl $16, %ebx, %ebp 1354; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1355; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm1 1356; X86XOP-NEXT: vmovd %xmm1, %esi 1357; X86XOP-NEXT: shrdl $16, %esi, %ebx 1358; X86XOP-NEXT: vpextrd $1, %xmm1, %edx 1359; X86XOP-NEXT: shrdl $16, %edx, %esi 1360; X86XOP-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1361; X86XOP-NEXT: vpperm %xmm0, %xmm1, %xmm0, %xmm0 1362; X86XOP-NEXT: vmovd %xmm0, %ecx 1363; X86XOP-NEXT: shrdl $16, %ecx, %edx 1364; X86XOP-NEXT: vpextrd $1, %xmm0, %edi 1365; X86XOP-NEXT: shrdl $16, %edi, %ecx 1366; X86XOP-NEXT: movl {{[0-9]+}}(%esp), %eax 1367; X86XOP-NEXT: movl %ecx, 60(%eax) 1368; X86XOP-NEXT: movl %edx, 56(%eax) 1369; X86XOP-NEXT: movl %esi, 52(%eax) 1370; X86XOP-NEXT: movl %ebx, 48(%eax) 1371; X86XOP-NEXT: movl %ebp, 44(%eax) 1372; X86XOP-NEXT: movl (%esp), %ecx # 4-byte Reload 1373; X86XOP-NEXT: movl %ecx, 40(%eax) 1374; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1375; X86XOP-NEXT: movl %ecx, 36(%eax) 1376; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1377; X86XOP-NEXT: movl %ecx, 32(%eax) 1378; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1379; X86XOP-NEXT: movl %ecx, 28(%eax) 1380; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1381; X86XOP-NEXT: movl %ecx, 24(%eax) 1382; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1383; X86XOP-NEXT: movl %ecx, 20(%eax) 1384; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1385; X86XOP-NEXT: movl %ecx, 16(%eax) 1386; 
X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1387; X86XOP-NEXT: movl %ecx, 12(%eax) 1388; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1389; X86XOP-NEXT: movl %ecx, 8(%eax) 1390; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1391; X86XOP-NEXT: movl %ecx, 4(%eax) 1392; X86XOP-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1393; X86XOP-NEXT: movl %ecx, (%eax) 1394; X86XOP-NEXT: shrl $16, %edi 1395; X86XOP-NEXT: movw %di, 64(%eax) 1396; X86XOP-NEXT: addl $44, %esp 1397; X86XOP-NEXT: popl %esi 1398; X86XOP-NEXT: popl %edi 1399; X86XOP-NEXT: popl %ebx 1400; X86XOP-NEXT: popl %ebp 1401; X86XOP-NEXT: retl $4 1402; 1403; X86GFNI-LABEL: large_promotion: 1404; X86GFNI: # %bb.0: 1405; X86GFNI-NEXT: pushl %ebp 1406; X86GFNI-NEXT: pushl %ebx 1407; X86GFNI-NEXT: pushl %edi 1408; X86GFNI-NEXT: pushl %esi 1409; X86GFNI-NEXT: subl $44, %esp 1410; X86GFNI-NEXT: vpbroadcastq {{.*#+}} xmm0 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] 1411; X86GFNI-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 1412; X86GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1 1413; X86GFNI-NEXT: vmovd %xmm1, %eax 1414; X86GFNI-NEXT: bswapl %eax 1415; X86GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1416; X86GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1 1417; X86GFNI-NEXT: vpextrd $1, %xmm1, %ecx 1418; X86GFNI-NEXT: bswapl %ecx 1419; X86GFNI-NEXT: shrdl $16, %ecx, %eax 1420; X86GFNI-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1421; X86GFNI-NEXT: vmovd %xmm1, %eax 1422; X86GFNI-NEXT: bswapl %eax 1423; X86GFNI-NEXT: shrdl $16, %eax, %ecx 1424; X86GFNI-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1425; X86GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1426; X86GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1 1427; X86GFNI-NEXT: vpextrd $1, %xmm1, %ecx 1428; X86GFNI-NEXT: bswapl %ecx 1429; X86GFNI-NEXT: shrdl $16, %ecx, %eax 1430; X86GFNI-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1431; X86GFNI-NEXT: 
vmovd %xmm1, %eax 1432; X86GFNI-NEXT: bswapl %eax 1433; X86GFNI-NEXT: shrdl $16, %eax, %ecx 1434; X86GFNI-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1435; X86GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1436; X86GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1 1437; X86GFNI-NEXT: vpextrd $1, %xmm1, %ecx 1438; X86GFNI-NEXT: bswapl %ecx 1439; X86GFNI-NEXT: shrdl $16, %ecx, %eax 1440; X86GFNI-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1441; X86GFNI-NEXT: vmovd %xmm1, %eax 1442; X86GFNI-NEXT: bswapl %eax 1443; X86GFNI-NEXT: shrdl $16, %eax, %ecx 1444; X86GFNI-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1445; X86GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1446; X86GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1 1447; X86GFNI-NEXT: vpextrd $1, %xmm1, %ecx 1448; X86GFNI-NEXT: bswapl %ecx 1449; X86GFNI-NEXT: shrdl $16, %ecx, %eax 1450; X86GFNI-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1451; X86GFNI-NEXT: vmovd %xmm1, %eax 1452; X86GFNI-NEXT: bswapl %eax 1453; X86GFNI-NEXT: shrdl $16, %eax, %ecx 1454; X86GFNI-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1455; X86GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1456; X86GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1 1457; X86GFNI-NEXT: vpextrd $1, %xmm1, %ecx 1458; X86GFNI-NEXT: bswapl %ecx 1459; X86GFNI-NEXT: shrdl $16, %ecx, %eax 1460; X86GFNI-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1461; X86GFNI-NEXT: vmovd %xmm1, %eax 1462; X86GFNI-NEXT: bswapl %eax 1463; X86GFNI-NEXT: shrdl $16, %eax, %ecx 1464; X86GFNI-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 1465; X86GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1466; X86GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1 1467; X86GFNI-NEXT: vpextrd $1, %xmm1, %ebp 1468; X86GFNI-NEXT: bswapl %ebp 1469; X86GFNI-NEXT: shrdl $16, %ebp, %eax 1470; X86GFNI-NEXT: movl %eax, (%esp) # 4-byte Spill 1471; X86GFNI-NEXT: vmovd %xmm1, %ebx 1472; X86GFNI-NEXT: bswapl %ebx 1473; X86GFNI-NEXT: shrdl 
$16, %ebx, %ebp 1474; X86GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1475; X86GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1 1476; X86GFNI-NEXT: vpextrd $1, %xmm1, %edi 1477; X86GFNI-NEXT: bswapl %edi 1478; X86GFNI-NEXT: shrdl $16, %edi, %ebx 1479; X86GFNI-NEXT: vmovd %xmm1, %edx 1480; X86GFNI-NEXT: bswapl %edx 1481; X86GFNI-NEXT: shrdl $16, %edx, %edi 1482; X86GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1483; X86GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm0 1484; X86GFNI-NEXT: vpextrd $1, %xmm0, %ecx 1485; X86GFNI-NEXT: bswapl %ecx 1486; X86GFNI-NEXT: shrdl $16, %ecx, %edx 1487; X86GFNI-NEXT: vmovd %xmm0, %esi 1488; X86GFNI-NEXT: bswapl %esi 1489; X86GFNI-NEXT: shrdl $16, %esi, %ecx 1490; X86GFNI-NEXT: movl {{[0-9]+}}(%esp), %eax 1491; X86GFNI-NEXT: movl %ecx, 60(%eax) 1492; X86GFNI-NEXT: movl %edx, 56(%eax) 1493; X86GFNI-NEXT: movl %edi, 52(%eax) 1494; X86GFNI-NEXT: movl %ebx, 48(%eax) 1495; X86GFNI-NEXT: movl %ebp, 44(%eax) 1496; X86GFNI-NEXT: movl (%esp), %ecx # 4-byte Reload 1497; X86GFNI-NEXT: movl %ecx, 40(%eax) 1498; X86GFNI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1499; X86GFNI-NEXT: movl %ecx, 36(%eax) 1500; X86GFNI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1501; X86GFNI-NEXT: movl %ecx, 32(%eax) 1502; X86GFNI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1503; X86GFNI-NEXT: movl %ecx, 28(%eax) 1504; X86GFNI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1505; X86GFNI-NEXT: movl %ecx, 24(%eax) 1506; X86GFNI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1507; X86GFNI-NEXT: movl %ecx, 20(%eax) 1508; X86GFNI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1509; X86GFNI-NEXT: movl %ecx, 16(%eax) 1510; X86GFNI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1511; X86GFNI-NEXT: movl %ecx, 12(%eax) 1512; X86GFNI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1513; X86GFNI-NEXT: movl %ecx, 8(%eax) 1514; X86GFNI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 
4-byte Reload 1515; X86GFNI-NEXT: movl %ecx, 4(%eax) 1516; X86GFNI-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 1517; X86GFNI-NEXT: movl %ecx, (%eax) 1518; X86GFNI-NEXT: shrl $16, %esi 1519; X86GFNI-NEXT: movw %si, 64(%eax) 1520; X86GFNI-NEXT: addl $44, %esp 1521; X86GFNI-NEXT: popl %esi 1522; X86GFNI-NEXT: popl %edi 1523; X86GFNI-NEXT: popl %ebx 1524; X86GFNI-NEXT: popl %ebp 1525; X86GFNI-NEXT: retl $4 1526; 1527; X64GFNI-LABEL: large_promotion: 1528; X64GFNI: # %bb.0: 1529; X64GFNI-NEXT: pushq %r14 1530; X64GFNI-NEXT: pushq %rbx 1531; X64GFNI-NEXT: movq %rdi, %rax 1532; X64GFNI-NEXT: vpbroadcastq {{.*#+}} xmm0 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] 1533; X64GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1534; X64GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1 1535; X64GFNI-NEXT: vmovq %xmm1, %r10 1536; X64GFNI-NEXT: bswapq %r10 1537; X64GFNI-NEXT: vmovq %r9, %xmm1 1538; X64GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1 1539; X64GFNI-NEXT: vmovq %xmm1, %rdi 1540; X64GFNI-NEXT: bswapq %rdi 1541; X64GFNI-NEXT: vmovq %r8, %xmm1 1542; X64GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1 1543; X64GFNI-NEXT: vmovq %xmm1, %r8 1544; X64GFNI-NEXT: bswapq %r8 1545; X64GFNI-NEXT: movq %r8, %r9 1546; X64GFNI-NEXT: shldq $16, %rdi, %r9 1547; X64GFNI-NEXT: shldq $16, %r10, %rdi 1548; X64GFNI-NEXT: vmovq %rcx, %xmm1 1549; X64GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1 1550; X64GFNI-NEXT: vmovq %xmm1, %rcx 1551; X64GFNI-NEXT: bswapq %rcx 1552; X64GFNI-NEXT: shrdq $48, %rcx, %r8 1553; X64GFNI-NEXT: vmovq %rdx, %xmm1 1554; X64GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1 1555; X64GFNI-NEXT: vmovq %xmm1, %rdx 1556; X64GFNI-NEXT: bswapq %rdx 1557; X64GFNI-NEXT: shrdq $48, %rdx, %rcx 1558; X64GFNI-NEXT: vmovq %rsi, %xmm1 1559; X64GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1 1560; X64GFNI-NEXT: vmovq %xmm1, %rsi 1561; X64GFNI-NEXT: bswapq %rsi 1562; X64GFNI-NEXT: shrdq $48, %rsi, %rdx 1563; X64GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1564; 
X64GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1 1565; X64GFNI-NEXT: vmovq %xmm1, %r11 1566; X64GFNI-NEXT: bswapq %r11 1567; X64GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1568; X64GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm1 1569; X64GFNI-NEXT: vmovq %xmm1, %rbx 1570; X64GFNI-NEXT: bswapq %rbx 1571; X64GFNI-NEXT: shrdq $48, %rbx, %r11 1572; X64GFNI-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero 1573; X64GFNI-NEXT: vgf2p8affineqb $0, %xmm0, %xmm1, %xmm0 1574; X64GFNI-NEXT: vmovq %xmm0, %r14 1575; X64GFNI-NEXT: bswapq %r14 1576; X64GFNI-NEXT: shrdq $48, %r14, %rbx 1577; X64GFNI-NEXT: shrdq $48, %r10, %r14 1578; X64GFNI-NEXT: shrq $48, %rsi 1579; X64GFNI-NEXT: movq %r14, 16(%rax) 1580; X64GFNI-NEXT: movq %rbx, 8(%rax) 1581; X64GFNI-NEXT: movq %r11, (%rax) 1582; X64GFNI-NEXT: movq %rdx, 56(%rax) 1583; X64GFNI-NEXT: movq %rcx, 48(%rax) 1584; X64GFNI-NEXT: movq %r8, 40(%rax) 1585; X64GFNI-NEXT: movq %r9, 32(%rax) 1586; X64GFNI-NEXT: movq %rdi, 24(%rax) 1587; X64GFNI-NEXT: movw %si, 64(%rax) 1588; X64GFNI-NEXT: popq %rbx 1589; X64GFNI-NEXT: popq %r14 1590; X64GFNI-NEXT: retq 1591 %Z = call i528 @llvm.bitreverse.i528(i528 %A) 1592 ret i528 %Z 1593} 1594declare i528 @llvm.bitreverse.i528(i528) 1595