; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64

; These tests just check that the plumbing is in place for @llvm.bswap. The
; actual output is massive at the moment as llvm.bswap is not yet legal.

declare i16 @llvm.bswap.i16(i16) readnone
declare i32 @llvm.bswap.i32(i32) readnone
declare i64 @llvm.bswap.i64(i64) readnone
; FIX: the vector overload must use vector types. Declaring @llvm.bswap.v4i32
; with scalar i32 operands is rejected by the IR verifier (intrinsic
; signature mismatch), which would make the whole test file fail to parse.
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) readnone

; fold (bswap undef) -> undef
define i32 @test_undef() nounwind {
; X86-LABEL: test_undef:
; X86:       # %bb.0:
; X86-NEXT:    retl
;
; X64-LABEL: test_undef:
; X64:       # %bb.0:
; X64-NEXT:    retq
  %b = call i32 @llvm.bswap.i32(i32 undef)
  ret i32 %b
}

; fold (bswap (bswap x)) -> x
define i32 @test_bswap_bswap(i32 %a0) nounwind {
; X86-LABEL: test_bswap_bswap:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap_bswap:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    retq
  %b = call i32 @llvm.bswap.i32(i32 %a0)
  %c = call i32 @llvm.bswap.i32(i32 %b)
  ret i32 %c
}

; fold (bswap (srl (bswap x), 8)) -> (shl x, 8) for a byte-aligned shift
define i16 @test_bswap_srli_8_bswap_i16(i16 %a) nounwind {
; X86-LABEL: test_bswap_srli_8_bswap_i16:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $8, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap_srli_8_bswap_i16:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shll $8, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
  %1 = call i16 @llvm.bswap.i16(i16 %a)
  %2 = lshr i16 %1, 8
  %3 = call i16 @llvm.bswap.i16(i16 %2)
  ret i16 %3
}

define i32 @test_bswap_srli_8_bswap_i32(i32 %a) nounwind {
; X86-LABEL: test_bswap_srli_8_bswap_i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $8, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap_srli_8_bswap_i32:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shll $8, %eax
; X64-NEXT:    retq
  %1 = call i32 @llvm.bswap.i32(i32 %a)
  %2 = lshr i32 %1, 8
  %3 = call i32 @llvm.bswap.i32(i32 %2)
  ret i32 %3
}

define i64 @test_bswap_srli_16_bswap_i64(i64 %a) nounwind {
; X86-LABEL: test_bswap_srli_16_bswap_i64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    shll $16, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap_srli_16_bswap_i64:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    shlq $16, %rax
; X64-NEXT:    retq
  %1 = call i64 @llvm.bswap.i64(i64 %a)
  %2 = lshr i64 %1, 16
  %3 = call i64 @llvm.bswap.i64(i64 %2)
  ret i64 %3
}

; fold (bswap (shl (bswap x), 8)) -> (srl x, 8) for a byte-aligned shift
define i16 @test_bswap_shli_8_bswap_i16(i16 %a) nounwind {
; X86-LABEL: test_bswap_shli_8_bswap_i16:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap_shli_8_bswap_i16:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movzbl %ah, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
  %1 = call i16 @llvm.bswap.i16(i16 %a)
  %2 = shl i16 %1, 8
  %3 = call i16 @llvm.bswap.i16(i16 %2)
  ret i16 %3
}

define i32 @test_bswap_shli_8_bswap_i32(i32 %a) nounwind {
; X86-LABEL: test_bswap_shli_8_bswap_i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrl $8, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap_shli_8_bswap_i32:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shrl $8, %eax
; X64-NEXT:    retq
  %1 = call i32 @llvm.bswap.i32(i32 %a)
  %2 = shl i32 %1, 8
  %3 = call i32 @llvm.bswap.i32(i32 %2)
  ret i32 %3
}

define i64 @test_bswap_shli_16_bswap_i64(i64 %a) nounwind {
; X86-LABEL: test_bswap_shli_16_bswap_i64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap_shli_16_bswap_i64:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    shrq $16, %rax
; X64-NEXT:    retq
  %1 = call i64 @llvm.bswap.i64(i64 %a)
  %2 = shl i64 %1, 16
  %3 = call i64 @llvm.bswap.i64(i64 %2)
  ret i64 %3
}

; the known-set bits of the 'or' feed through the bswap and are then masked
; away by the 'and', so only a single bswap of the argument remains
define i32 @test_demandedbits_bswap(i32 %a0) nounwind {
; X86-LABEL: test_demandedbits_bswap:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bswapl %eax
; X86-NEXT:    andl $-65536, %eax # imm = 0xFFFF0000
; X86-NEXT:    retl
;
; X64-LABEL: test_demandedbits_bswap:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    bswapl %eax
; X64-NEXT:    andl $-65536, %eax # imm = 0xFFFF0000
; X64-NEXT:    retq
  %b = or i32 %a0, 4278190080
  %c = call i32 @llvm.bswap.i32(i32 %b)
  %d = and i32 %c, 4294901760
  ret i32 %d
}

; only one byte of the bswapped load survives the masking 'or', so the
; 64-bit load is narrowed to a single byte load/store
define void @demand_one_loaded_byte(ptr %xp, ptr %yp) {
; X86-LABEL: demand_one_loaded_byte:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzbl 4(%ecx), %ecx
; X86-NEXT:    movb %cl, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: demand_one_loaded_byte:
; X64:       # %bb.0:
; X64-NEXT:    movzbl 4(%rdi), %eax
; X64-NEXT:    movb %al, (%rsi)
; X64-NEXT:    retq
  %x = load i64, ptr %xp, align 8
  %x_zzzz7654 = lshr i64 %x, 32
  %x_z7654zzz = shl nuw nsw i64 %x_zzzz7654, 24
  %x_4zzz = trunc i64 %x_z7654zzz to i32
  %y = load i32, ptr %yp, align 4
  %y_321z = and i32 %y, -256
  %x_zzz4 = call i32 @llvm.bswap.i32(i32 %x_4zzz)
  %r = or i32 %x_zzz4, %y_321z
  store i32 %r, ptr %yp, align 4
  ret void
}

; the swapped value lives entirely in the top 16 bits, so the i64 bswap
; reduces to a 16-bit byte swap (rolw $8) that is zero-extended
define i64 @test_bswap64_shift48_zext(i16 %a0) {
; X86-LABEL: test_bswap64_shift48_zext:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    rolw $8, %ax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap64_shift48_zext:
; X64:       # %bb.0:
; X64-NEXT:    rolw $8, %di
; X64-NEXT:    movzwl %di, %eax
; X64-NEXT:    retq
  %z = zext i16 %a0 to i64
  %s = shl i64 %z, 48
  %b = call i64 @llvm.bswap.i64(i64 %s)
  ret i64 %b
}

define i64 @test_bswap64_shift48(i64 %a0) {
; X86-LABEL: test_bswap64_shift48:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    rolw $8, %ax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap64_shift48:
; X64:       # %bb.0:
; X64-NEXT:    rolw $8, %di
; X64-NEXT:    movzwl %di, %eax
; X64-NEXT:    retq
  %s = shl i64 %a0, 48
  %b = call i64 @llvm.bswap.i64(i64 %s)
  ret i64 %b
}

; shift amount is not a whole number of bytes, so shift and bswap both remain
define i32 @test_bswap32_shift17(i32 %a0) {
; X86-LABEL: test_bswap32_shift17:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $17, %eax
; X86-NEXT:    bswapl %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap32_shift17:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shll $17, %eax
; X64-NEXT:    bswapl %eax
; X64-NEXT:    retq
  %s = shl i32 %a0, 17
  %b = call i32 @llvm.bswap.i32(i32 %s)
  ret i32 %b
}

; fold bswap(and(bswap(a), b)) -> and(a, bswap(b)) - a single bswap remains
define i32 @bs_and_lhs_bs32(i32 %a, i32 %b) #0 {
; X86-LABEL: bs_and_lhs_bs32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bswapl %eax
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bs_and_lhs_bs32:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    bswapl %eax
; X64-NEXT:    andl %edi, %eax
; X64-NEXT:    retq
  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
  %2 = and i32 %1, %b
  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
  ret i32 %3
}

; fold bswap(or(bswap(a), b)) -> or(a, bswap(b))
define i64 @bs_or_lhs_bs64(i64 %a, i64 %b) #0 {
; X86-LABEL: bs_or_lhs_bs64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bswapl %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bswapl %edx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    retl
;
; X64-LABEL: bs_or_lhs_bs64:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %rax
; X64-NEXT:    bswapq %rax
; X64-NEXT:    orq %rdi, %rax
; X64-NEXT:    retq
  %1 = tail call i64 @llvm.bswap.i64(i64 %a)
  %2 = or i64 %1, %b
  %3 = tail call i64 @llvm.bswap.i64(i64 %2)
  ret i64 %3
}

; fold bswap(xor(a, bswap(b))) -> xor(bswap(a), b)
define i64 @bs_xor_rhs_bs64(i64 %a, i64 %b) #0 {
; X86-LABEL: bs_xor_rhs_bs64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bswapl %eax
; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bswapl %edx
; X86-NEXT:    xorl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    retl
;
; X64-LABEL: bs_xor_rhs_bs64:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    bswapq %rax
; X64-NEXT:    xorq %rsi, %rax
; X64-NEXT:    retq
  %1 = tail call i64 @llvm.bswap.i64(i64 %b)
  %2 = xor i64 %a, %1
  %3 = tail call i64 @llvm.bswap.i64(i64 %2)
  ret i64 %3
}

; all operands have extra uses, yet the outer bswap still folds away
define i32 @bs_and_all_operand_multiuse(i32 %a, i32 %b) #0 {
; X86-LABEL: bs_and_all_operand_multiuse:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    bswapl %edx
; X86-NEXT:    andl %ecx, %eax
; X86-NEXT:    bswapl %ecx
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    imull %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: bs_and_all_operand_multiuse:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    bswapl %eax
; X64-NEXT:    andl %esi, %edi
; X64-NEXT:    bswapl %esi
; X64-NEXT:    imull %edi, %eax
; X64-NEXT:    imull %esi, %eax
; X64-NEXT:    retq
  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
  %2 = tail call i32 @llvm.bswap.i32(i32 %b)
  %3 = and i32 %1, %2
  %4 = tail call i32 @llvm.bswap.i32(i32 %3)
  %5 = mul i32 %1, %4 ;increase use of left bswap
  %6 = mul i32 %2, %5 ;increase use of right bswap

  ret i32 %6
}

; negative test
define i32 @bs_and_rhs_bs32_multiuse1(i32 %a, i32 %b) #0 {
; X86-LABEL: bs_and_rhs_bs32_multiuse1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    bswapl %ecx
; X86-NEXT:    andl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    bswapl %eax
; X86-NEXT:    imull %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: bs_and_rhs_bs32_multiuse1:
; X64:       # %bb.0:
; X64-NEXT:    bswapl %esi
; X64-NEXT:    andl %edi, %esi
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    bswapl %eax
; X64-NEXT:    imull %esi, %eax
; X64-NEXT:    retq
  %1 = tail call i32 @llvm.bswap.i32(i32 %b)
  %2 = and i32 %1, %a
  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
  %4 = mul i32 %2, %3 ;increase use of logical op
  ret i32 %4
}

; negative test
define i32 @bs_and_rhs_bs32_multiuse2(i32 %a, i32 %b) #0 {
; X86-LABEL: bs_and_rhs_bs32_multiuse2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    bswapl %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andl %ecx, %eax
; X86-NEXT:    bswapl %eax
; X86-NEXT:    imull %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: bs_and_rhs_bs32_multiuse2:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    bswapl %esi
; X64-NEXT:    andl %esi, %eax
; X64-NEXT:    bswapl %eax
; X64-NEXT:    imull %esi, %eax
; X64-NEXT:    retq
  %1 = tail call i32 @llvm.bswap.i32(i32 %b)
  %2 = and i32 %1, %a
  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
  %4 = mul i32 %1, %3 ;increase use of inner bswap
  ret i32 %4
}

; negative test
define i64 @test_bswap64_shift17(i64 %a0) {
; X86-LABEL: test_bswap64_shift17:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shldl $17, %edx, %eax
; X86-NEXT:    shll $17, %edx
; X86-NEXT:    bswapl %eax
; X86-NEXT:    bswapl %edx
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap64_shift17:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    shlq $17, %rax
; X64-NEXT:    bswapq %rax
; X64-NEXT:    retq
  %s = shl i64 %a0, 17
  %b = call i64 @llvm.bswap.i64(i64 %s)
  ret i64 %b
}

; negative test
define i64 @test_bswap64_shift48_multiuse(i64 %a0, ptr %a1) {
; X86-LABEL: test_bswap64_shift48_multiuse:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $16, %eax
; X86-NEXT:    movl %eax, 4(%ecx)
; X86-NEXT:    bswapl %eax
; X86-NEXT:    movl %eax, (%ecx)
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap64_shift48_multiuse:
; X64:       # %bb.0:
; X64-NEXT:    shlq $48, %rdi
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    bswapq %rax
; X64-NEXT:    orq %rax, %rdi
; X64-NEXT:    movq %rdi, (%rsi)
; X64-NEXT:    retq
  %s = shl i64 %a0, 48
  %b = call i64 @llvm.bswap.i64(i64 %s)
  %a = add i64 %s, %b
  store i64 %a, ptr %a1
  ret i64 %b
}