; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s

; Codegen test for llvm.bitreverse on an i1000, a width that is not a multiple
; of 64 bits.
;
; What the expected output below shows:
;   * every 64-bit piece is reversed with bswapq followed by three
;     mask-and-shift swap stages (masks 0x0F0F..., 0x3333..., 0x5555...);
;   * the first piece processed uses 0x5555555555000000 for the last stage
;     instead of the full 0x5555... mask;
;   * neighbouring pieces are then combined with shrdq $24, and 125 bytes
;     (fifteen quadwords, one 32-bit store at offset 120, one byte at offset
;     124) are written through the pointer reloaded into %rax.
;
; The assertions are generated: regenerate them with the script named in the
; NOTE line above rather than editing them by hand.
;
; NOTE(review): the function is named @square although it calls bitreverse;
; the name is kept so the generated label assertion stays valid.

define i1000 @square(i1000 %A) nounwind {
; CHECK-LABEL: square:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rbx
; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %r15
; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %r14
; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %r12
; CHECK-NEXT:    bswapq %r12
; CHECK-NEXT:    movq %r12, %r10
; CHECK-NEXT:    shrq $4, %r10
; CHECK-NEXT:    movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F
; CHECK-NEXT:    andq %rsi, %r10
; CHECK-NEXT:    andq %rsi, %r12
; CHECK-NEXT:    shlq $4, %r12
; CHECK-NEXT:    orq %r10, %r12
; CHECK-NEXT:    movabsq $3689348814741910323, %r10 # imm = 0x3333333333333333
; CHECK-NEXT:    movq %r12, %r13
; CHECK-NEXT:    andq %r10, %r13
; CHECK-NEXT:    shrq $2, %r12
; CHECK-NEXT:    andq %r10, %r12
; CHECK-NEXT:    leaq (%r12,%r13,4), %r12
; CHECK-NEXT:    movabsq $6148914691230924800, %r13 # imm = 0x5555555555000000
; CHECK-NEXT:    movq %r12, %rbp
; CHECK-NEXT:    andq %r13, %rbp
; CHECK-NEXT:    shrq %r12
; CHECK-NEXT:    andq %r13, %r12
; CHECK-NEXT:    leaq (%r12,%rbp,2), %rax
; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    bswapq %r14
; CHECK-NEXT:    movq %r14, %r12
; CHECK-NEXT:    shrq $4, %r12
; CHECK-NEXT:    andq %rsi, %r12
; CHECK-NEXT:    andq %rsi, %r14
; CHECK-NEXT:    shlq $4, %r14
; CHECK-NEXT:    orq %r12, %r14
; CHECK-NEXT:    movq %r14, %r12
; CHECK-NEXT:    andq %r10, %r12
; CHECK-NEXT:    shrq $2, %r14
; CHECK-NEXT:    andq %r10, %r14
; CHECK-NEXT:    leaq (%r14,%r12,4), %r12
; CHECK-NEXT:    movabsq $6148914691236517205, %r14 # imm = 0x5555555555555555
; CHECK-NEXT:    movq %r12, %r13
; CHECK-NEXT:    andq %r14, %r13
; CHECK-NEXT:    shrq %r12
; CHECK-NEXT:    andq %r14, %r12
; CHECK-NEXT:    leaq (%r12,%r13,2), %rax
; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    bswapq %r15
; CHECK-NEXT:    movq %r15, %r12
; CHECK-NEXT:    shrq $4, %r12
; CHECK-NEXT:    andq %rsi, %r12
; CHECK-NEXT:    andq %rsi, %r15
; CHECK-NEXT:    shlq $4, %r15
; CHECK-NEXT:    orq %r12, %r15
; CHECK-NEXT:    movq %r15, %r12
; CHECK-NEXT:    andq %r10, %r12
; CHECK-NEXT:    shrq $2, %r15
; CHECK-NEXT:    andq %r10, %r15
; CHECK-NEXT:    leaq (%r15,%r12,4), %r15
; CHECK-NEXT:    movq %r15, %r12
; CHECK-NEXT:    andq %r14, %r12
; CHECK-NEXT:    shrq %r15
; CHECK-NEXT:    andq %r14, %r15
; CHECK-NEXT:    leaq (%r15,%r12,2), %rax
; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    bswapq %rbx
; CHECK-NEXT:    movq %rbx, %r15
; CHECK-NEXT:    shrq $4, %r15
; CHECK-NEXT:    andq %rsi, %r15
; CHECK-NEXT:    andq %rsi, %rbx
; CHECK-NEXT:    shlq $4, %rbx
; CHECK-NEXT:    orq %r15, %rbx
; CHECK-NEXT:    movq %rbx, %r15
; CHECK-NEXT:    andq %r10, %r15
; CHECK-NEXT:    shrq $2, %rbx
; CHECK-NEXT:    andq %r10, %rbx
; CHECK-NEXT:    leaq (%rbx,%r15,4), %rbx
; CHECK-NEXT:    movq %rbx, %r15
; CHECK-NEXT:    andq %r14, %r15
; CHECK-NEXT:    shrq %rbx
; CHECK-NEXT:    andq %r14, %rbx
; CHECK-NEXT:    leaq (%rbx,%r15,2), %rax
; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    bswapq %rdi
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    shrq $4, %rbx
; CHECK-NEXT:    andq %rsi, %rbx
; CHECK-NEXT:    andq %rsi, %rdi
; CHECK-NEXT:    shlq $4, %rdi
; CHECK-NEXT:    orq %rbx, %rdi
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    andq %r10, %rbx
; CHECK-NEXT:    shrq $2, %rdi
; CHECK-NEXT:    andq %r10, %rdi
; CHECK-NEXT:    leaq (%rdi,%rbx,4), %rdi
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    andq %r14, %rbx
; CHECK-NEXT:    shrq %rdi
; CHECK-NEXT:    andq %r14, %rdi
; CHECK-NEXT:    leaq (%rdi,%rbx,2), %rax
; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
; CHECK-NEXT:    bswapq %rdi
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    shrq $4, %rbx
; CHECK-NEXT:    andq %rsi, %rbx
; CHECK-NEXT:    andq %rsi, %rdi
; CHECK-NEXT:    shlq $4, %rdi
; CHECK-NEXT:    orq %rbx, %rdi
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    andq %r10, %rbx
; CHECK-NEXT:    shrq $2, %rdi
; CHECK-NEXT:    andq %r10, %rdi
; CHECK-NEXT:    leaq (%rdi,%rbx,4), %rdi
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    andq %r14, %rbx
; CHECK-NEXT:    shrq %rdi
; CHECK-NEXT:    andq %r14, %rdi
; CHECK-NEXT:    leaq (%rdi,%rbx,2), %rax
; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
; CHECK-NEXT:    bswapq %rdi
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    shrq $4, %rbx
; CHECK-NEXT:    andq %rsi, %rbx
; CHECK-NEXT:    andq %rsi, %rdi
; CHECK-NEXT:    shlq $4, %rdi
; CHECK-NEXT:    orq %rbx, %rdi
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    andq %r10, %rbx
; CHECK-NEXT:    shrq $2, %rdi
; CHECK-NEXT:    andq %r10, %rdi
; CHECK-NEXT:    leaq (%rdi,%rbx,4), %rdi
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    andq %r14, %rbx
; CHECK-NEXT:    shrq %rdi
; CHECK-NEXT:    andq %r14, %rdi
; CHECK-NEXT:    leaq (%rdi,%rbx,2), %rax
; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
; CHECK-NEXT:    bswapq %rdi
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    shrq $4, %rbx
; CHECK-NEXT:    andq %rsi, %rbx
; CHECK-NEXT:    andq %rsi, %rdi
; CHECK-NEXT:    shlq $4, %rdi
; CHECK-NEXT:    orq %rbx, %rdi
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    andq %r10, %rbx
; CHECK-NEXT:    shrq $2, %rdi
; CHECK-NEXT:    andq %r10, %rdi
; CHECK-NEXT:    leaq (%rdi,%rbx,4), %rdi
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    andq %r14, %rbx
; CHECK-NEXT:    shrq %rdi
; CHECK-NEXT:    andq %r14, %rdi
; CHECK-NEXT:    leaq (%rdi,%rbx,2), %rax
; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
; CHECK-NEXT:    bswapq %rdi
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    shrq $4, %rbx
; CHECK-NEXT:    andq %rsi, %rbx
; CHECK-NEXT:    andq %rsi, %rdi
; CHECK-NEXT:    shlq $4, %rdi
; CHECK-NEXT:    orq %rbx, %rdi
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    andq %r10, %rbx
; CHECK-NEXT:    shrq $2, %rdi
; CHECK-NEXT:    andq %r10, %rdi
; CHECK-NEXT:    leaq (%rdi,%rbx,4), %rdi
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    andq %r14, %rbx
; CHECK-NEXT:    shrq %rdi
; CHECK-NEXT:    andq %r14, %rdi
; CHECK-NEXT:    leaq (%rdi,%rbx,2), %rax
; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
; CHECK-NEXT:    bswapq %rdi
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    shrq $4, %rbx
; CHECK-NEXT:    andq %rsi, %rbx
; CHECK-NEXT:    andq %rsi, %rdi
; CHECK-NEXT:    shlq $4, %rdi
; CHECK-NEXT:    orq %rbx, %rdi
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    andq %r10, %rbx
; CHECK-NEXT:    shrq $2, %rdi
; CHECK-NEXT:    andq %r10, %rdi
; CHECK-NEXT:    leaq (%rdi,%rbx,4), %rdi
; CHECK-NEXT:    movq %rdi, %rbx
; CHECK-NEXT:    andq %r14, %rbx
; CHECK-NEXT:    shrq %rdi
; CHECK-NEXT:    andq %r14, %rdi
; CHECK-NEXT:    leaq (%rdi,%rbx,2), %rax
; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
; CHECK-NEXT:    bswapq %rdi
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    shrq $4, %rax
; CHECK-NEXT:    andq %rsi, %rax
; CHECK-NEXT:    andq %rsi, %rdi
; CHECK-NEXT:    shlq $4, %rdi
; CHECK-NEXT:    orq %rax, %rdi
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    andq %r10, %rax
; CHECK-NEXT:    shrq $2, %rdi
; CHECK-NEXT:    andq %r10, %rdi
; CHECK-NEXT:    leaq (%rdi,%rax,4), %rax
; CHECK-NEXT:    movq %rax, %rdi
; CHECK-NEXT:    andq %r14, %rdi
; CHECK-NEXT:    shrq %rax
; CHECK-NEXT:    andq %r14, %rax
; CHECK-NEXT:    leaq (%rax,%rdi,2), %rdi
; CHECK-NEXT:    bswapq %r9
; CHECK-NEXT:    movq %r9, %rax
; CHECK-NEXT:    shrq $4, %rax
; CHECK-NEXT:    andq %rsi, %rax
; CHECK-NEXT:    andq %rsi, %r9
; CHECK-NEXT:    shlq $4, %r9
; CHECK-NEXT:    orq %rax, %r9
; CHECK-NEXT:    movq %r9, %rax
; CHECK-NEXT:    andq %r10, %rax
; CHECK-NEXT:    shrq $2, %r9
; CHECK-NEXT:    andq %r10, %r9
; CHECK-NEXT:    leaq (%r9,%rax,4), %rax
; CHECK-NEXT:    movq %rax, %r9
; CHECK-NEXT:    andq %r14, %r9
; CHECK-NEXT:    shrq %rax
; CHECK-NEXT:    andq %r14, %rax
; CHECK-NEXT:    leaq (%rax,%r9,2), %rax
; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    bswapq %r8
; CHECK-NEXT:    movq %r8, %rax
; CHECK-NEXT:    shrq $4, %rax
; CHECK-NEXT:    andq %rsi, %rax
; CHECK-NEXT:    andq %rsi, %r8
; CHECK-NEXT:    shlq $4, %r8
; CHECK-NEXT:    orq %rax, %r8
; CHECK-NEXT:    movq %r8, %rax
; CHECK-NEXT:    andq %r10, %rax
; CHECK-NEXT:    shrq $2, %r8
; CHECK-NEXT:    andq %r10, %r8
; CHECK-NEXT:    leaq (%r8,%rax,4), %rax
; CHECK-NEXT:    movq %rax, %r8
; CHECK-NEXT:    andq %r14, %r8
; CHECK-NEXT:    shrq %rax
; CHECK-NEXT:    andq %r14, %rax
; CHECK-NEXT:    leaq (%rax,%r8,2), %rax
; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    bswapq %rcx
; CHECK-NEXT:    movq %rcx, %rax
; CHECK-NEXT:    shrq $4, %rax
; CHECK-NEXT:    andq %rsi, %rax
; CHECK-NEXT:    andq %rsi, %rcx
; CHECK-NEXT:    shlq $4, %rcx
; CHECK-NEXT:    orq %rax, %rcx
; CHECK-NEXT:    movq %rcx, %rax
; CHECK-NEXT:    andq %r10, %rax
; CHECK-NEXT:    shrq $2, %rcx
; CHECK-NEXT:    andq %r10, %rcx
; CHECK-NEXT:    leaq (%rcx,%rax,4), %rax
; CHECK-NEXT:    movq %rax, %rcx
; CHECK-NEXT:    andq %r14, %rcx
; CHECK-NEXT:    shrq %rax
; CHECK-NEXT:    andq %r14, %rax
; CHECK-NEXT:    leaq (%rax,%rcx,2), %rbx
; CHECK-NEXT:    bswapq %rdx
; CHECK-NEXT:    movq %rdx, %rax
; CHECK-NEXT:    shrq $4, %rax
; CHECK-NEXT:    andq %rsi, %rax
; CHECK-NEXT:    andq %rsi, %rdx
; CHECK-NEXT:    shlq $4, %rdx
; CHECK-NEXT:    orq %rax, %rdx
; CHECK-NEXT:    movq %rdx, %rax
; CHECK-NEXT:    andq %r10, %rax
; CHECK-NEXT:    shrq $2, %rdx
; CHECK-NEXT:    andq %r10, %rdx
; CHECK-NEXT:    leaq (%rdx,%rax,4), %rax
; CHECK-NEXT:    movq %rax, %rdx
; CHECK-NEXT:    andq %r14, %rdx
; CHECK-NEXT:    shrq %rax
; CHECK-NEXT:    andq %r14, %rax
; CHECK-NEXT:    leaq (%rax,%rdx,2), %rdx
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; CHECK-NEXT:    bswapq %rcx
; CHECK-NEXT:    movq %rcx, %rax
; CHECK-NEXT:    shrq $4, %rax
; CHECK-NEXT:    andq %rsi, %rax
; CHECK-NEXT:    andq %rsi, %rcx
; CHECK-NEXT:    shlq $4, %rcx
; CHECK-NEXT:    orq %rax, %rcx
; CHECK-NEXT:    movq %rcx, %rax
; CHECK-NEXT:    andq %r10, %rax
; CHECK-NEXT:    shrq $2, %rcx
; CHECK-NEXT:    andq %r10, %rcx
; CHECK-NEXT:    leaq (%rcx,%rax,4), %rax
; CHECK-NEXT:    movq %rax, %rsi
; CHECK-NEXT:    andq %r14, %rsi
; CHECK-NEXT:    shrq %rax
; CHECK-NEXT:    andq %r14, %rax
; CHECK-NEXT:    leaq (%rax,%rsi,2), %rsi
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-NEXT:    shrdq $24, %rax, %r10
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; CHECK-NEXT:    shrdq $24, %rcx, %rax
; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
; CHECK-NEXT:    shrdq $24, %rbp, %rcx
; CHECK-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
; CHECK-NEXT:    shrdq $24, %r13, %rbp
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
; CHECK-NEXT:    shrdq $24, %r12, %r13
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
; CHECK-NEXT:    shrdq $24, %r15, %r12
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
; CHECK-NEXT:    shrdq $24, %r14, %r15
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r11 # 8-byte Reload
; CHECK-NEXT:    shrdq $24, %r11, %r14
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
; CHECK-NEXT:    shrdq $24, %r9, %r11
; CHECK-NEXT:    movq %rdi, %r8
; CHECK-NEXT:    shrdq $24, %rdi, %r9
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
; CHECK-NEXT:    shrdq $24, %rdi, %r8
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; CHECK-NEXT:    shrdq $24, %rcx, %rdi
; CHECK-NEXT:    shrdq $24, %rbx, %rcx
; CHECK-NEXT:    shrdq $24, %rdx, %rbx
; CHECK-NEXT:    shrdq $24, %rsi, %rdx
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-NEXT:    movq %rdx, 112(%rax)
; CHECK-NEXT:    movq %rbx, 104(%rax)
; CHECK-NEXT:    movq %rcx, 96(%rax)
; CHECK-NEXT:    movq %rdi, 88(%rax)
; CHECK-NEXT:    movq %r8, 80(%rax)
; CHECK-NEXT:    movq %r9, 72(%rax)
; CHECK-NEXT:    movq %r11, 64(%rax)
; CHECK-NEXT:    movq %r14, 56(%rax)
; CHECK-NEXT:    movq %r15, 48(%rax)
; CHECK-NEXT:    movq %r12, 40(%rax)
; CHECK-NEXT:    movq %r13, 32(%rax)
; CHECK-NEXT:    movq %rbp, 24(%rax)
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; CHECK-NEXT:    movq %rcx, 16(%rax)
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; CHECK-NEXT:    movq %rcx, 8(%rax)
; CHECK-NEXT:    movq %r10, (%rax)
; CHECK-NEXT:    movq %rsi, %rcx
; CHECK-NEXT:    shrq $56, %rsi
; CHECK-NEXT:    movb %sil, 124(%rax)
; CHECK-NEXT:    shrq $24, %rcx
; CHECK-NEXT:    movl %ecx, 120(%rax)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    retq
  %Z = call i1000 @llvm.bitreverse.i1000(i1000 %A)
  ret i1000 %Z
}

declare i1000 @llvm.bitreverse.i1000(i1000)