; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Check that slow-division bypass code is emitted only when appropriate.
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=atom < %s | FileCheck -check-prefixes=CHECK,ATOM %s
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=x86-64 < %s | FileCheck -check-prefixes=CHECK,REST,X64 %s
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=silvermont < %s | FileCheck -check-prefixes=CHECK,REST,SLM %s
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake < %s | FileCheck -check-prefixes=CHECK,REST,SKL %s
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=goldmont < %s | FileCheck -check-prefixes=CHECK,REST,GMT %s
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mcpu=gracemont < %s | FileCheck -check-prefixes=CHECK,REST,GMT %s
; RUN: llc -profile-summary-huge-working-set-size-threshold=1 -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake < %s | FileCheck -check-prefixes=HUGEWS %s

; Verify that div32 is bypassed only for Atoms.
; The atom expectations test whether both operands fit in 8 bits (testl $-256)
; and, if so, use divb; every other configuration expects a plain idivl.
define i32 @div32(i32 %a, i32 %b) {
; ATOM-LABEL: div32:
; ATOM:       # %bb.0: # %entry
; ATOM-NEXT:    movl %edi, %eax
; ATOM-NEXT:    orl %esi, %eax
; ATOM-NEXT:    testl $-256, %eax
; ATOM-NEXT:    je .LBB0_1
; ATOM-NEXT:  # %bb.2:
; ATOM-NEXT:    movl %edi, %eax
; ATOM-NEXT:    cltd
; ATOM-NEXT:    idivl %esi
; ATOM-NEXT:    retq
; ATOM-NEXT:  .LBB0_1:
; ATOM-NEXT:    movzbl %dil, %eax
; ATOM-NEXT:    divb %sil
; ATOM-NEXT:    movzbl %al, %eax
; ATOM-NEXT:    retq
;
; REST-LABEL: div32:
; REST:       # %bb.0: # %entry
; REST-NEXT:    movl %edi, %eax
; REST-NEXT:    cltd
; REST-NEXT:    idivl %esi
; REST-NEXT:    retq
;
; HUGEWS-LABEL: div32:
; HUGEWS:       # %bb.0: # %entry
; HUGEWS-NEXT:    movl %edi, %eax
; HUGEWS-NEXT:    cltd
; HUGEWS-NEXT:    idivl %esi
; HUGEWS-NEXT:    retq
entry:
  %div = sdiv i32 %a, %b
  ret i32 %div
}

; Verify that div64 is bypassed for atom, x86-64, silvermont and skylake, but
; not for goldmont/gracemont, nor in the configuration that passes
; -profile-summary-huge-working-set-size-threshold=1.
define i64 @div64(i64 %a, i64 %b) {
; ATOM-LABEL: div64:
; ATOM:       # %bb.0: # %entry
; ATOM-NEXT:    movq %rdi, %rcx
; ATOM-NEXT:    movq %rdi, %rax
; ATOM-NEXT:    orq %rsi, %rcx
; ATOM-NEXT:    shrq $32, %rcx
; ATOM-NEXT:    je .LBB1_1
; ATOM-NEXT:  # %bb.2:
; ATOM-NEXT:    cqto
; ATOM-NEXT:    idivq %rsi
; ATOM-NEXT:    retq
; ATOM-NEXT:  .LBB1_1:
; ATOM-NEXT:    # kill: def $eax killed $eax killed $rax
; ATOM-NEXT:    xorl %edx, %edx
; ATOM-NEXT:    divl %esi
; ATOM-NEXT:    # kill: def $eax killed $eax def $rax
; ATOM-NEXT:    retq
;
; X64-LABEL: div64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    movq %rdi, %rcx
; X64-NEXT:    orq %rsi, %rcx
; X64-NEXT:    shrq $32, %rcx
; X64-NEXT:    je .LBB1_1
; X64-NEXT:  # %bb.2:
; X64-NEXT:    cqto
; X64-NEXT:    idivq %rsi
; X64-NEXT:    retq
; X64-NEXT:  .LBB1_1:
; X64-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    divl %esi
; X64-NEXT:    # kill: def $eax killed $eax def $rax
; X64-NEXT:    retq
;
; SLM-LABEL: div64:
; SLM:       # %bb.0: # %entry
; SLM-NEXT:    movq %rdi, %rcx
; SLM-NEXT:    movq %rdi, %rax
; SLM-NEXT:    orq %rsi, %rcx
; SLM-NEXT:    shrq $32, %rcx
; SLM-NEXT:    je .LBB1_1
; SLM-NEXT:  # %bb.2:
; SLM-NEXT:    cqto
; SLM-NEXT:    idivq %rsi
; SLM-NEXT:    retq
; SLM-NEXT:  .LBB1_1:
; SLM-NEXT:    xorl %edx, %edx
; SLM-NEXT:    # kill: def $eax killed $eax killed $rax
; SLM-NEXT:    divl %esi
; SLM-NEXT:    # kill: def $eax killed $eax def $rax
; SLM-NEXT:    retq
;
; SKL-LABEL: div64:
; SKL:       # %bb.0: # %entry
; SKL-NEXT:    movq %rdi, %rax
; SKL-NEXT:    movq %rdi, %rcx
; SKL-NEXT:    orq %rsi, %rcx
; SKL-NEXT:    shrq $32, %rcx
; SKL-NEXT:    je .LBB1_1
; SKL-NEXT:  # %bb.2:
; SKL-NEXT:    cqto
; SKL-NEXT:    idivq %rsi
; SKL-NEXT:    retq
; SKL-NEXT:  .LBB1_1:
; SKL-NEXT:    # kill: def $eax killed $eax killed $rax
; SKL-NEXT:    xorl %edx, %edx
; SKL-NEXT:    divl %esi
; SKL-NEXT:    # kill: def $eax killed $eax def $rax
; SKL-NEXT:    retq
;
; GMT-LABEL: div64:
; GMT:       # %bb.0: # %entry
; GMT-NEXT:    movq %rdi, %rax
; GMT-NEXT:    cqto
; GMT-NEXT:    idivq %rsi
; GMT-NEXT:    retq
;
; HUGEWS-LABEL: div64:
; HUGEWS:       # %bb.0: # %entry
; HUGEWS-NEXT:    movq %rdi, %rax
; HUGEWS-NEXT:    cqto
; HUGEWS-NEXT:    idivq %rsi
; HUGEWS-NEXT:    retq
entry:
  %div = sdiv i64 %a, %b
  ret i64 %div
}


; Verify that no extra code is generated when optimizing for size.

define i64 @div64_optsize(i64 %a, i64 %b) optsize {
; CHECK-LABEL: div64_optsize:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    cqto
; CHECK-NEXT:    idivq %rsi
; CHECK-NEXT:    retq
;
; HUGEWS-LABEL: div64_optsize:
; HUGEWS:       # %bb.0:
; HUGEWS-NEXT:    movq %rdi, %rax
; HUGEWS-NEXT:    cqto
; HUGEWS-NEXT:    idivq %rsi
; HUGEWS-NEXT:    retq
  %div = sdiv i64 %a, %b
  ret i64 %div
}

; Same expectation for a function with no size attribute whose !prof metadata
; (!15) records a function_entry_count of 0: a plain idivq in every
; configuration.
define i64 @div64_pgso(i64 %a, i64 %b) !prof !15 {
; CHECK-LABEL: div64_pgso:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    cqto
; CHECK-NEXT:    idivq %rsi
; CHECK-NEXT:    retq
;
; HUGEWS-LABEL: div64_pgso:
; HUGEWS:       # %bb.0:
; HUGEWS-NEXT:    movq %rdi, %rax
; HUGEWS-NEXT:    cqto
; HUGEWS-NEXT:    idivq %rsi
; HUGEWS-NEXT:    retq
  %div = sdiv i64 %a, %b
  ret i64 %div
}

; A function with neither a size attribute nor !prof metadata. The expected
; code matches div64: bypassed for atom, x86-64, silvermont and skylake, and a
; plain idivq for goldmont/gracemont and for the configuration that passes
; -profile-summary-huge-working-set-size-threshold=1.
define i64 @div64_hugews(i64 %a, i64 %b) {
; ATOM-LABEL: div64_hugews:
; ATOM:       # %bb.0:
; ATOM-NEXT:    movq %rdi, %rcx
; ATOM-NEXT:    movq %rdi, %rax
; ATOM-NEXT:    orq %rsi, %rcx
; ATOM-NEXT:    shrq $32, %rcx
; ATOM-NEXT:    je .LBB4_1
; ATOM-NEXT:  # %bb.2:
; ATOM-NEXT:    cqto
; ATOM-NEXT:    idivq %rsi
; ATOM-NEXT:    retq
; ATOM-NEXT:  .LBB4_1:
; ATOM-NEXT:    # kill: def $eax killed $eax killed $rax
; ATOM-NEXT:    xorl %edx, %edx
; ATOM-NEXT:    divl %esi
; ATOM-NEXT:    # kill: def $eax killed $eax def $rax
; ATOM-NEXT:    retq
;
; X64-LABEL: div64_hugews:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    movq %rdi, %rcx
; X64-NEXT:    orq %rsi, %rcx
; X64-NEXT:    shrq $32, %rcx
; X64-NEXT:    je .LBB4_1
; X64-NEXT:  # %bb.2:
; X64-NEXT:    cqto
; X64-NEXT:    idivq %rsi
; X64-NEXT:    retq
; X64-NEXT:  .LBB4_1:
; X64-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    divl %esi
; X64-NEXT:    # kill: def $eax killed $eax def $rax
; X64-NEXT:    retq
;
; SLM-LABEL: div64_hugews:
; SLM:       # %bb.0:
; SLM-NEXT:    movq %rdi, %rcx
; SLM-NEXT:    movq %rdi, %rax
; SLM-NEXT:    orq %rsi, %rcx
; SLM-NEXT:    shrq $32, %rcx
; SLM-NEXT:    je .LBB4_1
; SLM-NEXT:  # %bb.2:
; SLM-NEXT:    cqto
; SLM-NEXT:    idivq %rsi
; SLM-NEXT:    retq
; SLM-NEXT:  .LBB4_1:
; SLM-NEXT:    xorl %edx, %edx
; SLM-NEXT:    # kill: def $eax killed $eax killed $rax
; SLM-NEXT:    divl %esi
; SLM-NEXT:    # kill: def $eax killed $eax def $rax
; SLM-NEXT:    retq
;
; SKL-LABEL: div64_hugews:
; SKL:       # %bb.0:
; SKL-NEXT:    movq %rdi, %rax
; SKL-NEXT:    movq %rdi, %rcx
; SKL-NEXT:    orq %rsi, %rcx
; SKL-NEXT:    shrq $32, %rcx
; SKL-NEXT:    je .LBB4_1
; SKL-NEXT:  # %bb.2:
; SKL-NEXT:    cqto
; SKL-NEXT:    idivq %rsi
; SKL-NEXT:    retq
; SKL-NEXT:  .LBB4_1:
; SKL-NEXT:    # kill: def $eax killed $eax killed $rax
; SKL-NEXT:    xorl %edx, %edx
; SKL-NEXT:    divl %esi
; SKL-NEXT:    # kill: def $eax killed $eax def $rax
; SKL-NEXT:    retq
;
; GMT-LABEL: div64_hugews:
; GMT:       # %bb.0:
; GMT-NEXT:    movq %rdi, %rax
; GMT-NEXT:    cqto
; GMT-NEXT:    idivq %rsi
; GMT-NEXT:    retq
;
; HUGEWS-LABEL: div64_hugews:
; HUGEWS:       # %bb.0:
; HUGEWS-NEXT:    movq %rdi, %rax
; HUGEWS-NEXT:    cqto
; HUGEWS-NEXT:    idivq %rsi
; HUGEWS-NEXT:    retq
  %div = sdiv i64 %a, %b
  ret i64 %div
}

; 32-bit division in an optsize function: a plain idivl is expected in every
; configuration, atom included.
define i32 @div32_optsize(i32 %a, i32 %b) optsize {
; CHECK-LABEL: div32_optsize:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    cltd
; CHECK-NEXT:    idivl %esi
; CHECK-NEXT:    retq
;
; HUGEWS-LABEL: div32_optsize:
; HUGEWS:       # %bb.0:
; HUGEWS-NEXT:    movl %edi, %eax
; HUGEWS-NEXT:    cltd
; HUGEWS-NEXT:    idivl %esi
; HUGEWS-NEXT:    retq
  %div = sdiv i32 %a, %b
  ret i32 %div
}

; 32-bit division in a function whose !prof metadata (!15) records a
; function_entry_count of 0: a plain idivl is expected in every configuration,
; atom included.
define i32 @div32_pgso(i32 %a, i32 %b) !prof !15 {
; CHECK-LABEL: div32_pgso:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    cltd
; CHECK-NEXT:    idivl %esi
; CHECK-NEXT:    retq
;
; HUGEWS-LABEL: div32_pgso:
; HUGEWS:       # %bb.0:
; HUGEWS-NEXT:    movl %edi, %eax
; HUGEWS-NEXT:    cltd
; HUGEWS-NEXT:    idivl %esi
; HUGEWS-NEXT:    retq
  %div = sdiv i32 %a, %b
  ret i32 %div
}

; 32-bit division in a minsize function: a plain idivl is expected in every
; configuration, atom included.
define i32 @div32_minsize(i32 %a, i32 %b) minsize {
; CHECK-LABEL: div32_minsize:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    cltd
; CHECK-NEXT:    idivl %esi
; CHECK-NEXT:    retq
;
; HUGEWS-LABEL: div32_minsize:
; HUGEWS:       # %bb.0:
; HUGEWS-NEXT:    movl %edi, %eax
; HUGEWS-NEXT:    cltd
; HUGEWS-NEXT:    idivl %esi
; HUGEWS-NEXT:    retq
  %div = sdiv i32 %a, %b
  ret i32 %div
}

; Module-level profile summary (!1) plus the zero function_entry_count node
; (!15) that the *_pgso functions attach through !prof.
!llvm.module.flags = !{!1}
!1 = !{i32 1, !"ProfileSummary", !2}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
!3 = !{!"ProfileFormat", !"InstrProf"}
!4 = !{!"TotalCount", i64 10000}
!5 = !{!"MaxCount", i64 1000}
!6 = !{!"MaxInternalCount", i64 1}
!7 = !{!"MaxFunctionCount", i64 1000}
!8 = !{!"NumCounts", i64 3}
!9 = !{!"NumFunctions", i64 3}
!10 = !{!"DetailedSummary", !11}
!11 = !{!12, !13, !14}
!12 = !{i32 10000, i64 1000, i32 1}
!13 = !{i32 999000, i64 1000, i32 3}
!14 = !{i32 999999, i64 5, i32 3}
!15 = !{!"function_entry_count", i64 0}