; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI2,X86-BASELINE
; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov,+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI2,X86-BMI1
; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov,+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI2,X86-BMI1
; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov,+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI2
; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov,+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI2

; Patterns:
;    c) x & (-1 >> y)
;    d) x << y >> y
; are equivalent, but we prefer the second variant if we have BMI2.

; We do not test the variant where y = (32 - z), because that is BMI2's BZHI.
19 20; ---------------------------------------------------------------------------- ; 21; 8-bit 22; ---------------------------------------------------------------------------- ; 23 24define i8 @clear_highbits8_c0(i8 %val, i8 %numhighbits) nounwind { 25; X86-LABEL: clear_highbits8_c0: 26; X86: # %bb.0: 27; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 28; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 29; X86-NEXT: shlb %cl, %al 30; X86-NEXT: shrb %cl, %al 31; X86-NEXT: retl 32; 33; X64-LABEL: clear_highbits8_c0: 34; X64: # %bb.0: 35; X64-NEXT: movl %esi, %ecx 36; X64-NEXT: movl %edi, %eax 37; X64-NEXT: shlb %cl, %al 38; X64-NEXT: # kill: def $cl killed $cl killed $ecx 39; X64-NEXT: shrb %cl, %al 40; X64-NEXT: # kill: def $al killed $al killed $eax 41; X64-NEXT: retq 42 %mask = lshr i8 -1, %numhighbits 43 %masked = and i8 %mask, %val 44 ret i8 %masked 45} 46 47define i8 @clear_highbits8_c2_load(ptr %w, i8 %numhighbits) nounwind { 48; X86-LABEL: clear_highbits8_c2_load: 49; X86: # %bb.0: 50; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 51; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 52; X86-NEXT: movzbl (%eax), %eax 53; X86-NEXT: shlb %cl, %al 54; X86-NEXT: shrb %cl, %al 55; X86-NEXT: retl 56; 57; X64-LABEL: clear_highbits8_c2_load: 58; X64: # %bb.0: 59; X64-NEXT: movl %esi, %ecx 60; X64-NEXT: movzbl (%rdi), %eax 61; X64-NEXT: shlb %cl, %al 62; X64-NEXT: # kill: def $cl killed $cl killed $ecx 63; X64-NEXT: shrb %cl, %al 64; X64-NEXT: retq 65 %val = load i8, ptr %w 66 %mask = lshr i8 -1, %numhighbits 67 %masked = and i8 %mask, %val 68 ret i8 %masked 69} 70 71define i8 @clear_highbits8_c4_commutative(i8 %val, i8 %numhighbits) nounwind { 72; X86-LABEL: clear_highbits8_c4_commutative: 73; X86: # %bb.0: 74; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 75; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax 76; X86-NEXT: shlb %cl, %al 77; X86-NEXT: shrb %cl, %al 78; X86-NEXT: retl 79; 80; X64-LABEL: clear_highbits8_c4_commutative: 81; X64: # %bb.0: 82; X64-NEXT: movl %esi, %ecx 83; X64-NEXT: movl %edi, %eax 84; 
X64-NEXT: shlb %cl, %al 85; X64-NEXT: # kill: def $cl killed $cl killed $ecx 86; X64-NEXT: shrb %cl, %al 87; X64-NEXT: # kill: def $al killed $al killed $eax 88; X64-NEXT: retq 89 %mask = lshr i8 -1, %numhighbits 90 %masked = and i8 %val, %mask ; swapped order 91 ret i8 %masked 92} 93 94; ---------------------------------------------------------------------------- ; 95; 16-bit 96; ---------------------------------------------------------------------------- ; 97 98define i16 @clear_highbits16_c0(i16 %val, i16 %numhighbits) nounwind { 99; X86-NOBMI2-LABEL: clear_highbits16_c0: 100; X86-NOBMI2: # %bb.0: 101; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 102; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 103; X86-NOBMI2-NEXT: shll %cl, %eax 104; X86-NOBMI2-NEXT: movzwl %ax, %eax 105; X86-NOBMI2-NEXT: shrl %cl, %eax 106; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 107; X86-NOBMI2-NEXT: retl 108; 109; X86-BMI2-LABEL: clear_highbits16_c0: 110; X86-BMI2: # %bb.0: 111; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 112; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx 113; X86-BMI2-NEXT: movzwl %cx, %ecx 114; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax 115; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 116; X86-BMI2-NEXT: retl 117; 118; X64-NOBMI2-LABEL: clear_highbits16_c0: 119; X64-NOBMI2: # %bb.0: 120; X64-NOBMI2-NEXT: movl %esi, %ecx 121; X64-NOBMI2-NEXT: shll %cl, %edi 122; X64-NOBMI2-NEXT: movzwl %di, %eax 123; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 124; X64-NOBMI2-NEXT: shrl %cl, %eax 125; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 126; X64-NOBMI2-NEXT: retq 127; 128; X64-BMI2-LABEL: clear_highbits16_c0: 129; X64-BMI2: # %bb.0: 130; X64-BMI2-NEXT: shlxl %esi, %edi, %eax 131; X64-BMI2-NEXT: movzwl %ax, %eax 132; X64-BMI2-NEXT: shrxl %esi, %eax, %eax 133; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 134; X64-BMI2-NEXT: retq 135 %mask = lshr i16 -1, %numhighbits 136 %masked = and i16 %mask, %val 137 ret i16 %masked 
138} 139 140define i16 @clear_highbits16_c1_indexzext(i16 %val, i8 %numhighbits) nounwind { 141; X86-NOBMI2-LABEL: clear_highbits16_c1_indexzext: 142; X86-NOBMI2: # %bb.0: 143; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 144; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 145; X86-NOBMI2-NEXT: shll %cl, %eax 146; X86-NOBMI2-NEXT: movzwl %ax, %eax 147; X86-NOBMI2-NEXT: shrl %cl, %eax 148; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 149; X86-NOBMI2-NEXT: retl 150; 151; X86-BMI2-LABEL: clear_highbits16_c1_indexzext: 152; X86-BMI2: # %bb.0: 153; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 154; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx 155; X86-BMI2-NEXT: movzwl %cx, %ecx 156; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax 157; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 158; X86-BMI2-NEXT: retl 159; 160; X64-NOBMI2-LABEL: clear_highbits16_c1_indexzext: 161; X64-NOBMI2: # %bb.0: 162; X64-NOBMI2-NEXT: movl %esi, %ecx 163; X64-NOBMI2-NEXT: shll %cl, %edi 164; X64-NOBMI2-NEXT: movzwl %di, %eax 165; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 166; X64-NOBMI2-NEXT: shrl %cl, %eax 167; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 168; X64-NOBMI2-NEXT: retq 169; 170; X64-BMI2-LABEL: clear_highbits16_c1_indexzext: 171; X64-BMI2: # %bb.0: 172; X64-BMI2-NEXT: shlxl %esi, %edi, %eax 173; X64-BMI2-NEXT: movzwl %ax, %eax 174; X64-BMI2-NEXT: shrxl %esi, %eax, %eax 175; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 176; X64-BMI2-NEXT: retq 177 %sh_prom = zext i8 %numhighbits to i16 178 %mask = lshr i16 -1, %sh_prom 179 %masked = and i16 %mask, %val 180 ret i16 %masked 181} 182 183define i16 @clear_highbits16_c2_load(ptr %w, i16 %numhighbits) nounwind { 184; X86-NOBMI2-LABEL: clear_highbits16_c2_load: 185; X86-NOBMI2: # %bb.0: 186; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 187; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 188; X86-NOBMI2-NEXT: movzwl (%eax), %eax 189; X86-NOBMI2-NEXT: shll %cl, %eax 190; X86-NOBMI2-NEXT: movzwl 
%ax, %eax 191; X86-NOBMI2-NEXT: shrl %cl, %eax 192; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 193; X86-NOBMI2-NEXT: retl 194; 195; X86-BMI2-LABEL: clear_highbits16_c2_load: 196; X86-BMI2: # %bb.0: 197; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 198; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 199; X86-BMI2-NEXT: movzwl (%ecx), %ecx 200; X86-BMI2-NEXT: shlxl %eax, %ecx, %ecx 201; X86-BMI2-NEXT: movzwl %cx, %ecx 202; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax 203; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 204; X86-BMI2-NEXT: retl 205; 206; X64-NOBMI2-LABEL: clear_highbits16_c2_load: 207; X64-NOBMI2: # %bb.0: 208; X64-NOBMI2-NEXT: movl %esi, %ecx 209; X64-NOBMI2-NEXT: movzwl (%rdi), %eax 210; X64-NOBMI2-NEXT: shll %cl, %eax 211; X64-NOBMI2-NEXT: movzwl %ax, %eax 212; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 213; X64-NOBMI2-NEXT: shrl %cl, %eax 214; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 215; X64-NOBMI2-NEXT: retq 216; 217; X64-BMI2-LABEL: clear_highbits16_c2_load: 218; X64-BMI2: # %bb.0: 219; X64-BMI2-NEXT: movzwl (%rdi), %eax 220; X64-BMI2-NEXT: shlxl %esi, %eax, %eax 221; X64-BMI2-NEXT: movzwl %ax, %eax 222; X64-BMI2-NEXT: shrxl %esi, %eax, %eax 223; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 224; X64-BMI2-NEXT: retq 225 %val = load i16, ptr %w 226 %mask = lshr i16 -1, %numhighbits 227 %masked = and i16 %mask, %val 228 ret i16 %masked 229} 230 231define i16 @clear_highbits16_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind { 232; X86-NOBMI2-LABEL: clear_highbits16_c3_load_indexzext: 233; X86-NOBMI2: # %bb.0: 234; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 235; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 236; X86-NOBMI2-NEXT: movzwl (%eax), %eax 237; X86-NOBMI2-NEXT: shll %cl, %eax 238; X86-NOBMI2-NEXT: movzwl %ax, %eax 239; X86-NOBMI2-NEXT: shrl %cl, %eax 240; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 241; X86-NOBMI2-NEXT: retl 242; 243; X86-BMI2-LABEL: 
clear_highbits16_c3_load_indexzext: 244; X86-BMI2: # %bb.0: 245; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 246; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 247; X86-BMI2-NEXT: movzwl (%ecx), %ecx 248; X86-BMI2-NEXT: shlxl %eax, %ecx, %ecx 249; X86-BMI2-NEXT: movzwl %cx, %ecx 250; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax 251; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 252; X86-BMI2-NEXT: retl 253; 254; X64-NOBMI2-LABEL: clear_highbits16_c3_load_indexzext: 255; X64-NOBMI2: # %bb.0: 256; X64-NOBMI2-NEXT: movl %esi, %ecx 257; X64-NOBMI2-NEXT: movzwl (%rdi), %eax 258; X64-NOBMI2-NEXT: shll %cl, %eax 259; X64-NOBMI2-NEXT: movzwl %ax, %eax 260; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 261; X64-NOBMI2-NEXT: shrl %cl, %eax 262; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 263; X64-NOBMI2-NEXT: retq 264; 265; X64-BMI2-LABEL: clear_highbits16_c3_load_indexzext: 266; X64-BMI2: # %bb.0: 267; X64-BMI2-NEXT: movzwl (%rdi), %eax 268; X64-BMI2-NEXT: shlxl %esi, %eax, %eax 269; X64-BMI2-NEXT: movzwl %ax, %eax 270; X64-BMI2-NEXT: shrxl %esi, %eax, %eax 271; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 272; X64-BMI2-NEXT: retq 273 %val = load i16, ptr %w 274 %sh_prom = zext i8 %numhighbits to i16 275 %mask = lshr i16 -1, %sh_prom 276 %masked = and i16 %mask, %val 277 ret i16 %masked 278} 279 280define i16 @clear_highbits16_c4_commutative(i16 %val, i16 %numhighbits) nounwind { 281; X86-NOBMI2-LABEL: clear_highbits16_c4_commutative: 282; X86-NOBMI2: # %bb.0: 283; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 284; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 285; X86-NOBMI2-NEXT: shll %cl, %eax 286; X86-NOBMI2-NEXT: movzwl %ax, %eax 287; X86-NOBMI2-NEXT: shrl %cl, %eax 288; X86-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 289; X86-NOBMI2-NEXT: retl 290; 291; X86-BMI2-LABEL: clear_highbits16_c4_commutative: 292; X86-BMI2: # %bb.0: 293; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 294; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx 
295; X86-BMI2-NEXT: movzwl %cx, %ecx 296; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax 297; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 298; X86-BMI2-NEXT: retl 299; 300; X64-NOBMI2-LABEL: clear_highbits16_c4_commutative: 301; X64-NOBMI2: # %bb.0: 302; X64-NOBMI2-NEXT: movl %esi, %ecx 303; X64-NOBMI2-NEXT: shll %cl, %edi 304; X64-NOBMI2-NEXT: movzwl %di, %eax 305; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 306; X64-NOBMI2-NEXT: shrl %cl, %eax 307; X64-NOBMI2-NEXT: # kill: def $ax killed $ax killed $eax 308; X64-NOBMI2-NEXT: retq 309; 310; X64-BMI2-LABEL: clear_highbits16_c4_commutative: 311; X64-BMI2: # %bb.0: 312; X64-BMI2-NEXT: shlxl %esi, %edi, %eax 313; X64-BMI2-NEXT: movzwl %ax, %eax 314; X64-BMI2-NEXT: shrxl %esi, %eax, %eax 315; X64-BMI2-NEXT: # kill: def $ax killed $ax killed $eax 316; X64-BMI2-NEXT: retq 317 %mask = lshr i16 -1, %numhighbits 318 %masked = and i16 %val, %mask ; swapped order 319 ret i16 %masked 320} 321 322; ---------------------------------------------------------------------------- ; 323; 32-bit 324; ---------------------------------------------------------------------------- ; 325 326define i32 @clear_highbits32_c0(i32 %val, i32 %numhighbits) nounwind { 327; X86-NOBMI2-LABEL: clear_highbits32_c0: 328; X86-NOBMI2: # %bb.0: 329; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 330; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 331; X86-NOBMI2-NEXT: shll %cl, %eax 332; X86-NOBMI2-NEXT: shrl %cl, %eax 333; X86-NOBMI2-NEXT: retl 334; 335; X86-BMI2-LABEL: clear_highbits32_c0: 336; X86-BMI2: # %bb.0: 337; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 338; X86-BMI2-NEXT: movl $32, %ecx 339; X86-BMI2-NEXT: subl %eax, %ecx 340; X86-BMI2-NEXT: bzhil %ecx, {{[0-9]+}}(%esp), %eax 341; X86-BMI2-NEXT: retl 342; 343; X64-NOBMI2-LABEL: clear_highbits32_c0: 344; X64-NOBMI2: # %bb.0: 345; X64-NOBMI2-NEXT: movl %esi, %ecx 346; X64-NOBMI2-NEXT: movl %edi, %eax 347; X64-NOBMI2-NEXT: shll %cl, %eax 348; X64-NOBMI2-NEXT: # kill: def $cl killed $cl 
killed $ecx 349; X64-NOBMI2-NEXT: shrl %cl, %eax 350; X64-NOBMI2-NEXT: retq 351; 352; X64-BMI2-LABEL: clear_highbits32_c0: 353; X64-BMI2: # %bb.0: 354; X64-BMI2-NEXT: movl $32, %eax 355; X64-BMI2-NEXT: subl %esi, %eax 356; X64-BMI2-NEXT: bzhil %eax, %edi, %eax 357; X64-BMI2-NEXT: retq 358 %mask = lshr i32 -1, %numhighbits 359 %masked = and i32 %mask, %val 360 ret i32 %masked 361} 362 363define i32 @clear_highbits32_c1_indexzext(i32 %val, i8 %numhighbits) nounwind { 364; X86-NOBMI2-LABEL: clear_highbits32_c1_indexzext: 365; X86-NOBMI2: # %bb.0: 366; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 367; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 368; X86-NOBMI2-NEXT: shll %cl, %eax 369; X86-NOBMI2-NEXT: shrl %cl, %eax 370; X86-NOBMI2-NEXT: retl 371; 372; X86-BMI2-LABEL: clear_highbits32_c1_indexzext: 373; X86-BMI2: # %bb.0: 374; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 375; X86-BMI2-NEXT: movl $32, %ecx 376; X86-BMI2-NEXT: subl %eax, %ecx 377; X86-BMI2-NEXT: bzhil %ecx, {{[0-9]+}}(%esp), %eax 378; X86-BMI2-NEXT: retl 379; 380; X64-NOBMI2-LABEL: clear_highbits32_c1_indexzext: 381; X64-NOBMI2: # %bb.0: 382; X64-NOBMI2-NEXT: movl %esi, %ecx 383; X64-NOBMI2-NEXT: movl %edi, %eax 384; X64-NOBMI2-NEXT: shll %cl, %eax 385; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 386; X64-NOBMI2-NEXT: shrl %cl, %eax 387; X64-NOBMI2-NEXT: retq 388; 389; X64-BMI2-LABEL: clear_highbits32_c1_indexzext: 390; X64-BMI2: # %bb.0: 391; X64-BMI2-NEXT: movl $32, %eax 392; X64-BMI2-NEXT: subl %esi, %eax 393; X64-BMI2-NEXT: bzhil %eax, %edi, %eax 394; X64-BMI2-NEXT: retq 395 %sh_prom = zext i8 %numhighbits to i32 396 %mask = lshr i32 -1, %sh_prom 397 %masked = and i32 %mask, %val 398 ret i32 %masked 399} 400 401define i32 @clear_highbits32_c2_load(ptr %w, i32 %numhighbits) nounwind { 402; X86-NOBMI2-LABEL: clear_highbits32_c2_load: 403; X86-NOBMI2: # %bb.0: 404; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 405; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 406; X86-NOBMI2-NEXT: movl 
(%eax), %eax 407; X86-NOBMI2-NEXT: shll %cl, %eax 408; X86-NOBMI2-NEXT: shrl %cl, %eax 409; X86-NOBMI2-NEXT: retl 410; 411; X86-BMI2-LABEL: clear_highbits32_c2_load: 412; X86-BMI2: # %bb.0: 413; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 414; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 415; X86-BMI2-NEXT: movl $32, %edx 416; X86-BMI2-NEXT: subl %ecx, %edx 417; X86-BMI2-NEXT: bzhil %edx, (%eax), %eax 418; X86-BMI2-NEXT: retl 419; 420; X64-NOBMI2-LABEL: clear_highbits32_c2_load: 421; X64-NOBMI2: # %bb.0: 422; X64-NOBMI2-NEXT: movl %esi, %ecx 423; X64-NOBMI2-NEXT: movl (%rdi), %eax 424; X64-NOBMI2-NEXT: shll %cl, %eax 425; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 426; X64-NOBMI2-NEXT: shrl %cl, %eax 427; X64-NOBMI2-NEXT: retq 428; 429; X64-BMI2-LABEL: clear_highbits32_c2_load: 430; X64-BMI2: # %bb.0: 431; X64-BMI2-NEXT: movl $32, %eax 432; X64-BMI2-NEXT: subl %esi, %eax 433; X64-BMI2-NEXT: bzhil %eax, (%rdi), %eax 434; X64-BMI2-NEXT: retq 435 %val = load i32, ptr %w 436 %mask = lshr i32 -1, %numhighbits 437 %masked = and i32 %mask, %val 438 ret i32 %masked 439} 440 441define i32 @clear_highbits32_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind { 442; X86-NOBMI2-LABEL: clear_highbits32_c3_load_indexzext: 443; X86-NOBMI2: # %bb.0: 444; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 445; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 446; X86-NOBMI2-NEXT: movl (%eax), %eax 447; X86-NOBMI2-NEXT: shll %cl, %eax 448; X86-NOBMI2-NEXT: shrl %cl, %eax 449; X86-NOBMI2-NEXT: retl 450; 451; X86-BMI2-LABEL: clear_highbits32_c3_load_indexzext: 452; X86-BMI2: # %bb.0: 453; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 454; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 455; X86-BMI2-NEXT: movl $32, %edx 456; X86-BMI2-NEXT: subl %ecx, %edx 457; X86-BMI2-NEXT: bzhil %edx, (%eax), %eax 458; X86-BMI2-NEXT: retl 459; 460; X64-NOBMI2-LABEL: clear_highbits32_c3_load_indexzext: 461; X64-NOBMI2: # %bb.0: 462; X64-NOBMI2-NEXT: movl %esi, %ecx 463; X64-NOBMI2-NEXT: movl (%rdi), 
%eax 464; X64-NOBMI2-NEXT: shll %cl, %eax 465; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 466; X64-NOBMI2-NEXT: shrl %cl, %eax 467; X64-NOBMI2-NEXT: retq 468; 469; X64-BMI2-LABEL: clear_highbits32_c3_load_indexzext: 470; X64-BMI2: # %bb.0: 471; X64-BMI2-NEXT: movl $32, %eax 472; X64-BMI2-NEXT: subl %esi, %eax 473; X64-BMI2-NEXT: bzhil %eax, (%rdi), %eax 474; X64-BMI2-NEXT: retq 475 %val = load i32, ptr %w 476 %sh_prom = zext i8 %numhighbits to i32 477 %mask = lshr i32 -1, %sh_prom 478 %masked = and i32 %mask, %val 479 ret i32 %masked 480} 481 482define i32 @clear_highbits32_c4_commutative(i32 %val, i32 %numhighbits) nounwind { 483; X86-NOBMI2-LABEL: clear_highbits32_c4_commutative: 484; X86-NOBMI2: # %bb.0: 485; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 486; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 487; X86-NOBMI2-NEXT: shll %cl, %eax 488; X86-NOBMI2-NEXT: shrl %cl, %eax 489; X86-NOBMI2-NEXT: retl 490; 491; X86-BMI2-LABEL: clear_highbits32_c4_commutative: 492; X86-BMI2: # %bb.0: 493; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 494; X86-BMI2-NEXT: movl $32, %ecx 495; X86-BMI2-NEXT: subl %eax, %ecx 496; X86-BMI2-NEXT: bzhil %ecx, {{[0-9]+}}(%esp), %eax 497; X86-BMI2-NEXT: retl 498; 499; X64-NOBMI2-LABEL: clear_highbits32_c4_commutative: 500; X64-NOBMI2: # %bb.0: 501; X64-NOBMI2-NEXT: movl %esi, %ecx 502; X64-NOBMI2-NEXT: movl %edi, %eax 503; X64-NOBMI2-NEXT: shll %cl, %eax 504; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 505; X64-NOBMI2-NEXT: shrl %cl, %eax 506; X64-NOBMI2-NEXT: retq 507; 508; X64-BMI2-LABEL: clear_highbits32_c4_commutative: 509; X64-BMI2: # %bb.0: 510; X64-BMI2-NEXT: movl $32, %eax 511; X64-BMI2-NEXT: subl %esi, %eax 512; X64-BMI2-NEXT: bzhil %eax, %edi, %eax 513; X64-BMI2-NEXT: retq 514 %mask = lshr i32 -1, %numhighbits 515 %masked = and i32 %val, %mask ; swapped order 516 ret i32 %masked 517} 518 519; ---------------------------------------------------------------------------- ; 520; 64-bit 521; 
---------------------------------------------------------------------------- ; 522 523define i64 @clear_highbits64_c0(i64 %val, i64 %numhighbits) nounwind { 524; X86-BASELINE-LABEL: clear_highbits64_c0: 525; X86-BASELINE: # %bb.0: 526; X86-BASELINE-NEXT: pushl %esi 527; X86-BASELINE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 528; X86-BASELINE-NEXT: movl $-1, %eax 529; X86-BASELINE-NEXT: movl $-1, %esi 530; X86-BASELINE-NEXT: shrl %cl, %esi 531; X86-BASELINE-NEXT: xorl %edx, %edx 532; X86-BASELINE-NEXT: testb $32, %cl 533; X86-BASELINE-NEXT: jne .LBB13_1 534; X86-BASELINE-NEXT: # %bb.2: 535; X86-BASELINE-NEXT: movl %esi, %edx 536; X86-BASELINE-NEXT: jmp .LBB13_3 537; X86-BASELINE-NEXT: .LBB13_1: 538; X86-BASELINE-NEXT: movl %esi, %eax 539; X86-BASELINE-NEXT: .LBB13_3: 540; X86-BASELINE-NEXT: andl {{[0-9]+}}(%esp), %eax 541; X86-BASELINE-NEXT: andl {{[0-9]+}}(%esp), %edx 542; X86-BASELINE-NEXT: popl %esi 543; X86-BASELINE-NEXT: retl 544; 545; X86-BMI1-LABEL: clear_highbits64_c0: 546; X86-BMI1: # %bb.0: 547; X86-BMI1-NEXT: pushl %esi 548; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 549; X86-BMI1-NEXT: movl $-1, %esi 550; X86-BMI1-NEXT: movl $-1, %eax 551; X86-BMI1-NEXT: shrl %cl, %eax 552; X86-BMI1-NEXT: xorl %edx, %edx 553; X86-BMI1-NEXT: testb $32, %cl 554; X86-BMI1-NEXT: cmovel %eax, %edx 555; X86-BMI1-NEXT: cmovel %esi, %eax 556; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax 557; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx 558; X86-BMI1-NEXT: popl %esi 559; X86-BMI1-NEXT: retl 560; 561; X86-BMI2-LABEL: clear_highbits64_c0: 562; X86-BMI2: # %bb.0: 563; X86-BMI2-NEXT: pushl %esi 564; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 565; X86-BMI2-NEXT: movl $-1, %eax 566; X86-BMI2-NEXT: shrxl %ecx, %eax, %esi 567; X86-BMI2-NEXT: xorl %edx, %edx 568; X86-BMI2-NEXT: testb $32, %cl 569; X86-BMI2-NEXT: cmovel %esi, %edx 570; X86-BMI2-NEXT: cmovnel %esi, %eax 571; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 572; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 573; X86-BMI2-NEXT: popl %esi 
574; X86-BMI2-NEXT: retl 575; 576; X64-NOBMI2-LABEL: clear_highbits64_c0: 577; X64-NOBMI2: # %bb.0: 578; X64-NOBMI2-NEXT: movq %rsi, %rcx 579; X64-NOBMI2-NEXT: movq %rdi, %rax 580; X64-NOBMI2-NEXT: shlq %cl, %rax 581; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx 582; X64-NOBMI2-NEXT: shrq %cl, %rax 583; X64-NOBMI2-NEXT: retq 584; 585; X64-BMI2-LABEL: clear_highbits64_c0: 586; X64-BMI2: # %bb.0: 587; X64-BMI2-NEXT: movl $64, %eax 588; X64-BMI2-NEXT: subl %esi, %eax 589; X64-BMI2-NEXT: bzhiq %rax, %rdi, %rax 590; X64-BMI2-NEXT: retq 591 %mask = lshr i64 -1, %numhighbits 592 %masked = and i64 %mask, %val 593 ret i64 %masked 594} 595 596define i64 @clear_highbits64_c1_indexzext(i64 %val, i8 %numhighbits) nounwind { 597; X86-BASELINE-LABEL: clear_highbits64_c1_indexzext: 598; X86-BASELINE: # %bb.0: 599; X86-BASELINE-NEXT: pushl %esi 600; X86-BASELINE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 601; X86-BASELINE-NEXT: movl $-1, %eax 602; X86-BASELINE-NEXT: movl $-1, %esi 603; X86-BASELINE-NEXT: shrl %cl, %esi 604; X86-BASELINE-NEXT: xorl %edx, %edx 605; X86-BASELINE-NEXT: testb $32, %cl 606; X86-BASELINE-NEXT: jne .LBB14_1 607; X86-BASELINE-NEXT: # %bb.2: 608; X86-BASELINE-NEXT: movl %esi, %edx 609; X86-BASELINE-NEXT: jmp .LBB14_3 610; X86-BASELINE-NEXT: .LBB14_1: 611; X86-BASELINE-NEXT: movl %esi, %eax 612; X86-BASELINE-NEXT: .LBB14_3: 613; X86-BASELINE-NEXT: andl {{[0-9]+}}(%esp), %eax 614; X86-BASELINE-NEXT: andl {{[0-9]+}}(%esp), %edx 615; X86-BASELINE-NEXT: popl %esi 616; X86-BASELINE-NEXT: retl 617; 618; X86-BMI1-LABEL: clear_highbits64_c1_indexzext: 619; X86-BMI1: # %bb.0: 620; X86-BMI1-NEXT: pushl %esi 621; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 622; X86-BMI1-NEXT: movl $-1, %esi 623; X86-BMI1-NEXT: movl $-1, %eax 624; X86-BMI1-NEXT: shrl %cl, %eax 625; X86-BMI1-NEXT: xorl %edx, %edx 626; X86-BMI1-NEXT: testb $32, %cl 627; X86-BMI1-NEXT: cmovel %eax, %edx 628; X86-BMI1-NEXT: cmovel %esi, %eax 629; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax 630; 
X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx 631; X86-BMI1-NEXT: popl %esi 632; X86-BMI1-NEXT: retl 633; 634; X86-BMI2-LABEL: clear_highbits64_c1_indexzext: 635; X86-BMI2: # %bb.0: 636; X86-BMI2-NEXT: pushl %esi 637; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 638; X86-BMI2-NEXT: movl $-1, %eax 639; X86-BMI2-NEXT: shrxl %ecx, %eax, %esi 640; X86-BMI2-NEXT: xorl %edx, %edx 641; X86-BMI2-NEXT: testb $32, %cl 642; X86-BMI2-NEXT: cmovel %esi, %edx 643; X86-BMI2-NEXT: cmovnel %esi, %eax 644; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 645; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 646; X86-BMI2-NEXT: popl %esi 647; X86-BMI2-NEXT: retl 648; 649; X64-NOBMI2-LABEL: clear_highbits64_c1_indexzext: 650; X64-NOBMI2: # %bb.0: 651; X64-NOBMI2-NEXT: movl %esi, %ecx 652; X64-NOBMI2-NEXT: movq %rdi, %rax 653; X64-NOBMI2-NEXT: shlq %cl, %rax 654; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx 655; X64-NOBMI2-NEXT: shrq %cl, %rax 656; X64-NOBMI2-NEXT: retq 657; 658; X64-BMI2-LABEL: clear_highbits64_c1_indexzext: 659; X64-BMI2: # %bb.0: 660; X64-BMI2-NEXT: movl $64, %eax 661; X64-BMI2-NEXT: subl %esi, %eax 662; X64-BMI2-NEXT: bzhiq %rax, %rdi, %rax 663; X64-BMI2-NEXT: retq 664 %sh_prom = zext i8 %numhighbits to i64 665 %mask = lshr i64 -1, %sh_prom 666 %masked = and i64 %mask, %val 667 ret i64 %masked 668} 669 670define i64 @clear_highbits64_c2_load(ptr %w, i64 %numhighbits) nounwind { 671; X86-BASELINE-LABEL: clear_highbits64_c2_load: 672; X86-BASELINE: # %bb.0: 673; X86-BASELINE-NEXT: pushl %edi 674; X86-BASELINE-NEXT: pushl %esi 675; X86-BASELINE-NEXT: movl {{[0-9]+}}(%esp), %esi 676; X86-BASELINE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 677; X86-BASELINE-NEXT: movl $-1, %eax 678; X86-BASELINE-NEXT: movl $-1, %edi 679; X86-BASELINE-NEXT: shrl %cl, %edi 680; X86-BASELINE-NEXT: xorl %edx, %edx 681; X86-BASELINE-NEXT: testb $32, %cl 682; X86-BASELINE-NEXT: jne .LBB15_1 683; X86-BASELINE-NEXT: # %bb.2: 684; X86-BASELINE-NEXT: movl %edi, %edx 685; X86-BASELINE-NEXT: jmp .LBB15_3 686; 
X86-BASELINE-NEXT: .LBB15_1: 687; X86-BASELINE-NEXT: movl %edi, %eax 688; X86-BASELINE-NEXT: .LBB15_3: 689; X86-BASELINE-NEXT: andl (%esi), %eax 690; X86-BASELINE-NEXT: andl 4(%esi), %edx 691; X86-BASELINE-NEXT: popl %esi 692; X86-BASELINE-NEXT: popl %edi 693; X86-BASELINE-NEXT: retl 694; 695; X86-BMI1-LABEL: clear_highbits64_c2_load: 696; X86-BMI1: # %bb.0: 697; X86-BMI1-NEXT: pushl %edi 698; X86-BMI1-NEXT: pushl %esi 699; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi 700; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 701; X86-BMI1-NEXT: movl $-1, %edi 702; X86-BMI1-NEXT: movl $-1, %eax 703; X86-BMI1-NEXT: shrl %cl, %eax 704; X86-BMI1-NEXT: xorl %edx, %edx 705; X86-BMI1-NEXT: testb $32, %cl 706; X86-BMI1-NEXT: cmovel %eax, %edx 707; X86-BMI1-NEXT: cmovel %edi, %eax 708; X86-BMI1-NEXT: andl (%esi), %eax 709; X86-BMI1-NEXT: andl 4(%esi), %edx 710; X86-BMI1-NEXT: popl %esi 711; X86-BMI1-NEXT: popl %edi 712; X86-BMI1-NEXT: retl 713; 714; X86-BMI2-LABEL: clear_highbits64_c2_load: 715; X86-BMI2: # %bb.0: 716; X86-BMI2-NEXT: pushl %ebx 717; X86-BMI2-NEXT: pushl %esi 718; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 719; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx 720; X86-BMI2-NEXT: movl $-1, %eax 721; X86-BMI2-NEXT: shrxl %ebx, %eax, %esi 722; X86-BMI2-NEXT: xorl %edx, %edx 723; X86-BMI2-NEXT: testb $32, %bl 724; X86-BMI2-NEXT: cmovel %esi, %edx 725; X86-BMI2-NEXT: cmovnel %esi, %eax 726; X86-BMI2-NEXT: andl (%ecx), %eax 727; X86-BMI2-NEXT: andl 4(%ecx), %edx 728; X86-BMI2-NEXT: popl %esi 729; X86-BMI2-NEXT: popl %ebx 730; X86-BMI2-NEXT: retl 731; 732; X64-NOBMI2-LABEL: clear_highbits64_c2_load: 733; X64-NOBMI2: # %bb.0: 734; X64-NOBMI2-NEXT: movq %rsi, %rcx 735; X64-NOBMI2-NEXT: movq (%rdi), %rax 736; X64-NOBMI2-NEXT: shlq %cl, %rax 737; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx 738; X64-NOBMI2-NEXT: shrq %cl, %rax 739; X64-NOBMI2-NEXT: retq 740; 741; X64-BMI2-LABEL: clear_highbits64_c2_load: 742; X64-BMI2: # %bb.0: 743; X64-BMI2-NEXT: movl $64, %eax 744; 
X64-BMI2-NEXT: subl %esi, %eax 745; X64-BMI2-NEXT: bzhiq %rax, (%rdi), %rax 746; X64-BMI2-NEXT: retq 747 %val = load i64, ptr %w 748 %mask = lshr i64 -1, %numhighbits 749 %masked = and i64 %mask, %val 750 ret i64 %masked 751} 752 753define i64 @clear_highbits64_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind { 754; X86-BASELINE-LABEL: clear_highbits64_c3_load_indexzext: 755; X86-BASELINE: # %bb.0: 756; X86-BASELINE-NEXT: pushl %edi 757; X86-BASELINE-NEXT: pushl %esi 758; X86-BASELINE-NEXT: movl {{[0-9]+}}(%esp), %esi 759; X86-BASELINE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 760; X86-BASELINE-NEXT: movl $-1, %eax 761; X86-BASELINE-NEXT: movl $-1, %edi 762; X86-BASELINE-NEXT: shrl %cl, %edi 763; X86-BASELINE-NEXT: xorl %edx, %edx 764; X86-BASELINE-NEXT: testb $32, %cl 765; X86-BASELINE-NEXT: jne .LBB16_1 766; X86-BASELINE-NEXT: # %bb.2: 767; X86-BASELINE-NEXT: movl %edi, %edx 768; X86-BASELINE-NEXT: jmp .LBB16_3 769; X86-BASELINE-NEXT: .LBB16_1: 770; X86-BASELINE-NEXT: movl %edi, %eax 771; X86-BASELINE-NEXT: .LBB16_3: 772; X86-BASELINE-NEXT: andl (%esi), %eax 773; X86-BASELINE-NEXT: andl 4(%esi), %edx 774; X86-BASELINE-NEXT: popl %esi 775; X86-BASELINE-NEXT: popl %edi 776; X86-BASELINE-NEXT: retl 777; 778; X86-BMI1-LABEL: clear_highbits64_c3_load_indexzext: 779; X86-BMI1: # %bb.0: 780; X86-BMI1-NEXT: pushl %edi 781; X86-BMI1-NEXT: pushl %esi 782; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi 783; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 784; X86-BMI1-NEXT: movl $-1, %edi 785; X86-BMI1-NEXT: movl $-1, %eax 786; X86-BMI1-NEXT: shrl %cl, %eax 787; X86-BMI1-NEXT: xorl %edx, %edx 788; X86-BMI1-NEXT: testb $32, %cl 789; X86-BMI1-NEXT: cmovel %eax, %edx 790; X86-BMI1-NEXT: cmovel %edi, %eax 791; X86-BMI1-NEXT: andl (%esi), %eax 792; X86-BMI1-NEXT: andl 4(%esi), %edx 793; X86-BMI1-NEXT: popl %esi 794; X86-BMI1-NEXT: popl %edi 795; X86-BMI1-NEXT: retl 796; 797; X86-BMI2-LABEL: clear_highbits64_c3_load_indexzext: 798; X86-BMI2: # %bb.0: 799; X86-BMI2-NEXT: pushl %ebx 800; 
X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
; X86-BMI2-NEXT: movl $-1, %eax
; X86-BMI2-NEXT: shrxl %ebx, %eax, %esi
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BMI2-NEXT: testb $32, %bl
; X86-BMI2-NEXT: cmovel %esi, %edx
; X86-BMI2-NEXT: cmovnel %esi, %eax
; X86-BMI2-NEXT: andl (%ecx), %eax
; X86-BMI2-NEXT: andl 4(%ecx), %edx
; X86-BMI2-NEXT: popl %esi
; X86-BMI2-NEXT: popl %ebx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_highbits64_c3_load_indexzext:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: movq (%rdi), %rax
; X64-NOBMI2-NEXT: shlq %cl, %rax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shrq %cl, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_highbits64_c3_load_indexzext:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movl $64, %eax
; X64-BMI2-NEXT: subl %esi, %eax
; X64-BMI2-NEXT: bzhiq %rax, (%rdi), %rax
; X64-BMI2-NEXT: retq
  %val = load i64, ptr %w
  %sh_prom = zext i8 %numhighbits to i64
  %mask = lshr i64 -1, %sh_prom
  %masked = and i64 %mask, %val
  ret i64 %masked
}

; i64 form of x & (-1 >> y) with the operands of the 'and' swapped: %val is
; the first operand and the mask is the second.
define i64 @clear_highbits64_c4_commutative(i64 %val, i64 %numhighbits) nounwind {
; X86-BASELINE-LABEL: clear_highbits64_c4_commutative:
; X86-BASELINE: # %bb.0:
; X86-BASELINE-NEXT: pushl %esi
; X86-BASELINE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BASELINE-NEXT: movl $-1, %eax
; X86-BASELINE-NEXT: movl $-1, %esi
; X86-BASELINE-NEXT: shrl %cl, %esi
; X86-BASELINE-NEXT: xorl %edx, %edx
; X86-BASELINE-NEXT: testb $32, %cl
; X86-BASELINE-NEXT: jne .LBB17_1
; X86-BASELINE-NEXT: # %bb.2:
; X86-BASELINE-NEXT: movl %esi, %edx
; X86-BASELINE-NEXT: jmp .LBB17_3
; X86-BASELINE-NEXT: .LBB17_1:
; X86-BASELINE-NEXT: movl %esi, %eax
; X86-BASELINE-NEXT: .LBB17_3:
; X86-BASELINE-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BASELINE-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BASELINE-NEXT: popl %esi
; X86-BASELINE-NEXT: retl
;
; X86-BMI1-LABEL: clear_highbits64_c4_commutative:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $-1, %esi
; X86-BMI1-NEXT: movl $-1, %eax
; X86-BMI1-NEXT: shrl %cl, %eax
; X86-BMI1-NEXT: xorl %edx, %edx
; X86-BMI1-NEXT: testb $32, %cl
; X86-BMI1-NEXT: cmovel %eax, %edx
; X86-BMI1-NEXT: cmovel %esi, %eax
; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: popl %esi
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: clear_highbits64_c4_commutative:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl $-1, %eax
; X86-BMI2-NEXT: shrxl %ecx, %eax, %esi
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: cmovel %esi, %edx
; X86-BMI2-NEXT: cmovnel %esi, %eax
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: popl %esi
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_highbits64_c4_commutative:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movq %rsi, %rcx
; X64-NOBMI2-NEXT: movq %rdi, %rax
; X64-NOBMI2-NEXT: shlq %cl, %rax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI2-NEXT: shrq %cl, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_highbits64_c4_commutative:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movl $64, %eax
; X64-BMI2-NEXT: subl %esi, %eax
; X64-BMI2-NEXT: bzhiq %rax, %rdi, %rax
; X64-BMI2-NEXT: retq
  %mask = lshr i64 -1, %numhighbits
  %masked = and i64 %val, %mask ; swapped order
  ret i64 %masked
}

; ---------------------------------------------------------------------------- ;
; Multi-use tests
; ---------------------------------------------------------------------------- ;

; i32 form of x & (-1 >> y) where the mask is also stored through %escape, so
; the lshr result has a second use besides the 'and'.
define i32 @oneuse32_c(i32 %val, i32 %numhighbits, ptr %escape) nounwind {
; X86-NOBMI2-LABEL: oneuse32_c:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: movl %eax, (%edx)
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: oneuse32_c:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shrxl %eax, %edx, %eax
; X86-BMI2-NEXT: movl %eax, (%ecx)
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: oneuse32_c:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: movl $-1, %eax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: movl %eax, (%rdx)
; X64-NOBMI2-NEXT: andl %edi, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: oneuse32_c:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movl $-1, %eax
; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
; X64-BMI2-NEXT: movl %eax, (%rdx)
; X64-BMI2-NEXT: andl %edi, %eax
; X64-BMI2-NEXT: retq
  %mask = lshr i32 -1, %numhighbits
  store i32 %mask, ptr %escape
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; i64 form of x & (-1 >> y) where the mask is also stored through %escape, so
; the lshr result has a second use besides the 'and'.
define i64 @oneuse64_c(i64 %val, i64 %numhighbits, ptr %escape) nounwind {
; X86-BASELINE-LABEL: oneuse64_c:
; X86-BASELINE: # %bb.0:
; X86-BASELINE-NEXT: pushl %esi
; X86-BASELINE-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BASELINE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BASELINE-NEXT: movl $-1, %eax
; X86-BASELINE-NEXT: movl $-1, %edx
; X86-BASELINE-NEXT: shrl %cl, %edx
; X86-BASELINE-NEXT: testb $32, %cl
; X86-BASELINE-NEXT: je .LBB19_2
; X86-BASELINE-NEXT: # %bb.1:
; X86-BASELINE-NEXT: movl %edx, %eax
; X86-BASELINE-NEXT: xorl %edx, %edx
; X86-BASELINE-NEXT: .LBB19_2:
; X86-BASELINE-NEXT: movl %edx, 4(%esi)
; X86-BASELINE-NEXT: movl %eax, (%esi)
; X86-BASELINE-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BASELINE-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BASELINE-NEXT: popl %esi
; X86-BASELINE-NEXT: retl
;
; X86-BMI1-LABEL: oneuse64_c:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $-1, %eax
; X86-BMI1-NEXT: movl $-1, %edi
; X86-BMI1-NEXT: shrl %cl, %edi
; X86-BMI1-NEXT: xorl %edx, %edx
; X86-BMI1-NEXT: testb $32, %cl
; X86-BMI1-NEXT: cmovnel %edi, %eax
; X86-BMI1-NEXT: cmovel %edi, %edx
; X86-BMI1-NEXT: movl %edx, 4(%esi)
; X86-BMI1-NEXT: movl %eax, (%esi)
; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: popl %esi
; X86-BMI1-NEXT: popl %edi
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: oneuse64_c:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
; X86-BMI2-NEXT: movl $-1, %eax
; X86-BMI2-NEXT: shrxl %ebx, %eax, %esi
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BMI2-NEXT: testb $32, %bl
; X86-BMI2-NEXT: cmovnel %esi, %eax
; X86-BMI2-NEXT: cmovel %esi, %edx
; X86-BMI2-NEXT: movl %edx, 4(%ecx)
; X86-BMI2-NEXT: movl %eax, (%ecx)
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: popl %esi
; X86-BMI2-NEXT: popl %ebx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: oneuse64_c:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movq %rsi, %rcx
; X64-NOBMI2-NEXT: movq $-1, %rax
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI2-NEXT: shrq %cl, %rax
; X64-NOBMI2-NEXT: movq %rax, (%rdx)
; X64-NOBMI2-NEXT: andq %rdi, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: oneuse64_c:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movq $-1, %rax
; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax
; X64-BMI2-NEXT: movq %rax, (%rdx)
; X64-BMI2-NEXT: andq %rdi, %rax
; X64-BMI2-NEXT: retq
  %mask = lshr i64 -1, %numhighbits
  store i64 %mask, ptr %escape
  %masked = and i64 %mask, %val
  ret i64 %masked
}

; i32 form of x << y >> y where the intermediate shl result is also stored
; through %escape, so it has a second use besides the lshr.
define i32 @oneuse32_d(i32 %val, i32 %numhighbits, ptr %escape) nounwind {
; X86-NOBMI2-LABEL: oneuse32_d:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: movl %eax, (%edx)
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: oneuse32_d:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: shlxl %ecx, {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl %edx, (%eax)
; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: oneuse32_d:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl %esi, %ecx
; X64-NOBMI2-NEXT: movl %edi, %eax
; X64-NOBMI2-NEXT: shll %cl, %eax
; X64-NOBMI2-NEXT: movl %eax, (%rdx)
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: oneuse32_d:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: shlxl %esi, %edi, %eax
; X64-BMI2-NEXT: movl %eax, (%rdx)
; X64-BMI2-NEXT: shrxl %esi, %eax, %eax
; X64-BMI2-NEXT: retq
  %sh1 = shl i32 %val, %numhighbits
  store i32 %sh1, ptr %escape
  %masked = lshr i32 %sh1, %numhighbits
  ret i32 %masked
}

; i64 form of x << y >> y where the intermediate shl result is also stored
; through %escape, so it has a second use besides the lshr.
define i64 @oneusei64_d(i64 %val, i64 %numhighbits, ptr %escape) nounwind {
; X86-BASELINE-LABEL: oneusei64_d:
; X86-BASELINE: # %bb.0:
; X86-BASELINE-NEXT: pushl %ebx
; X86-BASELINE-NEXT: pushl %edi
; X86-BASELINE-NEXT: pushl %esi
; X86-BASELINE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BASELINE-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BASELINE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BASELINE-NEXT: movl %edx, %edi
; X86-BASELINE-NEXT: shll %cl, %edi
; X86-BASELINE-NEXT: shldl %cl, %edx, %eax
; X86-BASELINE-NEXT: testb $32, %cl
; X86-BASELINE-NEXT: movl %edi, %esi
; X86-BASELINE-NEXT: jne .LBB21_2
; X86-BASELINE-NEXT: # %bb.1:
; X86-BASELINE-NEXT: movl %eax, %esi
; X86-BASELINE-NEXT: .LBB21_2:
; X86-BASELINE-NEXT: movl %esi, %eax
; X86-BASELINE-NEXT: shrl %cl, %eax
; X86-BASELINE-NEXT: xorl %ebx, %ebx
; X86-BASELINE-NEXT: testb $32, %cl
; X86-BASELINE-NEXT: movl $0, %edx
; X86-BASELINE-NEXT: jne .LBB21_4
; X86-BASELINE-NEXT: # %bb.3:
; X86-BASELINE-NEXT: movl %edi, %ebx
; X86-BASELINE-NEXT: movl %eax, %edx
; X86-BASELINE-NEXT: .LBB21_4:
; X86-BASELINE-NEXT: movl %ebx, %edi
; X86-BASELINE-NEXT: shrdl %cl, %esi, %edi
; X86-BASELINE-NEXT: testb $32, %cl
; X86-BASELINE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BASELINE-NEXT: movl %ebx, (%ecx)
; X86-BASELINE-NEXT: movl %esi, 4(%ecx)
; X86-BASELINE-NEXT: jne .LBB21_6
; X86-BASELINE-NEXT: # %bb.5:
; X86-BASELINE-NEXT: movl %edi, %eax
; X86-BASELINE-NEXT: .LBB21_6:
; X86-BASELINE-NEXT: popl %esi
; X86-BASELINE-NEXT: popl %edi
; X86-BASELINE-NEXT: popl %ebx
; X86-BASELINE-NEXT: retl
;
; X86-BMI1-LABEL: oneusei64_d:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %ebx
; X86-BMI1-NEXT: pushl %edi
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI1-NEXT: movl %edx, %eax
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: shldl %cl, %edx, %esi
; X86-BMI1-NEXT: testb $32, %cl
; X86-BMI1-NEXT: cmovnel %eax, %esi
; X86-BMI1-NEXT: movl %esi, %edi
; X86-BMI1-NEXT: shrl %cl, %edi
; X86-BMI1-NEXT: xorl %edx, %edx
; X86-BMI1-NEXT: testb $32, %cl
; X86-BMI1-NEXT: cmovnel %edx, %eax
; X86-BMI1-NEXT: cmovel %edi, %edx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-BMI1-NEXT: movl %eax, (%ebx)
; X86-BMI1-NEXT: shrdl %cl, %esi, %eax
; X86-BMI1-NEXT: testb $32, %cl
; X86-BMI1-NEXT: movl %esi, 4(%ebx)
; X86-BMI1-NEXT: cmovnel %edi, %eax
; X86-BMI1-NEXT: popl %esi
; X86-BMI1-NEXT: popl %edi
; X86-BMI1-NEXT: popl %ebx
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: oneusei64_d:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: pushl %edi
; X86-BMI2-NEXT: pushl %esi
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-BMI2-NEXT: shldl %cl, %eax, %esi
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: xorl %edx, %edx
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: cmovnel %eax, %esi
; X86-BMI2-NEXT: cmovnel %edx, %eax
; X86-BMI2-NEXT: shrxl %ecx, %esi, %edi
; X86-BMI2-NEXT: cmovel %edi, %edx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-BMI2-NEXT: movl %eax, (%ebx)
; X86-BMI2-NEXT: shrdl %cl, %esi, %eax
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: movl %esi, 4(%ebx)
; X86-BMI2-NEXT: cmovnel %edi, %eax
; X86-BMI2-NEXT: popl %esi
; X86-BMI2-NEXT: popl %edi
; X86-BMI2-NEXT: popl %ebx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: oneusei64_d:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movq %rsi, %rcx
; X64-NOBMI2-NEXT: movq %rdi, %rax
; X64-NOBMI2-NEXT: shlq %cl, %rax
; X64-NOBMI2-NEXT: movq %rax, (%rdx)
; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI2-NEXT: shrq %cl, %rax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: oneusei64_d:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax
; X64-BMI2-NEXT: movq %rax, (%rdx)
; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax
; X64-BMI2-NEXT: retq
  %sh1 = shl i64 %val, %numhighbits
  store i64 %sh1, ptr %escape
  %masked = lshr i64 %sh1, %numhighbits
  ret i64 %masked
}

; ---------------------------------------------------------------------------- ;
; Misc.
;
; Variation of pattern
; c) x & (-1 >> (C - y))
; but with C != bitwidth(x)
; ---------------------------------------------------------------------------- ;

; i32 value, shift amount computed as (16 - %numlowbits): C = 16 is smaller
; than the 32-bit width of %val.
define i32 @clear_highbits32_16(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_highbits32_16:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movb $16, %cl
; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_highbits32_16:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb $16, %al
; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: movl $32, %ecx
; X86-BMI2-NEXT: subl %eax, %ecx
; X86-BMI2-NEXT: bzhil %ecx, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_highbits32_16:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl %edi, %eax
; X64-NOBMI2-NEXT: movb $16, %cl
; X64-NOBMI2-NEXT: subb %sil, %cl
; X64-NOBMI2-NEXT: shll %cl, %eax
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_highbits32_16:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movb $16, %al
; X64-BMI2-NEXT: subb %sil, %al
; X64-BMI2-NEXT: movl $32, %ecx
; X64-BMI2-NEXT: subl %eax, %ecx
; X64-BMI2-NEXT: bzhil %ecx, %edi, %eax
; X64-BMI2-NEXT: retq
  %numhighbits = sub i32 16, %numlowbits
  %mask = lshr i32 -1, %numhighbits
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; i32 value, shift amount computed as (48 - %numlowbits): C = 48 is larger
; than the 32-bit width of %val.
define i32 @clear_highbits32_48(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI2-LABEL: clear_highbits32_48:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: movb $48, %cl
; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: shll %cl, %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_highbits32_48:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movb $48, %al
; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: movl $32, %ecx
; X86-BMI2-NEXT: subl %eax, %ecx
; X86-BMI2-NEXT: bzhil %ecx, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_highbits32_48:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movl %edi, %eax
; X64-NOBMI2-NEXT: movb $48, %cl
; X64-NOBMI2-NEXT: subb %sil, %cl
; X64-NOBMI2-NEXT: shll %cl, %eax
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_highbits32_48:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movb $48, %al
; X64-BMI2-NEXT: subb %sil, %al
; X64-BMI2-NEXT: movl $32, %ecx
; X64-BMI2-NEXT: subl %eax, %ecx
; X64-BMI2-NEXT: bzhil %ecx, %edi, %eax
; X64-BMI2-NEXT: retq
  %numhighbits = sub i32 48, %numlowbits
  %mask = lshr i32 -1, %numhighbits
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Same IR as clear_highbits32_16 (C = 16), except that the mask is also stored
; through %escape, giving the lshr result a second use.
define i32 @clear_highbits32_16_extrause(i32 %val, i32 %numlowbits, ptr %escape) nounwind {
; X86-NOBMI2-LABEL: clear_highbits32_16_extrause:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: movb $16, %cl
; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: movl %eax, (%edx)
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_highbits32_16_extrause:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movb $16, %al
; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shrxl %eax, %edx, %eax
; X86-BMI2-NEXT: movl %eax, (%ecx)
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_highbits32_16_extrause:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movb $16, %cl
; X64-NOBMI2-NEXT: subb %sil, %cl
; X64-NOBMI2-NEXT: movl $-1, %eax
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: movl %eax, (%rdx)
; X64-NOBMI2-NEXT: andl %edi, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_highbits32_16_extrause:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movb $16, %al
; X64-BMI2-NEXT: subb %sil, %al
; X64-BMI2-NEXT: movl $-1, %ecx
; X64-BMI2-NEXT: shrxl %eax, %ecx, %eax
; X64-BMI2-NEXT: movl %eax, (%rdx)
; X64-BMI2-NEXT: andl %edi, %eax
; X64-BMI2-NEXT: retq
  %numhighbits = sub i32 16, %numlowbits
  %mask = lshr i32 -1, %numhighbits
  store i32 %mask, ptr %escape
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Same IR as clear_highbits32_48 (C = 48), except that the mask is also stored
; through %escape, giving the lshr result a second use.
define i32 @clear_highbits32_48_extrause(i32 %val, i32 %numlowbits, ptr %escape) nounwind {
; X86-NOBMI2-LABEL: clear_highbits32_48_extrause:
; X86-NOBMI2: # %bb.0:
; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI2-NEXT: movb $48, %cl
; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI2-NEXT: movl $-1, %eax
; X86-NOBMI2-NEXT: shrl %cl, %eax
; X86-NOBMI2-NEXT: movl %eax, (%edx)
; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI2-NEXT: retl
;
; X86-BMI2-LABEL: clear_highbits32_48_extrause:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movb $48, %al
; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shrxl %eax, %edx, %eax
; X86-BMI2-NEXT: movl %eax, (%ecx)
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI2-LABEL: clear_highbits32_48_extrause:
; X64-NOBMI2: # %bb.0:
; X64-NOBMI2-NEXT: movb $48, %cl
; X64-NOBMI2-NEXT: subb %sil, %cl
; X64-NOBMI2-NEXT: movl $-1, %eax
; X64-NOBMI2-NEXT: shrl %cl, %eax
; X64-NOBMI2-NEXT: movl %eax, (%rdx)
; X64-NOBMI2-NEXT: andl %edi, %eax
; X64-NOBMI2-NEXT: retq
;
; X64-BMI2-LABEL: clear_highbits32_48_extrause:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movb $48, %al
; X64-BMI2-NEXT: subb %sil, %al
; X64-BMI2-NEXT: movl $-1, %ecx
; X64-BMI2-NEXT: shrxl %eax, %ecx, %eax
; X64-BMI2-NEXT: movl %eax, (%rdx)
; X64-BMI2-NEXT: andl %edi, %eax
; X64-BMI2-NEXT: retq
  %numhighbits = sub i32 48, %numlowbits
  %mask = lshr i32 -1, %numhighbits
  store i32 %mask, ptr %escape
  %masked = and i32 %mask, %val
  ret i32 %masked
}