1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-NOBMI 3; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI1 4; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI1 5; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI2 6; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X86,X86-BMI2 7; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-NOBMI 8; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI1,X64-BMI1NOTBM 9; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,-bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI1,X64-BMI1TBM 10; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,+tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI2,X64-BMI2TBM 11; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi,-tbm,+bmi2 < %s | FileCheck %s --check-prefixes=X64,X64-BMI2,X64-BMI2NOTBM 12 13; *Please* keep in sync with test/CodeGen/AArch64/extract-lowbits.ll 14 15; https://bugs.llvm.org/show_bug.cgi?id=36419 16; https://bugs.llvm.org/show_bug.cgi?id=37603 17; https://bugs.llvm.org/show_bug.cgi?id=37610 18 19; Patterns: 20; a) x & (1 << nbits) - 1 21; b) x & ~(-1 << nbits) 22; c) x & (-1 >> (32 - y)) 23; d) x << (32 - y) >> (32 - y) 24; are equivalent. 25 26; ---------------------------------------------------------------------------- ; 27; Pattern a. 
32-bit 28; ---------------------------------------------------------------------------- ; 29 30define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind { 31; X86-NOBMI-LABEL: bzhi32_a0: 32; X86-NOBMI: # %bb.0: 33; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 34; X86-NOBMI-NEXT: movl $1, %eax 35; X86-NOBMI-NEXT: shll %cl, %eax 36; X86-NOBMI-NEXT: decl %eax 37; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 38; X86-NOBMI-NEXT: retl 39; 40; X86-BMI1-LABEL: bzhi32_a0: 41; X86-BMI1: # %bb.0: 42; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax 43; X86-BMI1-NEXT: shll $8, %eax 44; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 45; X86-BMI1-NEXT: retl 46; 47; X86-BMI2-LABEL: bzhi32_a0: 48; X86-BMI2: # %bb.0: 49; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 50; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 51; X86-BMI2-NEXT: retl 52; 53; X64-NOBMI-LABEL: bzhi32_a0: 54; X64-NOBMI: # %bb.0: 55; X64-NOBMI-NEXT: movl %esi, %ecx 56; X64-NOBMI-NEXT: movl $1, %eax 57; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 58; X64-NOBMI-NEXT: shll %cl, %eax 59; X64-NOBMI-NEXT: decl %eax 60; X64-NOBMI-NEXT: andl %edi, %eax 61; X64-NOBMI-NEXT: retq 62; 63; X64-BMI1-LABEL: bzhi32_a0: 64; X64-BMI1: # %bb.0: 65; X64-BMI1-NEXT: shll $8, %esi 66; X64-BMI1-NEXT: bextrl %esi, %edi, %eax 67; X64-BMI1-NEXT: retq 68; 69; X64-BMI2-LABEL: bzhi32_a0: 70; X64-BMI2: # %bb.0: 71; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 72; X64-BMI2-NEXT: retq 73 %onebit = shl i32 1, %numlowbits 74 %mask = add nsw i32 %onebit, -1 75 %masked = and i32 %mask, %val 76 ret i32 %masked 77} 78 79define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind { 80; X86-NOBMI-LABEL: bzhi32_a1_indexzext: 81; X86-NOBMI: # %bb.0: 82; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 83; X86-NOBMI-NEXT: movl $1, %eax 84; X86-NOBMI-NEXT: shll %cl, %eax 85; X86-NOBMI-NEXT: decl %eax 86; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 87; X86-NOBMI-NEXT: retl 88; 89; X86-BMI1-LABEL: bzhi32_a1_indexzext: 90; X86-BMI1: # %bb.0: 
91; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax 92; X86-BMI1-NEXT: shll $8, %eax 93; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 94; X86-BMI1-NEXT: retl 95; 96; X86-BMI2-LABEL: bzhi32_a1_indexzext: 97; X86-BMI2: # %bb.0: 98; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 99; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 100; X86-BMI2-NEXT: retl 101; 102; X64-NOBMI-LABEL: bzhi32_a1_indexzext: 103; X64-NOBMI: # %bb.0: 104; X64-NOBMI-NEXT: movl %esi, %ecx 105; X64-NOBMI-NEXT: movl $1, %eax 106; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 107; X64-NOBMI-NEXT: shll %cl, %eax 108; X64-NOBMI-NEXT: decl %eax 109; X64-NOBMI-NEXT: andl %edi, %eax 110; X64-NOBMI-NEXT: retq 111; 112; X64-BMI1-LABEL: bzhi32_a1_indexzext: 113; X64-BMI1: # %bb.0: 114; X64-BMI1-NEXT: shll $8, %esi 115; X64-BMI1-NEXT: bextrl %esi, %edi, %eax 116; X64-BMI1-NEXT: retq 117; 118; X64-BMI2-LABEL: bzhi32_a1_indexzext: 119; X64-BMI2: # %bb.0: 120; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 121; X64-BMI2-NEXT: retq 122 %conv = zext i8 %numlowbits to i32 123 %onebit = shl i32 1, %conv 124 %mask = add nsw i32 %onebit, -1 125 %masked = and i32 %mask, %val 126 ret i32 %masked 127} 128 129define i32 @bzhi32_a2_load(ptr %w, i32 %numlowbits) nounwind { 130; X86-NOBMI-LABEL: bzhi32_a2_load: 131; X86-NOBMI: # %bb.0: 132; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 133; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 134; X86-NOBMI-NEXT: movl $1, %eax 135; X86-NOBMI-NEXT: shll %cl, %eax 136; X86-NOBMI-NEXT: decl %eax 137; X86-NOBMI-NEXT: andl (%edx), %eax 138; X86-NOBMI-NEXT: retl 139; 140; X86-BMI1-LABEL: bzhi32_a2_load: 141; X86-BMI1: # %bb.0: 142; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax 143; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 144; X86-BMI1-NEXT: shll $8, %ecx 145; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax 146; X86-BMI1-NEXT: retl 147; 148; X86-BMI2-LABEL: bzhi32_a2_load: 149; X86-BMI2: # %bb.0: 150; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 151; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), 
%ecx 152; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax 153; X86-BMI2-NEXT: retl 154; 155; X64-NOBMI-LABEL: bzhi32_a2_load: 156; X64-NOBMI: # %bb.0: 157; X64-NOBMI-NEXT: movl %esi, %ecx 158; X64-NOBMI-NEXT: movl $1, %eax 159; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 160; X64-NOBMI-NEXT: shll %cl, %eax 161; X64-NOBMI-NEXT: decl %eax 162; X64-NOBMI-NEXT: andl (%rdi), %eax 163; X64-NOBMI-NEXT: retq 164; 165; X64-BMI1-LABEL: bzhi32_a2_load: 166; X64-BMI1: # %bb.0: 167; X64-BMI1-NEXT: shll $8, %esi 168; X64-BMI1-NEXT: bextrl %esi, (%rdi), %eax 169; X64-BMI1-NEXT: retq 170; 171; X64-BMI2-LABEL: bzhi32_a2_load: 172; X64-BMI2: # %bb.0: 173; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax 174; X64-BMI2-NEXT: retq 175 %val = load i32, ptr %w 176 %onebit = shl i32 1, %numlowbits 177 %mask = add nsw i32 %onebit, -1 178 %masked = and i32 %mask, %val 179 ret i32 %masked 180} 181 182define i32 @bzhi32_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind { 183; X86-NOBMI-LABEL: bzhi32_a3_load_indexzext: 184; X86-NOBMI: # %bb.0: 185; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 186; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 187; X86-NOBMI-NEXT: movl $1, %eax 188; X86-NOBMI-NEXT: shll %cl, %eax 189; X86-NOBMI-NEXT: decl %eax 190; X86-NOBMI-NEXT: andl (%edx), %eax 191; X86-NOBMI-NEXT: retl 192; 193; X86-BMI1-LABEL: bzhi32_a3_load_indexzext: 194; X86-BMI1: # %bb.0: 195; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax 196; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 197; X86-BMI1-NEXT: shll $8, %ecx 198; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax 199; X86-BMI1-NEXT: retl 200; 201; X86-BMI2-LABEL: bzhi32_a3_load_indexzext: 202; X86-BMI2: # %bb.0: 203; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 204; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 205; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax 206; X86-BMI2-NEXT: retl 207; 208; X64-NOBMI-LABEL: bzhi32_a3_load_indexzext: 209; X64-NOBMI: # %bb.0: 210; X64-NOBMI-NEXT: movl %esi, %ecx 211; X64-NOBMI-NEXT: movl $1, %eax 212; X64-NOBMI-NEXT: # 
kill: def $cl killed $cl killed $ecx 213; X64-NOBMI-NEXT: shll %cl, %eax 214; X64-NOBMI-NEXT: decl %eax 215; X64-NOBMI-NEXT: andl (%rdi), %eax 216; X64-NOBMI-NEXT: retq 217; 218; X64-BMI1-LABEL: bzhi32_a3_load_indexzext: 219; X64-BMI1: # %bb.0: 220; X64-BMI1-NEXT: shll $8, %esi 221; X64-BMI1-NEXT: bextrl %esi, (%rdi), %eax 222; X64-BMI1-NEXT: retq 223; 224; X64-BMI2-LABEL: bzhi32_a3_load_indexzext: 225; X64-BMI2: # %bb.0: 226; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax 227; X64-BMI2-NEXT: retq 228 %val = load i32, ptr %w 229 %conv = zext i8 %numlowbits to i32 230 %onebit = shl i32 1, %conv 231 %mask = add nsw i32 %onebit, -1 232 %masked = and i32 %mask, %val 233 ret i32 %masked 234} 235 236define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind { 237; X86-NOBMI-LABEL: bzhi32_a4_commutative: 238; X86-NOBMI: # %bb.0: 239; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 240; X86-NOBMI-NEXT: movl $1, %eax 241; X86-NOBMI-NEXT: shll %cl, %eax 242; X86-NOBMI-NEXT: decl %eax 243; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 244; X86-NOBMI-NEXT: retl 245; 246; X86-BMI1-LABEL: bzhi32_a4_commutative: 247; X86-BMI1: # %bb.0: 248; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax 249; X86-BMI1-NEXT: shll $8, %eax 250; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 251; X86-BMI1-NEXT: retl 252; 253; X86-BMI2-LABEL: bzhi32_a4_commutative: 254; X86-BMI2: # %bb.0: 255; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 256; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 257; X86-BMI2-NEXT: retl 258; 259; X64-NOBMI-LABEL: bzhi32_a4_commutative: 260; X64-NOBMI: # %bb.0: 261; X64-NOBMI-NEXT: movl %esi, %ecx 262; X64-NOBMI-NEXT: movl $1, %eax 263; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 264; X64-NOBMI-NEXT: shll %cl, %eax 265; X64-NOBMI-NEXT: decl %eax 266; X64-NOBMI-NEXT: andl %edi, %eax 267; X64-NOBMI-NEXT: retq 268; 269; X64-BMI1-LABEL: bzhi32_a4_commutative: 270; X64-BMI1: # %bb.0: 271; X64-BMI1-NEXT: shll $8, %esi 272; X64-BMI1-NEXT: bextrl %esi, %edi, %eax 273; 
X64-BMI1-NEXT: retq 274; 275; X64-BMI2-LABEL: bzhi32_a4_commutative: 276; X64-BMI2: # %bb.0: 277; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 278; X64-BMI2-NEXT: retq 279 %onebit = shl i32 1, %numlowbits 280 %mask = add nsw i32 %onebit, -1 281 %masked = and i32 %val, %mask ; swapped order 282 ret i32 %masked 283} 284 285; 64-bit 286 287define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind { 288; X86-NOBMI-LABEL: bzhi64_a0: 289; X86-NOBMI: # %bb.0: 290; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 291; X86-NOBMI-NEXT: movl $1, %eax 292; X86-NOBMI-NEXT: xorl %edx, %edx 293; X86-NOBMI-NEXT: shldl %cl, %eax, %edx 294; X86-NOBMI-NEXT: shll %cl, %eax 295; X86-NOBMI-NEXT: testb $32, %cl 296; X86-NOBMI-NEXT: je .LBB5_2 297; X86-NOBMI-NEXT: # %bb.1: 298; X86-NOBMI-NEXT: movl %eax, %edx 299; X86-NOBMI-NEXT: xorl %eax, %eax 300; X86-NOBMI-NEXT: .LBB5_2: 301; X86-NOBMI-NEXT: addl $-1, %eax 302; X86-NOBMI-NEXT: adcl $-1, %edx 303; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx 304; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 305; X86-NOBMI-NEXT: retl 306; 307; X86-BMI1-LABEL: bzhi64_a0: 308; X86-BMI1: # %bb.0: 309; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 310; X86-BMI1-NEXT: movl $1, %eax 311; X86-BMI1-NEXT: xorl %edx, %edx 312; X86-BMI1-NEXT: shldl %cl, %eax, %edx 313; X86-BMI1-NEXT: shll %cl, %eax 314; X86-BMI1-NEXT: testb $32, %cl 315; X86-BMI1-NEXT: je .LBB5_2 316; X86-BMI1-NEXT: # %bb.1: 317; X86-BMI1-NEXT: movl %eax, %edx 318; X86-BMI1-NEXT: xorl %eax, %eax 319; X86-BMI1-NEXT: .LBB5_2: 320; X86-BMI1-NEXT: addl $-1, %eax 321; X86-BMI1-NEXT: adcl $-1, %edx 322; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx 323; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax 324; X86-BMI1-NEXT: retl 325; 326; X86-BMI2-LABEL: bzhi64_a0: 327; X86-BMI2: # %bb.0: 328; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 329; X86-BMI2-NEXT: movl $1, %eax 330; X86-BMI2-NEXT: xorl %edx, %edx 331; X86-BMI2-NEXT: shldl %cl, %eax, %edx 332; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax 333; X86-BMI2-NEXT: testb $32, %cl 
334; X86-BMI2-NEXT: je .LBB5_2 335; X86-BMI2-NEXT: # %bb.1: 336; X86-BMI2-NEXT: movl %eax, %edx 337; X86-BMI2-NEXT: xorl %eax, %eax 338; X86-BMI2-NEXT: .LBB5_2: 339; X86-BMI2-NEXT: addl $-1, %eax 340; X86-BMI2-NEXT: adcl $-1, %edx 341; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 342; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 343; X86-BMI2-NEXT: retl 344; 345; X64-NOBMI-LABEL: bzhi64_a0: 346; X64-NOBMI: # %bb.0: 347; X64-NOBMI-NEXT: movq %rsi, %rcx 348; X64-NOBMI-NEXT: movl $1, %eax 349; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 350; X64-NOBMI-NEXT: shlq %cl, %rax 351; X64-NOBMI-NEXT: decq %rax 352; X64-NOBMI-NEXT: andq %rdi, %rax 353; X64-NOBMI-NEXT: retq 354; 355; X64-BMI1-LABEL: bzhi64_a0: 356; X64-BMI1: # %bb.0: 357; X64-BMI1-NEXT: shll $8, %esi 358; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax 359; X64-BMI1-NEXT: retq 360; 361; X64-BMI2-LABEL: bzhi64_a0: 362; X64-BMI2: # %bb.0: 363; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax 364; X64-BMI2-NEXT: retq 365 %onebit = shl i64 1, %numlowbits 366 %mask = add nsw i64 %onebit, -1 367 %masked = and i64 %mask, %val 368 ret i64 %masked 369} 370 371; Check that we don't throw away the vreg_width-1 mask if not using shifts 372define i64 @bzhi64_a0_masked(i64 %val, i64 %numlowbits) nounwind { 373; X86-NOBMI-LABEL: bzhi64_a0_masked: 374; X86-NOBMI: # %bb.0: 375; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 376; X86-NOBMI-NEXT: movl $1, %eax 377; X86-NOBMI-NEXT: xorl %edx, %edx 378; X86-NOBMI-NEXT: shldl %cl, %eax, %edx 379; X86-NOBMI-NEXT: shll %cl, %eax 380; X86-NOBMI-NEXT: testb $32, %cl 381; X86-NOBMI-NEXT: je .LBB6_2 382; X86-NOBMI-NEXT: # %bb.1: 383; X86-NOBMI-NEXT: movl %eax, %edx 384; X86-NOBMI-NEXT: xorl %eax, %eax 385; X86-NOBMI-NEXT: .LBB6_2: 386; X86-NOBMI-NEXT: addl $-1, %eax 387; X86-NOBMI-NEXT: adcl $-1, %edx 388; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx 389; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 390; X86-NOBMI-NEXT: retl 391; 392; X86-BMI1-LABEL: bzhi64_a0_masked: 393; X86-BMI1: # %bb.0: 394; 
X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 395; X86-BMI1-NEXT: movl $1, %eax 396; X86-BMI1-NEXT: xorl %edx, %edx 397; X86-BMI1-NEXT: shldl %cl, %eax, %edx 398; X86-BMI1-NEXT: shll %cl, %eax 399; X86-BMI1-NEXT: testb $32, %cl 400; X86-BMI1-NEXT: je .LBB6_2 401; X86-BMI1-NEXT: # %bb.1: 402; X86-BMI1-NEXT: movl %eax, %edx 403; X86-BMI1-NEXT: xorl %eax, %eax 404; X86-BMI1-NEXT: .LBB6_2: 405; X86-BMI1-NEXT: addl $-1, %eax 406; X86-BMI1-NEXT: adcl $-1, %edx 407; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx 408; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax 409; X86-BMI1-NEXT: retl 410; 411; X86-BMI2-LABEL: bzhi64_a0_masked: 412; X86-BMI2: # %bb.0: 413; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 414; X86-BMI2-NEXT: movl $1, %eax 415; X86-BMI2-NEXT: xorl %edx, %edx 416; X86-BMI2-NEXT: shldl %cl, %eax, %edx 417; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax 418; X86-BMI2-NEXT: testb $32, %cl 419; X86-BMI2-NEXT: je .LBB6_2 420; X86-BMI2-NEXT: # %bb.1: 421; X86-BMI2-NEXT: movl %eax, %edx 422; X86-BMI2-NEXT: xorl %eax, %eax 423; X86-BMI2-NEXT: .LBB6_2: 424; X86-BMI2-NEXT: addl $-1, %eax 425; X86-BMI2-NEXT: adcl $-1, %edx 426; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 427; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 428; X86-BMI2-NEXT: retl 429; 430; X64-NOBMI-LABEL: bzhi64_a0_masked: 431; X64-NOBMI: # %bb.0: 432; X64-NOBMI-NEXT: movq %rsi, %rcx 433; X64-NOBMI-NEXT: movl $1, %eax 434; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 435; X64-NOBMI-NEXT: shlq %cl, %rax 436; X64-NOBMI-NEXT: decq %rax 437; X64-NOBMI-NEXT: andq %rdi, %rax 438; X64-NOBMI-NEXT: retq 439; 440; X64-BMI1-LABEL: bzhi64_a0_masked: 441; X64-BMI1: # %bb.0: 442; X64-BMI1-NEXT: andb $63, %sil 443; X64-BMI1-NEXT: shll $8, %esi 444; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax 445; X64-BMI1-NEXT: retq 446; 447; X64-BMI2-LABEL: bzhi64_a0_masked: 448; X64-BMI2: # %bb.0: 449; X64-BMI2-NEXT: andb $63, %sil 450; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax 451; X64-BMI2-NEXT: retq 452 %numlowbits.masked = and i64 %numlowbits, 63 
453 %onebit = shl i64 1, %numlowbits.masked 454 %mask = add nsw i64 %onebit, -1 455 %masked = and i64 %mask, %val 456 ret i64 %masked 457} 458 459define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind { 460; X86-NOBMI-LABEL: bzhi64_a1_indexzext: 461; X86-NOBMI: # %bb.0: 462; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 463; X86-NOBMI-NEXT: movl $1, %eax 464; X86-NOBMI-NEXT: xorl %edx, %edx 465; X86-NOBMI-NEXT: shldl %cl, %eax, %edx 466; X86-NOBMI-NEXT: shll %cl, %eax 467; X86-NOBMI-NEXT: testb $32, %cl 468; X86-NOBMI-NEXT: je .LBB7_2 469; X86-NOBMI-NEXT: # %bb.1: 470; X86-NOBMI-NEXT: movl %eax, %edx 471; X86-NOBMI-NEXT: xorl %eax, %eax 472; X86-NOBMI-NEXT: .LBB7_2: 473; X86-NOBMI-NEXT: addl $-1, %eax 474; X86-NOBMI-NEXT: adcl $-1, %edx 475; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx 476; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 477; X86-NOBMI-NEXT: retl 478; 479; X86-BMI1-LABEL: bzhi64_a1_indexzext: 480; X86-BMI1: # %bb.0: 481; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 482; X86-BMI1-NEXT: movl $1, %eax 483; X86-BMI1-NEXT: xorl %edx, %edx 484; X86-BMI1-NEXT: shldl %cl, %eax, %edx 485; X86-BMI1-NEXT: shll %cl, %eax 486; X86-BMI1-NEXT: testb $32, %cl 487; X86-BMI1-NEXT: je .LBB7_2 488; X86-BMI1-NEXT: # %bb.1: 489; X86-BMI1-NEXT: movl %eax, %edx 490; X86-BMI1-NEXT: xorl %eax, %eax 491; X86-BMI1-NEXT: .LBB7_2: 492; X86-BMI1-NEXT: addl $-1, %eax 493; X86-BMI1-NEXT: adcl $-1, %edx 494; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx 495; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax 496; X86-BMI1-NEXT: retl 497; 498; X86-BMI2-LABEL: bzhi64_a1_indexzext: 499; X86-BMI2: # %bb.0: 500; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 501; X86-BMI2-NEXT: movl $1, %eax 502; X86-BMI2-NEXT: xorl %edx, %edx 503; X86-BMI2-NEXT: shldl %cl, %eax, %edx 504; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax 505; X86-BMI2-NEXT: testb $32, %cl 506; X86-BMI2-NEXT: je .LBB7_2 507; X86-BMI2-NEXT: # %bb.1: 508; X86-BMI2-NEXT: movl %eax, %edx 509; X86-BMI2-NEXT: xorl %eax, %eax 510; 
X86-BMI2-NEXT: .LBB7_2: 511; X86-BMI2-NEXT: addl $-1, %eax 512; X86-BMI2-NEXT: adcl $-1, %edx 513; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 514; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 515; X86-BMI2-NEXT: retl 516; 517; X64-NOBMI-LABEL: bzhi64_a1_indexzext: 518; X64-NOBMI: # %bb.0: 519; X64-NOBMI-NEXT: movl %esi, %ecx 520; X64-NOBMI-NEXT: movl $1, %eax 521; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 522; X64-NOBMI-NEXT: shlq %cl, %rax 523; X64-NOBMI-NEXT: decq %rax 524; X64-NOBMI-NEXT: andq %rdi, %rax 525; X64-NOBMI-NEXT: retq 526; 527; X64-BMI1-LABEL: bzhi64_a1_indexzext: 528; X64-BMI1: # %bb.0: 529; X64-BMI1-NEXT: # kill: def $esi killed $esi def $rsi 530; X64-BMI1-NEXT: shll $8, %esi 531; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax 532; X64-BMI1-NEXT: retq 533; 534; X64-BMI2-LABEL: bzhi64_a1_indexzext: 535; X64-BMI2: # %bb.0: 536; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi 537; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax 538; X64-BMI2-NEXT: retq 539 %conv = zext i8 %numlowbits to i64 540 %onebit = shl i64 1, %conv 541 %mask = add nsw i64 %onebit, -1 542 %masked = and i64 %mask, %val 543 ret i64 %masked 544} 545 546define i64 @bzhi64_a2_load(ptr %w, i64 %numlowbits) nounwind { 547; X86-NOBMI-LABEL: bzhi64_a2_load: 548; X86-NOBMI: # %bb.0: 549; X86-NOBMI-NEXT: pushl %esi 550; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi 551; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 552; X86-NOBMI-NEXT: movl $1, %eax 553; X86-NOBMI-NEXT: xorl %edx, %edx 554; X86-NOBMI-NEXT: shldl %cl, %eax, %edx 555; X86-NOBMI-NEXT: shll %cl, %eax 556; X86-NOBMI-NEXT: testb $32, %cl 557; X86-NOBMI-NEXT: je .LBB8_2 558; X86-NOBMI-NEXT: # %bb.1: 559; X86-NOBMI-NEXT: movl %eax, %edx 560; X86-NOBMI-NEXT: xorl %eax, %eax 561; X86-NOBMI-NEXT: .LBB8_2: 562; X86-NOBMI-NEXT: addl $-1, %eax 563; X86-NOBMI-NEXT: adcl $-1, %edx 564; X86-NOBMI-NEXT: andl 4(%esi), %edx 565; X86-NOBMI-NEXT: andl (%esi), %eax 566; X86-NOBMI-NEXT: popl %esi 567; X86-NOBMI-NEXT: retl 568; 569; X86-BMI1-LABEL: 
bzhi64_a2_load: 570; X86-BMI1: # %bb.0: 571; X86-BMI1-NEXT: pushl %esi 572; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi 573; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 574; X86-BMI1-NEXT: movl $1, %eax 575; X86-BMI1-NEXT: xorl %edx, %edx 576; X86-BMI1-NEXT: shldl %cl, %eax, %edx 577; X86-BMI1-NEXT: shll %cl, %eax 578; X86-BMI1-NEXT: testb $32, %cl 579; X86-BMI1-NEXT: je .LBB8_2 580; X86-BMI1-NEXT: # %bb.1: 581; X86-BMI1-NEXT: movl %eax, %edx 582; X86-BMI1-NEXT: xorl %eax, %eax 583; X86-BMI1-NEXT: .LBB8_2: 584; X86-BMI1-NEXT: addl $-1, %eax 585; X86-BMI1-NEXT: adcl $-1, %edx 586; X86-BMI1-NEXT: andl 4(%esi), %edx 587; X86-BMI1-NEXT: andl (%esi), %eax 588; X86-BMI1-NEXT: popl %esi 589; X86-BMI1-NEXT: retl 590; 591; X86-BMI2-LABEL: bzhi64_a2_load: 592; X86-BMI2: # %bb.0: 593; X86-BMI2-NEXT: pushl %esi 594; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi 595; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 596; X86-BMI2-NEXT: movl $1, %eax 597; X86-BMI2-NEXT: xorl %edx, %edx 598; X86-BMI2-NEXT: shldl %cl, %eax, %edx 599; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax 600; X86-BMI2-NEXT: testb $32, %cl 601; X86-BMI2-NEXT: je .LBB8_2 602; X86-BMI2-NEXT: # %bb.1: 603; X86-BMI2-NEXT: movl %eax, %edx 604; X86-BMI2-NEXT: xorl %eax, %eax 605; X86-BMI2-NEXT: .LBB8_2: 606; X86-BMI2-NEXT: addl $-1, %eax 607; X86-BMI2-NEXT: adcl $-1, %edx 608; X86-BMI2-NEXT: andl 4(%esi), %edx 609; X86-BMI2-NEXT: andl (%esi), %eax 610; X86-BMI2-NEXT: popl %esi 611; X86-BMI2-NEXT: retl 612; 613; X64-NOBMI-LABEL: bzhi64_a2_load: 614; X64-NOBMI: # %bb.0: 615; X64-NOBMI-NEXT: movq %rsi, %rcx 616; X64-NOBMI-NEXT: movl $1, %eax 617; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 618; X64-NOBMI-NEXT: shlq %cl, %rax 619; X64-NOBMI-NEXT: decq %rax 620; X64-NOBMI-NEXT: andq (%rdi), %rax 621; X64-NOBMI-NEXT: retq 622; 623; X64-BMI1-LABEL: bzhi64_a2_load: 624; X64-BMI1: # %bb.0: 625; X64-BMI1-NEXT: shll $8, %esi 626; X64-BMI1-NEXT: bextrq %rsi, (%rdi), %rax 627; X64-BMI1-NEXT: retq 628; 629; X64-BMI2-LABEL: 
bzhi64_a2_load: 630; X64-BMI2: # %bb.0: 631; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax 632; X64-BMI2-NEXT: retq 633 %val = load i64, ptr %w 634 %onebit = shl i64 1, %numlowbits 635 %mask = add nsw i64 %onebit, -1 636 %masked = and i64 %mask, %val 637 ret i64 %masked 638} 639 640define i64 @bzhi64_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind { 641; X86-NOBMI-LABEL: bzhi64_a3_load_indexzext: 642; X86-NOBMI: # %bb.0: 643; X86-NOBMI-NEXT: pushl %esi 644; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi 645; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 646; X86-NOBMI-NEXT: movl $1, %eax 647; X86-NOBMI-NEXT: xorl %edx, %edx 648; X86-NOBMI-NEXT: shldl %cl, %eax, %edx 649; X86-NOBMI-NEXT: shll %cl, %eax 650; X86-NOBMI-NEXT: testb $32, %cl 651; X86-NOBMI-NEXT: je .LBB9_2 652; X86-NOBMI-NEXT: # %bb.1: 653; X86-NOBMI-NEXT: movl %eax, %edx 654; X86-NOBMI-NEXT: xorl %eax, %eax 655; X86-NOBMI-NEXT: .LBB9_2: 656; X86-NOBMI-NEXT: addl $-1, %eax 657; X86-NOBMI-NEXT: adcl $-1, %edx 658; X86-NOBMI-NEXT: andl 4(%esi), %edx 659; X86-NOBMI-NEXT: andl (%esi), %eax 660; X86-NOBMI-NEXT: popl %esi 661; X86-NOBMI-NEXT: retl 662; 663; X86-BMI1-LABEL: bzhi64_a3_load_indexzext: 664; X86-BMI1: # %bb.0: 665; X86-BMI1-NEXT: pushl %esi 666; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi 667; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 668; X86-BMI1-NEXT: movl $1, %eax 669; X86-BMI1-NEXT: xorl %edx, %edx 670; X86-BMI1-NEXT: shldl %cl, %eax, %edx 671; X86-BMI1-NEXT: shll %cl, %eax 672; X86-BMI1-NEXT: testb $32, %cl 673; X86-BMI1-NEXT: je .LBB9_2 674; X86-BMI1-NEXT: # %bb.1: 675; X86-BMI1-NEXT: movl %eax, %edx 676; X86-BMI1-NEXT: xorl %eax, %eax 677; X86-BMI1-NEXT: .LBB9_2: 678; X86-BMI1-NEXT: addl $-1, %eax 679; X86-BMI1-NEXT: adcl $-1, %edx 680; X86-BMI1-NEXT: andl 4(%esi), %edx 681; X86-BMI1-NEXT: andl (%esi), %eax 682; X86-BMI1-NEXT: popl %esi 683; X86-BMI1-NEXT: retl 684; 685; X86-BMI2-LABEL: bzhi64_a3_load_indexzext: 686; X86-BMI2: # %bb.0: 687; X86-BMI2-NEXT: pushl %esi 688; X86-BMI2-NEXT: 
movl {{[0-9]+}}(%esp), %esi 689; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 690; X86-BMI2-NEXT: movl $1, %eax 691; X86-BMI2-NEXT: xorl %edx, %edx 692; X86-BMI2-NEXT: shldl %cl, %eax, %edx 693; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax 694; X86-BMI2-NEXT: testb $32, %cl 695; X86-BMI2-NEXT: je .LBB9_2 696; X86-BMI2-NEXT: # %bb.1: 697; X86-BMI2-NEXT: movl %eax, %edx 698; X86-BMI2-NEXT: xorl %eax, %eax 699; X86-BMI2-NEXT: .LBB9_2: 700; X86-BMI2-NEXT: addl $-1, %eax 701; X86-BMI2-NEXT: adcl $-1, %edx 702; X86-BMI2-NEXT: andl 4(%esi), %edx 703; X86-BMI2-NEXT: andl (%esi), %eax 704; X86-BMI2-NEXT: popl %esi 705; X86-BMI2-NEXT: retl 706; 707; X64-NOBMI-LABEL: bzhi64_a3_load_indexzext: 708; X64-NOBMI: # %bb.0: 709; X64-NOBMI-NEXT: movl %esi, %ecx 710; X64-NOBMI-NEXT: movl $1, %eax 711; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 712; X64-NOBMI-NEXT: shlq %cl, %rax 713; X64-NOBMI-NEXT: decq %rax 714; X64-NOBMI-NEXT: andq (%rdi), %rax 715; X64-NOBMI-NEXT: retq 716; 717; X64-BMI1-LABEL: bzhi64_a3_load_indexzext: 718; X64-BMI1: # %bb.0: 719; X64-BMI1-NEXT: # kill: def $esi killed $esi def $rsi 720; X64-BMI1-NEXT: shll $8, %esi 721; X64-BMI1-NEXT: bextrq %rsi, (%rdi), %rax 722; X64-BMI1-NEXT: retq 723; 724; X64-BMI2-LABEL: bzhi64_a3_load_indexzext: 725; X64-BMI2: # %bb.0: 726; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi 727; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax 728; X64-BMI2-NEXT: retq 729 %val = load i64, ptr %w 730 %conv = zext i8 %numlowbits to i64 731 %onebit = shl i64 1, %conv 732 %mask = add nsw i64 %onebit, -1 733 %masked = and i64 %mask, %val 734 ret i64 %masked 735} 736 737define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind { 738; X86-NOBMI-LABEL: bzhi64_a4_commutative: 739; X86-NOBMI: # %bb.0: 740; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 741; X86-NOBMI-NEXT: movl $1, %eax 742; X86-NOBMI-NEXT: xorl %edx, %edx 743; X86-NOBMI-NEXT: shldl %cl, %eax, %edx 744; X86-NOBMI-NEXT: shll %cl, %eax 745; X86-NOBMI-NEXT: testb $32, %cl 
746; X86-NOBMI-NEXT: je .LBB10_2 747; X86-NOBMI-NEXT: # %bb.1: 748; X86-NOBMI-NEXT: movl %eax, %edx 749; X86-NOBMI-NEXT: xorl %eax, %eax 750; X86-NOBMI-NEXT: .LBB10_2: 751; X86-NOBMI-NEXT: addl $-1, %eax 752; X86-NOBMI-NEXT: adcl $-1, %edx 753; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx 754; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 755; X86-NOBMI-NEXT: retl 756; 757; X86-BMI1-LABEL: bzhi64_a4_commutative: 758; X86-BMI1: # %bb.0: 759; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 760; X86-BMI1-NEXT: movl $1, %eax 761; X86-BMI1-NEXT: xorl %edx, %edx 762; X86-BMI1-NEXT: shldl %cl, %eax, %edx 763; X86-BMI1-NEXT: shll %cl, %eax 764; X86-BMI1-NEXT: testb $32, %cl 765; X86-BMI1-NEXT: je .LBB10_2 766; X86-BMI1-NEXT: # %bb.1: 767; X86-BMI1-NEXT: movl %eax, %edx 768; X86-BMI1-NEXT: xorl %eax, %eax 769; X86-BMI1-NEXT: .LBB10_2: 770; X86-BMI1-NEXT: addl $-1, %eax 771; X86-BMI1-NEXT: adcl $-1, %edx 772; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx 773; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax 774; X86-BMI1-NEXT: retl 775; 776; X86-BMI2-LABEL: bzhi64_a4_commutative: 777; X86-BMI2: # %bb.0: 778; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 779; X86-BMI2-NEXT: movl $1, %eax 780; X86-BMI2-NEXT: xorl %edx, %edx 781; X86-BMI2-NEXT: shldl %cl, %eax, %edx 782; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax 783; X86-BMI2-NEXT: testb $32, %cl 784; X86-BMI2-NEXT: je .LBB10_2 785; X86-BMI2-NEXT: # %bb.1: 786; X86-BMI2-NEXT: movl %eax, %edx 787; X86-BMI2-NEXT: xorl %eax, %eax 788; X86-BMI2-NEXT: .LBB10_2: 789; X86-BMI2-NEXT: addl $-1, %eax 790; X86-BMI2-NEXT: adcl $-1, %edx 791; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 792; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 793; X86-BMI2-NEXT: retl 794; 795; X64-NOBMI-LABEL: bzhi64_a4_commutative: 796; X64-NOBMI: # %bb.0: 797; X64-NOBMI-NEXT: movq %rsi, %rcx 798; X64-NOBMI-NEXT: movl $1, %eax 799; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 800; X64-NOBMI-NEXT: shlq %cl, %rax 801; X64-NOBMI-NEXT: decq %rax 802; X64-NOBMI-NEXT: andq %rdi, 
%rax 803; X64-NOBMI-NEXT: retq 804; 805; X64-BMI1-LABEL: bzhi64_a4_commutative: 806; X64-BMI1: # %bb.0: 807; X64-BMI1-NEXT: shll $8, %esi 808; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax 809; X64-BMI1-NEXT: retq 810; 811; X64-BMI2-LABEL: bzhi64_a4_commutative: 812; X64-BMI2: # %bb.0: 813; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax 814; X64-BMI2-NEXT: retq 815 %onebit = shl i64 1, %numlowbits 816 %mask = add nsw i64 %onebit, -1 817 %masked = and i64 %val, %mask ; swapped order 818 ret i64 %masked 819} 820 821; 64-bit, but with 32-bit output 822 823; Everything done in 64-bit, truncation happens last. 824define i32 @bzhi64_32_a0(i64 %val, i64 %numlowbits) nounwind { 825; X86-NOBMI-LABEL: bzhi64_32_a0: 826; X86-NOBMI: # %bb.0: 827; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 828; X86-NOBMI-NEXT: movl $1, %edx 829; X86-NOBMI-NEXT: shll %cl, %edx 830; X86-NOBMI-NEXT: xorl %eax, %eax 831; X86-NOBMI-NEXT: testb $32, %cl 832; X86-NOBMI-NEXT: jne .LBB11_2 833; X86-NOBMI-NEXT: # %bb.1: 834; X86-NOBMI-NEXT: movl %edx, %eax 835; X86-NOBMI-NEXT: .LBB11_2: 836; X86-NOBMI-NEXT: decl %eax 837; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 838; X86-NOBMI-NEXT: retl 839; 840; X86-BMI1-LABEL: bzhi64_32_a0: 841; X86-BMI1: # %bb.0: 842; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 843; X86-BMI1-NEXT: movl $1, %edx 844; X86-BMI1-NEXT: shll %cl, %edx 845; X86-BMI1-NEXT: xorl %eax, %eax 846; X86-BMI1-NEXT: testb $32, %cl 847; X86-BMI1-NEXT: jne .LBB11_2 848; X86-BMI1-NEXT: # %bb.1: 849; X86-BMI1-NEXT: movl %edx, %eax 850; X86-BMI1-NEXT: .LBB11_2: 851; X86-BMI1-NEXT: decl %eax 852; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax 853; X86-BMI1-NEXT: retl 854; 855; X86-BMI2-LABEL: bzhi64_32_a0: 856; X86-BMI2: # %bb.0: 857; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 858; X86-BMI2-NEXT: xorl %eax, %eax 859; X86-BMI2-NEXT: testb $32, %cl 860; X86-BMI2-NEXT: jne .LBB11_2 861; X86-BMI2-NEXT: # %bb.1: 862; X86-BMI2-NEXT: movl $1, %eax 863; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax 864; X86-BMI2-NEXT: .LBB11_2: 
865; X86-BMI2-NEXT: decl %eax 866; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 867; X86-BMI2-NEXT: retl 868; 869; X64-NOBMI-LABEL: bzhi64_32_a0: 870; X64-NOBMI: # %bb.0: 871; X64-NOBMI-NEXT: movq %rsi, %rcx 872; X64-NOBMI-NEXT: movl $1, %eax 873; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 874; X64-NOBMI-NEXT: shlq %cl, %rax 875; X64-NOBMI-NEXT: decl %eax 876; X64-NOBMI-NEXT: andl %edi, %eax 877; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax 878; X64-NOBMI-NEXT: retq 879; 880; X64-BMI1-LABEL: bzhi64_32_a0: 881; X64-BMI1: # %bb.0: 882; X64-BMI1-NEXT: shll $8, %esi 883; X64-BMI1-NEXT: bextrl %esi, %edi, %eax 884; X64-BMI1-NEXT: retq 885; 886; X64-BMI2-LABEL: bzhi64_32_a0: 887; X64-BMI2: # %bb.0: 888; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 889; X64-BMI2-NEXT: retq 890 %onebit = shl i64 1, %numlowbits 891 %mask = add nsw i64 %onebit, -1 892 %masked = and i64 %mask, %val 893 %res = trunc i64 %masked to i32 894 ret i32 %res 895} 896 897; Truncation of the value happens first. Shifting and masking are 32-bit. 
898define i32 @bzhi64_32_a1(i64 %val, i32 %numlowbits) nounwind { 899; X86-NOBMI-LABEL: bzhi64_32_a1: 900; X86-NOBMI: # %bb.0: 901; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 902; X86-NOBMI-NEXT: movl $1, %eax 903; X86-NOBMI-NEXT: shll %cl, %eax 904; X86-NOBMI-NEXT: decl %eax 905; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 906; X86-NOBMI-NEXT: retl 907; 908; X86-BMI1-LABEL: bzhi64_32_a1: 909; X86-BMI1: # %bb.0: 910; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax 911; X86-BMI1-NEXT: shll $8, %eax 912; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 913; X86-BMI1-NEXT: retl 914; 915; X86-BMI2-LABEL: bzhi64_32_a1: 916; X86-BMI2: # %bb.0: 917; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 918; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 919; X86-BMI2-NEXT: retl 920; 921; X64-NOBMI-LABEL: bzhi64_32_a1: 922; X64-NOBMI: # %bb.0: 923; X64-NOBMI-NEXT: movl %esi, %ecx 924; X64-NOBMI-NEXT: movl $1, %eax 925; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 926; X64-NOBMI-NEXT: shll %cl, %eax 927; X64-NOBMI-NEXT: decl %eax 928; X64-NOBMI-NEXT: andl %edi, %eax 929; X64-NOBMI-NEXT: retq 930; 931; X64-BMI1-LABEL: bzhi64_32_a1: 932; X64-BMI1: # %bb.0: 933; X64-BMI1-NEXT: shll $8, %esi 934; X64-BMI1-NEXT: bextrl %esi, %edi, %eax 935; X64-BMI1-NEXT: retq 936; 937; X64-BMI2-LABEL: bzhi64_32_a1: 938; X64-BMI2: # %bb.0: 939; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 940; X64-BMI2-NEXT: retq 941 %truncval = trunc i64 %val to i32 942 %onebit = shl i32 1, %numlowbits 943 %mask = add nsw i32 %onebit, -1 944 %masked = and i32 %mask, %truncval 945 ret i32 %masked 946} 947 948; Truncation happens first (the truncated value has an extra use). 949; Shifting and masking are 32-bit. 
define i32 @bzhi64_32_a1_trunc_extrause(i64 %val, i32 %numlowbits, ptr %escape) nounwind {
; X86-NOBMI-LABEL: bzhi64_32_a1_trunc_extrause:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl %edx, (%eax)
; X86-NOBMI-NEXT: movl $1, %eax
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: decl %eax
; X86-NOBMI-NEXT: andl %edx, %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi64_32_a1_trunc_extrause:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: movl %ecx, (%edx)
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: bextrl %eax, %ecx, %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi64_32_a1_trunc_extrause:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl %ecx, (%edx)
; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_32_a1_trunc_extrause:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl %edi, (%rdx)
; X64-NOBMI-NEXT: movl $1, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: decl %eax
; X64-NOBMI-NEXT: andl %edi, %eax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi64_32_a1_trunc_extrause:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movl %edi, (%rdx)
; X64-BMI1-NEXT: shll $8, %esi
; X64-BMI1-NEXT: bextrl %esi, %edi, %eax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi64_32_a1_trunc_extrause:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: movl %edi, (%rdx)
; X64-BMI2-NEXT: bzhil %esi, %edi, %eax
; X64-BMI2-NEXT: retq
  ; %val is truncated to i32 up front and the truncated value is also stored
  ; through %escape (the extra use); the shift and the mask are built in i32.
  %truncval = trunc i64 %val to i32
  store i32 %truncval, ptr %escape
  %onebit = shl i32 1, %numlowbits
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %truncval
  ret i32 %masked
}

; Shifting happens in 32-bit. The 32-bit mask is zero-extended to 64-bit.
; Masking is 64-bit. Then truncation.
define i32 @bzhi64_32_a2(i64 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_32_a2:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $1, %eax
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: decl %eax
; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi64_32_a2:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi64_32_a2:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_32_a2:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl $1, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: decl %eax
; X64-NOBMI-NEXT: andl %edi, %eax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi64_32_a2:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: shll $8, %esi
; X64-BMI1-NEXT: bextrl %esi, %edi, %eax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi64_32_a2:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: bzhil %esi, %edi, %eax
; X64-BMI2-NEXT: retq
  %onebit = shl i32 1, %numlowbits
  %mask = add nsw i32 %onebit, -1
  %zextmask = zext i32 %mask to i64
  %masked = and i64 %zextmask, %val
  %truncmasked = trunc i64 %masked to i32
  ret i32 %truncmasked
}

; Shifting happens in 64-bit. Mask is 32-bit, but calculated in 64-bit.
; Masking is 64-bit. Then truncation.
define i32 @bzhi64_32_a3(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_32_a3:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $1, %edx
; X86-NOBMI-NEXT: shll %cl, %edx
; X86-NOBMI-NEXT: xorl %eax, %eax
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: jne .LBB15_2
; X86-NOBMI-NEXT: # %bb.1:
; X86-NOBMI-NEXT: movl %edx, %eax
; X86-NOBMI-NEXT: .LBB15_2:
; X86-NOBMI-NEXT: decl %eax
; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi64_32_a3:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $1, %edx
; X86-BMI1-NEXT: shll %cl, %edx
; X86-BMI1-NEXT: xorl %eax, %eax
; X86-BMI1-NEXT: testb $32, %cl
; X86-BMI1-NEXT: jne .LBB15_2
; X86-BMI1-NEXT: # %bb.1:
; X86-BMI1-NEXT: movl %edx, %eax
; X86-BMI1-NEXT: .LBB15_2:
; X86-BMI1-NEXT: decl %eax
; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi64_32_a3:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: testb $32, %cl
; X86-BMI2-NEXT: jne .LBB15_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl $1, %eax
; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax
; X86-BMI2-NEXT: .LBB15_2:
; X86-BMI2-NEXT: decl %eax
; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_32_a3:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movq %rsi, %rcx
; X64-NOBMI-NEXT: movl $1, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT: shlq %cl, %rax
; X64-NOBMI-NEXT: decl %eax
; X64-NOBMI-NEXT: andl %edi, %eax
; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi64_32_a3:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: shll $8, %esi
; X64-BMI1-NEXT: bextrl %esi, %edi, %eax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi64_32_a3:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: bzhil %esi, %edi, %eax
; X64-BMI2-NEXT: retq
  %onebit = shl i64 1, %numlowbits
  ; Adding 4294967295 (0xFFFFFFFF) in i64 leaves the low 32 bits of the sum
  ; equal to %onebit - 1; only those bits survive the final trunc.
  %mask = add nsw i64 %onebit, 4294967295
  %masked = and i64 %mask, %val
  %truncmasked = trunc i64 %masked to i32
  ret i32 %truncmasked
}

; ---------------------------------------------------------------------------- ;
; Pattern b. 32-bit
; ---------------------------------------------------------------------------- ;

; Base form in i32: %val & ~(-1 << %numlowbits).
define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_b0:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: notl %eax
; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi32_b0:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi32_b0:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_b0:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl $-1, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: notl %eax
; X64-NOBMI-NEXT: andl %edi, %eax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi32_b0:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: shll $8, %esi
; X64-BMI1-NEXT: bextrl %esi, %edi, %eax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi32_b0:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: bzhil %esi, %edi, %eax
; X64-BMI2-NEXT: retq
  %notmask = shl i32 -1, %numlowbits
  %mask = xor i32 %notmask, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; The bit count arrives as a zeroext i8 and is widened to i32 with zext.
define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_b1_indexzext:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: notl %eax
; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi32_b1_indexzext:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi32_b1_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_b1_indexzext:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl $-1, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: notl %eax
; X64-NOBMI-NEXT: andl %edi, %eax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi32_b1_indexzext:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: shll $8, %esi
; X64-BMI1-NEXT: bextrl %esi, %edi, %eax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi32_b1_indexzext:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: bzhil %esi, %edi, %eax
; X64-BMI2-NEXT: retq
  %conv = zext i8 %numlowbits to i32
  %notmask = shl i32 -1, %conv
  %mask = xor i32 %notmask, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; The value to mask is loaded from %w.
define i32 @bzhi32_b2_load(ptr %w, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_b2_load:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: notl %eax
; X86-NOBMI-NEXT: andl (%edx), %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi32_b2_load:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: shll $8, %ecx
; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi32_b2_load:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_b2_load:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl $-1, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: notl %eax
; X64-NOBMI-NEXT: andl (%rdi), %eax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi32_b2_load:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: shll $8, %esi
; X64-BMI1-NEXT: bextrl %esi, (%rdi), %eax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi32_b2_load:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax
; X64-BMI2-NEXT: retq
  %val = load i32, ptr %w
  %notmask = shl i32 -1, %numlowbits
  %mask = xor i32 %notmask, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Loaded value combined with a zeroext i8 bit count widened to i32.
define i32 @bzhi32_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_b3_load_indexzext:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: notl %eax
; X86-NOBMI-NEXT: andl (%edx), %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi32_b3_load_indexzext:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: shll $8, %ecx
; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi32_b3_load_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_b3_load_indexzext:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl $-1, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: notl %eax
; X64-NOBMI-NEXT: andl (%rdi), %eax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi32_b3_load_indexzext:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: shll $8, %esi
; X64-BMI1-NEXT: bextrl %esi, (%rdi), %eax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi32_b3_load_indexzext:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax
; X64-BMI2-NEXT: retq
  %val = load i32, ptr %w
  %conv = zext i8 %numlowbits to i32
  %notmask = shl i32 -1, %conv
  %mask = xor i32 %notmask, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Same computation as bzhi32_b0 with the 'and' operands swapped.
define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_b4_commutative:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $-1, %eax
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: notl %eax
; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi32_b4_commutative:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi32_b4_commutative:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_b4_commutative:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl $-1, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: notl %eax
; X64-NOBMI-NEXT: andl %edi, %eax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi32_b4_commutative:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: shll $8, %esi
; X64-BMI1-NEXT: bextrl %esi, %edi, %eax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi32_b4_commutative:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: bzhil %esi, %edi, %eax
; X64-BMI2-NEXT: retq
  %notmask = shl i32 -1, %numlowbits
  %mask = xor i32 %notmask, -1
  %masked = and i32 %val, %mask ; swapped order
  ret i32 %masked
}

; 64-bit

; Base form in i64: %val & ~(-1 << %numlowbits).
define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_b0:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $-1, %edx
; X86-NOBMI-NEXT: movl $-1, %esi
; X86-NOBMI-NEXT: shll %cl, %esi
; X86-NOBMI-NEXT: xorl %eax, %eax
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: jne .LBB21_1
; X86-NOBMI-NEXT: # %bb.2:
; X86-NOBMI-NEXT: movl %esi, %eax
; X86-NOBMI-NEXT: jmp .LBB21_3
; X86-NOBMI-NEXT: .LBB21_1:
; X86-NOBMI-NEXT: movl %esi, %edx
; X86-NOBMI-NEXT: .LBB21_3:
; X86-NOBMI-NEXT: notl %edx
; X86-NOBMI-NEXT: notl %eax
; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi64_b0:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $-1, %edx
; X86-BMI1-NEXT: movl $-1, %eax
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: testb $32, %cl
; X86-BMI1-NEXT: je .LBB21_2
; X86-BMI1-NEXT: # %bb.1:
; X86-BMI1-NEXT: movl %eax, %edx
; X86-BMI1-NEXT: xorl %eax, %eax
; X86-BMI1-NEXT: .LBB21_2:
; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi64_b0:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl $-1, %ecx
; X86-BMI2-NEXT: shlxl %edx, %ecx, %eax
; X86-BMI2-NEXT: testb $32, %dl
; X86-BMI2-NEXT: je .LBB21_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %ecx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB21_2:
; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_b0:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movq %rsi, %rcx
; X64-NOBMI-NEXT: movq $-1, %rax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT: shlq %cl, %rax
; X64-NOBMI-NEXT: notq %rax
; X64-NOBMI-NEXT: andq %rdi, %rax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi64_b0:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: shll $8, %esi
; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi64_b0:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax
; X64-BMI2-NEXT: retq
  %notmask = shl i64 -1, %numlowbits
  %mask = xor i64 %notmask, -1
  %masked = and i64 %mask, %val
  ret i64 %masked
}

; The bit count arrives as a zeroext i8 and is widened to i64 with zext.
define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_b1_indexzext:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $-1, %edx
; X86-NOBMI-NEXT: movl $-1, %esi
; X86-NOBMI-NEXT: shll %cl, %esi
; X86-NOBMI-NEXT: xorl %eax, %eax
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: jne .LBB22_1
; X86-NOBMI-NEXT: # %bb.2:
; X86-NOBMI-NEXT: movl %esi, %eax
; X86-NOBMI-NEXT: jmp .LBB22_3
; X86-NOBMI-NEXT: .LBB22_1:
; X86-NOBMI-NEXT: movl %esi, %edx
; X86-NOBMI-NEXT: .LBB22_3:
; X86-NOBMI-NEXT: notl %edx
; X86-NOBMI-NEXT: notl %eax
; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi64_b1_indexzext:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $-1, %edx
; X86-BMI1-NEXT: movl $-1, %eax
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: testb $32, %cl
; X86-BMI1-NEXT: je .LBB22_2
; X86-BMI1-NEXT: # %bb.1:
; X86-BMI1-NEXT: movl %eax, %edx
; X86-BMI1-NEXT: xorl %eax, %eax
; X86-BMI1-NEXT: .LBB22_2:
; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi64_b1_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl $-1, %ecx
; X86-BMI2-NEXT: shlxl %edx, %ecx, %eax
; X86-BMI2-NEXT: testb $32, %dl
; X86-BMI2-NEXT: je .LBB22_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %ecx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB22_2:
; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_b1_indexzext:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movq $-1, %rax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shlq %cl, %rax
; X64-NOBMI-NEXT: notq %rax
; X64-NOBMI-NEXT: andq %rdi, %rax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi64_b1_indexzext:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: # kill: def $esi killed $esi def $rsi
; X64-BMI1-NEXT: shll $8, %esi
; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi64_b1_indexzext:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi
; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax
; X64-BMI2-NEXT: retq
  %conv = zext i8 %numlowbits to i64
  %notmask = shl i64 -1, %conv
  %mask = xor i64 %notmask, -1
  %masked = and i64 %mask, %val
  ret i64 %masked
}

; The value to mask is loaded from %w.
define i64 @bzhi64_b2_load(ptr %w, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_b2_load:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $-1, %edx
; X86-NOBMI-NEXT: movl $-1, %edi
; X86-NOBMI-NEXT: shll %cl, %edi
; X86-NOBMI-NEXT: xorl %eax, %eax
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: jne .LBB23_1
; X86-NOBMI-NEXT: # %bb.2:
; X86-NOBMI-NEXT: movl %edi, %eax
; X86-NOBMI-NEXT: jmp .LBB23_3
; X86-NOBMI-NEXT: .LBB23_1:
; X86-NOBMI-NEXT: movl %edi, %edx
; X86-NOBMI-NEXT: .LBB23_3:
; X86-NOBMI-NEXT: notl %edx
; X86-NOBMI-NEXT: notl %eax
; X86-NOBMI-NEXT: andl (%esi), %eax
; X86-NOBMI-NEXT: andl 4(%esi), %edx
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: popl %edi
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi64_b2_load:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $-1, %esi
; X86-BMI1-NEXT: movl $-1, %eax
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: testb $32, %cl
; X86-BMI1-NEXT: je .LBB23_2
; X86-BMI1-NEXT: # %bb.1:
; X86-BMI1-NEXT: movl %eax, %esi
; X86-BMI1-NEXT: xorl %eax, %eax
; X86-BMI1-NEXT: .LBB23_2:
; X86-BMI1-NEXT: andnl (%edx), %eax, %eax
; X86-BMI1-NEXT: andnl 4(%edx), %esi, %edx
; X86-BMI1-NEXT: popl %esi
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi64_b2_load:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ebx, %edx, %eax
; X86-BMI2-NEXT: testb $32, %bl
; X86-BMI2-NEXT: je .LBB23_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB23_2:
; X86-BMI2-NEXT: andnl (%ecx), %eax, %eax
; X86-BMI2-NEXT: andnl 4(%ecx), %edx, %edx
; X86-BMI2-NEXT: popl %ebx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_b2_load:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movq %rsi, %rcx
; X64-NOBMI-NEXT: movq $-1, %rax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT: shlq %cl, %rax
; X64-NOBMI-NEXT: notq %rax
; X64-NOBMI-NEXT: andq (%rdi), %rax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi64_b2_load:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: shll $8, %esi
; X64-BMI1-NEXT: bextrq %rsi, (%rdi), %rax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi64_b2_load:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax
; X64-BMI2-NEXT: retq
  %val = load i64, ptr %w
  %notmask = shl i64 -1, %numlowbits
  %mask = xor i64 %notmask, -1
  %masked = and i64 %mask, %val
  ret i64 %masked
}

; Loaded value combined with a zeroext i8 bit count widened to i64.
define i64 @bzhi64_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_b3_load_indexzext:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %edi
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $-1, %edx
; X86-NOBMI-NEXT: movl $-1, %edi
; X86-NOBMI-NEXT: shll %cl, %edi
; X86-NOBMI-NEXT: xorl %eax, %eax
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: jne .LBB24_1
; X86-NOBMI-NEXT: # %bb.2:
; X86-NOBMI-NEXT: movl %edi, %eax
; X86-NOBMI-NEXT: jmp .LBB24_3
; X86-NOBMI-NEXT: .LBB24_1:
; X86-NOBMI-NEXT: movl %edi, %edx
; X86-NOBMI-NEXT: .LBB24_3:
; X86-NOBMI-NEXT: notl %edx
; X86-NOBMI-NEXT: notl %eax
; X86-NOBMI-NEXT: andl (%esi), %eax
; X86-NOBMI-NEXT: andl 4(%esi), %edx
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: popl %edi
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi64_b3_load_indexzext:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: pushl %esi
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $-1, %esi
; X86-BMI1-NEXT: movl $-1, %eax
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: testb $32, %cl
; X86-BMI1-NEXT: je .LBB24_2
; X86-BMI1-NEXT: # %bb.1:
; X86-BMI1-NEXT: movl %eax, %esi
; X86-BMI1-NEXT: xorl %eax, %eax
; X86-BMI1-NEXT: .LBB24_2:
; X86-BMI1-NEXT: andnl (%edx), %eax, %eax
; X86-BMI1-NEXT: andnl 4(%edx), %esi, %edx
; X86-BMI1-NEXT: popl %esi
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi64_b3_load_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: pushl %ebx
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
; X86-BMI2-NEXT: movl $-1, %edx
; X86-BMI2-NEXT: shlxl %ebx, %edx, %eax
; X86-BMI2-NEXT: testb $32, %bl
; X86-BMI2-NEXT: je .LBB24_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %edx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB24_2:
; X86-BMI2-NEXT: andnl (%ecx), %eax, %eax
; X86-BMI2-NEXT: andnl 4(%ecx), %edx, %edx
; X86-BMI2-NEXT: popl %ebx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_b3_load_indexzext:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movq $-1, %rax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shlq %cl, %rax
; X64-NOBMI-NEXT: notq %rax
; X64-NOBMI-NEXT: andq (%rdi), %rax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi64_b3_load_indexzext:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: # kill: def $esi killed $esi def $rsi
; X64-BMI1-NEXT: shll $8, %esi
; X64-BMI1-NEXT: bextrq %rsi, (%rdi), %rax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi64_b3_load_indexzext:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi
; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax
; X64-BMI2-NEXT: retq
  %val = load i64, ptr %w
  %conv = zext i8 %numlowbits to i64
  %notmask = shl i64 -1, %conv
  %mask = xor i64 %notmask, -1
  %masked = and i64 %mask, %val
  ret i64 %masked
}

; Same computation as bzhi64_b0 with the 'and' operands swapped.
define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_b4_commutative:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: pushl %esi
; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $-1, %edx
; X86-NOBMI-NEXT: movl $-1, %esi
; X86-NOBMI-NEXT: shll %cl, %esi
; X86-NOBMI-NEXT: xorl %eax, %eax
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: jne .LBB25_1
; X86-NOBMI-NEXT: # %bb.2:
; X86-NOBMI-NEXT: movl %esi, %eax
; X86-NOBMI-NEXT: jmp .LBB25_3
; X86-NOBMI-NEXT: .LBB25_1:
; X86-NOBMI-NEXT: movl %esi, %edx
; X86-NOBMI-NEXT: .LBB25_3:
; X86-NOBMI-NEXT: notl %edx
; X86-NOBMI-NEXT: notl %eax
; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT: popl %esi
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi64_b4_commutative:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $-1, %edx
; X86-BMI1-NEXT: movl $-1, %eax
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: testb $32, %cl
; X86-BMI1-NEXT: je .LBB25_2
; X86-BMI1-NEXT: # %bb.1:
; X86-BMI1-NEXT: movl %eax, %edx
; X86-BMI1-NEXT: xorl %eax, %eax
; X86-BMI1-NEXT: .LBB25_2:
; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %edx, %edx
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi64_b4_commutative:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-BMI2-NEXT: movl $-1, %ecx
; X86-BMI2-NEXT: shlxl %edx, %ecx, %eax
; X86-BMI2-NEXT: testb $32, %dl
; X86-BMI2-NEXT: je .LBB25_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl %eax, %ecx
; X86-BMI2-NEXT: xorl %eax, %eax
; X86-BMI2-NEXT: .LBB25_2:
; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_b4_commutative:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movq %rsi, %rcx
; X64-NOBMI-NEXT: movq $-1, %rax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT: shlq %cl, %rax
; X64-NOBMI-NEXT: notq %rax
; X64-NOBMI-NEXT: andq %rdi, %rax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi64_b4_commutative:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: shll $8, %esi
; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi64_b4_commutative:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax
; X64-BMI2-NEXT: retq
  %notmask = shl i64 -1, %numlowbits
  %mask = xor i64 %notmask, -1
  %masked = and i64 %val, %mask ; swapped order
  ret i64 %masked
}

; 64-bit, but with 32-bit output

; Everything done in 64-bit, truncation happens last.
define i32 @bzhi64_32_b0(i64 %val, i8 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_32_b0:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT: movl $-1, %edx
; X86-NOBMI-NEXT: shll %cl, %edx
; X86-NOBMI-NEXT: xorl %eax, %eax
; X86-NOBMI-NEXT: testb $32, %cl
; X86-NOBMI-NEXT: jne .LBB26_2
; X86-NOBMI-NEXT: # %bb.1:
; X86-NOBMI-NEXT: movl %edx, %eax
; X86-NOBMI-NEXT: .LBB26_2:
; X86-NOBMI-NEXT: notl %eax
; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi64_32_b0:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: movl $-1, %eax
; X86-BMI1-NEXT: shll %cl, %eax
; X86-BMI1-NEXT: xorl %edx, %edx
; X86-BMI1-NEXT: testb $32, %cl
; X86-BMI1-NEXT: jne .LBB26_2
; X86-BMI1-NEXT: # %bb.1:
; X86-BMI1-NEXT: movl %eax, %edx
; X86-BMI1-NEXT: .LBB26_2:
; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %edx, %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi64_32_b0:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: xorl %ecx, %ecx
; X86-BMI2-NEXT: testb $32, %al
; X86-BMI2-NEXT: jne .LBB26_2
; X86-BMI2-NEXT: # %bb.1:
; X86-BMI2-NEXT: movl $-1, %ecx
; X86-BMI2-NEXT: shlxl %eax, %ecx, %ecx
; X86-BMI2-NEXT: .LBB26_2:
; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_32_b0:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movq $-1, %rax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shlq %cl, %rax
; X64-NOBMI-NEXT: notl %eax
; X64-NOBMI-NEXT: andl %edi, %eax
; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi64_32_b0:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: shll $8, %esi
; X64-BMI1-NEXT: bextrl %esi, %edi, %eax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi64_32_b0:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: bzhil %esi, %edi, %eax
; X64-BMI2-NEXT: retq
  %widenumlowbits = zext i8 %numlowbits to i64
  %notmask = shl nsw i64 -1, %widenumlowbits
  %mask = xor i64 %notmask, -1
  %wideres = and i64 %val, %mask
  %res = trunc i64 %wideres to i32
  ret i32 %res
}

; Truncation of %val happens first. Shifting and masking are 32-bit.
; In this variant %val is truncated to i32 up front and the shl/xor/and that
; form and apply the mask are all i32; the IR contains no i64 shift.
define i32 @bzhi64_32_b1(i64 %val, i8 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_32_b1:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT:    movl $-1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    notl %eax
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1-LABEL: bzhi64_32_b1:
; X86-BMI1:       # %bb.0:
; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT:    shll $8, %eax
; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT:    retl
;
; X86-BMI2-LABEL: bzhi64_32_b1:
; X86-BMI2:       # %bb.0:
; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_32_b1:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    movl $-1, %eax
; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    notl %eax
; X64-NOBMI-NEXT:    andl %edi, %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1-LABEL: bzhi64_32_b1:
; X64-BMI1:       # %bb.0:
; X64-BMI1-NEXT:    shll $8, %esi
; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
; X64-BMI1-NEXT:    retq
;
; X64-BMI2-LABEL: bzhi64_32_b1:
; X64-BMI2:       # %bb.0:
; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
; X64-BMI2-NEXT:    retq
  %truncval = trunc i64 %val to i32 ; narrow the input before any masking
  %widenumlowbits = zext i8 %numlowbits to i32 ; bit count widened to the shift type
  %notmask = shl nsw i32 -1, %widenumlowbits ; -1 << numlowbits, in i32
  %mask = xor i32 %notmask, -1 ; bitwise NOT of %notmask
  %res = and i32 %truncval, %mask ; masking is 32-bit
  ret i32 %res
}

; Shifting happens in 32-bit. Mask is 32-bit, but extended to 64-bit.
; Masking is 64-bit. Then truncation.
; The shl/xor that build the mask are i32; the mask is then zero-extended and
; only the final 'and' is i64, followed by a trunc back to i32.
define i32 @bzhi64_32_b2(i64 %val, i8 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_32_b2:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT:    movl $-1, %eax
; X86-NOBMI-NEXT:    shll %cl, %eax
; X86-NOBMI-NEXT:    notl %eax
; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    retl
;
; X86-BMI1-LABEL: bzhi64_32_b2:
; X86-BMI1:       # %bb.0:
; X86-BMI1-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT:    shll $8, %eax
; X86-BMI1-NEXT:    bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT:    retl
;
; X86-BMI2-LABEL: bzhi64_32_b2:
; X86-BMI2:       # %bb.0:
; X86-BMI2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT:    bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT:    retl
;
; X64-NOBMI-LABEL: bzhi64_32_b2:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movl %esi, %ecx
; X64-NOBMI-NEXT:    movl $-1, %eax
; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT:    shll %cl, %eax
; X64-NOBMI-NEXT:    notl %eax
; X64-NOBMI-NEXT:    andl %edi, %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI1-LABEL: bzhi64_32_b2:
; X64-BMI1:       # %bb.0:
; X64-BMI1-NEXT:    shll $8, %esi
; X64-BMI1-NEXT:    bextrl %esi, %edi, %eax
; X64-BMI1-NEXT:    retq
;
; X64-BMI2-LABEL: bzhi64_32_b2:
; X64-BMI2:       # %bb.0:
; X64-BMI2-NEXT:    bzhil %esi, %edi, %eax
; X64-BMI2-NEXT:    retq
  %widenumlowbits = zext i8 %numlowbits to i32 ; bit count widened to the shift type
  %notmask = shl nsw i32 -1, %widenumlowbits ; -1 << numlowbits, in i32
  %mask = xor i32 %notmask, -1 ; bitwise NOT of %notmask
  %zextmask = zext i32 %mask to i64 ; upper 32 bits of the mask are zero
  %wideres = and i64 %val, %zextmask ; masking is done on the full i64 value
  %res = trunc i64 %wideres to i32 ; truncation happens last
  ret i32 %res
}

; Shifting happens in 64-bit. Mask is 32-bit, but calculated in 64-bit.
; Masking is 64-bit. Then truncation.
2016define i32 @bzhi64_32_b3(i64 %val, i8 %numlowbits) nounwind { 2017; X86-NOBMI-LABEL: bzhi64_32_b3: 2018; X86-NOBMI: # %bb.0: 2019; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 2020; X86-NOBMI-NEXT: movl $-1, %edx 2021; X86-NOBMI-NEXT: shll %cl, %edx 2022; X86-NOBMI-NEXT: xorl %eax, %eax 2023; X86-NOBMI-NEXT: testb $32, %cl 2024; X86-NOBMI-NEXT: jne .LBB29_2 2025; X86-NOBMI-NEXT: # %bb.1: 2026; X86-NOBMI-NEXT: movl %edx, %eax 2027; X86-NOBMI-NEXT: .LBB29_2: 2028; X86-NOBMI-NEXT: notl %eax 2029; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 2030; X86-NOBMI-NEXT: retl 2031; 2032; X86-BMI1-LABEL: bzhi64_32_b3: 2033; X86-BMI1: # %bb.0: 2034; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 2035; X86-BMI1-NEXT: movl $-1, %eax 2036; X86-BMI1-NEXT: shll %cl, %eax 2037; X86-BMI1-NEXT: xorl %edx, %edx 2038; X86-BMI1-NEXT: testb $32, %cl 2039; X86-BMI1-NEXT: jne .LBB29_2 2040; X86-BMI1-NEXT: # %bb.1: 2041; X86-BMI1-NEXT: movl %eax, %edx 2042; X86-BMI1-NEXT: .LBB29_2: 2043; X86-BMI1-NEXT: andnl {{[0-9]+}}(%esp), %edx, %eax 2044; X86-BMI1-NEXT: retl 2045; 2046; X86-BMI2-LABEL: bzhi64_32_b3: 2047; X86-BMI2: # %bb.0: 2048; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 2049; X86-BMI2-NEXT: xorl %ecx, %ecx 2050; X86-BMI2-NEXT: testb $32, %al 2051; X86-BMI2-NEXT: jne .LBB29_2 2052; X86-BMI2-NEXT: # %bb.1: 2053; X86-BMI2-NEXT: movl $-1, %ecx 2054; X86-BMI2-NEXT: shlxl %eax, %ecx, %ecx 2055; X86-BMI2-NEXT: .LBB29_2: 2056; X86-BMI2-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %eax 2057; X86-BMI2-NEXT: retl 2058; 2059; X64-NOBMI-LABEL: bzhi64_32_b3: 2060; X64-NOBMI: # %bb.0: 2061; X64-NOBMI-NEXT: movl %esi, %ecx 2062; X64-NOBMI-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF 2063; X64-NOBMI-NEXT: movl $4294967295, %edx # imm = 0xFFFFFFFF 2064; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2065; X64-NOBMI-NEXT: shlq %cl, %rdx 2066; X64-NOBMI-NEXT: xorl %edx, %eax 2067; X64-NOBMI-NEXT: andl %edi, %eax 2068; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax 2069; X64-NOBMI-NEXT: retq 
2070; 2071; X64-BMI1-LABEL: bzhi64_32_b3: 2072; X64-BMI1: # %bb.0: 2073; X64-BMI1-NEXT: shll $8, %esi 2074; X64-BMI1-NEXT: bextrl %esi, %edi, %eax 2075; X64-BMI1-NEXT: retq 2076; 2077; X64-BMI2-LABEL: bzhi64_32_b3: 2078; X64-BMI2: # %bb.0: 2079; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 2080; X64-BMI2-NEXT: retq 2081 %widenumlowbits = zext i8 %numlowbits to i64 2082 %notmask = shl nsw i64 4294967295, %widenumlowbits 2083 %mask = xor i64 %notmask, 4294967295 2084 %wideres = and i64 %val, %mask 2085 %res = trunc i64 %wideres to i32 2086 ret i32 %res 2087} 2088 2089; ---------------------------------------------------------------------------- ; 2090; Pattern c. 32-bit 2091; ---------------------------------------------------------------------------- ; 2092 2093define i32 @bzhi32_c0(i32 %val, i32 %numlowbits, ptr %escape) nounwind { 2094; X86-NOBMI-LABEL: bzhi32_c0: 2095; X86-NOBMI: # %bb.0: 2096; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 2097; X86-NOBMI-NEXT: xorl %ecx, %ecx 2098; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2099; X86-NOBMI-NEXT: movl $-1, %eax 2100; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2101; X86-NOBMI-NEXT: shrl %cl, %eax 2102; X86-NOBMI-NEXT: movl %eax, (%edx) 2103; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 2104; X86-NOBMI-NEXT: retl 2105; 2106; X86-BMI1-LABEL: bzhi32_c0: 2107; X86-BMI1: # %bb.0: 2108; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx 2109; X86-BMI1-NEXT: xorl %ecx, %ecx 2110; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl 2111; X86-BMI1-NEXT: movl $-1, %eax 2112; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx 2113; X86-BMI1-NEXT: shrl %cl, %eax 2114; X86-BMI1-NEXT: movl %eax, (%edx) 2115; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax 2116; X86-BMI1-NEXT: retl 2117; 2118; X86-BMI2-LABEL: bzhi32_c0: 2119; X86-BMI2: # %bb.0: 2120; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 2121; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 2122; X86-BMI2-NEXT: movl $-1, %edx 2123; X86-BMI2-NEXT: bzhil %ecx, %edx, %edx 2124; X86-BMI2-NEXT: movl 
%edx, (%eax) 2125; X86-BMI2-NEXT: bzhil %ecx, {{[0-9]+}}(%esp), %eax 2126; X86-BMI2-NEXT: retl 2127; 2128; X64-NOBMI-LABEL: bzhi32_c0: 2129; X64-NOBMI: # %bb.0: 2130; X64-NOBMI-NEXT: movl %esi, %ecx 2131; X64-NOBMI-NEXT: negb %cl 2132; X64-NOBMI-NEXT: movl $-1, %eax 2133; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2134; X64-NOBMI-NEXT: shrl %cl, %eax 2135; X64-NOBMI-NEXT: movl %eax, (%rdx) 2136; X64-NOBMI-NEXT: andl %edi, %eax 2137; X64-NOBMI-NEXT: retq 2138; 2139; X64-BMI1-LABEL: bzhi32_c0: 2140; X64-BMI1: # %bb.0: 2141; X64-BMI1-NEXT: movl %esi, %ecx 2142; X64-BMI1-NEXT: negb %cl 2143; X64-BMI1-NEXT: movl $-1, %eax 2144; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx 2145; X64-BMI1-NEXT: shrl %cl, %eax 2146; X64-BMI1-NEXT: movl %eax, (%rdx) 2147; X64-BMI1-NEXT: andl %edi, %eax 2148; X64-BMI1-NEXT: retq 2149; 2150; X64-BMI2-LABEL: bzhi32_c0: 2151; X64-BMI2: # %bb.0: 2152; X64-BMI2-NEXT: movl $-1, %eax 2153; X64-BMI2-NEXT: bzhil %esi, %eax, %eax 2154; X64-BMI2-NEXT: movl %eax, (%rdx) 2155; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 2156; X64-BMI2-NEXT: retq 2157 %numhighbits = sub i32 32, %numlowbits 2158 %mask = lshr i32 -1, %numhighbits 2159 store i32 %mask, ptr %escape 2160 %masked = and i32 %mask, %val 2161 ret i32 %masked 2162} 2163 2164define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits, ptr %escape) nounwind { 2165; X86-NOBMI-LABEL: bzhi32_c1_indexzext: 2166; X86-NOBMI: # %bb.0: 2167; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 2168; X86-NOBMI-NEXT: xorl %ecx, %ecx 2169; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2170; X86-NOBMI-NEXT: movl $-1, %eax 2171; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2172; X86-NOBMI-NEXT: shrl %cl, %eax 2173; X86-NOBMI-NEXT: movl %eax, (%edx) 2174; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 2175; X86-NOBMI-NEXT: retl 2176; 2177; X86-BMI1-LABEL: bzhi32_c1_indexzext: 2178; X86-BMI1: # %bb.0: 2179; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx 2180; X86-BMI1-NEXT: xorl %ecx, %ecx 2181; X86-BMI1-NEXT: 
subb {{[0-9]+}}(%esp), %cl 2182; X86-BMI1-NEXT: movl $-1, %eax 2183; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx 2184; X86-BMI1-NEXT: shrl %cl, %eax 2185; X86-BMI1-NEXT: movl %eax, (%edx) 2186; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax 2187; X86-BMI1-NEXT: retl 2188; 2189; X86-BMI2-LABEL: bzhi32_c1_indexzext: 2190; X86-BMI2: # %bb.0: 2191; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 2192; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 2193; X86-BMI2-NEXT: movl $-1, %edx 2194; X86-BMI2-NEXT: bzhil %ecx, %edx, %edx 2195; X86-BMI2-NEXT: movl %edx, (%eax) 2196; X86-BMI2-NEXT: bzhil %ecx, {{[0-9]+}}(%esp), %eax 2197; X86-BMI2-NEXT: retl 2198; 2199; X64-NOBMI-LABEL: bzhi32_c1_indexzext: 2200; X64-NOBMI: # %bb.0: 2201; X64-NOBMI-NEXT: movl %esi, %ecx 2202; X64-NOBMI-NEXT: negb %cl 2203; X64-NOBMI-NEXT: movl $-1, %eax 2204; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2205; X64-NOBMI-NEXT: shrl %cl, %eax 2206; X64-NOBMI-NEXT: movl %eax, (%rdx) 2207; X64-NOBMI-NEXT: andl %edi, %eax 2208; X64-NOBMI-NEXT: retq 2209; 2210; X64-BMI1-LABEL: bzhi32_c1_indexzext: 2211; X64-BMI1: # %bb.0: 2212; X64-BMI1-NEXT: movl %esi, %ecx 2213; X64-BMI1-NEXT: negb %cl 2214; X64-BMI1-NEXT: movl $-1, %eax 2215; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx 2216; X64-BMI1-NEXT: shrl %cl, %eax 2217; X64-BMI1-NEXT: movl %eax, (%rdx) 2218; X64-BMI1-NEXT: andl %edi, %eax 2219; X64-BMI1-NEXT: retq 2220; 2221; X64-BMI2-LABEL: bzhi32_c1_indexzext: 2222; X64-BMI2: # %bb.0: 2223; X64-BMI2-NEXT: movl $-1, %eax 2224; X64-BMI2-NEXT: bzhil %esi, %eax, %eax 2225; X64-BMI2-NEXT: movl %eax, (%rdx) 2226; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 2227; X64-BMI2-NEXT: retq 2228 %numhighbits = sub i8 32, %numlowbits 2229 %sh_prom = zext i8 %numhighbits to i32 2230 %mask = lshr i32 -1, %sh_prom 2231 store i32 %mask, ptr %escape 2232 %masked = and i32 %mask, %val 2233 ret i32 %masked 2234} 2235 2236define i32 @bzhi32_c2_load(ptr %w, i32 %numlowbits, ptr %escape) nounwind { 2237; X86-NOBMI-LABEL: 
bzhi32_c2_load: 2238; X86-NOBMI: # %bb.0: 2239; X86-NOBMI-NEXT: pushl %esi 2240; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 2241; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 2242; X86-NOBMI-NEXT: xorl %ecx, %ecx 2243; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2244; X86-NOBMI-NEXT: movl $-1, %esi 2245; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2246; X86-NOBMI-NEXT: shrl %cl, %esi 2247; X86-NOBMI-NEXT: movl (%eax), %eax 2248; X86-NOBMI-NEXT: andl %esi, %eax 2249; X86-NOBMI-NEXT: movl %esi, (%edx) 2250; X86-NOBMI-NEXT: popl %esi 2251; X86-NOBMI-NEXT: retl 2252; 2253; X86-BMI1-LABEL: bzhi32_c2_load: 2254; X86-BMI1: # %bb.0: 2255; X86-BMI1-NEXT: pushl %esi 2256; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx 2257; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax 2258; X86-BMI1-NEXT: xorl %ecx, %ecx 2259; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl 2260; X86-BMI1-NEXT: movl $-1, %esi 2261; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx 2262; X86-BMI1-NEXT: shrl %cl, %esi 2263; X86-BMI1-NEXT: movl (%eax), %eax 2264; X86-BMI1-NEXT: andl %esi, %eax 2265; X86-BMI1-NEXT: movl %esi, (%edx) 2266; X86-BMI1-NEXT: popl %esi 2267; X86-BMI1-NEXT: retl 2268; 2269; X86-BMI2-LABEL: bzhi32_c2_load: 2270; X86-BMI2: # %bb.0: 2271; X86-BMI2-NEXT: pushl %esi 2272; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 2273; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 2274; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx 2275; X86-BMI2-NEXT: movl $-1, %esi 2276; X86-BMI2-NEXT: bzhil %edx, %esi, %esi 2277; X86-BMI2-NEXT: bzhil %edx, (%eax), %eax 2278; X86-BMI2-NEXT: movl %esi, (%ecx) 2279; X86-BMI2-NEXT: popl %esi 2280; X86-BMI2-NEXT: retl 2281; 2282; X64-NOBMI-LABEL: bzhi32_c2_load: 2283; X64-NOBMI: # %bb.0: 2284; X64-NOBMI-NEXT: movl %esi, %ecx 2285; X64-NOBMI-NEXT: negb %cl 2286; X64-NOBMI-NEXT: movl $-1, %esi 2287; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2288; X64-NOBMI-NEXT: shrl %cl, %esi 2289; X64-NOBMI-NEXT: movl (%rdi), %eax 2290; X64-NOBMI-NEXT: andl %esi, %eax 2291; 
X64-NOBMI-NEXT: movl %esi, (%rdx) 2292; X64-NOBMI-NEXT: retq 2293; 2294; X64-BMI1-LABEL: bzhi32_c2_load: 2295; X64-BMI1: # %bb.0: 2296; X64-BMI1-NEXT: movl %esi, %ecx 2297; X64-BMI1-NEXT: negb %cl 2298; X64-BMI1-NEXT: movl $-1, %esi 2299; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx 2300; X64-BMI1-NEXT: shrl %cl, %esi 2301; X64-BMI1-NEXT: movl (%rdi), %eax 2302; X64-BMI1-NEXT: andl %esi, %eax 2303; X64-BMI1-NEXT: movl %esi, (%rdx) 2304; X64-BMI1-NEXT: retq 2305; 2306; X64-BMI2-LABEL: bzhi32_c2_load: 2307; X64-BMI2: # %bb.0: 2308; X64-BMI2-NEXT: movl $-1, %eax 2309; X64-BMI2-NEXT: bzhil %esi, %eax, %ecx 2310; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax 2311; X64-BMI2-NEXT: movl %ecx, (%rdx) 2312; X64-BMI2-NEXT: retq 2313 %val = load i32, ptr %w 2314 %numhighbits = sub i32 32, %numlowbits 2315 %mask = lshr i32 -1, %numhighbits 2316 store i32 %mask, ptr %escape 2317 %masked = and i32 %mask, %val 2318 ret i32 %masked 2319} 2320 2321define i32 @bzhi32_c3_load_indexzext(ptr %w, i8 %numlowbits, ptr %escape) nounwind { 2322; X86-NOBMI-LABEL: bzhi32_c3_load_indexzext: 2323; X86-NOBMI: # %bb.0: 2324; X86-NOBMI-NEXT: pushl %esi 2325; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 2326; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 2327; X86-NOBMI-NEXT: xorl %ecx, %ecx 2328; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2329; X86-NOBMI-NEXT: movl $-1, %esi 2330; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2331; X86-NOBMI-NEXT: shrl %cl, %esi 2332; X86-NOBMI-NEXT: movl (%eax), %eax 2333; X86-NOBMI-NEXT: andl %esi, %eax 2334; X86-NOBMI-NEXT: movl %esi, (%edx) 2335; X86-NOBMI-NEXT: popl %esi 2336; X86-NOBMI-NEXT: retl 2337; 2338; X86-BMI1-LABEL: bzhi32_c3_load_indexzext: 2339; X86-BMI1: # %bb.0: 2340; X86-BMI1-NEXT: pushl %esi 2341; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx 2342; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax 2343; X86-BMI1-NEXT: xorl %ecx, %ecx 2344; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl 2345; X86-BMI1-NEXT: movl $-1, %esi 2346; X86-BMI1-NEXT: # kill: 
def $cl killed $cl killed $ecx 2347; X86-BMI1-NEXT: shrl %cl, %esi 2348; X86-BMI1-NEXT: movl (%eax), %eax 2349; X86-BMI1-NEXT: andl %esi, %eax 2350; X86-BMI1-NEXT: movl %esi, (%edx) 2351; X86-BMI1-NEXT: popl %esi 2352; X86-BMI1-NEXT: retl 2353; 2354; X86-BMI2-LABEL: bzhi32_c3_load_indexzext: 2355; X86-BMI2: # %bb.0: 2356; X86-BMI2-NEXT: pushl %esi 2357; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 2358; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 2359; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx 2360; X86-BMI2-NEXT: movl $-1, %esi 2361; X86-BMI2-NEXT: bzhil %edx, %esi, %esi 2362; X86-BMI2-NEXT: bzhil %edx, (%eax), %eax 2363; X86-BMI2-NEXT: movl %esi, (%ecx) 2364; X86-BMI2-NEXT: popl %esi 2365; X86-BMI2-NEXT: retl 2366; 2367; X64-NOBMI-LABEL: bzhi32_c3_load_indexzext: 2368; X64-NOBMI: # %bb.0: 2369; X64-NOBMI-NEXT: movl %esi, %ecx 2370; X64-NOBMI-NEXT: negb %cl 2371; X64-NOBMI-NEXT: movl $-1, %esi 2372; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2373; X64-NOBMI-NEXT: shrl %cl, %esi 2374; X64-NOBMI-NEXT: movl (%rdi), %eax 2375; X64-NOBMI-NEXT: andl %esi, %eax 2376; X64-NOBMI-NEXT: movl %esi, (%rdx) 2377; X64-NOBMI-NEXT: retq 2378; 2379; X64-BMI1-LABEL: bzhi32_c3_load_indexzext: 2380; X64-BMI1: # %bb.0: 2381; X64-BMI1-NEXT: movl %esi, %ecx 2382; X64-BMI1-NEXT: negb %cl 2383; X64-BMI1-NEXT: movl $-1, %esi 2384; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx 2385; X64-BMI1-NEXT: shrl %cl, %esi 2386; X64-BMI1-NEXT: movl (%rdi), %eax 2387; X64-BMI1-NEXT: andl %esi, %eax 2388; X64-BMI1-NEXT: movl %esi, (%rdx) 2389; X64-BMI1-NEXT: retq 2390; 2391; X64-BMI2-LABEL: bzhi32_c3_load_indexzext: 2392; X64-BMI2: # %bb.0: 2393; X64-BMI2-NEXT: movl $-1, %eax 2394; X64-BMI2-NEXT: bzhil %esi, %eax, %ecx 2395; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax 2396; X64-BMI2-NEXT: movl %ecx, (%rdx) 2397; X64-BMI2-NEXT: retq 2398 %val = load i32, ptr %w 2399 %numhighbits = sub i8 32, %numlowbits 2400 %sh_prom = zext i8 %numhighbits to i32 2401 %mask = lshr i32 -1, %sh_prom 2402 
store i32 %mask, ptr %escape 2403 %masked = and i32 %mask, %val 2404 ret i32 %masked 2405} 2406 2407define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits, ptr %escape) nounwind { 2408; X86-NOBMI-LABEL: bzhi32_c4_commutative: 2409; X86-NOBMI: # %bb.0: 2410; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 2411; X86-NOBMI-NEXT: xorl %ecx, %ecx 2412; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2413; X86-NOBMI-NEXT: movl $-1, %eax 2414; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2415; X86-NOBMI-NEXT: shrl %cl, %eax 2416; X86-NOBMI-NEXT: movl %eax, (%edx) 2417; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 2418; X86-NOBMI-NEXT: retl 2419; 2420; X86-BMI1-LABEL: bzhi32_c4_commutative: 2421; X86-BMI1: # %bb.0: 2422; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx 2423; X86-BMI1-NEXT: xorl %ecx, %ecx 2424; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl 2425; X86-BMI1-NEXT: movl $-1, %eax 2426; X86-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx 2427; X86-BMI1-NEXT: shrl %cl, %eax 2428; X86-BMI1-NEXT: movl %eax, (%edx) 2429; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax 2430; X86-BMI1-NEXT: retl 2431; 2432; X86-BMI2-LABEL: bzhi32_c4_commutative: 2433; X86-BMI2: # %bb.0: 2434; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 2435; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx 2436; X86-BMI2-NEXT: movl $-1, %edx 2437; X86-BMI2-NEXT: bzhil %ecx, %edx, %edx 2438; X86-BMI2-NEXT: movl %edx, (%eax) 2439; X86-BMI2-NEXT: bzhil %ecx, {{[0-9]+}}(%esp), %eax 2440; X86-BMI2-NEXT: retl 2441; 2442; X64-NOBMI-LABEL: bzhi32_c4_commutative: 2443; X64-NOBMI: # %bb.0: 2444; X64-NOBMI-NEXT: movl %esi, %ecx 2445; X64-NOBMI-NEXT: negb %cl 2446; X64-NOBMI-NEXT: movl $-1, %eax 2447; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2448; X64-NOBMI-NEXT: shrl %cl, %eax 2449; X64-NOBMI-NEXT: movl %eax, (%rdx) 2450; X64-NOBMI-NEXT: andl %edi, %eax 2451; X64-NOBMI-NEXT: retq 2452; 2453; X64-BMI1-LABEL: bzhi32_c4_commutative: 2454; X64-BMI1: # %bb.0: 2455; X64-BMI1-NEXT: movl %esi, %ecx 2456; X64-BMI1-NEXT: 
negb %cl 2457; X64-BMI1-NEXT: movl $-1, %eax 2458; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx 2459; X64-BMI1-NEXT: shrl %cl, %eax 2460; X64-BMI1-NEXT: movl %eax, (%rdx) 2461; X64-BMI1-NEXT: andl %edi, %eax 2462; X64-BMI1-NEXT: retq 2463; 2464; X64-BMI2-LABEL: bzhi32_c4_commutative: 2465; X64-BMI2: # %bb.0: 2466; X64-BMI2-NEXT: movl $-1, %eax 2467; X64-BMI2-NEXT: bzhil %esi, %eax, %eax 2468; X64-BMI2-NEXT: movl %eax, (%rdx) 2469; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 2470; X64-BMI2-NEXT: retq 2471 %numhighbits = sub i32 32, %numlowbits 2472 %mask = lshr i32 -1, %numhighbits 2473 store i32 %mask, ptr %escape 2474 %masked = and i32 %val, %mask ; swapped order 2475 ret i32 %masked 2476} 2477 2478; 64-bit 2479 2480define i64 @bzhi64_c0(i64 %val, i64 %numlowbits, ptr %escape) nounwind { 2481; X86-NOBMI-LABEL: bzhi64_c0: 2482; X86-NOBMI: # %bb.0: 2483; X86-NOBMI-NEXT: pushl %esi 2484; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi 2485; X86-NOBMI-NEXT: movb $64, %cl 2486; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2487; X86-NOBMI-NEXT: movl $-1, %eax 2488; X86-NOBMI-NEXT: movl $-1, %edx 2489; X86-NOBMI-NEXT: shrl %cl, %edx 2490; X86-NOBMI-NEXT: testb $32, %cl 2491; X86-NOBMI-NEXT: je .LBB35_2 2492; X86-NOBMI-NEXT: # %bb.1: 2493; X86-NOBMI-NEXT: movl %edx, %eax 2494; X86-NOBMI-NEXT: xorl %edx, %edx 2495; X86-NOBMI-NEXT: .LBB35_2: 2496; X86-NOBMI-NEXT: movl %edx, 4(%esi) 2497; X86-NOBMI-NEXT: movl %eax, (%esi) 2498; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 2499; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx 2500; X86-NOBMI-NEXT: popl %esi 2501; X86-NOBMI-NEXT: retl 2502; 2503; X86-BMI1-LABEL: bzhi64_c0: 2504; X86-BMI1: # %bb.0: 2505; X86-BMI1-NEXT: pushl %esi 2506; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi 2507; X86-BMI1-NEXT: movb $64, %cl 2508; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl 2509; X86-BMI1-NEXT: movl $-1, %eax 2510; X86-BMI1-NEXT: movl $-1, %edx 2511; X86-BMI1-NEXT: shrl %cl, %edx 2512; X86-BMI1-NEXT: testb $32, %cl 2513; X86-BMI1-NEXT: je .LBB35_2 
2514; X86-BMI1-NEXT: # %bb.1: 2515; X86-BMI1-NEXT: movl %edx, %eax 2516; X86-BMI1-NEXT: xorl %edx, %edx 2517; X86-BMI1-NEXT: .LBB35_2: 2518; X86-BMI1-NEXT: movl %edx, 4(%esi) 2519; X86-BMI1-NEXT: movl %eax, (%esi) 2520; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax 2521; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx 2522; X86-BMI1-NEXT: popl %esi 2523; X86-BMI1-NEXT: retl 2524; 2525; X86-BMI2-LABEL: bzhi64_c0: 2526; X86-BMI2: # %bb.0: 2527; X86-BMI2-NEXT: pushl %ebx 2528; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 2529; X86-BMI2-NEXT: movb $64, %bl 2530; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl 2531; X86-BMI2-NEXT: movl $-1, %eax 2532; X86-BMI2-NEXT: shrxl %ebx, %eax, %edx 2533; X86-BMI2-NEXT: testb $32, %bl 2534; X86-BMI2-NEXT: je .LBB35_2 2535; X86-BMI2-NEXT: # %bb.1: 2536; X86-BMI2-NEXT: movl %edx, %eax 2537; X86-BMI2-NEXT: xorl %edx, %edx 2538; X86-BMI2-NEXT: .LBB35_2: 2539; X86-BMI2-NEXT: movl %edx, 4(%ecx) 2540; X86-BMI2-NEXT: movl %eax, (%ecx) 2541; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 2542; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 2543; X86-BMI2-NEXT: popl %ebx 2544; X86-BMI2-NEXT: retl 2545; 2546; X64-NOBMI-LABEL: bzhi64_c0: 2547; X64-NOBMI: # %bb.0: 2548; X64-NOBMI-NEXT: movq %rsi, %rcx 2549; X64-NOBMI-NEXT: negb %cl 2550; X64-NOBMI-NEXT: movq $-1, %rax 2551; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 2552; X64-NOBMI-NEXT: shrq %cl, %rax 2553; X64-NOBMI-NEXT: movq %rax, (%rdx) 2554; X64-NOBMI-NEXT: andq %rdi, %rax 2555; X64-NOBMI-NEXT: retq 2556; 2557; X64-BMI1-LABEL: bzhi64_c0: 2558; X64-BMI1: # %bb.0: 2559; X64-BMI1-NEXT: movq %rsi, %rcx 2560; X64-BMI1-NEXT: negb %cl 2561; X64-BMI1-NEXT: movq $-1, %rax 2562; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx 2563; X64-BMI1-NEXT: shrq %cl, %rax 2564; X64-BMI1-NEXT: movq %rax, (%rdx) 2565; X64-BMI1-NEXT: andq %rdi, %rax 2566; X64-BMI1-NEXT: retq 2567; 2568; X64-BMI2-LABEL: bzhi64_c0: 2569; X64-BMI2: # %bb.0: 2570; X64-BMI2-NEXT: movq $-1, %rax 2571; X64-BMI2-NEXT: bzhiq %rsi, %rax, %rax 
2572; X64-BMI2-NEXT: movq %rax, (%rdx) 2573; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax 2574; X64-BMI2-NEXT: retq 2575 %numhighbits = sub i64 64, %numlowbits 2576 %mask = lshr i64 -1, %numhighbits 2577 store i64 %mask, ptr %escape 2578 %masked = and i64 %mask, %val 2579 ret i64 %masked 2580} 2581 2582define i64 @bzhi64_c1_indexzext(i64 %val, i8 %numlowbits, ptr %escape) nounwind { 2583; X86-NOBMI-LABEL: bzhi64_c1_indexzext: 2584; X86-NOBMI: # %bb.0: 2585; X86-NOBMI-NEXT: pushl %esi 2586; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi 2587; X86-NOBMI-NEXT: movb $64, %cl 2588; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2589; X86-NOBMI-NEXT: movl $-1, %eax 2590; X86-NOBMI-NEXT: movl $-1, %edx 2591; X86-NOBMI-NEXT: shrl %cl, %edx 2592; X86-NOBMI-NEXT: testb $32, %cl 2593; X86-NOBMI-NEXT: je .LBB36_2 2594; X86-NOBMI-NEXT: # %bb.1: 2595; X86-NOBMI-NEXT: movl %edx, %eax 2596; X86-NOBMI-NEXT: xorl %edx, %edx 2597; X86-NOBMI-NEXT: .LBB36_2: 2598; X86-NOBMI-NEXT: movl %edx, 4(%esi) 2599; X86-NOBMI-NEXT: movl %eax, (%esi) 2600; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 2601; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx 2602; X86-NOBMI-NEXT: popl %esi 2603; X86-NOBMI-NEXT: retl 2604; 2605; X86-BMI1-LABEL: bzhi64_c1_indexzext: 2606; X86-BMI1: # %bb.0: 2607; X86-BMI1-NEXT: pushl %esi 2608; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi 2609; X86-BMI1-NEXT: movb $64, %cl 2610; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl 2611; X86-BMI1-NEXT: movl $-1, %eax 2612; X86-BMI1-NEXT: movl $-1, %edx 2613; X86-BMI1-NEXT: shrl %cl, %edx 2614; X86-BMI1-NEXT: testb $32, %cl 2615; X86-BMI1-NEXT: je .LBB36_2 2616; X86-BMI1-NEXT: # %bb.1: 2617; X86-BMI1-NEXT: movl %edx, %eax 2618; X86-BMI1-NEXT: xorl %edx, %edx 2619; X86-BMI1-NEXT: .LBB36_2: 2620; X86-BMI1-NEXT: movl %edx, 4(%esi) 2621; X86-BMI1-NEXT: movl %eax, (%esi) 2622; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax 2623; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx 2624; X86-BMI1-NEXT: popl %esi 2625; X86-BMI1-NEXT: retl 2626; 2627; X86-BMI2-LABEL: 
bzhi64_c1_indexzext: 2628; X86-BMI2: # %bb.0: 2629; X86-BMI2-NEXT: pushl %ebx 2630; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 2631; X86-BMI2-NEXT: movb $64, %bl 2632; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl 2633; X86-BMI2-NEXT: movl $-1, %eax 2634; X86-BMI2-NEXT: shrxl %ebx, %eax, %edx 2635; X86-BMI2-NEXT: testb $32, %bl 2636; X86-BMI2-NEXT: je .LBB36_2 2637; X86-BMI2-NEXT: # %bb.1: 2638; X86-BMI2-NEXT: movl %edx, %eax 2639; X86-BMI2-NEXT: xorl %edx, %edx 2640; X86-BMI2-NEXT: .LBB36_2: 2641; X86-BMI2-NEXT: movl %edx, 4(%ecx) 2642; X86-BMI2-NEXT: movl %eax, (%ecx) 2643; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 2644; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 2645; X86-BMI2-NEXT: popl %ebx 2646; X86-BMI2-NEXT: retl 2647; 2648; X64-NOBMI-LABEL: bzhi64_c1_indexzext: 2649; X64-NOBMI: # %bb.0: 2650; X64-NOBMI-NEXT: movl %esi, %ecx 2651; X64-NOBMI-NEXT: negb %cl 2652; X64-NOBMI-NEXT: movq $-1, %rax 2653; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2654; X64-NOBMI-NEXT: shrq %cl, %rax 2655; X64-NOBMI-NEXT: movq %rax, (%rdx) 2656; X64-NOBMI-NEXT: andq %rdi, %rax 2657; X64-NOBMI-NEXT: retq 2658; 2659; X64-BMI1-LABEL: bzhi64_c1_indexzext: 2660; X64-BMI1: # %bb.0: 2661; X64-BMI1-NEXT: movl %esi, %ecx 2662; X64-BMI1-NEXT: negb %cl 2663; X64-BMI1-NEXT: movq $-1, %rax 2664; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx 2665; X64-BMI1-NEXT: shrq %cl, %rax 2666; X64-BMI1-NEXT: movq %rax, (%rdx) 2667; X64-BMI1-NEXT: andq %rdi, %rax 2668; X64-BMI1-NEXT: retq 2669; 2670; X64-BMI2-LABEL: bzhi64_c1_indexzext: 2671; X64-BMI2: # %bb.0: 2672; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi 2673; X64-BMI2-NEXT: movq $-1, %rax 2674; X64-BMI2-NEXT: bzhiq %rsi, %rax, %rax 2675; X64-BMI2-NEXT: movq %rax, (%rdx) 2676; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax 2677; X64-BMI2-NEXT: retq 2678 %numhighbits = sub i8 64, %numlowbits 2679 %sh_prom = zext i8 %numhighbits to i64 2680 %mask = lshr i64 -1, %sh_prom 2681 store i64 %mask, ptr %escape 2682 %masked = and i64 %mask, 
%val 2683 ret i64 %masked 2684} 2685 2686define i64 @bzhi64_c2_load(ptr %w, i64 %numlowbits, ptr %escape) nounwind { 2687; X86-NOBMI-LABEL: bzhi64_c2_load: 2688; X86-NOBMI: # %bb.0: 2689; X86-NOBMI-NEXT: pushl %ebx 2690; X86-NOBMI-NEXT: pushl %edi 2691; X86-NOBMI-NEXT: pushl %esi 2692; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi 2693; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 2694; X86-NOBMI-NEXT: movb $64, %cl 2695; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2696; X86-NOBMI-NEXT: movl $-1, %edi 2697; X86-NOBMI-NEXT: movl $-1, %ebx 2698; X86-NOBMI-NEXT: shrl %cl, %ebx 2699; X86-NOBMI-NEXT: testb $32, %cl 2700; X86-NOBMI-NEXT: je .LBB37_2 2701; X86-NOBMI-NEXT: # %bb.1: 2702; X86-NOBMI-NEXT: movl %ebx, %edi 2703; X86-NOBMI-NEXT: xorl %ebx, %ebx 2704; X86-NOBMI-NEXT: .LBB37_2: 2705; X86-NOBMI-NEXT: movl 4(%eax), %edx 2706; X86-NOBMI-NEXT: andl %ebx, %edx 2707; X86-NOBMI-NEXT: movl (%eax), %eax 2708; X86-NOBMI-NEXT: andl %edi, %eax 2709; X86-NOBMI-NEXT: movl %ebx, 4(%esi) 2710; X86-NOBMI-NEXT: movl %edi, (%esi) 2711; X86-NOBMI-NEXT: popl %esi 2712; X86-NOBMI-NEXT: popl %edi 2713; X86-NOBMI-NEXT: popl %ebx 2714; X86-NOBMI-NEXT: retl 2715; 2716; X86-BMI1-LABEL: bzhi64_c2_load: 2717; X86-BMI1: # %bb.0: 2718; X86-BMI1-NEXT: pushl %ebx 2719; X86-BMI1-NEXT: pushl %edi 2720; X86-BMI1-NEXT: pushl %esi 2721; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi 2722; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax 2723; X86-BMI1-NEXT: movb $64, %cl 2724; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl 2725; X86-BMI1-NEXT: movl $-1, %edi 2726; X86-BMI1-NEXT: movl $-1, %ebx 2727; X86-BMI1-NEXT: shrl %cl, %ebx 2728; X86-BMI1-NEXT: testb $32, %cl 2729; X86-BMI1-NEXT: je .LBB37_2 2730; X86-BMI1-NEXT: # %bb.1: 2731; X86-BMI1-NEXT: movl %ebx, %edi 2732; X86-BMI1-NEXT: xorl %ebx, %ebx 2733; X86-BMI1-NEXT: .LBB37_2: 2734; X86-BMI1-NEXT: movl 4(%eax), %edx 2735; X86-BMI1-NEXT: andl %ebx, %edx 2736; X86-BMI1-NEXT: movl (%eax), %eax 2737; X86-BMI1-NEXT: andl %edi, %eax 2738; X86-BMI1-NEXT: movl %ebx, 4(%esi) 
2739; X86-BMI1-NEXT: movl %edi, (%esi) 2740; X86-BMI1-NEXT: popl %esi 2741; X86-BMI1-NEXT: popl %edi 2742; X86-BMI1-NEXT: popl %ebx 2743; X86-BMI1-NEXT: retl 2744; 2745; X86-BMI2-LABEL: bzhi64_c2_load: 2746; X86-BMI2: # %bb.0: 2747; X86-BMI2-NEXT: pushl %edi 2748; X86-BMI2-NEXT: pushl %esi 2749; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 2750; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 2751; X86-BMI2-NEXT: movb $64, %dl 2752; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %dl 2753; X86-BMI2-NEXT: movl $-1, %esi 2754; X86-BMI2-NEXT: shrxl %edx, %esi, %edi 2755; X86-BMI2-NEXT: testb $32, %dl 2756; X86-BMI2-NEXT: je .LBB37_2 2757; X86-BMI2-NEXT: # %bb.1: 2758; X86-BMI2-NEXT: movl %edi, %esi 2759; X86-BMI2-NEXT: xorl %edi, %edi 2760; X86-BMI2-NEXT: .LBB37_2: 2761; X86-BMI2-NEXT: movl 4(%eax), %edx 2762; X86-BMI2-NEXT: andl %edi, %edx 2763; X86-BMI2-NEXT: movl (%eax), %eax 2764; X86-BMI2-NEXT: andl %esi, %eax 2765; X86-BMI2-NEXT: movl %edi, 4(%ecx) 2766; X86-BMI2-NEXT: movl %esi, (%ecx) 2767; X86-BMI2-NEXT: popl %esi 2768; X86-BMI2-NEXT: popl %edi 2769; X86-BMI2-NEXT: retl 2770; 2771; X64-NOBMI-LABEL: bzhi64_c2_load: 2772; X64-NOBMI: # %bb.0: 2773; X64-NOBMI-NEXT: movq %rsi, %rcx 2774; X64-NOBMI-NEXT: negb %cl 2775; X64-NOBMI-NEXT: movq $-1, %rsi 2776; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 2777; X64-NOBMI-NEXT: shrq %cl, %rsi 2778; X64-NOBMI-NEXT: movq (%rdi), %rax 2779; X64-NOBMI-NEXT: andq %rsi, %rax 2780; X64-NOBMI-NEXT: movq %rsi, (%rdx) 2781; X64-NOBMI-NEXT: retq 2782; 2783; X64-BMI1-LABEL: bzhi64_c2_load: 2784; X64-BMI1: # %bb.0: 2785; X64-BMI1-NEXT: movq %rsi, %rcx 2786; X64-BMI1-NEXT: negb %cl 2787; X64-BMI1-NEXT: movq $-1, %rsi 2788; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx 2789; X64-BMI1-NEXT: shrq %cl, %rsi 2790; X64-BMI1-NEXT: movq (%rdi), %rax 2791; X64-BMI1-NEXT: andq %rsi, %rax 2792; X64-BMI1-NEXT: movq %rsi, (%rdx) 2793; X64-BMI1-NEXT: retq 2794; 2795; X64-BMI2-LABEL: bzhi64_c2_load: 2796; X64-BMI2: # %bb.0: 2797; X64-BMI2-NEXT: movq 
$-1, %rax 2798; X64-BMI2-NEXT: bzhiq %rsi, %rax, %rcx 2799; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax 2800; X64-BMI2-NEXT: movq %rcx, (%rdx) 2801; X64-BMI2-NEXT: retq 2802 %val = load i64, ptr %w 2803 %numhighbits = sub i64 64, %numlowbits 2804 %mask = lshr i64 -1, %numhighbits 2805 store i64 %mask, ptr %escape 2806 %masked = and i64 %mask, %val 2807 ret i64 %masked 2808} 2809 2810define i64 @bzhi64_c3_load_indexzext(ptr %w, i8 %numlowbits, ptr %escape) nounwind { 2811; X86-NOBMI-LABEL: bzhi64_c3_load_indexzext: 2812; X86-NOBMI: # %bb.0: 2813; X86-NOBMI-NEXT: pushl %ebx 2814; X86-NOBMI-NEXT: pushl %edi 2815; X86-NOBMI-NEXT: pushl %esi 2816; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi 2817; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 2818; X86-NOBMI-NEXT: movb $64, %cl 2819; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2820; X86-NOBMI-NEXT: movl $-1, %edi 2821; X86-NOBMI-NEXT: movl $-1, %ebx 2822; X86-NOBMI-NEXT: shrl %cl, %ebx 2823; X86-NOBMI-NEXT: testb $32, %cl 2824; X86-NOBMI-NEXT: je .LBB38_2 2825; X86-NOBMI-NEXT: # %bb.1: 2826; X86-NOBMI-NEXT: movl %ebx, %edi 2827; X86-NOBMI-NEXT: xorl %ebx, %ebx 2828; X86-NOBMI-NEXT: .LBB38_2: 2829; X86-NOBMI-NEXT: movl 4(%eax), %edx 2830; X86-NOBMI-NEXT: andl %ebx, %edx 2831; X86-NOBMI-NEXT: movl (%eax), %eax 2832; X86-NOBMI-NEXT: andl %edi, %eax 2833; X86-NOBMI-NEXT: movl %ebx, 4(%esi) 2834; X86-NOBMI-NEXT: movl %edi, (%esi) 2835; X86-NOBMI-NEXT: popl %esi 2836; X86-NOBMI-NEXT: popl %edi 2837; X86-NOBMI-NEXT: popl %ebx 2838; X86-NOBMI-NEXT: retl 2839; 2840; X86-BMI1-LABEL: bzhi64_c3_load_indexzext: 2841; X86-BMI1: # %bb.0: 2842; X86-BMI1-NEXT: pushl %ebx 2843; X86-BMI1-NEXT: pushl %edi 2844; X86-BMI1-NEXT: pushl %esi 2845; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi 2846; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax 2847; X86-BMI1-NEXT: movb $64, %cl 2848; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl 2849; X86-BMI1-NEXT: movl $-1, %edi 2850; X86-BMI1-NEXT: movl $-1, %ebx 2851; X86-BMI1-NEXT: shrl %cl, %ebx 2852; X86-BMI1-NEXT: testb 
$32, %cl 2853; X86-BMI1-NEXT: je .LBB38_2 2854; X86-BMI1-NEXT: # %bb.1: 2855; X86-BMI1-NEXT: movl %ebx, %edi 2856; X86-BMI1-NEXT: xorl %ebx, %ebx 2857; X86-BMI1-NEXT: .LBB38_2: 2858; X86-BMI1-NEXT: movl 4(%eax), %edx 2859; X86-BMI1-NEXT: andl %ebx, %edx 2860; X86-BMI1-NEXT: movl (%eax), %eax 2861; X86-BMI1-NEXT: andl %edi, %eax 2862; X86-BMI1-NEXT: movl %ebx, 4(%esi) 2863; X86-BMI1-NEXT: movl %edi, (%esi) 2864; X86-BMI1-NEXT: popl %esi 2865; X86-BMI1-NEXT: popl %edi 2866; X86-BMI1-NEXT: popl %ebx 2867; X86-BMI1-NEXT: retl 2868; 2869; X86-BMI2-LABEL: bzhi64_c3_load_indexzext: 2870; X86-BMI2: # %bb.0: 2871; X86-BMI2-NEXT: pushl %edi 2872; X86-BMI2-NEXT: pushl %esi 2873; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 2874; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 2875; X86-BMI2-NEXT: movb $64, %dl 2876; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %dl 2877; X86-BMI2-NEXT: movl $-1, %esi 2878; X86-BMI2-NEXT: shrxl %edx, %esi, %edi 2879; X86-BMI2-NEXT: testb $32, %dl 2880; X86-BMI2-NEXT: je .LBB38_2 2881; X86-BMI2-NEXT: # %bb.1: 2882; X86-BMI2-NEXT: movl %edi, %esi 2883; X86-BMI2-NEXT: xorl %edi, %edi 2884; X86-BMI2-NEXT: .LBB38_2: 2885; X86-BMI2-NEXT: movl 4(%eax), %edx 2886; X86-BMI2-NEXT: andl %edi, %edx 2887; X86-BMI2-NEXT: movl (%eax), %eax 2888; X86-BMI2-NEXT: andl %esi, %eax 2889; X86-BMI2-NEXT: movl %edi, 4(%ecx) 2890; X86-BMI2-NEXT: movl %esi, (%ecx) 2891; X86-BMI2-NEXT: popl %esi 2892; X86-BMI2-NEXT: popl %edi 2893; X86-BMI2-NEXT: retl 2894; 2895; X64-NOBMI-LABEL: bzhi64_c3_load_indexzext: 2896; X64-NOBMI: # %bb.0: 2897; X64-NOBMI-NEXT: movl %esi, %ecx 2898; X64-NOBMI-NEXT: negb %cl 2899; X64-NOBMI-NEXT: movq $-1, %rsi 2900; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 2901; X64-NOBMI-NEXT: shrq %cl, %rsi 2902; X64-NOBMI-NEXT: movq (%rdi), %rax 2903; X64-NOBMI-NEXT: andq %rsi, %rax 2904; X64-NOBMI-NEXT: movq %rsi, (%rdx) 2905; X64-NOBMI-NEXT: retq 2906; 2907; X64-BMI1-LABEL: bzhi64_c3_load_indexzext: 2908; X64-BMI1: # %bb.0: 2909; X64-BMI1-NEXT: movl %esi, 
%ecx 2910; X64-BMI1-NEXT: negb %cl 2911; X64-BMI1-NEXT: movq $-1, %rsi 2912; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx 2913; X64-BMI1-NEXT: shrq %cl, %rsi 2914; X64-BMI1-NEXT: movq (%rdi), %rax 2915; X64-BMI1-NEXT: andq %rsi, %rax 2916; X64-BMI1-NEXT: movq %rsi, (%rdx) 2917; X64-BMI1-NEXT: retq 2918; 2919; X64-BMI2-LABEL: bzhi64_c3_load_indexzext: 2920; X64-BMI2: # %bb.0: 2921; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi 2922; X64-BMI2-NEXT: movq $-1, %rax 2923; X64-BMI2-NEXT: bzhiq %rsi, %rax, %rcx 2924; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax 2925; X64-BMI2-NEXT: movq %rcx, (%rdx) 2926; X64-BMI2-NEXT: retq 2927 %val = load i64, ptr %w 2928 %numhighbits = sub i8 64, %numlowbits 2929 %sh_prom = zext i8 %numhighbits to i64 2930 %mask = lshr i64 -1, %sh_prom 2931 store i64 %mask, ptr %escape 2932 %masked = and i64 %mask, %val 2933 ret i64 %masked 2934} 2935 2936define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits, ptr %escape) nounwind { 2937; X86-NOBMI-LABEL: bzhi64_c4_commutative: 2938; X86-NOBMI: # %bb.0: 2939; X86-NOBMI-NEXT: pushl %esi 2940; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi 2941; X86-NOBMI-NEXT: movb $64, %cl 2942; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 2943; X86-NOBMI-NEXT: movl $-1, %eax 2944; X86-NOBMI-NEXT: movl $-1, %edx 2945; X86-NOBMI-NEXT: shrl %cl, %edx 2946; X86-NOBMI-NEXT: testb $32, %cl 2947; X86-NOBMI-NEXT: je .LBB39_2 2948; X86-NOBMI-NEXT: # %bb.1: 2949; X86-NOBMI-NEXT: movl %edx, %eax 2950; X86-NOBMI-NEXT: xorl %edx, %edx 2951; X86-NOBMI-NEXT: .LBB39_2: 2952; X86-NOBMI-NEXT: movl %edx, 4(%esi) 2953; X86-NOBMI-NEXT: movl %eax, (%esi) 2954; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 2955; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx 2956; X86-NOBMI-NEXT: popl %esi 2957; X86-NOBMI-NEXT: retl 2958; 2959; X86-BMI1-LABEL: bzhi64_c4_commutative: 2960; X86-BMI1: # %bb.0: 2961; X86-BMI1-NEXT: pushl %esi 2962; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi 2963; X86-BMI1-NEXT: movb $64, %cl 2964; X86-BMI1-NEXT: subb 
{{[0-9]+}}(%esp), %cl 2965; X86-BMI1-NEXT: movl $-1, %eax 2966; X86-BMI1-NEXT: movl $-1, %edx 2967; X86-BMI1-NEXT: shrl %cl, %edx 2968; X86-BMI1-NEXT: testb $32, %cl 2969; X86-BMI1-NEXT: je .LBB39_2 2970; X86-BMI1-NEXT: # %bb.1: 2971; X86-BMI1-NEXT: movl %edx, %eax 2972; X86-BMI1-NEXT: xorl %edx, %edx 2973; X86-BMI1-NEXT: .LBB39_2: 2974; X86-BMI1-NEXT: movl %edx, 4(%esi) 2975; X86-BMI1-NEXT: movl %eax, (%esi) 2976; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax 2977; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx 2978; X86-BMI1-NEXT: popl %esi 2979; X86-BMI1-NEXT: retl 2980; 2981; X86-BMI2-LABEL: bzhi64_c4_commutative: 2982; X86-BMI2: # %bb.0: 2983; X86-BMI2-NEXT: pushl %ebx 2984; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx 2985; X86-BMI2-NEXT: movb $64, %bl 2986; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %bl 2987; X86-BMI2-NEXT: movl $-1, %eax 2988; X86-BMI2-NEXT: shrxl %ebx, %eax, %edx 2989; X86-BMI2-NEXT: testb $32, %bl 2990; X86-BMI2-NEXT: je .LBB39_2 2991; X86-BMI2-NEXT: # %bb.1: 2992; X86-BMI2-NEXT: movl %edx, %eax 2993; X86-BMI2-NEXT: xorl %edx, %edx 2994; X86-BMI2-NEXT: .LBB39_2: 2995; X86-BMI2-NEXT: movl %edx, 4(%ecx) 2996; X86-BMI2-NEXT: movl %eax, (%ecx) 2997; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 2998; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx 2999; X86-BMI2-NEXT: popl %ebx 3000; X86-BMI2-NEXT: retl 3001; 3002; X64-NOBMI-LABEL: bzhi64_c4_commutative: 3003; X64-NOBMI: # %bb.0: 3004; X64-NOBMI-NEXT: movq %rsi, %rcx 3005; X64-NOBMI-NEXT: negb %cl 3006; X64-NOBMI-NEXT: movq $-1, %rax 3007; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 3008; X64-NOBMI-NEXT: shrq %cl, %rax 3009; X64-NOBMI-NEXT: movq %rax, (%rdx) 3010; X64-NOBMI-NEXT: andq %rdi, %rax 3011; X64-NOBMI-NEXT: retq 3012; 3013; X64-BMI1-LABEL: bzhi64_c4_commutative: 3014; X64-BMI1: # %bb.0: 3015; X64-BMI1-NEXT: movq %rsi, %rcx 3016; X64-BMI1-NEXT: negb %cl 3017; X64-BMI1-NEXT: movq $-1, %rax 3018; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx 3019; X64-BMI1-NEXT: shrq %cl, %rax 3020; 
X64-BMI1-NEXT: movq %rax, (%rdx) 3021; X64-BMI1-NEXT: andq %rdi, %rax 3022; X64-BMI1-NEXT: retq 3023; 3024; X64-BMI2-LABEL: bzhi64_c4_commutative: 3025; X64-BMI2: # %bb.0: 3026; X64-BMI2-NEXT: movq $-1, %rax 3027; X64-BMI2-NEXT: bzhiq %rsi, %rax, %rax 3028; X64-BMI2-NEXT: movq %rax, (%rdx) 3029; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax 3030; X64-BMI2-NEXT: retq 3031 %numhighbits = sub i64 64, %numlowbits 3032 %mask = lshr i64 -1, %numhighbits 3033 store i64 %mask, ptr %escape 3034 %masked = and i64 %val, %mask ; swapped order 3035 ret i64 %masked 3036} 3037 3038; 64-bit, but with 32-bit output 3039 3040; Everything done in 64-bit, truncation happens last. 3041define i32 @bzhi64_32_c0(i64 %val, i64 %numlowbits) nounwind { 3042; X86-NOBMI-LABEL: bzhi64_32_c0: 3043; X86-NOBMI: # %bb.0: 3044; X86-NOBMI-NEXT: movb $64, %cl 3045; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 3046; X86-NOBMI-NEXT: movl $-1, %eax 3047; X86-NOBMI-NEXT: shrl %cl, %eax 3048; X86-NOBMI-NEXT: testb $32, %cl 3049; X86-NOBMI-NEXT: jne .LBB40_2 3050; X86-NOBMI-NEXT: # %bb.1: 3051; X86-NOBMI-NEXT: movl $-1, %eax 3052; X86-NOBMI-NEXT: .LBB40_2: 3053; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax 3054; X86-NOBMI-NEXT: retl 3055; 3056; X86-BMI1-LABEL: bzhi64_32_c0: 3057; X86-BMI1: # %bb.0: 3058; X86-BMI1-NEXT: movb $64, %cl 3059; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl 3060; X86-BMI1-NEXT: movl $-1, %eax 3061; X86-BMI1-NEXT: shrl %cl, %eax 3062; X86-BMI1-NEXT: testb $32, %cl 3063; X86-BMI1-NEXT: jne .LBB40_2 3064; X86-BMI1-NEXT: # %bb.1: 3065; X86-BMI1-NEXT: movl $-1, %eax 3066; X86-BMI1-NEXT: .LBB40_2: 3067; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax 3068; X86-BMI1-NEXT: retl 3069; 3070; X86-BMI2-LABEL: bzhi64_32_c0: 3071; X86-BMI2: # %bb.0: 3072; X86-BMI2-NEXT: movb $64, %cl 3073; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl 3074; X86-BMI2-NEXT: movl $-1, %eax 3075; X86-BMI2-NEXT: testb $32, %cl 3076; X86-BMI2-NEXT: je .LBB40_2 3077; X86-BMI2-NEXT: # %bb.1: 3078; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax 3079; 
X86-BMI2-NEXT: .LBB40_2: 3080; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax 3081; X86-BMI2-NEXT: retl 3082; 3083; X64-NOBMI-LABEL: bzhi64_32_c0: 3084; X64-NOBMI: # %bb.0: 3085; X64-NOBMI-NEXT: movq %rsi, %rcx 3086; X64-NOBMI-NEXT: negb %cl 3087; X64-NOBMI-NEXT: movq $-1, %rax 3088; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 3089; X64-NOBMI-NEXT: shrq %cl, %rax 3090; X64-NOBMI-NEXT: andl %edi, %eax 3091; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax 3092; X64-NOBMI-NEXT: retq 3093; 3094; X64-BMI1-LABEL: bzhi64_32_c0: 3095; X64-BMI1: # %bb.0: 3096; X64-BMI1-NEXT: shll $8, %esi 3097; X64-BMI1-NEXT: bextrl %esi, %edi, %eax 3098; X64-BMI1-NEXT: retq 3099; 3100; X64-BMI2-LABEL: bzhi64_32_c0: 3101; X64-BMI2: # %bb.0: 3102; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 3103; X64-BMI2-NEXT: retq 3104 %numhighbits = sub i64 64, %numlowbits 3105 %mask = lshr i64 -1, %numhighbits 3106 %masked = and i64 %mask, %val 3107 %res = trunc i64 %masked to i32 3108 ret i32 %res 3109} 3110 3111; Shifting happens in 64-bit, then truncation. Masking is 32-bit. 
; Pattern c with a 64-bit input and a 32-bit result.
; NOTE(review): the header comment above says the shift is done in 64-bit,
; but in the IR below %val is truncated first and both the lshr and the
; 'and' operate on i32 - confirm which wording was intended.
; The autogenerated checks show bextr being selected with BMI1, bzhi with
; BMI2, and a shl/shr pair without BMI.
define i32 @bzhi64_32_c1(i64 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_32_c1:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi64_32_c1:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi64_32_c1:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_32_c1:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movq %rdi, %rax
; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %eax
; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi64_32_c1:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: shll $8, %esi
; X64-BMI1-NEXT: bextrl %esi, %edi, %eax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi64_32_c1:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: bzhil %esi, %edi, %eax
; X64-BMI2-NEXT: retq
  %truncval = trunc i64 %val to i32 ; only the low 32 bits of the input are used
  %numhighbits = sub i32 32, %numlowbits ; count of high bits to clear
  %mask = lshr i32 -1, %numhighbits ; all-ones shifted right, i.e. the low-bits mask
  %masked = and i32 %mask, %truncval
  ret i32 %masked
}

; Shifting happens in 64-bit. Mask is 32-bit, but extended to 64-bit.
; Masking is 64-bit. Then truncation.
; Pattern c with a 64-bit input and a 32-bit result, where the mask is
; widened before it is applied.
; NOTE(review): in the IR below the lshr that builds the mask is an i32
; operation; only the 'and' is i64 - confirm the header wording above.
; The autogenerated checks show bextr being selected with BMI1, bzhi with
; BMI2, and a shl/shr pair without BMI.
define i32 @bzhi64_32_c2(i64 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi64_32_c2:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi64_32_c2:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi64_32_c2:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_32_c2:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movq %rdi, %rax
; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %eax
; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi64_32_c2:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: shll $8, %esi
; X64-BMI1-NEXT: bextrl %esi, %edi, %eax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi64_32_c2:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: bzhil %esi, %edi, %eax
; X64-BMI2-NEXT: retq
  %numhighbits = sub i32 32, %numlowbits ; count of high bits to clear
  %mask = lshr i32 -1, %numhighbits ; low-bits mask, built in i32
  %zextmask = zext i32 %mask to i64 ; widen the mask so it can be applied to the i64 input
  %masked = and i64 %zextmask, %val
  %truncmasked = trunc i64 %masked to i32 ; result is returned as i32
  ret i32 %truncmasked
}

; Shifting happens in 64-bit. Mask is 32-bit, but calculated in 64-bit.
; Masking is 64-bit. Then truncation.
; Pattern c variant where the constant being shifted is 0xFFFFFFFF rather
; than all-ones, with the shift, the 'and' and the index all in i64.
; In the autogenerated checks below no configuration uses bextr or bzhi for
; this function; the mask is materialised with plain shifts (shrdl on i686,
; shrq or shrxq on x86-64). All i686 configurations share one check group.
define i32 @bzhi64_32_c3(i64 %val, i64 %numlowbits) nounwind {
; X86-LABEL: bzhi64_32_c3:
; X86: # %bb.0:
; X86-NEXT: movb $64, %cl
; X86-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: movl $-1, %edx
; X86-NEXT: shrdl %cl, %eax, %edx
; X86-NEXT: testb $32, %cl
; X86-NEXT: jne .LBB43_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl %edx, %eax
; X86-NEXT: .LBB43_2:
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi64_32_c3:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movq %rsi, %rcx
; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NOBMI-NEXT: shrq %cl, %rax
; X64-NOBMI-NEXT: andl %edi, %eax
; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi64_32_c3:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: movq %rsi, %rcx
; X64-BMI1-NEXT: negb %cl
; X64-BMI1-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-BMI1-NEXT: shrq %cl, %rax
; X64-BMI1-NEXT: andl %edi, %eax
; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi64_32_c3:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: negb %sil
; X64-BMI2-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax
; X64-BMI2-NEXT: andl %edi, %eax
; X64-BMI2-NEXT: # kill: def $eax killed $eax killed $rax
; X64-BMI2-NEXT: retq
  %numhighbits = sub i64 64, %numlowbits ; shift amount is computed relative to 64
  %mask = lshr i64 4294967295, %numhighbits ; shifted constant is 0xFFFFFFFF, not -1
  %masked = and i64 %mask, %val
  %truncmasked = trunc i64 %masked to i32 ; result is returned as i32
  ret i32 %truncmasked
}

; ---------------------------------------------------------------------------- ;
; Pattern d. 32-bit.
; ---------------------------------------------------------------------------- ;

; Base case of pattern d: the value is shifted left by (32 - %numlowbits)
; and then logically shifted right by the same amount, with both the value
; and the index held in i32 registers.
; The autogenerated checks show bextr being selected with BMI1, bzhi with
; BMI2, and a shl/shr pair without BMI.
define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_d0:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi32_d0:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi32_d0:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_d0:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl %edi, %eax
; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %eax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi32_d0:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: shll $8, %esi
; X64-BMI1-NEXT: bextrl %esi, %edi, %eax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi32_d0:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: bzhil %esi, %edi, %eax
; X64-BMI2-NEXT: retq
  %numhighbits = sub i32 32, %numlowbits ; count of high bits to clear
  %highbitscleared = shl i32 %val, %numhighbits ; push the unwanted high bits out of the top
  %masked = lshr i32 %highbitscleared, %numhighbits ; logical shift back down by the same amount
  ret i32 %masked
}

; Same as bzhi32_d0, but the index arrives as i8: the subtraction is done in
; i8 and the result is zero-extended to i32 before being used as the shift
; amount.
define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_d1_indexzext:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi32_d1_indexzext:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: shll $8, %eax
; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi32_d1_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_d1_indexzext:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl %edi, %eax
; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %eax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi32_d1_indexzext:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: shll $8, %esi
; X64-BMI1-NEXT: bextrl %esi, %edi, %eax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi32_d1_indexzext:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: bzhil %esi, %edi, %eax
; X64-BMI2-NEXT: retq
  %numhighbits = sub i8 32, %numlowbits ; subtraction is performed in i8
  %sh_prom = zext i8 %numhighbits to i32 ; widen the shift amount to the value type
  %highbitscleared = shl i32 %val, %sh_prom
  %masked = lshr i32 %highbitscleared, %sh_prom
  ret i32 %masked
}

; Same as bzhi32_d0, but the value is loaded from memory through %w. The
; checks for the BMI configurations show the load folded into the memory
; operand of bextr/bzhi.
define i32 @bzhi32_d2_load(ptr %w, i32 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_d2_load:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %eax
; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi32_d2_load:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: shll $8, %ecx
; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi32_d2_load:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_d2_load:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl (%rdi), %eax
; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %eax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi32_d2_load:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: shll $8, %esi
; X64-BMI1-NEXT: bextrl %esi, (%rdi), %eax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi32_d2_load:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax
; X64-BMI2-NEXT: retq
  %val = load i32, ptr %w ; value comes from memory instead of an argument
  %numhighbits = sub i32 32, %numlowbits ; count of high bits to clear
  %highbitscleared = shl i32 %val, %numhighbits
  %masked = lshr i32 %highbitscleared, %numhighbits
  ret i32 %masked
}

; Combination of the two previous variants: the value is loaded from memory
; and the i8 index is zero-extended after the subtraction.
define i32 @bzhi32_d3_load_indexzext(ptr %w, i8 %numlowbits) nounwind {
; X86-NOBMI-LABEL: bzhi32_d3_load_indexzext:
; X86-NOBMI: # %bb.0:
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT: movl (%eax), %eax
; X86-NOBMI-NEXT: xorl %ecx, %ecx
; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl
; X86-NOBMI-NEXT: shll %cl, %eax
; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NOBMI-NEXT: shrl %cl, %eax
; X86-NOBMI-NEXT: retl
;
; X86-BMI1-LABEL: bzhi32_d3_load_indexzext:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI1-NEXT: shll $8, %ecx
; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: bzhi32_d3_load_indexzext:
; X86-BMI2: # %bb.0:
; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax
; X86-BMI2-NEXT: retl
;
; X64-NOBMI-LABEL: bzhi32_d3_load_indexzext:
; X64-NOBMI: # %bb.0:
; X64-NOBMI-NEXT: movl %esi, %ecx
; X64-NOBMI-NEXT: movl (%rdi), %eax
; X64-NOBMI-NEXT: negb %cl
; X64-NOBMI-NEXT: shll %cl, %eax
; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NOBMI-NEXT: shrl %cl, %eax
; X64-NOBMI-NEXT: retq
;
; X64-BMI1-LABEL: bzhi32_d3_load_indexzext:
; X64-BMI1: # %bb.0:
; X64-BMI1-NEXT: shll $8, %esi
; X64-BMI1-NEXT: bextrl %esi, (%rdi), %eax
; X64-BMI1-NEXT: retq
;
; X64-BMI2-LABEL: bzhi32_d3_load_indexzext:
; X64-BMI2: # %bb.0:
; X64-BMI2-NEXT: bzhil %esi, (%rdi), %eax
; X64-BMI2-NEXT: retq
  %val = load i32, ptr %w ; value comes from memory instead of an argument
  %numhighbits = sub i8 32, %numlowbits ; subtraction is performed in i8
  %sh_prom = zext i8 %numhighbits to i32 ; widen the shift amount to the value type
  %highbitscleared = shl i32 %val, %sh_prom
  %masked = lshr i32 %highbitscleared, %sh_prom
  ret i32 %masked
}

; 64-bit.
3489 3490define i64 @bzhi64_d0(i64 %val, i64 %numlowbits) nounwind { 3491; X86-NOBMI-LABEL: bzhi64_d0: 3492; X86-NOBMI: # %bb.0: 3493; X86-NOBMI-NEXT: pushl %ebx 3494; X86-NOBMI-NEXT: pushl %edi 3495; X86-NOBMI-NEXT: pushl %esi 3496; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 3497; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 3498; X86-NOBMI-NEXT: movb $64, %cl 3499; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 3500; X86-NOBMI-NEXT: movl %edx, %esi 3501; X86-NOBMI-NEXT: shll %cl, %esi 3502; X86-NOBMI-NEXT: shldl %cl, %edx, %eax 3503; X86-NOBMI-NEXT: testb $32, %cl 3504; X86-NOBMI-NEXT: movl %esi, %edi 3505; X86-NOBMI-NEXT: jne .LBB48_2 3506; X86-NOBMI-NEXT: # %bb.1: 3507; X86-NOBMI-NEXT: movl %eax, %edi 3508; X86-NOBMI-NEXT: .LBB48_2: 3509; X86-NOBMI-NEXT: movl %edi, %eax 3510; X86-NOBMI-NEXT: shrl %cl, %eax 3511; X86-NOBMI-NEXT: xorl %ebx, %ebx 3512; X86-NOBMI-NEXT: testb $32, %cl 3513; X86-NOBMI-NEXT: movl $0, %edx 3514; X86-NOBMI-NEXT: jne .LBB48_4 3515; X86-NOBMI-NEXT: # %bb.3: 3516; X86-NOBMI-NEXT: movl %esi, %ebx 3517; X86-NOBMI-NEXT: movl %eax, %edx 3518; X86-NOBMI-NEXT: .LBB48_4: 3519; X86-NOBMI-NEXT: shrdl %cl, %edi, %ebx 3520; X86-NOBMI-NEXT: testb $32, %cl 3521; X86-NOBMI-NEXT: jne .LBB48_6 3522; X86-NOBMI-NEXT: # %bb.5: 3523; X86-NOBMI-NEXT: movl %ebx, %eax 3524; X86-NOBMI-NEXT: .LBB48_6: 3525; X86-NOBMI-NEXT: popl %esi 3526; X86-NOBMI-NEXT: popl %edi 3527; X86-NOBMI-NEXT: popl %ebx 3528; X86-NOBMI-NEXT: retl 3529; 3530; X86-BMI1-LABEL: bzhi64_d0: 3531; X86-BMI1: # %bb.0: 3532; X86-BMI1-NEXT: pushl %ebx 3533; X86-BMI1-NEXT: pushl %edi 3534; X86-BMI1-NEXT: pushl %esi 3535; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx 3536; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax 3537; X86-BMI1-NEXT: movb $64, %cl 3538; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl 3539; X86-BMI1-NEXT: movl %edx, %esi 3540; X86-BMI1-NEXT: shll %cl, %esi 3541; X86-BMI1-NEXT: shldl %cl, %edx, %eax 3542; X86-BMI1-NEXT: testb $32, %cl 3543; X86-BMI1-NEXT: movl %esi, %edi 3544; X86-BMI1-NEXT: jne 
.LBB48_2 3545; X86-BMI1-NEXT: # %bb.1: 3546; X86-BMI1-NEXT: movl %eax, %edi 3547; X86-BMI1-NEXT: .LBB48_2: 3548; X86-BMI1-NEXT: movl %edi, %eax 3549; X86-BMI1-NEXT: shrl %cl, %eax 3550; X86-BMI1-NEXT: xorl %ebx, %ebx 3551; X86-BMI1-NEXT: testb $32, %cl 3552; X86-BMI1-NEXT: movl $0, %edx 3553; X86-BMI1-NEXT: jne .LBB48_4 3554; X86-BMI1-NEXT: # %bb.3: 3555; X86-BMI1-NEXT: movl %esi, %ebx 3556; X86-BMI1-NEXT: movl %eax, %edx 3557; X86-BMI1-NEXT: .LBB48_4: 3558; X86-BMI1-NEXT: shrdl %cl, %edi, %ebx 3559; X86-BMI1-NEXT: testb $32, %cl 3560; X86-BMI1-NEXT: jne .LBB48_6 3561; X86-BMI1-NEXT: # %bb.5: 3562; X86-BMI1-NEXT: movl %ebx, %eax 3563; X86-BMI1-NEXT: .LBB48_6: 3564; X86-BMI1-NEXT: popl %esi 3565; X86-BMI1-NEXT: popl %edi 3566; X86-BMI1-NEXT: popl %ebx 3567; X86-BMI1-NEXT: retl 3568; 3569; X86-BMI2-LABEL: bzhi64_d0: 3570; X86-BMI2: # %bb.0: 3571; X86-BMI2-NEXT: pushl %edi 3572; X86-BMI2-NEXT: pushl %esi 3573; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 3574; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi 3575; X86-BMI2-NEXT: movb $64, %cl 3576; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl 3577; X86-BMI2-NEXT: shldl %cl, %eax, %esi 3578; X86-BMI2-NEXT: shlxl %ecx, %eax, %edi 3579; X86-BMI2-NEXT: xorl %edx, %edx 3580; X86-BMI2-NEXT: testb $32, %cl 3581; X86-BMI2-NEXT: je .LBB48_2 3582; X86-BMI2-NEXT: # %bb.1: 3583; X86-BMI2-NEXT: movl %edi, %esi 3584; X86-BMI2-NEXT: movl $0, %edi 3585; X86-BMI2-NEXT: .LBB48_2: 3586; X86-BMI2-NEXT: shrxl %ecx, %esi, %eax 3587; X86-BMI2-NEXT: jne .LBB48_4 3588; X86-BMI2-NEXT: # %bb.3: 3589; X86-BMI2-NEXT: movl %eax, %edx 3590; X86-BMI2-NEXT: .LBB48_4: 3591; X86-BMI2-NEXT: shrdl %cl, %esi, %edi 3592; X86-BMI2-NEXT: testb $32, %cl 3593; X86-BMI2-NEXT: jne .LBB48_6 3594; X86-BMI2-NEXT: # %bb.5: 3595; X86-BMI2-NEXT: movl %edi, %eax 3596; X86-BMI2-NEXT: .LBB48_6: 3597; X86-BMI2-NEXT: popl %esi 3598; X86-BMI2-NEXT: popl %edi 3599; X86-BMI2-NEXT: retl 3600; 3601; X64-NOBMI-LABEL: bzhi64_d0: 3602; X64-NOBMI: # %bb.0: 3603; X64-NOBMI-NEXT: movq %rsi, 
%rcx 3604; X64-NOBMI-NEXT: movq %rdi, %rax 3605; X64-NOBMI-NEXT: negb %cl 3606; X64-NOBMI-NEXT: shlq %cl, %rax 3607; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 3608; X64-NOBMI-NEXT: shrq %cl, %rax 3609; X64-NOBMI-NEXT: retq 3610; 3611; X64-BMI1-LABEL: bzhi64_d0: 3612; X64-BMI1: # %bb.0: 3613; X64-BMI1-NEXT: shll $8, %esi 3614; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax 3615; X64-BMI1-NEXT: retq 3616; 3617; X64-BMI2-LABEL: bzhi64_d0: 3618; X64-BMI2: # %bb.0: 3619; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax 3620; X64-BMI2-NEXT: retq 3621 %numhighbits = sub i64 64, %numlowbits 3622 %highbitscleared = shl i64 %val, %numhighbits 3623 %masked = lshr i64 %highbitscleared, %numhighbits 3624 ret i64 %masked 3625} 3626 3627define i64 @bzhi64_d1_indexzext(i64 %val, i8 %numlowbits) nounwind { 3628; X86-NOBMI-LABEL: bzhi64_d1_indexzext: 3629; X86-NOBMI: # %bb.0: 3630; X86-NOBMI-NEXT: pushl %ebx 3631; X86-NOBMI-NEXT: pushl %edi 3632; X86-NOBMI-NEXT: pushl %esi 3633; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 3634; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 3635; X86-NOBMI-NEXT: movb $64, %cl 3636; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 3637; X86-NOBMI-NEXT: movl %edx, %esi 3638; X86-NOBMI-NEXT: shll %cl, %esi 3639; X86-NOBMI-NEXT: shldl %cl, %edx, %eax 3640; X86-NOBMI-NEXT: testb $32, %cl 3641; X86-NOBMI-NEXT: movl %esi, %edi 3642; X86-NOBMI-NEXT: jne .LBB49_2 3643; X86-NOBMI-NEXT: # %bb.1: 3644; X86-NOBMI-NEXT: movl %eax, %edi 3645; X86-NOBMI-NEXT: .LBB49_2: 3646; X86-NOBMI-NEXT: movl %edi, %eax 3647; X86-NOBMI-NEXT: shrl %cl, %eax 3648; X86-NOBMI-NEXT: xorl %ebx, %ebx 3649; X86-NOBMI-NEXT: testb $32, %cl 3650; X86-NOBMI-NEXT: movl $0, %edx 3651; X86-NOBMI-NEXT: jne .LBB49_4 3652; X86-NOBMI-NEXT: # %bb.3: 3653; X86-NOBMI-NEXT: movl %esi, %ebx 3654; X86-NOBMI-NEXT: movl %eax, %edx 3655; X86-NOBMI-NEXT: .LBB49_4: 3656; X86-NOBMI-NEXT: shrdl %cl, %edi, %ebx 3657; X86-NOBMI-NEXT: testb $32, %cl 3658; X86-NOBMI-NEXT: jne .LBB49_6 3659; X86-NOBMI-NEXT: # %bb.5: 3660; 
X86-NOBMI-NEXT: movl %ebx, %eax 3661; X86-NOBMI-NEXT: .LBB49_6: 3662; X86-NOBMI-NEXT: popl %esi 3663; X86-NOBMI-NEXT: popl %edi 3664; X86-NOBMI-NEXT: popl %ebx 3665; X86-NOBMI-NEXT: retl 3666; 3667; X86-BMI1-LABEL: bzhi64_d1_indexzext: 3668; X86-BMI1: # %bb.0: 3669; X86-BMI1-NEXT: pushl %ebx 3670; X86-BMI1-NEXT: pushl %edi 3671; X86-BMI1-NEXT: pushl %esi 3672; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx 3673; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax 3674; X86-BMI1-NEXT: movb $64, %cl 3675; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl 3676; X86-BMI1-NEXT: movl %edx, %esi 3677; X86-BMI1-NEXT: shll %cl, %esi 3678; X86-BMI1-NEXT: shldl %cl, %edx, %eax 3679; X86-BMI1-NEXT: testb $32, %cl 3680; X86-BMI1-NEXT: movl %esi, %edi 3681; X86-BMI1-NEXT: jne .LBB49_2 3682; X86-BMI1-NEXT: # %bb.1: 3683; X86-BMI1-NEXT: movl %eax, %edi 3684; X86-BMI1-NEXT: .LBB49_2: 3685; X86-BMI1-NEXT: movl %edi, %eax 3686; X86-BMI1-NEXT: shrl %cl, %eax 3687; X86-BMI1-NEXT: xorl %ebx, %ebx 3688; X86-BMI1-NEXT: testb $32, %cl 3689; X86-BMI1-NEXT: movl $0, %edx 3690; X86-BMI1-NEXT: jne .LBB49_4 3691; X86-BMI1-NEXT: # %bb.3: 3692; X86-BMI1-NEXT: movl %esi, %ebx 3693; X86-BMI1-NEXT: movl %eax, %edx 3694; X86-BMI1-NEXT: .LBB49_4: 3695; X86-BMI1-NEXT: shrdl %cl, %edi, %ebx 3696; X86-BMI1-NEXT: testb $32, %cl 3697; X86-BMI1-NEXT: jne .LBB49_6 3698; X86-BMI1-NEXT: # %bb.5: 3699; X86-BMI1-NEXT: movl %ebx, %eax 3700; X86-BMI1-NEXT: .LBB49_6: 3701; X86-BMI1-NEXT: popl %esi 3702; X86-BMI1-NEXT: popl %edi 3703; X86-BMI1-NEXT: popl %ebx 3704; X86-BMI1-NEXT: retl 3705; 3706; X86-BMI2-LABEL: bzhi64_d1_indexzext: 3707; X86-BMI2: # %bb.0: 3708; X86-BMI2-NEXT: pushl %edi 3709; X86-BMI2-NEXT: pushl %esi 3710; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 3711; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi 3712; X86-BMI2-NEXT: movb $64, %cl 3713; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl 3714; X86-BMI2-NEXT: shldl %cl, %eax, %esi 3715; X86-BMI2-NEXT: shlxl %ecx, %eax, %edi 3716; X86-BMI2-NEXT: xorl %edx, %edx 3717; 
X86-BMI2-NEXT: testb $32, %cl 3718; X86-BMI2-NEXT: je .LBB49_2 3719; X86-BMI2-NEXT: # %bb.1: 3720; X86-BMI2-NEXT: movl %edi, %esi 3721; X86-BMI2-NEXT: movl $0, %edi 3722; X86-BMI2-NEXT: .LBB49_2: 3723; X86-BMI2-NEXT: shrxl %ecx, %esi, %eax 3724; X86-BMI2-NEXT: jne .LBB49_4 3725; X86-BMI2-NEXT: # %bb.3: 3726; X86-BMI2-NEXT: movl %eax, %edx 3727; X86-BMI2-NEXT: .LBB49_4: 3728; X86-BMI2-NEXT: shrdl %cl, %esi, %edi 3729; X86-BMI2-NEXT: testb $32, %cl 3730; X86-BMI2-NEXT: jne .LBB49_6 3731; X86-BMI2-NEXT: # %bb.5: 3732; X86-BMI2-NEXT: movl %edi, %eax 3733; X86-BMI2-NEXT: .LBB49_6: 3734; X86-BMI2-NEXT: popl %esi 3735; X86-BMI2-NEXT: popl %edi 3736; X86-BMI2-NEXT: retl 3737; 3738; X64-NOBMI-LABEL: bzhi64_d1_indexzext: 3739; X64-NOBMI: # %bb.0: 3740; X64-NOBMI-NEXT: movl %esi, %ecx 3741; X64-NOBMI-NEXT: movq %rdi, %rax 3742; X64-NOBMI-NEXT: negb %cl 3743; X64-NOBMI-NEXT: shlq %cl, %rax 3744; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 3745; X64-NOBMI-NEXT: shrq %cl, %rax 3746; X64-NOBMI-NEXT: retq 3747; 3748; X64-BMI1-LABEL: bzhi64_d1_indexzext: 3749; X64-BMI1: # %bb.0: 3750; X64-BMI1-NEXT: # kill: def $esi killed $esi def $rsi 3751; X64-BMI1-NEXT: shll $8, %esi 3752; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax 3753; X64-BMI1-NEXT: retq 3754; 3755; X64-BMI2-LABEL: bzhi64_d1_indexzext: 3756; X64-BMI2: # %bb.0: 3757; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi 3758; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax 3759; X64-BMI2-NEXT: retq 3760 %numhighbits = sub i8 64, %numlowbits 3761 %sh_prom = zext i8 %numhighbits to i64 3762 %highbitscleared = shl i64 %val, %sh_prom 3763 %masked = lshr i64 %highbitscleared, %sh_prom 3764 ret i64 %masked 3765} 3766
; Pattern d with the 64-bit value loaded through %w: the loaded value is
; shifted left and then logically right by (64 - %numlowbits), with the shift
; amount computed directly in i64.
3767define i64 @bzhi64_d2_load(ptr %w, i64 %numlowbits) nounwind { 3768; X86-NOBMI-LABEL: bzhi64_d2_load: 3769; X86-NOBMI: # %bb.0: 3770; X86-NOBMI-NEXT: pushl %ebx 3771; X86-NOBMI-NEXT: pushl %edi 3772; X86-NOBMI-NEXT: pushl %esi 3773; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 3774; X86-NOBMI-NEXT: movl (%eax), %edx
3775; X86-NOBMI-NEXT: movl 4(%eax), %eax 3776; X86-NOBMI-NEXT: movb $64, %cl 3777; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 3778; X86-NOBMI-NEXT: movl %edx, %esi 3779; X86-NOBMI-NEXT: shll %cl, %esi 3780; X86-NOBMI-NEXT: shldl %cl, %edx, %eax 3781; X86-NOBMI-NEXT: testb $32, %cl 3782; X86-NOBMI-NEXT: movl %esi, %edi 3783; X86-NOBMI-NEXT: jne .LBB50_2 3784; X86-NOBMI-NEXT: # %bb.1: 3785; X86-NOBMI-NEXT: movl %eax, %edi 3786; X86-NOBMI-NEXT: .LBB50_2: 3787; X86-NOBMI-NEXT: movl %edi, %eax 3788; X86-NOBMI-NEXT: shrl %cl, %eax 3789; X86-NOBMI-NEXT: xorl %ebx, %ebx 3790; X86-NOBMI-NEXT: testb $32, %cl 3791; X86-NOBMI-NEXT: movl $0, %edx 3792; X86-NOBMI-NEXT: jne .LBB50_4 3793; X86-NOBMI-NEXT: # %bb.3: 3794; X86-NOBMI-NEXT: movl %esi, %ebx 3795; X86-NOBMI-NEXT: movl %eax, %edx 3796; X86-NOBMI-NEXT: .LBB50_4: 3797; X86-NOBMI-NEXT: shrdl %cl, %edi, %ebx 3798; X86-NOBMI-NEXT: testb $32, %cl 3799; X86-NOBMI-NEXT: jne .LBB50_6 3800; X86-NOBMI-NEXT: # %bb.5: 3801; X86-NOBMI-NEXT: movl %ebx, %eax 3802; X86-NOBMI-NEXT: .LBB50_6: 3803; X86-NOBMI-NEXT: popl %esi 3804; X86-NOBMI-NEXT: popl %edi 3805; X86-NOBMI-NEXT: popl %ebx 3806; X86-NOBMI-NEXT: retl 3807; 3808; X86-BMI1-LABEL: bzhi64_d2_load: 3809; X86-BMI1: # %bb.0: 3810; X86-BMI1-NEXT: pushl %ebx 3811; X86-BMI1-NEXT: pushl %edi 3812; X86-BMI1-NEXT: pushl %esi 3813; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax 3814; X86-BMI1-NEXT: movl (%eax), %edx 3815; X86-BMI1-NEXT: movl 4(%eax), %eax 3816; X86-BMI1-NEXT: movb $64, %cl 3817; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl 3818; X86-BMI1-NEXT: movl %edx, %esi 3819; X86-BMI1-NEXT: shll %cl, %esi 3820; X86-BMI1-NEXT: shldl %cl, %edx, %eax 3821; X86-BMI1-NEXT: testb $32, %cl 3822; X86-BMI1-NEXT: movl %esi, %edi 3823; X86-BMI1-NEXT: jne .LBB50_2 3824; X86-BMI1-NEXT: # %bb.1: 3825; X86-BMI1-NEXT: movl %eax, %edi 3826; X86-BMI1-NEXT: .LBB50_2: 3827; X86-BMI1-NEXT: movl %edi, %eax 3828; X86-BMI1-NEXT: shrl %cl, %eax 3829; X86-BMI1-NEXT: xorl %ebx, %ebx 3830; X86-BMI1-NEXT: testb $32, %cl
3831; X86-BMI1-NEXT: movl $0, %edx 3832; X86-BMI1-NEXT: jne .LBB50_4 3833; X86-BMI1-NEXT: # %bb.3: 3834; X86-BMI1-NEXT: movl %esi, %ebx 3835; X86-BMI1-NEXT: movl %eax, %edx 3836; X86-BMI1-NEXT: .LBB50_4: 3837; X86-BMI1-NEXT: shrdl %cl, %edi, %ebx 3838; X86-BMI1-NEXT: testb $32, %cl 3839; X86-BMI1-NEXT: jne .LBB50_6 3840; X86-BMI1-NEXT: # %bb.5: 3841; X86-BMI1-NEXT: movl %ebx, %eax 3842; X86-BMI1-NEXT: .LBB50_6: 3843; X86-BMI1-NEXT: popl %esi 3844; X86-BMI1-NEXT: popl %edi 3845; X86-BMI1-NEXT: popl %ebx 3846; X86-BMI1-NEXT: retl 3847; 3848; X86-BMI2-LABEL: bzhi64_d2_load: 3849; X86-BMI2: # %bb.0: 3850; X86-BMI2-NEXT: pushl %edi 3851; X86-BMI2-NEXT: pushl %esi 3852; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 3853; X86-BMI2-NEXT: movl (%eax), %edx 3854; X86-BMI2-NEXT: movl 4(%eax), %esi 3855; X86-BMI2-NEXT: movb $64, %cl 3856; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl 3857; X86-BMI2-NEXT: shldl %cl, %edx, %esi 3858; X86-BMI2-NEXT: shlxl %ecx, %edx, %edi 3859; X86-BMI2-NEXT: xorl %edx, %edx 3860; X86-BMI2-NEXT: testb $32, %cl 3861; X86-BMI2-NEXT: je .LBB50_2 3862; X86-BMI2-NEXT: # %bb.1: 3863; X86-BMI2-NEXT: movl %edi, %esi 3864; X86-BMI2-NEXT: movl $0, %edi 3865; X86-BMI2-NEXT: .LBB50_2: 3866; X86-BMI2-NEXT: shrxl %ecx, %esi, %eax 3867; X86-BMI2-NEXT: jne .LBB50_4 3868; X86-BMI2-NEXT: # %bb.3: 3869; X86-BMI2-NEXT: movl %eax, %edx 3870; X86-BMI2-NEXT: .LBB50_4: 3871; X86-BMI2-NEXT: shrdl %cl, %esi, %edi 3872; X86-BMI2-NEXT: testb $32, %cl 3873; X86-BMI2-NEXT: jne .LBB50_6 3874; X86-BMI2-NEXT: # %bb.5: 3875; X86-BMI2-NEXT: movl %edi, %eax 3876; X86-BMI2-NEXT: .LBB50_6: 3877; X86-BMI2-NEXT: popl %esi 3878; X86-BMI2-NEXT: popl %edi 3879; X86-BMI2-NEXT: retl 3880; 3881; X64-NOBMI-LABEL: bzhi64_d2_load: 3882; X64-NOBMI: # %bb.0: 3883; X64-NOBMI-NEXT: movq %rsi, %rcx 3884; X64-NOBMI-NEXT: movq (%rdi), %rax 3885; X64-NOBMI-NEXT: negb %cl 3886; X64-NOBMI-NEXT: shlq %cl, %rax 3887; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 3888; X64-NOBMI-NEXT: shrq %cl, %rax 3889;
X64-NOBMI-NEXT: retq 3890; 3891; X64-BMI1-LABEL: bzhi64_d2_load: 3892; X64-BMI1: # %bb.0: 3893; X64-BMI1-NEXT: shll $8, %esi 3894; X64-BMI1-NEXT: bextrq %rsi, (%rdi), %rax 3895; X64-BMI1-NEXT: retq 3896; 3897; X64-BMI2-LABEL: bzhi64_d2_load: 3898; X64-BMI2: # %bb.0: 3899; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax 3900; X64-BMI2-NEXT: retq 3901 %val = load i64, ptr %w 3902 %numhighbits = sub i64 64, %numlowbits 3903 %highbitscleared = shl i64 %val, %numhighbits 3904 %masked = lshr i64 %highbitscleared, %numhighbits 3905 ret i64 %masked 3906} 3907
; Pattern d with the 64-bit value loaded through %w and an i8 bit count:
; (64 - %numlowbits) is computed in i8 and zero-extended to i64 before being
; used as the amount for both the left shift and the logical right shift.
3908define i64 @bzhi64_d3_load_indexzext(ptr %w, i8 %numlowbits) nounwind { 3909; X86-NOBMI-LABEL: bzhi64_d3_load_indexzext: 3910; X86-NOBMI: # %bb.0: 3911; X86-NOBMI-NEXT: pushl %ebx 3912; X86-NOBMI-NEXT: pushl %edi 3913; X86-NOBMI-NEXT: pushl %esi 3914; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 3915; X86-NOBMI-NEXT: movl (%eax), %edx 3916; X86-NOBMI-NEXT: movl 4(%eax), %eax 3917; X86-NOBMI-NEXT: movb $64, %cl 3918; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 3919; X86-NOBMI-NEXT: movl %edx, %esi 3920; X86-NOBMI-NEXT: shll %cl, %esi 3921; X86-NOBMI-NEXT: shldl %cl, %edx, %eax 3922; X86-NOBMI-NEXT: testb $32, %cl 3923; X86-NOBMI-NEXT: movl %esi, %edi 3924; X86-NOBMI-NEXT: jne .LBB51_2 3925; X86-NOBMI-NEXT: # %bb.1: 3926; X86-NOBMI-NEXT: movl %eax, %edi 3927; X86-NOBMI-NEXT: .LBB51_2: 3928; X86-NOBMI-NEXT: movl %edi, %eax 3929; X86-NOBMI-NEXT: shrl %cl, %eax 3930; X86-NOBMI-NEXT: xorl %ebx, %ebx 3931; X86-NOBMI-NEXT: testb $32, %cl 3932; X86-NOBMI-NEXT: movl $0, %edx 3933; X86-NOBMI-NEXT: jne .LBB51_4 3934; X86-NOBMI-NEXT: # %bb.3: 3935; X86-NOBMI-NEXT: movl %esi, %ebx 3936; X86-NOBMI-NEXT: movl %eax, %edx 3937; X86-NOBMI-NEXT: .LBB51_4: 3938; X86-NOBMI-NEXT: shrdl %cl, %edi, %ebx 3939; X86-NOBMI-NEXT: testb $32, %cl 3940; X86-NOBMI-NEXT: jne .LBB51_6 3941; X86-NOBMI-NEXT: # %bb.5: 3942; X86-NOBMI-NEXT: movl %ebx, %eax 3943; X86-NOBMI-NEXT: .LBB51_6: 3944; X86-NOBMI-NEXT: popl %esi 3945; X86-NOBMI-NEXT: popl %edi 3946;
X86-NOBMI-NEXT: popl %ebx 3947; X86-NOBMI-NEXT: retl 3948; 3949; X86-BMI1-LABEL: bzhi64_d3_load_indexzext: 3950; X86-BMI1: # %bb.0: 3951; X86-BMI1-NEXT: pushl %ebx 3952; X86-BMI1-NEXT: pushl %edi 3953; X86-BMI1-NEXT: pushl %esi 3954; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax 3955; X86-BMI1-NEXT: movl (%eax), %edx 3956; X86-BMI1-NEXT: movl 4(%eax), %eax 3957; X86-BMI1-NEXT: movb $64, %cl 3958; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl 3959; X86-BMI1-NEXT: movl %edx, %esi 3960; X86-BMI1-NEXT: shll %cl, %esi 3961; X86-BMI1-NEXT: shldl %cl, %edx, %eax 3962; X86-BMI1-NEXT: testb $32, %cl 3963; X86-BMI1-NEXT: movl %esi, %edi 3964; X86-BMI1-NEXT: jne .LBB51_2 3965; X86-BMI1-NEXT: # %bb.1: 3966; X86-BMI1-NEXT: movl %eax, %edi 3967; X86-BMI1-NEXT: .LBB51_2: 3968; X86-BMI1-NEXT: movl %edi, %eax 3969; X86-BMI1-NEXT: shrl %cl, %eax 3970; X86-BMI1-NEXT: xorl %ebx, %ebx 3971; X86-BMI1-NEXT: testb $32, %cl 3972; X86-BMI1-NEXT: movl $0, %edx 3973; X86-BMI1-NEXT: jne .LBB51_4 3974; X86-BMI1-NEXT: # %bb.3: 3975; X86-BMI1-NEXT: movl %esi, %ebx 3976; X86-BMI1-NEXT: movl %eax, %edx 3977; X86-BMI1-NEXT: .LBB51_4: 3978; X86-BMI1-NEXT: shrdl %cl, %edi, %ebx 3979; X86-BMI1-NEXT: testb $32, %cl 3980; X86-BMI1-NEXT: jne .LBB51_6 3981; X86-BMI1-NEXT: # %bb.5: 3982; X86-BMI1-NEXT: movl %ebx, %eax 3983; X86-BMI1-NEXT: .LBB51_6: 3984; X86-BMI1-NEXT: popl %esi 3985; X86-BMI1-NEXT: popl %edi 3986; X86-BMI1-NEXT: popl %ebx 3987; X86-BMI1-NEXT: retl 3988; 3989; X86-BMI2-LABEL: bzhi64_d3_load_indexzext: 3990; X86-BMI2: # %bb.0: 3991; X86-BMI2-NEXT: pushl %edi 3992; X86-BMI2-NEXT: pushl %esi 3993; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax 3994; X86-BMI2-NEXT: movl (%eax), %edx 3995; X86-BMI2-NEXT: movl 4(%eax), %esi 3996; X86-BMI2-NEXT: movb $64, %cl 3997; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl 3998; X86-BMI2-NEXT: shldl %cl, %edx, %esi 3999; X86-BMI2-NEXT: shlxl %ecx, %edx, %edi 4000; X86-BMI2-NEXT: xorl %edx, %edx 4001; X86-BMI2-NEXT: testb $32, %cl 4002; X86-BMI2-NEXT: je .LBB51_2 4003;
X86-BMI2-NEXT: # %bb.1: 4004; X86-BMI2-NEXT: movl %edi, %esi 4005; X86-BMI2-NEXT: movl $0, %edi 4006; X86-BMI2-NEXT: .LBB51_2: 4007; X86-BMI2-NEXT: shrxl %ecx, %esi, %eax 4008; X86-BMI2-NEXT: jne .LBB51_4 4009; X86-BMI2-NEXT: # %bb.3: 4010; X86-BMI2-NEXT: movl %eax, %edx 4011; X86-BMI2-NEXT: .LBB51_4: 4012; X86-BMI2-NEXT: shrdl %cl, %esi, %edi 4013; X86-BMI2-NEXT: testb $32, %cl 4014; X86-BMI2-NEXT: jne .LBB51_6 4015; X86-BMI2-NEXT: # %bb.5: 4016; X86-BMI2-NEXT: movl %edi, %eax 4017; X86-BMI2-NEXT: .LBB51_6: 4018; X86-BMI2-NEXT: popl %esi 4019; X86-BMI2-NEXT: popl %edi 4020; X86-BMI2-NEXT: retl 4021; 4022; X64-NOBMI-LABEL: bzhi64_d3_load_indexzext: 4023; X64-NOBMI: # %bb.0: 4024; X64-NOBMI-NEXT: movl %esi, %ecx 4025; X64-NOBMI-NEXT: movq (%rdi), %rax 4026; X64-NOBMI-NEXT: negb %cl 4027; X64-NOBMI-NEXT: shlq %cl, %rax 4028; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 4029; X64-NOBMI-NEXT: shrq %cl, %rax 4030; X64-NOBMI-NEXT: retq 4031; 4032; X64-BMI1-LABEL: bzhi64_d3_load_indexzext: 4033; X64-BMI1: # %bb.0: 4034; X64-BMI1-NEXT: # kill: def $esi killed $esi def $rsi 4035; X64-BMI1-NEXT: shll $8, %esi 4036; X64-BMI1-NEXT: bextrq %rsi, (%rdi), %rax 4037; X64-BMI1-NEXT: retq 4038; 4039; X64-BMI2-LABEL: bzhi64_d3_load_indexzext: 4040; X64-BMI2: # %bb.0: 4041; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi 4042; X64-BMI2-NEXT: bzhiq %rsi, (%rdi), %rax 4043; X64-BMI2-NEXT: retq 4044 %val = load i64, ptr %w 4045 %numhighbits = sub i8 64, %numlowbits 4046 %sh_prom = zext i8 %numhighbits to i64 4047 %highbitscleared = shl i64 %val, %sh_prom 4048 %masked = lshr i64 %highbitscleared, %sh_prom 4049 ret i64 %masked 4050} 4051 4052; 64-bit, but with 32-bit output 4053 4054; Everything done in 64-bit, truncation happens last.
; Pattern d with a 64-bit input and a 32-bit result: %val is shifted left and
; then logically right by (64 - %numlowbits) entirely in i64, and only the
; final masked value is truncated to i32.
4055define i32 @bzhi64_32_d0(i64 %val, i64 %numlowbits) nounwind { 4056; X86-NOBMI-LABEL: bzhi64_32_d0: 4057; X86-NOBMI: # %bb.0: 4058; X86-NOBMI-NEXT: pushl %esi 4059; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi 4060; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 4061; X86-NOBMI-NEXT: movb $64, %cl 4062; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 4063; X86-NOBMI-NEXT: movl %esi, %edx 4064; X86-NOBMI-NEXT: shll %cl, %edx 4065; X86-NOBMI-NEXT: shldl %cl, %esi, %eax 4066; X86-NOBMI-NEXT: testb $32, %cl 4067; X86-NOBMI-NEXT: je .LBB52_2 4068; X86-NOBMI-NEXT: # %bb.1: 4069; X86-NOBMI-NEXT: movl %edx, %eax 4070; X86-NOBMI-NEXT: xorl %edx, %edx 4071; X86-NOBMI-NEXT: .LBB52_2: 4072; X86-NOBMI-NEXT: shrdl %cl, %eax, %edx 4073; X86-NOBMI-NEXT: shrl %cl, %eax 4074; X86-NOBMI-NEXT: testb $32, %cl 4075; X86-NOBMI-NEXT: jne .LBB52_4 4076; X86-NOBMI-NEXT: # %bb.3: 4077; X86-NOBMI-NEXT: movl %edx, %eax 4078; X86-NOBMI-NEXT: .LBB52_4: 4079; X86-NOBMI-NEXT: popl %esi 4080; X86-NOBMI-NEXT: retl 4081; 4082; X86-BMI1-LABEL: bzhi64_32_d0: 4083; X86-BMI1: # %bb.0: 4084; X86-BMI1-NEXT: pushl %esi 4085; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi 4086; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax 4087; X86-BMI1-NEXT: movb $64, %cl 4088; X86-BMI1-NEXT: subb {{[0-9]+}}(%esp), %cl 4089; X86-BMI1-NEXT: movl %esi, %edx 4090; X86-BMI1-NEXT: shll %cl, %edx 4091; X86-BMI1-NEXT: shldl %cl, %esi, %eax 4092; X86-BMI1-NEXT: testb $32, %cl 4093; X86-BMI1-NEXT: je .LBB52_2 4094; X86-BMI1-NEXT: # %bb.1: 4095; X86-BMI1-NEXT: movl %edx, %eax 4096; X86-BMI1-NEXT: xorl %edx, %edx 4097; X86-BMI1-NEXT: .LBB52_2: 4098; X86-BMI1-NEXT: shrdl %cl, %eax, %edx 4099; X86-BMI1-NEXT: shrl %cl, %eax 4100; X86-BMI1-NEXT: testb $32, %cl 4101; X86-BMI1-NEXT: jne .LBB52_4 4102; X86-BMI1-NEXT: # %bb.3: 4103; X86-BMI1-NEXT: movl %edx, %eax 4104; X86-BMI1-NEXT: .LBB52_4: 4105; X86-BMI1-NEXT: popl %esi 4106; X86-BMI1-NEXT: retl 4107; 4108; X86-BMI2-LABEL: bzhi64_32_d0: 4109; X86-BMI2: # %bb.0: 4110; X86-BMI2-NEXT: movl
{{[0-9]+}}(%esp), %eax 4111; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx 4112; X86-BMI2-NEXT: movb $64, %cl 4113; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl 4114; X86-BMI2-NEXT: shldl %cl, %eax, %edx 4115; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax 4116; X86-BMI2-NEXT: testb $32, %cl 4117; X86-BMI2-NEXT: je .LBB52_2 4118; X86-BMI2-NEXT: # %bb.1: 4119; X86-BMI2-NEXT: movl %eax, %edx 4120; X86-BMI2-NEXT: xorl %eax, %eax 4121; X86-BMI2-NEXT: .LBB52_2: 4122; X86-BMI2-NEXT: shrdl %cl, %edx, %eax 4123; X86-BMI2-NEXT: testb $32, %cl 4124; X86-BMI2-NEXT: je .LBB52_4 4125; X86-BMI2-NEXT: # %bb.3: 4126; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax 4127; X86-BMI2-NEXT: .LBB52_4: 4128; X86-BMI2-NEXT: retl 4129; 4130; X64-NOBMI-LABEL: bzhi64_32_d0: 4131; X64-NOBMI: # %bb.0: 4132; X64-NOBMI-NEXT: movq %rsi, %rcx 4133; X64-NOBMI-NEXT: movq %rdi, %rax 4134; X64-NOBMI-NEXT: negb %cl 4135; X64-NOBMI-NEXT: shlq %cl, %rax 4136; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx 4137; X64-NOBMI-NEXT: shrq %cl, %rax 4138; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax 4139; X64-NOBMI-NEXT: retq 4140; 4141; X64-BMI1-LABEL: bzhi64_32_d0: 4142; X64-BMI1: # %bb.0: 4143; X64-BMI1-NEXT: shll $8, %esi 4144; X64-BMI1-NEXT: bextrq %rsi, %rdi, %rax 4145; X64-BMI1-NEXT: # kill: def $eax killed $eax killed $rax 4146; X64-BMI1-NEXT: retq 4147; 4148; X64-BMI2-LABEL: bzhi64_32_d0: 4149; X64-BMI2: # %bb.0: 4150; X64-BMI2-NEXT: bzhiq %rsi, %rdi, %rax 4151; X64-BMI2-NEXT: # kill: def $eax killed $eax killed $rax 4152; X64-BMI2-NEXT: retq 4153 %numhighbits = sub i64 64, %numlowbits 4154 %highbitscleared = shl i64 %val, %numhighbits 4155 %masked = lshr i64 %highbitscleared, %numhighbits 4156 %res = trunc i64 %masked to i32 4157 ret i32 %res 4158} 4159 4160; Truncation to 32-bit happens first; shifting (and therefore masking) is then done in 32-bit.
; Pattern d on the low half of a 64-bit input: %val is truncated to i32 first,
; and the truncated value is then shifted left and logically right by
; (32 - %numlowbits), with every shift performed in i32.
4161define i32 @bzhi64_32_d1(i64 %val, i32 %numlowbits) nounwind { 4162; X86-NOBMI-LABEL: bzhi64_32_d1: 4163; X86-NOBMI: # %bb.0: 4164; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax 4165; X86-NOBMI-NEXT: xorl %ecx, %ecx 4166; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl 4167; X86-NOBMI-NEXT: shll %cl, %eax 4168; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 4169; X86-NOBMI-NEXT: shrl %cl, %eax 4170; X86-NOBMI-NEXT: retl 4171; 4172; X86-BMI1-LABEL: bzhi64_32_d1: 4173; X86-BMI1: # %bb.0: 4174; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax 4175; X86-BMI1-NEXT: shll $8, %eax 4176; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax 4177; X86-BMI1-NEXT: retl 4178; 4179; X86-BMI2-LABEL: bzhi64_32_d1: 4180; X86-BMI2: # %bb.0: 4181; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax 4182; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax 4183; X86-BMI2-NEXT: retl 4184; 4185; X64-NOBMI-LABEL: bzhi64_32_d1: 4186; X64-NOBMI: # %bb.0: 4187; X64-NOBMI-NEXT: movl %esi, %ecx 4188; X64-NOBMI-NEXT: movq %rdi, %rax 4189; X64-NOBMI-NEXT: negb %cl 4190; X64-NOBMI-NEXT: shll %cl, %eax 4191; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx 4192; X64-NOBMI-NEXT: shrl %cl, %eax 4193; X64-NOBMI-NEXT: # kill: def $eax killed $eax killed $rax 4194; X64-NOBMI-NEXT: retq 4195; 4196; X64-BMI1-LABEL: bzhi64_32_d1: 4197; X64-BMI1: # %bb.0: 4198; X64-BMI1-NEXT: shll $8, %esi 4199; X64-BMI1-NEXT: bextrl %esi, %edi, %eax 4200; X64-BMI1-NEXT: retq 4201; 4202; X64-BMI2-LABEL: bzhi64_32_d1: 4203; X64-BMI2: # %bb.0: 4204; X64-BMI2-NEXT: bzhil %esi, %edi, %eax 4205; X64-BMI2-NEXT: retq 4206 %truncval = trunc i64 %val to i32 4207 %numhighbits = sub i32 32, %numlowbits 4208 %highbitscleared = shl i32 %truncval, %numhighbits 4209 %masked = lshr i32 %highbitscleared, %numhighbits 4210 ret i32 %masked 4211} 4212 4213; ---------------------------------------------------------------------------- ; 4214; Constant mask 4215; ---------------------------------------------------------------------------- ; 4216
; Each test in this group ANDs a value with a constant low-bit mask. The plain
; variants take the value as an argument; the *_load variants load it through
; the pointer argument first.
; NOTE: the masks used are 0x7FFFFFFF, 0x7FFF, 0x7F and 0x3FFFFFFFFFFFFFFF,
; i.e. they keep 31, 15, 7 and 62 low bits - fewer bits than the width that
; appears in the corresponding test name (mask32/mask16/mask8/mask64).
4217; 32-bit 4218 4219define i32 @bzhi32_constant_mask32(i32 %val) nounwind { 4220; X86-LABEL: bzhi32_constant_mask32: 4221; X86: # %bb.0: 4222; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF 4223; X86-NEXT: andl {{[0-9]+}}(%esp), %eax 4224; X86-NEXT: retl 4225; 4226; X64-LABEL: bzhi32_constant_mask32: 4227; X64: # %bb.0: 4228; X64-NEXT: movl %edi, %eax 4229; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF 4230; X64-NEXT: retq 4231 %masked = and i32 %val, 2147483647 4232 ret i32 %masked 4233} 4234 4235define i32 @bzhi32_constant_mask32_load(ptr %val) nounwind { 4236; X86-LABEL: bzhi32_constant_mask32_load: 4237; X86: # %bb.0: 4238; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 4239; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF 4240; X86-NEXT: andl (%ecx), %eax 4241; X86-NEXT: retl 4242; 4243; X64-LABEL: bzhi32_constant_mask32_load: 4244; X64: # %bb.0: 4245; X64-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF 4246; X64-NEXT: andl (%rdi), %eax 4247; X64-NEXT: retq 4248 %val1 = load i32, ptr %val 4249 %masked = and i32 %val1, 2147483647 4250 ret i32 %masked 4251} 4252 4253define i32 @bzhi32_constant_mask16(i32 %val) nounwind { 4254; X86-LABEL: bzhi32_constant_mask16: 4255; X86: # %bb.0: 4256; X86-NEXT: movl $32767, %eax # imm = 0x7FFF 4257; X86-NEXT: andl {{[0-9]+}}(%esp), %eax 4258; X86-NEXT: retl 4259; 4260; X64-LABEL: bzhi32_constant_mask16: 4261; X64: # %bb.0: 4262; X64-NEXT: movl %edi, %eax 4263; X64-NEXT: andl $32767, %eax # imm = 0x7FFF 4264; X64-NEXT: retq 4265 %masked = and i32 %val, 32767 4266 ret i32 %masked 4267} 4268 4269define i32 @bzhi32_constant_mask16_load(ptr %val) nounwind { 4270; X86-LABEL: bzhi32_constant_mask16_load: 4271; X86: # %bb.0: 4272; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 4273; X86-NEXT: movl $32767, %eax # imm = 0x7FFF 4274; X86-NEXT: andl (%ecx), %eax 4275; X86-NEXT: retl 4276; 4277; X64-LABEL: bzhi32_constant_mask16_load: 4278; X64: # %bb.0: 4279; X64-NEXT: movl $32767, %eax # imm = 0x7FFF 4280; X64-NEXT: andl (%rdi), %eax
4281; X64-NEXT: retq 4282 %val1 = load i32, ptr %val 4283 %masked = and i32 %val1, 32767 4284 ret i32 %masked 4285} 4286 4287define i32 @bzhi32_constant_mask8(i32 %val) nounwind { 4288; X86-LABEL: bzhi32_constant_mask8: 4289; X86: # %bb.0: 4290; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 4291; X86-NEXT: andl $127, %eax 4292; X86-NEXT: retl 4293; 4294; X64-LABEL: bzhi32_constant_mask8: 4295; X64: # %bb.0: 4296; X64-NEXT: movl %edi, %eax 4297; X64-NEXT: andl $127, %eax 4298; X64-NEXT: retq 4299 %masked = and i32 %val, 127 4300 ret i32 %masked 4301} 4302 4303define i32 @bzhi32_constant_mask8_load(ptr %val) nounwind { 4304; X86-LABEL: bzhi32_constant_mask8_load: 4305; X86: # %bb.0: 4306; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 4307; X86-NEXT: movl (%eax), %eax 4308; X86-NEXT: andl $127, %eax 4309; X86-NEXT: retl 4310; 4311; X64-LABEL: bzhi32_constant_mask8_load: 4312; X64: # %bb.0: 4313; X64-NEXT: movl (%rdi), %eax 4314; X64-NEXT: andl $127, %eax 4315; X64-NEXT: retq 4316 %val1 = load i32, ptr %val 4317 %masked = and i32 %val1, 127 4318 ret i32 %masked 4319} 4320 4321; 64-bit 4322 4323define i64 @bzhi64_constant_mask64(i64 %val) nounwind { 4324; X86-LABEL: bzhi64_constant_mask64: 4325; X86: # %bb.0: 4326; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 4327; X86-NEXT: movl $1073741823, %edx # imm = 0x3FFFFFFF 4328; X86-NEXT: andl {{[0-9]+}}(%esp), %edx 4329; X86-NEXT: retl 4330; 4331; X64-NOBMI-LABEL: bzhi64_constant_mask64: 4332; X64-NOBMI: # %bb.0: 4333; X64-NOBMI-NEXT: movabsq $4611686018427387903, %rax # imm = 0x3FFFFFFFFFFFFFFF 4334; X64-NOBMI-NEXT: andq %rdi, %rax 4335; X64-NOBMI-NEXT: retq 4336; 4337; X64-BMI1NOTBM-LABEL: bzhi64_constant_mask64: 4338; X64-BMI1NOTBM: # %bb.0: 4339; X64-BMI1NOTBM-NEXT: movl $15872, %eax # imm = 0x3E00 4340; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdi, %rax 4341; X64-BMI1NOTBM-NEXT: retq 4342; 4343; X64-BMI1TBM-LABEL: bzhi64_constant_mask64: 4344; X64-BMI1TBM: # %bb.0: 4345; X64-BMI1TBM-NEXT: bextrq $15872, %rdi, %rax # imm = 0x3E00 4346;
X64-BMI1TBM-NEXT: retq 4347; 4348; X64-BMI2TBM-LABEL: bzhi64_constant_mask64: 4349; X64-BMI2TBM: # %bb.0: 4350; X64-BMI2TBM-NEXT: bextrq $15872, %rdi, %rax # imm = 0x3E00 4351; X64-BMI2TBM-NEXT: retq 4352; 4353; X64-BMI2NOTBM-LABEL: bzhi64_constant_mask64: 4354; X64-BMI2NOTBM: # %bb.0: 4355; X64-BMI2NOTBM-NEXT: movb $62, %al 4356; X64-BMI2NOTBM-NEXT: bzhiq %rax, %rdi, %rax 4357; X64-BMI2NOTBM-NEXT: retq 4358 %masked = and i64 %val, 4611686018427387903 4359 ret i64 %masked 4360} 4361 4362define i64 @bzhi64_constant_mask64_load(ptr %val) nounwind { 4363; X86-LABEL: bzhi64_constant_mask64_load: 4364; X86: # %bb.0: 4365; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 4366; X86-NEXT: movl (%ecx), %eax 4367; X86-NEXT: movl $1073741823, %edx # imm = 0x3FFFFFFF 4368; X86-NEXT: andl 4(%ecx), %edx 4369; X86-NEXT: retl 4370; 4371; X64-NOBMI-LABEL: bzhi64_constant_mask64_load: 4372; X64-NOBMI: # %bb.0: 4373; X64-NOBMI-NEXT: movabsq $4611686018427387903, %rax # imm = 0x3FFFFFFFFFFFFFFF 4374; X64-NOBMI-NEXT: andq (%rdi), %rax 4375; X64-NOBMI-NEXT: retq 4376; 4377; X64-BMI1NOTBM-LABEL: bzhi64_constant_mask64_load: 4378; X64-BMI1NOTBM: # %bb.0: 4379; X64-BMI1NOTBM-NEXT: movl $15872, %eax # imm = 0x3E00 4380; X64-BMI1NOTBM-NEXT: bextrq %rax, (%rdi), %rax 4381; X64-BMI1NOTBM-NEXT: retq 4382; 4383; X64-BMI1TBM-LABEL: bzhi64_constant_mask64_load: 4384; X64-BMI1TBM: # %bb.0: 4385; X64-BMI1TBM-NEXT: bextrq $15872, (%rdi), %rax # imm = 0x3E00 4386; X64-BMI1TBM-NEXT: retq 4387; 4388; X64-BMI2TBM-LABEL: bzhi64_constant_mask64_load: 4389; X64-BMI2TBM: # %bb.0: 4390; X64-BMI2TBM-NEXT: bextrq $15872, (%rdi), %rax # imm = 0x3E00 4391; X64-BMI2TBM-NEXT: retq 4392; 4393; X64-BMI2NOTBM-LABEL: bzhi64_constant_mask64_load: 4394; X64-BMI2NOTBM: # %bb.0: 4395; X64-BMI2NOTBM-NEXT: movb $62, %al 4396; X64-BMI2NOTBM-NEXT: bzhiq %rax, (%rdi), %rax 4397; X64-BMI2NOTBM-NEXT: retq 4398 %val1 = load i64, ptr %val 4399 %masked = and i64 %val1, 4611686018427387903 4400 ret i64 %masked 4401} 4402 4403define i64
@bzhi64_constant_mask32(i64 %val) nounwind { 4404; X86-LABEL: bzhi64_constant_mask32: 4405; X86: # %bb.0: 4406; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF 4407; X86-NEXT: andl {{[0-9]+}}(%esp), %eax 4408; X86-NEXT: xorl %edx, %edx 4409; X86-NEXT: retl 4410; 4411; X64-LABEL: bzhi64_constant_mask32: 4412; X64: # %bb.0: 4413; X64-NEXT: movq %rdi, %rax 4414; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF 4415; X64-NEXT: retq 4416 %masked = and i64 %val, 2147483647 4417 ret i64 %masked 4418} 4419 4420define i64 @bzhi64_constant_mask32_load(ptr %val) nounwind { 4421; X86-LABEL: bzhi64_constant_mask32_load: 4422; X86: # %bb.0: 4423; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 4424; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF 4425; X86-NEXT: andl (%ecx), %eax 4426; X86-NEXT: xorl %edx, %edx 4427; X86-NEXT: retl 4428; 4429; X64-LABEL: bzhi64_constant_mask32_load: 4430; X64: # %bb.0: 4431; X64-NEXT: movq (%rdi), %rax 4432; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF 4433; X64-NEXT: retq 4434 %val1 = load i64, ptr %val 4435 %masked = and i64 %val1, 2147483647 4436 ret i64 %masked 4437} 4438 4439define i64 @bzhi64_constant_mask16(i64 %val) nounwind { 4440; X86-LABEL: bzhi64_constant_mask16: 4441; X86: # %bb.0: 4442; X86-NEXT: movl $32767, %eax # imm = 0x7FFF 4443; X86-NEXT: andl {{[0-9]+}}(%esp), %eax 4444; X86-NEXT: xorl %edx, %edx 4445; X86-NEXT: retl 4446; 4447; X64-LABEL: bzhi64_constant_mask16: 4448; X64: # %bb.0: 4449; X64-NEXT: movq %rdi, %rax 4450; X64-NEXT: andl $32767, %eax # imm = 0x7FFF 4451; X64-NEXT: retq 4452 %masked = and i64 %val, 32767 4453 ret i64 %masked 4454} 4455 4456define i64 @bzhi64_constant_mask16_load(ptr %val) nounwind { 4457; X86-LABEL: bzhi64_constant_mask16_load: 4458; X86: # %bb.0: 4459; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 4460; X86-NEXT: movl $32767, %eax # imm = 0x7FFF 4461; X86-NEXT: andl (%ecx), %eax 4462; X86-NEXT: xorl %edx, %edx 4463; X86-NEXT: retl 4464; 4465; X64-LABEL: bzhi64_constant_mask16_load: 4466; X64: #
%bb.0: 4467; X64-NEXT: movq (%rdi), %rax 4468; X64-NEXT: andl $32767, %eax # imm = 0x7FFF 4469; X64-NEXT: retq 4470 %val1 = load i64, ptr %val 4471 %masked = and i64 %val1, 32767 4472 ret i64 %masked 4473} 4474 4475define i64 @bzhi64_constant_mask8(i64 %val) nounwind { 4476; X86-LABEL: bzhi64_constant_mask8: 4477; X86: # %bb.0: 4478; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 4479; X86-NEXT: andl $127, %eax 4480; X86-NEXT: xorl %edx, %edx 4481; X86-NEXT: retl 4482; 4483; X64-LABEL: bzhi64_constant_mask8: 4484; X64: # %bb.0: 4485; X64-NEXT: movq %rdi, %rax 4486; X64-NEXT: andl $127, %eax 4487; X64-NEXT: retq 4488 %masked = and i64 %val, 127 4489 ret i64 %masked 4490} 4491 4492define i64 @bzhi64_constant_mask8_load(ptr %val) nounwind { 4493; X86-LABEL: bzhi64_constant_mask8_load: 4494; X86: # %bb.0: 4495; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 4496; X86-NEXT: movl (%eax), %eax 4497; X86-NEXT: andl $127, %eax 4498; X86-NEXT: xorl %edx, %edx 4499; X86-NEXT: retl 4500; 4501; X64-LABEL: bzhi64_constant_mask8_load: 4502; X64: # %bb.0: 4503; X64-NEXT: movq (%rdi), %rax 4504; X64-NEXT: andl $127, %eax 4505; X64-NEXT: retq 4506 %val1 = load i64, ptr %val 4507 %masked = and i64 %val1, 127 4508 ret i64 %masked 4509} 4510 4511; Ensure constant hoisting doesn't prevent BEXTR/BZHI instructions in both paths.
; The same 48-bit constant mask (0xFFFFFFFFFFFF) is applied in two blocks:
; %x is masked and stored to %use in the entry block, and %y is masked and
; stored at byte offset 8 from %use in %if.end, which is reached only when %y
; is non-zero.
4512define void @PR111323(ptr nocapture noundef writeonly %use, i64 noundef %x, i64 noundef %y) nounwind { 4513; X86-LABEL: PR111323: 4514; X86: # %bb.0: # %entry 4515; X86-NEXT: pushl %edi 4516; X86-NEXT: pushl %esi 4517; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 4518; X86-NEXT: movl {{[0-9]+}}(%esp), %edx 4519; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 4520; X86-NEXT: movl {{[0-9]+}}(%esp), %esi 4521; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edi 4522; X86-NEXT: movl %edi, 4(%ecx) 4523; X86-NEXT: movl %esi, (%ecx) 4524; X86-NEXT: movl %edx, %esi 4525; X86-NEXT: orl %eax, %esi 4526; X86-NEXT: je .LBB68_2 4527; X86-NEXT: # %bb.1: # %if.end 4528; X86-NEXT: andl $65535, %eax # imm = 0xFFFF 4529; X86-NEXT: andl $-1, %edx 4530; X86-NEXT: movl %edx, 8(%ecx) 4531; X86-NEXT: movl %eax, 12(%ecx) 4532; X86-NEXT: .LBB68_2: # %return 4533; X86-NEXT: popl %esi 4534; X86-NEXT: popl %edi 4535; X86-NEXT: retl 4536; 4537; X64-NOBMI-LABEL: PR111323: 4538; X64-NOBMI: # %bb.0: # %entry 4539; X64-NOBMI-NEXT: movabsq $281474976710655, %rax # imm = 0xFFFFFFFFFFFF 4540; X64-NOBMI-NEXT: andq %rax, %rsi 4541; X64-NOBMI-NEXT: movq %rsi, (%rdi) 4542; X64-NOBMI-NEXT: testq %rdx, %rdx 4543; X64-NOBMI-NEXT: je .LBB68_2 4544; X64-NOBMI-NEXT: # %bb.1: # %if.end 4545; X64-NOBMI-NEXT: andq %rax, %rdx 4546; X64-NOBMI-NEXT: movq %rdx, 8(%rdi) 4547; X64-NOBMI-NEXT: .LBB68_2: # %return 4548; X64-NOBMI-NEXT: retq 4549; 4550; X64-BMI1NOTBM-LABEL: PR111323: 4551; X64-BMI1NOTBM: # %bb.0: # %entry 4552; X64-BMI1NOTBM-NEXT: movl $12288, %eax # imm = 0x3000 4553; X64-BMI1NOTBM-NEXT: bextrq %rax, %rsi, %rax 4554; X64-BMI1NOTBM-NEXT: movq %rax, (%rdi) 4555; X64-BMI1NOTBM-NEXT: testq %rdx, %rdx 4556; X64-BMI1NOTBM-NEXT: je .LBB68_2 4557; X64-BMI1NOTBM-NEXT: # %bb.1: # %if.end 4558; X64-BMI1NOTBM-NEXT: movl $12288, %eax # imm = 0x3000 4559; X64-BMI1NOTBM-NEXT: bextrq %rax, %rdx, %rax 4560; X64-BMI1NOTBM-NEXT: movq %rax, 8(%rdi) 4561; X64-BMI1NOTBM-NEXT: .LBB68_2: # %return 4562; X64-BMI1NOTBM-NEXT: retq 4563; 4564;
X64-BMI1TBM-LABEL: PR111323: 4565; X64-BMI1TBM: # %bb.0: # %entry 4566; X64-BMI1TBM-NEXT: bextrq $12288, %rsi, %rax # imm = 0x3000 4567; X64-BMI1TBM-NEXT: movq %rax, (%rdi) 4568; X64-BMI1TBM-NEXT: testq %rdx, %rdx 4569; X64-BMI1TBM-NEXT: je .LBB68_2 4570; X64-BMI1TBM-NEXT: # %bb.1: # %if.end 4571; X64-BMI1TBM-NEXT: bextrq $12288, %rdx, %rax # imm = 0x3000 4572; X64-BMI1TBM-NEXT: movq %rax, 8(%rdi) 4573; X64-BMI1TBM-NEXT: .LBB68_2: # %return 4574; X64-BMI1TBM-NEXT: retq 4575; 4576; X64-BMI2TBM-LABEL: PR111323: 4577; X64-BMI2TBM: # %bb.0: # %entry 4578; X64-BMI2TBM-NEXT: bextrq $12288, %rsi, %rax # imm = 0x3000 4579; X64-BMI2TBM-NEXT: movq %rax, (%rdi) 4580; X64-BMI2TBM-NEXT: testq %rdx, %rdx 4581; X64-BMI2TBM-NEXT: je .LBB68_2 4582; X64-BMI2TBM-NEXT: # %bb.1: # %if.end 4583; X64-BMI2TBM-NEXT: bextrq $12288, %rdx, %rax # imm = 0x3000 4584; X64-BMI2TBM-NEXT: movq %rax, 8(%rdi) 4585; X64-BMI2TBM-NEXT: .LBB68_2: # %return 4586; X64-BMI2TBM-NEXT: retq 4587; 4588; X64-BMI2NOTBM-LABEL: PR111323: 4589; X64-BMI2NOTBM: # %bb.0: # %entry 4590; X64-BMI2NOTBM-NEXT: movb $48, %al 4591; X64-BMI2NOTBM-NEXT: bzhiq %rax, %rsi, %rcx 4592; X64-BMI2NOTBM-NEXT: movq %rcx, (%rdi) 4593; X64-BMI2NOTBM-NEXT: testq %rdx, %rdx 4594; X64-BMI2NOTBM-NEXT: je .LBB68_2 4595; X64-BMI2NOTBM-NEXT: # %bb.1: # %if.end 4596; X64-BMI2NOTBM-NEXT: bzhiq %rax, %rdx, %rax 4597; X64-BMI2NOTBM-NEXT: movq %rax, 8(%rdi) 4598; X64-BMI2NOTBM-NEXT: .LBB68_2: # %return 4599; X64-BMI2NOTBM-NEXT: retq 4600entry: 4601 %and = and i64 %x, 281474976710655 4602 store i64 %and, ptr %use, align 8 4603 %cmp = icmp eq i64 %y, 0 4604 br i1 %cmp, label %return, label %if.end 4605 4606if.end: 4607 %and1 = and i64 %y, 281474976710655 4608 %arrayidx2 = getelementptr inbounds i8, ptr %use, i64 8 4609 store i64 %and1, ptr %arrayidx2, align 8 4610 br label %return 4611 4612return: 4613 ret void 4614} 4615