; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Loosely based on test/CodeGen/{X86,AArch64}/extract-lowbits.ll,
; but with all 64-bit tests and all tests involving loads dropped.
;
; The first RUN line uses the default subtarget (checked under the SI prefix),
; the second uses -mcpu=tonga (checked under the VI prefix). Checks shared by
; both runs use the GCN prefix.

; Patterns:
;   a) x & ((1 << nbits) - 1)
;   b) x & ~(-1 << nbits)
;   c) x & (-1 >> (32 - nbits))
;   d) (x << (32 - nbits)) >> (32 - nbits)
; are equivalent. All right shifts above are logical (lshr in the IR below).

; ---------------------------------------------------------------------------- ;
; Pattern a. 32-bit
; ---------------------------------------------------------------------------- ;

; Basic form: mask = (1 << numlowbits) - 1, result = mask & val.
; Both runs expect a single v_bfe_u32.
define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind {
; GCN-LABEL: bzhi32_a0:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %onebit = shl i32 1, %numlowbits
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Same as a0, but the bit count arrives as a zeroext i8 argument and is
; widened to i32 with zext before the shift.
define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
; GCN-LABEL: bzhi32_a1_indexzext:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %conv = zext i8 %numlowbits to i32
  %onebit = shl i32 1, %conv
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Same as a0, but with the operands of the final 'and' swapped (val first).
define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind {
; GCN-LABEL: bzhi32_a4_commutative:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %onebit = shl i32 1, %numlowbits
  %mask = add nsw i32 %onebit, -1
  %masked = and i32 %val, %mask ; swapped order
  ret i32 %masked
}

; ---------------------------------------------------------------------------- ;
; Pattern b. 32-bit
; ---------------------------------------------------------------------------- ;

; Basic form: mask = ~(-1 << numlowbits), result = mask & val.
; Both runs expect a single v_bfe_u32.
define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind {
; GCN-LABEL: bzhi32_b0:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %notmask = shl i32 -1, %numlowbits
  %mask = xor i32 %notmask, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Same as b0, but the bit count arrives as a zeroext i8 argument and is
; widened to i32 with zext before the shift.
define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
; GCN-LABEL: bzhi32_b1_indexzext:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %conv = zext i8 %numlowbits to i32
  %notmask = shl i32 -1, %conv
  %mask = xor i32 %notmask, -1
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Same as b0, but with the operands of the final 'and' swapped (val first).
define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
; GCN-LABEL: bzhi32_b4_commutative:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %notmask = shl i32 -1, %numlowbits
  %mask = xor i32 %notmask, -1
  %masked = and i32 %val, %mask ; swapped order
  ret i32 %masked
}

; ---------------------------------------------------------------------------- ;
; Pattern c. 32-bit
; ---------------------------------------------------------------------------- ;

; Basic form: mask = -1 >> (32 - numlowbits), result = mask & val.
; The expected output on both runs is the plain sub / shift / and sequence,
; not a v_bfe_u32.
define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
; SI-LABEL: bzhi32_c0:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_sub_i32_e32 v1, vcc, 32, v1
; SI-NEXT:    v_lshr_b32_e32 v1, -1, v1
; SI-NEXT:    v_and_b32_e32 v0, v1, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: bzhi32_c0:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_sub_u32_e32 v1, vcc, 32, v1
; VI-NEXT:    v_lshrrev_b32_e64 v1, v1, -1
; VI-NEXT:    v_and_b32_e32 v0, v1, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
  %numhighbits = sub i32 32, %numlowbits
  %mask = lshr i32 -1, %numhighbits
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Same as c0, but the bit count is first masked with 31 (kept to its low five
; bits). With that clamp in place both runs expect v_and_b32 followed by
; v_bfe_u32.
define i32 @bzhi32_c0_clamp(i32 %val, i32 %numlowbits) nounwind {
; GCN-LABEL: bzhi32_c0_clamp:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v1, 31, v1
; GCN-NEXT:    v_bfe_u32 v0, v0, 0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %low5bits = and i32 %numlowbits, 31
  %numhighbits = sub i32 32, %low5bits
  %mask = lshr i32 -1, %numhighbits
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Same as c0, but the bit count is an i8 argument (no zeroext attribute here);
; the subtraction from 32 is done in i8 and the difference is then widened to
; i32 with zext. The expected subtract differs between the two runs
; (v_sub_i32 versus v_sub_u16).
define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
; SI-LABEL: bzhi32_c1_indexzext:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_sub_i32_e32 v1, vcc, 32, v1
; SI-NEXT:    v_lshr_b32_e32 v1, -1, v1
; SI-NEXT:    v_and_b32_e32 v0, v1, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: bzhi32_c1_indexzext:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_sub_u16_e32 v1, 32, v1
; VI-NEXT:    v_lshrrev_b32_e64 v1, v1, -1
; VI-NEXT:    v_and_b32_e32 v0, v1, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
  %numhighbits = sub i8 32, %numlowbits
  %sh_prom = zext i8 %numhighbits to i32
  %mask = lshr i32 -1, %sh_prom
  %masked = and i32 %mask, %val
  ret i32 %masked
}

; Same as c0, but with the operands of the final 'and' swapped (val first).
; The expected v_and_b32 source operands are swapped accordingly.
define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
; SI-LABEL: bzhi32_c4_commutative:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_sub_i32_e32 v1, vcc, 32, v1
; SI-NEXT:    v_lshr_b32_e32 v1, -1, v1
; SI-NEXT:    v_and_b32_e32 v0, v0, v1
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: bzhi32_c4_commutative:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_sub_u32_e32 v1, vcc, 32, v1
; VI-NEXT:    v_lshrrev_b32_e64 v1, v1, -1
; VI-NEXT:    v_and_b32_e32 v0, v0, v1
; VI-NEXT:    s_setpc_b64 s[30:31]
  %numhighbits = sub i32 32, %numlowbits
  %mask = lshr i32 -1, %numhighbits
  %masked = and i32 %val, %mask ; swapped order
  ret i32 %masked
}

; ---------------------------------------------------------------------------- ;
; Pattern d. 32-bit
; ---------------------------------------------------------------------------- ;

; Basic form: shift val left by (32 - numlowbits), then logically shift the
; result right by the same amount. The expected output on both runs is the
; plain sub / shift-left / shift-right sequence.
define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
; SI-LABEL: bzhi32_d0:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_sub_i32_e32 v1, vcc, 32, v1
; SI-NEXT:    v_lshlrev_b32_e32 v0, v1, v0
; SI-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: bzhi32_d0:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_sub_u32_e32 v1, vcc, 32, v1
; VI-NEXT:    v_lshlrev_b32_e32 v0, v1, v0
; VI-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
  %numhighbits = sub i32 32, %numlowbits
  %highbitscleared = shl i32 %val, %numhighbits
  %masked = lshr i32 %highbitscleared, %numhighbits
  ret i32 %masked
}

; Same as d0, but the bit count is first masked with 31 (kept to its low five
; bits). The expected output keeps the v_and_b32, the subtract and both shifts.
define i32 @bzhi32_d0_5bits(i32 %val, i32 %numlowbits) nounwind {
; SI-LABEL: bzhi32_d0_5bits:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_and_b32_e32 v1, 31, v1
; SI-NEXT:    v_sub_i32_e32 v1, vcc, 32, v1
; SI-NEXT:    v_lshlrev_b32_e32 v0, v1, v0
; SI-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: bzhi32_d0_5bits:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_and_b32_e32 v1, 31, v1
; VI-NEXT:    v_sub_u32_e32 v1, vcc, 32, v1
; VI-NEXT:    v_lshlrev_b32_e32 v0, v1, v0
; VI-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
  %numlow5bits = and i32 %numlowbits, 31
  %numhighbits = sub i32 32, %numlow5bits
  %highbitscleared = shl i32 %val, %numhighbits
  %masked = lshr i32 %highbitscleared, %numhighbits
  ret i32 %masked
}

; Same as d0, but the bit count is an i8 argument (no zeroext attribute here);
; the subtraction from 32 is done in i8 and the difference is then widened to
; i32 with zext. The expected subtract differs between the two runs
; (v_sub_i32 versus v_sub_u16).
define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
; SI-LABEL: bzhi32_d1_indexzext:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_sub_i32_e32 v1, vcc, 32, v1
; SI-NEXT:    v_lshlrev_b32_e32 v0, v1, v0
; SI-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: bzhi32_d1_indexzext:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_sub_u16_e32 v1, 32, v1
; VI-NEXT:    v_lshlrev_b32_e32 v0, v1, v0
; VI-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
  %numhighbits = sub i8 32, %numlowbits
  %sh_prom = zext i8 %numhighbits to i32
  %highbitscleared = shl i32 %val, %sh_prom
  %masked = lshr i32 %highbitscleared, %sh_prom
  ret i32 %masked
}