1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 2; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX678,GFX6 %s 3; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX678,GFX7 %s 4; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX678,GFX8 %s 5; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s 6; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s 7; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s 8 9declare void @llvm.set.rounding(i32) 10declare i32 @llvm.get.rounding() 11 12define amdgpu_gfx void @s_set_rounding(i32 inreg %rounding) { 13; GFX678-LABEL: s_set_rounding: 14; GFX678: ; %bb.0: 15; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16; GFX678-NEXT: s_add_i32 s34, s4, -4 17; GFX678-NEXT: s_min_u32 s34, s4, s34 18; GFX678-NEXT: s_lshl_b32 s36, s34, 2 19; GFX678-NEXT: s_mov_b32 s34, 0x1c84a50f 20; GFX678-NEXT: s_mov_b32 s35, 0xb73e62d9 21; GFX678-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 22; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 23; GFX678-NEXT: s_setpc_b64 s[30:31] 24; 25; GFX9-LABEL: s_set_rounding: 26; GFX9: ; %bb.0: 27; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 28; GFX9-NEXT: s_add_i32 s34, s4, -4 29; GFX9-NEXT: s_min_u32 s34, s4, s34 30; GFX9-NEXT: s_lshl_b32 s36, s34, 2 31; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f 32; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9 33; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 34; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 35; GFX9-NEXT: s_setpc_b64 s[30:31] 36; 37; GFX10-LABEL: s_set_rounding: 38; GFX10: ; %bb.0: 39; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 40; GFX10-NEXT: s_add_i32 s34, s4, -4 41; GFX10-NEXT: s_min_u32 s36, s4, s34 42; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f 43; GFX10-NEXT: s_mov_b32 
s35, 0xb73e62d9 44; GFX10-NEXT: s_lshl_b32 s36, s36, 2 45; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 46; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 47; GFX10-NEXT: s_setpc_b64 s[30:31] 48; 49; GFX11-LABEL: s_set_rounding: 50; GFX11: ; %bb.0: 51; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 52; GFX11-NEXT: s_add_i32 s0, s4, -4 53; GFX11-NEXT: s_min_u32 s2, s4, s0 54; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f 55; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 56; GFX11-NEXT: s_lshl_b32 s2, s2, 2 57; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 58; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 59; GFX11-NEXT: s_setpc_b64 s[30:31] 60 call void @llvm.set.rounding(i32 %rounding) 61 ret void 62} 63 64define amdgpu_kernel void @s_set_rounding_kernel(i32 inreg %rounding) { 65; GFX6-LABEL: s_set_rounding_kernel: 66; GFX6: ; %bb.0: 67; GFX6-NEXT: s_load_dword s2, s[4:5], 0x9 68; GFX6-NEXT: s_mov_b32 s0, 0x1c84a50f 69; GFX6-NEXT: s_mov_b32 s1, 0xb73e62d9 70; GFX6-NEXT: ;;#ASMSTART 71; GFX6-NEXT: ;;#ASMEND 72; GFX6-NEXT: s_waitcnt lgkmcnt(0) 73; GFX6-NEXT: s_add_i32 s3, s2, -4 74; GFX6-NEXT: s_min_u32 s2, s2, s3 75; GFX6-NEXT: s_lshl_b32 s2, s2, 2 76; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 77; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 78; GFX6-NEXT: s_endpgm 79; 80; GFX7-LABEL: s_set_rounding_kernel: 81; GFX7: ; %bb.0: 82; GFX7-NEXT: s_load_dword s2, s[4:5], 0x9 83; GFX7-NEXT: s_mov_b32 s0, 0x1c84a50f 84; GFX7-NEXT: s_mov_b32 s1, 0xb73e62d9 85; GFX7-NEXT: ;;#ASMSTART 86; GFX7-NEXT: ;;#ASMEND 87; GFX7-NEXT: s_waitcnt lgkmcnt(0) 88; GFX7-NEXT: s_add_i32 s3, s2, -4 89; GFX7-NEXT: s_min_u32 s2, s2, s3 90; GFX7-NEXT: s_lshl_b32 s2, s2, 2 91; GFX7-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 92; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 93; GFX7-NEXT: s_endpgm 94; 95; GFX8-LABEL: s_set_rounding_kernel: 96; GFX8: ; %bb.0: 97; GFX8-NEXT: s_load_dword s2, s[4:5], 0x24 98; GFX8-NEXT: s_mov_b32 s0, 0x1c84a50f 99; GFX8-NEXT: s_mov_b32 s1, 0xb73e62d9 100; 
GFX8-NEXT: ;;#ASMSTART 101; GFX8-NEXT: ;;#ASMEND 102; GFX8-NEXT: s_waitcnt lgkmcnt(0) 103; GFX8-NEXT: s_add_i32 s3, s2, -4 104; GFX8-NEXT: s_min_u32 s2, s2, s3 105; GFX8-NEXT: s_lshl_b32 s2, s2, 2 106; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 107; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 108; GFX8-NEXT: s_endpgm 109; 110; GFX9-LABEL: s_set_rounding_kernel: 111; GFX9: ; %bb.0: 112; GFX9-NEXT: s_load_dword s2, s[4:5], 0x24 113; GFX9-NEXT: s_mov_b32 s0, 0x1c84a50f 114; GFX9-NEXT: s_mov_b32 s1, 0xb73e62d9 115; GFX9-NEXT: ;;#ASMSTART 116; GFX9-NEXT: ;;#ASMEND 117; GFX9-NEXT: s_waitcnt lgkmcnt(0) 118; GFX9-NEXT: s_add_i32 s3, s2, -4 119; GFX9-NEXT: s_min_u32 s2, s2, s3 120; GFX9-NEXT: s_lshl_b32 s2, s2, 2 121; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 122; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 123; GFX9-NEXT: s_endpgm 124; 125; GFX10-LABEL: s_set_rounding_kernel: 126; GFX10: ; %bb.0: 127; GFX10-NEXT: s_load_dword s2, s[4:5], 0x24 128; GFX10-NEXT: s_mov_b32 s0, 0x1c84a50f 129; GFX10-NEXT: s_mov_b32 s1, 0xb73e62d9 130; GFX10-NEXT: ;;#ASMSTART 131; GFX10-NEXT: ;;#ASMEND 132; GFX10-NEXT: s_waitcnt lgkmcnt(0) 133; GFX10-NEXT: s_add_i32 s3, s2, -4 134; GFX10-NEXT: s_min_u32 s2, s2, s3 135; GFX10-NEXT: s_lshl_b32 s2, s2, 2 136; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 137; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 138; GFX10-NEXT: s_endpgm 139; 140; GFX11-LABEL: s_set_rounding_kernel: 141; GFX11: ; %bb.0: 142; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x24 143; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f 144; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 145; GFX11-NEXT: ;;#ASMSTART 146; GFX11-NEXT: ;;#ASMEND 147; GFX11-NEXT: s_waitcnt lgkmcnt(0) 148; GFX11-NEXT: s_add_i32 s3, s2, -4 149; GFX11-NEXT: s_min_u32 s2, s2, s3 150; GFX11-NEXT: s_lshl_b32 s2, s2, 2 151; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 152; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 153; GFX11-NEXT: s_endpgm 154 call void @llvm.set.rounding(i32 %rounding) 155 call void asm 
sideeffect "",""() 156 ret void 157} 158 159define void @v_set_rounding(i32 %rounding) { 160; GFX6-LABEL: v_set_rounding: 161; GFX6: ; %bb.0: 162; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 163; GFX6-NEXT: v_add_i32_e32 v1, vcc, -4, v0 164; GFX6-NEXT: v_min_u32_e32 v0, v0, v1 165; GFX6-NEXT: s_mov_b32 s4, 0x1c84a50f 166; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0 167; GFX6-NEXT: s_mov_b32 s5, 0xb73e62d9 168; GFX6-NEXT: v_lshr_b64 v[0:1], s[4:5], v0 169; GFX6-NEXT: v_readfirstlane_b32 s4, v0 170; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 171; GFX6-NEXT: s_setpc_b64 s[30:31] 172; 173; GFX7-LABEL: v_set_rounding: 174; GFX7: ; %bb.0: 175; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 176; GFX7-NEXT: v_add_i32_e32 v1, vcc, -4, v0 177; GFX7-NEXT: v_min_u32_e32 v0, v0, v1 178; GFX7-NEXT: s_mov_b32 s4, 0x1c84a50f 179; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0 180; GFX7-NEXT: s_mov_b32 s5, 0xb73e62d9 181; GFX7-NEXT: v_lshr_b64 v[0:1], s[4:5], v0 182; GFX7-NEXT: v_readfirstlane_b32 s4, v0 183; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 184; GFX7-NEXT: s_setpc_b64 s[30:31] 185; 186; GFX8-LABEL: v_set_rounding: 187; GFX8: ; %bb.0: 188; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 189; GFX8-NEXT: v_add_u32_e32 v1, vcc, -4, v0 190; GFX8-NEXT: v_min_u32_e32 v0, v0, v1 191; GFX8-NEXT: s_mov_b32 s4, 0x1c84a50f 192; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0 193; GFX8-NEXT: s_mov_b32 s5, 0xb73e62d9 194; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, s[4:5] 195; GFX8-NEXT: v_readfirstlane_b32 s4, v0 196; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 197; GFX8-NEXT: s_setpc_b64 s[30:31] 198; 199; GFX9-LABEL: v_set_rounding: 200; GFX9: ; %bb.0: 201; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 202; GFX9-NEXT: v_add_u32_e32 v1, -4, v0 203; GFX9-NEXT: v_min_u32_e32 v0, v0, v1 204; GFX9-NEXT: s_mov_b32 s4, 0x1c84a50f 205; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 206; GFX9-NEXT: s_mov_b32 s5, 0xb73e62d9 207; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, s[4:5] 208; 
GFX9-NEXT: v_readfirstlane_b32 s4, v0 209; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 210; GFX9-NEXT: s_setpc_b64 s[30:31] 211; 212; GFX10-LABEL: v_set_rounding: 213; GFX10: ; %bb.0: 214; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 215; GFX10-NEXT: v_add_nc_u32_e32 v1, -4, v0 216; GFX10-NEXT: s_mov_b32 s4, 0x1c84a50f 217; GFX10-NEXT: s_mov_b32 s5, 0xb73e62d9 218; GFX10-NEXT: v_min_u32_e32 v0, v0, v1 219; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 220; GFX10-NEXT: v_lshrrev_b64 v[0:1], v0, s[4:5] 221; GFX10-NEXT: v_readfirstlane_b32 s4, v0 222; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 223; GFX10-NEXT: s_setpc_b64 s[30:31] 224; 225; GFX11-LABEL: v_set_rounding: 226; GFX11: ; %bb.0: 227; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 228; GFX11-NEXT: v_add_nc_u32_e32 v1, -4, v0 229; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f 230; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 231; GFX11-NEXT: v_min_u32_e32 v0, v0, v1 232; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 233; GFX11-NEXT: v_lshrrev_b64 v[0:1], v0, s[0:1] 234; GFX11-NEXT: v_readfirstlane_b32 s0, v0 235; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 236; GFX11-NEXT: s_setpc_b64 s[30:31] 237 call void @llvm.set.rounding(i32 %rounding) 238 ret void 239} 240 241define void @set_rounding_get_rounding() { 242; GFX678-LABEL: set_rounding_get_rounding: 243; GFX678: ; %bb.0: 244; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 245; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4) 246; GFX678-NEXT: s_lshl_b32 s6, s4, 2 247; GFX678-NEXT: s_mov_b32 s4, 0xeb24da71 248; GFX678-NEXT: s_mov_b32 s5, 0xc96f385 249; GFX678-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 250; GFX678-NEXT: s_and_b32 s4, s4, 15 251; GFX678-NEXT: s_add_i32 s5, s4, 4 252; GFX678-NEXT: s_cmp_lt_u32 s4, 4 253; GFX678-NEXT: s_cselect_b32 s4, s4, s5 254; GFX678-NEXT: s_add_i32 s5, s4, -4 255; GFX678-NEXT: s_min_u32 s4, s4, s5 256; GFX678-NEXT: s_lshl_b32 s6, s4, 2 257; GFX678-NEXT: s_mov_b32 s4, 0x1c84a50f 258; GFX678-NEXT: s_mov_b32 
s5, 0xb73e62d9 259; GFX678-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 260; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 261; GFX678-NEXT: s_setpc_b64 s[30:31] 262; 263; GFX9-LABEL: set_rounding_get_rounding: 264; GFX9: ; %bb.0: 265; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 266; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4) 267; GFX9-NEXT: s_lshl_b32 s6, s4, 2 268; GFX9-NEXT: s_mov_b32 s4, 0xeb24da71 269; GFX9-NEXT: s_mov_b32 s5, 0xc96f385 270; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 271; GFX9-NEXT: s_and_b32 s4, s4, 15 272; GFX9-NEXT: s_add_i32 s5, s4, 4 273; GFX9-NEXT: s_cmp_lt_u32 s4, 4 274; GFX9-NEXT: s_cselect_b32 s4, s4, s5 275; GFX9-NEXT: s_add_i32 s5, s4, -4 276; GFX9-NEXT: s_min_u32 s4, s4, s5 277; GFX9-NEXT: s_lshl_b32 s6, s4, 2 278; GFX9-NEXT: s_mov_b32 s4, 0x1c84a50f 279; GFX9-NEXT: s_mov_b32 s5, 0xb73e62d9 280; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 281; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 282; GFX9-NEXT: s_setpc_b64 s[30:31] 283; 284; GFX10-LABEL: set_rounding_get_rounding: 285; GFX10: ; %bb.0: 286; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 287; GFX10-NEXT: s_getreg_b32 s6, hwreg(HW_REG_MODE, 0, 4) 288; GFX10-NEXT: s_mov_b32 s4, 0xeb24da71 289; GFX10-NEXT: s_mov_b32 s5, 0xc96f385 290; GFX10-NEXT: s_lshl_b32 s6, s6, 2 291; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 292; GFX10-NEXT: s_and_b32 s4, s4, 15 293; GFX10-NEXT: s_add_i32 s5, s4, 4 294; GFX10-NEXT: s_cmp_lt_u32 s4, 4 295; GFX10-NEXT: s_cselect_b32 s4, s4, s5 296; GFX10-NEXT: s_add_i32 s5, s4, -4 297; GFX10-NEXT: s_min_u32 s6, s4, s5 298; GFX10-NEXT: s_mov_b32 s4, 0x1c84a50f 299; GFX10-NEXT: s_mov_b32 s5, 0xb73e62d9 300; GFX10-NEXT: s_lshl_b32 s6, s6, 2 301; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 302; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 303; GFX10-NEXT: s_setpc_b64 s[30:31] 304; 305; GFX11-LABEL: set_rounding_get_rounding: 306; GFX11: ; %bb.0: 307; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 308; GFX11-NEXT: s_getreg_b32 
s2, hwreg(HW_REG_MODE, 0, 4) 309; GFX11-NEXT: s_mov_b32 s0, 0xeb24da71 310; GFX11-NEXT: s_mov_b32 s1, 0xc96f385 311; GFX11-NEXT: s_lshl_b32 s2, s2, 2 312; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 313; GFX11-NEXT: s_and_b32 s0, s0, 15 314; GFX11-NEXT: s_add_i32 s1, s0, 4 315; GFX11-NEXT: s_cmp_lt_u32 s0, 4 316; GFX11-NEXT: s_cselect_b32 s0, s0, s1 317; GFX11-NEXT: s_add_i32 s1, s0, -4 318; GFX11-NEXT: s_min_u32 s2, s0, s1 319; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f 320; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 321; GFX11-NEXT: s_lshl_b32 s2, s2, 2 322; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 323; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 324; GFX11-NEXT: s_setpc_b64 s[30:31] 325 %rounding = call i32 @llvm.get.rounding() 326 call void @llvm.set.rounding(i32 %rounding) 327 ret void 328} 329 330define void @s_set_rounding_0() { 331; GFX678-LABEL: s_set_rounding_0: 332; GFX678: ; %bb.0: 333; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 334; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15 335; GFX678-NEXT: s_setpc_b64 s[30:31] 336; 337; GFX9-LABEL: s_set_rounding_0: 338; GFX9: ; %bb.0: 339; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 340; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15 341; GFX9-NEXT: s_setpc_b64 s[30:31] 342; 343; GFX1011-LABEL: s_set_rounding_0: 344; GFX1011: ; %bb.0: 345; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 346; GFX1011-NEXT: s_round_mode 0xf 347; GFX1011-NEXT: s_setpc_b64 s[30:31] 348 call void @llvm.set.rounding(i32 0) 349 ret void 350} 351 352define void @s_set_rounding_1() { 353; GFX678-LABEL: s_set_rounding_1: 354; GFX678: ; %bb.0: 355; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 356; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 357; GFX678-NEXT: s_setpc_b64 s[30:31] 358; 359; GFX9-LABEL: s_set_rounding_1: 360; GFX9: ; %bb.0: 361; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 362; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 363; GFX9-NEXT: 
s_setpc_b64 s[30:31] 364; 365; GFX1011-LABEL: s_set_rounding_1: 366; GFX1011: ; %bb.0: 367; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 368; GFX1011-NEXT: s_round_mode 0x0 369; GFX1011-NEXT: s_setpc_b64 s[30:31] 370 call void @llvm.set.rounding(i32 1) 371 ret void 372} 373 374define void @s_set_rounding_2() { 375; GFX678-LABEL: s_set_rounding_2: 376; GFX678: ; %bb.0: 377; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 378; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5 379; GFX678-NEXT: s_setpc_b64 s[30:31] 380; 381; GFX9-LABEL: s_set_rounding_2: 382; GFX9: ; %bb.0: 383; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 384; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5 385; GFX9-NEXT: s_setpc_b64 s[30:31] 386; 387; GFX1011-LABEL: s_set_rounding_2: 388; GFX1011: ; %bb.0: 389; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 390; GFX1011-NEXT: s_round_mode 0x5 391; GFX1011-NEXT: s_setpc_b64 s[30:31] 392 call void @llvm.set.rounding(i32 2) 393 ret void 394} 395 396define void @s_set_rounding_3() { 397; GFX678-LABEL: s_set_rounding_3: 398; GFX678: ; %bb.0: 399; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 400; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10 401; GFX678-NEXT: s_setpc_b64 s[30:31] 402; 403; GFX9-LABEL: s_set_rounding_3: 404; GFX9: ; %bb.0: 405; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 406; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10 407; GFX9-NEXT: s_setpc_b64 s[30:31] 408; 409; GFX1011-LABEL: s_set_rounding_3: 410; GFX1011: ; %bb.0: 411; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 412; GFX1011-NEXT: s_round_mode 0xa 413; GFX1011-NEXT: s_setpc_b64 s[30:31] 414 call void @llvm.set.rounding(i32 3) 415 ret void 416} 417 418; Unsupported mode. 
419define void @s_set_rounding_4() { 420; GFX678-LABEL: s_set_rounding_4: 421; GFX678: ; %bb.0: 422; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 423; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15 424; GFX678-NEXT: s_setpc_b64 s[30:31] 425; 426; GFX9-LABEL: s_set_rounding_4: 427; GFX9: ; %bb.0: 428; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 429; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 15 430; GFX9-NEXT: s_setpc_b64 s[30:31] 431; 432; GFX1011-LABEL: s_set_rounding_4: 433; GFX1011: ; %bb.0: 434; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 435; GFX1011-NEXT: s_round_mode 0xf 436; GFX1011-NEXT: s_setpc_b64 s[30:31] 437 call void @llvm.set.rounding(i32 4) 438 ret void 439} 440 441; Undefined rounding mode value 442define void @s_set_rounding_5() { 443; GFX678-LABEL: s_set_rounding_5: 444; GFX678: ; %bb.0: 445; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 446; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 447; GFX678-NEXT: s_setpc_b64 s[30:31] 448; 449; GFX9-LABEL: s_set_rounding_5: 450; GFX9: ; %bb.0: 451; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 452; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 453; GFX9-NEXT: s_setpc_b64 s[30:31] 454; 455; GFX1011-LABEL: s_set_rounding_5: 456; GFX1011: ; %bb.0: 457; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 458; GFX1011-NEXT: s_round_mode 0x0 459; GFX1011-NEXT: s_setpc_b64 s[30:31] 460 call void @llvm.set.rounding(i32 5) 461 ret void 462} 463 464; Undefined rounding mode value 465define void @s_set_rounding_6() { 466; GFX678-LABEL: s_set_rounding_6: 467; GFX678: ; %bb.0: 468; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 469; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5 470; GFX678-NEXT: s_setpc_b64 s[30:31] 471; 472; GFX9-LABEL: s_set_rounding_6: 473; GFX9: ; %bb.0: 474; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 475; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 5 476; GFX9-NEXT: s_setpc_b64 s[30:31] 477; 478; 
GFX1011-LABEL: s_set_rounding_6: 479; GFX1011: ; %bb.0: 480; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 481; GFX1011-NEXT: s_round_mode 0x5 482; GFX1011-NEXT: s_setpc_b64 s[30:31] 483 call void @llvm.set.rounding(i32 6) 484 ret void 485} 486 487; "Dynamic" 488define void @s_set_rounding_7() { 489; GFX678-LABEL: s_set_rounding_7: 490; GFX678: ; %bb.0: 491; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 492; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10 493; GFX678-NEXT: s_setpc_b64 s[30:31] 494; 495; GFX9-LABEL: s_set_rounding_7: 496; GFX9: ; %bb.0: 497; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 498; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 10 499; GFX9-NEXT: s_setpc_b64 s[30:31] 500; 501; GFX1011-LABEL: s_set_rounding_7: 502; GFX1011: ; %bb.0: 503; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 504; GFX1011-NEXT: s_round_mode 0xa 505; GFX1011-NEXT: s_setpc_b64 s[30:31] 506 call void @llvm.set.rounding(i32 7) 507 ret void 508} 509 510; Invalid 511define void @s_set_rounding_neg1() { 512; GFX678-LABEL: s_set_rounding_neg1: 513; GFX678: ; %bb.0: 514; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 515; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11 516; GFX678-NEXT: s_setpc_b64 s[30:31] 517; 518; GFX9-LABEL: s_set_rounding_neg1: 519; GFX9: ; %bb.0: 520; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 521; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11 522; GFX9-NEXT: s_setpc_b64 s[30:31] 523; 524; GFX1011-LABEL: s_set_rounding_neg1: 525; GFX1011: ; %bb.0: 526; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 527; GFX1011-NEXT: s_round_mode 0xb 528; GFX1011-NEXT: s_setpc_b64 s[30:31] 529 call void @llvm.set.rounding(i32 -1) 530 ret void 531} 532 533; -------------------------------------------------------------------- 534; Test extended values 535; -------------------------------------------------------------------- 536 537; NearestTiesToEvenF32_TowardPositiveF64 = 8 
538define void @s_set_rounding_8() { 539; GFX678-LABEL: s_set_rounding_8: 540; GFX678: ; %bb.0: 541; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 542; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4 543; GFX678-NEXT: s_setpc_b64 s[30:31] 544; 545; GFX9-LABEL: s_set_rounding_8: 546; GFX9: ; %bb.0: 547; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 548; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 4 549; GFX9-NEXT: s_setpc_b64 s[30:31] 550; 551; GFX1011-LABEL: s_set_rounding_8: 552; GFX1011: ; %bb.0: 553; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 554; GFX1011-NEXT: s_round_mode 0x4 555; GFX1011-NEXT: s_setpc_b64 s[30:31] 556 call void @llvm.set.rounding(i32 8) 557 ret void 558} 559 560; NearestTiesToEvenF32_TowardNegativeF64 = 9 561define void @s_set_rounding_9() { 562; GFX678-LABEL: s_set_rounding_9: 563; GFX678: ; %bb.0: 564; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 565; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8 566; GFX678-NEXT: s_setpc_b64 s[30:31] 567; 568; GFX9-LABEL: s_set_rounding_9: 569; GFX9: ; %bb.0: 570; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 571; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 8 572; GFX9-NEXT: s_setpc_b64 s[30:31] 573; 574; GFX1011-LABEL: s_set_rounding_9: 575; GFX1011: ; %bb.0: 576; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 577; GFX1011-NEXT: s_round_mode 0x8 578; GFX1011-NEXT: s_setpc_b64 s[30:31] 579 call void @llvm.set.rounding(i32 9) 580 ret void 581} 582 583; NearestTiesToEvenF32_TowardZeroF64 = 10 584define void @s_set_rounding_10() { 585; GFX678-LABEL: s_set_rounding_10: 586; GFX678: ; %bb.0: 587; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 588; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 12 589; GFX678-NEXT: s_setpc_b64 s[30:31] 590; 591; GFX9-LABEL: s_set_rounding_10: 592; GFX9: ; %bb.0: 593; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 594; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 
0, 4), 12 595; GFX9-NEXT: s_setpc_b64 s[30:31] 596; 597; GFX1011-LABEL: s_set_rounding_10: 598; GFX1011: ; %bb.0: 599; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 600; GFX1011-NEXT: s_round_mode 0xc 601; GFX1011-NEXT: s_setpc_b64 s[30:31] 602 call void @llvm.set.rounding(i32 10) 603 ret void 604} 605 606; TowardPositiveF32_NearestTiesToEvenF64 = 11 607define void @s_set_rounding_11() { 608; GFX678-LABEL: s_set_rounding_11: 609; GFX678: ; %bb.0: 610; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 611; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1 612; GFX678-NEXT: s_setpc_b64 s[30:31] 613; 614; GFX9-LABEL: s_set_rounding_11: 615; GFX9: ; %bb.0: 616; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 617; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 1 618; GFX9-NEXT: s_setpc_b64 s[30:31] 619; 620; GFX1011-LABEL: s_set_rounding_11: 621; GFX1011: ; %bb.0: 622; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 623; GFX1011-NEXT: s_round_mode 0x1 624; GFX1011-NEXT: s_setpc_b64 s[30:31] 625 call void @llvm.set.rounding(i32 11) 626 ret void 627} 628 629; TowardPositiveF32_TowardNegativeF64 = 12 630define void @s_set_rounding_12() { 631; GFX678-LABEL: s_set_rounding_12: 632; GFX678: ; %bb.0: 633; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 634; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 9 635; GFX678-NEXT: s_setpc_b64 s[30:31] 636; 637; GFX9-LABEL: s_set_rounding_12: 638; GFX9: ; %bb.0: 639; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 640; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 9 641; GFX9-NEXT: s_setpc_b64 s[30:31] 642; 643; GFX1011-LABEL: s_set_rounding_12: 644; GFX1011: ; %bb.0: 645; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 646; GFX1011-NEXT: s_round_mode 0x9 647; GFX1011-NEXT: s_setpc_b64 s[30:31] 648 call void @llvm.set.rounding(i32 12) 649 ret void 650} 651 652; TowardPositiveF32_TowardZeroF64 = 13 653define void @s_set_rounding_13() { 654; GFX678-LABEL: 
s_set_rounding_13: 655; GFX678: ; %bb.0: 656; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 657; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 13 658; GFX678-NEXT: s_setpc_b64 s[30:31] 659; 660; GFX9-LABEL: s_set_rounding_13: 661; GFX9: ; %bb.0: 662; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 663; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 13 664; GFX9-NEXT: s_setpc_b64 s[30:31] 665; 666; GFX1011-LABEL: s_set_rounding_13: 667; GFX1011: ; %bb.0: 668; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 669; GFX1011-NEXT: s_round_mode 0xd 670; GFX1011-NEXT: s_setpc_b64 s[30:31] 671 call void @llvm.set.rounding(i32 13) 672 ret void 673} 674 675; TowardNegativeF32_NearestTiesToEvenF64 = 14 676define void @s_set_rounding_14() { 677; GFX678-LABEL: s_set_rounding_14: 678; GFX678: ; %bb.0: 679; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 680; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2 681; GFX678-NEXT: s_setpc_b64 s[30:31] 682; 683; GFX9-LABEL: s_set_rounding_14: 684; GFX9: ; %bb.0: 685; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 686; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 2 687; GFX9-NEXT: s_setpc_b64 s[30:31] 688; 689; GFX1011-LABEL: s_set_rounding_14: 690; GFX1011: ; %bb.0: 691; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 692; GFX1011-NEXT: s_round_mode 0x2 693; GFX1011-NEXT: s_setpc_b64 s[30:31] 694 call void @llvm.set.rounding(i32 14) 695 ret void 696} 697 698; TowardNegativeF32_TowardPositiveF64 = 15 699define void @s_set_rounding_15() { 700; GFX678-LABEL: s_set_rounding_15: 701; GFX678: ; %bb.0: 702; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 703; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 6 704; GFX678-NEXT: s_setpc_b64 s[30:31] 705; 706; GFX9-LABEL: s_set_rounding_15: 707; GFX9: ; %bb.0: 708; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 709; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 6 710; GFX9-NEXT: s_setpc_b64 
s[30:31] 711; 712; GFX1011-LABEL: s_set_rounding_15: 713; GFX1011: ; %bb.0: 714; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 715; GFX1011-NEXT: s_round_mode 0x6 716; GFX1011-NEXT: s_setpc_b64 s[30:31] 717 call void @llvm.set.rounding(i32 15) 718 ret void 719} 720 721 722; TowardNegativeF32_TowardZeroF64 = 16 723define void @s_set_rounding_16() { 724; GFX678-LABEL: s_set_rounding_16: 725; GFX678: ; %bb.0: 726; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 727; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 14 728; GFX678-NEXT: s_setpc_b64 s[30:31] 729; 730; GFX9-LABEL: s_set_rounding_16: 731; GFX9: ; %bb.0: 732; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 733; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 14 734; GFX9-NEXT: s_setpc_b64 s[30:31] 735; 736; GFX1011-LABEL: s_set_rounding_16: 737; GFX1011: ; %bb.0: 738; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 739; GFX1011-NEXT: s_round_mode 0xe 740; GFX1011-NEXT: s_setpc_b64 s[30:31] 741 call void @llvm.set.rounding(i32 16) 742 ret void 743} 744 745; TowardZeroF32_NearestTiesToEvenF64 = 17 746define void @s_set_rounding_17() { 747; GFX678-LABEL: s_set_rounding_17: 748; GFX678: ; %bb.0: 749; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 750; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 3 751; GFX678-NEXT: s_setpc_b64 s[30:31] 752; 753; GFX9-LABEL: s_set_rounding_17: 754; GFX9: ; %bb.0: 755; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 756; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 3 757; GFX9-NEXT: s_setpc_b64 s[30:31] 758; 759; GFX1011-LABEL: s_set_rounding_17: 760; GFX1011: ; %bb.0: 761; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 762; GFX1011-NEXT: s_round_mode 0x3 763; GFX1011-NEXT: s_setpc_b64 s[30:31] 764 call void @llvm.set.rounding(i32 17) 765 ret void 766} 767 768; TowardZeroF32_TowardPositiveF64 = 18 769define void @s_set_rounding_18() { 770; GFX678-LABEL: s_set_rounding_18: 771; GFX678: ; %bb.0: 772; 
GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 773; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 7 774; GFX678-NEXT: s_setpc_b64 s[30:31] 775; 776; GFX9-LABEL: s_set_rounding_18: 777; GFX9: ; %bb.0: 778; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 779; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 7 780; GFX9-NEXT: s_setpc_b64 s[30:31] 781; 782; GFX1011-LABEL: s_set_rounding_18: 783; GFX1011: ; %bb.0: 784; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 785; GFX1011-NEXT: s_round_mode 0x7 786; GFX1011-NEXT: s_setpc_b64 s[30:31] 787 call void @llvm.set.rounding(i32 18) 788 ret void 789} 790 791; TowardZeroF32_TowardNegativeF64 = 19 792define void @s_set_rounding_19() { 793; GFX678-LABEL: s_set_rounding_19: 794; GFX678: ; %bb.0: 795; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 796; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11 797; GFX678-NEXT: s_setpc_b64 s[30:31] 798; 799; GFX9-LABEL: s_set_rounding_19: 800; GFX9: ; %bb.0: 801; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 802; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11 803; GFX9-NEXT: s_setpc_b64 s[30:31] 804; 805; GFX1011-LABEL: s_set_rounding_19: 806; GFX1011: ; %bb.0: 807; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 808; GFX1011-NEXT: s_round_mode 0xb 809; GFX1011-NEXT: s_setpc_b64 s[30:31] 810 call void @llvm.set.rounding(i32 19) 811 ret void 812} 813 814; Invalid, out of bounds 815define void @s_set_rounding_20() { 816; GFX678-LABEL: s_set_rounding_20: 817; GFX678: ; %bb.0: 818; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 819; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11 820; GFX678-NEXT: s_setpc_b64 s[30:31] 821; 822; GFX9-LABEL: s_set_rounding_20: 823; GFX9: ; %bb.0: 824; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 825; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11 826; GFX9-NEXT: s_setpc_b64 s[30:31] 827; 828; GFX1011-LABEL: s_set_rounding_20: 829; GFX1011: ; %bb.0: 
830; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 831; GFX1011-NEXT: s_round_mode 0xb 832; GFX1011-NEXT: s_setpc_b64 s[30:31] 833 call void @llvm.set.rounding(i32 20) 834 ret void 835} 836 837define void @s_set_rounding_0xffff() { 838; GFX678-LABEL: s_set_rounding_0xffff: 839; GFX678: ; %bb.0: 840; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 841; GFX678-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11 842; GFX678-NEXT: s_setpc_b64 s[30:31] 843; 844; GFX9-LABEL: s_set_rounding_0xffff: 845; GFX9: ; %bb.0: 846; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 847; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 11 848; GFX9-NEXT: s_setpc_b64 s[30:31] 849; 850; GFX1011-LABEL: s_set_rounding_0xffff: 851; GFX1011: ; %bb.0: 852; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 853; GFX1011-NEXT: s_round_mode 0xb 854; GFX1011-NEXT: s_setpc_b64 s[30:31] 855 call void @llvm.set.rounding(i32 65535) 856 ret void 857} 858 859; -------------------------------------------------------------------- 860; Test optimization knowing the value can only be in the standard 861; range 862; -------------------------------------------------------------------- 863 864define amdgpu_gfx void @s_set_rounding_i2_zeroext(i2 zeroext inreg %rounding) { 865; GFX6-LABEL: s_set_rounding_i2_zeroext: 866; GFX6: ; %bb.0: 867; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 868; GFX6-NEXT: s_lshl_b32 s34, s4, 2 869; GFX6-NEXT: s_lshr_b32 s34, 0xa50f, s34 870; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 871; GFX6-NEXT: s_setpc_b64 s[30:31] 872; 873; GFX7-LABEL: s_set_rounding_i2_zeroext: 874; GFX7: ; %bb.0: 875; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 876; GFX7-NEXT: s_lshl_b32 s34, s4, 2 877; GFX7-NEXT: s_lshr_b32 s34, 0xa50f, s34 878; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 879; GFX7-NEXT: s_setpc_b64 s[30:31] 880; 881; GFX8-LABEL: s_set_rounding_i2_zeroext: 882; GFX8: ; %bb.0: 883; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 884; GFX8-NEXT: s_and_b32 s34, 0xffff, s4 885; GFX8-NEXT: s_lshl_b32 s34, s34, 2 886; GFX8-NEXT: s_lshr_b32 s34, 0xa50f, s34 887; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 888; GFX8-NEXT: s_setpc_b64 s[30:31] 889; 890; GFX9-LABEL: s_set_rounding_i2_zeroext: 891; GFX9: ; %bb.0: 892; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 893; GFX9-NEXT: s_and_b32 s34, 0xffff, s4 894; GFX9-NEXT: s_lshl_b32 s34, s34, 2 895; GFX9-NEXT: s_lshr_b32 s34, 0xa50f, s34 896; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 897; GFX9-NEXT: s_setpc_b64 s[30:31] 898; 899; GFX10-LABEL: s_set_rounding_i2_zeroext: 900; GFX10: ; %bb.0: 901; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 902; GFX10-NEXT: s_and_b32 s34, 0xffff, s4 903; GFX10-NEXT: s_lshl_b32 s34, s34, 2 904; GFX10-NEXT: s_lshr_b32 s34, 0xa50f, s34 905; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 906; GFX10-NEXT: s_setpc_b64 s[30:31] 907; 908; GFX11-LABEL: s_set_rounding_i2_zeroext: 909; GFX11: ; %bb.0: 910; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 911; GFX11-NEXT: s_and_b32 s0, 0xffff, s4 912; GFX11-NEXT: s_lshl_b32 s0, s0, 2 913; GFX11-NEXT: s_lshr_b32 s0, 0xa50f, s0 914; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 915; GFX11-NEXT: s_setpc_b64 s[30:31] 916 %zext.rounding = zext i2 %rounding to i32 917 call void @llvm.set.rounding(i32 %zext.rounding) 918 ret void 919} 920 921define amdgpu_gfx void @s_set_rounding_i2_signext(i2 signext inreg %rounding) { 922; GFX6-LABEL: s_set_rounding_i2_signext: 923; GFX6: ; %bb.0: 924; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 925; GFX6-NEXT: s_add_i32 s34, s4, -4 926; GFX6-NEXT: s_min_u32 s34, s4, s34 927; GFX6-NEXT: s_lshl_b32 s36, s34, 2 928; GFX6-NEXT: s_mov_b32 s34, 0x1c84a50f 929; GFX6-NEXT: s_mov_b32 s35, 0xb73e62d9 930; GFX6-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 931; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 932; GFX6-NEXT: s_setpc_b64 s[30:31] 933; 934; GFX7-LABEL: s_set_rounding_i2_signext: 
935; GFX7: ; %bb.0: 936; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 937; GFX7-NEXT: s_add_i32 s34, s4, -4 938; GFX7-NEXT: s_min_u32 s34, s4, s34 939; GFX7-NEXT: s_lshl_b32 s36, s34, 2 940; GFX7-NEXT: s_mov_b32 s34, 0x1c84a50f 941; GFX7-NEXT: s_mov_b32 s35, 0xb73e62d9 942; GFX7-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 943; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 944; GFX7-NEXT: s_setpc_b64 s[30:31] 945; 946; GFX8-LABEL: s_set_rounding_i2_signext: 947; GFX8: ; %bb.0: 948; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 949; GFX8-NEXT: s_sext_i32_i16 s34, s4 950; GFX8-NEXT: s_add_i32 s35, s34, -4 951; GFX8-NEXT: s_min_u32 s34, s34, s35 952; GFX8-NEXT: s_lshl_b32 s36, s34, 2 953; GFX8-NEXT: s_mov_b32 s34, 0x1c84a50f 954; GFX8-NEXT: s_mov_b32 s35, 0xb73e62d9 955; GFX8-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 956; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 957; GFX8-NEXT: s_setpc_b64 s[30:31] 958; 959; GFX9-LABEL: s_set_rounding_i2_signext: 960; GFX9: ; %bb.0: 961; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 962; GFX9-NEXT: s_sext_i32_i16 s34, s4 963; GFX9-NEXT: s_add_i32 s35, s34, -4 964; GFX9-NEXT: s_min_u32 s34, s34, s35 965; GFX9-NEXT: s_lshl_b32 s36, s34, 2 966; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f 967; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9 968; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 969; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 970; GFX9-NEXT: s_setpc_b64 s[30:31] 971; 972; GFX10-LABEL: s_set_rounding_i2_signext: 973; GFX10: ; %bb.0: 974; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 975; GFX10-NEXT: s_sext_i32_i16 s34, s4 976; GFX10-NEXT: s_add_i32 s35, s34, -4 977; GFX10-NEXT: s_min_u32 s36, s34, s35 978; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f 979; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9 980; GFX10-NEXT: s_lshl_b32 s36, s36, 2 981; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 982; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 983; GFX10-NEXT: s_setpc_b64 s[30:31] 984; 985; GFX11-LABEL: 
s_set_rounding_i2_signext: 986; GFX11: ; %bb.0: 987; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 988; GFX11-NEXT: s_sext_i32_i16 s0, s4 989; GFX11-NEXT: s_add_i32 s1, s0, -4 990; GFX11-NEXT: s_min_u32 s2, s0, s1 991; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f 992; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 993; GFX11-NEXT: s_lshl_b32 s2, s2, 2 994; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 995; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 996; GFX11-NEXT: s_setpc_b64 s[30:31] 997 %sext.rounding = sext i2 %rounding to i32 998 call void @llvm.set.rounding(i32 %sext.rounding) 999 ret void 1000} 1001 1002define amdgpu_gfx void @s_set_rounding_i3_signext(i3 signext inreg %rounding) { 1003; GFX6-LABEL: s_set_rounding_i3_signext: 1004; GFX6: ; %bb.0: 1005; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1006; GFX6-NEXT: s_add_i32 s34, s4, -4 1007; GFX6-NEXT: s_min_u32 s34, s4, s34 1008; GFX6-NEXT: s_lshl_b32 s36, s34, 2 1009; GFX6-NEXT: s_mov_b32 s34, 0x1c84a50f 1010; GFX6-NEXT: s_mov_b32 s35, 0xb73e62d9 1011; GFX6-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 1012; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1013; GFX6-NEXT: s_setpc_b64 s[30:31] 1014; 1015; GFX7-LABEL: s_set_rounding_i3_signext: 1016; GFX7: ; %bb.0: 1017; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1018; GFX7-NEXT: s_add_i32 s34, s4, -4 1019; GFX7-NEXT: s_min_u32 s34, s4, s34 1020; GFX7-NEXT: s_lshl_b32 s36, s34, 2 1021; GFX7-NEXT: s_mov_b32 s34, 0x1c84a50f 1022; GFX7-NEXT: s_mov_b32 s35, 0xb73e62d9 1023; GFX7-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 1024; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1025; GFX7-NEXT: s_setpc_b64 s[30:31] 1026; 1027; GFX8-LABEL: s_set_rounding_i3_signext: 1028; GFX8: ; %bb.0: 1029; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1030; GFX8-NEXT: s_sext_i32_i16 s34, s4 1031; GFX8-NEXT: s_add_i32 s35, s34, -4 1032; GFX8-NEXT: s_min_u32 s34, s34, s35 1033; GFX8-NEXT: s_lshl_b32 s36, s34, 2 1034; GFX8-NEXT: s_mov_b32 s34, 0x1c84a50f 1035; 
GFX8-NEXT: s_mov_b32 s35, 0xb73e62d9 1036; GFX8-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 1037; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1038; GFX8-NEXT: s_setpc_b64 s[30:31] 1039; 1040; GFX9-LABEL: s_set_rounding_i3_signext: 1041; GFX9: ; %bb.0: 1042; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1043; GFX9-NEXT: s_sext_i32_i16 s34, s4 1044; GFX9-NEXT: s_add_i32 s35, s34, -4 1045; GFX9-NEXT: s_min_u32 s34, s34, s35 1046; GFX9-NEXT: s_lshl_b32 s36, s34, 2 1047; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f 1048; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9 1049; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 1050; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1051; GFX9-NEXT: s_setpc_b64 s[30:31] 1052; 1053; GFX10-LABEL: s_set_rounding_i3_signext: 1054; GFX10: ; %bb.0: 1055; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1056; GFX10-NEXT: s_sext_i32_i16 s34, s4 1057; GFX10-NEXT: s_add_i32 s35, s34, -4 1058; GFX10-NEXT: s_min_u32 s36, s34, s35 1059; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f 1060; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9 1061; GFX10-NEXT: s_lshl_b32 s36, s36, 2 1062; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 1063; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1064; GFX10-NEXT: s_setpc_b64 s[30:31] 1065; 1066; GFX11-LABEL: s_set_rounding_i3_signext: 1067; GFX11: ; %bb.0: 1068; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1069; GFX11-NEXT: s_sext_i32_i16 s0, s4 1070; GFX11-NEXT: s_add_i32 s1, s0, -4 1071; GFX11-NEXT: s_min_u32 s2, s0, s1 1072; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f 1073; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 1074; GFX11-NEXT: s_lshl_b32 s2, s2, 2 1075; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 1076; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 1077; GFX11-NEXT: s_setpc_b64 s[30:31] 1078 %sext.rounding = sext i3 %rounding to i32 1079 call void @llvm.set.rounding(i32 %sext.rounding) 1080 ret void 1081} 1082 1083define amdgpu_gfx void @s_set_rounding_i3_zeroext(i3 zeroext inreg %rounding) { 1084; 
GFX6-LABEL: s_set_rounding_i3_zeroext: 1085; GFX6: ; %bb.0: 1086; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1087; GFX6-NEXT: s_add_i32 s34, s4, -4 1088; GFX6-NEXT: s_min_u32 s34, s4, s34 1089; GFX6-NEXT: s_lshl_b32 s36, s34, 2 1090; GFX6-NEXT: s_mov_b32 s34, 0x1c84a50f 1091; GFX6-NEXT: s_mov_b32 s35, 0xb73e62d9 1092; GFX6-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 1093; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1094; GFX6-NEXT: s_setpc_b64 s[30:31] 1095; 1096; GFX7-LABEL: s_set_rounding_i3_zeroext: 1097; GFX7: ; %bb.0: 1098; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1099; GFX7-NEXT: s_add_i32 s34, s4, -4 1100; GFX7-NEXT: s_min_u32 s34, s4, s34 1101; GFX7-NEXT: s_lshl_b32 s36, s34, 2 1102; GFX7-NEXT: s_mov_b32 s34, 0x1c84a50f 1103; GFX7-NEXT: s_mov_b32 s35, 0xb73e62d9 1104; GFX7-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 1105; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1106; GFX7-NEXT: s_setpc_b64 s[30:31] 1107; 1108; GFX8-LABEL: s_set_rounding_i3_zeroext: 1109; GFX8: ; %bb.0: 1110; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1111; GFX8-NEXT: s_and_b32 s34, 0xffff, s4 1112; GFX8-NEXT: s_add_i32 s35, s34, -4 1113; GFX8-NEXT: s_min_u32 s34, s34, s35 1114; GFX8-NEXT: s_lshl_b32 s36, s34, 2 1115; GFX8-NEXT: s_mov_b32 s34, 0x1c84a50f 1116; GFX8-NEXT: s_mov_b32 s35, 0xb73e62d9 1117; GFX8-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 1118; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1119; GFX8-NEXT: s_setpc_b64 s[30:31] 1120; 1121; GFX9-LABEL: s_set_rounding_i3_zeroext: 1122; GFX9: ; %bb.0: 1123; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1124; GFX9-NEXT: s_and_b32 s34, 0xffff, s4 1125; GFX9-NEXT: s_add_i32 s35, s34, -4 1126; GFX9-NEXT: s_min_u32 s34, s34, s35 1127; GFX9-NEXT: s_lshl_b32 s36, s34, 2 1128; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f 1129; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9 1130; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 1131; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1132; GFX9-NEXT: 
s_setpc_b64 s[30:31] 1133; 1134; GFX10-LABEL: s_set_rounding_i3_zeroext: 1135; GFX10: ; %bb.0: 1136; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1137; GFX10-NEXT: s_and_b32 s34, 0xffff, s4 1138; GFX10-NEXT: s_add_i32 s35, s34, -4 1139; GFX10-NEXT: s_min_u32 s36, s34, s35 1140; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f 1141; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9 1142; GFX10-NEXT: s_lshl_b32 s36, s36, 2 1143; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 1144; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1145; GFX10-NEXT: s_setpc_b64 s[30:31] 1146; 1147; GFX11-LABEL: s_set_rounding_i3_zeroext: 1148; GFX11: ; %bb.0: 1149; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1150; GFX11-NEXT: s_and_b32 s0, 0xffff, s4 1151; GFX11-NEXT: s_add_i32 s1, s0, -4 1152; GFX11-NEXT: s_min_u32 s2, s0, s1 1153; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f 1154; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 1155; GFX11-NEXT: s_lshl_b32 s2, s2, 2 1156; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 1157; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 1158; GFX11-NEXT: s_setpc_b64 s[30:31] 1159 %zext.rounding = zext i3 %rounding to i32 1160 call void @llvm.set.rounding(i32 %zext.rounding) 1161 ret void 1162} 1163 1164define amdgpu_gfx void @s_set_rounding_select_0_1(i32 inreg %cond) { 1165; GFX6-LABEL: s_set_rounding_select_0_1: 1166; GFX6: ; %bb.0: 1167; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1168; GFX6-NEXT: s_cmp_lg_u32 s4, 0 1169; GFX6-NEXT: s_cselect_b64 s[34:35], -1, 0 1170; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] 1171; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1172; GFX6-NEXT: v_lshr_b32_e32 v0, 0xa50f, v0 1173; GFX6-NEXT: v_readfirstlane_b32 s34, v0 1174; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1175; GFX6-NEXT: s_setpc_b64 s[30:31] 1176; 1177; GFX7-LABEL: s_set_rounding_select_0_1: 1178; GFX7: ; %bb.0: 1179; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1180; GFX7-NEXT: s_cmp_lg_u32 s4, 0 1181; GFX7-NEXT: s_cselect_b64 s[34:35], -1, 0 1182; 
GFX7-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] 1183; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1184; GFX7-NEXT: v_lshr_b32_e32 v0, 0xa50f, v0 1185; GFX7-NEXT: v_readfirstlane_b32 s34, v0 1186; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1187; GFX7-NEXT: s_setpc_b64 s[30:31] 1188; 1189; GFX8-LABEL: s_set_rounding_select_0_1: 1190; GFX8: ; %bb.0: 1191; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1192; GFX8-NEXT: s_cmp_lg_u32 s4, 0 1193; GFX8-NEXT: s_cselect_b64 s[34:35], -1, 0 1194; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] 1195; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1196; GFX8-NEXT: s_mov_b32 s34, 0xa50f 1197; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s34 1198; GFX8-NEXT: v_readfirstlane_b32 s34, v0 1199; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1200; GFX8-NEXT: s_setpc_b64 s[30:31] 1201; 1202; GFX9-LABEL: s_set_rounding_select_0_1: 1203; GFX9: ; %bb.0: 1204; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1205; GFX9-NEXT: s_cmp_lg_u32 s4, 0 1206; GFX9-NEXT: s_cselect_b64 s[34:35], -1, 0 1207; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] 1208; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1209; GFX9-NEXT: s_mov_b32 s34, 0xa50f 1210; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s34 1211; GFX9-NEXT: v_readfirstlane_b32 s34, v0 1212; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1213; GFX9-NEXT: s_setpc_b64 s[30:31] 1214; 1215; GFX10-LABEL: s_set_rounding_select_0_1: 1216; GFX10: ; %bb.0: 1217; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1218; GFX10-NEXT: s_cmp_lg_u32 s4, 0 1219; GFX10-NEXT: s_cselect_b32 s34, -1, 0 1220; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s34 1221; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1222; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, 0xa50f 1223; GFX10-NEXT: v_readfirstlane_b32 s34, v0 1224; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1225; GFX10-NEXT: s_setpc_b64 s[30:31] 1226; 1227; GFX11-LABEL: s_set_rounding_select_0_1: 1228; GFX11: ; %bb.0: 1229; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
1230; GFX11-NEXT: s_cmp_lg_u32 s4, 0 1231; GFX11-NEXT: s_cselect_b32 s0, -1, 0 1232; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 1233; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1234; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, 0xa50f 1235; GFX11-NEXT: v_readfirstlane_b32 s0, v0 1236; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 1237; GFX11-NEXT: s_setpc_b64 s[30:31] 1238 %cmp = icmp eq i32 %cond, 0 1239 %rounding = select i1 %cmp, i32 0, i32 1 1240 call void @llvm.set.rounding(i32 %rounding) 1241 ret void 1242} 1243 1244define amdgpu_gfx void @s_set_rounding_select_1_3(i32 inreg %cond) { 1245; GFX678-LABEL: s_set_rounding_select_1_3: 1246; GFX678: ; %bb.0: 1247; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1248; GFX678-NEXT: s_cmp_eq_u32 s4, 0 1249; GFX678-NEXT: s_cselect_b32 s34, 0xa50, 10 1250; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1251; GFX678-NEXT: s_setpc_b64 s[30:31] 1252; 1253; GFX9-LABEL: s_set_rounding_select_1_3: 1254; GFX9: ; %bb.0: 1255; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1256; GFX9-NEXT: s_cmp_eq_u32 s4, 0 1257; GFX9-NEXT: s_cselect_b32 s34, 0xa50, 10 1258; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1259; GFX9-NEXT: s_setpc_b64 s[30:31] 1260; 1261; GFX10-LABEL: s_set_rounding_select_1_3: 1262; GFX10: ; %bb.0: 1263; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1264; GFX10-NEXT: s_cmp_eq_u32 s4, 0 1265; GFX10-NEXT: s_cselect_b32 s34, 0xa50, 10 1266; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1267; GFX10-NEXT: s_setpc_b64 s[30:31] 1268; 1269; GFX11-LABEL: s_set_rounding_select_1_3: 1270; GFX11: ; %bb.0: 1271; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1272; GFX11-NEXT: s_cmp_eq_u32 s4, 0 1273; GFX11-NEXT: s_cselect_b32 s0, 0xa50, 10 1274; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 1275; GFX11-NEXT: s_setpc_b64 s[30:31] 1276 %cmp = icmp eq i32 %cond, 0 1277 %rounding = select i1 %cmp, i32 1, i32 3 1278 call void @llvm.set.rounding(i32 %rounding) 1279 ret void 1280} 
1281 1282define void @v_set_rounding_select_1_3(i32 %cond) { 1283; GFX678-LABEL: v_set_rounding_select_1_3: 1284; GFX678: ; %bb.0: 1285; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1286; GFX678-NEXT: v_mov_b32_e32 v1, 0xa50 1287; GFX678-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1288; GFX678-NEXT: v_cndmask_b32_e32 v0, 10, v1, vcc 1289; GFX678-NEXT: v_readfirstlane_b32 s4, v0 1290; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 1291; GFX678-NEXT: s_setpc_b64 s[30:31] 1292; 1293; GFX9-LABEL: v_set_rounding_select_1_3: 1294; GFX9: ; %bb.0: 1295; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1296; GFX9-NEXT: v_mov_b32_e32 v1, 0xa50 1297; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1298; GFX9-NEXT: v_cndmask_b32_e32 v0, 10, v1, vcc 1299; GFX9-NEXT: v_readfirstlane_b32 s4, v0 1300; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 1301; GFX9-NEXT: s_setpc_b64 s[30:31] 1302; 1303; GFX10-LABEL: v_set_rounding_select_1_3: 1304; GFX10: ; %bb.0: 1305; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1306; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1307; GFX10-NEXT: v_cndmask_b32_e64 v0, 10, 0xa50, vcc_lo 1308; GFX10-NEXT: v_readfirstlane_b32 s4, v0 1309; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 1310; GFX10-NEXT: s_setpc_b64 s[30:31] 1311; 1312; GFX11-LABEL: v_set_rounding_select_1_3: 1313; GFX11: ; %bb.0: 1314; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1315; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1316; GFX11-NEXT: v_cndmask_b32_e64 v0, 10, 0xa50, vcc_lo 1317; GFX11-NEXT: v_readfirstlane_b32 s0, v0 1318; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 1319; GFX11-NEXT: s_setpc_b64 s[30:31] 1320 %cmp = icmp eq i32 %cond, 0 1321 %rounding = select i1 %cmp, i32 1, i32 3 1322 call void @llvm.set.rounding(i32 %rounding) 1323 ret void 1324} 1325 1326define amdgpu_gfx void @s_set_rounding_select_2_0(i32 inreg %cond) { 1327; GFX6-LABEL: s_set_rounding_select_2_0: 1328; GFX6: ; %bb.0: 1329; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 1330; GFX6-NEXT: s_cmp_eq_u32 s4, 0 1331; GFX6-NEXT: s_cselect_b64 s[34:35], -1, 0 1332; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] 1333; GFX6-NEXT: v_lshlrev_b32_e32 v0, 3, v0 1334; GFX6-NEXT: v_lshr_b32_e32 v0, 0xa50f, v0 1335; GFX6-NEXT: v_readfirstlane_b32 s34, v0 1336; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1337; GFX6-NEXT: s_setpc_b64 s[30:31] 1338; 1339; GFX7-LABEL: s_set_rounding_select_2_0: 1340; GFX7: ; %bb.0: 1341; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1342; GFX7-NEXT: s_cmp_eq_u32 s4, 0 1343; GFX7-NEXT: s_cselect_b64 s[34:35], -1, 0 1344; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] 1345; GFX7-NEXT: v_lshlrev_b32_e32 v0, 3, v0 1346; GFX7-NEXT: v_lshr_b32_e32 v0, 0xa50f, v0 1347; GFX7-NEXT: v_readfirstlane_b32 s34, v0 1348; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1349; GFX7-NEXT: s_setpc_b64 s[30:31] 1350; 1351; GFX8-LABEL: s_set_rounding_select_2_0: 1352; GFX8: ; %bb.0: 1353; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1354; GFX8-NEXT: s_cmp_eq_u32 s4, 0 1355; GFX8-NEXT: s_cselect_b64 s[34:35], -1, 0 1356; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] 1357; GFX8-NEXT: v_lshlrev_b32_e32 v0, 3, v0 1358; GFX8-NEXT: s_mov_b32 s34, 0xa50f 1359; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s34 1360; GFX8-NEXT: v_readfirstlane_b32 s34, v0 1361; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1362; GFX8-NEXT: s_setpc_b64 s[30:31] 1363; 1364; GFX9-LABEL: s_set_rounding_select_2_0: 1365; GFX9: ; %bb.0: 1366; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1367; GFX9-NEXT: s_cmp_eq_u32 s4, 0 1368; GFX9-NEXT: s_cselect_b64 s[34:35], -1, 0 1369; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] 1370; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0 1371; GFX9-NEXT: s_mov_b32 s34, 0xa50f 1372; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s34 1373; GFX9-NEXT: v_readfirstlane_b32 s34, v0 1374; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1375; GFX9-NEXT: s_setpc_b64 s[30:31] 1376; 1377; GFX10-LABEL: 
s_set_rounding_select_2_0: 1378; GFX10: ; %bb.0: 1379; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1380; GFX10-NEXT: s_cmp_eq_u32 s4, 0 1381; GFX10-NEXT: s_cselect_b32 s34, -1, 0 1382; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s34 1383; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0 1384; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, 0xa50f 1385; GFX10-NEXT: v_readfirstlane_b32 s34, v0 1386; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1387; GFX10-NEXT: s_setpc_b64 s[30:31] 1388; 1389; GFX11-LABEL: s_set_rounding_select_2_0: 1390; GFX11: ; %bb.0: 1391; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1392; GFX11-NEXT: s_cmp_eq_u32 s4, 0 1393; GFX11-NEXT: s_cselect_b32 s0, -1, 0 1394; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 1395; GFX11-NEXT: v_lshlrev_b32_e32 v0, 3, v0 1396; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, 0xa50f 1397; GFX11-NEXT: v_readfirstlane_b32 s0, v0 1398; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 1399; GFX11-NEXT: s_setpc_b64 s[30:31] 1400 %cmp = icmp eq i32 %cond, 0 1401 %rounding = select i1 %cmp, i32 2, i32 0 1402 call void @llvm.set.rounding(i32 %rounding) 1403 ret void 1404} 1405 1406define amdgpu_gfx void @s_set_rounding_select_2_1(i32 inreg %cond) { 1407; GFX678-LABEL: s_set_rounding_select_2_1: 1408; GFX678: ; %bb.0: 1409; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1410; GFX678-NEXT: s_cmp_eq_u32 s4, 0 1411; GFX678-NEXT: s_movk_i32 s34, 0xa5 1412; GFX678-NEXT: s_cselect_b32 s34, s34, 0xa50 1413; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1414; GFX678-NEXT: s_setpc_b64 s[30:31] 1415; 1416; GFX9-LABEL: s_set_rounding_select_2_1: 1417; GFX9: ; %bb.0: 1418; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1419; GFX9-NEXT: s_cmp_eq_u32 s4, 0 1420; GFX9-NEXT: s_movk_i32 s34, 0xa5 1421; GFX9-NEXT: s_cselect_b32 s34, s34, 0xa50 1422; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1423; GFX9-NEXT: s_setpc_b64 s[30:31] 1424; 1425; GFX10-LABEL: s_set_rounding_select_2_1: 1426; GFX10: ; %bb.0: 1427; 
GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1428; GFX10-NEXT: s_cmp_eq_u32 s4, 0 1429; GFX10-NEXT: s_movk_i32 s34, 0xa5 1430; GFX10-NEXT: s_cselect_b32 s34, s34, 0xa50 1431; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1432; GFX10-NEXT: s_setpc_b64 s[30:31] 1433; 1434; GFX11-LABEL: s_set_rounding_select_2_1: 1435; GFX11: ; %bb.0: 1436; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1437; GFX11-NEXT: s_cmp_eq_u32 s4, 0 1438; GFX11-NEXT: s_movk_i32 s0, 0xa5 1439; GFX11-NEXT: s_cselect_b32 s0, s0, 0xa50 1440; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 1441; GFX11-NEXT: s_setpc_b64 s[30:31] 1442 %cmp = icmp eq i32 %cond, 0 1443 %rounding = select i1 %cmp, i32 2, i32 1 1444 call void @llvm.set.rounding(i32 %rounding) 1445 ret void 1446} 1447 1448define amdgpu_gfx void @s_set_rounding_select_1_2(i32 inreg %cond) { 1449; GFX678-LABEL: s_set_rounding_select_1_2: 1450; GFX678: ; %bb.0: 1451; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1452; GFX678-NEXT: s_cmp_eq_u32 s4, 0 1453; GFX678-NEXT: s_movk_i32 s34, 0xa50 1454; GFX678-NEXT: s_cselect_b32 s34, s34, 0xa5 1455; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1456; GFX678-NEXT: s_setpc_b64 s[30:31] 1457; 1458; GFX9-LABEL: s_set_rounding_select_1_2: 1459; GFX9: ; %bb.0: 1460; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1461; GFX9-NEXT: s_cmp_eq_u32 s4, 0 1462; GFX9-NEXT: s_movk_i32 s34, 0xa50 1463; GFX9-NEXT: s_cselect_b32 s34, s34, 0xa5 1464; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1465; GFX9-NEXT: s_setpc_b64 s[30:31] 1466; 1467; GFX10-LABEL: s_set_rounding_select_1_2: 1468; GFX10: ; %bb.0: 1469; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1470; GFX10-NEXT: s_cmp_eq_u32 s4, 0 1471; GFX10-NEXT: s_movk_i32 s34, 0xa50 1472; GFX10-NEXT: s_cselect_b32 s34, s34, 0xa5 1473; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1474; GFX10-NEXT: s_setpc_b64 s[30:31] 1475; 1476; GFX11-LABEL: s_set_rounding_select_1_2: 1477; GFX11: ; %bb.0: 1478; 
GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1479; GFX11-NEXT: s_cmp_eq_u32 s4, 0 1480; GFX11-NEXT: s_movk_i32 s0, 0xa50 1481; GFX11-NEXT: s_cselect_b32 s0, s0, 0xa5 1482; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 1483; GFX11-NEXT: s_setpc_b64 s[30:31] 1484 %cmp = icmp eq i32 %cond, 0 1485 %rounding = select i1 %cmp, i32 1, i32 2 1486 call void @llvm.set.rounding(i32 %rounding) 1487 ret void 1488} 1489 1490define amdgpu_gfx void @s_set_rounding_select_3_0(i32 inreg %cond) { 1491; GFX678-LABEL: s_set_rounding_select_3_0: 1492; GFX678: ; %bb.0: 1493; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1494; GFX678-NEXT: s_cmp_eq_u32 s4, 0 1495; GFX678-NEXT: s_cselect_b32 s34, 10, 0xa50f 1496; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1497; GFX678-NEXT: s_setpc_b64 s[30:31] 1498; 1499; GFX9-LABEL: s_set_rounding_select_3_0: 1500; GFX9: ; %bb.0: 1501; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1502; GFX9-NEXT: s_cmp_eq_u32 s4, 0 1503; GFX9-NEXT: s_cselect_b32 s34, 10, 0xa50f 1504; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1505; GFX9-NEXT: s_setpc_b64 s[30:31] 1506; 1507; GFX10-LABEL: s_set_rounding_select_3_0: 1508; GFX10: ; %bb.0: 1509; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1510; GFX10-NEXT: s_cmp_eq_u32 s4, 0 1511; GFX10-NEXT: s_cselect_b32 s34, 10, 0xa50f 1512; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1513; GFX10-NEXT: s_setpc_b64 s[30:31] 1514; 1515; GFX11-LABEL: s_set_rounding_select_3_0: 1516; GFX11: ; %bb.0: 1517; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1518; GFX11-NEXT: s_cmp_eq_u32 s4, 0 1519; GFX11-NEXT: s_cselect_b32 s0, 10, 0xa50f 1520; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 1521; GFX11-NEXT: s_setpc_b64 s[30:31] 1522 %cmp = icmp eq i32 %cond, 0 1523 %rounding = select i1 %cmp, i32 3, i32 0 1524 call void @llvm.set.rounding(i32 %rounding) 1525 ret void 1526} 1527 1528define amdgpu_gfx void @s_set_rounding_select_4_0(i32 inreg %cond) { 1529; 
GFX678-LABEL: s_set_rounding_select_4_0: 1530; GFX678: ; %bb.0: 1531; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1532; GFX678-NEXT: s_cmp_eq_u32 s4, 0 1533; GFX678-NEXT: s_cselect_b64 s[34:35], -1, 0 1534; GFX678-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] 1535; GFX678-NEXT: v_readfirstlane_b32 s34, v0 1536; GFX678-NEXT: s_lshl_b32 s34, s34, 2 1537; GFX678-NEXT: s_add_i32 s35, s34, -4 1538; GFX678-NEXT: s_min_u32 s34, s34, s35 1539; GFX678-NEXT: s_lshl_b32 s36, s34, 2 1540; GFX678-NEXT: s_mov_b32 s34, 0x1c84a50f 1541; GFX678-NEXT: s_mov_b32 s35, 0xb73e62d9 1542; GFX678-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 1543; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1544; GFX678-NEXT: s_setpc_b64 s[30:31] 1545; 1546; GFX9-LABEL: s_set_rounding_select_4_0: 1547; GFX9: ; %bb.0: 1548; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1549; GFX9-NEXT: s_cmp_eq_u32 s4, 0 1550; GFX9-NEXT: s_cselect_b64 s[34:35], -1, 0 1551; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] 1552; GFX9-NEXT: v_readfirstlane_b32 s34, v0 1553; GFX9-NEXT: s_lshl_b32 s34, s34, 2 1554; GFX9-NEXT: s_add_i32 s35, s34, -4 1555; GFX9-NEXT: s_min_u32 s34, s34, s35 1556; GFX9-NEXT: s_lshl_b32 s36, s34, 2 1557; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f 1558; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9 1559; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 1560; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1561; GFX9-NEXT: s_setpc_b64 s[30:31] 1562; 1563; GFX10-LABEL: s_set_rounding_select_4_0: 1564; GFX10: ; %bb.0: 1565; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1566; GFX10-NEXT: s_cmp_eq_u32 s4, 0 1567; GFX10-NEXT: s_cselect_b32 s34, -1, 0 1568; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s34 1569; GFX10-NEXT: v_readfirstlane_b32 s34, v0 1570; GFX10-NEXT: s_lshl_b32 s34, s34, 2 1571; GFX10-NEXT: s_add_i32 s35, s34, -4 1572; GFX10-NEXT: s_min_u32 s36, s34, s35 1573; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f 1574; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9 1575; GFX10-NEXT: s_lshl_b32 s36, s36, 2 
1576; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 1577; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1578; GFX10-NEXT: s_setpc_b64 s[30:31] 1579; 1580; GFX11-LABEL: s_set_rounding_select_4_0: 1581; GFX11: ; %bb.0: 1582; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1583; GFX11-NEXT: s_cmp_eq_u32 s4, 0 1584; GFX11-NEXT: s_cselect_b32 s0, -1, 0 1585; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 1586; GFX11-NEXT: v_readfirstlane_b32 s0, v0 1587; GFX11-NEXT: s_lshl_b32 s0, s0, 2 1588; GFX11-NEXT: s_add_i32 s1, s0, -4 1589; GFX11-NEXT: s_min_u32 s2, s0, s1 1590; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f 1591; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 1592; GFX11-NEXT: s_lshl_b32 s2, s2, 2 1593; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 1594; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 1595; GFX11-NEXT: s_setpc_b64 s[30:31] 1596 %cmp = icmp eq i32 %cond, 0 1597 %rounding = select i1 %cmp, i32 4, i32 0 1598 call void @llvm.set.rounding(i32 %rounding) 1599 ret void 1600} 1601 1602define amdgpu_gfx void @s_set_rounding_select_3_5(i32 inreg %cond) { 1603; GFX678-LABEL: s_set_rounding_select_3_5: 1604; GFX678: ; %bb.0: 1605; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1606; GFX678-NEXT: s_cmp_eq_u32 s4, 0 1607; GFX678-NEXT: s_cselect_b32 s34, 3, 5 1608; GFX678-NEXT: s_add_i32 s35, s34, -4 1609; GFX678-NEXT: s_min_u32 s34, s34, s35 1610; GFX678-NEXT: s_lshl_b32 s36, s34, 2 1611; GFX678-NEXT: s_mov_b32 s34, 0x1c84a50f 1612; GFX678-NEXT: s_mov_b32 s35, 0xb73e62d9 1613; GFX678-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 1614; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1615; GFX678-NEXT: s_setpc_b64 s[30:31] 1616; 1617; GFX9-LABEL: s_set_rounding_select_3_5: 1618; GFX9: ; %bb.0: 1619; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1620; GFX9-NEXT: s_cmp_eq_u32 s4, 0 1621; GFX9-NEXT: s_cselect_b32 s34, 3, 5 1622; GFX9-NEXT: s_add_i32 s35, s34, -4 1623; GFX9-NEXT: s_min_u32 s34, s34, s35 1624; GFX9-NEXT: s_lshl_b32 s36, s34, 2 1625; GFX9-NEXT: 
s_mov_b32 s34, 0x1c84a50f 1626; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9 1627; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 1628; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1629; GFX9-NEXT: s_setpc_b64 s[30:31] 1630; 1631; GFX10-LABEL: s_set_rounding_select_3_5: 1632; GFX10: ; %bb.0: 1633; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1634; GFX10-NEXT: s_cmp_eq_u32 s4, 0 1635; GFX10-NEXT: s_cselect_b32 s34, 3, 5 1636; GFX10-NEXT: s_add_i32 s35, s34, -4 1637; GFX10-NEXT: s_min_u32 s36, s34, s35 1638; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f 1639; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9 1640; GFX10-NEXT: s_lshl_b32 s36, s36, 2 1641; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 1642; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 1643; GFX10-NEXT: s_setpc_b64 s[30:31] 1644; 1645; GFX11-LABEL: s_set_rounding_select_3_5: 1646; GFX11: ; %bb.0: 1647; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1648; GFX11-NEXT: s_cmp_eq_u32 s4, 0 1649; GFX11-NEXT: s_cselect_b32 s0, 3, 5 1650; GFX11-NEXT: s_add_i32 s1, s0, -4 1651; GFX11-NEXT: s_min_u32 s2, s0, s1 1652; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f 1653; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 1654; GFX11-NEXT: s_lshl_b32 s2, s2, 2 1655; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 1656; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 1657; GFX11-NEXT: s_setpc_b64 s[30:31] 1658 %cmp = icmp eq i32 %cond, 0 1659 %rounding = select i1 %cmp, i32 3, i32 5 1660 call void @llvm.set.rounding(i32 %rounding) 1661 ret void 1662} 1663 1664define amdgpu_kernel void @get_rounding_after_set_rounding_1() { 1665; GFX6-LABEL: get_rounding_after_set_rounding_1: 1666; GFX6: ; %bb.0: 1667; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 1668; GFX6-NEXT: s_mov_b32 s3, 0xf000 1669; GFX6-NEXT: s_nop 0 1670; GFX6-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4) 1671; GFX6-NEXT: s_lshl_b32 s2, s0, 2 1672; GFX6-NEXT: s_mov_b32 s0, 0xeb24da71 1673; GFX6-NEXT: s_mov_b32 s1, 0xc96f385 1674; GFX6-NEXT: s_lshr_b64 s[0:1], 
s[0:1], s2 1675; GFX6-NEXT: s_and_b32 s0, s0, 15 1676; GFX6-NEXT: s_add_i32 s1, s0, 4 1677; GFX6-NEXT: s_cmp_lt_u32 s0, 4 1678; GFX6-NEXT: s_cselect_b32 s4, s0, s1 1679; GFX6-NEXT: s_mov_b32 s0, 0 1680; GFX6-NEXT: s_mov_b32 s2, -1 1681; GFX6-NEXT: s_mov_b32 s1, s0 1682; GFX6-NEXT: v_mov_b32_e32 v0, s4 1683; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 1684; GFX6-NEXT: s_waitcnt vmcnt(0) 1685; GFX6-NEXT: s_endpgm 1686; 1687; GFX7-LABEL: get_rounding_after_set_rounding_1: 1688; GFX7: ; %bb.0: 1689; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 1690; GFX7-NEXT: s_mov_b32 s3, 0xf000 1691; GFX7-NEXT: s_nop 0 1692; GFX7-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4) 1693; GFX7-NEXT: s_lshl_b32 s2, s0, 2 1694; GFX7-NEXT: s_mov_b32 s0, 0xeb24da71 1695; GFX7-NEXT: s_mov_b32 s1, 0xc96f385 1696; GFX7-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 1697; GFX7-NEXT: s_and_b32 s0, s0, 15 1698; GFX7-NEXT: s_add_i32 s1, s0, 4 1699; GFX7-NEXT: s_cmp_lt_u32 s0, 4 1700; GFX7-NEXT: s_cselect_b32 s4, s0, s1 1701; GFX7-NEXT: s_mov_b32 s0, 0 1702; GFX7-NEXT: s_mov_b32 s2, -1 1703; GFX7-NEXT: s_mov_b32 s1, s0 1704; GFX7-NEXT: v_mov_b32_e32 v0, s4 1705; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 1706; GFX7-NEXT: s_waitcnt vmcnt(0) 1707; GFX7-NEXT: s_endpgm 1708; 1709; GFX8-LABEL: get_rounding_after_set_rounding_1: 1710; GFX8: ; %bb.0: 1711; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 1712; GFX8-NEXT: v_mov_b32_e32 v0, 0 1713; GFX8-NEXT: v_mov_b32_e32 v1, 0 1714; GFX8-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4) 1715; GFX8-NEXT: s_lshl_b32 s2, s0, 2 1716; GFX8-NEXT: s_mov_b32 s0, 0xeb24da71 1717; GFX8-NEXT: s_mov_b32 s1, 0xc96f385 1718; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 1719; GFX8-NEXT: s_and_b32 s0, s0, 15 1720; GFX8-NEXT: s_add_i32 s1, s0, 4 1721; GFX8-NEXT: s_cmp_lt_u32 s0, 4 1722; GFX8-NEXT: s_cselect_b32 s0, s0, s1 1723; GFX8-NEXT: v_mov_b32_e32 v2, s0 1724; GFX8-NEXT: flat_store_dword v[0:1], v2 1725; GFX8-NEXT: s_waitcnt vmcnt(0) 1726; GFX8-NEXT: 
s_endpgm 1727; 1728; GFX9-LABEL: get_rounding_after_set_rounding_1: 1729; GFX9: ; %bb.0: 1730; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 1731; GFX9-NEXT: v_mov_b32_e32 v0, 0 1732; GFX9-NEXT: v_mov_b32_e32 v1, 0 1733; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4) 1734; GFX9-NEXT: s_lshl_b32 s2, s0, 2 1735; GFX9-NEXT: s_mov_b32 s0, 0xeb24da71 1736; GFX9-NEXT: s_mov_b32 s1, 0xc96f385 1737; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 1738; GFX9-NEXT: s_and_b32 s0, s0, 15 1739; GFX9-NEXT: s_add_i32 s1, s0, 4 1740; GFX9-NEXT: s_cmp_lt_u32 s0, 4 1741; GFX9-NEXT: s_cselect_b32 s0, s0, s1 1742; GFX9-NEXT: v_mov_b32_e32 v2, s0 1743; GFX9-NEXT: global_store_dword v[0:1], v2, off 1744; GFX9-NEXT: s_waitcnt vmcnt(0) 1745; GFX9-NEXT: s_endpgm 1746; 1747; GFX10-LABEL: get_rounding_after_set_rounding_1: 1748; GFX10: ; %bb.0: 1749; GFX10-NEXT: s_round_mode 0x0 1750; GFX10-NEXT: s_mov_b32 s0, 0xeb24da71 1751; GFX10-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4) 1752; GFX10-NEXT: s_mov_b32 s1, 0xc96f385 1753; GFX10-NEXT: s_lshl_b32 s2, s2, 2 1754; GFX10-NEXT: v_mov_b32_e32 v0, 0 1755; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 1756; GFX10-NEXT: v_mov_b32_e32 v1, 0 1757; GFX10-NEXT: s_and_b32 s0, s0, 15 1758; GFX10-NEXT: s_add_i32 s1, s0, 4 1759; GFX10-NEXT: s_cmp_lt_u32 s0, 4 1760; GFX10-NEXT: s_cselect_b32 s0, s0, s1 1761; GFX10-NEXT: v_mov_b32_e32 v2, s0 1762; GFX10-NEXT: global_store_dword v[0:1], v2, off 1763; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1764; GFX10-NEXT: s_endpgm 1765; 1766; GFX11-LABEL: get_rounding_after_set_rounding_1: 1767; GFX11: ; %bb.0: 1768; GFX11-NEXT: s_round_mode 0x0 1769; GFX11-NEXT: s_mov_b32 s0, 0xeb24da71 1770; GFX11-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4) 1771; GFX11-NEXT: s_mov_b32 s1, 0xc96f385 1772; GFX11-NEXT: s_lshl_b32 s2, s2, 2 1773; GFX11-NEXT: v_mov_b32_e32 v0, 0 1774; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 1775; GFX11-NEXT: s_and_b32 s0, s0, 15 1776; GFX11-NEXT: s_add_i32 s1, s0, 4 1777; GFX11-NEXT: s_cmp_lt_u32 
s0, 4 1778; GFX11-NEXT: s_cselect_b32 s0, s0, s1 1779; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0 1780; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc 1781; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1782; GFX11-NEXT: s_endpgm 1783 tail call void @llvm.set.rounding(i32 1) 1784 %set.mode = tail call i32 @llvm.get.rounding() 1785 store volatile i32 %set.mode, ptr addrspace(1) null 1786 ret void 1787} 1788 1789;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: 1790; GCN: {{.*}} 1791