; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX678,GFX6 %s
; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX678,GFX7 %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX678,GFX8 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s

; Code generation tests for the llvm.get.fpmode.i32 intrinsic on amdgcn.
; Every test calls the intrinsic; most then AND the result with a constant
; mask. The recorded output always reads HW_REG_MODE with s_getreg_b32 and
; applies a first s_and_b32 with 0x7f3ff (tahiti/hawaii/fiji runs, 19-bit read)
; or 0x87f3ff (gfx900 and newer runs, 24-bit read).
; The assertion lines are tool-generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

declare i32 @llvm.get.fpmode.i32()

; Baseline: return the unmodified intrinsic result from a callable function.
define i32 @func_fpmode_i32() {
; GFX678-LABEL: func_fpmode_i32:
; GFX678:       ; %bb.0:
; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT:    v_mov_b32_e32 v0, s4
; GFX678-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %fpmode = call i32 @llvm.get.fpmode.i32()
  ret i32 %fpmode
}

; Same as func_fpmode_i32, but both the function and the call site carry the
; strictfp attribute. The recorded output matches the non-strictfp baseline.
define i32 @strictfp_func_fpmode_i32() strictfp {
; GFX678-LABEL: strictfp_func_fpmode_i32:
; GFX678:       ; %bb.0:
; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT:    v_mov_b32_e32 v0, s4
; GFX678-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: strictfp_func_fpmode_i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: strictfp_func_fpmode_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: strictfp_func_fpmode_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %fpmode = call i32 @llvm.get.fpmode.i32() strictfp
  ret i32 %fpmode
}

; Kernel entry point variant: the intrinsic result is stored through the
; addrspace(1) pointer argument instead of being returned. This is the only
; test whose output differs between the tahiti, hawaii and fiji runs, so it
; uses the per-target prefixes rather than the shared gfx6/7/8 one.
define amdgpu_kernel void @kernel_fpmode_i32(ptr addrspace(1) %ptr) {
; GFX6-LABEL: kernel_fpmode_i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; GFX6-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX6-NEXT:    s_and_b32 s4, 0x7f3ff, s4
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s2, -1
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT:    s_endpgm
;
; GFX7-LABEL: kernel_fpmode_i32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; GFX7-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX7-NEXT:    s_and_b32 s4, 0x7f3ff, s4
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX7-NEXT:    s_endpgm
;
; GFX8-LABEL: kernel_fpmode_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 19)
; GFX8-NEXT:    s_and_b32 s2, 0x7f3ff, s2
; GFX8-NEXT:    v_mov_b32_e32 v2, s2
; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-NEXT:    flat_store_dword v[0:1], v2
; GFX8-NEXT:    s_endpgm
;
; GFX9-LABEL: kernel_fpmode_i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT:    s_and_b32 s2, 0x87f3ff, s2
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_mov_b32_e32 v1, s2
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: kernel_fpmode_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    s_and_b32 s2, 0x87f3ff, s2
; GFX10-NEXT:    v_mov_b32_e32 v1, s2
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: kernel_fpmode_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT:    s_and_b32 s2, 0x87f3ff, s2
; GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %fpmode = call i32 @llvm.get.fpmode.i32()
  store i32 %fpmode, ptr addrspace(1) %ptr
  ret void
}

; TODO: We should be able to reduce the demanded bits and ask for less
; from s_getreg_b32
;
; Keeps only bits 4-7 (mask 240 = 0xf0). As recorded, the user mask is emitted
; as a second s_and_b32 after the full-width read and default mask; the same
; pattern appears in every masked test that follows.
define i32 @func_fpmode_i32_denormonly() {
; GFX678-LABEL: func_fpmode_i32_denormonly:
; GFX678:       ; %bb.0:
; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT:    s_and_b32 s4, s4, 0xf0
; GFX678-NEXT:    v_mov_b32_e32 v0, s4
; GFX678-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_denormonly:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT:    s_and_b32 s4, s4, 0xf0
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_denormonly:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT:    s_and_b32 s4, s4, 0xf0
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_denormonly:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT:    s_and_b32 s0, s0, 0xf0
; GFX11-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %fpmode = call i32 @llvm.get.fpmode.i32()
  %denorm.only = and i32 %fpmode, 240
  ret i32 %denorm.only
}

; Keeps only bits 0-3 (mask 15 = 0xf).
define i32 @func_fpmode_i32_roundonly() {
; GFX678-LABEL: func_fpmode_i32_roundonly:
; GFX678:       ; %bb.0:
; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT:    s_and_b32 s4, s4, 15
; GFX678-NEXT:    v_mov_b32_e32 v0, s4
; GFX678-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_roundonly:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT:    s_and_b32 s4, s4, 15
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_roundonly:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT:    s_and_b32 s4, s4, 15
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_roundonly:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT:    s_and_b32 s0, s0, 15
; GFX11-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %fpmode = call i32 @llvm.get.fpmode.i32()
  %round.only = and i32 %fpmode, 15
  ret i32 %round.only
}

; Keeps bits 0-7 (mask 255 = 0xff), i.e. the union of the masks used by the
; roundonly and denormonly tests.
define i32 @func_fpmode_i32_round_denorm_only() {
; GFX678-LABEL: func_fpmode_i32_round_denorm_only:
; GFX678:       ; %bb.0:
; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT:    s_and_b32 s4, s4, 0xff
; GFX678-NEXT:    v_mov_b32_e32 v0, s4
; GFX678-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_round_denorm_only:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT:    s_and_b32 s4, s4, 0xff
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_round_denorm_only:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT:    s_and_b32 s4, s4, 0xff
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_round_denorm_only:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT:    s_and_b32 s0, s0, 0xff
; GFX11-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %fpmode = call i32 @llvm.get.fpmode.i32()
  %round.denorm.only = and i32 %fpmode, 255
  ret i32 %round.denorm.only
}

; Keeps bits 0-9 (mask 1023 = 0x3ff): the round/denorm byte plus the two bits
; tested individually by the dx10_clamp_only (0x100) and ieee_only (0x200)
; tests.
define i32 @func_fpmode_i32_round_denorm_dx10_ieee() {
; GFX678-LABEL: func_fpmode_i32_round_denorm_dx10_ieee:
; GFX678:       ; %bb.0:
; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT:    s_and_b32 s4, s4, 0x3ff
; GFX678-NEXT:    v_mov_b32_e32 v0, s4
; GFX678-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_round_denorm_dx10_ieee:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT:    s_and_b32 s4, s4, 0x3ff
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_round_denorm_dx10_ieee:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT:    s_and_b32 s4, s4, 0x3ff
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_round_denorm_dx10_ieee:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT:    s_and_b32 s0, s0, 0x3ff
; GFX11-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %fpmode = call i32 @llvm.get.fpmode.i32()
  %core.mode = and i32 %fpmode, 1023
  ret i32 %core.mode
}

; Keeps only bits 12-18 (mask 520192 = 0x7f000).
; NOTE(review): the result is named %core.mode like in the neighbouring tests,
; although per the function name this mask presumably selects the
; exception-enable bits - naming only, no effect on the generated code.
define i32 @func_fpmode_i32_excp_en() {
; GFX678-LABEL: func_fpmode_i32_excp_en:
; GFX678:       ; %bb.0:
; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT:    s_and_b32 s4, s4, 0x7f000
; GFX678-NEXT:    v_mov_b32_e32 v0, s4
; GFX678-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_excp_en:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT:    s_and_b32 s4, s4, 0x7f000
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_excp_en:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT:    s_and_b32 s4, s4, 0x7f000
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_excp_en:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT:    s_and_b32 s0, s0, 0x7f000
; GFX11-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %fpmode = call i32 @llvm.get.fpmode.i32()
  %core.mode = and i32 %fpmode, 520192
  ret i32 %core.mode
}

; Mask for all bits used on gfx6+
; The user mask 521215 (0x7f3ff) equals the default mask the tahiti/hawaii/fiji
; runs already apply after s_getreg_b32; the recorded output still contains
; both s_and_b32 instructions (the second one is redundant on those targets).
define i32 @func_fpmode_i32_environment_gfx6() {
; GFX678-LABEL: func_fpmode_i32_environment_gfx6:
; GFX678:       ; %bb.0:
; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT:    s_and_b32 s4, s4, 0x7f3ff
; GFX678-NEXT:    v_mov_b32_e32 v0, s4
; GFX678-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_environment_gfx6:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT:    s_and_b32 s4, s4, 0x7f3ff
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_environment_gfx6:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT:    s_and_b32 s4, s4, 0x7f3ff
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_environment_gfx6:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT:    s_and_b32 s0, s0, 0x7f3ff
; GFX11-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %fpmode = call i32 @llvm.get.fpmode.i32()
  %core.mode = and i32 %fpmode, 521215
  ret i32 %core.mode
}

; Mask for all bits used on gfx9+
; The user mask 8909823 (0x87f3ff) equals the default mask the gfx900 and newer
; runs already apply; the recorded output still contains both s_and_b32
; instructions.
define i32 @func_fpmode_i32_environment_gfx9() {
; GFX678-LABEL: func_fpmode_i32_environment_gfx9:
; GFX678:       ; %bb.0:
; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT:    s_and_b32 s4, s4, 0x87f3ff
; GFX678-NEXT:    v_mov_b32_e32 v0, s4
; GFX678-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_environment_gfx9:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT:    s_and_b32 s4, s4, 0x87f3ff
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_environment_gfx9:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT:    s_and_b32 s4, s4, 0x87f3ff
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_environment_gfx9:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT:    s_and_b32 s0, s0, 0x87f3ff
; GFX11-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %fpmode = call i32 @llvm.get.fpmode.i32()
  %core.mode = and i32 %fpmode, 8909823
  ret i32 %core.mode
}

; Keeps only bits 4-5 (mask 48 = 0x30).
define i32 @func_fpmode_i32_denormf32only() {
; GFX678-LABEL: func_fpmode_i32_denormf32only:
; GFX678:       ; %bb.0:
; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT:    s_and_b32 s4, s4, 48
; GFX678-NEXT:    v_mov_b32_e32 v0, s4
; GFX678-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_denormf32only:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT:    s_and_b32 s4, s4, 48
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_denormf32only:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT:    s_and_b32 s4, s4, 48
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_denormf32only:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT:    s_and_b32 s0, s0, 48
; GFX11-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %fpmode = call i32 @llvm.get.fpmode.i32()
  %denorm.only = and i32 %fpmode, 48
  ret i32 %denorm.only
}

; Keeps a single bit: bit 5 (mask 32 = 0x20), the upper bit of the 0x30 range
; used by func_fpmode_i32_denormf32only.
define i32 @func_fpmode_i32_denormf32only_0() {
; GFX678-LABEL: func_fpmode_i32_denormf32only_0:
; GFX678:       ; %bb.0:
; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT:    s_and_b32 s4, s4, 32
; GFX678-NEXT:    v_mov_b32_e32 v0, s4
; GFX678-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_denormf32only_0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT:    s_and_b32 s4, s4, 32
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_denormf32only_0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT:    s_and_b32 s4, s4, 32
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_denormf32only_0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT:    s_and_b32 s0, s0, 32
; GFX11-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %fpmode = call i32 @llvm.get.fpmode.i32()
  %denorm.only = and i32 %fpmode, 32
  ret i32 %denorm.only
}

; Keeps a single bit: bit 6 (mask 64 = 0x40).
; NOTE(review): bit 6 lies inside the 0xc0 mask used by
; func_fpmode_i32_denormf64f16only, not inside the 0x30 mask used by
; func_fpmode_i32_denormf32only, so this test name and its mask disagree.
; If the two single-bit f32 tests were meant to cover bits 4 and 5, the masks
; would be 16 and 32 - confirm intent, and regenerate the assertions with
; update_llc_test_checks.py if the constant is changed.
define i32 @func_fpmode_i32_denormf32only_1() {
; GFX678-LABEL: func_fpmode_i32_denormf32only_1:
; GFX678:       ; %bb.0:
; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT:    s_and_b32 s4, s4, 64
; GFX678-NEXT:    v_mov_b32_e32 v0, s4
; GFX678-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_denormf32only_1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT:    s_and_b32 s4, s4, 64
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_denormf32only_1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT:    s_and_b32 s4, s4, 64
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_denormf32only_1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT:    s_and_b32 s0, s0, 64
; GFX11-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %fpmode = call i32 @llvm.get.fpmode.i32()
  %denorm.only = and i32 %fpmode, 64
  ret i32 %denorm.only
}

; Keeps only bits 6-7 (mask 192 = 0xc0).
define i32 @func_fpmode_i32_denormf64f16only() {
; GFX678-LABEL: func_fpmode_i32_denormf64f16only:
; GFX678:       ; %bb.0:
; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT:    s_and_b32 s4, s4, 0xc0
; GFX678-NEXT:    v_mov_b32_e32 v0, s4
; GFX678-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_denormf64f16only:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT:    s_and_b32 s4, s4, 0xc0
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_denormf64f16only:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT:    s_and_b32 s4, s4, 0xc0
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_denormf64f16only:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT:    s_and_b32 s0, s0, 0xc0
; GFX11-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %fpmode = call i32 @llvm.get.fpmode.i32()
  %denorm.only = and i32 %fpmode, 192
  ret i32 %denorm.only
}

; Keeps a single bit: bit 8 (mask 256 = 0x100).
define i32 @func_fpmode_i32_dx10_clamp_only() {
; GFX678-LABEL: func_fpmode_i32_dx10_clamp_only:
; GFX678:       ; %bb.0:
; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT:    s_and_b32 s4, s4, 0x100
; GFX678-NEXT:    v_mov_b32_e32 v0, s4
; GFX678-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_dx10_clamp_only:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT:    s_and_b32 s4, s4, 0x100
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_dx10_clamp_only:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT:    s_and_b32 s4, s4, 0x100
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_dx10_clamp_only:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT:    s_and_b32 s0, s0, 0x100
; GFX11-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %fpmode = call i32 @llvm.get.fpmode.i32()
  %dx10.only = and i32 %fpmode, 256
  ret i32 %dx10.only
}

; Keeps a single bit: bit 9 (mask 512 = 0x200).
define i32 @func_fpmode_i32_ieee_only() {
; GFX678-LABEL: func_fpmode_i32_ieee_only:
; GFX678:       ; %bb.0:
; GFX678-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 19)
; GFX678-NEXT:    s_and_b32 s4, 0x7f3ff, s4
; GFX678-NEXT:    s_and_b32 s4, s4, 0x200
; GFX678-NEXT:    v_mov_b32_e32 v0, s4
; GFX678-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: func_fpmode_i32_ieee_only:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX9-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX9-NEXT:    s_and_b32 s4, s4, 0x200
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: func_fpmode_i32_ieee_only:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 24)
; GFX10-NEXT:    s_and_b32 s4, 0x87f3ff, s4
; GFX10-NEXT:    s_and_b32 s4, s4, 0x200
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: func_fpmode_i32_ieee_only:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 24)
; GFX11-NEXT:    s_and_b32 s0, 0x87f3ff, s0
; GFX11-NEXT:    s_and_b32 s0, s0, 0x200
; GFX11-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %fpmode = call i32 @llvm.get.fpmode.i32()
  %ieee.only = and i32 %fpmode, 512
  ret i32 %ieee.only
}

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
; GFX1011: {{.*}}