; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -global-isel=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI,SI-SDAG %s
; RUN: llc -mtriple=amdgcn -global-isel=1 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI,SI-GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-SDAG %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=1 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -global-isel=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-SDAG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -global-isel=1 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s

; Clamp pattern: maxnum(%a.add, 2.0) feeding minnum(..., 4.0), where %a.add is
; produced by an `fadd nnan`. The maxnum/minnum calls themselves carry no
; fast-math flags. Every checked configuration is expected to fold the pair
; into a single v_med3_f32 with operands (x, 2.0, 4.0).
define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
; SI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f32:
; SI-SDAG:       ; %bb.0:
; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
; SI-SDAG-NEXT:    s_mov_b32 s6, 0
; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT:    s_mov_b64 s[4:5], s[2:3]
; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[6:7]
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v2
; SI-SDAG-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-SDAG-NEXT:    s_endpgm
;
; SI-GISEL-LABEL: v_test_nnan_input_fmed3_r_i_i_f32:
; SI-GISEL:       ; %bb.0:
; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; SI-GISEL-NEXT:    s_mov_b32 s6, 0
; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v2
; SI-GISEL-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT:    s_endpgm
;
; VI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f32:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    flat_load_dword v3, v[0:1]
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v3
; VI-SDAG-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
; VI-SDAG-NEXT:    s_endpgm
;
; VI-GISEL-LABEL: v_test_nnan_input_fmed3_r_i_i_f32:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1]
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v3
; VI-GISEL-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
; VI-GISEL-NEXT:    s_endpgm
;
; GFX9-LABEL: v_test_nnan_input_fmed3_r_i_i_f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_dword v1, v0, s[2:3]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_add_f32_e32 v1, 1.0, v1
; GFX9-NEXT:    v_med3_f32 v1, v1, 2.0, 4.0
; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX9-NEXT:    s_endpgm
;
; GFX11-LABEL: v_test_nnan_input_fmed3_r_i_i_f32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3]
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_add_f32_e32 v1, 1.0, v1
; GFX11-NEXT:    v_med3_f32 v1, v1, 2.0, 4.0
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  %a = load float, ptr addrspace(1) %gep0
  %a.add = fadd nnan float %a, 1.0
  %max = call float @llvm.maxnum.f32(float %a.add, float 2.0)
  %med = call float @llvm.minnum.f32(float %max, float 4.0)

  store float %med, ptr addrspace(1) %outgep
  ret void
}

; Same IR pattern as @v_test_nnan_input_fmed3_r_i_i_f32 (fadd nnan, then
; maxnum with 2.0, then minnum with 4.0); the expected output is likewise a
; single v_med3_f32 on every checked configuration.
; NOTE(review): as written, this body duplicates the previous test. Confirm
; whether the two were meant to differ (for example in where the no-NaN
; guarantee comes from).
define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_f32:
; SI-SDAG:       ; %bb.0:
; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
; SI-SDAG-NEXT:    s_mov_b32 s6, 0
; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT:    s_mov_b64 s[4:5], s[2:3]
; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[6:7]
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v2
; SI-SDAG-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-SDAG-NEXT:    s_endpgm
;
; SI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_f32:
; SI-GISEL:       ; %bb.0:
; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; SI-GISEL-NEXT:    s_mov_b32 s6, 0
; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v2
; SI-GISEL-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT:    s_endpgm
;
; VI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_f32:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    flat_load_dword v3, v[0:1]
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v3
; VI-SDAG-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
; VI-SDAG-NEXT:    s_endpgm
;
; VI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_f32:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1]
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v3
; VI-GISEL-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
; VI-GISEL-NEXT:    s_endpgm
;
; GFX9-LABEL: v_test_fmed3_nnan_r_i_i_f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_dword v1, v0, s[2:3]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_add_f32_e32 v1, 1.0, v1
; GFX9-NEXT:    v_med3_f32 v1, v1, 2.0, 4.0
; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX9-NEXT:    s_endpgm
;
; GFX11-LABEL: v_test_fmed3_nnan_r_i_i_f32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3]
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_add_f32_e32 v1, 1.0, v1
; GFX11-NEXT:    v_med3_f32 v1, v1, 2.0, 4.0
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  %a = load float, ptr addrspace(1) %gep0
  %a.add = fadd nnan float %a, 1.0

  %max = call float @llvm.maxnum.f32(float %a.add, float 2.0)
  %med = call float @llvm.minnum.f32(float %max, float 4.0)

  store float %med, ptr addrspace(1) %outgep
  ret void
}

; Commuted variant: both intrinsic calls take the constant as their first
; operand, i.e. maxnum(2.0, x) and minnum(4.0, max). The expected output is
; still v_med3_f32 with operands (x, 2.0, 4.0).
define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute0_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_commute0_f32:
; SI-SDAG:       ; %bb.0:
; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
; SI-SDAG-NEXT:    s_mov_b32 s6, 0
; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT:    s_mov_b64 s[4:5], s[2:3]
; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[6:7]
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v2
; SI-SDAG-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-SDAG-NEXT:    s_endpgm
;
; SI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_commute0_f32:
; SI-GISEL:       ; %bb.0:
; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; SI-GISEL-NEXT:    s_mov_b32 s6, 0
; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v2
; SI-GISEL-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT:    s_endpgm
;
; VI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_commute0_f32:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    flat_load_dword v3, v[0:1]
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v3
; VI-SDAG-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
; VI-SDAG-NEXT:    s_endpgm
;
; VI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_commute0_f32:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1]
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v3
; VI-GISEL-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
; VI-GISEL-NEXT:    s_endpgm
;
; GFX9-LABEL: v_test_fmed3_nnan_r_i_i_commute0_f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_dword v1, v0, s[2:3]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_add_f32_e32 v1, 1.0, v1
; GFX9-NEXT:    v_med3_f32 v1, v1, 2.0, 4.0
; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX9-NEXT:    s_endpgm
;
; GFX11-LABEL: v_test_fmed3_nnan_r_i_i_commute0_f32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3]
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_add_f32_e32 v1, 1.0, v1
; GFX11-NEXT:    v_med3_f32 v1, v1, 2.0, 4.0
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  %a = load float, ptr addrspace(1) %gep0
  %a.add = fadd nnan float %a, 1.0

  %max = call float @llvm.maxnum.f32(float 2.0, float %a.add)
  %med = call float @llvm.minnum.f32(float 4.0, float %max)

  store float %med, ptr addrspace(1) %outgep
  ret void
}

; Commuted variant: only the outer call is commuted, i.e. maxnum(x, 2.0)
; followed by minnum(4.0, max). The expected output is still v_med3_f32 with
; operands (x, 2.0, 4.0).
define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_commute1_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_commute1_f32:
; SI-SDAG:       ; %bb.0:
; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
; SI-SDAG-NEXT:    s_mov_b32 s6, 0
; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT:    s_mov_b64 s[4:5], s[2:3]
; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[6:7]
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v2
; SI-SDAG-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-SDAG-NEXT:    s_endpgm
;
; SI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_commute1_f32:
; SI-GISEL:       ; %bb.0:
; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; SI-GISEL-NEXT:    s_mov_b32 s6, 0
; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v2
; SI-GISEL-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT:    s_endpgm
;
; VI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_commute1_f32:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    flat_load_dword v3, v[0:1]
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v3
; VI-SDAG-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
; VI-SDAG-NEXT:    s_endpgm
;
; VI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_commute1_f32:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1]
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v3
; VI-GISEL-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
; VI-GISEL-NEXT:    s_endpgm
;
; GFX9-LABEL: v_test_fmed3_nnan_r_i_i_commute1_f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_dword v1, v0, s[2:3]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_add_f32_e32 v1, 1.0, v1
; GFX9-NEXT:    v_med3_f32 v1, v1, 2.0, 4.0
; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX9-NEXT:    s_endpgm
;
; GFX11-LABEL: v_test_fmed3_nnan_r_i_i_commute1_f32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3]
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_add_f32_e32 v1, 1.0, v1
; GFX11-NEXT:    v_med3_f32 v1, v1, 2.0, 4.0
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  %a = load float, ptr addrspace(1) %gep0
  %a.add = fadd nnan float %a, 1.0

  %max = call float @llvm.maxnum.f32(float %a.add, float 2.0)
  %med = call float @llvm.minnum.f32(float 4.0, float %max)

  store float %med, ptr addrspace(1) %outgep
  ret void
}

; Negative case: the constants are swapped relative to the tests above
; (maxnum with 4.0, then minnum with 2.0), so the max bound is larger than the
; min bound. No v_med3_f32 is expected here; the assertions show a separate
; v_max_f32 / v_min_f32 pair, or a single v_maxmin_f32 on gfx1100.
define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_constant_order_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_constant_order_f32:
; SI-SDAG:       ; %bb.0:
; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
; SI-SDAG-NEXT:    s_mov_b32 s6, 0
; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT:    s_mov_b64 s[4:5], s[2:3]
; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[6:7]
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v2
; SI-SDAG-NEXT:    v_max_f32_e32 v2, 4.0, v2
; SI-SDAG-NEXT:    v_min_f32_e32 v2, 2.0, v2
; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-SDAG-NEXT:    s_endpgm
;
; SI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_constant_order_f32:
; SI-GISEL:       ; %bb.0:
; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; SI-GISEL-NEXT:    s_mov_b32 s6, 0
; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v2
; SI-GISEL-NEXT:    v_max_f32_e32 v2, 4.0, v2
; SI-GISEL-NEXT:    v_min_f32_e32 v2, 2.0, v2
; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT:    s_endpgm
;
; VI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_constant_order_f32:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    flat_load_dword v3, v[0:1]
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v3
; VI-SDAG-NEXT:    v_max_f32_e32 v2, 4.0, v2
; VI-SDAG-NEXT:    v_min_f32_e32 v2, 2.0, v2
; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
; VI-SDAG-NEXT:    s_endpgm
;
; VI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_constant_order_f32:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1]
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v3
; VI-GISEL-NEXT:    v_max_f32_e32 v2, 4.0, v2
; VI-GISEL-NEXT:    v_min_f32_e32 v2, 2.0, v2
; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
; VI-GISEL-NEXT:    s_endpgm
;
; GFX9-LABEL: v_test_fmed3_nnan_r_i_i_constant_order_f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_dword v1, v0, s[2:3]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_add_f32_e32 v1, 1.0, v1
; GFX9-NEXT:    v_max_f32_e32 v1, 4.0, v1
; GFX9-NEXT:    v_min_f32_e32 v1, 2.0, v1
; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX9-NEXT:    s_endpgm
;
; GFX11-LABEL: v_test_fmed3_nnan_r_i_i_constant_order_f32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3]
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_add_f32_e32 v1, 1.0, v1
; GFX11-NEXT:    v_maxmin_f32 v1, v1, 4.0, 2.0
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  %a = load float, ptr addrspace(1) %gep0
  %a.add = fadd nnan float %a, 1.0

  %max = call float @llvm.maxnum.f32(float %a.add, float 4.0)
  %med = call float @llvm.minnum.f32(float %max, float 2.0)

  store float %med, ptr addrspace(1) %outgep
  ret void
}

; Multi-use case: %max is consumed by the minnum and is also stored itself
; (both stores are volatile). The two instruction selectors are expected to
; differ here: the SelectionDAG prefixes keep v_max_f32 followed by v_min_f32,
; while the GlobalISel prefixes show v_med3_f32 plus a separate v_max_f32 that
; feeds the second store.
define amdgpu_kernel void @v_test_fmed3_nnan_r_i_i_multi_use_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
; SI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
; SI-SDAG:       ; %bb.0:
; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
; SI-SDAG-NEXT:    s_mov_b32 s6, 0
; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT:    s_mov_b64 s[4:5], s[2:3]
; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[6:7]
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v2
; SI-SDAG-NEXT:    v_max_f32_e32 v2, 2.0, v2
; SI-SDAG-NEXT:    v_min_f32_e32 v3, 4.0, v2
; SI-SDAG-NEXT:    buffer_store_dword v3, v[0:1], s[0:3], 0 addr64
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    s_endpgm
;
; SI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
; SI-GISEL:       ; %bb.0:
; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; SI-GISEL-NEXT:    s_mov_b32 s6, 0
; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v2
; SI-GISEL-NEXT:    v_max_f32_e32 v3, 2.0, v2
; SI-GISEL-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    buffer_store_dword v3, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    s_endpgm
;
; VI-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    flat_load_dword v3, v[0:1]
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v2
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v3
; VI-SDAG-NEXT:    v_max_f32_e32 v2, 2.0, v2
; VI-SDAG-NEXT:    v_min_f32_e32 v3, 4.0, v2
; VI-SDAG-NEXT:    flat_store_dword v[0:1], v3
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    s_endpgm
;
; VI-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1]
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v3
; VI-GISEL-NEXT:    v_max_f32_e32 v3, 2.0, v2
; VI-GISEL-NEXT:    v_med3_f32 v2, v2, 2.0, 4.0
; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    flat_store_dword v[0:1], v3
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    s_endpgm
;
; GFX9-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
; GFX9-SDAG:       ; %bb.0:
; GFX9-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-SDAG-NEXT:    global_load_dword v1, v0, s[2:3]
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT:    v_add_f32_e32 v1, 1.0, v1
; GFX9-SDAG-NEXT:    v_max_f32_e32 v1, 2.0, v1
; GFX9-SDAG-NEXT:    v_min_f32_e32 v2, 4.0, v1
; GFX9-SDAG-NEXT:    global_store_dword v0, v2, s[0:1]
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT:    s_endpgm
;
; GFX9-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT:    global_load_dword v1, v0, s[2:3]
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT:    v_add_f32_e32 v1, 1.0, v1
; GFX9-GISEL-NEXT:    v_max_f32_e32 v2, 2.0, v1
; GFX9-GISEL-NEXT:    v_med3_f32 v1, v1, 2.0, 4.0
; GFX9-GISEL-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT:    global_store_dword v0, v2, s[0:1]
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT:    s_endpgm
;
; GFX11-SDAG-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3]
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT:    v_add_f32_e32 v1, 1.0, v1
; GFX11-SDAG-NEXT:    v_max_f32_e32 v1, 2.0, v1
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_min_f32_e32 v2, 4.0, v1
; GFX11-SDAG-NEXT:    global_store_b32 v0, v2, s[0:1] dlc
; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1] dlc
; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT:    s_endpgm
;
; GFX11-GISEL-LABEL: v_test_fmed3_nnan_r_i_i_multi_use_f32:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3]
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT:    v_add_f32_e32 v1, 1.0, v1
; GFX11-GISEL-NEXT:    v_med3_f32 v2, v1, 2.0, 4.0
; GFX11-GISEL-NEXT:    v_max_f32_e32 v1, 2.0, v1
; GFX11-GISEL-NEXT:    global_store_b32 v0, v2, s[0:1] dlc
; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1] dlc
; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  %a = load float, ptr addrspace(1) %gep0
  %a.add = fadd nnan float %a, 1.0

  %max = call float @llvm.maxnum.f32(float %a.add, float 2.0)
  %med = call float @llvm.minnum.f32(float %max, float 4.0)

  store volatile float %med, ptr addrspace(1) %outgep
  store volatile float %max, ptr addrspace(1) %outgep
  ret void
}

define amdgpu_kernel void @v_test_fmed3_r_i_i_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
; SI-SDAG-LABEL: v_test_fmed3_r_i_i_f64:
; SI-SDAG:       ; %bb.0:
; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
; SI-SDAG-NEXT:    s_mov_b32 s6, 0
; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT:    s_mov_b64 s[4:5], s[2:3]
; SI-SDAG-NEXT:    buffer_load_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64
; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[6:7]
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    v_add_f64 v[2:3], v[2:3], 1.0
; SI-SDAG-NEXT:    v_max_f64 v[2:3], v[2:3], 2.0
; SI-SDAG-NEXT:    v_min_f64 v[2:3], v[2:3], 4.0
; SI-SDAG-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
; SI-SDAG-NEXT:    s_endpgm
;
; SI-GISEL-LABEL: v_test_fmed3_r_i_i_f64:
; SI-GISEL:       ; %bb.0:
; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; SI-GISEL-NEXT:    s_mov_b32 s6, 0
; SI-GISEL-NEXT:    s_mov_b32 s7, 0xf000
; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
; SI-GISEL-NEXT:    buffer_load_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    v_add_f64 v[2:3], v[2:3], 1.0
; SI-GISEL-NEXT:    v_max_f64 v[2:3], v[2:3], 2.0
; SI-GISEL-NEXT:    v_min_f64 v[2:3], v[2:3], 4.0
; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
; SI-GISEL-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT:    s_endpgm
;
; VI-SDAG-LABEL: v_test_fmed3_r_i_i_f64:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s1
; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s0, v2
; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    v_add_f64 v[0:1], v[0:1], 1.0
; VI-SDAG-NEXT:    v_max_f64 v[0:1], v[0:1], 2.0
; VI-SDAG-NEXT:    v_min_f64 v[0:1], v[0:1], 4.0
; VI-SDAG-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-SDAG-NEXT:    s_endpgm
;
; VI-GISEL-LABEL: v_test_fmed3_r_i_i_f64:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v4, 3, v0
; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v4
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1,
vcc 808; VI-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1] 809; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 810; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 811; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v4 812; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 813; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 814; VI-GISEL-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 815; VI-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], 2.0 816; VI-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], 4.0 817; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 818; VI-GISEL-NEXT: s_endpgm 819; 820; GFX9-LABEL: v_test_fmed3_r_i_i_f64: 821; GFX9: ; %bb.0: 822; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 823; GFX9-NEXT: v_lshlrev_b32_e32 v2, 3, v0 824; GFX9-NEXT: s_waitcnt lgkmcnt(0) 825; GFX9-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3] 826; GFX9-NEXT: s_waitcnt vmcnt(0) 827; GFX9-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 828; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], 2.0 829; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], 4.0 830; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 831; GFX9-NEXT: s_endpgm 832; 833; GFX11-LABEL: v_test_fmed3_r_i_i_f64: 834; GFX11: ; %bb.0: 835; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 836; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 837; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) 838; GFX11-NEXT: v_lshlrev_b32_e32 v2, 3, v0 839; GFX11-NEXT: s_waitcnt lgkmcnt(0) 840; GFX11-NEXT: global_load_b64 v[0:1], v2, s[2:3] 841; GFX11-NEXT: s_waitcnt vmcnt(0) 842; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], 1.0 843; GFX11-NEXT: v_max_f64 v[0:1], v[0:1], 2.0 844; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 845; GFX11-NEXT: v_min_f64 v[0:1], v[0:1], 4.0 846; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 847; GFX11-NEXT: s_endpgm 848 %tid = call i32 @llvm.amdgcn.workitem.id.x() 849 %gep0 = getelementptr double, ptr addrspace(1) %aptr, i32 %tid 850 %outgep = getelementptr double, ptr addrspace(1) %out, i32 %tid 851 %a = load double, ptr addrspace(1) %gep0 852 %a.add = fadd nnan double %a, 1.0 853 854 %max = 
call double @llvm.maxnum.f64(double %a.add, double 2.0) 855 %med = call double @llvm.minnum.f64(double %max, double 4.0) 856 857 store double %med, ptr addrspace(1) %outgep 858 ret void 859} 860 861define amdgpu_kernel void @v_test_fmed3_r_i_i_no_nans_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #2 { 862; SI-SDAG-LABEL: v_test_fmed3_r_i_i_no_nans_f32: 863; SI-SDAG: ; %bb.0: 864; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 865; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 866; SI-SDAG-NEXT: s_mov_b32 s6, 0 867; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 868; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 869; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 870; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 871; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 872; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 873; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 874; SI-SDAG-NEXT: v_med3_f32 v2, v2, 2.0, 4.0 875; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 876; SI-SDAG-NEXT: s_endpgm 877; 878; SI-GISEL-LABEL: v_test_fmed3_r_i_i_no_nans_f32: 879; SI-GISEL: ; %bb.0: 880; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 881; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 882; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 883; SI-GISEL-NEXT: s_mov_b32 s6, 0 884; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 885; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 886; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 887; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 888; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 889; SI-GISEL-NEXT: v_med3_f32 v2, v2, 2.0, 4.0 890; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 891; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 892; SI-GISEL-NEXT: s_endpgm 893; 894; VI-SDAG-LABEL: v_test_fmed3_r_i_i_no_nans_f32: 895; VI-SDAG: ; %bb.0: 896; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 897; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 898; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 899; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 900; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 901; VI-SDAG-NEXT: 
v_addc_u32_e32 v1, vcc, 0, v1, vcc 902; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 903; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 904; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 905; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 906; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 907; VI-SDAG-NEXT: v_med3_f32 v2, v3, 2.0, 4.0 908; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 909; VI-SDAG-NEXT: s_endpgm 910; 911; VI-GISEL-LABEL: v_test_fmed3_r_i_i_no_nans_f32: 912; VI-GISEL: ; %bb.0: 913; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 914; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 915; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 916; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 917; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 918; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 919; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 920; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 921; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 922; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 923; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 924; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 925; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 926; VI-GISEL-NEXT: v_med3_f32 v2, v3, 2.0, 4.0 927; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 928; VI-GISEL-NEXT: s_endpgm 929; 930; GFX9-LABEL: v_test_fmed3_r_i_i_no_nans_f32: 931; GFX9: ; %bb.0: 932; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 933; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 934; GFX9-NEXT: s_waitcnt lgkmcnt(0) 935; GFX9-NEXT: global_load_dword v1, v0, s[2:3] 936; GFX9-NEXT: s_waitcnt vmcnt(0) 937; GFX9-NEXT: v_med3_f32 v1, v1, 2.0, 4.0 938; GFX9-NEXT: global_store_dword v0, v1, s[0:1] 939; GFX9-NEXT: s_endpgm 940; 941; GFX11-LABEL: v_test_fmed3_r_i_i_no_nans_f32: 942; GFX11: ; %bb.0: 943; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 944; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 945; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 946; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 947; GFX11-NEXT: s_waitcnt lgkmcnt(0) 948; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] 949; GFX11-NEXT: s_waitcnt vmcnt(0) 950; 
GFX11-NEXT: v_med3_f32 v1, v1, 2.0, 4.0 951; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 952; GFX11-NEXT: s_endpgm 953 %tid = call i32 @llvm.amdgcn.workitem.id.x() 954 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 955 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid 956 %a = load float, ptr addrspace(1) %gep0 957 958 %max = call float @llvm.maxnum.f32(float %a, float 2.0) 959 %med = call float @llvm.minnum.f32(float %max, float 4.0) 960 961 store float %med, ptr addrspace(1) %outgep 962 ret void 963} 964 965define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 { 966; SI-SDAG-LABEL: v_test_legacy_fmed3_r_i_i_f32: 967; SI-SDAG: ; %bb.0: 968; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 969; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 970; SI-SDAG-NEXT: s_mov_b32 s6, 0 971; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 972; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 973; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 974; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 975; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 976; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 977; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 978; SI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v2 979; SI-SDAG-NEXT: v_med3_f32 v2, v2, 2.0, 4.0 980; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 981; SI-SDAG-NEXT: s_endpgm 982; 983; SI-GISEL-LABEL: v_test_legacy_fmed3_r_i_i_f32: 984; SI-GISEL: ; %bb.0: 985; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 986; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 987; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 988; SI-GISEL-NEXT: s_mov_b32 s6, 0 989; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 990; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 991; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 992; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 993; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 994; SI-GISEL-NEXT: v_add_f32_e32 v2, 1.0, v2 995; SI-GISEL-NEXT: v_max_legacy_f32_e64 v2, v2, 2.0 996; SI-GISEL-NEXT: v_min_legacy_f32_e64 v2, 
v2, 4.0 997; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 998; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 999; SI-GISEL-NEXT: s_endpgm 1000; 1001; VI-SDAG-LABEL: v_test_legacy_fmed3_r_i_i_f32: 1002; VI-SDAG: ; %bb.0: 1003; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1004; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 1005; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1006; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 1007; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 1008; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1009; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 1010; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 1011; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 1012; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1013; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 1014; VI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v3 1015; VI-SDAG-NEXT: v_med3_f32 v2, v2, 2.0, 4.0 1016; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 1017; VI-SDAG-NEXT: s_endpgm 1018; 1019; VI-GISEL-LABEL: v_test_legacy_fmed3_r_i_i_f32: 1020; VI-GISEL: ; %bb.0: 1021; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1022; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 1023; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1024; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 1025; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 1026; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 1027; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1028; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 1029; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 1030; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 1031; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 1032; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1033; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 1034; VI-GISEL-NEXT: v_add_f32_e32 v2, 1.0, v3 1035; VI-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, 2.0, v2 1036; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 2.0, vcc 1037; VI-GISEL-NEXT: v_cmp_ngt_f32_e32 vcc, 4.0, v2 1038; VI-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 4.0, vcc 1039; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 1040; VI-GISEL-NEXT: s_endpgm 1041; 1042; GFX9-SDAG-LABEL: 
v_test_legacy_fmed3_r_i_i_f32: 1043; GFX9-SDAG: ; %bb.0: 1044; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1045; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1046; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1047; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3] 1048; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1049; GFX9-SDAG-NEXT: v_add_f32_e32 v1, 1.0, v1 1050; GFX9-SDAG-NEXT: v_med3_f32 v1, v1, 2.0, 4.0 1051; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1] 1052; GFX9-SDAG-NEXT: s_endpgm 1053; 1054; GFX9-GISEL-LABEL: v_test_legacy_fmed3_r_i_i_f32: 1055; GFX9-GISEL: ; %bb.0: 1056; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1057; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1058; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1059; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3] 1060; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1061; GFX9-GISEL-NEXT: v_add_f32_e32 v1, 1.0, v1 1062; GFX9-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc, 2.0, v1 1063; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 2.0, vcc 1064; GFX9-GISEL-NEXT: v_cmp_ngt_f32_e32 vcc, 4.0, v1 1065; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc 1066; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1] 1067; GFX9-GISEL-NEXT: s_endpgm 1068; 1069; GFX11-SDAG-LABEL: v_test_legacy_fmed3_r_i_i_f32: 1070; GFX11-SDAG: ; %bb.0: 1071; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1072; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 1073; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) 1074; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1075; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1076; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] 1077; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 1078; GFX11-SDAG-NEXT: v_add_f32_e32 v1, 1.0, v1 1079; GFX11-SDAG-NEXT: v_med3_f32 v1, v1, 2.0, 4.0 1080; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] 1081; GFX11-SDAG-NEXT: s_endpgm 1082; 1083; GFX11-GISEL-LABEL: v_test_legacy_fmed3_r_i_i_f32: 1084; GFX11-GISEL: ; %bb.0: 1085; GFX11-GISEL-NEXT: s_load_b128 s[0:3], 
s[4:5], 0x24 1086; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 1087; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) 1088; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1089; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1090; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] 1091; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 1092; GFX11-GISEL-NEXT: v_add_f32_e32 v1, 1.0, v1 1093; GFX11-GISEL-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 2.0, v1 1094; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 2.0, vcc_lo 1095; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 1096; GFX11-GISEL-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 4.0, v1 1097; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, v1, 4.0, vcc_lo 1098; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] 1099; GFX11-GISEL-NEXT: s_endpgm 1100 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1101 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 1102 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid 1103 %a = load float, ptr addrspace(1) %gep0 1104 %a.nnan = fadd nnan float %a, 1.0 1105 1106 ; fmax_legacy 1107 %cmp0 = fcmp ule float %a.nnan, 2.0 1108 %max = select i1 %cmp0, float 2.0, float %a.nnan 1109 1110 ; fmin_legacy 1111 %cmp1 = fcmp uge float %max, 4.0 1112 %med = select i1 %cmp1, float 4.0, float %max 1113 1114 store float %med, ptr addrspace(1) %outgep 1115 ret void 1116} 1117 1118define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 { 1119; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0: 1120; SI-SDAG: ; %bb.0: 1121; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 1122; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000 1123; SI-SDAG-NEXT: s_mov_b32 s10, 0 1124; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1125; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 1126; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11] 1127; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11] 1128; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1129; 
SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3] 1130; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5] 1131; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7] 1132; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 1133; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 1134; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc 1135; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 1136; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc 1137; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 1138; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] 1139; SI-SDAG-NEXT: v_med3_f32 v2, -v2, v3, v4 1140; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 1141; SI-SDAG-NEXT: s_endpgm 1142; 1143; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0: 1144; SI-GISEL: ; %bb.0: 1145; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 1146; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1147; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 1148; SI-GISEL-NEXT: s_mov_b32 s10, 0 1149; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000 1150; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1151; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] 1152; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 1153; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 1154; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5] 1155; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc 1156; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 1157; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] 1158; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc 1159; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 1160; SI-GISEL-NEXT: v_mul_f32_e32 v2, -1.0, v2 1161; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4 1162; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] 1163; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 1164; SI-GISEL-NEXT: s_endpgm 1165; 1166; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0: 1167; VI-SDAG: ; %bb.0: 1168; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 1169; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0 1170; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1171; 
VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 1172; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6 1173; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1174; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 1175; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6 1176; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 1177; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7 1178; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6 1179; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 1180; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc 1181; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 1182; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc 1183; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 1184; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc 1185; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 1186; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 1187; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6 1188; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1189; VI-SDAG-NEXT: v_med3_f32 v2, -v7, v2, v3 1190; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 1191; VI-SDAG-NEXT: s_endpgm 1192; 1193; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0: 1194; VI-GISEL: ; %bb.0: 1195; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 1196; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0 1197; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1198; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 1199; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 1200; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 1201; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1202; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 1203; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 1204; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6 1205; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 1206; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6 1207; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7 1208; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6 1209; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 1210; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc 1211; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 1212; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc 1213; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 1214; VI-GISEL-NEXT: 
flat_load_dword v3, v[4:5] glc 1215; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 1216; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 1217; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 1218; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 1219; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1220; VI-GISEL-NEXT: v_mul_f32_e32 v4, -1.0, v7 1221; VI-GISEL-NEXT: v_med3_f32 v2, v4, v2, v3 1222; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 1223; VI-GISEL-NEXT: s_endpgm 1224; 1225; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0: 1226; GFX9-SDAG: ; %bb.0: 1227; GFX9-SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 1228; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1229; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1230; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[10:11] glc 1231; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1232; GFX9-SDAG-NEXT: global_load_dword v2, v0, s[12:13] glc 1233; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1234; GFX9-SDAG-NEXT: global_load_dword v3, v0, s[14:15] glc 1235; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1236; GFX9-SDAG-NEXT: v_med3_f32 v1, -v1, v2, v3 1237; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[8:9] 1238; GFX9-SDAG-NEXT: s_endpgm 1239; 1240; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0: 1241; GFX9-GISEL: ; %bb.0: 1242; GFX9-GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 1243; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1244; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1245; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[10:11] glc 1246; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1247; GFX9-GISEL-NEXT: global_load_dword v2, v0, s[12:13] glc 1248; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1249; GFX9-GISEL-NEXT: global_load_dword v3, v0, s[14:15] glc 1250; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1251; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 1252; GFX9-GISEL-NEXT: v_med3_f32 v1, v1, v2, v3 1253; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[8:9] 1254; GFX9-GISEL-NEXT: s_endpgm 1255; 1256; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0: 1257; GFX11-SDAG: ; %bb.0: 1258; 
GFX11-SDAG-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 1259; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 1260; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 1261; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1262; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1263; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 1264; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 1265; GFX11-SDAG-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc 1266; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 1267; GFX11-SDAG-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 1268; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 1269; GFX11-SDAG-NEXT: v_med3_f32 v1, -v1, v2, v3 1270; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] 1271; GFX11-SDAG-NEXT: s_endpgm 1272; 1273; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0: 1274; GFX11-GISEL: ; %bb.0: 1275; GFX11-GISEL-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 1276; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 1277; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 1278; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1279; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1280; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 1281; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 1282; GFX11-GISEL-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc 1283; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 1284; GFX11-GISEL-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 1285; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 1286; GFX11-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 1287; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 1288; GFX11-GISEL-NEXT: v_med3_f32 v1, v1, v2, v3 1289; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] 1290; GFX11-GISEL-NEXT: s_endpgm 1291 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1292 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 1293 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid 1294 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid 1295 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid 1296 %a = load volatile float, ptr addrspace(1) 
%gep0 1297 %b = load volatile float, ptr addrspace(1) %gep1 1298 %c = load volatile float, ptr addrspace(1) %gep2 1299 %a.fneg = fsub float -0.0, %a 1300 %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b) 1301 %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b) 1302 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) 1303 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) 1304 store float %med3, ptr addrspace(1) %outgep 1305 ret void 1306} 1307 1308define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod1(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 { 1309; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod1: 1310; SI-SDAG: ; %bb.0: 1311; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 1312; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000 1313; SI-SDAG-NEXT: s_mov_b32 s10, 0 1314; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1315; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 1316; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11] 1317; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11] 1318; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1319; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3] 1320; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5] 1321; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7] 1322; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 1323; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 1324; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc 1325; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 1326; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc 1327; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 1328; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] 1329; SI-SDAG-NEXT: v_med3_f32 v2, v2, -v3, v4 1330; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 1331; SI-SDAG-NEXT: s_endpgm 1332; 1333; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod1: 1334; SI-GISEL: ; %bb.0: 1335; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 1336; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1337; SI-GISEL-NEXT: 
v_mov_b32_e32 v1, 0 1338; SI-GISEL-NEXT: s_mov_b32 s10, 0 1339; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000 1340; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1341; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] 1342; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 1343; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 1344; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5] 1345; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc 1346; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 1347; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] 1348; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc 1349; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 1350; SI-GISEL-NEXT: v_mul_f32_e32 v3, -1.0, v3 1351; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4 1352; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] 1353; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 1354; SI-GISEL-NEXT: s_endpgm 1355; 1356; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod1: 1357; VI-SDAG: ; %bb.0: 1358; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 1359; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0 1360; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1361; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 1362; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6 1363; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1364; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 1365; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6 1366; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 1367; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7 1368; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6 1369; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 1370; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc 1371; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 1372; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc 1373; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 1374; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc 1375; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 1376; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 1377; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6 1378; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1379; VI-SDAG-NEXT: v_med3_f32 v2, v7, -v2, 
v3 1380; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 1381; VI-SDAG-NEXT: s_endpgm 1382; 1383; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod1: 1384; VI-GISEL: ; %bb.0: 1385; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 1386; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0 1387; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1388; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 1389; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 1390; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 1391; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1392; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 1393; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 1394; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6 1395; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 1396; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6 1397; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7 1398; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6 1399; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 1400; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc 1401; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 1402; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc 1403; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 1404; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc 1405; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 1406; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 1407; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 1408; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 1409; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 1410; VI-GISEL-NEXT: v_mul_f32_e32 v2, -1.0, v2 1411; VI-GISEL-NEXT: v_med3_f32 v2, v7, v2, v3 1412; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 1413; VI-GISEL-NEXT: s_endpgm 1414; 1415; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod1: 1416; GFX9-SDAG: ; %bb.0: 1417; GFX9-SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 1418; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1419; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1420; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[10:11] glc 1421; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1422; GFX9-SDAG-NEXT: global_load_dword v2, v0, s[12:13] glc 1423; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1424; 
GFX9-SDAG-NEXT: global_load_dword v3, v0, s[14:15] glc 1425; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1426; GFX9-SDAG-NEXT: v_med3_f32 v1, v1, -v2, v3 1427; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[8:9] 1428; GFX9-SDAG-NEXT: s_endpgm 1429; 1430; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod1: 1431; GFX9-GISEL: ; %bb.0: 1432; GFX9-GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 1433; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1434; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1435; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[10:11] glc 1436; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1437; GFX9-GISEL-NEXT: global_load_dword v2, v0, s[12:13] glc 1438; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1439; GFX9-GISEL-NEXT: global_load_dword v3, v0, s[14:15] glc 1440; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1441; GFX9-GISEL-NEXT: v_max_f32_e64 v2, -v2, -v2 1442; GFX9-GISEL-NEXT: v_med3_f32 v1, v1, v2, v3 1443; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[8:9] 1444; GFX9-GISEL-NEXT: s_endpgm 1445; 1446; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod1: 1447; GFX11-SDAG: ; %bb.0: 1448; GFX11-SDAG-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 1449; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 1450; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 1451; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1452; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1453; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 1454; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 1455; GFX11-SDAG-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc 1456; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 1457; GFX11-SDAG-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 1458; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 1459; GFX11-SDAG-NEXT: v_med3_f32 v1, v1, -v2, v3 1460; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] 1461; GFX11-SDAG-NEXT: s_endpgm 1462; 1463; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod1: 1464; GFX11-GISEL: ; %bb.0: 1465; GFX11-GISEL-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 1466; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 
0x3ff, v0 1467; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 1468; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 1469; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1470; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 1471; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 1472; GFX11-GISEL-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc 1473; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 1474; GFX11-GISEL-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 1475; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 1476; GFX11-GISEL-NEXT: v_max_f32_e64 v2, -v2, -v2 1477; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 1478; GFX11-GISEL-NEXT: v_med3_f32 v1, v1, v2, v3 1479; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] 1480; GFX11-GISEL-NEXT: s_endpgm 1481 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1482 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 1483 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid 1484 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid 1485 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid 1486 %a = load volatile float, ptr addrspace(1) %gep0 1487 %b = load volatile float, ptr addrspace(1) %gep1 1488 %c = load volatile float, ptr addrspace(1) %gep2 1489 %b.fneg = fsub float -0.0, %b 1490 %tmp0 = call float @llvm.minnum.f32(float %a, float %b.fneg) 1491 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b.fneg) 1492 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) 1493 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) 1494 store float %med3, ptr addrspace(1) %outgep 1495 ret void 1496} 1497
; Median-of-three min/max pattern on three volatile-loaded floats, with the
; third input (c) negated through `fsub float -0.0, %c`.
; In the expectations below the SelectionDAG runs carry the negation as an
; operand modifier on v_med3_f32 (`-v4` / `-v3`), while the GlobalISel runs
; emit a separate v_mul_f32 by -1.0 (SI, VI) or v_max_f32 of the negated
; value (GFX9, GFX11) ahead of an unmodified v_med3_f32.
; The check lines are autogenerated (see the note at the top of the file);
; regenerate them with the update script instead of editing by hand.
; NOTE(review): attribute group #2 is defined outside this excerpt; presumably
; it provides the no-NaNs setting the "nnans" name refers to - confirm.
define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod2(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod2:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
; SI-SDAG-NEXT: s_mov_b32 s10, 0
; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, -v4
; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod2:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
; SI-GISEL-NEXT: s_mov_b32 s10, 0
; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: v_mul_f32_e32 v4, -1.0, v4
; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4
; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod2:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT: v_med3_f32 v2, v7, v2, -v3
; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
; VI-SDAG-NEXT: s_endpgm
;
; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod2:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: v_mul_f32_e32 v3, -1.0, v3
; VI-GISEL-NEXT: v_med3_f32 v2, v7, v2, v3
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod2:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[10:11] glc
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: global_load_dword v2, v0, s[12:13] glc
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: global_load_dword v3, v0, s[14:15] glc
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: v_med3_f32 v1, v1, v2, -v3
; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[8:9]
; GFX9-SDAG-NEXT: s_endpgm
;
; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod2:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[10:11] glc
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: global_load_dword v2, v0, s[12:13] glc
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: global_load_dword v3, v0, s[14:15] glc
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: v_max_f32_e64 v3, -v3, -v3
; GFX9-GISEL-NEXT: v_med3_f32 v1, v1, v2, v3
; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[8:9]
; GFX9-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod2:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: v_med3_f32 v1, v1, v2, -v3
; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod2:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: v_max_f32_e64 v3, -v3, -v3
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_med3_f32 v1, v1, v2, v3
; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-GISEL-NEXT: s_endpgm
  ; Element addresses for this workitem in each of the four buffers.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, ptr addrspace(1) %aptr, i32 %lane
  %b.addr = getelementptr float, ptr addrspace(1) %bptr, i32 %lane
  %c.addr = getelementptr float, ptr addrspace(1) %cptr, i32 %lane
  %out.addr = getelementptr float, ptr addrspace(1) %out, i32 %lane
  ; The three inputs are read with volatile loads.
  %a = load volatile float, ptr addrspace(1) %a.addr
  %b = load volatile float, ptr addrspace(1) %b.addr
  %c = load volatile float, ptr addrspace(1) %c.addr
  ; Negation is spelled as a subtraction from -0.0 on purpose; the GlobalISel
  ; expectations above depend on this exact form.
  %neg.c = fsub float -0.0, %c
  ; max(min(a, b), min(max(a, b), -c))
  %lo.ab = call float @llvm.minnum.f32(float %a, float %b)
  %hi.ab = call float @llvm.maxnum.f32(float %a, float %b)
  %lo.hi.c = call float @llvm.minnum.f32(float %hi.ab, float %neg.c)
  %mid = call float @llvm.maxnum.f32(float %lo.ab, float %lo.hi.c)
  store float %mid, ptr addrspace(1) %out.addr
  ret void
}

; Median-of-three min/max pattern with a different modifier on every input:
; a is negated, b goes through llvm.fabs, and c goes through llvm.fabs and is
; then negated (negations spelled as `fsub float -0.0, x`).
; In the expectations below the SelectionDAG runs express all three as operand
; modifiers of a single v_med3_f32 (`-v, |v|, -|v|`). The GlobalISel runs keep
; only the |b| modifier on v_med3_f32 and materialize the two negated inputs
; with separate v_mul_f32 by -1.0 (SI, VI) or v_max_f32 (GFX9, GFX11).
; The check lines are autogenerated (see the note at the top of the file);
; regenerate them with the update script instead of editing by hand.
; NOTE(review): attribute group #2 is defined outside this excerpt; presumably
; it provides the no-NaNs setting the "nnans" name refers to - confirm.
define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
; SI-SDAG-NEXT: s_mov_b32 s10, 0
; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
; SI-SDAG-NEXT: v_med3_f32 v2, -v2, |v3|, -|v4|
; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
; SI-GISEL-NEXT: s_mov_b32 s10, 0
; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: v_mul_f32_e32 v2, -1.0, v2
; SI-GISEL-NEXT: v_mul_f32_e64 v4, -1.0, |v4|
; SI-GISEL-NEXT: v_med3_f32 v2, v2, |v3|, v4
; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT: v_med3_f32 v2, -v7, |v2|, -|v3|
; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
; VI-SDAG-NEXT: s_endpgm
;
; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: v_mul_f32_e32 v4, -1.0, v7
; VI-GISEL-NEXT: v_mul_f32_e64 v3, -1.0, |v3|
; VI-GISEL-NEXT: v_med3_f32 v2, v4, |v2|, v3
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[10:11] glc
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: global_load_dword v2, v0, s[12:13] glc
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: global_load_dword v3, v0, s[14:15] glc
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: v_med3_f32 v1, -v1, |v2|, -|v3|
; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[8:9]
; GFX9-SDAG-NEXT: s_endpgm
;
; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[10:11] glc
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: global_load_dword v2, v0, s[12:13] glc
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: global_load_dword v3, v0, s[14:15] glc
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
; GFX9-GISEL-NEXT: v_max_f32_e64 v3, -|v3|, -|v3|
; GFX9-GISEL-NEXT: v_med3_f32 v1, v1, |v2|, v3
; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[8:9]
; GFX9-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: v_med3_f32 v1, -v1, |v2|, -|v3|
; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
; GFX11-GISEL-NEXT: v_max_f32_e64 v3, -|v3|, -|v3|
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_med3_f32 v1, v1, |v2|, v3
; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-GISEL-NEXT: s_endpgm
  ; Element addresses for this workitem in each of the four buffers.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, ptr addrspace(1) %aptr, i32 %lane
  %b.addr = getelementptr float, ptr addrspace(1) %bptr, i32 %lane
  %c.addr = getelementptr float, ptr addrspace(1) %cptr, i32 %lane
  %out.addr = getelementptr float, ptr addrspace(1) %out, i32 %lane
  ; The three inputs are read with volatile loads.
  %a = load volatile float, ptr addrspace(1) %a.addr
  %b = load volatile float, ptr addrspace(1) %b.addr
  %c = load volatile float, ptr addrspace(1) %c.addr

  ; Operand preparation: -a, |b|, -|c|.
  %neg.a = fsub float -0.0, %a
  %abs.b = call float @llvm.fabs.f32(float %b)
  %abs.c = call float @llvm.fabs.f32(float %c)
  %neg.abs.c = fsub float -0.0, %abs.c

  ; max(min(-a, |b|), min(max(-a, |b|), -|c|))
  %lo.ab = call float @llvm.minnum.f32(float %neg.a, float %abs.b)
  %hi.ab = call float @llvm.maxnum.f32(float %neg.a, float %abs.b)
  %lo.hi.c = call float @llvm.minnum.f32(float %hi.ab, float %neg.abs.c)
  %mid = call float @llvm.maxnum.f32(float %lo.ab, float %lo.hi.c)

  store float %mid, ptr addrspace(1) %out.addr
  ret void
}

; Median-of-three min/max pattern where every input is passed through
; llvm.fabs and then negated with `fsub float -0.0, x`.
; In the expectations below the SelectionDAG runs express all three inputs as
; `-|v|` operand modifiers of a single v_med3_f32. The GlobalISel runs
; materialize each negated absolute value first, using v_mul_f32 by -1.0
; (SI, VI) or v_max_f32 (GFX9, GFX11), and feed an unmodified v_med3_f32.
; The check lines are autogenerated (see the note at the top of the file);
; regenerate them with the update script instead of editing by hand.
; NOTE(review): attribute group #2 is defined outside this excerpt; presumably
; it provides the no-NaNs setting the "nnans" name refers to - confirm.
define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012:
; SI-SDAG: ; %bb.0:
; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000
; SI-SDAG-NEXT: s_mov_b32 s10, 0
; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0
; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11]
; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11]
; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3]
; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5]
; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11]
; SI-SDAG-NEXT: v_med3_f32 v2, -|v2|, -|v3|, -|v4|
; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012:
; SI-GISEL: ; %bb.0:
; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0
; SI-GISEL-NEXT: s_mov_b32 s10, 0
; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3]
; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5]
; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7]
; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT: s_waitcnt vmcnt(0)
; SI-GISEL-NEXT: v_mul_f32_e64 v2, -1.0, |v2|
; SI-GISEL-NEXT: v_mul_f32_e64 v3, -1.0, |v3|
; SI-GISEL-NEXT: v_mul_f32_e64 v4, -1.0, |v4|
; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4
; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3
; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6
; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5
; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6
; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7
; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6
; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc
; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc
; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc
; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6
; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT: v_med3_f32 v2, -|v7|, -|v2|, -|v3|
; VI-SDAG-NEXT: flat_store_dword v[0:1], v2
; VI-SDAG-NEXT: s_endpgm
;
; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4
; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5
; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6
; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6
; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7
; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6
; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc
; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc
; VI-GISEL-NEXT: s_waitcnt vmcnt(0)
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6
; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT: v_mul_f32_e64 v4, -1.0, |v7|
; VI-GISEL-NEXT: v_mul_f32_e64 v2, -1.0, |v2|
; VI-GISEL-NEXT: v_mul_f32_e64 v3, -1.0, |v3|
; VI-GISEL-NEXT: v_med3_f32 v2, v4, v2, v3
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[10:11] glc
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: global_load_dword v2, v0, s[12:13] glc
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: global_load_dword v3, v0, s[14:15] glc
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: v_med3_f32 v1, -|v1|, -|v2|, -|v3|
; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[8:9]
; GFX9-SDAG-NEXT: s_endpgm
;
; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[10:11] glc
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: global_load_dword v2, v0, s[12:13] glc
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: global_load_dword v3, v0, s[14:15] glc
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
; GFX9-GISEL-NEXT: v_max_f32_e64 v2, -|v2|, -|v2|
; GFX9-GISEL-NEXT: v_max_f32_e64 v3, -|v3|, -|v3|
; GFX9-GISEL-NEXT: v_med3_f32 v1, v1, v2, v3
; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[8:9]
; GFX9-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: v_med3_f32 v1, -|v1|, -|v2|, -|v3|
; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
; GFX11-GISEL-NEXT: v_max_f32_e64 v2, -|v2|, -|v2|
; GFX11-GISEL-NEXT: v_max_f32_e64 v3, -|v3|, -|v3|
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_med3_f32 v1, v1, v2, v3
; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-GISEL-NEXT: s_endpgm
  ; Element addresses for this workitem in each of the four buffers.
  %lane = call i32 @llvm.amdgcn.workitem.id.x()
  %a.addr = getelementptr float, ptr addrspace(1) %aptr, i32 %lane
  %b.addr = getelementptr float, ptr addrspace(1) %bptr, i32 %lane
  %c.addr = getelementptr float, ptr addrspace(1) %cptr, i32 %lane
  %out.addr = getelementptr float, ptr addrspace(1) %out, i32 %lane
  ; The three inputs are read with volatile loads.
  %a = load volatile float, ptr addrspace(1) %a.addr
  %b = load volatile float, ptr addrspace(1) %b.addr
  %c = load volatile float, ptr addrspace(1) %c.addr

  ; Operand preparation: -|a|, -|b|, -|c|.
  %abs.a = call float @llvm.fabs.f32(float %a)
  %neg.abs.a = fsub float -0.0, %abs.a
  %abs.b = call float @llvm.fabs.f32(float %b)
  %neg.abs.b = fsub float -0.0, %abs.b
  %abs.c = call float @llvm.fabs.f32(float %c)
  %neg.abs.c = fsub float -0.0, %abs.c

  ; max(min(-|a|, -|b|), min(max(-|a|, -|b|), -|c|))
  %lo.ab = call float @llvm.minnum.f32(float %neg.abs.a, float %neg.abs.b)
  %hi.ab = call float @llvm.maxnum.f32(float %neg.abs.a, float %neg.abs.b)
  %lo.hi.c = call float @llvm.minnum.f32(float %hi.ab, float %neg.abs.c)
  %mid = call float @llvm.maxnum.f32(float %lo.ab, float %lo.hi.c)

  store float %mid, ptr addrspace(1) %out.addr
  ret void
}

2094define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 { 2095; SI-SDAG-LABEL: v_nnan_inputs_med3_f32_pat0: 2096; SI-SDAG: ; %bb.0: 2097; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 2098; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000 2099; SI-SDAG-NEXT: s_mov_b32 s10, 0 2100; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2101; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 2102; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11] 2103; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11] 2104; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2105; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3] 2106; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5] 2107; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7] 2108; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 2109; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 2110; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc 2111; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 2112; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc 2113; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 2114; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] 2115; SI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v2 2116; SI-SDAG-NEXT: v_add_f32_e32 v3, 2.0, v3 2117; SI-SDAG-NEXT: v_add_f32_e32 v4, 4.0, v4 2118; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4 2119; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 2120; SI-SDAG-NEXT: s_endpgm 2121; 2122; SI-GISEL-LABEL: v_nnan_inputs_med3_f32_pat0: 2123; SI-GISEL: ; %bb.0: 2124; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 2125; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2126; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 2127; SI-GISEL-NEXT: s_mov_b32 s10, 0 2128; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000 2129; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2130; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] 2131;
SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 2132; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 2133; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5] 2134; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc 2135; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 2136; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] 2137; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc 2138; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 2139; SI-GISEL-NEXT: v_add_f32_e32 v2, 1.0, v2 2140; SI-GISEL-NEXT: v_add_f32_e32 v3, 2.0, v3 2141; SI-GISEL-NEXT: v_add_f32_e32 v4, 4.0, v4 2142; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4 2143; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] 2144; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 2145; SI-GISEL-NEXT: s_endpgm 2146; 2147; VI-SDAG-LABEL: v_nnan_inputs_med3_f32_pat0: 2148; VI-SDAG: ; %bb.0: 2149; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 2150; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0 2151; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2152; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 2153; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6 2154; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2155; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 2156; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6 2157; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 2158; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7 2159; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6 2160; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 2161; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc 2162; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 2163; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc 2164; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 2165; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc 2166; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 2167; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 2168; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6 2169; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2170; VI-SDAG-NEXT: v_add_f32_e32 v4, 1.0, v7 2171; VI-SDAG-NEXT: v_add_f32_e32 v2, 2.0, v2 2172; VI-SDAG-NEXT: v_add_f32_e32 v3, 4.0, v3 2173; VI-SDAG-NEXT: 
v_med3_f32 v2, v4, v2, v3 2174; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 2175; VI-SDAG-NEXT: s_endpgm 2176; 2177; VI-GISEL-LABEL: v_nnan_inputs_med3_f32_pat0: 2178; VI-GISEL: ; %bb.0: 2179; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 2180; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0 2181; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2182; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 2183; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 2184; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 2185; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2186; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 2187; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 2188; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6 2189; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 2190; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6 2191; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7 2192; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6 2193; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 2194; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc 2195; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 2196; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc 2197; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 2198; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc 2199; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 2200; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 2201; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 2202; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 2203; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2204; VI-GISEL-NEXT: v_add_f32_e32 v4, 1.0, v7 2205; VI-GISEL-NEXT: v_add_f32_e32 v2, 2.0, v2 2206; VI-GISEL-NEXT: v_add_f32_e32 v3, 4.0, v3 2207; VI-GISEL-NEXT: v_med3_f32 v2, v4, v2, v3 2208; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 2209; VI-GISEL-NEXT: s_endpgm 2210; 2211; GFX9-LABEL: v_nnan_inputs_med3_f32_pat0: 2212; GFX9: ; %bb.0: 2213; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 2214; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2215; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2216; GFX9-NEXT: global_load_dword v1, v0, s[10:11] glc 2217; GFX9-NEXT: s_waitcnt vmcnt(0) 2218; GFX9-NEXT: global_load_dword v2, v0, s[12:13] glc 
2219; GFX9-NEXT: s_waitcnt vmcnt(0) 2220; GFX9-NEXT: global_load_dword v3, v0, s[14:15] glc 2221; GFX9-NEXT: s_waitcnt vmcnt(0) 2222; GFX9-NEXT: v_add_f32_e32 v1, 1.0, v1 2223; GFX9-NEXT: v_add_f32_e32 v2, 2.0, v2 2224; GFX9-NEXT: v_add_f32_e32 v3, 4.0, v3 2225; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3 2226; GFX9-NEXT: global_store_dword v0, v1, s[8:9] 2227; GFX9-NEXT: s_endpgm 2228; 2229; GFX11-LABEL: v_nnan_inputs_med3_f32_pat0: 2230; GFX11: ; %bb.0: 2231; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 2232; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 2233; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2234; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2235; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2236; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 2237; GFX11-NEXT: s_waitcnt vmcnt(0) 2238; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc 2239; GFX11-NEXT: s_waitcnt vmcnt(0) 2240; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 2241; GFX11-NEXT: s_waitcnt vmcnt(0) 2242; GFX11-NEXT: v_dual_add_f32 v1, 1.0, v1 :: v_dual_add_f32 v2, 2.0, v2 2243; GFX11-NEXT: v_add_f32_e32 v3, 4.0, v3 2244; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2245; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3 2246; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 2247; GFX11-NEXT: s_endpgm 2248 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2249 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 2250 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid 2251 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid 2252 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid 2253 %a = load volatile float, ptr addrspace(1) %gep0 2254 %b = load volatile float, ptr addrspace(1) %gep1 2255 %c = load volatile float, ptr addrspace(1) %gep2 2256 2257 %a.nnan = fadd nnan float %a, 1.0 2258 %b.nnan = fadd nnan float %b, 2.0 2259 %c.nnan = fadd nnan float %c, 4.0 2260 2261 %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan) 2262 %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float 
%b.nnan) 2263 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan) 2264 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) 2265 store float %med3, ptr addrspace(1) %outgep 2266 ret void 2267} 2268 2269define amdgpu_kernel void @v_nnan_input_calls_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 { 2270; SI-SDAG-LABEL: v_nnan_input_calls_med3_f32_pat0: 2271; SI-SDAG: ; %bb.0: 2272; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 2273; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000 2274; SI-SDAG-NEXT: s_mov_b32 s10, 0 2275; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2276; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 2277; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11] 2278; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11] 2279; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2280; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3] 2281; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5] 2282; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7] 2283; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 2284; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 2285; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc 2286; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 2287; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc 2288; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 2289; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] 2290; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4 2291; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 2292; SI-SDAG-NEXT: s_endpgm 2293; 2294; SI-GISEL-LABEL: v_nnan_input_calls_med3_f32_pat0: 2295; SI-GISEL: ; %bb.0: 2296; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 2297; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2298; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 2299; SI-GISEL-NEXT: s_mov_b32 s10, 0 2300; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000 2301; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2302; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] 2303; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 2304; SI-GISEL-NEXT: s_waitcnt 
vmcnt(0) 2305; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5] 2306; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc 2307; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 2308; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] 2309; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc 2310; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 2311; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4 2312; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] 2313; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 2314; SI-GISEL-NEXT: s_endpgm 2315; 2316; VI-SDAG-LABEL: v_nnan_input_calls_med3_f32_pat0: 2317; VI-SDAG: ; %bb.0: 2318; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 2319; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0 2320; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2321; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 2322; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6 2323; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2324; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 2325; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6 2326; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 2327; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7 2328; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6 2329; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 2330; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc 2331; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 2332; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc 2333; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 2334; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc 2335; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 2336; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 2337; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6 2338; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2339; VI-SDAG-NEXT: v_med3_f32 v2, v7, v2, v3 2340; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 2341; VI-SDAG-NEXT: s_endpgm 2342; 2343; VI-GISEL-LABEL: v_nnan_input_calls_med3_f32_pat0: 2344; VI-GISEL: ; %bb.0: 2345; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 2346; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0 2347; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2348; VI-GISEL-NEXT: v_mov_b32_e32 
v0, s2 2349; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 2350; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 2351; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2352; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 2353; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 2354; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6 2355; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 2356; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6 2357; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7 2358; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6 2359; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 2360; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc 2361; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 2362; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc 2363; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 2364; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc 2365; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 2366; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 2367; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 2368; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 2369; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2370; VI-GISEL-NEXT: v_med3_f32 v2, v7, v2, v3 2371; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 2372; VI-GISEL-NEXT: s_endpgm 2373; 2374; GFX9-LABEL: v_nnan_input_calls_med3_f32_pat0: 2375; GFX9: ; %bb.0: 2376; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 2377; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2378; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2379; GFX9-NEXT: global_load_dword v1, v0, s[10:11] glc 2380; GFX9-NEXT: s_waitcnt vmcnt(0) 2381; GFX9-NEXT: global_load_dword v2, v0, s[12:13] glc 2382; GFX9-NEXT: s_waitcnt vmcnt(0) 2383; GFX9-NEXT: global_load_dword v3, v0, s[14:15] glc 2384; GFX9-NEXT: s_waitcnt vmcnt(0) 2385; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3 2386; GFX9-NEXT: global_store_dword v0, v1, s[8:9] 2387; GFX9-NEXT: s_endpgm 2388; 2389; GFX11-LABEL: v_nnan_input_calls_med3_f32_pat0: 2390; GFX11: ; %bb.0: 2391; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 2392; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 2393; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2394; GFX11-NEXT: 
v_lshlrev_b32_e32 v0, 2, v0 2395; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2396; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 2397; GFX11-NEXT: s_waitcnt vmcnt(0) 2398; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc 2399; GFX11-NEXT: s_waitcnt vmcnt(0) 2400; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 2401; GFX11-NEXT: s_waitcnt vmcnt(0) 2402; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3 2403; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 2404; GFX11-NEXT: s_endpgm 2405 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2406 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 2407 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid 2408 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid 2409 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid 2410 %a = load volatile float, ptr addrspace(1) %gep0 2411 %b = load volatile float, ptr addrspace(1) %gep1 2412 %c = load volatile float, ptr addrspace(1) %gep2 2413 %tmp0 = call nnan float @llvm.minnum.f32(float %a, float %b) 2414 %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b) 2415 %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c) 2416 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) 2417 store float %med3, ptr addrspace(1) %outgep 2418 ret void 2419} 2420 2421define amdgpu_kernel void @v_nnan_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 { 2422; SI-SDAG-LABEL: v_nnan_call_med3_f32_pat0: 2423; SI-SDAG: ; %bb.0: 2424; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 2425; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000 2426; SI-SDAG-NEXT: s_mov_b32 s10, 0 2427; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2428; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 2429; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11] 2430; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11] 2431; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2432; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3] 2433; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5] 2434; SI-SDAG-NEXT: 
s_mov_b64 s[16:17], s[6:7] 2435; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 2436; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 2437; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc 2438; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 2439; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc 2440; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 2441; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] 2442; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4 2443; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 2444; SI-SDAG-NEXT: s_endpgm 2445; 2446; SI-GISEL-LABEL: v_nnan_call_med3_f32_pat0: 2447; SI-GISEL: ; %bb.0: 2448; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 2449; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2450; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 2451; SI-GISEL-NEXT: s_mov_b32 s10, 0 2452; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000 2453; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2454; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] 2455; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 2456; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 2457; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5] 2458; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc 2459; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 2460; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] 2461; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc 2462; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 2463; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4 2464; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] 2465; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 2466; SI-GISEL-NEXT: s_endpgm 2467; 2468; VI-SDAG-LABEL: v_nnan_call_med3_f32_pat0: 2469; VI-SDAG: ; %bb.0: 2470; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 2471; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0 2472; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2473; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 2474; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6 2475; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2476; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 2477; 
VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6 2478; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 2479; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7 2480; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6 2481; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 2482; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc 2483; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 2484; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc 2485; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 2486; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc 2487; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 2488; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 2489; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6 2490; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2491; VI-SDAG-NEXT: v_med3_f32 v2, v7, v2, v3 2492; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 2493; VI-SDAG-NEXT: s_endpgm 2494; 2495; VI-GISEL-LABEL: v_nnan_call_med3_f32_pat0: 2496; VI-GISEL: ; %bb.0: 2497; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 2498; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0 2499; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2500; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 2501; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 2502; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 2503; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2504; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 2505; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 2506; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6 2507; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 2508; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6 2509; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7 2510; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6 2511; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 2512; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc 2513; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 2514; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc 2515; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 2516; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc 2517; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 2518; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 2519; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 2520; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 
2521; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2522; VI-GISEL-NEXT: v_med3_f32 v2, v7, v2, v3 2523; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 2524; VI-GISEL-NEXT: s_endpgm 2525; 2526; GFX9-LABEL: v_nnan_call_med3_f32_pat0: 2527; GFX9: ; %bb.0: 2528; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 2529; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2530; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2531; GFX9-NEXT: global_load_dword v1, v0, s[10:11] glc 2532; GFX9-NEXT: s_waitcnt vmcnt(0) 2533; GFX9-NEXT: global_load_dword v2, v0, s[12:13] glc 2534; GFX9-NEXT: s_waitcnt vmcnt(0) 2535; GFX9-NEXT: global_load_dword v3, v0, s[14:15] glc 2536; GFX9-NEXT: s_waitcnt vmcnt(0) 2537; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3 2538; GFX9-NEXT: global_store_dword v0, v1, s[8:9] 2539; GFX9-NEXT: s_endpgm 2540; 2541; GFX11-LABEL: v_nnan_call_med3_f32_pat0: 2542; GFX11: ; %bb.0: 2543; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 2544; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 2545; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2546; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2547; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2548; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 2549; GFX11-NEXT: s_waitcnt vmcnt(0) 2550; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc 2551; GFX11-NEXT: s_waitcnt vmcnt(0) 2552; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 2553; GFX11-NEXT: s_waitcnt vmcnt(0) 2554; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3 2555; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 2556; GFX11-NEXT: s_endpgm 2557 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2558 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 2559 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid 2560 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid 2561 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid 2562 %a = load volatile float, ptr addrspace(1) %gep0 2563 %b = load volatile float, ptr addrspace(1) %gep1 2564 %c = load volatile float, ptr addrspace(1) %gep2 2565 %tmp0 = call nnan 
float @llvm.minnum.f32(float %a, float %b) 2566 %tmp1 = call nnan float @llvm.maxnum.f32(float %a, float %b) 2567 %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c) 2568 %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2) 2569 store float %med3, ptr addrspace(1) %outgep 2570 ret void 2571} 2572 2573define amdgpu_kernel void @v_fast_call_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 { 2574; SI-SDAG-LABEL: v_fast_call_med3_f32_pat0: 2575; SI-SDAG: ; %bb.0: 2576; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 2577; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000 2578; SI-SDAG-NEXT: s_mov_b32 s10, 0 2579; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2580; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 2581; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11] 2582; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11] 2583; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2584; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3] 2585; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5] 2586; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7] 2587; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 2588; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 2589; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc 2590; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 2591; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc 2592; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 2593; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] 2594; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4 2595; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 2596; SI-SDAG-NEXT: s_endpgm 2597; 2598; SI-GISEL-LABEL: v_fast_call_med3_f32_pat0: 2599; SI-GISEL: ; %bb.0: 2600; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 2601; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2602; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 2603; SI-GISEL-NEXT: s_mov_b32 s10, 0 2604; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000 2605; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2606; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] 2607; SI-GISEL-NEXT: 
buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 2608; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 2609; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5] 2610; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc 2611; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 2612; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] 2613; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc 2614; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 2615; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4 2616; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] 2617; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 2618; SI-GISEL-NEXT: s_endpgm 2619; 2620; VI-SDAG-LABEL: v_fast_call_med3_f32_pat0: 2621; VI-SDAG: ; %bb.0: 2622; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 2623; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0 2624; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2625; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 2626; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6 2627; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2628; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 2629; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6 2630; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 2631; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7 2632; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6 2633; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 2634; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc 2635; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 2636; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc 2637; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 2638; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc 2639; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 2640; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 2641; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6 2642; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2643; VI-SDAG-NEXT: v_med3_f32 v2, v7, v2, v3 2644; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 2645; VI-SDAG-NEXT: s_endpgm 2646; 2647; VI-GISEL-LABEL: v_fast_call_med3_f32_pat0: 2648; VI-GISEL: ; %bb.0: 2649; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 2650; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0 2651; 
VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2652; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 2653; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 2654; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 2655; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2656; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 2657; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 2658; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6 2659; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 2660; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6 2661; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7 2662; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6 2663; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 2664; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc 2665; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 2666; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc 2667; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 2668; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc 2669; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 2670; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 2671; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 2672; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 2673; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2674; VI-GISEL-NEXT: v_med3_f32 v2, v7, v2, v3 2675; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 2676; VI-GISEL-NEXT: s_endpgm 2677; 2678; GFX9-LABEL: v_fast_call_med3_f32_pat0: 2679; GFX9: ; %bb.0: 2680; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 2681; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2682; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2683; GFX9-NEXT: global_load_dword v1, v0, s[10:11] glc 2684; GFX9-NEXT: s_waitcnt vmcnt(0) 2685; GFX9-NEXT: global_load_dword v2, v0, s[12:13] glc 2686; GFX9-NEXT: s_waitcnt vmcnt(0) 2687; GFX9-NEXT: global_load_dword v3, v0, s[14:15] glc 2688; GFX9-NEXT: s_waitcnt vmcnt(0) 2689; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3 2690; GFX9-NEXT: global_store_dword v0, v1, s[8:9] 2691; GFX9-NEXT: s_endpgm 2692; 2693; GFX11-LABEL: v_fast_call_med3_f32_pat0: 2694; GFX11: ; %bb.0: 2695; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 2696; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 2697; GFX11-NEXT: 
s_delay_alu instid0(VALU_DEP_1) 2698; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2699; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2700; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 2701; GFX11-NEXT: s_waitcnt vmcnt(0) 2702; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc 2703; GFX11-NEXT: s_waitcnt vmcnt(0) 2704; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 2705; GFX11-NEXT: s_waitcnt vmcnt(0) 2706; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3 2707; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 2708; GFX11-NEXT: s_endpgm 2709 %tid = call i32 @llvm.amdgcn.workitem.id.x() 2710 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 2711 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid 2712 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid 2713 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid 2714 %a = load volatile float, ptr addrspace(1) %gep0 2715 %b = load volatile float, ptr addrspace(1) %gep1 2716 %c = load volatile float, ptr addrspace(1) %gep2 2717 %tmp0 = call fast float @llvm.minnum.f32(float %a, float %b) 2718 %tmp1 = call fast float @llvm.maxnum.f32(float %a, float %b) 2719 %tmp2 = call fast float @llvm.minnum.f32(float %tmp1, float %c) 2720 %med3 = call fast float @llvm.maxnum.f32(float %tmp0, float %tmp2) 2721 store float %med3, ptr addrspace(1) %outgep 2722 ret void 2723} 2724 2725; 16 combinations 2726 2727; 0: max(min(x, y), min(max(x, y), z)) 2728; 1: max(min(x, y), min(max(y, x), z)) 2729; 2: max(min(x, y), min(z, max(x, y))) 2730; 3: max(min(x, y), min(z, max(y, x))) 2731; 4: max(min(y, x), min(max(x, y), z)) 2732; 5: max(min(y, x), min(max(y, x), z)) 2733; 6: max(min(y, x), min(z, max(x, y))) 2734; 7: max(min(y, x), min(z, max(y, x))) 2735; + commute outermost max 2736 2737define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 { 2738; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0: 2739; SI-SDAG: ; %bb.0: 
2740; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 2741; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000 2742; SI-SDAG-NEXT: s_mov_b32 s10, 0 2743; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2744; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 2745; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11] 2746; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11] 2747; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2748; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3] 2749; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5] 2750; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7] 2751; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 2752; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 2753; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc 2754; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 2755; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc 2756; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 2757; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] 2758; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4 2759; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 2760; SI-SDAG-NEXT: s_endpgm 2761; 2762; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0: 2763; SI-GISEL: ; %bb.0: 2764; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 2765; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2766; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 2767; SI-GISEL-NEXT: s_mov_b32 s10, 0 2768; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000 2769; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2770; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] 2771; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 2772; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 2773; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5] 2774; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc 2775; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 2776; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] 2777; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc 2778; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 2779; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4 2780; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] 2781; SI-GISEL-NEXT: buffer_store_dword v2, 
v[0:1], s[0:3], 0 addr64 2782; SI-GISEL-NEXT: s_endpgm 2783; 2784; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0: 2785; VI-SDAG: ; %bb.0: 2786; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 2787; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0 2788; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2789; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 2790; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6 2791; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2792; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 2793; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6 2794; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 2795; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7 2796; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6 2797; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 2798; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc 2799; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 2800; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc 2801; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 2802; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc 2803; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 2804; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 2805; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6 2806; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2807; VI-SDAG-NEXT: v_med3_f32 v2, v7, v2, v3 2808; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 2809; VI-SDAG-NEXT: s_endpgm 2810; 2811; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0: 2812; VI-GISEL: ; %bb.0: 2813; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 2814; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0 2815; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2816; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 2817; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 2818; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 2819; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2820; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 2821; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 2822; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6 2823; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 2824; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6 2825; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7 2826; VI-GISEL-NEXT: v_add_u32_e32 
v4, vcc, v4, v6 2827; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 2828; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc 2829; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 2830; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc 2831; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 2832; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc 2833; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 2834; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 2835; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 2836; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 2837; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2838; VI-GISEL-NEXT: v_med3_f32 v2, v7, v2, v3 2839; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 2840; VI-GISEL-NEXT: s_endpgm 2841; 2842; GFX9-LABEL: v_test_global_nnans_med3_f32_pat0: 2843; GFX9: ; %bb.0: 2844; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 2845; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2846; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2847; GFX9-NEXT: global_load_dword v1, v0, s[10:11] glc 2848; GFX9-NEXT: s_waitcnt vmcnt(0) 2849; GFX9-NEXT: global_load_dword v2, v0, s[12:13] glc 2850; GFX9-NEXT: s_waitcnt vmcnt(0) 2851; GFX9-NEXT: global_load_dword v3, v0, s[14:15] glc 2852; GFX9-NEXT: s_waitcnt vmcnt(0) 2853; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3 2854; GFX9-NEXT: global_store_dword v0, v1, s[8:9] 2855; GFX9-NEXT: s_endpgm 2856; 2857; GFX11-LABEL: v_test_global_nnans_med3_f32_pat0: 2858; GFX11: ; %bb.0: 2859; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 2860; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 2861; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2862; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2863; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2864; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 2865; GFX11-NEXT: s_waitcnt vmcnt(0) 2866; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc 2867; GFX11-NEXT: s_waitcnt vmcnt(0) 2868; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 2869; GFX11-NEXT: s_waitcnt vmcnt(0) 2870; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3 2871; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 2872; GFX11-NEXT: s_endpgm 2873 
%tid = call i32 @llvm.amdgcn.workitem.id.x() 2874 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 2875 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid 2876 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid 2877 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid 2878 %a = load volatile float, ptr addrspace(1) %gep0 2879 %b = load volatile float, ptr addrspace(1) %gep1 2880 %c = load volatile float, ptr addrspace(1) %gep2 2881 %tmp0 = call float @llvm.minnum.f32(float %a, float %b) 2882 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b) 2883 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) 2884 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) 2885 store float %med3, ptr addrspace(1) %outgep 2886 ret void 2887} 2888
; Median-of-three pattern, variant 1. The kernel loads a, b and c with volatile loads indexed by the workitem id and stores maxnum(minnum(a, b), minnum(maxnum(b, a), c)). The inner maxnum takes its operands in (b, a) order. The autogenerated assertions that follow expect a single v_med3_f32 on every run line. Attribute group #2 is defined outside this excerpt; judging by the test name it presumably enables no-NaNs FP math (TODO confirm).
2889define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 { 2890; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat1: 2891; SI-SDAG: ; %bb.0: 2892; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 2893; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000 2894; SI-SDAG-NEXT: s_mov_b32 s10, 0 2895; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2896; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 2897; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11] 2898; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11] 2899; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2900; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3] 2901; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5] 2902; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7] 2903; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 2904; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 2905; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc 2906; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 2907; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc 2908; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 2909; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] 2910; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4 2911; SI-SDAG-NEXT: 
buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 2912; SI-SDAG-NEXT: s_endpgm 2913; 2914; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat1: 2915; SI-GISEL: ; %bb.0: 2916; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 2917; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2918; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 2919; SI-GISEL-NEXT: s_mov_b32 s10, 0 2920; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000 2921; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2922; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] 2923; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 2924; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 2925; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5] 2926; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc 2927; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 2928; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] 2929; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc 2930; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 2931; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4 2932; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] 2933; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 2934; SI-GISEL-NEXT: s_endpgm 2935; 2936; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat1: 2937; VI-SDAG: ; %bb.0: 2938; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 2939; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0 2940; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2941; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 2942; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6 2943; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2944; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 2945; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6 2946; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 2947; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7 2948; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6 2949; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 2950; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc 2951; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 2952; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc 2953; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 2954; VI-SDAG-NEXT: flat_load_dword v3, 
v[4:5] glc 2955; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 2956; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 2957; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6 2958; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2959; VI-SDAG-NEXT: v_med3_f32 v2, v7, v2, v3 2960; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 2961; VI-SDAG-NEXT: s_endpgm 2962; 2963; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat1: 2964; VI-GISEL: ; %bb.0: 2965; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 2966; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0 2967; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2968; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 2969; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 2970; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 2971; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2972; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 2973; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 2974; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6 2975; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 2976; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6 2977; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7 2978; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6 2979; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 2980; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc 2981; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 2982; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc 2983; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 2984; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc 2985; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 2986; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 2987; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 2988; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 2989; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 2990; VI-GISEL-NEXT: v_med3_f32 v2, v7, v2, v3 2991; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 2992; VI-GISEL-NEXT: s_endpgm 2993; 2994; GFX9-LABEL: v_test_global_nnans_med3_f32_pat1: 2995; GFX9: ; %bb.0: 2996; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 2997; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 2998; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2999; GFX9-NEXT: global_load_dword v1, v0, s[10:11] glc 3000; 
GFX9-NEXT: s_waitcnt vmcnt(0) 3001; GFX9-NEXT: global_load_dword v2, v0, s[12:13] glc 3002; GFX9-NEXT: s_waitcnt vmcnt(0) 3003; GFX9-NEXT: global_load_dword v3, v0, s[14:15] glc 3004; GFX9-NEXT: s_waitcnt vmcnt(0) 3005; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3 3006; GFX9-NEXT: global_store_dword v0, v1, s[8:9] 3007; GFX9-NEXT: s_endpgm 3008; 3009; GFX11-LABEL: v_test_global_nnans_med3_f32_pat1: 3010; GFX11: ; %bb.0: 3011; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 3012; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 3013; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3014; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3015; GFX11-NEXT: s_waitcnt lgkmcnt(0) 3016; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 3017; GFX11-NEXT: s_waitcnt vmcnt(0) 3018; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc 3019; GFX11-NEXT: s_waitcnt vmcnt(0) 3020; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 3021; GFX11-NEXT: s_waitcnt vmcnt(0) 3022; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3 3023; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 3024; GFX11-NEXT: s_endpgm 3025 %tid = call i32 @llvm.amdgcn.workitem.id.x() 3026 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 3027 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid 3028 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid 3029 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid 3030 %a = load volatile float, ptr addrspace(1) %gep0 3031 %b = load volatile float, ptr addrspace(1) %gep1 3032 %c = load volatile float, ptr addrspace(1) %gep2 3033 %tmp0 = call float @llvm.minnum.f32(float %a, float %b) 3034 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a) 3035 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) 3036 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) 3037 store float %med3, ptr addrspace(1) %outgep 3038 ret void 3039} 3040
; Median-of-three pattern with a negated first input. The kernel forms a.fneg with fsub float -0.0, a and stores minnum(maxnum(a.fneg, b), maxnum(minnum(a.fneg, b), c)). Per the autogenerated assertions that follow, the SelectionDAG runs fold the negation into a source modifier on v_med3_f32 (a -v first operand). The GlobalISel runs instead emit a separate instruction ahead of v_med3_f32, namely v_mul_f32 by -1.0 on the default-target and tonga runs and v_max_f32 of (-v, -v) on the gfx900 and gfx1100 runs. The negation is deliberately spelled as fsub here; switching it to fneg would require regenerating the assertions.
3041define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat1_srcmod0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) 
%bptr, ptr addrspace(1) %cptr) #2 { 3042; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat1_srcmod0: 3043; SI-SDAG: ; %bb.0: 3044; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 3045; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000 3046; SI-SDAG-NEXT: s_mov_b32 s10, 0 3047; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3048; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 3049; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11] 3050; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11] 3051; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3052; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3] 3053; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5] 3054; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7] 3055; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 3056; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 3057; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc 3058; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 3059; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc 3060; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 3061; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] 3062; SI-SDAG-NEXT: v_med3_f32 v2, -v2, v3, v4 3063; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 3064; SI-SDAG-NEXT: s_endpgm 3065; 3066; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat1_srcmod0: 3067; SI-GISEL: ; %bb.0: 3068; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 3069; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3070; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 3071; SI-GISEL-NEXT: s_mov_b32 s10, 0 3072; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000 3073; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3074; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] 3075; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 3076; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 3077; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5] 3078; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc 3079; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 3080; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] 3081; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc 3082; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 3083; 
SI-GISEL-NEXT: v_mul_f32_e32 v2, -1.0, v2 3084; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4 3085; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] 3086; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 3087; SI-GISEL-NEXT: s_endpgm 3088; 3089; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat1_srcmod0: 3090; VI-SDAG: ; %bb.0: 3091; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 3092; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0 3093; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3094; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 3095; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6 3096; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3097; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 3098; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6 3099; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 3100; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7 3101; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6 3102; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 3103; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc 3104; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 3105; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc 3106; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 3107; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc 3108; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 3109; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 3110; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6 3111; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3112; VI-SDAG-NEXT: v_med3_f32 v2, -v7, v2, v3 3113; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 3114; VI-SDAG-NEXT: s_endpgm 3115; 3116; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat1_srcmod0: 3117; VI-GISEL: ; %bb.0: 3118; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 3119; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0 3120; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3121; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 3122; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 3123; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 3124; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3125; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 3126; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 3127; VI-GISEL-NEXT: 
v_add_u32_e32 v2, vcc, v2, v6 3128; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 3129; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6 3130; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7 3131; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6 3132; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 3133; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc 3134; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 3135; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc 3136; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 3137; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc 3138; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 3139; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 3140; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 3141; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 3142; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3143; VI-GISEL-NEXT: v_mul_f32_e32 v4, -1.0, v7 3144; VI-GISEL-NEXT: v_med3_f32 v2, v4, v2, v3 3145; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 3146; VI-GISEL-NEXT: s_endpgm 3147; 3148; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat1_srcmod0: 3149; GFX9-SDAG: ; %bb.0: 3150; GFX9-SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 3151; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3152; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3153; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[10:11] glc 3154; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 3155; GFX9-SDAG-NEXT: global_load_dword v2, v0, s[12:13] glc 3156; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 3157; GFX9-SDAG-NEXT: global_load_dword v3, v0, s[14:15] glc 3158; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 3159; GFX9-SDAG-NEXT: v_med3_f32 v1, -v1, v2, v3 3160; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[8:9] 3161; GFX9-SDAG-NEXT: s_endpgm 3162; 3163; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat1_srcmod0: 3164; GFX9-GISEL: ; %bb.0: 3165; GFX9-GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 3166; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3167; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3168; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[10:11] glc 3169; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 3170; GFX9-GISEL-NEXT: 
global_load_dword v2, v0, s[12:13] glc 3171; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 3172; GFX9-GISEL-NEXT: global_load_dword v3, v0, s[14:15] glc 3173; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 3174; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 3175; GFX9-GISEL-NEXT: v_med3_f32 v1, v1, v2, v3 3176; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[8:9] 3177; GFX9-GISEL-NEXT: s_endpgm 3178; 3179; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat1_srcmod0: 3180; GFX11-SDAG: ; %bb.0: 3181; GFX11-SDAG-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 3182; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 3183; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 3184; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3185; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3186; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 3187; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 3188; GFX11-SDAG-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc 3189; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 3190; GFX11-SDAG-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 3191; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 3192; GFX11-SDAG-NEXT: v_med3_f32 v1, -v1, v2, v3 3193; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] 3194; GFX11-SDAG-NEXT: s_endpgm 3195; 3196; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat1_srcmod0: 3197; GFX11-GISEL: ; %bb.0: 3198; GFX11-GISEL-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 3199; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 3200; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 3201; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3202; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3203; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 3204; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 3205; GFX11-GISEL-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc 3206; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 3207; GFX11-GISEL-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 3208; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 3209; GFX11-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 3210; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 3211; GFX11-GISEL-NEXT: 
v_med3_f32 v1, v1, v2, v3 3212; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] 3213; GFX11-GISEL-NEXT: s_endpgm 3214 %tid = call i32 @llvm.amdgcn.workitem.id.x() 3215 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 3216 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid 3217 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid 3218 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid 3219 %a = load volatile float, ptr addrspace(1) %gep0 3220 %b = load volatile float, ptr addrspace(1) %gep1 3221 %c = load volatile float, ptr addrspace(1) %gep2 3222 %a.fneg = fsub float -0.0, %a 3223 %tmp0 = call float @llvm.maxnum.f32(float %a.fneg, float %b) 3224 %tmp1 = call float @llvm.minnum.f32(float %a.fneg, float %b) 3225 %tmp2 = call float @llvm.maxnum.f32(float %tmp1, float %c) 3226 %med3 = call float @llvm.minnum.f32(float %tmp0, float %tmp2) 3227 store float %med3, ptr addrspace(1) %outgep 3228 ret void 3229} 3230
; Median-of-three pattern, variant 2. The kernel loads a, b and c with volatile loads indexed by the workitem id and stores maxnum(minnum(a, b), minnum(c, maxnum(a, b))). Here c is the first operand of the outer minnum and the inner maxnum keeps (a, b) order. The autogenerated assertions that follow expect a single v_med3_f32 on every run line.
3231define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat2(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 { 3232; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat2: 3233; SI-SDAG: ; %bb.0: 3234; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 3235; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000 3236; SI-SDAG-NEXT: s_mov_b32 s10, 0 3237; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3238; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 3239; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11] 3240; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11] 3241; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3242; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3] 3243; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5] 3244; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7] 3245; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 3246; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 3247; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc 3248; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 3249; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 
addr64 glc 3250; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 3251; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] 3252; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4 3253; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 3254; SI-SDAG-NEXT: s_endpgm 3255; 3256; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat2: 3257; SI-GISEL: ; %bb.0: 3258; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 3259; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3260; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 3261; SI-GISEL-NEXT: s_mov_b32 s10, 0 3262; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000 3263; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3264; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] 3265; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 3266; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 3267; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5] 3268; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc 3269; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 3270; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] 3271; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc 3272; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 3273; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4 3274; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] 3275; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 3276; SI-GISEL-NEXT: s_endpgm 3277; 3278; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat2: 3279; VI-SDAG: ; %bb.0: 3280; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 3281; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0 3282; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3283; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 3284; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6 3285; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3286; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 3287; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6 3288; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 3289; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7 3290; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6 3291; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 3292; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc 3293; 
VI-SDAG-NEXT: s_waitcnt vmcnt(0) 3294; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc 3295; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 3296; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc 3297; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 3298; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 3299; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6 3300; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3301; VI-SDAG-NEXT: v_med3_f32 v2, v7, v2, v3 3302; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 3303; VI-SDAG-NEXT: s_endpgm 3304; 3305; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat2: 3306; VI-GISEL: ; %bb.0: 3307; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 3308; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0 3309; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3310; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 3311; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 3312; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 3313; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3314; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 3315; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 3316; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6 3317; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 3318; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6 3319; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7 3320; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6 3321; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 3322; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc 3323; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 3324; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc 3325; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 3326; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc 3327; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 3328; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 3329; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 3330; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 3331; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3332; VI-GISEL-NEXT: v_med3_f32 v2, v7, v2, v3 3333; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 3334; VI-GISEL-NEXT: s_endpgm 3335; 3336; GFX9-LABEL: v_test_global_nnans_med3_f32_pat2: 3337; GFX9: ; %bb.0: 3338; GFX9-NEXT: s_load_dwordx8 
s[8:15], s[4:5], 0x24 3339; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3340; GFX9-NEXT: s_waitcnt lgkmcnt(0) 3341; GFX9-NEXT: global_load_dword v1, v0, s[10:11] glc 3342; GFX9-NEXT: s_waitcnt vmcnt(0) 3343; GFX9-NEXT: global_load_dword v2, v0, s[12:13] glc 3344; GFX9-NEXT: s_waitcnt vmcnt(0) 3345; GFX9-NEXT: global_load_dword v3, v0, s[14:15] glc 3346; GFX9-NEXT: s_waitcnt vmcnt(0) 3347; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3 3348; GFX9-NEXT: global_store_dword v0, v1, s[8:9] 3349; GFX9-NEXT: s_endpgm 3350; 3351; GFX11-LABEL: v_test_global_nnans_med3_f32_pat2: 3352; GFX11: ; %bb.0: 3353; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 3354; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 3355; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3356; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3357; GFX11-NEXT: s_waitcnt lgkmcnt(0) 3358; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 3359; GFX11-NEXT: s_waitcnt vmcnt(0) 3360; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc 3361; GFX11-NEXT: s_waitcnt vmcnt(0) 3362; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 3363; GFX11-NEXT: s_waitcnt vmcnt(0) 3364; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3 3365; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 3366; GFX11-NEXT: s_endpgm 3367 %tid = call i32 @llvm.amdgcn.workitem.id.x() 3368 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 3369 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid 3370 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid 3371 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid 3372 %a = load volatile float, ptr addrspace(1) %gep0 3373 %b = load volatile float, ptr addrspace(1) %gep1 3374 %c = load volatile float, ptr addrspace(1) %gep2 3375 %tmp0 = call float @llvm.minnum.f32(float %a, float %b) 3376 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b) 3377 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1) 3378 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) 3379 store float %med3, ptr addrspace(1) 
%outgep 3380 ret void 3381} 3382
; Median-of-three pattern, variant 3. The kernel loads a, b and c with volatile loads indexed by the workitem id and stores maxnum(minnum(a, b), minnum(c, maxnum(b, a))). Here c is the first operand of the outer minnum and the inner maxnum takes (b, a) order. The autogenerated assertions that follow expect a single v_med3_f32 on every run line.
3383define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat3(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 { 3384; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat3: 3385; SI-SDAG: ; %bb.0: 3386; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 3387; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000 3388; SI-SDAG-NEXT: s_mov_b32 s10, 0 3389; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3390; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 3391; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11] 3392; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11] 3393; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3394; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3] 3395; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5] 3396; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7] 3397; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 3398; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 3399; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc 3400; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 3401; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc 3402; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 3403; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] 3404; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4 3405; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 3406; SI-SDAG-NEXT: s_endpgm 3407; 3408; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat3: 3409; SI-GISEL: ; %bb.0: 3410; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 3411; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3412; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 3413; SI-GISEL-NEXT: s_mov_b32 s10, 0 3414; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000 3415; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3416; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] 3417; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 3418; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 3419; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5] 3420; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc 3421; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 3422; SI-GISEL-NEXT: 
s_mov_b64 s[8:9], s[6:7] 3423; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc 3424; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 3425; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4 3426; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] 3427; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 3428; SI-GISEL-NEXT: s_endpgm 3429; 3430; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat3: 3431; VI-SDAG: ; %bb.0: 3432; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 3433; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0 3434; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3435; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 3436; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6 3437; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3438; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 3439; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6 3440; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 3441; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7 3442; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6 3443; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 3444; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc 3445; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 3446; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc 3447; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 3448; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc 3449; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 3450; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 3451; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6 3452; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3453; VI-SDAG-NEXT: v_med3_f32 v2, v7, v2, v3 3454; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 3455; VI-SDAG-NEXT: s_endpgm 3456; 3457; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat3: 3458; VI-GISEL: ; %bb.0: 3459; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 3460; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0 3461; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3462; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 3463; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 3464; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 3465; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3466; VI-GISEL-NEXT: 
v_mov_b32_e32 v2, s4 3467; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 3468; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6 3469; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 3470; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6 3471; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7 3472; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6 3473; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 3474; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc 3475; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 3476; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc 3477; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 3478; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc 3479; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 3480; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 3481; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 3482; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 3483; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 3484; VI-GISEL-NEXT: v_med3_f32 v2, v7, v2, v3 3485; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 3486; VI-GISEL-NEXT: s_endpgm 3487; 3488; GFX9-LABEL: v_test_global_nnans_med3_f32_pat3: 3489; GFX9: ; %bb.0: 3490; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 3491; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3492; GFX9-NEXT: s_waitcnt lgkmcnt(0) 3493; GFX9-NEXT: global_load_dword v1, v0, s[10:11] glc 3494; GFX9-NEXT: s_waitcnt vmcnt(0) 3495; GFX9-NEXT: global_load_dword v2, v0, s[12:13] glc 3496; GFX9-NEXT: s_waitcnt vmcnt(0) 3497; GFX9-NEXT: global_load_dword v3, v0, s[14:15] glc 3498; GFX9-NEXT: s_waitcnt vmcnt(0) 3499; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3 3500; GFX9-NEXT: global_store_dword v0, v1, s[8:9] 3501; GFX9-NEXT: s_endpgm 3502; 3503; GFX11-LABEL: v_test_global_nnans_med3_f32_pat3: 3504; GFX11: ; %bb.0: 3505; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 3506; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 3507; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3508; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 3509; GFX11-NEXT: s_waitcnt lgkmcnt(0) 3510; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 3511; GFX11-NEXT: s_waitcnt vmcnt(0) 3512; GFX11-NEXT: 
global_load_b32 v2, v0, s[4:5] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  %a = load volatile float, ptr addrspace(1) %gep0
  %b = load volatile float, ptr addrspace(1) %gep1
  %c = load volatile float, ptr addrspace(1) %gep2
  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
  %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
  %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, ptr addrspace(1) %outgep
  ret void
}

define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat4(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat4:
; SI-SDAG:       ; %bb.0:
; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
; SI-SDAG-NEXT:    s_mov_b32 s10, 0
; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
; SI-SDAG-NEXT:    v_med3_f32 v2, v3, v2, v4
; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-SDAG-NEXT:    s_endpgm
;
; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat4:
; SI-GISEL:       ; %bb.0:
; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; SI-GISEL-NEXT:    s_mov_b32 s10, 0
; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    v_med3_f32 v2, v3, v2, v4
; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT:    s_endpgm
;
; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat4:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    v_med3_f32 v2, v2, v7, v3
; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
; VI-SDAG-NEXT:    s_endpgm
;
; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat4:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    v_med3_f32 v2, v2, v7, v3
; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
; VI-GISEL-NEXT:    s_endpgm
;
; GFX9-LABEL: v_test_global_nnans_med3_f32_pat4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_med3_f32 v1, v2, v1, v3
; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
; GFX9-NEXT:    s_endpgm
;
; GFX11-LABEL: v_test_global_nnans_med3_f32_pat4:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_med3_f32 v1, v2, v1, v3
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  %a = load volatile float, ptr addrspace(1) %gep0
  %b = load volatile float, ptr addrspace(1) %gep1
  %c = load volatile float, ptr addrspace(1) %gep2
  ; Value under test is max(min(b, a), min(max(a, b), c)).
  ; This is the one ordering starting with min(b, a) that pat5, pat6 and pat7
  ; do not exercise, so max takes (a, b) and the inner min takes the max
  ; result as its first operand.
  ; Every check prefix for this function expects the four calls to fold into
  ; one v_med3_f32. If the assertions are regenerated and that instruction's
  ; operand order changes, review the diff before accepting it.
  %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, ptr addrspace(1) %outgep
  ret void
}

define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat5(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat5:
; SI-SDAG:       ; %bb.0:
; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
; SI-SDAG-NEXT:    s_mov_b32 s10, 0
; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
; SI-SDAG-NEXT:    v_med3_f32 v2, v3, v2, v4
; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-SDAG-NEXT:    s_endpgm
;
; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat5:
; SI-GISEL:       ; %bb.0:
; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; SI-GISEL-NEXT:    s_mov_b32 s10, 0
; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64
s[8:9], s[4:5]
; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    v_med3_f32 v2, v3, v2, v4
; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT:    s_endpgm
;
; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat5:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    v_med3_f32 v2, v2, v7, v3
; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
; VI-SDAG-NEXT:    s_endpgm
;
; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat5:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    v_med3_f32 v2, v2, v7, v3
; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
; VI-GISEL-NEXT:    s_endpgm
;
; GFX9-LABEL: v_test_global_nnans_med3_f32_pat5:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_med3_f32 v1, v2, v1, v3
; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
; GFX9-NEXT:    s_endpgm
;
; GFX11-LABEL: v_test_global_nnans_med3_f32_pat5:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_med3_f32 v1, v2, v1, v3
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  %a = load volatile float, ptr addrspace(1) %gep0
  %b = load volatile float, ptr addrspace(1) %gep1
  %c = load volatile float, ptr addrspace(1) %gep2
  ; Value under test is max(min(b, a), min(max(b, a), c)).
  ; Every check prefix for this function expects the four calls below to fold
  ; into one v_med3_f32, with no separate min or max instruction emitted.
  %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
  %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, ptr addrspace(1) %outgep
  ret void
}

define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat6(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat6:
; SI-SDAG:       ; %bb.0:
; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
; SI-SDAG-NEXT:    s_mov_b32 s10, 0
; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
;
SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
; SI-SDAG-NEXT:    v_med3_f32 v2, v3, v2, v4
; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-SDAG-NEXT:    s_endpgm
;
; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat6:
; SI-GISEL:       ; %bb.0:
; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; SI-GISEL-NEXT:    s_mov_b32 s10, 0
; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    v_med3_f32 v2, v3, v2, v4
; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT:    s_endpgm
;
; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat6:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    v_med3_f32 v2, v2, v7, v3
; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
; VI-SDAG-NEXT:    s_endpgm
;
; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat6:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    v_med3_f32 v2, v2, v7, v3
; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
; VI-GISEL-NEXT:    s_endpgm
;
; GFX9-LABEL: v_test_global_nnans_med3_f32_pat6:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_med3_f32 v1, v2, v1, v3
; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
; GFX9-NEXT:    s_endpgm
;
; GFX11-LABEL: v_test_global_nnans_med3_f32_pat6:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_med3_f32 v1, v2, v1, v3
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  %a = load volatile float, ptr addrspace(1) %gep0
  %b = load volatile float, ptr addrspace(1) %gep1
  %c = load volatile float, ptr addrspace(1) %gep2
  ; Value under test is max(min(b, a), min(c, max(a, b))).
  ; Every check prefix for this function expects the four calls below to fold
  ; into one v_med3_f32, with no separate min or max instruction emitted.
  %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
  %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, ptr addrspace(1) %outgep
  ret void
}

define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat7(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat7:
; SI-SDAG:       ; %bb.0:
; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
; SI-SDAG-NEXT:    s_mov_b32 s10, 0
; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
; SI-SDAG-NEXT:    v_med3_f32 v2, v3, v2, v4
; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-SDAG-NEXT:    s_endpgm
;
; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat7:
; SI-GISEL:       ; %bb.0:
; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; SI-GISEL-NEXT:    s_mov_b32 s10, 0
; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
; SI-GISEL-NEXT:
buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    v_med3_f32 v2, v3, v2, v4
; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT:    s_endpgm
;
; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat7:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    v_med3_f32 v2, v2, v7, v3
; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
; VI-SDAG-NEXT:    s_endpgm
;
; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat7:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    v_med3_f32 v2, v2, v7, v3
; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
; VI-GISEL-NEXT:    s_endpgm
;
; GFX9-LABEL: v_test_global_nnans_med3_f32_pat7:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_med3_f32 v1, v2, v1, v3
; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
; GFX9-NEXT:    s_endpgm
;
; GFX11-LABEL: v_test_global_nnans_med3_f32_pat7:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_med3_f32 v1, v2, v1, v3
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  %a = load volatile float, ptr addrspace(1) %gep0
  %b = load volatile float, ptr addrspace(1) %gep1
  %c = load volatile float, ptr addrspace(1) %gep2
  ; Value under test is max(min(b, a), min(c, max(b, a))).
  ; Every check prefix for this function expects the four calls below to fold
  ; into one v_med3_f32, with no separate min or max instruction emitted.
  %tmp0 = call float @llvm.minnum.f32(float %b, float %a)
  %tmp1 = call float @llvm.maxnum.f32(float %b, float %a)
  %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, ptr addrspace(1) %outgep
  ret void
}

define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat8(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat8:
; SI-SDAG:       ; %bb.0:
; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
; SI-SDAG-NEXT:    s_mov_b32 s10, 0
; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT:    s_mov_b64 s[8:9],
s[2:3]
; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
; SI-SDAG-NEXT:    v_med3_f32 v2, v2, v3, v4
; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-SDAG-NEXT:    s_endpgm
;
; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat8:
; SI-GISEL:       ; %bb.0:
; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; SI-GISEL-NEXT:    s_mov_b32 s10, 0
; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT:    s_endpgm
;
; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat8:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    v_med3_f32 v2, v7, v2, v3
; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
; VI-SDAG-NEXT:    s_endpgm
;
; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat8:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    v_med3_f32 v2, v7, v2, v3
; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
; VI-GISEL-NEXT:    s_endpgm
;
; GFX9-LABEL: v_test_global_nnans_med3_f32_pat8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_med3_f32 v1, v1, v2, v3
; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
; GFX9-NEXT:    s_endpgm
;
; GFX11-LABEL: v_test_global_nnans_med3_f32_pat8:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  %a = load volatile float, ptr addrspace(1) %gep0
  %b = load volatile float, ptr addrspace(1) %gep1
  %c = load volatile float, ptr addrspace(1) %gep2
  ; Value under test is max(min(max(a, b), c), min(a, b)), that is, the outer
  ; max takes the min-of-max term as its first operand.
  ; Every check prefix for this function expects the four calls below to fold
  ; into one v_med3_f32, with no separate min or max instruction emitted.
  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
  %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0)
  store float %med3, ptr addrspace(1) %outgep
  ret void
}

define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat9(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 {
; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat9:
; SI-SDAG:       ; %bb.0:
; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
; SI-SDAG-NEXT:    s_mov_b32 s10, 0
; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
; SI-SDAG-NEXT:    v_med3_f32 v2, v3, v2, v4
; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-SDAG-NEXT:    s_endpgm
;
; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat9:
; SI-GISEL:       ; %bb.0:
; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; SI-GISEL-NEXT:    s_mov_b32 s10, 0
; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
4327; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 4328; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] 4329; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 4330; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 4331; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5] 4332; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc 4333; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 4334; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] 4335; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc 4336; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 4337; SI-GISEL-NEXT: v_med3_f32 v2, v3, v2, v4 4338; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] 4339; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 4340; SI-GISEL-NEXT: s_endpgm 4341; 4342; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat9: 4343; VI-SDAG: ; %bb.0: 4344; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 4345; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0 4346; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 4347; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 4348; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6 4349; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4350; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 4351; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6 4352; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 4353; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7 4354; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6 4355; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 4356; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc 4357; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 4358; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc 4359; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 4360; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc 4361; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 4362; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 4363; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6 4364; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4365; VI-SDAG-NEXT: v_med3_f32 v2, v2, v7, v3 4366; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 4367; VI-SDAG-NEXT: s_endpgm 4368; 4369; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat9: 4370; VI-GISEL: 
; %bb.0: 4371; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 4372; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0 4373; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 4374; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 4375; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 4376; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 4377; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4378; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 4379; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 4380; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6 4381; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 4382; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6 4383; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7 4384; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6 4385; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 4386; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc 4387; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 4388; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc 4389; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 4390; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc 4391; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 4392; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 4393; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 4394; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 4395; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4396; VI-GISEL-NEXT: v_med3_f32 v2, v2, v7, v3 4397; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 4398; VI-GISEL-NEXT: s_endpgm 4399; 4400; GFX9-LABEL: v_test_global_nnans_med3_f32_pat9: 4401; GFX9: ; %bb.0: 4402; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 4403; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4404; GFX9-NEXT: s_waitcnt lgkmcnt(0) 4405; GFX9-NEXT: global_load_dword v1, v0, s[10:11] glc 4406; GFX9-NEXT: s_waitcnt vmcnt(0) 4407; GFX9-NEXT: global_load_dword v2, v0, s[12:13] glc 4408; GFX9-NEXT: s_waitcnt vmcnt(0) 4409; GFX9-NEXT: global_load_dword v3, v0, s[14:15] glc 4410; GFX9-NEXT: s_waitcnt vmcnt(0) 4411; GFX9-NEXT: v_med3_f32 v1, v2, v1, v3 4412; GFX9-NEXT: global_store_dword v0, v1, s[8:9] 4413; GFX9-NEXT: s_endpgm 4414; 4415; GFX11-LABEL: v_test_global_nnans_med3_f32_pat9: 
4416; GFX11: ; %bb.0: 4417; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 4418; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 4419; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 4420; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4421; GFX11-NEXT: s_waitcnt lgkmcnt(0) 4422; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 4423; GFX11-NEXT: s_waitcnt vmcnt(0) 4424; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc 4425; GFX11-NEXT: s_waitcnt vmcnt(0) 4426; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 4427; GFX11-NEXT: s_waitcnt vmcnt(0) 4428; GFX11-NEXT: v_med3_f32 v1, v2, v1, v3 4429; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 4430; GFX11-NEXT: s_endpgm 4431 %tid = call i32 @llvm.amdgcn.workitem.id.x() 4432 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 4433 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid 4434 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid 4435 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid 4436 %a = load volatile float, ptr addrspace(1) %gep0 4437 %b = load volatile float, ptr addrspace(1) %gep1 4438 %c = load volatile float, ptr addrspace(1) %gep2 4439 %tmp0 = call float @llvm.minnum.f32(float %a, float %b) 4440 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a) 4441 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) 4442 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0) 4443 store float %med3, ptr addrspace(1) %outgep 4444 ret void 4445} 4446 4447define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat10(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 { 4448; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat10: 4449; SI-SDAG: ; %bb.0: 4450; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 4451; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000 4452; SI-SDAG-NEXT: s_mov_b32 s10, 0 4453; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4454; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 4455; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11] 4456; 
SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11] 4457; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 4458; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3] 4459; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5] 4460; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7] 4461; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 4462; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 4463; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc 4464; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 4465; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc 4466; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 4467; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] 4468; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4 4469; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 4470; SI-SDAG-NEXT: s_endpgm 4471; 4472; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat10: 4473; SI-GISEL: ; %bb.0: 4474; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 4475; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4476; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 4477; SI-GISEL-NEXT: s_mov_b32 s10, 0 4478; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000 4479; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 4480; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] 4481; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 4482; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 4483; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5] 4484; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc 4485; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 4486; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] 4487; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc 4488; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 4489; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4 4490; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] 4491; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 4492; SI-GISEL-NEXT: s_endpgm 4493; 4494; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat10: 4495; VI-SDAG: ; %bb.0: 4496; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 4497; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0 4498; VI-SDAG-NEXT: s_waitcnt 
lgkmcnt(0) 4499; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 4500; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6 4501; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4502; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 4503; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6 4504; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 4505; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7 4506; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6 4507; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 4508; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc 4509; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 4510; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc 4511; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 4512; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc 4513; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 4514; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 4515; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6 4516; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4517; VI-SDAG-NEXT: v_med3_f32 v2, v7, v2, v3 4518; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 4519; VI-SDAG-NEXT: s_endpgm 4520; 4521; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat10: 4522; VI-GISEL: ; %bb.0: 4523; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 4524; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0 4525; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 4526; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 4527; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 4528; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 4529; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4530; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 4531; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 4532; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6 4533; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 4534; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6 4535; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7 4536; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6 4537; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 4538; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc 4539; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 4540; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc 4541; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 4542; 
VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc 4543; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 4544; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 4545; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 4546; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 4547; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4548; VI-GISEL-NEXT: v_med3_f32 v2, v7, v2, v3 4549; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 4550; VI-GISEL-NEXT: s_endpgm 4551; 4552; GFX9-LABEL: v_test_global_nnans_med3_f32_pat10: 4553; GFX9: ; %bb.0: 4554; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 4555; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4556; GFX9-NEXT: s_waitcnt lgkmcnt(0) 4557; GFX9-NEXT: global_load_dword v1, v0, s[10:11] glc 4558; GFX9-NEXT: s_waitcnt vmcnt(0) 4559; GFX9-NEXT: global_load_dword v2, v0, s[12:13] glc 4560; GFX9-NEXT: s_waitcnt vmcnt(0) 4561; GFX9-NEXT: global_load_dword v3, v0, s[14:15] glc 4562; GFX9-NEXT: s_waitcnt vmcnt(0) 4563; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3 4564; GFX9-NEXT: global_store_dword v0, v1, s[8:9] 4565; GFX9-NEXT: s_endpgm 4566; 4567; GFX11-LABEL: v_test_global_nnans_med3_f32_pat10: 4568; GFX11: ; %bb.0: 4569; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 4570; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 4571; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 4572; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4573; GFX11-NEXT: s_waitcnt lgkmcnt(0) 4574; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 4575; GFX11-NEXT: s_waitcnt vmcnt(0) 4576; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc 4577; GFX11-NEXT: s_waitcnt vmcnt(0) 4578; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 4579; GFX11-NEXT: s_waitcnt vmcnt(0) 4580; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3 4581; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 4582; GFX11-NEXT: s_endpgm 4583 %tid = call i32 @llvm.amdgcn.workitem.id.x() 4584 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 4585 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid 4586 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid 4587 %outgep 
= getelementptr float, ptr addrspace(1) %out, i32 %tid 4588 %a = load volatile float, ptr addrspace(1) %gep0 4589 %b = load volatile float, ptr addrspace(1) %gep1 4590 %c = load volatile float, ptr addrspace(1) %gep2 4591 %tmp0 = call float @llvm.minnum.f32(float %a, float %b) 4592 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b) 4593 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1) 4594 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0) 4595 store float %med3, ptr addrspace(1) %outgep 4596 ret void 4597} 4598 4599define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat11(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 { 4600; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat11: 4601; SI-SDAG: ; %bb.0: 4602; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 4603; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000 4604; SI-SDAG-NEXT: s_mov_b32 s10, 0 4605; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4606; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 4607; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11] 4608; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11] 4609; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 4610; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3] 4611; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5] 4612; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7] 4613; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 4614; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 4615; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc 4616; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 4617; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc 4618; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 4619; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] 4620; SI-SDAG-NEXT: v_med3_f32 v2, v3, v2, v4 4621; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 4622; SI-SDAG-NEXT: s_endpgm 4623; 4624; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat11: 4625; SI-GISEL: ; %bb.0: 4626; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 4627; SI-GISEL-NEXT: v_lshlrev_b32_e32 
v0, 2, v0 4628; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 4629; SI-GISEL-NEXT: s_mov_b32 s10, 0 4630; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000 4631; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 4632; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] 4633; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 4634; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 4635; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5] 4636; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc 4637; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 4638; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] 4639; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc 4640; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 4641; SI-GISEL-NEXT: v_med3_f32 v2, v3, v2, v4 4642; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] 4643; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 4644; SI-GISEL-NEXT: s_endpgm 4645; 4646; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat11: 4647; VI-SDAG: ; %bb.0: 4648; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 4649; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0 4650; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 4651; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 4652; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6 4653; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4654; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 4655; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6 4656; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 4657; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7 4658; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6 4659; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 4660; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc 4661; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 4662; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc 4663; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 4664; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc 4665; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 4666; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 4667; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6 4668; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4669; VI-SDAG-NEXT: v_med3_f32 v2, v2, v7, v3 4670; VI-SDAG-NEXT: 
flat_store_dword v[0:1], v2 4671; VI-SDAG-NEXT: s_endpgm 4672; 4673; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat11: 4674; VI-GISEL: ; %bb.0: 4675; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 4676; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0 4677; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 4678; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 4679; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 4680; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 4681; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4682; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 4683; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 4684; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6 4685; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 4686; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6 4687; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7 4688; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6 4689; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 4690; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc 4691; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 4692; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc 4693; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 4694; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc 4695; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 4696; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 4697; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 4698; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 4699; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4700; VI-GISEL-NEXT: v_med3_f32 v2, v2, v7, v3 4701; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 4702; VI-GISEL-NEXT: s_endpgm 4703; 4704; GFX9-LABEL: v_test_global_nnans_med3_f32_pat11: 4705; GFX9: ; %bb.0: 4706; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 4707; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4708; GFX9-NEXT: s_waitcnt lgkmcnt(0) 4709; GFX9-NEXT: global_load_dword v1, v0, s[10:11] glc 4710; GFX9-NEXT: s_waitcnt vmcnt(0) 4711; GFX9-NEXT: global_load_dword v2, v0, s[12:13] glc 4712; GFX9-NEXT: s_waitcnt vmcnt(0) 4713; GFX9-NEXT: global_load_dword v3, v0, s[14:15] glc 4714; GFX9-NEXT: s_waitcnt vmcnt(0) 4715; GFX9-NEXT: v_med3_f32 v1, v2, v1, v3 
4716; GFX9-NEXT: global_store_dword v0, v1, s[8:9] 4717; GFX9-NEXT: s_endpgm 4718; 4719; GFX11-LABEL: v_test_global_nnans_med3_f32_pat11: 4720; GFX11: ; %bb.0: 4721; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 4722; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 4723; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 4724; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4725; GFX11-NEXT: s_waitcnt lgkmcnt(0) 4726; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 4727; GFX11-NEXT: s_waitcnt vmcnt(0) 4728; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc 4729; GFX11-NEXT: s_waitcnt vmcnt(0) 4730; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 4731; GFX11-NEXT: s_waitcnt vmcnt(0) 4732; GFX11-NEXT: v_med3_f32 v1, v2, v1, v3 4733; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 4734; GFX11-NEXT: s_endpgm 4735 %tid = call i32 @llvm.amdgcn.workitem.id.x() 4736 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 4737 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid 4738 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid 4739 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid 4740 %a = load volatile float, ptr addrspace(1) %gep0 4741 %b = load volatile float, ptr addrspace(1) %gep1 4742 %c = load volatile float, ptr addrspace(1) %gep2 4743 %tmp0 = call float @llvm.minnum.f32(float %a, float %b) 4744 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a) 4745 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1) 4746 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0) 4747 store float %med3, ptr addrspace(1) %outgep 4748 ret void 4749} 4750 4751define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat12(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 { 4752; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat12: 4753; SI-SDAG: ; %bb.0: 4754; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 4755; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000 4756; SI-SDAG-NEXT: s_mov_b32 s10, 0 4757; 
SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4758; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 4759; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11] 4760; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11] 4761; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 4762; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3] 4763; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5] 4764; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7] 4765; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 4766; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 4767; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc 4768; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 4769; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc 4770; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 4771; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] 4772; SI-SDAG-NEXT: v_med3_f32 v2, v3, v2, v4 4773; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 4774; SI-SDAG-NEXT: s_endpgm 4775; 4776; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat12: 4777; SI-GISEL: ; %bb.0: 4778; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 4779; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4780; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 4781; SI-GISEL-NEXT: s_mov_b32 s10, 0 4782; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000 4783; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 4784; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] 4785; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 4786; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 4787; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5] 4788; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc 4789; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 4790; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] 4791; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc 4792; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 4793; SI-GISEL-NEXT: v_med3_f32 v2, v3, v2, v4 4794; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] 4795; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 4796; SI-GISEL-NEXT: s_endpgm 4797; 4798; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat12: 4799; VI-SDAG: ; 
%bb.0: 4800; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 4801; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0 4802; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 4803; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 4804; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6 4805; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4806; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 4807; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6 4808; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 4809; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7 4810; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6 4811; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 4812; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc 4813; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 4814; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc 4815; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 4816; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc 4817; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 4818; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 4819; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6 4820; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4821; VI-SDAG-NEXT: v_med3_f32 v2, v2, v7, v3 4822; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 4823; VI-SDAG-NEXT: s_endpgm 4824; 4825; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat12: 4826; VI-GISEL: ; %bb.0: 4827; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 4828; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0 4829; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 4830; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 4831; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 4832; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 4833; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4834; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 4835; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 4836; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6 4837; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 4838; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6 4839; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7 4840; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6 4841; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 4842; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc 4843; 
VI-GISEL-NEXT: s_waitcnt vmcnt(0) 4844; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc 4845; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 4846; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc 4847; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 4848; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 4849; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 4850; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 4851; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4852; VI-GISEL-NEXT: v_med3_f32 v2, v2, v7, v3 4853; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 4854; VI-GISEL-NEXT: s_endpgm 4855; 4856; GFX9-LABEL: v_test_global_nnans_med3_f32_pat12: 4857; GFX9: ; %bb.0: 4858; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 4859; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4860; GFX9-NEXT: s_waitcnt lgkmcnt(0) 4861; GFX9-NEXT: global_load_dword v1, v0, s[10:11] glc 4862; GFX9-NEXT: s_waitcnt vmcnt(0) 4863; GFX9-NEXT: global_load_dword v2, v0, s[12:13] glc 4864; GFX9-NEXT: s_waitcnt vmcnt(0) 4865; GFX9-NEXT: global_load_dword v3, v0, s[14:15] glc 4866; GFX9-NEXT: s_waitcnt vmcnt(0) 4867; GFX9-NEXT: v_med3_f32 v1, v2, v1, v3 4868; GFX9-NEXT: global_store_dword v0, v1, s[8:9] 4869; GFX9-NEXT: s_endpgm 4870; 4871; GFX11-LABEL: v_test_global_nnans_med3_f32_pat12: 4872; GFX11: ; %bb.0: 4873; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 4874; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 4875; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 4876; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4877; GFX11-NEXT: s_waitcnt lgkmcnt(0) 4878; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 4879; GFX11-NEXT: s_waitcnt vmcnt(0) 4880; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc 4881; GFX11-NEXT: s_waitcnt vmcnt(0) 4882; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 4883; GFX11-NEXT: s_waitcnt vmcnt(0) 4884; GFX11-NEXT: v_med3_f32 v1, v2, v1, v3 4885; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 4886; GFX11-NEXT: s_endpgm 4887 %tid = call i32 @llvm.amdgcn.workitem.id.x() 4888 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 4889 %gep1 = 
getelementptr float, ptr addrspace(1) %bptr, i32 %tid 4890 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid 4891 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid 4892 %a = load volatile float, ptr addrspace(1) %gep0 4893 %b = load volatile float, ptr addrspace(1) %gep1 4894 %c = load volatile float, ptr addrspace(1) %gep2 4895 %tmp0 = call float @llvm.minnum.f32(float %b, float %a) 4896 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a) 4897 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1) 4898 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0) 4899 store float %med3, ptr addrspace(1) %outgep 4900 ret void 4901} 4902 4903define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat13(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 { 4904; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat13: 4905; SI-SDAG: ; %bb.0: 4906; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 4907; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000 4908; SI-SDAG-NEXT: s_mov_b32 s10, 0 4909; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4910; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 4911; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11] 4912; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11] 4913; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 4914; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3] 4915; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5] 4916; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7] 4917; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 4918; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 4919; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc 4920; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 4921; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc 4922; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 4923; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] 4924; SI-SDAG-NEXT: v_med3_f32 v2, v3, v2, v4 4925; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 4926; SI-SDAG-NEXT: s_endpgm 4927; 4928; SI-GISEL-LABEL: 
v_test_global_nnans_med3_f32_pat13: 4929; SI-GISEL: ; %bb.0: 4930; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 4931; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 4932; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 4933; SI-GISEL-NEXT: s_mov_b32 s10, 0 4934; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000 4935; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 4936; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] 4937; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 4938; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 4939; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5] 4940; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc 4941; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 4942; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] 4943; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc 4944; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 4945; SI-GISEL-NEXT: v_med3_f32 v2, v3, v2, v4 4946; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] 4947; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 4948; SI-GISEL-NEXT: s_endpgm 4949; 4950; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat13: 4951; VI-SDAG: ; %bb.0: 4952; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 4953; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0 4954; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 4955; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 4956; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6 4957; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4958; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 4959; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6 4960; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 4961; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7 4962; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6 4963; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 4964; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc 4965; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 4966; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc 4967; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 4968; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc 4969; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 4970; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 4971; 
VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6 4972; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4973; VI-SDAG-NEXT: v_med3_f32 v2, v2, v7, v3 4974; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 4975; VI-SDAG-NEXT: s_endpgm 4976; 4977; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat13: 4978; VI-GISEL: ; %bb.0: 4979; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 4980; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0 4981; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 4982; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 4983; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 4984; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 4985; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 4986; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 4987; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 4988; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6 4989; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 4990; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6 4991; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7 4992; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6 4993; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 4994; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc 4995; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 4996; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc 4997; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 4998; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc 4999; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 5000; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 5001; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 5002; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 5003; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 5004; VI-GISEL-NEXT: v_med3_f32 v2, v2, v7, v3 5005; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 5006; VI-GISEL-NEXT: s_endpgm 5007; 5008; GFX9-LABEL: v_test_global_nnans_med3_f32_pat13: 5009; GFX9: ; %bb.0: 5010; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 5011; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 5012; GFX9-NEXT: s_waitcnt lgkmcnt(0) 5013; GFX9-NEXT: global_load_dword v1, v0, s[10:11] glc 5014; GFX9-NEXT: s_waitcnt vmcnt(0) 5015; GFX9-NEXT: global_load_dword v2, v0, s[12:13] glc 5016; 
GFX9-NEXT: s_waitcnt vmcnt(0) 5017; GFX9-NEXT: global_load_dword v3, v0, s[14:15] glc 5018; GFX9-NEXT: s_waitcnt vmcnt(0) 5019; GFX9-NEXT: v_med3_f32 v1, v2, v1, v3 5020; GFX9-NEXT: global_store_dword v0, v1, s[8:9] 5021; GFX9-NEXT: s_endpgm 5022; 5023; GFX11-LABEL: v_test_global_nnans_med3_f32_pat13: 5024; GFX11: ; %bb.0: 5025; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 5026; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 5027; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 5028; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 5029; GFX11-NEXT: s_waitcnt lgkmcnt(0) 5030; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 5031; GFX11-NEXT: s_waitcnt vmcnt(0) 5032; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc 5033; GFX11-NEXT: s_waitcnt vmcnt(0) 5034; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 5035; GFX11-NEXT: s_waitcnt vmcnt(0) 5036; GFX11-NEXT: v_med3_f32 v1, v2, v1, v3 5037; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 5038; GFX11-NEXT: s_endpgm 5039 %tid = call i32 @llvm.amdgcn.workitem.id.x() 5040 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 5041 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid 5042 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid 5043 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid 5044 %a = load volatile float, ptr addrspace(1) %gep0 5045 %b = load volatile float, ptr addrspace(1) %gep1 5046 %c = load volatile float, ptr addrspace(1) %gep2 5047 %tmp0 = call float @llvm.minnum.f32(float %b, float %a) 5048 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a) 5049 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) 5050 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0) 5051 store float %med3, ptr addrspace(1) %outgep 5052 ret void 5053} 5054; max(min(c, max(a, b)), min(b, a)) 5055define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat14(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 { 5056; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat14: 5057;
SI-SDAG: ; %bb.0: 5058; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 5059; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000 5060; SI-SDAG-NEXT: s_mov_b32 s10, 0 5061; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 5062; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 5063; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11] 5064; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11] 5065; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 5066; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3] 5067; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5] 5068; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7] 5069; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 5070; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 5071; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc 5072; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 5073; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc 5074; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 5075; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] 5076; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4 5077; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 5078; SI-SDAG-NEXT: s_endpgm 5079; 5080; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat14: 5081; SI-GISEL: ; %bb.0: 5082; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 5083; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 5084; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 5085; SI-GISEL-NEXT: s_mov_b32 s10, 0 5086; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000 5087; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 5088; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] 5089; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 5090; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 5091; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5] 5092; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc 5093; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 5094; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] 5095; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc 5096; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 5097; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4 5098; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] 5099; SI-GISEL-NEXT: 
buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 5100; SI-GISEL-NEXT: s_endpgm 5101; 5102; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat14: 5103; VI-SDAG: ; %bb.0: 5104; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 5105; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0 5106; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 5107; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 5108; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6 5109; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 5110; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 5111; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6 5112; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 5113; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7 5114; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6 5115; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 5116; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc 5117; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 5118; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc 5119; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 5120; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc 5121; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 5122; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 5123; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6 5124; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 5125; VI-SDAG-NEXT: v_med3_f32 v2, v7, v2, v3 5126; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 5127; VI-SDAG-NEXT: s_endpgm 5128; 5129; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat14: 5130; VI-GISEL: ; %bb.0: 5131; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 5132; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0 5133; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 5134; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 5135; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 5136; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 5137; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 5138; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 5139; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 5140; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6 5141; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 5142; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6 5143; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7 5144; 
VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6 5145; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 5146; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc 5147; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 5148; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc 5149; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 5150; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc 5151; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 5152; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 5153; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 5154; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 5155; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 5156; VI-GISEL-NEXT: v_med3_f32 v2, v7, v2, v3 5157; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 5158; VI-GISEL-NEXT: s_endpgm 5159; 5160; GFX9-LABEL: v_test_global_nnans_med3_f32_pat14: 5161; GFX9: ; %bb.0: 5162; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 5163; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 5164; GFX9-NEXT: s_waitcnt lgkmcnt(0) 5165; GFX9-NEXT: global_load_dword v1, v0, s[10:11] glc 5166; GFX9-NEXT: s_waitcnt vmcnt(0) 5167; GFX9-NEXT: global_load_dword v2, v0, s[12:13] glc 5168; GFX9-NEXT: s_waitcnt vmcnt(0) 5169; GFX9-NEXT: global_load_dword v3, v0, s[14:15] glc 5170; GFX9-NEXT: s_waitcnt vmcnt(0) 5171; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3 5172; GFX9-NEXT: global_store_dword v0, v1, s[8:9] 5173; GFX9-NEXT: s_endpgm 5174; 5175; GFX11-LABEL: v_test_global_nnans_med3_f32_pat14: 5176; GFX11: ; %bb.0: 5177; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 5178; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 5179; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 5180; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 5181; GFX11-NEXT: s_waitcnt lgkmcnt(0) 5182; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 5183; GFX11-NEXT: s_waitcnt vmcnt(0) 5184; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc 5185; GFX11-NEXT: s_waitcnt vmcnt(0) 5186; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 5187; GFX11-NEXT: s_waitcnt vmcnt(0) 5188; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3 5189; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 
5190; GFX11-NEXT: s_endpgm 5191 %tid = call i32 @llvm.amdgcn.workitem.id.x() 5192 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 5193 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid 5194 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid 5195 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid 5196 %a = load volatile float, ptr addrspace(1) %gep0 5197 %b = load volatile float, ptr addrspace(1) %gep1 5198 %c = load volatile float, ptr addrspace(1) %gep2 5199 %tmp0 = call float @llvm.minnum.f32(float %b, float %a) 5200 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b) 5201 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1) 5202 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0) 5203 store float %med3, ptr addrspace(1) %outgep 5204 ret void 5205} 5206; max(min(c, max(b, a)), min(b, a)) 5207define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat15(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 { 5208; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat15: 5209; SI-SDAG: ; %bb.0: 5210; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 5211; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000 5212; SI-SDAG-NEXT: s_mov_b32 s10, 0 5213; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 5214; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 5215; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11] 5216; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11] 5217; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 5218; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3] 5219; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5] 5220; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7] 5221; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 5222; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 5223; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc 5224; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 5225; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc 5226; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 5227; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] 5228; SI-SDAG-NEXT: v_med3_f32 v2,
v3, v2, v4 5229; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 5230; SI-SDAG-NEXT: s_endpgm 5231; 5232; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat15: 5233; SI-GISEL: ; %bb.0: 5234; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 5235; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 5236; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 5237; SI-GISEL-NEXT: s_mov_b32 s10, 0 5238; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000 5239; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 5240; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] 5241; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 5242; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 5243; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5] 5244; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc 5245; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 5246; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] 5247; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc 5248; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 5249; SI-GISEL-NEXT: v_med3_f32 v2, v3, v2, v4 5250; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] 5251; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 5252; SI-GISEL-NEXT: s_endpgm 5253; 5254; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat15: 5255; VI-SDAG: ; %bb.0: 5256; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 5257; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0 5258; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 5259; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 5260; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6 5261; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 5262; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 5263; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6 5264; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 5265; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7 5266; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6 5267; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 5268; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc 5269; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 5270; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc 5271; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 5272; 
VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc 5273; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 5274; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 5275; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6 5276; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 5277; VI-SDAG-NEXT: v_med3_f32 v2, v2, v7, v3 5278; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 5279; VI-SDAG-NEXT: s_endpgm 5280; 5281; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat15: 5282; VI-GISEL: ; %bb.0: 5283; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 5284; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0 5285; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 5286; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 5287; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 5288; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 5289; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 5290; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 5291; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 5292; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6 5293; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 5294; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6 5295; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7 5296; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6 5297; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 5298; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc 5299; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 5300; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc 5301; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 5302; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc 5303; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 5304; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 5305; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 5306; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 5307; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 5308; VI-GISEL-NEXT: v_med3_f32 v2, v2, v7, v3 5309; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 5310; VI-GISEL-NEXT: s_endpgm 5311; 5312; GFX9-LABEL: v_test_global_nnans_med3_f32_pat15: 5313; GFX9: ; %bb.0: 5314; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 5315; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 5316; GFX9-NEXT: s_waitcnt lgkmcnt(0) 5317; GFX9-NEXT: 
global_load_dword v1, v0, s[10:11] glc 5318; GFX9-NEXT: s_waitcnt vmcnt(0) 5319; GFX9-NEXT: global_load_dword v2, v0, s[12:13] glc 5320; GFX9-NEXT: s_waitcnt vmcnt(0) 5321; GFX9-NEXT: global_load_dword v3, v0, s[14:15] glc 5322; GFX9-NEXT: s_waitcnt vmcnt(0) 5323; GFX9-NEXT: v_med3_f32 v1, v2, v1, v3 5324; GFX9-NEXT: global_store_dword v0, v1, s[8:9] 5325; GFX9-NEXT: s_endpgm 5326; 5327; GFX11-LABEL: v_test_global_nnans_med3_f32_pat15: 5328; GFX11: ; %bb.0: 5329; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 5330; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 5331; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 5332; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 5333; GFX11-NEXT: s_waitcnt lgkmcnt(0) 5334; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 5335; GFX11-NEXT: s_waitcnt vmcnt(0) 5336; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc 5337; GFX11-NEXT: s_waitcnt vmcnt(0) 5338; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 5339; GFX11-NEXT: s_waitcnt vmcnt(0) 5340; GFX11-NEXT: v_med3_f32 v1, v2, v1, v3 5341; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 5342; GFX11-NEXT: s_endpgm 5343 %tid = call i32 @llvm.amdgcn.workitem.id.x() 5344 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 5345 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid 5346 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid 5347 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid 5348 %a = load volatile float, ptr addrspace(1) %gep0 5349 %b = load volatile float, ptr addrspace(1) %gep1 5350 %c = load volatile float, ptr addrspace(1) %gep2 5351 %tmp0 = call float @llvm.minnum.f32(float %b, float %a) 5352 %tmp1 = call float @llvm.maxnum.f32(float %b, float %a) 5353 %tmp2 = call float @llvm.minnum.f32(float %c, float %tmp1) 5354 %med3 = call float @llvm.maxnum.f32(float %tmp2, float %tmp0) 5355 store float %med3, ptr addrspace(1) %outgep 5356 ret void 5357} 5358 5359; Also handle `min` at the root: 5360; min(max(x, y), max(min(x, y), z)) 5361; (in the pat16 body that follows, x = %a, y = %b, z = %c) 5362define
amdgpu_kernel void @v_test_global_nnans_med3_f32_pat16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 { 5363; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat16: 5364; SI-SDAG: ; %bb.0: 5365; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 5366; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000 5367; SI-SDAG-NEXT: s_mov_b32 s10, 0 5368; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 5369; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 5370; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11] 5371; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11] 5372; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 5373; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3] 5374; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5] 5375; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7] 5376; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 5377; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 5378; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc 5379; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 5380; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc 5381; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 5382; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] 5383; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4 5384; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 5385; SI-SDAG-NEXT: s_endpgm 5386; 5387; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat16: 5388; SI-GISEL: ; %bb.0: 5389; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 5390; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 5391; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 5392; SI-GISEL-NEXT: s_mov_b32 s10, 0 5393; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000 5394; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 5395; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] 5396; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 5397; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 5398; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5] 5399; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc 5400; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 5401; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] 5402; 
SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc 5403; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 5404; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4 5405; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] 5406; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 5407; SI-GISEL-NEXT: s_endpgm 5408; 5409; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat16: 5410; VI-SDAG: ; %bb.0: 5411; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 5412; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0 5413; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 5414; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 5415; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6 5416; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 5417; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 5418; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6 5419; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 5420; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7 5421; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6 5422; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 5423; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc 5424; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 5425; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc 5426; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 5427; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc 5428; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 5429; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 5430; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6 5431; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 5432; VI-SDAG-NEXT: v_med3_f32 v2, v7, v2, v3 5433; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 5434; VI-SDAG-NEXT: s_endpgm 5435; 5436; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat16: 5437; VI-GISEL: ; %bb.0: 5438; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 5439; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0 5440; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 5441; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 5442; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 5443; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 5444; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 5445; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 5446; 
VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 5447; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6 5448; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 5449; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6 5450; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7 5451; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6 5452; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 5453; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc 5454; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 5455; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc 5456; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 5457; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc 5458; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 5459; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 5460; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 5461; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 5462; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 5463; VI-GISEL-NEXT: v_med3_f32 v2, v7, v2, v3 5464; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 5465; VI-GISEL-NEXT: s_endpgm 5466; 5467; GFX9-LABEL: v_test_global_nnans_med3_f32_pat16: 5468; GFX9: ; %bb.0: 5469; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 5470; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 5471; GFX9-NEXT: s_waitcnt lgkmcnt(0) 5472; GFX9-NEXT: global_load_dword v1, v0, s[10:11] glc 5473; GFX9-NEXT: s_waitcnt vmcnt(0) 5474; GFX9-NEXT: global_load_dword v2, v0, s[12:13] glc 5475; GFX9-NEXT: s_waitcnt vmcnt(0) 5476; GFX9-NEXT: global_load_dword v3, v0, s[14:15] glc 5477; GFX9-NEXT: s_waitcnt vmcnt(0) 5478; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3 5479; GFX9-NEXT: global_store_dword v0, v1, s[8:9] 5480; GFX9-NEXT: s_endpgm 5481; 5482; GFX11-LABEL: v_test_global_nnans_med3_f32_pat16: 5483; GFX11: ; %bb.0: 5484; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 5485; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 5486; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 5487; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 5488; GFX11-NEXT: s_waitcnt lgkmcnt(0) 5489; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 5490; GFX11-NEXT: s_waitcnt vmcnt(0) 5491; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] 
glc dlc 5492; GFX11-NEXT: s_waitcnt vmcnt(0) 5493; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 5494; GFX11-NEXT: s_waitcnt vmcnt(0) 5495; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3 5496; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 5497; GFX11-NEXT: s_endpgm 5498 %tid = call i32 @llvm.amdgcn.workitem.id.x() 5499 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 5500 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid 5501 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid 5502 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid 5503 %a = load volatile float, ptr addrspace(1) %gep0 5504 %b = load volatile float, ptr addrspace(1) %gep1 5505 %c = load volatile float, ptr addrspace(1) %gep2 5506 %tmp0 = call float @llvm.maxnum.f32(float %a, float %b) 5507 %tmp1 = call float @llvm.minnum.f32(float %a, float %b) 5508 %tmp2 = call float @llvm.maxnum.f32(float %tmp1, float %c) 5509 %med3 = call float @llvm.minnum.f32(float %tmp0, float %tmp2) 5510 store float %med3, ptr addrspace(1) %outgep 5511 ret void 5512} 5513 5514; --------------------------------------------------------------------- 5515; Negative patterns 5516; --------------------------------------------------------------------- 5517; Here the inner min(a, b) is also written out by a volatile store, so it has a second use besides the outer max. 5518define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 { 5519; SI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use0: 5520; SI-SDAG: ; %bb.0: 5521; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 5522; SI-SDAG-NEXT: s_mov_b32 s10, 0 5523; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000 5524; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 5525; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 5526; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11] 5527; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11] 5528; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 5529; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3] 5530; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5] 5531; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7]
5532; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] 5533; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 5534; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 5535; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc 5536; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 5537; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc 5538; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 5539; SI-SDAG-NEXT: s_mov_b32 s10, -1 5540; SI-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2 5541; SI-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3 5542; SI-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v4 5543; SI-SDAG-NEXT: v_min_f32_e32 v5, v2, v3 5544; SI-SDAG-NEXT: v_max_f32_e32 v2, v2, v3 5545; SI-SDAG-NEXT: buffer_store_dword v5, off, s[8:11], 0 5546; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 5547; SI-SDAG-NEXT: v_min_f32_e32 v2, v2, v4 5548; SI-SDAG-NEXT: v_max_f32_e32 v2, v5, v2 5549; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 5550; SI-SDAG-NEXT: s_endpgm 5551; 5552; SI-GISEL-LABEL: v_test_safe_med3_f32_pat0_multi_use0: 5553; SI-GISEL: ; %bb.0: 5554; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 5555; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 5556; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 5557; SI-GISEL-NEXT: s_mov_b32 s10, 0 5558; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000 5559; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 5560; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] 5561; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 5562; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 5563; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5] 5564; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc 5565; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 5566; SI-GISEL-NEXT: s_mov_b32 s2, -1 5567; SI-GISEL-NEXT: s_mov_b32 s3, s11 5568; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] 5569; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc 5570; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 5571; SI-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2 5572; SI-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3 5573; SI-GISEL-NEXT: v_min_f32_e32 v5, v2, v3 5574; 
SI-GISEL-NEXT: v_max_f32_e32 v2, v2, v3 5575; SI-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v4 5576; SI-GISEL-NEXT: buffer_store_dword v5, off, s[0:3], 0 5577; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 5578; SI-GISEL-NEXT: v_min_f32_e32 v2, v2, v3 5579; SI-GISEL-NEXT: v_max_f32_e32 v2, v5, v2 5580; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] 5581; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 5582; SI-GISEL-NEXT: s_endpgm 5583; 5584; VI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use0: 5585; VI-SDAG: ; %bb.0: 5586; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 5587; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0 5588; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 5589; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 5590; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6 5591; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 5592; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 5593; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6 5594; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 5595; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7 5596; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6 5597; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 5598; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc 5599; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 5600; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc 5601; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 5602; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc 5603; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 5604; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 5605; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6 5606; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 5607; VI-SDAG-NEXT: v_mul_f32_e32 v4, 1.0, v7 5608; VI-SDAG-NEXT: v_mul_f32_e32 v2, 1.0, v2 5609; VI-SDAG-NEXT: v_mul_f32_e32 v3, 1.0, v3 5610; VI-SDAG-NEXT: v_min_f32_e32 v5, v4, v2 5611; VI-SDAG-NEXT: v_max_f32_e32 v2, v4, v2 5612; VI-SDAG-NEXT: v_min_f32_e32 v2, v2, v3 5613; VI-SDAG-NEXT: v_max_f32_e32 v2, v5, v2 5614; VI-SDAG-NEXT: flat_store_dword v[0:1], v5 5615; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 5616; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 5617; VI-SDAG-NEXT: s_endpgm 
5618; 5619; VI-GISEL-LABEL: v_test_safe_med3_f32_pat0_multi_use0: 5620; VI-GISEL: ; %bb.0: 5621; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 5622; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0 5623; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 5624; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 5625; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 5626; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 5627; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 5628; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 5629; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 5630; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6 5631; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 5632; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6 5633; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7 5634; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6 5635; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 5636; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc 5637; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 5638; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc 5639; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 5640; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc 5641; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 5642; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 5643; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 5644; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 5645; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 5646; VI-GISEL-NEXT: v_mul_f32_e32 v4, 1.0, v7 5647; VI-GISEL-NEXT: v_mul_f32_e32 v2, 1.0, v2 5648; VI-GISEL-NEXT: v_mul_f32_e32 v3, 1.0, v3 5649; VI-GISEL-NEXT: v_min_f32_e32 v5, v4, v2 5650; VI-GISEL-NEXT: v_max_f32_e32 v2, v4, v2 5651; VI-GISEL-NEXT: v_min_f32_e32 v2, v2, v3 5652; VI-GISEL-NEXT: v_max_f32_e32 v2, v5, v2 5653; VI-GISEL-NEXT: flat_store_dword v[0:1], v5 5654; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 5655; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 5656; VI-GISEL-NEXT: s_endpgm 5657; 5658; GFX9-LABEL: v_test_safe_med3_f32_pat0_multi_use0: 5659; GFX9: ; %bb.0: 5660; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 5661; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 5662; GFX9-NEXT: s_waitcnt lgkmcnt(0) 5663; 
GFX9-NEXT: global_load_dword v1, v0, s[10:11] glc 5664; GFX9-NEXT: s_waitcnt vmcnt(0) 5665; GFX9-NEXT: global_load_dword v2, v0, s[12:13] glc 5666; GFX9-NEXT: s_waitcnt vmcnt(0) 5667; GFX9-NEXT: global_load_dword v3, v0, s[14:15] glc 5668; GFX9-NEXT: s_waitcnt vmcnt(0) 5669; GFX9-NEXT: v_max_f32_e32 v1, v1, v1 5670; GFX9-NEXT: v_max_f32_e32 v2, v2, v2 5671; GFX9-NEXT: v_max_f32_e32 v3, v3, v3 5672; GFX9-NEXT: v_min_f32_e32 v4, v1, v2 5673; GFX9-NEXT: v_max_f32_e32 v1, v1, v2 5674; GFX9-NEXT: global_store_dword v[0:1], v4, off 5675; GFX9-NEXT: s_waitcnt vmcnt(0) 5676; GFX9-NEXT: v_min_f32_e32 v1, v1, v3 5677; GFX9-NEXT: v_max_f32_e32 v1, v4, v1 5678; GFX9-NEXT: global_store_dword v0, v1, s[8:9] 5679; GFX9-NEXT: s_endpgm 5680; 5681; GFX11-LABEL: v_test_safe_med3_f32_pat0_multi_use0: 5682; GFX11: ; %bb.0: 5683; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 5684; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 5685; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 5686; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 5687; GFX11-NEXT: s_waitcnt lgkmcnt(0) 5688; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 5689; GFX11-NEXT: s_waitcnt vmcnt(0) 5690; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc 5691; GFX11-NEXT: s_waitcnt vmcnt(0) 5692; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 5693; GFX11-NEXT: s_waitcnt vmcnt(0) 5694; GFX11-NEXT: v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2 5695; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 5696; GFX11-NEXT: v_min_f32_e32 v4, v1, v2 5697; GFX11-NEXT: v_dual_max_f32 v1, v1, v2 :: v_dual_max_f32 v2, v3, v3 5698; GFX11-NEXT: v_minmax_f32 v1, v1, v2, v4 5699; GFX11-NEXT: global_store_b32 v[0:1], v4, off dlc 5700; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 5701; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 5702; GFX11-NEXT: s_endpgm 5703 %tid = call i32 @llvm.amdgcn.workitem.id.x() 5704 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 5705 %gep1 = getelementptr float, ptr addrspace(1) 
%bptr, i32 %tid
  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  %a = load volatile float, ptr addrspace(1) %gep0
  %b = load volatile float, ptr addrspace(1) %gep1
  %c = load volatile float, ptr addrspace(1) %gep2
  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
  store volatile float %tmp0, ptr addrspace(1) undef
  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, ptr addrspace(1) %outgep
  ret void
}

; Median-of-three idiom maxnum(minnum(a, b), minnum(maxnum(a, b), c)) built
; from minnum/maxnum calls without nnan flags on three volatile-loaded floats.
; Here the inner maxnum(a, b) (%tmp1) has a second user: it is also written out
; with a volatile store before the final result is stored to %out[tid].
; The expected output keeps separate min/max instructions (plus v_minmax_f32 on
; GFX11); no v_med3_f32 is expected on any target.
; The CHECK lines are autogenerated (see the NOTE at the top of the file):
; regenerate them with utils/update_llc_test_checks.py instead of editing them.
define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use1(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
; SI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use1:
; SI-SDAG:       ; %bb.0:
; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-SDAG-NEXT:    s_mov_b32 s10, 0
; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    s_mov_b32 s10, -1
; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 1.0, v2
; SI-SDAG-NEXT:    v_mul_f32_e32 v3, 1.0, v3
; SI-SDAG-NEXT:    v_mul_f32_e32 v4, 1.0, v4
; SI-SDAG-NEXT:    v_min_f32_e32 v5, v2, v3
; SI-SDAG-NEXT:    v_max_f32_e32 v2, v2, v3
; SI-SDAG-NEXT:    buffer_store_dword v2, off, s[8:11], 0
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; SI-SDAG-NEXT:    v_min_f32_e32 v2, v2, v4
; SI-SDAG-NEXT:    v_max_f32_e32 v2, v5, v2
; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-SDAG-NEXT:    s_endpgm
;
; SI-GISEL-LABEL: v_test_safe_med3_f32_pat0_multi_use1:
; SI-GISEL:       ; %bb.0:
; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; SI-GISEL-NEXT:    s_mov_b32 s10, 0
; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    s_mov_b32 s2, -1
; SI-GISEL-NEXT:    s_mov_b32 s3, s11
; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
; SI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v3
; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
; SI-GISEL-NEXT:    buffer_store_dword v2, off, s[0:3], 0
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
; SI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT:    s_endpgm
;
; VI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use1:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 1.0, v7
; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 1.0, v2
; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 1.0, v3
; VI-SDAG-NEXT:    v_min_f32_e32 v5, v4, v2
; VI-SDAG-NEXT:    v_max_f32_e32 v2, v4, v2
; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    v_min_f32_e32 v2, v2, v3
; VI-SDAG-NEXT:    v_max_f32_e32 v2, v5, v2
; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
; VI-SDAG-NEXT:    s_endpgm
;
; VI-GISEL-LABEL: v_test_safe_med3_f32_pat0_multi_use1:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
; VI-GISEL-NEXT:    v_min_f32_e32 v5, v4, v2
; VI-GISEL-NEXT:    v_max_f32_e32 v2, v4, v2
; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
; VI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
; VI-GISEL-NEXT:    s_endpgm
;
; GFX9-LABEL: v_test_safe_med3_f32_pat0_multi_use1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
; GFX9-NEXT:    v_max_f32_e32 v2, v2, v2
; GFX9-NEXT:    v_max_f32_e32 v3, v3, v3
; GFX9-NEXT:    v_min_f32_e32 v4, v1, v2
; GFX9-NEXT:    v_max_f32_e32 v1, v1, v2
; GFX9-NEXT:    global_store_dword v[0:1], v1, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_min_f32_e32 v1, v1, v3
; GFX9-NEXT:    v_max_f32_e32 v1, v4, v1
; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
; GFX9-NEXT:    s_endpgm
;
; GFX11-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use1:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT:    v_max_f32_e32 v1, v1, v1
; GFX11-SDAG-NEXT:    v_dual_max_f32 v3, v3, v3 :: v_dual_max_f32 v2, v2, v2
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_max_f32_e32 v4, v1, v2
; GFX11-SDAG-NEXT:    v_min_f32_e32 v3, v4, v3
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_minmax_f32 v1, v1, v2, v3
; GFX11-SDAG-NEXT:    global_store_b32 v[0:1], v4, off dlc
; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-SDAG-NEXT:    s_endpgm
;
; GFX11-GISEL-LABEL: v_test_safe_med3_f32_pat0_multi_use1:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5]
glc dlc
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT:    v_min_f32_e32 v4, v1, v2
; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v2 :: v_dual_max_f32 v2, v3, v3
; GFX11-GISEL-NEXT:    v_minmax_f32 v2, v1, v2, v4
; GFX11-GISEL-NEXT:    global_store_b32 v[0:1], v1, off dlc
; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-GISEL-NEXT:    global_store_b32 v0, v2, s[0:1]
; GFX11-GISEL-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  %a = load volatile float, ptr addrspace(1) %gep0
  %b = load volatile float, ptr addrspace(1) %gep1
  %c = load volatile float, ptr addrspace(1) %gep2
  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
  store volatile float %tmp1, ptr addrspace(1) undef
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, ptr addrspace(1) %outgep
  ret void
}

; Same median-of-three idiom maxnum(minnum(a, b), minnum(maxnum(a, b), c)) with
; no nnan flags, but here the extra user is on %tmp2 = minnum(maxnum(a, b), c):
; that value is also written out with a volatile store before the final maxnum.
; The expected output keeps separate min/max instructions on SI/VI/GFX9 and a
; v_maxmin_f32 + v_minmax_f32 pair on GFX11; no v_med3_f32 is expected.
; The CHECK lines are autogenerated (see the NOTE at the top of the file):
; regenerate them with utils/update_llc_test_checks.py instead of editing them.
define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use2(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
; SI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use2:
; SI-SDAG:       ; %bb.0:
; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-SDAG-NEXT:    s_mov_b32 s10, 0
; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    s_mov_b32 s10, -1
; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 1.0, v2
; SI-SDAG-NEXT:    v_mul_f32_e32 v3, 1.0, v3
; SI-SDAG-NEXT:    v_mul_f32_e32 v4, 1.0, v4
; SI-SDAG-NEXT:    v_min_f32_e32 v5, v2, v3
; SI-SDAG-NEXT:    v_max_f32_e32 v2, v2, v3
; SI-SDAG-NEXT:    v_min_f32_e32 v2, v2, v4
; SI-SDAG-NEXT:    buffer_store_dword v2, off, s[8:11], 0
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; SI-SDAG-NEXT:    v_max_f32_e32 v2, v5, v2
; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-SDAG-NEXT:    s_endpgm
;
; SI-GISEL-LABEL: v_test_safe_med3_f32_pat0_multi_use2:
; SI-GISEL:       ; %bb.0:
; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; SI-GISEL-NEXT:    s_mov_b32 s10, 0
; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    s_mov_b32 s2, -1
; SI-GISEL-NEXT:    s_mov_b32 s3, s11
; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
; SI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v3
; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
; SI-GISEL-NEXT:    buffer_store_dword v2, off, s[0:3], 0
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; SI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT:    s_endpgm
;
; VI-SDAG-LABEL: v_test_safe_med3_f32_pat0_multi_use2:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 1.0, v7
; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 1.0, v2
; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 1.0, v3
; VI-SDAG-NEXT:    v_min_f32_e32 v5, v4, v2
; VI-SDAG-NEXT:    v_max_f32_e32 v2, v4, v2
; VI-SDAG-NEXT:    v_min_f32_e32 v2, v2, v3
; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    v_max_f32_e32 v2, v5, v2
; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
; VI-SDAG-NEXT:    s_endpgm
;
; VI-GISEL-LABEL: v_test_safe_med3_f32_pat0_multi_use2:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
; VI-GISEL-NEXT:    v_min_f32_e32 v5, v4, v2
; VI-GISEL-NEXT:    v_max_f32_e32 v2, v4, v2
; VI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
; VI-GISEL-NEXT:    s_endpgm
;
; GFX9-LABEL: v_test_safe_med3_f32_pat0_multi_use2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
; GFX9-NEXT:    v_max_f32_e32 v2, v2, v2
; GFX9-NEXT:    v_max_f32_e32 v3, v3, v3
; GFX9-NEXT:    v_min_f32_e32 v4, v1, v2
; GFX9-NEXT:    v_max_f32_e32 v1, v1, v2
; GFX9-NEXT:    v_min_f32_e32 v1, v1, v3
; GFX9-NEXT:    global_store_dword v[0:1], v1, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_max_f32_e32 v1, v4, v1
; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
; GFX9-NEXT:    s_endpgm
;
; GFX11-LABEL: v_test_safe_med3_f32_pat0_multi_use2:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
; GFX11-NEXT:    v_max_f32_e32 v3, v3, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_maxmin_f32 v3, v1, v2, v3
; GFX11-NEXT:    v_minmax_f32 v1, v1, v2, v3
; GFX11-NEXT:
global_store_b32 v[0:1], v3, off dlc
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  %a = load volatile float, ptr addrspace(1) %gep0
  %b = load volatile float, ptr addrspace(1) %gep1
  %c = load volatile float, ptr addrspace(1) %gep2
  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
  store volatile float %tmp2, ptr addrspace(1) undef
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, ptr addrspace(1) %outgep
  ret void
}

; Baseline median-of-three idiom maxnum(minnum(a, b), minnum(maxnum(a, b), c))
; on three volatile-loaded floats: every intermediate value has a single use,
; and neither the loads nor the minnum/maxnum calls carry nnan flags.
; The expected output still contains no v_med3_f32: SI/VI/GFX9 use separate
; v_min_f32/v_max_f32 instructions, GFX11 uses v_maxmin_f32/v_minmax_f32 (SDAG)
; or v_min_f32 + v_max_f32 + v_minmax_f32 (GlobalISel).
; The CHECK lines are autogenerated (see the NOTE at the top of the file):
; regenerate them with utils/update_llc_test_checks.py instead of editing them.
define amdgpu_kernel void @v_test_safe_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
; SI-SDAG-LABEL: v_test_safe_med3_f32_pat0:
; SI-SDAG:       ; %bb.0:
; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
; SI-SDAG-NEXT:    s_mov_b32 s10, 0
; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[6:7]
; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[12:15], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    v_mul_f32_e32 v2, 1.0, v2
; SI-SDAG-NEXT:    v_mul_f32_e32 v3, 1.0, v3
; SI-SDAG-NEXT:    v_min_f32_e32 v5, v2, v3
; SI-SDAG-NEXT:    v_max_f32_e32 v2, v2, v3
; SI-SDAG-NEXT:    v_mul_f32_e32 v3, 1.0, v4
; SI-SDAG-NEXT:    v_min_f32_e32 v2, v2, v3
; SI-SDAG-NEXT:    v_max_f32_e32 v2, v5, v2
; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-SDAG-NEXT:    s_endpgm
;
; SI-GISEL-LABEL: v_test_safe_med3_f32_pat0:
; SI-GISEL:       ; %bb.0:
; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; SI-GISEL-NEXT:    s_mov_b32 s10, 0
; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
; SI-GISEL-NEXT:    v_min_f32_e32 v5, v2, v3
; SI-GISEL-NEXT:    v_max_f32_e32 v2, v2, v3
; SI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v4
; SI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
; SI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT:    s_endpgm
;
; VI-SDAG-LABEL: v_test_safe_med3_f32_pat0:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v4, 2, v0
; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v4
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v4
; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
; VI-SDAG-NEXT:    flat_load_dword v6, v[0:1] glc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s6, v4
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v5, vcc
; VI-SDAG-NEXT:    flat_load_dword v3, v[0:1] glc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v4
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    v_mul_f32_e32 v4, 1.0, v6
; VI-SDAG-NEXT:    v_mul_f32_e32 v2, 1.0, v2
; VI-SDAG-NEXT:    v_min_f32_e32 v5, v4, v2
; VI-SDAG-NEXT:    v_max_f32_e32 v2, v4, v2
; VI-SDAG-NEXT:    v_mul_f32_e32 v3, 1.0, v3
; VI-SDAG-NEXT:    v_min_f32_e32 v2, v2, v3
; VI-SDAG-NEXT:    v_max_f32_e32 v2, v5, v2
; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
; VI-SDAG-NEXT:    s_endpgm
;
; VI-GISEL-LABEL: v_test_safe_med3_f32_pat0:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v4, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v5, vcc
; VI-GISEL-NEXT:    flat_load_dword v3, v[0:1] glc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 1.0, v7
; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 1.0, v2
; VI-GISEL-NEXT:    v_min_f32_e32 v5, v4, v2
; VI-GISEL-NEXT:    v_max_f32_e32 v2, v4, v2
; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 1.0, v3
; VI-GISEL-NEXT:    v_min_f32_e32 v2, v2, v3
; VI-GISEL-NEXT:    v_max_f32_e32 v2, v5, v2
; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
; VI-GISEL-NEXT:    s_endpgm
;
; GFX9-LABEL: v_test_safe_med3_f32_pat0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_max_f32_e32 v1, v1, v1
; GFX9-NEXT:    v_max_f32_e32 v2, v2, v2
; GFX9-NEXT:    v_min_f32_e32 v4, v1, v2
; GFX9-NEXT:    v_max_f32_e32 v1, v1, v2
; GFX9-NEXT:    v_max_f32_e32 v2, v3, v3
; GFX9-NEXT:    v_min_f32_e32 v1, v1, v2
; GFX9-NEXT:    v_max_f32_e32 v1, v4, v1
; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
; GFX9-NEXT:    s_endpgm
;
; GFX11-SDAG-LABEL: v_test_safe_med3_f32_pat0:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
; GFX11-SDAG-NEXT:    v_max_f32_e32 v3, v3, v3
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_maxmin_f32 v3, v1, v2, v3
; GFX11-SDAG-NEXT:    v_minmax_f32 v1, v1, v2, v3
; GFX11-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-SDAG-NEXT:    s_endpgm
;
; GFX11-GISEL-LABEL: v_test_safe_med3_f32_pat0:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v1 :: v_dual_max_f32 v2, v2, v2
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT:    v_min_f32_e32 v4, v1, v2
; GFX11-GISEL-NEXT:    v_dual_max_f32 v1, v1, v2 :: v_dual_max_f32 v2, v3, v3
; GFX11-GISEL-NEXT:    v_minmax_f32 v1, v1, v2, v4
; GFX11-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-GISEL-NEXT:    s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid
  %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid
  %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid
  %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid
  %a = load volatile float, ptr addrspace(1) %gep0
  %b = load volatile float, ptr addrspace(1) %gep1
  %c = load volatile float, ptr addrspace(1) %gep2
  %tmp0 = call float @llvm.minnum.f32(float %a, float %b)
  %tmp1 = call float @llvm.maxnum.f32(float %a, float %b)
  %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c)
  %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2)
  store float %med3, ptr addrspace(1) %outgep
  ret void
}

; Median-of-three idiom whose operands are produced by fadd instructions
; (a + 1.0, b + 2.0, c + 4.0) instead of being used straight from the loads.
; "missing0": the fadd feeding the first operand has no nnan flag, while the
; fadds for the second and third operands are marked nnan; the minnum/maxnum
; calls themselves carry no fast-math flags.
; The expected output on every checked target is three v_add_f32 followed by a
; single v_med3_f32.
; The CHECK lines are autogenerated (see the NOTE at the top of the file):
; regenerate them with utils/update_llc_test_checks.py instead of editing them.
define amdgpu_kernel void @v_nnan_inputs_missing0_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 {
; SI-SDAG-LABEL: v_nnan_inputs_missing0_med3_f32_pat0:
; SI-SDAG:       ; %bb.0:
; SI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-SDAG-NEXT:    s_mov_b32 s11, 0xf000
; SI-SDAG-NEXT:    s_mov_b32 s10, 0
; SI-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-SDAG-NEXT:    v_mov_b32_e32 v1, 0
; SI-SDAG-NEXT:    s_mov_b64 s[14:15], s[10:11]
; SI-SDAG-NEXT:    s_mov_b64 s[18:19], s[10:11]
; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT:    s_mov_b64 s[8:9], s[2:3]
; SI-SDAG-NEXT:    s_mov_b64 s[12:13], s[4:5]
; SI-SDAG-NEXT:    s_mov_b64 s[16:17], s[6:7]
; SI-SDAG-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    s_mov_b64 s[2:3], s[10:11]
; SI-SDAG-NEXT:    v_add_f32_e32 v2, 1.0, v2
; SI-SDAG-NEXT:    v_add_f32_e32 v3, 2.0, v3
; SI-SDAG-NEXT:    v_add_f32_e32 v4, 4.0, v4
; SI-SDAG-NEXT:    v_med3_f32 v2, v2, v3, v4
; SI-SDAG-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-SDAG-NEXT:    s_endpgm
;
; SI-GISEL-LABEL: v_nnan_inputs_missing0_med3_f32_pat0:
; SI-GISEL:       ; %bb.0:
; SI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; SI-GISEL-NEXT:    s_mov_b32 s10, 0
; SI-GISEL-NEXT:    s_mov_b32 s11, 0xf000
; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
; SI-GISEL-NEXT:    buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[4:5]
; SI-GISEL-NEXT:    buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    s_mov_b64 s[8:9], s[6:7]
; SI-GISEL-NEXT:    buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc
; SI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; SI-GISEL-NEXT:    v_add_f32_e32 v2, 1.0, v2
; SI-GISEL-NEXT:    v_add_f32_e32 v3, 2.0, v3
; SI-GISEL-NEXT:    v_add_f32_e32 v4, 4.0, v4
; SI-GISEL-NEXT:    v_med3_f32 v2, v2, v3, v4
; SI-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
; SI-GISEL-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; SI-GISEL-NEXT:    s_endpgm
;
; VI-SDAG-LABEL: v_nnan_inputs_missing0_med3_f32_pat0:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-SDAG-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s3
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s5
; VI-SDAG-NEXT:    v_add_u32_e32 v2, vcc, s4, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-SDAG-NEXT:    v_mov_b32_e32 v5, s7
; VI-SDAG-NEXT:    v_add_u32_e32 v4, vcc, s6, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
; VI-SDAG-NEXT:    flat_load_dword v7, v[0:1] glc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    flat_load_dword v2, v[2:3] glc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    flat_load_dword v3, v[4:5] glc
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s0, v6
; VI-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDAG-NEXT:    v_add_f32_e32 v4, 1.0, v7
; VI-SDAG-NEXT:    v_add_f32_e32 v2, 2.0, v2
; VI-SDAG-NEXT:    v_add_f32_e32 v3, 4.0, v3
; VI-SDAG-NEXT:    v_med3_f32 v2, v4, v2, v3
; VI-SDAG-NEXT:    flat_store_dword v[0:1], v2
; VI-SDAG-NEXT:    s_endpgm
;
; VI-GISEL-LABEL: v_nnan_inputs_missing0_med3_f32_pat0:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-GISEL-NEXT:    v_lshlrev_b32_e32 v6, 2, v0
; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s4
; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s5
; VI-GISEL-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-GISEL-NEXT:    v_mov_b32_e32 v4, s6
; VI-GISEL-NEXT:    v_mov_b32_e32 v5, s7
; VI-GISEL-NEXT:    v_add_u32_e32 v4, vcc, v4, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
; VI-GISEL-NEXT:    flat_load_dword v7, v[0:1] glc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    flat_load_dword v2, v[2:3] glc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    flat_load_dword v3, v[4:5] glc
; VI-GISEL-NEXT:    s_waitcnt vmcnt(0)
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
; VI-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-GISEL-NEXT:    v_add_f32_e32 v4, 1.0, v7
; VI-GISEL-NEXT:    v_add_f32_e32 v2, 2.0, v2
; VI-GISEL-NEXT:    v_add_f32_e32 v3, 4.0, v3
; VI-GISEL-NEXT:    v_med3_f32 v2, v4, v2, v3
; VI-GISEL-NEXT:    flat_store_dword v[0:1], v2
; VI-GISEL-NEXT:    s_endpgm
;
; GFX9-LABEL: v_nnan_inputs_missing0_med3_f32_pat0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_load_dword v1, v0, s[10:11] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_load_dword v2, v0, s[12:13] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_load_dword v3, v0, s[14:15] glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_add_f32_e32 v1, 1.0, v1
; GFX9-NEXT:    v_add_f32_e32 v2, 2.0, v2
; GFX9-NEXT:    v_add_f32_e32 v3, 4.0, v3
; GFX9-NEXT:    v_med3_f32 v1, v1, v2, v3
; GFX9-NEXT:    global_store_dword v0, v1, s[8:9]
; GFX9-NEXT:    s_endpgm
;
; GFX11-LABEL: v_nnan_inputs_missing0_med3_f32_pat0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_load_b32 v1, v0, s[2:3] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_load_b32 v2, v0, s[4:5] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_load_b32 v3, v0, s[6:7] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_dual_add_f32 v1, 1.0, v1 :: v_dual_add_f32 v2, 2.0, v2
; GFX11-NEXT:    v_add_f32_e32 v3, 4.0, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_med3_f32 v1, v1, v2, v3
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %tid = call i32
@llvm.amdgcn.workitem.id.x() 6510 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 6511 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid 6512 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid 6513 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid 6514 %a = load volatile float, ptr addrspace(1) %gep0 6515 %b = load volatile float, ptr addrspace(1) %gep1 6516 %c = load volatile float, ptr addrspace(1) %gep2 6517 6518 %a.nnan = fadd float %a, 1.0 6519 %b.nnan = fadd nnan float %b, 2.0 6520 %c.nnan = fadd nnan float %c, 4.0 6521 6522 %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan) 6523 %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan) 6524 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan) 6525 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) 6526 store float %med3, ptr addrspace(1) %outgep 6527 ret void 6528} 6529
; med3 pattern max(min(a, b), min(max(a, b), c)) where only the fadd feeding operand b (%b.nnan) lacks nnan; every check prefix below expects one v_med3_f32.
6530define amdgpu_kernel void @v_nnan_inputs_missing1_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 { 6531; SI-SDAG-LABEL: v_nnan_inputs_missing1_med3_f32_pat0: 6532; SI-SDAG: ; %bb.0: 6533; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 6534; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000 6535; SI-SDAG-NEXT: s_mov_b32 s10, 0 6536; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 6537; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 6538; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11] 6539; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11] 6540; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 6541; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3] 6542; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5] 6543; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7] 6544; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 6545; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 6546; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc 6547; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 6548; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc 6549; 
SI-SDAG-NEXT: s_waitcnt vmcnt(0) 6550; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] 6551; SI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v2 6552; SI-SDAG-NEXT: v_add_f32_e32 v3, 2.0, v3 6553; SI-SDAG-NEXT: v_add_f32_e32 v4, 4.0, v4 6554; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4 6555; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 6556; SI-SDAG-NEXT: s_endpgm 6557; 6558; SI-GISEL-LABEL: v_nnan_inputs_missing1_med3_f32_pat0: 6559; SI-GISEL: ; %bb.0: 6560; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 6561; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 6562; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 6563; SI-GISEL-NEXT: s_mov_b32 s10, 0 6564; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000 6565; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 6566; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] 6567; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 6568; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 6569; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5] 6570; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc 6571; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 6572; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] 6573; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc 6574; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 6575; SI-GISEL-NEXT: v_add_f32_e32 v2, 1.0, v2 6576; SI-GISEL-NEXT: v_add_f32_e32 v3, 2.0, v3 6577; SI-GISEL-NEXT: v_add_f32_e32 v4, 4.0, v4 6578; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4 6579; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] 6580; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 6581; SI-GISEL-NEXT: s_endpgm 6582; 6583; VI-SDAG-LABEL: v_nnan_inputs_missing1_med3_f32_pat0: 6584; VI-SDAG: ; %bb.0: 6585; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 6586; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0 6587; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 6588; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 6589; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6 6590; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 6591; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 6592; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, 
s4, v6 6593; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 6594; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7 6595; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6 6596; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 6597; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc 6598; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 6599; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc 6600; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 6601; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc 6602; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 6603; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 6604; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6 6605; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 6606; VI-SDAG-NEXT: v_add_f32_e32 v4, 1.0, v7 6607; VI-SDAG-NEXT: v_add_f32_e32 v2, 2.0, v2 6608; VI-SDAG-NEXT: v_add_f32_e32 v3, 4.0, v3 6609; VI-SDAG-NEXT: v_med3_f32 v2, v4, v2, v3 6610; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 6611; VI-SDAG-NEXT: s_endpgm 6612; 6613; VI-GISEL-LABEL: v_nnan_inputs_missing1_med3_f32_pat0: 6614; VI-GISEL: ; %bb.0: 6615; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 6616; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0 6617; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 6618; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 6619; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 6620; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 6621; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 6622; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 6623; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 6624; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6 6625; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 6626; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6 6627; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7 6628; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6 6629; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 6630; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc 6631; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 6632; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc 6633; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 6634; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc 6635; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 6636; VI-GISEL-NEXT: 
v_mov_b32_e32 v0, s0 6637; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 6638; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 6639; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 6640; VI-GISEL-NEXT: v_add_f32_e32 v4, 1.0, v7 6641; VI-GISEL-NEXT: v_add_f32_e32 v2, 2.0, v2 6642; VI-GISEL-NEXT: v_add_f32_e32 v3, 4.0, v3 6643; VI-GISEL-NEXT: v_med3_f32 v2, v4, v2, v3 6644; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 6645; VI-GISEL-NEXT: s_endpgm 6646; 6647; GFX9-LABEL: v_nnan_inputs_missing1_med3_f32_pat0: 6648; GFX9: ; %bb.0: 6649; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 6650; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 6651; GFX9-NEXT: s_waitcnt lgkmcnt(0) 6652; GFX9-NEXT: global_load_dword v1, v0, s[10:11] glc 6653; GFX9-NEXT: s_waitcnt vmcnt(0) 6654; GFX9-NEXT: global_load_dword v2, v0, s[12:13] glc 6655; GFX9-NEXT: s_waitcnt vmcnt(0) 6656; GFX9-NEXT: global_load_dword v3, v0, s[14:15] glc 6657; GFX9-NEXT: s_waitcnt vmcnt(0) 6658; GFX9-NEXT: v_add_f32_e32 v1, 1.0, v1 6659; GFX9-NEXT: v_add_f32_e32 v2, 2.0, v2 6660; GFX9-NEXT: v_add_f32_e32 v3, 4.0, v3 6661; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3 6662; GFX9-NEXT: global_store_dword v0, v1, s[8:9] 6663; GFX9-NEXT: s_endpgm 6664; 6665; GFX11-LABEL: v_nnan_inputs_missing1_med3_f32_pat0: 6666; GFX11: ; %bb.0: 6667; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 6668; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 6669; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 6670; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 6671; GFX11-NEXT: s_waitcnt lgkmcnt(0) 6672; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 6673; GFX11-NEXT: s_waitcnt vmcnt(0) 6674; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc 6675; GFX11-NEXT: s_waitcnt vmcnt(0) 6676; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 6677; GFX11-NEXT: s_waitcnt vmcnt(0) 6678; GFX11-NEXT: v_dual_add_f32 v1, 1.0, v1 :: v_dual_add_f32 v2, 2.0, v2 6679; GFX11-NEXT: v_add_f32_e32 v3, 4.0, v3 6680; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 6681; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3 6682; 
GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 6683; GFX11-NEXT: s_endpgm 6684 %tid = call i32 @llvm.amdgcn.workitem.id.x() 6685 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 6686 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid 6687 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid 6688 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid 6689 %a = load volatile float, ptr addrspace(1) %gep0 6690 %b = load volatile float, ptr addrspace(1) %gep1 6691 %c = load volatile float, ptr addrspace(1) %gep2 6692 6693 %a.nnan = fadd nnan float %a, 1.0 6694 %b.nnan = fadd float %b, 2.0 6695 %c.nnan = fadd nnan float %c, 4.0 6696 6697 %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan) 6698 %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan) 6699 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan) 6700 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) 6701 store float %med3, ptr addrspace(1) %outgep 6702 ret void 6703} 6704 ; med3 pattern max(min(a, b), min(max(a, b), c)) where only the fadd feeding operand c (%c.nnan) lacks nnan; every check prefix below expects one v_med3_f32. 6705define amdgpu_kernel void @v_nnan_inputs_missing2_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 { 6706; SI-SDAG-LABEL: v_nnan_inputs_missing2_med3_f32_pat0: 6707; SI-SDAG: ; %bb.0: 6708; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 6709; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000 6710; SI-SDAG-NEXT: s_mov_b32 s10, 0 6711; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 6712; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 6713; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11] 6714; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11] 6715; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 6716; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3] 6717; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5] 6718; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7] 6719; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 6720; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 6721; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc 6722; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 
6723; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc 6724; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 6725; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] 6726; SI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v2 6727; SI-SDAG-NEXT: v_add_f32_e32 v3, 2.0, v3 6728; SI-SDAG-NEXT: v_add_f32_e32 v4, 4.0, v4 6729; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4 6730; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 6731; SI-SDAG-NEXT: s_endpgm 6732; 6733; SI-GISEL-LABEL: v_nnan_inputs_missing2_med3_f32_pat0: 6734; SI-GISEL: ; %bb.0: 6735; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 6736; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 6737; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 6738; SI-GISEL-NEXT: s_mov_b32 s10, 0 6739; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000 6740; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 6741; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] 6742; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 6743; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 6744; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5] 6745; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc 6746; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 6747; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] 6748; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc 6749; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 6750; SI-GISEL-NEXT: v_add_f32_e32 v2, 1.0, v2 6751; SI-GISEL-NEXT: v_add_f32_e32 v3, 2.0, v3 6752; SI-GISEL-NEXT: v_add_f32_e32 v4, 4.0, v4 6753; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4 6754; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] 6755; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 6756; SI-GISEL-NEXT: s_endpgm 6757; 6758; VI-SDAG-LABEL: v_nnan_inputs_missing2_med3_f32_pat0: 6759; VI-SDAG: ; %bb.0: 6760; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 6761; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0 6762; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 6763; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 6764; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6 6765; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 6766; 
VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 6767; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6 6768; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 6769; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7 6770; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6 6771; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 6772; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc 6773; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 6774; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc 6775; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 6776; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc 6777; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 6778; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 6779; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6 6780; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 6781; VI-SDAG-NEXT: v_add_f32_e32 v4, 1.0, v7 6782; VI-SDAG-NEXT: v_add_f32_e32 v2, 2.0, v2 6783; VI-SDAG-NEXT: v_add_f32_e32 v3, 4.0, v3 6784; VI-SDAG-NEXT: v_med3_f32 v2, v4, v2, v3 6785; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 6786; VI-SDAG-NEXT: s_endpgm 6787; 6788; VI-GISEL-LABEL: v_nnan_inputs_missing2_med3_f32_pat0: 6789; VI-GISEL: ; %bb.0: 6790; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 6791; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0 6792; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 6793; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 6794; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 6795; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 6796; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 6797; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 6798; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 6799; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6 6800; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 6801; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6 6802; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7 6803; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6 6804; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 6805; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc 6806; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 6807; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc 6808; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 6809; VI-GISEL-NEXT: flat_load_dword v3, 
v[4:5] glc 6810; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 6811; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 6812; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 6813; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 6814; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 6815; VI-GISEL-NEXT: v_add_f32_e32 v4, 1.0, v7 6816; VI-GISEL-NEXT: v_add_f32_e32 v2, 2.0, v2 6817; VI-GISEL-NEXT: v_add_f32_e32 v3, 4.0, v3 6818; VI-GISEL-NEXT: v_med3_f32 v2, v4, v2, v3 6819; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 6820; VI-GISEL-NEXT: s_endpgm 6821; 6822; GFX9-LABEL: v_nnan_inputs_missing2_med3_f32_pat0: 6823; GFX9: ; %bb.0: 6824; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 6825; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 6826; GFX9-NEXT: s_waitcnt lgkmcnt(0) 6827; GFX9-NEXT: global_load_dword v1, v0, s[10:11] glc 6828; GFX9-NEXT: s_waitcnt vmcnt(0) 6829; GFX9-NEXT: global_load_dword v2, v0, s[12:13] glc 6830; GFX9-NEXT: s_waitcnt vmcnt(0) 6831; GFX9-NEXT: global_load_dword v3, v0, s[14:15] glc 6832; GFX9-NEXT: s_waitcnt vmcnt(0) 6833; GFX9-NEXT: v_add_f32_e32 v1, 1.0, v1 6834; GFX9-NEXT: v_add_f32_e32 v2, 2.0, v2 6835; GFX9-NEXT: v_add_f32_e32 v3, 4.0, v3 6836; GFX9-NEXT: v_med3_f32 v1, v1, v2, v3 6837; GFX9-NEXT: global_store_dword v0, v1, s[8:9] 6838; GFX9-NEXT: s_endpgm 6839; 6840; GFX11-LABEL: v_nnan_inputs_missing2_med3_f32_pat0: 6841; GFX11: ; %bb.0: 6842; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 6843; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 6844; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 6845; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 6846; GFX11-NEXT: s_waitcnt lgkmcnt(0) 6847; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 6848; GFX11-NEXT: s_waitcnt vmcnt(0) 6849; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc 6850; GFX11-NEXT: s_waitcnt vmcnt(0) 6851; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 6852; GFX11-NEXT: s_waitcnt vmcnt(0) 6853; GFX11-NEXT: v_dual_add_f32 v1, 1.0, v1 :: v_dual_add_f32 v2, 2.0, v2 6854; GFX11-NEXT: v_add_f32_e32 v3, 4.0, v3 6855; GFX11-NEXT: 
s_delay_alu instid0(VALU_DEP_1) 6856; GFX11-NEXT: v_med3_f32 v1, v1, v2, v3 6857; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 6858; GFX11-NEXT: s_endpgm 6859 %tid = call i32 @llvm.amdgcn.workitem.id.x() 6860 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 6861 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid 6862 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid 6863 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid 6864 %a = load volatile float, ptr addrspace(1) %gep0 6865 %b = load volatile float, ptr addrspace(1) %gep1 6866 %c = load volatile float, ptr addrspace(1) %gep2 6867 6868 %a.nnan = fadd nnan float %a, 1.0 6869 %b.nnan = fadd nnan float %b, 2.0 6870 %c.nnan = fadd float %c, 4.0 6871 6872 %tmp0 = call float @llvm.minnum.f32(float %a.nnan, float %b.nnan) 6873 %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan) 6874 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan) 6875 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) 6876 store float %med3, ptr addrspace(1) %outgep 6877 ret void 6878} 6879 ; nnan is on the four min/max calls and operand a is negated with fsub -0.0. SDAG prefixes expect the negation folded into v_med3_f32 as a -v source modifier; GISel prefixes expect a separate v_mul_f32 by -1.0 (SI, VI) or v_max_f32 of -v (GFX9, GFX11) ahead of v_med3_f32. 6880define amdgpu_kernel void @v_test_nnan_on_call_med3_f32_pat0_srcmod0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 { 6881; SI-SDAG-LABEL: v_test_nnan_on_call_med3_f32_pat0_srcmod0: 6882; SI-SDAG: ; %bb.0: 6883; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 6884; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000 6885; SI-SDAG-NEXT: s_mov_b32 s10, 0 6886; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 6887; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 6888; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11] 6889; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11] 6890; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 6891; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3] 6892; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5] 6893; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7] 6894; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 6895; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 6896; SI-SDAG-NEXT: 
buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc 6897; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 6898; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc 6899; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 6900; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] 6901; SI-SDAG-NEXT: v_med3_f32 v2, -v2, v3, v4 6902; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 6903; SI-SDAG-NEXT: s_endpgm 6904; 6905; SI-GISEL-LABEL: v_test_nnan_on_call_med3_f32_pat0_srcmod0: 6906; SI-GISEL: ; %bb.0: 6907; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 6908; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 6909; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 6910; SI-GISEL-NEXT: s_mov_b32 s10, 0 6911; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000 6912; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 6913; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] 6914; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 6915; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 6916; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5] 6917; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc 6918; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 6919; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] 6920; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc 6921; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 6922; SI-GISEL-NEXT: v_mul_f32_e32 v2, -1.0, v2 6923; SI-GISEL-NEXT: v_med3_f32 v2, v2, v3, v4 6924; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] 6925; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 6926; SI-GISEL-NEXT: s_endpgm 6927; 6928; VI-SDAG-LABEL: v_test_nnan_on_call_med3_f32_pat0_srcmod0: 6929; VI-SDAG: ; %bb.0: 6930; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 6931; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0 6932; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 6933; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 6934; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6 6935; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 6936; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 6937; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6 6938; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 
0, v3, vcc 6939; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7 6940; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6 6941; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 6942; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc 6943; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 6944; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc 6945; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 6946; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc 6947; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 6948; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 6949; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6 6950; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 6951; VI-SDAG-NEXT: v_med3_f32 v2, -v7, v2, v3 6952; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 6953; VI-SDAG-NEXT: s_endpgm 6954; 6955; VI-GISEL-LABEL: v_test_nnan_on_call_med3_f32_pat0_srcmod0: 6956; VI-GISEL: ; %bb.0: 6957; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 6958; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0 6959; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 6960; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 6961; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 6962; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 6963; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 6964; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 6965; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 6966; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6 6967; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 6968; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6 6969; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7 6970; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6 6971; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 6972; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc 6973; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 6974; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc 6975; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 6976; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc 6977; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 6978; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 6979; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 6980; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 6981; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 6982; 
VI-GISEL-NEXT: v_mul_f32_e32 v4, -1.0, v7 6983; VI-GISEL-NEXT: v_med3_f32 v2, v4, v2, v3 6984; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 6985; VI-GISEL-NEXT: s_endpgm 6986; 6987; GFX9-SDAG-LABEL: v_test_nnan_on_call_med3_f32_pat0_srcmod0: 6988; GFX9-SDAG: ; %bb.0: 6989; GFX9-SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 6990; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 6991; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 6992; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[10:11] glc 6993; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 6994; GFX9-SDAG-NEXT: global_load_dword v2, v0, s[12:13] glc 6995; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 6996; GFX9-SDAG-NEXT: global_load_dword v3, v0, s[14:15] glc 6997; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 6998; GFX9-SDAG-NEXT: v_med3_f32 v1, -v1, v2, v3 6999; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[8:9] 7000; GFX9-SDAG-NEXT: s_endpgm 7001; 7002; GFX9-GISEL-LABEL: v_test_nnan_on_call_med3_f32_pat0_srcmod0: 7003; GFX9-GISEL: ; %bb.0: 7004; GFX9-GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 7005; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 7006; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 7007; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[10:11] glc 7008; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 7009; GFX9-GISEL-NEXT: global_load_dword v2, v0, s[12:13] glc 7010; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 7011; GFX9-GISEL-NEXT: global_load_dword v3, v0, s[14:15] glc 7012; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 7013; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 7014; GFX9-GISEL-NEXT: v_med3_f32 v1, v1, v2, v3 7015; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[8:9] 7016; GFX9-GISEL-NEXT: s_endpgm 7017; 7018; GFX11-SDAG-LABEL: v_test_nnan_on_call_med3_f32_pat0_srcmod0: 7019; GFX11-SDAG: ; %bb.0: 7020; GFX11-SDAG-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 7021; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 7022; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 7023; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 7024; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 7025; GFX11-SDAG-NEXT: 
global_load_b32 v1, v0, s[2:3] glc dlc 7026; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 7027; GFX11-SDAG-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc 7028; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 7029; GFX11-SDAG-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 7030; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 7031; GFX11-SDAG-NEXT: v_med3_f32 v1, -v1, v2, v3 7032; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] 7033; GFX11-SDAG-NEXT: s_endpgm 7034; 7035; GFX11-GISEL-LABEL: v_test_nnan_on_call_med3_f32_pat0_srcmod0: 7036; GFX11-GISEL: ; %bb.0: 7037; GFX11-GISEL-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 7038; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 7039; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 7040; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 7041; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 7042; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 7043; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 7044; GFX11-GISEL-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc 7045; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 7046; GFX11-GISEL-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 7047; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 7048; GFX11-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1 7049; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 7050; GFX11-GISEL-NEXT: v_med3_f32 v1, v1, v2, v3 7051; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] 7052; GFX11-GISEL-NEXT: s_endpgm 7053 %tid = call i32 @llvm.amdgcn.workitem.id.x() 7054 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 7055 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid 7056 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid 7057 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid 7058 %a = load volatile float, ptr addrspace(1) %gep0 7059 %b = load volatile float, ptr addrspace(1) %gep1 7060 %c = load volatile float, ptr addrspace(1) %gep2 7061 %a.fneg = fsub float -0.0, %a 7062 %tmp0 = call nnan float @llvm.minnum.f32(float %a.fneg, float %b) 7063 %tmp1 = call nnan float @llvm.maxnum.f32(float 
%a.fneg, float %b) 7064 %tmp2 = call nnan float @llvm.minnum.f32(float %tmp1, float %c) 7065 %med3 = call nnan float @llvm.maxnum.f32(float %tmp0, float %tmp2) 7066 store float %med3, ptr addrspace(1) %outgep 7067 ret void 7068} 7069
; The inner min takes the negated %a.fneg while the inner max takes plain %a, so the operands do not line up as one med3; no check prefix below expects v_med3_f32.
7070define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 { 7071; SI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch: 7072; SI-SDAG: ; %bb.0: 7073; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 7074; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000 7075; SI-SDAG-NEXT: s_mov_b32 s10, 0 7076; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 7077; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 7078; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11] 7079; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 7080; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3] 7081; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5] 7082; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 7083; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 7084; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc 7085; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 7086; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[6:7] 7087; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[12:15], 0 addr64 glc 7088; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 7089; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] 7090; SI-SDAG-NEXT: v_min_f32_e64 v5, -v2, v3 7091; SI-SDAG-NEXT: v_max_f32_e32 v2, v2, v3 7092; SI-SDAG-NEXT: v_min_f32_e32 v2, v2, v4 7093; SI-SDAG-NEXT: v_max_f32_e32 v2, v5, v2 7094; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 7095; SI-SDAG-NEXT: s_endpgm 7096; 7097; SI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch: 7098; SI-GISEL: ; %bb.0: 7099; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 7100; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 7101; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 7102; SI-GISEL-NEXT: s_mov_b32 s10, 0 7103; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000 7104; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 
7105; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] 7106; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 7107; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 7108; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5] 7109; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc 7110; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 7111; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] 7112; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc 7113; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 7114; SI-GISEL-NEXT: v_mul_f32_e32 v5, -1.0, v2 7115; SI-GISEL-NEXT: v_min_f32_e32 v5, v5, v3 7116; SI-GISEL-NEXT: v_max_f32_e32 v2, v2, v3 7117; SI-GISEL-NEXT: v_min_f32_e32 v2, v2, v4 7118; SI-GISEL-NEXT: v_max_f32_e32 v2, v5, v2 7119; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] 7120; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 7121; SI-GISEL-NEXT: s_endpgm 7122; 7123; VI-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch: 7124; VI-SDAG: ; %bb.0: 7125; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 7126; VI-SDAG-NEXT: v_lshlrev_b32_e32 v4, 2, v0 7127; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 7128; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 7129; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v4 7130; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 7131; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 7132; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v4 7133; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 7134; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7 7135; VI-SDAG-NEXT: flat_load_dword v6, v[0:1] glc 7136; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 7137; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc 7138; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 7139; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s6, v4 7140; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v5, vcc 7141; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] glc 7142; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 7143; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v4 7144; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 7145; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 7146; VI-SDAG-NEXT: v_min_f32_e64 v4, 
-v6, v2 7147; VI-SDAG-NEXT: v_max_f32_e32 v2, v6, v2 7148; VI-SDAG-NEXT: v_min_f32_e32 v2, v2, v3 7149; VI-SDAG-NEXT: v_max_f32_e32 v2, v4, v2 7150; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 7151; VI-SDAG-NEXT: s_endpgm 7152; 7153; VI-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch: 7154; VI-GISEL: ; %bb.0: 7155; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 7156; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0 7157; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 7158; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 7159; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 7160; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 7161; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 7162; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 7163; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 7164; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6 7165; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 7166; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6 7167; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7 7168; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc 7169; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 7170; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc 7171; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 7172; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v4, v6 7173; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v5, vcc 7174; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] glc 7175; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 7176; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 7177; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 7178; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 7179; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 7180; VI-GISEL-NEXT: v_mul_f32_e32 v4, -1.0, v7 7181; VI-GISEL-NEXT: v_max_f32_e32 v5, v7, v2 7182; VI-GISEL-NEXT: v_min_f32_e32 v2, v4, v2 7183; VI-GISEL-NEXT: v_min_f32_e32 v3, v5, v3 7184; VI-GISEL-NEXT: v_max_f32_e32 v2, v2, v3 7185; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 7186; VI-GISEL-NEXT: s_endpgm 7187; 7188; GFX9-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch: 7189; GFX9-SDAG: ; %bb.0: 7190; GFX9-SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 7191; 
GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 7192; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 7193; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[10:11] glc 7194; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 7195; GFX9-SDAG-NEXT: global_load_dword v2, v0, s[12:13] glc 7196; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 7197; GFX9-SDAG-NEXT: global_load_dword v3, v0, s[14:15] glc 7198; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 7199; GFX9-SDAG-NEXT: v_min_f32_e64 v4, -v1, v2 7200; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v2 7201; GFX9-SDAG-NEXT: v_min_f32_e32 v1, v1, v3 7202; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v4, v1 7203; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[8:9] 7204; GFX9-SDAG-NEXT: s_endpgm 7205; 7206; GFX9-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch: 7207; GFX9-GISEL: ; %bb.0: 7208; GFX9-GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 7209; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 7210; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 7211; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[10:11] glc 7212; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 7213; GFX9-GISEL-NEXT: global_load_dword v2, v0, s[12:13] glc 7214; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 7215; GFX9-GISEL-NEXT: global_load_dword v3, v0, s[14:15] glc 7216; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 7217; GFX9-GISEL-NEXT: v_max_f32_e64 v4, -v1, -v1 7218; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v2 7219; GFX9-GISEL-NEXT: v_min_f32_e32 v2, v4, v2 7220; GFX9-GISEL-NEXT: v_min_f32_e32 v1, v1, v3 7221; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v2, v1 7222; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[8:9] 7223; GFX9-GISEL-NEXT: s_endpgm 7224; 7225; GFX11-SDAG-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch: 7226; GFX11-SDAG: ; %bb.0: 7227; GFX11-SDAG-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 7228; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 7229; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 7230; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 7231; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 7232; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] glc 
dlc 7233; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 7234; GFX11-SDAG-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc 7235; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 7236; GFX11-SDAG-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 7237; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 7238; GFX11-SDAG-NEXT: v_maxmin_f32 v3, v1, v2, v3 7239; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 7240; GFX11-SDAG-NEXT: v_minmax_f32 v1, -v1, v2, v3 7241; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] 7242; GFX11-SDAG-NEXT: s_endpgm 7243; 7244; GFX11-GISEL-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0_mismatch: 7245; GFX11-GISEL: ; %bb.0: 7246; GFX11-GISEL-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 7247; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 7248; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 7249; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 7250; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 7251; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 7252; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 7253; GFX11-GISEL-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc 7254; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 7255; GFX11-GISEL-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 7256; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 7257; GFX11-GISEL-NEXT: v_max_f32_e64 v4, -v1, -v1 7258; GFX11-GISEL-NEXT: v_max_f32_e32 v1, v1, v2 7259; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 7260; GFX11-GISEL-NEXT: v_min_f32_e32 v4, v4, v2 7261; GFX11-GISEL-NEXT: v_minmax_f32 v1, v1, v3, v4 7262; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] 7263; GFX11-GISEL-NEXT: s_endpgm 7264 %tid = call i32 @llvm.amdgcn.workitem.id.x() 7265 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 7266 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid 7267 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid 7268 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid 7269 %a = load volatile float, ptr addrspace(1) %gep0 7270 %b = load volatile float, ptr addrspace(1) 
%gep1 7271 %c = load volatile float, ptr addrspace(1) %gep2 7272 %a.fneg = fsub float -0.0, %a 7273 %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b) 7274 %tmp1 = call float @llvm.maxnum.f32(float %a, float %b) 7275 %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) 7276 %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) 7277 store float %med3, ptr addrspace(1) %outgep 7278 ret void 7279} 7280 7281; A simple min and max is not sufficient 7282define amdgpu_kernel void @v_test_global_nnans_min_max_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 { 7283; SI-SDAG-LABEL: v_test_global_nnans_min_max_f32: 7284; SI-SDAG: ; %bb.0: 7285; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 7286; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000 7287; SI-SDAG-NEXT: s_mov_b32 s10, 0 7288; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 7289; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 7290; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11] 7291; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11] 7292; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 7293; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3] 7294; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5] 7295; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7] 7296; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 7297; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 7298; SI-SDAG-NEXT: buffer_load_dword v3, v[0:1], s[12:15], 0 addr64 glc 7299; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 7300; SI-SDAG-NEXT: buffer_load_dword v4, v[0:1], s[16:19], 0 addr64 glc 7301; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 7302; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] 7303; SI-SDAG-NEXT: v_max_f32_e32 v2, v2, v3 7304; SI-SDAG-NEXT: v_min_f32_e32 v2, v2, v4 7305; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 7306; SI-SDAG-NEXT: s_endpgm 7307; 7308; SI-GISEL-LABEL: v_test_global_nnans_min_max_f32: 7309; SI-GISEL: ; %bb.0: 7310; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 7311; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 7312; SI-GISEL-NEXT: 
v_mov_b32_e32 v1, 0 7313; SI-GISEL-NEXT: s_mov_b32 s10, 0 7314; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000 7315; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 7316; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] 7317; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 7318; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 7319; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5] 7320; SI-GISEL-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 glc 7321; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 7322; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] 7323; SI-GISEL-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 glc 7324; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 7325; SI-GISEL-NEXT: v_max_f32_e32 v2, v2, v3 7326; SI-GISEL-NEXT: v_min_f32_e32 v2, v2, v4 7327; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] 7328; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 7329; SI-GISEL-NEXT: s_endpgm 7330; 7331; VI-SDAG-LABEL: v_test_global_nnans_min_max_f32: 7332; VI-SDAG: ; %bb.0: 7333; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 7334; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 2, v0 7335; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 7336; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 7337; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6 7338; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 7339; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 7340; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6 7341; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 7342; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7 7343; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6 7344; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 7345; VI-SDAG-NEXT: flat_load_dword v7, v[0:1] glc 7346; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 7347; VI-SDAG-NEXT: flat_load_dword v2, v[2:3] glc 7348; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 7349; VI-SDAG-NEXT: flat_load_dword v3, v[4:5] glc 7350; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 7351; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 7352; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6 7353; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 7354; VI-SDAG-NEXT: v_max_f32_e32 v2, v7, v2 7355; 
VI-SDAG-NEXT: v_min_f32_e32 v2, v2, v3 7356; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 7357; VI-SDAG-NEXT: s_endpgm 7358; 7359; VI-GISEL-LABEL: v_test_global_nnans_min_max_f32: 7360; VI-GISEL: ; %bb.0: 7361; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 7362; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 2, v0 7363; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 7364; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 7365; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 7366; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 7367; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 7368; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 7369; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 7370; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6 7371; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 7372; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6 7373; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7 7374; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6 7375; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 7376; VI-GISEL-NEXT: flat_load_dword v7, v[0:1] glc 7377; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 7378; VI-GISEL-NEXT: flat_load_dword v2, v[2:3] glc 7379; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 7380; VI-GISEL-NEXT: flat_load_dword v3, v[4:5] glc 7381; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 7382; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 7383; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 7384; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 7385; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 7386; VI-GISEL-NEXT: v_max_f32_e32 v2, v7, v2 7387; VI-GISEL-NEXT: v_min_f32_e32 v2, v2, v3 7388; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 7389; VI-GISEL-NEXT: s_endpgm 7390; 7391; GFX9-LABEL: v_test_global_nnans_min_max_f32: 7392; GFX9: ; %bb.0: 7393; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 7394; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 7395; GFX9-NEXT: s_waitcnt lgkmcnt(0) 7396; GFX9-NEXT: global_load_dword v1, v0, s[10:11] glc 7397; GFX9-NEXT: s_waitcnt vmcnt(0) 7398; GFX9-NEXT: global_load_dword v2, v0, s[12:13] glc 7399; GFX9-NEXT: s_waitcnt vmcnt(0) 7400; GFX9-NEXT: global_load_dword v3, 
v0, s[14:15] glc 7401; GFX9-NEXT: s_waitcnt vmcnt(0) 7402; GFX9-NEXT: v_max_f32_e32 v1, v1, v2 7403; GFX9-NEXT: v_min_f32_e32 v1, v1, v3 7404; GFX9-NEXT: global_store_dword v0, v1, s[8:9] 7405; GFX9-NEXT: s_endpgm 7406; 7407; GFX11-LABEL: v_test_global_nnans_min_max_f32: 7408; GFX11: ; %bb.0: 7409; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 7410; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 7411; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 7412; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 7413; GFX11-NEXT: s_waitcnt lgkmcnt(0) 7414; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] glc dlc 7415; GFX11-NEXT: s_waitcnt vmcnt(0) 7416; GFX11-NEXT: global_load_b32 v2, v0, s[4:5] glc dlc 7417; GFX11-NEXT: s_waitcnt vmcnt(0) 7418; GFX11-NEXT: global_load_b32 v3, v0, s[6:7] glc dlc 7419; GFX11-NEXT: s_waitcnt vmcnt(0) 7420; GFX11-NEXT: v_maxmin_f32 v1, v1, v2, v3 7421; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 7422; GFX11-NEXT: s_endpgm 7423 %tid = call i32 @llvm.amdgcn.workitem.id.x() 7424 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 7425 %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid 7426 %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid 7427 %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid 7428 %a = load volatile float, ptr addrspace(1) %gep0 7429 %b = load volatile float, ptr addrspace(1) %gep1 7430 %c = load volatile float, ptr addrspace(1) %gep2 7431 %max = call float @llvm.maxnum.f32(float %a, float %b) 7432 %minmax = call float @llvm.minnum.f32(float %max, float %c) 7433 store float %minmax, ptr addrspace(1) %outgep 7434 ret void 7435} 7436 7437define amdgpu_kernel void @v_test_nnan_input_fmed3_r_i_i_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 { 7438; SI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f16: 7439; SI-SDAG: ; %bb.0: 7440; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 7441; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 7442; SI-SDAG-NEXT: s_mov_b32 s6, 0 7443; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 7444; 
SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 7445; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 7446; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 7447; SI-SDAG-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 7448; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 7449; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 7450; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 7451; SI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v2 7452; SI-SDAG-NEXT: v_med3_f32 v2, v2, 2.0, 4.0 7453; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 7454; SI-SDAG-NEXT: buffer_store_short v2, v[0:1], s[0:3], 0 addr64 7455; SI-SDAG-NEXT: s_endpgm 7456; 7457; SI-GISEL-LABEL: v_test_nnan_input_fmed3_r_i_i_f16: 7458; SI-GISEL: ; %bb.0: 7459; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 7460; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 7461; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 7462; SI-GISEL-NEXT: s_mov_b32 s6, 0 7463; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 7464; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 7465; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 7466; SI-GISEL-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 7467; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, 1.0 7468; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v4, 2.0 7469; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 7470; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 7471; SI-GISEL-NEXT: v_add_f32_e32 v2, v2, v3 7472; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 7473; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 7474; SI-GISEL-NEXT: v_max_f32_e32 v2, v2, v4 7475; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 7476; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 7477; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, 4.0 7478; SI-GISEL-NEXT: v_min_f32_e32 v2, v2, v3 7479; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 7480; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 7481; SI-GISEL-NEXT: buffer_store_short v2, v[0:1], s[0:3], 0 addr64 7482; SI-GISEL-NEXT: s_endpgm 7483; 7484; VI-SDAG-LABEL: v_test_nnan_input_fmed3_r_i_i_f16: 7485; VI-SDAG: ; %bb.0: 7486; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 7487; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 1, v0 7488; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 7489; 
VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 7490; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 7491; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 7492; VI-SDAG-NEXT: flat_load_ushort v3, v[0:1] 7493; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 7494; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 7495; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 7496; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 7497; VI-SDAG-NEXT: v_add_f16_e32 v2, 1.0, v3 7498; VI-SDAG-NEXT: v_max_f16_e32 v2, 2.0, v2 7499; VI-SDAG-NEXT: v_min_f16_e32 v2, 4.0, v2 7500; VI-SDAG-NEXT: flat_store_short v[0:1], v2 7501; VI-SDAG-NEXT: s_endpgm 7502; 7503; VI-GISEL-LABEL: v_test_nnan_input_fmed3_r_i_i_f16: 7504; VI-GISEL: ; %bb.0: 7505; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 7506; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 1, v0 7507; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 7508; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 7509; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 7510; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 7511; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 7512; VI-GISEL-NEXT: flat_load_ushort v3, v[0:1] 7513; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 7514; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 7515; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 7516; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 7517; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 7518; VI-GISEL-NEXT: v_add_f16_e32 v2, 1.0, v3 7519; VI-GISEL-NEXT: v_max_f16_e32 v2, 2.0, v2 7520; VI-GISEL-NEXT: v_min_f16_e32 v2, 4.0, v2 7521; VI-GISEL-NEXT: flat_store_short v[0:1], v2 7522; VI-GISEL-NEXT: s_endpgm 7523; 7524; GFX9-LABEL: v_test_nnan_input_fmed3_r_i_i_f16: 7525; GFX9: ; %bb.0: 7526; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 7527; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0 7528; GFX9-NEXT: s_waitcnt lgkmcnt(0) 7529; GFX9-NEXT: global_load_ushort v1, v0, s[2:3] 7530; GFX9-NEXT: s_waitcnt vmcnt(0) 7531; GFX9-NEXT: v_add_f16_e32 v1, 1.0, v1 7532; GFX9-NEXT: v_med3_f16 v1, v1, 2.0, 4.0 7533; GFX9-NEXT: global_store_short v0, v1, s[0:1] 7534; GFX9-NEXT: s_endpgm 7535; 7536; 
GFX11-SDAG-FAKE16-LABEL: v_test_nnan_input_fmed3_r_i_i_f16: 7537; GFX11-SDAG-FAKE16: ; %bb.0: 7538; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 7539; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 7540; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) 7541; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 7542; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) 7543; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] 7544; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) 7545; GFX11-SDAG-FAKE16-NEXT: v_add_f16_e32 v1, 1.0, v1 7546; GFX11-SDAG-FAKE16-NEXT: v_med3_f16 v1, v1, 2.0, 4.0 7547; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] 7548; GFX11-SDAG-FAKE16-NEXT: s_endpgm 7549; 7550; GFX11-GISEL-FAKE16-LABEL: v_test_nnan_input_fmed3_r_i_i_f16: 7551; GFX11-GISEL-FAKE16: ; %bb.0: 7552; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 7553; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 7554; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) 7555; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 7556; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) 7557; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] 7558; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) 7559; GFX11-GISEL-FAKE16-NEXT: v_add_f16_e32 v1, 1.0, v1 7560; GFX11-GISEL-FAKE16-NEXT: v_med3_f16 v1, v1, 2.0, 4.0 7561; GFX11-GISEL-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] 7562; GFX11-GISEL-FAKE16-NEXT: s_endpgm 7563; 7564; GFX11-SDAG-TRUE16-LABEL: v_test_nnan_input_fmed3_r_i_i_f16: 7565; GFX11-SDAG-TRUE16: ; %bb.0: 7566; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 7567; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 7568; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) 7569; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 7570; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) 7571; GFX11-SDAG-TRUE16-NEXT: 
global_load_u16 v0, v1, s[2:3] 7572; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) 7573; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e32 v0.l, 1.0, v0.l 7574; GFX11-SDAG-TRUE16-NEXT: v_med3_f16 v0.l, v0.l, 2.0, 4.0 7575; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] 7576; GFX11-SDAG-TRUE16-NEXT: s_endpgm 7577; 7578; GFX11-GISEL-TRUE16-LABEL: v_test_nnan_input_fmed3_r_i_i_f16: 7579; GFX11-GISEL-TRUE16: ; %bb.0: 7580; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 7581; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 7582; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) 7583; GFX11-GISEL-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 7584; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) 7585; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] 7586; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) 7587; GFX11-GISEL-TRUE16-NEXT: v_add_f16_e32 v0.l, 1.0, v0.l 7588; GFX11-GISEL-TRUE16-NEXT: v_med3_f16 v0.l, v0.l, 2.0, 4.0 7589; GFX11-GISEL-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] 7590; GFX11-GISEL-TRUE16-NEXT: s_endpgm 7591 %tid = call i32 @llvm.amdgcn.workitem.id.x() 7592 %gep0 = getelementptr half, ptr addrspace(1) %aptr, i32 %tid 7593 %outgep = getelementptr half, ptr addrspace(1) %out, i32 %tid 7594 %a = load half, ptr addrspace(1) %gep0 7595 %a.add = fadd nnan half %a, 1.0 7596 %max = call half @llvm.maxnum.f16(half %a.add, half 2.0) 7597 %med = call half @llvm.minnum.f16(half %max, half 4.0) 7598 7599 store half %med, ptr addrspace(1) %outgep 7600 ret void 7601} 7602 7603define amdgpu_kernel void @v_nnan_inputs_med3_f16_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 { 7604; SI-SDAG-LABEL: v_nnan_inputs_med3_f16_pat0: 7605; SI-SDAG: ; %bb.0: 7606; SI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 7607; SI-SDAG-NEXT: s_mov_b32 s11, 0xf000 7608; SI-SDAG-NEXT: s_mov_b32 s10, 0 7609; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 7610; SI-SDAG-NEXT: v_mov_b32_e32 
v1, 0 7611; SI-SDAG-NEXT: s_mov_b64 s[14:15], s[10:11] 7612; SI-SDAG-NEXT: s_mov_b64 s[18:19], s[10:11] 7613; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 7614; SI-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3] 7615; SI-SDAG-NEXT: s_mov_b64 s[12:13], s[4:5] 7616; SI-SDAG-NEXT: s_mov_b64 s[16:17], s[6:7] 7617; SI-SDAG-NEXT: buffer_load_ushort v2, v[0:1], s[8:11], 0 addr64 glc 7618; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 7619; SI-SDAG-NEXT: buffer_load_ushort v3, v[0:1], s[12:15], 0 addr64 glc 7620; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 7621; SI-SDAG-NEXT: buffer_load_ushort v4, v[0:1], s[16:19], 0 addr64 glc 7622; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 7623; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[10:11] 7624; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2 7625; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3 7626; SI-SDAG-NEXT: v_cvt_f32_f16_e32 v4, v4 7627; SI-SDAG-NEXT: v_add_f32_e32 v2, 1.0, v2 7628; SI-SDAG-NEXT: v_add_f32_e32 v3, 2.0, v3 7629; SI-SDAG-NEXT: v_add_f32_e32 v4, 4.0, v4 7630; SI-SDAG-NEXT: v_med3_f32 v2, v2, v3, v4 7631; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2 7632; SI-SDAG-NEXT: buffer_store_short v2, v[0:1], s[0:3], 0 addr64 7633; SI-SDAG-NEXT: s_endpgm 7634; 7635; SI-GISEL-LABEL: v_nnan_inputs_med3_f16_pat0: 7636; SI-GISEL: ; %bb.0: 7637; SI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9 7638; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 7639; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 7640; SI-GISEL-NEXT: s_mov_b32 s10, 0 7641; SI-GISEL-NEXT: s_mov_b32 s11, 0xf000 7642; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, 1.0 7643; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, 2.0 7644; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 7645; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] 7646; SI-GISEL-NEXT: buffer_load_ushort v4, v[0:1], s[8:11], 0 addr64 glc 7647; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 7648; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v5, 4.0 7649; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[4:5] 7650; SI-GISEL-NEXT: buffer_load_ushort v6, v[0:1], s[8:11], 0 addr64 glc 7651; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 7652; SI-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] 7653; 
SI-GISEL-NEXT: buffer_load_ushort v7, v[0:1], s[8:11], 0 addr64 glc 7654; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 7655; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v4 7656; SI-GISEL-NEXT: v_add_f32_e32 v2, v4, v2 7657; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v6 7658; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 7659; SI-GISEL-NEXT: v_add_f32_e32 v3, v4, v3 7660; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v4, v7 7661; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3 7662; SI-GISEL-NEXT: v_add_f32_e32 v4, v4, v5 7663; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 7664; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v4, v4 7665; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3 7666; SI-GISEL-NEXT: v_min_f32_e32 v5, v2, v3 7667; SI-GISEL-NEXT: v_max_f32_e32 v2, v2, v3 7668; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v4 7669; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v4, v5 7670; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 7671; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 7672; SI-GISEL-NEXT: v_min_f32_e32 v2, v2, v3 7673; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 7674; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v4 7675; SI-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2 7676; SI-GISEL-NEXT: v_max_f32_e32 v2, v3, v2 7677; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2 7678; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11] 7679; SI-GISEL-NEXT: buffer_store_short v2, v[0:1], s[0:3], 0 addr64 7680; SI-GISEL-NEXT: s_endpgm 7681; 7682; VI-SDAG-LABEL: v_nnan_inputs_med3_f16_pat0: 7683; VI-SDAG: ; %bb.0: 7684; VI-SDAG-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 7685; VI-SDAG-NEXT: v_lshlrev_b32_e32 v6, 1, v0 7686; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 7687; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 7688; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v6 7689; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 7690; VI-SDAG-NEXT: v_mov_b32_e32 v3, s5 7691; VI-SDAG-NEXT: v_add_u32_e32 v2, vcc, s4, v6 7692; VI-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 7693; VI-SDAG-NEXT: v_mov_b32_e32 v5, s7 7694; VI-SDAG-NEXT: v_add_u32_e32 v4, vcc, s6, v6 7695; VI-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 7696; VI-SDAG-NEXT: 
flat_load_ushort v7, v[0:1] glc 7697; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 7698; VI-SDAG-NEXT: flat_load_ushort v2, v[2:3] glc 7699; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 7700; VI-SDAG-NEXT: flat_load_ushort v3, v[4:5] glc 7701; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 7702; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 7703; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v6 7704; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 7705; VI-SDAG-NEXT: v_add_f16_e32 v4, 1.0, v7 7706; VI-SDAG-NEXT: v_add_f16_e32 v2, 2.0, v2 7707; VI-SDAG-NEXT: v_add_f16_e32 v3, 4.0, v3 7708; VI-SDAG-NEXT: v_min_f16_e32 v5, v4, v2 7709; VI-SDAG-NEXT: v_max_f16_e32 v2, v4, v2 7710; VI-SDAG-NEXT: v_min_f16_e32 v2, v2, v3 7711; VI-SDAG-NEXT: v_max_f16_e32 v2, v5, v2 7712; VI-SDAG-NEXT: flat_store_short v[0:1], v2 7713; VI-SDAG-NEXT: s_endpgm 7714; 7715; VI-GISEL-LABEL: v_nnan_inputs_med3_f16_pat0: 7716; VI-GISEL: ; %bb.0: 7717; VI-GISEL-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24 7718; VI-GISEL-NEXT: v_lshlrev_b32_e32 v6, 1, v0 7719; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 7720; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 7721; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 7722; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v6 7723; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 7724; VI-GISEL-NEXT: v_mov_b32_e32 v2, s4 7725; VI-GISEL-NEXT: v_mov_b32_e32 v3, s5 7726; VI-GISEL-NEXT: v_add_u32_e32 v2, vcc, v2, v6 7727; VI-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc 7728; VI-GISEL-NEXT: v_mov_b32_e32 v4, s6 7729; VI-GISEL-NEXT: v_mov_b32_e32 v5, s7 7730; VI-GISEL-NEXT: v_add_u32_e32 v4, vcc, v4, v6 7731; VI-GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 7732; VI-GISEL-NEXT: flat_load_ushort v7, v[0:1] glc 7733; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 7734; VI-GISEL-NEXT: flat_load_ushort v2, v[2:3] glc 7735; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 7736; VI-GISEL-NEXT: flat_load_ushort v3, v[4:5] glc 7737; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 7738; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 7739; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 7740; VI-GISEL-NEXT: v_add_u32_e32 v0, 
vcc, v0, v6 7741; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 7742; VI-GISEL-NEXT: v_add_f16_e32 v4, 1.0, v7 7743; VI-GISEL-NEXT: v_add_f16_e32 v2, 2.0, v2 7744; VI-GISEL-NEXT: v_add_f16_e32 v3, 4.0, v3 7745; VI-GISEL-NEXT: v_min_f16_e32 v5, v4, v2 7746; VI-GISEL-NEXT: v_max_f16_e32 v2, v4, v2 7747; VI-GISEL-NEXT: v_min_f16_e32 v2, v2, v3 7748; VI-GISEL-NEXT: v_max_f16_e32 v2, v5, v2 7749; VI-GISEL-NEXT: flat_store_short v[0:1], v2 7750; VI-GISEL-NEXT: s_endpgm 7751; 7752; GFX9-LABEL: v_nnan_inputs_med3_f16_pat0: 7753; GFX9: ; %bb.0: 7754; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24 7755; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0 7756; GFX9-NEXT: s_waitcnt lgkmcnt(0) 7757; GFX9-NEXT: global_load_ushort v1, v0, s[10:11] glc 7758; GFX9-NEXT: s_waitcnt vmcnt(0) 7759; GFX9-NEXT: global_load_ushort v2, v0, s[12:13] glc 7760; GFX9-NEXT: s_waitcnt vmcnt(0) 7761; GFX9-NEXT: global_load_ushort v3, v0, s[14:15] glc 7762; GFX9-NEXT: s_waitcnt vmcnt(0) 7763; GFX9-NEXT: v_add_f16_e32 v1, 1.0, v1 7764; GFX9-NEXT: v_add_f16_e32 v2, 2.0, v2 7765; GFX9-NEXT: v_add_f16_e32 v3, 4.0, v3 7766; GFX9-NEXT: v_med3_f16 v1, v1, v2, v3 7767; GFX9-NEXT: global_store_short v0, v1, s[8:9] 7768; GFX9-NEXT: s_endpgm 7769; 7770; GFX11-SDAG-FAKE16-LABEL: v_nnan_inputs_med3_f16_pat0: 7771; GFX11-SDAG-FAKE16: ; %bb.0: 7772; GFX11-SDAG-FAKE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 7773; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 7774; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) 7775; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 7776; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) 7777; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc 7778; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) 7779; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc 7780; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) 7781; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v3, v0, s[6:7] glc dlc 7782; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) 7783; GFX11-SDAG-FAKE16-NEXT: 
v_add_f16_e32 v1, 1.0, v1 7784; GFX11-SDAG-FAKE16-NEXT: v_add_f16_e32 v2, 2.0, v2 7785; GFX11-SDAG-FAKE16-NEXT: v_add_f16_e32 v3, 4.0, v3 7786; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) 7787; GFX11-SDAG-FAKE16-NEXT: v_med3_f16 v1, v1, v2, v3 7788; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] 7789; GFX11-SDAG-FAKE16-NEXT: s_endpgm 7790; 7791; GFX11-GISEL-FAKE16-LABEL: v_nnan_inputs_med3_f16_pat0: 7792; GFX11-GISEL-FAKE16: ; %bb.0: 7793; GFX11-GISEL-FAKE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 7794; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 7795; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) 7796; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 7797; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) 7798; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc 7799; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) 7800; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v2, v0, s[4:5] glc dlc 7801; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) 7802; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v3, v0, s[6:7] glc dlc 7803; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) 7804; GFX11-GISEL-FAKE16-NEXT: v_add_f16_e32 v1, 1.0, v1 7805; GFX11-GISEL-FAKE16-NEXT: v_add_f16_e32 v2, 2.0, v2 7806; GFX11-GISEL-FAKE16-NEXT: v_add_f16_e32 v3, 4.0, v3 7807; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) 7808; GFX11-GISEL-FAKE16-NEXT: v_med3_f16 v1, v1, v2, v3 7809; GFX11-GISEL-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] 7810; GFX11-GISEL-FAKE16-NEXT: s_endpgm 7811; 7812; GFX11-SDAG-TRUE16-LABEL: v_nnan_inputs_med3_f16_pat0: 7813; GFX11-SDAG-TRUE16: ; %bb.0: 7814; GFX11-SDAG-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 7815; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 7816; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) 7817; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v0 7818; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) 7819; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v0, v2, s[2:3] glc dlc 7820; 
GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) 7821; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v1, v2, s[4:5] glc dlc 7822; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) 7823; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v3, v2, s[6:7] glc dlc 7824; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) 7825; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e32 v0.l, 1.0, v0.l 7826; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l 7827; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l 7828; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7829; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e32 v0.h, 2.0, v0.h 7830; GFX11-SDAG-TRUE16-NEXT: v_add_f16_e32 v1.l, 4.0, v1.l 7831; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) 7832; GFX11-SDAG-TRUE16-NEXT: v_med3_f16 v0.l, v0.l, v0.h, v1.l 7833; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v2, v0, s[0:1] 7834; GFX11-SDAG-TRUE16-NEXT: s_endpgm 7835; 7836; GFX11-GISEL-TRUE16-LABEL: v_nnan_inputs_med3_f16_pat0: 7837; GFX11-GISEL-TRUE16: ; %bb.0: 7838; GFX11-GISEL-TRUE16-NEXT: s_load_b256 s[0:7], s[4:5], 0x24 7839; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 7840; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) 7841; GFX11-GISEL-TRUE16-NEXT: v_lshlrev_b32_e32 v2, 1, v0 7842; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) 7843; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v0, v2, s[2:3] glc dlc 7844; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) 7845; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v1, v2, s[4:5] glc dlc 7846; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) 7847; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v3, v2, s[6:7] glc dlc 7848; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) 7849; GFX11-GISEL-TRUE16-NEXT: v_add_f16_e32 v0.l, 1.0, v0.l 7850; GFX11-GISEL-TRUE16-NEXT: v_add_f16_e32 v0.h, 2.0, v1.l 7851; GFX11-GISEL-TRUE16-NEXT: v_add_f16_e32 v1.l, 4.0, v3.l 7852; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) 7853; GFX11-GISEL-TRUE16-NEXT: v_med3_f16 v0.l, v0.l, v0.h, v1.l 7854; GFX11-GISEL-TRUE16-NEXT: 
global_store_b16 v2, v0, s[0:1] 7855; GFX11-GISEL-TRUE16-NEXT: s_endpgm 7856 %tid = call i32 @llvm.amdgcn.workitem.id.x() 7857 %gep0 = getelementptr half, ptr addrspace(1) %aptr, i32 %tid 7858 %gep1 = getelementptr half, ptr addrspace(1) %bptr, i32 %tid 7859 %gep2 = getelementptr half, ptr addrspace(1) %cptr, i32 %tid 7860 %outgep = getelementptr half, ptr addrspace(1) %out, i32 %tid 7861 %a = load volatile half, ptr addrspace(1) %gep0 7862 %b = load volatile half, ptr addrspace(1) %gep1 7863 %c = load volatile half, ptr addrspace(1) %gep2 7864 7865 %a.nnan = fadd nnan half %a, 1.0 7866 %b.nnan = fadd nnan half %b, 2.0 7867 %c.nnan = fadd nnan half %c, 4.0 7868 7869 %tmp0 = call half @llvm.minnum.f16(half %a.nnan, half %b.nnan) 7870 %tmp1 = call half @llvm.maxnum.f16(half %a.nnan, half %b.nnan) 7871 %tmp2 = call half @llvm.minnum.f16(half %tmp1, half %c.nnan) 7872 %med3 = call half @llvm.maxnum.f16(half %tmp0, half %tmp2) 7873 store half %med3, ptr addrspace(1) %outgep 7874 ret void 7875} 7876 7877define amdgpu_kernel void @two_non_inline_constant(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 { 7878; SI-SDAG-LABEL: two_non_inline_constant: 7879; SI-SDAG: ; %bb.0: 7880; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 7881; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 7882; SI-SDAG-NEXT: s_mov_b32 s6, 0 7883; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 7884; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 7885; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 7886; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 7887; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 7888; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 7889; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 7890; SI-SDAG-NEXT: v_add_f32_e32 v2, 0.5, v2 7891; SI-SDAG-NEXT: v_max_f32_e32 v2, 0x41000000, v2 7892; SI-SDAG-NEXT: v_min_f32_e32 v2, 0x41800000, v2 7893; SI-SDAG-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 7894; SI-SDAG-NEXT: s_endpgm 7895; 7896; SI-GISEL-LABEL: two_non_inline_constant: 7897; SI-GISEL: ; %bb.0: 7898; SI-GISEL-NEXT: 
s_load_dwordx4 s[0:3], s[4:5], 0x9 7899; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 7900; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 7901; SI-GISEL-NEXT: s_mov_b32 s6, 0 7902; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 7903; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 7904; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 7905; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 7906; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 7907; SI-GISEL-NEXT: v_add_f32_e32 v2, 0.5, v2 7908; SI-GISEL-NEXT: v_max_f32_e32 v2, 0x41000000, v2 7909; SI-GISEL-NEXT: v_min_f32_e32 v2, 0x41800000, v2 7910; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 7911; SI-GISEL-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 7912; SI-GISEL-NEXT: s_endpgm 7913; 7914; VI-SDAG-LABEL: two_non_inline_constant: 7915; VI-SDAG: ; %bb.0: 7916; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 7917; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 7918; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 7919; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 7920; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 7921; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 7922; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 7923; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 7924; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 7925; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 7926; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 7927; VI-SDAG-NEXT: v_add_f32_e32 v2, 0.5, v3 7928; VI-SDAG-NEXT: v_max_f32_e32 v2, 0x41000000, v2 7929; VI-SDAG-NEXT: v_min_f32_e32 v2, 0x41800000, v2 7930; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 7931; VI-SDAG-NEXT: s_endpgm 7932; 7933; VI-GISEL-LABEL: two_non_inline_constant: 7934; VI-GISEL: ; %bb.0: 7935; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 7936; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 7937; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 7938; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 7939; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 7940; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 7941; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 7942; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 7943; 
VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 7944; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 7945; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 7946; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 7947; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 7948; VI-GISEL-NEXT: v_add_f32_e32 v2, 0.5, v3 7949; VI-GISEL-NEXT: v_max_f32_e32 v2, 0x41000000, v2 7950; VI-GISEL-NEXT: v_min_f32_e32 v2, 0x41800000, v2 7951; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 7952; VI-GISEL-NEXT: s_endpgm 7953; 7954; GFX9-LABEL: two_non_inline_constant: 7955; GFX9: ; %bb.0: 7956; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 7957; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 7958; GFX9-NEXT: s_waitcnt lgkmcnt(0) 7959; GFX9-NEXT: global_load_dword v1, v0, s[2:3] 7960; GFX9-NEXT: s_waitcnt vmcnt(0) 7961; GFX9-NEXT: v_add_f32_e32 v1, 0.5, v1 7962; GFX9-NEXT: v_max_f32_e32 v1, 0x41000000, v1 7963; GFX9-NEXT: v_min_f32_e32 v1, 0x41800000, v1 7964; GFX9-NEXT: global_store_dword v0, v1, s[0:1] 7965; GFX9-NEXT: s_endpgm 7966; 7967; GFX11-SDAG-LABEL: two_non_inline_constant: 7968; GFX11-SDAG: ; %bb.0: 7969; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 7970; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 7971; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 7972; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 7973; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 7974; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] 7975; GFX11-SDAG-NEXT: s_mov_b32 s2, 0x41000000 7976; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 7977; GFX11-SDAG-NEXT: v_add_f32_e32 v1, 0.5, v1 7978; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 7979; GFX11-SDAG-NEXT: v_maxmin_f32 v1, v1, s2, 0x41800000 7980; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] 7981; GFX11-SDAG-NEXT: s_endpgm 7982; 7983; GFX11-GISEL-LABEL: two_non_inline_constant: 7984; GFX11-GISEL: ; %bb.0: 7985; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 7986; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 7987; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x41800000 7988; GFX11-GISEL-NEXT: s_delay_alu 
instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_1) 7989; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 7990; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 7991; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] 7992; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 7993; GFX11-GISEL-NEXT: v_add_f32_e32 v1, 0.5, v1 7994; GFX11-GISEL-NEXT: v_maxmin_f32 v1, v1, 0x41000000, v2 7995; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] 7996; GFX11-GISEL-NEXT: s_endpgm 7997 %tid = call i32 @llvm.amdgcn.workitem.id.x() 7998 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 7999 %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid 8000 %a = load float, ptr addrspace(1) %gep0 8001 %add = fadd nnan float %a, 0.5 8002 %max = call float @llvm.maxnum.f32(float %add, float 8.0) 8003 %med = call float @llvm.minnum.f32(float %max, float 16.0) 8004 8005 store float %med, ptr addrspace(1) %out.gep 8006 ret void 8007} 8008 8009; FIXME: Simple stores do not work as a multiple use because they are bitcasted to integer constants. 
; one_non_inline_constant clamps (a + 0.5) with maxnum 1.0 followed by minnum
; 16.0, and additionally feeds 16.0 into a second fadd whose result is stored
; volatile, so that constant has more than one use. The assertions expect
; v_med3_f32 on every checked target, with 16.0 appearing as the literal
; 0x41800000 (moved into a VGPR first on the SI, VI and GFX9 runs, encoded
; directly in v_med3_f32 on the gfx11 runs).
;
; two_non_inline_constant_multi_use, which follows it, uses the same pattern
; with bounds 8.0 and 16.0 (literals 0x41000000 and 0x41800000), each of which
; gets an extra volatile-stored fadd use. Its assertions also expect
; v_med3_f32 on every checked target, whereas the SI-GISEL, VI and GFX9 checks
; of two_non_inline_constant above expect v_max_f32 + v_min_f32 and its gfx11
; checks expect v_maxmin_f32.
;
; NOTE(review): the assertions in both functions are produced by
; utils/update_llc_test_checks.py (see the first line of the file); regenerate
; them with that script instead of editing them by hand.
8010define amdgpu_kernel void @one_non_inline_constant(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 { 8011; SI-SDAG-LABEL: one_non_inline_constant: 8012; SI-SDAG: ; %bb.0: 8013; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 8014; SI-SDAG-NEXT: s_mov_b32 s6, 0 8015; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 8016; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 8017; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 8018; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 8019; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 8020; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 8021; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x41800000 8022; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 8023; SI-SDAG-NEXT: s_mov_b32 s6, -1 8024; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 8025; SI-SDAG-NEXT: v_add_f32_e32 v4, 0.5, v2 8026; SI-SDAG-NEXT: v_add_f32_e32 v2, 0x41800000, v2 8027; SI-SDAG-NEXT: v_med3_f32 v3, v4, 1.0, v3 8028; SI-SDAG-NEXT: buffer_store_dword v3, v[0:1], s[0:3], 0 addr64 8029; SI-SDAG-NEXT: buffer_store_dword v2, off, s[4:7], 0 8030; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 8031; SI-SDAG-NEXT: s_endpgm 8032; 8033; SI-GISEL-LABEL: one_non_inline_constant: 8034; SI-GISEL: ; %bb.0: 8035; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 8036; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 8037; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 8038; SI-GISEL-NEXT: s_mov_b32 s6, 0 8039; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 8040; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 8041; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 8042; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 8043; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x41800000 8044; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 8045; SI-GISEL-NEXT: s_mov_b32 s6, -1 8046; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 8047; SI-GISEL-NEXT: v_add_f32_e32 v4, 0.5, v2 8048; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x41800000, v2 8049; SI-GISEL-NEXT: v_med3_f32 v3, v4, 1.0, v3 8050; SI-GISEL-NEXT: buffer_store_dword v3, v[0:1], s[0:3], 0 addr64 8051; SI-GISEL-NEXT: buffer_store_dword v2, off, s[4:7], 0 8052; SI-GISEL-NEXT: 
s_waitcnt vmcnt(0) 8053; SI-GISEL-NEXT: s_endpgm 8054; 8055; VI-SDAG-LABEL: one_non_inline_constant: 8056; VI-SDAG: ; %bb.0: 8057; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 8058; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 8059; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x41800000 8060; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 8061; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 8062; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 8063; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 8064; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 8065; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 8066; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 8067; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 8068; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 8069; VI-SDAG-NEXT: v_add_f32_e32 v2, 0.5, v3 8070; VI-SDAG-NEXT: v_med3_f32 v2, v2, 1.0, v4 8071; VI-SDAG-NEXT: v_add_f32_e32 v3, 0x41800000, v3 8072; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 8073; VI-SDAG-NEXT: flat_store_dword v[0:1], v3 8074; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 8075; VI-SDAG-NEXT: s_endpgm 8076; 8077; VI-GISEL-LABEL: one_non_inline_constant: 8078; VI-GISEL: ; %bb.0: 8079; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 8080; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 8081; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x41800000 8082; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 8083; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 8084; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 8085; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 8086; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 8087; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 8088; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 8089; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 8090; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 8091; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 8092; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 8093; VI-GISEL-NEXT: v_add_f32_e32 v2, 0.5, v3 8094; VI-GISEL-NEXT: v_med3_f32 v2, v2, 1.0, v4 8095; VI-GISEL-NEXT: v_add_f32_e32 v3, 0x41800000, v3 8096; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 8097; VI-GISEL-NEXT: flat_store_dword v[0:1], v3 
8098; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 8099; VI-GISEL-NEXT: s_endpgm 8100; 8101; GFX9-LABEL: one_non_inline_constant: 8102; GFX9: ; %bb.0: 8103; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 8104; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 8105; GFX9-NEXT: v_mov_b32_e32 v2, 0x41800000 8106; GFX9-NEXT: s_waitcnt lgkmcnt(0) 8107; GFX9-NEXT: global_load_dword v1, v0, s[2:3] 8108; GFX9-NEXT: s_waitcnt vmcnt(0) 8109; GFX9-NEXT: v_add_f32_e32 v3, 0.5, v1 8110; GFX9-NEXT: v_add_f32_e32 v1, 0x41800000, v1 8111; GFX9-NEXT: v_med3_f32 v2, v3, 1.0, v2 8112; GFX9-NEXT: global_store_dword v0, v2, s[0:1] 8113; GFX9-NEXT: global_store_dword v[0:1], v1, off 8114; GFX9-NEXT: s_waitcnt vmcnt(0) 8115; GFX9-NEXT: s_endpgm 8116; 8117; GFX11-LABEL: one_non_inline_constant: 8118; GFX11: ; %bb.0: 8119; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 8120; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 8121; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 8122; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 8123; GFX11-NEXT: s_waitcnt lgkmcnt(0) 8124; GFX11-NEXT: global_load_b32 v1, v0, s[2:3] 8125; GFX11-NEXT: s_waitcnt vmcnt(0) 8126; GFX11-NEXT: v_add_f32_e32 v2, 0.5, v1 8127; GFX11-NEXT: v_add_f32_e32 v1, 0x41800000, v1 8128; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 8129; GFX11-NEXT: v_med3_f32 v2, v2, 1.0, 0x41800000 8130; GFX11-NEXT: global_store_b32 v0, v2, s[0:1] 8131; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc 8132; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 8133; GFX11-NEXT: s_endpgm 8134 %tid = call i32 @llvm.amdgcn.workitem.id.x() 8135 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 8136 %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid 8137 %a = load float, ptr addrspace(1) %gep0 8138 %add = fadd nnan float %a, 0.5 8139 %max = call float @llvm.maxnum.f32(float %add, float 1.0) 8140 %med = call float @llvm.minnum.f32(float %max, float 16.0) 8141 8142 store float %med, ptr addrspace(1) %out.gep 8143 8144 %extra.use = fadd float %a, 16.0 8145 store volatile float 
%extra.use, ptr addrspace(1) undef 8146 ret void 8147} 8148 8149define amdgpu_kernel void @two_non_inline_constant_multi_use(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 { 8150; SI-SDAG-LABEL: two_non_inline_constant_multi_use: 8151; SI-SDAG: ; %bb.0: 8152; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 8153; SI-SDAG-NEXT: s_mov_b32 s6, 0 8154; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 8155; SI-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 8156; SI-SDAG-NEXT: v_mov_b32_e32 v1, 0 8157; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 8158; SI-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] 8159; SI-SDAG-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 8160; SI-SDAG-NEXT: s_mov_b32 s4, 0x41000000 8161; SI-SDAG-NEXT: v_mov_b32_e32 v3, 0x41800000 8162; SI-SDAG-NEXT: s_mov_b64 s[2:3], s[6:7] 8163; SI-SDAG-NEXT: s_mov_b32 s6, -1 8164; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 8165; SI-SDAG-NEXT: v_add_f32_e32 v4, 0.5, v2 8166; SI-SDAG-NEXT: v_add_f32_e32 v5, 0x41800000, v2 8167; SI-SDAG-NEXT: v_add_f32_e32 v2, 0x41000000, v2 8168; SI-SDAG-NEXT: v_med3_f32 v3, v4, s4, v3 8169; SI-SDAG-NEXT: buffer_store_dword v3, v[0:1], s[0:3], 0 addr64 8170; SI-SDAG-NEXT: buffer_store_dword v5, off, s[4:7], 0 8171; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 8172; SI-SDAG-NEXT: buffer_store_dword v2, off, s[4:7], 0 8173; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 8174; SI-SDAG-NEXT: s_endpgm 8175; 8176; SI-GISEL-LABEL: two_non_inline_constant_multi_use: 8177; SI-GISEL: ; %bb.0: 8178; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 8179; SI-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 8180; SI-GISEL-NEXT: v_mov_b32_e32 v1, 0 8181; SI-GISEL-NEXT: s_mov_b32 s6, 0 8182; SI-GISEL-NEXT: s_mov_b32 s7, 0xf000 8183; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 8184; SI-GISEL-NEXT: s_mov_b64 s[4:5], s[2:3] 8185; SI-GISEL-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 8186; SI-GISEL-NEXT: v_mov_b32_e32 v3, 0x41000000 8187; SI-GISEL-NEXT: v_mov_b32_e32 v4, 0x41800000 8188; SI-GISEL-NEXT: s_mov_b64 s[2:3], s[6:7] 8189; SI-GISEL-NEXT: s_mov_b32 s6, -1 8190; 
SI-GISEL-NEXT: s_waitcnt vmcnt(0) 8191; SI-GISEL-NEXT: v_add_f32_e32 v5, 0.5, v2 8192; SI-GISEL-NEXT: v_add_f32_e32 v6, 0x41800000, v2 8193; SI-GISEL-NEXT: v_add_f32_e32 v2, 0x41000000, v2 8194; SI-GISEL-NEXT: v_med3_f32 v3, v5, v3, v4 8195; SI-GISEL-NEXT: buffer_store_dword v3, v[0:1], s[0:3], 0 addr64 8196; SI-GISEL-NEXT: buffer_store_dword v6, off, s[4:7], 0 8197; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 8198; SI-GISEL-NEXT: buffer_store_dword v2, off, s[4:7], 0 8199; SI-GISEL-NEXT: s_waitcnt vmcnt(0) 8200; SI-GISEL-NEXT: s_endpgm 8201; 8202; VI-SDAG-LABEL: two_non_inline_constant_multi_use: 8203; VI-SDAG: ; %bb.0: 8204; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 8205; VI-SDAG-NEXT: v_lshlrev_b32_e32 v2, 2, v0 8206; VI-SDAG-NEXT: v_mov_b32_e32 v4, 0x41800000 8207; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 8208; VI-SDAG-NEXT: v_mov_b32_e32 v1, s3 8209; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v2 8210; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 8211; VI-SDAG-NEXT: flat_load_dword v3, v[0:1] 8212; VI-SDAG-NEXT: s_mov_b32 s2, 0x41000000 8213; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 8214; VI-SDAG-NEXT: v_add_u32_e32 v0, vcc, s0, v2 8215; VI-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 8216; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 8217; VI-SDAG-NEXT: v_add_f32_e32 v2, 0.5, v3 8218; VI-SDAG-NEXT: v_med3_f32 v2, v2, s2, v4 8219; VI-SDAG-NEXT: v_add_f32_e32 v5, 0x41800000, v3 8220; VI-SDAG-NEXT: v_add_f32_e32 v3, 0x41000000, v3 8221; VI-SDAG-NEXT: flat_store_dword v[0:1], v2 8222; VI-SDAG-NEXT: flat_store_dword v[0:1], v5 8223; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 8224; VI-SDAG-NEXT: flat_store_dword v[0:1], v3 8225; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 8226; VI-SDAG-NEXT: s_endpgm 8227; 8228; VI-GISEL-LABEL: two_non_inline_constant_multi_use: 8229; VI-GISEL: ; %bb.0: 8230; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 8231; VI-GISEL-NEXT: v_lshlrev_b32_e32 v2, 2, v0 8232; VI-GISEL-NEXT: v_mov_b32_e32 v4, 0x41000000 8233; VI-GISEL-NEXT: v_mov_b32_e32 v5, 0x41800000 8234; VI-GISEL-NEXT: 
s_waitcnt lgkmcnt(0) 8235; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 8236; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 8237; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 8238; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 8239; VI-GISEL-NEXT: flat_load_dword v3, v[0:1] 8240; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 8241; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 8242; VI-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 8243; VI-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 8244; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 8245; VI-GISEL-NEXT: v_add_f32_e32 v2, 0.5, v3 8246; VI-GISEL-NEXT: v_med3_f32 v2, v2, v4, v5 8247; VI-GISEL-NEXT: v_add_f32_e32 v6, 0x41800000, v3 8248; VI-GISEL-NEXT: v_add_f32_e32 v3, 0x41000000, v3 8249; VI-GISEL-NEXT: flat_store_dword v[0:1], v2 8250; VI-GISEL-NEXT: flat_store_dword v[0:1], v6 8251; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 8252; VI-GISEL-NEXT: flat_store_dword v[0:1], v3 8253; VI-GISEL-NEXT: s_waitcnt vmcnt(0) 8254; VI-GISEL-NEXT: s_endpgm 8255; 8256; GFX9-SDAG-LABEL: two_non_inline_constant_multi_use: 8257; GFX9-SDAG: ; %bb.0: 8258; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 8259; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 8260; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0x41800000 8261; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 8262; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3] 8263; GFX9-SDAG-NEXT: s_mov_b32 s2, 0x41000000 8264; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 8265; GFX9-SDAG-NEXT: v_add_f32_e32 v3, 0.5, v1 8266; GFX9-SDAG-NEXT: v_add_f32_e32 v4, 0x41800000, v1 8267; GFX9-SDAG-NEXT: v_add_f32_e32 v1, 0x41000000, v1 8268; GFX9-SDAG-NEXT: v_med3_f32 v2, v3, s2, v2 8269; GFX9-SDAG-NEXT: global_store_dword v0, v2, s[0:1] 8270; GFX9-SDAG-NEXT: global_store_dword v[0:1], v4, off 8271; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 8272; GFX9-SDAG-NEXT: global_store_dword v[0:1], v1, off 8273; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 8274; GFX9-SDAG-NEXT: s_endpgm 8275; 8276; GFX9-GISEL-LABEL: two_non_inline_constant_multi_use: 8277; GFX9-GISEL: ; %bb.0: 8278; GFX9-GISEL-NEXT: 
s_load_dwordx4 s[0:3], s[4:5], 0x24 8279; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 8280; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x41000000 8281; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, 0x41800000 8282; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 8283; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3] 8284; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 8285; GFX9-GISEL-NEXT: v_add_f32_e32 v4, 0.5, v1 8286; GFX9-GISEL-NEXT: v_add_f32_e32 v5, 0x41800000, v1 8287; GFX9-GISEL-NEXT: v_add_f32_e32 v1, 0x41000000, v1 8288; GFX9-GISEL-NEXT: v_med3_f32 v2, v4, v2, v3 8289; GFX9-GISEL-NEXT: global_store_dword v0, v2, s[0:1] 8290; GFX9-GISEL-NEXT: global_store_dword v[0:1], v5, off 8291; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 8292; GFX9-GISEL-NEXT: global_store_dword v[0:1], v1, off 8293; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 8294; GFX9-GISEL-NEXT: s_endpgm 8295; 8296; GFX11-SDAG-LABEL: two_non_inline_constant_multi_use: 8297; GFX11-SDAG: ; %bb.0: 8298; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 8299; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 8300; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 8301; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 8302; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 8303; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3] 8304; GFX11-SDAG-NEXT: s_mov_b32 s2, 0x41000000 8305; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 8306; GFX11-SDAG-NEXT: v_add_f32_e32 v3, 0x41800000, v1 8307; GFX11-SDAG-NEXT: v_add_f32_e32 v2, 0.5, v1 8308; GFX11-SDAG-NEXT: v_add_f32_e32 v1, 0x41000000, v1 8309; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) 8310; GFX11-SDAG-NEXT: v_med3_f32 v2, v2, s2, 0x41800000 8311; GFX11-SDAG-NEXT: global_store_b32 v0, v2, s[0:1] 8312; GFX11-SDAG-NEXT: global_store_b32 v[0:1], v3, off dlc 8313; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 8314; GFX11-SDAG-NEXT: global_store_b32 v[0:1], v1, off dlc 8315; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 8316; GFX11-SDAG-NEXT: s_endpgm 8317; 8318; GFX11-GISEL-LABEL: two_non_inline_constant_multi_use: 8319; GFX11-GISEL: ; %bb.0: 
8320; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 8321; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 8322; GFX11-GISEL-NEXT: v_mov_b32_e32 v2, 0x41800000 8323; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_1) 8324; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 8325; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 8326; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3] 8327; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 8328; GFX11-GISEL-NEXT: v_add_f32_e32 v3, 0.5, v1 8329; GFX11-GISEL-NEXT: v_med3_f32 v2, v3, 0x41000000, v2 8330; GFX11-GISEL-NEXT: v_add_f32_e32 v3, 0x41800000, v1 8331; GFX11-GISEL-NEXT: v_add_f32_e32 v1, 0x41000000, v1 8332; GFX11-GISEL-NEXT: global_store_b32 v0, v2, s[0:1] 8333; GFX11-GISEL-NEXT: global_store_b32 v[0:1], v3, off dlc 8334; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 8335; GFX11-GISEL-NEXT: global_store_b32 v[0:1], v1, off dlc 8336; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 8337; GFX11-GISEL-NEXT: s_endpgm 8338 %tid = call i32 @llvm.amdgcn.workitem.id.x() 8339 %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid 8340 %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid 8341 %a = load float, ptr addrspace(1) %gep0 8342 %add = fadd nnan float %a, 0.5 8343 %max = call float @llvm.maxnum.f32(float %add, float 8.0) 8344 %med = call float @llvm.minnum.f32(float %max, float 16.0) 8345 8346 store float %med, ptr addrspace(1) %out.gep 8347 8348 %extra.use0 = fadd float %a, 16.0 8349 store volatile float %extra.use0, ptr addrspace(1) undef 8350 %extra.use1 = fadd float %a, 8.0 8351 store volatile float %extra.use1, ptr addrspace(1) undef 8352 ret void 8353} 8354 8355declare i32 @llvm.amdgcn.workitem.id.x() #0 8356declare float @llvm.fabs.f32(float) #0 8357declare float @llvm.minnum.f32(float, float) #0 8358declare float @llvm.maxnum.f32(float, float) #0 8359declare double @llvm.minnum.f64(double, double) #0 8360declare double @llvm.maxnum.f64(double, double) #0 8361declare half 
@llvm.fabs.f16(half) #0 8362declare half @llvm.minnum.f16(half, half) #0 8363declare half @llvm.maxnum.f16(half, half) #0 8364 8365attributes #0 = { nounwind readnone } 8366attributes #1 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" } 8367attributes #2 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" } 8368;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: 8369; SI: {{.*}} 8370