; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=SI
; RUN: llc < %s -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefix=GFX7
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=GFX10
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=GFX1030
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefix=GFX1100

; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=G_SI
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX7
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX10
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1030
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1100

; Code generation test for the f32 raw pointer-buffer fmin/fmax atomic
; intrinsics declared below. Every function is compiled for five -mcpu values,
; once with the default instruction selector and once with -global-isel (the
; check groups whose prefix starts with G_).
;
; As used in this file, the intrinsic operands are: the f32 data value, the
; addrspace(8) buffer resource, a variable i32 offset, a constant i32 (0 or 4)
; that appears as the scalar offset operand of the expected instruction, and
; an immarg i32 (0 or 2) where 2 produces the `slc` modifier in the expected
; output.
;
; NOTE(review): the variable offset parameter is named %vindex throughout, but
; the expected instructions all use `offen` addressing; the name looks like a
; leftover from another test. Confirm before renaming.

declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float, ptr addrspace(8), i32, i32, i32 immarg)


; fmin in a kernel with the result unused: the expected atomic carries no
; `glc` modifier.
define amdgpu_kernel void @raw_ptr_buffer_atomic_min_noret_f32(ptr addrspace(8) inreg %rsrc, float %data, i32 %vindex) {
; SI-LABEL: raw_ptr_buffer_atomic_min_noret_f32:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s6
; SI-NEXT:    v_mov_b32_e32 v1, s7
; SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_ptr_buffer_atomic_min_noret_f32:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s6
; GFX7-NEXT:    v_mov_b32_e32 v1, s7
; GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_ptr_buffer_atomic_min_noret_f32:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_clause 0x1
; GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s6
; GFX10-NEXT:    v_mov_b32_e32 v1, s7
; GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_ptr_buffer_atomic_min_noret_f32:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    s_clause 0x1
; GFX1030-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1030-NEXT:    v_mov_b32_e32 v0, s6
; GFX1030-NEXT:    v_mov_b32_e32 v1, s7
; GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; GFX1030-NEXT:    s_endpgm
;
; GFX1100-LABEL: raw_ptr_buffer_atomic_min_noret_f32:
; GFX1100:       ; %bb.0: ; %main_body
; GFX1100-NEXT:    s_clause 0x1
; GFX1100-NEXT:    s_load_b64 s[6:7], s[4:5], 0x34
; GFX1100-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen
; GFX1100-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_ptr_buffer_atomic_min_noret_f32:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; G_SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
; G_SI-NEXT:    v_mov_b32_e32 v0, s6
; G_SI-NEXT:    v_mov_b32_e32 v1, s7
; G_SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_ptr_buffer_atomic_min_noret_f32:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; G_GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX7-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_ptr_buffer_atomic_min_noret_f32:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    s_clause 0x1
; G_GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; G_GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX10-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_ptr_buffer_atomic_min_noret_f32:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    s_clause 0x1
; G_GFX1030-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; G_GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; G_GFX1030-NEXT:    s_endpgm
;
; G_GFX1100-LABEL: raw_ptr_buffer_atomic_min_noret_f32:
; G_GFX1100:       ; %bb.0: ; %main_body
; G_GFX1100-NEXT:    s_clause 0x1
; G_GFX1100-NEXT:    s_load_b64 s[6:7], s[4:5], 0x34
; G_GFX1100-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; G_GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1100-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; G_GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen
; G_GFX1100-NEXT:    s_endpgm
main_body:
  %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f32(float %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
  ret void
}

; fmin in a pixel shader with the result stored to an undef addrspace(1)
; pointer: the expected atomic carries `glc` and is followed by a vmcnt wait
; before the store.
define amdgpu_ps void @raw_ptr_buffer_atomic_min_rtn_f32(ptr addrspace(8) inreg %rsrc, float %data, i32 %vindex) {
; SI-LABEL: raw_ptr_buffer_atomic_min_rtn_f32:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_ptr_buffer_atomic_min_rtn_f32:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_ptr_buffer_atomic_min_rtn_f32:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_ptr_buffer_atomic_min_rtn_f32:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; GFX1030-NEXT:    s_waitcnt vmcnt(0)
; GFX1030-NEXT:    global_store_dword v[0:1], v0, off
; GFX1030-NEXT:    s_endpgm
;
; GFX1100-LABEL: raw_ptr_buffer_atomic_min_rtn_f32:
; GFX1100:       ; %bb.0: ; %main_body
; GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen glc
; GFX1100-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-NEXT:    global_store_b32 v[0:1], v0, off
; GFX1100-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_ptr_buffer_atomic_min_rtn_f32:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; G_SI-NEXT:    s_mov_b32 s2, -1
; G_SI-NEXT:    s_mov_b32 s3, 0xf000
; G_SI-NEXT:    s_waitcnt vmcnt(0)
; G_SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_ptr_buffer_atomic_min_rtn_f32:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; G_GFX7-NEXT:    s_mov_b32 s2, -1
; G_GFX7-NEXT:    s_mov_b32 s3, 0xf000
; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
; G_GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_ptr_buffer_atomic_min_rtn_f32:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
; G_GFX10-NEXT:    global_store_dword v[0:1], v0, off
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_ptr_buffer_atomic_min_rtn_f32:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1030-NEXT:    global_store_dword v[0:1], v0, off
; G_GFX1030-NEXT:    s_endpgm
;
; G_GFX1100-LABEL: raw_ptr_buffer_atomic_min_rtn_f32:
; G_GFX1100:       ; %bb.0: ; %main_body
; G_GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen glc
; G_GFX1100-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1100-NEXT:    global_store_b32 v[0:1], v0, off
; G_GFX1100-NEXT:    s_endpgm
main_body:
  %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f32(float %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
  store float %ret, ptr addrspace(1) undef
  ret void
}

; fmin in a kernel with constant offset 4 and the last operand set to 2: the
; expected atomic is `4 offen glc slc`, and the result is written to the
; addrspace(3) pointer %out with a DS store.
define amdgpu_kernel void @raw_ptr_buffer_atomic_min_rtn_f32_off4_slc(ptr addrspace(8) inreg %rsrc, float %data, i32 %vindex, ptr addrspace(3) %out) {
; SI-LABEL: raw_ptr_buffer_atomic_min_rtn_f32_off4_slc:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 m0, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s6
; SI-NEXT:    v_mov_b32_e32 v1, s7
; SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 4 offen glc slc
; SI-NEXT:    s_load_dword s0, s[4:5], 0xf
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v1, s0
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    ds_write_b32 v1, v0
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_ptr_buffer_atomic_min_rtn_f32_off4_slc:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 4 offen glc slc
; GFX7-NEXT:    v_mov_b32_e32 v1, s6
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ds_write_b32 v1, v0
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_ptr_buffer_atomic_min_rtn_f32_off4_slc:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s12
; GFX10-NEXT:    v_mov_b32_e32 v1, s13
; GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[8:11], 4 offen glc slc
; GFX10-NEXT:    v_mov_b32_e32 v1, s14
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ds_write_b32 v1, v0
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_ptr_buffer_atomic_min_rtn_f32_off4_slc:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1030-NEXT:    v_mov_b32_e32 v0, s4
; GFX1030-NEXT:    v_mov_b32_e32 v1, s5
; GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 4 offen glc slc
; GFX1030-NEXT:    v_mov_b32_e32 v1, s6
; GFX1030-NEXT:    s_waitcnt vmcnt(0)
; GFX1030-NEXT:    ds_write_b32 v1, v0
; GFX1030-NEXT:    s_endpgm
;
; GFX1100-LABEL: raw_ptr_buffer_atomic_min_rtn_f32_off4_slc:
; GFX1100:       ; %bb.0: ; %main_body
; GFX1100-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
; GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 4 offen glc slc
; GFX1100-NEXT:    v_mov_b32_e32 v1, s6
; GFX1100-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-NEXT:    ds_store_b32 v1, v0
; GFX1100-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_ptr_buffer_atomic_min_rtn_f32_off4_slc:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; G_SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; G_SI-NEXT:    s_mov_b32 m0, -1
; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
; G_SI-NEXT:    v_mov_b32_e32 v0, s6
; G_SI-NEXT:    v_mov_b32_e32 v1, s7
; G_SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 4 offen glc slc
; G_SI-NEXT:    s_load_dword s0, s[4:5], 0xf
; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
; G_SI-NEXT:    v_mov_b32_e32 v1, s0
; G_SI-NEXT:    s_waitcnt vmcnt(0)
; G_SI-NEXT:    ds_write_b32 v1, v0
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_ptr_buffer_atomic_min_rtn_f32_off4_slc:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; G_GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; G_GFX7-NEXT:    s_mov_b32 m0, -1
; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX7-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 4 offen glc slc
; G_GFX7-NEXT:    s_load_dword s0, s[4:5], 0xf
; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT:    v_mov_b32_e32 v1, s0
; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
; G_GFX7-NEXT:    ds_write_b32 v1, v0
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_ptr_buffer_atomic_min_rtn_f32_off4_slc:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    s_clause 0x1
; G_GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; G_GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX10-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 4 offen glc slc
; G_GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; G_GFX10-NEXT:    s_load_dword s0, s[4:5], 0x3c
; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT:    v_mov_b32_e32 v1, s0
; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
; G_GFX10-NEXT:    ds_write_b32 v1, v0
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_ptr_buffer_atomic_min_rtn_f32_off4_slc:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    s_clause 0x1
; G_GFX1030-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; G_GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 4 offen glc slc
; G_GFX1030-NEXT:    s_load_dword s0, s[4:5], 0x3c
; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s0
; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1030-NEXT:    ds_write_b32 v1, v0
; G_GFX1030-NEXT:    s_endpgm
;
; G_GFX1100-LABEL: raw_ptr_buffer_atomic_min_rtn_f32_off4_slc:
; G_GFX1100:       ; %bb.0: ; %main_body
; G_GFX1100-NEXT:    s_clause 0x1
; G_GFX1100-NEXT:    s_load_b64 s[6:7], s[4:5], 0x34
; G_GFX1100-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; G_GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1100-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; G_GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 4 offen glc slc
; G_GFX1100-NEXT:    s_load_b32 s0, s[4:5], 0x3c
; G_GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1100-NEXT:    v_mov_b32_e32 v1, s0
; G_GFX1100-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1100-NEXT:    ds_store_b32 v1, v0
; G_GFX1100-NEXT:    s_endpgm
main_body:
  %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f32(float %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
  store float %ret, ptr addrspace(3) %out, align 8
  ret void
}

; fmax in a kernel with the result unused: the expected atomic carries no
; `glc` modifier.
define amdgpu_kernel void @raw_ptr_buffer_atomic_max_noret_f32(ptr addrspace(8) inreg %rsrc, float %data, i32 %vindex) {
; SI-LABEL: raw_ptr_buffer_atomic_max_noret_f32:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s6
; SI-NEXT:    v_mov_b32_e32 v1, s7
; SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_ptr_buffer_atomic_max_noret_f32:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s6
; GFX7-NEXT:    v_mov_b32_e32 v1, s7
; GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_ptr_buffer_atomic_max_noret_f32:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_clause 0x1
; GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s6
; GFX10-NEXT:    v_mov_b32_e32 v1, s7
; GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_ptr_buffer_atomic_max_noret_f32:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    s_clause 0x1
; GFX1030-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1030-NEXT:    v_mov_b32_e32 v0, s6
; GFX1030-NEXT:    v_mov_b32_e32 v1, s7
; GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; GFX1030-NEXT:    s_endpgm
;
; GFX1100-LABEL: raw_ptr_buffer_atomic_max_noret_f32:
; GFX1100:       ; %bb.0: ; %main_body
; GFX1100-NEXT:    s_clause 0x1
; GFX1100-NEXT:    s_load_b64 s[6:7], s[4:5], 0x34
; GFX1100-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen
; GFX1100-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_ptr_buffer_atomic_max_noret_f32:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; G_SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
; G_SI-NEXT:    v_mov_b32_e32 v0, s6
; G_SI-NEXT:    v_mov_b32_e32 v1, s7
; G_SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_ptr_buffer_atomic_max_noret_f32:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; G_GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX7-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_ptr_buffer_atomic_max_noret_f32:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    s_clause 0x1
; G_GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; G_GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX10-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_ptr_buffer_atomic_max_noret_f32:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    s_clause 0x1
; G_GFX1030-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; G_GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; G_GFX1030-NEXT:    s_endpgm
;
; G_GFX1100-LABEL: raw_ptr_buffer_atomic_max_noret_f32:
; G_GFX1100:       ; %bb.0: ; %main_body
; G_GFX1100-NEXT:    s_clause 0x1
; G_GFX1100-NEXT:    s_load_b64 s[6:7], s[4:5], 0x34
; G_GFX1100-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; G_GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1100-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; G_GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen
; G_GFX1100-NEXT:    s_endpgm
main_body:
  %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
  ret void
}

; fmax in a pixel shader with the result stored to an undef addrspace(1)
; pointer: the expected atomic carries `glc` and is followed by a vmcnt wait
; before the store.
define amdgpu_ps void @raw_ptr_buffer_atomic_max_rtn_f32(ptr addrspace(8) inreg %rsrc, float %data, i32 %vindex) {
; SI-LABEL: raw_ptr_buffer_atomic_max_rtn_f32:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_ptr_buffer_atomic_max_rtn_f32:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_ptr_buffer_atomic_max_rtn_f32:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_ptr_buffer_atomic_max_rtn_f32:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; GFX1030-NEXT:    s_waitcnt vmcnt(0)
; GFX1030-NEXT:    global_store_dword v[0:1], v0, off
; GFX1030-NEXT:    s_endpgm
;
; GFX1100-LABEL: raw_ptr_buffer_atomic_max_rtn_f32:
; GFX1100:       ; %bb.0: ; %main_body
; GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen glc
; GFX1100-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-NEXT:    global_store_b32 v[0:1], v0, off
; GFX1100-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_ptr_buffer_atomic_max_rtn_f32:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; G_SI-NEXT:    s_mov_b32 s2, -1
; G_SI-NEXT:    s_mov_b32 s3, 0xf000
; G_SI-NEXT:    s_waitcnt vmcnt(0)
; G_SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_ptr_buffer_atomic_max_rtn_f32:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; G_GFX7-NEXT:    s_mov_b32 s2, -1
; G_GFX7-NEXT:    s_mov_b32 s3, 0xf000
; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
; G_GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_ptr_buffer_atomic_max_rtn_f32:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
; G_GFX10-NEXT:    global_store_dword v[0:1], v0, off
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_ptr_buffer_atomic_max_rtn_f32:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1030-NEXT:    global_store_dword v[0:1], v0, off
; G_GFX1030-NEXT:    s_endpgm
;
; G_GFX1100-LABEL: raw_ptr_buffer_atomic_max_rtn_f32:
; G_GFX1100:       ; %bb.0: ; %main_body
; G_GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen glc
; G_GFX1100-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1100-NEXT:    global_store_b32 v[0:1], v0, off
; G_GFX1100-NEXT:    s_endpgm
main_body:
  %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
  store float %ret, ptr addrspace(1) undef
  ret void
}

; fmax in a kernel with constant offset 4 and the last operand set to 2: the
; expected atomic is `4 offen glc slc`, and the result is written to the
; addrspace(1) pointer %out.
define amdgpu_kernel void @raw_ptr_buffer_atomic_max_rtn_f32_off4_slc(ptr addrspace(8) inreg %rsrc, float %data, i32 %vindex, ptr addrspace(1) %out) {
; SI-LABEL: raw_ptr_buffer_atomic_max_rtn_f32_off4_slc:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    v_mov_b32_e32 v1, s5
; SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 4 offen glc slc
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_mov_b32 s0, s6
; SI-NEXT:    s_mov_b32 s1, s7
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_ptr_buffer_atomic_max_rtn_f32_off4_slc:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 4 offen glc slc
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_mov_b32 s0, s6
; GFX7-NEXT:    s_mov_b32 s1, s7
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_ptr_buffer_atomic_max_rtn_f32_off4_slc:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s12
; GFX10-NEXT:    v_mov_b32_e32 v1, s13
; GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[8:11], 4 offen glc slc
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v1, v0, s[14:15]
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_ptr_buffer_atomic_max_rtn_f32_off4_slc:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1030-NEXT:    v_mov_b32_e32 v0, s4
; GFX1030-NEXT:    v_mov_b32_e32 v1, s5
; GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 4 offen glc slc
; GFX1030-NEXT:    v_mov_b32_e32 v1, 0
; GFX1030-NEXT:    s_waitcnt vmcnt(0)
; GFX1030-NEXT:    global_store_dword v1, v0, s[6:7]
; GFX1030-NEXT:    s_endpgm
;
; GFX1100-LABEL: raw_ptr_buffer_atomic_max_rtn_f32_off4_slc:
; GFX1100:       ; %bb.0: ; %main_body
; GFX1100-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
; GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 4 offen glc slc
; GFX1100-NEXT:    v_mov_b32_e32 v1, 0
; GFX1100-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-NEXT:    global_store_b32 v1, v0, s[6:7]
; GFX1100-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_ptr_buffer_atomic_max_rtn_f32_off4_slc:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
; G_SI-NEXT:    v_mov_b32_e32 v0, s4
; G_SI-NEXT:    v_mov_b32_e32 v1, s5
; G_SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 4 offen glc slc
; G_SI-NEXT:    s_mov_b32 s2, -1
; G_SI-NEXT:    s_mov_b32 s3, 0xf000
; G_SI-NEXT:    s_mov_b64 s[0:1], s[6:7]
; G_SI-NEXT:    s_waitcnt vmcnt(0)
; G_SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_ptr_buffer_atomic_max_rtn_f32_off4_slc:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT:    v_mov_b32_e32 v0, s4
; G_GFX7-NEXT:    v_mov_b32_e32 v1, s5
; G_GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 4 offen glc slc
; G_GFX7-NEXT:    s_mov_b32 s2, -1
; G_GFX7-NEXT:    s_mov_b32 s3, 0xf000
; G_GFX7-NEXT:    s_mov_b64 s[0:1], s[6:7]
; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
; G_GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_ptr_buffer_atomic_max_rtn_f32_off4_slc:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT:    v_mov_b32_e32 v0, s12
; G_GFX10-NEXT:    v_mov_b32_e32 v1, s13
; G_GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[8:11], 4 offen glc slc
; G_GFX10-NEXT:    v_mov_b32_e32 v1, 0
; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
; G_GFX10-NEXT:    global_store_dword v1, v0, s[14:15]
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_ptr_buffer_atomic_max_rtn_f32_off4_slc:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s4
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s5
; G_GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 4 offen glc slc
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, 0
; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1030-NEXT:    global_store_dword v1, v0, s[6:7]
; G_GFX1030-NEXT:    s_endpgm
;
; G_GFX1100-LABEL: raw_ptr_buffer_atomic_max_rtn_f32_off4_slc:
; G_GFX1100:       ; %bb.0: ; %main_body
; G_GFX1100-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
; G_GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1100-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; G_GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 4 offen glc slc
; G_GFX1100-NEXT:    v_mov_b32_e32 v1, 0
; G_GFX1100-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1100-NEXT:    global_store_b32 v1, v0, s[6:7]
; G_GFX1100-NEXT:    s_endpgm
main_body:
  %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
  store float %ret, ptr addrspace(1) %out, align 8
  ret void
}