; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX12 %s

; This file checks how two chained llvm.maxnum calls are selected on each of
; the five llc configurations above: as a single three-operand max
; (v_max3_*) where the checks show one, or as separate two-operand max
; instructions otherwise.
;
; NOTE(review): the "olt" in the kernel names does not correspond to anything
; in the bodies (no fcmp is used, only llvm.maxnum); presumably historical --
; confirm before renaming.

; f32: %f1 = maxnum(maxnum(%a, %b), %c). The inner result is the FIRST operand
; of the outer call. Every prefix expects one v_max3_f32 v0, v0, v1, v2
; (spelled v_max3_num_f32 under the GFX12 prefix). The three inputs are
; volatile loads, so the checks show three separate loads, each followed by
; its own wait.
define amdgpu_kernel void @test_fmax3_olt_0_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
; SI-LABEL: test_fmax3_olt_0_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s11, 0xf000
; SI-NEXT: s_mov_b32 s10, -1
; SI-NEXT: s_mov_b32 s14, s10
; SI-NEXT: s_mov_b32 s15, s11
; SI-NEXT: s_mov_b32 s18, s10
; SI-NEXT: s_mov_b32 s19, s11
; SI-NEXT: s_mov_b32 s22, s10
; SI-NEXT: s_mov_b32 s23, s11
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s12, s2
; SI-NEXT: s_mov_b32 s13, s3
; SI-NEXT: s_mov_b32 s16, s4
; SI-NEXT: s_mov_b32 s17, s5
; SI-NEXT: s_mov_b32 s20, s6
; SI-NEXT: s_mov_b32 s21, s7
; SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 glc
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: buffer_load_dword v1, off, s[16:19], 0 glc
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: buffer_load_dword v2, off, s[20:23], 0 glc
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_mov_b32 s8, s0
; SI-NEXT: s_mov_b32 s9, s1
; SI-NEXT: v_max3_f32 v0, v0, v1, v2
; SI-NEXT: buffer_store_dword v0, off, s[8:11], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: test_fmax3_olt_0_f32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-NEXT: s_mov_b32 s11, 0xf000
; VI-NEXT: s_mov_b32 s10, -1
; VI-NEXT: s_mov_b32 s14, s10
; VI-NEXT: s_mov_b32 s15, s11
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_mov_b32 s12, s2
; VI-NEXT: s_mov_b32 s13, s3
; VI-NEXT: s_mov_b32 s16, s4
; VI-NEXT: s_mov_b32 s17, s5
; VI-NEXT: s_mov_b32 s18, s10
; VI-NEXT: s_mov_b32 s19, s11
; VI-NEXT: s_mov_b32 s4, s6
; VI-NEXT: s_mov_b32 s5, s7
; VI-NEXT: s_mov_b32 s6, s10
; VI-NEXT: s_mov_b32 s7, s11
; VI-NEXT: buffer_load_dword v0, off, s[12:15], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v1, off, s[16:19], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v2, off, s[4:7], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_mov_b32 s8, s0
; VI-NEXT: s_mov_b32 s9, s1
; VI-NEXT: v_max3_f32 v0, v0, v1, v2
; VI-NEXT: buffer_store_dword v0, off, s[8:11], 0
; VI-NEXT: s_endpgm
;
; GFX9-LABEL: test_fmax3_olt_0_f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_mov_b32 s6, s2
; GFX9-NEXT: s_mov_b32 s7, s3
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s4, s10
; GFX9-NEXT: s_mov_b32 s5, s11
; GFX9-NEXT: s_mov_b32 s16, s12
; GFX9-NEXT: s_mov_b32 s17, s13
; GFX9-NEXT: s_mov_b32 s18, s2
; GFX9-NEXT: s_mov_b32 s19, s3
; GFX9-NEXT: s_mov_b32 s12, s14
; GFX9-NEXT: s_mov_b32 s13, s15
; GFX9-NEXT: s_mov_b32 s14, s2
; GFX9-NEXT: s_mov_b32 s15, s3
; GFX9-NEXT: buffer_load_dword v0, off, s[4:7], 0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v1, off, s[16:19], 0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v2, off, s[12:15], 0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_mov_b32 s0, s8
; GFX9-NEXT: s_mov_b32 s1, s9
; GFX9-NEXT: v_max3_f32 v0, v0, v1, v2
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
;
; GFX11-LABEL: test_fmax3_olt_0_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX11-NEXT: s_mov_b32 s10, -1
; GFX11-NEXT: s_mov_b32 s11, 0x31016000
; GFX11-NEXT: s_mov_b32 s14, s10
; GFX11-NEXT: s_mov_b32 s15, s11
; GFX11-NEXT: s_mov_b32 s18, s10
; GFX11-NEXT: s_mov_b32 s19, s11
; GFX11-NEXT: s_mov_b32 s22, s10
; GFX11-NEXT: s_mov_b32 s23, s11
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s12, s2
; GFX11-NEXT: s_mov_b32 s13, s3
; GFX11-NEXT: s_mov_b32 s16, s4
; GFX11-NEXT: s_mov_b32 s17, s5
; GFX11-NEXT: s_mov_b32 s20, s6
; GFX11-NEXT: s_mov_b32 s21, s7
; GFX11-NEXT: buffer_load_b32 v0, off, s[12:15], 0 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_load_b32 v1, off, s[16:19], 0 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_load_b32 v2, off, s[20:23], 0 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_mov_b32 s8, s0
; GFX11-NEXT: s_mov_b32 s9, s1
; GFX11-NEXT: v_max3_f32 v0, v0, v1, v2
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: test_fmax3_olt_0_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX12-NEXT: s_mov_b32 s10, -1
; GFX12-NEXT: s_mov_b32 s11, 0x31016000
; GFX12-NEXT: s_mov_b32 s14, s10
; GFX12-NEXT: s_mov_b32 s15, s11
; GFX12-NEXT: s_mov_b32 s18, s10
; GFX12-NEXT: s_mov_b32 s19, s11
; GFX12-NEXT: s_mov_b32 s22, s10
; GFX12-NEXT: s_mov_b32 s23, s11
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_mov_b32 s12, s2
; GFX12-NEXT: s_mov_b32 s13, s3
; GFX12-NEXT: s_mov_b32 s16, s4
; GFX12-NEXT: s_mov_b32 s17, s5
; GFX12-NEXT: s_mov_b32 s20, s6
; GFX12-NEXT: s_mov_b32 s21, s7
; GFX12-NEXT: buffer_load_b32 v0, off, s[12:15], null scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: buffer_load_b32 v1, off, s[16:19], null scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: buffer_load_b32 v2, off, s[20:23], null scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_mov_b32 s8, s0
; GFX12-NEXT: s_mov_b32 s9, s1
; GFX12-NEXT: v_max3_num_f32 v0, v0, v1, v2
; GFX12-NEXT: buffer_store_b32 v0, off, s[8:11], null
; GFX12-NEXT: s_endpgm
  %a = load volatile float, ptr addrspace(1) %aptr, align 4
  %b = load volatile float, ptr addrspace(1) %bptr, align 4
  %c = load volatile float, ptr addrspace(1) %cptr, align 4
  %f0 = call float @llvm.maxnum.f32(float %a, float %b)
  %f1 = call float @llvm.maxnum.f32(float %f0, float %c)
  store float %f1, ptr addrspace(1) %out, align 4
  ret void
}

; Commute operand of second fmax
; f32: %f1 = maxnum(%c, maxnum(%a, %b)). Same as the kernel above, except the
; inner result is the SECOND operand of the outer call. Every prefix still
; expects one three-operand max, now with the operand order v2, v0, v1.
define amdgpu_kernel void @test_fmax3_olt_1_f32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
; SI-LABEL: test_fmax3_olt_1_f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s11, 0xf000
; SI-NEXT: s_mov_b32 s10, -1
; SI-NEXT: s_mov_b32 s14, s10
; SI-NEXT: s_mov_b32 s15, s11
; SI-NEXT: s_mov_b32 s18, s10
; SI-NEXT: s_mov_b32 s19, s11
; SI-NEXT: s_mov_b32 s22, s10
; SI-NEXT: s_mov_b32 s23, s11
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s12, s2
; SI-NEXT: s_mov_b32 s13, s3
; SI-NEXT: s_mov_b32 s16, s4
; SI-NEXT: s_mov_b32 s17, s5
; SI-NEXT: s_mov_b32 s20, s6
; SI-NEXT: s_mov_b32 s21, s7
; SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 glc
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: buffer_load_dword v1, off, s[16:19], 0 glc
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: buffer_load_dword v2, off, s[20:23], 0 glc
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_mov_b32 s8, s0
; SI-NEXT: s_mov_b32 s9, s1
; SI-NEXT: v_max3_f32 v0, v2, v0, v1
; SI-NEXT: buffer_store_dword v0, off, s[8:11], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: test_fmax3_olt_1_f32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-NEXT: s_mov_b32 s11, 0xf000
; VI-NEXT: s_mov_b32 s10, -1
; VI-NEXT: s_mov_b32 s14, s10
; VI-NEXT: s_mov_b32 s15, s11
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_mov_b32 s12, s2
; VI-NEXT: s_mov_b32 s13, s3
; VI-NEXT: s_mov_b32 s16, s4
; VI-NEXT: s_mov_b32 s17, s5
; VI-NEXT: s_mov_b32 s18, s10
; VI-NEXT: s_mov_b32 s19, s11
; VI-NEXT: s_mov_b32 s4, s6
; VI-NEXT: s_mov_b32 s5, s7
; VI-NEXT: s_mov_b32 s6, s10
; VI-NEXT: s_mov_b32 s7, s11
; VI-NEXT: buffer_load_dword v0, off, s[12:15], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v1, off, s[16:19], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_dword v2, off, s[4:7], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_mov_b32 s8, s0
; VI-NEXT: s_mov_b32 s9, s1
; VI-NEXT: v_max3_f32 v0, v2, v0, v1
; VI-NEXT: buffer_store_dword v0, off, s[8:11], 0
; VI-NEXT: s_endpgm
;
; GFX9-LABEL: test_fmax3_olt_1_f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_mov_b32 s6, s2
; GFX9-NEXT: s_mov_b32 s7, s3
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s4, s10
; GFX9-NEXT: s_mov_b32 s5, s11
; GFX9-NEXT: s_mov_b32 s16, s12
; GFX9-NEXT: s_mov_b32 s17, s13
; GFX9-NEXT: s_mov_b32 s18, s2
; GFX9-NEXT: s_mov_b32 s19, s3
; GFX9-NEXT: s_mov_b32 s12, s14
; GFX9-NEXT: s_mov_b32 s13, s15
; GFX9-NEXT: s_mov_b32 s14, s2
; GFX9-NEXT: s_mov_b32 s15, s3
; GFX9-NEXT: buffer_load_dword v0, off, s[4:7], 0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v1, off, s[16:19], 0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_dword v2, off, s[12:15], 0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_mov_b32 s0, s8
; GFX9-NEXT: s_mov_b32 s1, s9
; GFX9-NEXT: v_max3_f32 v0, v2, v0, v1
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
;
; GFX11-LABEL: test_fmax3_olt_1_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX11-NEXT: s_mov_b32 s10, -1
; GFX11-NEXT: s_mov_b32 s11, 0x31016000
; GFX11-NEXT: s_mov_b32 s14, s10
; GFX11-NEXT: s_mov_b32 s15, s11
; GFX11-NEXT: s_mov_b32 s18, s10
; GFX11-NEXT: s_mov_b32 s19, s11
; GFX11-NEXT: s_mov_b32 s22, s10
; GFX11-NEXT: s_mov_b32 s23, s11
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s12, s2
; GFX11-NEXT: s_mov_b32 s13, s3
; GFX11-NEXT: s_mov_b32 s16, s4
; GFX11-NEXT: s_mov_b32 s17, s5
; GFX11-NEXT: s_mov_b32 s20, s6
; GFX11-NEXT: s_mov_b32 s21, s7
; GFX11-NEXT: buffer_load_b32 v0, off, s[12:15], 0 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_load_b32 v1, off, s[16:19], 0 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_load_b32 v2, off, s[20:23], 0 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_mov_b32 s8, s0
; GFX11-NEXT: s_mov_b32 s9, s1
; GFX11-NEXT: v_max3_f32 v0, v2, v0, v1
; GFX11-NEXT: buffer_store_b32 v0, off, s[8:11], 0
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: test_fmax3_olt_1_f32:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX12-NEXT: s_mov_b32 s10, -1
; GFX12-NEXT: s_mov_b32 s11, 0x31016000
; GFX12-NEXT: s_mov_b32 s14, s10
; GFX12-NEXT: s_mov_b32 s15, s11
; GFX12-NEXT: s_mov_b32 s18, s10
; GFX12-NEXT: s_mov_b32 s19, s11
; GFX12-NEXT: s_mov_b32 s22, s10
; GFX12-NEXT: s_mov_b32 s23, s11
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_mov_b32 s12, s2
; GFX12-NEXT: s_mov_b32 s13, s3
; GFX12-NEXT: s_mov_b32 s16, s4
; GFX12-NEXT: s_mov_b32 s17, s5
; GFX12-NEXT: s_mov_b32 s20, s6
; GFX12-NEXT: s_mov_b32 s21, s7
; GFX12-NEXT: buffer_load_b32 v0, off, s[12:15], null scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: buffer_load_b32 v1, off, s[16:19], null scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: buffer_load_b32 v2, off, s[20:23], null scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_mov_b32 s8, s0
; GFX12-NEXT: s_mov_b32 s9, s1
; GFX12-NEXT: v_max3_num_f32 v0, v2, v0, v1
; GFX12-NEXT: buffer_store_b32 v0, off, s[8:11], null
; GFX12-NEXT: s_endpgm
  %a = load volatile float, ptr addrspace(1) %aptr, align 4
  %b = load volatile float, ptr addrspace(1) %bptr, align 4
  %c = load volatile float, ptr addrspace(1) %cptr, align 4
  %f0 = call float @llvm.maxnum.f32(float %a, float %b)
  %f1 = call float @llvm.maxnum.f32(float %c, float %f0)
  store float %f1, ptr addrspace(1) %out, align 4
  ret void
}

; f16: %f1 = maxnum(maxnum(%a, %b), %c), inner result as the first operand.
; What the checks expect differs per prefix:
;  - SI checks: each input goes through v_cvt_f32_f16, then one v_max3_f32,
;    then v_cvt_f16_f32 before the store.
;  - VI checks: no max3; five v_max_f16_e32 instructions, three of which use
;    the same register for both sources.
;  - GFX9 and GFX11 checks: one v_max3_f16.
;  - GFX12 checks: one v_max3_num_f16.
define amdgpu_kernel void @test_fmax3_olt_0_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
; SI-LABEL: test_fmax3_olt_0_f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s11, 0xf000
; SI-NEXT: s_mov_b32 s10, -1
; SI-NEXT: s_mov_b32 s14, s10
; SI-NEXT: s_mov_b32 s15, s11
; SI-NEXT: s_mov_b32 s18, s10
; SI-NEXT: s_mov_b32 s19, s11
; SI-NEXT: s_mov_b32 s22, s10
; SI-NEXT: s_mov_b32 s23, s11
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s12, s2
; SI-NEXT: s_mov_b32 s13, s3
; SI-NEXT: s_mov_b32 s16, s4
; SI-NEXT: s_mov_b32 s17, s5
; SI-NEXT: s_mov_b32 s20, s6
; SI-NEXT: s_mov_b32 s21, s7
; SI-NEXT: buffer_load_ushort v0, off, s[12:15], 0 glc
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: buffer_load_ushort v1, off, s[16:19], 0 glc
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: buffer_load_ushort v2, off, s[20:23], 0 glc
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_mov_b32 s8, s0
; SI-NEXT: s_mov_b32 s9, s1
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-NEXT: v_max3_f32 v0, v0, v1, v2
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NEXT: buffer_store_short v0, off, s[8:11], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: test_fmax3_olt_0_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-NEXT: s_mov_b32 s11, 0xf000
; VI-NEXT: s_mov_b32 s10, -1
; VI-NEXT: s_mov_b32 s14, s10
; VI-NEXT: s_mov_b32 s15, s11
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_mov_b32 s12, s2
; VI-NEXT: s_mov_b32 s13, s3
; VI-NEXT: s_mov_b32 s16, s4
; VI-NEXT: s_mov_b32 s17, s5
; VI-NEXT: s_mov_b32 s18, s10
; VI-NEXT: s_mov_b32 s19, s11
; VI-NEXT: s_mov_b32 s4, s6
; VI-NEXT: s_mov_b32 s5, s7
; VI-NEXT: s_mov_b32 s6, s10
; VI-NEXT: buffer_load_ushort v0, off, s[12:15], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_ushort v1, off, s[16:19], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_mov_b32 s7, s11
; VI-NEXT: buffer_load_ushort v2, off, s[4:7], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_mov_b32 s8, s0
; VI-NEXT: s_mov_b32 s9, s1
; VI-NEXT: v_max_f16_e32 v0, v0, v0
; VI-NEXT: v_max_f16_e32 v1, v1, v1
; VI-NEXT: v_max_f16_e32 v0, v0, v1
; VI-NEXT: v_max_f16_e32 v1, v2, v2
; VI-NEXT: v_max_f16_e32 v0, v0, v1
; VI-NEXT: buffer_store_short v0, off, s[8:11], 0
; VI-NEXT: s_endpgm
;
; GFX9-LABEL: test_fmax3_olt_0_f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_mov_b32 s6, s2
; GFX9-NEXT: s_mov_b32 s7, s3
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s4, s10
; GFX9-NEXT: s_mov_b32 s5, s11
; GFX9-NEXT: s_mov_b32 s16, s12
; GFX9-NEXT: s_mov_b32 s17, s13
; GFX9-NEXT: s_mov_b32 s18, s2
; GFX9-NEXT: s_mov_b32 s19, s3
; GFX9-NEXT: s_mov_b32 s12, s14
; GFX9-NEXT: s_mov_b32 s13, s15
; GFX9-NEXT: s_mov_b32 s14, s2
; GFX9-NEXT: s_mov_b32 s15, s3
; GFX9-NEXT: buffer_load_ushort v0, off, s[4:7], 0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_ushort v1, off, s[16:19], 0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_ushort v2, off, s[12:15], 0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_mov_b32 s0, s8
; GFX9-NEXT: s_mov_b32 s1, s9
; GFX9-NEXT: v_max3_f16 v0, v0, v1, v2
; GFX9-NEXT: buffer_store_short v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
;
; GFX11-LABEL: test_fmax3_olt_0_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX11-NEXT: s_mov_b32 s10, -1
; GFX11-NEXT: s_mov_b32 s11, 0x31016000
; GFX11-NEXT: s_mov_b32 s14, s10
; GFX11-NEXT: s_mov_b32 s15, s11
; GFX11-NEXT: s_mov_b32 s18, s10
; GFX11-NEXT: s_mov_b32 s19, s11
; GFX11-NEXT: s_mov_b32 s22, s10
; GFX11-NEXT: s_mov_b32 s23, s11
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s12, s2
; GFX11-NEXT: s_mov_b32 s13, s3
; GFX11-NEXT: s_mov_b32 s16, s4
; GFX11-NEXT: s_mov_b32 s17, s5
; GFX11-NEXT: s_mov_b32 s20, s6
; GFX11-NEXT: s_mov_b32 s21, s7
; GFX11-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_load_u16 v1, off, s[16:19], 0 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_load_u16 v2, off, s[20:23], 0 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_mov_b32 s8, s0
; GFX11-NEXT: s_mov_b32 s9, s1
; GFX11-NEXT: v_max3_f16 v0, v0, v1, v2
; GFX11-NEXT: buffer_store_b16 v0, off, s[8:11], 0
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: test_fmax3_olt_0_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX12-NEXT: s_mov_b32 s10, -1
; GFX12-NEXT: s_mov_b32 s11, 0x31016000
; GFX12-NEXT: s_mov_b32 s14, s10
; GFX12-NEXT: s_mov_b32 s15, s11
; GFX12-NEXT: s_mov_b32 s18, s10
; GFX12-NEXT: s_mov_b32 s19, s11
; GFX12-NEXT: s_mov_b32 s22, s10
; GFX12-NEXT: s_mov_b32 s23, s11
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_mov_b32 s12, s2
; GFX12-NEXT: s_mov_b32 s13, s3
; GFX12-NEXT: s_mov_b32 s16, s4
; GFX12-NEXT: s_mov_b32 s17, s5
; GFX12-NEXT: s_mov_b32 s20, s6
; GFX12-NEXT: s_mov_b32 s21, s7
; GFX12-NEXT: buffer_load_u16 v0, off, s[12:15], null scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: buffer_load_u16 v1, off, s[16:19], null scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: buffer_load_u16 v2, off, s[20:23], null scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_mov_b32 s8, s0
; GFX12-NEXT: s_mov_b32 s9, s1
; GFX12-NEXT: v_max3_num_f16 v0, v0, v1, v2
; GFX12-NEXT: buffer_store_b16 v0, off, s[8:11], null
; GFX12-NEXT: s_endpgm
  %a = load volatile half, ptr addrspace(1) %aptr, align 2
  %b = load volatile half, ptr addrspace(1) %bptr, align 2
  %c = load volatile half, ptr addrspace(1) %cptr, align 2
  %f0 = call half @llvm.maxnum.f16(half %a, half %b)
  %f1 = call half @llvm.maxnum.f16(half %f0, half %c)
  store half %f1, ptr addrspace(1) %out, align 2
  ret void
}

; Commute operand of second fmax
; f16: %f1 = maxnum(%c, maxnum(%a, %b)), inner result as the second operand.
; Per-prefix expectations match the previous f16 kernel, with the operand
; order v2, v0, v1 on the max3 instructions; in the VI checks the final
; v_max_f16_e32 takes its sources as v1, v0.
define amdgpu_kernel void @test_fmax3_olt_1_f16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
; SI-LABEL: test_fmax3_olt_1_f16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s11, 0xf000
; SI-NEXT: s_mov_b32 s10, -1
; SI-NEXT: s_mov_b32 s14, s10
; SI-NEXT: s_mov_b32 s15, s11
; SI-NEXT: s_mov_b32 s18, s10
; SI-NEXT: s_mov_b32 s19, s11
; SI-NEXT: s_mov_b32 s22, s10
; SI-NEXT: s_mov_b32 s23, s11
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s12, s2
; SI-NEXT: s_mov_b32 s13, s3
; SI-NEXT: s_mov_b32 s16, s4
; SI-NEXT: s_mov_b32 s17, s5
; SI-NEXT: s_mov_b32 s20, s6
; SI-NEXT: s_mov_b32 s21, s7
; SI-NEXT: buffer_load_ushort v0, off, s[12:15], 0 glc
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: buffer_load_ushort v1, off, s[16:19], 0 glc
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: buffer_load_ushort v2, off, s[20:23], 0 glc
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_mov_b32 s8, s0
; SI-NEXT: s_mov_b32 s9, s1
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-NEXT: v_max3_f32 v0, v2, v0, v1
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NEXT: buffer_store_short v0, off, s[8:11], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: test_fmax3_olt_1_f16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
; VI-NEXT: s_mov_b32 s11, 0xf000
; VI-NEXT: s_mov_b32 s10, -1
; VI-NEXT: s_mov_b32 s14, s10
; VI-NEXT: s_mov_b32 s15, s11
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_mov_b32 s12, s2
; VI-NEXT: s_mov_b32 s13, s3
; VI-NEXT: s_mov_b32 s16, s4
; VI-NEXT: s_mov_b32 s17, s5
; VI-NEXT: s_mov_b32 s18, s10
; VI-NEXT: s_mov_b32 s19, s11
; VI-NEXT: s_mov_b32 s4, s6
; VI-NEXT: s_mov_b32 s5, s7
; VI-NEXT: s_mov_b32 s6, s10
; VI-NEXT: buffer_load_ushort v0, off, s[12:15], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_ushort v1, off, s[16:19], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_mov_b32 s7, s11
; VI-NEXT: buffer_load_ushort v2, off, s[4:7], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_mov_b32 s8, s0
; VI-NEXT: s_mov_b32 s9, s1
; VI-NEXT: v_max_f16_e32 v0, v0, v0
; VI-NEXT: v_max_f16_e32 v1, v1, v1
; VI-NEXT: v_max_f16_e32 v0, v0, v1
; VI-NEXT: v_max_f16_e32 v1, v2, v2
; VI-NEXT: v_max_f16_e32 v0, v1, v0
; VI-NEXT: buffer_store_short v0, off, s[8:11], 0
; VI-NEXT: s_endpgm
;
; GFX9-LABEL: test_fmax3_olt_1_f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_mov_b32 s6, s2
; GFX9-NEXT: s_mov_b32 s7, s3
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s4, s10
; GFX9-NEXT: s_mov_b32 s5, s11
; GFX9-NEXT: s_mov_b32 s16, s12
; GFX9-NEXT: s_mov_b32 s17, s13
; GFX9-NEXT: s_mov_b32 s18, s2
; GFX9-NEXT: s_mov_b32 s19, s3
; GFX9-NEXT: s_mov_b32 s12, s14
; GFX9-NEXT: s_mov_b32 s13, s15
; GFX9-NEXT: s_mov_b32 s14, s2
; GFX9-NEXT: s_mov_b32 s15, s3
; GFX9-NEXT: buffer_load_ushort v0, off, s[4:7], 0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_ushort v1, off, s[16:19], 0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_load_ushort v2, off, s[12:15], 0 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_mov_b32 s0, s8
; GFX9-NEXT: s_mov_b32 s1, s9
; GFX9-NEXT: v_max3_f16 v0, v2, v0, v1
; GFX9-NEXT: buffer_store_short v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
;
; GFX11-LABEL: test_fmax3_olt_1_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX11-NEXT: s_mov_b32 s10, -1
; GFX11-NEXT: s_mov_b32 s11, 0x31016000
; GFX11-NEXT: s_mov_b32 s14, s10
; GFX11-NEXT: s_mov_b32 s15, s11
; GFX11-NEXT: s_mov_b32 s18, s10
; GFX11-NEXT: s_mov_b32 s19, s11
; GFX11-NEXT: s_mov_b32 s22, s10
; GFX11-NEXT: s_mov_b32 s23, s11
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s12, s2
; GFX11-NEXT: s_mov_b32 s13, s3
; GFX11-NEXT: s_mov_b32 s16, s4
; GFX11-NEXT: s_mov_b32 s17, s5
; GFX11-NEXT: s_mov_b32 s20, s6
; GFX11-NEXT: s_mov_b32 s21, s7
; GFX11-NEXT: buffer_load_u16 v0, off, s[12:15], 0 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_load_u16 v1, off, s[16:19], 0 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: buffer_load_u16 v2, off, s[20:23], 0 glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_mov_b32 s8, s0
; GFX11-NEXT: s_mov_b32 s9, s1
; GFX11-NEXT: v_max3_f16 v0, v2, v0, v1
; GFX11-NEXT: buffer_store_b16 v0, off, s[8:11], 0
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: test_fmax3_olt_1_f16:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
; GFX12-NEXT: s_mov_b32 s10, -1
; GFX12-NEXT: s_mov_b32 s11, 0x31016000
; GFX12-NEXT: s_mov_b32 s14, s10
; GFX12-NEXT: s_mov_b32 s15, s11
; GFX12-NEXT: s_mov_b32 s18, s10
; GFX12-NEXT: s_mov_b32 s19, s11
; GFX12-NEXT: s_mov_b32 s22, s10
; GFX12-NEXT: s_mov_b32 s23, s11
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_mov_b32 s12, s2
; GFX12-NEXT: s_mov_b32 s13, s3
; GFX12-NEXT: s_mov_b32 s16, s4
; GFX12-NEXT: s_mov_b32 s17, s5
; GFX12-NEXT: s_mov_b32 s20, s6
; GFX12-NEXT: s_mov_b32 s21, s7
; GFX12-NEXT: buffer_load_u16 v0, off, s[12:15], null scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: buffer_load_u16 v1, off, s[16:19], null scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: buffer_load_u16 v2, off, s[20:23], null scope:SCOPE_SYS
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_mov_b32 s8, s0
; GFX12-NEXT: s_mov_b32 s9, s1
; GFX12-NEXT: v_max3_num_f16 v0, v2, v0, v1
; GFX12-NEXT: buffer_store_b16 v0, off, s[8:11], null
; GFX12-NEXT: s_endpgm
  %a = load volatile half, ptr addrspace(1) %aptr, align 2
  %b = load volatile half, ptr addrspace(1) %bptr, align 2
  %c = load volatile half, ptr addrspace(1) %cptr, align 2
  %f0 = call half @llvm.maxnum.f16(half %a, half %b)
  %f1 = call half @llvm.maxnum.f16(half %c, half %f0)
  store half %f1, ptr addrspace(1) %out, align 2
  ret void
}

; performMinMaxCombine() should not turn vector (<2 x half>) maxnum chains
; into max3, since there are no packed instructions for fmax3.
; Three chained <2 x half> llvm.maxnum calls: max(max(%c, max(%a, %b)), %d).
; What the checks below expect, per prefix:
;  - GFX9 and GFX11 checks: three v_pk_max_f16, no max3.
;  - GFX12 checks: three v_pk_max_num_f16, no max3.
;  - VI checks: v_max_f16_e32 / v_max_f16_sdwa pairs for the low and high
;    halves, recombined with v_or_b32; no max3.
;  - SI checks: the values are handled as f32 after v_cvt round trips, and
;    there the checks DO contain v_max3_f32 (one per element), each preceded
;    by a v_max_f32_e32.
define <2 x half> @no_fmax3_v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x half> %d) #2 {
; SI-LABEL: no_fmax3_v2f16:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_cvt_f16_f32_e32 v7, v7
; SI-NEXT: v_cvt_f16_f32_e32 v5, v5
; SI-NEXT: v_cvt_f16_f32_e32 v6, v6
; SI-NEXT: v_cvt_f16_f32_e32 v4, v4
; SI-NEXT: v_cvt_f16_f32_e32 v2, v2
; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NEXT: v_cvt_f16_f32_e32 v3, v3
; SI-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-NEXT: v_cvt_f32_f16_e32 v7, v7
; SI-NEXT: v_cvt_f32_f16_e32 v5, v5
; SI-NEXT: v_cvt_f32_f16_e32 v6, v6
; SI-NEXT: v_cvt_f32_f16_e32 v4, v4
; SI-NEXT: v_cvt_f32_f16_e32 v2, v2
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NEXT: v_cvt_f32_f16_e32 v3, v3
; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
; SI-NEXT: v_max_f32_e32 v1, v1, v3
; SI-NEXT: v_max_f32_e32 v0, v0, v2
; SI-NEXT: v_max3_f32 v0, v4, v0, v6
; SI-NEXT: v_max3_f32 v1, v5, v1, v7
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: no_fmax3_v2f16:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_max_f16_sdwa v4, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NEXT: v_max_f16_e32 v0, v0, v1
; VI-NEXT: v_max_f16_sdwa v1, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT: v_max_f16_e32 v0, v2, v0
; VI-NEXT: v_max_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-NEXT: v_max_f16_e32 v0, v0, v3
; VI-NEXT: v_or_b32_e32 v0, v0, v1
; VI-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: no_fmax3_v2f16:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_max_f16 v0, v0, v1
; GFX9-NEXT: v_pk_max_f16 v0, v2, v0
; GFX9-NEXT: v_pk_max_f16 v0, v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: no_fmax3_v2f16:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_max_f16 v0, v0, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_pk_max_f16 v0, v2, v0
; GFX11-NEXT: v_pk_max_f16 v0, v0, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: no_fmax3_v2f16:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_pk_max_num_f16 v0, v2, v0
; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v3
; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
  %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %b)
  %max1 = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %c, <2 x half> %max)
  %res = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %max1, <2 x half> %d)
  ret <2 x half> %res
}

; Intrinsic declarations.
; NOTE(review): @llvm.amdgcn.workitem.id.x is declared here but is not called
; anywhere in the visible part of this file -- confirm whether it is still
; needed.
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare float @llvm.maxnum.f32(float, float) #1
declare half @llvm.maxnum.f16(half, half) #1
declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)

; Attribute groups: #0 is used by the amdgpu_kernel tests, #1 by the
; intrinsic declarations above, and #2 (which adds "no-nans-fp-math") by
; @no_fmax3_v2f16.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone speculatable }
attributes #2 = { nounwind "no-nans-fp-math"="true" }