; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX950 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GCN,GFX12 %s

; Prefix layout: every run carries GCN; the gfx900 and gfx950 runs additionally
; share GFX9, and use GFX900 / GFX950 for checks where the two targets differ.
; gfx900 is run exactly once: a second gfx900 run with only GCN,GFX9 would be a
; strict subset of the GCN,GFX9,GFX900 run and add no coverage.

define float @v_maximum_f32(float %src0, float %src1) {
; GFX7-LABEL: v_maximum_f32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_max_f32_e32 v2, v0, v1
; GFX7-NEXT: v_mov_b32_e32 v3, 0x7fc00000
; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
; GFX7-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_maximum_f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_max_f32_e32 v2, v0, v1
; GFX8-NEXT: v_mov_b32_e32 v3, 0x7fc00000
; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: v_maximum_f32:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_max_f32_e32 v2, v0, v1
; GFX900-NEXT: v_mov_b32_e32 v3, 0x7fc00000
; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v1
; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-LABEL: v_maximum_f32:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt
vmcnt(0) expcnt(0) lgkmcnt(0) 42; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 43; GFX950-NEXT: s_setpc_b64 s[30:31] 44; 45; GFX10-LABEL: v_maximum_f32: 46; GFX10: ; %bb.0: 47; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 48; GFX10-NEXT: v_max_f32_e32 v2, v0, v1 49; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v1 50; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo 51; GFX10-NEXT: s_setpc_b64 s[30:31] 52; 53; GFX11-LABEL: v_maximum_f32: 54; GFX11: ; %bb.0: 55; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 56; GFX11-NEXT: v_max_f32_e32 v2, v0, v1 57; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v1 58; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 59; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo 60; GFX11-NEXT: s_setpc_b64 s[30:31] 61; 62; GFX12-LABEL: v_maximum_f32: 63; GFX12: ; %bb.0: 64; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 65; GFX12-NEXT: s_wait_expcnt 0x0 66; GFX12-NEXT: s_wait_samplecnt 0x0 67; GFX12-NEXT: s_wait_bvhcnt 0x0 68; GFX12-NEXT: s_wait_kmcnt 0x0 69; GFX12-NEXT: v_maximum_f32 v0, v0, v1 70; GFX12-NEXT: s_setpc_b64 s[30:31] 71 %op = call float @llvm.maximum.f32(float %src0, float %src1) 72 ret float %op 73} 74 75define float @v_maximum_f32__nnan(float %src0, float %src1) { 76; GFX7-LABEL: v_maximum_f32__nnan: 77; GFX7: ; %bb.0: 78; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 79; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 80; GFX7-NEXT: s_setpc_b64 s[30:31] 81; 82; GFX8-LABEL: v_maximum_f32__nnan: 83; GFX8: ; %bb.0: 84; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 85; GFX8-NEXT: v_max_f32_e32 v0, v0, v1 86; GFX8-NEXT: s_setpc_b64 s[30:31] 87; 88; GFX900-LABEL: v_maximum_f32__nnan: 89; GFX900: ; %bb.0: 90; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 91; GFX900-NEXT: v_max_f32_e32 v0, v0, v1 92; GFX900-NEXT: s_setpc_b64 s[30:31] 93; 94; GFX950-LABEL: v_maximum_f32__nnan: 95; GFX950: ; %bb.0: 96; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 97; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 98; GFX950-NEXT: s_setpc_b64 
s[30:31] 99; 100; GFX10-LABEL: v_maximum_f32__nnan: 101; GFX10: ; %bb.0: 102; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 103; GFX10-NEXT: v_max_f32_e32 v0, v0, v1 104; GFX10-NEXT: s_setpc_b64 s[30:31] 105; 106; GFX11-LABEL: v_maximum_f32__nnan: 107; GFX11: ; %bb.0: 108; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 109; GFX11-NEXT: v_max_f32_e32 v0, v0, v1 110; GFX11-NEXT: s_setpc_b64 s[30:31] 111; 112; GFX12-LABEL: v_maximum_f32__nnan: 113; GFX12: ; %bb.0: 114; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 115; GFX12-NEXT: s_wait_expcnt 0x0 116; GFX12-NEXT: s_wait_samplecnt 0x0 117; GFX12-NEXT: s_wait_bvhcnt 0x0 118; GFX12-NEXT: s_wait_kmcnt 0x0 119; GFX12-NEXT: v_maximum_f32 v0, v0, v1 120; GFX12-NEXT: s_setpc_b64 s[30:31] 121 %op = call nnan float @llvm.maximum.f32(float %src0, float %src1) 122 ret float %op 123} 124 125define float @v_maximum_f32__nsz(float %src0, float %src1) { 126; GFX7-LABEL: v_maximum_f32__nsz: 127; GFX7: ; %bb.0: 128; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 129; GFX7-NEXT: v_max_f32_e32 v2, v0, v1 130; GFX7-NEXT: v_mov_b32_e32 v3, 0x7fc00000 131; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 132; GFX7-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 133; GFX7-NEXT: s_setpc_b64 s[30:31] 134; 135; GFX8-LABEL: v_maximum_f32__nsz: 136; GFX8: ; %bb.0: 137; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 138; GFX8-NEXT: v_max_f32_e32 v2, v0, v1 139; GFX8-NEXT: v_mov_b32_e32 v3, 0x7fc00000 140; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 141; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 142; GFX8-NEXT: s_setpc_b64 s[30:31] 143; 144; GFX900-LABEL: v_maximum_f32__nsz: 145; GFX900: ; %bb.0: 146; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 147; GFX900-NEXT: v_max_f32_e32 v2, v0, v1 148; GFX900-NEXT: v_mov_b32_e32 v3, 0x7fc00000 149; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 150; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 151; GFX900-NEXT: s_setpc_b64 s[30:31] 152; 153; GFX950-LABEL: v_maximum_f32__nsz: 154; GFX950: ; %bb.0: 155; 
GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 156; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 157; GFX950-NEXT: s_setpc_b64 s[30:31] 158; 159; GFX10-LABEL: v_maximum_f32__nsz: 160; GFX10: ; %bb.0: 161; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 162; GFX10-NEXT: v_max_f32_e32 v2, v0, v1 163; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v1 164; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo 165; GFX10-NEXT: s_setpc_b64 s[30:31] 166; 167; GFX11-LABEL: v_maximum_f32__nsz: 168; GFX11: ; %bb.0: 169; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 170; GFX11-NEXT: v_max_f32_e32 v2, v0, v1 171; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v1 172; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 173; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo 174; GFX11-NEXT: s_setpc_b64 s[30:31] 175; 176; GFX12-LABEL: v_maximum_f32__nsz: 177; GFX12: ; %bb.0: 178; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 179; GFX12-NEXT: s_wait_expcnt 0x0 180; GFX12-NEXT: s_wait_samplecnt 0x0 181; GFX12-NEXT: s_wait_bvhcnt 0x0 182; GFX12-NEXT: s_wait_kmcnt 0x0 183; GFX12-NEXT: v_maximum_f32 v0, v0, v1 184; GFX12-NEXT: s_setpc_b64 s[30:31] 185 %op = call nsz float @llvm.maximum.f32(float %src0, float %src1) 186 ret float %op 187} 188 189define float @v_maximum_f32__nnan_nsz(float %src0, float %src1) { 190; GFX7-LABEL: v_maximum_f32__nnan_nsz: 191; GFX7: ; %bb.0: 192; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 193; GFX7-NEXT: v_max_f32_e32 v0, v0, v1 194; GFX7-NEXT: s_setpc_b64 s[30:31] 195; 196; GFX8-LABEL: v_maximum_f32__nnan_nsz: 197; GFX8: ; %bb.0: 198; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 199; GFX8-NEXT: v_max_f32_e32 v0, v0, v1 200; GFX8-NEXT: s_setpc_b64 s[30:31] 201; 202; GFX900-LABEL: v_maximum_f32__nnan_nsz: 203; GFX900: ; %bb.0: 204; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 205; GFX900-NEXT: v_max_f32_e32 v0, v0, v1 206; GFX900-NEXT: s_setpc_b64 s[30:31] 207; 208; GFX950-LABEL: v_maximum_f32__nnan_nsz: 209; GFX950: ; %bb.0: 210; 
GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 211; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 212; GFX950-NEXT: s_setpc_b64 s[30:31] 213; 214; GFX10-LABEL: v_maximum_f32__nnan_nsz: 215; GFX10: ; %bb.0: 216; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 217; GFX10-NEXT: v_max_f32_e32 v0, v0, v1 218; GFX10-NEXT: s_setpc_b64 s[30:31] 219; 220; GFX11-LABEL: v_maximum_f32__nnan_nsz: 221; GFX11: ; %bb.0: 222; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 223; GFX11-NEXT: v_max_f32_e32 v0, v0, v1 224; GFX11-NEXT: s_setpc_b64 s[30:31] 225; 226; GFX12-LABEL: v_maximum_f32__nnan_nsz: 227; GFX12: ; %bb.0: 228; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 229; GFX12-NEXT: s_wait_expcnt 0x0 230; GFX12-NEXT: s_wait_samplecnt 0x0 231; GFX12-NEXT: s_wait_bvhcnt 0x0 232; GFX12-NEXT: s_wait_kmcnt 0x0 233; GFX12-NEXT: v_maximum_f32 v0, v0, v1 234; GFX12-NEXT: s_setpc_b64 s[30:31] 235 %op = call nnan nsz float @llvm.maximum.f32(float %src0, float %src1) 236 ret float %op 237} 238 239define float @v_maximum_f32__nnan_src0(float %arg0, float %src1) { 240; GFX7-LABEL: v_maximum_f32__nnan_src0: 241; GFX7: ; %bb.0: 242; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 243; GFX7-NEXT: v_add_f32_e32 v0, 1.0, v0 244; GFX7-NEXT: v_max_f32_e32 v2, v0, v1 245; GFX7-NEXT: v_mov_b32_e32 v3, 0x7fc00000 246; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 247; GFX7-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 248; GFX7-NEXT: s_setpc_b64 s[30:31] 249; 250; GFX8-LABEL: v_maximum_f32__nnan_src0: 251; GFX8: ; %bb.0: 252; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 253; GFX8-NEXT: v_add_f32_e32 v0, 1.0, v0 254; GFX8-NEXT: v_max_f32_e32 v2, v0, v1 255; GFX8-NEXT: v_mov_b32_e32 v3, 0x7fc00000 256; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 257; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 258; GFX8-NEXT: s_setpc_b64 s[30:31] 259; 260; GFX900-LABEL: v_maximum_f32__nnan_src0: 261; GFX900: ; %bb.0: 262; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 263; GFX900-NEXT: v_add_f32_e32 v0, 1.0, v0 264; 
GFX900-NEXT: v_max_f32_e32 v2, v0, v1 265; GFX900-NEXT: v_mov_b32_e32 v3, 0x7fc00000 266; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 267; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 268; GFX900-NEXT: s_setpc_b64 s[30:31] 269; 270; GFX950-LABEL: v_maximum_f32__nnan_src0: 271; GFX950: ; %bb.0: 272; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 273; GFX950-NEXT: v_add_f32_e32 v0, 1.0, v0 274; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 275; GFX950-NEXT: s_setpc_b64 s[30:31] 276; 277; GFX10-LABEL: v_maximum_f32__nnan_src0: 278; GFX10: ; %bb.0: 279; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 280; GFX10-NEXT: v_add_f32_e32 v0, 1.0, v0 281; GFX10-NEXT: v_max_f32_e32 v2, v0, v1 282; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v1 283; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo 284; GFX10-NEXT: s_setpc_b64 s[30:31] 285; 286; GFX11-LABEL: v_maximum_f32__nnan_src0: 287; GFX11: ; %bb.0: 288; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 289; GFX11-NEXT: v_add_f32_e32 v0, 1.0, v0 290; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 291; GFX11-NEXT: v_max_f32_e32 v2, v0, v1 292; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v1 293; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo 294; GFX11-NEXT: s_setpc_b64 s[30:31] 295; 296; GFX12-LABEL: v_maximum_f32__nnan_src0: 297; GFX12: ; %bb.0: 298; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 299; GFX12-NEXT: s_wait_expcnt 0x0 300; GFX12-NEXT: s_wait_samplecnt 0x0 301; GFX12-NEXT: s_wait_bvhcnt 0x0 302; GFX12-NEXT: s_wait_kmcnt 0x0 303; GFX12-NEXT: v_add_f32_e32 v0, 1.0, v0 304; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 305; GFX12-NEXT: v_maximum_f32 v0, v0, v1 306; GFX12-NEXT: s_setpc_b64 s[30:31] 307 %src0 = fadd nnan float %arg0, 1.0 308 %op = call float @llvm.maximum.f32(float %src0, float %src1) 309 ret float %op 310} 311 312define float @v_maximum_f32__nnan_src1(float %src0, float %arg1) { 313; GFX7-LABEL: v_maximum_f32__nnan_src1: 314; GFX7: ; %bb.0: 315; GFX7-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 316; GFX7-NEXT: v_add_f32_e32 v1, 1.0, v1 317; GFX7-NEXT: v_max_f32_e32 v2, v0, v1 318; GFX7-NEXT: v_mov_b32_e32 v3, 0x7fc00000 319; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 320; GFX7-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 321; GFX7-NEXT: s_setpc_b64 s[30:31] 322; 323; GFX8-LABEL: v_maximum_f32__nnan_src1: 324; GFX8: ; %bb.0: 325; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 326; GFX8-NEXT: v_add_f32_e32 v1, 1.0, v1 327; GFX8-NEXT: v_max_f32_e32 v2, v0, v1 328; GFX8-NEXT: v_mov_b32_e32 v3, 0x7fc00000 329; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 330; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 331; GFX8-NEXT: s_setpc_b64 s[30:31] 332; 333; GFX900-LABEL: v_maximum_f32__nnan_src1: 334; GFX900: ; %bb.0: 335; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 336; GFX900-NEXT: v_add_f32_e32 v1, 1.0, v1 337; GFX900-NEXT: v_max_f32_e32 v2, v0, v1 338; GFX900-NEXT: v_mov_b32_e32 v3, 0x7fc00000 339; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v1 340; GFX900-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 341; GFX900-NEXT: s_setpc_b64 s[30:31] 342; 343; GFX950-LABEL: v_maximum_f32__nnan_src1: 344; GFX950: ; %bb.0: 345; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 346; GFX950-NEXT: v_add_f32_e32 v1, 1.0, v1 347; GFX950-NEXT: v_maximum3_f32 v0, v0, v1, v1 348; GFX950-NEXT: s_setpc_b64 s[30:31] 349; 350; GFX10-LABEL: v_maximum_f32__nnan_src1: 351; GFX10: ; %bb.0: 352; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 353; GFX10-NEXT: v_add_f32_e32 v1, 1.0, v1 354; GFX10-NEXT: v_max_f32_e32 v2, v0, v1 355; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v1 356; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo 357; GFX10-NEXT: s_setpc_b64 s[30:31] 358; 359; GFX11-LABEL: v_maximum_f32__nnan_src1: 360; GFX11: ; %bb.0: 361; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 362; GFX11-NEXT: v_add_f32_e32 v1, 1.0, v1 363; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 364; GFX11-NEXT: v_max_f32_e32 v2, 
v0, v1 365; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v1 366; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo 367; GFX11-NEXT: s_setpc_b64 s[30:31] 368; 369; GFX12-LABEL: v_maximum_f32__nnan_src1: 370; GFX12: ; %bb.0: 371; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 372; GFX12-NEXT: s_wait_expcnt 0x0 373; GFX12-NEXT: s_wait_samplecnt 0x0 374; GFX12-NEXT: s_wait_bvhcnt 0x0 375; GFX12-NEXT: s_wait_kmcnt 0x0 376; GFX12-NEXT: v_add_f32_e32 v1, 1.0, v1 377; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 378; GFX12-NEXT: v_maximum_f32 v0, v0, v1 379; GFX12-NEXT: s_setpc_b64 s[30:31] 380 %src1 = fadd nnan float %arg1, 1.0 381 %op = call float @llvm.maximum.f32(float %src0, float %src1) 382 ret float %op 383} 384 385define void @s_maximum_f32(float inreg %src0, float inreg %src1) { 386; GFX7-LABEL: s_maximum_f32: 387; GFX7: ; %bb.0: 388; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 389; GFX7-NEXT: v_mov_b32_e32 v0, s17 390; GFX7-NEXT: v_max_f32_e32 v1, s16, v0 391; GFX7-NEXT: v_mov_b32_e32 v2, 0x7fc00000 392; GFX7-NEXT: v_cmp_o_f32_e32 vcc, s16, v0 393; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 394; GFX7-NEXT: ;;#ASMSTART 395; GFX7-NEXT: ; use v0 396; GFX7-NEXT: ;;#ASMEND 397; GFX7-NEXT: s_setpc_b64 s[30:31] 398; 399; GFX8-LABEL: s_maximum_f32: 400; GFX8: ; %bb.0: 401; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 402; GFX8-NEXT: v_mov_b32_e32 v0, s17 403; GFX8-NEXT: v_max_f32_e32 v1, s16, v0 404; GFX8-NEXT: v_mov_b32_e32 v2, 0x7fc00000 405; GFX8-NEXT: v_cmp_o_f32_e32 vcc, s16, v0 406; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 407; GFX8-NEXT: ;;#ASMSTART 408; GFX8-NEXT: ; use v0 409; GFX8-NEXT: ;;#ASMEND 410; GFX8-NEXT: s_setpc_b64 s[30:31] 411; 412; GFX900-LABEL: s_maximum_f32: 413; GFX900: ; %bb.0: 414; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 415; GFX900-NEXT: v_mov_b32_e32 v0, s17 416; GFX900-NEXT: v_max_f32_e32 v1, s16, v0 417; GFX900-NEXT: v_mov_b32_e32 v2, 0x7fc00000 418; GFX900-NEXT: v_cmp_o_f32_e32 vcc, s16, v0 419; GFX900-NEXT: 
v_cndmask_b32_e32 v0, v2, v1, vcc 420; GFX900-NEXT: ;;#ASMSTART 421; GFX900-NEXT: ; use v0 422; GFX900-NEXT: ;;#ASMEND 423; GFX900-NEXT: s_setpc_b64 s[30:31] 424; 425; GFX950-LABEL: s_maximum_f32: 426; GFX950: ; %bb.0: 427; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 428; GFX950-NEXT: v_mov_b32_e32 v0, s0 429; GFX950-NEXT: v_maximum3_f32 v0, v0, s1, s1 430; GFX950-NEXT: ;;#ASMSTART 431; GFX950-NEXT: ; use v0 432; GFX950-NEXT: ;;#ASMEND 433; GFX950-NEXT: s_setpc_b64 s[30:31] 434; 435; GFX10-LABEL: s_maximum_f32: 436; GFX10: ; %bb.0: 437; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 438; GFX10-NEXT: v_max_f32_e64 v0, s16, s17 439; GFX10-NEXT: v_cmp_o_f32_e64 vcc_lo, s16, s17 440; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo 441; GFX10-NEXT: ;;#ASMSTART 442; GFX10-NEXT: ; use v0 443; GFX10-NEXT: ;;#ASMEND 444; GFX10-NEXT: s_setpc_b64 s[30:31] 445; 446; GFX11-LABEL: s_maximum_f32: 447; GFX11: ; %bb.0: 448; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 449; GFX11-NEXT: v_max_f32_e64 v0, s0, s1 450; GFX11-NEXT: v_cmp_o_f32_e64 vcc_lo, s0, s1 451; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 452; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v0, vcc_lo 453; GFX11-NEXT: ;;#ASMSTART 454; GFX11-NEXT: ; use v0 455; GFX11-NEXT: ;;#ASMEND 456; GFX11-NEXT: s_setpc_b64 s[30:31] 457; 458; GFX12-LABEL: s_maximum_f32: 459; GFX12: ; %bb.0: 460; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 461; GFX12-NEXT: s_wait_expcnt 0x0 462; GFX12-NEXT: s_wait_samplecnt 0x0 463; GFX12-NEXT: s_wait_bvhcnt 0x0 464; GFX12-NEXT: s_wait_kmcnt 0x0 465; GFX12-NEXT: s_maximum_f32 s0, s0, s1 466; GFX12-NEXT: ;;#ASMSTART 467; GFX12-NEXT: ; use s0 468; GFX12-NEXT: ;;#ASMEND 469; GFX12-NEXT: s_wait_alu 0xfffe 470; GFX12-NEXT: s_setpc_b64 s[30:31] 471 %op = call float @llvm.maximum.f32(float %src0, float %src1) 472 call void asm sideeffect "; use $0", "s"(float %op) 473 ret void 474} 475 476define <2 x float> @v_maximum_v2f32(<2 x float> %src0, <2 x float> %src1) { 477; GFX7-LABEL: 
v_maximum_v2f32: 478; GFX7: ; %bb.0: 479; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 480; GFX7-NEXT: v_max_f32_e32 v4, v0, v2 481; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000 482; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 483; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc 484; GFX7-NEXT: v_max_f32_e32 v2, v1, v3 485; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 486; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc 487; GFX7-NEXT: s_setpc_b64 s[30:31] 488; 489; GFX8-LABEL: v_maximum_v2f32: 490; GFX8: ; %bb.0: 491; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 492; GFX8-NEXT: v_max_f32_e32 v4, v0, v2 493; GFX8-NEXT: v_mov_b32_e32 v5, 0x7fc00000 494; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 495; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc 496; GFX8-NEXT: v_max_f32_e32 v2, v1, v3 497; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 498; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc 499; GFX8-NEXT: s_setpc_b64 s[30:31] 500; 501; GFX900-LABEL: v_maximum_v2f32: 502; GFX900: ; %bb.0: 503; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 504; GFX900-NEXT: v_max_f32_e32 v4, v0, v2 505; GFX900-NEXT: v_mov_b32_e32 v5, 0x7fc00000 506; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 507; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc 508; GFX900-NEXT: v_max_f32_e32 v2, v1, v3 509; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 510; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc 511; GFX900-NEXT: s_setpc_b64 s[30:31] 512; 513; GFX950-LABEL: v_maximum_v2f32: 514; GFX950: ; %bb.0: 515; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 516; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 517; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3 518; GFX950-NEXT: s_setpc_b64 s[30:31] 519; 520; GFX10-LABEL: v_maximum_v2f32: 521; GFX10: ; %bb.0: 522; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 523; GFX10-NEXT: v_max_f32_e32 v4, v0, v2 524; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v2 525; GFX10-NEXT: v_max_f32_e32 v5, v1, v3 526; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo 527; GFX10-NEXT: 
v_cmp_o_f32_e32 vcc_lo, v1, v3 528; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo 529; GFX10-NEXT: s_setpc_b64 s[30:31] 530; 531; GFX11-LABEL: v_maximum_v2f32: 532; GFX11: ; %bb.0: 533; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 534; GFX11-NEXT: v_dual_max_f32 v4, v0, v2 :: v_dual_max_f32 v5, v1, v3 535; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v2 536; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4) 537; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo 538; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v3 539; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo 540; GFX11-NEXT: s_setpc_b64 s[30:31] 541; 542; GFX12-LABEL: v_maximum_v2f32: 543; GFX12: ; %bb.0: 544; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 545; GFX12-NEXT: s_wait_expcnt 0x0 546; GFX12-NEXT: s_wait_samplecnt 0x0 547; GFX12-NEXT: s_wait_bvhcnt 0x0 548; GFX12-NEXT: s_wait_kmcnt 0x0 549; GFX12-NEXT: v_maximum_f32 v0, v0, v2 550; GFX12-NEXT: v_maximum_f32 v1, v1, v3 551; GFX12-NEXT: s_setpc_b64 s[30:31] 552 %op = call <2 x float> @llvm.maximum.v2f32(<2 x float> %src0, <2 x float> %src1) 553 ret <2 x float> %op 554} 555 556define <2 x float> @v_maximum_v2f32__nnan(<2 x float> %src0, <2 x float> %src1) { 557; GFX7-LABEL: v_maximum_v2f32__nnan: 558; GFX7: ; %bb.0: 559; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 560; GFX7-NEXT: v_max_f32_e32 v0, v0, v2 561; GFX7-NEXT: v_max_f32_e32 v1, v1, v3 562; GFX7-NEXT: s_setpc_b64 s[30:31] 563; 564; GFX8-LABEL: v_maximum_v2f32__nnan: 565; GFX8: ; %bb.0: 566; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 567; GFX8-NEXT: v_max_f32_e32 v0, v0, v2 568; GFX8-NEXT: v_max_f32_e32 v1, v1, v3 569; GFX8-NEXT: s_setpc_b64 s[30:31] 570; 571; GFX900-LABEL: v_maximum_v2f32__nnan: 572; GFX900: ; %bb.0: 573; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 574; GFX900-NEXT: v_max_f32_e32 v0, v0, v2 575; GFX900-NEXT: v_max_f32_e32 v1, v1, v3 576; GFX900-NEXT: s_setpc_b64 s[30:31] 577; 578; GFX950-LABEL: 
v_maximum_v2f32__nnan: 579; GFX950: ; %bb.0: 580; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 581; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 582; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3 583; GFX950-NEXT: s_setpc_b64 s[30:31] 584; 585; GFX10-LABEL: v_maximum_v2f32__nnan: 586; GFX10: ; %bb.0: 587; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 588; GFX10-NEXT: v_max_f32_e32 v0, v0, v2 589; GFX10-NEXT: v_max_f32_e32 v1, v1, v3 590; GFX10-NEXT: s_setpc_b64 s[30:31] 591; 592; GFX11-LABEL: v_maximum_v2f32__nnan: 593; GFX11: ; %bb.0: 594; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 595; GFX11-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3 596; GFX11-NEXT: s_setpc_b64 s[30:31] 597; 598; GFX12-LABEL: v_maximum_v2f32__nnan: 599; GFX12: ; %bb.0: 600; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 601; GFX12-NEXT: s_wait_expcnt 0x0 602; GFX12-NEXT: s_wait_samplecnt 0x0 603; GFX12-NEXT: s_wait_bvhcnt 0x0 604; GFX12-NEXT: s_wait_kmcnt 0x0 605; GFX12-NEXT: v_maximum_f32 v0, v0, v2 606; GFX12-NEXT: v_maximum_f32 v1, v1, v3 607; GFX12-NEXT: s_setpc_b64 s[30:31] 608 %op = call nnan <2 x float> @llvm.maximum.v2f32(<2 x float> %src0, <2 x float> %src1) 609 ret <2 x float> %op 610} 611 612define <2 x float> @v_maximum_v2f32__nsz(<2 x float> %src0, <2 x float> %src1) { 613; GFX7-LABEL: v_maximum_v2f32__nsz: 614; GFX7: ; %bb.0: 615; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 616; GFX7-NEXT: v_max_f32_e32 v4, v0, v2 617; GFX7-NEXT: v_mov_b32_e32 v5, 0x7fc00000 618; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 619; GFX7-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc 620; GFX7-NEXT: v_max_f32_e32 v2, v1, v3 621; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 622; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc 623; GFX7-NEXT: s_setpc_b64 s[30:31] 624; 625; GFX8-LABEL: v_maximum_v2f32__nsz: 626; GFX8: ; %bb.0: 627; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 628; GFX8-NEXT: v_max_f32_e32 v4, v0, v2 629; GFX8-NEXT: v_mov_b32_e32 v5, 0x7fc00000 630; GFX8-NEXT: 
v_cmp_o_f32_e32 vcc, v0, v2 631; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc 632; GFX8-NEXT: v_max_f32_e32 v2, v1, v3 633; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 634; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc 635; GFX8-NEXT: s_setpc_b64 s[30:31] 636; 637; GFX900-LABEL: v_maximum_v2f32__nsz: 638; GFX900: ; %bb.0: 639; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 640; GFX900-NEXT: v_max_f32_e32 v4, v0, v2 641; GFX900-NEXT: v_mov_b32_e32 v5, 0x7fc00000 642; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v2 643; GFX900-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc 644; GFX900-NEXT: v_max_f32_e32 v2, v1, v3 645; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v3 646; GFX900-NEXT: v_cndmask_b32_e32 v1, v5, v2, vcc 647; GFX900-NEXT: s_setpc_b64 s[30:31] 648; 649; GFX950-LABEL: v_maximum_v2f32__nsz: 650; GFX950: ; %bb.0: 651; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 652; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 653; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3 654; GFX950-NEXT: s_setpc_b64 s[30:31] 655; 656; GFX10-LABEL: v_maximum_v2f32__nsz: 657; GFX10: ; %bb.0: 658; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 659; GFX10-NEXT: v_max_f32_e32 v4, v0, v2 660; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v2 661; GFX10-NEXT: v_max_f32_e32 v5, v1, v3 662; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo 663; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v3 664; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo 665; GFX10-NEXT: s_setpc_b64 s[30:31] 666; 667; GFX11-LABEL: v_maximum_v2f32__nsz: 668; GFX11: ; %bb.0: 669; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 670; GFX11-NEXT: v_dual_max_f32 v4, v0, v2 :: v_dual_max_f32 v5, v1, v3 671; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v2 672; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4) 673; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v4, vcc_lo 674; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v3 675; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v5, vcc_lo 676; GFX11-NEXT: 
s_setpc_b64 s[30:31] 677; 678; GFX12-LABEL: v_maximum_v2f32__nsz: 679; GFX12: ; %bb.0: 680; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 681; GFX12-NEXT: s_wait_expcnt 0x0 682; GFX12-NEXT: s_wait_samplecnt 0x0 683; GFX12-NEXT: s_wait_bvhcnt 0x0 684; GFX12-NEXT: s_wait_kmcnt 0x0 685; GFX12-NEXT: v_maximum_f32 v0, v0, v2 686; GFX12-NEXT: v_maximum_f32 v1, v1, v3 687; GFX12-NEXT: s_setpc_b64 s[30:31] 688 %op = call nsz <2 x float> @llvm.maximum.v2f32(<2 x float> %src0, <2 x float> %src1) 689 ret <2 x float> %op 690} 691 692define <2 x float> @v_maximum_v2f32__nnan_nsz(<2 x float> %src0, <2 x float> %src1) { 693; GFX7-LABEL: v_maximum_v2f32__nnan_nsz: 694; GFX7: ; %bb.0: 695; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 696; GFX7-NEXT: v_max_f32_e32 v0, v0, v2 697; GFX7-NEXT: v_max_f32_e32 v1, v1, v3 698; GFX7-NEXT: s_setpc_b64 s[30:31] 699; 700; GFX8-LABEL: v_maximum_v2f32__nnan_nsz: 701; GFX8: ; %bb.0: 702; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 703; GFX8-NEXT: v_max_f32_e32 v0, v0, v2 704; GFX8-NEXT: v_max_f32_e32 v1, v1, v3 705; GFX8-NEXT: s_setpc_b64 s[30:31] 706; 707; GFX900-LABEL: v_maximum_v2f32__nnan_nsz: 708; GFX900: ; %bb.0: 709; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 710; GFX900-NEXT: v_max_f32_e32 v0, v0, v2 711; GFX900-NEXT: v_max_f32_e32 v1, v1, v3 712; GFX900-NEXT: s_setpc_b64 s[30:31] 713; 714; GFX950-LABEL: v_maximum_v2f32__nnan_nsz: 715; GFX950: ; %bb.0: 716; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 717; GFX950-NEXT: v_maximum3_f32 v0, v0, v2, v2 718; GFX950-NEXT: v_maximum3_f32 v1, v1, v3, v3 719; GFX950-NEXT: s_setpc_b64 s[30:31] 720; 721; GFX10-LABEL: v_maximum_v2f32__nnan_nsz: 722; GFX10: ; %bb.0: 723; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 724; GFX10-NEXT: v_max_f32_e32 v0, v0, v2 725; GFX10-NEXT: v_max_f32_e32 v1, v1, v3 726; GFX10-NEXT: s_setpc_b64 s[30:31] 727; 728; GFX11-LABEL: v_maximum_v2f32__nnan_nsz: 729; GFX11: ; %bb.0: 730; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 731; 
GFX11-NEXT: v_dual_max_f32 v0, v0, v2 :: v_dual_max_f32 v1, v1, v3 732; GFX11-NEXT: s_setpc_b64 s[30:31] 733; 734; GFX12-LABEL: v_maximum_v2f32__nnan_nsz: 735; GFX12: ; %bb.0: 736; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 737; GFX12-NEXT: s_wait_expcnt 0x0 738; GFX12-NEXT: s_wait_samplecnt 0x0 739; GFX12-NEXT: s_wait_bvhcnt 0x0 740; GFX12-NEXT: s_wait_kmcnt 0x0 741; GFX12-NEXT: v_maximum_f32 v0, v0, v2 742; GFX12-NEXT: v_maximum_f32 v1, v1, v3 743; GFX12-NEXT: s_setpc_b64 s[30:31] 744 %op = call nnan nsz <2 x float> @llvm.maximum.v2f32(<2 x float> %src0, <2 x float> %src1) 745 ret <2 x float> %op 746} 747 748define void @s_maximum_v2f32(<2 x float> inreg %src0, <2 x float> inreg %src1) { 749; GFX7-LABEL: s_maximum_v2f32: 750; GFX7: ; %bb.0: 751; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 752; GFX7-NEXT: v_mov_b32_e32 v0, s19 753; GFX7-NEXT: v_max_f32_e32 v1, s17, v0 754; GFX7-NEXT: v_mov_b32_e32 v2, 0x7fc00000 755; GFX7-NEXT: v_cmp_o_f32_e32 vcc, s17, v0 756; GFX7-NEXT: v_mov_b32_e32 v0, s18 757; GFX7-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 758; GFX7-NEXT: v_max_f32_e32 v3, s16, v0 759; GFX7-NEXT: v_cmp_o_f32_e32 vcc, s16, v0 760; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc 761; GFX7-NEXT: ;;#ASMSTART 762; GFX7-NEXT: ; use v[0:1] 763; GFX7-NEXT: ;;#ASMEND 764; GFX7-NEXT: s_setpc_b64 s[30:31] 765; 766; GFX8-LABEL: s_maximum_v2f32: 767; GFX8: ; %bb.0: 768; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 769; GFX8-NEXT: v_mov_b32_e32 v0, s19 770; GFX8-NEXT: v_max_f32_e32 v1, s17, v0 771; GFX8-NEXT: v_mov_b32_e32 v2, 0x7fc00000 772; GFX8-NEXT: v_cmp_o_f32_e32 vcc, s17, v0 773; GFX8-NEXT: v_mov_b32_e32 v0, s18 774; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 775; GFX8-NEXT: v_max_f32_e32 v3, s16, v0 776; GFX8-NEXT: v_cmp_o_f32_e32 vcc, s16, v0 777; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc 778; GFX8-NEXT: ;;#ASMSTART 779; GFX8-NEXT: ; use v[0:1] 780; GFX8-NEXT: ;;#ASMEND 781; GFX8-NEXT: s_setpc_b64 s[30:31] 782; 783; GFX900-LABEL: s_maximum_v2f32: 784; 
GFX900: ; %bb.0: 785; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 786; GFX900-NEXT: v_mov_b32_e32 v0, s19 787; GFX900-NEXT: v_max_f32_e32 v1, s17, v0 788; GFX900-NEXT: v_mov_b32_e32 v2, 0x7fc00000 789; GFX900-NEXT: v_cmp_o_f32_e32 vcc, s17, v0 790; GFX900-NEXT: v_mov_b32_e32 v0, s18 791; GFX900-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 792; GFX900-NEXT: v_max_f32_e32 v3, s16, v0 793; GFX900-NEXT: v_cmp_o_f32_e32 vcc, s16, v0 794; GFX900-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc 795; GFX900-NEXT: ;;#ASMSTART 796; GFX900-NEXT: ; use v[0:1] 797; GFX900-NEXT: ;;#ASMEND 798; GFX900-NEXT: s_setpc_b64 s[30:31] 799; 800; GFX950-LABEL: s_maximum_v2f32: 801; GFX950: ; %bb.0: 802; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 803; GFX950-NEXT: v_mov_b32_e32 v0, s1 804; GFX950-NEXT: v_maximum3_f32 v1, v0, s3, s3 805; GFX950-NEXT: v_mov_b32_e32 v0, s0 806; GFX950-NEXT: v_maximum3_f32 v0, v0, s2, s2 807; GFX950-NEXT: ;;#ASMSTART 808; GFX950-NEXT: ; use v[0:1] 809; GFX950-NEXT: ;;#ASMEND 810; GFX950-NEXT: s_setpc_b64 s[30:31] 811; 812; GFX10-LABEL: s_maximum_v2f32: 813; GFX10: ; %bb.0: 814; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 815; GFX10-NEXT: v_max_f32_e64 v0, s17, s19 816; GFX10-NEXT: v_cmp_o_f32_e64 vcc_lo, s17, s19 817; GFX10-NEXT: v_max_f32_e64 v2, s16, s18 818; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v0, vcc_lo 819; GFX10-NEXT: v_cmp_o_f32_e64 vcc_lo, s16, s18 820; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo 821; GFX10-NEXT: ;;#ASMSTART 822; GFX10-NEXT: ; use v[0:1] 823; GFX10-NEXT: ;;#ASMEND 824; GFX10-NEXT: s_setpc_b64 s[30:31] 825; 826; GFX11-LABEL: s_maximum_v2f32: 827; GFX11: ; %bb.0: 828; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 829; GFX11-NEXT: v_max_f32_e64 v0, s1, s3 830; GFX11-NEXT: v_cmp_o_f32_e64 vcc_lo, s1, s3 831; GFX11-NEXT: v_max_f32_e64 v2, s0, s2 832; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) 833; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v0, vcc_lo 834; 
GFX11-NEXT: v_cmp_o_f32_e64 vcc_lo, s0, s2 835; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v2, vcc_lo 836; GFX11-NEXT: ;;#ASMSTART 837; GFX11-NEXT: ; use v[0:1] 838; GFX11-NEXT: ;;#ASMEND 839; GFX11-NEXT: s_setpc_b64 s[30:31] 840; 841; GFX12-LABEL: s_maximum_v2f32: 842; GFX12: ; %bb.0: 843; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 844; GFX12-NEXT: s_wait_expcnt 0x0 845; GFX12-NEXT: s_wait_samplecnt 0x0 846; GFX12-NEXT: s_wait_bvhcnt 0x0 847; GFX12-NEXT: s_wait_kmcnt 0x0 848; GFX12-NEXT: s_maximum_f32 s1, s1, s3 849; GFX12-NEXT: s_maximum_f32 s0, s0, s2 850; GFX12-NEXT: ;;#ASMSTART 851; GFX12-NEXT: ; use s[0:1] 852; GFX12-NEXT: ;;#ASMEND 853; GFX12-NEXT: s_wait_alu 0xfffe 854; GFX12-NEXT: s_setpc_b64 s[30:31] 855 %op = call <2 x float> @llvm.maximum.v2f32(<2 x float> %src0, <2 x float> %src1) 856 call void asm sideeffect "; use $0", "s"(<2 x float> %op) 857 ret void 858} 859 860define <3 x float> @v_maximum_v3f32(<3 x float> %src0, <3 x float> %src1) { 861; GFX7-LABEL: v_maximum_v3f32: 862; GFX7: ; %bb.0: 863; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 864; GFX7-NEXT: v_max_f32_e32 v6, v0, v3 865; GFX7-NEXT: v_mov_b32_e32 v7, 0x7fc00000 866; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 867; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc 868; GFX7-NEXT: v_max_f32_e32 v3, v1, v4 869; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 870; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc 871; GFX7-NEXT: v_max_f32_e32 v3, v2, v5 872; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 873; GFX7-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc 874; GFX7-NEXT: s_setpc_b64 s[30:31] 875; 876; GFX8-LABEL: v_maximum_v3f32: 877; GFX8: ; %bb.0: 878; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 879; GFX8-NEXT: v_max_f32_e32 v6, v0, v3 880; GFX8-NEXT: v_mov_b32_e32 v7, 0x7fc00000 881; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 882; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc 883; GFX8-NEXT: v_max_f32_e32 v3, v1, v4 884; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 885; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc 886; 
GFX8-NEXT: v_max_f32_e32 v3, v2, v5 887; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 888; GFX8-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc 889; GFX8-NEXT: s_setpc_b64 s[30:31] 890; 891; GFX900-LABEL: v_maximum_v3f32: 892; GFX900: ; %bb.0: 893; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 894; GFX900-NEXT: v_max_f32_e32 v6, v0, v3 895; GFX900-NEXT: v_mov_b32_e32 v7, 0x7fc00000 896; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 897; GFX900-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc 898; GFX900-NEXT: v_max_f32_e32 v3, v1, v4 899; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 900; GFX900-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc 901; GFX900-NEXT: v_max_f32_e32 v3, v2, v5 902; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 903; GFX900-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc 904; GFX900-NEXT: s_setpc_b64 s[30:31] 905; 906; GFX950-LABEL: v_maximum_v3f32: 907; GFX950: ; %bb.0: 908; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 909; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3 910; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4 911; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5 912; GFX950-NEXT: s_setpc_b64 s[30:31] 913; 914; GFX10-LABEL: v_maximum_v3f32: 915; GFX10: ; %bb.0: 916; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 917; GFX10-NEXT: v_max_f32_e32 v6, v0, v3 918; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v3 919; GFX10-NEXT: v_max_f32_e32 v7, v1, v4 920; GFX10-NEXT: v_max_f32_e32 v8, v2, v5 921; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo 922; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v4 923; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v7, vcc_lo 924; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v5 925; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo 926; GFX10-NEXT: s_setpc_b64 s[30:31] 927; 928; GFX11-LABEL: v_maximum_v3f32: 929; GFX11: ; %bb.0: 930; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 931; GFX11-NEXT: v_dual_max_f32 v6, v0, v3 :: v_dual_max_f32 v7, v1, v4 932; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v3 933; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 
| instskip(SKIP_1) | instid1(VALU_DEP_4) 934; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo 935; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v4 936; GFX11-NEXT: v_dual_max_f32 v8, v2, v5 :: v_dual_cndmask_b32 v1, 0x7fc00000, v7 937; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v5 938; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 939; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo 940; GFX11-NEXT: s_setpc_b64 s[30:31] 941; 942; GFX12-LABEL: v_maximum_v3f32: 943; GFX12: ; %bb.0: 944; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 945; GFX12-NEXT: s_wait_expcnt 0x0 946; GFX12-NEXT: s_wait_samplecnt 0x0 947; GFX12-NEXT: s_wait_bvhcnt 0x0 948; GFX12-NEXT: s_wait_kmcnt 0x0 949; GFX12-NEXT: v_maximum_f32 v0, v0, v3 950; GFX12-NEXT: v_maximum_f32 v1, v1, v4 951; GFX12-NEXT: v_maximum_f32 v2, v2, v5 952; GFX12-NEXT: s_setpc_b64 s[30:31] 953 %op = call <3 x float> @llvm.maximum.v3f32(<3 x float> %src0, <3 x float> %src1) 954 ret <3 x float> %op 955} 956 957define <3 x float> @v_maximum_v3f32__nnan(<3 x float> %src0, <3 x float> %src1) { 958; GFX7-LABEL: v_maximum_v3f32__nnan: 959; GFX7: ; %bb.0: 960; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 961; GFX7-NEXT: v_max_f32_e32 v0, v0, v3 962; GFX7-NEXT: v_max_f32_e32 v1, v1, v4 963; GFX7-NEXT: v_max_f32_e32 v2, v2, v5 964; GFX7-NEXT: s_setpc_b64 s[30:31] 965; 966; GFX8-LABEL: v_maximum_v3f32__nnan: 967; GFX8: ; %bb.0: 968; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 969; GFX8-NEXT: v_max_f32_e32 v0, v0, v3 970; GFX8-NEXT: v_max_f32_e32 v1, v1, v4 971; GFX8-NEXT: v_max_f32_e32 v2, v2, v5 972; GFX8-NEXT: s_setpc_b64 s[30:31] 973; 974; GFX900-LABEL: v_maximum_v3f32__nnan: 975; GFX900: ; %bb.0: 976; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 977; GFX900-NEXT: v_max_f32_e32 v0, v0, v3 978; GFX900-NEXT: v_max_f32_e32 v1, v1, v4 979; GFX900-NEXT: v_max_f32_e32 v2, v2, v5 980; GFX900-NEXT: s_setpc_b64 s[30:31] 981; 982; GFX950-LABEL: v_maximum_v3f32__nnan: 983; GFX950: ; %bb.0: 984; GFX950-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 985; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3 986; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4 987; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5 988; GFX950-NEXT: s_setpc_b64 s[30:31] 989; 990; GFX10-LABEL: v_maximum_v3f32__nnan: 991; GFX10: ; %bb.0: 992; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 993; GFX10-NEXT: v_max_f32_e32 v0, v0, v3 994; GFX10-NEXT: v_max_f32_e32 v1, v1, v4 995; GFX10-NEXT: v_max_f32_e32 v2, v2, v5 996; GFX10-NEXT: s_setpc_b64 s[30:31] 997; 998; GFX11-LABEL: v_maximum_v3f32__nnan: 999; GFX11: ; %bb.0: 1000; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1001; GFX11-NEXT: v_dual_max_f32 v0, v0, v3 :: v_dual_max_f32 v1, v1, v4 1002; GFX11-NEXT: v_max_f32_e32 v2, v2, v5 1003; GFX11-NEXT: s_setpc_b64 s[30:31] 1004; 1005; GFX12-LABEL: v_maximum_v3f32__nnan: 1006; GFX12: ; %bb.0: 1007; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1008; GFX12-NEXT: s_wait_expcnt 0x0 1009; GFX12-NEXT: s_wait_samplecnt 0x0 1010; GFX12-NEXT: s_wait_bvhcnt 0x0 1011; GFX12-NEXT: s_wait_kmcnt 0x0 1012; GFX12-NEXT: v_maximum_f32 v0, v0, v3 1013; GFX12-NEXT: v_maximum_f32 v1, v1, v4 1014; GFX12-NEXT: v_maximum_f32 v2, v2, v5 1015; GFX12-NEXT: s_setpc_b64 s[30:31] 1016 %op = call nnan <3 x float> @llvm.maximum.v3f32(<3 x float> %src0, <3 x float> %src1) 1017 ret <3 x float> %op 1018} 1019 1020define <3 x float> @v_maximum_v3f32__nsz(<3 x float> %src0, <3 x float> %src1) { 1021; GFX7-LABEL: v_maximum_v3f32__nsz: 1022; GFX7: ; %bb.0: 1023; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1024; GFX7-NEXT: v_max_f32_e32 v6, v0, v3 1025; GFX7-NEXT: v_mov_b32_e32 v7, 0x7fc00000 1026; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 1027; GFX7-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc 1028; GFX7-NEXT: v_max_f32_e32 v3, v1, v4 1029; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 1030; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc 1031; GFX7-NEXT: v_max_f32_e32 v3, v2, v5 1032; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 1033; GFX7-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc 
1034; GFX7-NEXT: s_setpc_b64 s[30:31] 1035; 1036; GFX8-LABEL: v_maximum_v3f32__nsz: 1037; GFX8: ; %bb.0: 1038; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1039; GFX8-NEXT: v_max_f32_e32 v6, v0, v3 1040; GFX8-NEXT: v_mov_b32_e32 v7, 0x7fc00000 1041; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 1042; GFX8-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc 1043; GFX8-NEXT: v_max_f32_e32 v3, v1, v4 1044; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 1045; GFX8-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc 1046; GFX8-NEXT: v_max_f32_e32 v3, v2, v5 1047; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 1048; GFX8-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc 1049; GFX8-NEXT: s_setpc_b64 s[30:31] 1050; 1051; GFX900-LABEL: v_maximum_v3f32__nsz: 1052; GFX900: ; %bb.0: 1053; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1054; GFX900-NEXT: v_max_f32_e32 v6, v0, v3 1055; GFX900-NEXT: v_mov_b32_e32 v7, 0x7fc00000 1056; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v3 1057; GFX900-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc 1058; GFX900-NEXT: v_max_f32_e32 v3, v1, v4 1059; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v4 1060; GFX900-NEXT: v_cndmask_b32_e32 v1, v7, v3, vcc 1061; GFX900-NEXT: v_max_f32_e32 v3, v2, v5 1062; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v5 1063; GFX900-NEXT: v_cndmask_b32_e32 v2, v7, v3, vcc 1064; GFX900-NEXT: s_setpc_b64 s[30:31] 1065; 1066; GFX950-LABEL: v_maximum_v3f32__nsz: 1067; GFX950: ; %bb.0: 1068; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1069; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3 1070; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4 1071; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5 1072; GFX950-NEXT: s_setpc_b64 s[30:31] 1073; 1074; GFX10-LABEL: v_maximum_v3f32__nsz: 1075; GFX10: ; %bb.0: 1076; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1077; GFX10-NEXT: v_max_f32_e32 v6, v0, v3 1078; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v3 1079; GFX10-NEXT: v_max_f32_e32 v7, v1, v4 1080; GFX10-NEXT: v_max_f32_e32 v8, v2, v5 1081; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo 
1082; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v4 1083; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v7, vcc_lo 1084; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v5 1085; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo 1086; GFX10-NEXT: s_setpc_b64 s[30:31] 1087; 1088; GFX11-LABEL: v_maximum_v3f32__nsz: 1089; GFX11: ; %bb.0: 1090; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1091; GFX11-NEXT: v_dual_max_f32 v6, v0, v3 :: v_dual_max_f32 v7, v1, v4 1092; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v3 1093; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4) 1094; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v6, vcc_lo 1095; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v4 1096; GFX11-NEXT: v_dual_max_f32 v8, v2, v5 :: v_dual_cndmask_b32 v1, 0x7fc00000, v7 1097; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v5 1098; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 1099; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo 1100; GFX11-NEXT: s_setpc_b64 s[30:31] 1101; 1102; GFX12-LABEL: v_maximum_v3f32__nsz: 1103; GFX12: ; %bb.0: 1104; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1105; GFX12-NEXT: s_wait_expcnt 0x0 1106; GFX12-NEXT: s_wait_samplecnt 0x0 1107; GFX12-NEXT: s_wait_bvhcnt 0x0 1108; GFX12-NEXT: s_wait_kmcnt 0x0 1109; GFX12-NEXT: v_maximum_f32 v0, v0, v3 1110; GFX12-NEXT: v_maximum_f32 v1, v1, v4 1111; GFX12-NEXT: v_maximum_f32 v2, v2, v5 1112; GFX12-NEXT: s_setpc_b64 s[30:31] 1113 %op = call nsz <3 x float> @llvm.maximum.v3f32(<3 x float> %src0, <3 x float> %src1) 1114 ret <3 x float> %op 1115} 1116 1117define <3 x float> @v_maximum_v3f32__nnan_nsz(<3 x float> %src0, <3 x float> %src1) { 1118; GFX7-LABEL: v_maximum_v3f32__nnan_nsz: 1119; GFX7: ; %bb.0: 1120; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1121; GFX7-NEXT: v_max_f32_e32 v0, v0, v3 1122; GFX7-NEXT: v_max_f32_e32 v1, v1, v4 1123; GFX7-NEXT: v_max_f32_e32 v2, v2, v5 1124; GFX7-NEXT: s_setpc_b64 s[30:31] 1125; 1126; GFX8-LABEL: v_maximum_v3f32__nnan_nsz: 1127; GFX8: ; 
%bb.0: 1128; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1129; GFX8-NEXT: v_max_f32_e32 v0, v0, v3 1130; GFX8-NEXT: v_max_f32_e32 v1, v1, v4 1131; GFX8-NEXT: v_max_f32_e32 v2, v2, v5 1132; GFX8-NEXT: s_setpc_b64 s[30:31] 1133; 1134; GFX900-LABEL: v_maximum_v3f32__nnan_nsz: 1135; GFX900: ; %bb.0: 1136; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1137; GFX900-NEXT: v_max_f32_e32 v0, v0, v3 1138; GFX900-NEXT: v_max_f32_e32 v1, v1, v4 1139; GFX900-NEXT: v_max_f32_e32 v2, v2, v5 1140; GFX900-NEXT: s_setpc_b64 s[30:31] 1141; 1142; GFX950-LABEL: v_maximum_v3f32__nnan_nsz: 1143; GFX950: ; %bb.0: 1144; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1145; GFX950-NEXT: v_maximum3_f32 v0, v0, v3, v3 1146; GFX950-NEXT: v_maximum3_f32 v1, v1, v4, v4 1147; GFX950-NEXT: v_maximum3_f32 v2, v2, v5, v5 1148; GFX950-NEXT: s_setpc_b64 s[30:31] 1149; 1150; GFX10-LABEL: v_maximum_v3f32__nnan_nsz: 1151; GFX10: ; %bb.0: 1152; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1153; GFX10-NEXT: v_max_f32_e32 v0, v0, v3 1154; GFX10-NEXT: v_max_f32_e32 v1, v1, v4 1155; GFX10-NEXT: v_max_f32_e32 v2, v2, v5 1156; GFX10-NEXT: s_setpc_b64 s[30:31] 1157; 1158; GFX11-LABEL: v_maximum_v3f32__nnan_nsz: 1159; GFX11: ; %bb.0: 1160; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1161; GFX11-NEXT: v_dual_max_f32 v0, v0, v3 :: v_dual_max_f32 v1, v1, v4 1162; GFX11-NEXT: v_max_f32_e32 v2, v2, v5 1163; GFX11-NEXT: s_setpc_b64 s[30:31] 1164; 1165; GFX12-LABEL: v_maximum_v3f32__nnan_nsz: 1166; GFX12: ; %bb.0: 1167; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1168; GFX12-NEXT: s_wait_expcnt 0x0 1169; GFX12-NEXT: s_wait_samplecnt 0x0 1170; GFX12-NEXT: s_wait_bvhcnt 0x0 1171; GFX12-NEXT: s_wait_kmcnt 0x0 1172; GFX12-NEXT: v_maximum_f32 v0, v0, v3 1173; GFX12-NEXT: v_maximum_f32 v1, v1, v4 1174; GFX12-NEXT: v_maximum_f32 v2, v2, v5 1175; GFX12-NEXT: s_setpc_b64 s[30:31] 1176 %op = call nnan nsz <3 x float> @llvm.maximum.v3f32(<3 x float> %src0, <3 x float> %src1) 1177 ret <3 x float> %op 
1178} 1179 1180define <4 x float> @v_maximum_v4f32(<4 x float> %src0, <4 x float> %src1) { 1181; GFX7-LABEL: v_maximum_v4f32: 1182; GFX7: ; %bb.0: 1183; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1184; GFX7-NEXT: v_max_f32_e32 v8, v0, v4 1185; GFX7-NEXT: v_mov_b32_e32 v9, 0x7fc00000 1186; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 1187; GFX7-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc 1188; GFX7-NEXT: v_max_f32_e32 v4, v1, v5 1189; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 1190; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc 1191; GFX7-NEXT: v_max_f32_e32 v4, v2, v6 1192; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v6 1193; GFX7-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc 1194; GFX7-NEXT: v_max_f32_e32 v4, v3, v7 1195; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v7 1196; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc 1197; GFX7-NEXT: s_setpc_b64 s[30:31] 1198; 1199; GFX8-LABEL: v_maximum_v4f32: 1200; GFX8: ; %bb.0: 1201; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1202; GFX8-NEXT: v_max_f32_e32 v8, v0, v4 1203; GFX8-NEXT: v_mov_b32_e32 v9, 0x7fc00000 1204; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 1205; GFX8-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc 1206; GFX8-NEXT: v_max_f32_e32 v4, v1, v5 1207; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 1208; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc 1209; GFX8-NEXT: v_max_f32_e32 v4, v2, v6 1210; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v6 1211; GFX8-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc 1212; GFX8-NEXT: v_max_f32_e32 v4, v3, v7 1213; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v3, v7 1214; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc 1215; GFX8-NEXT: s_setpc_b64 s[30:31] 1216; 1217; GFX900-LABEL: v_maximum_v4f32: 1218; GFX900: ; %bb.0: 1219; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1220; GFX900-NEXT: v_max_f32_e32 v8, v0, v4 1221; GFX900-NEXT: v_mov_b32_e32 v9, 0x7fc00000 1222; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 1223; GFX900-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc 1224; GFX900-NEXT: v_max_f32_e32 v4, v1, v5 1225; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, 
v5 1226; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc 1227; GFX900-NEXT: v_max_f32_e32 v4, v2, v6 1228; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v6 1229; GFX900-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc 1230; GFX900-NEXT: v_max_f32_e32 v4, v3, v7 1231; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v3, v7 1232; GFX900-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc 1233; GFX900-NEXT: s_setpc_b64 s[30:31] 1234; 1235; GFX950-LABEL: v_maximum_v4f32: 1236; GFX950: ; %bb.0: 1237; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1238; GFX950-NEXT: v_maximum3_f32 v0, v0, v4, v4 1239; GFX950-NEXT: v_maximum3_f32 v1, v1, v5, v5 1240; GFX950-NEXT: v_maximum3_f32 v2, v2, v6, v6 1241; GFX950-NEXT: v_maximum3_f32 v3, v3, v7, v7 1242; GFX950-NEXT: s_setpc_b64 s[30:31] 1243; 1244; GFX10-LABEL: v_maximum_v4f32: 1245; GFX10: ; %bb.0: 1246; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1247; GFX10-NEXT: v_max_f32_e32 v8, v0, v4 1248; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v4 1249; GFX10-NEXT: v_max_f32_e32 v9, v1, v5 1250; GFX10-NEXT: v_max_f32_e32 v4, v2, v6 1251; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo 1252; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v5 1253; GFX10-NEXT: v_max_f32_e32 v8, v3, v7 1254; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v9, vcc_lo 1255; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v6 1256; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo 1257; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v7 1258; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo 1259; GFX10-NEXT: s_setpc_b64 s[30:31] 1260; 1261; GFX11-LABEL: v_maximum_v4f32: 1262; GFX11: ; %bb.0: 1263; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1264; GFX11-NEXT: v_dual_max_f32 v8, v0, v4 :: v_dual_max_f32 v9, v1, v5 1265; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v4 1266; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3) 1267; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo 1268; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v5 1269; GFX11-NEXT: 
v_max_f32_e32 v4, v2, v6 1270; GFX11-NEXT: v_dual_max_f32 v8, v3, v7 :: v_dual_cndmask_b32 v1, 0x7fc00000, v9 1271; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v6 1272; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo 1273; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v7 1274; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) 1275; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo 1276; GFX11-NEXT: s_setpc_b64 s[30:31] 1277; 1278; GFX12-LABEL: v_maximum_v4f32: 1279; GFX12: ; %bb.0: 1280; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1281; GFX12-NEXT: s_wait_expcnt 0x0 1282; GFX12-NEXT: s_wait_samplecnt 0x0 1283; GFX12-NEXT: s_wait_bvhcnt 0x0 1284; GFX12-NEXT: s_wait_kmcnt 0x0 1285; GFX12-NEXT: v_maximum_f32 v0, v0, v4 1286; GFX12-NEXT: v_maximum_f32 v1, v1, v5 1287; GFX12-NEXT: v_maximum_f32 v2, v2, v6 1288; GFX12-NEXT: v_maximum_f32 v3, v3, v7 1289; GFX12-NEXT: s_setpc_b64 s[30:31] 1290 %op = call <4 x float> @llvm.maximum.v4f32(<4 x float> %src0, <4 x float> %src1) 1291 ret <4 x float> %op 1292} 1293 1294define <4 x float> @v_maximum_v4f32__nnan(<4 x float> %src0, <4 x float> %src1) { 1295; GFX7-LABEL: v_maximum_v4f32__nnan: 1296; GFX7: ; %bb.0: 1297; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1298; GFX7-NEXT: v_max_f32_e32 v0, v0, v4 1299; GFX7-NEXT: v_max_f32_e32 v1, v1, v5 1300; GFX7-NEXT: v_max_f32_e32 v2, v2, v6 1301; GFX7-NEXT: v_max_f32_e32 v3, v3, v7 1302; GFX7-NEXT: s_setpc_b64 s[30:31] 1303; 1304; GFX8-LABEL: v_maximum_v4f32__nnan: 1305; GFX8: ; %bb.0: 1306; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1307; GFX8-NEXT: v_max_f32_e32 v0, v0, v4 1308; GFX8-NEXT: v_max_f32_e32 v1, v1, v5 1309; GFX8-NEXT: v_max_f32_e32 v2, v2, v6 1310; GFX8-NEXT: v_max_f32_e32 v3, v3, v7 1311; GFX8-NEXT: s_setpc_b64 s[30:31] 1312; 1313; GFX900-LABEL: v_maximum_v4f32__nnan: 1314; GFX900: ; %bb.0: 1315; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1316; GFX900-NEXT: v_max_f32_e32 v0, v0, v4 1317; GFX900-NEXT: v_max_f32_e32 v1, v1, v5 1318; GFX900-NEXT: 
v_max_f32_e32 v2, v2, v6 1319; GFX900-NEXT: v_max_f32_e32 v3, v3, v7 1320; GFX900-NEXT: s_setpc_b64 s[30:31] 1321; 1322; GFX950-LABEL: v_maximum_v4f32__nnan: 1323; GFX950: ; %bb.0: 1324; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1325; GFX950-NEXT: v_maximum3_f32 v0, v0, v4, v4 1326; GFX950-NEXT: v_maximum3_f32 v1, v1, v5, v5 1327; GFX950-NEXT: v_maximum3_f32 v2, v2, v6, v6 1328; GFX950-NEXT: v_maximum3_f32 v3, v3, v7, v7 1329; GFX950-NEXT: s_setpc_b64 s[30:31] 1330; 1331; GFX10-LABEL: v_maximum_v4f32__nnan: 1332; GFX10: ; %bb.0: 1333; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1334; GFX10-NEXT: v_max_f32_e32 v0, v0, v4 1335; GFX10-NEXT: v_max_f32_e32 v1, v1, v5 1336; GFX10-NEXT: v_max_f32_e32 v2, v2, v6 1337; GFX10-NEXT: v_max_f32_e32 v3, v3, v7 1338; GFX10-NEXT: s_setpc_b64 s[30:31] 1339; 1340; GFX11-LABEL: v_maximum_v4f32__nnan: 1341; GFX11: ; %bb.0: 1342; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1343; GFX11-NEXT: v_dual_max_f32 v0, v0, v4 :: v_dual_max_f32 v1, v1, v5 1344; GFX11-NEXT: v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7 1345; GFX11-NEXT: s_setpc_b64 s[30:31] 1346; 1347; GFX12-LABEL: v_maximum_v4f32__nnan: 1348; GFX12: ; %bb.0: 1349; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1350; GFX12-NEXT: s_wait_expcnt 0x0 1351; GFX12-NEXT: s_wait_samplecnt 0x0 1352; GFX12-NEXT: s_wait_bvhcnt 0x0 1353; GFX12-NEXT: s_wait_kmcnt 0x0 1354; GFX12-NEXT: v_maximum_f32 v0, v0, v4 1355; GFX12-NEXT: v_maximum_f32 v1, v1, v5 1356; GFX12-NEXT: v_maximum_f32 v2, v2, v6 1357; GFX12-NEXT: v_maximum_f32 v3, v3, v7 1358; GFX12-NEXT: s_setpc_b64 s[30:31] 1359 %op = call nnan <4 x float> @llvm.maximum.v4f32(<4 x float> %src0, <4 x float> %src1) 1360 ret <4 x float> %op 1361} 1362 1363define <4 x float> @v_maximum_v4f32__nsz(<4 x float> %src0, <4 x float> %src1) { 1364; GFX7-LABEL: v_maximum_v4f32__nsz: 1365; GFX7: ; %bb.0: 1366; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1367; GFX7-NEXT: v_max_f32_e32 v8, v0, v4 1368; GFX7-NEXT: 
v_mov_b32_e32 v9, 0x7fc00000 1369; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 1370; GFX7-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc 1371; GFX7-NEXT: v_max_f32_e32 v4, v1, v5 1372; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 1373; GFX7-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc 1374; GFX7-NEXT: v_max_f32_e32 v4, v2, v6 1375; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v6 1376; GFX7-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc 1377; GFX7-NEXT: v_max_f32_e32 v4, v3, v7 1378; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v7 1379; GFX7-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc 1380; GFX7-NEXT: s_setpc_b64 s[30:31] 1381; 1382; GFX8-LABEL: v_maximum_v4f32__nsz: 1383; GFX8: ; %bb.0: 1384; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1385; GFX8-NEXT: v_max_f32_e32 v8, v0, v4 1386; GFX8-NEXT: v_mov_b32_e32 v9, 0x7fc00000 1387; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 1388; GFX8-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc 1389; GFX8-NEXT: v_max_f32_e32 v4, v1, v5 1390; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 1391; GFX8-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc 1392; GFX8-NEXT: v_max_f32_e32 v4, v2, v6 1393; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v6 1394; GFX8-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc 1395; GFX8-NEXT: v_max_f32_e32 v4, v3, v7 1396; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v3, v7 1397; GFX8-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc 1398; GFX8-NEXT: s_setpc_b64 s[30:31] 1399; 1400; GFX900-LABEL: v_maximum_v4f32__nsz: 1401; GFX900: ; %bb.0: 1402; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1403; GFX900-NEXT: v_max_f32_e32 v8, v0, v4 1404; GFX900-NEXT: v_mov_b32_e32 v9, 0x7fc00000 1405; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v4 1406; GFX900-NEXT: v_cndmask_b32_e32 v0, v9, v8, vcc 1407; GFX900-NEXT: v_max_f32_e32 v4, v1, v5 1408; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v5 1409; GFX900-NEXT: v_cndmask_b32_e32 v1, v9, v4, vcc 1410; GFX900-NEXT: v_max_f32_e32 v4, v2, v6 1411; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v6 1412; GFX900-NEXT: v_cndmask_b32_e32 v2, v9, v4, vcc 1413; GFX900-NEXT: v_max_f32_e32 v4, v3, v7 1414; 
GFX900-NEXT: v_cmp_o_f32_e32 vcc, v3, v7 1415; GFX900-NEXT: v_cndmask_b32_e32 v3, v9, v4, vcc 1416; GFX900-NEXT: s_setpc_b64 s[30:31] 1417; 1418; GFX950-LABEL: v_maximum_v4f32__nsz: 1419; GFX950: ; %bb.0: 1420; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1421; GFX950-NEXT: v_maximum3_f32 v0, v0, v4, v4 1422; GFX950-NEXT: v_maximum3_f32 v1, v1, v5, v5 1423; GFX950-NEXT: v_maximum3_f32 v2, v2, v6, v6 1424; GFX950-NEXT: v_maximum3_f32 v3, v3, v7, v7 1425; GFX950-NEXT: s_setpc_b64 s[30:31] 1426; 1427; GFX10-LABEL: v_maximum_v4f32__nsz: 1428; GFX10: ; %bb.0: 1429; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1430; GFX10-NEXT: v_max_f32_e32 v8, v0, v4 1431; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v4 1432; GFX10-NEXT: v_max_f32_e32 v9, v1, v5 1433; GFX10-NEXT: v_max_f32_e32 v4, v2, v6 1434; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo 1435; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v5 1436; GFX10-NEXT: v_max_f32_e32 v8, v3, v7 1437; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v9, vcc_lo 1438; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v6 1439; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo 1440; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v7 1441; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo 1442; GFX10-NEXT: s_setpc_b64 s[30:31] 1443; 1444; GFX11-LABEL: v_maximum_v4f32__nsz: 1445; GFX11: ; %bb.0: 1446; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1447; GFX11-NEXT: v_dual_max_f32 v8, v0, v4 :: v_dual_max_f32 v9, v1, v5 1448; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v4 1449; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_3) 1450; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v8, vcc_lo 1451; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v5 1452; GFX11-NEXT: v_max_f32_e32 v4, v2, v6 1453; GFX11-NEXT: v_dual_max_f32 v8, v3, v7 :: v_dual_cndmask_b32 v1, 0x7fc00000, v9 1454; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v6 1455; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v4, vcc_lo 1456; GFX11-NEXT: 
v_cmp_o_f32_e32 vcc_lo, v3, v7 1457; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) 1458; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v8, vcc_lo 1459; GFX11-NEXT: s_setpc_b64 s[30:31] 1460; 1461; GFX12-LABEL: v_maximum_v4f32__nsz: 1462; GFX12: ; %bb.0: 1463; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1464; GFX12-NEXT: s_wait_expcnt 0x0 1465; GFX12-NEXT: s_wait_samplecnt 0x0 1466; GFX12-NEXT: s_wait_bvhcnt 0x0 1467; GFX12-NEXT: s_wait_kmcnt 0x0 1468; GFX12-NEXT: v_maximum_f32 v0, v0, v4 1469; GFX12-NEXT: v_maximum_f32 v1, v1, v5 1470; GFX12-NEXT: v_maximum_f32 v2, v2, v6 1471; GFX12-NEXT: v_maximum_f32 v3, v3, v7 1472; GFX12-NEXT: s_setpc_b64 s[30:31] 1473 %op = call nsz <4 x float> @llvm.maximum.v4f32(<4 x float> %src0, <4 x float> %src1) 1474 ret <4 x float> %op 1475} 1476 1477define <4 x float> @v_maximum_v4f32__nnan_nsz(<4 x float> %src0, <4 x float> %src1) { 1478; GFX7-LABEL: v_maximum_v4f32__nnan_nsz: 1479; GFX7: ; %bb.0: 1480; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1481; GFX7-NEXT: v_max_f32_e32 v0, v0, v4 1482; GFX7-NEXT: v_max_f32_e32 v1, v1, v5 1483; GFX7-NEXT: v_max_f32_e32 v2, v2, v6 1484; GFX7-NEXT: v_max_f32_e32 v3, v3, v7 1485; GFX7-NEXT: s_setpc_b64 s[30:31] 1486; 1487; GFX8-LABEL: v_maximum_v4f32__nnan_nsz: 1488; GFX8: ; %bb.0: 1489; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1490; GFX8-NEXT: v_max_f32_e32 v0, v0, v4 1491; GFX8-NEXT: v_max_f32_e32 v1, v1, v5 1492; GFX8-NEXT: v_max_f32_e32 v2, v2, v6 1493; GFX8-NEXT: v_max_f32_e32 v3, v3, v7 1494; GFX8-NEXT: s_setpc_b64 s[30:31] 1495; 1496; GFX900-LABEL: v_maximum_v4f32__nnan_nsz: 1497; GFX900: ; %bb.0: 1498; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1499; GFX900-NEXT: v_max_f32_e32 v0, v0, v4 1500; GFX900-NEXT: v_max_f32_e32 v1, v1, v5 1501; GFX900-NEXT: v_max_f32_e32 v2, v2, v6 1502; GFX900-NEXT: v_max_f32_e32 v3, v3, v7 1503; GFX900-NEXT: s_setpc_b64 s[30:31] 1504; 1505; GFX950-LABEL: v_maximum_v4f32__nnan_nsz: 1506; GFX950: ; %bb.0: 1507; GFX950-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 1508; GFX950-NEXT: v_maximum3_f32 v0, v0, v4, v4 1509; GFX950-NEXT: v_maximum3_f32 v1, v1, v5, v5 1510; GFX950-NEXT: v_maximum3_f32 v2, v2, v6, v6 1511; GFX950-NEXT: v_maximum3_f32 v3, v3, v7, v7 1512; GFX950-NEXT: s_setpc_b64 s[30:31] 1513; 1514; GFX10-LABEL: v_maximum_v4f32__nnan_nsz: 1515; GFX10: ; %bb.0: 1516; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1517; GFX10-NEXT: v_max_f32_e32 v0, v0, v4 1518; GFX10-NEXT: v_max_f32_e32 v1, v1, v5 1519; GFX10-NEXT: v_max_f32_e32 v2, v2, v6 1520; GFX10-NEXT: v_max_f32_e32 v3, v3, v7 1521; GFX10-NEXT: s_setpc_b64 s[30:31] 1522; 1523; GFX11-LABEL: v_maximum_v4f32__nnan_nsz: 1524; GFX11: ; %bb.0: 1525; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1526; GFX11-NEXT: v_dual_max_f32 v0, v0, v4 :: v_dual_max_f32 v1, v1, v5 1527; GFX11-NEXT: v_dual_max_f32 v2, v2, v6 :: v_dual_max_f32 v3, v3, v7 1528; GFX11-NEXT: s_setpc_b64 s[30:31] 1529; 1530; GFX12-LABEL: v_maximum_v4f32__nnan_nsz: 1531; GFX12: ; %bb.0: 1532; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1533; GFX12-NEXT: s_wait_expcnt 0x0 1534; GFX12-NEXT: s_wait_samplecnt 0x0 1535; GFX12-NEXT: s_wait_bvhcnt 0x0 1536; GFX12-NEXT: s_wait_kmcnt 0x0 1537; GFX12-NEXT: v_maximum_f32 v0, v0, v4 1538; GFX12-NEXT: v_maximum_f32 v1, v1, v5 1539; GFX12-NEXT: v_maximum_f32 v2, v2, v6 1540; GFX12-NEXT: v_maximum_f32 v3, v3, v7 1541; GFX12-NEXT: s_setpc_b64 s[30:31] 1542 %op = call nnan nsz <4 x float> @llvm.maximum.v4f32(<4 x float> %src0, <4 x float> %src1) 1543 ret <4 x float> %op 1544} 1545 1546define <8 x float> @v_maximum_v8f32(<8 x float> %src0, <8 x float> %src1) { 1547; GFX7-LABEL: v_maximum_v8f32: 1548; GFX7: ; %bb.0: 1549; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1550; GFX7-NEXT: v_max_f32_e32 v16, v0, v8 1551; GFX7-NEXT: v_mov_b32_e32 v17, 0x7fc00000 1552; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v0, v8 1553; GFX7-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc 1554; GFX7-NEXT: v_max_f32_e32 v8, v1, v9 1555; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v9 1556; 
GFX7-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc 1557; GFX7-NEXT: v_max_f32_e32 v8, v2, v10 1558; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v2, v10 1559; GFX7-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc 1560; GFX7-NEXT: v_max_f32_e32 v8, v3, v11 1561; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v3, v11 1562; GFX7-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc 1563; GFX7-NEXT: v_max_f32_e32 v8, v4, v12 1564; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v4, v12 1565; GFX7-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc 1566; GFX7-NEXT: v_max_f32_e32 v8, v5, v13 1567; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v5, v13 1568; GFX7-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc 1569; GFX7-NEXT: v_max_f32_e32 v8, v6, v14 1570; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v6, v14 1571; GFX7-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc 1572; GFX7-NEXT: v_max_f32_e32 v8, v7, v15 1573; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v7, v15 1574; GFX7-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc 1575; GFX7-NEXT: s_setpc_b64 s[30:31] 1576; 1577; GFX8-LABEL: v_maximum_v8f32: 1578; GFX8: ; %bb.0: 1579; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1580; GFX8-NEXT: v_max_f32_e32 v16, v0, v8 1581; GFX8-NEXT: v_mov_b32_e32 v17, 0x7fc00000 1582; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v0, v8 1583; GFX8-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc 1584; GFX8-NEXT: v_max_f32_e32 v8, v1, v9 1585; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v9 1586; GFX8-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc 1587; GFX8-NEXT: v_max_f32_e32 v8, v2, v10 1588; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v2, v10 1589; GFX8-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc 1590; GFX8-NEXT: v_max_f32_e32 v8, v3, v11 1591; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v3, v11 1592; GFX8-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc 1593; GFX8-NEXT: v_max_f32_e32 v8, v4, v12 1594; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v4, v12 1595; GFX8-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc 1596; GFX8-NEXT: v_max_f32_e32 v8, v5, v13 1597; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v5, v13 1598; GFX8-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc 1599; GFX8-NEXT: v_max_f32_e32 v8, v6, v14 1600; GFX8-NEXT: 
v_cmp_o_f32_e32 vcc, v6, v14 1601; GFX8-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc 1602; GFX8-NEXT: v_max_f32_e32 v8, v7, v15 1603; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v7, v15 1604; GFX8-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc 1605; GFX8-NEXT: s_setpc_b64 s[30:31] 1606; 1607; GFX900-LABEL: v_maximum_v8f32: 1608; GFX900: ; %bb.0: 1609; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1610; GFX900-NEXT: v_max_f32_e32 v16, v0, v8 1611; GFX900-NEXT: v_mov_b32_e32 v17, 0x7fc00000 1612; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v0, v8 1613; GFX900-NEXT: v_cndmask_b32_e32 v0, v17, v16, vcc 1614; GFX900-NEXT: v_max_f32_e32 v8, v1, v9 1615; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v9 1616; GFX900-NEXT: v_cndmask_b32_e32 v1, v17, v8, vcc 1617; GFX900-NEXT: v_max_f32_e32 v8, v2, v10 1618; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v2, v10 1619; GFX900-NEXT: v_cndmask_b32_e32 v2, v17, v8, vcc 1620; GFX900-NEXT: v_max_f32_e32 v8, v3, v11 1621; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v3, v11 1622; GFX900-NEXT: v_cndmask_b32_e32 v3, v17, v8, vcc 1623; GFX900-NEXT: v_max_f32_e32 v8, v4, v12 1624; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v4, v12 1625; GFX900-NEXT: v_cndmask_b32_e32 v4, v17, v8, vcc 1626; GFX900-NEXT: v_max_f32_e32 v8, v5, v13 1627; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v5, v13 1628; GFX900-NEXT: v_cndmask_b32_e32 v5, v17, v8, vcc 1629; GFX900-NEXT: v_max_f32_e32 v8, v6, v14 1630; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v6, v14 1631; GFX900-NEXT: v_cndmask_b32_e32 v6, v17, v8, vcc 1632; GFX900-NEXT: v_max_f32_e32 v8, v7, v15 1633; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v7, v15 1634; GFX900-NEXT: v_cndmask_b32_e32 v7, v17, v8, vcc 1635; GFX900-NEXT: s_setpc_b64 s[30:31] 1636; 1637; GFX950-LABEL: v_maximum_v8f32: 1638; GFX950: ; %bb.0: 1639; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1640; GFX950-NEXT: v_maximum3_f32 v0, v0, v8, v8 1641; GFX950-NEXT: v_maximum3_f32 v1, v1, v9, v9 1642; GFX950-NEXT: v_maximum3_f32 v2, v2, v10, v10 1643; GFX950-NEXT: v_maximum3_f32 v3, v3, v11, v11 1644; GFX950-NEXT: 
v_maximum3_f32 v4, v4, v12, v12 1645; GFX950-NEXT: v_maximum3_f32 v5, v5, v13, v13 1646; GFX950-NEXT: v_maximum3_f32 v6, v6, v14, v14 1647; GFX950-NEXT: v_maximum3_f32 v7, v7, v15, v15 1648; GFX950-NEXT: s_setpc_b64 s[30:31] 1649; 1650; GFX10-LABEL: v_maximum_v8f32: 1651; GFX10: ; %bb.0: 1652; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1653; GFX10-NEXT: v_max_f32_e32 v16, v0, v8 1654; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v8 1655; GFX10-NEXT: v_max_f32_e32 v17, v1, v9 1656; GFX10-NEXT: v_max_f32_e32 v8, v2, v10 1657; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v16, vcc_lo 1658; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v9 1659; GFX10-NEXT: v_max_f32_e32 v9, v3, v11 1660; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v17, vcc_lo 1661; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v10 1662; GFX10-NEXT: v_max_f32_e32 v10, v7, v15 1663; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo 1664; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v11 1665; GFX10-NEXT: v_max_f32_e32 v8, v4, v12 1666; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v9, vcc_lo 1667; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v12 1668; GFX10-NEXT: v_max_f32_e32 v9, v5, v13 1669; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v8, vcc_lo 1670; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v5, v13 1671; GFX10-NEXT: v_max_f32_e32 v8, v6, v14 1672; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v9, vcc_lo 1673; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v6, v14 1674; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v8, vcc_lo 1675; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v7, v15 1676; GFX10-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v10, vcc_lo 1677; GFX10-NEXT: s_setpc_b64 s[30:31] 1678; 1679; GFX11-LABEL: v_maximum_v8f32: 1680; GFX11: ; %bb.0: 1681; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1682; GFX11-NEXT: v_dual_max_f32 v16, v0, v8 :: v_dual_max_f32 v17, v1, v9 1683; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v8 1684; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 1685; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v16, 
vcc_lo 1686; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v9 1687; GFX11-NEXT: v_dual_max_f32 v9, v3, v11 :: v_dual_max_f32 v8, v2, v10 1688; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v17, vcc_lo 1689; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v10 1690; GFX11-NEXT: v_max_f32_e32 v10, v7, v15 1691; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_2) 1692; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v8, vcc_lo 1693; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v11 1694; GFX11-NEXT: v_dual_max_f32 v8, v4, v12 :: v_dual_cndmask_b32 v3, 0x7fc00000, v9 1695; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v12 1696; GFX11-NEXT: v_dual_max_f32 v9, v5, v13 :: v_dual_cndmask_b32 v4, 0x7fc00000, v8 1697; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v5, v13 1698; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1699; GFX11-NEXT: v_dual_max_f32 v8, v6, v14 :: v_dual_cndmask_b32 v5, 0x7fc00000, v9 1700; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v6, v14 1701; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v8, vcc_lo 1702; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v7, v15 1703; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v10, vcc_lo 1704; GFX11-NEXT: s_setpc_b64 s[30:31] 1705; 1706; GFX12-LABEL: v_maximum_v8f32: 1707; GFX12: ; %bb.0: 1708; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1709; GFX12-NEXT: s_wait_expcnt 0x0 1710; GFX12-NEXT: s_wait_samplecnt 0x0 1711; GFX12-NEXT: s_wait_bvhcnt 0x0 1712; GFX12-NEXT: s_wait_kmcnt 0x0 1713; GFX12-NEXT: v_maximum_f32 v0, v0, v8 1714; GFX12-NEXT: v_maximum_f32 v1, v1, v9 1715; GFX12-NEXT: v_maximum_f32 v2, v2, v10 1716; GFX12-NEXT: v_maximum_f32 v3, v3, v11 1717; GFX12-NEXT: v_maximum_f32 v4, v4, v12 1718; GFX12-NEXT: v_maximum_f32 v5, v5, v13 1719; GFX12-NEXT: v_maximum_f32 v6, v6, v14 1720; GFX12-NEXT: v_maximum_f32 v7, v7, v15 1721; GFX12-NEXT: s_setpc_b64 s[30:31] 1722 %op = call <8 x float> @llvm.maximum.v8f32(<8 x float> %src0, <8 x float> %src1) 1723 ret <8 x float> %op 1724} 1725 1726define <16 x float> 
@v_maximum_v16f32(<16 x float> %src0, <16 x float> %src1) { 1727; GFX7-LABEL: v_maximum_v16f32: 1728; GFX7: ; %bb.0: 1729; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1730; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 1731; GFX7-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill 1732; GFX7-NEXT: s_mov_b64 exec, s[4:5] 1733; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v1, v17 1734; GFX7-NEXT: v_max_f32_e32 v1, v1, v17 1735; GFX7-NEXT: buffer_load_dword v17, off, s[0:3], s32 1736; GFX7-NEXT: v_writelane_b32 v31, s30, 0 1737; GFX7-NEXT: v_writelane_b32 v31, s31, 1 1738; GFX7-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18 1739; GFX7-NEXT: v_max_f32_e32 v2, v2, v18 1740; GFX7-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19 1741; GFX7-NEXT: v_max_f32_e32 v3, v3, v19 1742; GFX7-NEXT: v_mov_b32_e32 v18, 0x7fc00000 1743; GFX7-NEXT: v_max_f32_e32 v19, v0, v16 1744; GFX7-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16 1745; GFX7-NEXT: v_max_f32_e32 v16, v14, v30 1746; GFX7-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30 1747; GFX7-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20 1748; GFX7-NEXT: v_max_f32_e32 v4, v4, v20 1749; GFX7-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21 1750; GFX7-NEXT: v_max_f32_e32 v5, v5, v21 1751; GFX7-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22 1752; GFX7-NEXT: v_max_f32_e32 v6, v6, v22 1753; GFX7-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23 1754; GFX7-NEXT: v_max_f32_e32 v7, v7, v23 1755; GFX7-NEXT: v_cmp_o_f32_e64 s[16:17], v8, v24 1756; GFX7-NEXT: v_max_f32_e32 v8, v8, v24 1757; GFX7-NEXT: v_cmp_o_f32_e64 s[18:19], v9, v25 1758; GFX7-NEXT: v_max_f32_e32 v9, v9, v25 1759; GFX7-NEXT: v_cmp_o_f32_e64 s[20:21], v10, v26 1760; GFX7-NEXT: v_max_f32_e32 v10, v10, v26 1761; GFX7-NEXT: v_cmp_o_f32_e64 s[22:23], v11, v27 1762; GFX7-NEXT: v_max_f32_e32 v11, v11, v27 1763; GFX7-NEXT: v_cmp_o_f32_e64 s[24:25], v12, v28 1764; GFX7-NEXT: v_max_f32_e32 v12, v12, v28 1765; GFX7-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29 1766; GFX7-NEXT: v_max_f32_e32 v13, v13, v29 1767; GFX7-NEXT: v_cndmask_b32_e32 v1, 
v18, v1, vcc 1768; GFX7-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[30:31] 1769; GFX7-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29] 1770; GFX7-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5] 1771; GFX7-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7] 1772; GFX7-NEXT: v_cndmask_b32_e64 v4, v18, v4, s[8:9] 1773; GFX7-NEXT: v_cndmask_b32_e64 v5, v18, v5, s[10:11] 1774; GFX7-NEXT: v_cndmask_b32_e64 v6, v18, v6, s[12:13] 1775; GFX7-NEXT: v_cndmask_b32_e64 v7, v18, v7, s[14:15] 1776; GFX7-NEXT: v_cndmask_b32_e64 v8, v18, v8, s[16:17] 1777; GFX7-NEXT: v_cndmask_b32_e64 v9, v18, v9, s[18:19] 1778; GFX7-NEXT: v_cndmask_b32_e64 v10, v18, v10, s[20:21] 1779; GFX7-NEXT: v_cndmask_b32_e64 v11, v18, v11, s[22:23] 1780; GFX7-NEXT: v_cndmask_b32_e64 v12, v18, v12, s[24:25] 1781; GFX7-NEXT: v_cndmask_b32_e64 v13, v18, v13, s[26:27] 1782; GFX7-NEXT: v_readlane_b32 s31, v31, 1 1783; GFX7-NEXT: v_readlane_b32 s30, v31, 0 1784; GFX7-NEXT: s_waitcnt vmcnt(0) 1785; GFX7-NEXT: v_max_f32_e32 v16, v15, v17 1786; GFX7-NEXT: v_cmp_o_f32_e32 vcc, v15, v17 1787; GFX7-NEXT: v_cndmask_b32_e32 v15, v18, v16, vcc 1788; GFX7-NEXT: s_xor_saveexec_b64 s[4:5], -1 1789; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload 1790; GFX7-NEXT: s_mov_b64 exec, s[4:5] 1791; GFX7-NEXT: s_waitcnt vmcnt(0) 1792; GFX7-NEXT: s_setpc_b64 s[30:31] 1793; 1794; GFX8-LABEL: v_maximum_v16f32: 1795; GFX8: ; %bb.0: 1796; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1797; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 1798; GFX8-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill 1799; GFX8-NEXT: s_mov_b64 exec, s[4:5] 1800; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v1, v17 1801; GFX8-NEXT: v_max_f32_e32 v1, v1, v17 1802; GFX8-NEXT: buffer_load_dword v17, off, s[0:3], s32 1803; GFX8-NEXT: v_writelane_b32 v31, s30, 0 1804; GFX8-NEXT: v_writelane_b32 v31, s31, 1 1805; GFX8-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18 1806; GFX8-NEXT: v_max_f32_e32 v2, v2, v18 1807; GFX8-NEXT: v_cmp_o_f32_e64 
s[6:7], v3, v19 1808; GFX8-NEXT: v_max_f32_e32 v3, v3, v19 1809; GFX8-NEXT: v_mov_b32_e32 v18, 0x7fc00000 1810; GFX8-NEXT: v_max_f32_e32 v19, v0, v16 1811; GFX8-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16 1812; GFX8-NEXT: v_max_f32_e32 v16, v14, v30 1813; GFX8-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30 1814; GFX8-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20 1815; GFX8-NEXT: v_max_f32_e32 v4, v4, v20 1816; GFX8-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21 1817; GFX8-NEXT: v_max_f32_e32 v5, v5, v21 1818; GFX8-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22 1819; GFX8-NEXT: v_max_f32_e32 v6, v6, v22 1820; GFX8-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23 1821; GFX8-NEXT: v_max_f32_e32 v7, v7, v23 1822; GFX8-NEXT: v_cmp_o_f32_e64 s[16:17], v8, v24 1823; GFX8-NEXT: v_max_f32_e32 v8, v8, v24 1824; GFX8-NEXT: v_cmp_o_f32_e64 s[18:19], v9, v25 1825; GFX8-NEXT: v_max_f32_e32 v9, v9, v25 1826; GFX8-NEXT: v_cmp_o_f32_e64 s[20:21], v10, v26 1827; GFX8-NEXT: v_max_f32_e32 v10, v10, v26 1828; GFX8-NEXT: v_cmp_o_f32_e64 s[22:23], v11, v27 1829; GFX8-NEXT: v_max_f32_e32 v11, v11, v27 1830; GFX8-NEXT: v_cmp_o_f32_e64 s[24:25], v12, v28 1831; GFX8-NEXT: v_max_f32_e32 v12, v12, v28 1832; GFX8-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29 1833; GFX8-NEXT: v_max_f32_e32 v13, v13, v29 1834; GFX8-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc 1835; GFX8-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[30:31] 1836; GFX8-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29] 1837; GFX8-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5] 1838; GFX8-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7] 1839; GFX8-NEXT: v_cndmask_b32_e64 v4, v18, v4, s[8:9] 1840; GFX8-NEXT: v_cndmask_b32_e64 v5, v18, v5, s[10:11] 1841; GFX8-NEXT: v_cndmask_b32_e64 v6, v18, v6, s[12:13] 1842; GFX8-NEXT: v_cndmask_b32_e64 v7, v18, v7, s[14:15] 1843; GFX8-NEXT: v_cndmask_b32_e64 v8, v18, v8, s[16:17] 1844; GFX8-NEXT: v_cndmask_b32_e64 v9, v18, v9, s[18:19] 1845; GFX8-NEXT: v_cndmask_b32_e64 v10, v18, v10, s[20:21] 1846; GFX8-NEXT: v_cndmask_b32_e64 v11, v18, v11, s[22:23] 1847; 
GFX8-NEXT: v_cndmask_b32_e64 v12, v18, v12, s[24:25] 1848; GFX8-NEXT: v_cndmask_b32_e64 v13, v18, v13, s[26:27] 1849; GFX8-NEXT: v_readlane_b32 s31, v31, 1 1850; GFX8-NEXT: v_readlane_b32 s30, v31, 0 1851; GFX8-NEXT: s_waitcnt vmcnt(0) 1852; GFX8-NEXT: v_max_f32_e32 v16, v15, v17 1853; GFX8-NEXT: v_cmp_o_f32_e32 vcc, v15, v17 1854; GFX8-NEXT: v_cndmask_b32_e32 v15, v18, v16, vcc 1855; GFX8-NEXT: s_xor_saveexec_b64 s[4:5], -1 1856; GFX8-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload 1857; GFX8-NEXT: s_mov_b64 exec, s[4:5] 1858; GFX8-NEXT: s_waitcnt vmcnt(0) 1859; GFX8-NEXT: s_setpc_b64 s[30:31] 1860; 1861; GFX900-LABEL: v_maximum_v16f32: 1862; GFX900: ; %bb.0: 1863; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1864; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 1865; GFX900-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill 1866; GFX900-NEXT: s_mov_b64 exec, s[4:5] 1867; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v1, v17 1868; GFX900-NEXT: v_max_f32_e32 v1, v1, v17 1869; GFX900-NEXT: buffer_load_dword v17, off, s[0:3], s32 1870; GFX900-NEXT: v_writelane_b32 v31, s30, 0 1871; GFX900-NEXT: v_writelane_b32 v31, s31, 1 1872; GFX900-NEXT: v_cmp_o_f32_e64 s[4:5], v2, v18 1873; GFX900-NEXT: v_max_f32_e32 v2, v2, v18 1874; GFX900-NEXT: v_cmp_o_f32_e64 s[6:7], v3, v19 1875; GFX900-NEXT: v_max_f32_e32 v3, v3, v19 1876; GFX900-NEXT: v_mov_b32_e32 v18, 0x7fc00000 1877; GFX900-NEXT: v_max_f32_e32 v19, v0, v16 1878; GFX900-NEXT: v_cmp_o_f32_e64 s[28:29], v0, v16 1879; GFX900-NEXT: v_max_f32_e32 v16, v14, v30 1880; GFX900-NEXT: v_cmp_o_f32_e64 s[30:31], v14, v30 1881; GFX900-NEXT: v_cmp_o_f32_e64 s[8:9], v4, v20 1882; GFX900-NEXT: v_max_f32_e32 v4, v4, v20 1883; GFX900-NEXT: v_cmp_o_f32_e64 s[10:11], v5, v21 1884; GFX900-NEXT: v_max_f32_e32 v5, v5, v21 1885; GFX900-NEXT: v_cmp_o_f32_e64 s[12:13], v6, v22 1886; GFX900-NEXT: v_max_f32_e32 v6, v6, v22 1887; GFX900-NEXT: v_cmp_o_f32_e64 s[14:15], v7, v23 1888; GFX900-NEXT: 
v_max_f32_e32 v7, v7, v23 1889; GFX900-NEXT: v_cmp_o_f32_e64 s[16:17], v8, v24 1890; GFX900-NEXT: v_max_f32_e32 v8, v8, v24 1891; GFX900-NEXT: v_cmp_o_f32_e64 s[18:19], v9, v25 1892; GFX900-NEXT: v_max_f32_e32 v9, v9, v25 1893; GFX900-NEXT: v_cmp_o_f32_e64 s[20:21], v10, v26 1894; GFX900-NEXT: v_max_f32_e32 v10, v10, v26 1895; GFX900-NEXT: v_cmp_o_f32_e64 s[22:23], v11, v27 1896; GFX900-NEXT: v_max_f32_e32 v11, v11, v27 1897; GFX900-NEXT: v_cmp_o_f32_e64 s[24:25], v12, v28 1898; GFX900-NEXT: v_max_f32_e32 v12, v12, v28 1899; GFX900-NEXT: v_cmp_o_f32_e64 s[26:27], v13, v29 1900; GFX900-NEXT: v_max_f32_e32 v13, v13, v29 1901; GFX900-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc 1902; GFX900-NEXT: v_cndmask_b32_e64 v14, v18, v16, s[30:31] 1903; GFX900-NEXT: v_cndmask_b32_e64 v0, v18, v19, s[28:29] 1904; GFX900-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5] 1905; GFX900-NEXT: v_cndmask_b32_e64 v3, v18, v3, s[6:7] 1906; GFX900-NEXT: v_cndmask_b32_e64 v4, v18, v4, s[8:9] 1907; GFX900-NEXT: v_cndmask_b32_e64 v5, v18, v5, s[10:11] 1908; GFX900-NEXT: v_cndmask_b32_e64 v6, v18, v6, s[12:13] 1909; GFX900-NEXT: v_cndmask_b32_e64 v7, v18, v7, s[14:15] 1910; GFX900-NEXT: v_cndmask_b32_e64 v8, v18, v8, s[16:17] 1911; GFX900-NEXT: v_cndmask_b32_e64 v9, v18, v9, s[18:19] 1912; GFX900-NEXT: v_cndmask_b32_e64 v10, v18, v10, s[20:21] 1913; GFX900-NEXT: v_cndmask_b32_e64 v11, v18, v11, s[22:23] 1914; GFX900-NEXT: v_cndmask_b32_e64 v12, v18, v12, s[24:25] 1915; GFX900-NEXT: v_cndmask_b32_e64 v13, v18, v13, s[26:27] 1916; GFX900-NEXT: v_readlane_b32 s31, v31, 1 1917; GFX900-NEXT: v_readlane_b32 s30, v31, 0 1918; GFX900-NEXT: s_waitcnt vmcnt(0) 1919; GFX900-NEXT: v_max_f32_e32 v16, v15, v17 1920; GFX900-NEXT: v_cmp_o_f32_e32 vcc, v15, v17 1921; GFX900-NEXT: v_cndmask_b32_e32 v15, v18, v16, vcc 1922; GFX900-NEXT: s_xor_saveexec_b64 s[4:5], -1 1923; GFX900-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload 1924; GFX900-NEXT: s_mov_b64 exec, s[4:5] 1925; GFX900-NEXT: 
s_waitcnt vmcnt(0) 1926; GFX900-NEXT: s_setpc_b64 s[30:31] 1927; 1928; GFX950-LABEL: v_maximum_v16f32: 1929; GFX950: ; %bb.0: 1930; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1931; GFX950-NEXT: scratch_load_dword v31, off, s32 1932; GFX950-NEXT: v_maximum3_f32 v0, v0, v16, v16 1933; GFX950-NEXT: v_maximum3_f32 v1, v1, v17, v17 1934; GFX950-NEXT: v_maximum3_f32 v2, v2, v18, v18 1935; GFX950-NEXT: v_maximum3_f32 v3, v3, v19, v19 1936; GFX950-NEXT: v_maximum3_f32 v4, v4, v20, v20 1937; GFX950-NEXT: v_maximum3_f32 v5, v5, v21, v21 1938; GFX950-NEXT: v_maximum3_f32 v6, v6, v22, v22 1939; GFX950-NEXT: v_maximum3_f32 v7, v7, v23, v23 1940; GFX950-NEXT: v_maximum3_f32 v8, v8, v24, v24 1941; GFX950-NEXT: v_maximum3_f32 v9, v9, v25, v25 1942; GFX950-NEXT: v_maximum3_f32 v10, v10, v26, v26 1943; GFX950-NEXT: v_maximum3_f32 v11, v11, v27, v27 1944; GFX950-NEXT: v_maximum3_f32 v12, v12, v28, v28 1945; GFX950-NEXT: v_maximum3_f32 v13, v13, v29, v29 1946; GFX950-NEXT: v_maximum3_f32 v14, v14, v30, v30 1947; GFX950-NEXT: s_waitcnt vmcnt(0) 1948; GFX950-NEXT: v_maximum3_f32 v15, v15, v31, v31 1949; GFX950-NEXT: s_setpc_b64 s[30:31] 1950; 1951; GFX10-LABEL: v_maximum_v16f32: 1952; GFX10: ; %bb.0: 1953; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1954; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 1955; GFX10-NEXT: v_max_f32_e32 v32, v0, v16 1956; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v16 1957; GFX10-NEXT: v_max_f32_e32 v33, v1, v17 1958; GFX10-NEXT: v_max_f32_e32 v34, v2, v18 1959; GFX10-NEXT: v_max_f32_e32 v35, v3, v19 1960; GFX10-NEXT: v_max_f32_e32 v36, v4, v20 1961; GFX10-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v32, vcc_lo 1962; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v17 1963; GFX10-NEXT: v_max_f32_e32 v37, v5, v21 1964; GFX10-NEXT: v_max_f32_e32 v38, v6, v22 1965; GFX10-NEXT: v_max_f32_e32 v39, v7, v23 1966; GFX10-NEXT: v_max_f32_e32 v48, v8, v24 1967; GFX10-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v33, vcc_lo 1968; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, 
v2, v18 1969; GFX10-NEXT: v_max_f32_e32 v49, v9, v25 1970; GFX10-NEXT: v_max_f32_e32 v50, v10, v26 1971; GFX10-NEXT: v_max_f32_e32 v51, v11, v27 1972; GFX10-NEXT: v_max_f32_e32 v52, v12, v28 1973; GFX10-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v34, vcc_lo 1974; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v19 1975; GFX10-NEXT: v_max_f32_e32 v53, v13, v29 1976; GFX10-NEXT: v_max_f32_e32 v54, v14, v30 1977; GFX10-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v35, vcc_lo 1978; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v20 1979; GFX10-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v36, vcc_lo 1980; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v5, v21 1981; GFX10-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v37, vcc_lo 1982; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v6, v22 1983; GFX10-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v38, vcc_lo 1984; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v7, v23 1985; GFX10-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v39, vcc_lo 1986; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v8, v24 1987; GFX10-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v48, vcc_lo 1988; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v9, v25 1989; GFX10-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v49, vcc_lo 1990; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v10, v26 1991; GFX10-NEXT: v_cndmask_b32_e32 v10, 0x7fc00000, v50, vcc_lo 1992; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v11, v27 1993; GFX10-NEXT: v_cndmask_b32_e32 v11, 0x7fc00000, v51, vcc_lo 1994; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v12, v28 1995; GFX10-NEXT: v_cndmask_b32_e32 v12, 0x7fc00000, v52, vcc_lo 1996; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v13, v29 1997; GFX10-NEXT: v_cndmask_b32_e32 v13, 0x7fc00000, v53, vcc_lo 1998; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v14, v30 1999; GFX10-NEXT: v_cndmask_b32_e32 v14, 0x7fc00000, v54, vcc_lo 2000; GFX10-NEXT: s_waitcnt vmcnt(0) 2001; GFX10-NEXT: v_max_f32_e32 v16, v15, v31 2002; GFX10-NEXT: v_cmp_o_f32_e32 vcc_lo, v15, v31 2003; GFX10-NEXT: v_cndmask_b32_e32 v15, 0x7fc00000, v16, vcc_lo 2004; GFX10-NEXT: s_setpc_b64 s[30:31] 2005; 2006; GFX11-LABEL: 
v_maximum_v16f32: 2007; GFX11: ; %bb.0: 2008; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2009; GFX11-NEXT: scratch_load_b32 v31, off, s32 2010; GFX11-NEXT: v_dual_max_f32 v32, v0, v16 :: v_dual_max_f32 v33, v1, v17 2011; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v0, v16 2012; GFX11-NEXT: v_dual_max_f32 v34, v2, v18 :: v_dual_max_f32 v35, v3, v19 2013; GFX11-NEXT: v_dual_max_f32 v36, v4, v20 :: v_dual_max_f32 v37, v5, v21 2014; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) 2015; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7fc00000, v32, vcc_lo 2016; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v1, v17 2017; GFX11-NEXT: v_max_f32_e32 v54, v14, v30 2018; GFX11-NEXT: v_dual_max_f32 v38, v6, v22 :: v_dual_max_f32 v39, v7, v23 2019; GFX11-NEXT: v_dual_max_f32 v48, v8, v24 :: v_dual_max_f32 v49, v9, v25 2020; GFX11-NEXT: v_cndmask_b32_e32 v1, 0x7fc00000, v33, vcc_lo 2021; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v2, v18 2022; GFX11-NEXT: v_dual_max_f32 v50, v10, v26 :: v_dual_max_f32 v51, v11, v27 2023; GFX11-NEXT: v_dual_max_f32 v52, v12, v28 :: v_dual_max_f32 v53, v13, v29 2024; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7fc00000, v34, vcc_lo 2025; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v3, v19 2026; GFX11-NEXT: v_cndmask_b32_e32 v3, 0x7fc00000, v35, vcc_lo 2027; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v4, v20 2028; GFX11-NEXT: v_cndmask_b32_e32 v4, 0x7fc00000, v36, vcc_lo 2029; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v5, v21 2030; GFX11-NEXT: v_cndmask_b32_e32 v5, 0x7fc00000, v37, vcc_lo 2031; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v6, v22 2032; GFX11-NEXT: v_cndmask_b32_e32 v6, 0x7fc00000, v38, vcc_lo 2033; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v7, v23 2034; GFX11-NEXT: v_cndmask_b32_e32 v7, 0x7fc00000, v39, vcc_lo 2035; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v8, v24 2036; GFX11-NEXT: v_cndmask_b32_e32 v8, 0x7fc00000, v48, vcc_lo 2037; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v9, v25 2038; GFX11-NEXT: v_cndmask_b32_e32 v9, 0x7fc00000, v49, vcc_lo 2039; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v10, v26 2040; GFX11-NEXT: 
v_cndmask_b32_e32 v10, 0x7fc00000, v50, vcc_lo 2041; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v11, v27 2042; GFX11-NEXT: v_cndmask_b32_e32 v11, 0x7fc00000, v51, vcc_lo 2043; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v12, v28 2044; GFX11-NEXT: v_cndmask_b32_e32 v12, 0x7fc00000, v52, vcc_lo 2045; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v13, v29 2046; GFX11-NEXT: v_cndmask_b32_e32 v13, 0x7fc00000, v53, vcc_lo 2047; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v14, v30 2048; GFX11-NEXT: v_cndmask_b32_e32 v14, 0x7fc00000, v54, vcc_lo 2049; GFX11-NEXT: s_waitcnt vmcnt(0) 2050; GFX11-NEXT: v_max_f32_e32 v16, v15, v31 2051; GFX11-NEXT: v_cmp_o_f32_e32 vcc_lo, v15, v31 2052; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 2053; GFX11-NEXT: v_cndmask_b32_e32 v15, 0x7fc00000, v16, vcc_lo 2054; GFX11-NEXT: s_setpc_b64 s[30:31] 2055; 2056; GFX12-LABEL: v_maximum_v16f32: 2057; GFX12: ; %bb.0: 2058; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2059; GFX12-NEXT: s_wait_expcnt 0x0 2060; GFX12-NEXT: s_wait_samplecnt 0x0 2061; GFX12-NEXT: s_wait_bvhcnt 0x0 2062; GFX12-NEXT: s_wait_kmcnt 0x0 2063; GFX12-NEXT: scratch_load_b32 v31, off, s32 2064; GFX12-NEXT: v_maximum_f32 v0, v0, v16 2065; GFX12-NEXT: v_maximum_f32 v1, v1, v17 2066; GFX12-NEXT: v_maximum_f32 v2, v2, v18 2067; GFX12-NEXT: v_maximum_f32 v3, v3, v19 2068; GFX12-NEXT: v_maximum_f32 v4, v4, v20 2069; GFX12-NEXT: v_maximum_f32 v5, v5, v21 2070; GFX12-NEXT: v_maximum_f32 v6, v6, v22 2071; GFX12-NEXT: v_maximum_f32 v7, v7, v23 2072; GFX12-NEXT: v_maximum_f32 v8, v8, v24 2073; GFX12-NEXT: v_maximum_f32 v9, v9, v25 2074; GFX12-NEXT: v_maximum_f32 v10, v10, v26 2075; GFX12-NEXT: v_maximum_f32 v11, v11, v27 2076; GFX12-NEXT: v_maximum_f32 v12, v12, v28 2077; GFX12-NEXT: v_maximum_f32 v13, v13, v29 2078; GFX12-NEXT: v_maximum_f32 v14, v14, v30 2079; GFX12-NEXT: s_wait_loadcnt 0x0 2080; GFX12-NEXT: v_maximum_f32 v15, v15, v31 2081; GFX12-NEXT: s_setpc_b64 s[30:31] 2082 %op = call <16 x float> @llvm.maximum.v16f32(<16 x float> %src0, <16 x float> %src1) 2083 ret <16 x 
float> %op 2084} 2085;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: 2086; GCN: {{.*}} 2087; GFX9: {{.*}} 2088