; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN %s

; Every function below compares its argument against zero with an ordered
; less-than (fcmp olt) and selects zero when the compare is true, otherwise the
; original value. The assertions expect a compare feeding v_cndmask_b32 for
; each element; no min/max instruction appears in the expected output.
; Presumably this guards against folding the compare+select pattern into
; fmin/fmax when that is not legal - TODO confirm against the combine under test.
; The functions reference attribute group #0, which is not defined in the
; visible part of this file.

; Scalar half: the expected compare is emitted with swapped operands
; (v_cmp_gt_f16 with 0 as the first source) followed by one v_cndmask_b32.
define half @test_s16(half %a) #0 {
; GCN-LABEL: test_s16:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cmp_gt_f16_e32 vcc, 0, v0
; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
  %fcmp = fcmp olt half %a, 0.0
  %sel = select i1 %fcmp, half 0.0, half %a
  ret half %sel
}

; Scalar float: same shape as the half case, using v_cmp_gt_f32.
define float @test_s32(float %a) #0 {
; GCN-LABEL: test_s32:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
  %fcmp = fcmp olt float %a, 0.0
  %sel = select i1 %fcmp, float 0.0, float %a
  ret float %sel
}

; Scalar double: one 64-bit compare on v[0:1], then a separate v_cndmask_b32
; for each of the two 32-bit registers holding the value.
define double @test_s64(double %a) #0 {
; GCN-LABEL: test_s64:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cmp_gt_f64_e32 vcc, 0, v[0:1]
; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
  %fcmp = fcmp olt double %a, 0.0
  %sel = select i1 %fcmp, double 0.0, double %a
  ret double %sel
}

; <4 x half>: the expected output compares and selects every 16-bit element
; individually (the SDWA compares read the upper word of each register), then
; repacks pairs of results with v_and_b32 / v_lshl_or_b32.
define <4 x half> @test_v4s16(<4 x half> %a) #0 {
; GCN-LABEL: test_v4s16:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v4, 0
; GCN-NEXT: v_cmp_gt_f16_e32 vcc, 0, v0
; GCN-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GCN-NEXT: v_cndmask_b32_e64 v5, v0, 0, vcc
; GCN-NEXT: v_cmp_lt_f16_sdwa s[4:5], v0, v4 src0_sel:WORD_1 src1_sel:DWORD
; GCN-NEXT: v_cmp_gt_f16_e32 vcc, 0, v1
; GCN-NEXT: v_lshrrev_b32_e32 v3, 16, v1
; GCN-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[4:5]
; GCN-NEXT: v_cndmask_b32_e64 v2, v1, 0, vcc
; GCN-NEXT: v_cmp_lt_f16_sdwa s[4:5], v1, v4 src0_sel:WORD_1 src1_sel:DWORD
; GCN-NEXT: v_cndmask_b32_e64 v1, v3, 0, s[4:5]
; GCN-NEXT: v_and_b32_e32 v3, 0xffff, v5
; GCN-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GCN-NEXT: v_lshl_or_b32 v0, v0, 16, v3
; GCN-NEXT: v_lshl_or_b32 v1, v1, 16, v2
; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
  %fcmp = fcmp olt <4 x half> %a, zeroinitializer
  %sel = select <4 x i1> %fcmp, <4 x half> zeroinitializer, <4 x half> %a
  ret <4 x half> %sel
}

; <8 x half>: same per-element compare/select/repack sequence as the
; <4 x half> case, applied across four input registers (v0-v3).
define <8 x half> @test_v8s16(<8 x half> %a) #0 {
; GCN-LABEL: test_v8s16:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v8, 0
; GCN-NEXT: v_cmp_gt_f16_e32 vcc, 0, v0
; GCN-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GCN-NEXT: v_cndmask_b32_e64 v9, v0, 0, vcc
; GCN-NEXT: v_cmp_lt_f16_sdwa s[4:5], v0, v8 src0_sel:WORD_1 src1_sel:DWORD
; GCN-NEXT: v_cmp_gt_f16_e32 vcc, 0, v1
; GCN-NEXT: v_lshrrev_b32_e32 v5, 16, v1
; GCN-NEXT: v_cndmask_b32_e64 v0, v4, 0, s[4:5]
; GCN-NEXT: v_cndmask_b32_e64 v4, v1, 0, vcc
; GCN-NEXT: v_cmp_lt_f16_sdwa s[4:5], v1, v8 src0_sel:WORD_1 src1_sel:DWORD
; GCN-NEXT: v_cmp_gt_f16_e32 vcc, 0, v2
; GCN-NEXT: v_lshrrev_b32_e32 v6, 16, v2
; GCN-NEXT: v_cndmask_b32_e64 v1, v5, 0, s[4:5]
; GCN-NEXT: v_cndmask_b32_e64 v5, v2, 0, vcc
; GCN-NEXT: v_cmp_lt_f16_sdwa s[4:5], v2, v8 src0_sel:WORD_1 src1_sel:DWORD
; GCN-NEXT: v_cmp_gt_f16_e32 vcc, 0, v3
; GCN-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GCN-NEXT: v_lshrrev_b32_e32 v7, 16, v3
; GCN-NEXT: v_cndmask_b32_e64 v2, v6, 0, s[4:5]
; GCN-NEXT: v_cndmask_b32_e64 v6, v3, 0, vcc
; GCN-NEXT: v_cmp_lt_f16_sdwa s[4:5], v3, v8 src0_sel:WORD_1 src1_sel:DWORD
; GCN-NEXT: v_lshl_or_b32 v1, v1, 16, v4
; GCN-NEXT: v_and_b32_e32 v4, 0xffff, v5
; GCN-NEXT: v_cndmask_b32_e64 v3, v7, 0, s[4:5]
; GCN-NEXT: v_and_b32_e32 v7, 0xffff, v9
; GCN-NEXT: v_lshl_or_b32 v2, v2, 16, v4
; GCN-NEXT: v_and_b32_e32 v4, 0xffff, v6
; GCN-NEXT: v_lshl_or_b32 v0, v0, 16, v7
; GCN-NEXT: v_lshl_or_b32 v3, v3, 16, v4
; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
  %fcmp = fcmp olt <8 x half> %a, zeroinitializer
  %sel = select <8 x i1> %fcmp, <8 x half> zeroinitializer, <8 x half> %a
  ret <8 x half> %sel
}

; <2 x float>: one v_cmp_gt_f32 / v_cndmask_b32 pair per element.
define <2 x float> @test_v2s32(<2 x float> %a) #0 {
; GCN-LABEL: test_v2s32:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v1
; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
  %fcmp = fcmp olt <2 x float> %a, zeroinitializer
  %sel = select <2 x i1> %fcmp, <2 x float> zeroinitializer, <2 x float> %a
  ret <2 x float> %sel
}

; <4 x float>: one v_cmp_gt_f32 / v_cndmask_b32 pair per element, four in total.
define <4 x float> @test_v4s32(<4 x float> %a) #0 {
; GCN-LABEL: test_v4s32:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v0
; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v1
; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v2
; GCN-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GCN-NEXT: v_cmp_gt_f32_e32 vcc, 0, v3
; GCN-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
  %fcmp = fcmp olt <4 x float> %a, zeroinitializer
  %sel = select <4 x i1> %fcmp, <4 x float> zeroinitializer, <4 x float> %a
  ret <4 x float> %sel
}

; <2 x double>: two 64-bit compares (results in vcc and s[4:5]) followed by
; four v_cndmask_b32, one for each 32-bit register of the two elements.
define <2 x double> @test_v2s64(<2 x double> %a) #0 {
; GCN-LABEL: test_v2s64:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cmp_gt_f64_e32 vcc, 0, v[0:1]
; GCN-NEXT: v_cmp_gt_f64_e64 s[4:5], 0, v[2:3]
; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
; GCN-NEXT: v_cndmask_b32_e64 v2, v2, 0, s[4:5]
; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
; GCN-NEXT: v_cndmask_b32_e64 v3, v3, 0, s[4:5]
; GCN-NEXT: s_setpc_b64 s[30:31]
entry:
  %fcmp = fcmp olt <2 x double> %a, zeroinitializer
  %sel = select <2 x i1> %fcmp, <2 x double> zeroinitializer, <2 x double> %a
  ret <2 x double> %sel
}