; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc %s -o - -verify-machineinstrs -mtriple=arm64-none-linux-gnu | FileCheck %s

; This is the analogue of AArch64's file of the same name. It mostly tests that
; some form of correct lowering occurs; the tests are a little artificial, but I
; strongly suspect there's room for improved CodeGen (FIXME).
;
; Every function below produces or consumes a one-element i1 vector (<1 x i1>)
; coming from a compare. The expected-output lines inside each function are
; generated by the script named on the first line of this file, so regenerate
; them with that script rather than editing them by hand.

; Signed >= compare of two <1 x i64> values; the single i1 lane is extracted
; and sign-extended to i64. The expected output moves both operands to general
; registers, compares them there, and sign-extends the one-bit result (sbfx).
define i64 @test_sext_extr_cmp_0(<1 x i64> %v1, <1 x i64> %v2) {
; CHECK-LABEL: test_sext_extr_cmp_0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    fmov x9, d0
; CHECK-NEXT:    cmp x9, x8
; CHECK-NEXT:    cset w8, ge
; CHECK-NEXT:    sbfx x0, x8, #0, #1
; CHECK-NEXT:    ret
  %1 = icmp sge <1 x i64> %v1, %v2
  %2 = extractelement <1 x i1> %1, i32 0
  %vget_lane = sext i1 %2 to i64
  ret i64 %vget_lane
}

; Same extract-and-sign-extend pattern as above, but the <1 x i1> comes from an
; ordered-equal floating-point compare of two <1 x double> values. The expected
; output uses a scalar fcmp followed by cset/sbfx.
define i64 @test_sext_extr_cmp_1(<1 x double> %v1, <1 x double> %v2) {
; CHECK-LABEL: test_sext_extr_cmp_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcmp d0, d1
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x0, x8, #0, #1
; CHECK-NEXT:    ret
  %1 = fcmp oeq <1 x double> %v1, %v2
  %2 = extractelement <1 x i1> %1, i32 0
  %vget_lane = sext i1 %2 to i64
  ret i64 %vget_lane
}

; Vector select with a <1 x i1> condition from an integer equality compare:
; the result is zero when %v1 == %v2 and %v3 otherwise. The expected output
; builds the mask with cmeq and clears %v3 under that mask with bic.
define <1 x i64> @test_select_v1i1_0(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {
; CHECK-LABEL: test_select_v1i1_0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmeq d0, d0, d1
; CHECK-NEXT:    bic v0.8b, v2.8b, v0.8b
; CHECK-NEXT:    ret
  %1 = icmp eq <1 x i64> %v1, %v2
  %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3
  ret <1 x i64> %res
}

; As test_select_v1i1_0, but the condition is an ordered-equal compare of
; <1 x double> operands while the selected values are <1 x i64>. The expected
; output uses fcmeq for the mask.
define <1 x i64> @test_select_v1i1_1(<1 x double> %v1, <1 x double> %v2, <1 x i64> %v3) {
; CHECK-LABEL: test_select_v1i1_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcmeq d0, d0, d1
; CHECK-NEXT:    bic v0.8b, v2.8b, v0.8b
; CHECK-NEXT:    ret
  %1 = fcmp oeq <1 x double> %v1, %v2
  %res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3
  ret <1 x i64> %res
}

; The mirror image of test_select_v1i1_1: integer compare operands, with
; <1 x double> values being selected. The expected output is the same
; cmeq + bic sequence as test_select_v1i1_0.
define <1 x double> @test_select_v1i1_2(<1 x i64> %v1, <1 x i64> %v2, <1 x double> %v3) {
; CHECK-LABEL: test_select_v1i1_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmeq d0, d0, d1
; CHECK-NEXT:    bic v0.8b, v2.8b, v0.8b
; CHECK-NEXT:    ret
  %1 = icmp eq <1 x i64> %v1, %v2
  %res = select <1 x i1> %1, <1 x double> zeroinitializer, <1 x double> %v3
  ret <1 x double> %res
}

; Here the compare is a scalar i64 icmp whose i1 result is inserted into a
; <1 x i1> that then drives a vector select.
;
; For v1i64, it's not clear which of the vector or scalar compare is better.
; Let's stick to the vector form, like for all other vector selects fed by a
; scalar setcc. If anything, it exposes more ILP.
define <1 x i64> @test_select_v1i1_3(i64 %lhs, i64 %rhs, <1 x i64> %v3) {
; CHECK-LABEL: test_select_v1i1_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d1, x1
; CHECK-NEXT:    fmov d2, x0
; CHECK-NEXT:    cmeq d1, d2, d1
; CHECK-NEXT:    bic v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %tst = icmp eq i64 %lhs, %rhs
  %evil = insertelement <1 x i1> undef, i1 %tst, i32 0
  %res = select <1 x i1> %evil, <1 x i64> zeroinitializer, <1 x i64> %v3
  ret <1 x i64> %res
}

; Conditional branch on the lane extracted from a <1 x i1> compare result.
; Both successors only return a constant (1 when the operands are equal, 0
; otherwise), and the expected output contains no branch: the result is
; materialised directly with cset.
define i32 @test_br_extr_cmp(<1 x i64> %v1, <1 x i64> %v2) {
; CHECK-LABEL: test_br_extr_cmp:
; CHECK:       // %bb.0: // %common.ret
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    fmov x9, d0
; CHECK-NEXT:    cmp x9, x8
; CHECK-NEXT:    cset w0, eq
; CHECK-NEXT:    ret
  %1 = icmp eq <1 x i64> %v1, %v2
  %2 = extractelement <1 x i1> %1, i32 0
  br i1 %2, label %if.end, label %if.then

if.then:
  ret i32 0;

if.end:
  ret i32 1;
}


; Unordered compare of a <1 x float> against zero, used as the condition of a
; vector select that yields %in when the compare is true and %i105 otherwise.
; The expected output compares s0 with itself, turns the cset result into a
; lane mask (shl #31 / cmlt #0) and merges the two inputs with bit.
define <1 x float> @test_vselect_f32(<1 x float> %i105, <1 x float> %in) {
; CHECK-LABEL: test_vselect_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fcmp s0, s0
; CHECK-NEXT:    cset w8, vs
; CHECK-NEXT:    fmov s2, w8
; CHECK-NEXT:    shl v2.2s, v2.2s, #31
; CHECK-NEXT:    cmlt v2.2s, v2.2s, #0
; CHECK-NEXT:    bit v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %i179 = fcmp uno <1 x float> %i105, zeroinitializer
  %i180 = select <1 x i1> %i179, <1 x float> %in, <1 x float> %i105
  ret <1 x float> %i180
}

; Half-precision version of test_vselect_f32. The expected output widens the
; operand to single precision with fcvt before comparing, and performs the
; select with a scalar fcsel instead of a vector mask.
define <1 x half> @test_vselect_f16(<1 x half> %i105, <1 x half> %in) {
; CHECK-LABEL: test_vselect_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
; CHECK-NEXT:    fcvt s2, h0
; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
; CHECK-NEXT:    fcmp s2, s2
; CHECK-NEXT:    fcsel s0, s1, s0, vs
; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
; CHECK-NEXT:    ret
  %i179 = fcmp uno <1 x half> %i105, zeroinitializer
  %i180 = select <1 x i1> %i179, <1 x half> %in, <1 x half> %i105
  ret <1 x half> %i180
}

; Scalar (i1) select between two <1 x half> values, controlled by an
; ordered-equal compare of two scalar halves. The expected output widens both
; compare operands with fcvt and selects with fcsel.
define <1 x half> @test_select_f16(half %a, half %b, <1 x half> %c, <1 x half> %d ) {
; CHECK-LABEL: test_select_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvt s1, h1
; CHECK-NEXT:    fcvt s0, h0
; CHECK-NEXT:    // kill: def $h3 killed $h3 def $s3
; CHECK-NEXT:    // kill: def $h2 killed $h2 def $s2
; CHECK-NEXT:    fcmp s0, s1
; CHECK-NEXT:    fcsel s0, s2, s3, eq
; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
; CHECK-NEXT:    ret
  %cmp31 = fcmp oeq half %a, %b
  %e = select i1 %cmp31, <1 x half> %c, <1 x half> %d
  ret <1 x half> %e
}

; <1 x half> unordered compare driving a vector select between <1 x i16>
; values (%in is not used by the body). The expected output builds a 16-bit
; lane mask (shl #15 / cmlt #0) and selects with bsl.
define <1 x i16> @test_vselect_f16_i16(<1 x half> %i105, <1 x half> %in, <1 x i16> %x, <1 x i16> %y) {
; CHECK-LABEL: test_vselect_f16_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvt s0, h0
; CHECK-NEXT:    fcmp s0, s0
; CHECK-NEXT:    cset w8, vs
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    shl v0.4h, v0.4h, #15
; CHECK-NEXT:    cmlt v0.4h, v0.4h, #0
; CHECK-NEXT:    bsl v0.8b, v2.8b, v3.8b
; CHECK-NEXT:    ret
  %i179 = fcmp uno <1 x half> %i105, zeroinitializer
  %i180 = select <1 x i1> %i179, <1 x i16> %x, <1 x i16> %y
  ret <1 x i16> %i180
}

; Scalar-condition counterpart of test_vselect_f16_i16: the compare operand is
; a scalar half and the select condition is a plain i1 (%in is not used by the
; body). The expected output produces an all-ones/all-zeros value with csetm,
; broadcasts it with dup and selects with bsl.
define <1 x i16> @test_select_f16_i16(half %i105, half %in, <1 x i16> %x, <1 x i16> %y) {
; CHECK-LABEL: test_select_f16_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvt s0, h0
; CHECK-NEXT:    fcmp s0, s0
; CHECK-NEXT:    csetm w8, vs
; CHECK-NEXT:    dup v0.4h, w8
; CHECK-NEXT:    bsl v0.8b, v2.8b, v3.8b
; CHECK-NEXT:    ret
  %i179 = fcmp uno half %i105, zeroinitializer
  %i180 = select i1 %i179, <1 x i16> %x, <1 x i16> %y
  ret <1 x i16> %i180
}

; Same shape as test_vselect_f16_i16 but selecting <1 x i32> values (%in is
; not used by the body); the expected lane mask is therefore built on 32-bit
; lanes (shl #31 / cmlt #0).
define <1 x i32> @test_vselect_f16_i32(<1 x half> %i105, <1 x half> %in, <1 x i32> %x, <1 x i32> %y) {
; CHECK-LABEL: test_vselect_f16_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvt s0, h0
; CHECK-NEXT:    fcmp s0, s0
; CHECK-NEXT:    cset w8, vs
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    shl v0.2s, v0.2s, #31
; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
; CHECK-NEXT:    bsl v0.8b, v2.8b, v3.8b
; CHECK-NEXT:    ret
  %i179 = fcmp uno <1 x half> %i105, zeroinitializer
  %i180 = select <1 x i1> %i179, <1 x i32> %x, <1 x i32> %y
  ret <1 x i32> %i180
}

; Half-precision version of test_sext_extr_cmp_1: ordered-equal compare of two
; <1 x half> values, lane extracted and sign-extended to i64. The expected
; output widens both operands with fcvt before the scalar fcmp.
define i64 @test_sext_extr_cmp_half(<1 x half> %v1, <1 x half> %v2) {
; CHECK-LABEL: test_sext_extr_cmp_half:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvt s1, h1
; CHECK-NEXT:    fcvt s0, h0
; CHECK-NEXT:    fcmp s0, s1
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x0, x8, #0, #1
; CHECK-NEXT:    ret
  %1 = fcmp oeq <1 x half> %v1, %v2
  %2 = extractelement <1 x i1> %1, i32 0
  %vget_lane = sext i1 %2 to i64
  ret i64 %vget_lane
}

; Half-precision version of test_select_v1i1_3: a scalar half compare is
; inserted into a <1 x i1> that drives a <1 x i64> select. Unlike the i64
; case, the expected output does the compare with scalar fcmp, materialises
; the mask with csetm, moves it to a vector register and applies bic.
define <1 x i64> @test_select_v1i1_half(half %lhs, half %rhs, <1 x i64> %v3) {
; CHECK-LABEL: test_select_v1i1_half:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcvt s1, h1
; CHECK-NEXT:    fcvt s0, h0
; CHECK-NEXT:    fcmp s0, s1
; CHECK-NEXT:    csetm x8, eq
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    bic v0.8b, v2.8b, v0.8b
; CHECK-NEXT:    ret
  %tst = fcmp oeq half %lhs, %rhs
  %evil = insertelement <1 x i1> undef, i1 %tst, i32 0
  %res = select <1 x i1> %evil, <1 x i64> zeroinitializer, <1 x i64> %v3
  ret <1 x i64> %res
}

; Half-precision version of test_br_extr_cmp: branch on the lane extracted
; from a <1 x half> ordered-equal compare, returning 1 when equal and 0
; otherwise. As in the integer case, the expected output has no branch.
define i32 @test_br_extr_cmp_half(<1 x half> %v1, <1 x half> %v2) {
; CHECK-LABEL: test_br_extr_cmp_half:
; CHECK:       // %bb.0: // %common.ret
; CHECK-NEXT:    fcvt s1, h1
; CHECK-NEXT:    fcvt s0, h0
; CHECK-NEXT:    fcmp s0, s1
; CHECK-NEXT:    cset w0, eq
; CHECK-NEXT:    ret
  %1 = fcmp oeq <1 x half> %v1, %v2
  %2 = extractelement <1 x i1> %1, i32 0
  br i1 %2, label %if.end, label %if.then

if.then:
  ret i32 0;

if.end:
  ret i32 1;
}