; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -o - | FileCheck --check-prefixes=CHECK,CHECK_NO_EXTEND_ROUND %s
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 --combiner-vector-fcopysign-extend-round -o - | FileCheck --check-prefixes=CHECK,CHECK_EXTEND_ROUND %s

target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"

;============ v2f32

define <vscale x 2 x float> @test_copysign_v2f32_v2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v2f32_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z2.s, #0x7fffffff
; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %r = call <vscale x 2 x float> @llvm.copysign.v2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b)
  ret <vscale x 2 x float> %r
}

define <vscale x 2 x float> @test_copysign_v2f32_v2f64(<vscale x 2 x float> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: test_copysign_v2f32_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z2.s, #0x7fffffff
; CHECK-NEXT:    fcvt z1.s, p0/m, z1.d
; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %tmp0 = fptrunc <vscale x 2 x double> %b to <vscale x 2 x float>
  %r = call <vscale x 2 x float> @llvm.copysign.v2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %tmp0)
  ret <vscale x 2 x float> %r
}

declare <vscale x 2 x float> @llvm.copysign.v2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0

;============ v4f32

define <vscale x 4 x float> @test_copysign_v4f32_v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v4f32_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z2.s, #0x7fffffff
; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %r = call <vscale x 4 x float> @llvm.copysign.v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %r
}

; SplitVecOp #1
define <vscale x 4 x float> @test_copysign_v4f32_v4f64(<vscale x 4 x float> %a, <vscale x 4 x double> %b) #0 {
; CHECK_NO_EXTEND_ROUND-LABEL: test_copysign_v4f32_v4f64:
; CHECK_NO_EXTEND_ROUND:       // %bb.0:
; CHECK_NO_EXTEND_ROUND-NEXT:    ptrue p0.d
; CHECK_NO_EXTEND_ROUND-NEXT:    mov z3.s, #0x7fffffff
; CHECK_NO_EXTEND_ROUND-NEXT:    fcvt z2.s, p0/m, z2.d
; CHECK_NO_EXTEND_ROUND-NEXT:    fcvt z1.s, p0/m, z1.d
; CHECK_NO_EXTEND_ROUND-NEXT:    uzp1 z1.s, z1.s, z2.s
; CHECK_NO_EXTEND_ROUND-NEXT:    bsl z0.d, z0.d, z1.d, z3.d
; CHECK_NO_EXTEND_ROUND-NEXT:    ret
;
; CHECK_EXTEND_ROUND-LABEL: test_copysign_v4f32_v4f64:
; CHECK_EXTEND_ROUND:       // %bb.0:
; CHECK_EXTEND_ROUND-NEXT:    ptrue p0.d
; CHECK_EXTEND_ROUND-NEXT:    uunpkhi z3.d, z0.s
; CHECK_EXTEND_ROUND-NEXT:    mov z4.s, #0x7fffffff
; CHECK_EXTEND_ROUND-NEXT:    uunpklo z0.d, z0.s
; CHECK_EXTEND_ROUND-NEXT:    fcvt z2.s, p0/m, z2.d
; CHECK_EXTEND_ROUND-NEXT:    fcvt z1.s, p0/m, z1.d
; CHECK_EXTEND_ROUND-NEXT:    bsl z3.d, z3.d, z2.d, z4.d
; CHECK_EXTEND_ROUND-NEXT:    bsl z0.d, z0.d, z1.d, z4.d
; CHECK_EXTEND_ROUND-NEXT:    uzp1 z0.s, z0.s, z3.s
; CHECK_EXTEND_ROUND-NEXT:    ret
  %tmp0 = fptrunc <vscale x 4 x double> %b to <vscale x 4 x float>
  %r = call <vscale x 4 x float> @llvm.copysign.v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %tmp0)
  ret <vscale x 4 x float> %r
}

declare <vscale x 4 x float> @llvm.copysign.v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0

;============ v2f64

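; Here the sign operand is widened with fpext, so a single predicated fcvt
; feeds the BSL that merges the magnitude and sign bits.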
define <vscale x 2 x double> @test_copysign_v2f64_v2f32(<vscale x 2 x double> %a, <vscale x 2 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v2f64_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z2.d, #0x7fffffffffffffff
; CHECK-NEXT:    fcvt z1.d, p0/m, z1.s
; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %tmp0 = fpext <vscale x 2 x float> %b to <vscale x 2 x double>
  %r = call <vscale x 2 x double> @llvm.copysign.v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %tmp0)
  ret <vscale x 2 x double> %r
}

define <vscale x 2 x double> @test_copysign_v2f64_v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: test_copysign_v2f64_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z2.d, #0x7fffffffffffffff
; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %r = call <vscale x 2 x double> @llvm.copysign.v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %r
}

declare <vscale x 2 x double> @llvm.copysign.v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0

;============ v4f64

; SplitVecRes mismatched
define <vscale x 4 x double> @test_copysign_v4f64_v4f32(<vscale x 4 x double> %a, <vscale x 4 x float> %b) #0 {
; CHECK_NO_EXTEND_ROUND-LABEL: test_copysign_v4f64_v4f32:
; CHECK_NO_EXTEND_ROUND:       // %bb.0:
; CHECK_NO_EXTEND_ROUND-NEXT:    uunpkhi z3.d, z2.s
; CHECK_NO_EXTEND_ROUND-NEXT:    uunpklo z2.d, z2.s
; CHECK_NO_EXTEND_ROUND-NEXT:    ptrue p0.d
; CHECK_NO_EXTEND_ROUND-NEXT:    mov z4.d, #0x7fffffffffffffff
; CHECK_NO_EXTEND_ROUND-NEXT:    fcvt z3.d, p0/m, z3.s
; CHECK_NO_EXTEND_ROUND-NEXT:    fcvt z2.d, p0/m, z2.s
; CHECK_NO_EXTEND_ROUND-NEXT:    bsl z0.d, z0.d, z2.d, z4.d
; CHECK_NO_EXTEND_ROUND-NEXT:    bsl z1.d, z1.d, z3.d, z4.d
; CHECK_NO_EXTEND_ROUND-NEXT:    ret
;
; CHECK_EXTEND_ROUND-LABEL: test_copysign_v4f64_v4f32:
; CHECK_EXTEND_ROUND:       // %bb.0:
; CHECK_EXTEND_ROUND-NEXT:    uunpkhi z3.d, z2.s
; CHECK_EXTEND_ROUND-NEXT:    uunpklo z2.d, z2.s
; CHECK_EXTEND_ROUND-NEXT:    ptrue p0.d
; CHECK_EXTEND_ROUND-NEXT:    mov z4.d, #0x7fffffffffffffff
; CHECK_EXTEND_ROUND-NEXT:    fcvt z2.d, p0/m, z2.s
; CHECK_EXTEND_ROUND-NEXT:    fcvt z3.d, p0/m, z3.s
; CHECK_EXTEND_ROUND-NEXT:    bsl z0.d, z0.d, z2.d, z4.d
; CHECK_EXTEND_ROUND-NEXT:    bsl z1.d, z1.d, z3.d, z4.d
; CHECK_EXTEND_ROUND-NEXT:    ret
  %tmp0 = fpext <vscale x 4 x float> %b to <vscale x 4 x double>
  %r = call <vscale x 4 x double> @llvm.copysign.v4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %tmp0)
  ret <vscale x 4 x double> %r
}

; SplitVecRes same
define <vscale x 4 x double> @test_copysign_v4f64_v4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b) #0 {
; CHECK-LABEL: test_copysign_v4f64_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z4.d, #0x7fffffffffffffff
; CHECK-NEXT:    bsl z0.d, z0.d, z2.d, z4.d
; CHECK-NEXT:    bsl z1.d, z1.d, z3.d, z4.d
; CHECK-NEXT:    ret
  %r = call <vscale x 4 x double> @llvm.copysign.v4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b)
  ret <vscale x 4 x double> %r
}

declare <vscale x 4 x double> @llvm.copysign.v4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b) #0

;============ v4f16

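; For half precision the sign-clearing mask is #32767 (0x7fff), covering only
; the low 15 bits of each 16-bit lane.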
define <vscale x 4 x half> @test_copysign_v4f16_v4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
; CHECK-LABEL: test_copysign_v4f16_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z2.h, #32767 // =0x7fff
; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %r = call <vscale x 4 x half> @llvm.copysign.v4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %r
}

define <vscale x 4 x half> @test_copysign_v4f16_v4f32(<vscale x 4 x half> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v4f16_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z2.h, #32767 // =0x7fff
; CHECK-NEXT:    fcvt z1.h, p0/m, z1.s
; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %tmp0 = fptrunc <vscale x 4 x float> %b to <vscale x 4 x half>
  %r = call <vscale x 4 x half> @llvm.copysign.v4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %tmp0)
  ret <vscale x 4 x half> %r
}

define <vscale x 4 x half> @test_copysign_v4f16_v4f64(<vscale x 4 x half> %a, <vscale x 4 x double> %b) #0 {
; CHECK_NO_EXTEND_ROUND-LABEL: test_copysign_v4f16_v4f64:
; CHECK_NO_EXTEND_ROUND:       // %bb.0:
; CHECK_NO_EXTEND_ROUND-NEXT:    ptrue p0.d
; CHECK_NO_EXTEND_ROUND-NEXT:    mov z3.h, #32767 // =0x7fff
; CHECK_NO_EXTEND_ROUND-NEXT:    fcvt z2.h, p0/m, z2.d
; CHECK_NO_EXTEND_ROUND-NEXT:    fcvt z1.h, p0/m, z1.d
; CHECK_NO_EXTEND_ROUND-NEXT:    uzp1 z1.s, z1.s, z2.s
; CHECK_NO_EXTEND_ROUND-NEXT:    bsl z0.d, z0.d, z1.d, z3.d
; CHECK_NO_EXTEND_ROUND-NEXT:    ret
;
; CHECK_EXTEND_ROUND-LABEL: test_copysign_v4f16_v4f64:
; CHECK_EXTEND_ROUND:       // %bb.0:
; CHECK_EXTEND_ROUND-NEXT:    ptrue p0.d
; CHECK_EXTEND_ROUND-NEXT:    uunpkhi z3.d, z0.s
; CHECK_EXTEND_ROUND-NEXT:    mov z4.h, #32767 // =0x7fff
; CHECK_EXTEND_ROUND-NEXT:    uunpklo z0.d, z0.s
; CHECK_EXTEND_ROUND-NEXT:    fcvt z2.h, p0/m, z2.d
; CHECK_EXTEND_ROUND-NEXT:    fcvt z1.h, p0/m, z1.d
; CHECK_EXTEND_ROUND-NEXT:    bsl z3.d, z3.d, z2.d, z4.d
; CHECK_EXTEND_ROUND-NEXT:    bsl z0.d, z0.d, z1.d, z4.d
; CHECK_EXTEND_ROUND-NEXT:    uzp1 z0.s, z0.s, z3.s
; CHECK_EXTEND_ROUND-NEXT:    ret
  %tmp0 = fptrunc <vscale x 4 x double> %b to <vscale x 4 x half>
  %r = call <vscale x 4 x half> @llvm.copysign.v4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %tmp0)
  ret <vscale x 4 x half> %r
}

declare <vscale x 4 x half> @llvm.copysign.v4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0

;============ v8f16

define <vscale x 8 x half> @test_copysign_v8f16_v8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: test_copysign_v8f16_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z2.h, #32767 // =0x7fff
; CHECK-NEXT:    bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %r = call <vscale x 8 x half> @llvm.copysign.v8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %r
}

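; With the extend-round combine disabled, both fcvt results are rejoined with
; uzp1 ahead of a single BSL; with it enabled, %a is unpacked so each half is
; merged by its own BSL before the results are zipped back together.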
define <vscale x 8 x half> @test_copysign_v8f16_v8f32(<vscale x 8 x half> %a, <vscale x 8 x float> %b) #0 {
; CHECK_NO_EXTEND_ROUND-LABEL: test_copysign_v8f16_v8f32:
; CHECK_NO_EXTEND_ROUND:       // %bb.0:
; CHECK_NO_EXTEND_ROUND-NEXT:    ptrue p0.s
; CHECK_NO_EXTEND_ROUND-NEXT:    mov z3.h, #32767 // =0x7fff
; CHECK_NO_EXTEND_ROUND-NEXT:    fcvt z2.h, p0/m, z2.s
; CHECK_NO_EXTEND_ROUND-NEXT:    fcvt z1.h, p0/m, z1.s
; CHECK_NO_EXTEND_ROUND-NEXT:    uzp1 z1.h, z1.h, z2.h
; CHECK_NO_EXTEND_ROUND-NEXT:    bsl z0.d, z0.d, z1.d, z3.d
; CHECK_NO_EXTEND_ROUND-NEXT:    ret
;
; CHECK_EXTEND_ROUND-LABEL: test_copysign_v8f16_v8f32:
; CHECK_EXTEND_ROUND:       // %bb.0:
; CHECK_EXTEND_ROUND-NEXT:    ptrue p0.s
; CHECK_EXTEND_ROUND-NEXT:    uunpkhi z3.s, z0.h
; CHECK_EXTEND_ROUND-NEXT:    mov z4.h, #32767 // =0x7fff
; CHECK_EXTEND_ROUND-NEXT:    uunpklo z0.s, z0.h
; CHECK_EXTEND_ROUND-NEXT:    fcvt z2.h, p0/m, z2.s
; CHECK_EXTEND_ROUND-NEXT:    fcvt z1.h, p0/m, z1.s
; CHECK_EXTEND_ROUND-NEXT:    bsl z3.d, z3.d, z2.d, z4.d
; CHECK_EXTEND_ROUND-NEXT:    bsl z0.d, z0.d, z1.d, z4.d
; CHECK_EXTEND_ROUND-NEXT:    uzp1 z0.h, z0.h, z3.h
; CHECK_EXTEND_ROUND-NEXT:    ret
  %tmp0 = fptrunc <vscale x 8 x float> %b to <vscale x 8 x half>
  %r = call <vscale x 8 x half> @llvm.copysign.v8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %tmp0)
  ret <vscale x 8 x half> %r
}

declare <vscale x 8 x half> @llvm.copysign.v8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0

attributes #0 = { nounwind }