; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve -o - | FileCheck --check-prefixes=CHECK,CHECK-NO-EXTEND-ROUND %s
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve --combiner-vector-fcopysign-extend-round -o - | FileCheck --check-prefixes=CHECK,CHECK-EXTEND-ROUND %s

; Code generation test for llvm.copysign on scalable (SVE) vector types.
; Each function is compiled twice by the llc invocations above: once with
; default options and once with --combiner-vector-fcopysign-extend-round.
; Functions whose output is the same in both modes share one set of
; assertions; the others carry one set per mode.
;
; NOTE(review): the "m:o" component below selects Mach-O name mangling while
; the llc invocations use a bare aarch64 triple - presumably inherited from
; another test; confirm whether this line is still needed.
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"

;============ v2f32

; Both operands already have the result type.
define <vscale x 2 x float> @test_copysign_v2f32_v2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v2f32_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z1.s, z1.s, #0x80000000
; CHECK-NEXT:    and z0.s, z0.s, #0x7fffffff
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %r = call <vscale x 2 x float> @llvm.copysign.v2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b)
  ret <vscale x 2 x float> %r
}

; The sign operand is truncated from double before the copysign.
define <vscale x 2 x float> @test_copysign_v2f32_v2f64(<vscale x 2 x float> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: test_copysign_v2f32_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    and z0.s, z0.s, #0x7fffffff
; CHECK-NEXT:    fcvt z1.s, p0/m, z1.d
; CHECK-NEXT:    and z1.s, z1.s, #0x80000000
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %tmp0 = fptrunc <vscale x 2 x double> %b to <vscale x 2 x float>
  %r = call <vscale x 2 x float> @llvm.copysign.v2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %tmp0)
  ret <vscale x 2 x float> %r
}

declare <vscale x 2 x float> @llvm.copysign.v2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0

;============ v4f32

; Both operands already have the result type.
define <vscale x 4 x float> @test_copysign_v4f32_v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v4f32_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z1.s, z1.s, #0x80000000
; CHECK-NEXT:    and z0.s, z0.s, #0x7fffffff
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %r = call <vscale x 4 x float> @llvm.copysign.v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %r
}

; SplitVecOp #1
define <vscale x 4 x float> @test_copysign_v4f32_v4f64(<vscale x 4 x float> %a, <vscale x 4 x double> %b) #0 {
; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_v4f32_v4f64:
; CHECK-NO-EXTEND-ROUND:       // %bb.0:
; CHECK-NO-EXTEND-ROUND-NEXT:    ptrue p0.d
; CHECK-NO-EXTEND-ROUND-NEXT:    and z0.s, z0.s, #0x7fffffff
; CHECK-NO-EXTEND-ROUND-NEXT:    fcvt z2.s, p0/m, z2.d
; CHECK-NO-EXTEND-ROUND-NEXT:    fcvt z1.s, p0/m, z1.d
; CHECK-NO-EXTEND-ROUND-NEXT:    uzp1 z1.s, z1.s, z2.s
; CHECK-NO-EXTEND-ROUND-NEXT:    and z1.s, z1.s, #0x80000000
; CHECK-NO-EXTEND-ROUND-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NO-EXTEND-ROUND-NEXT:    ret
;
; CHECK-EXTEND-ROUND-LABEL: test_copysign_v4f32_v4f64:
; CHECK-EXTEND-ROUND:       // %bb.0:
; CHECK-EXTEND-ROUND-NEXT:    ptrue p0.d
; CHECK-EXTEND-ROUND-NEXT:    uunpkhi z3.d, z0.s
; CHECK-EXTEND-ROUND-NEXT:    uunpklo z0.d, z0.s
; CHECK-EXTEND-ROUND-NEXT:    fcvt z2.s, p0/m, z2.d
; CHECK-EXTEND-ROUND-NEXT:    fcvt z1.s, p0/m, z1.d
; CHECK-EXTEND-ROUND-NEXT:    and z3.s, z3.s, #0x7fffffff
; CHECK-EXTEND-ROUND-NEXT:    and z0.s, z0.s, #0x7fffffff
; CHECK-EXTEND-ROUND-NEXT:    and z2.s, z2.s, #0x80000000
; CHECK-EXTEND-ROUND-NEXT:    and z1.s, z1.s, #0x80000000
; CHECK-EXTEND-ROUND-NEXT:    orr z2.d, z3.d, z2.d
; CHECK-EXTEND-ROUND-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-EXTEND-ROUND-NEXT:    uzp1 z0.s, z0.s, z2.s
; CHECK-EXTEND-ROUND-NEXT:    ret
  %tmp0 = fptrunc <vscale x 4 x double> %b to <vscale x 4 x float>
  %r = call <vscale x 4 x float> @llvm.copysign.v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %tmp0)
  ret <vscale x 4 x float> %r
}

declare <vscale x 4 x float> @llvm.copysign.v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0

;============ v2f64

; The sign operand is extended from float before the copysign.
; NOTE(review): "v232" in the name looks like a typo for "v2f32"; it is kept
; as-is here so the symbol and its label assertions stay in sync.
define <vscale x 2 x double> @test_copysign_v2f64_v232(<vscale x 2 x double> %a, <vscale x 2 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v2f64_v232:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    and z0.d, z0.d, #0x7fffffffffffffff
; CHECK-NEXT:    fcvt z1.d, p0/m, z1.s
; CHECK-NEXT:    and z1.d, z1.d, #0x8000000000000000
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %tmp0 = fpext <vscale x 2 x float> %b to <vscale x 2 x double>
  %r = call <vscale x 2 x double> @llvm.copysign.v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %tmp0)
  ret <vscale x 2 x double> %r
}

; Both operands already have the result type.
define <vscale x 2 x double> @test_copysign_v2f64_v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: test_copysign_v2f64_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z1.d, z1.d, #0x8000000000000000
; CHECK-NEXT:    and z0.d, z0.d, #0x7fffffffffffffff
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %r = call <vscale x 2 x double> @llvm.copysign.v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %r
}

declare <vscale x 2 x double> @llvm.copysign.v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0

;============ v4f64

; SplitVecRes mismatched
define <vscale x 4 x double> @test_copysign_v4f64_v4f32(<vscale x 4 x double> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v4f64_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z3.d, z2.s
; CHECK-NEXT:    uunpkhi z2.d, z2.s
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    and z0.d, z0.d, #0x7fffffffffffffff
; CHECK-NEXT:    and z1.d, z1.d, #0x7fffffffffffffff
; CHECK-NEXT:    fcvt z3.d, p0/m, z3.s
; CHECK-NEXT:    fcvt z2.d, p0/m, z2.s
; CHECK-NEXT:    and z3.d, z3.d, #0x8000000000000000
; CHECK-NEXT:    and z2.d, z2.d, #0x8000000000000000
; CHECK-NEXT:    orr z0.d, z0.d, z3.d
; CHECK-NEXT:    orr z1.d, z1.d, z2.d
; CHECK-NEXT:    ret
  %tmp0 = fpext <vscale x 4 x float> %b to <vscale x 4 x double>
  %r = call <vscale x 4 x double> @llvm.copysign.v4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %tmp0)
  ret <vscale x 4 x double> %r
}

; SplitVecRes same
define <vscale x 4 x double> @test_copysign_v4f64_v4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b) #0 {
; CHECK-LABEL: test_copysign_v4f64_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z2.d, z2.d, #0x8000000000000000
; CHECK-NEXT:    and z0.d, z0.d, #0x7fffffffffffffff
; CHECK-NEXT:    and z3.d, z3.d, #0x8000000000000000
; CHECK-NEXT:    and z1.d, z1.d, #0x7fffffffffffffff
; CHECK-NEXT:    orr z0.d, z0.d, z2.d
; CHECK-NEXT:    orr z1.d, z1.d, z3.d
; CHECK-NEXT:    ret
  %r = call <vscale x 4 x double> @llvm.copysign.v4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b)
  ret <vscale x 4 x double> %r
}

declare <vscale x 4 x double> @llvm.copysign.v4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b) #0

;============ v4f16

; Both operands already have the result type.
define <vscale x 4 x half> @test_copysign_v4f16_v4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
; CHECK-LABEL: test_copysign_v4f16_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z1.h, z1.h, #0x8000
; CHECK-NEXT:    and z0.h, z0.h, #0x7fff
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %r = call <vscale x 4 x half> @llvm.copysign.v4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %r
}

; The sign operand is truncated from float before the copysign.
define <vscale x 4 x half> @test_copysign_v4f16_v4f32(<vscale x 4 x half> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v4f16_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    and z0.h, z0.h, #0x7fff
; CHECK-NEXT:    fcvt z1.h, p0/m, z1.s
; CHECK-NEXT:    and z1.h, z1.h, #0x8000
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %tmp0 = fptrunc <vscale x 4 x float> %b to <vscale x 4 x half>
  %r = call <vscale x 4 x half> @llvm.copysign.v4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %tmp0)
  ret <vscale x 4 x half> %r
}

; The sign operand is truncated from double before the copysign.
define <vscale x 4 x half> @test_copysign_v4f16_v4f64(<vscale x 4 x half> %a, <vscale x 4 x double> %b) #0 {
; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_v4f16_v4f64:
; CHECK-NO-EXTEND-ROUND:       // %bb.0:
; CHECK-NO-EXTEND-ROUND-NEXT:    ptrue p0.d
; CHECK-NO-EXTEND-ROUND-NEXT:    and z0.h, z0.h, #0x7fff
; CHECK-NO-EXTEND-ROUND-NEXT:    fcvt z2.h, p0/m, z2.d
; CHECK-NO-EXTEND-ROUND-NEXT:    fcvt z1.h, p0/m, z1.d
; CHECK-NO-EXTEND-ROUND-NEXT:    uzp1 z1.s, z1.s, z2.s
; CHECK-NO-EXTEND-ROUND-NEXT:    and z1.h, z1.h, #0x8000
; CHECK-NO-EXTEND-ROUND-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NO-EXTEND-ROUND-NEXT:    ret
;
; CHECK-EXTEND-ROUND-LABEL: test_copysign_v4f16_v4f64:
; CHECK-EXTEND-ROUND:       // %bb.0:
; CHECK-EXTEND-ROUND-NEXT:    ptrue p0.d
; CHECK-EXTEND-ROUND-NEXT:    uunpkhi z3.d, z0.s
; CHECK-EXTEND-ROUND-NEXT:    uunpklo z0.d, z0.s
; CHECK-EXTEND-ROUND-NEXT:    fcvt z2.h, p0/m, z2.d
; CHECK-EXTEND-ROUND-NEXT:    fcvt z1.h, p0/m, z1.d
; CHECK-EXTEND-ROUND-NEXT:    and z3.h, z3.h, #0x7fff
; CHECK-EXTEND-ROUND-NEXT:    and z0.h, z0.h, #0x7fff
; CHECK-EXTEND-ROUND-NEXT:    and z2.h, z2.h, #0x8000
; CHECK-EXTEND-ROUND-NEXT:    and z1.h, z1.h, #0x8000
; CHECK-EXTEND-ROUND-NEXT:    orr z2.d, z3.d, z2.d
; CHECK-EXTEND-ROUND-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-EXTEND-ROUND-NEXT:    uzp1 z0.s, z0.s, z2.s
; CHECK-EXTEND-ROUND-NEXT:    ret
  %tmp0 = fptrunc <vscale x 4 x double> %b to <vscale x 4 x half>
  %r = call <vscale x 4 x half> @llvm.copysign.v4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %tmp0)
  ret <vscale x 4 x half> %r
}

declare <vscale x 4 x half> @llvm.copysign.v4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0

;============ v8f16

; Both operands already have the result type.
define <vscale x 8 x half> @test_copysign_v8f16_v8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: test_copysign_v8f16_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z1.h, z1.h, #0x8000
; CHECK-NEXT:    and z0.h, z0.h, #0x7fff
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %r = call <vscale x 8 x half> @llvm.copysign.v8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %r
}

; The sign operand is truncated from float before the copysign.
define <vscale x 8 x half> @test_copysign_v8f16_v8f32(<vscale x 8 x half> %a, <vscale x 8 x float> %b) #0 {
; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_v8f16_v8f32:
; CHECK-NO-EXTEND-ROUND:       // %bb.0:
; CHECK-NO-EXTEND-ROUND-NEXT:    ptrue p0.s
; CHECK-NO-EXTEND-ROUND-NEXT:    and z0.h, z0.h, #0x7fff
; CHECK-NO-EXTEND-ROUND-NEXT:    fcvt z2.h, p0/m, z2.s
; CHECK-NO-EXTEND-ROUND-NEXT:    fcvt z1.h, p0/m, z1.s
; CHECK-NO-EXTEND-ROUND-NEXT:    uzp1 z1.h, z1.h, z2.h
; CHECK-NO-EXTEND-ROUND-NEXT:    and z1.h, z1.h, #0x8000
; CHECK-NO-EXTEND-ROUND-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NO-EXTEND-ROUND-NEXT:    ret
;
; CHECK-EXTEND-ROUND-LABEL: test_copysign_v8f16_v8f32:
; CHECK-EXTEND-ROUND:       // %bb.0:
; CHECK-EXTEND-ROUND-NEXT:    ptrue p0.s
; CHECK-EXTEND-ROUND-NEXT:    uunpkhi z3.s, z0.h
; CHECK-EXTEND-ROUND-NEXT:    uunpklo z0.s, z0.h
; CHECK-EXTEND-ROUND-NEXT:    fcvt z2.h, p0/m, z2.s
; CHECK-EXTEND-ROUND-NEXT:    fcvt z1.h, p0/m, z1.s
; CHECK-EXTEND-ROUND-NEXT:    and z3.h, z3.h, #0x7fff
; CHECK-EXTEND-ROUND-NEXT:    and z0.h, z0.h, #0x7fff
; CHECK-EXTEND-ROUND-NEXT:    and z2.h, z2.h, #0x8000
; CHECK-EXTEND-ROUND-NEXT:    and z1.h, z1.h, #0x8000
; CHECK-EXTEND-ROUND-NEXT:    orr z2.d, z3.d, z2.d
; CHECK-EXTEND-ROUND-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-EXTEND-ROUND-NEXT:    uzp1 z0.h, z0.h, z2.h
; CHECK-EXTEND-ROUND-NEXT:    ret
  %tmp0 = fptrunc <vscale x 8 x float> %b to <vscale x 8 x half>
  %r = call <vscale x 8 x half> @llvm.copysign.v8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %tmp0)
  ret <vscale x 8 x half> %r
}


;========== FCOPYSIGN_EXTEND_ROUND

; In the two functions below the copysign is done at the wide type and its
; result is truncated afterwards. The expected output without the extra flag
; keeps that order; with the flag both operands are converted first and the
; sign is merged at the narrow type.

define <vscale x 4 x half> @test_copysign_nxv4f32_nxv4f16(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_nxv4f32_nxv4f16:
; CHECK-NO-EXTEND-ROUND:       // %bb.0:
; CHECK-NO-EXTEND-ROUND-NEXT:    and z1.s, z1.s, #0x80000000
; CHECK-NO-EXTEND-ROUND-NEXT:    and z0.s, z0.s, #0x7fffffff
; CHECK-NO-EXTEND-ROUND-NEXT:    ptrue p0.s
; CHECK-NO-EXTEND-ROUND-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NO-EXTEND-ROUND-NEXT:    fcvt z0.h, p0/m, z0.s
; CHECK-NO-EXTEND-ROUND-NEXT:    ret
;
; CHECK-EXTEND-ROUND-LABEL: test_copysign_nxv4f32_nxv4f16:
; CHECK-EXTEND-ROUND:       // %bb.0:
; CHECK-EXTEND-ROUND-NEXT:    ptrue p0.s
; CHECK-EXTEND-ROUND-NEXT:    fcvt z0.h, p0/m, z0.s
; CHECK-EXTEND-ROUND-NEXT:    fcvt z1.h, p0/m, z1.s
; CHECK-EXTEND-ROUND-NEXT:    and z1.h, z1.h, #0x8000
; CHECK-EXTEND-ROUND-NEXT:    and z0.h, z0.h, #0x7fff
; CHECK-EXTEND-ROUND-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-EXTEND-ROUND-NEXT:    ret
  %t1 = call <vscale x 4 x float> @llvm.copysign.v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  %t2 = fptrunc <vscale x 4 x float> %t1 to <vscale x 4 x half>
  ret <vscale x 4 x half> %t2
}

define <vscale x 2 x float> @test_copysign_nxv2f64_nxv2f32(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_nxv2f64_nxv2f32:
; CHECK-NO-EXTEND-ROUND:       // %bb.0:
; CHECK-NO-EXTEND-ROUND-NEXT:    and z1.d, z1.d, #0x8000000000000000
; CHECK-NO-EXTEND-ROUND-NEXT:    and z0.d, z0.d, #0x7fffffffffffffff
; CHECK-NO-EXTEND-ROUND-NEXT:    ptrue p0.d
; CHECK-NO-EXTEND-ROUND-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NO-EXTEND-ROUND-NEXT:    fcvt z0.s, p0/m, z0.d
; CHECK-NO-EXTEND-ROUND-NEXT:    ret
;
; CHECK-EXTEND-ROUND-LABEL: test_copysign_nxv2f64_nxv2f32:
; CHECK-EXTEND-ROUND:       // %bb.0:
; CHECK-EXTEND-ROUND-NEXT:    ptrue p0.d
; CHECK-EXTEND-ROUND-NEXT:    fcvt z0.s, p0/m, z0.d
; CHECK-EXTEND-ROUND-NEXT:    fcvt z1.s, p0/m, z1.d
; CHECK-EXTEND-ROUND-NEXT:    and z1.s, z1.s, #0x80000000
; CHECK-EXTEND-ROUND-NEXT:    and z0.s, z0.s, #0x7fffffff
; CHECK-EXTEND-ROUND-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-EXTEND-ROUND-NEXT:    ret
  %t1 = call <vscale x 2 x double> @llvm.copysign.v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
  %t2 = fptrunc <vscale x 2 x double> %t1 to <vscale x 2 x float>
  ret <vscale x 2 x float> %t2
}

declare <vscale x 8 x half> @llvm.copysign.v8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0

attributes #0 = { nounwind }