; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple aarch64-apple-darwin | FileCheck --check-prefixes=CHECK,NOFP16 %s
; RUN: llc < %s -mtriple aarch64-apple-darwin -mattr=+v8.2a,+fullfp16 | FileCheck --check-prefixes=CHECK,FP16 %s

; Exercises AArch64 code generation for the llvm.copysign intrinsic on vector
; types. Each test is named test_copysign_<result type>_<sign source type>.
; When the two types differ, the sign operand is first converted with
; fptrunc or fpext to the result type and then passed to the intrinsic.
;
; Both llc invocations above share the common CHECK prefix. The two
; per-configuration prefixes are currently unused, which is why the
; autogenerated footer at the end of the file lists them.

target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"

;============ v1f32

; WidenVecRes same
define <1 x float> @test_copysign_v1f32_v1f32(<1 x float> %a, <1 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v1f32_v1f32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    mvni.2s v2, #128, lsl #24
; CHECK-NEXT:    bif.8b v0, v1, v2
; CHECK-NEXT:    ret
  %r = call <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %b)
  ret <1 x float> %r
}

; WidenVecRes mismatched
define <1 x float> @test_copysign_v1f32_v1f64(<1 x float> %a, <1 x double> %b) #0 {
; CHECK-LABEL: test_copysign_v1f32_v1f64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ; kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mvni.2s v2, #128, lsl #24
; CHECK-NEXT:    fcvtn v1.2s, v1.2d
; CHECK-NEXT:    bif.8b v0, v1, v2
; CHECK-NEXT:    ret
  %tmp0 = fptrunc <1 x double> %b to <1 x float>
  %r = call <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %tmp0)
  ret <1 x float> %r
}

declare <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %b) #0

;============ v1f64

; WidenVecOp #1
define <1 x double> @test_copysign_v1f64_v1f32(<1 x double> %a, <1 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v1f64_v1f32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    movi.2d v2, #0xffffffffffffffff
; CHECK-NEXT:    fcvtl v1.2d, v1.2s
; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fneg.2d v2, v2
; CHECK-NEXT:    bif.16b v0, v1, v2
; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %tmp0 = fpext <1 x float> %b to <1 x double>
  %r = call <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %tmp0)
  ret <1 x double> %r
}

; Magnitude and sign operands are both <1 x double>, no conversion in the IR.
define <1 x double> @test_copysign_v1f64_v1f64(<1 x double> %a, <1 x double> %b) #0 {
; CHECK-LABEL: test_copysign_v1f64_v1f64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    movi.2d v2, #0xffffffffffffffff
; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    ; kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fneg.2d v2, v2
; CHECK-NEXT:    bif.16b v0, v1, v2
; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %r = call <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %b)
  ret <1 x double> %r
}

declare <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %b) #0

;============ v2f32

; Magnitude and sign operands are both <2 x float>, no conversion in the IR.
define <2 x float> @test_copysign_v2f32_v2f32(<2 x float> %a, <2 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v2f32_v2f32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    mvni.2s v2, #128, lsl #24
; CHECK-NEXT:    bif.8b v0, v1, v2
; CHECK-NEXT:    ret
  %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b)
  ret <2 x float> %r
}

; Sign operand is truncated from <2 x double> before the copysign.
define <2 x float> @test_copysign_v2f32_v2f64(<2 x float> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_copysign_v2f32_v2f64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    fcvtn v1.2s, v1.2d
; CHECK-NEXT:    mvni.2s v2, #128, lsl #24
; CHECK-NEXT:    bif.8b v0, v1, v2
; CHECK-NEXT:    ret
  %tmp0 = fptrunc <2 x double> %b to <2 x float>
  %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %tmp0)
  ret <2 x float> %r
}

declare <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b) #0

;============ v4f32

; Magnitude and sign operands are both <4 x float>, no conversion in the IR.
define <4 x float> @test_copysign_v4f32_v4f32(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v4f32_v4f32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    mvni.4s v2, #128, lsl #24
; CHECK-NEXT:    bif.16b v0, v1, v2
; CHECK-NEXT:    ret
  %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
  ret <4 x float> %r
}

; SplitVecOp #1
define <4 x float> @test_copysign_v4f32_v4f64(<4 x float> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_copysign_v4f32_v4f64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    fcvtn v1.2s, v1.2d
; CHECK-NEXT:    fcvtn2 v1.4s, v2.2d
; CHECK-NEXT:    mvni.4s v2, #128, lsl #24
; CHECK-NEXT:    bif.16b v0, v1, v2
; CHECK-NEXT:    ret
  %tmp0 = fptrunc <4 x double> %b to <4 x float>
  %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %tmp0)
  ret <4 x float> %r
}

declare <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b) #0

;============ v2f64

; Sign operand is extended from <2 x float> before the copysign.
define <2 x double> @test_copysign_v2f64_v2f32(<2 x double> %a, <2 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v2f64_v2f32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    movi.2d v2, #0xffffffffffffffff
; CHECK-NEXT:    fcvtl v1.2d, v1.2s
; CHECK-NEXT:    fneg.2d v2, v2
; CHECK-NEXT:    bif.16b v0, v1, v2
; CHECK-NEXT:    ret
  %tmp0 = fpext <2 x float> %b to <2 x double>
  %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %tmp0)
  ret <2 x double> %r
}

; Magnitude and sign operands are both <2 x double>, no conversion in the IR.
define <2 x double> @test_copysign_v2f64_v2f64(<2 x double> %a, <2 x double> %b) #0 {
; CHECK-LABEL: test_copysign_v2f64_v2f64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    movi.2d v2, #0xffffffffffffffff
; CHECK-NEXT:    fneg.2d v2, v2
; CHECK-NEXT:    bif.16b v0, v1, v2
; CHECK-NEXT:    ret
  %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b)
  ret <2 x double> %r
}

declare <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b) #0

;============ v4f64

; SplitVecRes mismatched
define <4 x double> @test_copysign_v4f64_v4f32(<4 x double> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v4f64_v4f32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    movi.2d v3, #0xffffffffffffffff
; CHECK-NEXT:    fcvtl v4.2d, v2.2s
; CHECK-NEXT:    fcvtl2 v2.2d, v2.4s
; CHECK-NEXT:    fneg.2d v3, v3
; CHECK-NEXT:    bif.16b v1, v2, v3
; CHECK-NEXT:    bif.16b v0, v4, v3
; CHECK-NEXT:    ret
  %tmp0 = fpext <4 x float> %b to <4 x double>
  %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %tmp0)
  ret <4 x double> %r
}

; SplitVecRes same
define <4 x double> @test_copysign_v4f64_v4f64(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_copysign_v4f64_v4f64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    movi.2d v4, #0xffffffffffffffff
; CHECK-NEXT:    fneg.2d v4, v4
; CHECK-NEXT:    bif.16b v0, v2, v4
; CHECK-NEXT:    bif.16b v1, v3, v4
; CHECK-NEXT:    ret
  %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b)
  ret <4 x double> %r
}

declare <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b) #0

;============ v4f16

; Magnitude and sign operands are both <4 x half>, no conversion in the IR.
define <4 x half> @test_copysign_v4f16_v4f16(<4 x half> %a, <4 x half> %b) #0 {
; CHECK-LABEL: test_copysign_v4f16_v4f16:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    mvni.4h v2, #128, lsl #8
; CHECK-NEXT:    bif.8b v0, v1, v2
; CHECK-NEXT:    ret
  %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %r
}

; Sign operand is truncated from <4 x float> before the copysign.
define <4 x half> @test_copysign_v4f16_v4f32(<4 x half> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v4f16_v4f32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    fcvtn v1.4h, v1.4s
; CHECK-NEXT:    mvni.4h v2, #128, lsl #8
; CHECK-NEXT:    bif.8b v0, v1, v2
; CHECK-NEXT:    ret
  %tmp0 = fptrunc <4 x float> %b to <4 x half>
  %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0)
  ret <4 x half> %r
}

; Sign operand is truncated from <4 x double> before the copysign.
define <4 x half> @test_copysign_v4f16_v4f64(<4 x half> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_copysign_v4f16_v4f64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    fcvtxn v1.2s, v1.2d
; CHECK-NEXT:    fcvtxn2 v1.4s, v2.2d
; CHECK-NEXT:    mvni.4h v2, #128, lsl #8
; CHECK-NEXT:    fcvtn v1.4h, v1.4s
; CHECK-NEXT:    bif.8b v0, v1, v2
; CHECK-NEXT:    ret
  %tmp0 = fptrunc <4 x double> %b to <4 x half>
  %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0)
  ret <4 x half> %r
}

declare <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %b) #0

;============ v8f16

; Magnitude and sign operands are both <8 x half>, no conversion in the IR.
define <8 x half> @test_copysign_v8f16_v8f16(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-LABEL: test_copysign_v8f16_v8f16:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    mvni.8h v2, #128, lsl #8
; CHECK-NEXT:    bif.16b v0, v1, v2
; CHECK-NEXT:    ret
  %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x half> %r
}

; Sign operand is truncated from <8 x float> before the copysign.
define <8 x half> @test_copysign_v8f16_v8f32(<8 x half> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v8f16_v8f32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    fcvtn v1.4h, v1.4s
; CHECK-NEXT:    fcvtn2 v1.8h, v2.4s
; CHECK-NEXT:    mvni.8h v2, #128, lsl #8
; CHECK-NEXT:    bif.16b v0, v1, v2
; CHECK-NEXT:    ret
  %tmp0 = fptrunc <8 x float> %b to <8 x half>
  %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %tmp0)
  ret <8 x half> %r
}

declare <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b) #0

;============ v4bf16

; Magnitude and sign operands are both <4 x bfloat>, no conversion in the IR.
define <4 x bfloat> @test_copysign_v4bf16_v4bf16(<4 x bfloat> %a, <4 x bfloat> %b) #0 {
; CHECK-LABEL: test_copysign_v4bf16_v4bf16:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    mvni.4h v2, #128, lsl #8
; CHECK-NEXT:    bif.8b v0, v1, v2
; CHECK-NEXT:    ret
  %r = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b)
  ret <4 x bfloat> %r
}

; Sign operand is truncated from <4 x float> before the copysign.
define <4 x bfloat> @test_copysign_v4bf16_v4f32(<4 x bfloat> %a, <4 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v4bf16_v4f32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    movi.4s v2, #1
; CHECK-NEXT:    movi.4s v3, #127, msl #8
; CHECK-NEXT:    ushr.4s v4, v1, #16
; CHECK-NEXT:    and.16b v2, v4, v2
; CHECK-NEXT:    add.4s v3, v1, v3
; CHECK-NEXT:    fcmeq.4s v4, v1, v1
; CHECK-NEXT:    orr.4s v1, #64, lsl #16
; CHECK-NEXT:    add.4s v2, v2, v3
; CHECK-NEXT:    bit.16b v1, v2, v4
; CHECK-NEXT:    mvni.4h v2, #128, lsl #8
; CHECK-NEXT:    shrn.4h v1, v1, #16
; CHECK-NEXT:    bif.8b v0, v1, v2
; CHECK-NEXT:    ret
  %tmp0 = fptrunc <4 x float> %b to <4 x bfloat>
  %r = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> %a, <4 x bfloat> %tmp0)
  ret <4 x bfloat> %r
}

; Sign operand is truncated from <4 x double> before the copysign.
define <4 x bfloat> @test_copysign_v4bf16_v4f64(<4 x bfloat> %a, <4 x double> %b) #0 {
; CHECK-LABEL: test_copysign_v4bf16_v4f64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    fcvtxn v1.2s, v1.2d
; CHECK-NEXT:    movi.4s v3, #127, msl #8
; CHECK-NEXT:    fcvtxn2 v1.4s, v2.2d
; CHECK-NEXT:    movi.4s v2, #1
; CHECK-NEXT:    ushr.4s v4, v1, #16
; CHECK-NEXT:    add.4s v3, v1, v3
; CHECK-NEXT:    and.16b v2, v4, v2
; CHECK-NEXT:    fcmeq.4s v4, v1, v1
; CHECK-NEXT:    orr.4s v1, #64, lsl #16
; CHECK-NEXT:    add.4s v2, v2, v3
; CHECK-NEXT:    bit.16b v1, v2, v4
; CHECK-NEXT:    mvni.4h v2, #128, lsl #8
; CHECK-NEXT:    shrn.4h v1, v1, #16
; CHECK-NEXT:    bif.8b v0, v1, v2
; CHECK-NEXT:    ret
  %tmp0 = fptrunc <4 x double> %b to <4 x bfloat>
  %r = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> %a, <4 x bfloat> %tmp0)
  ret <4 x bfloat> %r
}

declare <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b) #0

;============ v8bf16

; Magnitude and sign operands are both <8 x bfloat>, no conversion in the IR.
define <8 x bfloat> @test_copysign_v8bf16_v8bf16(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
; CHECK-LABEL: test_copysign_v8bf16_v8bf16:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    mvni.8h v2, #128, lsl #8
; CHECK-NEXT:    bif.16b v0, v1, v2
; CHECK-NEXT:    ret
  %r = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
  ret <8 x bfloat> %r
}

; Sign operand is truncated from <8 x float> before the copysign.
define <8 x bfloat> @test_copysign_v8bf16_v8f32(<8 x bfloat> %a, <8 x float> %b) #0 {
; CHECK-LABEL: test_copysign_v8bf16_v8f32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    movi.4s v3, #1
; CHECK-NEXT:    movi.4s v4, #127, msl #8
; CHECK-NEXT:    ushr.4s v5, v2, #16
; CHECK-NEXT:    ushr.4s v6, v1, #16
; CHECK-NEXT:    and.16b v5, v5, v3
; CHECK-NEXT:    add.4s v7, v2, v4
; CHECK-NEXT:    and.16b v3, v6, v3
; CHECK-NEXT:    add.4s v4, v1, v4
; CHECK-NEXT:    fcmeq.4s v6, v2, v2
; CHECK-NEXT:    orr.4s v2, #64, lsl #16
; CHECK-NEXT:    add.4s v5, v5, v7
; CHECK-NEXT:    fcmeq.4s v7, v1, v1
; CHECK-NEXT:    orr.4s v1, #64, lsl #16
; CHECK-NEXT:    add.4s v3, v3, v4
; CHECK-NEXT:    bit.16b v2, v5, v6
; CHECK-NEXT:    bit.16b v1, v3, v7
; CHECK-NEXT:    uzp2.8h v1, v1, v2
; CHECK-NEXT:    mvni.8h v2, #128, lsl #8
; CHECK-NEXT:    bif.16b v0, v1, v2
; CHECK-NEXT:    ret
  %tmp0 = fptrunc <8 x float> %b to <8 x bfloat>
  %r = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> %a, <8 x bfloat> %tmp0)
  ret <8 x bfloat> %r
}

declare <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b) #0

attributes #0 = { nounwind }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; FP16: {{.*}}
; NOFP16: {{.*}}