; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,CHECK_NO_EXTEND_ROUND
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_NO_EXTEND_ROUND
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_NO_EXTEND_ROUND
; RUN: llc -aarch64-sve-vector-bits-min=256 --combiner-vector-fcopysign-extend-round < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,CHECK_EXTEND_ROUND
; RUN: llc -aarch64-sve-vector-bits-min=512 --combiner-vector-fcopysign-extend-round < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_EXTEND_ROUND
; RUN: llc -aarch64-sve-vector-bits-min=2048 --combiner-vector-fcopysign-extend-round < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_EXTEND_ROUND

target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"

target triple = "aarch64-unknown-linux-gnu"

;============ f16

define void @test_copysign_v4f16_v4f16(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: test_copysign_v4f16_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvni v0.4h, #128, lsl #8
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    ldr d2, [x1]
; CHECK-NEXT:    bsl v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %a = load <4 x half>, ptr %ap
  %b = load <4 x half>, ptr %bp
  %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %b)
  store <4 x half> %r, ptr %ap
  ret void
}

define void @test_copysign_v8f16_v8f16(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: test_copysign_v8f16_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvni v0.8h, #128, lsl #8
; CHECK-NEXT:    ldr q1, [x0]
; CHECK-NEXT:    ldr q2, [x1]
; CHECK-NEXT:    bsl v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %a = load <8 x half>, ptr %ap
  %b = load <8 x half>, ptr %bp
  %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b)
  store <8 x half> %r, ptr %ap
  ret void
}

define void @test_copysign_v16f16_v16f16(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: test_copysign_v16f16_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    and z1.h, z1.h, #0x8000
; CHECK-NEXT:    and z0.h, z0.h, #0x7fff
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %a = load <16 x half>, ptr %ap
  %b = load <16 x half>, ptr %bp
  %r = call <16 x half> @llvm.copysign.v16f16(<16 x half> %a, <16 x half> %b)
  store <16 x half> %r, ptr %ap
  ret void
}
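; From here on, <32 x half> no longer fits in a single 256-bit register, so
; the -aarch64-sve-vector-bits-min=256 run is expected to split the operation
; into two VL-16 halves, addressing the upper half via [x0, x8, lsl #1],
; while the 512-bit-and-up runs keep it in one register.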
define void @test_copysign_v32f16_v32f16(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: test_copysign_v32f16_v32f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z2.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1h { z3.h }, p0/z, [x1]
; VBITS_GE_256-NEXT:    and z1.h, z1.h, #0x8000
; VBITS_GE_256-NEXT:    and z0.h, z0.h, #0x7fff
; VBITS_GE_256-NEXT:    and z2.h, z2.h, #0x7fff
; VBITS_GE_256-NEXT:    and z3.h, z3.h, #0x8000
; VBITS_GE_256-NEXT:    orr z0.d, z0.d, z1.d
; VBITS_GE_256-NEXT:    orr z1.d, z2.d, z3.d
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: test_copysign_v32f16_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT:    and z1.h, z1.h, #0x8000
; VBITS_GE_512-NEXT:    and z0.h, z0.h, #0x7fff
; VBITS_GE_512-NEXT:    orr z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %a = load <32 x half>, ptr %ap
  %b = load <32 x half>, ptr %bp
  %r = call <32 x half> @llvm.copysign.v32f16(<32 x half> %a, <32 x half> %b)
  store <32 x half> %r, ptr %ap
  ret void
}

define void @test_copysign_v64f16_v64f16(ptr %ap, ptr %bp) vscale_range(8,0) #0 {
; CHECK-LABEL: test_copysign_v64f16_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    and z1.h, z1.h, #0x8000
; CHECK-NEXT:    and z0.h, z0.h, #0x7fff
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %a = load <64 x half>, ptr %ap
  %b = load <64 x half>, ptr %bp
  %r = call <64 x half> @llvm.copysign.v64f16(<64 x half> %a, <64 x half> %b)
  store <64 x half> %r, ptr %ap
  ret void
}

define void @test_copysign_v128f16_v128f16(ptr %ap, ptr %bp) vscale_range(16,0) #0 {
; CHECK-LABEL: test_copysign_v128f16_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    and z1.h, z1.h, #0x8000
; CHECK-NEXT:    and z0.h, z0.h, #0x7fff
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %a = load <128 x half>, ptr %ap
  %b = load <128 x half>, ptr %bp
  %r = call <128 x half> @llvm.copysign.v128f16(<128 x half> %a, <128 x half> %b)
  store <128 x half> %r, ptr %ap
  ret void
}
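; All of the f16 cases above reduce copysign to bit arithmetic, per lane:
;   result = (a & 0x7fff) | (b & 0x8000)
; Vectors that fit a NEON register use BSL with the inverted sign mask built
; by MVNI; SVE-sized vectors use the immediate forms of AND plus an ORR.
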
;============ f32

define void @test_copysign_v2f32_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: test_copysign_v2f32_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvni v0.2s, #128, lsl #24
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    ldr d2, [x1]
; CHECK-NEXT:    bsl v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %ap
  %b = load <2 x float>, ptr %bp
  %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b)
  store <2 x float> %r, ptr %ap
  ret void
}

define void @test_copysign_v4f32_v4f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: test_copysign_v4f32_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvni v0.4s, #128, lsl #24
; CHECK-NEXT:    ldr q1, [x0]
; CHECK-NEXT:    ldr q2, [x1]
; CHECK-NEXT:    bsl v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %ap
  %b = load <4 x float>, ptr %bp
  %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
  store <4 x float> %r, ptr %ap
  ret void
}

define void @test_copysign_v8f32_v8f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: test_copysign_v8f32_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    and z1.s, z1.s, #0x80000000
; CHECK-NEXT:    and z0.s, z0.s, #0x7fffffff
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %ap
  %b = load <8 x float>, ptr %bp
  %r = call <8 x float> @llvm.copysign.v8f32(<8 x float> %a, <8 x float> %b)
  store <8 x float> %r, ptr %ap
  ret void
}

define void @test_copysign_v16f32_v16f32(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: test_copysign_v16f32_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z2.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1w { z3.s }, p0/z, [x1]
; VBITS_GE_256-NEXT:    and z1.s, z1.s, #0x80000000
; VBITS_GE_256-NEXT:    and z0.s, z0.s, #0x7fffffff
; VBITS_GE_256-NEXT:    and z2.s, z2.s, #0x7fffffff
; VBITS_GE_256-NEXT:    and z3.s, z3.s, #0x80000000
; VBITS_GE_256-NEXT:    orr z0.d, z0.d, z1.d
; VBITS_GE_256-NEXT:    orr z1.d, z2.d, z3.d
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: test_copysign_v16f32_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT:    and z1.s, z1.s, #0x80000000
; VBITS_GE_512-NEXT:    and z0.s, z0.s, #0x7fffffff
; VBITS_GE_512-NEXT:    orr z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %a = load <16 x float>, ptr %ap
  %b = load <16 x float>, ptr %bp
  %r = call <16 x float> @llvm.copysign.v16f32(<16 x float> %a, <16 x float> %b)
  store <16 x float> %r, ptr %ap
  ret void
}

define void @test_copysign_v32f32_v32f32(ptr %ap, ptr %bp) vscale_range(8,0) #0 {
; CHECK-LABEL: test_copysign_v32f32_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    and z1.s, z1.s, #0x80000000
; CHECK-NEXT:    and z0.s, z0.s, #0x7fffffff
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %a = load <32 x float>, ptr %ap
  %b = load <32 x float>, ptr %bp
  %r = call <32 x float> @llvm.copysign.v32f32(<32 x float> %a, <32 x float> %b)
  store <32 x float> %r, ptr %ap
  ret void
}

define void @test_copysign_v64f32_v64f32(ptr %ap, ptr %bp) vscale_range(16,0) #0 {
; CHECK-LABEL: test_copysign_v64f32_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    and z1.s, z1.s, #0x80000000
; CHECK-NEXT:    and z0.s, z0.s, #0x7fffffff
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %a = load <64 x float>, ptr %ap
  %b = load <64 x float>, ptr %bp
  %r = call <64 x float> @llvm.copysign.v64f32(<64 x float> %a, <64 x float> %b)
  store <64 x float> %r, ptr %ap
  ret void
}
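; The f64 section below differs in one detail: MVNI has no 64-bit element
; form, so the NEON-sized case materialises the 0x7fffffffffffffff mask as
; all-ones (MOVI) followed by FNEG, which simply flips each lane's sign bit.
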
;============ f64

define void @test_copysign_v2f64_v2f64(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: test_copysign_v2f64_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.2d, #0xffffffffffffffff
; CHECK-NEXT:    ldr q1, [x0]
; CHECK-NEXT:    ldr q2, [x1]
; CHECK-NEXT:    fneg v0.2d, v0.2d
; CHECK-NEXT:    bsl v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %ap
  %b = load <2 x double>, ptr %bp
  %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b)
  store <2 x double> %r, ptr %ap
  ret void
}

define void @test_copysign_v4f64_v4f64(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: test_copysign_v4f64_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    and z1.d, z1.d, #0x8000000000000000
; CHECK-NEXT:    and z0.d, z0.d, #0x7fffffffffffffff
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %a = load <4 x double>, ptr %ap
  %b = load <4 x double>, ptr %bp
  %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b)
  store <4 x double> %r, ptr %ap
  ret void
}

define void @test_copysign_v8f64_v8f64(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: test_copysign_v8f64_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
; VBITS_GE_256-NEXT:    and z1.d, z1.d, #0x8000000000000000
; VBITS_GE_256-NEXT:    and z0.d, z0.d, #0x7fffffffffffffff
; VBITS_GE_256-NEXT:    and z2.d, z2.d, #0x7fffffffffffffff
; VBITS_GE_256-NEXT:    and z3.d, z3.d, #0x8000000000000000
; VBITS_GE_256-NEXT:    orr z0.d, z0.d, z1.d
; VBITS_GE_256-NEXT:    orr z1.d, z2.d, z3.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: test_copysign_v8f64_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    and z1.d, z1.d, #0x8000000000000000
; VBITS_GE_512-NEXT:    and z0.d, z0.d, #0x7fffffffffffffff
; VBITS_GE_512-NEXT:    orr z0.d, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %a = load <8 x double>, ptr %ap
  %b = load <8 x double>, ptr %bp
  %r = call <8 x double> @llvm.copysign.v8f64(<8 x double> %a, <8 x double> %b)
  store <8 x double> %r, ptr %ap
  ret void
}

define void @test_copysign_v16f64_v16f64(ptr %ap, ptr %bp) vscale_range(8,0) #0 {
; CHECK-LABEL: test_copysign_v16f64_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    and z1.d, z1.d, #0x8000000000000000
; CHECK-NEXT:    and z0.d, z0.d, #0x7fffffffffffffff
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %a = load <16 x double>, ptr %ap
  %b = load <16 x double>, ptr %bp
  %r = call <16 x double> @llvm.copysign.v16f64(<16 x double> %a, <16 x double> %b)
  store <16 x double> %r, ptr %ap
  ret void
}

define void @test_copysign_v32f64_v32f64(ptr %ap, ptr %bp) vscale_range(16,0) #0 {
; CHECK-LABEL: test_copysign_v32f64_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    and z1.d, z1.d, #0x8000000000000000
; CHECK-NEXT:    and z0.d, z0.d, #0x7fffffffffffffff
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %a = load <32 x double>, ptr %ap
  %b = load <32 x double>, ptr %bp
  %r = call <32 x double> @llvm.copysign.v32f64(<32 x double> %a, <32 x double> %b)
  store <32 x double> %r, ptr %ap
  ret void
}
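; The remaining tests take the sign from a vector of a different element
; width, inserting an fptrunc or fpext of %b ahead of the copysign. The RUN
; lines with --combiner-vector-fcopysign-extend-round enable the DAG combine
; for this convert-then-copysign pattern; in this file the
; CHECK_NO_EXTEND_ROUND and CHECK_EXTEND_ROUND outputs only diverge for
; test_copysign_v4f64_v4f32.
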
;============ v2f32

define void @test_copysign_v2f32_v2f64(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: test_copysign_v2f32_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x1]
; CHECK-NEXT:    mvni v1.2s, #128, lsl #24
; CHECK-NEXT:    ldr d2, [x0]
; CHECK-NEXT:    fcvtn v0.2s, v0.2d
; CHECK-NEXT:    bit v0.8b, v2.8b, v1.8b
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %ap
  %b = load <2 x double>, ptr %bp
  %tmp0 = fptrunc <2 x double> %b to <2 x float>
  %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %tmp0)
  store <2 x float> %r, ptr %ap
  ret void
}

;============ v4f32

; SplitVecOp #1
define void @test_copysign_v4f32_v4f64(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: test_copysign_v4f32_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    mvni v1.4s, #128, lsl #24
; CHECK-NEXT:    ldr q2, [x0]
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcvt z0.s, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    bit v0.16b, v2.16b, v1.16b
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %ap
  %b = load <4 x double>, ptr %bp
  %tmp0 = fptrunc <4 x double> %b to <4 x float>
  %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %tmp0)
  store <4 x float> %r, ptr %ap
  ret void
}

;============ v2f64

define void @test_copysign_v2f64_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: test_copysign_v2f64_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.2d, #0xffffffffffffffff
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    ldr q2, [x0]
; CHECK-NEXT:    fcvtl v1.2d, v1.2s
; CHECK-NEXT:    fneg v0.2d, v0.2d
; CHECK-NEXT:    bsl v0.16b, v2.16b, v1.16b
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %ap
  %b = load <2 x float>, ptr %bp
  %tmp0 = fpext <2 x float> %b to <2 x double>
  %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %tmp0)
  store <2 x double> %r, ptr %ap
  ret void
}
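; The test below is the one place where the two combiner modes diverge:
; without the combine the sign operand is brought in with an extending ld1w
; and converted in place, whereas with it the input is loaded as a NEON
; q-register and widened with uunpklo before the fcvt.
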
;============ v4f64

; SplitVecRes mismatched
define void @test_copysign_v4f64_v4f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK_NO_EXTEND_ROUND-LABEL: test_copysign_v4f64_v4f32:
; CHECK_NO_EXTEND_ROUND:       // %bb.0:
; CHECK_NO_EXTEND_ROUND-NEXT:    ptrue p0.d, vl4
; CHECK_NO_EXTEND_ROUND-NEXT:    ld1w { z0.d }, p0/z, [x1]
; CHECK_NO_EXTEND_ROUND-NEXT:    ld1d { z1.d }, p0/z, [x0]
; CHECK_NO_EXTEND_ROUND-NEXT:    fcvt z0.d, p0/m, z0.s
; CHECK_NO_EXTEND_ROUND-NEXT:    and z1.d, z1.d, #0x7fffffffffffffff
; CHECK_NO_EXTEND_ROUND-NEXT:    and z0.d, z0.d, #0x8000000000000000
; CHECK_NO_EXTEND_ROUND-NEXT:    orr z0.d, z1.d, z0.d
; CHECK_NO_EXTEND_ROUND-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK_NO_EXTEND_ROUND-NEXT:    ret
;
; CHECK_EXTEND_ROUND-LABEL: test_copysign_v4f64_v4f32:
; CHECK_EXTEND_ROUND:       // %bb.0:
; CHECK_EXTEND_ROUND-NEXT:    ldr q0, [x1]
; CHECK_EXTEND_ROUND-NEXT:    ptrue p0.d, vl4
; CHECK_EXTEND_ROUND-NEXT:    uunpklo z0.d, z0.s
; CHECK_EXTEND_ROUND-NEXT:    ld1d { z1.d }, p0/z, [x0]
; CHECK_EXTEND_ROUND-NEXT:    and z1.d, z1.d, #0x7fffffffffffffff
; CHECK_EXTEND_ROUND-NEXT:    fcvt z0.d, p0/m, z0.s
; CHECK_EXTEND_ROUND-NEXT:    and z0.d, z0.d, #0x8000000000000000
; CHECK_EXTEND_ROUND-NEXT:    orr z0.d, z1.d, z0.d
; CHECK_EXTEND_ROUND-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK_EXTEND_ROUND-NEXT:    ret
  %a = load <4 x double>, ptr %ap
  %b = load <4 x float>, ptr %bp
  %tmp0 = fpext <4 x float> %b to <4 x double>
  %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %tmp0)
  store <4 x double> %r, ptr %ap
  ret void
}

;============ v4f16

define void @test_copysign_v4f16_v4f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: test_copysign_v4f16_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x1]
; CHECK-NEXT:    mvni v1.4h, #128, lsl #8
; CHECK-NEXT:    ldr d2, [x0]
; CHECK-NEXT:    fcvtn v0.4h, v0.4s
; CHECK-NEXT:    bit v0.8b, v2.8b, v1.8b
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %a = load <4 x half>, ptr %ap
  %b = load <4 x float>, ptr %bp
  %tmp0 = fptrunc <4 x float> %b to <4 x half>
  %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0)
  store <4 x half> %r, ptr %ap
  ret void
}

define void @test_copysign_v4f16_v4f64(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: test_copysign_v4f16_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    mvni v1.4h, #128, lsl #8
; CHECK-NEXT:    ldr d2, [x0]
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcvt z0.h, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    bit v0.8b, v2.8b, v1.8b
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %a = load <4 x half>, ptr %ap
  %b = load <4 x double>, ptr %bp
  %tmp0 = fptrunc <4 x double> %b to <4 x half>
  %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0)
  store <4 x half> %r, ptr %ap
  ret void
}

declare <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %b) #0

;============ v8f16

define void @test_copysign_v8f16_v8f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: test_copysign_v8f16_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    mvni v1.8h, #128, lsl #8
; CHECK-NEXT:    ldr q2, [x0]
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x1]
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fcvt z0.h, p0/m, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    bit v0.16b, v2.16b, v1.16b
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %a = load <8 x half>, ptr %ap
  %b = load <8 x float>, ptr %bp
  %tmp0 = fptrunc <8 x float> %b to <8 x half>
  %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %tmp0)
  store <8 x half> %r, ptr %ap
  ret void
}
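; In the truncating cases above, each halving of the element size after the
; predicated fcvt needs one uzp1 to pack the narrowed elements into the low
; part of the vector before the final NEON bit select.
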
declare <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b) #0
declare <16 x half> @llvm.copysign.v16f16(<16 x half> %a, <16 x half> %b) #0
declare <32 x half> @llvm.copysign.v32f16(<32 x half> %a, <32 x half> %b) #0
declare <64 x half> @llvm.copysign.v64f16(<64 x half> %a, <64 x half> %b) #0
declare <128 x half> @llvm.copysign.v128f16(<128 x half> %a, <128 x half> %b) #0

declare <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b) #0
declare <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b) #0
declare <8 x float> @llvm.copysign.v8f32(<8 x float> %a, <8 x float> %b) #0
declare <16 x float> @llvm.copysign.v16f32(<16 x float> %a, <16 x float> %b) #0
declare <32 x float> @llvm.copysign.v32f32(<32 x float> %a, <32 x float> %b) #0
declare <64 x float> @llvm.copysign.v64f32(<64 x float> %a, <64 x float> %b) #0

declare <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b) #0
declare <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b) #0
declare <8 x double> @llvm.copysign.v8f64(<8 x double> %a, <8 x double> %b) #0
declare <16 x double> @llvm.copysign.v16f64(<16 x double> %a, <16 x double> %b) #0
declare <32 x double> @llvm.copysign.v32f64(<32 x double> %a, <32 x double> %b) #0

attributes #0 = { "target-features"="+sve" }