; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,CHECK_NO_EXTEND_ROUND
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_NO_EXTEND_ROUND
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_NO_EXTEND_ROUND
; RUN: llc -aarch64-sve-vector-bits-min=256 --combiner-vector-fcopysign-extend-round < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256,CHECK_EXTEND_ROUND
; RUN: llc -aarch64-sve-vector-bits-min=512 --combiner-vector-fcopysign-extend-round < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_EXTEND_ROUND
; RUN: llc -aarch64-sve-vector-bits-min=2048 --combiner-vector-fcopysign-extend-round < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,CHECK_EXTEND_ROUND

; Tests lowering of llvm.copysign on fixed-width vectors (f16/f32/f64, incl.
; mixed-width operands via fptrunc/fpext) under SVE with various minimum
; vector widths.

target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"

target triple = "aarch64-unknown-linux-gnu"

;============ f16

define void @test_copysign_v4f16_v4f16(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: test_copysign_v4f16_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvni v0.4h, #128, lsl #8
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    ldr d2, [x1]
; CHECK-NEXT:    bsl v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %a = load <4 x half>, ptr %ap
  %b = load <4 x half>, ptr %bp
  %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %b)
  store <4 x half> %r, ptr %ap
  ret void
}

define void @test_copysign_v8f16_v8f16(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: test_copysign_v8f16_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvni v0.8h, #128, lsl #8
; CHECK-NEXT:    ldr q1, [x0]
; CHECK-NEXT:    ldr q2, [x1]
; CHECK-NEXT:    bsl v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %a = load <8 x half>, ptr %ap
  %b = load <8 x half>, ptr %bp
  %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b)
  store <8 x half> %r, ptr %ap
  ret void
}

define void @test_copysign_v16f16_v16f16(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: test_copysign_v16f16_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    mov z0.h, #32767 // =0x7fff
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z2.h }, p0/z, [x1]
; CHECK-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; CHECK-NEXT:    st1h { z1.h }, p0, [x0]
; CHECK-NEXT:    ret
  %a = load <16 x half>, ptr %ap
  %b = load <16 x half>, ptr %bp
  %r = call <16 x half> @llvm.copysign.v16f16(<16 x half> %a, <16 x half> %b)
  store <16 x half> %r, ptr %ap
  ret void
}

define void @test_copysign_v32f16_v32f16(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: test_copysign_v32f16_v32f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    mov z0.h, #32767 // =0x7fff
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z2.h }, p0/z, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z3.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1h { z4.h }, p0/z, [x1]
; VBITS_GE_256-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; VBITS_GE_256-NEXT:    bsl z3.d, z3.d, z4.d, z0.d
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z3.h }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: test_copysign_v32f16_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    mov z0.h, #32767 // =0x7fff
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z2.h }, p0/z, [x1]
; VBITS_GE_512-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; VBITS_GE_512-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %a = load <32 x half>, ptr %ap
  %b = load <32 x half>, ptr %bp
  %r = call <32 x half> @llvm.copysign.v32f16(<32 x half> %a, <32 x half> %b)
  store <32 x half> %r, ptr %ap
  ret void
}

define void @test_copysign_v64f16_v64f16(ptr %ap, ptr %bp) vscale_range(8,0) #0 {
; CHECK-LABEL: test_copysign_v64f16_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    mov z0.h, #32767 // =0x7fff
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z2.h }, p0/z, [x1]
; CHECK-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; CHECK-NEXT:    st1h { z1.h }, p0, [x0]
; CHECK-NEXT:    ret
  %a = load <64 x half>, ptr %ap
  %b = load <64 x half>, ptr %bp
  %r = call <64 x half> @llvm.copysign.v64f16(<64 x half> %a, <64 x half> %b)
  store <64 x half> %r, ptr %ap
  ret void
}

define void @test_copysign_v128f16_v128f16(ptr %ap, ptr %bp) vscale_range(16,0) #0 {
; CHECK-LABEL: test_copysign_v128f16_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    mov z0.h, #32767 // =0x7fff
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z2.h }, p0/z, [x1]
; CHECK-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; CHECK-NEXT:    st1h { z1.h }, p0, [x0]
; CHECK-NEXT:    ret
  %a = load <128 x half>, ptr %ap
  %b = load <128 x half>, ptr %bp
  %r = call <128 x half> @llvm.copysign.v128f16(<128 x half> %a, <128 x half> %b)
  store <128 x half> %r, ptr %ap
  ret void
}

;============ f32

define void @test_copysign_v2f32_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: test_copysign_v2f32_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvni v0.2s, #128, lsl #24
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    ldr d2, [x1]
; CHECK-NEXT:    bsl v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %ap
  %b = load <2 x float>, ptr %bp
  %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b)
  store <2 x float> %r, ptr %ap
  ret void
}

define void @test_copysign_v4f32_v4f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: test_copysign_v4f32_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mvni v0.4s, #128, lsl #24
; CHECK-NEXT:    ldr q1, [x0]
; CHECK-NEXT:    ldr q2, [x1]
; CHECK-NEXT:    bsl v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %ap
  %b = load <4 x float>, ptr %bp
  %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
  store <4 x float> %r, ptr %ap
  ret void
}

define void @test_copysign_v8f32_v8f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: test_copysign_v8f32_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    mov z0.s, #0x7fffffff
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x1]
; CHECK-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; CHECK-NEXT:    st1w { z1.s }, p0, [x0]
; CHECK-NEXT:    ret
  %a = load <8 x float>, ptr %ap
  %b = load <8 x float>, ptr %bp
  %r = call <8 x float> @llvm.copysign.v8f32(<8 x float> %a, <8 x float> %b)
  store <8 x float> %r, ptr %ap
  ret void
}

define void @test_copysign_v16f32_v16f32(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: test_copysign_v16f32_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    mov z0.s, #0x7fffffff
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z3.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1w { z4.s }, p0/z, [x1]
; VBITS_GE_256-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; VBITS_GE_256-NEXT:    bsl z3.d, z3.d, z4.d, z0.d
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z3.s }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: test_copysign_v16f32_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    mov z0.s, #0x7fffffff
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z2.s }, p0/z, [x1]
; VBITS_GE_512-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; VBITS_GE_512-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %a = load <16 x float>, ptr %ap
  %b = load <16 x float>, ptr %bp
  %r = call <16 x float> @llvm.copysign.v16f32(<16 x float> %a, <16 x float> %b)
  store <16 x float> %r, ptr %ap
  ret void
}

define void @test_copysign_v32f32_v32f32(ptr %ap, ptr %bp) vscale_range(8,0) #0 {
; CHECK-LABEL: test_copysign_v32f32_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    mov z0.s, #0x7fffffff
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x1]
; CHECK-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; CHECK-NEXT:    st1w { z1.s }, p0, [x0]
; CHECK-NEXT:    ret
  %a = load <32 x float>, ptr %ap
  %b = load <32 x float>, ptr %bp
  %r = call <32 x float> @llvm.copysign.v32f32(<32 x float> %a, <32 x float> %b)
  store <32 x float> %r, ptr %ap
  ret void
}

define void @test_copysign_v64f32_v64f32(ptr %ap, ptr %bp) vscale_range(16,0) #0 {
; CHECK-LABEL: test_copysign_v64f32_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    mov z0.s, #0x7fffffff
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x1]
; CHECK-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; CHECK-NEXT:    st1w { z1.s }, p0, [x0]
; CHECK-NEXT:    ret
  %a = load <64 x float>, ptr %ap
  %b = load <64 x float>, ptr %bp
  %r = call <64 x float> @llvm.copysign.v64f32(<64 x float> %a, <64 x float> %b)
  store <64 x float> %r, ptr %ap
  ret void
}

;============ f64

define void @test_copysign_v2f64_v2f64(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: test_copysign_v2f64_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.2d, #0xffffffffffffffff
; CHECK-NEXT:    ldr q1, [x0]
; CHECK-NEXT:    ldr q2, [x1]
; CHECK-NEXT:    fneg v0.2d, v0.2d
; CHECK-NEXT:    bsl v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %ap
  %b = load <2 x double>, ptr %bp
  %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b)
  store <2 x double> %r, ptr %ap
  ret void
}

define void @test_copysign_v4f64_v4f64(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: test_copysign_v4f64_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    mov z0.d, #0x7fffffffffffffff
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z2.d }, p0/z, [x1]
; CHECK-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; CHECK-NEXT:    st1d { z1.d }, p0, [x0]
; CHECK-NEXT:    ret
  %a = load <4 x double>, ptr %ap
  %b = load <4 x double>, ptr %bp
  %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b)
  store <4 x double> %r, ptr %ap
  ret void
}

define void @test_copysign_v8f64_v8f64(ptr %ap, ptr %bp) #0 {
; VBITS_GE_256-LABEL: test_copysign_v8f64_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    mov z0.d, #0x7fffffffffffffff
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z3.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1d { z4.d }, p0/z, [x1]
; VBITS_GE_256-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; VBITS_GE_256-NEXT:    bsl z3.d, z3.d, z4.d, z0.d
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z3.d }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: test_copysign_v8f64_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    mov z0.d, #0x7fffffffffffffff
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z2.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; VBITS_GE_512-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %a = load <8 x double>, ptr %ap
  %b = load <8 x double>, ptr %bp
  %r = call <8 x double> @llvm.copysign.v8f64(<8 x double> %a, <8 x double> %b)
  store <8 x double> %r, ptr %ap
  ret void
}

define void @test_copysign_v16f64_v16f64(ptr %ap, ptr %bp) vscale_range(8,0) #0 {
; CHECK-LABEL: test_copysign_v16f64_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    mov z0.d, #0x7fffffffffffffff
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z2.d }, p0/z, [x1]
; CHECK-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; CHECK-NEXT:    st1d { z1.d }, p0, [x0]
; CHECK-NEXT:    ret
  %a = load <16 x double>, ptr %ap
  %b = load <16 x double>, ptr %bp
  %r = call <16 x double> @llvm.copysign.v16f64(<16 x double> %a, <16 x double> %b)
  store <16 x double> %r, ptr %ap
  ret void
}

define void @test_copysign_v32f64_v32f64(ptr %ap, ptr %bp) vscale_range(16,0) #0 {
; CHECK-LABEL: test_copysign_v32f64_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    mov z0.d, #0x7fffffffffffffff
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z2.d }, p0/z, [x1]
; CHECK-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; CHECK-NEXT:    st1d { z1.d }, p0, [x0]
; CHECK-NEXT:    ret
  %a = load <32 x double>, ptr %ap
  %b = load <32 x double>, ptr %bp
  %r = call <32 x double> @llvm.copysign.v32f64(<32 x double> %a, <32 x double> %b)
  store <32 x double> %r, ptr %ap
  ret void
}

;============ v2f32

define void @test_copysign_v2f32_v2f64(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: test_copysign_v2f32_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x1]
; CHECK-NEXT:    mvni v1.2s, #128, lsl #24
; CHECK-NEXT:    ldr d2, [x0]
; CHECK-NEXT:    fcvtn v0.2s, v0.2d
; CHECK-NEXT:    bit v0.8b, v2.8b, v1.8b
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %a = load <2 x float>, ptr %ap
  %b = load <2 x double>, ptr %bp
  %tmp0 = fptrunc <2 x double> %b to <2 x float>
  %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %tmp0)
  store <2 x float> %r, ptr %ap
  ret void
}

;============ v4f32

; SplitVecOp #1
define void @test_copysign_v4f32_v4f64(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: test_copysign_v4f32_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    mvni v1.4s, #128, lsl #24
; CHECK-NEXT:    ldr q2, [x0]
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcvt z0.s, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    bit v0.16b, v2.16b, v1.16b
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %a = load <4 x float>, ptr %ap
  %b = load <4 x double>, ptr %bp
  %tmp0 = fptrunc <4 x double> %b to <4 x float>
  %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %tmp0)
  store <4 x float> %r, ptr %ap
  ret void
}

;============ v2f64

define void @test_copysign_v2f64_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: test_copysign_v2f64_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.2d, #0xffffffffffffffff
; CHECK-NEXT:    ldr d1, [x1]
; CHECK-NEXT:    ldr q2, [x0]
; CHECK-NEXT:    fcvtl v1.2d, v1.2s
; CHECK-NEXT:    fneg v0.2d, v0.2d
; CHECK-NEXT:    bsl v0.16b, v2.16b, v1.16b
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %a = load <2 x double>, ptr %ap
  %b = load < 2 x float>, ptr %bp
  %tmp0 = fpext <2 x float> %b to <2 x double>
  %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %tmp0)
  store <2 x double> %r, ptr %ap
  ret void
}

;============ v4f64

; SplitVecRes mismatched
define void @test_copysign_v4f64_v4f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK_NO_EXTEND_ROUND-LABEL: test_copysign_v4f64_v4f32:
; CHECK_NO_EXTEND_ROUND:       // %bb.0:
; CHECK_NO_EXTEND_ROUND-NEXT:    ptrue p0.d, vl4
; CHECK_NO_EXTEND_ROUND-NEXT:    mov z1.d, #0x7fffffffffffffff
; CHECK_NO_EXTEND_ROUND-NEXT:    ld1w { z0.d }, p0/z, [x1]
; CHECK_NO_EXTEND_ROUND-NEXT:    ld1d { z2.d }, p0/z, [x0]
; CHECK_NO_EXTEND_ROUND-NEXT:    fcvt z0.d, p0/m, z0.s
; CHECK_NO_EXTEND_ROUND-NEXT:    bsl z2.d, z2.d, z0.d, z1.d
; CHECK_NO_EXTEND_ROUND-NEXT:    st1d { z2.d }, p0, [x0]
; CHECK_NO_EXTEND_ROUND-NEXT:    ret
;
; CHECK_EXTEND_ROUND-LABEL: test_copysign_v4f64_v4f32:
; CHECK_EXTEND_ROUND:       // %bb.0:
; CHECK_EXTEND_ROUND-NEXT:    ldr q0, [x1]
; CHECK_EXTEND_ROUND-NEXT:    ptrue p0.d, vl4
; CHECK_EXTEND_ROUND-NEXT:    mov z1.d, #0x7fffffffffffffff
; CHECK_EXTEND_ROUND-NEXT:    uunpklo z0.d, z0.s
; CHECK_EXTEND_ROUND-NEXT:    ld1d { z2.d }, p0/z, [x0]
; CHECK_EXTEND_ROUND-NEXT:    fcvt z0.d, p0/m, z0.s
; CHECK_EXTEND_ROUND-NEXT:    bsl z2.d, z2.d, z0.d, z1.d
; CHECK_EXTEND_ROUND-NEXT:    st1d { z2.d }, p0, [x0]
; CHECK_EXTEND_ROUND-NEXT:    ret
  %a = load <4 x double>, ptr %ap
  %b = load <4 x float>, ptr %bp
  %tmp0 = fpext <4 x float> %b to <4 x double>
  %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %tmp0)
  store <4 x double> %r, ptr %ap
  ret void
}

;============ v4f16

define void @test_copysign_v4f16_v4f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: test_copysign_v4f16_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x1]
; CHECK-NEXT:    mvni v1.4h, #128, lsl #8
; CHECK-NEXT:    ldr d2, [x0]
; CHECK-NEXT:    fcvtn v0.4h, v0.4s
; CHECK-NEXT:    bit v0.8b, v2.8b, v1.8b
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %a = load <4 x half>, ptr %ap
  %b = load <4 x float>, ptr %bp
  %tmp0 = fptrunc <4 x float> %b to <4 x half>
  %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0)
  store <4 x half> %r, ptr %ap
  ret void
}

define void @test_copysign_v4f16_v4f64(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: test_copysign_v4f16_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    mvni v1.4h, #128, lsl #8
; CHECK-NEXT:    ldr d2, [x0]
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcvt z0.h, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    bit v0.8b, v2.8b, v1.8b
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %a = load <4 x half>, ptr %ap
  %b = load <4 x double>, ptr %bp
  %tmp0 = fptrunc <4 x double> %b to <4 x half>
  %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0)
  store <4 x half> %r, ptr %ap
  ret void
}

declare <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %b) #0

;============ v8f16


define void @test_copysign_v8f16_v8f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
; CHECK-LABEL: test_copysign_v8f16_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    mvni v1.8h, #128, lsl #8
; CHECK-NEXT:    ldr q2, [x0]
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x1]
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fcvt z0.h, p0/m, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    bit v0.16b, v2.16b, v1.16b
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %a = load <8 x half>, ptr %ap
  %b = load <8 x float>, ptr %bp
  %tmp0 = fptrunc <8 x float> %b to <8 x half>
  %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %tmp0)
  store <8 x half> %r, ptr %ap
  ret void
}

declare <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b) #0
declare <16 x half> @llvm.copysign.v16f16(<16 x half> %a, <16 x half> %b) #0
declare <32 x half> @llvm.copysign.v32f16(<32 x half> %a, <32 x half> %b) #0
declare <64 x half> @llvm.copysign.v64f16(<64 x half> %a, <64 x half> %b) #0
declare <128 x half> @llvm.copysign.v128f16(<128 x half> %a, <128 x half> %b) #0

declare <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b) #0
declare <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b) #0
declare <8 x float> @llvm.copysign.v8f32(<8 x float> %a, <8 x float> %b) #0
declare <16 x float> @llvm.copysign.v16f32(<16 x float> %a, <16 x float> %b) #0
declare <32 x float> @llvm.copysign.v32f32(<32 x float> %a, <32 x float> %b) #0
declare <64 x float> @llvm.copysign.v64f32(<64 x float> %a, <64 x float> %b) #0

declare <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b) #0
declare <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b) #0
declare <8 x double> @llvm.copysign.v8f64(<8 x double> %a, <8 x double> %b) #0
declare <16 x double> @llvm.copysign.v16f64(<16 x double> %a, <16 x double> %b) #0
declare <32 x double> @llvm.copysign.v32f64(<32 x double> %a, <32 x double> %b) #0

attributes #0 = { "target-features"="+sve2" }