; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=SVE
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=SVE2
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=SVE2
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE

; Codegen test for llvm.copysign on fixed-length vectors. The four llc
; invocations above map onto three check prefixes:
;   -mattr=+sve,  -force-streaming-compatible      -> SVE prefix
;   -mattr=+sve2, -force-streaming-compatible      -> SVE2 prefix
;   -mattr=+sme,  -force-streaming                 -> SVE2 prefix (shared)
;   no -mattr,    -force-streaming-compatible      -> NONEON-NOSVE prefix
; Every function loads the magnitude operand through %ap and the sign operand
; through %bp, and stores the copysign result back through %ap.
; The assertion lines are tool-generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"

target triple = "aarch64-unknown-linux-gnu"

;============ f16

; <4 x half> copysign, both operands already <4 x half>.
; Expected lowering, per the checks below:
;   SVE prefix          - AND the sign source with 0x8000, AND the magnitude
;                         source with 0x7fff, then ORR the two.
;   SVE2 prefix         - one BSL using a splatted 0x7fff mask.
;   NONEON-NOSVE prefix - both vectors are spilled to the stack; each lane is
;                         widened with fcvt, the sign is taken from the high
;                         byte of the spilled half (ldrb + tst #0x80), and the
;                         result is picked with fabs/fneg/fcsel before being
;                         narrowed back to half.
define void @test_copysign_v4f16_v4f16(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v4f16_v4f16:
; SVE:       // %bb.0:
; SVE-NEXT:    ldr d0, [x0]
; SVE-NEXT:    ldr d1, [x1]
; SVE-NEXT:    and z1.h, z1.h, #0x8000
; SVE-NEXT:    and z0.h, z0.h, #0x7fff
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    str d0, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: test_copysign_v4f16_v4f16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    mov z0.h, #32767 // =0x7fff
; SVE2-NEXT:    ldr d1, [x0]
; SVE2-NEXT:    ldr d2, [x1]
; SVE2-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; SVE2-NEXT:    str d1, [x0]
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: test_copysign_v4f16_v4f16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #48
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT:    ldr d0, [x1]
; NONEON-NOSVE-NEXT:    ldr d1, [x0]
; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #24]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #38]
; NONEON-NOSVE-NEXT:    str h0, [sp, #20]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #36]
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
; NONEON-NOSVE-NEXT:    str h0, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #34]
; NONEON-NOSVE-NEXT:    tst w8, #0x80
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
; NONEON-NOSVE-NEXT:    str h0, [sp, #12]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #32]
; NONEON-NOSVE-NEXT:    str h0, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #30]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w8, #0x80
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #13]
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    str h0, [sp, #46]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #28]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w8, #0x80
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #9]
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    str h0, [sp, #44]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #26]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w8, #0x80
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    str h0, [sp, #42]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #24]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    str h0, [sp, #40]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #40]
; NONEON-NOSVE-NEXT:    str d0, [x0]
; NONEON-NOSVE-NEXT:    add sp, sp, #48
; NONEON-NOSVE-NEXT:    ret
  %a = load <4 x half>, ptr %ap
  %b = load <4 x half>, ptr %bp
  %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %b)
  store <4 x half> %r, ptr %ap
  ret void
}

; <8 x half> copysign: same three lowering shapes as the <4 x half> case, but
; operating on a full 128-bit q register (eight scalar lanes in the
; NONEON-NOSVE expansion).
define void @test_copysign_v8f16_v8f16(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v8f16_v8f16:
; SVE:       // %bb.0:
; SVE-NEXT:    ldr q0, [x0]
; SVE-NEXT:    ldr q1, [x1]
; SVE-NEXT:    and z1.h, z1.h, #0x8000
; SVE-NEXT:    and z0.h, z0.h, #0x7fff
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    str q0, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: test_copysign_v8f16_v8f16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    mov z0.h, #32767 // =0x7fff
; SVE2-NEXT:    ldr q1, [x0]
; SVE2-NEXT:    ldr q2, [x1]
; SVE2-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; SVE2-NEXT:    str q1, [x0]
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: test_copysign_v8f16_v8f16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #80
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 80
; NONEON-NOSVE-NEXT:    ldr q0, [x1]
; NONEON-NOSVE-NEXT:    ldr q1, [x0]
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #62]
; NONEON-NOSVE-NEXT:    str h0, [sp, #28]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #60]
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #29]
; NONEON-NOSVE-NEXT:    str h0, [sp, #24]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #58]
; NONEON-NOSVE-NEXT:    tst w8, #0x80
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #25]
; NONEON-NOSVE-NEXT:    str h0, [sp, #20]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #56]
; NONEON-NOSVE-NEXT:    str h0, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #54]
; NONEON-NOSVE-NEXT:    str h0, [sp, #12]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #52]
; NONEON-NOSVE-NEXT:    str h0, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #50]
; NONEON-NOSVE-NEXT:    str h0, [sp, #4]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #48]
; NONEON-NOSVE-NEXT:    str h0, [sp]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #46]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w8, #0x80
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    str h0, [sp, #78]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #44]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w8, #0x80
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    str h0, [sp, #76]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #42]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w8, #0x80
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #13]
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    str h0, [sp, #74]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #40]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w8, #0x80
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #9]
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    str h0, [sp, #72]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #38]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w8, #0x80
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #5]
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    str h0, [sp, #70]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #36]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w8, #0x80
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #1]
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    str h0, [sp, #68]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #34]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w8, #0x80
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    str h0, [sp, #66]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #32]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    str h0, [sp, #64]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #64]
; NONEON-NOSVE-NEXT:    str q0, [x0]
; NONEON-NOSVE-NEXT:    add sp, sp, #80
; NONEON-NOSVE-NEXT:    ret
  %a = load <8 x half>, ptr %ap
  %b = load <8 x half>, ptr %bp
  %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b)
  store <8 x half> %r, ptr %ap
  ret void
}

; <16 x half> copysign: a 256-bit vector. The SVE and SVE2 checks show it being
; handled as two 128-bit halves (ldp/stp of q registers, two AND/AND/ORR groups
; or two BSLs sharing one mask); the NONEON-NOSVE expansion processes sixteen
; scalar lanes through the stack.
define void @test_copysign_v16f16_v16f16(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v16f16_v16f16:
; SVE:       // %bb.0:
; SVE-NEXT:    ldp q0, q3, [x1]
; SVE-NEXT:    ldp q1, q2, [x0]
; SVE-NEXT:    and z0.h, z0.h, #0x8000
; SVE-NEXT:    and z3.h, z3.h, #0x8000
; SVE-NEXT:    and z1.h, z1.h, #0x7fff
; SVE-NEXT:    and z2.h, z2.h, #0x7fff
; SVE-NEXT:    orr z0.d, z1.d, z0.d
; SVE-NEXT:    orr z1.d, z2.d, z3.d
; SVE-NEXT:    stp q0, q1, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: test_copysign_v16f16_v16f16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    mov z0.h, #32767 // =0x7fff
; SVE2-NEXT:    ldp q1, q4, [x1]
; SVE2-NEXT:    ldp q2, q3, [x0]
; SVE2-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
; SVE2-NEXT:    bsl z3.d, z3.d, z4.d, z0.d
; SVE2-NEXT:    stp q2, q3, [x0]
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: test_copysign_v16f16_v16f16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #160
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 160
; NONEON-NOSVE-NEXT:    ldp q1, q0, [x1]
; NONEON-NOSVE-NEXT:    ldp q2, q3, [x0]
; NONEON-NOSVE-NEXT:    stp q2, q1, [sp, #64]
; NONEON-NOSVE-NEXT:    stp q3, q0, [sp, #96]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #126]
; NONEON-NOSVE-NEXT:    str h0, [sp, #28]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #124]
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #29]
; NONEON-NOSVE-NEXT:    str h0, [sp, #24]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #122]
; NONEON-NOSVE-NEXT:    tst w8, #0x80
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #25]
; NONEON-NOSVE-NEXT:    str h0, [sp, #20]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #120]
; NONEON-NOSVE-NEXT:    str h0, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #118]
; NONEON-NOSVE-NEXT:    str h0, [sp, #12]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #116]
; NONEON-NOSVE-NEXT:    str h0, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #114]
; NONEON-NOSVE-NEXT:    str h0, [sp, #4]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #112]
; NONEON-NOSVE-NEXT:    str h0, [sp]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #94]
; NONEON-NOSVE-NEXT:    str h0, [sp, #60]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #92]
; NONEON-NOSVE-NEXT:    str h0, [sp, #56]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #90]
; NONEON-NOSVE-NEXT:    str h0, [sp, #52]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #88]
; NONEON-NOSVE-NEXT:    str h0, [sp, #48]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #86]
; NONEON-NOSVE-NEXT:    str h0, [sp, #44]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #84]
; NONEON-NOSVE-NEXT:    str h0, [sp, #40]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #82]
; NONEON-NOSVE-NEXT:    str h0, [sp, #36]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #80]
; NONEON-NOSVE-NEXT:    str h0, [sp, #32]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #110]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w8, #0x80
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    str h0, [sp, #158]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #108]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w8, #0x80
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    str h0, [sp, #156]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #106]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w8, #0x80
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #13]
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    str h0, [sp, #154]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #104]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w8, #0x80
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #9]
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    str h0, [sp, #152]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #102]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w8, #0x80
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #5]
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    str h0, [sp, #150]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #100]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w8, #0x80
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #1]
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    str h0, [sp, #148]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #98]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w8, #0x80
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #61]
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    str h0, [sp, #146]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #96]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w8, #0x80
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #57]
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    str h0, [sp, #144]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #78]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w8, #0x80
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #53]
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    str h0, [sp, #142]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #76]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w8, #0x80
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #49]
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    str h0, [sp, #140]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #74]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w8, #0x80
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #45]
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    str h0, [sp, #138]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #72]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w8, #0x80
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #41]
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    str h0, [sp, #136]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #70]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w8, #0x80
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #37]
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    str h0, [sp, #134]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #68]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w8, #0x80
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #33]
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    str h0, [sp, #132]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #66]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w8, #0x80
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    str h0, [sp, #130]
; NONEON-NOSVE-NEXT:    ldr h0, [sp, #64]
; NONEON-NOSVE-NEXT:    fcvt s0, h0
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    fcvt h0, s0
; NONEON-NOSVE-NEXT:    str h0, [sp, #128]
; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #128]
; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
; NONEON-NOSVE-NEXT:    add sp, sp, #160
; NONEON-NOSVE-NEXT:    ret
  %a = load <16 x half>, ptr %ap
  %b = load <16 x half>, ptr %bp
  %r = call <16 x half> @llvm.copysign.v16f16(<16 x half> %a, <16 x half> %b)
  store <16 x half> %r, ptr %ap
  ret void
}

;============ f32

; <2 x float> copysign. Masks are 0x80000000 (sign) and 0x7fffffff (magnitude).
; In the NONEON-NOSVE expansion no fcvt is needed: the sign words are loaded as
; integers (ldp w8, w9) and bit 31 is tested directly.
define void @test_copysign_v2f32_v2f32(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v2f32_v2f32:
; SVE:       // %bb.0:
; SVE-NEXT:    ldr d0, [x0]
; SVE-NEXT:    ldr d1, [x1]
; SVE-NEXT:    and z1.s, z1.s, #0x80000000
; SVE-NEXT:    and z0.s, z0.s, #0x7fffffff
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    str d0, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: test_copysign_v2f32_v2f32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    mov z0.s, #0x7fffffff
; SVE2-NEXT:    ldr d1, [x0]
; SVE2-NEXT:    ldr d2, [x1]
; SVE2-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; SVE2-NEXT:    str d1, [x0]
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: test_copysign_v2f32_v2f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #32
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    ldr d0, [x0]
; NONEON-NOSVE-NEXT:    ldr d1, [x1]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #12]
; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #16]
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    tst w9, #0x80000000
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s2, s1, s0, ne
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #8]
; NONEON-NOSVE-NEXT:    tst w8, #0x80000000
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    stp s0, s2, [sp, #24]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT:    str d0, [x0]
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %a = load <2 x float>, ptr %ap
  %b = load <2 x float>, ptr %bp
  %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b)
  store <2 x float> %r, ptr %ap
  ret void
}

; <4 x float> copysign on a full 128-bit q register; same lowering shapes as
; the <2 x float> case with four scalar lanes in the NONEON-NOSVE expansion.
define void @test_copysign_v4f32_v4f32(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v4f32_v4f32:
; SVE:       // %bb.0:
; SVE-NEXT:    ldr q0, [x0]
; SVE-NEXT:    ldr q1, [x1]
; SVE-NEXT:    and z1.s, z1.s, #0x80000000
; SVE-NEXT:    and z0.s, z0.s, #0x7fffffff
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    str q0, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: test_copysign_v4f32_v4f32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    mov z0.s, #0x7fffffff
; SVE2-NEXT:    ldr q1, [x0]
; SVE2-NEXT:    ldr q2, [x1]
; SVE2-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; SVE2-NEXT:    str q1, [x0]
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: test_copysign_v4f32_v4f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldr q0, [x0]
; NONEON-NOSVE-NEXT:    ldr q1, [x1]
; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #12]
; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #24]
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    tst w9, #0x80000000
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s2, s1, s0, ne
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #8]
; NONEON-NOSVE-NEXT:    tst w8, #0x80000000
; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #16]
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w9, #0x80000000
; NONEON-NOSVE-NEXT:    stp s0, s2, [sp, #40]
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #4]
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s2, s1, s0, ne
; NONEON-NOSVE-NEXT:    ldr s0, [sp]
; NONEON-NOSVE-NEXT:    tst w8, #0x80000000
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    stp s0, s2, [sp, #32]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT:    str q0, [x0]
; NONEON-NOSVE-NEXT:    add sp, sp, #48
; NONEON-NOSVE-NEXT:    ret
  %a = load <4 x float>, ptr %ap
  %b = load <4 x float>, ptr %bp
  %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b)
  store <4 x float> %r, ptr %ap
  ret void
}

; <8 x float> copysign: 256-bit vector handled as two 128-bit halves in the
; SVE/SVE2 output (ldp/stp of q registers) and as eight scalar lanes in the
; NONEON-NOSVE output.
define void @test_copysign_v8f32_v8f32(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v8f32_v8f32:
; SVE:       // %bb.0:
; SVE-NEXT:    ldp q0, q3, [x1]
; SVE-NEXT:    ldp q1, q2, [x0]
; SVE-NEXT:    and z0.s, z0.s, #0x80000000
; SVE-NEXT:    and z3.s, z3.s, #0x80000000
; SVE-NEXT:    and z1.s, z1.s, #0x7fffffff
; SVE-NEXT:    and z2.s, z2.s, #0x7fffffff
; SVE-NEXT:    orr z0.d, z1.d, z0.d
; SVE-NEXT:    orr z1.d, z2.d, z3.d
; SVE-NEXT:    stp q0, q1, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: test_copysign_v8f32_v8f32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    mov z0.s, #0x7fffffff
; SVE2-NEXT:    ldp q1, q4, [x1]
; SVE2-NEXT:    ldp q2, q3, [x0]
; SVE2-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
; SVE2-NEXT:    bsl z3.d, z3.d, z4.d, z0.d
; SVE2-NEXT:    stp q2, q3, [x0]
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: test_copysign_v8f32_v8f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #96
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT:    ldp q2, q0, [x0]
; NONEON-NOSVE-NEXT:    ldp q3, q1, [x1]
; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #44]
; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #56]
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    tst w9, #0x80000000
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s2, s1, s0, ne
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #40]
; NONEON-NOSVE-NEXT:    tst w8, #0x80000000
; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #48]
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w9, #0x80000000
; NONEON-NOSVE-NEXT:    stp s0, s2, [sp, #88]
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #36]
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s2, s1, s0, ne
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #32]
; NONEON-NOSVE-NEXT:    tst w8, #0x80000000
; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #24]
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w9, #0x80000000
; NONEON-NOSVE-NEXT:    stp s0, s2, [sp, #80]
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #12]
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s2, s1, s0, ne
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #8]
; NONEON-NOSVE-NEXT:    tst w8, #0x80000000
; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #16]
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst w9, #0x80000000
; NONEON-NOSVE-NEXT:    stp s0, s2, [sp, #72]
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #4]
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s2, s1, s0, ne
; NONEON-NOSVE-NEXT:    ldr s0, [sp]
; NONEON-NOSVE-NEXT:    tst w8, #0x80000000
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    stp s0, s2, [sp, #64]
; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
; NONEON-NOSVE-NEXT:    add sp, sp, #96
; NONEON-NOSVE-NEXT:    ret
  %a = load <8 x float>, ptr %ap
  %b = load <8 x float>, ptr %bp
  %r = call <8 x float> @llvm.copysign.v8f32(<8 x float> %a, <8 x float> %b)
  store <8 x float> %r, ptr %ap
  ret void
}

;============ f64

; <2 x double> copysign. Masks are 0x8000000000000000 (sign) and
; 0x7fffffffffffffff (magnitude); the NONEON-NOSVE expansion tests bit 63 of
; each sign lane loaded as a 64-bit integer.
define void @test_copysign_v2f64_v2f64(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v2f64_v2f64:
; SVE:       // %bb.0:
; SVE-NEXT:    ldr q0, [x0]
; SVE-NEXT:    ldr q1, [x1]
; SVE-NEXT:    and z1.d, z1.d, #0x8000000000000000
; SVE-NEXT:    and z0.d, z0.d, #0x7fffffffffffffff
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    str q0, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: test_copysign_v2f64_v2f64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    mov z0.d, #0x7fffffffffffffff
; SVE2-NEXT:    ldr q1, [x0]
; SVE2-NEXT:    ldr q2, [x1]
; SVE2-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
; SVE2-NEXT:    str q1, [x0]
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: test_copysign_v2f64_v2f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldr q0, [x0]
; NONEON-NOSVE-NEXT:    ldr q1, [x1]
; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT:    ldp x8, x9, [sp, #16]
; NONEON-NOSVE-NEXT:    fabs d0, d0
; NONEON-NOSVE-NEXT:    tst x9, #0x8000000000000000
; NONEON-NOSVE-NEXT:    fneg d1, d0
; NONEON-NOSVE-NEXT:    fcsel d2, d1, d0, ne
; NONEON-NOSVE-NEXT:    ldr d0, [sp]
; NONEON-NOSVE-NEXT:    tst x8, #0x8000000000000000
; NONEON-NOSVE-NEXT:    fabs d0, d0
; NONEON-NOSVE-NEXT:    fneg d1, d0
; NONEON-NOSVE-NEXT:    fcsel d0, d1, d0, ne
; NONEON-NOSVE-NEXT:    stp d0, d2, [sp, #32]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT:    str q0, [x0]
; NONEON-NOSVE-NEXT:    add sp, sp, #48
; NONEON-NOSVE-NEXT:    ret
  %a = load <2 x double>, ptr %ap
  %b = load <2 x double>, ptr %bp
  %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b)
  store <2 x double> %r, ptr %ap
  ret void
}

; <4 x double> copysign: 256-bit vector handled as two 128-bit halves in the
; SVE/SVE2 output and as four scalar lanes in the NONEON-NOSVE output.
define void @test_copysign_v4f64_v4f64(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v4f64_v4f64:
; SVE:       // %bb.0:
; SVE-NEXT:    ldp q0, q3, [x1]
; SVE-NEXT:    ldp q1, q2, [x0]
; SVE-NEXT:    and z0.d, z0.d, #0x8000000000000000
; SVE-NEXT:    and z3.d, z3.d, #0x8000000000000000
; SVE-NEXT:    and z1.d, z1.d, #0x7fffffffffffffff
; SVE-NEXT:    and z2.d, z2.d, #0x7fffffffffffffff
; SVE-NEXT:    orr z0.d, z1.d, z0.d
; SVE-NEXT:    orr z1.d, z2.d, z3.d
; SVE-NEXT:    stp q0, q1, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: test_copysign_v4f64_v4f64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    mov z0.d, #0x7fffffffffffffff
; SVE2-NEXT:    ldp q1, q4, [x1]
; SVE2-NEXT:    ldp q2, q3, [x0]
; SVE2-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
; SVE2-NEXT:    bsl z3.d, z3.d, z4.d, z0.d
; SVE2-NEXT:    stp q2, q3, [x0]
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: test_copysign_v4f64_v4f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #96
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT:    ldp q2, q0, [x0]
; NONEON-NOSVE-NEXT:    ldp q3, q1, [x1]
; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #32]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #40]
; NONEON-NOSVE-NEXT:    ldp x8, x9, [sp, #48]
; NONEON-NOSVE-NEXT:    fabs d0, d0
; NONEON-NOSVE-NEXT:    tst x9, #0x8000000000000000
; NONEON-NOSVE-NEXT:    fneg d1, d0
; NONEON-NOSVE-NEXT:    fcsel d2, d1, d0, ne
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #32]
; NONEON-NOSVE-NEXT:    tst x8, #0x8000000000000000
; NONEON-NOSVE-NEXT:    ldp x8, x9, [sp, #16]
; NONEON-NOSVE-NEXT:    fabs d0, d0
; NONEON-NOSVE-NEXT:    fneg d1, d0
; NONEON-NOSVE-NEXT:    fcsel d0, d1, d0, ne
; NONEON-NOSVE-NEXT:    tst x9, #0x8000000000000000
; NONEON-NOSVE-NEXT:    stp d0, d2, [sp, #80]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT:    fabs d0, d0
; NONEON-NOSVE-NEXT:    fneg d1, d0
; NONEON-NOSVE-NEXT:    fcsel d2, d1, d0, ne
; NONEON-NOSVE-NEXT:    ldr d0, [sp]
; NONEON-NOSVE-NEXT:    tst x8, #0x8000000000000000
; NONEON-NOSVE-NEXT:    fabs d0, d0
; NONEON-NOSVE-NEXT:    fneg d1, d0
; NONEON-NOSVE-NEXT:    fcsel d0, d1, d0, ne
; NONEON-NOSVE-NEXT:    stp d0, d2, [sp, #64]
; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
; NONEON-NOSVE-NEXT:    add sp, sp, #96
; NONEON-NOSVE-NEXT:    ret
  %a = load <4 x double>, ptr %ap
  %b = load <4 x double>, ptr %bp
  %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b)
  store <4 x double> %r, ptr %ap
  ret void
}

;============ v2f32

; <2 x float> copysign whose sign operand is a <2 x double> narrowed with
; fptrunc. The SVE/SVE2 output performs the narrowing (predicated fcvt followed
; by uzp1) before masking. The NONEON-NOSVE output contains no conversion at
; all: it tests bit 63 of each original double (tst x8/x9) to pick the sign.
define void @test_copysign_v2f32_v2f64(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v2f32_v2f64:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.d
; SVE-NEXT:    ldr q0, [x1]
; SVE-NEXT:    ldr d1, [x0]
; SVE-NEXT:    fcvt z0.s, p0/m, z0.d
; SVE-NEXT:    and z1.s, z1.s, #0x7fffffff
; SVE-NEXT:    uzp1 z0.s, z0.s, z0.s
; SVE-NEXT:    and z0.s, z0.s, #0x80000000
; SVE-NEXT:    orr z0.d, z1.d, z0.d
; SVE-NEXT:    str d0, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: test_copysign_v2f32_v2f64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.d
; SVE2-NEXT:    ldr q0, [x1]
; SVE2-NEXT:    mov z1.s, #0x7fffffff
; SVE2-NEXT:    ldr d2, [x0]
; SVE2-NEXT:    fcvt z0.s, p0/m, z0.d
; SVE2-NEXT:    uzp1 z0.s, z0.s, z0.s
; SVE2-NEXT:    bsl z2.d, z2.d, z0.d, z1.d
; SVE2-NEXT:    str d2, [x0]
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: test_copysign_v2f32_v2f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #48
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT:    ldr d1, [x0]
; NONEON-NOSVE-NEXT:    ldr q0, [x1]
; NONEON-NOSVE-NEXT:    str d1, [sp, #8]
; NONEON-NOSVE-NEXT:    str q0, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #12]
; NONEON-NOSVE-NEXT:    ldp x8, x9, [sp, #16]
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    tst x9, #0x8000000000000000
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s2, s1, s0, ne
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #8]
; NONEON-NOSVE-NEXT:    tst x8, #0x8000000000000000
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    stp s0, s2, [sp, #40]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #40]
; NONEON-NOSVE-NEXT:    str d0, [x0]
; NONEON-NOSVE-NEXT:    add sp, sp, #48
; NONEON-NOSVE-NEXT:    ret
  %a = load <2 x float>, ptr %ap
  %b = load <2 x double>, ptr %bp
  %tmp0 = fptrunc <2 x double> %b to <2 x float>
  %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %tmp0)
  store <2 x float> %r, ptr %ap
  ret void
}

;============ v4f32

; <4 x float> copysign whose sign operand is a <4 x double> narrowed with
; fptrunc. In the SVE/SVE2 output the two 128-bit halves of the double vector
; are converted separately and joined with splice before the sign merge. The
; NONEON-NOSVE output again tests bit 63 of each original double directly.
; SplitVecOp #1
define void @test_copysign_v4f32_v4f64(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v4f32_v4f64:
; SVE:       // %bb.0:
; SVE-NEXT:    ldp q0, q1, [x1]
; SVE-NEXT:    ptrue p0.d
; SVE-NEXT:    fcvt z1.s, p0/m, z1.d
; SVE-NEXT:    fcvt z0.s, p0/m, z0.d
; SVE-NEXT:    ptrue p0.s, vl2
; SVE-NEXT:    uzp1 z1.s, z1.s, z1.s
; SVE-NEXT:    uzp1 z0.s, z0.s, z0.s
; SVE-NEXT:    splice z0.s, p0, z0.s, z1.s
; SVE-NEXT:    ldr q1, [x0]
; SVE-NEXT:    and z1.s, z1.s, #0x7fffffff
; SVE-NEXT:    and z0.s, z0.s, #0x80000000
; SVE-NEXT:    orr z0.d, z1.d, z0.d
; SVE-NEXT:    str q0, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: test_copysign_v4f32_v4f64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ldp q1, q0, [x1]
; SVE2-NEXT:    ptrue p0.d
; SVE2-NEXT:    fcvt z0.s, p0/m, z0.d
; SVE2-NEXT:    fcvt z1.s, p0/m, z1.d
; SVE2-NEXT:    ptrue p0.s, vl2
; SVE2-NEXT:    uzp1 z3.s, z0.s, z0.s
; SVE2-NEXT:    uzp1 z2.s, z1.s, z1.s
; SVE2-NEXT:    mov z1.s, #0x7fffffff
; SVE2-NEXT:    splice z0.s, p0, { z2.s, z3.s }
; SVE2-NEXT:    ldr q2, [x0]
; SVE2-NEXT:    bsl z2.d, z2.d, z0.d, z1.d
; SVE2-NEXT:    str q2, [x0]
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: test_copysign_v4f32_v4f64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #64
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT:    ldp q1, q0, [x1]
; NONEON-NOSVE-NEXT:    ldr q2, [x0]
; NONEON-NOSVE-NEXT:    str q2, [sp]
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #12]
; NONEON-NOSVE-NEXT:    ldp x8, x9, [sp, #32]
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    tst x9, #0x8000000000000000
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s2, s1, s0, ne
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #8]
; NONEON-NOSVE-NEXT:    tst x8, #0x8000000000000000
; NONEON-NOSVE-NEXT:    ldp x8, x9, [sp, #16]
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    tst x9, #0x8000000000000000
; NONEON-NOSVE-NEXT:    stp s0, s2, [sp, #56]
; NONEON-NOSVE-NEXT:    ldr s0, [sp, #4]
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s2, s1, s0, ne
; NONEON-NOSVE-NEXT:    ldr s0, [sp]
; NONEON-NOSVE-NEXT:    tst x8, #0x8000000000000000
; NONEON-NOSVE-NEXT:    fabs s0, s0
; NONEON-NOSVE-NEXT:    fneg s1, s0
; NONEON-NOSVE-NEXT:    fcsel s0, s1, s0, ne
; NONEON-NOSVE-NEXT:    stp s0, s2, [sp, #48]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
; NONEON-NOSVE-NEXT:    str q0, [x0]
; NONEON-NOSVE-NEXT:    add sp, sp, #64
; NONEON-NOSVE-NEXT:    ret
  %a = load <4 x float>, ptr %ap
  %b = load <4 x double>, ptr %bp
  %tmp0 = fptrunc <4 x double> %b to <4 x float>
  %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %tmp0)
  store <4 x float> %r, ptr %ap
  ret void
}

;============ v2f64

; <2 x double> copysign whose sign operand is a <2 x float> widened with fpext.
; The SVE/SVE2 output loads the floats straight into 64-bit lanes (ld1w into
; z.d) and widens them with a predicated fcvt before the sign merge. The
; NONEON-NOSVE output contains no conversion: it tests bit 31 of each original
; float (tst w8/w9) to pick the sign.
define void @test_copysign_v2f64_v2f32(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v2f64_v2f32:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.d, vl2
; SVE-NEXT:    ldr q0, [x0]
; SVE-NEXT:    ld1w { z1.d }, p0/z, [x1]
; SVE-NEXT:    and z0.d, z0.d, #0x7fffffffffffffff
; SVE-NEXT:    fcvt z1.d, p0/m, z1.s
; SVE-NEXT:    and z1.d, z1.d, #0x8000000000000000
; SVE-NEXT:    orr z0.d, z0.d, z1.d
; SVE-NEXT:    str q0, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: test_copysign_v2f64_v2f32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.d, vl2
; SVE2-NEXT:    mov z1.d, #0x7fffffffffffffff
; SVE2-NEXT:    ldr q2, [x0]
; SVE2-NEXT:    ld1w { z0.d }, p0/z, [x1]
; SVE2-NEXT:    fcvt z0.d, p0/m, z0.s
; SVE2-NEXT:    bsl z2.d, z2.d, z0.d, z1.d
; SVE2-NEXT:    str q2, [x0]
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: test_copysign_v2f64_v2f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #48
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
; NONEON-NOSVE-NEXT:    ldr q1, [x0]
; NONEON-NOSVE-NEXT:    ldr d0, [x1]
; NONEON-NOSVE-NEXT:    str q1, [sp]
; NONEON-NOSVE-NEXT:    str d0, [sp, #24]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #24]
; NONEON-NOSVE-NEXT:    fabs d0, d0
; NONEON-NOSVE-NEXT:    tst w9, #0x80000000
; NONEON-NOSVE-NEXT:    fneg d1, d0
; NONEON-NOSVE-NEXT:    fcsel d2, d1, d0, ne
; NONEON-NOSVE-NEXT:    ldr d0, [sp]
; NONEON-NOSVE-NEXT:    tst w8, #0x80000000
; NONEON-NOSVE-NEXT:    fabs d0, d0
; NONEON-NOSVE-NEXT:    fneg d1, d0
; NONEON-NOSVE-NEXT:    fcsel d0, d1, d0, ne
; NONEON-NOSVE-NEXT:    stp d0, d2, [sp, #32]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT:    str q0, [x0]
; NONEON-NOSVE-NEXT:    add sp, sp, #48
; NONEON-NOSVE-NEXT:    ret
  %a = load <2 x double>, ptr %ap
  %b = load < 2 x float>, ptr %bp
  %tmp0 = fpext <2 x float> %b to <2 x double>
  %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %tmp0)
  store <2 x double> %r, ptr %ap
  ret void
}

;============ v4f64

; SplitVecRes mismatched
define void @test_copysign_v4f64_v4f32(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v4f64_v4f32:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.d, vl2
; SVE-NEXT:    mov x8, #2 // =0x2
; SVE-NEXT:    ldp q2, q3, [x0]
; SVE-NEXT:    ld1w { z0.d }, p0/z, [x1]
; SVE-NEXT:    ld1w { z1.d }, p0/z, [x1, x8, lsl #2]
; SVE-NEXT:    and z2.d, z2.d, #0x7fffffffffffffff
; SVE-NEXT:    and z3.d, z3.d, #0x7fffffffffffffff
; SVE-NEXT:    fcvt z0.d, p0/m, z0.s
; SVE-NEXT:    fcvt z1.d, p0/m, z1.s
; SVE-NEXT:    and z0.d, z0.d, #0x8000000000000000
; SVE-NEXT:    and z1.d, z1.d, #0x8000000000000000
; SVE-NEXT:    orr z0.d, z2.d, z0.d
; SVE-NEXT:    orr z1.d, z3.d, z1.d
; SVE-NEXT:    stp q0, q1, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: test_copysign_v4f64_v4f32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.d, vl2
; SVE2-NEXT:    mov x8, #2 // =0x2
; SVE2-NEXT:    mov z2.d, #0x7fffffffffffffff
; SVE2-NEXT:    ldp q3, q4, [x0]
; SVE2-NEXT:    ld1w { z0.d }, p0/z, [x1]
; SVE2-NEXT:    ld1w { z1.d }, p0/z, [x1, x8, lsl #2]
; SVE2-NEXT:    fcvt z0.d, p0/m, z0.s
; SVE2-NEXT:    fcvt z1.d, p0/m, z1.s
; SVE2-NEXT:    bsl z3.d, z3.d, z0.d, z2.d
; SVE2-NEXT:    bsl z4.d, z4.d, z1.d, z2.d
; SVE2-NEXT:    stp q3, q4, [x0]
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: test_copysign_v4f64_v4f32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub
sp, sp, #96 1001; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 96 1002; NONEON-NOSVE-NEXT: ldp q1, q2, [x0] 1003; NONEON-NOSVE-NEXT: ldr q0, [x1] 1004; NONEON-NOSVE-NEXT: stp q0, q2, [sp, #16] 1005; NONEON-NOSVE-NEXT: ldp d0, d2, [sp, #16] 1006; NONEON-NOSVE-NEXT: str q1, [sp] 1007; NONEON-NOSVE-NEXT: stp d2, d0, [sp, #48] 1008; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] 1009; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #48] 1010; NONEON-NOSVE-NEXT: fabs d0, d0 1011; NONEON-NOSVE-NEXT: tst w9, #0x80000000 1012; NONEON-NOSVE-NEXT: fneg d1, d0 1013; NONEON-NOSVE-NEXT: fcsel d2, d1, d0, ne 1014; NONEON-NOSVE-NEXT: ldr d0, [sp, #32] 1015; NONEON-NOSVE-NEXT: tst w8, #0x80000000 1016; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #56] 1017; NONEON-NOSVE-NEXT: fabs d0, d0 1018; NONEON-NOSVE-NEXT: fneg d1, d0 1019; NONEON-NOSVE-NEXT: fcsel d0, d1, d0, ne 1020; NONEON-NOSVE-NEXT: tst w9, #0x80000000 1021; NONEON-NOSVE-NEXT: stp d0, d2, [sp, #64] 1022; NONEON-NOSVE-NEXT: ldr d0, [sp, #8] 1023; NONEON-NOSVE-NEXT: fabs d0, d0 1024; NONEON-NOSVE-NEXT: fneg d1, d0 1025; NONEON-NOSVE-NEXT: fcsel d2, d1, d0, ne 1026; NONEON-NOSVE-NEXT: ldr d0, [sp] 1027; NONEON-NOSVE-NEXT: tst w8, #0x80000000 1028; NONEON-NOSVE-NEXT: fabs d0, d0 1029; NONEON-NOSVE-NEXT: fneg d1, d0 1030; NONEON-NOSVE-NEXT: fcsel d0, d1, d0, ne 1031; NONEON-NOSVE-NEXT: stp d0, d2, [sp, #80] 1032; NONEON-NOSVE-NEXT: ldp q1, q0, [sp, #64] 1033; NONEON-NOSVE-NEXT: stp q0, q1, [x0] 1034; NONEON-NOSVE-NEXT: add sp, sp, #96 1035; NONEON-NOSVE-NEXT: ret 1036 %a = load <4 x double>, ptr %ap 1037 %b = load <4 x float>, ptr %bp 1038 %tmp0 = fpext <4 x float> %b to <4 x double> 1039 %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %tmp0) 1040 store <4 x double> %r, ptr %ap 1041 ret void 1042} 1043 1044;============ v4f16 1045 1046define void @test_copysign_v4f16_v4f32(ptr %ap, ptr %bp) { 1047; SVE-LABEL: test_copysign_v4f16_v4f32: 1048; SVE: // %bb.0: 1049; SVE-NEXT: ptrue p0.s 1050; SVE-NEXT: ldr q0, [x1] 1051; SVE-NEXT: ldr d1, 
[x0] 1052; SVE-NEXT: fcvt z0.h, p0/m, z0.s 1053; SVE-NEXT: and z1.h, z1.h, #0x7fff 1054; SVE-NEXT: uzp1 z0.h, z0.h, z0.h 1055; SVE-NEXT: and z0.h, z0.h, #0x8000 1056; SVE-NEXT: orr z0.d, z1.d, z0.d 1057; SVE-NEXT: str d0, [x0] 1058; SVE-NEXT: ret 1059; 1060; SVE2-LABEL: test_copysign_v4f16_v4f32: 1061; SVE2: // %bb.0: 1062; SVE2-NEXT: ptrue p0.s 1063; SVE2-NEXT: ldr q0, [x1] 1064; SVE2-NEXT: mov z1.h, #32767 // =0x7fff 1065; SVE2-NEXT: ldr d2, [x0] 1066; SVE2-NEXT: fcvt z0.h, p0/m, z0.s 1067; SVE2-NEXT: uzp1 z0.h, z0.h, z0.h 1068; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d 1069; SVE2-NEXT: str d2, [x0] 1070; SVE2-NEXT: ret 1071; 1072; NONEON-NOSVE-LABEL: test_copysign_v4f16_v4f32: 1073; NONEON-NOSVE: // %bb.0: 1074; NONEON-NOSVE-NEXT: sub sp, sp, #48 1075; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 48 1076; NONEON-NOSVE-NEXT: ldr d1, [x0] 1077; NONEON-NOSVE-NEXT: ldr q0, [x1] 1078; NONEON-NOSVE-NEXT: str d1, [sp, #8] 1079; NONEON-NOSVE-NEXT: str q0, [sp, #16] 1080; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] 1081; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24] 1082; NONEON-NOSVE-NEXT: fcvt s0, h0 1083; NONEON-NOSVE-NEXT: tst w9, #0x80000000 1084; NONEON-NOSVE-NEXT: fabs s0, s0 1085; NONEON-NOSVE-NEXT: fneg s1, s0 1086; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne 1087; NONEON-NOSVE-NEXT: tst w8, #0x80000000 1088; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16] 1089; NONEON-NOSVE-NEXT: fcvt h0, s0 1090; NONEON-NOSVE-NEXT: str h0, [sp, #46] 1091; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] 1092; NONEON-NOSVE-NEXT: fcvt s0, h0 1093; NONEON-NOSVE-NEXT: fabs s0, s0 1094; NONEON-NOSVE-NEXT: fneg s1, s0 1095; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne 1096; NONEON-NOSVE-NEXT: tst w9, #0x80000000 1097; NONEON-NOSVE-NEXT: fcvt h0, s0 1098; NONEON-NOSVE-NEXT: str h0, [sp, #44] 1099; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] 1100; NONEON-NOSVE-NEXT: fcvt s0, h0 1101; NONEON-NOSVE-NEXT: fabs s0, s0 1102; NONEON-NOSVE-NEXT: fneg s1, s0 1103; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne 1104; NONEON-NOSVE-NEXT: tst w8, #0x80000000 
1105; NONEON-NOSVE-NEXT: fcvt h0, s0 1106; NONEON-NOSVE-NEXT: str h0, [sp, #42] 1107; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] 1108; NONEON-NOSVE-NEXT: fcvt s0, h0 1109; NONEON-NOSVE-NEXT: fabs s0, s0 1110; NONEON-NOSVE-NEXT: fneg s1, s0 1111; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne 1112; NONEON-NOSVE-NEXT: fcvt h0, s0 1113; NONEON-NOSVE-NEXT: str h0, [sp, #40] 1114; NONEON-NOSVE-NEXT: ldr d0, [sp, #40] 1115; NONEON-NOSVE-NEXT: str d0, [x0] 1116; NONEON-NOSVE-NEXT: add sp, sp, #48 1117; NONEON-NOSVE-NEXT: ret 1118 %a = load <4 x half>, ptr %ap 1119 %b = load <4 x float>, ptr %bp 1120 %tmp0 = fptrunc <4 x float> %b to <4 x half> 1121 %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0) 1122 store <4 x half> %r, ptr %ap 1123 ret void 1124} 1125 1126define void @test_copysign_v4f16_v4f64(ptr %ap, ptr %bp) { 1127; SVE-LABEL: test_copysign_v4f16_v4f64: 1128; SVE: // %bb.0: 1129; SVE-NEXT: ldp q0, q1, [x1] 1130; SVE-NEXT: mov z2.d, z1.d[1] 1131; SVE-NEXT: mov z3.d, z0.d[1] 1132; SVE-NEXT: fcvt h1, d1 1133; SVE-NEXT: fcvt h0, d0 1134; SVE-NEXT: fcvt h2, d2 1135; SVE-NEXT: fcvt h3, d3 1136; SVE-NEXT: zip1 z1.h, z1.h, z2.h 1137; SVE-NEXT: zip1 z0.h, z0.h, z3.h 1138; SVE-NEXT: zip1 z0.s, z0.s, z1.s 1139; SVE-NEXT: ldr d1, [x0] 1140; SVE-NEXT: and z1.h, z1.h, #0x7fff 1141; SVE-NEXT: and z0.h, z0.h, #0x8000 1142; SVE-NEXT: orr z0.d, z1.d, z0.d 1143; SVE-NEXT: str d0, [x0] 1144; SVE-NEXT: ret 1145; 1146; SVE2-LABEL: test_copysign_v4f16_v4f64: 1147; SVE2: // %bb.0: 1148; SVE2-NEXT: ldp q0, q1, [x1] 1149; SVE2-NEXT: mov z2.d, z1.d[1] 1150; SVE2-NEXT: mov z3.d, z0.d[1] 1151; SVE2-NEXT: fcvt h1, d1 1152; SVE2-NEXT: fcvt h0, d0 1153; SVE2-NEXT: fcvt h2, d2 1154; SVE2-NEXT: fcvt h3, d3 1155; SVE2-NEXT: zip1 z1.h, z1.h, z2.h 1156; SVE2-NEXT: zip1 z0.h, z0.h, z3.h 1157; SVE2-NEXT: mov z2.h, #32767 // =0x7fff 1158; SVE2-NEXT: zip1 z0.s, z0.s, z1.s 1159; SVE2-NEXT: ldr d1, [x0] 1160; SVE2-NEXT: bsl z1.d, z1.d, z0.d, z2.d 1161; SVE2-NEXT: str d1, [x0] 1162; SVE2-NEXT: ret 
1163; 1164; NONEON-NOSVE-LABEL: test_copysign_v4f16_v4f64: 1165; NONEON-NOSVE: // %bb.0: 1166; NONEON-NOSVE-NEXT: sub sp, sp, #64 1167; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 1168; NONEON-NOSVE-NEXT: ldp q1, q0, [x1] 1169; NONEON-NOSVE-NEXT: ldr d2, [x0] 1170; NONEON-NOSVE-NEXT: str d2, [sp, #8] 1171; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] 1172; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] 1173; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #32] 1174; NONEON-NOSVE-NEXT: fcvt s0, h0 1175; NONEON-NOSVE-NEXT: tst x9, #0x8000000000000000 1176; NONEON-NOSVE-NEXT: fabs s0, s0 1177; NONEON-NOSVE-NEXT: fneg s1, s0 1178; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne 1179; NONEON-NOSVE-NEXT: tst x8, #0x8000000000000000 1180; NONEON-NOSVE-NEXT: ldp x8, x9, [sp, #16] 1181; NONEON-NOSVE-NEXT: fcvt h0, s0 1182; NONEON-NOSVE-NEXT: str h0, [sp, #62] 1183; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] 1184; NONEON-NOSVE-NEXT: fcvt s0, h0 1185; NONEON-NOSVE-NEXT: fabs s0, s0 1186; NONEON-NOSVE-NEXT: fneg s1, s0 1187; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne 1188; NONEON-NOSVE-NEXT: tst x9, #0x8000000000000000 1189; NONEON-NOSVE-NEXT: fcvt h0, s0 1190; NONEON-NOSVE-NEXT: str h0, [sp, #60] 1191; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] 1192; NONEON-NOSVE-NEXT: fcvt s0, h0 1193; NONEON-NOSVE-NEXT: fabs s0, s0 1194; NONEON-NOSVE-NEXT: fneg s1, s0 1195; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne 1196; NONEON-NOSVE-NEXT: tst x8, #0x8000000000000000 1197; NONEON-NOSVE-NEXT: fcvt h0, s0 1198; NONEON-NOSVE-NEXT: str h0, [sp, #58] 1199; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] 1200; NONEON-NOSVE-NEXT: fcvt s0, h0 1201; NONEON-NOSVE-NEXT: fabs s0, s0 1202; NONEON-NOSVE-NEXT: fneg s1, s0 1203; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne 1204; NONEON-NOSVE-NEXT: fcvt h0, s0 1205; NONEON-NOSVE-NEXT: str h0, [sp, #56] 1206; NONEON-NOSVE-NEXT: ldr d0, [sp, #56] 1207; NONEON-NOSVE-NEXT: str d0, [x0] 1208; NONEON-NOSVE-NEXT: add sp, sp, #64 1209; NONEON-NOSVE-NEXT: ret 1210 %a = load <4 x half>, ptr %ap 1211 %b = load <4 x double>, ptr %bp 1212 
%tmp0 = fptrunc <4 x double> %b to <4 x half> 1213 %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0) 1214 store <4 x half> %r, ptr %ap 1215 ret void 1216} 1217 1218;============ v8f16 1219 1220define void @test_copysign_v8f16_v8f32(ptr %ap, ptr %bp) { 1221; SVE-LABEL: test_copysign_v8f16_v8f32: 1222; SVE: // %bb.0: 1223; SVE-NEXT: ldp q0, q1, [x1] 1224; SVE-NEXT: ptrue p0.s 1225; SVE-NEXT: fcvt z1.h, p0/m, z1.s 1226; SVE-NEXT: fcvt z0.h, p0/m, z0.s 1227; SVE-NEXT: ptrue p0.h, vl4 1228; SVE-NEXT: uzp1 z1.h, z1.h, z1.h 1229; SVE-NEXT: uzp1 z0.h, z0.h, z0.h 1230; SVE-NEXT: splice z0.h, p0, z0.h, z1.h 1231; SVE-NEXT: ldr q1, [x0] 1232; SVE-NEXT: and z1.h, z1.h, #0x7fff 1233; SVE-NEXT: and z0.h, z0.h, #0x8000 1234; SVE-NEXT: orr z0.d, z1.d, z0.d 1235; SVE-NEXT: str q0, [x0] 1236; SVE-NEXT: ret 1237; 1238; SVE2-LABEL: test_copysign_v8f16_v8f32: 1239; SVE2: // %bb.0: 1240; SVE2-NEXT: ldp q1, q0, [x1] 1241; SVE2-NEXT: ptrue p0.s 1242; SVE2-NEXT: fcvt z0.h, p0/m, z0.s 1243; SVE2-NEXT: fcvt z1.h, p0/m, z1.s 1244; SVE2-NEXT: ptrue p0.h, vl4 1245; SVE2-NEXT: uzp1 z3.h, z0.h, z0.h 1246; SVE2-NEXT: uzp1 z2.h, z1.h, z1.h 1247; SVE2-NEXT: mov z1.h, #32767 // =0x7fff 1248; SVE2-NEXT: splice z0.h, p0, { z2.h, z3.h } 1249; SVE2-NEXT: ldr q2, [x0] 1250; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d 1251; SVE2-NEXT: str q2, [x0] 1252; SVE2-NEXT: ret 1253; 1254; NONEON-NOSVE-LABEL: test_copysign_v8f16_v8f32: 1255; NONEON-NOSVE: // %bb.0: 1256; NONEON-NOSVE-NEXT: sub sp, sp, #64 1257; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 64 1258; NONEON-NOSVE-NEXT: ldp q1, q0, [x1] 1259; NONEON-NOSVE-NEXT: ldr q2, [x0] 1260; NONEON-NOSVE-NEXT: str q2, [sp] 1261; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #16] 1262; NONEON-NOSVE-NEXT: ldr h0, [sp, #14] 1263; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #40] 1264; NONEON-NOSVE-NEXT: fcvt s0, h0 1265; NONEON-NOSVE-NEXT: tst w9, #0x80000000 1266; NONEON-NOSVE-NEXT: fabs s0, s0 1267; NONEON-NOSVE-NEXT: fneg s1, s0 1268; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, 
ne 1269; NONEON-NOSVE-NEXT: tst w8, #0x80000000 1270; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #32] 1271; NONEON-NOSVE-NEXT: fcvt h0, s0 1272; NONEON-NOSVE-NEXT: str h0, [sp, #62] 1273; NONEON-NOSVE-NEXT: ldr h0, [sp, #12] 1274; NONEON-NOSVE-NEXT: fcvt s0, h0 1275; NONEON-NOSVE-NEXT: fabs s0, s0 1276; NONEON-NOSVE-NEXT: fneg s1, s0 1277; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne 1278; NONEON-NOSVE-NEXT: tst w9, #0x80000000 1279; NONEON-NOSVE-NEXT: fcvt h0, s0 1280; NONEON-NOSVE-NEXT: str h0, [sp, #60] 1281; NONEON-NOSVE-NEXT: ldr h0, [sp, #10] 1282; NONEON-NOSVE-NEXT: fcvt s0, h0 1283; NONEON-NOSVE-NEXT: fabs s0, s0 1284; NONEON-NOSVE-NEXT: fneg s1, s0 1285; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne 1286; NONEON-NOSVE-NEXT: tst w8, #0x80000000 1287; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #24] 1288; NONEON-NOSVE-NEXT: fcvt h0, s0 1289; NONEON-NOSVE-NEXT: str h0, [sp, #58] 1290; NONEON-NOSVE-NEXT: ldr h0, [sp, #8] 1291; NONEON-NOSVE-NEXT: fcvt s0, h0 1292; NONEON-NOSVE-NEXT: fabs s0, s0 1293; NONEON-NOSVE-NEXT: fneg s1, s0 1294; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne 1295; NONEON-NOSVE-NEXT: tst w9, #0x80000000 1296; NONEON-NOSVE-NEXT: fcvt h0, s0 1297; NONEON-NOSVE-NEXT: str h0, [sp, #56] 1298; NONEON-NOSVE-NEXT: ldr h0, [sp, #6] 1299; NONEON-NOSVE-NEXT: fcvt s0, h0 1300; NONEON-NOSVE-NEXT: fabs s0, s0 1301; NONEON-NOSVE-NEXT: fneg s1, s0 1302; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne 1303; NONEON-NOSVE-NEXT: tst w8, #0x80000000 1304; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #16] 1305; NONEON-NOSVE-NEXT: fcvt h0, s0 1306; NONEON-NOSVE-NEXT: str h0, [sp, #54] 1307; NONEON-NOSVE-NEXT: ldr h0, [sp, #4] 1308; NONEON-NOSVE-NEXT: fcvt s0, h0 1309; NONEON-NOSVE-NEXT: fabs s0, s0 1310; NONEON-NOSVE-NEXT: fneg s1, s0 1311; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne 1312; NONEON-NOSVE-NEXT: tst w9, #0x80000000 1313; NONEON-NOSVE-NEXT: fcvt h0, s0 1314; NONEON-NOSVE-NEXT: str h0, [sp, #52] 1315; NONEON-NOSVE-NEXT: ldr h0, [sp, #2] 1316; NONEON-NOSVE-NEXT: fcvt s0, h0 1317; NONEON-NOSVE-NEXT: fabs 
s0, s0 1318; NONEON-NOSVE-NEXT: fneg s1, s0 1319; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne 1320; NONEON-NOSVE-NEXT: tst w8, #0x80000000 1321; NONEON-NOSVE-NEXT: fcvt h0, s0 1322; NONEON-NOSVE-NEXT: str h0, [sp, #50] 1323; NONEON-NOSVE-NEXT: ldr h0, [sp] 1324; NONEON-NOSVE-NEXT: fcvt s0, h0 1325; NONEON-NOSVE-NEXT: fabs s0, s0 1326; NONEON-NOSVE-NEXT: fneg s1, s0 1327; NONEON-NOSVE-NEXT: fcsel s0, s1, s0, ne 1328; NONEON-NOSVE-NEXT: fcvt h0, s0 1329; NONEON-NOSVE-NEXT: str h0, [sp, #48] 1330; NONEON-NOSVE-NEXT: ldr q0, [sp, #48] 1331; NONEON-NOSVE-NEXT: str q0, [x0] 1332; NONEON-NOSVE-NEXT: add sp, sp, #64 1333; NONEON-NOSVE-NEXT: ret 1334 %a = load <8 x half>, ptr %ap 1335 %b = load <8 x float>, ptr %bp 1336 %tmp0 = fptrunc <8 x float> %b to <8 x half> 1337 %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %tmp0) 1338 store <8 x half> %r, ptr %ap 1339 ret void 1340} 1341 1342declare <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %b) #0 1343declare <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %b) #0 1344declare <16 x half> @llvm.copysign.v16f16(<16 x half> %a, <16 x half> %b) #0 1345 1346declare <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b) #0 1347declare <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b) #0 1348declare <8 x float> @llvm.copysign.v8f32(<8 x float> %a, <8 x float> %b) #0 1349 1350declare <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b) #0 1351declare <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b) #0 1352