; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve < %s | FileCheck %s

; Streaming-compatible SVE doesn't include FADDA, so this shouldn't compile!
; RUN: not --crash llc -mattr=+sve -force-streaming-compatible < %s

; Code-generation test for @llvm.vector.reduce.fadd on scalable vectors with
; SVE enabled. None of the reduction calls below carries fast-math flags, and
; the expected assembly of every function contains the SVE FADDA instruction.
; The first run line pipes the llc output into FileCheck; the second one only
; requires llc to crash once -force-streaming-compatible is added.
; The assertions are generated by a script (see the NOTE on the first line),
; so regenerate them with that script rather than editing them by hand.

target triple = "aarch64-linux-gnu"

; FADD
; (fadd reductions; each one is expected to be emitted as FADDA.)

; <vscale x 2 x half>: the expected predicate is built with ptrue p0.d while
; the FADDA itself is written with .h elements.
define half @fadda_nxv2f16(half %init, <vscale x 2 x half> %a) {
; CHECK-LABEL: fadda_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    fadda h0, p0, h0, z1.h
; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT:    ret
  %res = call half @llvm.vector.reduce.fadd.nxv2f16(half %init, <vscale x 2 x half> %a)
  ret half %res
}

; <vscale x 4 x half>: same shape as above, but the expected predicate is
; ptrue p0.s.
define half @fadda_nxv4f16(half %init, <vscale x 4 x half> %a) {
; CHECK-LABEL: fadda_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    fadda h0, p0, h0, z1.h
; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT:    ret
  %res = call half @llvm.vector.reduce.fadd.nxv4f16(half %init, <vscale x 4 x half> %a)
  ret half %res
}

; <vscale x 8 x half>: predicate and FADDA both use .h elements.
define half @fadda_nxv8f16(half %init, <vscale x 8 x half> %a) {
; CHECK-LABEL: fadda_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    fadda h0, p0, h0, z1.h
; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT:    ret
  %res = call half @llvm.vector.reduce.fadd.nxv8f16(half %init, <vscale x 8 x half> %a)
  ret half %res
}

; <vscale x 6 x half>: note the (vector, scalar) parameter order, which differs
; from the functions above. The expected code reserves one vector of stack
; (addvl sp, sp, #-1), stores the input there, stores a splat of 0x8000 (the
; IEEE half-precision bit pattern of -0.0) at [sp, #3, mul vl], reloads the
; slot and performs a single FADDA.
; NOTE(review): -0.0 is presumably used as padding because it is the identity
; for fadd -- confirm against the type legalizer.
define half @fadda_nxv6f16(<vscale x 6 x half> %v, half %s) {
; CHECK-LABEL: fadda_nxv6f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    mov w8, #32768 // =0x8000
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov z2.h, w8
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
; CHECK-NEXT:    fmov s0, s1
; CHECK-NEXT:    st1h { z2.d }, p1, [sp, #3, mul vl]
; CHECK-NEXT:    ld1h { z2.h }, p0/z, [sp]
; CHECK-NEXT:    fadda h0, p0, h0, z2.h
; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call half @llvm.vector.reduce.fadd.nxv6f16(half %s, <vscale x 6 x half> %v)
  ret half %res
}

; <vscale x 10 x half>: the input arrives in two vector registers (z0, z1) and
; the scalar in h2. The expected code issues one FADDA on z0, then rebuilds the
; second operand through a three-vector stack area (addvl sp, sp, #-3) using
; the same 0x8000 splat before issuing the second FADDA.
define half @fadda_nxv10f16(<vscale x 10 x half> %v, half %s) {
; CHECK-LABEL: fadda_nxv10f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-3
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    // kill: def $h2 killed $h2 def $z2
; CHECK-NEXT:    mov w8, #32768 // =0x8000
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    fadda h2, p0, h2, z0.h
; CHECK-NEXT:    st1h { z1.h }, p0, [sp]
; CHECK-NEXT:    mov z0.h, w8
; CHECK-NEXT:    addvl x8, sp, #1
; CHECK-NEXT:    st1h { z0.d }, p1, [sp, #1, mul vl]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [sp]
; CHECK-NEXT:    st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT:    st1h { z0.d }, p1, [sp, #6, mul vl]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [sp, #1, mul vl]
; CHECK-NEXT:    st1h { z1.h }, p0, [sp, #2, mul vl]
; CHECK-NEXT:    st1h { z0.d }, p1, [x8, #7, mul vl]
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [sp, #2, mul vl]
; CHECK-NEXT:    fadda h2, p0, h2, z0.h
; CHECK-NEXT:    fmov s0, s2
; CHECK-NEXT:    addvl sp, sp, #3
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call half @llvm.vector.reduce.fadd.nxv10f16(half %s, <vscale x 10 x half> %v)
  ret half %res
}

; <vscale x 12 x half>: no stack traffic is expected here. After the first
; FADDA on z0, the second operand is formed in registers (uunpklo on z1, then
; uzp1 with the 0x8000 splat) and fed to a second FADDA.
define half @fadda_nxv12f16(<vscale x 12 x half> %v, half %s) {
; CHECK-LABEL: fadda_nxv12f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    // kill: def $h2 killed $h2 def $z2
; CHECK-NEXT:    mov w8, #32768 // =0x8000
; CHECK-NEXT:    fadda h2, p0, h2, z0.h
; CHECK-NEXT:    uunpklo z0.s, z1.h
; CHECK-NEXT:    mov z1.h, w8
; CHECK-NEXT:    uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT:    fadda h2, p0, h2, z0.h
; CHECK-NEXT:    fmov s0, s2
; CHECK-NEXT:    ret
  %res = call half @llvm.vector.reduce.fadd.nxv12f16(half %s, <vscale x 12 x half> %v)
  ret half %res
}

; <vscale x 2 x float>: expected predicate is ptrue p0.d, FADDA uses .s
; elements.
define float @fadda_nxv2f32(float %init, <vscale x 2 x float> %a) {
; CHECK-LABEL: fadda_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT:    fadda s0, p0, s0, z1.s
; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT:    ret
  %res = call float @llvm.vector.reduce.fadd.nxv2f32(float %init, <vscale x 2 x float> %a)
  ret float %res
}

; <vscale x 4 x float>: predicate and FADDA both use .s elements.
define float @fadda_nxv4f32(float %init, <vscale x 4 x float> %a) {
; CHECK-LABEL: fadda_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT:    fadda s0, p0, s0, z1.s
; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT:    ret
  %res = call float @llvm.vector.reduce.fadd.nxv4f32(float %init, <vscale x 4 x float> %a)
  ret float %res
}

; <vscale x 2 x double>: predicate and FADDA both use .d elements.
define double @fadda_nxv2f64(double %init, <vscale x 2 x double> %a) {
; CHECK-LABEL: fadda_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    fadda d0, p0, d0, z1.d
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %res = call double @llvm.vector.reduce.fadd.nxv2f64(double %init, <vscale x 2 x double> %a)
  ret double %res
}

; Declarations of the reduction intrinsics called above.
; NOTE(review): the nxv8f32 variant is declared but not called by any function
; visible in this file -- confirm whether a test for it was intended.
declare half @llvm.vector.reduce.fadd.nxv2f16(half, <vscale x 2 x half>)
declare half @llvm.vector.reduce.fadd.nxv4f16(half, <vscale x 4 x half>)
declare half @llvm.vector.reduce.fadd.nxv8f16(half, <vscale x 8 x half>)
declare half @llvm.vector.reduce.fadd.nxv6f16(half, <vscale x 6 x half>)
declare half @llvm.vector.reduce.fadd.nxv10f16(half, <vscale x 10 x half>)
declare half @llvm.vector.reduce.fadd.nxv12f16(half, <vscale x 12 x half>)
declare float @llvm.vector.reduce.fadd.nxv2f32(float, <vscale x 2 x float>)
declare float @llvm.vector.reduce.fadd.nxv4f32(float, <vscale x 4 x float>)
declare float @llvm.vector.reduce.fadd.nxv8f32(float, <vscale x 8 x float>)
declare double @llvm.vector.reduce.fadd.nxv2f64(double, <vscale x 2 x double>)