; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple aarch64 -mattr=+fullfp16 < %s | FileCheck %s

; Pairwise-add selection tests for AArch64.
;
; Each function builds a "lane + neighbouring lane" addition out of a
; shufflevector and an add/fadd (or calls the NEON faddp intrinsic directly);
; the assertion lines record the instruction sequence llc emits for it.
; Shuffle operands and mask lanes that are never read are written as poison
; throughout the file.

;------------------------------------------------------------------------------
; Scalar result: lane 0 of (a + shuffle(a, <1, ...>)), i.e. a[0] + a[1].
; Expected output is a single scalar-form faddp/addp. The *_commute variants
; swap the two add operands and expect the same instruction.
;------------------------------------------------------------------------------

define float @faddp_2xfloat(<2 x float> %a) {
; CHECK-LABEL: faddp_2xfloat:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    ret
entry:
  %shift = shufflevector <2 x float> %a, <2 x float> poison, <2 x i32> <i32 1, i32 poison>
  %0 = fadd <2 x float> %a, %shift
  %1 = extractelement <2 x float> %0, i32 0
  ret float %1
}

define float @faddp_4xfloat(<4 x float> %a) {
; CHECK-LABEL: faddp_4xfloat:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    ret
entry:
  %shift = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  %0 = fadd <4 x float> %a, %shift
  %1 = extractelement <4 x float> %0, i32 0
  ret float %1
}

define float @faddp_4xfloat_commute(<4 x float> %a) {
; CHECK-LABEL: faddp_4xfloat_commute:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    ret
entry:
  %shift = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  %0 = fadd <4 x float> %shift, %a
  %1 = extractelement <4 x float> %0, i32 0
  ret float %1
}

define float @faddp_2xfloat_commute(<2 x float> %a) {
; CHECK-LABEL: faddp_2xfloat_commute:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    ret
entry:
  %shift = shufflevector <2 x float> %a, <2 x float> poison, <2 x i32> <i32 1, i32 poison>
  %0 = fadd <2 x float> %shift, %a
  %1 = extractelement <2 x float> %0, i32 0
  ret float %1
}

define double @faddp_2xdouble(<2 x double> %a) {
; CHECK-LABEL: faddp_2xdouble:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    faddp d0, v0.2d
; CHECK-NEXT:    ret
entry:
  %shift = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> <i32 1, i32 poison>
  %0 = fadd <2 x double> %a, %shift
  %1 = extractelement <2 x double> %0, i32 0
  ret double %1
}

define double @faddp_2xdouble_commute(<2 x double> %a) {
; CHECK-LABEL: faddp_2xdouble_commute:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    faddp d0, v0.2d
; CHECK-NEXT:    ret
entry:
  %shift = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> <i32 1, i32 poison>
  %0 = fadd <2 x double> %shift, %a
  %1 = extractelement <2 x double> %0, i32 0
  ret double %1
}

; Integer form of the same pattern: expects addp followed by an fmov that moves
; the result from d0 into x0.
define i64 @addp_2xi64(<2 x i64> %a) {
; CHECK-LABEL: addp_2xi64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    addp d0, v0.2d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
entry:
  %shift = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 poison>
  %0 = add <2 x i64> %a, %shift
  %1 = extractelement <2 x i64> %0, i32 0
  ret i64 %1
}

define i64 @addp_2xi64_commute(<2 x i64> %a) {
; CHECK-LABEL: addp_2xi64_commute:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    addp d0, v0.2d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
entry:
  %shift = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 poison>
  %0 = add <2 x i64> %shift, %a
  %1 = extractelement <2 x i64> %0, i32 0
  ret i64 %1
}

;------------------------------------------------------------------------------
; Strict floating-point variants: the same a[0] + a[1] pattern, but the add is
; llvm.experimental.constrained.fadd with "round.tonearest"/"fpexcept.strict"
; inside a strictfp function. The expected output is the same scalar faddp as
; in the non-strict tests above.
;------------------------------------------------------------------------------

define float @faddp_2xfloat_strict(<2 x float> %a) #0 {
; CHECK-LABEL: faddp_2xfloat_strict:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    ret
entry:
  %shift = shufflevector <2 x float> %a, <2 x float> poison, <2 x i32> <i32 1, i32 poison>
  %0 = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %a, <2 x float> %shift, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  %1 = extractelement <2 x float> %0, i32 0
  ret float %1
}

define float @faddp_4xfloat_strict(<4 x float> %a) #0 {
; CHECK-LABEL: faddp_4xfloat_strict:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    ret
entry:
  %shift = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  %0 = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> %a, <4 x float> %shift, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  %1 = extractelement <4 x float> %0, i32 0
  ret float %1
}

define float @faddp_4xfloat_commute_strict(<4 x float> %a) #0 {
; CHECK-LABEL: faddp_4xfloat_commute_strict:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    ret
entry:
  %shift = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  %0 = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> %shift, <4 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  %1 = extractelement <4 x float> %0, i32 0
  ret float %1
}

define float @faddp_2xfloat_commute_strict(<2 x float> %a) #0 {
; CHECK-LABEL: faddp_2xfloat_commute_strict:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    ret
entry:
  %shift = shufflevector <2 x float> %a, <2 x float> poison, <2 x i32> <i32 1, i32 poison>
  %0 = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %shift, <2 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  %1 = extractelement <2 x float> %0, i32 0
  ret float %1
}

define double @faddp_2xdouble_strict(<2 x double> %a) #0 {
; CHECK-LABEL: faddp_2xdouble_strict:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    faddp d0, v0.2d
; CHECK-NEXT:    ret
entry:
  %shift = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> <i32 1, i32 poison>
  %0 = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %a, <2 x double> %shift, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  %1 = extractelement <2 x double> %0, i32 0
  ret double %1
}

define double @faddp_2xdouble_commute_strict(<2 x double> %a) #0 {
; CHECK-LABEL: faddp_2xdouble_commute_strict:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    faddp d0, v0.2d
; CHECK-NEXT:    ret
entry:
  %shift = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> <i32 1, i32 poison>
  %0 = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %shift, <2 x double> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  %1 = extractelement <2 x double> %0, i32 0
  ret double %1
}

;------------------------------------------------------------------------------
; Whole-vector results: every lane is added to its pair neighbour (the shuffle
; swaps adjacent lanes: <1,0>, <1,0,3,2>, ...). All of these except addp_v8f32
; carry the `reassoc` fast-math flag on the fadd.
;
; Recorded output:
;   addp_v2f64      -> ext + fadd
;   addp_v4f64      -> faddp + dup of each result lane
;   addp_v4f32      -> rev64 + fadd
;   addp_v8f32      -> rev64 + fadd per half (no reassoc flag on this one)
;   addp_v8f32_slow -> faddp + zip1/zip2
;   addp_v16f32     -> two faddp + zip1/zip2 pairs
;------------------------------------------------------------------------------

define <2 x double> @addp_v2f64(<2 x double> %a) {
; CHECK-LABEL: addp_v2f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    fadd v0.2d, v1.2d, v0.2d
; CHECK-NEXT:    ret
entry:
  %s = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> <i32 1, i32 0>
  %b = fadd reassoc <2 x double> %s, %a
  ret <2 x double> %b
}

define <4 x double> @addp_v4f64(<4 x double> %a) {
; CHECK-LABEL: addp_v4f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    faddp v1.2d, v0.2d, v1.2d
; CHECK-NEXT:    dup v0.2d, v1.d[0]
; CHECK-NEXT:    dup v1.2d, v1.d[1]
; CHECK-NEXT:    ret
entry:
  %s = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  %b = fadd reassoc <4 x double> %s, %a
  ret <4 x double> %b
}

define <4 x float> @addp_v4f32(<4 x float> %a) {
; CHECK-LABEL: addp_v4f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rev64 v1.4s, v0.4s
; CHECK-NEXT:    fadd v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    ret
entry:
  %s = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  %b = fadd reassoc <4 x float> %s, %a
  ret <4 x float> %b
}

; Same shuffle as addp_v8f32_slow below, but the fadd has no fast-math flags.
define <8 x float> @addp_v8f32(<8 x float> %a) {
; CHECK-LABEL: addp_v8f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rev64 v2.4s, v1.4s
; CHECK-NEXT:    rev64 v3.4s, v0.4s
; CHECK-NEXT:    fadd v0.4s, v3.4s, v0.4s
; CHECK-NEXT:    fadd v1.4s, v2.4s, v1.4s
; CHECK-NEXT:    ret
entry:
  %s = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  %b = fadd <8 x float> %s, %a
  ret <8 x float> %b
}

define <8 x float> @addp_v8f32_slow(<8 x float> %a) {
; CHECK-LABEL: addp_v8f32_slow:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    faddp v1.4s, v0.4s, v1.4s
; CHECK-NEXT:    zip1 v0.4s, v1.4s, v1.4s
; CHECK-NEXT:    zip2 v1.4s, v1.4s, v1.4s
; CHECK-NEXT:    ret
entry:
  %s = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  %b = fadd reassoc <8 x float> %s, %a
  ret <8 x float> %b
}

define <16 x float> @addp_v16f32(<16 x float> %a) {
; CHECK-LABEL: addp_v16f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    faddp v3.4s, v2.4s, v3.4s
; CHECK-NEXT:    faddp v1.4s, v0.4s, v1.4s
; CHECK-NEXT:    zip1 v2.4s, v3.4s, v3.4s
; CHECK-NEXT:    zip1 v0.4s, v1.4s, v1.4s
; CHECK-NEXT:    zip2 v1.4s, v1.4s, v1.4s
; CHECK-NEXT:    zip2 v3.4s, v3.4s, v3.4s
; CHECK-NEXT:    ret
entry:
  %s = shufflevector <16 x float> %a, <16 x float> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %b = fadd reassoc <16 x float> %s, %a
  ret <16 x float> %b
}

;------------------------------------------------------------------------------
; NEON faddp intrinsic applied to the low and high halves of one wide vector.
; Expected output uses a single full-width faddp with the same register as both
; sources instead of extracting the two halves first.
;------------------------------------------------------------------------------

; a + b, pairwise-add the two halves via the intrinsic, then add the remaining
; two lanes (lane 0 + lane 1) and return the scalar.
define float @faddp_v4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: faddp_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    faddp v0.4s, v0.4s, v0.4s
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    ret
  %1 = fadd <4 x float> %a, %b
  %2 = shufflevector <4 x float> %1, <4 x float> poison, <2 x i32> <i32 0, i32 1>
  %3 = shufflevector <4 x float> %1, <4 x float> poison, <2 x i32> <i32 2, i32 3>
  %4 = tail call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %2, <2 x float> %3)
  %5 = shufflevector <2 x float> %4, <2 x float> poison, <2 x i32> <i32 1, i32 poison>
  %6 = fadd <2 x float> %4, %5
  %7 = extractelement <2 x float> %6, i64 0
  ret float %7
}

; Half-precision version (the run line enables +fullfp16); returns the 4-lane
; pairwise sum directly.
define <4 x half> @faddp_v8f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: faddp_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    faddp v0.8h, v0.8h, v0.8h
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %1 = fadd <8 x half> %a, %b
  %2 = shufflevector <8 x half> %1, <8 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = shufflevector <8 x half> %1, <8 x half> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %4 = tail call <4 x half> @llvm.aarch64.neon.faddp.v4f16(<4 x half> %2, <4 x half> %3)
  ret <4 x half> %4
}

declare <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float>, <2 x float>)
declare <4 x half> @llvm.aarch64.neon.faddp.v4f16(<4 x half>, <4 x half>)

; Attribute group used by the *_strict functions and their constrained calls.
attributes #0 = { strictfp }

declare <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float>, <2 x float>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)