; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple=aarch64 -mattr=+fullfp16 < %s | FileCheck %s
; RUN: llc --mtriple=aarch64 < %s | FileCheck %s --check-prefix=CHECKNOFP16

; Pairwise floating-point add patterns on half-precision vectors.
;
; Every function is compiled twice: once with +fullfp16 and once without it.
; With +fullfp16 the expected output uses half-precision faddp/fadd directly.
; Without it the expected output widens the operands to f32 (fcvtl/fcvtl2),
; adds them with fadd on .4s vectors, and narrows the result back to half
; (fcvtn/fcvtn2).
;
; The assertions inside each function are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Lane 0 + lane 1 of a <2 x half>: a shuffle moves lane 1 into lane 0, the
; vectors are added, and lane 0 is extracted. With +fullfp16 the expected code
; is a single scalar "faddp h0, v0.2h".
define half @faddp_2xhalf(<2 x half> %a) {
; CHECK-LABEL: faddp_2xhalf:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    faddp h0, v0.2h
; CHECK-NEXT:    ret
;
; CHECKNOFP16-LABEL: faddp_2xhalf:
; CHECKNOFP16:       // %bb.0: // %entry
; CHECKNOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECKNOFP16-NEXT:    dup v1.4h, v0.h[1]
; CHECKNOFP16-NEXT:    fcvtl v0.4s, v0.4h
; CHECKNOFP16-NEXT:    fcvtl v1.4s, v1.4h
; CHECKNOFP16-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECKNOFP16-NEXT:    fcvtn v0.4h, v0.4s
; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
; CHECKNOFP16-NEXT:    ret
entry:
  %shift = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 undef>
  %0 = fadd <2 x half> %a, %shift
  %1 = extractelement <2 x half> %0, i32 0
  ret half %1
}

; Same as faddp_2xhalf, but with the fadd operands swapped (%shift first).
define half @faddp_2xhalf_commute(<2 x half> %a) {
; CHECK-LABEL: faddp_2xhalf_commute:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    faddp h0, v0.2h
; CHECK-NEXT:    ret
;
; CHECKNOFP16-LABEL: faddp_2xhalf_commute:
; CHECKNOFP16:       // %bb.0: // %entry
; CHECKNOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECKNOFP16-NEXT:    dup v1.4h, v0.h[1]
; CHECKNOFP16-NEXT:    fcvtl v0.4s, v0.4h
; CHECKNOFP16-NEXT:    fcvtl v1.4s, v1.4h
; CHECKNOFP16-NEXT:    fadd v0.4s, v1.4s, v0.4s
; CHECKNOFP16-NEXT:    fcvtn v0.4h, v0.4s
; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
; CHECKNOFP16-NEXT:    ret
entry:
  %shift = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 undef>
  %0 = fadd <2 x half> %shift, %a
  %1 = extractelement <2 x half> %0, i32 0
  ret half %1
}

; Lane 0 + lane 1 of a <4 x half>; the remaining shuffle lanes are undef.
; With +fullfp16 the expected code is still a single "faddp h0, v0.2h".
define half @faddp_4xhalf(<4 x half> %a) {
; CHECK-LABEL: faddp_4xhalf:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    faddp h0, v0.2h
; CHECK-NEXT:    ret
;
; CHECKNOFP16-LABEL: faddp_4xhalf:
; CHECKNOFP16:       // %bb.0: // %entry
; CHECKNOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECKNOFP16-NEXT:    dup v1.4h, v0.h[1]
; CHECKNOFP16-NEXT:    fcvtl v0.4s, v0.4h
; CHECKNOFP16-NEXT:    fcvtl v1.4s, v1.4h
; CHECKNOFP16-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECKNOFP16-NEXT:    fcvtn v0.4h, v0.4s
; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
; CHECKNOFP16-NEXT:    ret
entry:
  %shift = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %0 = fadd <4 x half> %a, %shift
  %1 = extractelement <4 x half> %0, i32 0
  ret half %1
}

; Same as faddp_4xhalf, but with the fadd operands swapped (%shift first).
define half @faddp_4xhalf_commute(<4 x half> %a) {
; CHECK-LABEL: faddp_4xhalf_commute:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    faddp h0, v0.2h
; CHECK-NEXT:    ret
;
; CHECKNOFP16-LABEL: faddp_4xhalf_commute:
; CHECKNOFP16:       // %bb.0: // %entry
; CHECKNOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECKNOFP16-NEXT:    dup v1.4h, v0.h[1]
; CHECKNOFP16-NEXT:    fcvtl v0.4s, v0.4h
; CHECKNOFP16-NEXT:    fcvtl v1.4s, v1.4h
; CHECKNOFP16-NEXT:    fadd v0.4s, v1.4s, v0.4s
; CHECKNOFP16-NEXT:    fcvtn v0.4h, v0.4s
; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
; CHECKNOFP16-NEXT:    ret
entry:
  %shift = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %0 = fadd <4 x half> %shift, %a
  %1 = extractelement <4 x half> %0, i32 0
  ret half %1
}

; Lane 0 + lane 1 of an <8 x half>. With +fullfp16 the expected code is a
; single "faddp h0, v0.2h"; without it both halves of the vector are widened
; and added.
define half @faddp_8xhalf(<8 x half> %a) {
; CHECK-LABEL: faddp_8xhalf:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    faddp h0, v0.2h
; CHECK-NEXT:    ret
;
; CHECKNOFP16-LABEL: faddp_8xhalf:
; CHECKNOFP16:       // %bb.0: // %entry
; CHECKNOFP16-NEXT:    dup v1.8h, v0.h[1]
; CHECKNOFP16-NEXT:    fcvtl v2.4s, v0.4h
; CHECKNOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
; CHECKNOFP16-NEXT:    fcvtl v3.4s, v1.4h
; CHECKNOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
; CHECKNOFP16-NEXT:    fadd v2.4s, v2.4s, v3.4s
; CHECKNOFP16-NEXT:    fadd v1.4s, v0.4s, v1.4s
; CHECKNOFP16-NEXT:    fcvtn v0.4h, v2.4s
; CHECKNOFP16-NEXT:    fcvtn2 v0.8h, v1.4s
; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
; CHECKNOFP16-NEXT:    ret
entry:
  %shift = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %0 = fadd <8 x half> %a, %shift
  %1 = extractelement <8 x half> %0, i32 0
  ret half %1
}

; Same as faddp_8xhalf, but with the fadd operands swapped (%shift first).
define half @faddp_8xhalf_commute(<8 x half> %a) {
; CHECK-LABEL: faddp_8xhalf_commute:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    faddp h0, v0.2h
; CHECK-NEXT:    ret
;
; CHECKNOFP16-LABEL: faddp_8xhalf_commute:
; CHECKNOFP16:       // %bb.0: // %entry
; CHECKNOFP16-NEXT:    dup v1.8h, v0.h[1]
; CHECKNOFP16-NEXT:    fcvtl v2.4s, v0.4h
; CHECKNOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
; CHECKNOFP16-NEXT:    fcvtl v3.4s, v1.4h
; CHECKNOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
; CHECKNOFP16-NEXT:    fadd v2.4s, v3.4s, v2.4s
; CHECKNOFP16-NEXT:    fadd v1.4s, v1.4s, v0.4s
; CHECKNOFP16-NEXT:    fcvtn v0.4h, v2.4s
; CHECKNOFP16-NEXT:    fcvtn2 v0.8h, v1.4s
; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
; CHECKNOFP16-NEXT:    ret
entry:
  %shift = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %0 = fadd <8 x half> %shift, %a
  %1 = extractelement <8 x half> %0, i32 0
  ret half %1
}

; Full-vector variant: the shuffle swaps the two lanes of every adjacent pair
; (mask 1,0,3,2,...) and the swapped vector is added to the original with a
; reassoc fadd. With +fullfp16 the expected code is rev32 followed by a
; half-precision vector fadd.
define <8 x half> @addp_v8f16(<8 x half> %a) {
; CHECK-LABEL: addp_v8f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rev32 v1.8h, v0.8h
; CHECK-NEXT:    fadd v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    ret
;
; CHECKNOFP16-LABEL: addp_v8f16:
; CHECKNOFP16:       // %bb.0: // %entry
; CHECKNOFP16-NEXT:    rev32 v1.8h, v0.8h
; CHECKNOFP16-NEXT:    fcvtl v2.4s, v0.4h
; CHECKNOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
; CHECKNOFP16-NEXT:    fcvtl v3.4s, v1.4h
; CHECKNOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
; CHECKNOFP16-NEXT:    fadd v2.4s, v3.4s, v2.4s
; CHECKNOFP16-NEXT:    fadd v1.4s, v1.4s, v0.4s
; CHECKNOFP16-NEXT:    fcvtn v0.4h, v2.4s
; CHECKNOFP16-NEXT:    fcvtn2 v0.8h, v1.4s
; CHECKNOFP16-NEXT:    ret
entry:
  %s = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  %b = fadd reassoc <8 x half> %s, %a
  ret <8 x half> %b
}

; Same lane-pair swap and reassoc fadd on <16 x half>. With +fullfp16 the
; expected code is "faddp v1.8h, v0.8h, v1.8h" followed by zip1/zip2.
define <16 x half> @addp_v16f16(<16 x half> %a) {
; CHECK-LABEL: addp_v16f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    faddp v1.8h, v0.8h, v1.8h
; CHECK-NEXT:    zip1 v0.8h, v1.8h, v1.8h
; CHECK-NEXT:    zip2 v1.8h, v1.8h, v1.8h
; CHECK-NEXT:    ret
;
; CHECKNOFP16-LABEL: addp_v16f16:
; CHECKNOFP16:       // %bb.0: // %entry
; CHECKNOFP16-NEXT:    rev32 v2.8h, v0.8h
; CHECKNOFP16-NEXT:    rev32 v3.8h, v1.8h
; CHECKNOFP16-NEXT:    fcvtl v4.4s, v0.4h
; CHECKNOFP16-NEXT:    fcvtl v6.4s, v1.4h
; CHECKNOFP16-NEXT:    fcvtl2 v0.4s, v0.8h
; CHECKNOFP16-NEXT:    fcvtl2 v1.4s, v1.8h
; CHECKNOFP16-NEXT:    fcvtl v5.4s, v2.4h
; CHECKNOFP16-NEXT:    fcvtl v7.4s, v3.4h
; CHECKNOFP16-NEXT:    fcvtl2 v2.4s, v2.8h
; CHECKNOFP16-NEXT:    fcvtl2 v3.4s, v3.8h
; CHECKNOFP16-NEXT:    fadd v4.4s, v5.4s, v4.4s
; CHECKNOFP16-NEXT:    fadd v5.4s, v7.4s, v6.4s
; CHECKNOFP16-NEXT:    fadd v2.4s, v2.4s, v0.4s
; CHECKNOFP16-NEXT:    fadd v3.4s, v3.4s, v1.4s
; CHECKNOFP16-NEXT:    fcvtn v0.4h, v4.4s
; CHECKNOFP16-NEXT:    fcvtn v1.4h, v5.4s
; CHECKNOFP16-NEXT:    fcvtn2 v0.8h, v2.4s
; CHECKNOFP16-NEXT:    fcvtn2 v1.8h, v3.4s
; CHECKNOFP16-NEXT:    ret
entry:
  %s = shufflevector <16 x half> %a, <16 x half> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %b = fadd reassoc <16 x half> %s, %a
  ret <16 x half> %b
}