; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs -o - %s | FileCheck %s

; Each function below builds two half-width vector results and joins them with
; an identity shufflevector (mask 0..N-1, i.e. a plain concatenation). The
; expected assembly records whether the two half-width operations end up as a
; single full-width operation on concatenated inputs.

; Two <4 x i16> adds concatenated into <8 x i16>. Expected output: the inputs
; are joined with lane moves and a single 8h add is performed.
define <8 x i16> @concat_add(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
; CHECK-LABEL: concat_add:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mov v1.d[1], v3.d[0]
; CHECK-NEXT:    mov v0.d[1], v2.d[0]
; CHECK-NEXT:    add v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %x = add <4 x i16> %a, %b
  %y = add <4 x i16> %c, %d
  %z = shufflevector <4 x i16> %x, <4 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %z
}

; The adds happen on <4 x i32> and each result is truncated to <4 x i16> before
; the concatenation. Expected output: two separate 4s adds followed by one uzp1
; that narrows and joins both results.
define <8 x i16> @concat_addtunc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
; CHECK-LABEL: concat_addtunc:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add v2.4s, v2.4s, v3.4s
; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
; CHECK-NEXT:    ret
  %x = add <4 x i32> %a, %b
  %y = add <4 x i32> %c, %d
  %xt = trunc <4 x i32> %x to <4 x i16>
  %yt = trunc <4 x i32> %y to <4 x i16>
  %z = shufflevector <4 x i16> %xt, <4 x i16> %yt, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %z
}

; Same inputs as above, but the truncations come first and the adds operate on
; the <4 x i16> values. Expected output: two uzp1 instructions build the
; narrowed, concatenated operands and a single 8h add follows.
define <8 x i16> @concat_addtunc2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
; CHECK-LABEL: concat_addtunc2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 v1.8h, v1.8h, v3.8h
; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
; CHECK-NEXT:    add v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %at = trunc <4 x i32> %a to <4 x i16>
  %bt = trunc <4 x i32> %b to <4 x i16>
  %ct = trunc <4 x i32> %c to <4 x i16>
  %dt = trunc <4 x i32> %d to <4 x i16>
  %x = add <4 x i16> %at, %bt
  %y = add <4 x i16> %ct, %dt
  %z = shufflevector <4 x i16> %x, <4 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %z
}

; Two <4 x i16> subs concatenated into <8 x i16>. Expected output: a single 8h
; sub on the joined inputs.
define <8 x i16> @concat_sub(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
; CHECK-LABEL: concat_sub:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mov v1.d[1], v3.d[0]
; CHECK-NEXT:    mov v0.d[1], v2.d[0]
; CHECK-NEXT:    sub v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %x = sub <4 x i16> %a, %b
  %y = sub <4 x i16> %c, %d
  %z = shufflevector <4 x i16> %x, <4 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %z
}

; Two <4 x i16> muls concatenated into <8 x i16>. Expected output: a single 8h
; mul on the joined inputs.
define <8 x i16> @concat_mul(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
; CHECK-LABEL: concat_mul:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mov v1.d[1], v3.d[0]
; CHECK-NEXT:    mov v0.d[1], v2.d[0]
; CHECK-NEXT:    mul v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %x = mul <4 x i16> %a, %b
  %y = mul <4 x i16> %c, %d
  %z = shufflevector <4 x i16> %x, <4 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %z
}

; Two <4 x i16> xors concatenated into <8 x i16>. Expected output: a single
; 16b eor on the joined inputs.
define <8 x i16> @concat_xor(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
; CHECK-LABEL: concat_xor:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mov v1.d[1], v3.d[0]
; CHECK-NEXT:    mov v0.d[1], v2.d[0]
; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %x = xor <4 x i16> %a, %b
  %y = xor <4 x i16> %c, %d
  %z = shufflevector <4 x i16> %x, <4 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %z
}

; Two <4 x half> fadds concatenated into <8 x half>. Expected output: a single
; 8h fadd on the joined inputs.
define <8 x half> @concat_fadd(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) {
; CHECK-LABEL: concat_fadd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mov v1.d[1], v3.d[0]
; CHECK-NEXT:    mov v0.d[1], v2.d[0]
; CHECK-NEXT:    fadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %x = fadd <4 x half> %a, %b
  %y = fadd <4 x half> %c, %d
  %z = shufflevector <4 x half> %x, <4 x half> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x half> %z
}

; Two <4 x half> fmuls concatenated into <8 x half>. Expected output: a single
; 8h fmul on the joined inputs.
define <8 x half> @concat_fmul(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) {
; CHECK-LABEL: concat_fmul:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mov v1.d[1], v3.d[0]
; CHECK-NEXT:    mov v0.d[1], v2.d[0]
; CHECK-NEXT:    fmul v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %x = fmul <4 x half> %a, %b
  %y = fmul <4 x half> %c, %d
  %z = shufflevector <4 x half> %x, <4 x half> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x half> %z
}

; Two calls to the same intrinsic (llvm.minnum.v4f16) concatenated into
; <8 x half>. Expected output: a single 8h fminnm on the joined inputs.
define <8 x half> @concat_min(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) {
; CHECK-LABEL: concat_min:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mov v1.d[1], v3.d[0]
; CHECK-NEXT:    mov v0.d[1], v2.d[0]
; CHECK-NEXT:    fminnm v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %x = call <4 x half> @llvm.minnum.v4f16(<4 x half> %a, <4 x half> %b)
  %y = call <4 x half> @llvm.minnum.v4f16(<4 x half> %c, <4 x half> %d)
  %z = shufflevector <4 x half> %x, <4 x half> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x half> %z
}

; Negative case: the two halves use different intrinsics (minnum and maxnum).
; Expected output: both 4h operations stay separate and only their results are
; joined with a lane move.
define <8 x half> @concat_minmax(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) {
; CHECK-LABEL: concat_minmax:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmaxnm v2.4h, v2.4h, v3.4h
; CHECK-NEXT:    fminnm v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    mov v0.d[1], v2.d[0]
; CHECK-NEXT:    ret
  %x = call <4 x half> @llvm.minnum.v4f16(<4 x half> %a, <4 x half> %b)
  %y = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %c, <4 x half> %d)
  %z = shufflevector <4 x half> %x, <4 x half> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x half> %z
}

; Loads two <8 x i16> vectors from each pointer (byte offsets 0 and 16) and
; compares them unsigned in both directions. Each i8 result lane is -1 when the
; %b element is greater, otherwise 1 when the %a element is greater, otherwise
; 0. The two <8 x i8> halves are then concatenated into <16 x i8>.
; Expected output: four 8h cmhi compares, two uzp1 narrowing concatenations,
; then an and with the splat of 1 and an orr to merge the masks.
define <16 x i8> @signOf_neon(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LABEL: signOf_neon:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldp q1, q2, [x0]
; CHECK-NEXT:    movi v0.16b, #1
; CHECK-NEXT:    ldp q3, q4, [x1]
; CHECK-NEXT:    cmhi v5.8h, v1.8h, v3.8h
; CHECK-NEXT:    cmhi v6.8h, v2.8h, v4.8h
; CHECK-NEXT:    cmhi v1.8h, v3.8h, v1.8h
; CHECK-NEXT:    cmhi v2.8h, v4.8h, v2.8h
; CHECK-NEXT:    uzp1 v3.16b, v5.16b, v6.16b
; CHECK-NEXT:    uzp1 v1.16b, v1.16b, v2.16b
; CHECK-NEXT:    and v0.16b, v3.16b, v0.16b
; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %0 = load <8 x i16>, ptr %a, align 2
  %add.ptr = getelementptr inbounds i8, ptr %a, i64 16
  %1 = load <8 x i16>, ptr %add.ptr, align 2
  %2 = load <8 x i16>, ptr %b, align 2
  %add.ptr6 = getelementptr inbounds i8, ptr %b, i64 16
  %3 = load <8 x i16>, ptr %add.ptr6, align 2
  %cmp.i33 = icmp ugt <8 x i16> %0, %2
  %cmp.i31 = icmp ugt <8 x i16> %1, %3
  %cmp.i29 = icmp ugt <8 x i16> %2, %0
  %cmp.i = icmp ugt <8 x i16> %3, %1
  %vmovn.i38.neg = zext <8 x i1> %cmp.i33 to <8 x i8>
  %vmovn.i37.neg = zext <8 x i1> %cmp.i31 to <8 x i8>
  %4 = select <8 x i1> %cmp.i29, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> %vmovn.i38.neg
  %5 = select <8 x i1> %cmp.i, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> %vmovn.i37.neg
  %or.i = shufflevector <8 x i8> %4, <8 x i8> %5, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %or.i
}