; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-FP16

; Every function below follows the same shape: load one scalar from %p, splat
; it across a vector (insertelement into lane 0 + zero-mask shufflevector),
; combine the splat with the vector argument(s), store the result to %ps and
; return %p advanced by one element. The expected assembly has the scalar load
; and the pointer increment merged into a single post-indexed load.
;
; The unused lanes / unused shuffle operand of the splat idiom are written as
; poison rather than undef, since undef is deprecated in new IR.

; i8 splat times <16 x i8>: post-indexed ld1r (#1) feeding a vector mul.
define ptr @mul_v16i8(ptr %p, ptr %ps, <16 x i8> %t) {
; CHECK-LABEL: mul_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1r { v1.16b }, [x0], #1
; CHECK-NEXT:    mul v0.16b, v1.16b, v0.16b
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
  %l = load i8, ptr %p
  %i = insertelement <16 x i8> poison, i8 %l, i32 0
  %s = shufflevector <16 x i8> %i, <16 x i8> poison, <16 x i32> zeroinitializer
  %m = mul <16 x i8> %s, %t
  store <16 x i8> %m, ptr %ps
  %g = getelementptr i8, ptr %p, i64 1
  ret ptr %g
}

; i16 splat times <8 x i16>: post-indexed ld1r (#2) feeding a vector mul.
define ptr @mul_v8i16(ptr %p, ptr %ps, <8 x i16> %t) {
; CHECK-LABEL: mul_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1r { v1.8h }, [x0], #2
; CHECK-NEXT:    mul v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
  %l = load i16, ptr %p
  %i = insertelement <8 x i16> poison, i16 %l, i32 0
  %s = shufflevector <8 x i16> %i, <8 x i16> poison, <8 x i32> zeroinitializer
  %m = mul <8 x i16> %s, %t
  store <8 x i16> %m, ptr %ps
  %g = getelementptr i16, ptr %p, i64 1
  ret ptr %g
}

; i32 splat times <4 x i32>: post-indexed ld1r (#4) feeding a vector mul.
define ptr @mul_v4i32(ptr %p, ptr %ps, <4 x i32> %t) {
; CHECK-LABEL: mul_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1r { v1.4s }, [x0], #4
; CHECK-NEXT:    mul v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
  %l = load i32, ptr %p
  %i = insertelement <4 x i32> poison, i32 %l, i32 0
  %s = shufflevector <4 x i32> %i, <4 x i32> poison, <4 x i32> zeroinitializer
  %m = mul <4 x i32> %s, %t
  store <4 x i32> %m, ptr %ps
  %g = getelementptr i32, ptr %p, i64 1
  ret ptr %g
}

; half splat times <4 x half>. Without +fullfp16 the splat is a post-indexed
; ld1r and the multiply is done in f32 (fcvtl / fmul / fcvtn). With +fullfp16
; the scalar is loaded by a post-indexed ldr and used as lane 0 of a
; by-element fmul.
define ptr @fmul_v4f16(ptr %p, ptr %ps, <4 x half> %t) {
; CHECK-NOFP16-LABEL: fmul_v4f16:
; CHECK-NOFP16:       // %bb.0:
; CHECK-NOFP16-NEXT:    ld1r { v1.4h }, [x0], #2
; CHECK-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
; CHECK-NOFP16-NEXT:    fcvtl v1.4s, v1.4h
; CHECK-NOFP16-NEXT:    fmul v0.4s, v1.4s, v0.4s
; CHECK-NOFP16-NEXT:    fcvtn v0.4h, v0.4s
; CHECK-NOFP16-NEXT:    str d0, [x1]
; CHECK-NOFP16-NEXT:    ret
;
; CHECK-FP16-LABEL: fmul_v4f16:
; CHECK-FP16:       // %bb.0:
; CHECK-FP16-NEXT:    ldr h1, [x0], #2
; CHECK-FP16-NEXT:    fmul v0.4h, v0.4h, v1.h[0]
; CHECK-FP16-NEXT:    str d0, [x1]
; CHECK-FP16-NEXT:    ret
  %l = load half, ptr %p
  %i = insertelement <4 x half> poison, half %l, i32 0
  %s = shufflevector <4 x half> %i, <4 x half> poison, <4 x i32> zeroinitializer
  %m = fmul <4 x half> %s, %t
  store <4 x half> %m, ptr %ps
  %g = getelementptr half, ptr %p, i64 1
  ret ptr %g
}

; fast fmul + fadd on <4 x half>. With +fullfp16 this becomes a by-element
; fmla fed by a post-indexed ldr. Without it, the multiply and the add are
; each done in f32 with a narrowing/widening round trip between them.
define ptr @fmla_v4f16(ptr %p, ptr %ps, <4 x half> %t, <4 x half> %u) {
; CHECK-NOFP16-LABEL: fmla_v4f16:
; CHECK-NOFP16:       // %bb.0:
; CHECK-NOFP16-NEXT:    ld1r { v2.4h }, [x0], #2
; CHECK-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
; CHECK-NOFP16-NEXT:    fcvtl v1.4s, v1.4h
; CHECK-NOFP16-NEXT:    fcvtl v2.4s, v2.4h
; CHECK-NOFP16-NEXT:    fmul v0.4s, v2.4s, v0.4s
; CHECK-NOFP16-NEXT:    fcvtn v0.4h, v0.4s
; CHECK-NOFP16-NEXT:    fcvtl v0.4s, v0.4h
; CHECK-NOFP16-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-NOFP16-NEXT:    fcvtn v0.4h, v0.4s
; CHECK-NOFP16-NEXT:    str d0, [x1]
; CHECK-NOFP16-NEXT:    ret
;
; CHECK-FP16-LABEL: fmla_v4f16:
; CHECK-FP16:       // %bb.0:
; CHECK-FP16-NEXT:    ldr h2, [x0], #2
; CHECK-FP16-NEXT:    fmla v1.4h, v0.4h, v2.h[0]
; CHECK-FP16-NEXT:    str d1, [x1]
; CHECK-FP16-NEXT:    ret
  %l = load half, ptr %p
  %i = insertelement <4 x half> poison, half %l, i32 0
  %s = shufflevector <4 x half> %i, <4 x half> poison, <4 x i32> zeroinitializer
  %m = fmul fast <4 x half> %s, %t
  %a = fadd fast <4 x half> %m, %u
  store <4 x half> %a, ptr %ps
  %g = getelementptr half, ptr %p, i64 1
  ret ptr %g
}

; float splat times <4 x float>: post-indexed ldr (#4) feeding a by-element
; fmul on lane 0.
define ptr @fmul_v4f32(ptr %p, ptr %ps, <4 x float> %t) {
; CHECK-LABEL: fmul_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s1, [x0], #4
; CHECK-NEXT:    fmul v0.4s, v0.4s, v1.s[0]
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
  %l = load float, ptr %p
  %i = insertelement <4 x float> poison, float %l, i32 0
  %s = shufflevector <4 x float> %i, <4 x float> poison, <4 x i32> zeroinitializer
  %m = fmul <4 x float> %s, %t
  store <4 x float> %m, ptr %ps
  %g = getelementptr float, ptr %p, i64 1
  ret ptr %g
}

; fast fmul + fadd on <4 x float>: post-indexed ldr (#4) feeding a by-element
; fmla on lane 0.
define ptr @fmla_v4f32(ptr %p, ptr %ps, <4 x float> %t, <4 x float> %u) {
; CHECK-LABEL: fmla_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s2, [x0], #4
; CHECK-NEXT:    fmla v1.4s, v0.4s, v2.s[0]
; CHECK-NEXT:    str q1, [x1]
; CHECK-NEXT:    ret
  %l = load float, ptr %p
  %i = insertelement <4 x float> poison, float %l, i32 0
  %s = shufflevector <4 x float> %i, <4 x float> poison, <4 x i32> zeroinitializer
  %m = fmul fast <4 x float> %s, %t
  %a = fadd fast <4 x float> %m, %u
  store <4 x float> %a, ptr %ps
  %g = getelementptr float, ptr %p, i64 1
  ret ptr %g
}

; double splat times <2 x double>: post-indexed ldr (#8) feeding a by-element
; fmul on lane 0.
define ptr @fmul_v2f64(ptr %p, ptr %ps, <2 x double> %t) {
; CHECK-LABEL: fmul_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d1, [x0], #8
; CHECK-NEXT:    fmul v0.2d, v0.2d, v1.d[0]
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
  %l = load double, ptr %p
  %i = insertelement <2 x double> poison, double %l, i32 0
  %s = shufflevector <2 x double> %i, <2 x double> poison, <2 x i32> zeroinitializer
  %m = fmul <2 x double> %s, %t
  store <2 x double> %m, ptr %ps
  %g = getelementptr double, ptr %p, i64 1
  ret ptr %g
}

; fast fmul + fadd on <2 x double>: post-indexed ldr (#8) feeding a by-element
; fmla on lane 0.
define ptr @fmla_v2f64(ptr %p, ptr %ps, <2 x double> %t, <2 x double> %u) {
; CHECK-LABEL: fmla_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d2, [x0], #8
; CHECK-NEXT:    fmla v1.2d, v0.2d, v2.d[0]
; CHECK-NEXT:    str q1, [x1]
; CHECK-NEXT:    ret
  %l = load double, ptr %p
  %i = insertelement <2 x double> poison, double %l, i32 0
  %s = shufflevector <2 x double> %i, <2 x double> poison, <2 x i32> zeroinitializer
  %m = fmul fast <2 x double> %s, %t
  %a = fadd fast <2 x double> %m, %u
  store <2 x double> %a, ptr %ps
  %g = getelementptr double, ptr %p, i64 1
  ret ptr %g
}