; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

;
; Unpredicated dup instruction (which is an alias for mov):
; * register + register,
; * register + immediate
;

define <vscale x 16 x i8> @dup_i8(i8 %b) {
; CHECK-LABEL: dup_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.b, w0
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 16 x i8> @dup_imm_i8() {
; CHECK-LABEL: dup_imm_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.b, #16 // =0x10
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 16)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @dup_i16(i16 %b) {
; CHECK-LABEL: dup_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.h, w0
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 8 x i16> @dup_imm_i16(i16 %b) {
; CHECK-LABEL: dup_imm_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.h, #16 // =0x10
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 16)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @dup_i32(i32 %b) {
; CHECK-LABEL: dup_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.s, w0
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 4 x i32> @dup_imm_i32(i32 %b) {
; CHECK-LABEL: dup_imm_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.s, #16 // =0x10
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 16)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @dup_i64(i64 %b) {
; CHECK-LABEL: dup_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, x0
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 2 x i64> @dup_imm_i64(i64 %b) {
; CHECK-LABEL: dup_imm_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, #16 // =0x10
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64 16)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @dup_f16(half %b) {
; CHECK-LABEL: dup_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    mov z0.h, h0
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @dup_bf16(bfloat %b) #0 {
; CHECK-LABEL: dup_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    mov z0.h, h0
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @dup_imm_f16(half %b) {
; CHECK-LABEL: dup_imm_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov z0.h, #16.00000000
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 16.)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @dup_f32(float %b) {
; CHECK-LABEL: dup_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT:    mov z0.s, s0
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 4 x float> @dup_imm_f32(float %b) {
; CHECK-LABEL: dup_imm_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov z0.s, #16.00000000
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 16.)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @dup_f64(double %b) {
; CHECK-LABEL: dup_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    mov z0.d, d0
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double %b)
  ret <vscale x 2 x double> %out
}

define <vscale x 2 x double> @dup_imm_f64(double %b) {
; CHECK-LABEL: dup_imm_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov z0.d, #16.00000000
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 16.)
  ret <vscale x 2 x double> %out
}

define <vscale x 2 x float> @dup_fmov_imm_f32_2() {
; CHECK-LABEL: dup_fmov_imm_f32_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #1109917696
; CHECK-NEXT:    mov z0.s, w8
; CHECK-NEXT:    ret
  %out = tail call <vscale x 2 x float> @llvm.aarch64.sve.dup.x.nxv2f32(float 4.200000e+01)
  ret <vscale x 2 x float> %out
}

define <vscale x 4 x float> @dup_fmov_imm_f32_4() {
; CHECK-LABEL: dup_fmov_imm_f32_4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #1109917696
; CHECK-NEXT:    mov z0.s, w8
; CHECK-NEXT:    ret
  %out = tail call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 4.200000e+01)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @dup_fmov_imm_f64_2() {
; CHECK-LABEL: dup_fmov_imm_f64_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x8, #4631107791820423168
; CHECK-NEXT:    mov z0.d, x8
; CHECK-NEXT:    ret
  %out = tail call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 4.200000e+01)
  ret <vscale x 2 x double> %out
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8)
declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat)
declare <vscale x 2 x float> @llvm.aarch64.sve.dup.x.nxv2f32(float)
declare <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float)
declare <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }