; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512

target triple = "aarch64-unknown-linux-gnu"

;
; extractelement
;
; Every function below extracts the last lane (constant index) of a
; fixed-length floating-point vector with SVE enabled through attribute #0.
; The same IR is compiled with a minimum SVE register size of 256, 512 and
; 2048 bits; the 512-bit and 2048-bit runs share the VBITS_GE_512 checks.
; Functions that carry a vscale_range attribute are expected to produce the
; same code for all three runs and are therefore verified with the common
; prefix only.
;

; Don't use SVE for 64-bit vectors.
define half @extractelement_v4f16(<4 x half> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: extractelement_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov h0, v0.h[3]
; CHECK-NEXT:    ret
  %r = extractelement <4 x half> %op1, i64 3
  ret half %r
}

; Don't use SVE for 128-bit vectors.
define half @extractelement_v8f16(<8 x half> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: extractelement_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov h0, v0.h[7]
; CHECK-NEXT:    ret
  %r = extractelement <8 x half> %op1, i64 7
  ret half %r
}

; 256-bit vector loaded from memory: one predicated SVE load followed by an
; indexed MOV of the requested lane.
define half @extractelement_v16f16(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: extractelement_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    mov z0.h, z0.h[15]
; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT:    ret
  %op1 = load <16 x half>, ptr %a
  %r = extractelement <16 x half> %op1, i64 15
  ret half %r
}

; No vscale_range here, so the output depends on the minimum register size of
; the run. With 256-bit registers only the upper half of the vector is loaded
; (element offset 16) and lane 15 of that half is extracted; with registers of
; at least 512 bits the whole vector is loaded and lane 31 is taken directly.
define half @extractelement_v32f16(ptr %a) #0 {
; VBITS_GE_256-LABEL: extractelement_v32f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    mov z0.h, z0.h[15]
; VBITS_GE_256-NEXT:    // kill: def $h0 killed $h0 killed $z0
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: extractelement_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    mov z0.h, z0.h[31]
; VBITS_GE_512-NEXT:    // kill: def $h0 killed $h0 killed $z0
; VBITS_GE_512-NEXT:    ret
  %op1 = load <32 x half>, ptr %a
  %r = extractelement <32 x half> %op1, i64 31
  ret half %r
}

; Lane 63 is selected by building a predicate with WHILELS and reading the
; last active element with LASTB instead of using an indexed MOV.
; NOTE(review): presumably the lane index is beyond what the indexed MOV form
; can encode - confirm against the SVE DUP (indexed) encoding.
define half @extractelement_v64f16(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: extractelement_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    mov w8, #63 // =0x3f
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    whilels p0.h, xzr, x8
; CHECK-NEXT:    lastb h0, p0, z0.h
; CHECK-NEXT:    ret
  %op1 = load <64 x half>, ptr %a
  %r = extractelement <64 x half> %op1, i64 63
  ret half %r
}

; Same WHILELS + LASTB sequence as above, for lane 127 of a 128-element vector.
define half @extractelement_v128f16(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: extractelement_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    mov w8, #127 // =0x7f
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    whilels p0.h, xzr, x8
; CHECK-NEXT:    lastb h0, p0, z0.h
; CHECK-NEXT:    ret
  %op1 = load <128 x half>, ptr %a
  %r = extractelement <128 x half> %op1, i64 127
  ret half %r
}

; Don't use SVE for 64-bit vectors.
define float @extractelement_v2f32(<2 x float> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: extractelement_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov s0, v0.s[1]
; CHECK-NEXT:    ret
  %r = extractelement <2 x float> %op1, i64 1
  ret float %r
}

; Don't use SVE for 128-bit vectors.
define float @extractelement_v4f32(<4 x float> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: extractelement_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov s0, v0.s[3]
; CHECK-NEXT:    ret
  %r = extractelement <4 x float> %op1, i64 3
  ret float %r
}

; 256-bit vector loaded from memory: one predicated SVE load followed by an
; indexed MOV of the requested lane.
define float @extractelement_v8f32(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: extractelement_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    mov z0.s, z0.s[7]
; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT:    ret
  %op1 = load <8 x float>, ptr %a
  %r = extractelement <8 x float> %op1, i64 7
  ret float %r
}

; No vscale_range: with 256-bit registers only the upper half is loaded
; (element offset 8) and lane 7 of that half is extracted; with registers of
; at least 512 bits lane 15 of the full vector is taken directly.
define float @extractelement_v16f32(ptr %a) #0 {
; VBITS_GE_256-LABEL: extractelement_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    mov z0.s, z0.s[7]
; VBITS_GE_256-NEXT:    // kill: def $s0 killed $s0 killed $z0
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: extractelement_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    mov z0.s, z0.s[15]
; VBITS_GE_512-NEXT:    // kill: def $s0 killed $s0 killed $z0
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x float>, ptr %a
  %r = extractelement <16 x float> %op1, i64 15
  ret float %r
}

; Lane 31 is selected with a WHILELS predicate and LASTB rather than an
; indexed MOV.
define float @extractelement_v32f32(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: extractelement_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    mov w8, #31 // =0x1f
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    whilels p0.s, xzr, x8
; CHECK-NEXT:    lastb s0, p0, z0.s
; CHECK-NEXT:    ret
  %op1 = load <32 x float>, ptr %a
  %r = extractelement <32 x float> %op1, i64 31
  ret float %r
}

; Same WHILELS + LASTB sequence as above, for lane 63 of a 64-element vector.
define float @extractelement_v64f32(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: extractelement_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    mov w8, #63 // =0x3f
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    whilels p0.s, xzr, x8
; CHECK-NEXT:    lastb s0, p0, z0.s
; CHECK-NEXT:    ret
  %op1 = load <64 x float>, ptr %a
  %r = extractelement <64 x float> %op1, i64 63
  ret float %r
}

; Don't use SVE for 64-bit vectors.
; Extracting lane 0 of a one-element vector emits nothing but the return.
define double @extractelement_v1f64(<1 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: extractelement_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %r = extractelement <1 x double> %op1, i64 0
  ret double %r
}

; Don't use SVE for 128-bit vectors.
define double @extractelement_v2f64(<2 x double> %op1) vscale_range(2,0) #0 {
; CHECK-LABEL: extractelement_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov d0, v0.d[1]
; CHECK-NEXT:    ret
  %r = extractelement <2 x double> %op1, i64 1
  ret double %r
}

; 256-bit vector loaded from memory: one predicated SVE load followed by an
; indexed MOV of the requested lane.
define double @extractelement_v4f64(ptr %a) vscale_range(2,0) #0 {
; CHECK-LABEL: extractelement_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    mov z0.d, z0.d[3]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %op1 = load <4 x double>, ptr %a
  %r = extractelement <4 x double> %op1, i64 3
  ret double %r
}

; No vscale_range: with 256-bit registers only the upper half is loaded
; (element offset 4) and lane 3 of that half is extracted; with registers of
; at least 512 bits lane 7 of the full vector is taken directly.
define double @extractelement_v8f64(ptr %a) #0 {
; VBITS_GE_256-LABEL: extractelement_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    mov z0.d, z0.d[3]
; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $z0
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: extractelement_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    mov z0.d, z0.d[7]
; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $z0
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x double>, ptr %a
  %r = extractelement <8 x double> %op1, i64 7
  ret double %r
}

; Lane 15 is selected with a WHILELS predicate and LASTB rather than an
; indexed MOV.
define double @extractelement_v16f64(ptr %a) vscale_range(8,0) #0 {
; CHECK-LABEL: extractelement_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    mov w8, #15 // =0xf
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    whilels p0.d, xzr, x8
; CHECK-NEXT:    lastb d0, p0, z0.d
; CHECK-NEXT:    ret
  %op1 = load <16 x double>, ptr %a
  %r = extractelement <16 x double> %op1, i64 15
  ret double %r
}

; Same WHILELS + LASTB sequence as above, for lane 31 of a 32-element vector.
define double @extractelement_v32f64(ptr %a) vscale_range(16,0) #0 {
; CHECK-LABEL: extractelement_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    mov w8, #31 // =0x1f
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    whilels p0.d, xzr, x8
; CHECK-NEXT:    lastb d0, p0, z0.d
; CHECK-NEXT:    ret
  %op1 = load <32 x double>, ptr %a
  %r = extractelement <32 x double> %op1, i64 31
  ret double %r
}

; All functions only require the base SVE feature.
attributes #0 = { "target-features"="+sve" }