; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=1 < %s | FileCheck %s

;
; LD1SB/LD1B
;

define <vscale x 16 x i32> @ld1b_i8_sext_i32(ptr %base) {
; CHECK-LABEL: ld1b_i8_sext_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1sb { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1sb { z1.s }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1sb { z2.s }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT:    ld1sb { z3.s }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT:    ret
  %wide.load = load <vscale x 16 x i8>, ptr %base
  %res = sext <vscale x 16 x i8> %wide.load to <vscale x 16 x i32>
  ret <vscale x 16 x i32> %res
}

define <vscale x 16 x i32> @ld1b_i8_zext_i32(ptr %base) {
; CHECK-LABEL: ld1b_i8_zext_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1b { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.s }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1b { z2.s }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT:    ld1b { z3.s }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT:    ret
  %wide.load = load <vscale x 16 x i8>, ptr %base
  %res = zext <vscale x 16 x i8> %wide.load to <vscale x 16 x i32>
  ret <vscale x 16 x i32> %res
}

define <vscale x 16 x i64> @ld1b_i8_sext(ptr %base) {
; CHECK-LABEL: ld1b_i8_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1sb { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1sb { z2.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT:    ld1sb { z3.d }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT:    ld1sb { z4.d }, p0/z, [x0, #4, mul vl]
; CHECK-NEXT:    ld1sb { z5.d }, p0/z, [x0, #5, mul vl]
; CHECK-NEXT:    ld1sb { z6.d }, p0/z, [x0, #6, mul vl]
; CHECK-NEXT:    ld1sb { z7.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %wide.load = load <vscale x 16 x i8>, ptr %base
  %res = sext <vscale x 16 x i8> %wide.load to <vscale x 16 x i64>
  ret <vscale x 16 x i64> %res
}

define <vscale x 16 x i64> @ld1b_i8_zext(ptr %base) {
; CHECK-LABEL: ld1b_i8_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1b { z2.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT:    ld1b { z3.d }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT:    ld1b { z4.d }, p0/z, [x0, #4, mul vl]
; CHECK-NEXT:    ld1b { z5.d }, p0/z, [x0, #5, mul vl]
; CHECK-NEXT:    ld1b { z6.d }, p0/z, [x0, #6, mul vl]
; CHECK-NEXT:    ld1b { z7.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %wide.load = load <vscale x 16 x i8>, ptr %base
  %res = zext <vscale x 16 x i8> %wide.load to <vscale x 16 x i64>
  ret <vscale x 16 x i64> %res
}

;
; LD1H
;

define <vscale x 8 x i64> @ld1h_i16_sext(ptr %base) {
; CHECK-LABEL: ld1h_i16_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1sh { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1sh { z2.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT:    ld1sh { z3.d }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT:    ret
  %wide.load = load <vscale x 8 x i16>, ptr %base
  %res = sext <vscale x 8 x i16> %wide.load to <vscale x 8 x i64>
  ret <vscale x 8 x i64> %res
}

define <vscale x 8 x i64> @ld1h_i16_zext(ptr %base) {
; CHECK-LABEL: ld1h_i16_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1h { z2.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT:    ld1h { z3.d }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT:    ret
  %wide.load = load <vscale x 8 x i16>, ptr %base
  %res = zext <vscale x 8 x i16> %wide.load to <vscale x 8 x i64>
  ret <vscale x 8 x i64> %res
}

;
; LD1W
;

define <vscale x 4 x i64> @ld1w_i32_sext(ptr %base) {
; CHECK-LABEL: ld1w_i32_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1sw { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %wide.load = load <vscale x 4 x i32>, ptr %base
  %res = sext <vscale x 4 x i32> %wide.load to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %res
}

define <vscale x 4 x i64> @ld1w_i32_zext(ptr %base) {
; CHECK-LABEL: ld1w_i32_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %wide.load = load <vscale x 4 x i32>, ptr %base
  %res = zext <vscale x 4 x i32> %wide.load to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %res
}


; Extending loads from unpacked to wide illegal types

define <vscale x 4 x i64> @zload_4i8_4i64(ptr %a) {
; CHECK-LABEL: zload_4i8_4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %aval = load <vscale x 4 x i8>, ptr %a
  %aext = zext <vscale x 4 x i8> %aval to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %aext
}

define <vscale x 4 x i64> @zload_4i16_4i64(ptr %a) {
; CHECK-LABEL: zload_4i16_4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %aval = load <vscale x 4 x i16>, ptr %a
  %aext = zext <vscale x 4 x i16> %aval to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %aext
}

define <vscale x 8 x i32> @zload_8i8_8i32(ptr %a) {
; CHECK-LABEL: zload_8i8_8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1b { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.s }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %aval = load <vscale x 8 x i8>, ptr %a
  %aext = zext <vscale x 8 x i8> %aval to <vscale x 8 x i32>
  ret <vscale x 8 x i32> %aext
}

define <vscale x 8 x i64> @zload_8i8_8i64(ptr %a) {
; CHECK-LABEL: zload_8i8_8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1b { z2.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT:    ld1b { z3.d }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT:    ret
  %aval = load <vscale x 8 x i8>, ptr %a
  %aext = zext <vscale x 8 x i8> %aval to <vscale x 8 x i64>
  ret <vscale x 8 x i64> %aext
}

define <vscale x 4 x i64> @sload_4i8_4i64(ptr %a) {
; CHECK-LABEL: sload_4i8_4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1sb { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %aval = load <vscale x 4 x i8>, ptr %a
  %aext = sext <vscale x 4 x i8> %aval to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %aext
}

define <vscale x 4 x i64> @sload_4i16_4i64(ptr %a) {
; CHECK-LABEL: sload_4i16_4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1sh { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %aval = load <vscale x 4 x i16>, ptr %a
  %aext = sext <vscale x 4 x i16> %aval to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %aext
}

define <vscale x 8 x i32> @sload_8i8_8i32(ptr %a) {
; CHECK-LABEL: sload_8i8_8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1sb { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1sb { z1.s }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %aval = load <vscale x 8 x i8>, ptr %a
  %aext = sext <vscale x 8 x i8> %aval to <vscale x 8 x i32>
  ret <vscale x 8 x i32> %aext
}

define <vscale x 8 x i64> @sload_8i8_8i64(ptr %a) {
; CHECK-LABEL: sload_8i8_8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1sb { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1sb { z2.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT:    ld1sb { z3.d }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT:    ret
  %aval = load <vscale x 8 x i8>, ptr %a
  %aext = sext <vscale x 8 x i8> %aval to <vscale x 8 x i64>
  ret <vscale x 8 x i64> %aext
}