; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=1 < %s | FileCheck %s

;
; LD1B
;

define <vscale x 16 x i32> @masked_ld1b_i8_sext_i32(ptr %base, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: masked_ld1b_i8_sext_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    sunpklo z1.h, z0.b
; CHECK-NEXT:    sunpkhi z3.h, z0.b
; CHECK-NEXT:    sunpklo z0.s, z1.h
; CHECK-NEXT:    sunpkhi z1.s, z1.h
; CHECK-NEXT:    sunpklo z2.s, z3.h
; CHECK-NEXT:    sunpkhi z3.s, z3.h
; CHECK-NEXT:    ret
  %wide.masked.load = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr %base, i32 2, <vscale x 16 x i1> %mask, <vscale x 16 x i8> undef)
  %res = sext <vscale x 16 x i8> %wide.masked.load to <vscale x 16 x i32>
  ret <vscale x 16 x i32> %res
}

define <vscale x 8 x i32> @masked_ld1b_nxv8i8_sext_i32(ptr %a, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: masked_ld1b_nxv8i8_sext_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sb { z1.h }, p0/z, [x0]
; CHECK-NEXT:    sunpklo z0.s, z1.h
; CHECK-NEXT:    sunpkhi z1.s, z1.h
; CHECK-NEXT:    ret
  %wide.masked.load = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr %a, i32 1, <vscale x 8 x i1> %mask, <vscale x 8 x i8> poison)
  %res = sext <vscale x 8 x i8> %wide.masked.load to <vscale x 8 x i32>
  ret <vscale x 8 x i32> %res
}

define <vscale x 16 x i32> @masked_ld1b_i8_zext_i32(ptr %base, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: masked_ld1b_i8_zext_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    uunpklo z1.h, z0.b
; CHECK-NEXT:    uunpkhi z3.h, z0.b
; CHECK-NEXT:    uunpklo z0.s, z1.h
; CHECK-NEXT:    uunpkhi z1.s, z1.h
; CHECK-NEXT:    uunpklo z2.s, z3.h
; CHECK-NEXT:    uunpkhi z3.s, z3.h
; CHECK-NEXT:    ret
  %wide.masked.load = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr %base, i32 2, <vscale x 16 x i1> %mask, <vscale x 16 x i8> undef)
  %res = zext <vscale x 16 x i8> %wide.masked.load to <vscale x 16 x i32>
  ret <vscale x 16 x i32> %res
}

define <vscale x 8 x i32> @masked_ld1b_nxv8i8_zext_i32(ptr %a, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: masked_ld1b_nxv8i8_zext_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z1.h }, p0/z, [x0]
; CHECK-NEXT:    uunpklo z0.s, z1.h
; CHECK-NEXT:    uunpkhi z1.s, z1.h
; CHECK-NEXT:    ret
  %wide.masked.load = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr %a, i32 1, <vscale x 8 x i1> %mask, <vscale x 8 x i8> poison)
  %res = zext <vscale x 8 x i8> %wide.masked.load to <vscale x 8 x i32>
  ret <vscale x 8 x i32> %res
}

define <vscale x 16 x i64> @masked_ld1b_i8_sext(ptr %base, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: masked_ld1b_i8_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    sunpklo z1.h, z0.b
; CHECK-NEXT:    sunpkhi z0.h, z0.b
; CHECK-NEXT:    sunpklo z2.s, z1.h
; CHECK-NEXT:    sunpkhi z3.s, z1.h
; CHECK-NEXT:    sunpklo z5.s, z0.h
; CHECK-NEXT:    sunpkhi z7.s, z0.h
; CHECK-NEXT:    sunpklo z0.d, z2.s
; CHECK-NEXT:    sunpkhi z1.d, z2.s
; CHECK-NEXT:    sunpklo z2.d, z3.s
; CHECK-NEXT:    sunpkhi z3.d, z3.s
; CHECK-NEXT:    sunpklo z4.d, z5.s
; CHECK-NEXT:    sunpkhi z5.d, z5.s
; CHECK-NEXT:    sunpklo z6.d, z7.s
; CHECK-NEXT:    sunpkhi z7.d, z7.s
; CHECK-NEXT:    ret
  %wide.masked.load = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr %base, i32 2, <vscale x 16 x i1> %mask, <vscale x 16 x i8> undef)
  %res = sext <vscale x 16 x i8> %wide.masked.load to <vscale x 16 x i64>
  ret <vscale x 16 x i64> %res
}

define <vscale x 4 x i64> @masked_ld1b_nxv4i8_sext_i64(ptr %a, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: masked_ld1b_nxv4i8_sext_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sb { z1.s }, p0/z, [x0]
; CHECK-NEXT:    sunpklo z0.d, z1.s
; CHECK-NEXT:    sunpkhi z1.d, z1.s
; CHECK-NEXT:    ret
  %wide.masked.load = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr %a, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i8> poison)
  %res = sext <vscale x 4 x i8> %wide.masked.load to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %res
}

define <vscale x 16 x i64> @masked_ld1b_i8_zext(ptr %base, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: masked_ld1b_i8_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    uunpklo z1.h, z0.b
; CHECK-NEXT:    uunpkhi z0.h, z0.b
; CHECK-NEXT:    uunpklo z2.s, z1.h
; CHECK-NEXT:    uunpkhi z3.s, z1.h
; CHECK-NEXT:    uunpklo z5.s, z0.h
; CHECK-NEXT:    uunpkhi z7.s, z0.h
; CHECK-NEXT:    uunpklo z0.d, z2.s
; CHECK-NEXT:    uunpkhi z1.d, z2.s
; CHECK-NEXT:    uunpklo z2.d, z3.s
; CHECK-NEXT:    uunpkhi z3.d, z3.s
; CHECK-NEXT:    uunpklo z4.d, z5.s
; CHECK-NEXT:    uunpkhi z5.d, z5.s
; CHECK-NEXT:    uunpklo z6.d, z7.s
; CHECK-NEXT:    uunpkhi z7.d, z7.s
; CHECK-NEXT:    ret
  %wide.masked.load = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr %base, i32 2, <vscale x 16 x i1> %mask, <vscale x 16 x i8> undef)
  %res = zext <vscale x 16 x i8> %wide.masked.load to <vscale x 16 x i64>
  ret <vscale x 16 x i64> %res
}

define <vscale x 4 x i64> @masked_ld1b_nxv4i8_zext_i64(ptr %a, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: masked_ld1b_nxv4i8_zext_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z1.s }, p0/z, [x0]
; CHECK-NEXT:    uunpklo z0.d, z1.s
; CHECK-NEXT:    uunpkhi z1.d, z1.s
; CHECK-NEXT:    ret
  %wide.masked.load = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr %a, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i8> poison)
  %res = zext <vscale x 4 x i8> %wide.masked.load to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %res
}

;
; LD1H
;

define <vscale x 8 x i64> @masked_ld1h_i16_sext(ptr %base, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: masked_ld1h_i16_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    sunpklo z1.s, z0.h
; CHECK-NEXT:    sunpkhi z3.s, z0.h
; CHECK-NEXT:    sunpklo z0.d, z1.s
; CHECK-NEXT:    sunpkhi z1.d, z1.s
; CHECK-NEXT:    sunpklo z2.d, z3.s
; CHECK-NEXT:    sunpkhi z3.d, z3.s
; CHECK-NEXT:    ret
  %wide.masked.load = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr %base, i32 2, <vscale x 8 x i1> %mask, <vscale x 8 x i16> undef)
  %res = sext <vscale x 8 x i16> %wide.masked.load to <vscale x 8 x i64>
  ret <vscale x 8 x i64> %res
}

define <vscale x 4 x i64> @masked_ld1h_nxv4i16_sext(ptr %a, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: masked_ld1h_nxv4i16_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z1.s }, p0/z, [x0]
; CHECK-NEXT:    sunpklo z0.d, z1.s
; CHECK-NEXT:    sunpkhi z1.d, z1.s
; CHECK-NEXT:    ret
  %wide.masked.load = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr %a, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i16> poison)
  %res = sext <vscale x 4 x i16> %wide.masked.load to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %res
}

define <vscale x 8 x i64> @masked_ld1h_i16_zext(ptr %base, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: masked_ld1h_i16_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    uunpklo z1.s, z0.h
; CHECK-NEXT:    uunpkhi z3.s, z0.h
; CHECK-NEXT:    uunpklo z0.d, z1.s
; CHECK-NEXT:    uunpkhi z1.d, z1.s
; CHECK-NEXT:    uunpklo z2.d, z3.s
; CHECK-NEXT:    uunpkhi z3.d, z3.s
; CHECK-NEXT:    ret
  %wide.masked.load = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr %base, i32 2, <vscale x 8 x i1> %mask, <vscale x 8 x i16> undef)
  %res = zext <vscale x 8 x i16> %wide.masked.load to <vscale x 8 x i64>
  ret <vscale x 8 x i64> %res
}

define <vscale x 4 x i64> @masked_ld1h_nxv4i16_zext(ptr %a, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: masked_ld1h_nxv4i16_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z1.s }, p0/z, [x0]
; CHECK-NEXT:    uunpklo z0.d, z1.s
; CHECK-NEXT:    uunpkhi z1.d, z1.s
; CHECK-NEXT:    ret
  %wide.masked.load = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr %a, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i16> poison)
  %res = zext <vscale x 4 x i16> %wide.masked.load to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %res
}

;
; LD1W
;

define <vscale x 4 x i64> @masked_ld1w_i32_sext(ptr %base, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: masked_ld1w_i32_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
; CHECK-NEXT:    sunpklo z0.d, z1.s
; CHECK-NEXT:    sunpkhi z1.d, z1.s
; CHECK-NEXT:    ret
  %wide.masked.load = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr %base, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
  %res = sext <vscale x 4 x i32> %wide.masked.load to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %res
}

define <vscale x 4 x i64> @masked_ld1w_i32_zext(ptr %base, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: masked_ld1w_i32_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
; CHECK-NEXT:    uunpklo z0.d, z1.s
; CHECK-NEXT:    uunpkhi z1.d, z1.s
; CHECK-NEXT:    ret
  %wide.masked.load = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr %base, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
  %res = zext <vscale x 4 x i32> %wide.masked.load to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %res
}

declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr, i32 immarg, <vscale x 16 x i1>, <vscale x 16 x i8>)
declare <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr, i32 immarg, <vscale x 8 x i1>, <vscale x 8 x i8>)
declare <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr, i32 immarg, <vscale x 4 x i1>, <vscale x 4 x i8>)
declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr, i32 immarg, <vscale x 8 x i1>, <vscale x 8 x i16>)
declare <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr, i32 immarg, <vscale x 4 x i1>, <vscale x 4 x i16>)
declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr, i32 immarg, <vscale x 4 x i1>, <vscale x 4 x i32>)