; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: opt -S -loop-reduce < %s | FileCheck %s --check-prefix=IR
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefix=ASM
; Note: To update this test, run utils/update_test_checks.py and utils/update_llc_test_checks.py separately on the opt and llc RUN lines.

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-linux-gnu"

; These tests check that the IR coming out of LSR does not cast the input/output pointers to a different pointer type,
; and that the scaled-index addressing mode is leveraged in the generated assembly, i.e. ld1h { z1.h }, p0/z, [x0, x8, lsl #1].
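; With 16-bit elements, LSR rewrites each access as a byte offset of (indvar << 1)
; from the incoming pointer, and the backend folds that shift into the scaled
; register-offset form [xN, x8, lsl #1] instead of keeping a separate pointer
; induction variable.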

define void @ld_st_nxv8i16(ptr %in, ptr %out) {
; IR-LABEL: @ld_st_nxv8i16(
; IR-NEXT:  entry:
; IR-NEXT:    br label [[LOOP_PH:%.*]]
; IR:       loop.ph:
; IR-NEXT:    [[P_VEC_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> undef, i16 3, i32 0
; IR-NEXT:    [[P_VEC_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[P_VEC_SPLATINSERT]], <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
; IR-NEXT:    [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
; IR-NEXT:    [[SCALED_VF:%.*]] = shl i64 [[VSCALE]], 3
; IR-NEXT:    br label [[LOOP:%.*]]
; IR:       loop:
; IR-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[LOOP_PH]] ], [ [[INDVAR_NEXT:%.*]], [[LOOP]] ]
; IR-NEXT:    [[TMP0:%.*]] = shl i64 [[INDVAR]], 1
; IR-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 [[TMP0]]
; IR-NEXT:    [[TMP1:%.*]] = shl i64 [[INDVAR]], 1
; IR-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[OUT:%.*]], i64 [[TMP1]]
; IR-NEXT:    [[VAL:%.*]] = load <vscale x 8 x i16>, ptr [[UGLYGEP1]], align 16
; IR-NEXT:    [[ADDP_VEC:%.*]] = add <vscale x 8 x i16> [[VAL]], [[P_VEC_SPLAT]]
; IR-NEXT:    store <vscale x 8 x i16> [[ADDP_VEC]], ptr [[UGLYGEP]], align 16
; IR-NEXT:    [[INDVAR_NEXT]] = add nsw i64 [[INDVAR]], [[SCALED_VF]]
; IR-NEXT:    [[EXIT_COND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], 1024
; IR-NEXT:    br i1 [[EXIT_COND]], label [[LOOP_EXIT:%.*]], label [[LOOP]]
; IR:       loop.exit:
; IR-NEXT:    br label [[EXIT:%.*]]
; IR:       exit:
; IR-NEXT:    ret void
;
; ASM-LABEL: ld_st_nxv8i16:
; ASM:       // %bb.0: // %entry
; ASM-NEXT:    mov z0.h, #3 // =0x3
; ASM-NEXT:    ptrue p0.h
; ASM-NEXT:    mov x8, xzr
; ASM-NEXT:    cnth x9
; ASM-NEXT:  .LBB0_1: // %loop
; ASM-NEXT:    // =>This Inner Loop Header: Depth=1
; ASM-NEXT:    ld1h { z1.h }, p0/z, [x0, x8, lsl #1]
; ASM-NEXT:    add z1.h, z1.h, z0.h
; ASM-NEXT:    st1h { z1.h }, p0, [x1, x8, lsl #1]
; ASM-NEXT:    add x8, x8, x9
; ASM-NEXT:    cmp x8, #1024
; ASM-NEXT:    b.ne .LBB0_1
; ASM-NEXT:  // %bb.2: // %exit
; ASM-NEXT:    ret
entry:
  br label %loop.ph

loop.ph:
  %p_vec.splatinsert = insertelement <vscale x 8 x i16> undef, i16 3, i32 0
  %p_vec.splat = shufflevector <vscale x 8 x i16> %p_vec.splatinsert, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %vscale = call i64 @llvm.vscale.i64()
  %scaled_vf = shl i64 %vscale, 3
  br label %loop

loop:                                             ; preds = %loop, %loop.ph
  %indvar = phi i64 [ 0, %loop.ph ], [ %indvar.next, %loop ]
  %ptr.in = getelementptr inbounds i16, ptr %in, i64 %indvar
  %ptr.out = getelementptr inbounds i16, ptr %out, i64 %indvar
  %val = load <vscale x 8 x i16>, ptr %ptr.in, align 16
  %addp_vec = add <vscale x 8 x i16> %val, %p_vec.splat
  store <vscale x 8 x i16> %addp_vec, ptr %ptr.out, align 16
  %indvar.next = add nsw i64 %indvar, %scaled_vf
  %exit.cond = icmp eq i64 %indvar.next, 1024
  br i1 %exit.cond, label %loop.exit, label %loop

loop.exit:                                        ; preds = %loop
  br label %exit

exit:
  ret void
}
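
; Same pattern, but through the masked load/store intrinsics with an all-true
; predicate and a runtime trip count %n; the backend is expected to select the
; same scaled-index ld1h/st1h form.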
define void @masked_ld_st_nxv8i16(ptr %in, ptr %out, i64 %n) {
; IR-LABEL: @masked_ld_st_nxv8i16(
; IR-NEXT:  entry:
; IR-NEXT:    br label [[LOOP_PH:%.*]]
; IR:       loop.ph:
; IR-NEXT:    [[P_VEC_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> undef, i16 3, i32 0
; IR-NEXT:    [[P_VEC_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[P_VEC_SPLATINSERT]], <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
; IR-NEXT:    [[PTRUE_VEC_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
; IR-NEXT:    [[PTRUE_VEC_SPLAT:%.*]] = shufflevector <vscale x 8 x i1> [[PTRUE_VEC_SPLATINSERT]], <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
; IR-NEXT:    [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
; IR-NEXT:    [[SCALED_VF:%.*]] = shl i64 [[VSCALE]], 3
; IR-NEXT:    br label [[LOOP:%.*]]
; IR:       loop:
; IR-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[LOOP_PH]] ], [ [[INDVAR_NEXT:%.*]], [[LOOP]] ]
; IR-NEXT:    [[TMP0:%.*]] = shl i64 [[INDVAR]], 1
; IR-NEXT:    [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 [[TMP0]]
; IR-NEXT:    [[TMP1:%.*]] = shl i64 [[INDVAR]], 1
; IR-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[OUT:%.*]], i64 [[TMP1]]
; IR-NEXT:    [[VAL:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[UGLYGEP1]], i32 4, <vscale x 8 x i1> [[PTRUE_VEC_SPLAT]], <vscale x 8 x i16> undef)
; IR-NEXT:    [[ADDP_VEC:%.*]] = add <vscale x 8 x i16> [[VAL]], [[P_VEC_SPLAT]]
; IR-NEXT:    call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> [[ADDP_VEC]], ptr [[UGLYGEP]], i32 4, <vscale x 8 x i1> [[PTRUE_VEC_SPLAT]])
; IR-NEXT:    [[INDVAR_NEXT]] = add nsw i64 [[INDVAR]], [[SCALED_VF]]
; IR-NEXT:    [[EXIT_COND:%.*]] = icmp eq i64 [[N:%.*]], [[INDVAR_NEXT]]
; IR-NEXT:    br i1 [[EXIT_COND]], label [[LOOP_EXIT:%.*]], label [[LOOP]]
; IR:       loop.exit:
; IR-NEXT:    br label [[EXIT:%.*]]
; IR:       exit:
; IR-NEXT:    ret void
;
; ASM-LABEL: masked_ld_st_nxv8i16:
; ASM:       // %bb.0: // %entry
; ASM-NEXT:    mov z0.h, #3 // =0x3
; ASM-NEXT:    ptrue p0.h
; ASM-NEXT:    mov x8, xzr
; ASM-NEXT:    cnth x9
; ASM-NEXT:  .LBB1_1: // %loop
; ASM-NEXT:    // =>This Inner Loop Header: Depth=1
; ASM-NEXT:    ld1h { z1.h }, p0/z, [x0, x8, lsl #1]
; ASM-NEXT:    add z1.h, z1.h, z0.h
; ASM-NEXT:    st1h { z1.h }, p0, [x1, x8, lsl #1]
; ASM-NEXT:    add x8, x8, x9
; ASM-NEXT:    cmp x2, x8
; ASM-NEXT:    b.ne .LBB1_1
; ASM-NEXT:  // %bb.2: // %exit
; ASM-NEXT:    ret
entry:
  br label %loop.ph

loop.ph:
  %p_vec.splatinsert = insertelement <vscale x 8 x i16> undef, i16 3, i32 0
  %p_vec.splat = shufflevector <vscale x 8 x i16> %p_vec.splatinsert, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %ptrue_vec.splatinsert = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
  %ptrue_vec.splat = shufflevector <vscale x 8 x i1> %ptrue_vec.splatinsert, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
  %vscale = call i64 @llvm.vscale.i64()
  %scaled_vf = shl i64 %vscale, 3
  br label %loop

loop:                                             ; preds = %loop, %loop.ph
  %indvar = phi i64 [ 0, %loop.ph ], [ %indvar.next, %loop ]
  %ptr.in = getelementptr inbounds i16, ptr %in, i64 %indvar
  %ptr.out = getelementptr inbounds i16, ptr %out, i64 %indvar
  %val = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr %ptr.in, i32 4, <vscale x 8 x i1> %ptrue_vec.splat, <vscale x 8 x i16> undef)
  %addp_vec = add <vscale x 8 x i16> %val, %p_vec.splat
  call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> %addp_vec, ptr %ptr.out, i32 4, <vscale x 8 x i1> %ptrue_vec.splat)
  %indvar.next = add nsw i64 %indvar, %scaled_vf
  %exit.cond = icmp eq i64 %indvar.next, %n
  br i1 %exit.cond, label %loop.exit, label %loop

loop.exit:                                        ; preds = %loop
  br label %exit

exit:
  ret void
}

declare i64 @llvm.vscale.i64()

declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr, i32 immarg, <vscale x 8 x i1>, <vscale x 8 x i16>)

declare void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16>, ptr, i32 immarg, <vscale x 8 x i1>)