; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -S --passes='require<profile-summary>,function(codegenprepare)' < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

; Sink the GEP to make use of scalar+vector addressing modes.
define <vscale x 4 x float> @gather_offsets_sink_gep(ptr %base, <vscale x 4 x i32> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offsets_sink_gep(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK:       cond.block:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i32> [[INDICES]]
; CHECK-NEXT:    [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP0]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT:    ret <vscale x 4 x float> [[LOAD]]
; CHECK:       exit:
; CHECK-NEXT:    ret <vscale x 4 x float> zeroinitializer
;
entry:
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i32> %indices
  br i1 %cond, label %cond.block, label %exit

cond.block:
  %load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  br label %exit

exit:
  %ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
  ret <vscale x 4 x float> %ret
}

; Sink sext to make use of scalar+sxtw(vector) addressing modes.
define <vscale x 4 x float> @gather_offsets_sink_sext(ptr %base, <vscale x 4 x i32> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offsets_sink_sext(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK:       cond.block:
; CHECK-NEXT:    [[TMP0:%.*]] = sext <vscale x 4 x i32> [[INDICES]] to <vscale x 4 x i64>
; CHECK-NEXT:    [[PTRS:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i64> [[TMP0]]
; CHECK-NEXT:    [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[PTRS]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT:    ret <vscale x 4 x float> [[LOAD]]
; CHECK:       exit:
; CHECK-NEXT:    ret <vscale x 4 x float> zeroinitializer
;
entry:
  %indices.sext = sext <vscale x 4 x i32> %indices to <vscale x 4 x i64>
  br i1 %cond, label %cond.block, label %exit

cond.block:
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i64> %indices.sext
  %load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  br label %exit

exit:
  %ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
  ret <vscale x 4 x float> %ret
}

; As above, but ensure both the GEP and the sext are sunk.
define <vscale x 4 x float> @gather_offsets_sink_sext_gep(ptr %base, <vscale x 4 x i32> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offsets_sink_sext_gep(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK:       cond.block:
; CHECK-NEXT:    [[TMP0:%.*]] = sext <vscale x 4 x i32> [[INDICES]] to <vscale x 4 x i64>
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i64> [[TMP0]]
; CHECK-NEXT:    [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT:    ret <vscale x 4 x float> [[LOAD]]
; CHECK:       exit:
; CHECK-NEXT:    ret <vscale x 4 x float> zeroinitializer
;
entry:
  %indices.sext = sext <vscale x 4 x i32> %indices to <vscale x 4 x i64>
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i64> %indices.sext
  br i1 %cond, label %cond.block, label %exit

cond.block:
  %load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  br label %exit

exit:
  %ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
  ret <vscale x 4 x float> %ret
}

; Don't sink GEPs that cannot benefit from SVE's scalar+vector addressing modes.
define <vscale x 4 x float> @gather_no_scalar_base(<vscale x 4 x ptr> %bases, <vscale x 4 x i32> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_no_scalar_base(
; CHECK-SAME: <vscale x 4 x ptr> [[BASES:%.*]], <vscale x 4 x i32> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[PTRS:%.*]] = getelementptr float, <vscale x 4 x ptr> [[BASES]], <vscale x 4 x i32> [[INDICES]]
; CHECK-NEXT:    br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK:       cond.block:
; CHECK-NEXT:    [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[PTRS]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT:    ret <vscale x 4 x float> [[LOAD]]
; CHECK:       exit:
; CHECK-NEXT:    ret <vscale x 4 x float> zeroinitializer
;
entry:
  %ptrs = getelementptr float, <vscale x 4 x ptr> %bases, <vscale x 4 x i32> %indices
  br i1 %cond, label %cond.block, label %exit

cond.block:
  %load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  br label %exit

exit:
  %ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
  ret <vscale x 4 x float> %ret
}

; Don't sink extends whose result type is already favourable for SVE's sxtw/uxtw addressing modes.
; NOTE: We still want to sink the GEP.
define <vscale x 4 x float> @gather_offset_type_too_small(ptr %base, <vscale x 4 x i8> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offset_type_too_small(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i8> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INDICES_SEXT:%.*]] = sext <vscale x 4 x i8> [[INDICES]] to <vscale x 4 x i32>
; CHECK-NEXT:    br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK:       cond.block:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i32> [[INDICES_SEXT]]
; CHECK-NEXT:    [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP0]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT:    ret <vscale x 4 x float> [[LOAD]]
; CHECK:       exit:
; CHECK-NEXT:    ret <vscale x 4 x float> zeroinitializer
;
entry:
  %indices.sext = sext <vscale x 4 x i8> %indices to <vscale x 4 x i32>
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i32> %indices.sext
  br i1 %cond, label %cond.block, label %exit

cond.block:
  %load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  br label %exit

exit:
  %ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
  ret <vscale x 4 x float> %ret
}

; Don't sink extends that cannot benefit from SVE's sxtw/uxtw addressing modes.
; NOTE: We still want to sink the GEP.
define <vscale x 4 x float> @gather_offset_type_too_big(ptr %base, <vscale x 4 x i48> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offset_type_too_big(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i48> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INDICES_SEXT:%.*]] = sext <vscale x 4 x i48> [[INDICES]] to <vscale x 4 x i64>
; CHECK-NEXT:    br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK:       cond.block:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i64> [[INDICES_SEXT]]
; CHECK-NEXT:    [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[TMP0]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT:    ret <vscale x 4 x float> [[LOAD]]
; CHECK:       exit:
; CHECK-NEXT:    ret <vscale x 4 x float> zeroinitializer
;
entry:
  %indices.sext = sext <vscale x 4 x i48> %indices to <vscale x 4 x i64>
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i64> %indices.sext
  br i1 %cond, label %cond.block, label %exit

cond.block:
  %load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  br label %exit

exit:
  %ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
  ret <vscale x 4 x float> %ret
}

; Sink zext to make use of scalar+uxtw(vector) addressing modes.
; TODO: There's an argument here to split the extend into i8->i32 and i32->i64,
; which would be especially useful if the i8s are the result of a load, because
; it would maintain the use of extending loads.
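; For illustration only, a sketch of that split form (the %indices.i32 and
; %indices.i64 names are hypothetical; this is not what the pass currently emits):
;   %indices.i32 = zext <vscale x 4 x i8> %indices to <vscale x 4 x i32>
;   %indices.i64 = zext <vscale x 4 x i32> %indices.i32 to <vscale x 4 x i64>
; The i8->i32 half could then fold into an extending load when the indices come
; from memory, leaving an i32->i64 extend that the uxtw addressing mode covers.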
define <vscale x 4 x float> @gather_offset_sink_zext(ptr %base, <vscale x 4 x i8> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define <vscale x 4 x float> @gather_offset_sink_zext(
; CHECK-SAME: ptr [[BASE:%.*]], <vscale x 4 x i8> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK:       cond.block:
; CHECK-NEXT:    [[TMP0:%.*]] = zext <vscale x 4 x i8> [[INDICES]] to <vscale x 4 x i64>
; CHECK-NEXT:    [[PTRS:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i64> [[TMP0]]
; CHECK-NEXT:    [[LOAD:%.*]] = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> [[PTRS]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT:    ret <vscale x 4 x float> [[LOAD]]
; CHECK:       exit:
; CHECK-NEXT:    ret <vscale x 4 x float> zeroinitializer
;
entry:
  %indices.zext = zext <vscale x 4 x i8> %indices to <vscale x 4 x i64>
  br i1 %cond, label %cond.block, label %exit

cond.block:
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i64> %indices.zext
  %load = tail call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  br label %exit

exit:
  %ret = phi <vscale x 4 x float> [ zeroinitializer, %entry ], [ %load, %cond.block ]
  ret <vscale x 4 x float> %ret
}

; Ensure we support scatters as well as gathers.
define void @scatter_offsets_sink_sext_gep(<vscale x 4 x float> %data, ptr %base, <vscale x 4 x i32> %indices, <vscale x 4 x i1> %mask, i1 %cond) {
; CHECK-LABEL: define void @scatter_offsets_sink_sext_gep(
; CHECK-SAME: <vscale x 4 x float> [[DATA:%.*]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[COND]], label [[COND_BLOCK:%.*]], label [[EXIT:%.*]]
; CHECK:       cond.block:
; CHECK-NEXT:    [[TMP0:%.*]] = sext <vscale x 4 x i32> [[INDICES]] to <vscale x 4 x i64>
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr [[BASE]], <vscale x 4 x i64> [[TMP0]]
; CHECK-NEXT:    tail call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> [[DATA]], <vscale x 4 x ptr> [[TMP1]], i32 4, <vscale x 4 x i1> [[MASK]])
; CHECK-NEXT:    ret void
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  %indices.sext = sext <vscale x 4 x i32> %indices to <vscale x 4 x i64>
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i64> %indices.sext
  br i1 %cond, label %cond.block, label %exit

cond.block:
  tail call void @llvm.masked.scatter.nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask)
  br label %exit

exit:
  ret void
}

declare <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
declare void @llvm.masked.scatter.nxv4f32(<vscale x 4 x float>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)