; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
target datalayout = "E-p:64:64:64-p1:32:32:32-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"

; Instcombine should be able to prove vector alignment in the
; presence of a few mild address computation tricks.

define void @test0(ptr %b, i64 %n, i64 %u, i64 %y) nounwind {
; CHECK-LABEL: @test0(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[C:%.*]] = ptrtoint ptr [[B:%.*]] to i64
; CHECK-NEXT:    [[D:%.*]] = and i64 [[C]], -16
; CHECK-NEXT:    [[E:%.*]] = inttoptr i64 [[D]] to ptr
; CHECK-NEXT:    [[V:%.*]] = shl i64 [[U:%.*]], 1
; CHECK-NEXT:    [[Z:%.*]] = and i64 [[Y:%.*]], -2
; CHECK-NEXT:    [[T1421:%.*]] = icmp eq i64 [[N:%.*]], 0
; CHECK-NEXT:    br i1 [[T1421]], label [[RETURN:%.*]], label [[BB:%.*]]
; CHECK:       bb:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[BB]] ], [ 20, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[J:%.*]] = mul i64 [[I]], [[V]]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr double, ptr [[E]], i64 [[J]]
; CHECK-NEXT:    [[T8:%.*]] = getelementptr double, ptr [[TMP0]], i64 [[Z]]
; CHECK-NEXT:    store <2 x double> zeroinitializer, ptr [[T8]], align 8
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[I]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[RETURN]], label [[BB]]
; CHECK:       return:
; CHECK-NEXT:    ret void
;
entry:
  %c = ptrtoint ptr %b to i64
  %d = and i64 %c, -16
  %e = inttoptr i64 %d to ptr
  %v = mul i64 %u, 2
  %z = and i64 %y, -2
  %t1421 = icmp eq i64 %n, 0
  br i1 %t1421, label %return, label %bb

bb:
  %i = phi i64 [ %indvar.next, %bb ], [ 20, %entry ]
  %j = mul i64 %i, %v
  %h = add i64 %j, %z
  %t8 = getelementptr double, ptr %e, i64 %h
  store <2 x double><double 0.0, double 0.0>, ptr %t8, align 8
  %indvar.next = add i64 %i, 1
  %exitcond = icmp eq i64 %indvar.next, %n
  br i1 %exitcond, label %return, label %bb

return:
  ret void
}

; When we see an unaligned load from an insufficiently aligned global or
; alloca, increase the alignment of the load, turning it into an aligned load.

@GLOBAL = internal global [4 x i32] zeroinitializer

define <16 x i8> @test1(<2 x i64> %x) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP:%.*]] = load <16 x i8>, ptr @GLOBAL, align 1
; CHECK-NEXT:    ret <16 x i8> [[TMP]]
;
entry:
  %tmp = load <16 x i8>, ptr @GLOBAL, align 1
  ret <16 x i8> %tmp
}

@GLOBAL_as1 = internal addrspace(1) global [4 x i32] zeroinitializer

define <16 x i8> @test1_as1(<2 x i64> %x) {
; CHECK-LABEL: @test1_as1(
; CHECK-NEXT:    [[TMP:%.*]] = load <16 x i8>, ptr addrspace(1) @GLOBAL_as1, align 1
; CHECK-NEXT:    ret <16 x i8> [[TMP]]
;
  %tmp = load <16 x i8>, ptr addrspace(1) @GLOBAL_as1, align 1
  ret <16 x i8> %tmp
}

@GLOBAL_as1_gep = internal addrspace(1) global [8 x i32] zeroinitializer

define <16 x i8> @test1_as1_gep(<2 x i64> %x) {
; CHECK-LABEL: @test1_as1_gep(
; CHECK-NEXT:    [[TMP:%.*]] = load <16 x i8>, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @GLOBAL_as1_gep, i32 16), align 1
; CHECK-NEXT:    ret <16 x i8> [[TMP]]
;
  %tmp = load <16 x i8>, ptr addrspace(1) getelementptr ([8 x i32], ptr addrspace(1) @GLOBAL_as1_gep, i16 0, i16 4), align 1
  ret <16 x i8> %tmp
}


; When a load or store lacks an explicit alignment, add one.

define double @test2(ptr %p, double %n) nounwind {
; CHECK-LABEL: @test2(
; CHECK-NEXT:    [[T:%.*]] = load double, ptr [[P:%.*]], align 8
; CHECK-NEXT:    store double [[N:%.*]], ptr [[P]], align 8
; CHECK-NEXT:    ret double [[T]]
;
  %t = load double, ptr %p
  store double %n, ptr %p
  ret double %t
}

declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind

declare void @use(ptr)

%struct.s = type { i32, i32, i32, i32 }

define void @test3(ptr sret(%struct.s) %a4) {
; Check that the alignment is bumped up to the alignment of the sret type.
; CHECK-LABEL: @test3(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 dereferenceable(16) [[A4:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    call void @use(ptr nonnull [[A4]])
; CHECK-NEXT:    ret void
;
  call void @llvm.memset.p0.i64(ptr %a4, i8 0, i64 16, i1 false)
  call void @use(ptr %a4)
  ret void
}

declare ptr @llvm.ptrmask.p0.i64(ptr, i64)

; Unknown mask and an align 1 source pointer: no alignment can be inferred.
define <16 x i8> @ptrmask_align_unknown_ptr_align1(ptr align 1 %ptr, i64 %mask) {
; CHECK-LABEL: @ptrmask_align_unknown_ptr_align1(
; CHECK-NEXT:    [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 [[MASK:%.*]])
; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
; CHECK-NEXT:    ret <16 x i8> [[LOAD]]
;
  %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 %mask)
  %load = load <16 x i8>, ptr %aligned, align 1
  ret <16 x i8> %load
}

; Unknown mask: the result only inherits the source pointer's align 8.
define <16 x i8> @ptrmask_align_unknown_ptr_align8(ptr align 8 %ptr, i64 %mask) {
; CHECK-LABEL: @ptrmask_align_unknown_ptr_align8(
; CHECK-NEXT:    [[ALIGNED:%.*]] = call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 [[MASK:%.*]])
; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
; CHECK-NEXT:    ret <16 x i8> [[LOAD]]
;
  %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 %mask)
  %load = load <16 x i8>, ptr %aligned, align 1
  ret <16 x i8> %load
}

; Increase load align from 1 to 2
define <16 x i8> @ptrmask_align2_ptr_align1(ptr align 1 %ptr) {
; CHECK-LABEL: @ptrmask_align2_ptr_align1(
; CHECK-NEXT:    [[ALIGNED:%.*]] = call align 2 ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -2)
; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
; CHECK-NEXT:    ret <16 x i8> [[LOAD]]
;
  %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -2)
  %load = load <16 x i8>, ptr %aligned, align 1
  ret <16 x i8> %load
}

; Increase load align from 1 to 4
define <16 x i8> @ptrmask_align4_ptr_align1(ptr align 1 %ptr) {
; CHECK-LABEL: @ptrmask_align4_ptr_align1(
; CHECK-NEXT:    [[ALIGNED:%.*]] = call align 4 ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4)
; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
; CHECK-NEXT:    ret <16 x i8> [[LOAD]]
;
  %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -4)
  %load = load <16 x i8>, ptr %aligned, align 1
  ret <16 x i8> %load
}

; Increase load align from 1 to 8
define <16 x i8> @ptrmask_align8_ptr_align1(ptr align 1 %ptr) {
; CHECK-LABEL: @ptrmask_align8_ptr_align1(
; CHECK-NEXT:    [[ALIGNED:%.*]] = call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -8)
; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
; CHECK-NEXT:    ret <16 x i8> [[LOAD]]
;
  %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -8)
  %load = load <16 x i8>, ptr %aligned, align 1
  ret <16 x i8> %load
}

; Underlying alignment already the same as forced alignment by ptrmask
define <16 x i8> @ptrmask_align8_ptr_align8(ptr align 8 %ptr) {
; CHECK-LABEL: @ptrmask_align8_ptr_align8(
; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 1
; CHECK-NEXT:    ret <16 x i8> [[LOAD]]
;
  %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -8)
  %load = load <16 x i8>, ptr %aligned, align 1
  ret <16 x i8> %load
}

; Underlying alignment greater than alignment forced by ptrmask
define <16 x i8> @ptrmask_align8_ptr_align16(ptr align 16 %ptr) {
; CHECK-LABEL: @ptrmask_align8_ptr_align16(
; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 1
; CHECK-NEXT:    ret <16 x i8> [[LOAD]]
;
  %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -8)
  %load = load <16 x i8>, ptr %aligned, align 1
  ret <16 x i8> %load
}
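
; A further sketch of the "add missing alignment" fold above, this time for a
; vector type: with the v128:128:128 datalayout at the top of the file, the ABI
; alignment for <2 x double> is 16, so the unadorned load/store should pick up
; align 16. The @test2_vec name and the CHECK lines below are hand-written from
; that assumption rather than autogenerated; regenerate them with
; utils/update_test_checks.py before relying on them.
define <2 x double> @test2_vec(ptr %p, <2 x double> %n) nounwind {
; CHECK-LABEL: @test2_vec(
; CHECK-NEXT:    [[T:%.*]] = load <2 x double>, ptr [[P:%.*]], align 16
; CHECK-NEXT:    store <2 x double> [[N:%.*]], ptr [[P]], align 16
; CHECK-NEXT:    ret <2 x double> [[T]]
;
  %t = load <2 x double>, ptr %p
  store <2 x double> %n, ptr %p
  ret <2 x double> %t
}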