; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=dse -S | FileCheck %s
;
; Tests for DSE's partial-overwrite elimination at the *start* of an earlier
; write: when a later store overwrites the leading bytes of an earlier memset,
; DSE shrinks the memset (advances its start pointer and reduces its length)
; instead of leaving the dead leading bytes in place. The CHECK lines are
; autogenerated; do not hand-edit them — rerun update_test_checks.py instead.

; The store covers bytes 4-7 of the 28-byte memset, so the memset is shrunk
; to start 4 bytes later and cover only 24 bytes.
define void @write4to7(ptr nocapture %p) {
; CHECK-LABEL: @write4to7(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i1 false)
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 1
; CHECK-NEXT:    store i32 1, ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %arrayidx0 = getelementptr inbounds i32, ptr %p, i64 1
  call void @llvm.memset.p0.i64(ptr align 4 %arrayidx0, i8 0, i64 28, i1 false)
  %arrayidx1 = getelementptr inbounds i32, ptr %p, i64 1
  store i32 1, ptr %arrayidx1, align 4
  ret void
}

define void @write4to7_weird_element_type(ptr nocapture %p) {
; CHECK-LABEL: @write4to7_weird_element_type(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP1]], i8 0, i64 24, i1 false)
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 1
; CHECK-NEXT:    store i32 1, ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %arrayidx0 = getelementptr inbounds i32, ptr %p, i64 1
  call void @llvm.memset.p0.i64(ptr align 4 %arrayidx0, i8 0, i64 28, i1 false)
  %arrayidx1 = getelementptr inbounds i32, ptr %p, i64 1
  store i32 1, ptr %arrayidx1, align 4
  ret void
}

; Same shrink applies in a non-default address space (memset.p1 variant).
define void @write4to7_addrspace(ptr addrspace(1) nocapture %p) {
; CHECK-LABEL: @write4to7_addrspace(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARRAYIDX0]], i64 4
; CHECK-NEXT:    call void @llvm.memset.p1.i64(ptr addrspace(1) align 4 [[TMP0]], i8 0, i64 24, i1 false)
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[P]], i64 1
; CHECK-NEXT:    store i32 1, ptr addrspace(1) [[ARRAYIDX1]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %arrayidx0 = getelementptr inbounds i32, ptr addrspace(1) %p, i64 1
  call void @llvm.memset.p1.i64(ptr addrspace(1) align 4 %arrayidx0, i8 0, i64 28, i1 false)
  %arrayidx1 = getelementptr inbounds i32, ptr addrspace(1) %p, i64 1
  store i32 1, ptr addrspace(1) %arrayidx1, align 4
  ret void
}

; Element-wise atomic memset can also be shrunk here: the trimmed 4 bytes are
; a whole number of (4-byte) elements.
define void @write4to7_atomic(ptr nocapture %p) {
; CHECK-LABEL: @write4to7_atomic(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i32 4)
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 1
; CHECK-NEXT:    store atomic i32 1, ptr [[ARRAYIDX1]] unordered, align 4
; CHECK-NEXT:    ret void
;
entry:
  %arrayidx0 = getelementptr inbounds i32, ptr %p, i64 1
  call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 %arrayidx0, i8 0, i64 28, i32 4)
  %arrayidx1 = getelementptr inbounds i32, ptr %p, i64 1
  store atomic i32 1, ptr %arrayidx1 unordered, align 4
  ret void
}

define void @write0to3(ptr nocapture %p) {
; CHECK-LABEL: @write0to3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i1 false)
; CHECK-NEXT:    store i32 1, ptr [[P]], align 4
; CHECK-NEXT:    ret void
;
entry:
  call void @llvm.memset.p0.i64(ptr align 4 %p, i8 0, i64 28, i1 false)
  store i32 1, ptr %p, align 4
  ret void
}

define void @write0to3_atomic(ptr nocapture %p) {
; CHECK-LABEL: @write0to3_atomic(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i32 4)
; CHECK-NEXT:    store atomic i32 1, ptr [[P]] unordered, align 4
; CHECK-NEXT:    ret void
;
entry:
  call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 %p, i8 0, i64 28, i32 4)
  store atomic i32 1, ptr %p unordered, align 4
  ret void
}

; Atomicity of the store is weaker from the memset
define void @write0to3_atomic_weaker(ptr nocapture %p) {
; CHECK-LABEL: @write0to3_atomic_weaker(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i32 4)
; CHECK-NEXT:    store i32 1, ptr [[P]], align 4
; CHECK-NEXT:    ret void
;
entry:
  call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 %p, i8 0, i64 28, i32 4)
  store i32 1, ptr %p, align 4
  ret void
}

define void @write0to7(ptr nocapture %p) {
; CHECK-LABEL: @write0to7(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 8
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i1 false)
; CHECK-NEXT:    store i64 1, ptr [[P]], align 8
; CHECK-NEXT:    ret void
;
entry:
  call void @llvm.memset.p0.i64(ptr align 4 %p, i8 0, i64 32, i1 false)
  store i64 1, ptr %p, align 8
  ret void
}

; Changing the memset start and length is okay here because the
; store is a multiple of the memset element size
define void @write0to7_atomic(ptr nocapture %p) {
; CHECK-LABEL: @write0to7_atomic(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 8
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i32 4)
; CHECK-NEXT:    store atomic i64 1, ptr [[P]] unordered, align 8
; CHECK-NEXT:    ret void
;
entry:
  call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 %p, i8 0, i64 32, i32 4)
  store atomic i64 1, ptr %p unordered, align 8
  ret void
}

; The i64 store at %p overlaps the first 4 bytes of the memset (which starts
; at %p+4), so only those 4 bytes are trimmed.
define void @write0to7_2(ptr nocapture %p) {
; CHECK-LABEL: @write0to7_2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i1 false)
; CHECK-NEXT:    store i64 1, ptr [[P]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %arrayidx0 = getelementptr inbounds i32, ptr %p, i64 1
  call void @llvm.memset.p0.i64(ptr align 4 %arrayidx0, i8 0, i64 28, i1 false)
  store i64 1, ptr %p, align 8
  ret void
}

define void @write0to7_2_atomic(ptr nocapture %p) {
; CHECK-LABEL: @write0to7_2_atomic(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i32 4)
; CHECK-NEXT:    store atomic i64 1, ptr [[P]] unordered, align 8
; CHECK-NEXT:    ret void
;
entry:
  %arrayidx0 = getelementptr inbounds i32, ptr %p, i64 1
  call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 %arrayidx0, i8 0, i64 28, i32 4)
  store atomic i64 1, ptr %p unordered, align 8
  ret void
}

; We do not trim the beginning of the earlier write if the alignment of the
; start pointer is changed.
define void @dontwrite0to3_align8(ptr nocapture %p) {
; CHECK-LABEL: @dontwrite0to3_align8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[P:%.*]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    store i32 1, ptr [[P]], align 4
; CHECK-NEXT:    ret void
;
entry:
  call void @llvm.memset.p0.i64(ptr align 8 %p, i8 0, i64 32, i1 false)
  store i32 1, ptr %p, align 4
  ret void
}

define void @dontwrite0to3_align8_atomic(ptr nocapture %p) {
; CHECK-LABEL: @dontwrite0to3_align8_atomic(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 8 [[P:%.*]], i8 0, i64 32, i32 4)
; CHECK-NEXT:    store atomic i32 1, ptr [[P]] unordered, align 4
; CHECK-NEXT:    ret void
;
entry:
  call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 8 %p, i8 0, i64 32, i32 4)
  store atomic i32 1, ptr %p unordered, align 4
  ret void
}

; NOTE(review): the 2-byte store is not trimmed off the memset — presumably
; because advancing the start by 2 would break the 4-byte alignment; confirm
; against DSE's partial-overwrite logic.
define void @dontwrite0to1(ptr nocapture %p) {
; CHECK-LABEL: @dontwrite0to1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    store i16 1, ptr [[P]], align 4
; CHECK-NEXT:    ret void
;
entry:
  call void @llvm.memset.p0.i64(ptr align 4 %p, i8 0, i64 32, i1 false)
  store i16 1, ptr %p, align 4
  ret void
}

define void @dontwrite0to1_atomic(ptr nocapture %p) {
; CHECK-LABEL: @dontwrite0to1_atomic(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 32, i32 4)
; CHECK-NEXT:    store atomic i16 1, ptr [[P]] unordered, align 4
; CHECK-NEXT:    ret void
;
entry:
  call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 %p, i8 0, i64 32, i32 4)
  store atomic i16 1, ptr %p unordered, align 4
  ret void
}

; Store covers bytes 2-9; only the fully-dead aligned leading 4 bytes of the
; memset (which starts at byte 4) are trimmed, leaving 28 bytes.
define void @write2to10(ptr nocapture %p) {
; CHECK-LABEL: @write2to10(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 28, i1 false)
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1
; CHECK-NEXT:    store i64 1, ptr [[ARRAYIDX2]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %arrayidx0 = getelementptr inbounds i32, ptr %p, i64 1
  call void @llvm.memset.p0.i64(ptr align 4 %arrayidx0, i8 0, i64 32, i1 false)
  %arrayidx2 = getelementptr inbounds i16, ptr %p, i64 1
  store i64 1, ptr %arrayidx2, align 8
  ret void
}

define void @write2to10_atomic(ptr nocapture %p) {
; CHECK-LABEL: @write2to10_atomic(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 28, i32 4)
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1
; CHECK-NEXT:    store atomic i64 1, ptr [[ARRAYIDX2]] unordered, align 8
; CHECK-NEXT:    ret void
;
entry:
  %arrayidx0 = getelementptr inbounds i32, ptr %p, i64 1
  call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 %arrayidx0, i8 0, i64 32, i32 4)
  %arrayidx2 = getelementptr inbounds i16, ptr %p, i64 1
  store atomic i64 1, ptr %arrayidx2 unordered, align 8
  ret void
}

; The two stores together cover bytes 0-15, so the memset is shrunk to the
; remaining 16 bytes starting at offset 16.
define void @write8To15AndThen0To7(ptr nocapture %P) {
; CHECK-LABEL: @write8To15AndThen0To7(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 16
; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr align 8 [[TMP0]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    [[BASE64_1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 1
; CHECK-NEXT:    store i64 1, ptr [[BASE64_1]], align 4
; CHECK-NEXT:    store i64 2, ptr [[P]], align 4
; CHECK-NEXT:    ret void
;
entry:

  tail call void @llvm.memset.p0.i64(ptr align 8 %P, i8 0, i64 32, i1 false)

  %base64_1 = getelementptr inbounds i64, ptr %P, i64 1

  store i64 1, ptr %base64_1
  store i64 2, ptr %P
  ret void
}

define void @write8To15AndThen0To7_atomic(ptr nocapture %P) {
; CHECK-LABEL: @write8To15AndThen0To7_atomic(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 16
; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 8 [[TMP0]], i8 0, i64 16, i32 8)
; CHECK-NEXT:    [[BASE64_1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 1
; CHECK-NEXT:    store atomic i64 1, ptr [[BASE64_1]] unordered, align 8
; CHECK-NEXT:    store atomic i64 2, ptr [[P]] unordered, align 8
; CHECK-NEXT:    ret void
;
entry:

  tail call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 8 %P, i8 0, i64 32, i32 8)

  %base64_1 = getelementptr inbounds i64, ptr %P, i64 1

  store atomic i64 1, ptr %base64_1 unordered, align 8
  store atomic i64 2, ptr %P unordered, align 8
  ret void
}

define void @write8To15AndThen0To7_atomic_weaker(ptr nocapture %P) {
; CHECK-LABEL: @write8To15AndThen0To7_atomic_weaker(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 16
; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 8 [[TMP0]], i8 0, i64 16, i32 8)
; CHECK-NEXT:    [[BASE64_1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 1
; CHECK-NEXT:    store atomic i64 1, ptr [[BASE64_1]] unordered, align 8
; CHECK-NEXT:    store i64 2, ptr [[P]], align 8
; CHECK-NEXT:    ret void
;
entry:

  tail call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 8 %P, i8 0, i64 32, i32 8)

  %base64_1 = getelementptr inbounds i64, ptr %P, i64 1

  store atomic i64 1, ptr %base64_1 unordered, align 8
  store i64 2, ptr %P, align 8
  ret void
}

define void @write8To15AndThen0To7_atomic_weaker_2(ptr nocapture %P) {
; CHECK-LABEL: @write8To15AndThen0To7_atomic_weaker_2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 16
; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 8 [[TMP0]], i8 0, i64 16, i32 8)
; CHECK-NEXT:    [[BASE64_1:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 1
; CHECK-NEXT:    store i64 1, ptr [[BASE64_1]], align 8
; CHECK-NEXT:    store atomic i64 2, ptr [[P]] unordered, align 8
; CHECK-NEXT:    ret void
;
entry:

  tail call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 8 %P, i8 0, i64 32, i32 8)

  %base64_1 = getelementptr inbounds i64, ptr %P, i64 1

  store i64 1, ptr %base64_1, align 8
  store atomic i64 2, ptr %P unordered, align 8
  ret void
}

declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
declare void @llvm.memset.p1.i64(ptr addrspace(1) nocapture, i8, i64, i1) nounwind
declare void @llvm.memset.element.unordered.atomic.p0.i64(ptr nocapture, i8, i64, i32) nounwind

; Align-1 memset: the store (bytes 0-7 of %p) kills the first 7 bytes of the
; memset starting at %p+1, so the memset start advances 7 bytes (length 25).
define void @ow_begin_align1(ptr nocapture %p) {
; CHECK-LABEL: @ow_begin_align1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 7
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 [[TMP0]], i8 0, i64 25, i1 false)
; CHECK-NEXT:    store i64 1, ptr [[P]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %p1 = getelementptr inbounds i8, ptr %p, i64 1
  call void @llvm.memset.p0.i64(ptr align 1 %p1, i8 0, i64 32, i1 false)
  store i64 1, ptr %p, align 1
  ret void
}

; With an align-4 memset only a 4-byte-aligned prefix can be trimmed, so the
; start advances 4 bytes (not 7) and 28 bytes remain.
define void @ow_end_align4(ptr nocapture %p) {
; CHECK-LABEL: @ow_end_align4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 4
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 28, i1 false)
; CHECK-NEXT:    store i64 1, ptr [[P]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %p1 = getelementptr inbounds i8, ptr %p, i64 1
  call void @llvm.memset.p0.i64(ptr align 4 %p1, i8 0, i64 32, i1 false)
  store i64 1, ptr %p, align 1
  ret void
}

; With an align-8 memset, trimming the 7 dead leading bytes would leave a
; start that no longer satisfies the alignment, so nothing is changed.
define void @ow_end_align8(ptr nocapture %p) {
; CHECK-LABEL: @ow_end_align8(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[P1]], i8 0, i64 32, i1 false)
; CHECK-NEXT:    store i64 1, ptr [[P]], align 1
; CHECK-NEXT:    ret void
;
entry:
  %p1 = getelementptr inbounds i8, ptr %p, i64 1
  call void @llvm.memset.p0.i64(ptr align 8 %p1, i8 0, i64 32, i1 false)
  store i64 1, ptr %p, align 1
  ret void
}