; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=x86-64 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=x86-64 --x86-disable-avoid-SFB -verify-machineinstrs | FileCheck %s --check-prefix=DISABLED
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core-avx2 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX2
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=skx -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX512

; Overview: every function below copies 16 bytes out of the region
; [A-16, A) with llvm.memcpy after writing narrower, mutually overlapping
; stores into that same region. The test is compiled four ways: plain x86-64,
; plain x86-64 with --x86-disable-avoid-SFB, core-avx2 and skx.
;  * With --x86-disable-avoid-SFB each copy is expected to stay a single
;    16-byte movups load/store pair.
;  * In the other three configurations a copy that follows such stores is
;    expected to be broken into several narrower load/store pairs.
; NOTE(review): "SFB" presumably stands for "store forwarding block"; the
; expansion is not spelled out anywhere in this file - confirm against the
; X86 backend sources.
; The expected-output comments inside each function are generated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.

; ModuleID = '../testSFB/testOverlapBlocks.c'
source_filename = "../testSFB/testOverlapBlocks.c"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; test_overlap_1
;   First copy  (A-16 -> A):    preceded by a 4-byte store of 7 at A-8.
;     Expected pieces when the split is enabled: 8 + 4 + 4 bytes
;     (source offsets -16, -8, -4).
;   Second copy (A-16 -> A+16): preceded by an 8-byte store at A-9, an 8-byte
;     store at A-16 and a 1-byte store at A-1 (the first two overlap at A-9).
;     Expected pieces when the split is enabled: 8 + 4 + 2 + 1 + 1 bytes
;     (source offsets -16, -8, -4, -2, -1).
; Function Attrs: nounwind uwtable
define dso_local void @test_overlap_1(ptr nocapture %A, i32 %x) local_unnamed_addr #0 {
; CHECK-LABEL: test_overlap_1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl $7, -8(%rdi)
; CHECK-NEXT: movq -16(%rdi), %rax
; CHECK-NEXT: movq %rax, (%rdi)
; CHECK-NEXT: movl -8(%rdi), %eax
; CHECK-NEXT: movl %eax, 8(%rdi)
; CHECK-NEXT: movl -4(%rdi), %eax
; CHECK-NEXT: movl %eax, 12(%rdi)
; CHECK-NEXT: movslq %esi, %rax
; CHECK-NEXT: movq %rax, -9(%rdi)
; CHECK-NEXT: movq %rax, -16(%rdi)
; CHECK-NEXT: movb $0, -1(%rdi)
; CHECK-NEXT: movq -16(%rdi), %rax
; CHECK-NEXT: movq %rax, 16(%rdi)
; CHECK-NEXT: movl -8(%rdi), %eax
; CHECK-NEXT: movl %eax, 24(%rdi)
; CHECK-NEXT: movzwl -4(%rdi), %eax
; CHECK-NEXT: movw %ax, 28(%rdi)
; CHECK-NEXT: movzbl -2(%rdi), %eax
; CHECK-NEXT: movb %al, 30(%rdi)
; CHECK-NEXT: movzbl -1(%rdi), %eax
; CHECK-NEXT: movb %al, 31(%rdi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_overlap_1:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: movl $7, -8(%rdi)
; DISABLED-NEXT: movups -16(%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rdi)
; DISABLED-NEXT: movslq %esi, %rax
; DISABLED-NEXT: movq %rax, -9(%rdi)
; DISABLED-NEXT: movq %rax, -16(%rdi)
; DISABLED-NEXT: movb $0, -1(%rdi)
; DISABLED-NEXT: movups -16(%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, 16(%rdi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_overlap_1:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: movl $7, -8(%rdi)
; CHECK-AVX2-NEXT: movq -16(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, (%rdi)
; CHECK-AVX2-NEXT: movl -8(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 8(%rdi)
; CHECK-AVX2-NEXT: movl -4(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 12(%rdi)
; CHECK-AVX2-NEXT: movslq %esi, %rax
; CHECK-AVX2-NEXT: movq %rax, -9(%rdi)
; CHECK-AVX2-NEXT: movq %rax, -16(%rdi)
; CHECK-AVX2-NEXT: movb $0, -1(%rdi)
; CHECK-AVX2-NEXT: movq -16(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, 16(%rdi)
; CHECK-AVX2-NEXT: movl -8(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 24(%rdi)
; CHECK-AVX2-NEXT: movzwl -4(%rdi), %eax
; CHECK-AVX2-NEXT: movw %ax, 28(%rdi)
; CHECK-AVX2-NEXT: movzbl -2(%rdi), %eax
; CHECK-AVX2-NEXT: movb %al, 30(%rdi)
; CHECK-AVX2-NEXT: movzbl -1(%rdi), %eax
; CHECK-AVX2-NEXT: movb %al, 31(%rdi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_overlap_1:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: movl $7, -8(%rdi)
; CHECK-AVX512-NEXT: movq -16(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, (%rdi)
; CHECK-AVX512-NEXT: movl -8(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 8(%rdi)
; CHECK-AVX512-NEXT: movl -4(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 12(%rdi)
; CHECK-AVX512-NEXT: movslq %esi, %rax
; CHECK-AVX512-NEXT: movq %rax, -9(%rdi)
; CHECK-AVX512-NEXT: movq %rax, -16(%rdi)
; CHECK-AVX512-NEXT: movb $0, -1(%rdi)
; CHECK-AVX512-NEXT: movq -16(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, 16(%rdi)
; CHECK-AVX512-NEXT: movl -8(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 24(%rdi)
; CHECK-AVX512-NEXT: movzwl -4(%rdi), %eax
; CHECK-AVX512-NEXT: movw %ax, 28(%rdi)
; CHECK-AVX512-NEXT: movzbl -2(%rdi), %eax
; CHECK-AVX512-NEXT: movb %al, 30(%rdi)
; CHECK-AVX512-NEXT: movzbl -1(%rdi), %eax
; CHECK-AVX512-NEXT: movb %al, 31(%rdi)
; CHECK-AVX512-NEXT: retq
entry:
  %add.ptr = getelementptr inbounds i8, ptr %A, i64 -16
  %add.ptr1 = getelementptr inbounds i8, ptr %A, i64 -8
  store i32 7, ptr %add.ptr1, align 4
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
  %conv = sext i32 %x to i64
  %add.ptr2 = getelementptr inbounds i8, ptr %A, i64 -9
  store i64 %conv, ptr %add.ptr2, align 8
  store i64 %conv, ptr %add.ptr, align 8
  %add.ptr5 = getelementptr inbounds i8, ptr %A, i64 -1
  store i8 0, ptr %add.ptr5, align 1
  %add.ptr6 = getelementptr inbounds i8, ptr %A, i64 16
  tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 4 %add.ptr6, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
  ret void
}

; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1) #1

; test_overlap_2
;   First copy  (A-16 -> A):    preceded by an 8-byte store at A-16.
;     Expected pieces when the split is enabled: 8 + 8 bytes
;     (source offsets -16, -8).
;   Second copy (A-16 -> A+16): preceded by an 8-byte store at A-8 and a
;     4-byte store of 7 at A-12, on top of the earlier 8-byte store at A-16.
;     Expected pieces when the split is enabled: 4 + 4 + 8 bytes
;     (source offsets -16, -12, -8).
; Function Attrs: nounwind uwtable
define dso_local void @test_overlap_2(ptr nocapture %A, i32 %x) local_unnamed_addr #0 {
; CHECK-LABEL: test_overlap_2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movslq %esi, %rax
; CHECK-NEXT: movq %rax, -16(%rdi)
; CHECK-NEXT: movq -16(%rdi), %rcx
; CHECK-NEXT: movq %rcx, (%rdi)
; CHECK-NEXT: movq -8(%rdi), %rcx
; CHECK-NEXT: movq %rcx, 8(%rdi)
; CHECK-NEXT: movq %rax, -8(%rdi)
; CHECK-NEXT: movl $7, -12(%rdi)
; CHECK-NEXT: movl -16(%rdi), %eax
; CHECK-NEXT: movl %eax, 16(%rdi)
; CHECK-NEXT: movl -12(%rdi), %eax
; CHECK-NEXT: movl %eax, 20(%rdi)
; CHECK-NEXT: movq -8(%rdi), %rax
; CHECK-NEXT: movq %rax, 24(%rdi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_overlap_2:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: movslq %esi, %rax
; DISABLED-NEXT: movq %rax, -16(%rdi)
; DISABLED-NEXT: movups -16(%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rdi)
; DISABLED-NEXT: movq %rax, -8(%rdi)
; DISABLED-NEXT: movl $7, -12(%rdi)
; DISABLED-NEXT: movups -16(%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, 16(%rdi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_overlap_2:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: movslq %esi, %rax
; CHECK-AVX2-NEXT: movq %rax, -16(%rdi)
; CHECK-AVX2-NEXT: movq -16(%rdi), %rcx
; CHECK-AVX2-NEXT: movq %rcx, (%rdi)
; CHECK-AVX2-NEXT: movq -8(%rdi), %rcx
; CHECK-AVX2-NEXT: movq %rcx, 8(%rdi)
; CHECK-AVX2-NEXT: movq %rax, -8(%rdi)
; CHECK-AVX2-NEXT: movl $7, -12(%rdi)
; CHECK-AVX2-NEXT: movl -16(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 16(%rdi)
; CHECK-AVX2-NEXT: movl -12(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 20(%rdi)
; CHECK-AVX2-NEXT: movq -8(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, 24(%rdi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_overlap_2:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: movslq %esi, %rax
; CHECK-AVX512-NEXT: movq %rax, -16(%rdi)
; CHECK-AVX512-NEXT: movq -16(%rdi), %rcx
; CHECK-AVX512-NEXT: movq %rcx, (%rdi)
; CHECK-AVX512-NEXT: movq -8(%rdi), %rcx
; CHECK-AVX512-NEXT: movq %rcx, 8(%rdi)
; CHECK-AVX512-NEXT: movq %rax, -8(%rdi)
; CHECK-AVX512-NEXT: movl $7, -12(%rdi)
; CHECK-AVX512-NEXT: movl -16(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 16(%rdi)
; CHECK-AVX512-NEXT: movl -12(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 20(%rdi)
; CHECK-AVX512-NEXT: movq -8(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, 24(%rdi)
; CHECK-AVX512-NEXT: retq
entry:
  %add.ptr = getelementptr inbounds i8, ptr %A, i64 -16
  %conv = sext i32 %x to i64
  store i64 %conv, ptr %add.ptr, align 8
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
  %add.ptr3 = getelementptr inbounds i8, ptr %A, i64 -8
  store i64 %conv, ptr %add.ptr3, align 8
  %add.ptr4 = getelementptr inbounds i8, ptr %A, i64 -12
  store i32 7, ptr %add.ptr4, align 4
  %add.ptr5 = getelementptr inbounds i8, ptr %A, i64 16
  tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 4 %add.ptr5, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
  ret void
}

; test_overlap_3
;   Same shape as test_overlap_1, except that the initial 4-byte store of 7
;   is at A-10 instead of A-8.
;   First copy  (A-16 -> A):    expected pieces when the split is enabled:
;     4 + 2 + 4 + 4 + 2 bytes (source offsets -16, -12, -10, -6, -2).
;   Second copy (A-16 -> A+16): preceded by an 8-byte store at A-9, an 8-byte
;     store at A-16 and a 1-byte store at A-1.
;     Expected pieces when the split is enabled: 8 + 2 + 4 + 1 + 1 bytes
;     (source offsets -16, -8, -6, -2, -1).
; Function Attrs: nounwind uwtable
define dso_local void @test_overlap_3(ptr nocapture %A, i32 %x) local_unnamed_addr #0 {
; CHECK-LABEL: test_overlap_3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl $7, -10(%rdi)
; CHECK-NEXT: movl -16(%rdi), %eax
; CHECK-NEXT: movl %eax, (%rdi)
; CHECK-NEXT: movzwl -12(%rdi), %eax
; CHECK-NEXT: movw %ax, 4(%rdi)
; CHECK-NEXT: movl -10(%rdi), %eax
; CHECK-NEXT: movl %eax, 6(%rdi)
; CHECK-NEXT: movl -6(%rdi), %eax
; CHECK-NEXT: movl %eax, 10(%rdi)
; CHECK-NEXT: movzwl -2(%rdi), %eax
; CHECK-NEXT: movw %ax, 14(%rdi)
; CHECK-NEXT: movslq %esi, %rax
; CHECK-NEXT: movq %rax, -9(%rdi)
; CHECK-NEXT: movq %rax, -16(%rdi)
; CHECK-NEXT: movb $0, -1(%rdi)
; CHECK-NEXT: movq -16(%rdi), %rax
; CHECK-NEXT: movq %rax, 16(%rdi)
; CHECK-NEXT: movzwl -8(%rdi), %eax
; CHECK-NEXT: movw %ax, 24(%rdi)
; CHECK-NEXT: movl -6(%rdi), %eax
; CHECK-NEXT: movl %eax, 26(%rdi)
; CHECK-NEXT: movzbl -2(%rdi), %eax
; CHECK-NEXT: movb %al, 30(%rdi)
; CHECK-NEXT: movzbl -1(%rdi), %eax
; CHECK-NEXT: movb %al, 31(%rdi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_overlap_3:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: movl $7, -10(%rdi)
; DISABLED-NEXT: movups -16(%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rdi)
; DISABLED-NEXT: movslq %esi, %rax
; DISABLED-NEXT: movq %rax, -9(%rdi)
; DISABLED-NEXT: movq %rax, -16(%rdi)
; DISABLED-NEXT: movb $0, -1(%rdi)
; DISABLED-NEXT: movups -16(%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, 16(%rdi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_overlap_3:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: movl $7, -10(%rdi)
; CHECK-AVX2-NEXT: movl -16(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, (%rdi)
; CHECK-AVX2-NEXT: movzwl -12(%rdi), %eax
; CHECK-AVX2-NEXT: movw %ax, 4(%rdi)
; CHECK-AVX2-NEXT: movl -10(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 6(%rdi)
; CHECK-AVX2-NEXT: movl -6(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 10(%rdi)
; CHECK-AVX2-NEXT: movzwl -2(%rdi), %eax
; CHECK-AVX2-NEXT: movw %ax, 14(%rdi)
; CHECK-AVX2-NEXT: movslq %esi, %rax
; CHECK-AVX2-NEXT: movq %rax, -9(%rdi)
; CHECK-AVX2-NEXT: movq %rax, -16(%rdi)
; CHECK-AVX2-NEXT: movb $0, -1(%rdi)
; CHECK-AVX2-NEXT: movq -16(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, 16(%rdi)
; CHECK-AVX2-NEXT: movzwl -8(%rdi), %eax
; CHECK-AVX2-NEXT: movw %ax, 24(%rdi)
; CHECK-AVX2-NEXT: movl -6(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 26(%rdi)
; CHECK-AVX2-NEXT: movzbl -2(%rdi), %eax
; CHECK-AVX2-NEXT: movb %al, 30(%rdi)
; CHECK-AVX2-NEXT: movzbl -1(%rdi), %eax
; CHECK-AVX2-NEXT: movb %al, 31(%rdi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_overlap_3:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: movl $7, -10(%rdi)
; CHECK-AVX512-NEXT: movl -16(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, (%rdi)
; CHECK-AVX512-NEXT: movzwl -12(%rdi), %eax
; CHECK-AVX512-NEXT: movw %ax, 4(%rdi)
; CHECK-AVX512-NEXT: movl -10(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 6(%rdi)
; CHECK-AVX512-NEXT: movl -6(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 10(%rdi)
; CHECK-AVX512-NEXT: movzwl -2(%rdi), %eax
; CHECK-AVX512-NEXT: movw %ax, 14(%rdi)
; CHECK-AVX512-NEXT: movslq %esi, %rax
; CHECK-AVX512-NEXT: movq %rax, -9(%rdi)
; CHECK-AVX512-NEXT: movq %rax, -16(%rdi)
; CHECK-AVX512-NEXT: movb $0, -1(%rdi)
; CHECK-AVX512-NEXT: movq -16(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, 16(%rdi)
; CHECK-AVX512-NEXT: movzwl -8(%rdi), %eax
; CHECK-AVX512-NEXT: movw %ax, 24(%rdi)
; CHECK-AVX512-NEXT: movl -6(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 26(%rdi)
; CHECK-AVX512-NEXT: movzbl -2(%rdi), %eax
; CHECK-AVX512-NEXT: movb %al, 30(%rdi)
; CHECK-AVX512-NEXT: movzbl -1(%rdi), %eax
; CHECK-AVX512-NEXT: movb %al, 31(%rdi)
; CHECK-AVX512-NEXT: retq
entry:
  %add.ptr = getelementptr inbounds i8, ptr %A, i64 -16
  %add.ptr1 = getelementptr inbounds i8, ptr %A, i64 -10
  store i32 7, ptr %add.ptr1, align 4
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
  %conv = sext i32 %x to i64
  %add.ptr2 = getelementptr inbounds i8, ptr %A, i64 -9
  store i64 %conv, ptr %add.ptr2, align 8
  store i64 %conv, ptr %add.ptr, align 8
  %add.ptr5 = getelementptr inbounds i8, ptr %A, i64 -1
  store i8 0, ptr %add.ptr5, align 1
  %add.ptr6 = getelementptr inbounds i8, ptr %A, i64 16
  tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 4 %add.ptr6, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
  ret void
}

; test_overlap_4
;   First copy  (A-16 -> A):    no store precedes it inside this function; it
;     is expected to remain one 16-byte (v)movups pair in every configuration.
;   Second copy (A-16 -> A+16): preceded by an 8-byte store at A-8, a 4-byte
;     store of %x at A-16 and a 4-byte store of 0 at A-11 (the last one
;     overlaps both of the others).
;     Expected pieces when the split is enabled: 4 + 1 + 4 + 4 + 2 + 1 bytes
;     (source offsets -16, -12, -11, -7, -3, -1).
; Function Attrs: nounwind uwtable
define dso_local void @test_overlap_4(ptr nocapture %A, i32 %x) local_unnamed_addr #0 {
; CHECK-LABEL: test_overlap_4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movups -16(%rdi), %xmm0
; CHECK-NEXT: movups %xmm0, (%rdi)
; CHECK-NEXT: movslq %esi, %rax
; CHECK-NEXT: movq %rax, -8(%rdi)
; CHECK-NEXT: movl %eax, -16(%rdi)
; CHECK-NEXT: movl $0, -11(%rdi)
; CHECK-NEXT: movl -16(%rdi), %eax
; CHECK-NEXT: movl %eax, 16(%rdi)
; CHECK-NEXT: movzbl -12(%rdi), %eax
; CHECK-NEXT: movb %al, 20(%rdi)
; CHECK-NEXT: movl -11(%rdi), %eax
; CHECK-NEXT: movl %eax, 21(%rdi)
; CHECK-NEXT: movl -7(%rdi), %eax
; CHECK-NEXT: movl %eax, 25(%rdi)
; CHECK-NEXT: movzwl -3(%rdi), %eax
; CHECK-NEXT: movw %ax, 29(%rdi)
; CHECK-NEXT: movzbl -1(%rdi), %eax
; CHECK-NEXT: movb %al, 31(%rdi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_overlap_4:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: movups -16(%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rdi)
; DISABLED-NEXT: movslq %esi, %rax
; DISABLED-NEXT: movq %rax, -8(%rdi)
; DISABLED-NEXT: movl %eax, -16(%rdi)
; DISABLED-NEXT: movl $0, -11(%rdi)
; DISABLED-NEXT: movups -16(%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, 16(%rdi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_overlap_4:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: vmovups -16(%rdi), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, (%rdi)
; CHECK-AVX2-NEXT: movslq %esi, %rax
; CHECK-AVX2-NEXT: movq %rax, -8(%rdi)
; CHECK-AVX2-NEXT: movl %eax, -16(%rdi)
; CHECK-AVX2-NEXT: movl $0, -11(%rdi)
; CHECK-AVX2-NEXT: movl -16(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 16(%rdi)
; CHECK-AVX2-NEXT: movzbl -12(%rdi), %eax
; CHECK-AVX2-NEXT: movb %al, 20(%rdi)
; CHECK-AVX2-NEXT: movl -11(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 21(%rdi)
; CHECK-AVX2-NEXT: movl -7(%rdi), %eax
; CHECK-AVX2-NEXT: movl %eax, 25(%rdi)
; CHECK-AVX2-NEXT: movzwl -3(%rdi), %eax
; CHECK-AVX2-NEXT: movw %ax, 29(%rdi)
; CHECK-AVX2-NEXT: movzbl -1(%rdi), %eax
; CHECK-AVX2-NEXT: movb %al, 31(%rdi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_overlap_4:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: vmovups -16(%rdi), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, (%rdi)
; CHECK-AVX512-NEXT: movslq %esi, %rax
; CHECK-AVX512-NEXT: movq %rax, -8(%rdi)
; CHECK-AVX512-NEXT: movl %eax, -16(%rdi)
; CHECK-AVX512-NEXT: movl $0, -11(%rdi)
; CHECK-AVX512-NEXT: movl -16(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 16(%rdi)
; CHECK-AVX512-NEXT: movzbl -12(%rdi), %eax
; CHECK-AVX512-NEXT: movb %al, 20(%rdi)
; CHECK-AVX512-NEXT: movl -11(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 21(%rdi)
; CHECK-AVX512-NEXT: movl -7(%rdi), %eax
; CHECK-AVX512-NEXT: movl %eax, 25(%rdi)
; CHECK-AVX512-NEXT: movzwl -3(%rdi), %eax
; CHECK-AVX512-NEXT: movw %ax, 29(%rdi)
; CHECK-AVX512-NEXT: movzbl -1(%rdi), %eax
; CHECK-AVX512-NEXT: movb %al, 31(%rdi)
; CHECK-AVX512-NEXT: retq
entry:
  %add.ptr = getelementptr inbounds i8, ptr %A, i64 -16
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
  %conv = sext i32 %x to i64
  %add.ptr1 = getelementptr inbounds i8, ptr %A, i64 -8
  store i64 %conv, ptr %add.ptr1, align 8
  store i32 %x, ptr %add.ptr, align 4
  %add.ptr3 = getelementptr inbounds i8, ptr %A, i64 -11
  store i32 0, ptr %add.ptr3, align 4
  %add.ptr4 = getelementptr inbounds i8, ptr %A, i64 16
  tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 4 %add.ptr4, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
  ret void
}

; test_overlap_5
;   First copy  (A-16 -> A):    no store precedes it inside this function; it
;     is expected to remain one 16-byte (v)movups pair in every configuration.
;   Second copy (A-16 -> A+16): preceded by an 8-byte store at A-16 and two
;     1-byte stores inside that range, at A-14 (low byte of %x) and A-11 (0).
;     Expected pieces when the split is enabled: 2 + 1 + 2 + 1 + 8 + 2 bytes
;     (source offsets -16, -14, -13, -11, -10, -2).
; Function Attrs: nounwind uwtable
define dso_local void @test_overlap_5(ptr nocapture %A, i32 %x) local_unnamed_addr #0 {
; CHECK-LABEL: test_overlap_5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movups -16(%rdi), %xmm0
; CHECK-NEXT: movups %xmm0, (%rdi)
; CHECK-NEXT: movslq %esi, %rax
; CHECK-NEXT: movq %rax, -16(%rdi)
; CHECK-NEXT: movb %al, -14(%rdi)
; CHECK-NEXT: movb $0, -11(%rdi)
; CHECK-NEXT: movzwl -16(%rdi), %eax
; CHECK-NEXT: movw %ax, 16(%rdi)
; CHECK-NEXT: movzbl -14(%rdi), %eax
; CHECK-NEXT: movb %al, 18(%rdi)
; CHECK-NEXT: movzwl -13(%rdi), %eax
; CHECK-NEXT: movw %ax, 19(%rdi)
; CHECK-NEXT: movzbl -11(%rdi), %eax
; CHECK-NEXT: movb %al, 21(%rdi)
; CHECK-NEXT: movq -10(%rdi), %rax
; CHECK-NEXT: movq %rax, 22(%rdi)
; CHECK-NEXT: movzwl -2(%rdi), %eax
; CHECK-NEXT: movw %ax, 30(%rdi)
; CHECK-NEXT: retq
;
; DISABLED-LABEL: test_overlap_5:
; DISABLED: # %bb.0: # %entry
; DISABLED-NEXT: movups -16(%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, (%rdi)
; DISABLED-NEXT: movslq %esi, %rax
; DISABLED-NEXT: movq %rax, -16(%rdi)
; DISABLED-NEXT: movb %al, -14(%rdi)
; DISABLED-NEXT: movb $0, -11(%rdi)
; DISABLED-NEXT: movups -16(%rdi), %xmm0
; DISABLED-NEXT: movups %xmm0, 16(%rdi)
; DISABLED-NEXT: retq
;
; CHECK-AVX2-LABEL: test_overlap_5:
; CHECK-AVX2: # %bb.0: # %entry
; CHECK-AVX2-NEXT: vmovups -16(%rdi), %xmm0
; CHECK-AVX2-NEXT: vmovups %xmm0, (%rdi)
; CHECK-AVX2-NEXT: movslq %esi, %rax
; CHECK-AVX2-NEXT: movq %rax, -16(%rdi)
; CHECK-AVX2-NEXT: movb %al, -14(%rdi)
; CHECK-AVX2-NEXT: movb $0, -11(%rdi)
; CHECK-AVX2-NEXT: movzwl -16(%rdi), %eax
; CHECK-AVX2-NEXT: movw %ax, 16(%rdi)
; CHECK-AVX2-NEXT: movzbl -14(%rdi), %eax
; CHECK-AVX2-NEXT: movb %al, 18(%rdi)
; CHECK-AVX2-NEXT: movzwl -13(%rdi), %eax
; CHECK-AVX2-NEXT: movw %ax, 19(%rdi)
; CHECK-AVX2-NEXT: movzbl -11(%rdi), %eax
; CHECK-AVX2-NEXT: movb %al, 21(%rdi)
; CHECK-AVX2-NEXT: movq -10(%rdi), %rax
; CHECK-AVX2-NEXT: movq %rax, 22(%rdi)
; CHECK-AVX2-NEXT: movzwl -2(%rdi), %eax
; CHECK-AVX2-NEXT: movw %ax, 30(%rdi)
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: test_overlap_5:
; CHECK-AVX512: # %bb.0: # %entry
; CHECK-AVX512-NEXT: vmovups -16(%rdi), %xmm0
; CHECK-AVX512-NEXT: vmovups %xmm0, (%rdi)
; CHECK-AVX512-NEXT: movslq %esi, %rax
; CHECK-AVX512-NEXT: movq %rax, -16(%rdi)
; CHECK-AVX512-NEXT: movb %al, -14(%rdi)
; CHECK-AVX512-NEXT: movb $0, -11(%rdi)
; CHECK-AVX512-NEXT: movzwl -16(%rdi), %eax
; CHECK-AVX512-NEXT: movw %ax, 16(%rdi)
; CHECK-AVX512-NEXT: movzbl -14(%rdi), %eax
; CHECK-AVX512-NEXT: movb %al, 18(%rdi)
; CHECK-AVX512-NEXT: movzwl -13(%rdi), %eax
; CHECK-AVX512-NEXT: movw %ax, 19(%rdi)
; CHECK-AVX512-NEXT: movzbl -11(%rdi), %eax
; CHECK-AVX512-NEXT: movb %al, 21(%rdi)
; CHECK-AVX512-NEXT: movq -10(%rdi), %rax
; CHECK-AVX512-NEXT: movq %rax, 22(%rdi)
; CHECK-AVX512-NEXT: movzwl -2(%rdi), %eax
; CHECK-AVX512-NEXT: movw %ax, 30(%rdi)
; CHECK-AVX512-NEXT: retq
entry:
  %add.ptr = getelementptr inbounds i8, ptr %A, i64 -16
  tail call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
  %conv = sext i32 %x to i64
  store i64 %conv, ptr %add.ptr, align 8
  %conv2 = trunc i32 %x to i8
  %add.ptr3 = getelementptr inbounds i8, ptr %A, i64 -14
  store i8 %conv2, ptr %add.ptr3, align 1
  %add.ptr4 = getelementptr inbounds i8, ptr %A, i64 -11
  store i8 0, ptr %add.ptr4, align 1
  %add.ptr5 = getelementptr inbounds i8, ptr %A, i64 16
  tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 4 %add.ptr5, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
  ret void
}

attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }

attributes #1 = { argmemonly nounwind }