; Verify that the slow/fast unaligned memory attribute is set correctly for each CPU model.
; Slow chips use 4-byte stores. Fast chips with SSE or later use something other than 4-byte stores.
; Chips that don't have SSE use 4-byte stores either way, so they're not tested.
;
; Every run lowers the same 64-byte zeroing memset. The second check prefix
; selects the store sequence expected for that run:
;   SLOW-SCALAR   sixteen "movl $0" stores
;   SLOW-SSE      eight "movsd %xmm0" stores
;   FAST-SSE      four "movups %xmm0" stores
;   FAST-AVX128   four "vmovups %xmm0" stores
;   FAST-AVX256   two "vmovups %ymm0" stores
;   FAST-AVX512   one "vmovups %zmm0" store
; The bare SLOW and FAST prefixes only guard against llc reporting an
; unrecognized -mcpu value (stderr is merged into the checked stream).

; Intel chips with slow unaligned memory accesses

; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3       2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SCALAR
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3m      2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SCALAR
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium-m      2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4       2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4m      2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=yonah          2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=prescott       2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nocona         2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=core2          2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=penryn         2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bonnell        2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE

; Intel chips with fast unaligned memory accesses

; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=silvermont     2>&1 | FileCheck %s --check-prefixes=FAST,FAST-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nehalem        2>&1 | FileCheck %s --check-prefixes=FAST,FAST-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=westmere       2>&1 | FileCheck %s --check-prefixes=FAST,FAST-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=sandybridge    2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX128
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=ivybridge      2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX128
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=haswell        2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=broadwell      2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=knl            2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX512
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=skylake-avx512 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256

; AMD chips with slow unaligned memory accesses

; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-4       2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SCALAR
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-xp      2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SCALAR
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=k8             2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=opteron        2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon64       2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon-fx      2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=k8-sse3        2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=opteron-sse3   2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=athlon64-sse3  2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SSE

; AMD chips with fast unaligned memory accesses

; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=amdfam10       2>&1 | FileCheck %s --check-prefixes=FAST,FAST-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=barcelona      2>&1 | FileCheck %s --check-prefixes=FAST,FAST-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver1         2>&1 | FileCheck %s --check-prefixes=FAST,FAST-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver2         2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver1         2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver2         2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3         2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4         2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver1         2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver2         2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver3         2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver4         2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX512
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver5         2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX512

; Other chips with slow unaligned memory accesses

; NOTE(review): this run previously passed only the bare SLOW prefix, so none
; of the store checks applied to it. SLOW-SCALAR is assumed to be the right
; codegen prefix (same expectation as the pentium3 runs) - confirm with lit.
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=c3-2           2>&1 | FileCheck %s --check-prefixes=SLOW,SLOW-SCALAR

; Also verify that SSE4.2 or SSE4a imply fast unaligned accesses.

; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=sse4.2        2>&1 | FileCheck %s --check-prefixes=FAST,FAST-SSE
; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=sse4a         2>&1 | FileCheck %s --check-prefixes=FAST,FAST-SSE

; SLOW-NOT: not a recognized processor
; FAST-NOT: not a recognized processor

; Zero 64 bytes at %a. The memset call carries no alignment on the pointer,
; so the width of the emitted stores shows how each run treats unaligned
; accesses.
define void @store_zeros(ptr %a) {
; SLOW-SCALAR-LABEL: store_zeros:
; SLOW-SCALAR:       # %bb.0:
; SLOW-SCALAR-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SLOW-SCALAR-NEXT:    movl $0
; SLOW-SCALAR-NEXT:    movl $0
; SLOW-SCALAR-NEXT:    movl $0
; SLOW-SCALAR-NEXT:    movl $0
; SLOW-SCALAR-NEXT:    movl $0
; SLOW-SCALAR-NEXT:    movl $0
; SLOW-SCALAR-NEXT:    movl $0
; SLOW-SCALAR-NEXT:    movl $0
; SLOW-SCALAR-NEXT:    movl $0
; SLOW-SCALAR-NEXT:    movl $0
; SLOW-SCALAR-NEXT:    movl $0
; SLOW-SCALAR-NEXT:    movl $0
; SLOW-SCALAR-NEXT:    movl $0
; SLOW-SCALAR-NEXT:    movl $0
; SLOW-SCALAR-NEXT:    movl $0
; SLOW-SCALAR-NEXT:    movl $0
; SLOW-SCALAR-NOT:     movl
;
; SLOW-SSE-LABEL: store_zeros:
; SLOW-SSE:       # %bb.0:
; SLOW-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SLOW-SSE-NEXT:    xorps %xmm0, %xmm0
; SLOW-SSE-NEXT:    movsd %xmm0
; SLOW-SSE-NEXT:    movsd %xmm0
; SLOW-SSE-NEXT:    movsd %xmm0
; SLOW-SSE-NEXT:    movsd %xmm0
; SLOW-SSE-NEXT:    movsd %xmm0
; SLOW-SSE-NEXT:    movsd %xmm0
; SLOW-SSE-NEXT:    movsd %xmm0
; SLOW-SSE-NEXT:    movsd %xmm0
; SLOW-SSE-NOT:     movsd
;
; FAST-SSE-LABEL: store_zeros:
; FAST-SSE:       # %bb.0:
; FAST-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; FAST-SSE-NEXT:    xorps %xmm0, %xmm0
; FAST-SSE-NEXT:    movups %xmm0
; FAST-SSE-NEXT:    movups %xmm0
; FAST-SSE-NEXT:    movups %xmm0
; FAST-SSE-NEXT:    movups %xmm0
; FAST-SSE-NOT:     movups
;
; FAST-AVX128-LABEL: store_zeros:
; FAST-AVX128:       # %bb.0:
; FAST-AVX128-NEXT:    movl {{[0-9]+}}(%esp), %eax
; FAST-AVX128-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; FAST-AVX128-NEXT:    vmovups %xmm0
; FAST-AVX128-NEXT:    vmovups %xmm0
; FAST-AVX128-NEXT:    vmovups %xmm0
; FAST-AVX128-NEXT:    vmovups %xmm0
; FAST-AVX128-NOT:     vmovups
;
; FAST-AVX256-LABEL: store_zeros:
; FAST-AVX256:       # %bb.0:
; FAST-AVX256-NEXT:    movl {{[0-9]+}}(%esp), %eax
; FAST-AVX256-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; FAST-AVX256-NEXT:    vmovups %ymm0
; FAST-AVX256-NEXT:    vmovups %ymm0
; FAST-AVX256-NOT:     vmovups
;
; FAST-AVX512-LABEL: store_zeros:
; FAST-AVX512:       # %bb.0:
; FAST-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax
; FAST-AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; FAST-AVX512-NEXT:    vmovups %zmm0, (%eax)
; FAST-AVX512-NOT:     vmovups
  call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 64, i1 false)
  ret void
}

declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1)