; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+avx512f | FileCheck %s --check-prefix=AVX512
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+avx512f,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX512
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+avx512f,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX512
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+xop | FileCheck %s --check-prefixes=AVX,XOP

define void @insert_v7i8_v2i16_2(ptr%a0, ptr%a1) nounwind {
; SSE-LABEL: insert_v7i8_v2i16_2:
; SSE:       # %bb.0:
; SSE-NEXT:    movl (%rsi), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    movq (%rdi), %rcx
; SSE-NEXT:    movq %rcx, %xmm1
; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE-NEXT:    shrq $48, %rcx
; SSE-NEXT:    movb %cl, 6(%rdi)
; SSE-NEXT:    shrl $16, %eax
; SSE-NEXT:    movw %ax, 4(%rdi)
; SSE-NEXT:    movd %xmm1, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_v7i8_v2i16_2:
; AVX:       # %bb.0:
; AVX-NEXT:    movl (%rsi), %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    movq (%rdi), %rcx
; AVX-NEXT:    vmovq %rcx, %xmm1
; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; AVX-NEXT:    shrq $48, %rcx
; AVX-NEXT:    movb %cl, 6(%rdi)
; AVX-NEXT:    shrl $16, %eax
; AVX-NEXT:    movw %ax, 4(%rdi)
; AVX-NEXT:    vmovd %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: insert_v7i8_v2i16_2:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movl (%rsi), %eax
; AVX512-NEXT:    vmovd %eax, %xmm0
; AVX512-NEXT:    movq (%rdi), %rcx
; AVX512-NEXT:    vmovq %rcx, %xmm1
; AVX512-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; AVX512-NEXT:    shrq $48, %rcx
; AVX512-NEXT:    movb %cl, 6(%rdi)
; AVX512-NEXT:    shrl $16, %eax
; AVX512-NEXT:    movw %ax, 4(%rdi)
; AVX512-NEXT:    vmovd %xmm0, (%rdi)
; AVX512-NEXT:    retq
  %1 = load <2 x i16>, ptr%a1
  %2 = bitcast <2 x i16> %1 to <4 x i8>
  %3 = shufflevector <4 x i8> %2, <4 x i8> undef, <7 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef>
  %4 = load <7 x i8>, ptr%a0
  %5 = shufflevector <7 x i8> %4, <7 x i8> %3, <7 x i32> <i32 0, i32 1, i32 7, i32 8, i32 9, i32 10, i32 6>
  store <7 x i8> %5, ptr %a0
  ret void
}

%struct.Mat4 = type { %struct.storage }
%struct.storage = type { [16 x float] }

define void @PR40815(ptr nocapture readonly dereferenceable(64), ptr nocapture dereferenceable(64)) {
; SSE-LABEL: PR40815:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    movaps %xmm3, (%rsi)
; SSE-NEXT:    movaps %xmm2, 16(%rsi)
; SSE-NEXT:    movaps %xmm1, 32(%rsi)
; SSE-NEXT:    movaps %xmm0, 48(%rsi)
; SSE-NEXT:    retq
;
; AVX-LABEL: PR40815:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %xmm0
; AVX-NEXT:    vmovaps 16(%rdi), %xmm1
; AVX-NEXT:    vmovaps 32(%rdi), %xmm2
; AVX-NEXT:    vmovaps 48(%rdi), %xmm3
; AVX-NEXT:    vmovaps %xmm2, 16(%rsi)
; AVX-NEXT:    vmovaps %xmm3, (%rsi)
; AVX-NEXT:    vmovaps %xmm0, 48(%rsi)
; AVX-NEXT:    vmovaps %xmm1, 32(%rsi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: PR40815:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps 48(%rdi), %xmm0
; AVX512-NEXT:    vmovups 16(%rdi), %ymm1
; AVX512-NEXT:    vinsertf128 $1, (%rdi), %ymm1, %ymm1
; AVX512-NEXT:    vinsertf128 $1, 32(%rdi), %ymm0, %ymm0
; AVX512-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovups %zmm0, (%rsi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %3 = load <16 x float>, ptr %0, align 64
  %4 = shufflevector <16 x float> %3, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = getelementptr inbounds %struct.Mat4, ptr %1, i64 0, i32 0, i32 0, i64 4
  %6 = bitcast <16 x float> %3 to <4 x i128>
  %7 = extractelement <4 x i128> %6, i32 1
  %8 = getelementptr inbounds %struct.Mat4, ptr %1, i64 0, i32 0, i32 0, i64 8
  %9 = bitcast <16 x float> %3 to <4 x i128>
  %10 = extractelement <4 x i128> %9, i32 2
  %11 = getelementptr inbounds %struct.Mat4, ptr %1, i64 0, i32 0, i32 0, i64 12
  %12 = bitcast <16 x float> %3 to <4 x i128>
  %13 = extractelement <4 x i128> %12, i32 3
  store i128 %13, ptr %1, align 16
  store i128 %10, ptr %5, align 16
  store i128 %7, ptr %8, align 16
  store <4 x float> %4, ptr %11, align 16
  ret void
}

define <16 x i32> @PR42819(ptr %a0) {
; SSE-LABEL: PR42819:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqu (%rdi), %xmm3
; SSE-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7,8,9,10,11]
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    xorps %xmm2, %xmm2
; SSE-NEXT:    retq
;
; AVX-LABEL: PR42819:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,0,1,2]
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm0[5,6,7]
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: PR42819:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqu (%rdi), %ymm0
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    valignd {{.*#+}} zmm0 = zmm1[3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1,2]
; AVX512-NEXT:    retq
  %1 = load <8 x i32>, ptr %a0, align 4
  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %3 = shufflevector <16 x i32> zeroinitializer, <16 x i32> %2, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
  ret <16 x i32> %3
}

@b = dso_local local_unnamed_addr global i32 0, align 4
@c = dso_local local_unnamed_addr global [49 x i32] zeroinitializer, align 16
@d = dso_local local_unnamed_addr global [49 x i32] zeroinitializer, align 16

define void @PR42833() {
; SSE2-LABEL: PR42833:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl b(%rip), %eax
; SSE2-NEXT:    movdqa c+128(%rip), %xmm0
; SSE2-NEXT:    movdqa c+144(%rip), %xmm2
; SSE2-NEXT:    addl c+128(%rip), %eax
; SSE2-NEXT:    movd %eax, %xmm1
; SSE2-NEXT:    movd %eax, %xmm3
; SSE2-NEXT:    paddd %xmm0, %xmm3
; SSE2-NEXT:    movdqa d+144(%rip), %xmm4
; SSE2-NEXT:    psubd %xmm2, %xmm4
; SSE2-NEXT:    paddd %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm0, %xmm5
; SSE2-NEXT:    paddd %xmm0, %xmm5
; SSE2-NEXT:    movss {{.*#+}} xmm5 = xmm3[0],xmm5[1,2,3]
; SSE2-NEXT:    movdqa %xmm2, c+144(%rip)
; SSE2-NEXT:    movaps %xmm5, c+128(%rip)
; SSE2-NEXT:    movdqa c+160(%rip), %xmm2
; SSE2-NEXT:    movdqa c+176(%rip), %xmm3
; SSE2-NEXT:    movdqa d+160(%rip), %xmm5
; SSE2-NEXT:    movdqa d+176(%rip), %xmm6
; SSE2-NEXT:    movdqa d+128(%rip), %xmm7
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT:    psubd %xmm0, %xmm7
; SSE2-NEXT:    psubd %xmm3, %xmm6
; SSE2-NEXT:    psubd %xmm2, %xmm5
; SSE2-NEXT:    movdqa %xmm5, d+160(%rip)
; SSE2-NEXT:    movdqa %xmm6, d+176(%rip)
; SSE2-NEXT:    movdqa %xmm4, d+144(%rip)
; SSE2-NEXT:    movdqa %xmm7, d+128(%rip)
; SSE2-NEXT:    paddd %xmm3, %xmm3
; SSE2-NEXT:    paddd %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm2, c+160(%rip)
; SSE2-NEXT:    movdqa %xmm3, c+176(%rip)
; SSE2-NEXT:    retq
;
; SSE42-LABEL: PR42833:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movl b(%rip), %eax
; SSE42-NEXT:    movdqa c+128(%rip), %xmm0
; SSE42-NEXT:    movdqa c+144(%rip), %xmm1
; SSE42-NEXT:    addl c+128(%rip), %eax
; SSE42-NEXT:    movd %eax, %xmm2
; SSE42-NEXT:    paddd %xmm0, %xmm2
; SSE42-NEXT:    movdqa d+144(%rip), %xmm3
; SSE42-NEXT:    psubd %xmm1, %xmm3
; SSE42-NEXT:    paddd %xmm1, %xmm1
; SSE42-NEXT:    movdqa %xmm0, %xmm4
; SSE42-NEXT:    paddd %xmm0, %xmm4
; SSE42-NEXT:    pblendw {{.*#+}} xmm4 = xmm2[0,1],xmm4[2,3,4,5,6,7]
; SSE42-NEXT:    movdqa %xmm1, c+144(%rip)
; SSE42-NEXT:    movdqa %xmm4, c+128(%rip)
; SSE42-NEXT:    movdqa c+160(%rip), %xmm1
; SSE42-NEXT:    movdqa c+176(%rip), %xmm2
; SSE42-NEXT:    movdqa d+160(%rip), %xmm4
; SSE42-NEXT:    movdqa d+176(%rip), %xmm5
; SSE42-NEXT:    movdqa d+128(%rip), %xmm6
; SSE42-NEXT:    pinsrd $0, %eax, %xmm0
; SSE42-NEXT:    psubd %xmm0, %xmm6
; SSE42-NEXT:    psubd %xmm2, %xmm5
; SSE42-NEXT:    psubd %xmm1, %xmm4
; SSE42-NEXT:    movdqa %xmm4, d+160(%rip)
; SSE42-NEXT:    movdqa %xmm5, d+176(%rip)
; SSE42-NEXT:    movdqa %xmm3, d+144(%rip)
; SSE42-NEXT:    movdqa %xmm6, d+128(%rip)
; SSE42-NEXT:    paddd %xmm2, %xmm2
; SSE42-NEXT:    paddd %xmm1, %xmm1
; SSE42-NEXT:    movdqa %xmm1, c+160(%rip)
; SSE42-NEXT:    movdqa %xmm2, c+176(%rip)
; SSE42-NEXT:    retq
;
; AVX1-LABEL: PR42833:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movl b(%rip), %eax
; AVX1-NEXT:    addl c+128(%rip), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vmovdqa c+128(%rip), %xmm1
; AVX1-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpaddd %xmm1, %xmm1, %xmm2
; AVX1-NEXT:    vmovdqa c+144(%rip), %xmm3
; AVX1-NEXT:    vpaddd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7]
; AVX1-NEXT:    vmovdqa d+144(%rip), %xmm2
; AVX1-NEXT:    vpsubd c+144(%rip), %xmm2, %xmm2
; AVX1-NEXT:    vmovups %ymm0, c+128(%rip)
; AVX1-NEXT:    vpinsrd $0, %eax, %xmm1, %xmm0
; AVX1-NEXT:    vmovdqa d+128(%rip), %xmm1
; AVX1-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vmovdqa d+176(%rip), %xmm1
; AVX1-NEXT:    vmovdqa c+176(%rip), %xmm3
; AVX1-NEXT:    vpsubd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vmovdqa d+160(%rip), %xmm4
; AVX1-NEXT:    vmovdqa c+160(%rip), %xmm5
; AVX1-NEXT:    vpsubd %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vmovdqa %xmm2, d+144(%rip)
; AVX1-NEXT:    vmovdqa %xmm4, d+160(%rip)
; AVX1-NEXT:    vmovdqa %xmm1, d+176(%rip)
; AVX1-NEXT:    vmovdqa %xmm0, d+128(%rip)
; AVX1-NEXT:    vpaddd %xmm3, %xmm3, %xmm0
; AVX1-NEXT:    vpaddd %xmm5, %xmm5, %xmm1
; AVX1-NEXT:    vmovdqa %xmm1, c+160(%rip)
; AVX1-NEXT:    vmovdqa %xmm0, c+176(%rip)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR42833:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movl b(%rip), %eax
; AVX2-NEXT:    vmovdqu c+128(%rip), %ymm0
; AVX2-NEXT:    addl c+128(%rip), %eax
; AVX2-NEXT:    vmovd %eax, %xmm1
; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vpaddd %ymm0, %ymm0, %ymm3
; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm2[0],ymm3[1,2,3,4,5,6,7]
; AVX2-NEXT:    vmovdqu %ymm2, c+128(%rip)
; AVX2-NEXT:    vmovdqu c+160(%rip), %ymm2
; AVX2-NEXT:    vmovdqu d+160(%rip), %ymm3
; AVX2-NEXT:    vmovdqu d+128(%rip), %ymm4
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
; AVX2-NEXT:    vpsubd %ymm0, %ymm4, %ymm0
; AVX2-NEXT:    vpsubd %ymm2, %ymm3, %ymm1
; AVX2-NEXT:    vmovdqu %ymm1, d+160(%rip)
; AVX2-NEXT:    vmovdqu %ymm0, d+128(%rip)
; AVX2-NEXT:    vpaddd %ymm2, %ymm2, %ymm0
; AVX2-NEXT:    vmovdqu %ymm0, c+160(%rip)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: PR42833:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movl b(%rip), %eax
; AVX512-NEXT:    vmovdqu c+128(%rip), %ymm0
; AVX512-NEXT:    vmovdqu64 c+128(%rip), %zmm1
; AVX512-NEXT:    addl c+128(%rip), %eax
; AVX512-NEXT:    vmovd %eax, %xmm2
; AVX512-NEXT:    vpaddd %ymm2, %ymm0, %ymm2
; AVX512-NEXT:    vpaddd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0],ymm0[1,2,3,4,5,6,7]
; AVX512-NEXT:    vmovdqa c+128(%rip), %xmm2
; AVX512-NEXT:    vmovdqu %ymm0, c+128(%rip)
; AVX512-NEXT:    vmovdqu c+160(%rip), %ymm0
; AVX512-NEXT:    vmovdqu64 d+128(%rip), %zmm3
; AVX512-NEXT:    vpinsrd $0, %eax, %xmm2, %xmm2
; AVX512-NEXT:    vinserti32x4 $0, %xmm2, %zmm1, %zmm1
; AVX512-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm1
; AVX512-NEXT:    vpsubd %zmm1, %zmm3, %zmm1
; AVX512-NEXT:    vmovdqu64 %zmm1, d+128(%rip)
; AVX512-NEXT:    vpaddd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqu %ymm0, c+160(%rip)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; XOP-LABEL: PR42833:
; XOP:       # %bb.0:
; XOP-NEXT:    movl b(%rip), %eax
; XOP-NEXT:    addl c+128(%rip), %eax
; XOP-NEXT:    vmovd %eax, %xmm0
; XOP-NEXT:    vmovdqa c+128(%rip), %xmm1
; XOP-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
; XOP-NEXT:    vpaddd %xmm1, %xmm1, %xmm2
; XOP-NEXT:    vmovdqa c+144(%rip), %xmm3
; XOP-NEXT:    vpaddd %xmm3, %xmm3, %xmm3
; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; XOP-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7]
; XOP-NEXT:    vmovdqa d+144(%rip), %xmm2
; XOP-NEXT:    vpsubd c+144(%rip), %xmm2, %xmm2
; XOP-NEXT:    vmovups %ymm0, c+128(%rip)
; XOP-NEXT:    vpinsrd $0, %eax, %xmm1, %xmm0
; XOP-NEXT:    vmovdqa d+128(%rip), %xmm1
; XOP-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; XOP-NEXT:    vmovdqa d+176(%rip), %xmm1
; XOP-NEXT:    vmovdqa c+176(%rip), %xmm3
; XOP-NEXT:    vpsubd %xmm3, %xmm1, %xmm1
; XOP-NEXT:    vmovdqa d+160(%rip), %xmm4
; XOP-NEXT:    vmovdqa c+160(%rip), %xmm5
; XOP-NEXT:    vpsubd %xmm5, %xmm4, %xmm4
; XOP-NEXT:    vmovdqa %xmm2, d+144(%rip)
; XOP-NEXT:    vmovdqa %xmm4, d+160(%rip)
; XOP-NEXT:    vmovdqa %xmm1, d+176(%rip)
; XOP-NEXT:    vmovdqa %xmm0, d+128(%rip)
; XOP-NEXT:    vpaddd %xmm3, %xmm3, %xmm0
; XOP-NEXT:    vpaddd %xmm5, %xmm5, %xmm1
; XOP-NEXT:    vmovdqa %xmm1, c+160(%rip)
; XOP-NEXT:    vmovdqa %xmm0, c+176(%rip)
; XOP-NEXT:    vzeroupper
; XOP-NEXT:    retq
  %1 = load i32, ptr @b, align 4
  %2 = load <8 x i32>, ptr getelementptr inbounds ([49 x i32], ptr @c, i64 0, i64 32), align 16
  %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <16 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %4 = extractelement <8 x i32> %2, i32 0
  %5 = add i32 %1, %4
  %6 = insertelement <8 x i32> <i32 undef, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, i32 %5, i32 0
  %7 = add <8 x i32> %2, %6
  %8 = shl <8 x i32> %2, %6
  %9 = shufflevector <8 x i32> %7, <8 x i32> %8, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  store <8 x i32> %9, ptr getelementptr inbounds ([49 x i32], ptr @c, i64 0, i64 32), align 16
  %10 = load <8 x i32>, ptr getelementptr inbounds ([49 x i32], ptr @c, i64 0, i64 40), align 16
  %11 = shufflevector <8 x i32> %10, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %12 = load <16 x i32>, ptr getelementptr inbounds ([49 x i32], ptr @d, i64 0, i64 32), align 16
  %13 = insertelement <16 x i32> %3, i32 %5, i32 0
  %14 = shufflevector <16 x i32> %13, <16 x i32> %11, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  %15 = sub <16 x i32> %12, %14
  store <16 x i32> %15, ptr getelementptr inbounds ([49 x i32], ptr @d, i64 0, i64 32), align 16
  %16 = shl <8 x i32> %10, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  store <8 x i32> %16, ptr getelementptr inbounds ([49 x i32], ptr @c, i64 0, i64 40), align 16
  ret void
}