; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort=1 -mattr=+sse2 < %s | FileCheck %s --check-prefixes=X86,X86-SSE
; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort=1 -mattr=+sse2 < %s | FileCheck %s --check-prefixes=X64,X64-SSE
; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort=1 -mattr=+avx < %s | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX1
; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort=1 -mattr=+avx < %s | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1
; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort=1 -mattr=+avx512f < %s | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX512
; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort=1 -mattr=+avx512f < %s | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512
; RUN: llc -mtriple=i686-none-linux -fast-isel -fast-isel-abort=1 -mattr=+avx512f,+avx512dq,+avx512bw < %s | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX512
; RUN: llc -mtriple=x86_64-none-linux -fast-isel -fast-isel-abort=1 -mattr=+avx512vl,+avx512dq,+avx512bw < %s | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512

define i32 @test_store_32(ptr nocapture %addr, i32 %value) nounwind {
; X86-LABEL: test_store_32:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl %eax, (%ecx)
; X86-NEXT: retl
;
; X64-LABEL: test_store_32:
; X64: # %bb.0: # %entry
; X64-NEXT: movl %esi, %eax
; X64-NEXT: movl %esi, (%rdi)
; X64-NEXT: retq
entry:
  store i32 %value, ptr %addr, align 1
  ret i32 %value
}

define i16 @test_store_16(ptr nocapture %addr, i16 %value) nounwind {
; X86-LABEL: test_store_16:
; X86: # %bb.0: # %entry
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movw %ax, (%ecx)
; X86-NEXT: retl
;
; X64-LABEL: test_store_16:
; X64: # %bb.0: # %entry
; X64-NEXT: movl %esi, %eax
; X64-NEXT: movw %ax, (%rdi)
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
entry:
  store i16 %value, ptr %addr, align 1
  ret i16 %value
}

define <4 x i32> @test_store_4xi32(ptr nocapture %addr, <4 x i32> %value, <4 x i32> %value2) nounwind {
; X86-SSE-LABEL: test_store_4xi32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: paddd %xmm1, %xmm0
; X86-SSE-NEXT: movdqu %xmm0, (%eax)
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: test_store_4xi32:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: paddd %xmm1, %xmm0
; X64-SSE-NEXT: movdqu %xmm0, (%rdi)
; X64-SSE-NEXT: retq
;
; X86-AVX-LABEL: test_store_4xi32:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vmovdqu %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-AVX-LABEL: test_store_4xi32:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vmovdqu %xmm0, (%rdi)
; X64-AVX-NEXT: retq
  %foo = add <4 x i32> %value, %value2 ; to force integer type on store
  store <4 x i32> %foo, ptr %addr, align 1
  ret <4 x i32> %foo
}

define <4 x i32> @test_store_4xi32_aligned(ptr nocapture %addr, <4 x i32> %value, <4 x i32> %value2) nounwind {
; X86-SSE-LABEL: test_store_4xi32_aligned:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: paddd %xmm1, %xmm0
; X86-SSE-NEXT: movdqa %xmm0, (%eax)
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: test_store_4xi32_aligned:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: paddd %xmm1, %xmm0
; X64-SSE-NEXT: movdqa %xmm0, (%rdi)
; X64-SSE-NEXT: retq
;
; X86-AVX-LABEL: test_store_4xi32_aligned:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vmovdqa %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-AVX-LABEL: test_store_4xi32_aligned:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vmovdqa %xmm0, (%rdi)
; X64-AVX-NEXT: retq
  %foo = add <4 x i32> %value, %value2 ; to force integer type on store
  store <4 x i32> %foo, ptr %addr, align 16
  ret <4 x i32> %foo
}

define <4 x float> @test_store_4xf32(ptr nocapture %addr, <4 x float> %value) nounwind {
; X86-SSE-LABEL: test_store_4xf32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movups %xmm0, (%eax)
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: test_store_4xf32:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movups %xmm0, (%rdi)
; X64-SSE-NEXT: retq
;
; X86-AVX-LABEL: test_store_4xf32:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovups %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-AVX-LABEL: test_store_4xf32:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovups %xmm0, (%rdi)
; X64-AVX-NEXT: retq
  store <4 x float> %value, ptr %addr, align 1
  ret <4 x float> %value
}

define <4 x float> @test_store_4xf32_aligned(ptr nocapture %addr, <4 x float> %value) nounwind {
; X86-SSE-LABEL: test_store_4xf32_aligned:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movaps %xmm0, (%eax)
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: test_store_4xf32_aligned:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps %xmm0, (%rdi)
; X64-SSE-NEXT: retq
;
; X86-AVX-LABEL: test_store_4xf32_aligned:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovaps %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-AVX-LABEL: test_store_4xf32_aligned:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps %xmm0, (%rdi)
; X64-AVX-NEXT: retq
  store <4 x float> %value, ptr %addr, align 16
  ret <4 x float> %value
}

define <2 x double> @test_store_2xf64(ptr nocapture %addr, <2 x double> %value, <2 x double> %value2) nounwind {
; X86-SSE-LABEL: test_store_2xf64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: addpd %xmm1, %xmm0
; X86-SSE-NEXT: movupd %xmm0, (%eax)
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: test_store_2xf64:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: addpd %xmm1, %xmm0
; X64-SSE-NEXT: movupd %xmm0, (%rdi)
; X64-SSE-NEXT: retq
;
; X86-AVX-LABEL: test_store_2xf64:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vmovupd %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-AVX-LABEL: test_store_2xf64:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vmovupd %xmm0, (%rdi)
; X64-AVX-NEXT: retq
  %foo = fadd <2 x double> %value, %value2 ; to force double type on store
  store <2 x double> %foo, ptr %addr, align 1
  ret <2 x double> %foo
}

define <2 x double> @test_store_2xf64_aligned(ptr nocapture %addr, <2 x double> %value, <2 x double> %value2) nounwind {
; X86-SSE-LABEL: test_store_2xf64_aligned:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: addpd %xmm1, %xmm0
; X86-SSE-NEXT: movapd %xmm0, (%eax)
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: test_store_2xf64_aligned:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: addpd %xmm1, %xmm0
; X64-SSE-NEXT: movapd %xmm0, (%rdi)
; X64-SSE-NEXT: retq
;
; X86-AVX-LABEL: test_store_2xf64_aligned:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; X86-AVX-NEXT: vmovapd %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-AVX-LABEL: test_store_2xf64_aligned:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; X64-AVX-NEXT: vmovapd %xmm0, (%rdi)
; X64-AVX-NEXT: retq
  %foo = fadd <2 x double> %value, %value2 ; to force double type on store
  store <2 x double> %foo, ptr %addr, align 16
  ret <2 x double> %foo
}

define <8 x i32> @test_store_8xi32(ptr nocapture %addr, <8 x i32> %value) nounwind {
; X86-SSE-LABEL: test_store_8xi32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movups %xmm0, (%eax)
; X86-SSE-NEXT: movups %xmm1, 16(%eax)
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: test_store_8xi32:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movups %xmm0, (%rdi)
; X64-SSE-NEXT: movups %xmm1, 16(%rdi)
; X64-SSE-NEXT: retq
;
; X86-AVX-LABEL: test_store_8xi32:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovups %ymm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-AVX-LABEL: test_store_8xi32:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovups %ymm0, (%rdi)
; X64-AVX-NEXT: retq
  store <8 x i32> %value, ptr %addr, align 1
  ret <8 x i32> %value
}

define <8 x i32> @test_store_8xi32_aligned(ptr nocapture %addr, <8 x i32> %value) nounwind {
; X86-SSE-LABEL: test_store_8xi32_aligned:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movaps %xmm0, (%eax)
; X86-SSE-NEXT: movaps %xmm1, 16(%eax)
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: test_store_8xi32_aligned:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps %xmm0, (%rdi)
; X64-SSE-NEXT: movaps %xmm1, 16(%rdi)
; X64-SSE-NEXT: retq
;
; X86-AVX-LABEL: test_store_8xi32_aligned:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovaps %ymm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-AVX-LABEL: test_store_8xi32_aligned:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps %ymm0, (%rdi)
; X64-AVX-NEXT: retq
  store <8 x i32> %value, ptr %addr, align 32
  ret <8 x i32> %value
}

define <8 x float> @test_store_8xf32(ptr nocapture %addr, <8 x float> %value) nounwind {
; X86-SSE-LABEL: test_store_8xf32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movups %xmm0, (%eax)
; X86-SSE-NEXT: movups %xmm1, 16(%eax)
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: test_store_8xf32:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movups %xmm0, (%rdi)
; X64-SSE-NEXT: movups %xmm1, 16(%rdi)
; X64-SSE-NEXT: retq
;
; X86-AVX-LABEL: test_store_8xf32:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovups %ymm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-AVX-LABEL: test_store_8xf32:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovups %ymm0, (%rdi)
; X64-AVX-NEXT: retq
  store <8 x float> %value, ptr %addr, align 1
  ret <8 x float> %value
}

define <8 x float> @test_store_8xf32_aligned(ptr nocapture %addr, <8 x float> %value) nounwind {
; X86-SSE-LABEL: test_store_8xf32_aligned:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movaps %xmm0, (%eax)
; X86-SSE-NEXT: movaps %xmm1, 16(%eax)
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: test_store_8xf32_aligned:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps %xmm0, (%rdi)
; X64-SSE-NEXT: movaps %xmm1, 16(%rdi)
; X64-SSE-NEXT: retq
;
; X86-AVX-LABEL: test_store_8xf32_aligned:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovaps %ymm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-AVX-LABEL: test_store_8xf32_aligned:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps %ymm0, (%rdi)
; X64-AVX-NEXT: retq
  store <8 x float> %value, ptr %addr, align 32
  ret <8 x float> %value
}

define <4 x double> @test_store_4xf64(ptr nocapture %addr, <4 x double> %value, <4 x double> %value2) nounwind {
; X86-SSE-LABEL: test_store_4xf64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: movapd {{[0-9]+}}(%esp), %xmm3
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: addpd %xmm2, %xmm0
; X86-SSE-NEXT: movupd %xmm0, (%eax)
; X86-SSE-NEXT: addpd %xmm3, %xmm1
; X86-SSE-NEXT: movupd %xmm1, 16(%eax)
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: test_store_4xf64:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: addpd %xmm2, %xmm0
; X64-SSE-NEXT: movupd %xmm0, (%rdi)
; X64-SSE-NEXT: addpd %xmm3, %xmm1
; X64-SSE-NEXT: movupd %xmm1, 16(%rdi)
; X64-SSE-NEXT: retq
;
; X86-AVX-LABEL: test_store_4xf64:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; X86-AVX-NEXT: vmovupd %ymm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-AVX-LABEL: test_store_4xf64:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; X64-AVX-NEXT: vmovupd %ymm0, (%rdi)
; X64-AVX-NEXT: retq
  %foo = fadd <4 x double> %value, %value2 ; to force double type on store
  store <4 x double> %foo, ptr %addr, align 1
  ret <4 x double> %foo
}

define <4 x double> @test_store_4xf64_aligned(ptr nocapture %addr, <4 x double> %value, <4 x double> %value2) nounwind {
; X86-SSE-LABEL: test_store_4xf64_aligned:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: movapd {{[0-9]+}}(%esp), %xmm3
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: addpd %xmm2, %xmm0
; X86-SSE-NEXT: movapd %xmm0, (%eax)
; X86-SSE-NEXT: addpd %xmm3, %xmm1
; X86-SSE-NEXT: movapd %xmm1, 16(%eax)
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: test_store_4xf64_aligned:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: addpd %xmm2, %xmm0
; X64-SSE-NEXT: movapd %xmm0, (%rdi)
; X64-SSE-NEXT: addpd %xmm3, %xmm1
; X64-SSE-NEXT: movapd %xmm1, 16(%rdi)
; X64-SSE-NEXT: retq
;
; X86-AVX-LABEL: test_store_4xf64_aligned:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; X86-AVX-NEXT: vmovapd %ymm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-AVX-LABEL: test_store_4xf64_aligned:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; X64-AVX-NEXT: vmovapd %ymm0, (%rdi)
; X64-AVX-NEXT: retq
  %foo = fadd <4 x double> %value, %value2 ; to force double type on store
  store <4 x double> %foo, ptr %addr, align 32
  ret <4 x double> %foo
}

define <16 x i32> @test_store_16xi32(ptr nocapture %addr, <16 x i32> %value) nounwind {
; X86-SSE-LABEL: test_store_16xi32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: movaps {{[0-9]+}}(%esp), %xmm3
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movups %xmm0, (%eax)
; X86-SSE-NEXT: movups %xmm1, 16(%eax)
; X86-SSE-NEXT: movups %xmm2, 32(%eax)
; X86-SSE-NEXT: movups %xmm3, 48(%eax)
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: test_store_16xi32:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movups %xmm0, (%rdi)
; X64-SSE-NEXT: movups %xmm1, 16(%rdi)
; X64-SSE-NEXT: movups %xmm2, 32(%rdi)
; X64-SSE-NEXT: movups %xmm3, 48(%rdi)
; X64-SSE-NEXT: retq
;
; X86-AVX1-LABEL: test_store_16xi32:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX1-NEXT: vmovups %ymm0, (%eax)
; X86-AVX1-NEXT: vmovups %ymm1, 32(%eax)
; X86-AVX1-NEXT: retl
;
; X64-AVX1-LABEL: test_store_16xi32:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovups %ymm0, (%rdi)
; X64-AVX1-NEXT: vmovups %ymm1, 32(%rdi)
; X64-AVX1-NEXT: retq
;
; X86-AVX512-LABEL: test_store_16xi32:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512-NEXT: vmovups %zmm0, (%eax)
; X86-AVX512-NEXT: retl
;
; X64-AVX512-LABEL: test_store_16xi32:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovups %zmm0, (%rdi)
; X64-AVX512-NEXT: retq
  store <16 x i32> %value, ptr %addr, align 1
  ret <16 x i32> %value
}

define <16 x i32> @test_store_16xi32_aligned(ptr nocapture %addr, <16 x i32> %value) nounwind {
; X86-SSE-LABEL: test_store_16xi32_aligned:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: movaps {{[0-9]+}}(%esp), %xmm3
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movaps %xmm0, (%eax)
; X86-SSE-NEXT: movaps %xmm1, 16(%eax)
; X86-SSE-NEXT: movaps %xmm2, 32(%eax)
; X86-SSE-NEXT: movaps %xmm3, 48(%eax)
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: test_store_16xi32_aligned:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps %xmm0, (%rdi)
; X64-SSE-NEXT: movaps %xmm1, 16(%rdi)
; X64-SSE-NEXT: movaps %xmm2, 32(%rdi)
; X64-SSE-NEXT: movaps %xmm3, 48(%rdi)
; X64-SSE-NEXT: retq
;
; X86-AVX1-LABEL: test_store_16xi32_aligned:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX1-NEXT: vmovaps %ymm0, (%eax)
; X86-AVX1-NEXT: vmovaps %ymm1, 32(%eax)
; X86-AVX1-NEXT: retl
;
; X64-AVX1-LABEL: test_store_16xi32_aligned:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovaps %ymm0, (%rdi)
; X64-AVX1-NEXT: vmovaps %ymm1, 32(%rdi)
; X64-AVX1-NEXT: retq
;
; X86-AVX512-LABEL: test_store_16xi32_aligned:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512-NEXT: vmovaps %zmm0, (%eax)
; X86-AVX512-NEXT: retl
;
; X64-AVX512-LABEL: test_store_16xi32_aligned:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovaps %zmm0, (%rdi)
; X64-AVX512-NEXT: retq
  store <16 x i32> %value, ptr %addr, align 64
  ret <16 x i32> %value
}

define <16 x float> @test_store_16xf32(ptr nocapture %addr, <16 x float> %value) nounwind {
; X86-SSE-LABEL: test_store_16xf32:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: movaps {{[0-9]+}}(%esp), %xmm3
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movups %xmm0, (%eax)
; X86-SSE-NEXT: movups %xmm1, 16(%eax)
; X86-SSE-NEXT: movups %xmm2, 32(%eax)
; X86-SSE-NEXT: movups %xmm3, 48(%eax)
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: test_store_16xf32:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movups %xmm0, (%rdi)
; X64-SSE-NEXT: movups %xmm1, 16(%rdi)
; X64-SSE-NEXT: movups %xmm2, 32(%rdi)
; X64-SSE-NEXT: movups %xmm3, 48(%rdi)
; X64-SSE-NEXT: retq
;
; X86-AVX1-LABEL: test_store_16xf32:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX1-NEXT: vmovups %ymm0, (%eax)
; X86-AVX1-NEXT: vmovups %ymm1, 32(%eax)
; X86-AVX1-NEXT: retl
;
; X64-AVX1-LABEL: test_store_16xf32:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovups %ymm0, (%rdi)
; X64-AVX1-NEXT: vmovups %ymm1, 32(%rdi)
; X64-AVX1-NEXT: retq
;
; X86-AVX512-LABEL: test_store_16xf32:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512-NEXT: vmovups %zmm0, (%eax)
; X86-AVX512-NEXT: retl
;
; X64-AVX512-LABEL: test_store_16xf32:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovups %zmm0, (%rdi)
; X64-AVX512-NEXT: retq
  store <16 x float> %value, ptr %addr, align 1
  ret <16 x float> %value
}

define <16 x float> @test_store_16xf32_aligned(ptr nocapture %addr, <16 x float> %value) nounwind {
; X86-SSE-LABEL: test_store_16xf32_aligned:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: movaps {{[0-9]+}}(%esp), %xmm3
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: movaps %xmm0, (%eax)
; X86-SSE-NEXT: movaps %xmm1, 16(%eax)
; X86-SSE-NEXT: movaps %xmm2, 32(%eax)
; X86-SSE-NEXT: movaps %xmm3, 48(%eax)
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: test_store_16xf32_aligned:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movaps %xmm0, (%rdi)
; X64-SSE-NEXT: movaps %xmm1, 16(%rdi)
; X64-SSE-NEXT: movaps %xmm2, 32(%rdi)
; X64-SSE-NEXT: movaps %xmm3, 48(%rdi)
; X64-SSE-NEXT: retq
;
; X86-AVX1-LABEL: test_store_16xf32_aligned:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX1-NEXT: vmovaps %ymm0, (%eax)
; X86-AVX1-NEXT: vmovaps %ymm1, 32(%eax)
; X86-AVX1-NEXT: retl
;
; X64-AVX1-LABEL: test_store_16xf32_aligned:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vmovaps %ymm0, (%rdi)
; X64-AVX1-NEXT: vmovaps %ymm1, 32(%rdi)
; X64-AVX1-NEXT: retq
;
; X86-AVX512-LABEL: test_store_16xf32_aligned:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512-NEXT: vmovaps %zmm0, (%eax)
; X86-AVX512-NEXT: retl
;
; X64-AVX512-LABEL: test_store_16xf32_aligned:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vmovaps %zmm0, (%rdi)
; X64-AVX512-NEXT: retq
  store <16 x float> %value, ptr %addr, align 64
  ret <16 x float> %value
}

define <8 x double> @test_store_8xf64(ptr nocapture %addr, <8 x double> %value, <8 x double> %value2) nounwind {
; X86-SSE-LABEL: test_store_8xf64:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: movapd {{[0-9]+}}(%esp), %xmm4
; X86-SSE-NEXT: movapd {{[0-9]+}}(%esp), %xmm5
; X86-SSE-NEXT: movapd {{[0-9]+}}(%esp), %xmm6
; X86-SSE-NEXT: movapd {{[0-9]+}}(%esp), %xmm3
; X86-SSE-NEXT: addpd %xmm4, %xmm3
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: addpd {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: movupd %xmm0, (%eax)
; X86-SSE-NEXT: addpd %xmm6, %xmm1
; X86-SSE-NEXT: movupd %xmm1, 16(%eax)
; X86-SSE-NEXT: addpd %xmm5, %xmm2
; X86-SSE-NEXT: movupd %xmm2, 32(%eax)
; X86-SSE-NEXT: movupd %xmm3, 48(%eax)
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: test_store_8xf64:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: addpd %xmm4, %xmm0
; X64-SSE-NEXT: movupd %xmm0, (%rdi)
; X64-SSE-NEXT: addpd %xmm5, %xmm1
; X64-SSE-NEXT: movupd %xmm1, 16(%rdi)
; X64-SSE-NEXT: addpd %xmm6, %xmm2
; X64-SSE-NEXT: movupd %xmm2, 32(%rdi)
; X64-SSE-NEXT: addpd %xmm7, %xmm3
; X64-SSE-NEXT: movupd %xmm3, 48(%rdi)
; X64-SSE-NEXT: retq
;
; X86-AVX1-LABEL: test_store_8xf64:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: pushl %ebp
; X86-AVX1-NEXT: movl %esp, %ebp
; X86-AVX1-NEXT: andl $-32, %esp
; X86-AVX1-NEXT: subl $32, %esp
; X86-AVX1-NEXT: vmovapd 40(%ebp), %ymm3
; X86-AVX1-NEXT: movl 8(%ebp), %eax
; X86-AVX1-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; X86-AVX1-NEXT: vmovupd %ymm0, (%eax)
; X86-AVX1-NEXT: vaddpd %ymm3, %ymm1, %ymm1
; X86-AVX1-NEXT: vmovupd %ymm1, 32(%eax)
; X86-AVX1-NEXT: movl %ebp, %esp
; X86-AVX1-NEXT: popl %ebp
; X86-AVX1-NEXT: retl
;
; X64-AVX1-LABEL: test_store_8xf64:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; X64-AVX1-NEXT: vmovupd %ymm0, (%rdi)
; X64-AVX1-NEXT: vaddpd %ymm3, %ymm1, %ymm1
; X64-AVX1-NEXT: vmovupd %ymm1, 32(%rdi)
; X64-AVX1-NEXT: retq
;
; X86-AVX512-LABEL: test_store_8xf64:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; X86-AVX512-NEXT: vmovupd %zmm0, (%eax)
; X86-AVX512-NEXT: retl
;
; X64-AVX512-LABEL: test_store_8xf64:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT: vmovupd %zmm0, (%rdi)
; X64-AVX512-NEXT: retq
  %foo = fadd <8 x double> %value, %value2 ; to force double type on store
  store <8 x double> %foo, ptr %addr, align 1
  ret <8 x double> %foo
}

define <8 x double> @test_store_8xf64_aligned(ptr nocapture %addr, <8 x double> %value, <8 x double> %value2) nounwind {
; X86-SSE-LABEL: test_store_8xf64_aligned:
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: subl $12, %esp
; X86-SSE-NEXT: movapd {{[0-9]+}}(%esp), %xmm4
; X86-SSE-NEXT: movapd {{[0-9]+}}(%esp), %xmm5
; X86-SSE-NEXT: movapd {{[0-9]+}}(%esp), %xmm6
; X86-SSE-NEXT: movapd {{[0-9]+}}(%esp), %xmm3
; X86-SSE-NEXT: addpd %xmm4, %xmm3
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT: addpd {{[0-9]+}}(%esp), %xmm0
; X86-SSE-NEXT: movapd %xmm0, (%eax)
; X86-SSE-NEXT: addpd %xmm6, %xmm1
; X86-SSE-NEXT: movapd %xmm1, 16(%eax)
; X86-SSE-NEXT: addpd %xmm5, %xmm2
; X86-SSE-NEXT: movapd %xmm2, 32(%eax)
; X86-SSE-NEXT: movapd %xmm3, 48(%eax)
; X86-SSE-NEXT: addl $12, %esp
; X86-SSE-NEXT: retl
;
; X64-SSE-LABEL: test_store_8xf64_aligned:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: addpd %xmm4, %xmm0
; X64-SSE-NEXT: movapd %xmm0, (%rdi)
; X64-SSE-NEXT: addpd %xmm5, %xmm1
; X64-SSE-NEXT: movapd %xmm1, 16(%rdi)
; X64-SSE-NEXT: addpd %xmm6, %xmm2
; X64-SSE-NEXT: movapd %xmm2, 32(%rdi)
; X64-SSE-NEXT: addpd %xmm7, %xmm3
; X64-SSE-NEXT: movapd %xmm3, 48(%rdi)
; X64-SSE-NEXT: retq
;
; X86-AVX1-LABEL: test_store_8xf64_aligned:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: pushl %ebp
; X86-AVX1-NEXT: movl %esp, %ebp
; X86-AVX1-NEXT: andl $-32, %esp
; X86-AVX1-NEXT: subl $32, %esp
; X86-AVX1-NEXT: vmovapd 40(%ebp), %ymm3
; X86-AVX1-NEXT: movl 8(%ebp), %eax
; X86-AVX1-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; X86-AVX1-NEXT: vmovapd %ymm0, (%eax)
; X86-AVX1-NEXT: vaddpd %ymm3, %ymm1, %ymm1
; X86-AVX1-NEXT: vmovapd %ymm1, 32(%eax)
; X86-AVX1-NEXT: movl %ebp, %esp
; X86-AVX1-NEXT: popl %ebp
; X86-AVX1-NEXT: retl
;
; X64-AVX1-LABEL: test_store_8xf64_aligned:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; X64-AVX1-NEXT: vmovapd %ymm0, (%rdi)
; X64-AVX1-NEXT: vaddpd %ymm3, %ymm1, %ymm1
; X64-AVX1-NEXT: vmovapd %ymm1, 32(%rdi)
; X64-AVX1-NEXT: retq
;
; X86-AVX512-LABEL: test_store_8xf64_aligned:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; X86-AVX512-NEXT: vmovapd %zmm0, (%eax)
; X86-AVX512-NEXT: retl
;
; X64-AVX512-LABEL: test_store_8xf64_aligned:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; X64-AVX512-NEXT: vmovapd %zmm0, (%rdi)
; X64-AVX512-NEXT: retq
  %foo = fadd <8 x double> %value, %value2 ; to force double type on store
  store <8 x double> %foo, ptr %addr, align 64
  ret <8 x double> %foo
}