; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9 -verify-machineinstrs -mattr=cx16 | FileCheck %s --check-prefixes=CHECK,CHECK-NOAVX
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.9 -verify-machineinstrs -mattr=cx16,avx | FileCheck %s --check-prefixes=CHECK,CHECK-AVX

; Codegen of i128 without cx16 is tested in atomic-nocx16.ll.

@var = global i128 0

; Due to the scheduling performed right after isel for cmpxchg, and given
; that the machine scheduler and copy coalescer do not disturb physical
; register live ranges, we end up with a useless copy.
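; Note that cmpxchg16b implicitly compares RDX:RAX against the memory
; operand, stores RCX:RBX on success, and returns the previous memory value
; in RDX:RAX; the movq instructions below merely marshal the IR arguments
; into those fixed registers.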
define i128 @val_compare_and_swap(ptr %p, i128 %oldval, i128 %newval) {
; CHECK-LABEL: val_compare_and_swap:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rcx, %rbx
; CHECK-NEXT:    movq %rsi, %rax
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %pair = cmpxchg ptr %p, i128 %oldval, i128 %newval acquire acquire
  %val = extractvalue { i128, i1 } %pair, 0
  ret i128 %val
}

@cmpxchg16b_global = external dso_local global { i128, i128 }, align 16

;; Make sure we retain the offset of the global variable.
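;; Without AVX there is no single 16-byte atomic load instruction, so the
;; load is emulated with lock cmpxchg16b on zeroed expected/desired
;; registers: the swap either rewrites the zero that was already in memory
;; or fails and leaves the current value in RDX:RAX. Either way the global's
;; offset has to survive into the instruction's memory operand.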
define i128 @load_global_with_offset() nounwind {
; CHECK-NOAVX-LABEL: load_global_with_offset:
; CHECK-NOAVX:       ## %bb.0: ## %entry
; CHECK-NOAVX-NEXT:    pushq %rbx
; CHECK-NOAVX-NEXT:    xorl %eax, %eax
; CHECK-NOAVX-NEXT:    xorl %edx, %edx
; CHECK-NOAVX-NEXT:    xorl %ecx, %ecx
; CHECK-NOAVX-NEXT:    xorl %ebx, %ebx
; CHECK-NOAVX-NEXT:    lock cmpxchg16b _cmpxchg16b_global+16(%rip)
; CHECK-NOAVX-NEXT:    popq %rbx
; CHECK-NOAVX-NEXT:    retq
;
; CHECK-AVX-LABEL: load_global_with_offset:
; CHECK-AVX:       ## %bb.0: ## %entry
; CHECK-AVX-NEXT:    vmovdqa _cmpxchg16b_global+16(%rip), %xmm0
; CHECK-AVX-NEXT:    vmovq %xmm0, %rax
; CHECK-AVX-NEXT:    vpextrq $1, %xmm0, %rdx
; CHECK-AVX-NEXT:    retq
entry:
  %0 = load atomic i128, ptr getelementptr inbounds ({i128, i128}, ptr @cmpxchg16b_global, i64 0, i32 1) acquire, align 16
  ret i128 %0
}

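;; There is no native i128 read-modify-write instruction, so each atomicrmw
;; below expands into a compare-exchange retry loop of roughly this shape
;; (a sketch, not literal output):
;;
;;     movq (%rdi), %rax        ## load the current value into RDX:RAX
;;     movq 8(%rdi), %rdx
;;   loop:
;;     <compute the desired value into RCX:RBX>
;;     lock cmpxchg16b (%rdi)   ## publish it, or reload on interference
;;     jne loop
;;
;; For add and sub the high halves use adcq/sbbq so the carry or borrow
;; propagates across the full 128 bits.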
define void @fetch_and_nand(ptr %p, i128 %bits) {
; CHECK-LABEL: fetch_and_nand:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  LBB2_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movq %rdx, %rcx
; CHECK-NEXT:    andq %r8, %rcx
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    andq %rsi, %rbx
; CHECK-NEXT:    notq %rbx
; CHECK-NEXT:    notq %rcx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB2_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, _var(%rip)
; CHECK-NEXT:    movq %rdx, _var+8(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %val = atomicrmw nand ptr %p, i128 %bits release
  store i128 %val, ptr @var, align 16
  ret void
}

define void @fetch_and_or(ptr %p, i128 %bits) {
; CHECK-LABEL: fetch_and_or:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  LBB3_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    orq %rsi, %rbx
; CHECK-NEXT:    movq %rdx, %rcx
; CHECK-NEXT:    orq %r8, %rcx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB3_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, _var(%rip)
; CHECK-NEXT:    movq %rdx, _var+8(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %val = atomicrmw or ptr %p, i128 %bits seq_cst
  store i128 %val, ptr @var, align 16
  ret void
}

define void @fetch_and_add(ptr %p, i128 %bits) {
; CHECK-LABEL: fetch_and_add:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  LBB4_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    addq %rsi, %rbx
; CHECK-NEXT:    movq %rdx, %rcx
; CHECK-NEXT:    adcq %r8, %rcx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB4_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, _var(%rip)
; CHECK-NEXT:    movq %rdx, _var+8(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %val = atomicrmw add ptr %p, i128 %bits seq_cst
  store i128 %val, ptr @var, align 16
  ret void
}

define void @fetch_and_sub(ptr %p, i128 %bits) {
; CHECK-LABEL: fetch_and_sub:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  LBB5_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movq %rax, %rbx
; CHECK-NEXT:    subq %rsi, %rbx
; CHECK-NEXT:    movq %rdx, %rcx
; CHECK-NEXT:    sbbq %r8, %rcx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB5_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, _var(%rip)
; CHECK-NEXT:    movq %rdx, _var+8(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %val = atomicrmw sub ptr %p, i128 %bits seq_cst
  store i128 %val, ptr @var, align 16
  ret void
}

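;; The min/max variants also need a 128-bit comparison, which x86 lacks:
;; cmpq compares the low halves and sbbq folds the resulting borrow into the
;; high halves, leaving the carry and sign/overflow flags as if a full
;; 128-bit subtraction had been performed; cmov then selects each half of
;; the winner. The signed variants use cmovge/cmovl, the unsigned ones
;; cmovae/cmovb.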
define void @fetch_and_min(ptr %p, i128 %bits) {
; CHECK-LABEL: fetch_and_min:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  LBB6_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    cmpq %rax, %rsi
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    sbbq %rdx, %rcx
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    cmovgeq %rdx, %rcx
; CHECK-NEXT:    movq %rsi, %rbx
; CHECK-NEXT:    cmovgeq %rax, %rbx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB6_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, _var(%rip)
; CHECK-NEXT:    movq %rdx, _var+8(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %val = atomicrmw min ptr %p, i128 %bits seq_cst
  store i128 %val, ptr @var, align 16
  ret void
}

define void @fetch_and_max(ptr %p, i128 %bits) {
; CHECK-LABEL: fetch_and_max:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  LBB7_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    cmpq %rax, %rsi
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    sbbq %rdx, %rcx
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    cmovlq %rdx, %rcx
; CHECK-NEXT:    movq %rsi, %rbx
; CHECK-NEXT:    cmovlq %rax, %rbx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB7_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, _var(%rip)
; CHECK-NEXT:    movq %rdx, _var+8(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %val = atomicrmw max ptr %p, i128 %bits seq_cst
  store i128 %val, ptr @var, align 16
  ret void
}

define void @fetch_and_umin(ptr %p, i128 %bits) {
; CHECK-LABEL: fetch_and_umin:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  LBB8_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    cmpq %rax, %rsi
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    sbbq %rdx, %rcx
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    cmovaeq %rdx, %rcx
; CHECK-NEXT:    movq %rsi, %rbx
; CHECK-NEXT:    cmovaeq %rax, %rbx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB8_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, _var(%rip)
; CHECK-NEXT:    movq %rdx, _var+8(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %val = atomicrmw umin ptr %p, i128 %bits seq_cst
  store i128 %val, ptr @var, align 16
  ret void
}

define void @fetch_and_umax(ptr %p, i128 %bits) {
; CHECK-LABEL: fetch_and_umax:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbx, -16
; CHECK-NEXT:    movq %rdx, %r8
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq 8(%rdi), %rdx
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  LBB9_1: ## %atomicrmw.start
; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    cmpq %rax, %rsi
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    sbbq %rdx, %rcx
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    cmovbq %rdx, %rcx
; CHECK-NEXT:    movq %rsi, %rbx
; CHECK-NEXT:    cmovbq %rax, %rbx
; CHECK-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NEXT:    jne LBB9_1
; CHECK-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NEXT:    movq %rax, _var(%rip)
; CHECK-NEXT:    movq %rdx, _var+8(%rip)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    retq
  %val = atomicrmw umax ptr %p, i128 %bits seq_cst
  store i128 %val, ptr @var, align 16
  ret void
}

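;; Plain i128 atomic loads follow the same split as above: without AVX they
;; go through lock cmpxchg16b with zeroed inputs (dirtying the cache line
;; even for a pure read), while with AVX a single aligned vmovdqa suffices,
;; relying on the vendors' documented guarantee that aligned 16-byte
;; accesses are atomic on AVX-capable processors.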
define i128 @atomic_load_seq_cst(ptr %p) {
; CHECK-NOAVX-LABEL: atomic_load_seq_cst:
; CHECK-NOAVX:       ## %bb.0:
; CHECK-NOAVX-NEXT:    pushq %rbx
; CHECK-NOAVX-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NOAVX-NEXT:    .cfi_offset %rbx, -16
; CHECK-NOAVX-NEXT:    xorl %eax, %eax
; CHECK-NOAVX-NEXT:    xorl %edx, %edx
; CHECK-NOAVX-NEXT:    xorl %ecx, %ecx
; CHECK-NOAVX-NEXT:    xorl %ebx, %ebx
; CHECK-NOAVX-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NOAVX-NEXT:    popq %rbx
; CHECK-NOAVX-NEXT:    retq
;
; CHECK-AVX-LABEL: atomic_load_seq_cst:
; CHECK-AVX:       ## %bb.0:
; CHECK-AVX-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-AVX-NEXT:    vmovq %xmm0, %rax
; CHECK-AVX-NEXT:    vpextrq $1, %xmm0, %rdx
; CHECK-AVX-NEXT:    retq
   %r = load atomic i128, ptr %p seq_cst, align 16
   ret i128 %r
}

define i128 @atomic_load_relaxed(ptr %p) {
; CHECK-NOAVX-LABEL: atomic_load_relaxed:
; CHECK-NOAVX:       ## %bb.0:
; CHECK-NOAVX-NEXT:    pushq %rbx
; CHECK-NOAVX-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NOAVX-NEXT:    .cfi_offset %rbx, -16
; CHECK-NOAVX-NEXT:    xorl %eax, %eax
; CHECK-NOAVX-NEXT:    xorl %edx, %edx
; CHECK-NOAVX-NEXT:    xorl %ecx, %ecx
; CHECK-NOAVX-NEXT:    xorl %ebx, %ebx
; CHECK-NOAVX-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NOAVX-NEXT:    popq %rbx
; CHECK-NOAVX-NEXT:    retq
;
; CHECK-AVX-LABEL: atomic_load_relaxed:
; CHECK-AVX:       ## %bb.0:
; CHECK-AVX-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-AVX-NEXT:    vmovq %xmm0, %rax
; CHECK-AVX-NEXT:    vpextrq $1, %xmm0, %rdx
; CHECK-AVX-NEXT:    retq
   %r = load atomic i128, ptr %p monotonic, align 16
   ret i128 %r
}

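;; i128 atomic stores mirror the loads: without AVX they loop on cmpxchg16b
;; until the swap succeeds in publishing the new value; with AVX the two
;; halves are packed by vpunpcklqdq and stored with one aligned vmovdqa.
;; A seq_cst store must additionally act as a full barrier, which is
;; provided by the `lock orl $0` to the stack, a common cheaper substitute
;; for mfence.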
define void @atomic_store_seq_cst(ptr %p, i128 %in) {
; CHECK-NOAVX-LABEL: atomic_store_seq_cst:
; CHECK-NOAVX:       ## %bb.0:
; CHECK-NOAVX-NEXT:    pushq %rbx
; CHECK-NOAVX-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NOAVX-NEXT:    .cfi_offset %rbx, -16
; CHECK-NOAVX-NEXT:    movq %rdx, %rcx
; CHECK-NOAVX-NEXT:    movq %rsi, %rbx
; CHECK-NOAVX-NEXT:    movq (%rdi), %rax
; CHECK-NOAVX-NEXT:    movq 8(%rdi), %rdx
; CHECK-NOAVX-NEXT:    .p2align 4
; CHECK-NOAVX-NEXT:  LBB12_1: ## %atomicrmw.start
; CHECK-NOAVX-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NOAVX-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NOAVX-NEXT:    jne LBB12_1
; CHECK-NOAVX-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NOAVX-NEXT:    popq %rbx
; CHECK-NOAVX-NEXT:    retq
;
; CHECK-AVX-LABEL: atomic_store_seq_cst:
; CHECK-AVX:       ## %bb.0:
; CHECK-AVX-NEXT:    vmovq %rdx, %xmm0
; CHECK-AVX-NEXT:    vmovq %rsi, %xmm1
; CHECK-AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-AVX-NEXT:    vmovdqa %xmm0, (%rdi)
; CHECK-AVX-NEXT:    lock orl $0, -{{[0-9]+}}(%rsp)
; CHECK-AVX-NEXT:    retq
   store atomic i128 %in, ptr %p seq_cst, align 16
   ret void
}

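;; Under x86-TSO a store is not reordered with earlier loads or stores, so
;; release (and weaker) stores need no trailing fence; the AVX path is just
;; the vmovdqa.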
define void @atomic_store_release(ptr %p, i128 %in) {
; CHECK-NOAVX-LABEL: atomic_store_release:
; CHECK-NOAVX:       ## %bb.0:
; CHECK-NOAVX-NEXT:    pushq %rbx
; CHECK-NOAVX-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NOAVX-NEXT:    .cfi_offset %rbx, -16
; CHECK-NOAVX-NEXT:    movq %rdx, %rcx
; CHECK-NOAVX-NEXT:    movq %rsi, %rbx
; CHECK-NOAVX-NEXT:    movq (%rdi), %rax
; CHECK-NOAVX-NEXT:    movq 8(%rdi), %rdx
; CHECK-NOAVX-NEXT:    .p2align 4
; CHECK-NOAVX-NEXT:  LBB13_1: ## %atomicrmw.start
; CHECK-NOAVX-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NOAVX-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NOAVX-NEXT:    jne LBB13_1
; CHECK-NOAVX-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NOAVX-NEXT:    popq %rbx
; CHECK-NOAVX-NEXT:    retq
;
; CHECK-AVX-LABEL: atomic_store_release:
; CHECK-AVX:       ## %bb.0:
; CHECK-AVX-NEXT:    vmovq %rdx, %xmm0
; CHECK-AVX-NEXT:    vmovq %rsi, %xmm1
; CHECK-AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-AVX-NEXT:    vmovdqa %xmm0, (%rdi)
; CHECK-AVX-NEXT:    retq
   store atomic i128 %in, ptr %p release, align 16
   ret void
}

define void @atomic_store_relaxed(ptr %p, i128 %in) {
; CHECK-NOAVX-LABEL: atomic_store_relaxed:
; CHECK-NOAVX:       ## %bb.0:
; CHECK-NOAVX-NEXT:    pushq %rbx
; CHECK-NOAVX-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NOAVX-NEXT:    .cfi_offset %rbx, -16
; CHECK-NOAVX-NEXT:    movq %rdx, %rcx
; CHECK-NOAVX-NEXT:    movq %rsi, %rbx
; CHECK-NOAVX-NEXT:    movq (%rdi), %rax
; CHECK-NOAVX-NEXT:    movq 8(%rdi), %rdx
; CHECK-NOAVX-NEXT:    .p2align 4
; CHECK-NOAVX-NEXT:  LBB14_1: ## %atomicrmw.start
; CHECK-NOAVX-NEXT:    ## =>This Inner Loop Header: Depth=1
; CHECK-NOAVX-NEXT:    lock cmpxchg16b (%rdi)
; CHECK-NOAVX-NEXT:    jne LBB14_1
; CHECK-NOAVX-NEXT:  ## %bb.2: ## %atomicrmw.end
; CHECK-NOAVX-NEXT:    popq %rbx
; CHECK-NOAVX-NEXT:    retq
;
; CHECK-AVX-LABEL: atomic_store_relaxed:
; CHECK-AVX:       ## %bb.0:
; CHECK-AVX-NEXT:    vmovq %rdx, %xmm0
; CHECK-AVX-NEXT:    vmovq %rsi, %xmm1
; CHECK-AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-AVX-NEXT:    vmovdqa %xmm0, (%rdi)
; CHECK-AVX-NEXT:    retq
   store atomic i128 %in, ptr %p unordered, align 16
   ret void
}