xref: /llvm-project/llvm/test/CodeGen/PowerPC/ppc64-i128-abi.ll (revision 5403c59c608c08c8ecd4303763f08eb046eb5e4d)
1; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
2; RUN:   -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-LE \
3; RUN:   --implicit-check-not xxswapd
4
5; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
6; RUN:   -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-BE
7
8; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \
9; RUN:   -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-BE
10
11; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
12; RUN:   -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX
13
14; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \
15; RUN:   -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX
16
17; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
18; RUN:   -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX \
19; RUN:   --implicit-check-not xxswapd
20
21; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
22; RUN:   -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-BE-NOVSX
23
24; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \
25; RUN:   -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-BE-NOVSX
26
27; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
28; RUN:   -mcpu=pwr8 -mattr=-vsx < %s | \
29; RUN:   FileCheck %s -check-prefix=CHECK-LE-NOVSX --implicit-check-not xxswapd
30
31; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
32; RUN:   -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
33; RUN:   FileCheck %s -check-prefix=CHECK-P9 --implicit-check-not xxswapd
34
35; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \
36; RUN:   -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s -check-prefix=CHECK-P9
37
38; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
39; RUN:   -mcpu=pwr9 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX \
40; RUN:   --implicit-check-not xxswapd
41
42; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \
43; RUN:   -mcpu=pwr9 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX
44
45; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
46; RUN:   -mcpu=pwr9 -mattr=-power9-vector -mattr=-direct-move < %s | \
47; RUN:   FileCheck %s -check-prefix=CHECK-LE --implicit-check-not xxswapd
48
49@x = common global <1 x i128> zeroinitializer, align 16   ; vector operand loaded by both call_v1i128_* tests
50@y = common global <1 x i128> zeroinitializer, align 16   ; second vector operand, loaded by call_v1i128_increment_by_val
51@a = common global i128 zeroinitializer, align 16         ; scalar i128 operand loaded by the call_i128_* tests and i128_split
52@b = common global i128 zeroinitializer, align 16         ; second scalar i128 operand (call_i128_increment_by_val, i128_split)
53
54; VSX:
55;   %a is passed in register 34
56;   The value of 1 is stored in the TOC.
57;   On LE, ensure the value of 1 is swapped before being used (using xxswapd).
58; VMX (no VSX):
59;   %a is passed in register 2
60;   The value of 1 is stored in the TOC.
61;   No swaps are necessary when using P8 Vector instructions on LE
62define <1 x i128> @v1i128_increment_by_one(<1 x i128> %a) nounwind {
63       %tmp = add <1 x i128> %a, <i128 1>       ; add the constant 1 to the single i128 element
64       ret <1 x i128> %tmp
65; Every prefix below expects a vector load followed by exactly one vadduqm whose result is in register 2.
66; FIXME: Seems a 128-bit literal is materialized by loading from the TOC. There
67;        should be a better way of doing this.
68
69; CHECK-LE-LABEL: @v1i128_increment_by_one
70; CHECK-LE: lxvd2x [[VAL:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
71; CHECK-LE: xxswapd 35, [[VAL]]
72; CHECK-LE: vadduqm 2, 2, 3
73; CHECK-LE: blr
74
75; CHECK-P9-LABEL: @v1i128_increment_by_one
76; The FIXME-prefixed lines below are not active checks (no RUN line uses that prefix);
77; they are due to the lowering for BUILD_VECTOR that will be fixed in a subsequent patch.
78; FIXME: li [[R1:r[0-9]+]], 1
79; FIXME: li [[R2:r[0-9]+]], 0
80; FIXME: mtvsrdd [[V1:v[0-9]+]], [[R2]], [[R1]]
81; CHECK-P9: lxv [[V1:v[0-9]+]]
82; CHECK-P9: vadduqm v2, v2, [[V1]]
83; CHECK-P9: blr
84
85; CHECK-BE-LABEL: @v1i128_increment_by_one
86; CHECK-BE: lxvd2x 35, {{[0-9]+}}, {{[0-9]+}}
87; CHECK-BE-NOT: xxswapd
88; CHECK-BE: vadduqm 2, 2, 3
89; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}}
90; CHECK-BE: blr
91
92; CHECK-NOVSX-LABEL: @v1i128_increment_by_one
93; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}}
94; CHECK-NOVSX-NOT: stxvd2x {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
95; CHECK-NOVSX: lvx [[VAL:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
96; CHECK-NOVSX-NOT: lxvd2x {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
97; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}}
98; CHECK-NOVSX: vadduqm 2, 2, [[VAL]]
99; CHECK-NOVSX: blr
100}
101
102; VSX:
103;   %a is passed in register 34
104;   %b is passed in register 35
105;   No swaps are necessary when using P8 Vector instructions on LE
106; VMX (no VSX):
107;   %a is passed in register 2
108;   %b is passed in register 3
109;   On LE, there is no need to swap the contents of 2 and 3 because the lvx/stvx
110;   instructions do not swap elements
111define <1 x i128> @v1i128_increment_by_val(<1 x i128> %a, <1 x i128> %b) nounwind {
112       %tmp = add <1 x i128> %a, %b             ; one 128-bit add of the two vector arguments
113       ret <1 x i128> %tmp
114; The add is matched by its full mnemonic (vadduqm), the same spelling the v1i128_increment_by_one checks use.
115; CHECK-LE-LABEL: @v1i128_increment_by_val
116; CHECK-LE-NOT: xxswapd
117; CHECK-LE: vadduqm 2, 2, 3
118; CHECK-LE: blr
119
120; CHECK-BE-LABEL: @v1i128_increment_by_val
121; CHECK-BE-NOT: xxswapd {{[0-9]+}}, 34
122; CHECK-BE-NOT: xxswapd {{[0-9]+}}, 35
123; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}}
124; CHECK-BE: vadduqm 2, 2, 3
125; CHECK-BE: blr
126
127; CHECK-NOVSX-LABEL: @v1i128_increment_by_val
128; CHECK-NOVSX-NOT: xxswapd 34, {{[0-9]+}}
129; CHECK-NOVSX: vadduqm 2, 2, 3
130; CHECK-NOVSX: blr
131}
132
133; Little Endian (VSX and VMX):
134;   Lower 64-bits of %a are passed in register 3
135;   Upper 64-bits of %a are passed in register 4
136;   Increment lower 64-bits using addic (immediate value of 1)
137;   Propagate the carry into the upper 64-bits using addze (add to zero extended)
138;   Results are placed in registers 3 and 4
139; Big Endian (VSX and VMX)
140;   Lower 64-bits of %a are passed in register 4
141;   Upper 64-bits of %a are passed in register 3
142;   Increment lower 64-bits using addic (immediate value of 1)
143;   Propagate the carry into the upper 64-bits using addze (add to zero extended)
144;   Results are placed in registers 3 and 4
145define i128 @i128_increment_by_one(i128 %a) nounwind {
146       %tmp =  add i128 %a,  1                  ; scalar i128 add of the constant 1
147       ret i128 %tmp                            ; little-endian checks update register 3 then 4; big-endian checks update 4 then 3
148; CHECK-LE-LABEL: @i128_increment_by_one
149; CHECK-LE: addic 3, 3, 1
150; CHECK-LE-NEXT: addze 4, 4
151; CHECK-LE: blr
152
153; CHECK-BE-LABEL: @i128_increment_by_one
154; CHECK-BE: addic 4, 4, 1
155; CHECK-BE-NEXT: addze 3, 3
156; CHECK-BE: blr
157
158; CHECK-LE-NOVSX-LABEL: @i128_increment_by_one
159; CHECK-LE-NOVSX: addic 3, 3, 1
160; CHECK-LE-NOVSX-NEXT: addze 4, 4
161; CHECK-LE-NOVSX: blr
162
163; CHECK-BE-NOVSX-LABEL: @i128_increment_by_one
164; CHECK-BE-NOVSX: addic 4, 4, 1
165; CHECK-BE-NOVSX-NEXT: addze 3, 3
166; CHECK-BE-NOVSX: blr
167}
168
169; Little Endian (VSX and VMX):
170;   Lower 64-bits of %a are passed in register 3
171;   Upper 64-bits of %a are passed in register 4
172;   Lower 64-bits of %b are passed in register 5
173;   Upper 64-bits of %b are passed in register 6
174;   Add the lower 64-bits using addc on registers 3 and 5
175;   Add the upper 64-bits using adde on registers 4 and 6
176;   Registers 3 and 4 should hold the result
177; Big Endian (VSX and VMX):
178;   Upper 64-bits of %a are passed in register 3
179;   Lower 64-bits of %a are passed in register 4
180;   Upper 64-bits of %b are passed in register 5
181;   Lower 64-bits of %b are passed in register 6
182;   Add the lower 64-bits using addc on registers 4 and 6
183;   Add the upper 64-bits using adde on registers 3 and 5
184;   Registers 3 and 4 should hold the result
185define i128 @i128_increment_by_val(i128 %a, i128 %b) nounwind {
186       %tmp =  add i128 %a, %b                  ; scalar i128 add of the two arguments
187       ret i128 %tmp                            ; checks expect an addc immediately followed by adde, on register pairs 3/5 and 4/6
188; CHECK-LE-LABEL: @i128_increment_by_val
189; CHECK-LE: addc 3, 3, 5
190; CHECK-LE-NEXT: adde 4, 4, 6
191; CHECK-LE: blr
192
193; CHECK-BE-LABEL: @i128_increment_by_val
194; CHECK-BE: addc 4, 4, 6
195; CHECK-BE-NEXT: adde 3, 3, 5
196; CHECK-BE: blr
197
198; CHECK-LE-NOVSX-LABEL: @i128_increment_by_val
199; CHECK-LE-NOVSX: addc 3, 3, 5
200; CHECK-LE-NOVSX-NEXT: adde 4, 4, 6
201; CHECK-LE-NOVSX: blr
202
203; CHECK-BE-NOVSX-LABEL: @i128_increment_by_val
204; CHECK-BE-NOVSX: addc 4, 4, 6
205; CHECK-BE-NOVSX-NEXT: adde 3, 3, 5
206; CHECK-BE-NOVSX: blr
207}
208
209
210; Callsites for the routines defined above.
211; Ensure the parameters are loaded in the same order that is expected by the
212; callee. See comments for individual functions above for details on registers
213; used for parameters.
214define <1 x i128> @call_v1i128_increment_by_one() nounwind {
215       %tmp = load <1 x i128>, ptr @x, align 16                            ; the vector argument comes from global @x
216       %ret = call <1 x i128> @v1i128_increment_by_one(<1 x i128> %tmp)
217       ret <1 x i128> %ret
218; Call-site test: the loaded value must be in the argument register (34 with VSX, 2 without) before the bl.
219; CHECK-LE-LABEL: @call_v1i128_increment_by_one
220; CHECK-LE: lxvd2x [[VAL:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
221; CHECK-LE: xxswapd 34, [[VAL]]
222; CHECK-LE: bl v1i128_increment_by_one
223; CHECK-LE: blr
224
225; CHECK-P9-LABEL: @call_v1i128_increment_by_one
226; CHECK-P9: lxv
227; CHECK-P9: bl {{.?}}v1i128_increment_by_one
228; CHECK-P9: blr
229
230; CHECK-BE-LABEL: @call_v1i128_increment_by_one
231; CHECK-BE: lxvw4x 34, {{[0-9]+}}, {{[0-9]+}}
232; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}}
233; CHECK-BE: bl {{.?}}v1i128_increment_by_one
234; CHECK-BE: blr
235
236; CHECK-NOVSX-LABEL: @call_v1i128_increment_by_one
237; CHECK-NOVSX: lvx 2, {{[0-9]+}}, {{[0-9]+}}
238; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}}
239; CHECK-NOVSX: bl {{.?}}v1i128_increment_by_one
240; CHECK-NOVSX: blr
241}
242
243define <1 x i128> @call_v1i128_increment_by_val() nounwind {
244       %tmp = load <1 x i128>, ptr @x, align 16                            ; first vector argument, from @x
245       %tmp2 = load <1 x i128>, ptr @y, align 16                           ; second vector argument, from @y
246       %ret = call <1 x i128> @v1i128_increment_by_val(<1 x i128> %tmp, <1 x i128> %tmp2)
247       ret <1 x i128> %ret
248; Call-site test: both loaded values must be in the two vector argument registers before the bl.
249; CHECK-LE-LABEL: @call_v1i128_increment_by_val
250; CHECK-LE: lxvd2x [[VAL1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
251; CHECK-LE-DAG: lxvd2x [[VAL2:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
252; CHECK-LE-DAG: xxswapd 34, [[VAL1]]
253; CHECK-LE: xxswapd 35, [[VAL2]]
254; CHECK-LE: bl v1i128_increment_by_val
255; CHECK-LE: blr
256
257; CHECK-P9-LABEL: @call_v1i128_increment_by_val
258; CHECK-P9-DAG: lxv v2
259; CHECK-P9-DAG: lxv v3
260; CHECK-P9: bl {{.?}}v1i128_increment_by_val
261; CHECK-P9: blr
262
263; CHECK-BE-LABEL: @call_v1i128_increment_by_val
264; NOTE(review): unlike the other prefixes, only the load into register 35 is matched
265; for big-endian; the load into register 34 is not checked here -- confirm this is intended.
266; CHECK-BE-DAG: lxvw4x 35, {{[0-9]+}}, {{[0-9]+}}
267; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}}
268; CHECK-BE-NOT: xxswapd 35, {{[0-9]+}}
269; CHECK-BE: bl {{.?}}v1i128_increment_by_val
270; CHECK-BE: blr
271
272; CHECK-NOVSX-LABEL: @call_v1i128_increment_by_val
273; CHECK-NOVSX-DAG: lvx 2, {{[0-9]+}}, {{[0-9]+}}
274; CHECK-NOVSX-DAG: lvx 3, {{[0-9]+}}, {{[0-9]+}}
275; CHECK-NOVSX-NOT: xxswapd 34, {{[0-9]+}}
276; CHECK-NOVSX-NOT: xxswapd 35, {{[0-9]+}}
277; CHECK-NOVSX: bl {{.?}}v1i128_increment_by_val
278; CHECK-NOVSX: blr
279
280}
281
282define i128 @call_i128_increment_by_one() nounwind {
283       %tmp = load i128, ptr @a, align 16                  ; the scalar i128 argument comes from global @a
284       %ret = call i128 @i128_increment_by_one(i128 %tmp)  ; checks expect the doublewords at offsets 0 and 8 in registers 3 and 4
285       ret i128 %ret                                       ; NOTE(review): the commented-out %ret4 call on the next line is dead text (%tmp2 is never defined here)
286;       %ret4 = call i128 @i128_increment_by_val(i128 %tmp2, i128 %tmp2)
287; CHECK-LE-LABEL: @call_i128_increment_by_one
288; CHECK-LE-DAG: ld 3, 0([[BASEREG:[0-9]+]])
289; CHECK-LE-DAG: ld 4, 8([[BASEREG]])
290; CHECK-LE: bl i128_increment_by_one
291; CHECK-LE: blr
292
293; CHECK-BE-LABEL: @call_i128_increment_by_one
294; CHECK-BE-DAG: ld 3, 0([[BASEREG:[0-9]+]])
295; CHECK-BE-DAG: ld 4, 8([[BASEREG]])
296; CHECK-BE: bl {{.?}}i128_increment_by_one
297; CHECK-BE: blr
298
299; CHECK-NOVSX-LABEL: @call_i128_increment_by_one
300; CHECK-NOVSX-DAG: ld 3, 0([[BASEREG:[0-9]+]])
301; CHECK-NOVSX-DAG: ld 4, 8([[BASEREG]])
302; CHECK-NOVSX: bl {{.?}}i128_increment_by_one
303; CHECK-NOVSX: blr
304}
305
306define i128 @call_i128_increment_by_val() nounwind {
307       %tmp = load i128, ptr @a, align 16                                  ; first i128 argument, from @a (checked in registers 3 and 4)
308       %tmp2 = load i128, ptr @b, align 16                                 ; second i128 argument, from @b (checked in registers 5 and 6)
309       %ret = call i128 @i128_increment_by_val(i128 %tmp, i128 %tmp2)
310       ret i128 %ret
311; CHECK-LE-LABEL: @call_i128_increment_by_val
312; CHECK-LE-DAG: ld 3, 0([[P1BASEREG:[0-9]+]])
313; CHECK-LE-DAG: ld 4, 8([[P1BASEREG]])
314; CHECK-LE-DAG: ld 5, 0([[P2BASEREG:[0-9]+]])
315; CHECK-LE-DAG: ld 6, 8([[P2BASEREG]])
316; CHECK-LE: bl i128_increment_by_val
317; CHECK-LE: blr
318
319; CHECK-BE-LABEL: @call_i128_increment_by_val
320; CHECK-BE-DAG: ld 3, 0([[P1BASEREG:[0-9]+]])
321; CHECK-BE-DAG: ld 4, 8([[P1BASEREG]])
322; CHECK-BE-DAG: ld 5, 0([[P2BASEREG:[0-9]+]])
323; CHECK-BE-DAG: ld 6, 8([[P2BASEREG]])
324; CHECK-BE: bl {{.?}}i128_increment_by_val
325; CHECK-BE: blr
326
327; CHECK-NOVSX-LABEL: @call_i128_increment_by_val
328; CHECK-NOVSX-DAG: ld 3, 0([[P1BASEREG:[0-9]+]])
329; CHECK-NOVSX-DAG: ld 4, 8([[P1BASEREG]])
330; CHECK-NOVSX-DAG: ld 5, 0([[P2BASEREG:[0-9]+]])
331; CHECK-NOVSX-DAG: ld 6, 8([[P2BASEREG]])
332; CHECK-NOVSX: bl {{.?}}i128_increment_by_val
333; CHECK-NOVSX: blr
334}
335
336define i128 @callee_i128_split(i32 %i, i128 %i1280, i32 %i4, i32 %i5,
337                               i32 %i6, i32 %i7, i128 %i1281, i32 %i8, i128 %i1282){
338entry:
339  %tmp =  add i128 %i1280, %i1281      ; the i32 parameters are never read; they only occupy argument slots
340  %tmp1 =  add i128 %tmp, %i1282       ; result is the sum of the three i128 parameters
341; Per the checks that follow, %i1280 arrives in registers 4/5 and %i1281 is split between register 10 and a stack slot off register 1.
342  ret i128 %tmp1
343}
344; CHECK-LE-LABEL: @callee_i128_split
345; CHECK-LE-DAG: ld [[TMPREG:[0-9]+]], [[OFFSET:[0-9]+]](1)
346; CHECK-LE-DAG: addc [[TMPREG2:[0-9]+]], 4, 10
347; CHECK-LE-DAG: adde [[TMPREG3:[0-9]+]], 5, [[TMPREG]]
348
349; CHECK-LE-DAG: ld [[TMPREG4:[0-9]+]], [[OFFSET2:[0-9]+]](1)
350; CHECK-LE-DAG: ld [[TMPREG5:[0-9]+]], [[OFFSET3:[0-9]+]](1)
351; CHECK-LE-DAG: addc 3, [[TMPREG2]], [[TMPREG4]]
352; CHECK-LE-DAG: adde 4, [[TMPREG3]], [[TMPREG5]]
353
354; CHECK-BE-LABEL: @callee_i128_split
355; CHECK-BE-DAG: ld [[TMPREG:[0-9]+]], [[OFFSET:[0-9]+]](1)
356; CHECK-BE-DAG: addc [[TMPREG3:[0-9]+]], 5, [[TMPREG]]
357; CHECK-BE-DAG: adde [[TMPREG2:[0-9]+]], 4, 10
358
359; CHECK-BE-DAG: ld [[TMPREG4:[0-9]+]], [[OFFSET2:[0-9]+]](1)
360; CHECK-BE-DAG: ld [[TMPREG5:[0-9]+]], [[OFFSET3:[0-9]+]](1)
361; CHECK-BE-DAG: addc 4, [[TMPREG3]], [[TMPREG4]]
362; CHECK-BE-DAG: adde 3, [[TMPREG2]], [[TMPREG5]]
363
364define i128 @i128_split() {
365entry:
366  %0 = load i128, ptr @a, align 16     ; passed as the second argument (checked in registers 4 and 5)
367  %1 = load i128, ptr @b, align 16     ; passed as the seventh argument (checked: one half in register 10, the other stored to the stack)
368  %call = tail call i128 @callee_i128_split(i32 1, i128 %0, i32 4, i32 5,
369                                           i32 6, i32 7, i128 %1, i32 8, i128 9)
370  ret i128 %call                       ; the checks that follow expect an ordinary bl to the callee
371}
372
373; CHECK-LE-LABEL: @i128_split
374; CHECK-LE-DAG: li 3, 1
375; CHECK-LE-DAG: ld 4, 0([[P2BASEREG:[0-9]+]])
376; CHECK-LE-DAG: ld 5, 8([[P2BASEREG]])
377; CHECK-LE-DAG: li 6, 4
378; CHECK-LE-DAG: li 7, 5
379; CHECK-LE-DAG: li 8, 6
380; CHECK-LE-DAG: li 9, 7
381; CHECK-LE-DAG: ld 10, 0([[P7BASEREG:[0-9]+]])
382; CHECK-LE-DAG: ld [[TMPREG:[0-9]+]], 8([[P7BASEREG]])
383; CHECK-LE-DAG: std [[TMPREG]], [[OFFSET:[0-9]+]](1)
384; CHECK-LE: bl callee_i128_split
385
386
387; CHECK-BE-LABEL: @i128_split
388; CHECK-BE-DAG: li 3, 1
389; CHECK-BE-DAG: ld 4, 0([[P2BASEREG:[0-9]+]])
390; CHECK-BE-DAG: ld 5, 8([[P2BASEREG]])
391; CHECK-BE-DAG: li 6, 4
392; CHECK-BE-DAG: li 7, 5
393; CHECK-BE-DAG: li 8, 6
394; CHECK-BE-DAG: li 9, 7
395; CHECK-BE-DAG: ld 10, 0([[P7BASEREG:[0-9]+]])
396; CHECK-BE-DAG: ld [[TMPREG:[0-9]+]], 8([[P7BASEREG]])
397; CHECK-BE-DAG: std [[TMPREG]], [[OFFSET:[0-9]+]](1)
398; CHECK-BE: bl {{.?}}callee_i128_split
399
400; CHECK-NOVSX-LABEL: @i128_split
401; CHECK-NOVSX-DAG: li 3, 1
402; CHECK-NOVSX-DAG: ld 4, 0([[P2BASEREG:[0-9]+]])
403; CHECK-NOVSX-DAG: ld 5, 8([[P2BASEREG]])
404; CHECK-NOVSX-DAG: li 6, 4
405; CHECK-NOVSX-DAG: li 7, 5
406; CHECK-NOVSX-DAG: li 8, 6
407; CHECK-NOVSX-DAG: li 9, 7
408; CHECK-NOVSX-DAG: ld 10, 0([[P7BASEREG:[0-9]+]])
409; CHECK-NOVSX-DAG: ld [[TMPREG:[0-9]+]], 8([[P7BASEREG]])
410; CHECK-NOVSX-DAG: std [[TMPREG]], [[OFFSET:[0-9]+]](1)
411; CHECK-NOVSX: bl {{.?}}callee_i128_split
412