; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefix=RV64I
; RUN: llc -mtriple=riscv64 -global-isel -mattr=+zbkb -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefix=RV64ZBKB

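; Zbkb pack concatenates the low XLEN/2 bits of rs1 (low half of rd) with the
; low XLEN/2 bits of rs2 (high half). packh packs the low bytes of rs1 and rs2
; into the 16 LSBs of rd, zero-extending the rest. packw (RV64 only) packs the
; low 16 bits of each source into a 32-bit value sign-extended to 64 bits.
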
; FIXME: Use packw
define signext i32 @pack_i32(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: pack_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 48
; RV64I-NEXT:    srli a0, a0, 48
; RV64I-NEXT:    slliw a1, a1, 16
; RV64I-NEXT:    or a0, a1, a0
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: pack_i32:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    zext.h a0, a0
; RV64ZBKB-NEXT:    slliw a1, a1, 16
; RV64ZBKB-NEXT:    or a0, a1, a0
; RV64ZBKB-NEXT:    ret
  %shl = and i32 %a, 65535
  %shl1 = shl i32 %b, 16
  %or = or i32 %shl1, %shl
  ret i32 %or
}

; FIXME: Use packw
define signext i32 @pack_i32_2(i16 zeroext %a, i16 zeroext %b) nounwind {
; RV64I-LABEL: pack_i32_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a1, a1, 16
; RV64I-NEXT:    or a0, a1, a0
; RV64I-NEXT:    sext.w a0, a0
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: pack_i32_2:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    slli a1, a1, 16
; RV64ZBKB-NEXT:    or a0, a1, a0
; RV64ZBKB-NEXT:    sext.w a0, a0
; RV64ZBKB-NEXT:    ret
  %zexta = zext i16 %a to i32
  %zextb = zext i16 %b to i32
  %shl1 = shl i32 %zextb, 16
  %or = or i32 %shl1, %zexta
  ret i32 %or
}

; Test case where we don't have a sign_extend_inreg after the or.
; FIXME: Use packw
define signext i32 @pack_i32_3(i16 zeroext %0, i16 zeroext %1, i32 signext %2) {
; RV64I-LABEL: pack_i32_3:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 16
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    addw a0, a0, a2
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: pack_i32_3:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    slli a0, a0, 16
; RV64ZBKB-NEXT:    or a0, a0, a1
; RV64ZBKB-NEXT:    addw a0, a0, a2
; RV64ZBKB-NEXT:    ret
  %4 = zext i16 %0 to i32
  %5 = shl nuw i32 %4, 16
  %6 = zext i16 %1 to i32
  %7 = or i32 %5, %6
  %8 = add i32 %7, %2
  ret i32 %8
}

define i64 @pack_i64(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: pack_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 32
; RV64I-NEXT:    slli a1, a1, 32
; RV64I-NEXT:    or a0, a1, a0
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: pack_i64:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    pack a0, a0, a1
; RV64ZBKB-NEXT:    ret
  %shl = and i64 %a, 4294967295
  %shl1 = shl i64 %b, 32
  %or = or i64 %shl1, %shl
  ret i64 %or
}

; FIXME: The slli+srli isn't needed with pack.
define i64 @pack_i64_2(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: pack_i64_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    slli a1, a1, 32
; RV64I-NEXT:    srli a0, a0, 32
; RV64I-NEXT:    srli a1, a1, 32
; RV64I-NEXT:    slli a1, a1, 32
; RV64I-NEXT:    or a0, a1, a0
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: pack_i64_2:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    slli a1, a1, 32
; RV64ZBKB-NEXT:    srli a1, a1, 32
; RV64ZBKB-NEXT:    pack a0, a0, a1
; RV64ZBKB-NEXT:    ret
  %zexta = zext i32 %a to i64
  %zextb = zext i32 %b to i64
  %shl1 = shl i64 %zextb, 32
  %or = or i64 %shl1, %zexta
  ret i64 %or
}

define i64 @pack_i64_3(ptr %0, ptr %1) {
; RV64I-LABEL: pack_i64_3:
; RV64I:       # %bb.0:
; RV64I-NEXT:    lwu a0, 0(a0)
; RV64I-NEXT:    lwu a1, 0(a1)
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: pack_i64_3:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    lwu a0, 0(a0)
; RV64ZBKB-NEXT:    lwu a1, 0(a1)
; RV64ZBKB-NEXT:    pack a0, a1, a0
; RV64ZBKB-NEXT:    ret
  %3 = load i32, ptr %0, align 4
  %4 = zext i32 %3 to i64
  %5 = shl i64 %4, 32
  %6 = load i32, ptr %1, align 4
  %7 = zext i32 %6 to i64
  %8 = or i64 %5, %7
  ret i64 %8
}

; FIXME: Use packh
define signext i32 @packh_i32(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: packh_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    lui a2, 16
; RV64I-NEXT:    andi a0, a0, 255
; RV64I-NEXT:    addiw a2, a2, -256
; RV64I-NEXT:    slli a1, a1, 8
; RV64I-NEXT:    and a1, a1, a2
; RV64I-NEXT:    or a0, a1, a0
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: packh_i32:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    lui a2, 16
; RV64ZBKB-NEXT:    andi a0, a0, 255
; RV64ZBKB-NEXT:    addiw a2, a2, -256
; RV64ZBKB-NEXT:    slli a1, a1, 8
; RV64ZBKB-NEXT:    and a1, a1, a2
; RV64ZBKB-NEXT:    or a0, a1, a0
; RV64ZBKB-NEXT:    ret
  %and = and i32 %a, 255
  %and1 = shl i32 %b, 8
  %shl = and i32 %and1, 65280
  %or = or i32 %shl, %and
  ret i32 %or
}

define i32 @packh_i32_2(i32 %a, i32 %b) nounwind {
; RV64I-LABEL: packh_i32_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    andi a0, a0, 255
; RV64I-NEXT:    andi a1, a1, 255
; RV64I-NEXT:    slli a1, a1, 8
; RV64I-NEXT:    or a0, a1, a0
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: packh_i32_2:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    packh a0, a0, a1
; RV64ZBKB-NEXT:    ret
  %and = and i32 %a, 255
  %and1 = and i32 %b, 255
  %shl = shl i32 %and1, 8
  %or = or i32 %shl, %and
  ret i32 %or
}

; FIXME: Use packh
define i64 @packh_i64(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: packh_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    lui a2, 16
; RV64I-NEXT:    andi a0, a0, 255
; RV64I-NEXT:    addiw a2, a2, -256
; RV64I-NEXT:    slli a1, a1, 8
; RV64I-NEXT:    and a1, a1, a2
; RV64I-NEXT:    or a0, a1, a0
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: packh_i64:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    lui a2, 16
; RV64ZBKB-NEXT:    andi a0, a0, 255
; RV64ZBKB-NEXT:    addiw a2, a2, -256
; RV64ZBKB-NEXT:    slli a1, a1, 8
; RV64ZBKB-NEXT:    and a1, a1, a2
; RV64ZBKB-NEXT:    or a0, a1, a0
; RV64ZBKB-NEXT:    ret
  %and = and i64 %a, 255
  %and1 = shl i64 %b, 8
  %shl = and i64 %and1, 65280
  %or = or i64 %shl, %and
  ret i64 %or
}

define i64 @packh_i64_2(i64 %a, i64 %b) nounwind {
; RV64I-LABEL: packh_i64_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    andi a0, a0, 255
; RV64I-NEXT:    andi a1, a1, 255
; RV64I-NEXT:    slli a1, a1, 8
; RV64I-NEXT:    or a0, a1, a0
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: packh_i64_2:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    packh a0, a0, a1
; RV64ZBKB-NEXT:    ret
  %and = and i64 %a, 255
  %and1 = and i64 %b, 255
  %shl = shl i64 %and1, 8
  %or = or i64 %shl, %and
  ret i64 %or
}

define zeroext i16 @packh_i16(i8 zeroext %a, i8 zeroext %b) nounwind {
; RV64I-LABEL: packh_i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a1, a1, 8
; RV64I-NEXT:    or a0, a1, a0
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: packh_i16:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    packh a0, a0, a1
; RV64ZBKB-NEXT:    ret
  %zext = zext i8 %a to i16
  %zext1 = zext i8 %b to i16
  %shl = shl i16 %zext1, 8
  %or = or i16 %shl, %zext
  ret i16 %or
}

define zeroext i16 @packh_i16_2(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2) {
; RV64I-LABEL: packh_i16_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    andi a0, a0, 255
; RV64I-NEXT:    slli a0, a0, 8
; RV64I-NEXT:    or a0, a0, a2
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: packh_i16_2:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    add a0, a1, a0
; RV64ZBKB-NEXT:    packh a0, a2, a0
; RV64ZBKB-NEXT:    ret
  %4 = add i8 %1, %0
  %5 = zext i8 %4 to i16
  %6 = shl i16 %5, 8
  %7 = zext i8 %2 to i16
  %8 = or i16 %6, %7
  ret i16 %8
}

define void @packh_i16_3(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2, ptr %p) {
; RV64I-LABEL: packh_i16_3:
; RV64I:       # %bb.0:
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    andi a0, a0, 255
; RV64I-NEXT:    slli a0, a0, 8
; RV64I-NEXT:    or a0, a0, a2
; RV64I-NEXT:    sh a0, 0(a3)
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: packh_i16_3:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    add a0, a1, a0
; RV64ZBKB-NEXT:    packh a0, a2, a0
; RV64ZBKB-NEXT:    sh a0, 0(a3)
; RV64ZBKB-NEXT:    ret
  %4 = add i8 %1, %0
  %5 = zext i8 %4 to i16
  %6 = shl i16 %5, 8
  %7 = zext i8 %2 to i16
  %8 = or i16 %6, %7
  store i16 %8, ptr %p
  ret void
}

define i64 @pack_i64_allWUsers(i32 signext %0, i32 signext %1, i32 signext %2) {
; RV64I-LABEL: pack_i64_allWUsers:
; RV64I:       # %bb.0:
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    slli a2, a2, 32
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a0, a0, 32
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a2, a2, 32
; RV64I-NEXT:    or a0, a0, a2
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: pack_i64_allWUsers:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    add a0, a1, a0
; RV64ZBKB-NEXT:    slli a0, a0, 32
; RV64ZBKB-NEXT:    srli a0, a0, 32
; RV64ZBKB-NEXT:    pack a0, a2, a0
; RV64ZBKB-NEXT:    ret
  %4 = add i32 %1, %0
  %5 = zext i32 %4 to i64
  %6 = shl i64 %5, 32
  %7 = zext i32 %2 to i64
  %8 = or i64 %6, %7
  ret i64 %8
}

define signext i32 @pack_i32_allWUsers(i16 zeroext %0, i16 zeroext %1, i16 zeroext %2) {
; RV64I-LABEL: pack_i32_allWUsers:
; RV64I:       # %bb.0:
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    slli a0, a0, 48
; RV64I-NEXT:    srli a0, a0, 48
; RV64I-NEXT:    slli a0, a0, 16
; RV64I-NEXT:    or a0, a0, a2
; RV64I-NEXT:    sext.w a0, a0
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: pack_i32_allWUsers:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    add a0, a1, a0
; RV64ZBKB-NEXT:    zext.h a0, a0
; RV64ZBKB-NEXT:    slli a0, a0, 16
; RV64ZBKB-NEXT:    or a0, a0, a2
; RV64ZBKB-NEXT:    sext.w a0, a0
; RV64ZBKB-NEXT:    ret
  %4 = add i16 %1, %0
  %5 = zext i16 %4 to i32
  %6 = shl i32 %5, 16
  %7 = zext i16 %2 to i32
  %8 = or i32 %6, %7
  ret i32 %8
}

define i64 @pack_i64_imm() {
; RV64I-LABEL: pack_i64_imm:
; RV64I:       # %bb.0:
; RV64I-NEXT:    lui a0, 65793
; RV64I-NEXT:    slli a0, a0, 4
; RV64I-NEXT:    addi a0, a0, 257
; RV64I-NEXT:    slli a0, a0, 16
; RV64I-NEXT:    addi a0, a0, 257
; RV64I-NEXT:    slli a0, a0, 12
; RV64I-NEXT:    addi a0, a0, 16
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: pack_i64_imm:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    lui a0, 65793
; RV64ZBKB-NEXT:    addi a0, a0, 16
; RV64ZBKB-NEXT:    pack a0, a0, a0
; RV64ZBKB-NEXT:    ret
  ret i64 1157442765409226768 ; 0x1010101010101010
}

define i32 @zexth_i32(i32 %a) nounwind {
; RV64I-LABEL: zexth_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 48
; RV64I-NEXT:    srli a0, a0, 48
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: zexth_i32:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    zext.h a0, a0
; RV64ZBKB-NEXT:    ret
  %and = and i32 %a, 65535
  ret i32 %and
}

define i64 @zexth_i64(i64 %a) nounwind {
; RV64I-LABEL: zexth_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 48
; RV64I-NEXT:    srli a0, a0, 48
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: zexth_i64:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    zext.h a0, a0
; RV64ZBKB-NEXT:    ret
  %and = and i64 %a, 65535
  ret i64 %and
}

define i32 @zext_i16_to_i32(i16 %a) nounwind {
; RV64I-LABEL: zext_i16_to_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 48
; RV64I-NEXT:    srli a0, a0, 48
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: zext_i16_to_i32:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    zext.h a0, a0
; RV64ZBKB-NEXT:    ret
  %1 = zext i16 %a to i32
  ret i32 %1
}

define i64 @zext_i16_to_i64(i16 %a) nounwind {
; RV64I-LABEL: zext_i16_to_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 48
; RV64I-NEXT:    srli a0, a0, 48
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: zext_i16_to_i64:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    zext.h a0, a0
; RV64ZBKB-NEXT:    ret
  %1 = zext i16 %a to i64
  ret i64 %1
}

; This creates an i16->i32 G_ZEXT that we need to be able to select
define i32 @zext_i16_i32_2(i1 %z, ptr %x, i32 %y) {
; RV64I-LABEL: zext_i16_i32_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    andi a3, a0, 1
; RV64I-NEXT:    bnez a3, .LBB20_2
; RV64I-NEXT:  # %bb.1:
; RV64I-NEXT:    mv a0, a2
; RV64I-NEXT:    ret
; RV64I-NEXT:  .LBB20_2:
; RV64I-NEXT:    lh a0, 0(a1)
; RV64I-NEXT:    slli a0, a0, 48
; RV64I-NEXT:    srli a0, a0, 48
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: zext_i16_i32_2:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    andi a3, a0, 1
; RV64ZBKB-NEXT:    bnez a3, .LBB20_2
; RV64ZBKB-NEXT:  # %bb.1:
; RV64ZBKB-NEXT:    mv a0, a2
; RV64ZBKB-NEXT:    ret
; RV64ZBKB-NEXT:  .LBB20_2:
; RV64ZBKB-NEXT:    lh a0, 0(a1)
; RV64ZBKB-NEXT:    zext.h a0, a0
; RV64ZBKB-NEXT:    ret
  %w = load i16, ptr %x
  %a = freeze i16 %w
  %b = zext i16 %a to i32
  %c = select i1 %z, i32 %b, i32 %y
  ret i32 %c
}

; This creates an i16->i32 G_SEXT that we need to be able to select
define i32 @sext_i16_i32(i1 %z, ptr %x, i32 %y) {
; RV64I-LABEL: sext_i16_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    andi a3, a0, 1
; RV64I-NEXT:    bnez a3, .LBB21_2
; RV64I-NEXT:  # %bb.1:
; RV64I-NEXT:    mv a0, a2
; RV64I-NEXT:    ret
; RV64I-NEXT:  .LBB21_2:
; RV64I-NEXT:    lh a0, 0(a1)
; RV64I-NEXT:    slli a0, a0, 48
; RV64I-NEXT:    srai a0, a0, 48
; RV64I-NEXT:    ret
;
; RV64ZBKB-LABEL: sext_i16_i32:
; RV64ZBKB:       # %bb.0:
; RV64ZBKB-NEXT:    andi a3, a0, 1
; RV64ZBKB-NEXT:    bnez a3, .LBB21_2
; RV64ZBKB-NEXT:  # %bb.1:
; RV64ZBKB-NEXT:    mv a0, a2
; RV64ZBKB-NEXT:    ret
; RV64ZBKB-NEXT:  .LBB21_2:
; RV64ZBKB-NEXT:    lh a0, 0(a1)
; RV64ZBKB-NEXT:    slli a0, a0, 48
; RV64ZBKB-NEXT:    srai a0, a0, 48
; RV64ZBKB-NEXT:    ret
  %w = load i16, ptr %x
  %a = freeze i16 %w
  %b = sext i16 %a to i32
  %c = select i1 %z, i32 %b, i32 %y
  ret i32 %c
}