; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck -check-prefix=RV32I %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck -check-prefix=RV64I %s

;
; fixed avg(x,y) = sub(or(x,y),lshr(xor(x,y),1))
;
; ext avg(x,y) = trunc(lshr(add(zext(x),zext(y),1),1))
;
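; Illustrative worked example (not part of the autogenerated checks below),
; assuming x = 5 and y = 6, where the ceiling average is ceil((5+6)/2) = 6:
;   fixed: or(5,6) = 7, xor(5,6) = 3, lshr(3,1) = 1, sub(7,1) = 6
;   ext:   zext to the wider type, add(5,6,1) = 12, lshr(12,1) = 6, trunc = 6
; The fixed form never exceeds or(x,y) and so stays in the original type; the
; ext form avoids overflow by doing the add in the doubled-width type instead.
;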

define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
; RV32I-LABEL: test_fixed_i8:
; RV32I:       # %bb.0:
; RV32I-NEXT:    andi a1, a1, 255
; RV32I-NEXT:    andi a0, a0, 255
; RV32I-NEXT:    add a0, a0, a1
; RV32I-NEXT:    addi a0, a0, 1
; RV32I-NEXT:    srli a0, a0, 1
; RV32I-NEXT:    ret
;
; RV64I-LABEL: test_fixed_i8:
; RV64I:       # %bb.0:
; RV64I-NEXT:    andi a1, a1, 255
; RV64I-NEXT:    andi a0, a0, 255
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    addi a0, a0, 1
; RV64I-NEXT:    srli a0, a0, 1
; RV64I-NEXT:    ret
  %or = or i8 %a0, %a1
  %xor = xor i8 %a0, %a1
  %shift = lshr i8 %xor, 1
  %res = sub i8 %or, %shift
  ret i8 %res
}

define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind {
; RV32I-LABEL: test_ext_i8:
; RV32I:       # %bb.0:
; RV32I-NEXT:    andi a1, a1, 255
; RV32I-NEXT:    andi a0, a0, 255
; RV32I-NEXT:    add a0, a0, a1
; RV32I-NEXT:    addi a0, a0, 1
; RV32I-NEXT:    srli a0, a0, 1
; RV32I-NEXT:    ret
;
; RV64I-LABEL: test_ext_i8:
; RV64I:       # %bb.0:
; RV64I-NEXT:    andi a1, a1, 255
; RV64I-NEXT:    andi a0, a0, 255
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    addi a0, a0, 1
; RV64I-NEXT:    srli a0, a0, 1
; RV64I-NEXT:    ret
  %x0 = zext i8 %a0 to i16
  %x1 = zext i8 %a1 to i16
  %sum = add i16 %x0, %x1
  %sum1 = add i16 %sum, 1
  %shift = lshr i16 %sum1, 1
  %res = trunc i16 %shift to i8
  ret i8 %res
}

define i16 @test_fixed_i16(i16 %a0, i16 %a1) nounwind {
; RV32I-LABEL: test_fixed_i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    lui a2, 16
; RV32I-NEXT:    addi a2, a2, -1
; RV32I-NEXT:    and a1, a1, a2
; RV32I-NEXT:    and a0, a0, a2
; RV32I-NEXT:    add a0, a0, a1
; RV32I-NEXT:    addi a0, a0, 1
; RV32I-NEXT:    srli a0, a0, 1
; RV32I-NEXT:    ret
;
; RV64I-LABEL: test_fixed_i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    lui a2, 16
; RV64I-NEXT:    addiw a2, a2, -1
; RV64I-NEXT:    and a1, a1, a2
; RV64I-NEXT:    and a0, a0, a2
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    addi a0, a0, 1
; RV64I-NEXT:    srli a0, a0, 1
; RV64I-NEXT:    ret
  %or = or i16 %a0, %a1
  %xor = xor i16 %a0, %a1
  %shift = lshr i16 %xor, 1
  %res = sub i16 %or, %shift
  ret i16 %res
}

define i16 @test_ext_i16(i16 %a0, i16 %a1) nounwind {
; RV32I-LABEL: test_ext_i16:
; RV32I:       # %bb.0:
; RV32I-NEXT:    lui a2, 16
; RV32I-NEXT:    addi a2, a2, -1
; RV32I-NEXT:    and a1, a1, a2
; RV32I-NEXT:    and a0, a0, a2
; RV32I-NEXT:    add a0, a0, a1
; RV32I-NEXT:    addi a0, a0, 1
; RV32I-NEXT:    srli a0, a0, 1
; RV32I-NEXT:    ret
;
; RV64I-LABEL: test_ext_i16:
; RV64I:       # %bb.0:
; RV64I-NEXT:    lui a2, 16
; RV64I-NEXT:    addiw a2, a2, -1
; RV64I-NEXT:    and a1, a1, a2
; RV64I-NEXT:    and a0, a0, a2
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    addi a0, a0, 1
; RV64I-NEXT:    srli a0, a0, 1
; RV64I-NEXT:    ret
  %x0 = zext i16 %a0 to i32
  %x1 = zext i16 %a1 to i32
  %sum = add i32 %x0, %x1
  %sum1 = add i32 %sum, 1
  %shift = lshr i32 %sum1, 1
  %res = trunc i32 %shift to i16
  ret i16 %res
}

define i32 @test_fixed_i32(i32 %a0, i32 %a1) nounwind {
; RV32I-LABEL: test_fixed_i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    or a2, a0, a1
; RV32I-NEXT:    xor a0, a0, a1
; RV32I-NEXT:    srli a0, a0, 1
; RV32I-NEXT:    sub a0, a2, a0
; RV32I-NEXT:    ret
;
; RV64I-LABEL: test_fixed_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a1, a1, 32
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a1, a1, 32
; RV64I-NEXT:    srli a0, a0, 32
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    addi a0, a0, 1
; RV64I-NEXT:    srli a0, a0, 1
; RV64I-NEXT:    ret
  %or = or i32 %a0, %a1
  %xor = xor i32 %a1, %a0
  %shift = lshr i32 %xor, 1
  %res = sub i32 %or, %shift
  ret i32 %res
}

define i32 @test_ext_i32(i32 %a0, i32 %a1) nounwind {
; RV32I-LABEL: test_ext_i32:
; RV32I:       # %bb.0:
; RV32I-NEXT:    or a2, a0, a1
; RV32I-NEXT:    xor a0, a0, a1
; RV32I-NEXT:    srli a0, a0, 1
; RV32I-NEXT:    sub a0, a2, a0
; RV32I-NEXT:    ret
;
; RV64I-LABEL: test_ext_i32:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a1, a1, 32
; RV64I-NEXT:    slli a0, a0, 32
; RV64I-NEXT:    srli a1, a1, 32
; RV64I-NEXT:    srli a0, a0, 32
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    addi a0, a0, 1
; RV64I-NEXT:    srli a0, a0, 1
; RV64I-NEXT:    ret
  %x0 = zext i32 %a0 to i64
  %x1 = zext i32 %a1 to i64
  %sum = add i64 %x0, %x1
  %sum1 = add i64 %sum, 1
  %shift = lshr i64 %sum1, 1
  %res = trunc i64 %shift to i32
  ret i32 %res
}

define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind {
; RV32I-LABEL: test_fixed_i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    or a4, a1, a3
; RV32I-NEXT:    xor a1, a1, a3
; RV32I-NEXT:    xor a3, a0, a2
; RV32I-NEXT:    or a0, a0, a2
; RV32I-NEXT:    srli a2, a1, 1
; RV32I-NEXT:    slli a1, a1, 31
; RV32I-NEXT:    srli a3, a3, 1
; RV32I-NEXT:    sub a4, a4, a2
; RV32I-NEXT:    or a3, a3, a1
; RV32I-NEXT:    sltu a1, a0, a3
; RV32I-NEXT:    sub a1, a4, a1
; RV32I-NEXT:    sub a0, a0, a3
; RV32I-NEXT:    ret
;
; RV64I-LABEL: test_fixed_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    or a2, a0, a1
; RV64I-NEXT:    xor a0, a0, a1
; RV64I-NEXT:    srli a0, a0, 1
; RV64I-NEXT:    sub a0, a2, a0
; RV64I-NEXT:    ret
  %or = or i64 %a0, %a1
  %xor = xor i64 %a1, %a0
  %shift = lshr i64 %xor, 1
  %res = sub i64 %or, %shift
  ret i64 %res
}

define i64 @test_ext_i64(i64 %a0, i64 %a1) nounwind {
; RV32I-LABEL: test_ext_i64:
; RV32I:       # %bb.0:
; RV32I-NEXT:    or a4, a1, a3
; RV32I-NEXT:    xor a1, a1, a3
; RV32I-NEXT:    xor a3, a0, a2
; RV32I-NEXT:    or a0, a0, a2
; RV32I-NEXT:    srli a2, a1, 1
; RV32I-NEXT:    slli a1, a1, 31
; RV32I-NEXT:    srli a3, a3, 1
; RV32I-NEXT:    sub a4, a4, a2
; RV32I-NEXT:    or a3, a3, a1
; RV32I-NEXT:    sltu a1, a0, a3
; RV32I-NEXT:    sub a1, a4, a1
; RV32I-NEXT:    sub a0, a0, a3
; RV32I-NEXT:    ret
;
; RV64I-LABEL: test_ext_i64:
; RV64I:       # %bb.0:
; RV64I-NEXT:    or a2, a0, a1
; RV64I-NEXT:    xor a0, a0, a1
; RV64I-NEXT:    srli a0, a0, 1
; RV64I-NEXT:    sub a0, a2, a0
; RV64I-NEXT:    ret
  %x0 = zext i64 %a0 to i128
  %x1 = zext i64 %a1 to i128
  %sum = add i128 %x0, %x1
  %sum1 = add i128 %sum, 1
  %shift = lshr i128 %sum1, 1
  %res = trunc i128 %shift to i64
  ret i64 %res
}