xref: /inferno-os/libkern/vlop-sparc.s (revision 37da2899f40661e3e9631e497da8dc59b971cbd0)
/*
 * _mulv: 64-bit multiply built from 32-bit pieces.
 *
 * Inputs are read from the frame: u1 as the words at 4(FP) and 8(FP),
 * u2 as the words at 12(FP) and 16(FP).  The comments below call the
 * 8(FP)/16(FP) words the low parts and the 4(FP)/12(FP) words the high
 * halves.  The 64-bit result is stored through R7: high word at 0(R7),
 * low word at 4(R7).
 * NOTE(review): R7 is presumably the caller-supplied result pointer;
 * confirm against the calling convention.
 *
 * Method:
 *   1. unsigned 32x32 => 64 product of the two low words using MULSCC
 *      steps (low word ends up in R13, high word in R9);
 *   2. low(u1)*high(u2) and low(u2)*high(u1) are added into the high
 *      word; each is skipped when the high half involved is zero.
 *
 * Registers written directly by this code: R8, R9, R11, R12, R13, R16,
 * Y and the condition codes.
 */
1 *37da2899SCharles.ForsythTEXT	_mulv(SB), $0
2 *37da2899SCharles.Forsyth	MOVW	u1+8(FP), R8		/* R8 = low word of u1 */
3 *37da2899SCharles.Forsyth	MOVW	u2+16(FP), R13		/* R13 = low word of u2 */
4 *37da2899SCharles.Forsyth
5 *37da2899SCharles.Forsyth	MOVW	R13, R16		/* save low parts for later */
6 *37da2899SCharles.Forsyth	MOVW	R8, R12		/* R12 = low(u1), R16 = low(u2); used for the cross products */
7 *37da2899SCharles.Forsyth
8 *37da2899SCharles.Forsyth	/*
9 *37da2899SCharles.Forsyth	 * unsigned 32x32 => 64 multiply
10 *37da2899SCharles.Forsyth	 */
	/*
	 * Order the two low words.  Reading "CMP a, b; Bcc" as "branch if
	 * a cc b" (consistent with the top-bit test after the long multiply),
	 * the swap is skipped when R13 <= R8 as signed values, so after mul1
	 * R13 <= R8.  R13 is the operand loaded into Y; R8 is the operand
	 * fed to every MULSCC step.
	 */
11 *37da2899SCharles.Forsyth	CMP	R13, R8
12 *37da2899SCharles.Forsyth	BLE	mul1
13 *37da2899SCharles.Forsyth	MOVW	R12, R13		/* swap, using the saved copies */
14 *37da2899SCharles.Forsyth	MOVW	R16, R8
15 *37da2899SCharles.Forsythmul1:
	/*
	 * Presumably R13 & ~0xFFF with the result discarded into R0 and only
	 * the condition codes kept: if R13 has no bits above the low 12,
	 * 12 multiply steps are enough and the short path is taken.
	 * NOTE(review): the ANDCC that zeroes R9 appears on both paths (after
	 * the BE and again at mul_shortway), so the code does not rely on the
	 * instruction after BE executing when the branch is taken; confirm
	 * against the assembler/linker's delay-slot handling.
	 */
16 *37da2899SCharles.Forsyth	MOVW	R13, Y
17 *37da2899SCharles.Forsyth	ANDNCC	$0xFFF, R13, R0
18 *37da2899SCharles.Forsyth	BE	mul_shortway
19 *37da2899SCharles.Forsyth	ANDCC	R0, R0, R9		/* zero partial product and clear N and V cond's */
20 *37da2899SCharles.Forsyth
21 *37da2899SCharles.Forsyth	/* long multiply */
	/* 32 multiply steps accumulating into R9, followed by one shift-only step */
22 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 0 */
23 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 1 */
24 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 2 */
25 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 3 */
26 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 4 */
27 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 5 */
28 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 6 */
29 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 7 */
30 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 8 */
31 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 9 */
32 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 10 */
33 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 11 */
34 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 12 */
35 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 13 */
36 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 14 */
37 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 15 */
38 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 16 */
39 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 17 */
40 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 18 */
41 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 19 */
42 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 20 */
43 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 21 */
44 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 22 */
45 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 23 */
46 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 24 */
47 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 25 */
48 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 26 */
49 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 27 */
50 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 28 */
51 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 29 */
52 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 30 */
53 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 31 */
54 *37da2899SCharles.Forsyth	MULSCC	R0, R9, R9		/* 32; shift only; r9 is high part */
55 *37da2899SCharles.Forsyth
56 *37da2899SCharles.Forsyth	/*
57 *37da2899SCharles.Forsyth	 * need to correct top word if top bit set
58 *37da2899SCharles.Forsyth	 */
	/*
	 * When R8 is negative as a signed value (top bit set), the other
	 * operand (still in R13) is added to the high word; otherwise the
	 * correction is skipped.
	 */
59 *37da2899SCharles.Forsyth	CMP	R8, R0
60 *37da2899SCharles.Forsyth	BGE	mul_tstlow
61 *37da2899SCharles.Forsyth	ADD	R13, R9			/* adjust the high parts */
62 *37da2899SCharles.Forsyth
63 *37da2899SCharles.Forsythmul_tstlow:
64 *37da2899SCharles.Forsyth	MOVW	Y, R13			/* get low part */
65 *37da2899SCharles.Forsyth	BA	mul_done
66 *37da2899SCharles.Forsyth
	/*
	 * Short path: 12 multiply steps plus one shift-only step, then the
	 * 64-bit product is reassembled from R9 and Y:
	 *   low word  (R13) = (R9 << 12) | (Y >> 20)
	 *   high word (R9)  =  R9 >> 20 (arithmetic shift)
	 * i.e. the top 12 bits of Y supply the low 12 bits of the low word.
	 */
67 *37da2899SCharles.Forsythmul_shortway:
68 *37da2899SCharles.Forsyth	ANDCC	R0, R0, R9		/* zero partial product and clear N and V cond's */
69 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/*  0 */
70 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/*  1 */
71 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/*  2 */
72 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/*  3 */
73 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/*  4 */
74 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/*  5 */
75 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/*  6 */
76 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/*  7 */
77 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/*  8 */
78 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/*  9 */
79 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 10 */
80 *37da2899SCharles.Forsyth	MULSCC	R8, R9, R9		/* 11 */
81 *37da2899SCharles.Forsyth	MULSCC	R0, R9, R9		/* 12; shift only; r9 is high part */
82 *37da2899SCharles.Forsyth
83 *37da2899SCharles.Forsyth	MOVW	Y, R8			/* make low part of partial low part & high part */
84 *37da2899SCharles.Forsyth	SLL	$12, R9, R13
85 *37da2899SCharles.Forsyth	SRL	$20, R8
86 *37da2899SCharles.Forsyth	OR	R8, R13
87 *37da2899SCharles.Forsyth
88 *37da2899SCharles.Forsyth	SRA	$20, R9			/* high part */
89 *37da2899SCharles.Forsyth
	/* Both paths arrive here with the low word in R13 and the high word in R9. */
90 *37da2899SCharles.Forsythmul_done:
91 *37da2899SCharles.Forsyth
92 *37da2899SCharles.Forsyth	/*
93 *37da2899SCharles.Forsyth	 * mul by high halves if needed
94 *37da2899SCharles.Forsyth	 */
	/*
	 * The low word is final, so it is stored first.  Each cross product
	 * is a 32-bit MUL of a saved low word (R12 or R16) by the other
	 * operand's high half, added into the high word R9; the multiply is
	 * skipped when that high half is zero.
	 */
95 *37da2899SCharles.Forsyth	MOVW	R13, 4(R7)		/* store low word of result */
96 *37da2899SCharles.Forsyth	MOVW	u2+12(FP), R11		/* R11 = high half of u2 */
97 *37da2899SCharles.Forsyth	CMP	R11, R0
98 *37da2899SCharles.Forsyth	BE	nomul1
99 *37da2899SCharles.Forsyth	MUL	R11, R12		/* R12 = low(u1) * high(u2) */
100 *37da2899SCharles.Forsyth	ADD	R12, R9
101 *37da2899SCharles.Forsyth
102 *37da2899SCharles.Forsythnomul1:
103 *37da2899SCharles.Forsyth	MOVW	u1+4(FP), R11		/* R11 = high half of u1 */
104 *37da2899SCharles.Forsyth	CMP	R11, R0
105 *37da2899SCharles.Forsyth	BE	nomul2
106 *37da2899SCharles.Forsyth	MUL	R11, R16		/* R16 = low(u2) * high(u1) */
107 *37da2899SCharles.Forsyth	ADD	R16, R9
108 *37da2899SCharles.Forsyth
109 *37da2899SCharles.Forsythnomul2:
110 *37da2899SCharles.Forsyth
111 *37da2899SCharles.Forsyth	MOVW	R9, 0(R7)		/* store high word of result */
112 *37da2899SCharles.Forsyth	RETURN
113