xref: /netbsd-src/external/mit/isl/dist/doc/implementation.tex (revision 5971e316fdea024efff6be8f03536623db06833e)
1*5971e316Smrg\section{Sets and Relations}
2*5971e316Smrg
3*5971e316Smrg\begin{definition}[Polyhedral Set]
4*5971e316SmrgA {\em polyhedral set}\index{polyhedral set} $S$ is a finite union of basic sets
5*5971e316Smrg$S = \bigcup_i S_i$, each of which can be represented using affine
6*5971e316Smrgconstraints
7*5971e316Smrg$$
8*5971e316SmrgS_i : \Z^n \to 2^{\Z^d} : \vec s \mapsto
9*5971e316SmrgS_i(\vec s) =
10*5971e316Smrg\{\, \vec x \in \Z^d \mid \exists \vec z \in \Z^e :
11*5971e316SmrgA \vec x + B \vec s + D \vec z + \vec c \geq \vec 0 \,\}
12*5971e316Smrg,
13*5971e316Smrg$$
14*5971e316Smrgwith $A \in \Z^{m \times d}$,
15*5971e316Smrg$B \in \Z^{m \times n}$,
16*5971e316Smrg$D \in \Z^{m \times e}$
17*5971e316Smrgand $\vec c \in \Z^m$.
18*5971e316Smrg\end{definition}
19*5971e316Smrg
20*5971e316Smrg\begin{definition}[Parameter Domain of a Set]
21*5971e316SmrgLet $S \in \Z^n \to 2^{\Z^d}$ be a set.
22*5971e316SmrgThe {\em parameter domain} of $S$ is the set
23*5971e316Smrg$$\pdom S \coloneqq \{\, \vec s \in \Z^n \mid S(\vec s) \ne \emptyset \,\}.$$
24*5971e316Smrg\end{definition}
25*5971e316Smrg
26*5971e316Smrg\begin{definition}[Polyhedral Relation]
27*5971e316SmrgA {\em polyhedral relation}\index{polyhedral relation}
28*5971e316Smrg$R$ is a finite union of basic relations
29*5971e316Smrg$R = \bigcup_i R_i$ of type
30*5971e316Smrg$\Z^n \to 2^{\Z^{d_1+d_2}}$,
31*5971e316Smrgeach of which can be represented using affine
32*5971e316Smrgconstraints
33*5971e316Smrg$$
34*5971e316SmrgR_i = \vec s \mapsto
35*5971e316SmrgR_i(\vec s) =
36*5971e316Smrg\{\, \vec x_1 \to \vec x_2 \in \Z^{d_1} \times \Z^{d_2}
37*5971e316Smrg\mid \exists \vec z \in \Z^e :
38*5971e316SmrgA_1 \vec x_1 + A_2 \vec x_2 + B \vec s + D \vec z + \vec c \geq \vec 0 \,\}
39*5971e316Smrg,
40*5971e316Smrg$$
41*5971e316Smrgwith $A_i \in \Z^{m \times d_i}$,
42*5971e316Smrg$B \in \Z^{m \times n}$,
43*5971e316Smrg$D \in \Z^{m \times e}$
44*5971e316Smrgand $\vec c \in \Z^m$.
45*5971e316Smrg\end{definition}
46*5971e316Smrg
47*5971e316Smrg\begin{definition}[Parameter Domain of a Relation]
48*5971e316SmrgLet $R \in \Z^n \to 2^{\Z^{d+d}}$ be a relation.
49*5971e316SmrgThe {\em parameter domain} of $R$ is the set
50*5971e316Smrg$$\pdom R \coloneqq \{\, \vec s \in \Z^n \mid R(\vec s) \ne \emptyset \,\}.$$
51*5971e316Smrg\end{definition}
52*5971e316Smrg
53*5971e316Smrg\begin{definition}[Domain of a Relation]
54*5971e316SmrgLet $R \in \Z^n \to 2^{\Z^{d+d}}$ be a relation.
55*5971e316SmrgThe {\em domain} of $R$ is the polyhedral set
56*5971e316Smrg$$\domain R \coloneqq \vec s \mapsto
57*5971e316Smrg\{\, \vec x_1 \in \Z^{d_1} \mid \exists \vec x_2 \in \Z^{d_2} :
58*5971e316Smrg(\vec x_1, \vec x_2) \in R(\vec s) \,\}
59*5971e316Smrg.
60*5971e316Smrg$$
61*5971e316Smrg\end{definition}
62*5971e316Smrg
63*5971e316Smrg\begin{definition}[Range of a Relation]
64*5971e316SmrgLet $R \in \Z^n \to 2^{\Z^{d+d}}$ be a relation.
65*5971e316SmrgThe {\em range} of $R$ is the polyhedral set
66*5971e316Smrg$$
67*5971e316Smrg\range R \coloneqq \vec s \mapsto
68*5971e316Smrg\{\, \vec x_2 \in \Z^{d_2} \mid \exists \vec x_1 \in \Z^{d_1} :
69*5971e316Smrg(\vec x_1, \vec x_2) \in R(\vec s) \,\}
70*5971e316Smrg.
71*5971e316Smrg$$
72*5971e316Smrg\end{definition}
73*5971e316Smrg
74*5971e316Smrg\begin{definition}[Composition of Relations]
75*5971e316SmrgLet $R \in \Z^n \to 2^{\Z^{d_1+d_2}}$ and
76*5971e316Smrg$S \in \Z^n \to 2^{\Z^{d_2+d_3}}$ be two relations,
77*5971e316Smrgthen the composition of
78*5971e316Smrg$R$ and $S$ is defined as
79*5971e316Smrg$$
80*5971e316SmrgS \circ R \coloneqq
81*5971e316Smrg\vec s \mapsto
82*5971e316Smrg\{\, \vec x_1 \to \vec x_3 \in \Z^{d_1} \times \Z^{d_3}
83*5971e316Smrg\mid \exists \vec x_2 \in \Z^{d_2} :
84*5971e316Smrg\vec x_1 \to \vec x_2 \in R(\vec s) \wedge
85*5971e316Smrg\vec x_2 \to \vec x_3 \in S(\vec s)
86*5971e316Smrg\,\}
87*5971e316Smrg.
88*5971e316Smrg$$
89*5971e316Smrg\end{definition}
90*5971e316Smrg
91*5971e316Smrg\begin{definition}[Difference Set of a Relation]
92*5971e316SmrgLet $R \in \Z^n \to 2^{\Z^{d+d}}$ be a relation.
93*5971e316SmrgThe difference set ($\Delta \, R$) of $R$ is the set
94*5971e316Smrgof differences between image elements and the corresponding
95*5971e316Smrgdomain elements,
96*5971e316Smrg$$
97*5971e316Smrg\diff R \coloneqq
98*5971e316Smrg\vec s \mapsto
99*5971e316Smrg\{\, \vec \delta \in \Z^{d} \mid \exists \vec x \to \vec y \in R :
100*5971e316Smrg\vec \delta = \vec y - \vec x
101*5971e316Smrg\,\}
102*5971e316Smrg$$
103*5971e316Smrg\end{definition}
104*5971e316Smrg
105*5971e316Smrg\section{Simple Hull}\label{s:simple hull}
106*5971e316Smrg
107*5971e316SmrgIt is sometimes useful to have a single
108*5971e316Smrgbasic set or basic relation that contains a given set or relation.
109*5971e316SmrgFor rational sets, the obvious choice would be to compute the
110*5971e316Smrg(rational) convex hull.  For integer sets, the obvious choice
111*5971e316Smrgwould be the integer hull.
112*5971e316SmrgHowever, {\tt isl} currently does not support an integer hull operation
113*5971e316Smrgand even if it did, it would be fairly expensive to compute.
114*5971e316SmrgThe convex hull operation is supported, but it is also fairly
115*5971e316Smrgexpensive to compute given only an implicit representation.
116*5971e316Smrg
117*5971e316SmrgUsually, it is not required to compute the exact integer hull,
118*5971e316Smrgand an overapproximation of this hull is sufficient.
119*5971e316SmrgThe ``simple hull'' of a set is such an overapproximation
120*5971e316Smrgand it is defined as the (inclusion-wise) smallest basic set
121*5971e316Smrgthat is described by constraints that are translates of
122*5971e316Smrgthe constraints in the input set.
123*5971e316SmrgThis means that the simple hull is relatively cheap to compute
124*5971e316Smrgand that the number of constraints in the simple hull is no
125*5971e316Smrglarger than the number of constraints in the input.
126*5971e316Smrg\begin{definition}[Simple Hull of a Set]
127*5971e316SmrgThe {\em simple hull} of a set
128*5971e316Smrg$S = \bigcup_{1 \le i \le v} S_i$, with
129*5971e316Smrg$$
130*5971e316SmrgS : \Z^n \to 2^{\Z^d} : \vec s \mapsto
131*5971e316SmrgS(\vec s) =
132*5971e316Smrg\left\{\, \vec x \in \Z^d \mid \exists \vec z \in \Z^e :
133*5971e316Smrg\bigvee_{1 \le i \le v}
134*5971e316SmrgA_i \vec x + B_i \vec s + D_i \vec z + \vec c_i \geq \vec 0 \,\right\}
135*5971e316Smrg$$
136*5971e316Smrgis the set
137*5971e316Smrg$$
138*5971e316SmrgH : \Z^n \to 2^{\Z^d} : \vec s \mapsto
139*5971e316SmrgS(\vec s) =
140*5971e316Smrg\left\{\, \vec x \in \Z^d \mid \exists \vec z \in \Z^e :
141*5971e316Smrg\bigwedge_{1 \le i \le v}
142*5971e316SmrgA_i \vec x + B_i \vec s + D_i \vec z + \vec c_i + \vec K_i \geq \vec 0
143*5971e316Smrg\,\right\}
144*5971e316Smrg,
145*5971e316Smrg$$
146*5971e316Smrgwith $\vec K_i$ the (component-wise) smallest non-negative integer vectors
147*5971e316Smrgsuch that $S \subseteq H$.
148*5971e316Smrg\end{definition}
149*5971e316SmrgThe $\vec K_i$ can be obtained by solving a number of
150*5971e316SmrgLP problems, one for each element of each $\vec K_i$.
151*5971e316SmrgIf any LP problem is unbounded, then the corresponding constraint
152*5971e316Smrgis dropped.
153*5971e316Smrg
154*5971e316Smrg\section{Parametric Integer Programming}
155*5971e316Smrg
156*5971e316Smrg\subsection{Introduction}\label{s:intro}
157*5971e316Smrg
158*5971e316SmrgParametric integer programming \parencite{Feautrier88parametric}
159*5971e316Smrgis used to solve many problems within the context of the polyhedral model.
160*5971e316SmrgHere, we are mainly interested in dependence analysis \parencite{Fea91}
161*5971e316Smrgand in computing a unique representation for existentially quantified
162*5971e316Smrgvariables.  The latter operation has been used for counting elements
163*5971e316Smrgin sets involving such variables
164*5971e316Smrg\parencite{BouletRe98,Verdoolaege2005experiences} and lies at the core
165*5971e316Smrgof the internal representation of {\tt isl}.
166*5971e316Smrg
167*5971e316SmrgParametric integer programming was first implemented in \texttt{PipLib}.
168*5971e316SmrgAn alternative method for parametric integer programming
169*5971e316Smrgwas later implemented in {\tt barvinok} \cite{barvinok-0.22}.
170*5971e316SmrgThis method is not based on Feautrier's algorithm, but on rational
171*5971e316Smrggenerating functions \cite{Woods2003short} and was inspired by the
172*5971e316Smrg``digging'' technique of \textcite{DeLoera2004Three} for solving
173*5971e316Smrgnon-parametric integer programming problems.
174*5971e316Smrg
175*5971e316SmrgIn the following sections, we briefly recall the dual simplex
176*5971e316Smrgmethod combined with Gomory cuts and describe some extensions
177*5971e316Smrgand optimizations.  The main algorithm is applied to a matrix
178*5971e316Smrgdata structure known as a tableau.  In case of parametric problems,
179*5971e316Smrgthere are two tableaus, one for the main problem and one for
180*5971e316Smrgthe constraints on the parameters, known as the context tableau.
181*5971e316SmrgThe handling of the context tableau is described in \autoref{s:context}.
182*5971e316Smrg
183*5971e316Smrg\subsection{The Dual Simplex Method}
184*5971e316Smrg
185*5971e316SmrgTableaus can be represented in several slightly different ways.
186*5971e316SmrgIn {\tt isl}, the dual simplex method uses the same representation
187*5971e316Smrgas that used by its incremental LP solver based on the \emph{primal}
188*5971e316Smrgsimplex method.  The implementation of this LP solver is based
189*5971e316Smrgon that of {\tt Simplify} \parencite{Detlefs2005simplify}, which, in turn,
190*5971e316Smrgwas derived from the work of \textcite{Nelson1980phd}.
191*5971e316SmrgIn the original \parencite{Nelson1980phd}, the tableau was implemented
192*5971e316Smrgas a sparse matrix, but neither {\tt Simplify} nor the current
193*5971e316Smrgimplementation of {\tt isl} does so.
194*5971e316Smrg
195*5971e316SmrgGiven some affine constraints on the variables,
196*5971e316Smrg$A \vec x + \vec b \ge \vec 0$, the tableau represents the relationship
197*5971e316Smrgbetween the variables $\vec x$ and non-negative variables
198*5971e316Smrg$\vec y = A \vec x + \vec b$ corresponding to the constraints.
199*5971e316SmrgThe initial tableau contains $\begin{pmatrix}
200*5971e316Smrg\vec b & A
201*5971e316Smrg\end{pmatrix}$ and expresses the constraints $\vec y$ in the rows in terms
202*5971e316Smrgof the variables $\vec x$ in the columns.  The main operation defined
203*5971e316Smrgon a tableau exchanges a column and a row variable and is called a pivot.
204*5971e316SmrgDuring this process, some coefficients may become rational.
205*5971e316SmrgAs in the \texttt{PipLib} implementation,
206*5971e316Smrg{\tt isl} maintains a shared denominator per row.
207*5971e316SmrgThe sample value of a tableau is one where each column variable is assigned
208*5971e316Smrgzero and each row variable is assigned the constant term of the row.
209*5971e316SmrgThis sample value represents a valid solution if each constraint variable
210*5971e316Smrgis assigned a non-negative value, i.e., if the constant terms of
211*5971e316Smrgrows corresponding to constraints are all non-negative.
212*5971e316Smrg
213*5971e316SmrgThe dual simplex method starts from an initial sample value that
214*5971e316Smrgmay be invalid, but that is known to be (lexicographically) no
215*5971e316Smrggreater than any solution, and gradually increments this sample value
216*5971e316Smrgthrough pivoting until a valid solution is obtained.
217*5971e316SmrgIn particular, each pivot exchanges a row variable
218*5971e316Smrg$r = -n + \sum_i a_i \, c_i$ with negative
219*5971e316Smrgsample value $-n$ with a column variable $c_j$
220*5971e316Smrgsuch that $a_j > 0$.  Since $c_j = (n + r - \sum_{i\ne j} a_i \, c_i)/a_j$,
221*5971e316Smrgthe new row variable will have a positive sample value $n$.
222*5971e316SmrgIf no such column can be found, then the problem is infeasible.
223*5971e316SmrgBy always choosing the column that leads to the (lexicographically)
224*5971e316Smrgsmallest increment in the variables $\vec x$,
225*5971e316Smrgthe first solution found is guaranteed to be the (lexicographically)
226*5971e316Smrgminimal solution \cite{Feautrier88parametric}.
227*5971e316SmrgIn order to be able to determine the smallest increment, the tableau
228*5971e316Smrgis (implicitly) extended with extra rows defining the original
229*5971e316Smrgvariables in terms of the column variables.
230*5971e316SmrgIf we assume that all variables are non-negative, then we know
231*5971e316Smrgthat the zero vector is no greater than the minimal solution and
232*5971e316Smrgthen the initial extended tableau looks as follows.
233*5971e316Smrg$$
234*5971e316Smrg\begin{tikzpicture}
235*5971e316Smrg\matrix (m) [matrix of math nodes]
236*5971e316Smrg{
237*5971e316Smrg& {} & 1 & \vec c \\
238*5971e316Smrg\vec x && |(top)| \vec 0 & I \\
239*5971e316Smrg\vec r && \vec b & |(bottom)|A \\
240*5971e316Smrg};
241*5971e316Smrg\begin{pgfonlayer}{background}
242*5971e316Smrg\node (core) [inner sep=0pt,fill=black!20,right delimiter=),left delimiter=(,fit=(top)(bottom)] {};
243*5971e316Smrg\end{pgfonlayer}
244*5971e316Smrg\end{tikzpicture}
245*5971e316Smrg$$
246*5971e316SmrgEach column in this extended tableau is lexicographically positive
247*5971e316Smrgand will remain so because of the column choice explained above.
248*5971e316SmrgIt is then clear that the value of $\vec x$ will increase in each step.
249*5971e316SmrgNote that there is no need to store the extra rows explicitly.
250*5971e316SmrgIf a given $x_i$ is a column variable, then the corresponding row
251*5971e316Smrgis the unit vector $e_i$.  If, on the other hand, it is a row variable,
252*5971e316Smrgthen the row already appears somewhere else in the tableau.
253*5971e316Smrg
254*5971e316SmrgIn case of parametric problems, the sign of the constant term
255*5971e316Smrgmay depend on the parameters.  Each time the constant term of a constraint row
256*5971e316Smrgchanges, we therefore need to check whether the new term can attain
257*5971e316Smrgnegative and/or positive values over the current set of possible
258*5971e316Smrgparameter values, i.e., the context.
259*5971e316SmrgIf all these terms can only attain non-negative values, the current
260*5971e316Smrgstate of the tableau represents a solution.  If one of the terms
261*5971e316Smrgcan only attain non-positive values and is not identically zero,
262*5971e316Smrgthe corresponding row can be pivoted.
263*5971e316SmrgOtherwise, we pick one of the terms that can attain both positive
264*5971e316Smrgand negative values and split the context into a part where
265*5971e316Smrgit only attains non-negative values and a part where it only attains
266*5971e316Smrgnegative values.
267*5971e316Smrg
268*5971e316Smrg\subsection{Gomory Cuts}
269*5971e316Smrg
270*5971e316SmrgThe solution found by the dual simplex method may have
271*5971e316Smrgnon-integral coordinates.  If so, some rational solutions
272*5971e316Smrg(including the current sample value), can be cut off by
273*5971e316Smrgapplying a (parametric) Gomory cut.
274*5971e316SmrgLet $r = b(\vec p) + \sp {\vec a} {\vec c}$ be the row
275*5971e316Smrgcorresponding to the first non-integral coordinate of $\vec x$,
276*5971e316Smrgwith $b(\vec p)$ the constant term, an affine expression in the
277*5971e316Smrgparameters $\vec p$, i.e., $b(\vec p) = \sp {\vec f} {\vec p} + g$.
278*5971e316SmrgNote that only row variables can attain
279*5971e316Smrgnon-integral values as the sample value of the column variables is zero.
280*5971e316SmrgConsider the expression
281*5971e316Smrg$b(\vec p) - \ceil{b(\vec p)} + \sp {\fract{\vec a}} {\vec c}$,
282*5971e316Smrgwith $\ceil\cdot$ the ceiling function and $\fract\cdot$ the
283*5971e316Smrgfractional part.  This expression is negative at the sample value
284*5971e316Smrgsince $\vec c = \vec 0$ and $r = b(\vec p)$ is fractional, i.e.,
285*5971e316Smrg$\ceil{b(\vec p)} > b(\vec p)$.  On the other hand, for each integral
286*5971e316Smrgvalue of $r$ and $\vec c \ge 0$, the expression is non-negative
287*5971e316Smrgbecause $b(\vec p) - \ceil{b(\vec p)} > -1$.
288*5971e316SmrgImposing this expression to be non-negative therefore does not
289*5971e316Smrginvalidate any integral solutions, while it does cut away the current
290*5971e316Smrgfractional sample value.  To be able to formulate this constraint,
291*5971e316Smrga new variable $q = \floor{-b(\vec p)} = - \ceil{b(\vec p)}$ is added
292*5971e316Smrgto the context.  This integral variable is uniquely defined by the constraints
293*5971e316Smrg$0 \le -d \, b(\vec p) - d \, q \le d - 1$, with $d$ the common
294*5971e316Smrgdenominator of $\vec f$ and $g$.  In practice, the variable
295*5971e316Smrg$q' = \floor{\sp {\fract{-f}} {\vec p} + \fract{-g}}$ is used instead
296*5971e316Smrgand the coefficients of the new constraint are adjusted accordingly.
297*5971e316SmrgThe sign of the constant term of this new constraint need not be determined
298*5971e316Smrgas it is non-positive by construction.
299*5971e316SmrgWhen several of these extra context variables are added, it is important
300*5971e316Smrgto avoid adding duplicates.
301*5971e316SmrgRecent versions of {\tt PipLib} also check for such duplicates.
302*5971e316Smrg
303*5971e316Smrg\subsection{Negative Unknowns and Maximization}
304*5971e316Smrg
305*5971e316SmrgThere are two places in the above algorithm where the unknowns $\vec x$
306*5971e316Smrgare assumed to be non-negative: the initial tableau starts from
307*5971e316Smrgsample value $\vec x = \vec 0$ and $\vec c$ is assumed to be non-negative
308*5971e316Smrgduring the construction of Gomory cuts.
309*5971e316SmrgTo deal with negative unknowns, \textcite[Appendix A.2]{Fea91}
310*5971e316Smrgproposed to use a ``big parameter'', say $M$, that is taken to be
311*5971e316Smrgan arbitrarily large positive number.  Instead of looking for the
312*5971e316Smrglexicographically minimal value of $\vec x$, we search instead
313*5971e316Smrgfor the lexicographically minimal value of $\vec x' = \vec M + \vec x$.
314*5971e316SmrgThe sample value $\vec x' = \vec 0$ of the initial tableau then
315*5971e316Smrgcorresponds to $\vec x = -\vec M$, which is clearly not greater than
316*5971e316Smrgany potential solution.  The sign of the constant term of a row
317*5971e316Smrgis determined lexicographically, with the coefficient of $M$ considered
318*5971e316Smrgfirst.  That is, if the coefficient of $M$ is not zero, then its sign
319*5971e316Smrgis the sign of the entire term.  Otherwise, the sign is determined
320*5971e316Smrgby the remaining affine expression in the parameters.
321*5971e316SmrgIf the original problem has a bounded optimum, then the final sample
322*5971e316Smrgvalue will be of the form $\vec M + \vec v$ and the optimal value
323*5971e316Smrgof the original problem is then $\vec v$.
324*5971e316SmrgMaximization problems can be handled in a similar way by computing
325*5971e316Smrgthe minimum of $\vec M - \vec x$.
326*5971e316Smrg
327*5971e316SmrgWhen the optimum is unbounded, the optimal value computed for
328*5971e316Smrgthe original problem will involve the big parameter.
329*5971e316SmrgIn the original implementation of {\tt PipLib}, the big parameter could
330*5971e316Smrgeven appear in some of the extra variables $\vec q$ created during
331*5971e316Smrgthe application of a Gomory cut.  The final result could then contain
332*5971e316Smrgimplicit conditions on the big parameter through conditions on such
333*5971e316Smrg$\vec q$ variables.  This problem was resolved in later versions
334*5971e316Smrgof {\tt PipLib} by taking $M$ to be divisible by any positive number.
335*5971e316SmrgThe big parameter can then never appear in any $\vec q$ because
336*5971e316Smrg$\fract {\alpha M } = 0$.  It should be noted, though, that an unbounded
337*5971e316Smrgproblem usually (but not always)
338*5971e316Smrgindicates an incorrect formulation of the problem.
339*5971e316Smrg
340*5971e316SmrgThe original version of {\tt PipLib} required the user to ``manually''
341*5971e316Smrgadd a big parameter, perform the reformulation and interpret the result
342*5971e316Smrg\parencite{Feautrier02}.  Recent versions allow the user to simply
343*5971e316Smrgspecify that the unknowns may be negative or that the maximum should
344*5971e316Smrgbe computed and then these transformations are performed internally.
345*5971e316SmrgAlthough there are some application, e.g.,
346*5971e316Smrgthat of \textcite{Feautrier92multi},
347*5971e316Smrgwhere it is useful to have explicit control over the big parameter,
348*5971e316Smrgnegative unknowns and maximization are by far the most common applications
349*5971e316Smrgof the big parameter and we believe that the user should not be bothered
350*5971e316Smrgwith such implementation issues.
351*5971e316SmrgThe current version of {\tt isl} therefore does not
352*5971e316Smrgprovide any interface for specifying big parameters.  Instead, the user
353*5971e316Smrgcan specify whether a maximum needs to be computed and no assumptions
354*5971e316Smrgare made on the sign of the unknowns.  Instead, the sign of the unknowns
355*5971e316Smrgis checked internally and a big parameter is automatically introduced when
356*5971e316Smrgneeded.  For compatibility with {\tt PipLib}, the {\tt isl\_pip} tool
357*5971e316Smrgdoes explicitly add non-negativity constraints on the unknowns unless
358*5971e316Smrgthe \verb+Urs_unknowns+ option is specified.
359*5971e316SmrgCurrently, there is also no way in {\tt isl} of expressing a big
360*5971e316Smrgparameter in the output.  Even though
361*5971e316Smrg{\tt isl} makes the same divisibility assumption on the big parameter
362*5971e316Smrgas recent versions of {\tt PipLib}, it will therefore eventually
363*5971e316Smrgproduce an error if the problem turns out to be unbounded.
364*5971e316Smrg
365*5971e316Smrg\subsection{Preprocessing}
366*5971e316Smrg
367*5971e316SmrgIn this section, we describe some transformations that are
368*5971e316Smrgor can be applied in advance to reduce the running time
369*5971e316Smrgof the actual dual simplex method with Gomory cuts.
370*5971e316Smrg
371*5971e316Smrg\subsubsection{Feasibility Check and Detection of Equalities}
372*5971e316Smrg
373*5971e316SmrgExperience with the original {\tt PipLib} has shown that Gomory cuts
374*5971e316Smrgdo not perform very well on problems that are (non-obviously) empty,
375*5971e316Smrgi.e., problems with rational solutions, but no integer solutions.
376*5971e316SmrgIn {\tt isl}, we therefore first perform a feasibility check on
377*5971e316Smrgthe original problem considered as a non-parametric problem
378*5971e316Smrgover the combined space of unknowns and parameters.
379*5971e316SmrgIn fact, we do not simply check the feasibility, but we also
380*5971e316Smrgcheck for implicit equalities among the integer points by computing
381*5971e316Smrgthe integer affine hull.  The algorithm used is the same as that
382*5971e316Smrgdescribed in \autoref{s:GBR} below.
383*5971e316SmrgComputing the affine hull is fairly expensive, but it can
384*5971e316Smrgbring huge benefits if any equalities can be found or if the problem
385*5971e316Smrgturns out to be empty.
386*5971e316Smrg
387*5971e316Smrg\subsubsection{Constraint Simplification}
388*5971e316Smrg
389*5971e316SmrgIf the coefficients of the unknown and parameters in a constraint
390*5971e316Smrghave a common factor, then this factor should be removed, possibly
391*5971e316Smrgrounding down the constant term.  For example, the constraint
392*5971e316Smrg$2 x - 5 \ge 0$ should be simplified to $x - 3 \ge 0$.
393*5971e316Smrg{\tt isl} performs such simplifications on all sets and relations.
394*5971e316SmrgRecent versions of {\tt PipLib} also perform this simplification
395*5971e316Smrgon the input.
396*5971e316Smrg
397*5971e316Smrg\subsubsection{Exploiting Equalities}\label{s:equalities}
398*5971e316Smrg
399*5971e316SmrgIf there are any (explicit) equalities in the input description,
400*5971e316Smrg{\tt PipLib} converts each into a pair of inequalities.
401*5971e316SmrgIt is also possible to write $r$ equalities as $r+1$ inequalities
402*5971e316Smrg\parencite{Feautrier02}, but it is even better to \emph{exploit} the
403*5971e316Smrgequalities to reduce the dimensionality of the problem.
404*5971e316SmrgGiven an equality involving at least one unknown, we pivot
405*5971e316Smrgthe row corresponding to the equality with the column corresponding
406*5971e316Smrgto the last unknown with non-zero coefficient.  The new column variable
407*5971e316Smrgcan then be removed completely because it is identically zero,
408*5971e316Smrgthereby reducing the dimensionality of the problem by one.
409*5971e316SmrgThe last unknown is chosen to ensure that the columns of the initial
410*5971e316Smrgtableau remain lexicographically positive.  In particular, if
411*5971e316Smrgthe equality is of the form $b + \sum_{i \le j} a_i \, x_i = 0$ with
412*5971e316Smrg$a_j \ne 0$, then the (implicit) top rows of the initial tableau
413*5971e316Smrgare changed as follows
414*5971e316Smrg$$
415*5971e316Smrg\begin{tikzpicture}
416*5971e316Smrg\matrix [matrix of math nodes]
417*5971e316Smrg{
418*5971e316Smrg & {} & |(top)| 0 & I_1 & |(j)| &  \\
419*5971e316Smrgj && 0 & & 1 & \\
420*5971e316Smrg  && 0 & & & |(bottom)|I_2 \\
421*5971e316Smrg};
422*5971e316Smrg\node[overlay,above=2mm of j,anchor=south]{j};
423*5971e316Smrg\begin{pgfonlayer}{background}
424*5971e316Smrg\node (m) [inner sep=0pt,fill=black!20,right delimiter=),left delimiter=(,fit=(top)(bottom)] {};
425*5971e316Smrg\end{pgfonlayer}
426*5971e316Smrg\begin{scope}[xshift=4cm]
427*5971e316Smrg\matrix [matrix of math nodes]
428*5971e316Smrg{
429*5971e316Smrg & {} & |(top)| 0 & I_1 &  \\
430*5971e316Smrgj && |(left)| -b/a_j & -a_i/a_j & \\
431*5971e316Smrg  && 0 & & |(bottom)|I_2 \\
432*5971e316Smrg};
433*5971e316Smrg\begin{pgfonlayer}{background}
434*5971e316Smrg\node (m2) [inner sep=0pt,fill=black!20,right delimiter=),left delimiter=(,fit=(top)(bottom)(left)] {};
435*5971e316Smrg\end{pgfonlayer}
436*5971e316Smrg\end{scope}
437*5971e316Smrg \draw [shorten >=7mm,-to,thick,decorate,
438*5971e316Smrg        decoration={snake,amplitude=.4mm,segment length=2mm,
439*5971e316Smrg                    pre=moveto,pre length=5mm,post length=8mm}]
440*5971e316Smrg   (m) -- (m2);
441*5971e316Smrg\end{tikzpicture}
442*5971e316Smrg$$
443*5971e316SmrgCurrently, {\tt isl} also eliminates equalities involving only parameters
444*5971e316Smrgin a similar way, provided at least one of the coefficients is equal to one.
445*5971e316SmrgThe application of parameter compression (see below)
446*5971e316Smrgwould obviate the need for removing parametric equalities.
447*5971e316Smrg
448*5971e316Smrg\subsubsection{Offline Symmetry Detection}\label{s:offline}
449*5971e316Smrg
450*5971e316SmrgSome problems, notably those of \textcite{Bygde2010licentiate},
451*5971e316Smrghave a collection of constraints, say
452*5971e316Smrg$b_i(\vec p) + \sp {\vec a} {\vec x} \ge 0$,
453*5971e316Smrgthat only differ in their (parametric) constant terms.
454*5971e316SmrgThese constant terms will be non-negative on different parts
455*5971e316Smrgof the context and this context may have to be split for each
456*5971e316Smrgof the constraints.  In the worst case, the basic algorithm may
457*5971e316Smrghave to consider all possible orderings of the constant terms.
458*5971e316SmrgInstead, {\tt isl} introduces a new parameter, say $u$, and
459*5971e316Smrgreplaces the collection of constraints by the single
460*5971e316Smrgconstraint $u + \sp {\vec a} {\vec x} \ge 0$ along with
461*5971e316Smrgcontext constraints $u \le b_i(\vec p)$.
462*5971e316SmrgAny solution to the new system is also a solution
463*5971e316Smrgto the original system since
464*5971e316Smrg$\sp {\vec a} {\vec x} \ge -u \ge -b_i(\vec p)$.
465*5971e316SmrgConversely, $m = \min_i b_i(\vec p)$ satisfies the constraints
466*5971e316Smrgon $u$ and therefore extends a solution to the new system.
467*5971e316SmrgIt can also be plugged into a new solution.
468*5971e316SmrgSee \autoref{s:post} for how this substitution is currently performed
469*5971e316Smrgin {\tt isl}.
470*5971e316SmrgThe method described in this section can only detect symmetries
471*5971e316Smrgthat are explicitly available in the input.
472*5971e316SmrgSee \autoref{s:online} for the detection
473*5971e316Smrgand exploitation of symmetries that appear during the course of
474*5971e316Smrgthe dual simplex method.
475*5971e316Smrg
476*5971e316SmrgNote that the replacement of the $b_i(\vec p)$ by $u$ may lose
477*5971e316Smrginformation if the parameters that occur in $b_i(\vec p)$ also
478*5971e316Smrgoccur in other constraints.  The replacement is therefore currently
479*5971e316Smrgonly applied when all the parameters in all of the $b_i(\vec p)$
480*5971e316Smrgonly occur in a single constraint, i.e., the one in which
481*5971e316Smrgthe parameter is removed.
482*5971e316SmrgThis is the case for the examples from \textcite{Bygde2010licentiate}
483*5971e316Smrgin \autoref{t:comparison}.
484*5971e316SmrgThe version of {\tt isl} that was used during the experiments
485*5971e316Smrgof \autoref{s:pip:experiments} did not take into account
486*5971e316Smrgthis single-occurrence constraint.
487*5971e316Smrg
488*5971e316Smrg\subsubsection{Parameter Compression}\label{s:compression}
489*5971e316Smrg
490*5971e316SmrgIt may in some cases be apparent from the equalities in the problem
491*5971e316Smrgdescription that there can only be a solution for a sublattice
492*5971e316Smrgof the parameters.  In such cases ``parameter compression''
493*5971e316Smrg\parencite{Meister2004PhD,Meister2008} can be used to replace
494*5971e316Smrgthe parameters by alternative ``dense'' parameters.
495*5971e316SmrgFor example, if there is a constraint $2x = n$, then the system
496*5971e316Smrgwill only have solutions for even values of $n$ and $n$ can be replaced
497*5971e316Smrgby $2n'$.  Similarly, the parameters $n$ and $m$ in a system with
498*5971e316Smrgthe constraint $2n = 3m$ can be replaced by a single parameter $n'$
499*5971e316Smrgwith $n=3n'$ and $m=2n'$.
500*5971e316SmrgIt is also possible to perform a similar compression on the unknowns,
501*5971e316Smrgbut it would be more complicated as the compression would have to
502*5971e316Smrgpreserve the lexicographical order.  Moreover, due to our handling
503*5971e316Smrgof equalities described above there should be
504*5971e316Smrgno need for such variable compression.
505*5971e316SmrgAlthough parameter compression has been implemented in {\tt isl},
506*5971e316Smrgit is currently not yet used during parametric integer programming.
507*5971e316Smrg
508*5971e316Smrg\subsection{Postprocessing}\label{s:post}
509*5971e316Smrg
510*5971e316SmrgThe output of {\tt PipLib} is a quast (quasi-affine selection tree).
511*5971e316SmrgEach internal node in this tree corresponds to a split of the context
512*5971e316Smrgbased on a parametric constant term in the main tableau with indeterminate
513*5971e316Smrgsign.  Each of these nodes may introduce extra variables in the context
514*5971e316Smrgcorresponding to integer divisions.  Each leaf of the tree prescribes
515*5971e316Smrgthe solution in that part of the context that satisfies all the conditions
516*5971e316Smrgon the path leading to the leaf.
517*5971e316SmrgSuch a quast is a very economical way of representing the solution, but
518*5971e316Smrgit would not be suitable as the (only) internal representation of
519*5971e316Smrgsets and relations in {\tt isl}.  Instead, {\tt isl} represents
520*5971e316Smrgthe constraints of a set or relation in disjunctive normal form.
521*5971e316SmrgThe result of a parametric integer programming problem is then also
522*5971e316Smrgconverted to this internal representation.  Unfortunately, the conversion
523*5971e316Smrgto disjunctive normal form can lead to an explosion of the size
524*5971e316Smrgof the representation.
525*5971e316SmrgIn some cases, this overhead would have to be paid anyway in subsequent
526*5971e316Smrgoperations, but in other cases, especially for outside users that just
527*5971e316Smrgwant to solve parametric integer programming problems, we would like
528*5971e316Smrgto avoid this overhead in future.  That is, we are planning on introducing
529*5971e316Smrgquasts or a related representation as one of several possible internal
530*5971e316Smrgrepresentations and on allowing the output of {\tt isl\_pip} to optionally
531*5971e316Smrgbe printed as a quast.
532*5971e316Smrg
533*5971e316SmrgCurrently, {\tt isl} also does not have an internal representation
534*5971e316Smrgfor expressions such as $\min_i b_i(\vec p)$ from the offline
535*5971e316Smrgsymmetry detection of \autoref{s:offline}.
536*5971e316SmrgAssume that one of these expressions has $n$ bounds $b_i(\vec p)$.
537*5971e316SmrgIf the expression
538*5971e316Smrgdoes not appear in the affine expression describing the solution,
539*5971e316Smrgbut only in the constraints, and if moreover, the expression
540*5971e316Smrgonly appears with a positive coefficient, i.e.,
541*5971e316Smrg$\min_i b_i(\vec p) \ge f_j(\vec p)$, then each of these constraints
542*5971e316Smrgcan simply be reduplicated $n$ times, once for each of the bounds.
543*5971e316SmrgOtherwise, a conversion to disjunctive normal form
544*5971e316Smrgleads to $n$ cases, each described as $u = b_i(\vec p)$ with constraints
545*5971e316Smrg$b_i(\vec p) \le b_j(\vec p)$ for $j > i$
546*5971e316Smrgand
547*5971e316Smrg$b_i(\vec p)  < b_j(\vec p)$ for $j < i$.
548*5971e316SmrgNote that even though this conversion leads to a size increase
549*5971e316Smrgby a factor of $n$, not detecting the symmetry could lead to
550*5971e316Smrgan increase by a factor of $n!$ if all possible orderings end up being
551*5971e316Smrgconsidered.
552*5971e316Smrg
553*5971e316Smrg\subsection{Context Tableau}\label{s:context}
554*5971e316Smrg
555*5971e316SmrgThe main operation that a context tableau needs to provide is a test
556*5971e316Smrgon the sign of an affine expression over the elements of the context.
557*5971e316SmrgThis sign can be determined by solving two integer linear feasibility
558*5971e316Smrgproblems, one with a constraint added to the context that enforces
559*5971e316Smrgthe expression to be non-negative and one where the expression is
560*5971e316Smrgnegative.  As already mentioned by \textcite{Feautrier88parametric},
561*5971e316Smrgany integer linear feasibility solver could be used, but the {\tt PipLib}
562*5971e316Smrgimplementation uses a recursive call to the dual simplex with Gomory
563*5971e316Smrgcuts algorithm to determine the feasibility of a context.
564*5971e316SmrgIn {\tt isl}, two ways of handling the context have been implemented,
565*5971e316Smrgone that performs the recursive call and one, used by default, that
566*5971e316Smrguses generalized basis reduction.
567*5971e316SmrgWe start with some optimizations that are shared between the two
568*5971e316Smrgimplementations and then discuss additional details of each of them.
569*5971e316Smrg
570*5971e316Smrg\subsubsection{Maintaining Witnesses}\label{s:witness}
571*5971e316Smrg
572*5971e316SmrgA common feature of both integer linear feasibility solvers is that
573*5971e316Smrgthey will not only say whether a set is empty or not, but if the set
574*5971e316Smrgis non-empty, they will also provide a \emph{witness} for this result,
575*5971e316Smrgi.e., a point that belongs to the set.  By maintaining a list of such
576*5971e316Smrgwitnesses, we can avoid many feasibility tests during the determination
577*5971e316Smrgof the signs of affine expressions.  In particular, if the expression
578*5971e316Smrgevaluates to a positive number on some of these points and to a negative
579*5971e316Smrgnumber on some others, then no feasibility test needs to be performed.
580*5971e316SmrgIf all the evaluations are non-negative, we only need to check for the
581*5971e316Smrgpossibility of a negative value and similarly in case of all
582*5971e316Smrgnon-positive evaluations.  Finally, in the rare case that all points
583*5971e316Smrgevaluate to zero or at the start, when no points have been collected yet,
584*5971e316Smrgone or two feasibility tests need to be performed depending on the result
585*5971e316Smrgof the first test.
586*5971e316Smrg
587*5971e316SmrgWhen a new constraint is added to the context, the points that
588*5971e316Smrgviolate the constraint are temporarily removed.  They are reconsidered
589*5971e316Smrgwhen we backtrack over the addition of the constraint, as they will
590*5971e316Smrgsatisfy the negation of the constraint.  It is only when we backtrack
591*5971e316Smrgover the addition of the points that they are finally removed completely.
592*5971e316SmrgWhen an extra integer division is added to the context,
593*5971e316Smrgthe new coordinates of the
594*5971e316Smrgwitnesses can easily be computed by evaluating the integer division.
595*5971e316SmrgThe idea of keeping track of witnesses was first used in {\tt barvinok}.
596*5971e316Smrg
597*5971e316Smrg\subsubsection{Choice of Constant Term on which to Split}
598*5971e316Smrg
599*5971e316SmrgRecall that if there are no rows with a non-positive constant term,
600*5971e316Smrgbut there are rows with an indeterminate sign, then the context
601*5971e316Smrgneeds to be split along the constant term of one of these rows.
602*5971e316SmrgIf there is more than one such row, then we need to choose which row
603*5971e316Smrgto split on first.  {\tt PipLib} uses a heuristic based on the (absolute)
604*5971e316Smrgsizes of the coefficients.  In particular, it takes the largest coefficient
605*5971e316Smrgof each row and then selects the row where this largest coefficient is smaller
606*5971e316Smrgthan those of the other rows.
607*5971e316Smrg
608*5971e316SmrgIn {\tt isl}, we take that row for which non-negativity of its constant
609*5971e316Smrgterm implies non-negativity of as many of the constant terms of the other
610*5971e316Smrgrows as possible.  The intuition behind this heuristic is that on the
611*5971e316Smrgpositive side, we will have fewer negative and indeterminate signs,
612*5971e316Smrgwhile on the negative side, we need to perform a pivot, which may
613*5971e316Smrgaffect any number of rows meaning that the effect on the signs
614*5971e316Smrgis difficult to predict.  This heuristic is of course much more
615*5971e316Smrgexpensive to evaluate than the heuristic used by {\tt PipLib}.
616*5971e316SmrgMore extensive tests are needed to evaluate whether the heuristic is worthwhile.
617*5971e316Smrg
618*5971e316Smrg\subsubsection{Dual Simplex + Gomory Cuts}
619*5971e316Smrg
620*5971e316SmrgWhen a new constraint is added to the context, the first steps
621*5971e316Smrgof the dual simplex method applied to this new context will be the same
622*5971e316Smrgor at least very similar to those taken on the original context, i.e.,
623*5971e316Smrgbefore the constraint was added.  In {\tt isl}, we therefore apply
624*5971e316Smrgthe dual simplex method incrementally on the context and backtrack
625*5971e316Smrgto a previous state when a constraint is removed again.
626*5971e316SmrgAn initial implementation that was never made public would also
627*5971e316Smrgkeep the Gomory cuts, but the current implementation backtracks
628*5971e316Smrgto before the point where Gomory cuts are added before adding
629*5971e316Smrgan extra constraint to the context.
630*5971e316SmrgKeeping the Gomory cuts has the advantage that the sample value
631*5971e316Smrgis always an integer point and that this point may also satisfy
632*5971e316Smrgthe new constraint.  However, due to the technique of maintaining
633*5971e316Smrgwitnesses explained above,
634*5971e316Smrgwe would not perform a feasibility test in such cases and then
635*5971e316Smrgthe previously added cuts may be redundant, possibly resulting
636*5971e316Smrgin an accumulation of a large number of cuts.
637*5971e316Smrg
638*5971e316SmrgIf the parameters may be negative, then the same big parameter trick
639*5971e316Smrgused in the main tableau is applied to the context.  This big parameter
640*5971e316Smrgis of course unrelated to the big parameter from the main tableau.
641*5971e316SmrgNote that it is not a requirement for this parameter to be ``big'',
642*5971e316Smrgbut it does allow for some code reuse in {\tt isl}.
643*5971e316SmrgIn {\tt PipLib}, the extra parameter is not ``big'', but this may be because
644*5971e316Smrgthe big parameter of the main tableau also appears
645*5971e316Smrgin the context tableau.
646*5971e316Smrg
647*5971e316SmrgFinally, it was reported by \textcite{Galea2009personal}, who
648*5971e316Smrgworked on a parametric integer programming implementation
649*5971e316Smrgin {\tt PPL} \parencite{PPL},
650*5971e316Smrgthat it is beneficial to add cuts for \emph{all} rational coordinates
651*5971e316Smrgin the context tableau.  Based on this report,
652*5971e316Smrgthe initial {\tt isl} implementation was adapted accordingly.
653*5971e316Smrg
654*5971e316Smrg\subsubsection{Generalized Basis Reduction}\label{s:GBR}
655*5971e316Smrg
656*5971e316SmrgThe default algorithm used in {\tt isl} for feasibility checking
657*5971e316Smrgis generalized basis reduction \parencite{Cook1991implementation}.
658*5971e316SmrgThis algorithm is also used in the {\tt barvinok} implementation.
659*5971e316SmrgThe algorithm is fairly robust, but it has some overhead.
660*5971e316SmrgWe therefore try to avoid calling the algorithm in easy cases.
661*5971e316SmrgIn particular, we incrementally keep track of points for which
662*5971e316Smrgthe entire unit hypercube positioned at that point lies in the context.
663*5971e316SmrgThis set is described by translates of the constraints of the context
664*5971e316Smrgand if (rationally) non-empty, any rational point
665*5971e316Smrgin the set can be rounded up to yield an integer point in the context.
666*5971e316Smrg
667*5971e316SmrgA restriction of the algorithm is that it only works on bounded sets.
668*5971e316SmrgThe affine hull of the recession cone therefore needs to be projected
669*5971e316Smrgout first.  As soon as the algorithm is invoked, we then also
670*5971e316Smrgincrementally keep track of this recession cone.  The reduced basis
671*5971e316Smrgfound by one call of the algorithm is also reused as initial basis
672*5971e316Smrgfor the next call.
673*5971e316Smrg
674*5971e316SmrgSome problems lead to the
675*5971e316Smrgintroduction of many integer divisions.  Within a given context,
676*5971e316Smrgsome of these integer divisions may be equal to each other, even
677*5971e316Smrgif the expressions are not identical, or they may be equal to some
678*5971e316Smrgaffine combination of other variables.
679*5971e316SmrgTo detect such cases, we compute the affine hull of the context
680*5971e316Smrgeach time a new integer division is added.  The algorithm used
681*5971e316Smrgfor computing this affine hull is that of \textcite{Karr1976affine},
682*5971e316Smrgwhile the points used in this algorithm are obtained by performing
683*5971e316Smrginteger feasibility checks on that part of the context outside
684*5971e316Smrgthe current approximation of the affine hull.
685*5971e316SmrgThe list of witnesses is used to construct an initial approximation
686*5971e316Smrgof the hull, while any extra points found during the construction
687*5971e316Smrgof the hull is added to this list.
688*5971e316SmrgAny equality found in this way that expresses an integer division
689*5971e316Smrgas an \emph{integer} affine combination of other variables is
690*5971e316Smrgpropagated to the main tableau, where it is used to eliminate that
691*5971e316Smrginteger division.
692*5971e316Smrg
693*5971e316Smrg\subsection{Experiments}\label{s:pip:experiments}
694*5971e316Smrg
695*5971e316Smrg\autoref{t:comparison} compares the execution times of {\tt isl}
696*5971e316Smrg(with both types of context tableau)
697*5971e316Smrgon some more difficult instances to those of other tools,
698*5971e316Smrgrun on an Intel Xeon W3520 @ 2.66GHz.
699*5971e316SmrgThese instances are available in the \lstinline{testsets/pip} directory
700*5971e316Smrgof the {\tt isl} distribution.
701*5971e316SmrgEasier problems such as the
702*5971e316Smrgtest cases distributed with {\tt Pip\-Lib} can be solved so quickly
703*5971e316Smrgthat we would only be measuring overhead such as input/output and conversions
704*5971e316Smrgand not the running time of the actual algorithm.
705*5971e316SmrgWe compare the following versions:
706*5971e316Smrg{\tt piplib-1.4.0-5-g0132fd9},
707*5971e316Smrg{\tt barvinok-0.32.1-73-gc5d7751},
708*5971e316Smrg{\tt isl-0.05.1-82-g3a37260}
709*5971e316Smrgand {\tt PPL} version 0.11.2.
710*5971e316Smrg
711*5971e316SmrgThe first test case is the following dependence analysis problem
712*5971e316Smrgoriginating from the Phideo project \parencite{Verhaegh1995PhD}
713*5971e316Smrgthat was communicated to us by Bart Kienhuis:
714*5971e316Smrg\begin{lstlisting}[flexiblecolumns=true,breaklines=true]{}
715*5971e316Smrglexmax { [j1,j2] -> [i1,i2,i3,i4,i5,i6,i7,i8,i9,i10] : 1 <= i1,j1 <= 8 and 1 <= i2,i3,i4,i5,i6,i7,i8,i9,i10 <= 2 and 1 <= j2 <= 128 and i1-1 = j1-1 and i2-1+2*i3-2+4*i4-4+8*i5-8+16*i6-16+32*i7-32+64*i8-64+128*i9-128+256*i10-256=3*j2-3+66 };
716*5971e316Smrg\end{lstlisting}
717*5971e316SmrgThis problem was the main inspiration
718*5971e316Smrgfor some of the optimizations in \autoref{s:GBR}.
719*5971e316SmrgThe second group of test cases are projections used during counting.
720*5971e316SmrgThe first nine of these come from \textcite{Seghir2006minimizing}.
721*5971e316SmrgThe remaining two come from \textcite{Verdoolaege2005experiences} and
722*5971e316Smrgwere used to drive the first, Gomory cuts based, implementation
723*5971e316Smrgin {\tt isl}.
724*5971e316SmrgThe third and final group of test cases are borrowed from
725*5971e316Smrg\textcite{Bygde2010licentiate} and inspired the offline symmetry detection
726*5971e316Smrgof \autoref{s:offline}.  Without symmetry detection, the running times
727*5971e316Smrgare 11s and 5.9s.
728*5971e316SmrgAll running times of {\tt barvinok} and {\tt isl} include a conversion
729*5971e316Smrgto disjunctive normal form.  Without this conversion, the final two
730*5971e316Smrgcases can be solved in 0.07s and 0.21s.
731*5971e316SmrgThe {\tt PipLib} implementation has some fixed limits and will
732*5971e316Smrgsometimes report the problem to be too complex (TC), while on some other
733*5971e316Smrgproblems it will run out of memory (OOM).
734*5971e316SmrgThe {\tt barvinok} implementation does not support problems
735*5971e316Smrgwith a non-trivial lineality space (line) nor maximization problems (max).
736*5971e316SmrgThe Gomory cuts based {\tt isl} implementation was terminated after 1000
737*5971e316Smrgminutes on the first problem.  The gbr version introduces some
738*5971e316Smrgoverhead on some of the easier problems, but is overall the clear winner.
739*5971e316Smrg
740*5971e316Smrg\begin{table}
741*5971e316Smrg\begin{center}
742*5971e316Smrg\begin{tabular}{lrrrrr}
743*5971e316Smrg    & {\tt PipLib} & {\tt barvinok} & {\tt isl} cut & {\tt isl} gbr & {\tt PPL} \\
744*5971e316Smrg\hline
745*5971e316Smrg\hline
746*5971e316Smrg% bart.pip
747*5971e316SmrgPhideo & TC    & 793m   & $>$999m &   2.7s  & 372m \\
748*5971e316Smrg\hline
749*5971e316Smrge1 & 0.33s & 3.5s & 0.08s & 0.11s & 0.18s \\
750*5971e316Smrge3 & 0.14s & 0.13s & 0.10s & 0.10s & 0.17s \\
751*5971e316Smrge4 & 0.24s & 9.1s & 0.09s & 0.11s & 0.70s \\
752*5971e316Smrge5 & 0.12s & 6.0s & 0.06s & 0.14s & 0.17s \\
753*5971e316Smrge6 & 0.10s & 6.8s & 0.17s & 0.08s & 0.21s \\
754*5971e316Smrge7 & 0.03s & 0.27s & 0.04s & 0.04s & 0.03s \\
755*5971e316Smrge8 & 0.03s & 0.18s & 0.03s & 0.04s & 0.01s \\
756*5971e316Smrge9 & OOM & 70m & 2.6s & 0.94s & 22s \\
757*5971e316Smrgvd & 0.04s & 0.10s & 0.03s & 0.03s & 0.03s \\
758*5971e316Smrgbouleti & 0.25s & line & 0.06s & 0.06s & 0.15s \\
759*5971e316Smrgdifficult & OOM & 1.3s & 1.7s & 0.33s & 1.4s \\
760*5971e316Smrg\hline
761*5971e316Smrgcnt/sum & TC & max & 2.2s & 2.2s & OOM \\
762*5971e316Smrgjcomplex & TC & max & 3.7s & 3.9s & OOM \\
763*5971e316Smrg\end{tabular}
764*5971e316Smrg\caption{Comparison of Execution Times}
765*5971e316Smrg\label{t:comparison}
766*5971e316Smrg\end{center}
767*5971e316Smrg\end{table}
768*5971e316Smrg
769*5971e316Smrg\subsection{Online Symmetry Detection}\label{s:online}
770*5971e316Smrg
771*5971e316SmrgManual experiments on small instances of the problems of
772*5971e316Smrg\textcite{Bygde2010licentiate} and an analysis of the results
773*5971e316Smrgby the approximate MPA method developed by \textcite{Bygde2010licentiate}
774*5971e316Smrghave revealed that these problems contain many more symmetries
775*5971e316Smrgthan can be detected using the offline method of \autoref{s:offline}.
776*5971e316SmrgIn this section, we present an online detection mechanism that has
777*5971e316Smrgnot been implemented yet, but that has shown promising results
778*5971e316Smrgin manual applications.
779*5971e316Smrg
780*5971e316SmrgLet us first consider what happens when we do not perform offline
781*5971e316Smrgsymmetry detection.  At some point, one of the
782*5971e316Smrg$b_i(\vec p) + \sp {\vec a} {\vec x} \ge 0$ constraints,
783*5971e316Smrgsay the $j$th constraint, appears as a column
784*5971e316Smrgvariable, say $c_1$, while the other constraints are represented
785*5971e316Smrgas rows of the form $b_i(\vec p) - b_j(\vec p) + c$.
786*5971e316SmrgThe context is then split according to the relative order of
787*5971e316Smrg$b_j(\vec p)$ and one of the remaining $b_i(\vec p)$.
788*5971e316SmrgThe offline method avoids this split by replacing all $b_i(\vec p)$
789*5971e316Smrgby a single newly introduced parameter that represents the minimum
790*5971e316Smrgof these $b_i(\vec p)$.
791*5971e316SmrgIn the online method the split is similarly avoided by the introduction
792*5971e316Smrgof a new parameter.  In particular, a new parameter is introduced
793*5971e316Smrgthat represents
794*5971e316Smrg$\left| b_j(\vec p) - b_i(\vec p) \right|_+ =
795*5971e316Smrg\max(b_j(\vec p) - b_i(\vec p), 0)$.
796*5971e316Smrg
797*5971e316SmrgIn general, let $r = b(\vec p) + \sp {\vec a} {\vec c}$ be a row
798*5971e316Smrgof the tableau such that the sign of $b(\vec p)$ is indeterminate
799*5971e316Smrgand such that exactly one of the elements of $\vec a$ is a $1$,
800*5971e316Smrgwhile all remaining elements are non-positive.
801*5971e316SmrgThat is, $r = b(\vec p) + c_j - f$ with $f = -\sum_{i\ne j} a_i c_i \ge 0$.
802*5971e316SmrgWe introduce a new parameter $t$ with
803*5971e316Smrgcontext constraints $t \ge -b(\vec p)$ and $t \ge 0$ and replace
804*5971e316Smrgthe column variable $c_j$ by $c' + t$.  The row $r$ is now equal
805*5971e316Smrgto $b(\vec p) + t + c' - f$.  The constant term of this row is always
806*5971e316Smrgnon-negative because any negative value of $b(\vec p)$ is compensated
807*5971e316Smrgby $t \ge -b(\vec p)$ while and non-negative value remains non-negative
808*5971e316Smrgbecause $t \ge 0$.
809*5971e316Smrg
810*5971e316SmrgWe need to show that this transformation does not eliminate any valid
811*5971e316Smrgsolutions and that it does not introduce any spurious solutions.
812*5971e316SmrgGiven a valid solution for the original problem, we need to find
813*5971e316Smrga non-negative value of $c'$ satisfying the constraints.
814*5971e316SmrgIf $b(\vec p) \ge 0$, we can take $t = 0$ so that
815*5971e316Smrg$c' = c_j - t = c_j \ge 0$.
816*5971e316SmrgIf $b(\vec p) < 0$, we can take $t = -b(\vec p)$.
817*5971e316SmrgSince $r = b(\vec p) + c_j - f \ge 0$ and $f \ge 0$, we have
818*5971e316Smrg$c' = c_j + b(\vec p) \ge 0$.
819*5971e316SmrgNote that these choices amount to plugging in
820*5971e316Smrg$t = \left|-b(\vec p)\right|_+ = \max(-b(\vec p), 0)$.
821*5971e316SmrgConversely, given a solution to the new problem, we need to find
822*5971e316Smrga non-negative value of $c_j$, but this is easy since $c_j = c' + t$
823*5971e316Smrgand both of these are non-negative.
824*5971e316Smrg
825*5971e316SmrgPlugging in $t = \max(-b(\vec p), 0)$ can be performed as in
826*5971e316Smrg\autoref{s:post}, but, as in the case of offline symmetry detection,
827*5971e316Smrgit may be better to provide a direct representation for such
828*5971e316Smrgexpressions in the internal representation of sets and relations
829*5971e316Smrgor at least in a quast-like output format.
830*5971e316Smrg
831*5971e316Smrg\section{Coalescing}\label{s:coalescing}
832*5971e316Smrg
833*5971e316SmrgSee \textcite{Verdoolaege2015impact} for details on integer set coalescing.
834*5971e316Smrg
835*5971e316Smrg\section{Transitive Closure}
836*5971e316Smrg
837*5971e316Smrg\subsection{Introduction}
838*5971e316Smrg
839*5971e316Smrg\begin{definition}[Power of a Relation]
840*5971e316SmrgLet $R \in \Z^n \to 2^{\Z^{d+d}}$ be a relation and
841*5971e316Smrg$k \in \Z_{\ge 1}$
842*5971e316Smrga positive number, then power $k$ of relation $R$ is defined as
843*5971e316Smrg\begin{equation}
844*5971e316Smrg\label{eq:transitive:power}
845*5971e316SmrgR^k \coloneqq
846*5971e316Smrg\begin{cases}
847*5971e316SmrgR & \text{if $k = 1$}
848*5971e316Smrg\\
849*5971e316SmrgR \circ R^{k-1} & \text{if $k \ge 2$}
850*5971e316Smrg.
851*5971e316Smrg\end{cases}
852*5971e316Smrg\end{equation}
853*5971e316Smrg\end{definition}
854*5971e316Smrg
855*5971e316Smrg\begin{definition}[Transitive Closure of a Relation]
856*5971e316SmrgLet $R \in \Z^n \to 2^{\Z^{d+d}}$ be a relation,
857*5971e316Smrgthen the transitive closure $R^+$ of $R$ is the union
858*5971e316Smrgof all positive powers of $R$,
859*5971e316Smrg$$
860*5971e316SmrgR^+ \coloneqq \bigcup_{k \ge 1} R^k
861*5971e316Smrg.
862*5971e316Smrg$$
863*5971e316Smrg\end{definition}
864*5971e316SmrgAlternatively, the transitive closure may be defined
865*5971e316Smrginductively as
866*5971e316Smrg\begin{equation}
867*5971e316Smrg\label{eq:transitive:inductive}
868*5971e316SmrgR^+ \coloneqq R \cup \left(R \circ R^+\right)
869*5971e316Smrg.
870*5971e316Smrg\end{equation}
871*5971e316Smrg
872*5971e316SmrgSince the transitive closure of a polyhedral relation
873*5971e316Smrgmay no longer be a polyhedral relation \parencite{Kelly1996closure},
874*5971e316Smrgwe can, in the general case, only compute an approximation
875*5971e316Smrgof the transitive closure.
876*5971e316SmrgWhereas \textcite{Kelly1996closure} compute underapproximations,
877*5971e316Smrgwe, like \textcite{Beletska2009}, compute overapproximations.
878*5971e316SmrgThat is, given a relation $R$, we will compute a relation $T$
879*5971e316Smrgsuch that $R^+ \subseteq T$.  Of course, we want this approximation
880*5971e316Smrgto be as close as possible to the actual transitive closure
881*5971e316Smrg$R^+$ and we want to detect the cases where the approximation is
882*5971e316Smrgexact, i.e., where $T = R^+$.
883*5971e316Smrg
884*5971e316SmrgFor computing an approximation of the transitive closure of $R$,
885*5971e316Smrgwe follow the same general strategy as \textcite{Beletska2009}
886*5971e316Smrgand first compute an approximation of $R^k$ for $k \ge 1$ and then project
887*5971e316Smrgout the parameter $k$ from the resulting relation.
888*5971e316Smrg
889*5971e316Smrg\begin{example}
890*5971e316SmrgAs a trivial example, consider the relation
891*5971e316Smrg$R = \{\, x \to x + 1 \,\}$.  The $k$th power of this map
892*5971e316Smrgfor arbitrary $k$ is
893*5971e316Smrg$$
894*5971e316SmrgR^k = k \mapsto \{\, x \to x + k \mid k \ge 1 \,\}
895*5971e316Smrg.
896*5971e316Smrg$$
897*5971e316SmrgThe transitive closure is then
898*5971e316Smrg$$
899*5971e316Smrg\begin{aligned}
900*5971e316SmrgR^+ & = \{\, x \to y \mid \exists k \in \Z_{\ge 1} : y = x + k \,\}
901*5971e316Smrg\\
902*5971e316Smrg& = \{\, x \to y \mid y \ge x + 1 \,\}
903*5971e316Smrg.
904*5971e316Smrg\end{aligned}
905*5971e316Smrg$$
906*5971e316Smrg\end{example}
907*5971e316Smrg
908*5971e316Smrg\subsection{Computing an Approximation of $R^k$}
909*5971e316Smrg\label{s:power}
910*5971e316Smrg
911*5971e316SmrgThere are some special cases where the computation of $R^k$ is very easy.
912*5971e316SmrgOne such case is that where $R$ does not compose with itself,
913*5971e316Smrgi.e., $R \circ R = \emptyset$ or $\domain R \cap \range R = \emptyset$.
914*5971e316SmrgIn this case, $R^k$ is only non-empty for $k=1$ where it is equal
915*5971e316Smrgto $R$ itself.
916*5971e316Smrg
917*5971e316SmrgIn general, it is impossible to construct a closed form
918*5971e316Smrgof $R^k$ as a polyhedral relation.
919*5971e316SmrgWe will therefore need to make some approximations.
920*5971e316SmrgAs a first approximations, we will consider each of the basic
921*5971e316Smrgrelations in $R$ as simply adding one or more offsets to a domain element
922*5971e316Smrgto arrive at an image element and ignore the fact that some of these
923*5971e316Smrgoffsets may only be applied to some of the domain elements.
924*5971e316SmrgThat is, we will only consider the difference set $\Delta\,R$ of the relation.
925*5971e316SmrgIn particular, we will first construct a collection $P$ of paths
926*5971e316Smrgthat move through
927*5971e316Smrga total of $k$ offsets and then intersect domain and range of this
928*5971e316Smrgcollection with those of $R$.
929*5971e316SmrgThat is,
930*5971e316Smrg\begin{equation}
931*5971e316Smrg\label{eq:transitive:approx}
932*5971e316SmrgK = P \cap \left(\domain R \to \range R\right)
933*5971e316Smrg,
934*5971e316Smrg\end{equation}
935*5971e316Smrgwith
936*5971e316Smrg\begin{equation}
937*5971e316Smrg\label{eq:transitive:path}
938*5971e316SmrgP = \vec s \mapsto \{\, \vec x \to \vec y \mid
939*5971e316Smrg\exists k_i \in \Z_{\ge 0}, \vec\delta_i \in k_i \, \Delta_i(\vec s) :
940*5971e316Smrg\vec y = \vec x + \sum_i \vec\delta_i
941*5971e316Smrg\wedge
942*5971e316Smrg\sum_i k_i = k > 0
943*5971e316Smrg\,\}
944*5971e316Smrg\end{equation}
945*5971e316Smrgand with $\Delta_i$ the basic sets that compose
946*5971e316Smrgthe difference set $\Delta\,R$.
947*5971e316SmrgNote that the number of basic sets $\Delta_i$ need not be
948*5971e316Smrgthe same as the number of basic relations in $R$.
949*5971e316SmrgAlso note that since addition is commutative, it does not
950*5971e316Smrgmatter in which order we add the offsets and so we are allowed
951*5971e316Smrgto group them as we did in \eqref{eq:transitive:path}.
952*5971e316Smrg
953*5971e316SmrgIf all the $\Delta_i$s are singleton sets
954*5971e316Smrg$\Delta_i = \{\, \vec \delta_i \,\}$ with $\vec \delta_i \in \Z^d$,
955*5971e316Smrgthen \eqref{eq:transitive:path} simplifies to
956*5971e316Smrg\begin{equation}
957*5971e316Smrg\label{eq:transitive:singleton}
958*5971e316SmrgP = \{\, \vec x \to \vec y \mid
959*5971e316Smrg\exists k_i \in \Z_{\ge 0} :
960*5971e316Smrg\vec y = \vec x + \sum_i k_i \, \vec \delta_i
961*5971e316Smrg\wedge
962*5971e316Smrg\sum_i k_i = k > 0
963*5971e316Smrg\,\}
964*5971e316Smrg\end{equation}
965*5971e316Smrgand then the approximation computed in \eqref{eq:transitive:approx}
966*5971e316Smrgis essentially the same as that of \textcite{Beletska2009}.
967*5971e316SmrgIf some of the $\Delta_i$s are not singleton sets or if
968*5971e316Smrgsome of $\vec \delta_i$s are parametric, then we need
969*5971e316Smrgto resort to further approximations.
970*5971e316Smrg
971*5971e316SmrgTo ease both the exposition and the implementation, we will for
972*5971e316Smrgthe remainder of this section work with extended offsets
973*5971e316Smrg$\Delta_i' = \Delta_i \times \{\, 1 \,\}$.
974*5971e316SmrgThat is, each offset is extended with an extra coordinate that is
975*5971e316Smrgset equal to one.  The paths constructed by summing such extended
976*5971e316Smrgoffsets have the length encoded as the difference of their
977*5971e316Smrgfinal coordinates.  The path $P'$ can then be decomposed into
978*5971e316Smrgpaths $P_i'$, one for each $\Delta_i$,
979*5971e316Smrg\begin{equation}
980*5971e316Smrg\label{eq:transitive:decompose}
981*5971e316SmrgP' = \left(
982*5971e316Smrg(P_m' \cup \identity) \circ \cdots \circ
983*5971e316Smrg(P_2' \cup \identity) \circ
984*5971e316Smrg(P_1' \cup \identity)
985*5971e316Smrg\right) \cap
986*5971e316Smrg\{\,
987*5971e316Smrg\vec x' \to \vec y' \mid y_{d+1} - x_{d+1} = k > 0
988*5971e316Smrg\,\}
989*5971e316Smrg,
990*5971e316Smrg\end{equation}
991*5971e316Smrgwith
992*5971e316Smrg$$
993*5971e316SmrgP_i' = \vec s \mapsto \{\, \vec x' \to \vec y' \mid
994*5971e316Smrg\exists k \in \Z_{\ge 1}, \vec \delta \in k \, \Delta_i'(\vec s) :
995*5971e316Smrg\vec y' = \vec x' + \vec \delta
996*5971e316Smrg\,\}
997*5971e316Smrg.
998*5971e316Smrg$$
999*5971e316SmrgNote that each $P_i'$ contains paths of length at least one.
1000*5971e316SmrgWe therefore need to take the union with the identity relation
1001*5971e316Smrgwhen composing the $P_i'$s to allow for paths that do not contain
1002*5971e316Smrgany offsets from one or more $\Delta_i'$.
1003*5971e316SmrgThe path that consists of only identity relations is removed
1004*5971e316Smrgby imposing the constraint $y_{d+1} - x_{d+1} > 0$.
1005*5971e316SmrgTaking the union with the identity relation means that
1006*5971e316Smrgthat the relations we compose in \eqref{eq:transitive:decompose}
1007*5971e316Smrgeach consist of two basic relations.  If there are $m$
1008*5971e316Smrgdisjuncts in the input relation, then a direct application
1009*5971e316Smrgof the composition operation may therefore result in a relation
1010*5971e316Smrgwith $2^m$ disjuncts, which is prohibitively expensive.
1011*5971e316SmrgIt is therefore crucial to apply coalescing (\autoref{s:coalescing})
1012*5971e316Smrgafter each composition.
1013*5971e316Smrg
1014*5971e316SmrgLet us now consider how to compute an overapproximation of $P_i'$.
1015*5971e316SmrgThose that correspond to singleton $\Delta_i$s are grouped together
1016*5971e316Smrgand handled as in \eqref{eq:transitive:singleton}.
1017*5971e316SmrgNote that this is just an optimization.  The procedure described
1018*5971e316Smrgbelow would produce results that are at least as accurate.
1019*5971e316SmrgFor simplicity, we first assume that no constraint in $\Delta_i'$
1020*5971e316Smrginvolves any existentially quantified variables.
1021*5971e316SmrgWe will return to existentially quantified variables at the end
1022*5971e316Smrgof this section.
1023*5971e316SmrgWithout existentially quantified variables, we can classify
1024*5971e316Smrgthe constraints of $\Delta_i'$ as follows
1025*5971e316Smrg\begin{enumerate}
1026*5971e316Smrg\item non-parametric constraints
1027*5971e316Smrg\begin{equation}
1028*5971e316Smrg\label{eq:transitive:non-parametric}
1029*5971e316SmrgA_1 \vec x + \vec c_1 \geq \vec 0
1030*5971e316Smrg\end{equation}
1031*5971e316Smrg\item purely parametric constraints
1032*5971e316Smrg\begin{equation}
1033*5971e316Smrg\label{eq:transitive:parametric}
1034*5971e316SmrgB_2 \vec s + \vec c_2 \geq \vec 0
1035*5971e316Smrg\end{equation}
1036*5971e316Smrg\item negative mixed constraints
1037*5971e316Smrg\begin{equation}
1038*5971e316Smrg\label{eq:transitive:mixed}
1039*5971e316SmrgA_3 \vec x + B_3 \vec s + \vec c_3 \geq \vec 0
1040*5971e316Smrg\end{equation}
1041*5971e316Smrgsuch that for each row $j$ and for all $\vec s$,
1042*5971e316Smrg$$
1043*5971e316Smrg\Delta_i'(\vec s) \cap
1044*5971e316Smrg\{\, \vec \delta' \mid B_{3,j} \vec s + c_{3,j} > 0 \,\}
1045*5971e316Smrg= \emptyset
1046*5971e316Smrg$$
1047*5971e316Smrg\item positive mixed constraints
1048*5971e316Smrg$$
1049*5971e316SmrgA_4 \vec x + B_4 \vec s + \vec c_4 \geq \vec 0
1050*5971e316Smrg$$
1051*5971e316Smrgsuch that for each row $j$, there is at least one $\vec s$ such that
1052*5971e316Smrg$$
1053*5971e316Smrg\Delta_i'(\vec s) \cap
1054*5971e316Smrg\{\, \vec \delta' \mid B_{4,j} \vec s + c_{4,j} > 0 \,\}
1055*5971e316Smrg\ne \emptyset
1056*5971e316Smrg$$
1057*5971e316Smrg\end{enumerate}
1058*5971e316SmrgWe will use the following approximation $Q_i$ for $P_i'$:
1059*5971e316Smrg\begin{equation}
1060*5971e316Smrg\label{eq:transitive:Q}
1061*5971e316Smrg\begin{aligned}
1062*5971e316SmrgQ_i = \vec s \mapsto
1063*5971e316Smrg\{\,
1064*5971e316Smrg\vec x' \to \vec y'
1065*5971e316Smrg\mid {} & \exists k \in \Z_{\ge 1}, \vec f \in \Z^d :
1066*5971e316Smrg\vec y' = \vec x' + (\vec f, k)
1067*5971e316Smrg\wedge {}
1068*5971e316Smrg\\
1069*5971e316Smrg&
1070*5971e316SmrgA_1 \vec f + k \vec c_1 \geq \vec 0
1071*5971e316Smrg\wedge
1072*5971e316SmrgB_2 \vec s + \vec c_2 \geq \vec 0
1073*5971e316Smrg\wedge
1074*5971e316SmrgA_3 \vec f + B_3 \vec s + \vec c_3 \geq \vec 0
1075*5971e316Smrg\,\}
1076*5971e316Smrg.
1077*5971e316Smrg\end{aligned}
1078*5971e316Smrg\end{equation}
1079*5971e316SmrgTo prove that $Q_i$ is indeed an overapproximation of $P_i'$,
1080*5971e316Smrgwe need to show that for every $\vec s \in \Z^n$, for every
1081*5971e316Smrg$k \in \Z_{\ge 1}$ and for every $\vec f \in k \, \Delta_i(\vec s)$
1082*5971e316Smrgwe have that
1083*5971e316Smrg$(\vec f, k)$ satisfies the constraints in \eqref{eq:transitive:Q}.
1084*5971e316SmrgIf $\Delta_i(\vec s)$ is non-empty, then $\vec s$ must satisfy
1085*5971e316Smrgthe constraints in \eqref{eq:transitive:parametric}.
1086*5971e316SmrgEach element $(\vec f, k) \in k \, \Delta_i'(\vec s)$ is a sum
1087*5971e316Smrgof $k$ elements $(\vec f_j, 1)$ in $\Delta_i'(\vec s)$.
1088*5971e316SmrgEach of these elements satisfies the constraints in
1089*5971e316Smrg\eqref{eq:transitive:non-parametric}, i.e.,
1090*5971e316Smrg$$
1091*5971e316Smrg\left[
1092*5971e316Smrg\begin{matrix}
1093*5971e316SmrgA_1 & \vec c_1
1094*5971e316Smrg\end{matrix}
1095*5971e316Smrg\right]
1096*5971e316Smrg\left[
1097*5971e316Smrg\begin{matrix}
1098*5971e316Smrg\vec f_j \\ 1
1099*5971e316Smrg\end{matrix}
1100*5971e316Smrg\right]
1101*5971e316Smrg\ge \vec 0
1102*5971e316Smrg.
1103*5971e316Smrg$$
1104*5971e316SmrgThe sum of these elements therefore satisfies the same set of inequalities,
1105*5971e316Smrgi.e., $A_1 \vec f + k \vec c_1 \geq \vec 0$.
1106*5971e316SmrgFinally, the constraints in \eqref{eq:transitive:mixed} are such
1107*5971e316Smrgthat for any $\vec s$ in the parameter domain of $\Delta$,
1108*5971e316Smrgwe have $-\vec r(\vec s) \coloneqq B_3 \vec s + \vec c_3 \le \vec 0$,
1109*5971e316Smrgi.e., $A_3 \vec f_j \ge \vec r(\vec s) \ge \vec 0$
1110*5971e316Smrgand therefore also $A_3 \vec f \ge \vec r(\vec s)$.
1111*5971e316SmrgNote that if there are no mixed constraints and if the
1112*5971e316Smrgrational relaxation of $\Delta_i(\vec s)$, i.e.,
1113*5971e316Smrg$\{\, \vec x \in \Q^d \mid A_1 \vec x + \vec c_1 \ge \vec 0\,\}$,
1114*5971e316Smrghas integer vertices, then the approximation is exact, i.e.,
1115*5971e316Smrg$Q_i = P_i'$.  In this case, the vertices of $\Delta'_i(\vec s)$
1116*5971e316Smrggenerate the rational cone
1117*5971e316Smrg$\{\, \vec x' \in \Q^{d+1} \mid \left[
1118*5971e316Smrg\begin{matrix}
1119*5971e316SmrgA_1 & \vec c_1
1120*5971e316Smrg\end{matrix}
1121*5971e316Smrg\right] \vec x' \,\}$ and therefore $\Delta'_i(\vec s)$ is
1122*5971e316Smrga Hilbert basis of this cone \parencite[Theorem~16.4]{Schrijver1986}.
1123*5971e316Smrg
1124*5971e316SmrgNote however that, as pointed out by \textcite{DeSmet2010personal},
1125*5971e316Smrgif there \emph{are} any mixed constraints, then the above procedure may
1126*5971e316Smrgnot compute the most accurate affine approximation of
1127*5971e316Smrg$k \, \Delta_i(\vec s)$ with $k \ge 1$.
1128*5971e316SmrgIn particular, we only consider the negative mixed constraints that
1129*5971e316Smrghappen to appear in the description of $\Delta_i(\vec s)$, while we
1130*5971e316Smrgshould instead consider \emph{all} valid such constraints.
1131*5971e316SmrgIt is also sufficient to consider those constraints because any
1132*5971e316Smrgconstraint that is valid for $k \, \Delta_i(\vec s)$ is also
1133*5971e316Smrgvalid for $1 \, \Delta_i(\vec s) = \Delta_i(\vec s)$.
1134*5971e316SmrgTake therefore any constraint
1135*5971e316Smrg$\spv a x + \spv b s + c \ge 0$ valid for $\Delta_i(\vec s)$.
1136*5971e316SmrgThis constraint is also valid for $k \, \Delta_i(\vec s)$ iff
1137*5971e316Smrg$k \, \spv a x + \spv b s + c \ge 0$.
1138*5971e316SmrgIf $\spv b s + c$ can attain any positive value, then $\spv a x$
1139*5971e316Smrgmay be negative for some elements of $\Delta_i(\vec s)$.
1140*5971e316SmrgWe then have $k \, \spv a x < \spv a x$ for $k > 1$ and so the constraint
1141*5971e316Smrgis not valid for $k \, \Delta_i(\vec s)$.
1142*5971e316SmrgWe therefore need to impose $\spv b s + c \le 0$ for all values
1143*5971e316Smrgof $\vec s$ such that $\Delta_i(\vec s)$ is non-empty, i.e.,
1144*5971e316Smrg$\vec b$ and $c$ need to be such that $- \spv b s - c \ge 0$ is a valid
1145*5971e316Smrgconstraint of $\Delta_i(\vec s)$.  That is, $(\vec b, c)$ are the opposites
1146*5971e316Smrgof the coefficients of a valid constraint of $\Delta_i(\vec s)$.
1147*5971e316SmrgThe approximation of $k \, \Delta_i(\vec s)$ can therefore be obtained
1148*5971e316Smrgusing three applications of Farkas' lemma.  The first obtains the coefficients
1149*5971e316Smrgof constraints valid for $\Delta_i(\vec s)$.  The second obtains
1150*5971e316Smrgthe coefficients of constraints valid for the projection of $\Delta_i(\vec s)$
1151*5971e316Smrgonto the parameters.  The opposite of the second set is then computed
1152*5971e316Smrgand intersected with the first set.  The result is the set of coefficients
1153*5971e316Smrgof constraints valid for $k \, \Delta_i(\vec s)$.  A final application
1154*5971e316Smrgof Farkas' lemma is needed to obtain the approximation of
1155*5971e316Smrg$k \, \Delta_i(\vec s)$ itself.
1156*5971e316Smrg
1157*5971e316Smrg\begin{example}
1158*5971e316SmrgConsider the relation
1159*5971e316Smrg$$
1160*5971e316Smrgn \to \{\, (x, y) \to (1 + x, 1 - n + y) \mid n \ge 2 \,\}
1161*5971e316Smrg.
1162*5971e316Smrg$$
1163*5971e316SmrgThe difference set of this relation is
1164*5971e316Smrg$$
1165*5971e316Smrg\Delta = n \to \{\, (1, 1 - n) \mid n \ge 2 \,\}
1166*5971e316Smrg.
1167*5971e316Smrg$$
1168*5971e316SmrgUsing our approach, we would only consider the mixed constraint
1169*5971e316Smrg$y - 1 + n \ge 0$, leading to the following approximation of the
1170*5971e316Smrgtransitive closure:
1171*5971e316Smrg$$
1172*5971e316Smrgn \to \{\, (x, y) \to (o_0, o_1) \mid n \ge 2 \wedge o_1 \le 1 - n + y \wedge o_0 \ge 1 + x \,\}
1173*5971e316Smrg.
1174*5971e316Smrg$$
1175*5971e316SmrgIf, instead, we apply Farkas's lemma to $\Delta$, i.e.,
1176*5971e316Smrg\begin{verbatim}
1177*5971e316SmrgD := [n] -> { [1, 1 - n] : n >= 2 };
1178*5971e316SmrgCD := coefficients D;
1179*5971e316SmrgCD;
1180*5971e316Smrg\end{verbatim}
1181*5971e316Smrgwe obtain
1182*5971e316Smrg\begin{verbatim}
1183*5971e316Smrg{ rat: coefficients[[c_cst, c_n] -> [i2, i3]] : i3 <= c_n and
1184*5971e316Smrg  i3 <= c_cst + 2c_n + i2 }
1185*5971e316Smrg\end{verbatim}
1186*5971e316SmrgThe pure-parametric constraints valid for $\Delta$,
1187*5971e316Smrg\begin{verbatim}
1188*5971e316SmrgP := { [a,b] -> [] }(D);
1189*5971e316SmrgCP := coefficients P;
1190*5971e316SmrgCP;
1191*5971e316Smrg\end{verbatim}
1192*5971e316Smrgare
1193*5971e316Smrg\begin{verbatim}
1194*5971e316Smrg{ rat: coefficients[[c_cst, c_n] -> []] : c_n >= 0 and 2c_n >= -c_cst }
1195*5971e316Smrg\end{verbatim}
1196*5971e316SmrgNegating these coefficients and intersecting with \verb+CD+,
1197*5971e316Smrg\begin{verbatim}
1198*5971e316SmrgNCP := { rat: coefficients[[a,b] -> []]
1199*5971e316Smrg              -> coefficients[[-a,-b] -> []] }(CP);
1200*5971e316SmrgCK := wrap((unwrap CD) * (dom (unwrap NCP)));
1201*5971e316SmrgCK;
1202*5971e316Smrg\end{verbatim}
1203*5971e316Smrgwe obtain
1204*5971e316Smrg\begin{verbatim}
1205*5971e316Smrg{ rat: [[c_cst, c_n] -> [i2, i3]] : i3 <= c_n and
1206*5971e316Smrg  i3 <= c_cst + 2c_n + i2 and c_n <= 0 and 2c_n <= -c_cst }
1207*5971e316Smrg\end{verbatim}
1208*5971e316SmrgThe approximation for $k\,\Delta$,
1209*5971e316Smrg\begin{verbatim}
1210*5971e316SmrgK := solutions CK;
1211*5971e316SmrgK;
1212*5971e316Smrg\end{verbatim}
1213*5971e316Smrgis then
1214*5971e316Smrg\begin{verbatim}
1215*5971e316Smrg[n] -> { rat: [i0, i1] : i1 <= -i0 and i0 >= 1 and i1 <= 2 - n - i0 }
1216*5971e316Smrg\end{verbatim}
1217*5971e316SmrgFinally, the computed approximation for $R^+$,
1218*5971e316Smrg\begin{verbatim}
1219*5971e316SmrgT := unwrap({ [dx,dy] -> [[x,y] -> [x+dx,y+dy]] }(K));
1220*5971e316SmrgR := [n] -> { [x,y] -> [x+1,y+1-n] : n >= 2 };
1221*5971e316SmrgT := T * ((dom R) -> (ran R));
1222*5971e316SmrgT;
1223*5971e316Smrg\end{verbatim}
1224*5971e316Smrgis
1225*5971e316Smrg\begin{verbatim}
1226*5971e316Smrg[n] -> { [x, y] -> [o0, o1] : o1 <= x + y - o0 and
1227*5971e316Smrg         o0 >= 1 + x and o1 <= 2 - n + x + y - o0 and n >= 2 }
1228*5971e316Smrg\end{verbatim}
1229*5971e316Smrg\end{example}
1230*5971e316Smrg
1231*5971e316SmrgExistentially quantified variables can be handled by
1232*5971e316Smrgclassifying them into variables that are uniquely
1233*5971e316Smrgdetermined by the parameters, variables that are independent
1234*5971e316Smrgof the parameters and others.  The first set can be treated
1235*5971e316Smrgas parameters and the second as variables.  Constraints involving
1236*5971e316Smrgthe other existentially quantified variables are removed.
1237*5971e316Smrg
1238*5971e316Smrg\begin{example}
1239*5971e316SmrgConsider the relation
1240*5971e316Smrg$$
1241*5971e316SmrgR =
1242*5971e316Smrgn \to \{\, x \to y \mid \exists \, \alpha_0, \alpha_1: 7\alpha_0 = -2 + n \wedge 5\alpha_1 = -1 - x + y \wedge y \ge 6 + x \,\}
1243*5971e316Smrg.
1244*5971e316Smrg$$
1245*5971e316SmrgThe difference set of this relation is
1246*5971e316Smrg$$
1247*5971e316Smrg\Delta = \Delta \, R =
1248*5971e316Smrgn \to \{\, x \mid \exists \, \alpha_0, \alpha_1: 7\alpha_0 = -2 + n \wedge 5\alpha_1 = -1 + x \wedge x \ge 6 \,\}
1249*5971e316Smrg.
1250*5971e316Smrg$$
1251*5971e316SmrgThe existentially quantified variables can be defined in terms
1252*5971e316Smrgof the parameters and variables as
1253*5971e316Smrg$$
1254*5971e316Smrg\alpha_0 = \floor{\frac{-2 + n}7}
1255*5971e316Smrg\qquad
1256*5971e316Smrg\text{and}
1257*5971e316Smrg\qquad
1258*5971e316Smrg\alpha_1 = \floor{\frac{-1 + x}5}
1259*5971e316Smrg.
1260*5971e316Smrg$$
1261*5971e316Smrg$\alpha_0$ can therefore be treated as a parameter,
1262*5971e316Smrgwhile $\alpha_1$ can be treated as a variable.
1263*5971e316SmrgThis in turn means that $7\alpha_0 = -2 + n$ can be treated as
1264*5971e316Smrga purely parametric constraint, while the other two constraints are
1265*5971e316Smrgnon-parametric.
1266*5971e316SmrgThe corresponding $Q$~\eqref{eq:transitive:Q} is therefore
1267*5971e316Smrg$$
1268*5971e316Smrg\begin{aligned}
1269*5971e316Smrgn \to \{\, (x,z) \to (y,w) \mid
1270*5971e316Smrg\exists\, \alpha_0, \alpha_1, k, f : {} &
1271*5971e316Smrgk \ge 1 \wedge
1272*5971e316Smrgy = x + f \wedge
1273*5971e316Smrgw = z + k \wedge {} \\
1274*5971e316Smrg&
1275*5971e316Smrg7\alpha_0 = -2 + n \wedge
1276*5971e316Smrg5\alpha_1 = -k + x \wedge
1277*5971e316Smrgx \ge 6 k
1278*5971e316Smrg\,\}
1279*5971e316Smrg.
1280*5971e316Smrg\end{aligned}
1281*5971e316Smrg$$
1282*5971e316SmrgProjecting out the final coordinates encoding the length of the paths,
1283*5971e316Smrgresults in the exact transitive closure
1284*5971e316Smrg$$
1285*5971e316SmrgR^+ =
1286*5971e316Smrgn \to \{\, x \to y \mid \exists \, \alpha_0, \alpha_1: 7\alpha_1 = -2 + n \wedge 6\alpha_0 \ge -x + y \wedge 5\alpha_0 \le -1 - x + y \,\}
1287*5971e316Smrg.
1288*5971e316Smrg$$
1289*5971e316Smrg\end{example}
1290*5971e316Smrg
1291*5971e316SmrgThe fact that we ignore some impure constraints clearly leads
1292*5971e316Smrgto a loss of accuracy.  In some cases, some of this loss can be recovered
1293*5971e316Smrgby not considering the parameters in a special way.
1294*5971e316SmrgThat is, instead of considering the set
1295*5971e316Smrg$$
1296*5971e316Smrg\Delta = \diff R =
1297*5971e316Smrg\vec s \mapsto
1298*5971e316Smrg\{\, \vec \delta \in \Z^{d} \mid \exists \vec x \to \vec y \in R :
1299*5971e316Smrg\vec \delta = \vec y - \vec x
1300*5971e316Smrg\,\}
1301*5971e316Smrg$$
1302*5971e316Smrgwe consider the set
1303*5971e316Smrg$$
1304*5971e316Smrg\Delta' = \diff R' =
1305*5971e316Smrg\{\, \vec \delta \in \Z^{n+d} \mid \exists
1306*5971e316Smrg(\vec s, \vec x) \to (\vec s, \vec y) \in R' :
1307*5971e316Smrg\vec \delta = (\vec s - \vec s, \vec y - \vec x)
1308*5971e316Smrg\,\}
1309*5971e316Smrg.
1310*5971e316Smrg$$
1311*5971e316SmrgThe first $n$ coordinates of every element in $\Delta'$ are zero.
1312*5971e316SmrgProjecting out these zero coordinates from $\Delta'$ is equivalent
1313*5971e316Smrgto projecting out the parameters in $\Delta$.
1314*5971e316SmrgThe result is obviously a superset of $\Delta$, but all its constraints
1315*5971e316Smrgare of type \eqref{eq:transitive:non-parametric} and they can therefore
1316*5971e316Smrgall be used in the construction of $Q_i$.
1317*5971e316Smrg
1318*5971e316Smrg\begin{example}
1319*5971e316SmrgConsider the relation
1320*5971e316Smrg$$
1321*5971e316Smrg% [n] -> { [x, y] -> [1 + x, 1 - n + y] | n >= 2 }
1322*5971e316SmrgR = n \to \{\, (x, y) \to (1 + x, 1 - n + y) \mid n \ge 2 \,\}
1323*5971e316Smrg.
1324*5971e316Smrg$$
1325*5971e316SmrgWe have
1326*5971e316Smrg$$
1327*5971e316Smrg\diff R = n \to \{\, (1, 1 - n) \mid n \ge 2 \,\}
1328*5971e316Smrg$$
1329*5971e316Smrgand so, by treating the parameters in a special way, we obtain
1330*5971e316Smrgthe following approximation for $R^+$:
1331*5971e316Smrg$$
1332*5971e316Smrgn \to \{\, (x, y) \to (x', y') \mid n \ge 2 \wedge y' \le 1 - n + y \wedge x' \ge 1 + x \,\}
1333*5971e316Smrg.
1334*5971e316Smrg$$
1335*5971e316SmrgIf we consider instead
1336*5971e316Smrg$$
1337*5971e316SmrgR' = \{\, (n, x, y) \to (n, 1 + x, 1 - n + y) \mid n \ge 2 \,\}
1338*5971e316Smrg$$
1339*5971e316Smrgthen
1340*5971e316Smrg$$
1341*5971e316Smrg\diff R' = \{\, (0, 1, y) \mid y \le -1 \,\}
1342*5971e316Smrg$$
1343*5971e316Smrgand we obtain the approximation
1344*5971e316Smrg$$
1345*5971e316Smrgn \to \{\, (x, y) \to (x', y') \mid n \ge 2 \wedge x' \ge 1 + x \wedge y' \le x + y - x' \,\}
1346*5971e316Smrg.
1347*5971e316Smrg$$
1348*5971e316SmrgIf we consider both $\diff R$ and $\diff R'$, then we obtain
1349*5971e316Smrg$$
1350*5971e316Smrgn \to \{\, (x, y) \to (x', y') \mid n \ge 2 \wedge y' \le 1 - n + y \wedge x' \ge 1 + x \wedge y' \le x + y - x' \,\}
1351*5971e316Smrg.
1352*5971e316Smrg$$
1353*5971e316SmrgNote, however, that this is not the most accurate affine approximation that
1354*5971e316Smrgcan be obtained.  That would be
1355*5971e316Smrg$$
1356*5971e316Smrgn \to \{\, (x, y) \to (x', y') \mid y' \le 2 - n + x + y - x' \wedge n \ge 2 \wedge x' \ge 1 + x \,\}
1357*5971e316Smrg.
1358*5971e316Smrg$$
1359*5971e316Smrg\end{example}
1360*5971e316Smrg
1361*5971e316Smrg\subsection{Checking Exactness}
1362*5971e316Smrg
1363*5971e316SmrgThe approximation $T$ for the transitive closure $R^+$ can be obtained
1364*5971e316Smrgby projecting out the parameter $k$ from the approximation $K$
1365*5971e316Smrg\eqref{eq:transitive:approx} of the power $R^k$.
1366*5971e316SmrgSince $K$ is an overapproximation of $R^k$, $T$ will also be an
1367*5971e316Smrgoverapproximation of $R^+$.
1368*5971e316SmrgTo check whether the results are exact, we need to consider two
1369*5971e316Smrgcases depending on whether $R$ is {\em cyclic}, where $R$ is defined
1370*5971e316Smrgto be cyclic if $R^+$ maps any element to itself, i.e.,
1371*5971e316Smrg$R^+ \cap \identity \ne \emptyset$.
1372*5971e316SmrgIf $R$ is acyclic, then the inductive definition of
1373*5971e316Smrg\eqref{eq:transitive:inductive} is equivalent to its completion,
1374*5971e316Smrgi.e.,
1375*5971e316Smrg$$
1376*5971e316SmrgR^+ = R \cup \left(R \circ R^+\right)
1377*5971e316Smrg$$
1378*5971e316Smrgis a defining property.
1379*5971e316SmrgSince $T$ is known to be an overapproximation, we only need to check
1380*5971e316Smrgwhether
1381*5971e316Smrg$$
1382*5971e316SmrgT \subseteq R \cup \left(R \circ T\right)
1383*5971e316Smrg.
1384*5971e316Smrg$$
1385*5971e316SmrgThis is essentially Theorem~5 of \textcite{Kelly1996closure}.
1386*5971e316SmrgThe only difference is that they only consider lexicographically
1387*5971e316Smrgforward relations, a special case of acyclic relations.
1388*5971e316Smrg
1389*5971e316SmrgIf, on the other hand, $R$ is cyclic, then we have to resort
1390*5971e316Smrgto checking whether the approximation $K$ of the power is exact.
1391*5971e316SmrgNote that $T$ may be exact even if $K$ is not exact, so the check
1392*5971e316Smrgis sound, but incomplete.
1393*5971e316SmrgTo check exactness of the power, we simply need to check
1394*5971e316Smrg\eqref{eq:transitive:power}.  Since again $K$ is known
1395*5971e316Smrgto be an overapproximation, we only need to check whether
1396*5971e316Smrg$$
1397*5971e316Smrg\begin{aligned}
1398*5971e316SmrgK'|_{y_{d+1} - x_{d+1} = 1} & \subseteq R'
1399*5971e316Smrg\\
1400*5971e316SmrgK'|_{y_{d+1} - x_{d+1} \ge 2} & \subseteq R' \circ K'|_{y_{d+1} - x_{d+1} \ge 1}
1401*5971e316Smrg,
1402*5971e316Smrg\end{aligned}
1403*5971e316Smrg$$
1404*5971e316Smrgwhere $R' = \{\, \vec x' \to \vec y' \mid \vec x \to \vec y \in R
1405*5971e316Smrg\wedge y_{d+1} - x_{d+1} = 1\,\}$, i.e., $R$ extended with path
1406*5971e316Smrglengths equal to 1.
1407*5971e316Smrg
1408*5971e316SmrgAll that remains is to explain how to check the cyclicity of $R$.
1409*5971e316SmrgNote that the exactness on the power is always sound, even
1410*5971e316Smrgin the acyclic case, so we only need to be careful that we find
1411*5971e316Smrgall cyclic cases.  Now, if $R$ is cyclic, i.e.,
1412*5971e316Smrg$R^+ \cap \identity \ne \emptyset$, then, since $T$ is
1413*5971e316Smrgan overapproximation of $R^+$, also
1414*5971e316Smrg$T \cap \identity \ne \emptyset$.  This in turn means
1415*5971e316Smrgthat $\Delta \, K'$ contains a point whose first $d$ coordinates
1416*5971e316Smrgare zero and whose final coordinate is positive.
1417*5971e316SmrgIn the implementation we currently perform this test on $P'$ instead of $K'$.
1418*5971e316SmrgNote that if $R^+$ is acyclic and $T$ is not, then the approximation
1419*5971e316Smrgis clearly not exact and the approximation of the power $K$
1420*5971e316Smrgwill not be exact either.
1421*5971e316Smrg
1422*5971e316Smrg\subsection{Decomposing $R$ into strongly connected components}
1423*5971e316Smrg
1424*5971e316SmrgIf the input relation $R$ is a union of several basic relations
1425*5971e316Smrgthat can be partially ordered
1426*5971e316Smrgthen the accuracy of the approximation may be improved by computing
1427*5971e316Smrgan approximation of each strongly connected components separately.
1428*5971e316SmrgFor example, if $R = R_1 \cup R_2$ and $R_1 \circ R_2 = \emptyset$,
1429*5971e316Smrgthen we know that any path that passes through $R_2$ cannot later
1430*5971e316Smrgpass through $R_1$, i.e.,
1431*5971e316Smrg\begin{equation}
1432*5971e316Smrg\label{eq:transitive:components}
1433*5971e316SmrgR^+ = R_1^+ \cup R_2^+ \cup \left(R_2^+ \circ R_1^+\right)
1434*5971e316Smrg.
1435*5971e316Smrg\end{equation}
1436*5971e316SmrgWe can therefore compute (approximations of) transitive closures
1437*5971e316Smrgof $R_1$ and $R_2$ separately.
1438*5971e316SmrgNote, however, that the condition $R_1 \circ R_2 = \emptyset$
1439*5971e316Smrgis actually too strong.
1440*5971e316SmrgIf $R_1 \circ R_2$ is a subset of $R_2 \circ R_1$
1441*5971e316Smrgthen we can reorder the segments
1442*5971e316Smrgin any path that moves through both $R_1$ and $R_2$ to
1443*5971e316Smrgfirst move through $R_1$ and then through $R_2$.
1444*5971e316Smrg
1445*5971e316SmrgThis idea can be generalized to relations that are unions
1446*5971e316Smrgof more than two basic relations by constructing the
1447*5971e316Smrgstrongly connected components in the graph with as vertices
1448*5971e316Smrgthe basic relations and an edge between two basic relations
1449*5971e316Smrg$R_i$ and $R_j$ if $R_i$ needs to follow $R_j$ in some paths.
1450*5971e316SmrgThat is, there is an edge from $R_i$ to $R_j$ iff
1451*5971e316Smrg\begin{equation}
1452*5971e316Smrg\label{eq:transitive:edge}
1453*5971e316SmrgR_i \circ R_j
1454*5971e316Smrg\not\subseteq
1455*5971e316SmrgR_j \circ R_i
1456*5971e316Smrg.
1457*5971e316Smrg\end{equation}
1458*5971e316SmrgThe components can be obtained from the graph by applying
1459*5971e316SmrgTarjan's algorithm \parencite{Tarjan1972}.
1460*5971e316Smrg
1461*5971e316SmrgIn practice, we compute the (extended) powers $K_i'$ of each component
1462*5971e316Smrgseparately and then compose them as in \eqref{eq:transitive:decompose}.
1463*5971e316SmrgNote, however, that in this case the order in which we apply them is
1464*5971e316Smrgimportant and should correspond to a topological ordering of the
1465*5971e316Smrgstrongly connected components.  Simply applying Tarjan's
1466*5971e316Smrgalgorithm will produce topologically sorted strongly connected components.
1467*5971e316SmrgThe graph on which Tarjan's algorithm is applied is constructed on-the-fly.
1468*5971e316SmrgThat is, whenever the algorithm checks if there is an edge between
1469*5971e316Smrgtwo vertices, we evaluate \eqref{eq:transitive:edge}.
1470*5971e316SmrgThe exactness check is performed on each component separately.
1471*5971e316SmrgIf the approximation turns out to be inexact for any of the components,
1472*5971e316Smrgthen the entire result is marked inexact and the exactness check
1473*5971e316Smrgis skipped on the components that still need to be handled.
1474*5971e316Smrg
1475*5971e316SmrgIt should be noted that \eqref{eq:transitive:components}
1476*5971e316Smrgis only valid for exact transitive closures.
1477*5971e316SmrgIf overapproximations are computed in the right hand side, then the result will
1478*5971e316Smrgstill be an overapproximation of the left hand side, but this result
1479*5971e316Smrgmay not be transitively closed.  If we only separate components based
1480*5971e316Smrgon the condition $R_i \circ R_j = \emptyset$, then there is no problem,
1481*5971e316Smrgas this condition will still hold on the computed approximations
1482*5971e316Smrgof the transitive closures.  If, however, we have exploited
1483*5971e316Smrg\eqref{eq:transitive:edge} during the decomposition and if the
1484*5971e316Smrgresult turns out not to be exact, then we check whether
1485*5971e316Smrgthe result is transitively closed.  If not, we recompute
1486*5971e316Smrgthe transitive closure, skipping the decomposition.
1487*5971e316SmrgNote that testing for transitive closedness on the result may
1488*5971e316Smrgbe fairly expensive, so we may want to make this check
1489*5971e316Smrgconfigurable.
1490*5971e316Smrg
1491*5971e316Smrg\begin{figure}
1492*5971e316Smrg\begin{center}
1493*5971e316Smrg\begin{tikzpicture}[x=0.5cm,y=0.5cm,>=stealth,shorten >=1pt]
1494*5971e316Smrg\foreach \x in {1,...,10}{
1495*5971e316Smrg    \foreach \y in {1,...,10}{
1496*5971e316Smrg	\draw[->] (\x,\y) -- (\x,\y+1);
1497*5971e316Smrg    }
1498*5971e316Smrg}
1499*5971e316Smrg\foreach \x in {1,...,20}{
1500*5971e316Smrg    \foreach \y in {5,...,15}{
1501*5971e316Smrg	\draw[->] (\x,\y) -- (\x+1,\y);
1502*5971e316Smrg    }
1503*5971e316Smrg}
1504*5971e316Smrg\end{tikzpicture}
1505*5971e316Smrg\end{center}
1506*5971e316Smrg\caption{The relation from \autoref{ex:closure4}}
1507*5971e316Smrg\label{f:closure4}
1508*5971e316Smrg\end{figure}
1509*5971e316Smrg\begin{example}
1510*5971e316Smrg\label{ex:closure4}
1511*5971e316SmrgConsider the relation in example {\tt closure4} that comes with
1512*5971e316Smrgthe Omega calculator~\parencite{Omega_calc}, $R = R_1 \cup R_2$,
1513*5971e316Smrgwith
1514*5971e316Smrg$$
1515*5971e316Smrg\begin{aligned}
1516*5971e316SmrgR_1 & = \{\, (x,y) \to (x,y+1) \mid 1 \le x,y \le 10 \,\}
1517*5971e316Smrg\\
1518*5971e316SmrgR_2 & = \{\, (x,y) \to (x+1,y) \mid 1 \le x \le 20 \wedge 5 \le y \le 15 \,\}
1519*5971e316Smrg.
1520*5971e316Smrg\end{aligned}
1521*5971e316Smrg$$
1522*5971e316SmrgThis relation is shown graphically in \autoref{f:closure4}.
1523*5971e316SmrgWe have
1524*5971e316Smrg$$
1525*5971e316Smrg\begin{aligned}
1526*5971e316SmrgR_1 \circ R_2 &=
1527*5971e316Smrg\{\, (x,y) \to (x+1,y+1) \mid 1 \le x \le 9 \wedge 5 \le y \le 10 \,\}
1528*5971e316Smrg\\
1529*5971e316SmrgR_2 \circ R_1 &=
1530*5971e316Smrg\{\, (x,y) \to (x+1,y+1) \mid 1 \le x \le 10 \wedge 4 \le y \le 10 \,\}
1531*5971e316Smrg.
1532*5971e316Smrg\end{aligned}
1533*5971e316Smrg$$
1534*5971e316SmrgClearly, $R_1 \circ R_2 \subseteq R_2 \circ R_1$ and so
1535*5971e316Smrg$$
1536*5971e316Smrg\left(
1537*5971e316SmrgR_1 \cup R_2
1538*5971e316Smrg\right)^+
1539*5971e316Smrg=
1540*5971e316Smrg\left(R_2^+ \circ R_1^+\right)
1541*5971e316Smrg\cup R_1^+
1542*5971e316Smrg\cup R_2^+
1543*5971e316Smrg.
1544*5971e316Smrg$$
1545*5971e316Smrg\end{example}
1546*5971e316Smrg
1547*5971e316Smrg\begin{figure}
1548*5971e316Smrg\newcounter{n}
1549*5971e316Smrg\newcounter{t1}
1550*5971e316Smrg\newcounter{t2}
1551*5971e316Smrg\newcounter{t3}
1552*5971e316Smrg\newcounter{t4}
1553*5971e316Smrg\begin{center}
1554*5971e316Smrg\begin{tikzpicture}[>=stealth,shorten >=1pt]
1555*5971e316Smrg\setcounter{n}{7}
1556*5971e316Smrg\foreach \i in {1,...,\value{n}}{
1557*5971e316Smrg    \foreach \j in {1,...,\value{n}}{
1558*5971e316Smrg	\setcounter{t1}{2 * \j - 4 - \i + 1}
1559*5971e316Smrg	\setcounter{t2}{\value{n} - 3 - \i + 1}
1560*5971e316Smrg	\setcounter{t3}{2 * \i - 1 - \j + 1}
1561*5971e316Smrg	\setcounter{t4}{\value{n} - \j + 1}
1562*5971e316Smrg	\ifnum\value{t1}>0\ifnum\value{t2}>0
1563*5971e316Smrg	\ifnum\value{t3}>0\ifnum\value{t4}>0
1564*5971e316Smrg	    \draw[thick,->] (\i,\j) to[out=20] (\i+3,\j);
1565*5971e316Smrg	\fi\fi\fi\fi
1566*5971e316Smrg	\setcounter{t1}{2 * \j - 1 - \i + 1}
1567*5971e316Smrg	\setcounter{t2}{\value{n} - \i + 1}
1568*5971e316Smrg	\setcounter{t3}{2 * \i - 4 - \j + 1}
1569*5971e316Smrg	\setcounter{t4}{\value{n} - 3 - \j + 1}
1570*5971e316Smrg	\ifnum\value{t1}>0\ifnum\value{t2}>0
1571*5971e316Smrg	\ifnum\value{t3}>0\ifnum\value{t4}>0
1572*5971e316Smrg	    \draw[thick,->] (\i,\j) to[in=-20,out=20] (\i,\j+3);
1573*5971e316Smrg	\fi\fi\fi\fi
1574*5971e316Smrg	\setcounter{t1}{2 * \j - 1 - \i + 1}
1575*5971e316Smrg	\setcounter{t2}{\value{n} - 1 - \i + 1}
1576*5971e316Smrg	\setcounter{t3}{2 * \i - 1 - \j + 1}
1577*5971e316Smrg	\setcounter{t4}{\value{n} - 1 - \j + 1}
1578*5971e316Smrg	\ifnum\value{t1}>0\ifnum\value{t2}>0
1579*5971e316Smrg	\ifnum\value{t3}>0\ifnum\value{t4}>0
1580*5971e316Smrg	    \draw[thick,->] (\i,\j) to (\i+1,\j+1);
1581*5971e316Smrg	\fi\fi\fi\fi
1582*5971e316Smrg    }
1583*5971e316Smrg}
1584*5971e316Smrg\end{tikzpicture}
1585*5971e316Smrg\end{center}
1586*5971e316Smrg\caption{The relation from \autoref{ex:decomposition}}
1587*5971e316Smrg\label{f:decomposition}
1588*5971e316Smrg\end{figure}
1589*5971e316Smrg\begin{example}
1590*5971e316Smrg\label{ex:decomposition}
1591*5971e316SmrgConsider the relation on the right of \textcite[Figure~2]{Beletska2009},
1592*5971e316Smrgreproduced in \autoref{f:decomposition}.
1593*5971e316SmrgThe relation can be described as $R = R_1 \cup R_2 \cup R_3$,
1594*5971e316Smrgwith
1595*5971e316Smrg$$
1596*5971e316Smrg\begin{aligned}
1597*5971e316SmrgR_1 &= n \mapsto \{\, (i,j) \to (i+3,j) \mid
1598*5971e316Smrgi \le 2 j - 4 \wedge
1599*5971e316Smrgi \le n - 3 \wedge
1600*5971e316Smrgj \le 2 i - 1 \wedge
1601*5971e316Smrgj \le n \,\}
1602*5971e316Smrg\\
1603*5971e316SmrgR_2 &= n \mapsto \{\, (i,j) \to (i,j+3) \mid
1604*5971e316Smrgi \le 2 j - 1 \wedge
1605*5971e316Smrgi \le n \wedge
1606*5971e316Smrgj \le 2 i - 4 \wedge
1607*5971e316Smrgj \le n - 3 \,\}
1608*5971e316Smrg\\
1609*5971e316SmrgR_3 &= n \mapsto \{\, (i,j) \to (i+1,j+1) \mid
1610*5971e316Smrgi \le 2 j - 1 \wedge
1611*5971e316Smrgi \le n - 1 \wedge
1612*5971e316Smrgj \le 2 i - 1 \wedge
1613*5971e316Smrgj \le n - 1\,\}
1614*5971e316Smrg.
1615*5971e316Smrg\end{aligned}
1616*5971e316Smrg$$
1617*5971e316SmrgThe figure shows this relation for $n = 7$.
1618*5971e316SmrgBoth
1619*5971e316Smrg$R_3 \circ R_1 \subseteq R_1 \circ R_3$
1620*5971e316Smrgand
1621*5971e316Smrg$R_3 \circ R_2 \subseteq R_2 \circ R_3$,
1622*5971e316Smrgwhich the reader can verify using the {\tt iscc} calculator:
1623*5971e316Smrg\begin{verbatim}
1624*5971e316SmrgR1 := [n] -> { [i,j] -> [i+3,j] : i <= 2 j - 4 and i <= n - 3 and
1625*5971e316Smrg                                  j <= 2 i - 1 and j <= n };
1626*5971e316SmrgR2 := [n] -> { [i,j] -> [i,j+3] : i <= 2 j - 1 and i <= n and
1627*5971e316Smrg                                  j <= 2 i - 4 and j <= n - 3 };
1628*5971e316SmrgR3 := [n] -> { [i,j] -> [i+1,j+1] : i <= 2 j - 1 and i <= n - 1 and
1629*5971e316Smrg                                    j <= 2 i - 1 and j <= n - 1 };
1630*5971e316Smrg(R1 . R3) - (R3 . R1);
1631*5971e316Smrg(R2 . R3) - (R3 . R2);
1632*5971e316Smrg\end{verbatim}
1633*5971e316Smrg$R_3$ can therefore be moved forward in any path.
1634*5971e316SmrgFor the other two basic relations, we have both
1635*5971e316Smrg$R_2 \circ R_1 \not\subseteq R_1 \circ R_2$
1636*5971e316Smrgand
1637*5971e316Smrg$R_1 \circ R_2 \not\subseteq R_2 \circ R_1$
1638*5971e316Smrgand so $R_1$ and $R_2$ form a strongly connected component.
1639*5971e316SmrgBy computing the power of $R_3$ and $R_1 \cup R_2$ separately
1640*5971e316Smrgand composing the results, the power of $R$ can be computed exactly
1641*5971e316Smrgusing \eqref{eq:transitive:singleton}.
1642*5971e316SmrgAs explained by \textcite{Beletska2009}, applying the same formula
1643*5971e316Smrgto $R$ directly, without a decomposition, would result in
1644*5971e316Smrgan overapproximation of the power.
1645*5971e316Smrg\end{example}
1646*5971e316Smrg
1647*5971e316Smrg\subsection{Partitioning the domains and ranges of $R$}
1648*5971e316Smrg
1649*5971e316SmrgThe algorithm of \autoref{s:power} assumes that the input relation $R$
1650*5971e316Smrgcan be treated as a union of translations.
1651*5971e316SmrgThis is a reasonable assumption if $R$ maps elements of a given
1652*5971e316Smrgabstract domain to the same domain.
1653*5971e316SmrgHowever, if $R$ is a union of relations that map between different
1654*5971e316Smrgdomains, then this assumption no longer holds.
1655*5971e316SmrgIn particular, when an entire dependence graph is encoded
1656*5971e316Smrgin a single relation, as is done by, e.g.,
1657*5971e316Smrg\textcite[Section~6.1]{Barthou2000MSE}, then it does not make
1658*5971e316Smrgsense to look at differences between iterations of different domains.
1659*5971e316SmrgNow, arguably, a modified Floyd-Warshall algorithm should
1660*5971e316Smrgbe applied to the dependence graph, as advocated by
1661*5971e316Smrg\textcite{Kelly1996closure}, with the transitive closure operation
1662*5971e316Smrgonly being applied to relations from a given domain to itself.
1663*5971e316SmrgHowever, it is also possible to detect disjoint domains and ranges
1664*5971e316Smrgand to apply Floyd-Warshall internally.
1665*5971e316Smrg
1666*5971e316Smrg\LinesNumbered
1667*5971e316Smrg\begin{algorithm}
1668*5971e316Smrg\caption{The modified Floyd-Warshall algorithm of
1669*5971e316Smrg\protect\textcite{Kelly1996closure}}
1670*5971e316Smrg\label{a:Floyd}
1671*5971e316Smrg\SetKwInput{Input}{Input}
1672*5971e316Smrg\SetKwInput{Output}{Output}
1673*5971e316Smrg\Input{Relations $R_{pq}$, $0 \le p, q < n$}
1674*5971e316Smrg\Output{Updated relations $R_{pq}$ such that each relation
1675*5971e316Smrg$R_{pq}$ contains all indirect paths from $p$ to $q$ in the input graph}
1676*5971e316Smrg%
1677*5971e316Smrg\BlankLine
1678*5971e316Smrg\SetAlgoVlined
1679*5971e316Smrg\DontPrintSemicolon
1680*5971e316Smrg%
1681*5971e316Smrg\For{$r \in [0, n-1]$}{
1682*5971e316Smrg    $R_{rr} \coloneqq R_{rr}^+$ \nllabel{l:Floyd:closure}\;
1683*5971e316Smrg    \For{$p \in [0, n-1]$}{
1684*5971e316Smrg	\For{$q \in [0, n-1]$}{
1685*5971e316Smrg	    \If{$p \ne r$ or $q \ne r$}{
1686*5971e316Smrg		$R_{pq} \coloneqq R_{pq} \cup \left(R_{rq} \circ R_{pr}\right)
1687*5971e316Smrg			     \cup \left(R_{rq} \circ R_{rr} \circ R_{pr}\right)$
1688*5971e316Smrg	     \nllabel{l:Floyd:update}
1689*5971e316Smrg	     }
1690*5971e316Smrg	}
1691*5971e316Smrg    }
1692*5971e316Smrg}
1693*5971e316Smrg\end{algorithm}
1694*5971e316Smrg
1695*5971e316SmrgLet the input relation $R$ be a union of $m$ basic relations $R_i$.
1696*5971e316SmrgLet $D_{2i}$ be the domains of $R_i$ and $D_{2i+1}$ the ranges of $R_i$.
1697*5971e316SmrgThe first step is to group overlapping $D_j$ until a partition is
1698*5971e316Smrgobtained.  If the resulting partition consists of a single part,
1699*5971e316Smrgthen we continue with the algorithm of \autoref{s:power}.
1700*5971e316SmrgOtherwise, we apply Floyd-Warshall on the graph with as vertices
1701*5971e316Smrgthe parts of the partition and as edges the $R_i$ attached to
1702*5971e316Smrgthe appropriate pairs of vertices.
1703*5971e316SmrgIn particular, let there be $n$ parts $P_k$ in the partition.
1704*5971e316SmrgWe construct $n^2$ relations
1705*5971e316Smrg$$
1706*5971e316SmrgR_{pq} \coloneqq \bigcup_{i \text{ s.t. } \domain R_i \subseteq P_p \wedge
1707*5971e316Smrg				 \range R_i \subseteq P_q} R_i
1708*5971e316Smrg,
1709*5971e316Smrg$$
1710*5971e316Smrgapply \autoref{a:Floyd} and return the union of all resulting
1711*5971e316Smrg$R_{pq}$ as the transitive closure of $R$.
1712*5971e316SmrgEach iteration of the $r$-loop in \autoref{a:Floyd} updates
1713*5971e316Smrgall relations $R_{pq}$ to include paths that go from $p$ to $r$,
1714*5971e316Smrgpossibly stay there for a while, and then go from $r$ to $q$.
1715*5971e316SmrgNote that paths that ``stay in $r$'' include all paths that
1716*5971e316Smrgpass through earlier vertices since $R_{rr}$ itself has been updated
1717*5971e316Smrgaccordingly in previous iterations of the outer loop.
1718*5971e316SmrgIn principle, it would be sufficient to use the $R_{pr}$
1719*5971e316Smrgand $R_{rq}$ computed in the previous iteration of the
1720*5971e316Smrg$r$-loop in Line~\ref{l:Floyd:update}.
1721*5971e316SmrgHowever, from an implementation perspective, it is easier
1722*5971e316Smrgto allow either or both of these to have been updated
1723*5971e316Smrgin the same iteration of the $r$-loop.
1724*5971e316SmrgThis may result in duplicate paths, but these can usually
1725*5971e316Smrgbe removed by coalescing (\autoref{s:coalescing}) the result of the union
1726*5971e316Smrgin Line~\ref{l:Floyd:update}, which should be done in any case.
1727*5971e316SmrgThe transitive closure in Line~\ref{l:Floyd:closure}
1728*5971e316Smrgis performed using a recursive call.  This recursive call
1729*5971e316Smrgincludes the partitioning step, but the resulting partition will
1730*5971e316Smrgusually be a singleton.
1731*5971e316SmrgThe result of the recursive call will either be exact or an
1732*5971e316Smrgoverapproximation.  The final result of Floyd-Warshall is therefore
1733*5971e316Smrgalso exact or an overapproximation.
1734*5971e316Smrg
1735*5971e316Smrg\begin{figure}
1736*5971e316Smrg\begin{center}
1737*5971e316Smrg\begin{tikzpicture}[x=1cm,y=1cm,>=stealth,shorten >=3pt]
1738*5971e316Smrg\foreach \x/\y in {0/0,1/1,3/2} {
1739*5971e316Smrg    \fill (\x,\y) circle (2pt);
1740*5971e316Smrg}
1741*5971e316Smrg\foreach \x/\y in {0/1,2/2,3/3} {
1742*5971e316Smrg    \draw (\x,\y) circle (2pt);
1743*5971e316Smrg}
1744*5971e316Smrg\draw[->] (0,0) -- (0,1);
1745*5971e316Smrg\draw[->] (0,1) -- (1,1);
1746*5971e316Smrg\draw[->] (2,2) -- (3,2);
1747*5971e316Smrg\draw[->] (3,2) -- (3,3);
1748*5971e316Smrg\draw[->,dashed] (2,2) -- (3,3);
1749*5971e316Smrg\draw[->,dotted] (0,0) -- (1,1);
1750*5971e316Smrg\end{tikzpicture}
1751*5971e316Smrg\end{center}
1752*5971e316Smrg\caption{The relation (solid arrows) on the right of Figure~1 of
1753*5971e316Smrg\protect\textcite{Beletska2009} and its transitive closure}
1754*5971e316Smrg\label{f:COCOA:1}
1755*5971e316Smrg\end{figure}
1756*5971e316Smrg\begin{example}
1757*5971e316SmrgConsider the relation on the right of Figure~1 of
1758*5971e316Smrg\textcite{Beletska2009},
1759*5971e316Smrgreproduced in \autoref{f:COCOA:1}.
1760*5971e316SmrgThis relation can be described as
1761*5971e316Smrg$$
1762*5971e316Smrg\begin{aligned}
1763*5971e316Smrg\{\, (x, y) \to (x_2, y_2) \mid {} & (3y = 2x \wedge x_2 = x \wedge 3y_2 = 3 + 2x \wedge x \ge 0 \wedge x \le 3) \vee {} \\
1764*5971e316Smrg& (x_2 = 1 + x \wedge y_2 = y \wedge x \ge 0 \wedge 3y \ge 2 + 2x \wedge x \le 2 \wedge 3y \le 3 + 2x) \,\}
1765*5971e316Smrg.
1766*5971e316Smrg\end{aligned}
1767*5971e316Smrg$$
1768*5971e316SmrgNote that the domain of the upward relation overlaps with the range
1769*5971e316Smrgof the rightward relation and vice versa, but that the domain
1770*5971e316Smrgof neither relation overlaps with its own range or the domain of
1771*5971e316Smrgthe other relation.
1772*5971e316SmrgThe domains and ranges can therefore be partitioned into two parts,
1773*5971e316Smrg$P_0$ and $P_1$, shown as the white and black dots in \autoref{f:COCOA:1},
1774*5971e316Smrgrespectively.
1775*5971e316SmrgInitially, we have
1776*5971e316Smrg$$
1777*5971e316Smrg\begin{aligned}
1778*5971e316SmrgR_{00} & = \emptyset
1779*5971e316Smrg\\
1780*5971e316SmrgR_{01} & =
1781*5971e316Smrg\{\, (x, y) \to (x+1, y) \mid
1782*5971e316Smrg(x \ge 0 \wedge 3y \ge 2 + 2x \wedge x \le 2 \wedge 3y \le 3 + 2x) \,\}
1783*5971e316Smrg\\
1784*5971e316SmrgR_{10} & =
1785*5971e316Smrg\{\, (x, y) \to (x_2, y_2) \mid (3y = 2x \wedge x_2 = x \wedge 3y_2 = 3 + 2x \wedge x \ge 0 \wedge x \le 3) \,\}
1786*5971e316Smrg\\
1787*5971e316SmrgR_{11} & = \emptyset
1788*5971e316Smrg.
1789*5971e316Smrg\end{aligned}
1790*5971e316Smrg$$
1791*5971e316SmrgIn the first iteration, $R_{00}$ remains the same ($\emptyset^+ = \emptyset$).
1792*5971e316Smrg$R_{01}$ and $R_{10}$ are therefore also unaffected, but
1793*5971e316Smrg$R_{11}$ is updated to include $R_{01} \circ R_{10}$, i.e.,
1794*5971e316Smrgthe dashed arrow in the figure.
1795*5971e316SmrgThis new $R_{11}$ is obviously transitively closed, so it is not
1796*5971e316Smrgchanged in the second iteration and it does not have an effect
1797*5971e316Smrgon $R_{01}$ and $R_{10}$.  However, $R_{00}$ is updated to
1798*5971e316Smrginclude $R_{10} \circ R_{01}$, i.e., the dotted arrow in the figure.
1799*5971e316SmrgThe transitive closure of the original relation is then equal to
1800*5971e316Smrg$R_{00} \cup R_{01} \cup R_{10} \cup R_{11}$.
1801*5971e316Smrg\end{example}
1802*5971e316Smrg
1803*5971e316Smrg\subsection{Incremental Computation}
1804*5971e316Smrg\label{s:incremental}
1805*5971e316Smrg
1806*5971e316SmrgIn some cases it is possible and useful to compute the transitive closure
1807*5971e316Smrgof union of basic relations incrementally.  In particular,
1808*5971e316Smrgif $R$ is a union of $m$ basic maps,
1809*5971e316Smrg$$
1810*5971e316SmrgR = \bigcup_j R_j
1811*5971e316Smrg,
1812*5971e316Smrg$$
1813*5971e316Smrgthen we can pick some $R_i$ and compute the transitive closure of $R$ as
1814*5971e316Smrg\begin{equation}
1815*5971e316Smrg\label{eq:transitive:incremental}
1816*5971e316SmrgR^+ = R_i^+ \cup
1817*5971e316Smrg\left(
1818*5971e316Smrg\bigcup_{j \ne i}
1819*5971e316SmrgR_i^* \circ R_j \circ R_i^*
1820*5971e316Smrg\right)^+
1821*5971e316Smrg.
1822*5971e316Smrg\end{equation}
1823*5971e316SmrgFor this approach to be successful, it is crucial that each
1824*5971e316Smrgof the disjuncts in the argument of the second transitive
1825*5971e316Smrgclosure in \eqref{eq:transitive:incremental} be representable
1826*5971e316Smrgas a single basic relation, i.e., without a union.
1827*5971e316SmrgIf this condition holds, then by using \eqref{eq:transitive:incremental},
1828*5971e316Smrgthe number of disjuncts in the argument of the transitive closure
1829*5971e316Smrgcan be reduced by one.
1830*5971e316SmrgNow, $R_i^* = R_i^+ \cup \identity$, but in some cases it is possible
1831*5971e316Smrgto relax the constraints of $R_i^+$ to include part of the identity relation,
1832*5971e316Smrgsay on domain $D$.  We will use the notation
1833*5971e316Smrg${\cal C}(R_i,D) = R_i^+ \cup \identity_D$ to represent
1834*5971e316Smrgthis relaxed version of $R^+$.
1835*5971e316Smrg\textcite{Kelly1996closure} use the notation $R_i^?$.
1836*5971e316Smrg${\cal C}(R_i,D)$ can be computed by allowing $k$ to attain
1837*5971e316Smrgthe value $0$ in \eqref{eq:transitive:Q} and by using
1838*5971e316Smrg$$
1839*5971e316SmrgP \cap \left(D \to D\right)
1840*5971e316Smrg$$
1841*5971e316Smrginstead of \eqref{eq:transitive:approx}.
1842*5971e316SmrgTypically, $D$ will be a strict superset of both $\domain R_i$
1843*5971e316Smrgand $\range R_i$.  We therefore need to check that domain
1844*5971e316Smrgand range of the transitive closure are part of ${\cal C}(R_i,D)$,
1845*5971e316Smrgi.e., the part that results from the paths of positive length ($k \ge 1$),
1846*5971e316Smrgare equal to the domain and range of $R_i$.
1847*5971e316SmrgIf not, then the incremental approach cannot be applied for
1848*5971e316Smrgthe given choice of $R_i$ and $D$.
1849*5971e316Smrg
1850*5971e316SmrgIn order to be able to replace $R^*$ by ${\cal C}(R_i,D)$
1851*5971e316Smrgin \eqref{eq:transitive:incremental}, $D$ should be chosen
1852*5971e316Smrgto include both $\domain R$ and $\range R$, i.e., such
1853*5971e316Smrgthat $\identity_D \circ R_j \circ \identity_D = R_j$ for all $j\ne i$.
1854*5971e316Smrg\textcite{Kelly1996closure} say that they use
1855*5971e316Smrg$D = \domain R_i \cup \range R_i$, but presumably they mean that
1856*5971e316Smrgthey use $D = \domain R \cup \range R$.
1857*5971e316SmrgNow, this expression of $D$ contains a union, so it not directly usable.
1858*5971e316Smrg\textcite{Kelly1996closure} do not explain how they avoid this union.
1859*5971e316SmrgApparently, in their implementation,
1860*5971e316Smrgthey are using the convex hull of $\domain R \cup \range R$
1861*5971e316Smrgor at least an approximation of this convex hull.
1862*5971e316SmrgWe use the simple hull (\autoref{s:simple hull}) of $\domain R \cup \range R$.
1863*5971e316Smrg
1864*5971e316SmrgIt is also possible to use a domain $D$ that does {\em not\/}
1865*5971e316Smrginclude $\domain R \cup \range R$, but then we have to
1866*5971e316Smrgcompose with ${\cal C}(R_i,D)$ more selectively.
1867*5971e316SmrgIn particular, if we have
1868*5971e316Smrg\begin{equation}
1869*5971e316Smrg\label{eq:transitive:right}
1870*5971e316Smrg\text{for each $j \ne i$ either }
1871*5971e316Smrg\domain R_j \subseteq D \text{ or } \domain R_j \cap \range R_i = \emptyset
1872*5971e316Smrg\end{equation}
1873*5971e316Smrgand, similarly,
1874*5971e316Smrg\begin{equation}
1875*5971e316Smrg\label{eq:transitive:left}
1876*5971e316Smrg\text{for each $j \ne i$ either }
1877*5971e316Smrg\range R_j \subseteq D \text{ or } \range R_j \cap \domain R_i = \emptyset
1878*5971e316Smrg\end{equation}
1879*5971e316Smrgthen we can refine \eqref{eq:transitive:incremental} to
1880*5971e316Smrg$$
1881*5971e316SmrgR_i^+ \cup
1882*5971e316Smrg\left(
1883*5971e316Smrg\left(
1884*5971e316Smrg\bigcup_{\shortstack{$\scriptstyle\domain R_j \subseteq D $\\
1885*5971e316Smrg		     $\scriptstyle\range R_j \subseteq D$}}
1886*5971e316Smrg{\cal C} \circ R_j \circ {\cal C}
1887*5971e316Smrg\right)
1888*5971e316Smrg\cup
1889*5971e316Smrg\left(
1890*5971e316Smrg\bigcup_{\shortstack{$\scriptstyle\domain R_j \cap \range R_i = \emptyset$\\
1891*5971e316Smrg		     $\scriptstyle\range R_j \subseteq D$}}
1892*5971e316Smrg\!\!\!\!\!
1893*5971e316Smrg{\cal C} \circ R_j
1894*5971e316Smrg\right)
1895*5971e316Smrg\cup
1896*5971e316Smrg\left(
1897*5971e316Smrg\bigcup_{\shortstack{$\scriptstyle\domain R_j \subseteq D $\\
1898*5971e316Smrg		     $\scriptstyle\range R_j \cap \domain R_i = \emptyset$}}
1899*5971e316Smrg\!\!\!\!\!
1900*5971e316SmrgR_j \circ {\cal C}
1901*5971e316Smrg\right)
1902*5971e316Smrg\cup
1903*5971e316Smrg\left(
1904*5971e316Smrg\bigcup_{\shortstack{$\scriptstyle\domain R_j \cap \range R_i = \emptyset$\\
1905*5971e316Smrg		     $\scriptstyle\range R_j \cap \domain R_i = \emptyset$}}
1906*5971e316Smrg\!\!\!\!\!
1907*5971e316SmrgR_j
1908*5971e316Smrg\right)
1909*5971e316Smrg\right)^+
1910*5971e316Smrg.
1911*5971e316Smrg$$
1912*5971e316SmrgIf only property~\eqref{eq:transitive:right} holds,
1913*5971e316Smrgwe can use
1914*5971e316Smrg$$
1915*5971e316SmrgR_i^+ \cup
1916*5971e316Smrg\left(
1917*5971e316Smrg\left(
1918*5971e316SmrgR_i^+ \cup \identity
1919*5971e316Smrg\right)
1920*5971e316Smrg\circ
1921*5971e316Smrg\left(
1922*5971e316Smrg\left(
1923*5971e316Smrg\bigcup_{\shortstack{$\scriptstyle\domain R_j \subseteq D $}}
1924*5971e316SmrgR_j \circ {\cal C}
1925*5971e316Smrg\right)
1926*5971e316Smrg\cup
1927*5971e316Smrg\left(
1928*5971e316Smrg\bigcup_{\shortstack{$\scriptstyle\domain R_j \cap \range R_i = \emptyset$}}
1929*5971e316Smrg\!\!\!\!\!
1930*5971e316SmrgR_j
1931*5971e316Smrg\right)
1932*5971e316Smrg\right)^+
1933*5971e316Smrg\right)
1934*5971e316Smrg,
1935*5971e316Smrg$$
1936*5971e316Smrgwhile if only property~\eqref{eq:transitive:left} holds,
1937*5971e316Smrgwe can use
1938*5971e316Smrg$$
1939*5971e316SmrgR_i^+ \cup
1940*5971e316Smrg\left(
1941*5971e316Smrg\left(
1942*5971e316Smrg\left(
1943*5971e316Smrg\bigcup_{\shortstack{$\scriptstyle\range R_j \subseteq D $}}
1944*5971e316Smrg{\cal C} \circ R_j
1945*5971e316Smrg\right)
1946*5971e316Smrg\cup
1947*5971e316Smrg\left(
1948*5971e316Smrg\bigcup_{\shortstack{$\scriptstyle\range R_j \cap \domain R_i = \emptyset$}}
1949*5971e316Smrg\!\!\!\!\!
1950*5971e316SmrgR_j
1951*5971e316Smrg\right)
1952*5971e316Smrg\right)^+
1953*5971e316Smrg\circ
1954*5971e316Smrg\left(
1955*5971e316SmrgR_i^+ \cup \identity
1956*5971e316Smrg\right)
1957*5971e316Smrg\right)
1958*5971e316Smrg.
1959*5971e316Smrg$$
1960*5971e316Smrg
1961*5971e316SmrgIt should be noted that if we want the result of the incremental
1962*5971e316Smrgapproach to be transitively closed, then we can only apply it
1963*5971e316Smrgif all of the transitive closure operations involved are exact.
1964*5971e316SmrgIf, say, the second transitive closure in \eqref{eq:transitive:incremental}
1965*5971e316Smrgcontains extra elements, then the result does not necessarily contain
1966*5971e316Smrgthe composition of these extra elements with powers of $R_i$.
1967*5971e316Smrg
1968*5971e316Smrg\subsection{An {\tt Omega}-like implementation}
1969*5971e316Smrg
1970*5971e316SmrgWhile the main algorithm of \textcite{Kelly1996closure} is
1971*5971e316Smrgdesigned to compute and underapproximation of the transitive closure,
1972*5971e316Smrgthe authors mention that they could also compute overapproximations.
1973*5971e316SmrgIn this section, we describe our implementation of an algorithm
1974*5971e316Smrgthat is based on their ideas.
1975*5971e316SmrgNote that the {\tt Omega} library computes underapproximations
1976*5971e316Smrg\parencite[Section 6.4]{Omega_lib}.
1977*5971e316Smrg
1978*5971e316SmrgThe main tool is Equation~(2) of \textcite{Kelly1996closure}.
1979*5971e316SmrgThe input relation $R$ is first overapproximated by a ``d-form'' relation
1980*5971e316Smrg$$
1981*5971e316Smrg\{\, \vec i \to \vec j \mid \exists \vec \alpha :
1982*5971e316Smrg\vec L \le \vec j - \vec i \le \vec U
1983*5971e316Smrg\wedge
1984*5971e316Smrg(\forall p : j_p - i_p = M_p \alpha_p)
1985*5971e316Smrg\,\}
1986*5971e316Smrg,
1987*5971e316Smrg$$
1988*5971e316Smrgwhere $p$ ranges over the dimensions and $\vec L$, $\vec U$ and
1989*5971e316Smrg$\vec M$ are constant integer vectors.  The elements of $\vec U$
1990*5971e316Smrgmay be $\infty$, meaning that there is no upper bound corresponding
1991*5971e316Smrgto that element, and similarly for $\vec L$.
1992*5971e316SmrgSuch an overapproximation can be obtained by computing strides,
1993*5971e316Smrglower and upper bounds on the difference set $\Delta \, R$.
1994*5971e316SmrgThe transitive closure of such a ``d-form'' relation is
1995*5971e316Smrg\begin{equation}
1996*5971e316Smrg\label{eq:omega}
1997*5971e316Smrg\{\, \vec i \to \vec j \mid \exists \vec \alpha, k :
1998*5971e316Smrgk \ge 1 \wedge
1999*5971e316Smrgk \, \vec L \le \vec j - \vec i \le k \, \vec U
2000*5971e316Smrg\wedge
2001*5971e316Smrg(\forall p : j_p - i_p = M_p \alpha_p)
2002*5971e316Smrg\,\}
2003*5971e316Smrg.
2004*5971e316Smrg\end{equation}
2005*5971e316SmrgThe domain and range of this transitive closure are then
2006*5971e316Smrgintersected with those of the input relation.
2007*5971e316SmrgThis is a special case of the algorithm in \autoref{s:power}.
2008*5971e316Smrg
2009*5971e316SmrgIn their algorithm for computing lower bounds, the authors
2010*5971e316Smrguse the above algorithm as a substep on the disjuncts in the relation.
2011*5971e316SmrgAt the end, they say
2012*5971e316Smrg\begin{quote}
2013*5971e316SmrgIf an upper bound is required, it can be calculated in a manner
2014*5971e316Smrgsimilar to that of a single conjunct [sic] relation.
2015*5971e316Smrg\end{quote}
2016*5971e316SmrgPresumably, the authors mean that a ``d-form'' approximation
2017*5971e316Smrgof the whole input relation should be used.
2018*5971e316SmrgHowever, the accuracy can be improved by also trying to
2019*5971e316Smrgapply the incremental technique from the same paper,
2020*5971e316Smrgwhich is explained in more detail in \autoref{s:incremental}.
2021*5971e316SmrgIn this case, ${\cal C}(R_i,D)$ can be obtained by
2022*5971e316Smrgallowing the value zero for $k$ in \eqref{eq:omega},
2023*5971e316Smrgi.e., by computing
2024*5971e316Smrg$$
2025*5971e316Smrg\{\, \vec i \to \vec j \mid \exists \vec \alpha, k :
2026*5971e316Smrgk \ge 0 \wedge
2027*5971e316Smrgk \, \vec L \le \vec j - \vec i \le k \, \vec U
2028*5971e316Smrg\wedge
2029*5971e316Smrg(\forall p : j_p - i_p = M_p \alpha_p)
2030*5971e316Smrg\,\}
2031*5971e316Smrg.
2032*5971e316Smrg$$
2033*5971e316SmrgIn our implementation we take as $D$ the simple hull
2034*5971e316Smrg(\autoref{s:simple hull}) of $\domain R \cup \range R$.
2035*5971e316SmrgTo determine whether it is safe to use ${\cal C}(R_i,D)$,
2036*5971e316Smrgwe check the following conditions, as proposed by
2037*5971e316Smrg\textcite{Kelly1996closure}:
2038*5971e316Smrg${\cal C}(R_i,D) - R_i^+$ is not a union and for each $j \ne i$
2039*5971e316Smrgthe condition
2040*5971e316Smrg$$
2041*5971e316Smrg\left({\cal C}(R_i,D) - R_i^+\right)
2042*5971e316Smrg\circ
2043*5971e316SmrgR_j
2044*5971e316Smrg\circ
2045*5971e316Smrg\left({\cal C}(R_i,D) - R_i^+\right)
2046*5971e316Smrg=
2047*5971e316SmrgR_j
2048*5971e316Smrg$$
2049*5971e316Smrgholds.
2050