xref: /llvm-project/mlir/lib/Analysis/Presburger/Barvinok.cpp (revision 165f45354ae51bd00fe9000afbdcc4405e360b02)
1 //===- Barvinok.cpp - Barvinok's Algorithm ----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "mlir/Analysis/Presburger/Barvinok.h"
10 #include "mlir/Analysis/Presburger/Utils.h"
11 #include "llvm/ADT/Sequence.h"
12 #include <algorithm>
13 
14 using namespace mlir;
15 using namespace presburger;
16 using namespace mlir::presburger::detail;
17 
18 /// Assuming that the input cone is pointed at the origin,
19 /// converts it to its dual in V-representation.
20 /// Essentially we just remove the all-zeroes constant column.
21 ConeV mlir::presburger::detail::getDual(ConeH cone) {
22   unsigned numIneq = cone.getNumInequalities();
23   unsigned numVar = cone.getNumCols() - 1;
24   ConeV dual(numIneq, numVar, 0, 0);
25   // Assuming that an inequality of the form
26   // a1*x1 + ... + an*xn + b ≥ 0
27   // is represented as a row [a1, ..., an, b]
28   // and that b = 0.
29 
30   for (auto i : llvm::seq<int>(0, numIneq)) {
31     assert(cone.atIneq(i, numVar) == 0 &&
32            "H-representation of cone is not centred at the origin!");
33     for (unsigned j = 0; j < numVar; ++j) {
34       dual.at(i, j) = cone.atIneq(i, j);
35     }
36   }
37 
38   // Now dual is of the form [ [a1, ..., an] , ... ]
39   // which is the V-representation of the dual.
40   return dual;
41 }
42 
43 /// Converts a cone in V-representation to the H-representation
44 /// of its dual, pointed at the origin (not at the original vertex).
45 /// Essentially adds a column consisting only of zeroes to the end.
46 ConeH mlir::presburger::detail::getDual(ConeV cone) {
47   unsigned rows = cone.getNumRows();
48   unsigned columns = cone.getNumColumns();
49   ConeH dual = defineHRep(columns);
50   // Add a new column (for constants) at the end.
51   // This will be initialized to zero.
52   cone.insertColumn(columns);
53 
54   for (unsigned i = 0; i < rows; ++i)
55     dual.addInequality(cone.getRow(i));
56 
57   // Now dual is of the form [ [a1, ..., an, 0] , ... ]
58   // which is the H-representation of the dual.
59   return dual;
60 }
61 
62 /// Find the index of a cone in V-representation.
63 DynamicAPInt mlir::presburger::detail::getIndex(const ConeV &cone) {
64   if (cone.getNumRows() > cone.getNumColumns())
65     return DynamicAPInt(0);
66 
67   return cone.determinant();
68 }
69 
70 /// Compute the generating function for a unimodular cone.
71 /// This consists of a single term of the form
72 /// sign * x^num / prod_j (1 - x^den_j)
73 ///
74 /// sign is either +1 or -1.
75 /// den_j is defined as the set of generators of the cone.
76 /// num is computed by expressing the vertex as a weighted
77 /// sum of the generators, and then taking the floor of the
78 /// coefficients.
79 GeneratingFunction
80 mlir::presburger::detail::computeUnimodularConeGeneratingFunction(
81     ParamPoint vertex, int sign, const ConeH &cone) {
82   // Consider a cone with H-representation [0  -1].
83   //                                       [-1 -2]
84   // Let the vertex be given by the matrix [ 2  2   0], with 2 params.
85   //                                       [-1 -1/2 1]
86 
87   // `cone` must be unimodular.
88   assert(abs(getIndex(getDual(cone))) == 1 && "input cone is not unimodular!");
89 
90   unsigned numVar = cone.getNumVars();
91   unsigned numIneq = cone.getNumInequalities();
92 
93   // Thus its ray matrix, U, is the inverse of the
94   // transpose of its inequality matrix, `cone`.
95   // The last column of the inequality matrix is null,
96   // so we remove it to obtain a square matrix.
97   FracMatrix transp = FracMatrix(cone.getInequalities()).transpose();
98   transp.removeRow(numVar);
99 
100   FracMatrix generators(numVar, numIneq);
101   transp.determinant(/*inverse=*/&generators); // This is the U-matrix.
102   // Thus the generators are given by U = [2  -1].
103   //                                      [-1  0]
104 
105   // The powers in the denominator of the generating
106   // function are given by the generators of the cone,
107   // i.e., the rows of the matrix U.
108   std::vector<Point> denominator(numIneq);
109   ArrayRef<Fraction> row;
110   for (auto i : llvm::seq<int>(0, numVar)) {
111     row = generators.getRow(i);
112     denominator[i] = Point(row);
113   }
114 
115   // The vertex is v \in Z^{d x (n+1)}
116   // We need to find affine functions of parameters λ_i(p)
117   // such that v = Σ λ_i(p)*u_i,
118   // where u_i are the rows of U (generators)
119   // The λ_i are given by the columns of Λ = v^T U^{-1}, and
120   // we have transp = U^{-1}.
121   // Then the exponent in the numerator will be
122   // Σ -floor(-λ_i(p))*u_i.
123   // Thus we store the (exponent of the) numerator as the affine function -Λ,
124   // since the generators u_i are already stored as the exponent of the
125   // denominator. Note that the outer -1 will have to be accounted for, as it is
126   // not stored. See end for an example.
127 
128   unsigned numColumns = vertex.getNumColumns();
129   unsigned numRows = vertex.getNumRows();
130   ParamPoint numerator(numColumns, numRows);
131   SmallVector<Fraction> ithCol(numRows);
132   for (auto i : llvm::seq<int>(0, numColumns)) {
133     for (auto j : llvm::seq<int>(0, numRows))
134       ithCol[j] = vertex(j, i);
135     numerator.setRow(i, transp.preMultiplyWithRow(ithCol));
136     numerator.negateRow(i);
137   }
138   // Therefore Λ will be given by [ 1    0 ] and the negation of this will be
139   //                              [ 1/2 -1 ]
140   //                              [ -1  -2 ]
141   // stored as the numerator.
142   // Algebraically, the numerator exponent is
143   // [ -2 ⌊ - N - M/2 + 1 ⌋ + 1 ⌊ 0 + M + 2 ⌋ ] -> first  COLUMN of U is [2, -1]
144   // [  1 ⌊ - N - M/2 + 1 ⌋ + 0 ⌊ 0 + M + 2 ⌋ ] -> second COLUMN of U is [-1, 0]
145 
146   return GeneratingFunction(numColumns - 1, SmallVector<int>(1, sign),
147                             std::vector({numerator}),
148                             std::vector({denominator}));
149 }
150 
151 /// We use Gaussian elimination to find the solution to a set of d equations
152 /// of the form
153 /// a_1 x_1 + ... + a_d x_d + b_1 m_1 + ... + b_p m_p + c = 0
154 /// where x_i are variables,
155 /// m_i are parameters and
156 /// a_i, b_i, c are rational coefficients.
157 ///
158 /// The solution expresses each x_i as an affine function of the m_i, and is
159 /// therefore represented as a matrix of size d x (p+1).
160 /// If there is no solution, we return null.
161 std::optional<ParamPoint>
162 mlir::presburger::detail::solveParametricEquations(FracMatrix equations) {
163   // equations is a d x (d + p + 1) matrix.
164   // Each row represents an equation.
165   unsigned d = equations.getNumRows();
166   unsigned numCols = equations.getNumColumns();
167 
168   // If the determinant is zero, there is no unique solution.
169   // Thus we return null.
170   if (FracMatrix(equations.getSubMatrix(/*fromRow=*/0, /*toRow=*/d - 1,
171                                         /*fromColumn=*/0,
172                                         /*toColumn=*/d - 1))
173           .determinant() == 0)
174     return std::nullopt;
175 
176   // Perform row operations to make each column all zeros except for the
177   // diagonal element, which is made to be one.
178   for (unsigned i = 0; i < d; ++i) {
179     // First ensure that the diagonal element is nonzero, by swapping
180     // it with a row that is non-zero at column i.
181     if (equations(i, i) != 0)
182       continue;
183     for (unsigned j = i + 1; j < d; ++j) {
184       if (equations(j, i) == 0)
185         continue;
186       equations.swapRows(j, i);
187       break;
188     }
189 
190     Fraction diagElement = equations(i, i);
191 
192     // Apply row operations to make all elements except the diagonal to zero.
193     for (unsigned j = 0; j < d; ++j) {
194       if (i == j)
195         continue;
196       if (equations(j, i) == 0)
197         continue;
198       // Apply row operations to make element (j, i) zero by subtracting the
199       // ith row, appropriately scaled.
200       Fraction currentElement = equations(j, i);
201       equations.addToRow(/*sourceRow=*/i, /*targetRow=*/j,
202                          /*scale=*/-currentElement / diagElement);
203     }
204   }
205 
206   // Rescale diagonal elements to 1.
207   for (unsigned i = 0; i < d; ++i)
208     equations.scaleRow(i, 1 / equations(i, i));
209 
210   // Now we have reduced the equations to the form
211   // x_i + b_1' m_1 + ... + b_p' m_p + c' = 0
212   // i.e. each variable appears exactly once in the system, and has coefficient
213   // one.
214   //
215   // Thus we have
216   // x_i = - b_1' m_1 - ... - b_p' m_p - c
217   // and so we return the negation of the last p + 1 columns of the matrix.
218   //
219   // We copy these columns and return them.
220   ParamPoint vertex =
221       equations.getSubMatrix(/*fromRow=*/0, /*toRow=*/d - 1,
222                              /*fromColumn=*/d, /*toColumn=*/numCols - 1);
223   vertex.negateMatrix();
224   return vertex;
225 }
226 
227 /// This is an implementation of the Clauss-Loechner algorithm for chamber
228 /// decomposition.
229 ///
230 /// We maintain a list of pairwise disjoint chambers and the generating
231 /// functions corresponding to each one. We iterate over the list of regions,
232 /// each time adding the current region's generating function to the chambers
233 /// where it is active and separating the chambers where it is not.
234 ///
235 /// Given the region each generating function is active in, for each subset of
236 /// generating functions the region that (the sum of) precisely this subset is
237 /// in, is the intersection of the regions that these are active in,
238 /// intersected with the complements of the remaining regions.
239 std::vector<std::pair<PresburgerSet, GeneratingFunction>>
240 mlir::presburger::detail::computeChamberDecomposition(
241     unsigned numSymbols, ArrayRef<std::pair<PresburgerSet, GeneratingFunction>>
242                              regionsAndGeneratingFunctions) {
243   assert(!regionsAndGeneratingFunctions.empty() &&
244          "there must be at least one chamber!");
245   // We maintain a list of regions and their associated generating function
246   // initialized with the universe and the empty generating function.
247   std::vector<std::pair<PresburgerSet, GeneratingFunction>> chambers = {
248       {PresburgerSet::getUniverse(PresburgerSpace::getSetSpace(numSymbols)),
249        GeneratingFunction(numSymbols, {}, {}, {})}};
250 
251   // We iterate over the region list.
252   //
253   // For each activity region R_j (corresponding to the generating function
254   // gf_j), we examine all the current chambers R_i.
255   //
256   // If R_j has a full-dimensional intersection with an existing chamber R_i,
257   // then that chamber is replaced by two new ones:
258   // 1. the intersection R_i \cap R_j, where the generating function is
259   // gf_i + gf_j.
260   // 2. the difference R_i - R_j, where the generating function is gf_i.
261   //
262   // At each step, we define a new chamber list after considering gf_j,
263   // replacing and appending chambers as discussed above.
264   //
265   // The loop has the invariant that the union over all the chambers gives the
266   // universe at every step.
267   for (const auto &[region, generatingFunction] :
268        regionsAndGeneratingFunctions) {
269     std::vector<std::pair<PresburgerSet, GeneratingFunction>> newChambers;
270 
271     for (const auto &[currentRegion, currentGeneratingFunction] : chambers) {
272       PresburgerSet intersection = currentRegion.intersect(region);
273 
274       // If the intersection is not full-dimensional, we do not modify
275       // the chamber list.
276       if (!intersection.isFullDim()) {
277         newChambers.emplace_back(currentRegion, currentGeneratingFunction);
278         continue;
279       }
280 
281       // If it is, we add the intersection and the difference as chambers.
282       newChambers.emplace_back(intersection,
283                                currentGeneratingFunction + generatingFunction);
284       newChambers.emplace_back(currentRegion.subtract(region),
285                                currentGeneratingFunction);
286     }
287     chambers = std::move(newChambers);
288   }
289 
290   return chambers;
291 }
292 
293 /// For a polytope expressed as a set of n inequalities, compute the generating
294 /// function corresponding to the lattice points included in the polytope. This
295 /// algorithm has three main steps:
296 /// 1. Enumerate the vertices, by iterating over subsets of inequalities and
297 ///    checking for satisfiability. For each d-subset of inequalities (where d
298 ///    is the number of variables), we solve to obtain the vertex in terms of
299 ///    the parameters, and then check for the region in parameter space where
300 ///    this vertex satisfies the remaining (n - d) inequalities.
301 /// 2. For each vertex, identify the tangent cone and compute the generating
302 ///    function corresponding to it. The generating function depends on the
303 ///    parametric expression of the vertex and the (non-parametric) generators
304 ///    of the tangent cone.
305 /// 3. [Clauss-Loechner decomposition] Identify the regions in parameter space
306 ///    (chambers) where each vertex is active, and accordingly compute the
307 ///    GF of the polytope in each chamber.
308 ///
309 /// Verdoolaege, Sven, et al. "Counting integer points in parametric
310 /// polytopes using Barvinok's rational functions." Algorithmica 48 (2007):
311 /// 37-66.
312 std::vector<std::pair<PresburgerSet, GeneratingFunction>>
313 mlir::presburger::detail::computePolytopeGeneratingFunction(
314     const PolyhedronH &poly) {
315   unsigned numVars = poly.getNumRangeVars();
316   unsigned numSymbols = poly.getNumSymbolVars();
317   unsigned numIneqs = poly.getNumInequalities();
318 
319   // We store a list of the computed vertices.
320   std::vector<ParamPoint> vertices;
321   // For each vertex, we store the corresponding active region and the
322   // generating functions of the tangent cone, in order.
323   std::vector<std::pair<PresburgerSet, GeneratingFunction>>
324       regionsAndGeneratingFunctions;
325 
326   // We iterate over all subsets of inequalities with cardinality numVars,
327   // using permutations of numVars 1's and (numIneqs - numVars) 0's.
328   //
329   // For a given permutation, we consider a subset which contains
330   // the i'th inequality if the i'th bit in the bitset is 1.
331   //
332   // We start with the permutation that takes the last numVars inequalities.
333   SmallVector<int> indicator(numIneqs);
334   for (unsigned i = numIneqs - numVars; i < numIneqs; ++i)
335     indicator[i] = 1;
336 
337   do {
338     // Collect the inequalities corresponding to the bits which are set
339     // and the remaining ones.
340     auto [subset, remainder] = poly.getInequalities().splitByBitset(indicator);
341     // All other inequalities are stored in a2 and b2c2.
342     //
343     // These are column-wise splits of the inequalities;
344     // a2 stores the coefficients of the variables, and
345     // b2c2 stores the coefficients of the parameters and the constant term.
346     FracMatrix a2(numIneqs - numVars, numVars);
347     FracMatrix b2c2(numIneqs - numVars, numSymbols + 1);
348     a2 = FracMatrix(
349         remainder.getSubMatrix(0, numIneqs - numVars - 1, 0, numVars - 1));
350     b2c2 = FracMatrix(remainder.getSubMatrix(0, numIneqs - numVars - 1, numVars,
351                                              numVars + numSymbols));
352 
353     // Find the vertex, if any, corresponding to the current subset of
354     // inequalities.
355     std::optional<ParamPoint> vertex =
356         solveParametricEquations(FracMatrix(subset)); // d x (p+1)
357 
358     if (!vertex)
359       continue;
360     if (llvm::is_contained(vertices, vertex))
361       continue;
362     // If this subset corresponds to a vertex that has not been considered,
363     // store it.
364     vertices.emplace_back(*vertex);
365 
366     // If a vertex is formed by the intersection of more than d facets, we
367     // assume that any d-subset of these facets can be solved to obtain its
368     // expression. This assumption is valid because, if the vertex has two
369     // distinct parametric expressions, then a nontrivial equality among the
370     // parameters holds, which is a contradiction as we know the parameter
371     // space to be full-dimensional.
372 
373     // Let the current vertex be [X | y], where
374     // X represents the coefficients of the parameters and
375     // y represents the constant term.
376     //
377     // The region (in parameter space) where this vertex is active is given
378     // by substituting the vertex into the *remaining* inequalities of the
379     // polytope (those which were not collected into `subset`), i.e., into the
380     // inequalities [A2 | B2 | c2].
381     //
382     // Thus, the coefficients of the parameters after substitution become
383     // (A2 • X + B2)
384     // and the constant terms become
385     // (A2 • y + c2).
386     //
387     // The region is therefore given by
388     // (A2 • X + B2) p + (A2 • y + c2) ≥ 0
389     //
390     // This is equivalent to A2 • [X | y] + [B2 | c2].
391     //
392     // Thus we premultiply [X | y] with each row of A2
393     // and add each row of [B2 | c2].
394     FracMatrix activeRegion(numIneqs - numVars, numSymbols + 1);
395     for (unsigned i = 0; i < numIneqs - numVars; i++) {
396       activeRegion.setRow(i, vertex->preMultiplyWithRow(a2.getRow(i)));
397       activeRegion.addToRow(i, b2c2.getRow(i), 1);
398     }
399 
400     // We convert the representation of the active region to an integers-only
401     // form so as to store it as a PresburgerSet.
402     IntegerPolyhedron activeRegionRel(
403         PresburgerSpace::getRelationSpace(0, numSymbols, 0, 0), activeRegion);
404 
405     // Now, we compute the generating function at this vertex.
406     // We collect the inequalities corresponding to each vertex to compute
407     // the tangent cone at that vertex.
408     //
409     // We only need the coefficients of the variables (NOT the parameters)
410     // as the generating function only depends on these.
411     // We translate the cones to be pointed at the origin by making the
412     // constant terms zero.
413     ConeH tangentCone = defineHRep(numVars);
414     for (unsigned j = 0, e = subset.getNumRows(); j < e; ++j) {
415       SmallVector<DynamicAPInt> ineq(numVars + 1);
416       for (unsigned k = 0; k < numVars; ++k)
417         ineq[k] = subset(j, k);
418       tangentCone.addInequality(ineq);
419     }
420     // We assume that the tangent cone is unimodular, so there is no need
421     // to decompose it.
422     //
423     // In the general case, the unimodular decomposition may have several
424     // cones.
425     GeneratingFunction vertexGf(numSymbols, {}, {}, {});
426     SmallVector<std::pair<int, ConeH>, 4> unimodCones = {{1, tangentCone}};
427     for (const std::pair<int, ConeH> &signedCone : unimodCones) {
428       auto [sign, cone] = signedCone;
429       vertexGf = vertexGf +
430                  computeUnimodularConeGeneratingFunction(*vertex, sign, cone);
431     }
432     // We store the vertex we computed with the generating function of its
433     // tangent cone.
434     regionsAndGeneratingFunctions.emplace_back(PresburgerSet(activeRegionRel),
435                                                vertexGf);
436   } while (std::next_permutation(indicator.begin(), indicator.end()));
437 
438   // Now, we use Clauss-Loechner decomposition to identify regions in parameter
439   // space where each vertex is active. These regions (chambers) have the
440   // property that no two of them have a full-dimensional intersection, i.e.,
441   // they may share "facets" or "edges", but their intersection can only have
442   // up to numVars - 1 dimensions.
443   //
444   // In each chamber, we sum up the generating functions of the active vertices
445   // to find the generating function of the polytope.
446   return computeChamberDecomposition(numSymbols, regionsAndGeneratingFunctions);
447 }
448 
449 /// We use an iterative procedure to find a vector not orthogonal
450 /// to a given set, ignoring the null vectors.
451 /// Let the inputs be {x_1, ..., x_k}, all vectors of length n.
452 ///
453 /// In the following,
454 /// vs[:i] means the elements of vs up to and including the i'th one,
455 /// <vs, us> means the dot product of vs and us,
456 /// vs ++ [v] means the vector vs with the new element v appended to it.
457 ///
458 /// We proceed iteratively; for steps d = 0, ... n-1, we construct a vector
459 /// which is not orthogonal to any of {x_1[:d], ..., x_n[:d]}, ignoring
460 /// the null vectors.
461 /// At step d = 0, we let vs = [1]. Clearly this is not orthogonal to
462 /// any vector in the set {x_1[0], ..., x_n[0]}, except the null ones,
463 /// which we ignore.
464 /// At step d > 0 , we need a number v
465 /// s.t. <x_i[:d], vs++[v]> != 0 for all i.
466 /// => <x_i[:d-1], vs> + x_i[d]*v != 0
467 /// => v != - <x_i[:d-1], vs> / x_i[d]
468 /// We compute this value for all x_i, and then
469 /// set v to be the maximum element of this set plus one. Thus
470 /// v is outside the set as desired, and we append it to vs
471 /// to obtain the result of the d'th step.
472 Point mlir::presburger::detail::getNonOrthogonalVector(
473     ArrayRef<Point> vectors) {
474   unsigned dim = vectors[0].size();
475   assert(llvm::all_of(
476              vectors,
477              [&dim](const Point &vector) { return vector.size() == dim; }) &&
478          "all vectors need to be the same size!");
479 
480   SmallVector<Fraction> newPoint = {Fraction(1, 1)};
481   Fraction maxDisallowedValue = -Fraction(1, 0),
482            disallowedValue = Fraction(0, 1);
483 
484   for (unsigned d = 1; d < dim; ++d) {
485     // Compute the disallowed values  - <x_i[:d-1], vs> / x_i[d] for each i.
486     maxDisallowedValue = -Fraction(1, 0);
487     for (const Point &vector : vectors) {
488       if (vector[d] == 0)
489         continue;
490       disallowedValue =
491           -dotProduct(ArrayRef(vector).slice(0, d), newPoint) / vector[d];
492 
493       // Find the biggest such value
494       maxDisallowedValue = std::max(maxDisallowedValue, disallowedValue);
495     }
496     newPoint.emplace_back(maxDisallowedValue + 1);
497   }
498   return newPoint;
499 }
500 
501 /// We use the following recursive formula to find the coefficient of
502 /// s^power in the rational function given by P(s)/Q(s).
503 ///
504 /// Let P[i] denote the coefficient of s^i in the polynomial P(s).
505 /// (P/Q)[r] =
506 /// if (r == 0) then
507 ///   P[0]/Q[0]
508 /// else
509 ///   (P[r] - {Σ_{i=1}^r (P/Q)[r-i] * Q[i])}/(Q[0])
510 /// We therefore recursively call `getCoefficientInRationalFunction` on
511 /// all i \in [0, power).
512 ///
513 /// https://math.ucdavis.edu/~deloera/researchsummary/
514 /// barvinokalgorithm-latte1.pdf, p. 1285
515 QuasiPolynomial mlir::presburger::detail::getCoefficientInRationalFunction(
516     unsigned power, ArrayRef<QuasiPolynomial> num, ArrayRef<Fraction> den) {
517   assert(!den.empty() && "division by empty denominator in rational function!");
518 
519   unsigned numParam = num[0].getNumInputs();
520   // We use the `isEqual` method of PresburgerSpace, which QuasiPolynomial
521   // inherits from.
522   assert(llvm::all_of(num,
523                       [&num](const QuasiPolynomial &qp) {
524                         return num[0].isEqual(qp);
525                       }) &&
526          "the quasipolynomials should all belong to the same space!");
527 
528   std::vector<QuasiPolynomial> coefficients;
529   coefficients.reserve(power + 1);
530 
531   coefficients.emplace_back(num[0] / den[0]);
532   for (unsigned i = 1; i <= power; ++i) {
533     // If the power is not there in the numerator, the coefficient is zero.
534     coefficients.emplace_back(i < num.size() ? num[i]
535                                              : QuasiPolynomial(numParam, 0));
536 
537     // After den.size(), the coefficients are zero, so we stop
538     // subtracting at that point (if it is less than i).
539     unsigned limit = std::min<unsigned long>(i, den.size() - 1);
540     for (unsigned j = 1; j <= limit; ++j)
541       coefficients[i] = coefficients[i] -
542                         coefficients[i - j] * QuasiPolynomial(numParam, den[j]);
543 
544     coefficients[i] = coefficients[i] / den[0];
545   }
546   return coefficients[power].simplify();
547 }
548 
549 /// Substitute x_i = t^μ_i in one term of a generating function, returning
550 /// a quasipolynomial which represents the exponent of the numerator
551 /// of the result, and a vector which represents the exponents of the
552 /// denominator of the result.
553 /// If the returned value is {num, dens}, it represents the function
554 /// t^num / \prod_j (1 - t^dens[j]).
555 /// v represents the affine functions whose floors are multiplied by the
556 /// generators, and ds represents the list of generators.
557 std::pair<QuasiPolynomial, std::vector<Fraction>>
558 substituteMuInTerm(unsigned numParams, const ParamPoint &v,
559                    const std::vector<Point> &ds, const Point &mu) {
560   unsigned numDims = mu.size();
561 #ifndef NDEBUG
562   for (const Point &d : ds)
563     assert(d.size() == numDims &&
564            "μ has to have the same number of dimensions as the generators!");
565 #endif
566 
567   // First, the exponent in the numerator becomes
568   // - (μ • u_1) * (floor(first col of v))
569   // - (μ • u_2) * (floor(second col of v)) - ...
570   // - (μ • u_d) * (floor(d'th col of v))
571   // So we store the negation of the dot products.
572 
573   // We have d terms, each of whose coefficient is the negative dot product.
574   SmallVector<Fraction> coefficients;
575   coefficients.reserve(numDims);
576   for (const Point &d : ds)
577     coefficients.emplace_back(-dotProduct(mu, d));
578 
579   // Then, the affine function is a single floor expression, given by the
580   // corresponding column of v.
581   ParamPoint vTranspose = v.transpose();
582   std::vector<std::vector<SmallVector<Fraction>>> affine;
583   affine.reserve(numDims);
584   for (unsigned j = 0; j < numDims; ++j)
585     affine.push_back({SmallVector<Fraction>{vTranspose.getRow(j)}});
586 
587   QuasiPolynomial num(numParams, coefficients, affine);
588   num = num.simplify();
589 
590   std::vector<Fraction> dens;
591   dens.reserve(ds.size());
592   // Similarly, each term in the denominator has exponent
593   // given by the dot product of μ with u_i.
594   for (const Point &d : ds) {
595     // This term in the denominator is
596     // (1 - t^dens.back())
597     dens.emplace_back(dotProduct(d, mu));
598   }
599 
600   return {num, dens};
601 }
602 
603 /// Normalize all denominator exponents `dens` to their absolute values
604 /// by multiplying and dividing by the inverses, in a function of the form
605 /// sign * t^num / prod_j (1 - t^dens[j]).
606 /// Here, sign = ± 1,
607 /// num is a QuasiPolynomial, and
608 /// each dens[j] is a Fraction.
609 void normalizeDenominatorExponents(int &sign, QuasiPolynomial &num,
610                                    std::vector<Fraction> &dens) {
611   // We track the number of exponents that are negative in the
612   // denominator, and convert them to their absolute values.
613   unsigned numNegExps = 0;
614   Fraction sumNegExps(0, 1);
615   for (const auto &den : dens) {
616     if (den < 0) {
617       numNegExps += 1;
618       sumNegExps += den;
619     }
620   }
621 
622   // If we have (1 - t^-c) in the denominator, for positive c,
623   // multiply and divide by t^c.
624   // We convert all negative-exponent terms at once; therefore
625   // we multiply and divide by t^sumNegExps.
626   // Then we get
627   // -(1 - t^c) in the denominator,
628   // increase the numerator by c, and
629   // flip the sign of the function.
630   if (numNegExps % 2 == 1)
631     sign = -sign;
632   num = num - QuasiPolynomial(num.getNumInputs(), sumNegExps);
633 }
634 
635 /// Compute the binomial coefficients nCi for 0 ≤ i ≤ r,
636 /// where n is a QuasiPolynomial.
637 std::vector<QuasiPolynomial> getBinomialCoefficients(const QuasiPolynomial &n,
638                                                      unsigned r) {
639   unsigned numParams = n.getNumInputs();
640   std::vector<QuasiPolynomial> coefficients;
641   coefficients.reserve(r + 1);
642   coefficients.emplace_back(numParams, 1);
643   for (unsigned j = 1; j <= r; ++j)
644     // We use the recursive formula for binomial coefficients here and below.
645     coefficients.emplace_back(
646         (coefficients[j - 1] * (n - QuasiPolynomial(numParams, j - 1)) /
647          Fraction(j, 1))
648             .simplify());
649   return coefficients;
650 }
651 
652 /// Compute the binomial coefficients nCi for 0 ≤ i ≤ r,
653 /// where n is a QuasiPolynomial.
654 std::vector<Fraction> getBinomialCoefficients(const Fraction &n,
655                                               const Fraction &r) {
656   std::vector<Fraction> coefficients;
657   coefficients.reserve((int64_t)floor(r));
658   coefficients.emplace_back(1);
659   for (unsigned j = 1; j <= r; ++j)
660     coefficients.emplace_back(coefficients[j - 1] * (n - (j - 1)) / (j));
661   return coefficients;
662 }
663 
664 /// We have a generating function of the form
665 /// f_p(x) = \sum_i sign_i * (x^n_i(p)) / (\prod_j (1 - x^d_{ij})
666 ///
667 /// where sign_i is ±1,
668 /// n_i \in Q^p -> Q^d is the sum of the vectors d_{ij}, weighted by the
669 /// floors of d affine functions on p parameters.
670 /// d_{ij} \in Q^d are vectors.
671 ///
672 /// We need to find the number of terms of the form x^t in the expansion of
673 /// this function.
674 /// However, direct substitution (x = (1, ..., 1)) causes the denominator
675 /// to become zero.
676 ///
677 /// We therefore use the following procedure instead:
678 /// 1. Substitute x_i = (s+1)^μ_i for some vector μ. This makes the generating
679 /// function a function of a scalar s.
680 /// 2. Write each term in this function as P(s)/Q(s), where P and Q are
681 /// polynomials. P has coefficients as quasipolynomials in d parameters, while
682 /// Q has coefficients as scalars.
683 /// 3. Find the constant term in the expansion of each term P(s)/Q(s). This is
684 /// equivalent to substituting s = 0.
685 ///
686 /// Verdoolaege, Sven, et al. "Counting integer points in parametric
687 /// polytopes using Barvinok's rational functions." Algorithmica 48 (2007):
688 /// 37-66.
689 QuasiPolynomial
690 mlir::presburger::detail::computeNumTerms(const GeneratingFunction &gf) {
691   // Step (1) We need to find a μ such that we can substitute x_i =
692   // (s+1)^μ_i. After this substitution, the exponent of (s+1) in the
693   // denominator is (μ_i • d_{ij}) in each term. Clearly, this cannot become
694   // zero. Hence we find a vector μ that is not orthogonal to any of the
695   // d_{ij} and substitute x accordingly.
696   std::vector<Point> allDenominators;
697   for (ArrayRef<Point> den : gf.getDenominators())
698     allDenominators.insert(allDenominators.end(), den.begin(), den.end());
699   Point mu = getNonOrthogonalVector(allDenominators);
700 
701   unsigned numParams = gf.getNumParams();
702   const std::vector<std::vector<Point>> &ds = gf.getDenominators();
703   QuasiPolynomial totalTerm(numParams, 0);
704   for (unsigned i = 0, e = ds.size(); i < e; ++i) {
705     int sign = gf.getSigns()[i];
706 
707     // Compute the new exponents of (s+1) for the numerator and the
708     // denominator after substituting μ.
709     auto [numExp, dens] =
710         substituteMuInTerm(numParams, gf.getNumerators()[i], ds[i], mu);
711     // Now the numerator is (s+1)^numExp
712     // and the denominator is \prod_j (1 - (s+1)^dens[j]).
713 
714     // Step (2) We need to express the terms in the function as quotients of
715     // polynomials. Each term is now of the form
716     // sign_i * (s+1)^numExp / (\prod_j (1 - (s+1)^dens[j]))
717     // For the i'th term, we first normalize the denominator to have only
718     // positive exponents. We convert all the dens[j] to their
719     // absolute values and change the sign and exponent in the numerator.
720     normalizeDenominatorExponents(sign, numExp, dens);
721 
722     // Then, using the formula for geometric series, we replace each (1 -
723     // (s+1)^(dens[j])) with
724     // (-s)(\sum_{0 ≤ k < dens[j]} (s+1)^k).
725     for (auto &j : dens)
726       j = abs(j) - 1;
727     // Note that at this point, the semantics of `dens[j]` changes to mean
728     // a term (\sum_{0 ≤ k ≤ dens[j]} (s+1)^k). The denominator is, as before,
729     // a product of these terms.
730 
731     // Since the -s are taken out, the sign changes if there is an odd number
732     // of such terms.
733     unsigned r = dens.size();
734     if (dens.size() % 2 == 1)
735       sign = -sign;
736 
737     // Thus the term overall now has the form
738     // sign'_i * (s+1)^numExp /
739     // (s^r * \prod_j (\sum_{0 ≤ k < dens[j]} (s+1)^k)).
740     // This means that
741     // the numerator is a polynomial in s, with coefficients as
742     // quasipolynomials (given by binomial coefficients), and the denominator
743     // is a polynomial in s, with integral coefficients (given by taking the
744     // convolution over all j).
745 
746     // Step (3) We need to find the constant term in the expansion of each
747     // term. Since each term has s^r as a factor in the denominator, we avoid
748     // substituting s = 0 directly; instead, we find the coefficient of s^r in
749     // sign'_i * (s+1)^numExp / (\prod_j (\sum_k (s+1)^k)),
750     // Letting P(s) = (s+1)^numExp and Q(s) = \prod_j (...),
751     // we need to find the coefficient of s^r in P(s)/Q(s),
752     // for which we use the `getCoefficientInRationalFunction()` function.
753 
754     // First, we compute the coefficients of P(s), which are binomial
755     // coefficients.
756     // We only need the first r+1 of these, as higher-order terms do not
757     // contribute to the coefficient of s^r.
758     std::vector<QuasiPolynomial> numeratorCoefficients =
759         getBinomialCoefficients(numExp, r);
760 
761     // Then we compute the coefficients of each individual term in Q(s),
762     // which are (dens[i]+1) C (k+1) for 0 ≤ k ≤ dens[i].
763     std::vector<std::vector<Fraction>> eachTermDenCoefficients;
764     std::vector<Fraction> singleTermDenCoefficients;
765     eachTermDenCoefficients.reserve(r);
766     for (const Fraction &den : dens) {
767       singleTermDenCoefficients = getBinomialCoefficients(den + 1, den + 1);
768       eachTermDenCoefficients.emplace_back(
769           ArrayRef<Fraction>(singleTermDenCoefficients).drop_front());
770     }
771 
772     // Now we find the coefficients in Q(s) itself
773     // by taking the convolution of the coefficients
774     // of all the terms.
775     std::vector<Fraction> denominatorCoefficients;
776     denominatorCoefficients = eachTermDenCoefficients[0];
777     for (unsigned j = 1, e = eachTermDenCoefficients.size(); j < e; ++j)
778       denominatorCoefficients = multiplyPolynomials(denominatorCoefficients,
779                                                     eachTermDenCoefficients[j]);
780 
781     totalTerm =
782         totalTerm + getCoefficientInRationalFunction(r, numeratorCoefficients,
783                                                      denominatorCoefficients) *
784                         QuasiPolynomial(numParams, sign);
785   }
786 
787   return totalTerm.simplify();
788 }
789