xref: /llvm-project/mlir/lib/ExecutionEngine/SparseTensor/File.cpp (revision 83839700c32996c58ddebc0c74e3dc4970e005bc)
1 //===- File.cpp - Parsing sparse tensors from files -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements parsing and printing of files in one of the
10 // following external formats:
11 //
12 // (1) Matrix Market Exchange (MME): *.mtx
13 //     https://math.nist.gov/MatrixMarket/formats.html
14 //
15 // (2) Formidable Repository of Open Sparse Tensors and Tools (FROSTT): *.tns
16 //     http://frostt.io/tensors/file-formats.html
17 //
18 // This file is part of the lightweight runtime support library for sparse
19 // tensor manipulations.  The functionality of the support library is meant
20 // to simplify benchmarking, testing, and debugging MLIR code operating on
21 // sparse tensors.  However, the provided functionality is **not** part of
22 // core MLIR itself.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "mlir/ExecutionEngine/SparseTensor/File.h"
27 #include "llvm/Support/ErrorHandling.h"
28 
29 #include <cctype>
30 #include <cstring>
31 
32 using namespace mlir::sparse_tensor;
33 
34 /// Opens the file for reading.
35 void SparseTensorFile::openFile() {
36   if (file)
37     MLIR_SPARSETENSOR_FATAL("Already opened file %s\n", filename);
38   file = fopen(filename, "r");
39   if (!file)
40     MLIR_SPARSETENSOR_FATAL("Cannot find file %s\n", filename);
41 }
42 
43 /// Closes the file.
44 void SparseTensorFile::closeFile() {
45   if (file) {
46     fclose(file);
47     file = nullptr;
48   }
49 }
50 
51 // TODO(wrengr/bixia): figure out how to reorganize the element-parsing
52 // loop of `openSparseTensorCOO` into methods of this class, so we can
53 // avoid leaking access to the `line` pointer (both for general hygiene
54 // and because we can't mark it const due to the second argument of
55 // `strtoul`/`strtoud` being `char * *restrict` rather than
56 // `char const* *restrict`).
57 //
58 /// Attempts to read a line from the file.
59 char *SparseTensorFile::readLine() {
60   if (fgets(line, kColWidth, file))
61     return line;
62   MLIR_SPARSETENSOR_FATAL("Cannot read next line of %s\n", filename);
63 }
64 
65 /// Reads and parses the file's header.
66 void SparseTensorFile::readHeader() {
67   assert(file && "Attempt to readHeader() before openFile()");
68   if (strstr(filename, ".mtx"))
69     readMMEHeader();
70   else if (strstr(filename, ".tns"))
71     readExtFROSTTHeader();
72   else
73     MLIR_SPARSETENSOR_FATAL("Unknown format %s\n", filename);
74   assert(isValid() && "Failed to read the header");
75 }
76 
77 /// Asserts the shape subsumes the actual dimension sizes.  Is only
78 /// valid after parsing the header.
79 void SparseTensorFile::assertMatchesShape(uint64_t rank,
80                                           const uint64_t *shape) const {
81   assert(rank == getRank() && "Rank mismatch");
82   for (uint64_t r = 0; r < rank; ++r)
83     assert((shape[r] == 0 || shape[r] == idata[2 + r]) &&
84            "Dimension size mismatch");
85 }
86 
87 bool SparseTensorFile::canReadAs(PrimaryType valTy) const {
88   switch (valueKind_) {
89   case ValueKind::kInvalid:
90     assert(false && "Must readHeader() before calling canReadAs()");
91     return false; // In case assertions are disabled.
92   case ValueKind::kPattern:
93     return true;
94   case ValueKind::kInteger:
95     // When the file is specified to store integer values, we still
96     // allow implicitly converting those to floating primary-types.
97     return isRealPrimaryType(valTy);
98   case ValueKind::kReal:
99     // When the file is specified to store real/floating values, then
100     // we disallow implicit conversion to integer primary-types.
101     return isFloatingPrimaryType(valTy);
102   case ValueKind::kComplex:
103     // When the file is specified to store complex values, then we
104     // require a complex primary-type.
105     return isComplexPrimaryType(valTy);
106   case ValueKind::kUndefined:
107     // The "extended" FROSTT format doesn't specify a ValueKind.
108     // So we allow implicitly converting the stored values to both
109     // integer and floating primary-types.
110     return isRealPrimaryType(valTy);
111   }
112   llvm_unreachable("unknown ValueKind");
113 }
114 
/// Helper to convert C-style strings (i.e., '\0' terminated) to lower case,
/// in place.
///
/// Each character is passed to `tolower` as an `unsigned char`: handing
/// `tolower` a negative `char` value (any byte >= 0x80 on platforms where
/// `char` is signed) is undefined behaviour.
static inline void toLower(char *token) {
  for (char *c = token; *c; ++c)
    *c = static_cast<char>(tolower(static_cast<unsigned char>(*c)));
}
120 
/// Returns true iff the two '\0'-terminated strings have identical
/// contents.
static inline bool streq(const char *lhs, const char *rhs) {
  const int ordering = strcmp(lhs, rhs);
  return ordering == 0;
}
125 
/// Returns true iff the two '\0'-terminated strings differ in contents.
static inline bool strne(const char *lhs, const char *rhs) {
  const int ordering = strcmp(lhs, rhs);
  return ordering != 0;
}
130 
131 /// Read the MME header of a general sparse matrix of type real.
132 void SparseTensorFile::readMMEHeader() {
133   char header[64];
134   char object[64];
135   char format[64];
136   char field[64];
137   char symmetry[64];
138   // Read header line.
139   if (fscanf(file, "%63s %63s %63s %63s %63s\n", header, object, format, field,
140              symmetry) != 5)
141     MLIR_SPARSETENSOR_FATAL("Corrupt header in %s\n", filename);
142   // Convert all to lowercase up front (to avoid accidental redundancy).
143   toLower(header);
144   toLower(object);
145   toLower(format);
146   toLower(field);
147   toLower(symmetry);
148   // Process `field`, which specify pattern or the data type of the values.
149   if (streq(field, "pattern"))
150     valueKind_ = ValueKind::kPattern;
151   else if (streq(field, "real"))
152     valueKind_ = ValueKind::kReal;
153   else if (streq(field, "integer"))
154     valueKind_ = ValueKind::kInteger;
155   else if (streq(field, "complex"))
156     valueKind_ = ValueKind::kComplex;
157   else
158     MLIR_SPARSETENSOR_FATAL("Unexpected header field value in %s\n", filename);
159   // Set properties.
160   isSymmetric_ = streq(symmetry, "symmetric");
161   // Make sure this is a general sparse matrix.
162   if (strne(header, "%%matrixmarket") || strne(object, "matrix") ||
163       strne(format, "coordinate") ||
164       (strne(symmetry, "general") && !isSymmetric_))
165     MLIR_SPARSETENSOR_FATAL("Cannot find a general sparse matrix in %s\n",
166                             filename);
167   // Skip comments.
168   while (true) {
169     readLine();
170     if (line[0] != '%')
171       break;
172   }
173   // Next line contains M N NNZ.
174   idata[0] = 2; // rank
175   if (sscanf(line, "%" PRIu64 "%" PRIu64 "%" PRIu64 "\n", idata + 2, idata + 3,
176              idata + 1) != 3)
177     MLIR_SPARSETENSOR_FATAL("Cannot find size in %s\n", filename);
178 }
179 
180 /// Read the "extended" FROSTT header. Although not part of the documented
181 /// format, we assume that the file starts with optional comments followed
182 /// by two lines that define the rank, the number of nonzeros, and the
183 /// dimensions sizes (one per rank) of the sparse tensor.
184 void SparseTensorFile::readExtFROSTTHeader() {
185   // Skip comments.
186   while (true) {
187     readLine();
188     if (line[0] != '#')
189       break;
190   }
191   // Next line contains RANK and NNZ.
192   if (sscanf(line, "%" PRIu64 "%" PRIu64 "\n", idata, idata + 1) != 2)
193     MLIR_SPARSETENSOR_FATAL("Cannot find metadata in %s\n", filename);
194   // Followed by a line with the dimension sizes (one per rank).
195   for (uint64_t r = 0; r < idata[0]; ++r)
196     if (fscanf(file, "%" PRIu64, idata + 2 + r) != 1)
197       MLIR_SPARSETENSOR_FATAL("Cannot find dimension size %s\n", filename);
198   readLine(); // end of line
199   // The FROSTT format does not define the data type of the nonzero elements.
200   valueKind_ = ValueKind::kUndefined;
201 }
202