1 //===- File.cpp - Parsing sparse tensors from files -----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements parsing and printing of files in one of the 10 // following external formats: 11 // 12 // (1) Matrix Market Exchange (MME): *.mtx 13 // https://math.nist.gov/MatrixMarket/formats.html 14 // 15 // (2) Formidable Repository of Open Sparse Tensors and Tools (FROSTT): *.tns 16 // http://frostt.io/tensors/file-formats.html 17 // 18 // This file is part of the lightweight runtime support library for sparse 19 // tensor manipulations. The functionality of the support library is meant 20 // to simplify benchmarking, testing, and debugging MLIR code operating on 21 // sparse tensors. However, the provided functionality is **not** part of 22 // core MLIR itself. 23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "mlir/ExecutionEngine/SparseTensor/File.h" 27 #include "llvm/Support/ErrorHandling.h" 28 29 #include <cctype> 30 #include <cstring> 31 32 using namespace mlir::sparse_tensor; 33 34 /// Opens the file for reading. 35 void SparseTensorFile::openFile() { 36 if (file) 37 MLIR_SPARSETENSOR_FATAL("Already opened file %s\n", filename); 38 file = fopen(filename, "r"); 39 if (!file) 40 MLIR_SPARSETENSOR_FATAL("Cannot find file %s\n", filename); 41 } 42 43 /// Closes the file. 44 void SparseTensorFile::closeFile() { 45 if (file) { 46 fclose(file); 47 file = nullptr; 48 } 49 } 50 51 // TODO(wrengr/bixia): figure out how to reorganize the element-parsing 52 // loop of `openSparseTensorCOO` into methods of this class, so we can 53 // avoid leaking access to the `line` pointer (both for general hygiene 54 // and because we can't mark it const due to the second argument of 55 // `strtoul`/`strtoud` being `char * *restrict` rather than 56 // `char const* *restrict`). 57 // 58 /// Attempts to read a line from the file. 59 char *SparseTensorFile::readLine() { 60 if (fgets(line, kColWidth, file)) 61 return line; 62 MLIR_SPARSETENSOR_FATAL("Cannot read next line of %s\n", filename); 63 } 64 65 /// Reads and parses the file's header. 66 void SparseTensorFile::readHeader() { 67 assert(file && "Attempt to readHeader() before openFile()"); 68 if (strstr(filename, ".mtx")) 69 readMMEHeader(); 70 else if (strstr(filename, ".tns")) 71 readExtFROSTTHeader(); 72 else 73 MLIR_SPARSETENSOR_FATAL("Unknown format %s\n", filename); 74 assert(isValid() && "Failed to read the header"); 75 } 76 77 /// Asserts the shape subsumes the actual dimension sizes. Is only 78 /// valid after parsing the header. 79 void SparseTensorFile::assertMatchesShape(uint64_t rank, 80 const uint64_t *shape) const { 81 assert(rank == getRank() && "Rank mismatch"); 82 for (uint64_t r = 0; r < rank; ++r) 83 assert((shape[r] == 0 || shape[r] == idata[2 + r]) && 84 "Dimension size mismatch"); 85 } 86 87 bool SparseTensorFile::canReadAs(PrimaryType valTy) const { 88 switch (valueKind_) { 89 case ValueKind::kInvalid: 90 assert(false && "Must readHeader() before calling canReadAs()"); 91 return false; // In case assertions are disabled. 92 case ValueKind::kPattern: 93 return true; 94 case ValueKind::kInteger: 95 // When the file is specified to store integer values, we still 96 // allow implicitly converting those to floating primary-types. 97 return isRealPrimaryType(valTy); 98 case ValueKind::kReal: 99 // When the file is specified to store real/floating values, then 100 // we disallow implicit conversion to integer primary-types. 101 return isFloatingPrimaryType(valTy); 102 case ValueKind::kComplex: 103 // When the file is specified to store complex values, then we 104 // require a complex primary-type. 105 return isComplexPrimaryType(valTy); 106 case ValueKind::kUndefined: 107 // The "extended" FROSTT format doesn't specify a ValueKind. 108 // So we allow implicitly converting the stored values to both 109 // integer and floating primary-types. 110 return isRealPrimaryType(valTy); 111 } 112 llvm_unreachable("unknown ValueKind"); 113 } 114 115 /// Helper to convert C-style strings (i.e., '\0' terminated) to lower case. 116 static inline void toLower(char *token) { 117 for (char *c = token; *c; ++c) 118 *c = tolower(*c); 119 } 120 121 /// Idiomatic name for checking string equality. 122 static inline bool streq(const char *lhs, const char *rhs) { 123 return strcmp(lhs, rhs) == 0; 124 } 125 126 /// Idiomatic name for checking string inequality. 127 static inline bool strne(const char *lhs, const char *rhs) { 128 return strcmp(lhs, rhs); // aka `!= 0` 129 } 130 131 /// Read the MME header of a general sparse matrix of type real. 132 void SparseTensorFile::readMMEHeader() { 133 char header[64]; 134 char object[64]; 135 char format[64]; 136 char field[64]; 137 char symmetry[64]; 138 // Read header line. 139 if (fscanf(file, "%63s %63s %63s %63s %63s\n", header, object, format, field, 140 symmetry) != 5) 141 MLIR_SPARSETENSOR_FATAL("Corrupt header in %s\n", filename); 142 // Convert all to lowercase up front (to avoid accidental redundancy). 143 toLower(header); 144 toLower(object); 145 toLower(format); 146 toLower(field); 147 toLower(symmetry); 148 // Process `field`, which specify pattern or the data type of the values. 149 if (streq(field, "pattern")) 150 valueKind_ = ValueKind::kPattern; 151 else if (streq(field, "real")) 152 valueKind_ = ValueKind::kReal; 153 else if (streq(field, "integer")) 154 valueKind_ = ValueKind::kInteger; 155 else if (streq(field, "complex")) 156 valueKind_ = ValueKind::kComplex; 157 else 158 MLIR_SPARSETENSOR_FATAL("Unexpected header field value in %s\n", filename); 159 // Set properties. 160 isSymmetric_ = streq(symmetry, "symmetric"); 161 // Make sure this is a general sparse matrix. 162 if (strne(header, "%%matrixmarket") || strne(object, "matrix") || 163 strne(format, "coordinate") || 164 (strne(symmetry, "general") && !isSymmetric_)) 165 MLIR_SPARSETENSOR_FATAL("Cannot find a general sparse matrix in %s\n", 166 filename); 167 // Skip comments. 168 while (true) { 169 readLine(); 170 if (line[0] != '%') 171 break; 172 } 173 // Next line contains M N NNZ. 174 idata[0] = 2; // rank 175 if (sscanf(line, "%" PRIu64 "%" PRIu64 "%" PRIu64 "\n", idata + 2, idata + 3, 176 idata + 1) != 3) 177 MLIR_SPARSETENSOR_FATAL("Cannot find size in %s\n", filename); 178 } 179 180 /// Read the "extended" FROSTT header. Although not part of the documented 181 /// format, we assume that the file starts with optional comments followed 182 /// by two lines that define the rank, the number of nonzeros, and the 183 /// dimensions sizes (one per rank) of the sparse tensor. 184 void SparseTensorFile::readExtFROSTTHeader() { 185 // Skip comments. 186 while (true) { 187 readLine(); 188 if (line[0] != '#') 189 break; 190 } 191 // Next line contains RANK and NNZ. 192 if (sscanf(line, "%" PRIu64 "%" PRIu64 "\n", idata, idata + 1) != 2) 193 MLIR_SPARSETENSOR_FATAL("Cannot find metadata in %s\n", filename); 194 // Followed by a line with the dimension sizes (one per rank). 195 for (uint64_t r = 0; r < idata[0]; ++r) 196 if (fscanf(file, "%" PRIu64, idata + 2 + r) != 1) 197 MLIR_SPARSETENSOR_FATAL("Cannot find dimension size %s\n", filename); 198 readLine(); // end of line 199 // The FROSTT format does not define the data type of the nonzero elements. 200 valueKind_ = ValueKind::kUndefined; 201 } 202