1 //===- File.cpp - Parsing sparse tensors from files -----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements parsing and printing of files in one of the 10 // following external formats: 11 // 12 // (1) Matrix Market Exchange (MME): *.mtx 13 // https://math.nist.gov/MatrixMarket/formats.html 14 // 15 // (2) Formidable Repository of Open Sparse Tensors and Tools (FROSTT): *.tns 16 // http://frostt.io/tensors/file-formats.html 17 // 18 // This file is part of the lightweight runtime support library for sparse 19 // tensor manipulations. The functionality of the support library is meant 20 // to simplify benchmarking, testing, and debugging MLIR code operating on 21 // sparse tensors. However, the provided functionality is **not** part of 22 // core MLIR itself. 23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "mlir/ExecutionEngine/SparseTensor/File.h" 27 28 #include <cctype> 29 #include <cstring> 30 31 using namespace mlir::sparse_tensor; 32 33 /// Opens the file for reading. 34 void SparseTensorFile::openFile() { 35 if (file) 36 MLIR_SPARSETENSOR_FATAL("Already opened file %s\n", filename); 37 file = fopen(filename, "r"); 38 if (!file) 39 MLIR_SPARSETENSOR_FATAL("Cannot find file %s\n", filename); 40 } 41 42 /// Closes the file. 43 void SparseTensorFile::closeFile() { 44 if (file) { 45 fclose(file); 46 file = nullptr; 47 } 48 } 49 50 // TODO(wrengr/bixia): figure out how to reorganize the element-parsing 51 // loop of `openSparseTensorCOO` into methods of this class, so we can 52 // avoid leaking access to the `line` pointer (both for general hygiene 53 // and because we can't mark it const due to the second argument of 54 // `strtoul`/`strtoud` being `char * *restrict` rather than 55 // `char const* *restrict`). 56 // 57 /// Attempts to read a line from the file. 58 char *SparseTensorFile::readLine() { 59 if (fgets(line, kColWidth, file)) 60 return line; 61 MLIR_SPARSETENSOR_FATAL("Cannot read next line of %s\n", filename); 62 } 63 64 /// Reads and parses the file's header. 65 void SparseTensorFile::readHeader() { 66 assert(file && "Attempt to readHeader() before openFile()"); 67 if (strstr(filename, ".mtx")) 68 readMMEHeader(); 69 else if (strstr(filename, ".tns")) 70 readExtFROSTTHeader(); 71 else 72 MLIR_SPARSETENSOR_FATAL("Unknown format %s\n", filename); 73 assert(isValid() && "Failed to read the header"); 74 } 75 76 /// Asserts the shape subsumes the actual dimension sizes. Is only 77 /// valid after parsing the header. 78 void SparseTensorFile::assertMatchesShape(uint64_t rank, 79 const uint64_t *shape) const { 80 assert(rank == getRank() && "Rank mismatch"); 81 for (uint64_t r = 0; r < rank; ++r) 82 assert((shape[r] == 0 || shape[r] == idata[2 + r]) && 83 "Dimension size mismatch"); 84 } 85 86 bool SparseTensorFile::canReadAs(PrimaryType valTy) const { 87 switch (valueKind_) { 88 case ValueKind::kInvalid: 89 assert(false && "Must readHeader() before calling canReadAs()"); 90 return false; // In case assertions are disabled. 91 case ValueKind::kPattern: 92 return true; 93 case ValueKind::kInteger: 94 // When the file is specified to store integer values, we still 95 // allow implicitly converting those to floating primary-types. 96 return isRealPrimaryType(valTy); 97 case ValueKind::kReal: 98 // When the file is specified to store real/floating values, then 99 // we disallow implicit conversion to integer primary-types. 100 return isFloatingPrimaryType(valTy); 101 case ValueKind::kComplex: 102 // When the file is specified to store complex values, then we 103 // require a complex primary-type. 104 return isComplexPrimaryType(valTy); 105 case ValueKind::kUndefined: 106 // The "extended" FROSTT format doesn't specify a ValueKind. 107 // So we allow implicitly converting the stored values to both 108 // integer and floating primary-types. 109 return isRealPrimaryType(valTy); 110 } 111 } 112 113 /// Helper to convert C-style strings (i.e., '\0' terminated) to lower case. 114 static inline char *toLower(char *token) { 115 for (char *c = token; *c; ++c) 116 *c = tolower(*c); 117 return token; 118 } 119 120 /// Idiomatic name for checking string equality. 121 static inline bool streq(const char *lhs, const char *rhs) { 122 return strcmp(lhs, rhs) == 0; 123 } 124 125 /// Idiomatic name for checking string inequality. 126 static inline bool strne(const char *lhs, const char *rhs) { 127 return strcmp(lhs, rhs); // aka `!= 0` 128 } 129 130 /// Read the MME header of a general sparse matrix of type real. 131 void SparseTensorFile::readMMEHeader() { 132 char header[64]; 133 char object[64]; 134 char format[64]; 135 char field[64]; 136 char symmetry[64]; 137 // Read header line. 138 if (fscanf(file, "%63s %63s %63s %63s %63s\n", header, object, format, field, 139 symmetry) != 5) 140 MLIR_SPARSETENSOR_FATAL("Corrupt header in %s\n", filename); 141 // Convert all to lowercase up front (to avoid accidental redundancy). 142 toLower(header); 143 toLower(object); 144 toLower(format); 145 toLower(field); 146 toLower(symmetry); 147 // Process `field`, which specify pattern or the data type of the values. 148 if (streq(field, "pattern")) 149 valueKind_ = ValueKind::kPattern; 150 else if (streq(field, "real")) 151 valueKind_ = ValueKind::kReal; 152 else if (streq(field, "integer")) 153 valueKind_ = ValueKind::kInteger; 154 else if (streq(field, "complex")) 155 valueKind_ = ValueKind::kComplex; 156 else 157 MLIR_SPARSETENSOR_FATAL("Unexpected header field value in %s\n", filename); 158 // Set properties. 159 isSymmetric_ = streq(symmetry, "symmetric"); 160 // Make sure this is a general sparse matrix. 161 if (strne(header, "%%matrixmarket") || strne(object, "matrix") || 162 strne(format, "coordinate") || 163 (strne(symmetry, "general") && !isSymmetric_)) 164 MLIR_SPARSETENSOR_FATAL("Cannot find a general sparse matrix in %s\n", 165 filename); 166 // Skip comments. 167 while (true) { 168 readLine(); 169 if (line[0] != '%') 170 break; 171 } 172 // Next line contains M N NNZ. 173 idata[0] = 2; // rank 174 if (sscanf(line, "%" PRIu64 "%" PRIu64 "%" PRIu64 "\n", idata + 2, idata + 3, 175 idata + 1) != 3) 176 MLIR_SPARSETENSOR_FATAL("Cannot find size in %s\n", filename); 177 } 178 179 /// Read the "extended" FROSTT header. Although not part of the documented 180 /// format, we assume that the file starts with optional comments followed 181 /// by two lines that define the rank, the number of nonzeros, and the 182 /// dimensions sizes (one per rank) of the sparse tensor. 183 void SparseTensorFile::readExtFROSTTHeader() { 184 // Skip comments. 185 while (true) { 186 readLine(); 187 if (line[0] != '#') 188 break; 189 } 190 // Next line contains RANK and NNZ. 191 if (sscanf(line, "%" PRIu64 "%" PRIu64 "\n", idata, idata + 1) != 2) 192 MLIR_SPARSETENSOR_FATAL("Cannot find metadata in %s\n", filename); 193 // Followed by a line with the dimension sizes (one per rank). 194 for (uint64_t r = 0; r < idata[0]; ++r) 195 if (fscanf(file, "%" PRIu64, idata + 2 + r) != 1) 196 MLIR_SPARSETENSOR_FATAL("Cannot find dimension size %s\n", filename); 197 readLine(); // end of line 198 // The FROSTT format does not define the data type of the nonzero elements. 199 valueKind_ = ValueKind::kUndefined; 200 } 201