1 //===- File.cpp - Parsing sparse tensors from files -----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements parsing and printing of files in one of the 10 // following external formats: 11 // 12 // (1) Matrix Market Exchange (MME): *.mtx 13 // https://math.nist.gov/MatrixMarket/formats.html 14 // 15 // (2) Formidable Repository of Open Sparse Tensors and Tools (FROSTT): *.tns 16 // http://frostt.io/tensors/file-formats.html 17 // 18 // This file is part of the lightweight runtime support library for sparse 19 // tensor manipulations. The functionality of the support library is meant 20 // to simplify benchmarking, testing, and debugging MLIR code operating on 21 // sparse tensors. However, the provided functionality is **not** part of 22 // core MLIR itself. 23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "mlir/ExecutionEngine/SparseTensor/File.h" 27 28 #include <cctype> 29 #include <cstring> 30 31 using namespace mlir::sparse_tensor; 32 33 /// Opens the file for reading. 34 void SparseTensorFile::openFile() { 35 if (file) 36 MLIR_SPARSETENSOR_FATAL("Already opened file %s\n", filename); 37 file = fopen(filename, "r"); 38 if (!file) 39 MLIR_SPARSETENSOR_FATAL("Cannot find file %s\n", filename); 40 } 41 42 /// Closes the file. 43 void SparseTensorFile::closeFile() { 44 if (file) { 45 fclose(file); 46 file = nullptr; 47 } 48 } 49 50 // TODO(wrengr/bixia): figure out how to reorganize the element-parsing 51 // loop of `openSparseTensorCOO` into methods of this class, so we can 52 // avoid leaking access to the `line` pointer (both for general hygiene 53 // and because we can't mark it const due to the second argument of 54 // `strtoul`/`strtoud` being `char * *restrict` rather than 55 // `char const* *restrict`). 56 // 57 /// Attempts to read a line from the file. 58 char *SparseTensorFile::readLine() { 59 if (fgets(line, kColWidth, file)) 60 return line; 61 MLIR_SPARSETENSOR_FATAL("Cannot read next line of %s\n", filename); 62 } 63 64 /// Reads and parses the file's header. 65 void SparseTensorFile::readHeader() { 66 assert(file && "Attempt to readHeader() before openFile()"); 67 if (strstr(filename, ".mtx")) 68 readMMEHeader(); 69 else if (strstr(filename, ".tns")) 70 readExtFROSTTHeader(); 71 else 72 MLIR_SPARSETENSOR_FATAL("Unknown format %s\n", filename); 73 assert(isValid() && "Failed to read the header"); 74 } 75 76 /// Asserts the shape subsumes the actual dimension sizes. Is only 77 /// valid after parsing the header. 78 void SparseTensorFile::assertMatchesShape(uint64_t rank, 79 const uint64_t *shape) const { 80 assert(rank == getRank() && "Rank mismatch"); 81 for (uint64_t r = 0; r < rank; ++r) 82 assert((shape[r] == 0 || shape[r] == idata[2 + r]) && 83 "Dimension size mismatch"); 84 } 85 86 bool SparseTensorFile::canReadAs(PrimaryType valTy) const { 87 switch (valueKind_) { 88 case ValueKind::kInvalid: 89 assert(false && "Must readHeader() before calling canReadAs()"); 90 return false; // In case assertions are disabled. 91 case ValueKind::kPattern: 92 return true; 93 case ValueKind::kInteger: 94 // When the file is specified to store integer values, we still 95 // allow implicitly converting those to floating primary-types. 96 return isRealPrimaryType(valTy); 97 case ValueKind::kReal: 98 // When the file is specified to store real/floating values, then 99 // we disallow implicit conversion to integer primary-types. 100 return isFloatingPrimaryType(valTy); 101 case ValueKind::kComplex: 102 // When the file is specified to store complex values, then we 103 // require a complex primary-type. 104 return isComplexPrimaryType(valTy); 105 case ValueKind::kUndefined: 106 // The "extended" FROSTT format doesn't specify a ValueKind. 107 // So we allow implicitly converting the stored values to both 108 // integer and floating primary-types. 109 return isRealPrimaryType(valTy); 110 default: 111 MLIR_SPARSETENSOR_FATAL("Unknown ValueKind: %d\n", 112 static_cast<uint8_t>(valueKind_)); 113 } 114 } 115 116 /// Helper to convert C-style strings (i.e., '\0' terminated) to lower case. 117 static inline char *toLower(char *token) { 118 for (char *c = token; *c; ++c) 119 *c = tolower(*c); 120 return token; 121 } 122 123 /// Idiomatic name for checking string equality. 124 static inline bool streq(const char *lhs, const char *rhs) { 125 return strcmp(lhs, rhs) == 0; 126 } 127 128 /// Idiomatic name for checking string inequality. 129 static inline bool strne(const char *lhs, const char *rhs) { 130 return strcmp(lhs, rhs); // aka `!= 0` 131 } 132 133 /// Read the MME header of a general sparse matrix of type real. 134 void SparseTensorFile::readMMEHeader() { 135 char header[64]; 136 char object[64]; 137 char format[64]; 138 char field[64]; 139 char symmetry[64]; 140 // Read header line. 141 if (fscanf(file, "%63s %63s %63s %63s %63s\n", header, object, format, field, 142 symmetry) != 5) 143 MLIR_SPARSETENSOR_FATAL("Corrupt header in %s\n", filename); 144 // Convert all to lowercase up front (to avoid accidental redundancy). 145 toLower(header); 146 toLower(object); 147 toLower(format); 148 toLower(field); 149 toLower(symmetry); 150 // Process `field`, which specify pattern or the data type of the values. 151 if (streq(field, "pattern")) 152 valueKind_ = ValueKind::kPattern; 153 else if (streq(field, "real")) 154 valueKind_ = ValueKind::kReal; 155 else if (streq(field, "integer")) 156 valueKind_ = ValueKind::kInteger; 157 else if (streq(field, "complex")) 158 valueKind_ = ValueKind::kComplex; 159 else 160 MLIR_SPARSETENSOR_FATAL("Unexpected header field value in %s\n", filename); 161 // Set properties. 162 isSymmetric_ = streq(symmetry, "symmetric"); 163 // Make sure this is a general sparse matrix. 164 if (strne(header, "%%matrixmarket") || strne(object, "matrix") || 165 strne(format, "coordinate") || 166 (strne(symmetry, "general") && !isSymmetric_)) 167 MLIR_SPARSETENSOR_FATAL("Cannot find a general sparse matrix in %s\n", 168 filename); 169 // Skip comments. 170 while (true) { 171 readLine(); 172 if (line[0] != '%') 173 break; 174 } 175 // Next line contains M N NNZ. 176 idata[0] = 2; // rank 177 if (sscanf(line, "%" PRIu64 "%" PRIu64 "%" PRIu64 "\n", idata + 2, idata + 3, 178 idata + 1) != 3) 179 MLIR_SPARSETENSOR_FATAL("Cannot find size in %s\n", filename); 180 } 181 182 /// Read the "extended" FROSTT header. Although not part of the documented 183 /// format, we assume that the file starts with optional comments followed 184 /// by two lines that define the rank, the number of nonzeros, and the 185 /// dimensions sizes (one per rank) of the sparse tensor. 186 void SparseTensorFile::readExtFROSTTHeader() { 187 // Skip comments. 188 while (true) { 189 readLine(); 190 if (line[0] != '#') 191 break; 192 } 193 // Next line contains RANK and NNZ. 194 if (sscanf(line, "%" PRIu64 "%" PRIu64 "\n", idata, idata + 1) != 2) 195 MLIR_SPARSETENSOR_FATAL("Cannot find metadata in %s\n", filename); 196 // Followed by a line with the dimension sizes (one per rank). 197 for (uint64_t r = 0; r < idata[0]; ++r) 198 if (fscanf(file, "%" PRIu64, idata + 2 + r) != 1) 199 MLIR_SPARSETENSOR_FATAL("Cannot find dimension size %s\n", filename); 200 readLine(); // end of line 201 // The FROSTT format does not define the data type of the nonzero elements. 202 valueKind_ = ValueKind::kUndefined; 203 } 204