1 //===- File.cpp - Parsing sparse tensors from files -----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements parsing and printing of files in one of the 10 // following external formats: 11 // 12 // (1) Matrix Market Exchange (MME): *.mtx 13 // https://math.nist.gov/MatrixMarket/formats.html 14 // 15 // (2) Formidable Repository of Open Sparse Tensors and Tools (FROSTT): *.tns 16 // http://frostt.io/tensors/file-formats.html 17 // 18 // This file is part of the lightweight runtime support library for sparse 19 // tensor manipulations. The functionality of the support library is meant 20 // to simplify benchmarking, testing, and debugging MLIR code operating on 21 // sparse tensors. However, the provided functionality is **not** part of 22 // core MLIR itself. 23 // 24 //===----------------------------------------------------------------------===// 25 26 #include "mlir/ExecutionEngine/SparseTensor/File.h" 27 28 #include <cctype> 29 #include <cstring> 30 31 using namespace mlir::sparse_tensor; 32 33 /// Opens the file for reading. 34 void SparseTensorReader::openFile() { 35 if (file) 36 MLIR_SPARSETENSOR_FATAL("Already opened file %s\n", filename); 37 file = fopen(filename, "r"); 38 if (!file) 39 MLIR_SPARSETENSOR_FATAL("Cannot find file %s\n", filename); 40 } 41 42 /// Closes the file. 43 void SparseTensorReader::closeFile() { 44 if (file) { 45 fclose(file); 46 file = nullptr; 47 } 48 } 49 50 /// Attempts to read a line from the file. 51 void SparseTensorReader::readLine() { 52 if (!fgets(line, kColWidth, file)) 53 MLIR_SPARSETENSOR_FATAL("Cannot read next line of %s\n", filename); 54 } 55 56 /// Reads and parses the file's header. 57 void SparseTensorReader::readHeader() { 58 assert(file && "Attempt to readHeader() before openFile()"); 59 if (strstr(filename, ".mtx")) 60 readMMEHeader(); 61 else if (strstr(filename, ".tns")) 62 readExtFROSTTHeader(); 63 else 64 MLIR_SPARSETENSOR_FATAL("Unknown format %s\n", filename); 65 assert(isValid() && "Failed to read the header"); 66 } 67 68 /// Asserts the shape subsumes the actual dimension sizes. Is only 69 /// valid after parsing the header. 70 void SparseTensorReader::assertMatchesShape(uint64_t rank, 71 const uint64_t *shape) const { 72 assert(rank == getRank() && "Rank mismatch"); 73 for (uint64_t r = 0; r < rank; ++r) 74 assert((shape[r] == 0 || shape[r] == idata[2 + r]) && 75 "Dimension size mismatch"); 76 } 77 78 bool SparseTensorReader::canReadAs(PrimaryType valTy) const { 79 switch (valueKind_) { 80 case ValueKind::kInvalid: 81 assert(false && "Must readHeader() before calling canReadAs()"); 82 return false; // In case assertions are disabled. 83 case ValueKind::kPattern: 84 return true; 85 case ValueKind::kInteger: 86 // When the file is specified to store integer values, we still 87 // allow implicitly converting those to floating primary-types. 88 return isRealPrimaryType(valTy); 89 case ValueKind::kReal: 90 // When the file is specified to store real/floating values, then 91 // we disallow implicit conversion to integer primary-types. 92 return isFloatingPrimaryType(valTy); 93 case ValueKind::kComplex: 94 // When the file is specified to store complex values, then we 95 // require a complex primary-type. 96 return isComplexPrimaryType(valTy); 97 case ValueKind::kUndefined: 98 // The "extended" FROSTT format doesn't specify a ValueKind. 99 // So we allow implicitly converting the stored values to both 100 // integer and floating primary-types. 101 return isRealPrimaryType(valTy); 102 } 103 MLIR_SPARSETENSOR_FATAL("Unknown ValueKind: %d\n", 104 static_cast<uint8_t>(valueKind_)); 105 } 106 107 /// Helper to convert C-style strings (i.e., '\0' terminated) to lower case. 108 static inline void toLower(char *token) { 109 for (char *c = token; *c; ++c) 110 *c = tolower(*c); 111 } 112 113 /// Idiomatic name for checking string equality. 114 static inline bool streq(const char *lhs, const char *rhs) { 115 return strcmp(lhs, rhs) == 0; 116 } 117 118 /// Idiomatic name for checking string inequality. 119 static inline bool strne(const char *lhs, const char *rhs) { 120 return strcmp(lhs, rhs); // aka `!= 0` 121 } 122 123 /// Read the MME header of a general sparse matrix of type real. 124 void SparseTensorReader::readMMEHeader() { 125 char header[64]; 126 char object[64]; 127 char format[64]; 128 char field[64]; 129 char symmetry[64]; 130 // Read header line. 131 if (fscanf(file, "%63s %63s %63s %63s %63s\n", header, object, format, field, 132 symmetry) != 5) 133 MLIR_SPARSETENSOR_FATAL("Corrupt header in %s\n", filename); 134 // Convert all to lowercase up front (to avoid accidental redundancy). 135 toLower(header); 136 toLower(object); 137 toLower(format); 138 toLower(field); 139 toLower(symmetry); 140 // Process `field`, which specify pattern or the data type of the values. 141 if (streq(field, "pattern")) 142 valueKind_ = ValueKind::kPattern; 143 else if (streq(field, "real")) 144 valueKind_ = ValueKind::kReal; 145 else if (streq(field, "integer")) 146 valueKind_ = ValueKind::kInteger; 147 else if (streq(field, "complex")) 148 valueKind_ = ValueKind::kComplex; 149 else 150 MLIR_SPARSETENSOR_FATAL("Unexpected header field value in %s\n", filename); 151 // Set properties. 152 isSymmetric_ = streq(symmetry, "symmetric"); 153 // Make sure this is a general sparse matrix. 154 if (strne(header, "%%matrixmarket") || strne(object, "matrix") || 155 strne(format, "coordinate") || 156 (strne(symmetry, "general") && !isSymmetric_)) 157 MLIR_SPARSETENSOR_FATAL("Cannot find a general sparse matrix in %s\n", 158 filename); 159 // Skip comments. 160 while (true) { 161 readLine(); 162 if (line[0] != '%') 163 break; 164 } 165 // Next line contains M N NNZ. 166 idata[0] = 2; // rank 167 if (sscanf(line, "%" PRIu64 "%" PRIu64 "%" PRIu64 "\n", idata + 2, idata + 3, 168 idata + 1) != 3) 169 MLIR_SPARSETENSOR_FATAL("Cannot find size in %s\n", filename); 170 } 171 172 /// Read the "extended" FROSTT header. Although not part of the documented 173 /// format, we assume that the file starts with optional comments followed 174 /// by two lines that define the rank, the number of nonzeros, and the 175 /// dimensions sizes (one per rank) of the sparse tensor. 176 void SparseTensorReader::readExtFROSTTHeader() { 177 // Skip comments. 178 while (true) { 179 readLine(); 180 if (line[0] != '#') 181 break; 182 } 183 // Next line contains RANK and NNZ. 184 if (sscanf(line, "%" PRIu64 "%" PRIu64 "\n", idata, idata + 1) != 2) 185 MLIR_SPARSETENSOR_FATAL("Cannot find metadata in %s\n", filename); 186 // Followed by a line with the dimension sizes (one per rank). 187 for (uint64_t r = 0; r < idata[0]; ++r) 188 if (fscanf(file, "%" PRIu64, idata + 2 + r) != 1) 189 MLIR_SPARSETENSOR_FATAL("Cannot find dimension size %s\n", filename); 190 readLine(); // end of line 191 // The FROSTT format does not define the data type of the nonzero elements. 192 valueKind_ = ValueKind::kUndefined; 193 } 194