//===- YAMLParser.cpp - Simple YAML parser --------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//  This file implements a YAML parser.
//
//===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric
130b57cec5SDimitry Andric #include "llvm/Support/YAMLParser.h"
140b57cec5SDimitry Andric #include "llvm/ADT/AllocatorList.h"
150b57cec5SDimitry Andric #include "llvm/ADT/ArrayRef.h"
160b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h"
170b57cec5SDimitry Andric #include "llvm/ADT/SmallString.h"
180b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
190b57cec5SDimitry Andric #include "llvm/ADT/StringExtras.h"
200b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h"
210b57cec5SDimitry Andric #include "llvm/ADT/Twine.h"
220b57cec5SDimitry Andric #include "llvm/Support/Compiler.h"
230b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h"
240b57cec5SDimitry Andric #include "llvm/Support/MemoryBuffer.h"
250b57cec5SDimitry Andric #include "llvm/Support/SMLoc.h"
260b57cec5SDimitry Andric #include "llvm/Support/SourceMgr.h"
270b57cec5SDimitry Andric #include "llvm/Support/Unicode.h"
280b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
290b57cec5SDimitry Andric #include <cassert>
300b57cec5SDimitry Andric #include <cstddef>
310b57cec5SDimitry Andric #include <cstdint>
320b57cec5SDimitry Andric #include <map>
330b57cec5SDimitry Andric #include <memory>
340b57cec5SDimitry Andric #include <string>
350b57cec5SDimitry Andric #include <system_error>
360b57cec5SDimitry Andric #include <utility>
370b57cec5SDimitry Andric
380b57cec5SDimitry Andric using namespace llvm;
390b57cec5SDimitry Andric using namespace yaml;
400b57cec5SDimitry Andric
/// The Unicode encoding forms that encoding detection can report.
enum UnicodeEncodingForm {
  /// UTF-32 Little Endian
  UEF_UTF32_LE,
  /// UTF-32 Big Endian
  UEF_UTF32_BE,
  /// UTF-16 Little Endian
  UEF_UTF16_LE,
  /// UTF-16 Big Endian
  UEF_UTF16_BE,
  /// UTF-8 or ascii.
  UEF_UTF8,
  /// Not a valid Unicode encoding.
  UEF_Unknown
};

/// EncodingInfo - Pairs an encoding form (first) with the length of the byte
/// order mark (second), if one exists. The length is one of {0, 2, 3, 4}.
using EncodingInfo = std::pair<UnicodeEncodingForm, unsigned>;
530b57cec5SDimitry Andric
540b57cec5SDimitry Andric /// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode
550b57cec5SDimitry Andric /// encoding form of \a Input.
560b57cec5SDimitry Andric ///
570b57cec5SDimitry Andric /// @param Input A string of length 0 or more.
580b57cec5SDimitry Andric /// @returns An EncodingInfo indicating the Unicode encoding form of the input
590b57cec5SDimitry Andric /// and how long the byte order mark is if one exists.
getUnicodeEncoding(StringRef Input)600b57cec5SDimitry Andric static EncodingInfo getUnicodeEncoding(StringRef Input) {
610b57cec5SDimitry Andric if (Input.empty())
620b57cec5SDimitry Andric return std::make_pair(UEF_Unknown, 0);
630b57cec5SDimitry Andric
640b57cec5SDimitry Andric switch (uint8_t(Input[0])) {
650b57cec5SDimitry Andric case 0x00:
660b57cec5SDimitry Andric if (Input.size() >= 4) {
670b57cec5SDimitry Andric if ( Input[1] == 0
680b57cec5SDimitry Andric && uint8_t(Input[2]) == 0xFE
690b57cec5SDimitry Andric && uint8_t(Input[3]) == 0xFF)
700b57cec5SDimitry Andric return std::make_pair(UEF_UTF32_BE, 4);
710b57cec5SDimitry Andric if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0)
720b57cec5SDimitry Andric return std::make_pair(UEF_UTF32_BE, 0);
730b57cec5SDimitry Andric }
740b57cec5SDimitry Andric
750b57cec5SDimitry Andric if (Input.size() >= 2 && Input[1] != 0)
760b57cec5SDimitry Andric return std::make_pair(UEF_UTF16_BE, 0);
770b57cec5SDimitry Andric return std::make_pair(UEF_Unknown, 0);
780b57cec5SDimitry Andric case 0xFF:
790b57cec5SDimitry Andric if ( Input.size() >= 4
800b57cec5SDimitry Andric && uint8_t(Input[1]) == 0xFE
810b57cec5SDimitry Andric && Input[2] == 0
820b57cec5SDimitry Andric && Input[3] == 0)
830b57cec5SDimitry Andric return std::make_pair(UEF_UTF32_LE, 4);
840b57cec5SDimitry Andric
850b57cec5SDimitry Andric if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE)
860b57cec5SDimitry Andric return std::make_pair(UEF_UTF16_LE, 2);
870b57cec5SDimitry Andric return std::make_pair(UEF_Unknown, 0);
880b57cec5SDimitry Andric case 0xFE:
890b57cec5SDimitry Andric if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF)
900b57cec5SDimitry Andric return std::make_pair(UEF_UTF16_BE, 2);
910b57cec5SDimitry Andric return std::make_pair(UEF_Unknown, 0);
920b57cec5SDimitry Andric case 0xEF:
930b57cec5SDimitry Andric if ( Input.size() >= 3
940b57cec5SDimitry Andric && uint8_t(Input[1]) == 0xBB
950b57cec5SDimitry Andric && uint8_t(Input[2]) == 0xBF)
960b57cec5SDimitry Andric return std::make_pair(UEF_UTF8, 3);
970b57cec5SDimitry Andric return std::make_pair(UEF_Unknown, 0);
980b57cec5SDimitry Andric }
990b57cec5SDimitry Andric
1000b57cec5SDimitry Andric // It could still be utf-32 or utf-16.
1010b57cec5SDimitry Andric if (Input.size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0)
1020b57cec5SDimitry Andric return std::make_pair(UEF_UTF32_LE, 0);
1030b57cec5SDimitry Andric
1040b57cec5SDimitry Andric if (Input.size() >= 2 && Input[1] == 0)
1050b57cec5SDimitry Andric return std::make_pair(UEF_UTF16_LE, 0);
1060b57cec5SDimitry Andric
1070b57cec5SDimitry Andric return std::make_pair(UEF_UTF8, 0);
1080b57cec5SDimitry Andric }
1090b57cec5SDimitry Andric
1100b57cec5SDimitry Andric /// Pin the vtables to this file.
anchor()1110b57cec5SDimitry Andric void Node::anchor() {}
anchor()1120b57cec5SDimitry Andric void NullNode::anchor() {}
anchor()1130b57cec5SDimitry Andric void ScalarNode::anchor() {}
anchor()1140b57cec5SDimitry Andric void BlockScalarNode::anchor() {}
anchor()1150b57cec5SDimitry Andric void KeyValueNode::anchor() {}
anchor()1160b57cec5SDimitry Andric void MappingNode::anchor() {}
anchor()1170b57cec5SDimitry Andric void SequenceNode::anchor() {}
anchor()1180b57cec5SDimitry Andric void AliasNode::anchor() {}
1190b57cec5SDimitry Andric
1200b57cec5SDimitry Andric namespace llvm {
1210b57cec5SDimitry Andric namespace yaml {
1220b57cec5SDimitry Andric
1230b57cec5SDimitry Andric /// Token - A single YAML token.
1240b57cec5SDimitry Andric struct Token {
1250b57cec5SDimitry Andric enum TokenKind {
1260b57cec5SDimitry Andric TK_Error, // Uninitialized token.
1270b57cec5SDimitry Andric TK_StreamStart,
1280b57cec5SDimitry Andric TK_StreamEnd,
1290b57cec5SDimitry Andric TK_VersionDirective,
1300b57cec5SDimitry Andric TK_TagDirective,
1310b57cec5SDimitry Andric TK_DocumentStart,
1320b57cec5SDimitry Andric TK_DocumentEnd,
1330b57cec5SDimitry Andric TK_BlockEntry,
1340b57cec5SDimitry Andric TK_BlockEnd,
1350b57cec5SDimitry Andric TK_BlockSequenceStart,
1360b57cec5SDimitry Andric TK_BlockMappingStart,
1370b57cec5SDimitry Andric TK_FlowEntry,
1380b57cec5SDimitry Andric TK_FlowSequenceStart,
1390b57cec5SDimitry Andric TK_FlowSequenceEnd,
1400b57cec5SDimitry Andric TK_FlowMappingStart,
1410b57cec5SDimitry Andric TK_FlowMappingEnd,
1420b57cec5SDimitry Andric TK_Key,
1430b57cec5SDimitry Andric TK_Value,
1440b57cec5SDimitry Andric TK_Scalar,
1450b57cec5SDimitry Andric TK_BlockScalar,
1460b57cec5SDimitry Andric TK_Alias,
1470b57cec5SDimitry Andric TK_Anchor,
1480b57cec5SDimitry Andric TK_Tag
1490b57cec5SDimitry Andric } Kind = TK_Error;
1500b57cec5SDimitry Andric
1510b57cec5SDimitry Andric /// A string of length 0 or more whose begin() points to the logical location
1520b57cec5SDimitry Andric /// of the token in the input.
1530b57cec5SDimitry Andric StringRef Range;
1540b57cec5SDimitry Andric
1550b57cec5SDimitry Andric /// The value of a block scalar node.
1560b57cec5SDimitry Andric std::string Value;
1570b57cec5SDimitry Andric
1580b57cec5SDimitry Andric Token() = default;
1590b57cec5SDimitry Andric };
1600b57cec5SDimitry Andric
1610b57cec5SDimitry Andric } // end namespace yaml
1620b57cec5SDimitry Andric } // end namespace llvm
1630b57cec5SDimitry Andric
1640b57cec5SDimitry Andric using TokenQueueT = BumpPtrList<Token>;
1650b57cec5SDimitry Andric
1660b57cec5SDimitry Andric namespace {
1670b57cec5SDimitry Andric
1680b57cec5SDimitry Andric /// This struct is used to track simple keys.
1690b57cec5SDimitry Andric ///
1700b57cec5SDimitry Andric /// Simple keys are handled by creating an entry in SimpleKeys for each Token
1710b57cec5SDimitry Andric /// which could legally be the start of a simple key. When peekNext is called,
1720b57cec5SDimitry Andric /// if the Token To be returned is referenced by a SimpleKey, we continue
1730b57cec5SDimitry Andric /// tokenizing until that potential simple key has either been found to not be
1740b57cec5SDimitry Andric /// a simple key (we moved on to the next line or went further than 1024 chars).
1750b57cec5SDimitry Andric /// Or when we run into a Value, and then insert a Key token (and possibly
1760b57cec5SDimitry Andric /// others) before the SimpleKey's Tok.
1770b57cec5SDimitry Andric struct SimpleKey {
1780b57cec5SDimitry Andric TokenQueueT::iterator Tok;
179480093f4SDimitry Andric unsigned Column = 0;
180480093f4SDimitry Andric unsigned Line = 0;
181480093f4SDimitry Andric unsigned FlowLevel = 0;
182480093f4SDimitry Andric bool IsRequired = false;
1830b57cec5SDimitry Andric
operator ==__anon4ecbf6ee0111::SimpleKey1840b57cec5SDimitry Andric bool operator ==(const SimpleKey &Other) {
1850b57cec5SDimitry Andric return Tok == Other.Tok;
1860b57cec5SDimitry Andric }
1870b57cec5SDimitry Andric };
1880b57cec5SDimitry Andric
1890b57cec5SDimitry Andric } // end anonymous namespace
1900b57cec5SDimitry Andric
1910b57cec5SDimitry Andric /// The Unicode scalar value of a UTF-8 minimal well-formed code unit
1920b57cec5SDimitry Andric /// subsequence and the subsequence's length in code units (uint8_t).
1930b57cec5SDimitry Andric /// A length of 0 represents an error.
1940b57cec5SDimitry Andric using UTF8Decoded = std::pair<uint32_t, unsigned>;
1950b57cec5SDimitry Andric
decodeUTF8(StringRef Range)1960b57cec5SDimitry Andric static UTF8Decoded decodeUTF8(StringRef Range) {
1970b57cec5SDimitry Andric StringRef::iterator Position= Range.begin();
1980b57cec5SDimitry Andric StringRef::iterator End = Range.end();
1990b57cec5SDimitry Andric // 1 byte: [0x00, 0x7f]
2000b57cec5SDimitry Andric // Bit pattern: 0xxxxxxx
201e8d8bef9SDimitry Andric if (Position < End && (*Position & 0x80) == 0) {
2020b57cec5SDimitry Andric return std::make_pair(*Position, 1);
2030b57cec5SDimitry Andric }
2040b57cec5SDimitry Andric // 2 bytes: [0x80, 0x7ff]
2050b57cec5SDimitry Andric // Bit pattern: 110xxxxx 10xxxxxx
206e8d8bef9SDimitry Andric if (Position + 1 < End && ((*Position & 0xE0) == 0xC0) &&
2070b57cec5SDimitry Andric ((*(Position + 1) & 0xC0) == 0x80)) {
2080b57cec5SDimitry Andric uint32_t codepoint = ((*Position & 0x1F) << 6) |
2090b57cec5SDimitry Andric (*(Position + 1) & 0x3F);
2100b57cec5SDimitry Andric if (codepoint >= 0x80)
2110b57cec5SDimitry Andric return std::make_pair(codepoint, 2);
2120b57cec5SDimitry Andric }
2130b57cec5SDimitry Andric // 3 bytes: [0x8000, 0xffff]
2140b57cec5SDimitry Andric // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx
215e8d8bef9SDimitry Andric if (Position + 2 < End && ((*Position & 0xF0) == 0xE0) &&
2160b57cec5SDimitry Andric ((*(Position + 1) & 0xC0) == 0x80) &&
2170b57cec5SDimitry Andric ((*(Position + 2) & 0xC0) == 0x80)) {
2180b57cec5SDimitry Andric uint32_t codepoint = ((*Position & 0x0F) << 12) |
2190b57cec5SDimitry Andric ((*(Position + 1) & 0x3F) << 6) |
2200b57cec5SDimitry Andric (*(Position + 2) & 0x3F);
2210b57cec5SDimitry Andric // Codepoints between 0xD800 and 0xDFFF are invalid, as
2220b57cec5SDimitry Andric // they are high / low surrogate halves used by UTF-16.
2230b57cec5SDimitry Andric if (codepoint >= 0x800 &&
2240b57cec5SDimitry Andric (codepoint < 0xD800 || codepoint > 0xDFFF))
2250b57cec5SDimitry Andric return std::make_pair(codepoint, 3);
2260b57cec5SDimitry Andric }
2270b57cec5SDimitry Andric // 4 bytes: [0x10000, 0x10FFFF]
2280b57cec5SDimitry Andric // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
229e8d8bef9SDimitry Andric if (Position + 3 < End && ((*Position & 0xF8) == 0xF0) &&
2300b57cec5SDimitry Andric ((*(Position + 1) & 0xC0) == 0x80) &&
2310b57cec5SDimitry Andric ((*(Position + 2) & 0xC0) == 0x80) &&
2320b57cec5SDimitry Andric ((*(Position + 3) & 0xC0) == 0x80)) {
2330b57cec5SDimitry Andric uint32_t codepoint = ((*Position & 0x07) << 18) |
2340b57cec5SDimitry Andric ((*(Position + 1) & 0x3F) << 12) |
2350b57cec5SDimitry Andric ((*(Position + 2) & 0x3F) << 6) |
2360b57cec5SDimitry Andric (*(Position + 3) & 0x3F);
2370b57cec5SDimitry Andric if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)
2380b57cec5SDimitry Andric return std::make_pair(codepoint, 4);
2390b57cec5SDimitry Andric }
2400b57cec5SDimitry Andric return std::make_pair(0, 0);
2410b57cec5SDimitry Andric }
2420b57cec5SDimitry Andric
2430b57cec5SDimitry Andric namespace llvm {
2440b57cec5SDimitry Andric namespace yaml {
2450b57cec5SDimitry Andric
2460b57cec5SDimitry Andric /// Scans YAML tokens from a MemoryBuffer.
2470b57cec5SDimitry Andric class Scanner {
2480b57cec5SDimitry Andric public:
2490b57cec5SDimitry Andric Scanner(StringRef Input, SourceMgr &SM, bool ShowColors = true,
2500b57cec5SDimitry Andric std::error_code *EC = nullptr);
2510b57cec5SDimitry Andric Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors = true,
2520b57cec5SDimitry Andric std::error_code *EC = nullptr);
2530b57cec5SDimitry Andric
2540b57cec5SDimitry Andric /// Parse the next token and return it without popping it.
2550b57cec5SDimitry Andric Token &peekNext();
2560b57cec5SDimitry Andric
2570b57cec5SDimitry Andric /// Parse the next token and pop it from the queue.
2580b57cec5SDimitry Andric Token getNext();
2590b57cec5SDimitry Andric
printError(SMLoc Loc,SourceMgr::DiagKind Kind,const Twine & Message,ArrayRef<SMRange> Ranges=std::nullopt)2600b57cec5SDimitry Andric void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message,
261bdd1243dSDimitry Andric ArrayRef<SMRange> Ranges = std::nullopt) {
262bdd1243dSDimitry Andric SM.PrintMessage(Loc, Kind, Message, Ranges, /* FixIts= */ std::nullopt,
263bdd1243dSDimitry Andric ShowColors);
2640b57cec5SDimitry Andric }
2650b57cec5SDimitry Andric
setError(const Twine & Message,StringRef::iterator Position)2660b57cec5SDimitry Andric void setError(const Twine &Message, StringRef::iterator Position) {
2675ffd83dbSDimitry Andric if (Position >= End)
2685ffd83dbSDimitry Andric Position = End - 1;
2690b57cec5SDimitry Andric
2700b57cec5SDimitry Andric // propagate the error if possible
2710b57cec5SDimitry Andric if (EC)
2720b57cec5SDimitry Andric *EC = make_error_code(std::errc::invalid_argument);
2730b57cec5SDimitry Andric
2740b57cec5SDimitry Andric // Don't print out more errors after the first one we encounter. The rest
2750b57cec5SDimitry Andric // are just the result of the first, and have no meaning.
2760b57cec5SDimitry Andric if (!Failed)
2775ffd83dbSDimitry Andric printError(SMLoc::getFromPointer(Position), SourceMgr::DK_Error, Message);
2780b57cec5SDimitry Andric Failed = true;
2790b57cec5SDimitry Andric }
2800b57cec5SDimitry Andric
2810b57cec5SDimitry Andric /// Returns true if an error occurred while parsing.
failed()2820b57cec5SDimitry Andric bool failed() {
2830b57cec5SDimitry Andric return Failed;
2840b57cec5SDimitry Andric }
2850b57cec5SDimitry Andric
2860b57cec5SDimitry Andric private:
2870b57cec5SDimitry Andric void init(MemoryBufferRef Buffer);
2880b57cec5SDimitry Andric
currentInput()2890b57cec5SDimitry Andric StringRef currentInput() {
2900b57cec5SDimitry Andric return StringRef(Current, End - Current);
2910b57cec5SDimitry Andric }
2920b57cec5SDimitry Andric
2930b57cec5SDimitry Andric /// Decode a UTF-8 minimal well-formed code unit subsequence starting
2940b57cec5SDimitry Andric /// at \a Position.
2950b57cec5SDimitry Andric ///
2960b57cec5SDimitry Andric /// If the UTF-8 code units starting at Position do not form a well-formed
2970b57cec5SDimitry Andric /// code unit subsequence, then the Unicode scalar value is 0, and the length
2980b57cec5SDimitry Andric /// is 0.
decodeUTF8(StringRef::iterator Position)2990b57cec5SDimitry Andric UTF8Decoded decodeUTF8(StringRef::iterator Position) {
3000b57cec5SDimitry Andric return ::decodeUTF8(StringRef(Position, End - Position));
3010b57cec5SDimitry Andric }
3020b57cec5SDimitry Andric
3030b57cec5SDimitry Andric // The following functions are based on the gramar rules in the YAML spec. The
3040b57cec5SDimitry Andric // style of the function names it meant to closely match how they are written
3050b57cec5SDimitry Andric // in the spec. The number within the [] is the number of the grammar rule in
3060b57cec5SDimitry Andric // the spec.
3070b57cec5SDimitry Andric //
3080b57cec5SDimitry Andric // See 4.2 [Production Naming Conventions] for the meaning of the prefixes.
3090b57cec5SDimitry Andric //
3100b57cec5SDimitry Andric // c-
3110b57cec5SDimitry Andric // A production starting and ending with a special character.
3120b57cec5SDimitry Andric // b-
3130b57cec5SDimitry Andric // A production matching a single line break.
3140b57cec5SDimitry Andric // nb-
3150b57cec5SDimitry Andric // A production starting and ending with a non-break character.
3160b57cec5SDimitry Andric // s-
3170b57cec5SDimitry Andric // A production starting and ending with a white space character.
3180b57cec5SDimitry Andric // ns-
3190b57cec5SDimitry Andric // A production starting and ending with a non-space character.
3200b57cec5SDimitry Andric // l-
3210b57cec5SDimitry Andric // A production matching complete line(s).
3220b57cec5SDimitry Andric
3230b57cec5SDimitry Andric /// Skip a single nb-char[27] starting at Position.
3240b57cec5SDimitry Andric ///
3250b57cec5SDimitry Andric /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE]
3260b57cec5SDimitry Andric /// | [0xFF00-0xFFFD] | [0x10000-0x10FFFF]
3270b57cec5SDimitry Andric ///
3280b57cec5SDimitry Andric /// @returns The code unit after the nb-char, or Position if it's not an
3290b57cec5SDimitry Andric /// nb-char.
3300b57cec5SDimitry Andric StringRef::iterator skip_nb_char(StringRef::iterator Position);
3310b57cec5SDimitry Andric
3320b57cec5SDimitry Andric /// Skip a single b-break[28] starting at Position.
3330b57cec5SDimitry Andric ///
3340b57cec5SDimitry Andric /// A b-break is 0xD 0xA | 0xD | 0xA
3350b57cec5SDimitry Andric ///
3360b57cec5SDimitry Andric /// @returns The code unit after the b-break, or Position if it's not a
3370b57cec5SDimitry Andric /// b-break.
3380b57cec5SDimitry Andric StringRef::iterator skip_b_break(StringRef::iterator Position);
3390b57cec5SDimitry Andric
3400b57cec5SDimitry Andric /// Skip a single s-space[31] starting at Position.
3410b57cec5SDimitry Andric ///
3420b57cec5SDimitry Andric /// An s-space is 0x20
3430b57cec5SDimitry Andric ///
3440b57cec5SDimitry Andric /// @returns The code unit after the s-space, or Position if it's not a
3450b57cec5SDimitry Andric /// s-space.
3460b57cec5SDimitry Andric StringRef::iterator skip_s_space(StringRef::iterator Position);
3470b57cec5SDimitry Andric
3480b57cec5SDimitry Andric /// Skip a single s-white[33] starting at Position.
3490b57cec5SDimitry Andric ///
3500b57cec5SDimitry Andric /// A s-white is 0x20 | 0x9
3510b57cec5SDimitry Andric ///
3520b57cec5SDimitry Andric /// @returns The code unit after the s-white, or Position if it's not a
3530b57cec5SDimitry Andric /// s-white.
3540b57cec5SDimitry Andric StringRef::iterator skip_s_white(StringRef::iterator Position);
3550b57cec5SDimitry Andric
3560b57cec5SDimitry Andric /// Skip a single ns-char[34] starting at Position.
3570b57cec5SDimitry Andric ///
3580b57cec5SDimitry Andric /// A ns-char is nb-char - s-white
3590b57cec5SDimitry Andric ///
3600b57cec5SDimitry Andric /// @returns The code unit after the ns-char, or Position if it's not a
3610b57cec5SDimitry Andric /// ns-char.
3620b57cec5SDimitry Andric StringRef::iterator skip_ns_char(StringRef::iterator Position);
3630b57cec5SDimitry Andric
3640b57cec5SDimitry Andric using SkipWhileFunc = StringRef::iterator (Scanner::*)(StringRef::iterator);
3650b57cec5SDimitry Andric
3660b57cec5SDimitry Andric /// Skip minimal well-formed code unit subsequences until Func
3670b57cec5SDimitry Andric /// returns its input.
3680b57cec5SDimitry Andric ///
3690b57cec5SDimitry Andric /// @returns The code unit after the last minimal well-formed code unit
3700b57cec5SDimitry Andric /// subsequence that Func accepted.
3710b57cec5SDimitry Andric StringRef::iterator skip_while( SkipWhileFunc Func
3720b57cec5SDimitry Andric , StringRef::iterator Position);
3730b57cec5SDimitry Andric
3740b57cec5SDimitry Andric /// Skip minimal well-formed code unit subsequences until Func returns its
3750b57cec5SDimitry Andric /// input.
3760b57cec5SDimitry Andric void advanceWhile(SkipWhileFunc Func);
3770b57cec5SDimitry Andric
3780b57cec5SDimitry Andric /// Scan ns-uri-char[39]s starting at Cur.
3790b57cec5SDimitry Andric ///
3800b57cec5SDimitry Andric /// This updates Cur and Column while scanning.
3810b57cec5SDimitry Andric void scan_ns_uri_char();
3820b57cec5SDimitry Andric
3830b57cec5SDimitry Andric /// Consume a minimal well-formed code unit subsequence starting at
3840b57cec5SDimitry Andric /// \a Cur. Return false if it is not the same Unicode scalar value as
3850b57cec5SDimitry Andric /// \a Expected. This updates \a Column.
3860b57cec5SDimitry Andric bool consume(uint32_t Expected);
3870b57cec5SDimitry Andric
3880b57cec5SDimitry Andric /// Skip \a Distance UTF-8 code units. Updates \a Cur and \a Column.
3890b57cec5SDimitry Andric void skip(uint32_t Distance);
3900b57cec5SDimitry Andric
3910b57cec5SDimitry Andric /// Return true if the minimal well-formed code unit subsequence at
3920b57cec5SDimitry Andric /// Pos is whitespace or a new line
3930b57cec5SDimitry Andric bool isBlankOrBreak(StringRef::iterator Position);
3940b57cec5SDimitry Andric
395*5f757f3fSDimitry Andric /// Return true if the minimal well-formed code unit subsequence at
396*5f757f3fSDimitry Andric /// Pos is considered a "safe" character for plain scalars.
397*5f757f3fSDimitry Andric bool isPlainSafeNonBlank(StringRef::iterator Position);
398*5f757f3fSDimitry Andric
39981ad6265SDimitry Andric /// Return true if the line is a line break, false otherwise.
40081ad6265SDimitry Andric bool isLineEmpty(StringRef Line);
40181ad6265SDimitry Andric
4020b57cec5SDimitry Andric /// Consume a single b-break[28] if it's present at the current position.
4030b57cec5SDimitry Andric ///
4040b57cec5SDimitry Andric /// Return false if the code unit at the current position isn't a line break.
4050b57cec5SDimitry Andric bool consumeLineBreakIfPresent();
4060b57cec5SDimitry Andric
4070b57cec5SDimitry Andric /// If IsSimpleKeyAllowed, create and push_back a new SimpleKey.
4080b57cec5SDimitry Andric void saveSimpleKeyCandidate( TokenQueueT::iterator Tok
4090b57cec5SDimitry Andric , unsigned AtColumn
4100b57cec5SDimitry Andric , bool IsRequired);
4110b57cec5SDimitry Andric
4120b57cec5SDimitry Andric /// Remove simple keys that can no longer be valid simple keys.
4130b57cec5SDimitry Andric ///
4140b57cec5SDimitry Andric /// Invalid simple keys are not on the current line or are further than 1024
4150b57cec5SDimitry Andric /// columns back.
4160b57cec5SDimitry Andric void removeStaleSimpleKeyCandidates();
4170b57cec5SDimitry Andric
4180b57cec5SDimitry Andric /// Remove all simple keys on FlowLevel \a Level.
4190b57cec5SDimitry Andric void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level);
4200b57cec5SDimitry Andric
4210b57cec5SDimitry Andric /// Unroll indentation in \a Indents back to \a Col. Creates BlockEnd
4220b57cec5SDimitry Andric /// tokens if needed.
4230b57cec5SDimitry Andric bool unrollIndent(int ToColumn);
4240b57cec5SDimitry Andric
4250b57cec5SDimitry Andric /// Increase indent to \a Col. Creates \a Kind token at \a InsertPoint
4260b57cec5SDimitry Andric /// if needed.
4270b57cec5SDimitry Andric bool rollIndent( int ToColumn
4280b57cec5SDimitry Andric , Token::TokenKind Kind
4290b57cec5SDimitry Andric , TokenQueueT::iterator InsertPoint);
4300b57cec5SDimitry Andric
4310b57cec5SDimitry Andric /// Skip a single-line comment when the comment starts at the current
4320b57cec5SDimitry Andric /// position of the scanner.
4330b57cec5SDimitry Andric void skipComment();
4340b57cec5SDimitry Andric
4350b57cec5SDimitry Andric /// Skip whitespace and comments until the start of the next token.
4360b57cec5SDimitry Andric void scanToNextToken();
4370b57cec5SDimitry Andric
4380b57cec5SDimitry Andric /// Must be the first token generated.
4390b57cec5SDimitry Andric bool scanStreamStart();
4400b57cec5SDimitry Andric
4410b57cec5SDimitry Andric /// Generate tokens needed to close out the stream.
4420b57cec5SDimitry Andric bool scanStreamEnd();
4430b57cec5SDimitry Andric
4440b57cec5SDimitry Andric /// Scan a %BLAH directive.
4450b57cec5SDimitry Andric bool scanDirective();
4460b57cec5SDimitry Andric
4470b57cec5SDimitry Andric /// Scan a ... or ---.
4480b57cec5SDimitry Andric bool scanDocumentIndicator(bool IsStart);
4490b57cec5SDimitry Andric
4500b57cec5SDimitry Andric /// Scan a [ or { and generate the proper flow collection start token.
4510b57cec5SDimitry Andric bool scanFlowCollectionStart(bool IsSequence);
4520b57cec5SDimitry Andric
4530b57cec5SDimitry Andric /// Scan a ] or } and generate the proper flow collection end token.
4540b57cec5SDimitry Andric bool scanFlowCollectionEnd(bool IsSequence);
4550b57cec5SDimitry Andric
4560b57cec5SDimitry Andric /// Scan the , that separates entries in a flow collection.
4570b57cec5SDimitry Andric bool scanFlowEntry();
4580b57cec5SDimitry Andric
4590b57cec5SDimitry Andric /// Scan the - that starts block sequence entries.
4600b57cec5SDimitry Andric bool scanBlockEntry();
4610b57cec5SDimitry Andric
4620b57cec5SDimitry Andric /// Scan an explicit ? indicating a key.
4630b57cec5SDimitry Andric bool scanKey();
4640b57cec5SDimitry Andric
4650b57cec5SDimitry Andric /// Scan an explicit : indicating a value.
4660b57cec5SDimitry Andric bool scanValue();
4670b57cec5SDimitry Andric
4680b57cec5SDimitry Andric /// Scan a quoted scalar.
4690b57cec5SDimitry Andric bool scanFlowScalar(bool IsDoubleQuoted);
4700b57cec5SDimitry Andric
4710b57cec5SDimitry Andric /// Scan an unquoted scalar.
4720b57cec5SDimitry Andric bool scanPlainScalar();
4730b57cec5SDimitry Andric
4740b57cec5SDimitry Andric /// Scan an Alias or Anchor starting with * or &.
4750b57cec5SDimitry Andric bool scanAliasOrAnchor(bool IsAlias);
4760b57cec5SDimitry Andric
4770b57cec5SDimitry Andric /// Scan a block scalar starting with | or >.
4780b57cec5SDimitry Andric bool scanBlockScalar(bool IsLiteral);
4790b57cec5SDimitry Andric
48081ad6265SDimitry Andric /// Scan a block scalar style indicator and header.
48181ad6265SDimitry Andric ///
48281ad6265SDimitry Andric /// Note: This is distinct from scanBlockScalarHeader to mirror the fact that
48381ad6265SDimitry Andric /// YAML does not consider the style indicator to be a part of the header.
48481ad6265SDimitry Andric ///
48581ad6265SDimitry Andric /// Return false if an error occurred.
48681ad6265SDimitry Andric bool scanBlockScalarIndicators(char &StyleIndicator, char &ChompingIndicator,
48781ad6265SDimitry Andric unsigned &IndentIndicator, bool &IsDone);
48881ad6265SDimitry Andric
48981ad6265SDimitry Andric /// Scan a style indicator in a block scalar header.
49081ad6265SDimitry Andric char scanBlockStyleIndicator();
49181ad6265SDimitry Andric
4920b57cec5SDimitry Andric /// Scan a chomping indicator in a block scalar header.
4930b57cec5SDimitry Andric char scanBlockChompingIndicator();
4940b57cec5SDimitry Andric
4950b57cec5SDimitry Andric /// Scan an indentation indicator in a block scalar header.
4960b57cec5SDimitry Andric unsigned scanBlockIndentationIndicator();
4970b57cec5SDimitry Andric
4980b57cec5SDimitry Andric /// Scan a block scalar header.
4990b57cec5SDimitry Andric ///
5000b57cec5SDimitry Andric /// Return false if an error occurred.
5010b57cec5SDimitry Andric bool scanBlockScalarHeader(char &ChompingIndicator, unsigned &IndentIndicator,
5020b57cec5SDimitry Andric bool &IsDone);
5030b57cec5SDimitry Andric
5040b57cec5SDimitry Andric /// Look for the indentation level of a block scalar.
5050b57cec5SDimitry Andric ///
5060b57cec5SDimitry Andric /// Return false if an error occurred.
5070b57cec5SDimitry Andric bool findBlockScalarIndent(unsigned &BlockIndent, unsigned BlockExitIndent,
5080b57cec5SDimitry Andric unsigned &LineBreaks, bool &IsDone);
5090b57cec5SDimitry Andric
5100b57cec5SDimitry Andric /// Scan the indentation of a text line in a block scalar.
5110b57cec5SDimitry Andric ///
5120b57cec5SDimitry Andric /// Return false if an error occurred.
5130b57cec5SDimitry Andric bool scanBlockScalarIndent(unsigned BlockIndent, unsigned BlockExitIndent,
5140b57cec5SDimitry Andric bool &IsDone);
5150b57cec5SDimitry Andric
5160b57cec5SDimitry Andric /// Scan a tag of the form !stuff.
5170b57cec5SDimitry Andric bool scanTag();
5180b57cec5SDimitry Andric
5190b57cec5SDimitry Andric /// Dispatch to the next scanning function based on \a *Cur.
5200b57cec5SDimitry Andric bool fetchMoreTokens();
5210b57cec5SDimitry Andric
5220b57cec5SDimitry Andric /// The SourceMgr used for diagnostics and buffer management.
5230b57cec5SDimitry Andric SourceMgr &SM;
5240b57cec5SDimitry Andric
5250b57cec5SDimitry Andric /// The original input.
5260b57cec5SDimitry Andric MemoryBufferRef InputBuffer;
5270b57cec5SDimitry Andric
5280b57cec5SDimitry Andric /// The current position of the scanner.
5290b57cec5SDimitry Andric StringRef::iterator Current;
5300b57cec5SDimitry Andric
5310b57cec5SDimitry Andric /// The end of the input (one past the last character).
5320b57cec5SDimitry Andric StringRef::iterator End;
5330b57cec5SDimitry Andric
5340b57cec5SDimitry Andric /// Current YAML indentation level in spaces.
5350b57cec5SDimitry Andric int Indent;
5360b57cec5SDimitry Andric
5370b57cec5SDimitry Andric /// Current column number in Unicode code points.
5380b57cec5SDimitry Andric unsigned Column;
5390b57cec5SDimitry Andric
5400b57cec5SDimitry Andric /// Current line number.
5410b57cec5SDimitry Andric unsigned Line;
5420b57cec5SDimitry Andric
5430b57cec5SDimitry Andric /// How deep we are in flow style containers. 0 Means at block level.
5440b57cec5SDimitry Andric unsigned FlowLevel;
5450b57cec5SDimitry Andric
5460b57cec5SDimitry Andric /// Are we at the start of the stream?
5470b57cec5SDimitry Andric bool IsStartOfStream;
5480b57cec5SDimitry Andric
5490b57cec5SDimitry Andric /// Can the next token be the start of a simple key?
5500b57cec5SDimitry Andric bool IsSimpleKeyAllowed;
5510b57cec5SDimitry Andric
552*5f757f3fSDimitry Andric /// Can the next token be a value indicator even if it does not have a
553*5f757f3fSDimitry Andric /// trailing space?
554*5f757f3fSDimitry Andric bool IsAdjacentValueAllowedInFlow;
555*5f757f3fSDimitry Andric
5560b57cec5SDimitry Andric /// True if an error has occurred.
5570b57cec5SDimitry Andric bool Failed;
5580b57cec5SDimitry Andric
5590b57cec5SDimitry Andric /// Should colors be used when printing out the diagnostic messages?
5600b57cec5SDimitry Andric bool ShowColors;
5610b57cec5SDimitry Andric
5620b57cec5SDimitry Andric /// Queue of tokens. This is required to queue up tokens while looking
5630b57cec5SDimitry Andric /// for the end of a simple key. And for cases where a single character
5640b57cec5SDimitry Andric /// can produce multiple tokens (e.g. BlockEnd).
5650b57cec5SDimitry Andric TokenQueueT TokenQueue;
5660b57cec5SDimitry Andric
5670b57cec5SDimitry Andric /// Indentation levels.
5680b57cec5SDimitry Andric SmallVector<int, 4> Indents;
5690b57cec5SDimitry Andric
5700b57cec5SDimitry Andric /// Potential simple keys.
5710b57cec5SDimitry Andric SmallVector<SimpleKey, 4> SimpleKeys;
5720b57cec5SDimitry Andric
5730b57cec5SDimitry Andric std::error_code *EC;
5740b57cec5SDimitry Andric };
5750b57cec5SDimitry Andric
5760b57cec5SDimitry Andric } // end namespace yaml
5770b57cec5SDimitry Andric } // end namespace llvm
5780b57cec5SDimitry Andric
5790b57cec5SDimitry Andric /// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result.
encodeUTF8(uint32_t UnicodeScalarValue,SmallVectorImpl<char> & Result)5800b57cec5SDimitry Andric static void encodeUTF8( uint32_t UnicodeScalarValue
5810b57cec5SDimitry Andric , SmallVectorImpl<char> &Result) {
5820b57cec5SDimitry Andric if (UnicodeScalarValue <= 0x7F) {
5830b57cec5SDimitry Andric Result.push_back(UnicodeScalarValue & 0x7F);
5840b57cec5SDimitry Andric } else if (UnicodeScalarValue <= 0x7FF) {
5850b57cec5SDimitry Andric uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6);
5860b57cec5SDimitry Andric uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F);
5870b57cec5SDimitry Andric Result.push_back(FirstByte);
5880b57cec5SDimitry Andric Result.push_back(SecondByte);
5890b57cec5SDimitry Andric } else if (UnicodeScalarValue <= 0xFFFF) {
5900b57cec5SDimitry Andric uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12);
5910b57cec5SDimitry Andric uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
5920b57cec5SDimitry Andric uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F);
5930b57cec5SDimitry Andric Result.push_back(FirstByte);
5940b57cec5SDimitry Andric Result.push_back(SecondByte);
5950b57cec5SDimitry Andric Result.push_back(ThirdByte);
5960b57cec5SDimitry Andric } else if (UnicodeScalarValue <= 0x10FFFF) {
5970b57cec5SDimitry Andric uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18);
5980b57cec5SDimitry Andric uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12);
5990b57cec5SDimitry Andric uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
6000b57cec5SDimitry Andric uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F);
6010b57cec5SDimitry Andric Result.push_back(FirstByte);
6020b57cec5SDimitry Andric Result.push_back(SecondByte);
6030b57cec5SDimitry Andric Result.push_back(ThirdByte);
6040b57cec5SDimitry Andric Result.push_back(FourthByte);
6050b57cec5SDimitry Andric }
6060b57cec5SDimitry Andric }
6070b57cec5SDimitry Andric
dumpTokens(StringRef Input,raw_ostream & OS)6080b57cec5SDimitry Andric bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) {
6090b57cec5SDimitry Andric SourceMgr SM;
6100b57cec5SDimitry Andric Scanner scanner(Input, SM);
6110b57cec5SDimitry Andric while (true) {
6120b57cec5SDimitry Andric Token T = scanner.getNext();
6130b57cec5SDimitry Andric switch (T.Kind) {
6140b57cec5SDimitry Andric case Token::TK_StreamStart:
6150b57cec5SDimitry Andric OS << "Stream-Start: ";
6160b57cec5SDimitry Andric break;
6170b57cec5SDimitry Andric case Token::TK_StreamEnd:
6180b57cec5SDimitry Andric OS << "Stream-End: ";
6190b57cec5SDimitry Andric break;
6200b57cec5SDimitry Andric case Token::TK_VersionDirective:
6210b57cec5SDimitry Andric OS << "Version-Directive: ";
6220b57cec5SDimitry Andric break;
6230b57cec5SDimitry Andric case Token::TK_TagDirective:
6240b57cec5SDimitry Andric OS << "Tag-Directive: ";
6250b57cec5SDimitry Andric break;
6260b57cec5SDimitry Andric case Token::TK_DocumentStart:
6270b57cec5SDimitry Andric OS << "Document-Start: ";
6280b57cec5SDimitry Andric break;
6290b57cec5SDimitry Andric case Token::TK_DocumentEnd:
6300b57cec5SDimitry Andric OS << "Document-End: ";
6310b57cec5SDimitry Andric break;
6320b57cec5SDimitry Andric case Token::TK_BlockEntry:
6330b57cec5SDimitry Andric OS << "Block-Entry: ";
6340b57cec5SDimitry Andric break;
6350b57cec5SDimitry Andric case Token::TK_BlockEnd:
6360b57cec5SDimitry Andric OS << "Block-End: ";
6370b57cec5SDimitry Andric break;
6380b57cec5SDimitry Andric case Token::TK_BlockSequenceStart:
6390b57cec5SDimitry Andric OS << "Block-Sequence-Start: ";
6400b57cec5SDimitry Andric break;
6410b57cec5SDimitry Andric case Token::TK_BlockMappingStart:
6420b57cec5SDimitry Andric OS << "Block-Mapping-Start: ";
6430b57cec5SDimitry Andric break;
6440b57cec5SDimitry Andric case Token::TK_FlowEntry:
6450b57cec5SDimitry Andric OS << "Flow-Entry: ";
6460b57cec5SDimitry Andric break;
6470b57cec5SDimitry Andric case Token::TK_FlowSequenceStart:
6480b57cec5SDimitry Andric OS << "Flow-Sequence-Start: ";
6490b57cec5SDimitry Andric break;
6500b57cec5SDimitry Andric case Token::TK_FlowSequenceEnd:
6510b57cec5SDimitry Andric OS << "Flow-Sequence-End: ";
6520b57cec5SDimitry Andric break;
6530b57cec5SDimitry Andric case Token::TK_FlowMappingStart:
6540b57cec5SDimitry Andric OS << "Flow-Mapping-Start: ";
6550b57cec5SDimitry Andric break;
6560b57cec5SDimitry Andric case Token::TK_FlowMappingEnd:
6570b57cec5SDimitry Andric OS << "Flow-Mapping-End: ";
6580b57cec5SDimitry Andric break;
6590b57cec5SDimitry Andric case Token::TK_Key:
6600b57cec5SDimitry Andric OS << "Key: ";
6610b57cec5SDimitry Andric break;
6620b57cec5SDimitry Andric case Token::TK_Value:
6630b57cec5SDimitry Andric OS << "Value: ";
6640b57cec5SDimitry Andric break;
6650b57cec5SDimitry Andric case Token::TK_Scalar:
6660b57cec5SDimitry Andric OS << "Scalar: ";
6670b57cec5SDimitry Andric break;
6680b57cec5SDimitry Andric case Token::TK_BlockScalar:
6690b57cec5SDimitry Andric OS << "Block Scalar: ";
6700b57cec5SDimitry Andric break;
6710b57cec5SDimitry Andric case Token::TK_Alias:
6720b57cec5SDimitry Andric OS << "Alias: ";
6730b57cec5SDimitry Andric break;
6740b57cec5SDimitry Andric case Token::TK_Anchor:
6750b57cec5SDimitry Andric OS << "Anchor: ";
6760b57cec5SDimitry Andric break;
6770b57cec5SDimitry Andric case Token::TK_Tag:
6780b57cec5SDimitry Andric OS << "Tag: ";
6790b57cec5SDimitry Andric break;
6800b57cec5SDimitry Andric case Token::TK_Error:
6810b57cec5SDimitry Andric break;
6820b57cec5SDimitry Andric }
6830b57cec5SDimitry Andric OS << T.Range << "\n";
6840b57cec5SDimitry Andric if (T.Kind == Token::TK_StreamEnd)
6850b57cec5SDimitry Andric break;
6860b57cec5SDimitry Andric else if (T.Kind == Token::TK_Error)
6870b57cec5SDimitry Andric return false;
6880b57cec5SDimitry Andric }
6890b57cec5SDimitry Andric return true;
6900b57cec5SDimitry Andric }
6910b57cec5SDimitry Andric
scanTokens(StringRef Input)6920b57cec5SDimitry Andric bool yaml::scanTokens(StringRef Input) {
6930b57cec5SDimitry Andric SourceMgr SM;
6940b57cec5SDimitry Andric Scanner scanner(Input, SM);
6950b57cec5SDimitry Andric while (true) {
6960b57cec5SDimitry Andric Token T = scanner.getNext();
6970b57cec5SDimitry Andric if (T.Kind == Token::TK_StreamEnd)
6980b57cec5SDimitry Andric break;
6990b57cec5SDimitry Andric else if (T.Kind == Token::TK_Error)
7000b57cec5SDimitry Andric return false;
7010b57cec5SDimitry Andric }
7020b57cec5SDimitry Andric return true;
7030b57cec5SDimitry Andric }
7040b57cec5SDimitry Andric
escape(StringRef Input,bool EscapePrintable)7050b57cec5SDimitry Andric std::string yaml::escape(StringRef Input, bool EscapePrintable) {
7060b57cec5SDimitry Andric std::string EscapedInput;
7070b57cec5SDimitry Andric for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) {
7080b57cec5SDimitry Andric if (*i == '\\')
7090b57cec5SDimitry Andric EscapedInput += "\\\\";
7100b57cec5SDimitry Andric else if (*i == '"')
7110b57cec5SDimitry Andric EscapedInput += "\\\"";
7120b57cec5SDimitry Andric else if (*i == 0)
7130b57cec5SDimitry Andric EscapedInput += "\\0";
7140b57cec5SDimitry Andric else if (*i == 0x07)
7150b57cec5SDimitry Andric EscapedInput += "\\a";
7160b57cec5SDimitry Andric else if (*i == 0x08)
7170b57cec5SDimitry Andric EscapedInput += "\\b";
7180b57cec5SDimitry Andric else if (*i == 0x09)
7190b57cec5SDimitry Andric EscapedInput += "\\t";
7200b57cec5SDimitry Andric else if (*i == 0x0A)
7210b57cec5SDimitry Andric EscapedInput += "\\n";
7220b57cec5SDimitry Andric else if (*i == 0x0B)
7230b57cec5SDimitry Andric EscapedInput += "\\v";
7240b57cec5SDimitry Andric else if (*i == 0x0C)
7250b57cec5SDimitry Andric EscapedInput += "\\f";
7260b57cec5SDimitry Andric else if (*i == 0x0D)
7270b57cec5SDimitry Andric EscapedInput += "\\r";
7280b57cec5SDimitry Andric else if (*i == 0x1B)
7290b57cec5SDimitry Andric EscapedInput += "\\e";
7300b57cec5SDimitry Andric else if ((unsigned char)*i < 0x20) { // Control characters not handled above.
7310b57cec5SDimitry Andric std::string HexStr = utohexstr(*i);
7320b57cec5SDimitry Andric EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
7330b57cec5SDimitry Andric } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence.
7340b57cec5SDimitry Andric UTF8Decoded UnicodeScalarValue
7350b57cec5SDimitry Andric = decodeUTF8(StringRef(i, Input.end() - i));
7360b57cec5SDimitry Andric if (UnicodeScalarValue.second == 0) {
7370b57cec5SDimitry Andric // Found invalid char.
7380b57cec5SDimitry Andric SmallString<4> Val;
7390b57cec5SDimitry Andric encodeUTF8(0xFFFD, Val);
740e8d8bef9SDimitry Andric llvm::append_range(EscapedInput, Val);
7410b57cec5SDimitry Andric // FIXME: Error reporting.
7420b57cec5SDimitry Andric return EscapedInput;
7430b57cec5SDimitry Andric }
7440b57cec5SDimitry Andric if (UnicodeScalarValue.first == 0x85)
7450b57cec5SDimitry Andric EscapedInput += "\\N";
7460b57cec5SDimitry Andric else if (UnicodeScalarValue.first == 0xA0)
7470b57cec5SDimitry Andric EscapedInput += "\\_";
7480b57cec5SDimitry Andric else if (UnicodeScalarValue.first == 0x2028)
7490b57cec5SDimitry Andric EscapedInput += "\\L";
7500b57cec5SDimitry Andric else if (UnicodeScalarValue.first == 0x2029)
7510b57cec5SDimitry Andric EscapedInput += "\\P";
7520b57cec5SDimitry Andric else if (!EscapePrintable &&
7530b57cec5SDimitry Andric sys::unicode::isPrintable(UnicodeScalarValue.first))
7540b57cec5SDimitry Andric EscapedInput += StringRef(i, UnicodeScalarValue.second);
7550b57cec5SDimitry Andric else {
7560b57cec5SDimitry Andric std::string HexStr = utohexstr(UnicodeScalarValue.first);
7570b57cec5SDimitry Andric if (HexStr.size() <= 2)
7580b57cec5SDimitry Andric EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
7590b57cec5SDimitry Andric else if (HexStr.size() <= 4)
7600b57cec5SDimitry Andric EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr;
7610b57cec5SDimitry Andric else if (HexStr.size() <= 8)
7620b57cec5SDimitry Andric EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr;
7630b57cec5SDimitry Andric }
7640b57cec5SDimitry Andric i += UnicodeScalarValue.second - 1;
7650b57cec5SDimitry Andric } else
7660b57cec5SDimitry Andric EscapedInput.push_back(*i);
7670b57cec5SDimitry Andric }
7680b57cec5SDimitry Andric return EscapedInput;
7690b57cec5SDimitry Andric }
7700b57cec5SDimitry Andric
parseBool(StringRef S)771bdd1243dSDimitry Andric std::optional<bool> yaml::parseBool(StringRef S) {
772e8d8bef9SDimitry Andric switch (S.size()) {
773e8d8bef9SDimitry Andric case 1:
774e8d8bef9SDimitry Andric switch (S.front()) {
775e8d8bef9SDimitry Andric case 'y':
776e8d8bef9SDimitry Andric case 'Y':
777e8d8bef9SDimitry Andric return true;
778e8d8bef9SDimitry Andric case 'n':
779e8d8bef9SDimitry Andric case 'N':
780e8d8bef9SDimitry Andric return false;
781e8d8bef9SDimitry Andric default:
782bdd1243dSDimitry Andric return std::nullopt;
783e8d8bef9SDimitry Andric }
784e8d8bef9SDimitry Andric case 2:
785e8d8bef9SDimitry Andric switch (S.front()) {
786e8d8bef9SDimitry Andric case 'O':
787e8d8bef9SDimitry Andric if (S[1] == 'N') // ON
788e8d8bef9SDimitry Andric return true;
789bdd1243dSDimitry Andric [[fallthrough]];
790e8d8bef9SDimitry Andric case 'o':
791e8d8bef9SDimitry Andric if (S[1] == 'n') //[Oo]n
792e8d8bef9SDimitry Andric return true;
793bdd1243dSDimitry Andric return std::nullopt;
794e8d8bef9SDimitry Andric case 'N':
795e8d8bef9SDimitry Andric if (S[1] == 'O') // NO
796e8d8bef9SDimitry Andric return false;
797bdd1243dSDimitry Andric [[fallthrough]];
798e8d8bef9SDimitry Andric case 'n':
799e8d8bef9SDimitry Andric if (S[1] == 'o') //[Nn]o
800e8d8bef9SDimitry Andric return false;
801bdd1243dSDimitry Andric return std::nullopt;
802e8d8bef9SDimitry Andric default:
803bdd1243dSDimitry Andric return std::nullopt;
804e8d8bef9SDimitry Andric }
805e8d8bef9SDimitry Andric case 3:
806e8d8bef9SDimitry Andric switch (S.front()) {
807e8d8bef9SDimitry Andric case 'O':
808e8d8bef9SDimitry Andric if (S.drop_front() == "FF") // OFF
809e8d8bef9SDimitry Andric return false;
810bdd1243dSDimitry Andric [[fallthrough]];
811e8d8bef9SDimitry Andric case 'o':
812e8d8bef9SDimitry Andric if (S.drop_front() == "ff") //[Oo]ff
813e8d8bef9SDimitry Andric return false;
814bdd1243dSDimitry Andric return std::nullopt;
815e8d8bef9SDimitry Andric case 'Y':
816e8d8bef9SDimitry Andric if (S.drop_front() == "ES") // YES
817e8d8bef9SDimitry Andric return true;
818bdd1243dSDimitry Andric [[fallthrough]];
819e8d8bef9SDimitry Andric case 'y':
820e8d8bef9SDimitry Andric if (S.drop_front() == "es") //[Yy]es
821e8d8bef9SDimitry Andric return true;
822bdd1243dSDimitry Andric return std::nullopt;
823e8d8bef9SDimitry Andric default:
824bdd1243dSDimitry Andric return std::nullopt;
825e8d8bef9SDimitry Andric }
826e8d8bef9SDimitry Andric case 4:
827e8d8bef9SDimitry Andric switch (S.front()) {
828e8d8bef9SDimitry Andric case 'T':
829e8d8bef9SDimitry Andric if (S.drop_front() == "RUE") // TRUE
830e8d8bef9SDimitry Andric return true;
831bdd1243dSDimitry Andric [[fallthrough]];
832e8d8bef9SDimitry Andric case 't':
833e8d8bef9SDimitry Andric if (S.drop_front() == "rue") //[Tt]rue
834e8d8bef9SDimitry Andric return true;
835bdd1243dSDimitry Andric return std::nullopt;
836e8d8bef9SDimitry Andric default:
837bdd1243dSDimitry Andric return std::nullopt;
838e8d8bef9SDimitry Andric }
839e8d8bef9SDimitry Andric case 5:
840e8d8bef9SDimitry Andric switch (S.front()) {
841e8d8bef9SDimitry Andric case 'F':
842e8d8bef9SDimitry Andric if (S.drop_front() == "ALSE") // FALSE
843e8d8bef9SDimitry Andric return false;
844bdd1243dSDimitry Andric [[fallthrough]];
845e8d8bef9SDimitry Andric case 'f':
846e8d8bef9SDimitry Andric if (S.drop_front() == "alse") //[Ff]alse
847e8d8bef9SDimitry Andric return false;
848bdd1243dSDimitry Andric return std::nullopt;
849e8d8bef9SDimitry Andric default:
850bdd1243dSDimitry Andric return std::nullopt;
851e8d8bef9SDimitry Andric }
852e8d8bef9SDimitry Andric default:
853bdd1243dSDimitry Andric return std::nullopt;
854e8d8bef9SDimitry Andric }
855e8d8bef9SDimitry Andric }
856e8d8bef9SDimitry Andric
Scanner(StringRef Input,SourceMgr & sm,bool ShowColors,std::error_code * EC)8570b57cec5SDimitry Andric Scanner::Scanner(StringRef Input, SourceMgr &sm, bool ShowColors,
8580b57cec5SDimitry Andric std::error_code *EC)
8590b57cec5SDimitry Andric : SM(sm), ShowColors(ShowColors), EC(EC) {
8600b57cec5SDimitry Andric init(MemoryBufferRef(Input, "YAML"));
8610b57cec5SDimitry Andric }
8620b57cec5SDimitry Andric
Scanner(MemoryBufferRef Buffer,SourceMgr & SM_,bool ShowColors,std::error_code * EC)8630b57cec5SDimitry Andric Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors,
8640b57cec5SDimitry Andric std::error_code *EC)
8650b57cec5SDimitry Andric : SM(SM_), ShowColors(ShowColors), EC(EC) {
8660b57cec5SDimitry Andric init(Buffer);
8670b57cec5SDimitry Andric }
8680b57cec5SDimitry Andric
init(MemoryBufferRef Buffer)8690b57cec5SDimitry Andric void Scanner::init(MemoryBufferRef Buffer) {
8700b57cec5SDimitry Andric InputBuffer = Buffer;
8710b57cec5SDimitry Andric Current = InputBuffer.getBufferStart();
8720b57cec5SDimitry Andric End = InputBuffer.getBufferEnd();
8730b57cec5SDimitry Andric Indent = -1;
8740b57cec5SDimitry Andric Column = 0;
8750b57cec5SDimitry Andric Line = 0;
8760b57cec5SDimitry Andric FlowLevel = 0;
8770b57cec5SDimitry Andric IsStartOfStream = true;
8780b57cec5SDimitry Andric IsSimpleKeyAllowed = true;
879*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = false;
8800b57cec5SDimitry Andric Failed = false;
8810b57cec5SDimitry Andric std::unique_ptr<MemoryBuffer> InputBufferOwner =
882e8d8bef9SDimitry Andric MemoryBuffer::getMemBuffer(Buffer, /*RequiresNullTerminator=*/false);
8830b57cec5SDimitry Andric SM.AddNewSourceBuffer(std::move(InputBufferOwner), SMLoc());
8840b57cec5SDimitry Andric }
8850b57cec5SDimitry Andric
peekNext()8860b57cec5SDimitry Andric Token &Scanner::peekNext() {
8870b57cec5SDimitry Andric // If the current token is a possible simple key, keep parsing until we
8880b57cec5SDimitry Andric // can confirm.
8890b57cec5SDimitry Andric bool NeedMore = false;
8900b57cec5SDimitry Andric while (true) {
8910b57cec5SDimitry Andric if (TokenQueue.empty() || NeedMore) {
8920b57cec5SDimitry Andric if (!fetchMoreTokens()) {
8930b57cec5SDimitry Andric TokenQueue.clear();
894480093f4SDimitry Andric SimpleKeys.clear();
8950b57cec5SDimitry Andric TokenQueue.push_back(Token());
8960b57cec5SDimitry Andric return TokenQueue.front();
8970b57cec5SDimitry Andric }
8980b57cec5SDimitry Andric }
8990b57cec5SDimitry Andric assert(!TokenQueue.empty() &&
9000b57cec5SDimitry Andric "fetchMoreTokens lied about getting tokens!");
9010b57cec5SDimitry Andric
9020b57cec5SDimitry Andric removeStaleSimpleKeyCandidates();
9030b57cec5SDimitry Andric SimpleKey SK;
9040b57cec5SDimitry Andric SK.Tok = TokenQueue.begin();
9050b57cec5SDimitry Andric if (!is_contained(SimpleKeys, SK))
9060b57cec5SDimitry Andric break;
9070b57cec5SDimitry Andric else
9080b57cec5SDimitry Andric NeedMore = true;
9090b57cec5SDimitry Andric }
9100b57cec5SDimitry Andric return TokenQueue.front();
9110b57cec5SDimitry Andric }
9120b57cec5SDimitry Andric
getNext()9130b57cec5SDimitry Andric Token Scanner::getNext() {
9140b57cec5SDimitry Andric Token Ret = peekNext();
9150b57cec5SDimitry Andric // TokenQueue can be empty if there was an error getting the next token.
9160b57cec5SDimitry Andric if (!TokenQueue.empty())
9170b57cec5SDimitry Andric TokenQueue.pop_front();
9180b57cec5SDimitry Andric
9190b57cec5SDimitry Andric // There cannot be any referenced Token's if the TokenQueue is empty. So do a
9200b57cec5SDimitry Andric // quick deallocation of them all.
9210b57cec5SDimitry Andric if (TokenQueue.empty())
9220b57cec5SDimitry Andric TokenQueue.resetAlloc();
9230b57cec5SDimitry Andric
9240b57cec5SDimitry Andric return Ret;
9250b57cec5SDimitry Andric }
9260b57cec5SDimitry Andric
skip_nb_char(StringRef::iterator Position)9270b57cec5SDimitry Andric StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) {
9280b57cec5SDimitry Andric if (Position == End)
9290b57cec5SDimitry Andric return Position;
9300b57cec5SDimitry Andric // Check 7 bit c-printable - b-char.
9310b57cec5SDimitry Andric if ( *Position == 0x09
9320b57cec5SDimitry Andric || (*Position >= 0x20 && *Position <= 0x7E))
9330b57cec5SDimitry Andric return Position + 1;
9340b57cec5SDimitry Andric
9350b57cec5SDimitry Andric // Check for valid UTF-8.
9360b57cec5SDimitry Andric if (uint8_t(*Position) & 0x80) {
9370b57cec5SDimitry Andric UTF8Decoded u8d = decodeUTF8(Position);
9380b57cec5SDimitry Andric if ( u8d.second != 0
9390b57cec5SDimitry Andric && u8d.first != 0xFEFF
9400b57cec5SDimitry Andric && ( u8d.first == 0x85
9410b57cec5SDimitry Andric || ( u8d.first >= 0xA0
9420b57cec5SDimitry Andric && u8d.first <= 0xD7FF)
9430b57cec5SDimitry Andric || ( u8d.first >= 0xE000
9440b57cec5SDimitry Andric && u8d.first <= 0xFFFD)
9450b57cec5SDimitry Andric || ( u8d.first >= 0x10000
9460b57cec5SDimitry Andric && u8d.first <= 0x10FFFF)))
9470b57cec5SDimitry Andric return Position + u8d.second;
9480b57cec5SDimitry Andric }
9490b57cec5SDimitry Andric return Position;
9500b57cec5SDimitry Andric }
9510b57cec5SDimitry Andric
skip_b_break(StringRef::iterator Position)9520b57cec5SDimitry Andric StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) {
9530b57cec5SDimitry Andric if (Position == End)
9540b57cec5SDimitry Andric return Position;
9550b57cec5SDimitry Andric if (*Position == 0x0D) {
9560b57cec5SDimitry Andric if (Position + 1 != End && *(Position + 1) == 0x0A)
9570b57cec5SDimitry Andric return Position + 2;
9580b57cec5SDimitry Andric return Position + 1;
9590b57cec5SDimitry Andric }
9600b57cec5SDimitry Andric
9610b57cec5SDimitry Andric if (*Position == 0x0A)
9620b57cec5SDimitry Andric return Position + 1;
9630b57cec5SDimitry Andric return Position;
9640b57cec5SDimitry Andric }
9650b57cec5SDimitry Andric
skip_s_space(StringRef::iterator Position)9660b57cec5SDimitry Andric StringRef::iterator Scanner::skip_s_space(StringRef::iterator Position) {
9670b57cec5SDimitry Andric if (Position == End)
9680b57cec5SDimitry Andric return Position;
9690b57cec5SDimitry Andric if (*Position == ' ')
9700b57cec5SDimitry Andric return Position + 1;
9710b57cec5SDimitry Andric return Position;
9720b57cec5SDimitry Andric }
9730b57cec5SDimitry Andric
skip_s_white(StringRef::iterator Position)9740b57cec5SDimitry Andric StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) {
9750b57cec5SDimitry Andric if (Position == End)
9760b57cec5SDimitry Andric return Position;
9770b57cec5SDimitry Andric if (*Position == ' ' || *Position == '\t')
9780b57cec5SDimitry Andric return Position + 1;
9790b57cec5SDimitry Andric return Position;
9800b57cec5SDimitry Andric }
9810b57cec5SDimitry Andric
skip_ns_char(StringRef::iterator Position)9820b57cec5SDimitry Andric StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) {
9830b57cec5SDimitry Andric if (Position == End)
9840b57cec5SDimitry Andric return Position;
9850b57cec5SDimitry Andric if (*Position == ' ' || *Position == '\t')
9860b57cec5SDimitry Andric return Position;
9870b57cec5SDimitry Andric return skip_nb_char(Position);
9880b57cec5SDimitry Andric }
9890b57cec5SDimitry Andric
skip_while(SkipWhileFunc Func,StringRef::iterator Position)9900b57cec5SDimitry Andric StringRef::iterator Scanner::skip_while( SkipWhileFunc Func
9910b57cec5SDimitry Andric , StringRef::iterator Position) {
9920b57cec5SDimitry Andric while (true) {
9930b57cec5SDimitry Andric StringRef::iterator i = (this->*Func)(Position);
9940b57cec5SDimitry Andric if (i == Position)
9950b57cec5SDimitry Andric break;
9960b57cec5SDimitry Andric Position = i;
9970b57cec5SDimitry Andric }
9980b57cec5SDimitry Andric return Position;
9990b57cec5SDimitry Andric }
10000b57cec5SDimitry Andric
advanceWhile(SkipWhileFunc Func)10010b57cec5SDimitry Andric void Scanner::advanceWhile(SkipWhileFunc Func) {
10020b57cec5SDimitry Andric auto Final = skip_while(Func, Current);
10030b57cec5SDimitry Andric Column += Final - Current;
10040b57cec5SDimitry Andric Current = Final;
10050b57cec5SDimitry Andric }
10060b57cec5SDimitry Andric
/// Return true if \a C is a hexadecimal digit: 0-9, a-f or A-F.
///
/// This is the YAML ns-hex-digit production. Letters beyond 'f'/'F' are not
/// hex digits and must be rejected, otherwise a sequence such as "%zz" would
/// be treated as a valid percent-escape.
static bool is_ns_hex_digit(const char C) {
  return (C >= '0' && C <= '9') || (C >= 'a' && C <= 'f') ||
         (C >= 'A' && C <= 'F');
}
10080b57cec5SDimitry Andric
/// Return true if \a C is a word character: a decimal digit, an ASCII letter,
/// or '-'.
///
/// This is the YAML ns-word-char production. Digits have to be included,
/// otherwise a URI such as "tag:yaml.org,2002:str" stops being recognized at
/// its first digit.
static bool is_ns_word_char(const char C) {
  return C == '-' || (C >= '0' && C <= '9') || (C >= 'a' && C <= 'z') ||
         (C >= 'A' && C <= 'Z');
}
10100b57cec5SDimitry Andric
scan_ns_uri_char()10110b57cec5SDimitry Andric void Scanner::scan_ns_uri_char() {
10120b57cec5SDimitry Andric while (true) {
10130b57cec5SDimitry Andric if (Current == End)
10140b57cec5SDimitry Andric break;
10150b57cec5SDimitry Andric if (( *Current == '%'
10160b57cec5SDimitry Andric && Current + 2 < End
10170b57cec5SDimitry Andric && is_ns_hex_digit(*(Current + 1))
10180b57cec5SDimitry Andric && is_ns_hex_digit(*(Current + 2)))
10190b57cec5SDimitry Andric || is_ns_word_char(*Current)
10200b57cec5SDimitry Andric || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]")
10210b57cec5SDimitry Andric != StringRef::npos) {
10220b57cec5SDimitry Andric ++Current;
10230b57cec5SDimitry Andric ++Column;
10240b57cec5SDimitry Andric } else
10250b57cec5SDimitry Andric break;
10260b57cec5SDimitry Andric }
10270b57cec5SDimitry Andric }
10280b57cec5SDimitry Andric
consume(uint32_t Expected)10290b57cec5SDimitry Andric bool Scanner::consume(uint32_t Expected) {
1030480093f4SDimitry Andric if (Expected >= 0x80) {
10315ffd83dbSDimitry Andric setError("Cannot consume non-ascii characters", Current);
1032480093f4SDimitry Andric return false;
1033480093f4SDimitry Andric }
10340b57cec5SDimitry Andric if (Current == End)
10350b57cec5SDimitry Andric return false;
1036480093f4SDimitry Andric if (uint8_t(*Current) >= 0x80) {
10375ffd83dbSDimitry Andric setError("Cannot consume non-ascii characters", Current);
1038480093f4SDimitry Andric return false;
1039480093f4SDimitry Andric }
10400b57cec5SDimitry Andric if (uint8_t(*Current) == Expected) {
10410b57cec5SDimitry Andric ++Current;
10420b57cec5SDimitry Andric ++Column;
10430b57cec5SDimitry Andric return true;
10440b57cec5SDimitry Andric }
10450b57cec5SDimitry Andric return false;
10460b57cec5SDimitry Andric }
10470b57cec5SDimitry Andric
skip(uint32_t Distance)10480b57cec5SDimitry Andric void Scanner::skip(uint32_t Distance) {
10490b57cec5SDimitry Andric Current += Distance;
10500b57cec5SDimitry Andric Column += Distance;
10510b57cec5SDimitry Andric assert(Current <= End && "Skipped past the end");
10520b57cec5SDimitry Andric }
10530b57cec5SDimitry Andric
isBlankOrBreak(StringRef::iterator Position)10540b57cec5SDimitry Andric bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
10550b57cec5SDimitry Andric if (Position == End)
10560b57cec5SDimitry Andric return false;
10570b57cec5SDimitry Andric return *Position == ' ' || *Position == '\t' || *Position == '\r' ||
10580b57cec5SDimitry Andric *Position == '\n';
10590b57cec5SDimitry Andric }
10600b57cec5SDimitry Andric
isPlainSafeNonBlank(StringRef::iterator Position)1061*5f757f3fSDimitry Andric bool Scanner::isPlainSafeNonBlank(StringRef::iterator Position) {
1062*5f757f3fSDimitry Andric if (Position == End || isBlankOrBreak(Position))
1063*5f757f3fSDimitry Andric return false;
1064*5f757f3fSDimitry Andric if (FlowLevel &&
1065*5f757f3fSDimitry Andric StringRef(Position, 1).find_first_of(",[]{}") != StringRef::npos)
1066*5f757f3fSDimitry Andric return false;
1067*5f757f3fSDimitry Andric return true;
1068*5f757f3fSDimitry Andric }
1069*5f757f3fSDimitry Andric
isLineEmpty(StringRef Line)107081ad6265SDimitry Andric bool Scanner::isLineEmpty(StringRef Line) {
107181ad6265SDimitry Andric for (const auto *Position = Line.begin(); Position != Line.end(); ++Position)
107281ad6265SDimitry Andric if (!isBlankOrBreak(Position))
107381ad6265SDimitry Andric return false;
107481ad6265SDimitry Andric return true;
107581ad6265SDimitry Andric }
107681ad6265SDimitry Andric
consumeLineBreakIfPresent()10770b57cec5SDimitry Andric bool Scanner::consumeLineBreakIfPresent() {
10780b57cec5SDimitry Andric auto Next = skip_b_break(Current);
10790b57cec5SDimitry Andric if (Next == Current)
10800b57cec5SDimitry Andric return false;
10810b57cec5SDimitry Andric Column = 0;
10820b57cec5SDimitry Andric ++Line;
10830b57cec5SDimitry Andric Current = Next;
10840b57cec5SDimitry Andric return true;
10850b57cec5SDimitry Andric }
10860b57cec5SDimitry Andric
saveSimpleKeyCandidate(TokenQueueT::iterator Tok,unsigned AtColumn,bool IsRequired)10870b57cec5SDimitry Andric void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok
10880b57cec5SDimitry Andric , unsigned AtColumn
10890b57cec5SDimitry Andric , bool IsRequired) {
10900b57cec5SDimitry Andric if (IsSimpleKeyAllowed) {
10910b57cec5SDimitry Andric SimpleKey SK;
10920b57cec5SDimitry Andric SK.Tok = Tok;
10930b57cec5SDimitry Andric SK.Line = Line;
10940b57cec5SDimitry Andric SK.Column = AtColumn;
10950b57cec5SDimitry Andric SK.IsRequired = IsRequired;
10960b57cec5SDimitry Andric SK.FlowLevel = FlowLevel;
10970b57cec5SDimitry Andric SimpleKeys.push_back(SK);
10980b57cec5SDimitry Andric }
10990b57cec5SDimitry Andric }
11000b57cec5SDimitry Andric
removeStaleSimpleKeyCandidates()11010b57cec5SDimitry Andric void Scanner::removeStaleSimpleKeyCandidates() {
11020b57cec5SDimitry Andric for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin();
11030b57cec5SDimitry Andric i != SimpleKeys.end();) {
11040b57cec5SDimitry Andric if (i->Line != Line || i->Column + 1024 < Column) {
11050b57cec5SDimitry Andric if (i->IsRequired)
11060b57cec5SDimitry Andric setError( "Could not find expected : for simple key"
11070b57cec5SDimitry Andric , i->Tok->Range.begin());
11080b57cec5SDimitry Andric i = SimpleKeys.erase(i);
11090b57cec5SDimitry Andric } else
11100b57cec5SDimitry Andric ++i;
11110b57cec5SDimitry Andric }
11120b57cec5SDimitry Andric }
11130b57cec5SDimitry Andric
removeSimpleKeyCandidatesOnFlowLevel(unsigned Level)11140b57cec5SDimitry Andric void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) {
11150b57cec5SDimitry Andric if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level)
11160b57cec5SDimitry Andric SimpleKeys.pop_back();
11170b57cec5SDimitry Andric }
11180b57cec5SDimitry Andric
unrollIndent(int ToColumn)11190b57cec5SDimitry Andric bool Scanner::unrollIndent(int ToColumn) {
11200b57cec5SDimitry Andric Token T;
11210b57cec5SDimitry Andric // Indentation is ignored in flow.
11220b57cec5SDimitry Andric if (FlowLevel != 0)
11230b57cec5SDimitry Andric return true;
11240b57cec5SDimitry Andric
11250b57cec5SDimitry Andric while (Indent > ToColumn) {
11260b57cec5SDimitry Andric T.Kind = Token::TK_BlockEnd;
11270b57cec5SDimitry Andric T.Range = StringRef(Current, 1);
11280b57cec5SDimitry Andric TokenQueue.push_back(T);
11290b57cec5SDimitry Andric Indent = Indents.pop_back_val();
11300b57cec5SDimitry Andric }
11310b57cec5SDimitry Andric
11320b57cec5SDimitry Andric return true;
11330b57cec5SDimitry Andric }
11340b57cec5SDimitry Andric
rollIndent(int ToColumn,Token::TokenKind Kind,TokenQueueT::iterator InsertPoint)11350b57cec5SDimitry Andric bool Scanner::rollIndent( int ToColumn
11360b57cec5SDimitry Andric , Token::TokenKind Kind
11370b57cec5SDimitry Andric , TokenQueueT::iterator InsertPoint) {
11380b57cec5SDimitry Andric if (FlowLevel)
11390b57cec5SDimitry Andric return true;
11400b57cec5SDimitry Andric if (Indent < ToColumn) {
11410b57cec5SDimitry Andric Indents.push_back(Indent);
11420b57cec5SDimitry Andric Indent = ToColumn;
11430b57cec5SDimitry Andric
11440b57cec5SDimitry Andric Token T;
11450b57cec5SDimitry Andric T.Kind = Kind;
11460b57cec5SDimitry Andric T.Range = StringRef(Current, 0);
11470b57cec5SDimitry Andric TokenQueue.insert(InsertPoint, T);
11480b57cec5SDimitry Andric }
11490b57cec5SDimitry Andric return true;
11500b57cec5SDimitry Andric }
11510b57cec5SDimitry Andric
skipComment()11520b57cec5SDimitry Andric void Scanner::skipComment() {
1153e8d8bef9SDimitry Andric if (Current == End || *Current != '#')
11540b57cec5SDimitry Andric return;
11550b57cec5SDimitry Andric while (true) {
11560b57cec5SDimitry Andric // This may skip more than one byte, thus Column is only incremented
11570b57cec5SDimitry Andric // for code points.
11580b57cec5SDimitry Andric StringRef::iterator I = skip_nb_char(Current);
11590b57cec5SDimitry Andric if (I == Current)
11600b57cec5SDimitry Andric break;
11610b57cec5SDimitry Andric Current = I;
11620b57cec5SDimitry Andric ++Column;
11630b57cec5SDimitry Andric }
11640b57cec5SDimitry Andric }
11650b57cec5SDimitry Andric
scanToNextToken()11660b57cec5SDimitry Andric void Scanner::scanToNextToken() {
11670b57cec5SDimitry Andric while (true) {
1168e8d8bef9SDimitry Andric while (Current != End && (*Current == ' ' || *Current == '\t')) {
11690b57cec5SDimitry Andric skip(1);
11700b57cec5SDimitry Andric }
11710b57cec5SDimitry Andric
11720b57cec5SDimitry Andric skipComment();
11730b57cec5SDimitry Andric
11740b57cec5SDimitry Andric // Skip EOL.
11750b57cec5SDimitry Andric StringRef::iterator i = skip_b_break(Current);
11760b57cec5SDimitry Andric if (i == Current)
11770b57cec5SDimitry Andric break;
11780b57cec5SDimitry Andric Current = i;
11790b57cec5SDimitry Andric ++Line;
11800b57cec5SDimitry Andric Column = 0;
11810b57cec5SDimitry Andric // New lines may start a simple key.
11820b57cec5SDimitry Andric if (!FlowLevel)
11830b57cec5SDimitry Andric IsSimpleKeyAllowed = true;
11840b57cec5SDimitry Andric }
11850b57cec5SDimitry Andric }
11860b57cec5SDimitry Andric
scanStreamStart()11870b57cec5SDimitry Andric bool Scanner::scanStreamStart() {
11880b57cec5SDimitry Andric IsStartOfStream = false;
11890b57cec5SDimitry Andric
11900b57cec5SDimitry Andric EncodingInfo EI = getUnicodeEncoding(currentInput());
11910b57cec5SDimitry Andric
11920b57cec5SDimitry Andric Token T;
11930b57cec5SDimitry Andric T.Kind = Token::TK_StreamStart;
11940b57cec5SDimitry Andric T.Range = StringRef(Current, EI.second);
11950b57cec5SDimitry Andric TokenQueue.push_back(T);
11960b57cec5SDimitry Andric Current += EI.second;
11970b57cec5SDimitry Andric return true;
11980b57cec5SDimitry Andric }
11990b57cec5SDimitry Andric
scanStreamEnd()12000b57cec5SDimitry Andric bool Scanner::scanStreamEnd() {
12010b57cec5SDimitry Andric // Force an ending new line if one isn't present.
12020b57cec5SDimitry Andric if (Column != 0) {
12030b57cec5SDimitry Andric Column = 0;
12040b57cec5SDimitry Andric ++Line;
12050b57cec5SDimitry Andric }
12060b57cec5SDimitry Andric
12070b57cec5SDimitry Andric unrollIndent(-1);
12080b57cec5SDimitry Andric SimpleKeys.clear();
12090b57cec5SDimitry Andric IsSimpleKeyAllowed = false;
1210*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = false;
12110b57cec5SDimitry Andric
12120b57cec5SDimitry Andric Token T;
12130b57cec5SDimitry Andric T.Kind = Token::TK_StreamEnd;
12140b57cec5SDimitry Andric T.Range = StringRef(Current, 0);
12150b57cec5SDimitry Andric TokenQueue.push_back(T);
12160b57cec5SDimitry Andric return true;
12170b57cec5SDimitry Andric }
12180b57cec5SDimitry Andric
scanDirective()12190b57cec5SDimitry Andric bool Scanner::scanDirective() {
12200b57cec5SDimitry Andric // Reset the indentation level.
12210b57cec5SDimitry Andric unrollIndent(-1);
12220b57cec5SDimitry Andric SimpleKeys.clear();
12230b57cec5SDimitry Andric IsSimpleKeyAllowed = false;
1224*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = false;
12250b57cec5SDimitry Andric
12260b57cec5SDimitry Andric StringRef::iterator Start = Current;
12270b57cec5SDimitry Andric consume('%');
12280b57cec5SDimitry Andric StringRef::iterator NameStart = Current;
12290b57cec5SDimitry Andric Current = skip_while(&Scanner::skip_ns_char, Current);
12300b57cec5SDimitry Andric StringRef Name(NameStart, Current - NameStart);
12310b57cec5SDimitry Andric Current = skip_while(&Scanner::skip_s_white, Current);
12320b57cec5SDimitry Andric
12330b57cec5SDimitry Andric Token T;
12340b57cec5SDimitry Andric if (Name == "YAML") {
12350b57cec5SDimitry Andric Current = skip_while(&Scanner::skip_ns_char, Current);
12360b57cec5SDimitry Andric T.Kind = Token::TK_VersionDirective;
12370b57cec5SDimitry Andric T.Range = StringRef(Start, Current - Start);
12380b57cec5SDimitry Andric TokenQueue.push_back(T);
12390b57cec5SDimitry Andric return true;
12400b57cec5SDimitry Andric } else if(Name == "TAG") {
12410b57cec5SDimitry Andric Current = skip_while(&Scanner::skip_ns_char, Current);
12420b57cec5SDimitry Andric Current = skip_while(&Scanner::skip_s_white, Current);
12430b57cec5SDimitry Andric Current = skip_while(&Scanner::skip_ns_char, Current);
12440b57cec5SDimitry Andric T.Kind = Token::TK_TagDirective;
12450b57cec5SDimitry Andric T.Range = StringRef(Start, Current - Start);
12460b57cec5SDimitry Andric TokenQueue.push_back(T);
12470b57cec5SDimitry Andric return true;
12480b57cec5SDimitry Andric }
12490b57cec5SDimitry Andric return false;
12500b57cec5SDimitry Andric }
12510b57cec5SDimitry Andric
scanDocumentIndicator(bool IsStart)12520b57cec5SDimitry Andric bool Scanner::scanDocumentIndicator(bool IsStart) {
12530b57cec5SDimitry Andric unrollIndent(-1);
12540b57cec5SDimitry Andric SimpleKeys.clear();
12550b57cec5SDimitry Andric IsSimpleKeyAllowed = false;
1256*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = false;
12570b57cec5SDimitry Andric
12580b57cec5SDimitry Andric Token T;
12590b57cec5SDimitry Andric T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd;
12600b57cec5SDimitry Andric T.Range = StringRef(Current, 3);
12610b57cec5SDimitry Andric skip(3);
12620b57cec5SDimitry Andric TokenQueue.push_back(T);
12630b57cec5SDimitry Andric return true;
12640b57cec5SDimitry Andric }
12650b57cec5SDimitry Andric
scanFlowCollectionStart(bool IsSequence)12660b57cec5SDimitry Andric bool Scanner::scanFlowCollectionStart(bool IsSequence) {
12670b57cec5SDimitry Andric Token T;
12680b57cec5SDimitry Andric T.Kind = IsSequence ? Token::TK_FlowSequenceStart
12690b57cec5SDimitry Andric : Token::TK_FlowMappingStart;
12700b57cec5SDimitry Andric T.Range = StringRef(Current, 1);
12710b57cec5SDimitry Andric skip(1);
12720b57cec5SDimitry Andric TokenQueue.push_back(T);
12730b57cec5SDimitry Andric
12740b57cec5SDimitry Andric // [ and { may begin a simple key.
12750b57cec5SDimitry Andric saveSimpleKeyCandidate(--TokenQueue.end(), Column - 1, false);
12760b57cec5SDimitry Andric
12770b57cec5SDimitry Andric // And may also be followed by a simple key.
12780b57cec5SDimitry Andric IsSimpleKeyAllowed = true;
1279*5f757f3fSDimitry Andric // Adjacent values are allowed in flows only after JSON-style keys.
1280*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = false;
12810b57cec5SDimitry Andric ++FlowLevel;
12820b57cec5SDimitry Andric return true;
12830b57cec5SDimitry Andric }
12840b57cec5SDimitry Andric
scanFlowCollectionEnd(bool IsSequence)12850b57cec5SDimitry Andric bool Scanner::scanFlowCollectionEnd(bool IsSequence) {
12860b57cec5SDimitry Andric removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
12870b57cec5SDimitry Andric IsSimpleKeyAllowed = false;
1288*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = true;
12890b57cec5SDimitry Andric Token T;
12900b57cec5SDimitry Andric T.Kind = IsSequence ? Token::TK_FlowSequenceEnd
12910b57cec5SDimitry Andric : Token::TK_FlowMappingEnd;
12920b57cec5SDimitry Andric T.Range = StringRef(Current, 1);
12930b57cec5SDimitry Andric skip(1);
12940b57cec5SDimitry Andric TokenQueue.push_back(T);
12950b57cec5SDimitry Andric if (FlowLevel)
12960b57cec5SDimitry Andric --FlowLevel;
12970b57cec5SDimitry Andric return true;
12980b57cec5SDimitry Andric }
12990b57cec5SDimitry Andric
scanFlowEntry()13000b57cec5SDimitry Andric bool Scanner::scanFlowEntry() {
13010b57cec5SDimitry Andric removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
13020b57cec5SDimitry Andric IsSimpleKeyAllowed = true;
1303*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = false;
13040b57cec5SDimitry Andric Token T;
13050b57cec5SDimitry Andric T.Kind = Token::TK_FlowEntry;
13060b57cec5SDimitry Andric T.Range = StringRef(Current, 1);
13070b57cec5SDimitry Andric skip(1);
13080b57cec5SDimitry Andric TokenQueue.push_back(T);
13090b57cec5SDimitry Andric return true;
13100b57cec5SDimitry Andric }
13110b57cec5SDimitry Andric
scanBlockEntry()13120b57cec5SDimitry Andric bool Scanner::scanBlockEntry() {
13130b57cec5SDimitry Andric rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end());
13140b57cec5SDimitry Andric removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
13150b57cec5SDimitry Andric IsSimpleKeyAllowed = true;
1316*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = false;
13170b57cec5SDimitry Andric Token T;
13180b57cec5SDimitry Andric T.Kind = Token::TK_BlockEntry;
13190b57cec5SDimitry Andric T.Range = StringRef(Current, 1);
13200b57cec5SDimitry Andric skip(1);
13210b57cec5SDimitry Andric TokenQueue.push_back(T);
13220b57cec5SDimitry Andric return true;
13230b57cec5SDimitry Andric }
13240b57cec5SDimitry Andric
scanKey()13250b57cec5SDimitry Andric bool Scanner::scanKey() {
13260b57cec5SDimitry Andric if (!FlowLevel)
13270b57cec5SDimitry Andric rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
13280b57cec5SDimitry Andric
13290b57cec5SDimitry Andric removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
13300b57cec5SDimitry Andric IsSimpleKeyAllowed = !FlowLevel;
1331*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = false;
13320b57cec5SDimitry Andric
13330b57cec5SDimitry Andric Token T;
13340b57cec5SDimitry Andric T.Kind = Token::TK_Key;
13350b57cec5SDimitry Andric T.Range = StringRef(Current, 1);
13360b57cec5SDimitry Andric skip(1);
13370b57cec5SDimitry Andric TokenQueue.push_back(T);
13380b57cec5SDimitry Andric return true;
13390b57cec5SDimitry Andric }
13400b57cec5SDimitry Andric
scanValue()13410b57cec5SDimitry Andric bool Scanner::scanValue() {
13420b57cec5SDimitry Andric // If the previous token could have been a simple key, insert the key token
13430b57cec5SDimitry Andric // into the token queue.
13440b57cec5SDimitry Andric if (!SimpleKeys.empty()) {
13450b57cec5SDimitry Andric SimpleKey SK = SimpleKeys.pop_back_val();
13460b57cec5SDimitry Andric Token T;
13470b57cec5SDimitry Andric T.Kind = Token::TK_Key;
13480b57cec5SDimitry Andric T.Range = SK.Tok->Range;
13490b57cec5SDimitry Andric TokenQueueT::iterator i, e;
13500b57cec5SDimitry Andric for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) {
13510b57cec5SDimitry Andric if (i == SK.Tok)
13520b57cec5SDimitry Andric break;
13530b57cec5SDimitry Andric }
1354480093f4SDimitry Andric if (i == e) {
1355480093f4SDimitry Andric Failed = true;
1356480093f4SDimitry Andric return false;
1357480093f4SDimitry Andric }
13580b57cec5SDimitry Andric i = TokenQueue.insert(i, T);
13590b57cec5SDimitry Andric
13600b57cec5SDimitry Andric // We may also need to add a Block-Mapping-Start token.
13610b57cec5SDimitry Andric rollIndent(SK.Column, Token::TK_BlockMappingStart, i);
13620b57cec5SDimitry Andric
13630b57cec5SDimitry Andric IsSimpleKeyAllowed = false;
13640b57cec5SDimitry Andric } else {
13650b57cec5SDimitry Andric if (!FlowLevel)
13660b57cec5SDimitry Andric rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
13670b57cec5SDimitry Andric IsSimpleKeyAllowed = !FlowLevel;
13680b57cec5SDimitry Andric }
1369*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = false;
13700b57cec5SDimitry Andric
13710b57cec5SDimitry Andric Token T;
13720b57cec5SDimitry Andric T.Kind = Token::TK_Value;
13730b57cec5SDimitry Andric T.Range = StringRef(Current, 1);
13740b57cec5SDimitry Andric skip(1);
13750b57cec5SDimitry Andric TokenQueue.push_back(T);
13760b57cec5SDimitry Andric return true;
13770b57cec5SDimitry Andric }
13780b57cec5SDimitry Andric
13790b57cec5SDimitry Andric // Forbidding inlining improves performance by roughly 20%.
13800b57cec5SDimitry Andric // FIXME: Remove once llvm optimizes this to the faster version without hints.
13810b57cec5SDimitry Andric LLVM_ATTRIBUTE_NOINLINE static bool
13820b57cec5SDimitry Andric wasEscaped(StringRef::iterator First, StringRef::iterator Position);
13830b57cec5SDimitry Andric
13840b57cec5SDimitry Andric // Returns whether a character at 'Position' was escaped with a leading '\'.
13850b57cec5SDimitry Andric // 'First' specifies the position of the first character in the string.
wasEscaped(StringRef::iterator First,StringRef::iterator Position)13860b57cec5SDimitry Andric static bool wasEscaped(StringRef::iterator First,
13870b57cec5SDimitry Andric StringRef::iterator Position) {
13880b57cec5SDimitry Andric assert(Position - 1 >= First);
13890b57cec5SDimitry Andric StringRef::iterator I = Position - 1;
13900b57cec5SDimitry Andric // We calculate the number of consecutive '\'s before the current position
13910b57cec5SDimitry Andric // by iterating backwards through our string.
13920b57cec5SDimitry Andric while (I >= First && *I == '\\') --I;
13930b57cec5SDimitry Andric // (Position - 1 - I) now contains the number of '\'s before the current
13940b57cec5SDimitry Andric // position. If it is odd, the character at 'Position' was escaped.
13950b57cec5SDimitry Andric return (Position - 1 - I) % 2 == 1;
13960b57cec5SDimitry Andric }
13970b57cec5SDimitry Andric
scanFlowScalar(bool IsDoubleQuoted)13980b57cec5SDimitry Andric bool Scanner::scanFlowScalar(bool IsDoubleQuoted) {
13990b57cec5SDimitry Andric StringRef::iterator Start = Current;
14000b57cec5SDimitry Andric unsigned ColStart = Column;
14010b57cec5SDimitry Andric if (IsDoubleQuoted) {
14020b57cec5SDimitry Andric do {
14030b57cec5SDimitry Andric ++Current;
14040b57cec5SDimitry Andric while (Current != End && *Current != '"')
14050b57cec5SDimitry Andric ++Current;
14060b57cec5SDimitry Andric // Repeat until the previous character was not a '\' or was an escaped
14070b57cec5SDimitry Andric // backslash.
14080b57cec5SDimitry Andric } while ( Current != End
14090b57cec5SDimitry Andric && *(Current - 1) == '\\'
14100b57cec5SDimitry Andric && wasEscaped(Start + 1, Current));
14110b57cec5SDimitry Andric } else {
14120b57cec5SDimitry Andric skip(1);
1413e8d8bef9SDimitry Andric while (Current != End) {
14140b57cec5SDimitry Andric // Skip a ' followed by another '.
14150b57cec5SDimitry Andric if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') {
14160b57cec5SDimitry Andric skip(2);
14170b57cec5SDimitry Andric continue;
14180b57cec5SDimitry Andric } else if (*Current == '\'')
14190b57cec5SDimitry Andric break;
14200b57cec5SDimitry Andric StringRef::iterator i = skip_nb_char(Current);
14210b57cec5SDimitry Andric if (i == Current) {
14220b57cec5SDimitry Andric i = skip_b_break(Current);
14230b57cec5SDimitry Andric if (i == Current)
14240b57cec5SDimitry Andric break;
14250b57cec5SDimitry Andric Current = i;
14260b57cec5SDimitry Andric Column = 0;
14270b57cec5SDimitry Andric ++Line;
14280b57cec5SDimitry Andric } else {
14290b57cec5SDimitry Andric if (i == End)
14300b57cec5SDimitry Andric break;
14310b57cec5SDimitry Andric Current = i;
14320b57cec5SDimitry Andric ++Column;
14330b57cec5SDimitry Andric }
14340b57cec5SDimitry Andric }
14350b57cec5SDimitry Andric }
14360b57cec5SDimitry Andric
14370b57cec5SDimitry Andric if (Current == End) {
14380b57cec5SDimitry Andric setError("Expected quote at end of scalar", Current);
14390b57cec5SDimitry Andric return false;
14400b57cec5SDimitry Andric }
14410b57cec5SDimitry Andric
14420b57cec5SDimitry Andric skip(1); // Skip ending quote.
14430b57cec5SDimitry Andric Token T;
14440b57cec5SDimitry Andric T.Kind = Token::TK_Scalar;
14450b57cec5SDimitry Andric T.Range = StringRef(Start, Current - Start);
14460b57cec5SDimitry Andric TokenQueue.push_back(T);
14470b57cec5SDimitry Andric
14480b57cec5SDimitry Andric saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
14490b57cec5SDimitry Andric
14500b57cec5SDimitry Andric IsSimpleKeyAllowed = false;
1451*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = true;
14520b57cec5SDimitry Andric
14530b57cec5SDimitry Andric return true;
14540b57cec5SDimitry Andric }
14550b57cec5SDimitry Andric
scanPlainScalar()14560b57cec5SDimitry Andric bool Scanner::scanPlainScalar() {
14570b57cec5SDimitry Andric StringRef::iterator Start = Current;
14580b57cec5SDimitry Andric unsigned ColStart = Column;
14590b57cec5SDimitry Andric unsigned LeadingBlanks = 0;
14600b57cec5SDimitry Andric assert(Indent >= -1 && "Indent must be >= -1 !");
14610b57cec5SDimitry Andric unsigned indent = static_cast<unsigned>(Indent + 1);
1462e8d8bef9SDimitry Andric while (Current != End) {
14630b57cec5SDimitry Andric if (*Current == '#')
14640b57cec5SDimitry Andric break;
14650b57cec5SDimitry Andric
1466*5f757f3fSDimitry Andric while (Current != End &&
1467*5f757f3fSDimitry Andric ((*Current != ':' && isPlainSafeNonBlank(Current)) ||
1468*5f757f3fSDimitry Andric (*Current == ':' && isPlainSafeNonBlank(Current + 1)))) {
14690b57cec5SDimitry Andric StringRef::iterator i = skip_nb_char(Current);
14700b57cec5SDimitry Andric if (i == Current)
14710b57cec5SDimitry Andric break;
14720b57cec5SDimitry Andric Current = i;
14730b57cec5SDimitry Andric ++Column;
14740b57cec5SDimitry Andric }
14750b57cec5SDimitry Andric
14760b57cec5SDimitry Andric // Are we at the end?
14770b57cec5SDimitry Andric if (!isBlankOrBreak(Current))
14780b57cec5SDimitry Andric break;
14790b57cec5SDimitry Andric
14800b57cec5SDimitry Andric // Eat blanks.
14810b57cec5SDimitry Andric StringRef::iterator Tmp = Current;
14820b57cec5SDimitry Andric while (isBlankOrBreak(Tmp)) {
14830b57cec5SDimitry Andric StringRef::iterator i = skip_s_white(Tmp);
14840b57cec5SDimitry Andric if (i != Tmp) {
14850b57cec5SDimitry Andric if (LeadingBlanks && (Column < indent) && *Tmp == '\t') {
14860b57cec5SDimitry Andric setError("Found invalid tab character in indentation", Tmp);
14870b57cec5SDimitry Andric return false;
14880b57cec5SDimitry Andric }
14890b57cec5SDimitry Andric Tmp = i;
14900b57cec5SDimitry Andric ++Column;
14910b57cec5SDimitry Andric } else {
14920b57cec5SDimitry Andric i = skip_b_break(Tmp);
14930b57cec5SDimitry Andric if (!LeadingBlanks)
14940b57cec5SDimitry Andric LeadingBlanks = 1;
14950b57cec5SDimitry Andric Tmp = i;
14960b57cec5SDimitry Andric Column = 0;
14970b57cec5SDimitry Andric ++Line;
14980b57cec5SDimitry Andric }
14990b57cec5SDimitry Andric }
15000b57cec5SDimitry Andric
15010b57cec5SDimitry Andric if (!FlowLevel && Column < indent)
15020b57cec5SDimitry Andric break;
15030b57cec5SDimitry Andric
15040b57cec5SDimitry Andric Current = Tmp;
15050b57cec5SDimitry Andric }
15060b57cec5SDimitry Andric if (Start == Current) {
15070b57cec5SDimitry Andric setError("Got empty plain scalar", Start);
15080b57cec5SDimitry Andric return false;
15090b57cec5SDimitry Andric }
15100b57cec5SDimitry Andric Token T;
15110b57cec5SDimitry Andric T.Kind = Token::TK_Scalar;
15120b57cec5SDimitry Andric T.Range = StringRef(Start, Current - Start);
15130b57cec5SDimitry Andric TokenQueue.push_back(T);
15140b57cec5SDimitry Andric
15150b57cec5SDimitry Andric // Plain scalars can be simple keys.
15160b57cec5SDimitry Andric saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
15170b57cec5SDimitry Andric
15180b57cec5SDimitry Andric IsSimpleKeyAllowed = false;
1519*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = false;
15200b57cec5SDimitry Andric
15210b57cec5SDimitry Andric return true;
15220b57cec5SDimitry Andric }
15230b57cec5SDimitry Andric
scanAliasOrAnchor(bool IsAlias)15240b57cec5SDimitry Andric bool Scanner::scanAliasOrAnchor(bool IsAlias) {
15250b57cec5SDimitry Andric StringRef::iterator Start = Current;
15260b57cec5SDimitry Andric unsigned ColStart = Column;
15270b57cec5SDimitry Andric skip(1);
1528e8d8bef9SDimitry Andric while (Current != End) {
15290b57cec5SDimitry Andric if ( *Current == '[' || *Current == ']'
15300b57cec5SDimitry Andric || *Current == '{' || *Current == '}'
15310b57cec5SDimitry Andric || *Current == ','
15320b57cec5SDimitry Andric || *Current == ':')
15330b57cec5SDimitry Andric break;
15340b57cec5SDimitry Andric StringRef::iterator i = skip_ns_char(Current);
15350b57cec5SDimitry Andric if (i == Current)
15360b57cec5SDimitry Andric break;
15370b57cec5SDimitry Andric Current = i;
15380b57cec5SDimitry Andric ++Column;
15390b57cec5SDimitry Andric }
15400b57cec5SDimitry Andric
1541e8d8bef9SDimitry Andric if (Start + 1 == Current) {
15420b57cec5SDimitry Andric setError("Got empty alias or anchor", Start);
15430b57cec5SDimitry Andric return false;
15440b57cec5SDimitry Andric }
15450b57cec5SDimitry Andric
15460b57cec5SDimitry Andric Token T;
15470b57cec5SDimitry Andric T.Kind = IsAlias ? Token::TK_Alias : Token::TK_Anchor;
15480b57cec5SDimitry Andric T.Range = StringRef(Start, Current - Start);
15490b57cec5SDimitry Andric TokenQueue.push_back(T);
15500b57cec5SDimitry Andric
15510b57cec5SDimitry Andric // Alias and anchors can be simple keys.
15520b57cec5SDimitry Andric saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
15530b57cec5SDimitry Andric
15540b57cec5SDimitry Andric IsSimpleKeyAllowed = false;
1555*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = false;
15560b57cec5SDimitry Andric
15570b57cec5SDimitry Andric return true;
15580b57cec5SDimitry Andric }
15590b57cec5SDimitry Andric
scanBlockScalarIndicators(char & StyleIndicator,char & ChompingIndicator,unsigned & IndentIndicator,bool & IsDone)156081ad6265SDimitry Andric bool Scanner::scanBlockScalarIndicators(char &StyleIndicator,
156181ad6265SDimitry Andric char &ChompingIndicator,
156281ad6265SDimitry Andric unsigned &IndentIndicator,
156381ad6265SDimitry Andric bool &IsDone) {
156481ad6265SDimitry Andric StyleIndicator = scanBlockStyleIndicator();
156581ad6265SDimitry Andric if (!scanBlockScalarHeader(ChompingIndicator, IndentIndicator, IsDone))
156681ad6265SDimitry Andric return false;
156781ad6265SDimitry Andric return true;
156881ad6265SDimitry Andric }
156981ad6265SDimitry Andric
scanBlockStyleIndicator()157081ad6265SDimitry Andric char Scanner::scanBlockStyleIndicator() {
157181ad6265SDimitry Andric char Indicator = ' ';
157281ad6265SDimitry Andric if (Current != End && (*Current == '>' || *Current == '|')) {
157381ad6265SDimitry Andric Indicator = *Current;
157481ad6265SDimitry Andric skip(1);
157581ad6265SDimitry Andric }
157681ad6265SDimitry Andric return Indicator;
157781ad6265SDimitry Andric }
157881ad6265SDimitry Andric
scanBlockChompingIndicator()15790b57cec5SDimitry Andric char Scanner::scanBlockChompingIndicator() {
15800b57cec5SDimitry Andric char Indicator = ' ';
15810b57cec5SDimitry Andric if (Current != End && (*Current == '+' || *Current == '-')) {
15820b57cec5SDimitry Andric Indicator = *Current;
15830b57cec5SDimitry Andric skip(1);
15840b57cec5SDimitry Andric }
15850b57cec5SDimitry Andric return Indicator;
15860b57cec5SDimitry Andric }
15870b57cec5SDimitry Andric
15880b57cec5SDimitry Andric /// Get the number of line breaks after chomping.
15890b57cec5SDimitry Andric ///
15900b57cec5SDimitry Andric /// Return the number of trailing line breaks to emit, depending on
15910b57cec5SDimitry Andric /// \p ChompingIndicator.
getChompedLineBreaks(char ChompingIndicator,unsigned LineBreaks,StringRef Str)15920b57cec5SDimitry Andric static unsigned getChompedLineBreaks(char ChompingIndicator,
15930b57cec5SDimitry Andric unsigned LineBreaks, StringRef Str) {
15940b57cec5SDimitry Andric if (ChompingIndicator == '-') // Strip all line breaks.
15950b57cec5SDimitry Andric return 0;
15960b57cec5SDimitry Andric if (ChompingIndicator == '+') // Keep all line breaks.
15970b57cec5SDimitry Andric return LineBreaks;
15980b57cec5SDimitry Andric // Clip trailing lines.
15990b57cec5SDimitry Andric return Str.empty() ? 0 : 1;
16000b57cec5SDimitry Andric }
16010b57cec5SDimitry Andric
scanBlockIndentationIndicator()16020b57cec5SDimitry Andric unsigned Scanner::scanBlockIndentationIndicator() {
16030b57cec5SDimitry Andric unsigned Indent = 0;
16040b57cec5SDimitry Andric if (Current != End && (*Current >= '1' && *Current <= '9')) {
16050b57cec5SDimitry Andric Indent = unsigned(*Current - '0');
16060b57cec5SDimitry Andric skip(1);
16070b57cec5SDimitry Andric }
16080b57cec5SDimitry Andric return Indent;
16090b57cec5SDimitry Andric }
16100b57cec5SDimitry Andric
scanBlockScalarHeader(char & ChompingIndicator,unsigned & IndentIndicator,bool & IsDone)16110b57cec5SDimitry Andric bool Scanner::scanBlockScalarHeader(char &ChompingIndicator,
16120b57cec5SDimitry Andric unsigned &IndentIndicator, bool &IsDone) {
16130b57cec5SDimitry Andric auto Start = Current;
16140b57cec5SDimitry Andric
16150b57cec5SDimitry Andric ChompingIndicator = scanBlockChompingIndicator();
16160b57cec5SDimitry Andric IndentIndicator = scanBlockIndentationIndicator();
16170b57cec5SDimitry Andric // Check for the chomping indicator once again.
16180b57cec5SDimitry Andric if (ChompingIndicator == ' ')
16190b57cec5SDimitry Andric ChompingIndicator = scanBlockChompingIndicator();
16200b57cec5SDimitry Andric Current = skip_while(&Scanner::skip_s_white, Current);
16210b57cec5SDimitry Andric skipComment();
16220b57cec5SDimitry Andric
16230b57cec5SDimitry Andric if (Current == End) { // EOF, we have an empty scalar.
16240b57cec5SDimitry Andric Token T;
16250b57cec5SDimitry Andric T.Kind = Token::TK_BlockScalar;
16260b57cec5SDimitry Andric T.Range = StringRef(Start, Current - Start);
16270b57cec5SDimitry Andric TokenQueue.push_back(T);
16280b57cec5SDimitry Andric IsDone = true;
16290b57cec5SDimitry Andric return true;
16300b57cec5SDimitry Andric }
16310b57cec5SDimitry Andric
16320b57cec5SDimitry Andric if (!consumeLineBreakIfPresent()) {
16330b57cec5SDimitry Andric setError("Expected a line break after block scalar header", Current);
16340b57cec5SDimitry Andric return false;
16350b57cec5SDimitry Andric }
16360b57cec5SDimitry Andric return true;
16370b57cec5SDimitry Andric }
16380b57cec5SDimitry Andric
findBlockScalarIndent(unsigned & BlockIndent,unsigned BlockExitIndent,unsigned & LineBreaks,bool & IsDone)16390b57cec5SDimitry Andric bool Scanner::findBlockScalarIndent(unsigned &BlockIndent,
16400b57cec5SDimitry Andric unsigned BlockExitIndent,
16410b57cec5SDimitry Andric unsigned &LineBreaks, bool &IsDone) {
16420b57cec5SDimitry Andric unsigned MaxAllSpaceLineCharacters = 0;
16430b57cec5SDimitry Andric StringRef::iterator LongestAllSpaceLine;
16440b57cec5SDimitry Andric
16450b57cec5SDimitry Andric while (true) {
16460b57cec5SDimitry Andric advanceWhile(&Scanner::skip_s_space);
16470b57cec5SDimitry Andric if (skip_nb_char(Current) != Current) {
16480b57cec5SDimitry Andric // This line isn't empty, so try and find the indentation.
16490b57cec5SDimitry Andric if (Column <= BlockExitIndent) { // End of the block literal.
16500b57cec5SDimitry Andric IsDone = true;
16510b57cec5SDimitry Andric return true;
16520b57cec5SDimitry Andric }
16530b57cec5SDimitry Andric // We found the block's indentation.
16540b57cec5SDimitry Andric BlockIndent = Column;
16550b57cec5SDimitry Andric if (MaxAllSpaceLineCharacters > BlockIndent) {
16560b57cec5SDimitry Andric setError(
16570b57cec5SDimitry Andric "Leading all-spaces line must be smaller than the block indent",
16580b57cec5SDimitry Andric LongestAllSpaceLine);
16590b57cec5SDimitry Andric return false;
16600b57cec5SDimitry Andric }
16610b57cec5SDimitry Andric return true;
16620b57cec5SDimitry Andric }
16630b57cec5SDimitry Andric if (skip_b_break(Current) != Current &&
16640b57cec5SDimitry Andric Column > MaxAllSpaceLineCharacters) {
16650b57cec5SDimitry Andric // Record the longest all-space line in case it's longer than the
16660b57cec5SDimitry Andric // discovered block indent.
16670b57cec5SDimitry Andric MaxAllSpaceLineCharacters = Column;
16680b57cec5SDimitry Andric LongestAllSpaceLine = Current;
16690b57cec5SDimitry Andric }
16700b57cec5SDimitry Andric
16710b57cec5SDimitry Andric // Check for EOF.
16720b57cec5SDimitry Andric if (Current == End) {
16730b57cec5SDimitry Andric IsDone = true;
16740b57cec5SDimitry Andric return true;
16750b57cec5SDimitry Andric }
16760b57cec5SDimitry Andric
16770b57cec5SDimitry Andric if (!consumeLineBreakIfPresent()) {
16780b57cec5SDimitry Andric IsDone = true;
16790b57cec5SDimitry Andric return true;
16800b57cec5SDimitry Andric }
16810b57cec5SDimitry Andric ++LineBreaks;
16820b57cec5SDimitry Andric }
16830b57cec5SDimitry Andric return true;
16840b57cec5SDimitry Andric }
16850b57cec5SDimitry Andric
scanBlockScalarIndent(unsigned BlockIndent,unsigned BlockExitIndent,bool & IsDone)16860b57cec5SDimitry Andric bool Scanner::scanBlockScalarIndent(unsigned BlockIndent,
16870b57cec5SDimitry Andric unsigned BlockExitIndent, bool &IsDone) {
16880b57cec5SDimitry Andric // Skip the indentation.
16890b57cec5SDimitry Andric while (Column < BlockIndent) {
16900b57cec5SDimitry Andric auto I = skip_s_space(Current);
16910b57cec5SDimitry Andric if (I == Current)
16920b57cec5SDimitry Andric break;
16930b57cec5SDimitry Andric Current = I;
16940b57cec5SDimitry Andric ++Column;
16950b57cec5SDimitry Andric }
16960b57cec5SDimitry Andric
16970b57cec5SDimitry Andric if (skip_nb_char(Current) == Current)
16980b57cec5SDimitry Andric return true;
16990b57cec5SDimitry Andric
17000b57cec5SDimitry Andric if (Column <= BlockExitIndent) { // End of the block literal.
17010b57cec5SDimitry Andric IsDone = true;
17020b57cec5SDimitry Andric return true;
17030b57cec5SDimitry Andric }
17040b57cec5SDimitry Andric
17050b57cec5SDimitry Andric if (Column < BlockIndent) {
17060b57cec5SDimitry Andric if (Current != End && *Current == '#') { // Trailing comment.
17070b57cec5SDimitry Andric IsDone = true;
17080b57cec5SDimitry Andric return true;
17090b57cec5SDimitry Andric }
17100b57cec5SDimitry Andric setError("A text line is less indented than the block scalar", Current);
17110b57cec5SDimitry Andric return false;
17120b57cec5SDimitry Andric }
17130b57cec5SDimitry Andric return true; // A normal text line.
17140b57cec5SDimitry Andric }
17150b57cec5SDimitry Andric
scanBlockScalar(bool IsLiteral)17160b57cec5SDimitry Andric bool Scanner::scanBlockScalar(bool IsLiteral) {
17170b57cec5SDimitry Andric assert(*Current == '|' || *Current == '>');
171881ad6265SDimitry Andric char StyleIndicator;
17190b57cec5SDimitry Andric char ChompingIndicator;
17200b57cec5SDimitry Andric unsigned BlockIndent;
17210b57cec5SDimitry Andric bool IsDone = false;
172281ad6265SDimitry Andric if (!scanBlockScalarIndicators(StyleIndicator, ChompingIndicator, BlockIndent,
172381ad6265SDimitry Andric IsDone))
17240b57cec5SDimitry Andric return false;
17250b57cec5SDimitry Andric if (IsDone)
17260b57cec5SDimitry Andric return true;
172781ad6265SDimitry Andric bool IsFolded = StyleIndicator == '>';
17280b57cec5SDimitry Andric
172981ad6265SDimitry Andric const auto *Start = Current;
17300b57cec5SDimitry Andric unsigned BlockExitIndent = Indent < 0 ? 0 : (unsigned)Indent;
17310b57cec5SDimitry Andric unsigned LineBreaks = 0;
17320b57cec5SDimitry Andric if (BlockIndent == 0) {
17330b57cec5SDimitry Andric if (!findBlockScalarIndent(BlockIndent, BlockExitIndent, LineBreaks,
17340b57cec5SDimitry Andric IsDone))
17350b57cec5SDimitry Andric return false;
17360b57cec5SDimitry Andric }
17370b57cec5SDimitry Andric
17380b57cec5SDimitry Andric // Scan the block's scalars body.
17390b57cec5SDimitry Andric SmallString<256> Str;
17400b57cec5SDimitry Andric while (!IsDone) {
17410b57cec5SDimitry Andric if (!scanBlockScalarIndent(BlockIndent, BlockExitIndent, IsDone))
17420b57cec5SDimitry Andric return false;
17430b57cec5SDimitry Andric if (IsDone)
17440b57cec5SDimitry Andric break;
17450b57cec5SDimitry Andric
17460b57cec5SDimitry Andric // Parse the current line.
17470b57cec5SDimitry Andric auto LineStart = Current;
17480b57cec5SDimitry Andric advanceWhile(&Scanner::skip_nb_char);
17490b57cec5SDimitry Andric if (LineStart != Current) {
175081ad6265SDimitry Andric if (LineBreaks && IsFolded && !Scanner::isLineEmpty(Str)) {
175181ad6265SDimitry Andric // The folded style "folds" any single line break between content into a
175281ad6265SDimitry Andric // single space, except when that content is "empty" (only contains
175381ad6265SDimitry Andric // whitespace) in which case the line break is left as-is.
175481ad6265SDimitry Andric if (LineBreaks == 1) {
175581ad6265SDimitry Andric Str.append(LineBreaks,
175681ad6265SDimitry Andric isLineEmpty(StringRef(LineStart, Current - LineStart))
175781ad6265SDimitry Andric ? '\n'
175881ad6265SDimitry Andric : ' ');
175981ad6265SDimitry Andric }
176081ad6265SDimitry Andric // If we saw a single line break, we are completely replacing it and so
176181ad6265SDimitry Andric // want `LineBreaks == 0`. Otherwise this decrement accounts for the
176281ad6265SDimitry Andric // fact that the first line break is "trimmed", only being used to
176381ad6265SDimitry Andric // signal a sequence of line breaks which should not be folded.
176481ad6265SDimitry Andric LineBreaks--;
176581ad6265SDimitry Andric }
17660b57cec5SDimitry Andric Str.append(LineBreaks, '\n');
17670b57cec5SDimitry Andric Str.append(StringRef(LineStart, Current - LineStart));
17680b57cec5SDimitry Andric LineBreaks = 0;
17690b57cec5SDimitry Andric }
17700b57cec5SDimitry Andric
17710b57cec5SDimitry Andric // Check for EOF.
17720b57cec5SDimitry Andric if (Current == End)
17730b57cec5SDimitry Andric break;
17740b57cec5SDimitry Andric
17750b57cec5SDimitry Andric if (!consumeLineBreakIfPresent())
17760b57cec5SDimitry Andric break;
17770b57cec5SDimitry Andric ++LineBreaks;
17780b57cec5SDimitry Andric }
17790b57cec5SDimitry Andric
17800b57cec5SDimitry Andric if (Current == End && !LineBreaks)
17810b57cec5SDimitry Andric // Ensure that there is at least one line break before the end of file.
17820b57cec5SDimitry Andric LineBreaks = 1;
17830b57cec5SDimitry Andric Str.append(getChompedLineBreaks(ChompingIndicator, LineBreaks, Str), '\n');
17840b57cec5SDimitry Andric
17850b57cec5SDimitry Andric // New lines may start a simple key.
17860b57cec5SDimitry Andric if (!FlowLevel)
17870b57cec5SDimitry Andric IsSimpleKeyAllowed = true;
1788*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = false;
17890b57cec5SDimitry Andric
17900b57cec5SDimitry Andric Token T;
17910b57cec5SDimitry Andric T.Kind = Token::TK_BlockScalar;
17920b57cec5SDimitry Andric T.Range = StringRef(Start, Current - Start);
17935ffd83dbSDimitry Andric T.Value = std::string(Str);
17940b57cec5SDimitry Andric TokenQueue.push_back(T);
17950b57cec5SDimitry Andric return true;
17960b57cec5SDimitry Andric }
17970b57cec5SDimitry Andric
scanTag()17980b57cec5SDimitry Andric bool Scanner::scanTag() {
17990b57cec5SDimitry Andric StringRef::iterator Start = Current;
18000b57cec5SDimitry Andric unsigned ColStart = Column;
18010b57cec5SDimitry Andric skip(1); // Eat !.
18020b57cec5SDimitry Andric if (Current == End || isBlankOrBreak(Current)); // An empty tag.
18030b57cec5SDimitry Andric else if (*Current == '<') {
18040b57cec5SDimitry Andric skip(1);
18050b57cec5SDimitry Andric scan_ns_uri_char();
18060b57cec5SDimitry Andric if (!consume('>'))
18070b57cec5SDimitry Andric return false;
18080b57cec5SDimitry Andric } else {
18090b57cec5SDimitry Andric // FIXME: Actually parse the c-ns-shorthand-tag rule.
18100b57cec5SDimitry Andric Current = skip_while(&Scanner::skip_ns_char, Current);
18110b57cec5SDimitry Andric }
18120b57cec5SDimitry Andric
18130b57cec5SDimitry Andric Token T;
18140b57cec5SDimitry Andric T.Kind = Token::TK_Tag;
18150b57cec5SDimitry Andric T.Range = StringRef(Start, Current - Start);
18160b57cec5SDimitry Andric TokenQueue.push_back(T);
18170b57cec5SDimitry Andric
18180b57cec5SDimitry Andric // Tags can be simple keys.
18190b57cec5SDimitry Andric saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false);
18200b57cec5SDimitry Andric
18210b57cec5SDimitry Andric IsSimpleKeyAllowed = false;
1822*5f757f3fSDimitry Andric IsAdjacentValueAllowedInFlow = false;
18230b57cec5SDimitry Andric
18240b57cec5SDimitry Andric return true;
18250b57cec5SDimitry Andric }
18260b57cec5SDimitry Andric
fetchMoreTokens()18270b57cec5SDimitry Andric bool Scanner::fetchMoreTokens() {
18280b57cec5SDimitry Andric if (IsStartOfStream)
18290b57cec5SDimitry Andric return scanStreamStart();
18300b57cec5SDimitry Andric
18310b57cec5SDimitry Andric scanToNextToken();
18320b57cec5SDimitry Andric
18330b57cec5SDimitry Andric if (Current == End)
18340b57cec5SDimitry Andric return scanStreamEnd();
18350b57cec5SDimitry Andric
18360b57cec5SDimitry Andric removeStaleSimpleKeyCandidates();
18370b57cec5SDimitry Andric
18380b57cec5SDimitry Andric unrollIndent(Column);
18390b57cec5SDimitry Andric
18400b57cec5SDimitry Andric if (Column == 0 && *Current == '%')
18410b57cec5SDimitry Andric return scanDirective();
18420b57cec5SDimitry Andric
18430b57cec5SDimitry Andric if (Column == 0 && Current + 4 <= End
18440b57cec5SDimitry Andric && *Current == '-'
18450b57cec5SDimitry Andric && *(Current + 1) == '-'
18460b57cec5SDimitry Andric && *(Current + 2) == '-'
18470b57cec5SDimitry Andric && (Current + 3 == End || isBlankOrBreak(Current + 3)))
18480b57cec5SDimitry Andric return scanDocumentIndicator(true);
18490b57cec5SDimitry Andric
18500b57cec5SDimitry Andric if (Column == 0 && Current + 4 <= End
18510b57cec5SDimitry Andric && *Current == '.'
18520b57cec5SDimitry Andric && *(Current + 1) == '.'
18530b57cec5SDimitry Andric && *(Current + 2) == '.'
18540b57cec5SDimitry Andric && (Current + 3 == End || isBlankOrBreak(Current + 3)))
18550b57cec5SDimitry Andric return scanDocumentIndicator(false);
18560b57cec5SDimitry Andric
18570b57cec5SDimitry Andric if (*Current == '[')
18580b57cec5SDimitry Andric return scanFlowCollectionStart(true);
18590b57cec5SDimitry Andric
18600b57cec5SDimitry Andric if (*Current == '{')
18610b57cec5SDimitry Andric return scanFlowCollectionStart(false);
18620b57cec5SDimitry Andric
18630b57cec5SDimitry Andric if (*Current == ']')
18640b57cec5SDimitry Andric return scanFlowCollectionEnd(true);
18650b57cec5SDimitry Andric
18660b57cec5SDimitry Andric if (*Current == '}')
18670b57cec5SDimitry Andric return scanFlowCollectionEnd(false);
18680b57cec5SDimitry Andric
18690b57cec5SDimitry Andric if (*Current == ',')
18700b57cec5SDimitry Andric return scanFlowEntry();
18710b57cec5SDimitry Andric
1872*5f757f3fSDimitry Andric if (*Current == '-' && (isBlankOrBreak(Current + 1) || Current + 1 == End))
18730b57cec5SDimitry Andric return scanBlockEntry();
18740b57cec5SDimitry Andric
1875*5f757f3fSDimitry Andric if (*Current == '?' && (Current + 1 == End || isBlankOrBreak(Current + 1)))
18760b57cec5SDimitry Andric return scanKey();
18770b57cec5SDimitry Andric
1878*5f757f3fSDimitry Andric if (*Current == ':' &&
1879*5f757f3fSDimitry Andric (!isPlainSafeNonBlank(Current + 1) || IsAdjacentValueAllowedInFlow))
18800b57cec5SDimitry Andric return scanValue();
18810b57cec5SDimitry Andric
18820b57cec5SDimitry Andric if (*Current == '*')
18830b57cec5SDimitry Andric return scanAliasOrAnchor(true);
18840b57cec5SDimitry Andric
18850b57cec5SDimitry Andric if (*Current == '&')
18860b57cec5SDimitry Andric return scanAliasOrAnchor(false);
18870b57cec5SDimitry Andric
18880b57cec5SDimitry Andric if (*Current == '!')
18890b57cec5SDimitry Andric return scanTag();
18900b57cec5SDimitry Andric
18910b57cec5SDimitry Andric if (*Current == '|' && !FlowLevel)
18920b57cec5SDimitry Andric return scanBlockScalar(true);
18930b57cec5SDimitry Andric
18940b57cec5SDimitry Andric if (*Current == '>' && !FlowLevel)
18950b57cec5SDimitry Andric return scanBlockScalar(false);
18960b57cec5SDimitry Andric
18970b57cec5SDimitry Andric if (*Current == '\'')
18980b57cec5SDimitry Andric return scanFlowScalar(false);
18990b57cec5SDimitry Andric
19000b57cec5SDimitry Andric if (*Current == '"')
19010b57cec5SDimitry Andric return scanFlowScalar(true);
19020b57cec5SDimitry Andric
19030b57cec5SDimitry Andric // Get a plain scalar.
19040b57cec5SDimitry Andric StringRef FirstChar(Current, 1);
1905*5f757f3fSDimitry Andric if ((!isBlankOrBreak(Current) &&
1906*5f757f3fSDimitry Andric FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") == StringRef::npos) ||
1907*5f757f3fSDimitry Andric (FirstChar.find_first_of("?:-") != StringRef::npos &&
1908*5f757f3fSDimitry Andric isPlainSafeNonBlank(Current + 1)))
19090b57cec5SDimitry Andric return scanPlainScalar();
19100b57cec5SDimitry Andric
19115ffd83dbSDimitry Andric setError("Unrecognized character while tokenizing.", Current);
19120b57cec5SDimitry Andric return false;
19130b57cec5SDimitry Andric }
19140b57cec5SDimitry Andric
Stream(StringRef Input,SourceMgr & SM,bool ShowColors,std::error_code * EC)19150b57cec5SDimitry Andric Stream::Stream(StringRef Input, SourceMgr &SM, bool ShowColors,
19160b57cec5SDimitry Andric std::error_code *EC)
191781ad6265SDimitry Andric : scanner(new Scanner(Input, SM, ShowColors, EC)) {}
19180b57cec5SDimitry Andric
Stream(MemoryBufferRef InputBuffer,SourceMgr & SM,bool ShowColors,std::error_code * EC)19190b57cec5SDimitry Andric Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors,
19200b57cec5SDimitry Andric std::error_code *EC)
192181ad6265SDimitry Andric : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)) {}
19220b57cec5SDimitry Andric
19230b57cec5SDimitry Andric Stream::~Stream() = default;
19240b57cec5SDimitry Andric
failed()19250b57cec5SDimitry Andric bool Stream::failed() { return scanner->failed(); }
19260b57cec5SDimitry Andric
printError(Node * N,const Twine & Msg,SourceMgr::DiagKind Kind)1927e8d8bef9SDimitry Andric void Stream::printError(Node *N, const Twine &Msg, SourceMgr::DiagKind Kind) {
1928e8d8bef9SDimitry Andric printError(N ? N->getSourceRange() : SMRange(), Msg, Kind);
1929e8d8bef9SDimitry Andric }
1930e8d8bef9SDimitry Andric
printError(const SMRange & Range,const Twine & Msg,SourceMgr::DiagKind Kind)1931e8d8bef9SDimitry Andric void Stream::printError(const SMRange &Range, const Twine &Msg,
1932e8d8bef9SDimitry Andric SourceMgr::DiagKind Kind) {
1933e8d8bef9SDimitry Andric scanner->printError(Range.Start, Kind, Msg, Range);
19340b57cec5SDimitry Andric }
19350b57cec5SDimitry Andric
begin()19360b57cec5SDimitry Andric document_iterator Stream::begin() {
19370b57cec5SDimitry Andric if (CurrentDoc)
19380b57cec5SDimitry Andric report_fatal_error("Can only iterate over the stream once");
19390b57cec5SDimitry Andric
19400b57cec5SDimitry Andric // Skip Stream-Start.
19410b57cec5SDimitry Andric scanner->getNext();
19420b57cec5SDimitry Andric
19430b57cec5SDimitry Andric CurrentDoc.reset(new Document(*this));
19440b57cec5SDimitry Andric return document_iterator(CurrentDoc);
19450b57cec5SDimitry Andric }
19460b57cec5SDimitry Andric
end()19470b57cec5SDimitry Andric document_iterator Stream::end() {
19480b57cec5SDimitry Andric return document_iterator();
19490b57cec5SDimitry Andric }
19500b57cec5SDimitry Andric
skip()19510b57cec5SDimitry Andric void Stream::skip() {
19520eae32dcSDimitry Andric for (Document &Doc : *this)
19530eae32dcSDimitry Andric Doc.skip();
19540b57cec5SDimitry Andric }
19550b57cec5SDimitry Andric
Node(unsigned int Type,std::unique_ptr<Document> & D,StringRef A,StringRef T)19560b57cec5SDimitry Andric Node::Node(unsigned int Type, std::unique_ptr<Document> &D, StringRef A,
19570b57cec5SDimitry Andric StringRef T)
19580b57cec5SDimitry Andric : Doc(D), TypeID(Type), Anchor(A), Tag(T) {
19590b57cec5SDimitry Andric SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin());
19600b57cec5SDimitry Andric SourceRange = SMRange(Start, Start);
19610b57cec5SDimitry Andric }
19620b57cec5SDimitry Andric
getVerbatimTag() const19630b57cec5SDimitry Andric std::string Node::getVerbatimTag() const {
19640b57cec5SDimitry Andric StringRef Raw = getRawTag();
19650b57cec5SDimitry Andric if (!Raw.empty() && Raw != "!") {
19660b57cec5SDimitry Andric std::string Ret;
19670b57cec5SDimitry Andric if (Raw.find_last_of('!') == 0) {
19685ffd83dbSDimitry Andric Ret = std::string(Doc->getTagMap().find("!")->second);
19690b57cec5SDimitry Andric Ret += Raw.substr(1);
19700b57cec5SDimitry Andric return Ret;
1971*5f757f3fSDimitry Andric } else if (Raw.starts_with("!!")) {
19725ffd83dbSDimitry Andric Ret = std::string(Doc->getTagMap().find("!!")->second);
19730b57cec5SDimitry Andric Ret += Raw.substr(2);
19740b57cec5SDimitry Andric return Ret;
19750b57cec5SDimitry Andric } else {
19760b57cec5SDimitry Andric StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1);
19770b57cec5SDimitry Andric std::map<StringRef, StringRef>::const_iterator It =
19780b57cec5SDimitry Andric Doc->getTagMap().find(TagHandle);
19790b57cec5SDimitry Andric if (It != Doc->getTagMap().end())
19805ffd83dbSDimitry Andric Ret = std::string(It->second);
19810b57cec5SDimitry Andric else {
19820b57cec5SDimitry Andric Token T;
19830b57cec5SDimitry Andric T.Kind = Token::TK_Tag;
19840b57cec5SDimitry Andric T.Range = TagHandle;
19850b57cec5SDimitry Andric setError(Twine("Unknown tag handle ") + TagHandle, T);
19860b57cec5SDimitry Andric }
19870b57cec5SDimitry Andric Ret += Raw.substr(Raw.find_last_of('!') + 1);
19880b57cec5SDimitry Andric return Ret;
19890b57cec5SDimitry Andric }
19900b57cec5SDimitry Andric }
19910b57cec5SDimitry Andric
19920b57cec5SDimitry Andric switch (getType()) {
19930b57cec5SDimitry Andric case NK_Null:
19940b57cec5SDimitry Andric return "tag:yaml.org,2002:null";
19950b57cec5SDimitry Andric case NK_Scalar:
19960b57cec5SDimitry Andric case NK_BlockScalar:
19970b57cec5SDimitry Andric // TODO: Tag resolution.
19980b57cec5SDimitry Andric return "tag:yaml.org,2002:str";
19990b57cec5SDimitry Andric case NK_Mapping:
20000b57cec5SDimitry Andric return "tag:yaml.org,2002:map";
20010b57cec5SDimitry Andric case NK_Sequence:
20020b57cec5SDimitry Andric return "tag:yaml.org,2002:seq";
20030b57cec5SDimitry Andric }
20040b57cec5SDimitry Andric
20050b57cec5SDimitry Andric return "";
20060b57cec5SDimitry Andric }
20070b57cec5SDimitry Andric
peekNext()20080b57cec5SDimitry Andric Token &Node::peekNext() {
20090b57cec5SDimitry Andric return Doc->peekNext();
20100b57cec5SDimitry Andric }
20110b57cec5SDimitry Andric
getNext()20120b57cec5SDimitry Andric Token Node::getNext() {
20130b57cec5SDimitry Andric return Doc->getNext();
20140b57cec5SDimitry Andric }
20150b57cec5SDimitry Andric
parseBlockNode()20160b57cec5SDimitry Andric Node *Node::parseBlockNode() {
20170b57cec5SDimitry Andric return Doc->parseBlockNode();
20180b57cec5SDimitry Andric }
20190b57cec5SDimitry Andric
getAllocator()20200b57cec5SDimitry Andric BumpPtrAllocator &Node::getAllocator() {
20210b57cec5SDimitry Andric return Doc->NodeAllocator;
20220b57cec5SDimitry Andric }
20230b57cec5SDimitry Andric
setError(const Twine & Msg,Token & Tok) const20240b57cec5SDimitry Andric void Node::setError(const Twine &Msg, Token &Tok) const {
20250b57cec5SDimitry Andric Doc->setError(Msg, Tok);
20260b57cec5SDimitry Andric }
20270b57cec5SDimitry Andric
failed() const20280b57cec5SDimitry Andric bool Node::failed() const {
20290b57cec5SDimitry Andric return Doc->failed();
20300b57cec5SDimitry Andric }
20310b57cec5SDimitry Andric
getValue(SmallVectorImpl<char> & Storage) const20320b57cec5SDimitry Andric StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const {
2033*5f757f3fSDimitry Andric if (Value[0] == '"')
2034*5f757f3fSDimitry Andric return getDoubleQuotedValue(Value, Storage);
2035*5f757f3fSDimitry Andric if (Value[0] == '\'')
2036*5f757f3fSDimitry Andric return getSingleQuotedValue(Value, Storage);
2037*5f757f3fSDimitry Andric return getPlainValue(Value, Storage);
2038*5f757f3fSDimitry Andric }
2039*5f757f3fSDimitry Andric
2040*5f757f3fSDimitry Andric /// parseScalarValue - A common parsing routine for all flow scalar styles.
2041*5f757f3fSDimitry Andric /// It handles line break characters by itself, adds regular content characters
2042*5f757f3fSDimitry Andric /// to the result, and forwards escaped sequences to the provided routine for
2043*5f757f3fSDimitry Andric /// the style-specific processing.
2044*5f757f3fSDimitry Andric ///
2045*5f757f3fSDimitry Andric /// \param UnquotedValue - An input value without quotation marks.
2046*5f757f3fSDimitry Andric /// \param Storage - A storage for the result if the input value is multiline or
2047*5f757f3fSDimitry Andric /// contains escaped characters.
2048*5f757f3fSDimitry Andric /// \param LookupChars - A set of special characters to search in the input
2049*5f757f3fSDimitry Andric /// string. Should include line break characters and the escape character
2050*5f757f3fSDimitry Andric /// specific for the processing scalar style, if any.
2051*5f757f3fSDimitry Andric /// \param UnescapeCallback - This is called when the escape character is found
2052*5f757f3fSDimitry Andric /// in the input.
2053*5f757f3fSDimitry Andric /// \returns - The unfolded and unescaped value.
2054*5f757f3fSDimitry Andric static StringRef
parseScalarValue(StringRef UnquotedValue,SmallVectorImpl<char> & Storage,StringRef LookupChars,std::function<StringRef (StringRef,SmallVectorImpl<char> &)> UnescapeCallback)2055*5f757f3fSDimitry Andric parseScalarValue(StringRef UnquotedValue, SmallVectorImpl<char> &Storage,
2056*5f757f3fSDimitry Andric StringRef LookupChars,
2057*5f757f3fSDimitry Andric std::function<StringRef(StringRef, SmallVectorImpl<char> &)>
2058*5f757f3fSDimitry Andric UnescapeCallback) {
2059*5f757f3fSDimitry Andric size_t I = UnquotedValue.find_first_of(LookupChars);
2060*5f757f3fSDimitry Andric if (I == StringRef::npos)
20610b57cec5SDimitry Andric return UnquotedValue;
2062*5f757f3fSDimitry Andric
20630b57cec5SDimitry Andric Storage.clear();
20640b57cec5SDimitry Andric Storage.reserve(UnquotedValue.size());
2065*5f757f3fSDimitry Andric char LastNewLineAddedAs = '\0';
2066*5f757f3fSDimitry Andric for (; I != StringRef::npos; I = UnquotedValue.find_first_of(LookupChars)) {
2067*5f757f3fSDimitry Andric if (UnquotedValue[I] != '\r' && UnquotedValue[I] != '\n') {
2068*5f757f3fSDimitry Andric llvm::append_range(Storage, UnquotedValue.take_front(I));
2069*5f757f3fSDimitry Andric UnquotedValue = UnescapeCallback(UnquotedValue.drop_front(I), Storage);
2070*5f757f3fSDimitry Andric LastNewLineAddedAs = '\0';
2071*5f757f3fSDimitry Andric continue;
2072*5f757f3fSDimitry Andric }
2073*5f757f3fSDimitry Andric if (size_t LastNonSWhite = UnquotedValue.find_last_not_of(" \t", I);
2074*5f757f3fSDimitry Andric LastNonSWhite != StringRef::npos) {
2075*5f757f3fSDimitry Andric llvm::append_range(Storage, UnquotedValue.take_front(LastNonSWhite + 1));
2076*5f757f3fSDimitry Andric Storage.push_back(' ');
2077*5f757f3fSDimitry Andric LastNewLineAddedAs = ' ';
2078*5f757f3fSDimitry Andric } else {
2079*5f757f3fSDimitry Andric // Note: we can't just check if the last character in Storage is ' ',
2080*5f757f3fSDimitry Andric // '\n', or something else; that would give a wrong result for double
2081*5f757f3fSDimitry Andric // quoted values containing an escaped space character before a new-line
2082*5f757f3fSDimitry Andric // character.
2083*5f757f3fSDimitry Andric switch (LastNewLineAddedAs) {
2084*5f757f3fSDimitry Andric case ' ':
2085*5f757f3fSDimitry Andric assert(!Storage.empty() && Storage.back() == ' ');
2086*5f757f3fSDimitry Andric Storage.back() = '\n';
2087*5f757f3fSDimitry Andric LastNewLineAddedAs = '\n';
2088*5f757f3fSDimitry Andric break;
2089*5f757f3fSDimitry Andric case '\n':
2090*5f757f3fSDimitry Andric assert(!Storage.empty() && Storage.back() == '\n');
2091*5f757f3fSDimitry Andric Storage.push_back('\n');
2092*5f757f3fSDimitry Andric break;
2093*5f757f3fSDimitry Andric default:
2094*5f757f3fSDimitry Andric Storage.push_back(' ');
2095*5f757f3fSDimitry Andric LastNewLineAddedAs = ' ';
2096*5f757f3fSDimitry Andric break;
2097*5f757f3fSDimitry Andric }
2098*5f757f3fSDimitry Andric }
2099*5f757f3fSDimitry Andric // Handle Windows-style EOL
2100*5f757f3fSDimitry Andric if (UnquotedValue.substr(I, 2) == "\r\n")
2101*5f757f3fSDimitry Andric I++;
2102*5f757f3fSDimitry Andric UnquotedValue = UnquotedValue.drop_front(I + 1).ltrim(" \t");
21030b57cec5SDimitry Andric }
2104e8d8bef9SDimitry Andric llvm::append_range(Storage, UnquotedValue);
21050b57cec5SDimitry Andric return StringRef(Storage.begin(), Storage.size());
21060b57cec5SDimitry Andric }
21070b57cec5SDimitry Andric
2108*5f757f3fSDimitry Andric StringRef
getDoubleQuotedValue(StringRef RawValue,SmallVectorImpl<char> & Storage) const2109*5f757f3fSDimitry Andric ScalarNode::getDoubleQuotedValue(StringRef RawValue,
2110*5f757f3fSDimitry Andric SmallVectorImpl<char> &Storage) const {
2111*5f757f3fSDimitry Andric assert(RawValue.size() >= 2 && RawValue.front() == '"' &&
2112*5f757f3fSDimitry Andric RawValue.back() == '"');
2113*5f757f3fSDimitry Andric StringRef UnquotedValue = RawValue.substr(1, RawValue.size() - 2);
21140b57cec5SDimitry Andric
2115*5f757f3fSDimitry Andric auto UnescapeFunc = [this](StringRef UnquotedValue,
2116*5f757f3fSDimitry Andric SmallVectorImpl<char> &Storage) {
2117*5f757f3fSDimitry Andric assert(UnquotedValue.take_front(1) == "\\");
2118480093f4SDimitry Andric if (UnquotedValue.size() == 1) {
2119480093f4SDimitry Andric Token T;
2120*5f757f3fSDimitry Andric T.Range = UnquotedValue;
2121480093f4SDimitry Andric setError("Unrecognized escape code", T);
2122*5f757f3fSDimitry Andric Storage.clear();
2123*5f757f3fSDimitry Andric return StringRef();
2124480093f4SDimitry Andric }
2125*5f757f3fSDimitry Andric UnquotedValue = UnquotedValue.drop_front(1);
21260b57cec5SDimitry Andric switch (UnquotedValue[0]) {
21270b57cec5SDimitry Andric default: {
21280b57cec5SDimitry Andric Token T;
2129*5f757f3fSDimitry Andric T.Range = UnquotedValue.take_front(1);
2130480093f4SDimitry Andric setError("Unrecognized escape code", T);
2131*5f757f3fSDimitry Andric Storage.clear();
2132*5f757f3fSDimitry Andric return StringRef();
21330b57cec5SDimitry Andric }
21340b57cec5SDimitry Andric case '\r':
2135*5f757f3fSDimitry Andric // Shrink the Windows-style EOL.
2136*5f757f3fSDimitry Andric if (UnquotedValue.size() >= 2 && UnquotedValue[1] == '\n')
2137*5f757f3fSDimitry Andric UnquotedValue = UnquotedValue.drop_front(1);
2138*5f757f3fSDimitry Andric [[fallthrough]];
21390b57cec5SDimitry Andric case '\n':
2140*5f757f3fSDimitry Andric return UnquotedValue.drop_front(1).ltrim(" \t");
21410b57cec5SDimitry Andric case '0':
21420b57cec5SDimitry Andric Storage.push_back(0x00);
21430b57cec5SDimitry Andric break;
21440b57cec5SDimitry Andric case 'a':
21450b57cec5SDimitry Andric Storage.push_back(0x07);
21460b57cec5SDimitry Andric break;
21470b57cec5SDimitry Andric case 'b':
21480b57cec5SDimitry Andric Storage.push_back(0x08);
21490b57cec5SDimitry Andric break;
21500b57cec5SDimitry Andric case 't':
21510b57cec5SDimitry Andric case 0x09:
21520b57cec5SDimitry Andric Storage.push_back(0x09);
21530b57cec5SDimitry Andric break;
21540b57cec5SDimitry Andric case 'n':
21550b57cec5SDimitry Andric Storage.push_back(0x0A);
21560b57cec5SDimitry Andric break;
21570b57cec5SDimitry Andric case 'v':
21580b57cec5SDimitry Andric Storage.push_back(0x0B);
21590b57cec5SDimitry Andric break;
21600b57cec5SDimitry Andric case 'f':
21610b57cec5SDimitry Andric Storage.push_back(0x0C);
21620b57cec5SDimitry Andric break;
21630b57cec5SDimitry Andric case 'r':
21640b57cec5SDimitry Andric Storage.push_back(0x0D);
21650b57cec5SDimitry Andric break;
21660b57cec5SDimitry Andric case 'e':
21670b57cec5SDimitry Andric Storage.push_back(0x1B);
21680b57cec5SDimitry Andric break;
21690b57cec5SDimitry Andric case ' ':
21700b57cec5SDimitry Andric Storage.push_back(0x20);
21710b57cec5SDimitry Andric break;
21720b57cec5SDimitry Andric case '"':
21730b57cec5SDimitry Andric Storage.push_back(0x22);
21740b57cec5SDimitry Andric break;
21750b57cec5SDimitry Andric case '/':
21760b57cec5SDimitry Andric Storage.push_back(0x2F);
21770b57cec5SDimitry Andric break;
21780b57cec5SDimitry Andric case '\\':
21790b57cec5SDimitry Andric Storage.push_back(0x5C);
21800b57cec5SDimitry Andric break;
21810b57cec5SDimitry Andric case 'N':
21820b57cec5SDimitry Andric encodeUTF8(0x85, Storage);
21830b57cec5SDimitry Andric break;
21840b57cec5SDimitry Andric case '_':
21850b57cec5SDimitry Andric encodeUTF8(0xA0, Storage);
21860b57cec5SDimitry Andric break;
21870b57cec5SDimitry Andric case 'L':
21880b57cec5SDimitry Andric encodeUTF8(0x2028, Storage);
21890b57cec5SDimitry Andric break;
21900b57cec5SDimitry Andric case 'P':
21910b57cec5SDimitry Andric encodeUTF8(0x2029, Storage);
21920b57cec5SDimitry Andric break;
21930b57cec5SDimitry Andric case 'x': {
21940b57cec5SDimitry Andric if (UnquotedValue.size() < 3)
21950b57cec5SDimitry Andric // TODO: Report error.
21960b57cec5SDimitry Andric break;
21970b57cec5SDimitry Andric unsigned int UnicodeScalarValue;
21980b57cec5SDimitry Andric if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue))
21990b57cec5SDimitry Andric // TODO: Report error.
22000b57cec5SDimitry Andric UnicodeScalarValue = 0xFFFD;
22010b57cec5SDimitry Andric encodeUTF8(UnicodeScalarValue, Storage);
2202*5f757f3fSDimitry Andric return UnquotedValue.drop_front(3);
22030b57cec5SDimitry Andric }
22040b57cec5SDimitry Andric case 'u': {
22050b57cec5SDimitry Andric if (UnquotedValue.size() < 5)
22060b57cec5SDimitry Andric // TODO: Report error.
22070b57cec5SDimitry Andric break;
22080b57cec5SDimitry Andric unsigned int UnicodeScalarValue;
22090b57cec5SDimitry Andric if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue))
22100b57cec5SDimitry Andric // TODO: Report error.
22110b57cec5SDimitry Andric UnicodeScalarValue = 0xFFFD;
22120b57cec5SDimitry Andric encodeUTF8(UnicodeScalarValue, Storage);
2213*5f757f3fSDimitry Andric return UnquotedValue.drop_front(5);
22140b57cec5SDimitry Andric }
22150b57cec5SDimitry Andric case 'U': {
22160b57cec5SDimitry Andric if (UnquotedValue.size() < 9)
22170b57cec5SDimitry Andric // TODO: Report error.
22180b57cec5SDimitry Andric break;
22190b57cec5SDimitry Andric unsigned int UnicodeScalarValue;
22200b57cec5SDimitry Andric if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue))
22210b57cec5SDimitry Andric // TODO: Report error.
22220b57cec5SDimitry Andric UnicodeScalarValue = 0xFFFD;
22230b57cec5SDimitry Andric encodeUTF8(UnicodeScalarValue, Storage);
2224*5f757f3fSDimitry Andric return UnquotedValue.drop_front(9);
22250b57cec5SDimitry Andric }
22260b57cec5SDimitry Andric }
2227*5f757f3fSDimitry Andric return UnquotedValue.drop_front(1);
2228*5f757f3fSDimitry Andric };
2229*5f757f3fSDimitry Andric
2230*5f757f3fSDimitry Andric return parseScalarValue(UnquotedValue, Storage, "\\\r\n", UnescapeFunc);
22310b57cec5SDimitry Andric }
2232*5f757f3fSDimitry Andric
getSingleQuotedValue(StringRef RawValue,SmallVectorImpl<char> & Storage)2233*5f757f3fSDimitry Andric StringRef ScalarNode::getSingleQuotedValue(StringRef RawValue,
2234*5f757f3fSDimitry Andric SmallVectorImpl<char> &Storage) {
2235*5f757f3fSDimitry Andric assert(RawValue.size() >= 2 && RawValue.front() == '\'' &&
2236*5f757f3fSDimitry Andric RawValue.back() == '\'');
2237*5f757f3fSDimitry Andric StringRef UnquotedValue = RawValue.substr(1, RawValue.size() - 2);
2238*5f757f3fSDimitry Andric
2239*5f757f3fSDimitry Andric auto UnescapeFunc = [](StringRef UnquotedValue,
2240*5f757f3fSDimitry Andric SmallVectorImpl<char> &Storage) {
2241*5f757f3fSDimitry Andric assert(UnquotedValue.take_front(2) == "''");
2242*5f757f3fSDimitry Andric Storage.push_back('\'');
2243*5f757f3fSDimitry Andric return UnquotedValue.drop_front(2);
2244*5f757f3fSDimitry Andric };
2245*5f757f3fSDimitry Andric
2246*5f757f3fSDimitry Andric return parseScalarValue(UnquotedValue, Storage, "'\r\n", UnescapeFunc);
22470b57cec5SDimitry Andric }
2248*5f757f3fSDimitry Andric
getPlainValue(StringRef RawValue,SmallVectorImpl<char> & Storage)2249*5f757f3fSDimitry Andric StringRef ScalarNode::getPlainValue(StringRef RawValue,
2250*5f757f3fSDimitry Andric SmallVectorImpl<char> &Storage) {
2251*5f757f3fSDimitry Andric // Trim trailing whitespace ('b-char' and 's-white').
2252*5f757f3fSDimitry Andric // NOTE: Alternatively we could change the scanner to not include whitespace
2253*5f757f3fSDimitry Andric // here in the first place.
2254*5f757f3fSDimitry Andric RawValue = RawValue.rtrim("\r\n \t");
2255*5f757f3fSDimitry Andric return parseScalarValue(RawValue, Storage, "\r\n", nullptr);
22560b57cec5SDimitry Andric }
22570b57cec5SDimitry Andric
getKey()22580b57cec5SDimitry Andric Node *KeyValueNode::getKey() {
22590b57cec5SDimitry Andric if (Key)
22600b57cec5SDimitry Andric return Key;
22610b57cec5SDimitry Andric // Handle implicit null keys.
22620b57cec5SDimitry Andric {
22630b57cec5SDimitry Andric Token &t = peekNext();
22640b57cec5SDimitry Andric if ( t.Kind == Token::TK_BlockEnd
22650b57cec5SDimitry Andric || t.Kind == Token::TK_Value
22660b57cec5SDimitry Andric || t.Kind == Token::TK_Error) {
22670b57cec5SDimitry Andric return Key = new (getAllocator()) NullNode(Doc);
22680b57cec5SDimitry Andric }
22690b57cec5SDimitry Andric if (t.Kind == Token::TK_Key)
22700b57cec5SDimitry Andric getNext(); // skip TK_Key.
22710b57cec5SDimitry Andric }
22720b57cec5SDimitry Andric
22730b57cec5SDimitry Andric // Handle explicit null keys.
22740b57cec5SDimitry Andric Token &t = peekNext();
22750b57cec5SDimitry Andric if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Value) {
22760b57cec5SDimitry Andric return Key = new (getAllocator()) NullNode(Doc);
22770b57cec5SDimitry Andric }
22780b57cec5SDimitry Andric
22790b57cec5SDimitry Andric // We've got a normal key.
22800b57cec5SDimitry Andric return Key = parseBlockNode();
22810b57cec5SDimitry Andric }
22820b57cec5SDimitry Andric
getValue()22830b57cec5SDimitry Andric Node *KeyValueNode::getValue() {
22840b57cec5SDimitry Andric if (Value)
22850b57cec5SDimitry Andric return Value;
2286480093f4SDimitry Andric
2287480093f4SDimitry Andric if (Node* Key = getKey())
2288480093f4SDimitry Andric Key->skip();
2289480093f4SDimitry Andric else {
2290480093f4SDimitry Andric setError("Null key in Key Value.", peekNext());
2291480093f4SDimitry Andric return Value = new (getAllocator()) NullNode(Doc);
2292480093f4SDimitry Andric }
2293480093f4SDimitry Andric
22940b57cec5SDimitry Andric if (failed())
22950b57cec5SDimitry Andric return Value = new (getAllocator()) NullNode(Doc);
22960b57cec5SDimitry Andric
22970b57cec5SDimitry Andric // Handle implicit null values.
22980b57cec5SDimitry Andric {
22990b57cec5SDimitry Andric Token &t = peekNext();
23000b57cec5SDimitry Andric if ( t.Kind == Token::TK_BlockEnd
23010b57cec5SDimitry Andric || t.Kind == Token::TK_FlowMappingEnd
23020b57cec5SDimitry Andric || t.Kind == Token::TK_Key
23030b57cec5SDimitry Andric || t.Kind == Token::TK_FlowEntry
23040b57cec5SDimitry Andric || t.Kind == Token::TK_Error) {
23050b57cec5SDimitry Andric return Value = new (getAllocator()) NullNode(Doc);
23060b57cec5SDimitry Andric }
23070b57cec5SDimitry Andric
23080b57cec5SDimitry Andric if (t.Kind != Token::TK_Value) {
23090b57cec5SDimitry Andric setError("Unexpected token in Key Value.", t);
23100b57cec5SDimitry Andric return Value = new (getAllocator()) NullNode(Doc);
23110b57cec5SDimitry Andric }
23120b57cec5SDimitry Andric getNext(); // skip TK_Value.
23130b57cec5SDimitry Andric }
23140b57cec5SDimitry Andric
23150b57cec5SDimitry Andric // Handle explicit null values.
23160b57cec5SDimitry Andric Token &t = peekNext();
23170b57cec5SDimitry Andric if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key) {
23180b57cec5SDimitry Andric return Value = new (getAllocator()) NullNode(Doc);
23190b57cec5SDimitry Andric }
23200b57cec5SDimitry Andric
23210b57cec5SDimitry Andric // We got a normal value.
23220b57cec5SDimitry Andric return Value = parseBlockNode();
23230b57cec5SDimitry Andric }
23240b57cec5SDimitry Andric
increment()23250b57cec5SDimitry Andric void MappingNode::increment() {
23260b57cec5SDimitry Andric if (failed()) {
23270b57cec5SDimitry Andric IsAtEnd = true;
23280b57cec5SDimitry Andric CurrentEntry = nullptr;
23290b57cec5SDimitry Andric return;
23300b57cec5SDimitry Andric }
23310b57cec5SDimitry Andric if (CurrentEntry) {
23320b57cec5SDimitry Andric CurrentEntry->skip();
23330b57cec5SDimitry Andric if (Type == MT_Inline) {
23340b57cec5SDimitry Andric IsAtEnd = true;
23350b57cec5SDimitry Andric CurrentEntry = nullptr;
23360b57cec5SDimitry Andric return;
23370b57cec5SDimitry Andric }
23380b57cec5SDimitry Andric }
23390b57cec5SDimitry Andric Token T = peekNext();
23400b57cec5SDimitry Andric if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) {
23410b57cec5SDimitry Andric // KeyValueNode eats the TK_Key. That way it can detect null keys.
23420b57cec5SDimitry Andric CurrentEntry = new (getAllocator()) KeyValueNode(Doc);
23430b57cec5SDimitry Andric } else if (Type == MT_Block) {
23440b57cec5SDimitry Andric switch (T.Kind) {
23450b57cec5SDimitry Andric case Token::TK_BlockEnd:
23460b57cec5SDimitry Andric getNext();
23470b57cec5SDimitry Andric IsAtEnd = true;
23480b57cec5SDimitry Andric CurrentEntry = nullptr;
23490b57cec5SDimitry Andric break;
23500b57cec5SDimitry Andric default:
23510b57cec5SDimitry Andric setError("Unexpected token. Expected Key or Block End", T);
2352bdd1243dSDimitry Andric [[fallthrough]];
23530b57cec5SDimitry Andric case Token::TK_Error:
23540b57cec5SDimitry Andric IsAtEnd = true;
23550b57cec5SDimitry Andric CurrentEntry = nullptr;
23560b57cec5SDimitry Andric }
23570b57cec5SDimitry Andric } else {
23580b57cec5SDimitry Andric switch (T.Kind) {
23590b57cec5SDimitry Andric case Token::TK_FlowEntry:
23600b57cec5SDimitry Andric // Eat the flow entry and recurse.
23610b57cec5SDimitry Andric getNext();
23620b57cec5SDimitry Andric return increment();
23630b57cec5SDimitry Andric case Token::TK_FlowMappingEnd:
23640b57cec5SDimitry Andric getNext();
2365bdd1243dSDimitry Andric [[fallthrough]];
23660b57cec5SDimitry Andric case Token::TK_Error:
23670b57cec5SDimitry Andric // Set this to end iterator.
23680b57cec5SDimitry Andric IsAtEnd = true;
23690b57cec5SDimitry Andric CurrentEntry = nullptr;
23700b57cec5SDimitry Andric break;
23710b57cec5SDimitry Andric default:
23720b57cec5SDimitry Andric setError( "Unexpected token. Expected Key, Flow Entry, or Flow "
23730b57cec5SDimitry Andric "Mapping End."
23740b57cec5SDimitry Andric , T);
23750b57cec5SDimitry Andric IsAtEnd = true;
23760b57cec5SDimitry Andric CurrentEntry = nullptr;
23770b57cec5SDimitry Andric }
23780b57cec5SDimitry Andric }
23790b57cec5SDimitry Andric }
23800b57cec5SDimitry Andric
increment()23810b57cec5SDimitry Andric void SequenceNode::increment() {
23820b57cec5SDimitry Andric if (failed()) {
23830b57cec5SDimitry Andric IsAtEnd = true;
23840b57cec5SDimitry Andric CurrentEntry = nullptr;
23850b57cec5SDimitry Andric return;
23860b57cec5SDimitry Andric }
23870b57cec5SDimitry Andric if (CurrentEntry)
23880b57cec5SDimitry Andric CurrentEntry->skip();
23890b57cec5SDimitry Andric Token T = peekNext();
23900b57cec5SDimitry Andric if (SeqType == ST_Block) {
23910b57cec5SDimitry Andric switch (T.Kind) {
23920b57cec5SDimitry Andric case Token::TK_BlockEntry:
23930b57cec5SDimitry Andric getNext();
23940b57cec5SDimitry Andric CurrentEntry = parseBlockNode();
23950b57cec5SDimitry Andric if (!CurrentEntry) { // An error occurred.
23960b57cec5SDimitry Andric IsAtEnd = true;
23970b57cec5SDimitry Andric CurrentEntry = nullptr;
23980b57cec5SDimitry Andric }
23990b57cec5SDimitry Andric break;
24000b57cec5SDimitry Andric case Token::TK_BlockEnd:
24010b57cec5SDimitry Andric getNext();
24020b57cec5SDimitry Andric IsAtEnd = true;
24030b57cec5SDimitry Andric CurrentEntry = nullptr;
24040b57cec5SDimitry Andric break;
24050b57cec5SDimitry Andric default:
24060b57cec5SDimitry Andric setError( "Unexpected token. Expected Block Entry or Block End."
24070b57cec5SDimitry Andric , T);
2408bdd1243dSDimitry Andric [[fallthrough]];
24090b57cec5SDimitry Andric case Token::TK_Error:
24100b57cec5SDimitry Andric IsAtEnd = true;
24110b57cec5SDimitry Andric CurrentEntry = nullptr;
24120b57cec5SDimitry Andric }
24130b57cec5SDimitry Andric } else if (SeqType == ST_Indentless) {
24140b57cec5SDimitry Andric switch (T.Kind) {
24150b57cec5SDimitry Andric case Token::TK_BlockEntry:
24160b57cec5SDimitry Andric getNext();
24170b57cec5SDimitry Andric CurrentEntry = parseBlockNode();
24180b57cec5SDimitry Andric if (!CurrentEntry) { // An error occurred.
24190b57cec5SDimitry Andric IsAtEnd = true;
24200b57cec5SDimitry Andric CurrentEntry = nullptr;
24210b57cec5SDimitry Andric }
24220b57cec5SDimitry Andric break;
24230b57cec5SDimitry Andric default:
24240b57cec5SDimitry Andric case Token::TK_Error:
24250b57cec5SDimitry Andric IsAtEnd = true;
24260b57cec5SDimitry Andric CurrentEntry = nullptr;
24270b57cec5SDimitry Andric }
24280b57cec5SDimitry Andric } else if (SeqType == ST_Flow) {
24290b57cec5SDimitry Andric switch (T.Kind) {
24300b57cec5SDimitry Andric case Token::TK_FlowEntry:
24310b57cec5SDimitry Andric // Eat the flow entry and recurse.
24320b57cec5SDimitry Andric getNext();
24330b57cec5SDimitry Andric WasPreviousTokenFlowEntry = true;
24340b57cec5SDimitry Andric return increment();
24350b57cec5SDimitry Andric case Token::TK_FlowSequenceEnd:
24360b57cec5SDimitry Andric getNext();
2437bdd1243dSDimitry Andric [[fallthrough]];
24380b57cec5SDimitry Andric case Token::TK_Error:
24390b57cec5SDimitry Andric // Set this to end iterator.
24400b57cec5SDimitry Andric IsAtEnd = true;
24410b57cec5SDimitry Andric CurrentEntry = nullptr;
24420b57cec5SDimitry Andric break;
24430b57cec5SDimitry Andric case Token::TK_StreamEnd:
24440b57cec5SDimitry Andric case Token::TK_DocumentEnd:
24450b57cec5SDimitry Andric case Token::TK_DocumentStart:
24460b57cec5SDimitry Andric setError("Could not find closing ]!", T);
24470b57cec5SDimitry Andric // Set this to end iterator.
24480b57cec5SDimitry Andric IsAtEnd = true;
24490b57cec5SDimitry Andric CurrentEntry = nullptr;
24500b57cec5SDimitry Andric break;
24510b57cec5SDimitry Andric default:
24520b57cec5SDimitry Andric if (!WasPreviousTokenFlowEntry) {
24530b57cec5SDimitry Andric setError("Expected , between entries!", T);
24540b57cec5SDimitry Andric IsAtEnd = true;
24550b57cec5SDimitry Andric CurrentEntry = nullptr;
24560b57cec5SDimitry Andric break;
24570b57cec5SDimitry Andric }
24580b57cec5SDimitry Andric // Otherwise it must be a flow entry.
24590b57cec5SDimitry Andric CurrentEntry = parseBlockNode();
24600b57cec5SDimitry Andric if (!CurrentEntry) {
24610b57cec5SDimitry Andric IsAtEnd = true;
24620b57cec5SDimitry Andric }
24630b57cec5SDimitry Andric WasPreviousTokenFlowEntry = false;
24640b57cec5SDimitry Andric break;
24650b57cec5SDimitry Andric }
24660b57cec5SDimitry Andric }
24670b57cec5SDimitry Andric }
24680b57cec5SDimitry Andric
Document(Stream & S)24690b57cec5SDimitry Andric Document::Document(Stream &S) : stream(S), Root(nullptr) {
24700b57cec5SDimitry Andric // Tag maps starts with two default mappings.
24710b57cec5SDimitry Andric TagMap["!"] = "!";
24720b57cec5SDimitry Andric TagMap["!!"] = "tag:yaml.org,2002:";
24730b57cec5SDimitry Andric
24740b57cec5SDimitry Andric if (parseDirectives())
24750b57cec5SDimitry Andric expectToken(Token::TK_DocumentStart);
24760b57cec5SDimitry Andric Token &T = peekNext();
24770b57cec5SDimitry Andric if (T.Kind == Token::TK_DocumentStart)
24780b57cec5SDimitry Andric getNext();
24790b57cec5SDimitry Andric }
24800b57cec5SDimitry Andric
skip()24810b57cec5SDimitry Andric bool Document::skip() {
24820b57cec5SDimitry Andric if (stream.scanner->failed())
24830b57cec5SDimitry Andric return false;
2484480093f4SDimitry Andric if (!Root && !getRoot())
2485480093f4SDimitry Andric return false;
24860b57cec5SDimitry Andric Root->skip();
24870b57cec5SDimitry Andric Token &T = peekNext();
24880b57cec5SDimitry Andric if (T.Kind == Token::TK_StreamEnd)
24890b57cec5SDimitry Andric return false;
24900b57cec5SDimitry Andric if (T.Kind == Token::TK_DocumentEnd) {
24910b57cec5SDimitry Andric getNext();
24920b57cec5SDimitry Andric return skip();
24930b57cec5SDimitry Andric }
24940b57cec5SDimitry Andric return true;
24950b57cec5SDimitry Andric }
24960b57cec5SDimitry Andric
peekNext()24970b57cec5SDimitry Andric Token &Document::peekNext() {
24980b57cec5SDimitry Andric return stream.scanner->peekNext();
24990b57cec5SDimitry Andric }
25000b57cec5SDimitry Andric
getNext()25010b57cec5SDimitry Andric Token Document::getNext() {
25020b57cec5SDimitry Andric return stream.scanner->getNext();
25030b57cec5SDimitry Andric }
25040b57cec5SDimitry Andric
setError(const Twine & Message,Token & Location) const25050b57cec5SDimitry Andric void Document::setError(const Twine &Message, Token &Location) const {
25060b57cec5SDimitry Andric stream.scanner->setError(Message, Location.Range.begin());
25070b57cec5SDimitry Andric }
25080b57cec5SDimitry Andric
failed() const25090b57cec5SDimitry Andric bool Document::failed() const {
25100b57cec5SDimitry Andric return stream.scanner->failed();
25110b57cec5SDimitry Andric }
25120b57cec5SDimitry Andric
parseBlockNode()25130b57cec5SDimitry Andric Node *Document::parseBlockNode() {
25140b57cec5SDimitry Andric Token T = peekNext();
25150b57cec5SDimitry Andric // Handle properties.
25160b57cec5SDimitry Andric Token AnchorInfo;
25170b57cec5SDimitry Andric Token TagInfo;
25180b57cec5SDimitry Andric parse_property:
25190b57cec5SDimitry Andric switch (T.Kind) {
25200b57cec5SDimitry Andric case Token::TK_Alias:
25210b57cec5SDimitry Andric getNext();
25220b57cec5SDimitry Andric return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1));
25230b57cec5SDimitry Andric case Token::TK_Anchor:
25240b57cec5SDimitry Andric if (AnchorInfo.Kind == Token::TK_Anchor) {
25250b57cec5SDimitry Andric setError("Already encountered an anchor for this node!", T);
25260b57cec5SDimitry Andric return nullptr;
25270b57cec5SDimitry Andric }
25280b57cec5SDimitry Andric AnchorInfo = getNext(); // Consume TK_Anchor.
25290b57cec5SDimitry Andric T = peekNext();
25300b57cec5SDimitry Andric goto parse_property;
25310b57cec5SDimitry Andric case Token::TK_Tag:
25320b57cec5SDimitry Andric if (TagInfo.Kind == Token::TK_Tag) {
25330b57cec5SDimitry Andric setError("Already encountered a tag for this node!", T);
25340b57cec5SDimitry Andric return nullptr;
25350b57cec5SDimitry Andric }
25360b57cec5SDimitry Andric TagInfo = getNext(); // Consume TK_Tag.
25370b57cec5SDimitry Andric T = peekNext();
25380b57cec5SDimitry Andric goto parse_property;
25390b57cec5SDimitry Andric default:
25400b57cec5SDimitry Andric break;
25410b57cec5SDimitry Andric }
25420b57cec5SDimitry Andric
25430b57cec5SDimitry Andric switch (T.Kind) {
25440b57cec5SDimitry Andric case Token::TK_BlockEntry:
25450b57cec5SDimitry Andric // We got an unindented BlockEntry sequence. This is not terminated with
25460b57cec5SDimitry Andric // a BlockEnd.
25470b57cec5SDimitry Andric // Don't eat the TK_BlockEntry, SequenceNode needs it.
25480b57cec5SDimitry Andric return new (NodeAllocator) SequenceNode( stream.CurrentDoc
25490b57cec5SDimitry Andric , AnchorInfo.Range.substr(1)
25500b57cec5SDimitry Andric , TagInfo.Range
25510b57cec5SDimitry Andric , SequenceNode::ST_Indentless);
25520b57cec5SDimitry Andric case Token::TK_BlockSequenceStart:
25530b57cec5SDimitry Andric getNext();
25540b57cec5SDimitry Andric return new (NodeAllocator)
25550b57cec5SDimitry Andric SequenceNode( stream.CurrentDoc
25560b57cec5SDimitry Andric , AnchorInfo.Range.substr(1)
25570b57cec5SDimitry Andric , TagInfo.Range
25580b57cec5SDimitry Andric , SequenceNode::ST_Block);
25590b57cec5SDimitry Andric case Token::TK_BlockMappingStart:
25600b57cec5SDimitry Andric getNext();
25610b57cec5SDimitry Andric return new (NodeAllocator)
25620b57cec5SDimitry Andric MappingNode( stream.CurrentDoc
25630b57cec5SDimitry Andric , AnchorInfo.Range.substr(1)
25640b57cec5SDimitry Andric , TagInfo.Range
25650b57cec5SDimitry Andric , MappingNode::MT_Block);
25660b57cec5SDimitry Andric case Token::TK_FlowSequenceStart:
25670b57cec5SDimitry Andric getNext();
25680b57cec5SDimitry Andric return new (NodeAllocator)
25690b57cec5SDimitry Andric SequenceNode( stream.CurrentDoc
25700b57cec5SDimitry Andric , AnchorInfo.Range.substr(1)
25710b57cec5SDimitry Andric , TagInfo.Range
25720b57cec5SDimitry Andric , SequenceNode::ST_Flow);
25730b57cec5SDimitry Andric case Token::TK_FlowMappingStart:
25740b57cec5SDimitry Andric getNext();
25750b57cec5SDimitry Andric return new (NodeAllocator)
25760b57cec5SDimitry Andric MappingNode( stream.CurrentDoc
25770b57cec5SDimitry Andric , AnchorInfo.Range.substr(1)
25780b57cec5SDimitry Andric , TagInfo.Range
25790b57cec5SDimitry Andric , MappingNode::MT_Flow);
25800b57cec5SDimitry Andric case Token::TK_Scalar:
25810b57cec5SDimitry Andric getNext();
25820b57cec5SDimitry Andric return new (NodeAllocator)
25830b57cec5SDimitry Andric ScalarNode( stream.CurrentDoc
25840b57cec5SDimitry Andric , AnchorInfo.Range.substr(1)
25850b57cec5SDimitry Andric , TagInfo.Range
25860b57cec5SDimitry Andric , T.Range);
25870b57cec5SDimitry Andric case Token::TK_BlockScalar: {
25880b57cec5SDimitry Andric getNext();
25890b57cec5SDimitry Andric StringRef NullTerminatedStr(T.Value.c_str(), T.Value.length() + 1);
25900b57cec5SDimitry Andric StringRef StrCopy = NullTerminatedStr.copy(NodeAllocator).drop_back();
25910b57cec5SDimitry Andric return new (NodeAllocator)
25920b57cec5SDimitry Andric BlockScalarNode(stream.CurrentDoc, AnchorInfo.Range.substr(1),
25930b57cec5SDimitry Andric TagInfo.Range, StrCopy, T.Range);
25940b57cec5SDimitry Andric }
25950b57cec5SDimitry Andric case Token::TK_Key:
25960b57cec5SDimitry Andric // Don't eat the TK_Key, KeyValueNode expects it.
25970b57cec5SDimitry Andric return new (NodeAllocator)
25980b57cec5SDimitry Andric MappingNode( stream.CurrentDoc
25990b57cec5SDimitry Andric , AnchorInfo.Range.substr(1)
26000b57cec5SDimitry Andric , TagInfo.Range
26010b57cec5SDimitry Andric , MappingNode::MT_Inline);
26020b57cec5SDimitry Andric case Token::TK_DocumentStart:
26030b57cec5SDimitry Andric case Token::TK_DocumentEnd:
26040b57cec5SDimitry Andric case Token::TK_StreamEnd:
26050b57cec5SDimitry Andric default:
26060b57cec5SDimitry Andric // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not
26070b57cec5SDimitry Andric // !!null null.
26080b57cec5SDimitry Andric return new (NodeAllocator) NullNode(stream.CurrentDoc);
2609480093f4SDimitry Andric case Token::TK_FlowMappingEnd:
2610480093f4SDimitry Andric case Token::TK_FlowSequenceEnd:
2611480093f4SDimitry Andric case Token::TK_FlowEntry: {
2612480093f4SDimitry Andric if (Root && (isa<MappingNode>(Root) || isa<SequenceNode>(Root)))
2613480093f4SDimitry Andric return new (NodeAllocator) NullNode(stream.CurrentDoc);
2614480093f4SDimitry Andric
2615480093f4SDimitry Andric setError("Unexpected token", T);
2616480093f4SDimitry Andric return nullptr;
2617480093f4SDimitry Andric }
26180b57cec5SDimitry Andric case Token::TK_Error:
26190b57cec5SDimitry Andric return nullptr;
26200b57cec5SDimitry Andric }
26210b57cec5SDimitry Andric llvm_unreachable("Control flow shouldn't reach here.");
26220b57cec5SDimitry Andric return nullptr;
26230b57cec5SDimitry Andric }
26240b57cec5SDimitry Andric
parseDirectives()26250b57cec5SDimitry Andric bool Document::parseDirectives() {
26260b57cec5SDimitry Andric bool isDirective = false;
26270b57cec5SDimitry Andric while (true) {
26280b57cec5SDimitry Andric Token T = peekNext();
26290b57cec5SDimitry Andric if (T.Kind == Token::TK_TagDirective) {
26300b57cec5SDimitry Andric parseTAGDirective();
26310b57cec5SDimitry Andric isDirective = true;
26320b57cec5SDimitry Andric } else if (T.Kind == Token::TK_VersionDirective) {
26330b57cec5SDimitry Andric parseYAMLDirective();
26340b57cec5SDimitry Andric isDirective = true;
26350b57cec5SDimitry Andric } else
26360b57cec5SDimitry Andric break;
26370b57cec5SDimitry Andric }
26380b57cec5SDimitry Andric return isDirective;
26390b57cec5SDimitry Andric }
26400b57cec5SDimitry Andric
parseYAMLDirective()26410b57cec5SDimitry Andric void Document::parseYAMLDirective() {
26420b57cec5SDimitry Andric getNext(); // Eat %YAML <version>
26430b57cec5SDimitry Andric }
26440b57cec5SDimitry Andric
parseTAGDirective()26450b57cec5SDimitry Andric void Document::parseTAGDirective() {
26460b57cec5SDimitry Andric Token Tag = getNext(); // %TAG <handle> <prefix>
26470b57cec5SDimitry Andric StringRef T = Tag.Range;
26480b57cec5SDimitry Andric // Strip %TAG
26490b57cec5SDimitry Andric T = T.substr(T.find_first_of(" \t")).ltrim(" \t");
26500b57cec5SDimitry Andric std::size_t HandleEnd = T.find_first_of(" \t");
26510b57cec5SDimitry Andric StringRef TagHandle = T.substr(0, HandleEnd);
26520b57cec5SDimitry Andric StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t");
26530b57cec5SDimitry Andric TagMap[TagHandle] = TagPrefix;
26540b57cec5SDimitry Andric }
26550b57cec5SDimitry Andric
expectToken(int TK)26560b57cec5SDimitry Andric bool Document::expectToken(int TK) {
26570b57cec5SDimitry Andric Token T = getNext();
26580b57cec5SDimitry Andric if (T.Kind != TK) {
26590b57cec5SDimitry Andric setError("Unexpected token", T);
26600b57cec5SDimitry Andric return false;
26610b57cec5SDimitry Andric }
26620b57cec5SDimitry Andric return true;
26630b57cec5SDimitry Andric }
2664