1e5dd7070Spatrick //===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=//
2e5dd7070Spatrick //
3e5dd7070Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e5dd7070Spatrick // See https://llvm.org/LICENSE.txt for license information.
5e5dd7070Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e5dd7070Spatrick //
7e5dd7070Spatrick //===----------------------------------------------------------------------===//
8e5dd7070Spatrick //
9e5dd7070Spatrick // This tablegen backend emits an efficient function to translate HTML named
10e5dd7070Spatrick // character references to UTF-8 sequences.
11e5dd7070Spatrick //
12e5dd7070Spatrick //===----------------------------------------------------------------------===//
13e5dd7070Spatrick
14e5dd7070Spatrick #include "TableGenBackends.h"
15e5dd7070Spatrick #include "llvm/ADT/SmallString.h"
16e5dd7070Spatrick #include "llvm/Support/ConvertUTF.h"
17e5dd7070Spatrick #include "llvm/TableGen/Error.h"
18e5dd7070Spatrick #include "llvm/TableGen/Record.h"
19e5dd7070Spatrick #include "llvm/TableGen/StringMatcher.h"
20e5dd7070Spatrick #include "llvm/TableGen/TableGenBackend.h"
21e5dd7070Spatrick #include <vector>
22e5dd7070Spatrick
23e5dd7070Spatrick using namespace llvm;
24e5dd7070Spatrick
25e5dd7070Spatrick /// Convert a code point to the corresponding UTF-8 sequence represented
26e5dd7070Spatrick /// as a C string literal.
27e5dd7070Spatrick ///
28e5dd7070Spatrick /// \returns true on success.
translateCodePointToUTF8(unsigned CodePoint,SmallVectorImpl<char> & CLiteral)29e5dd7070Spatrick static bool translateCodePointToUTF8(unsigned CodePoint,
30e5dd7070Spatrick SmallVectorImpl<char> &CLiteral) {
31e5dd7070Spatrick char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
32e5dd7070Spatrick char *TranslatedPtr = Translated;
33e5dd7070Spatrick if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr))
34e5dd7070Spatrick return false;
35e5dd7070Spatrick
36e5dd7070Spatrick StringRef UTF8(Translated, TranslatedPtr - Translated);
37e5dd7070Spatrick
38e5dd7070Spatrick raw_svector_ostream OS(CLiteral);
39e5dd7070Spatrick OS << "\"";
40e5dd7070Spatrick for (size_t i = 0, e = UTF8.size(); i != e; ++i) {
41e5dd7070Spatrick OS << "\\x";
42e5dd7070Spatrick OS.write_hex(static_cast<unsigned char>(UTF8[i]));
43e5dd7070Spatrick }
44e5dd7070Spatrick OS << "\"";
45e5dd7070Spatrick
46e5dd7070Spatrick return true;
47e5dd7070Spatrick }
48e5dd7070Spatrick
EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper & Records,raw_ostream & OS)49e5dd7070Spatrick void clang::EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records,
50e5dd7070Spatrick raw_ostream &OS) {
51e5dd7070Spatrick std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR");
52e5dd7070Spatrick std::vector<StringMatcher::StringPair> NameToUTF8;
53e5dd7070Spatrick SmallString<32> CLiteral;
54e5dd7070Spatrick for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end();
55e5dd7070Spatrick I != E; ++I) {
56e5dd7070Spatrick Record &Tag = **I;
57*ec727ea7Spatrick std::string Spelling = std::string(Tag.getValueAsString("Spelling"));
58e5dd7070Spatrick uint64_t CodePoint = Tag.getValueAsInt("CodePoint");
59e5dd7070Spatrick CLiteral.clear();
60e5dd7070Spatrick CLiteral.append("return ");
61e5dd7070Spatrick if (!translateCodePointToUTF8(CodePoint, CLiteral)) {
62e5dd7070Spatrick SrcMgr.PrintMessage(Tag.getLoc().front(),
63e5dd7070Spatrick SourceMgr::DK_Error,
64e5dd7070Spatrick Twine("invalid code point"));
65e5dd7070Spatrick continue;
66e5dd7070Spatrick }
67e5dd7070Spatrick CLiteral.append(";");
68e5dd7070Spatrick
69*ec727ea7Spatrick StringMatcher::StringPair Match(Spelling, std::string(CLiteral.str()));
70e5dd7070Spatrick NameToUTF8.push_back(Match);
71e5dd7070Spatrick }
72e5dd7070Spatrick
73e5dd7070Spatrick emitSourceFileHeader("HTML named character reference to UTF-8 "
74e5dd7070Spatrick "translation", OS);
75e5dd7070Spatrick
76e5dd7070Spatrick OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n"
77e5dd7070Spatrick " StringRef Name) {\n";
78e5dd7070Spatrick StringMatcher("Name", NameToUTF8, OS).Emit();
79e5dd7070Spatrick OS << " return StringRef();\n"
80e5dd7070Spatrick << "}\n\n";
81e5dd7070Spatrick }
82