xref: /openbsd-src/gnu/llvm/clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp (revision ec727ea710c91afd8ce4f788c5aaa8482b7b69b2)
1e5dd7070Spatrick //===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=//
2e5dd7070Spatrick //
3e5dd7070Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e5dd7070Spatrick // See https://llvm.org/LICENSE.txt for license information.
5e5dd7070Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e5dd7070Spatrick //
7e5dd7070Spatrick //===----------------------------------------------------------------------===//
8e5dd7070Spatrick //
9e5dd7070Spatrick // This tablegen backend emits an efficient function to translate HTML named
10e5dd7070Spatrick // character references to UTF-8 sequences.
11e5dd7070Spatrick //
12e5dd7070Spatrick //===----------------------------------------------------------------------===//
13e5dd7070Spatrick 
14e5dd7070Spatrick #include "TableGenBackends.h"
15e5dd7070Spatrick #include "llvm/ADT/SmallString.h"
16e5dd7070Spatrick #include "llvm/Support/ConvertUTF.h"
17e5dd7070Spatrick #include "llvm/TableGen/Error.h"
18e5dd7070Spatrick #include "llvm/TableGen/Record.h"
19e5dd7070Spatrick #include "llvm/TableGen/StringMatcher.h"
20e5dd7070Spatrick #include "llvm/TableGen/TableGenBackend.h"
21e5dd7070Spatrick #include <vector>
22e5dd7070Spatrick 
23e5dd7070Spatrick using namespace llvm;
24e5dd7070Spatrick 
25e5dd7070Spatrick /// Convert a code point to the corresponding UTF-8 sequence represented
26e5dd7070Spatrick /// as a C string literal.
27e5dd7070Spatrick ///
28e5dd7070Spatrick /// \returns true on success.
translateCodePointToUTF8(unsigned CodePoint,SmallVectorImpl<char> & CLiteral)29e5dd7070Spatrick static bool translateCodePointToUTF8(unsigned CodePoint,
30e5dd7070Spatrick                                      SmallVectorImpl<char> &CLiteral) {
31e5dd7070Spatrick   char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
32e5dd7070Spatrick   char *TranslatedPtr = Translated;
33e5dd7070Spatrick   if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr))
34e5dd7070Spatrick     return false;
35e5dd7070Spatrick 
36e5dd7070Spatrick   StringRef UTF8(Translated, TranslatedPtr - Translated);
37e5dd7070Spatrick 
38e5dd7070Spatrick   raw_svector_ostream OS(CLiteral);
39e5dd7070Spatrick   OS << "\"";
40e5dd7070Spatrick   for (size_t i = 0, e = UTF8.size(); i != e; ++i) {
41e5dd7070Spatrick     OS << "\\x";
42e5dd7070Spatrick     OS.write_hex(static_cast<unsigned char>(UTF8[i]));
43e5dd7070Spatrick   }
44e5dd7070Spatrick   OS << "\"";
45e5dd7070Spatrick 
46e5dd7070Spatrick   return true;
47e5dd7070Spatrick }
48e5dd7070Spatrick 
EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper & Records,raw_ostream & OS)49e5dd7070Spatrick void clang::EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records,
50e5dd7070Spatrick                                                          raw_ostream &OS) {
51e5dd7070Spatrick   std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR");
52e5dd7070Spatrick   std::vector<StringMatcher::StringPair> NameToUTF8;
53e5dd7070Spatrick   SmallString<32> CLiteral;
54e5dd7070Spatrick   for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end();
55e5dd7070Spatrick        I != E; ++I) {
56e5dd7070Spatrick     Record &Tag = **I;
57*ec727ea7Spatrick     std::string Spelling = std::string(Tag.getValueAsString("Spelling"));
58e5dd7070Spatrick     uint64_t CodePoint = Tag.getValueAsInt("CodePoint");
59e5dd7070Spatrick     CLiteral.clear();
60e5dd7070Spatrick     CLiteral.append("return ");
61e5dd7070Spatrick     if (!translateCodePointToUTF8(CodePoint, CLiteral)) {
62e5dd7070Spatrick       SrcMgr.PrintMessage(Tag.getLoc().front(),
63e5dd7070Spatrick                           SourceMgr::DK_Error,
64e5dd7070Spatrick                           Twine("invalid code point"));
65e5dd7070Spatrick       continue;
66e5dd7070Spatrick     }
67e5dd7070Spatrick     CLiteral.append(";");
68e5dd7070Spatrick 
69*ec727ea7Spatrick     StringMatcher::StringPair Match(Spelling, std::string(CLiteral.str()));
70e5dd7070Spatrick     NameToUTF8.push_back(Match);
71e5dd7070Spatrick   }
72e5dd7070Spatrick 
73e5dd7070Spatrick   emitSourceFileHeader("HTML named character reference to UTF-8 "
74e5dd7070Spatrick                        "translation", OS);
75e5dd7070Spatrick 
76e5dd7070Spatrick   OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n"
77e5dd7070Spatrick         "                                             StringRef Name) {\n";
78e5dd7070Spatrick   StringMatcher("Name", NameToUTF8, OS).Emit();
79e5dd7070Spatrick   OS << "  return StringRef();\n"
80e5dd7070Spatrick      << "}\n\n";
81e5dd7070Spatrick }
82