xref: /minix3/external/bsd/llvm/dist/clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp (revision f4a2713ac843a11c696ec80c0a5e3e5d80b4d338)
1*f4a2713aSLionel Sambuc //===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=//
2*f4a2713aSLionel Sambuc //
3*f4a2713aSLionel Sambuc //                     The LLVM Compiler Infrastructure
4*f4a2713aSLionel Sambuc //
5*f4a2713aSLionel Sambuc // This file is distributed under the University of Illinois Open Source
6*f4a2713aSLionel Sambuc // License. See LICENSE.TXT for details.
7*f4a2713aSLionel Sambuc //
8*f4a2713aSLionel Sambuc //===----------------------------------------------------------------------===//
9*f4a2713aSLionel Sambuc //
10*f4a2713aSLionel Sambuc // This tablegen backend emits an efficient function to translate HTML named
11*f4a2713aSLionel Sambuc // character references to UTF-8 sequences.
12*f4a2713aSLionel Sambuc //
13*f4a2713aSLionel Sambuc //===----------------------------------------------------------------------===//
14*f4a2713aSLionel Sambuc 
15*f4a2713aSLionel Sambuc #include "llvm/ADT/SmallString.h"
16*f4a2713aSLionel Sambuc #include "llvm/Support/ConvertUTF.h"
17*f4a2713aSLionel Sambuc #include "llvm/TableGen/Error.h"
18*f4a2713aSLionel Sambuc #include "llvm/TableGen/Record.h"
19*f4a2713aSLionel Sambuc #include "llvm/TableGen/StringMatcher.h"
20*f4a2713aSLionel Sambuc #include "llvm/TableGen/TableGenBackend.h"
21*f4a2713aSLionel Sambuc #include <vector>
22*f4a2713aSLionel Sambuc 
23*f4a2713aSLionel Sambuc using namespace llvm;
24*f4a2713aSLionel Sambuc 
25*f4a2713aSLionel Sambuc /// \brief Convert a code point to the corresponding UTF-8 sequence represented
26*f4a2713aSLionel Sambuc /// as a C string literal.
27*f4a2713aSLionel Sambuc ///
28*f4a2713aSLionel Sambuc /// \returns true on success.
translateCodePointToUTF8(unsigned CodePoint,SmallVectorImpl<char> & CLiteral)29*f4a2713aSLionel Sambuc static bool translateCodePointToUTF8(unsigned CodePoint,
30*f4a2713aSLionel Sambuc                                      SmallVectorImpl<char> &CLiteral) {
31*f4a2713aSLionel Sambuc   char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
32*f4a2713aSLionel Sambuc   char *TranslatedPtr = Translated;
33*f4a2713aSLionel Sambuc   if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr))
34*f4a2713aSLionel Sambuc     return false;
35*f4a2713aSLionel Sambuc 
36*f4a2713aSLionel Sambuc   StringRef UTF8(Translated, TranslatedPtr - Translated);
37*f4a2713aSLionel Sambuc 
38*f4a2713aSLionel Sambuc   raw_svector_ostream OS(CLiteral);
39*f4a2713aSLionel Sambuc   OS << "\"";
40*f4a2713aSLionel Sambuc   for (size_t i = 0, e = UTF8.size(); i != e; ++i) {
41*f4a2713aSLionel Sambuc     OS << "\\x";
42*f4a2713aSLionel Sambuc     OS.write_hex(static_cast<unsigned char>(UTF8[i]));
43*f4a2713aSLionel Sambuc   }
44*f4a2713aSLionel Sambuc   OS << "\"";
45*f4a2713aSLionel Sambuc 
46*f4a2713aSLionel Sambuc   return true;
47*f4a2713aSLionel Sambuc }
48*f4a2713aSLionel Sambuc 
49*f4a2713aSLionel Sambuc namespace clang {
EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper & Records,raw_ostream & OS)50*f4a2713aSLionel Sambuc void EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records,
51*f4a2713aSLionel Sambuc                                                   raw_ostream &OS) {
52*f4a2713aSLionel Sambuc   std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR");
53*f4a2713aSLionel Sambuc   std::vector<StringMatcher::StringPair> NameToUTF8;
54*f4a2713aSLionel Sambuc   SmallString<32> CLiteral;
55*f4a2713aSLionel Sambuc   for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end();
56*f4a2713aSLionel Sambuc        I != E; ++I) {
57*f4a2713aSLionel Sambuc     Record &Tag = **I;
58*f4a2713aSLionel Sambuc     std::string Spelling = Tag.getValueAsString("Spelling");
59*f4a2713aSLionel Sambuc     uint64_t CodePoint = Tag.getValueAsInt("CodePoint");
60*f4a2713aSLionel Sambuc     CLiteral.clear();
61*f4a2713aSLionel Sambuc     CLiteral.append("return ");
62*f4a2713aSLionel Sambuc     if (!translateCodePointToUTF8(CodePoint, CLiteral)) {
63*f4a2713aSLionel Sambuc       SrcMgr.PrintMessage(Tag.getLoc().front(),
64*f4a2713aSLionel Sambuc                           SourceMgr::DK_Error,
65*f4a2713aSLionel Sambuc                           Twine("invalid code point"));
66*f4a2713aSLionel Sambuc       continue;
67*f4a2713aSLionel Sambuc     }
68*f4a2713aSLionel Sambuc     CLiteral.append(";");
69*f4a2713aSLionel Sambuc 
70*f4a2713aSLionel Sambuc     StringMatcher::StringPair Match(Spelling, CLiteral.str());
71*f4a2713aSLionel Sambuc     NameToUTF8.push_back(Match);
72*f4a2713aSLionel Sambuc   }
73*f4a2713aSLionel Sambuc 
74*f4a2713aSLionel Sambuc   emitSourceFileHeader("HTML named character reference to UTF-8 "
75*f4a2713aSLionel Sambuc                        "translation", OS);
76*f4a2713aSLionel Sambuc 
77*f4a2713aSLionel Sambuc   OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n"
78*f4a2713aSLionel Sambuc         "                                             StringRef Name) {\n";
79*f4a2713aSLionel Sambuc   StringMatcher("Name", NameToUTF8, OS).Emit();
80*f4a2713aSLionel Sambuc   OS << "  return StringRef();\n"
81*f4a2713aSLionel Sambuc      << "}\n\n";
82*f4a2713aSLionel Sambuc }
83*f4a2713aSLionel Sambuc 
84*f4a2713aSLionel Sambuc } // end namespace clang
85*f4a2713aSLionel Sambuc 
86