1 //===-- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp ----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This tablegen backend emits an efficient function to translate HTML named 10 // character references to UTF-8 sequences. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "TableGenBackends.h" 15 #include "llvm/ADT/SmallString.h" 16 #include "llvm/Support/ConvertUTF.h" 17 #include "llvm/TableGen/Error.h" 18 #include "llvm/TableGen/Record.h" 19 #include "llvm/TableGen/StringMatcher.h" 20 #include "llvm/TableGen/TableGenBackend.h" 21 #include <vector> 22 23 using namespace llvm; 24 25 /// Convert a code point to the corresponding UTF-8 sequence represented 26 /// as a C string literal. 27 /// 28 /// \returns true on success. 29 static bool translateCodePointToUTF8(unsigned CodePoint, 30 SmallVectorImpl<char> &CLiteral) { 31 char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT]; 32 char *TranslatedPtr = Translated; 33 if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr)) 34 return false; 35 36 StringRef UTF8(Translated, TranslatedPtr - Translated); 37 38 raw_svector_ostream OS(CLiteral); 39 OS << "\""; 40 for (size_t i = 0, e = UTF8.size(); i != e; ++i) { 41 OS << "\\x"; 42 OS.write_hex(static_cast<unsigned char>(UTF8[i])); 43 } 44 OS << "\""; 45 46 return true; 47 } 48 49 void clang::EmitClangCommentHTMLNamedCharacterReferences( 50 const RecordKeeper &Records, raw_ostream &OS) { 51 std::vector<StringMatcher::StringPair> NameToUTF8; 52 SmallString<32> CLiteral; 53 for (const Record *Tag : Records.getAllDerivedDefinitions("NCR")) { 54 std::string Spelling = Tag->getValueAsString("Spelling").str(); 55 uint64_t CodePoint = Tag->getValueAsInt("CodePoint"); 56 CLiteral.clear(); 57 CLiteral.append("return "); 58 if (!translateCodePointToUTF8(CodePoint, CLiteral)) { 59 SrcMgr.PrintMessage(Tag->getLoc().front(), SourceMgr::DK_Error, 60 Twine("invalid code point")); 61 continue; 62 } 63 CLiteral.append(";"); 64 65 StringMatcher::StringPair Match(Spelling, std::string(CLiteral)); 66 NameToUTF8.push_back(Match); 67 } 68 69 emitSourceFileHeader("HTML named character reference to UTF-8 translation", 70 OS, Records); 71 72 OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n" 73 " StringRef Name) {\n"; 74 StringMatcher("Name", NameToUTF8, OS).Emit(); 75 OS << " return StringRef();\n" 76 << "}\n\n"; 77 } 78