xref: /openbsd-src/gnu/llvm/clang/tools/include-mapping/gen_std.py (revision 12c855180aad702bbcca06e0398d774beeafb155)
1*12c85518Srobert#!/usr/bin/env python
2*12c85518Srobert#===- gen_std.py -  ------------------------------------------*- python -*--===#
3*12c85518Srobert#
4*12c85518Srobert# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5*12c85518Srobert# See https://llvm.org/LICENSE.txt for license information.
6*12c85518Srobert# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7*12c85518Srobert#
8*12c85518Srobert#===------------------------------------------------------------------------===#
9*12c85518Srobert
10*12c85518Srobert"""gen_std.py is a tool to generate a lookup table (from qualified names to
11*12c85518Srobertinclude headers) for C/C++ Standard Library symbols by parsing archived HTML
12*12c85518Srobertfiles from cppreference.
13*12c85518Srobert
14*12c85518SrobertThe generated files are located in clang/include/Tooling/Inclusions.
15*12c85518Srobert
16*12c85518SrobertCaveats and FIXMEs:
  - std sub-namespace symbols are only added for the namespaces that have their
    own symbol index page (e.g. chrono); std::placeholders is not handled yet.
19*12c85518Srobert  - symbols with multiple variants or defined in multiple headers aren't added,
20*12c85518Srobert    e.g. std::move, std::swap
21*12c85518Srobert
22*12c85518SrobertUsage:
23*12c85518Srobert  1. Install BeautifulSoup dependency, see instruction:
24*12c85518Srobert       https://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-beautiful-soup
25*12c85518Srobert  2. Download cppreference offline HTML files (e.g. html_book_20181028.zip) at
26*12c85518Srobert       https://en.cppreference.com/w/Cppreference:Archives
27*12c85518Srobert  3. Unzip the zip file from step 2 (e.g., to a "cppreference" directory). You should
28*12c85518Srobert     get a "cppreference/reference" directory.
29*12c85518Srobert  4. Run the command:
30*12c85518Srobert       // Generate C++ symbols
31*12c85518Srobert       python3 gen_std.py -cppreference cppreference/reference -symbols=cpp > StdSymbolMap.inc
32*12c85518Srobert       // Generate C++ removed symbols
33*12c85518Srobert       python3 gen_std.py -cppreference cppreference/reference -symbols=cpp_removed > RemovedSymbolMap.inc
34*12c85518Srobert       // Generate C symbols
35*12c85518Srobert       python3 gen_std.py -cppreference cppreference/reference -symbols=c > CSymbolMap.inc
36*12c85518Srobert"""
37*12c85518Srobert
38*12c85518Srobert
39*12c85518Srobertimport cppreference_parser
40*12c85518Srobertimport argparse
41*12c85518Srobertimport datetime
42*12c85518Srobertimport os
43*12c85518Srobertimport sys
44*12c85518Srobert
# Banner printed at the top of the generated output. It is a %-format template
# with two "%s" placeholders that main() fills in, in this order:
#   1. the upper-cased value of the -symbols option;
#   2. the modification date (YYYY-MM-DD) of the cppreference index page.
CODE_PREFIX = """\
//===-- gen_std.py generated file -------------------------------*- C++ -*-===//
//
// Used to build a lookup table (qualified names => include headers) for %s
// Standard Library symbols.
//
// This file was generated automatically by
// clang/tools/include-mapping/gen_std.py, DO NOT EDIT!
//
// Generated from cppreference offline HTML book (modified on %s).
//===----------------------------------------------------------------------===//
"""
57*12c85518Srobert
def ParseArg():
  """Parse the command-line arguments of gen_std.py.

  Both options are mandatory:
    -cppreference PATH  path to the cppreference offline HTML directory.
    -symbols KIND       which symbol table to generate; one of cpp, c or
                        cpp_removed.

  Returns:
    The argparse.Namespace produced by the parser, carrying the
    "cppreference" and "symbols" attributes.

  argparse prints a usage error and exits when an option is missing or when
  -symbols is given a value outside the supported set.
  """
  parser = argparse.ArgumentParser(description='Generate StdGen file')
  # Neither option declares a default: both are required, so a default value
  # could never take effect.
  parser.add_argument('-cppreference', metavar='PATH',
                      help='path to the cppreference offline HTML directory',
                      required=True)
  # Restrict the accepted values up front so that a typo is reported as a
  # usage error instead of failing later inside main().
  parser.add_argument('-symbols',
                      choices=('cpp', 'c', 'cpp_removed'),
                      help='Generate c or cpp (removed) symbols. One of {cpp, c, cpp_removed}.',
                      required=True)
  return parser.parse_args()
70*12c85518Srobert
71*12c85518Srobert
def main():
  """Generate a symbol => header mapping table and print it to stdout.

  The -symbols option selects which cppreference index pages are parsed:
    cpp          the std symbol index plus the per-namespace index pages
    cpp_removed  the "zombie names" page
    c            the C index page

  The output starts with the CODE_PREFIX banner, followed by one
  SYMBOL(name, namespace, header) line for every symbol that maps to exactly
  one header. Symbols with no header, or with more than one, are reported on
  stderr and left out of the table.

  Exits with an error message if the -symbols value is not supported or if
  the expected cppreference directory does not exist.
  """
  args = ParseArg()
  if args.symbols == 'cpp':
    page_root = os.path.join(args.cppreference, "en", "cpp")
    symbol_index_root = os.path.join(page_root, "symbol_index")
    parse_pages = [
      (page_root, "symbol_index.html", "std::"),
      # std sub-namespace symbols have separated pages.
      # We don't index std literal operators (e.g.
      # std::literals::chrono_literals::operator""d), these symbols can't be
      # accessed by std::<symbol_name>.
      # FIXME: index std::placeholders symbols, placeholders.html page is
      # different (which contains one entry for _1, _2, ..., _N), we need special
      # handling.
      (symbol_index_root, "chrono.html", "std::chrono::"),
      (symbol_index_root, "filesystem.html", "std::filesystem::"),
      (symbol_index_root, "pmr.html", "std::pmr::"),
      (symbol_index_root, "regex_constants.html", "std::regex_constants::"),
      (symbol_index_root, "this_thread.html", "std::this_thread::"),
    ]
  elif args.symbols == 'cpp_removed':
    page_root = os.path.join(args.cppreference, "en", "cpp")
    symbol_index_root = os.path.join(page_root, "symbol_index")
    parse_pages = [(symbol_index_root, "zombie_names.html", "std::")]
  elif args.symbols == 'c':
    page_root = os.path.join(args.cppreference, "en", "c")
    symbol_index_root = page_root
    parse_pages = [(page_root, "index.html", None)]
  else:
    # Without this branch the names assigned above would be unbound and the
    # checks below would die with a NameError instead of a readable message.
    sys.exit("Unsupported -symbols value %r, expected one of: cpp, c, "
             "cpp_removed." % args.symbols)

  # sys.exit() rather than the exit() builtin: the latter is injected by the
  # site module and is not guaranteed to exist (e.g. under "python -S").
  if not os.path.exists(symbol_index_root):
    sys.exit("Path %s doesn't exist!" % symbol_index_root)

  symbols = cppreference_parser.GetSymbols(parse_pages)

  # We don't have version information from the unzipped offline HTML files,
  # so we use the modified time of the index.html page as the version.
  index_page_path = os.path.join(page_root, "index.html")
  cppreference_modified_date = datetime.datetime.fromtimestamp(
    os.stat(index_page_path).st_mtime).strftime('%Y-%m-%d')
  print(CODE_PREFIX % (args.symbols.upper(), cppreference_modified_date))
  for symbol in symbols:
    if len(symbol.headers) == 1:
      # SYMBOL(unqualified_name, namespace, header)
      print("SYMBOL(%s, %s, %s)" % (symbol.name, symbol.namespace,
                                    symbol.headers[0]))
    elif len(symbol.headers) == 0:
      sys.stderr.write("No header found for symbol %s\n" % symbol.name)
    else:
      # FIXME: support symbols with multiple headers (e.g. std::move).
      sys.stderr.write("Ambiguous header for symbol %s: %s\n" % (
          symbol.name, ', '.join(symbol.headers)))
123*12c85518Srobert
124*12c85518Srobert
# Run the generator only when this file is executed as a script, so that
# importing it as a module has no side effects.
if __name__ == '__main__':
  main()
127