| #!/usr/bin/env python3 |
| # |
# Syntax: mkdoc.py [-o <output file>] [-I<path> ..] [.. a list of header files ..]
#
# Extract documentation from C++ header files for use as docstrings in Python bindings
| # |
| |
| import os |
| import sys |
| import platform |
| import re |
| import textwrap |
| |
| from clang import cindex |
| from clang.cindex import CursorKind |
| from collections import OrderedDict |
| from glob import glob |
| from threading import Thread, Semaphore |
| from multiprocessing import cpu_count |
| |
| RECURSE_LIST = [ |
| CursorKind.TRANSLATION_UNIT, |
| CursorKind.NAMESPACE, |
| CursorKind.CLASS_DECL, |
| CursorKind.STRUCT_DECL, |
| CursorKind.ENUM_DECL, |
| CursorKind.CLASS_TEMPLATE |
| ] |
| |
| PRINT_LIST = [ |
| CursorKind.CLASS_DECL, |
| CursorKind.STRUCT_DECL, |
| CursorKind.ENUM_DECL, |
| CursorKind.ENUM_CONSTANT_DECL, |
| CursorKind.CLASS_TEMPLATE, |
| CursorKind.FUNCTION_DECL, |
| CursorKind.FUNCTION_TEMPLATE, |
| CursorKind.CONVERSION_FUNCTION, |
| CursorKind.CXX_METHOD, |
| CursorKind.CONSTRUCTOR, |
| CursorKind.FIELD_DECL |
| ] |
| |
| PREFIX_BLACKLIST = [ |
| CursorKind.TRANSLATION_UNIT |
| ] |
| |
CPP_OPERATORS = {
    '<=': 'le', '>=': 'ge', '==': 'eq', '!=': 'ne', '[]': 'array',
    '+=': 'iadd', '-=': 'isub', '*=': 'imul', '/=': 'idiv', '%=': 'imod',
    '&=': 'iand', '|=': 'ior', '^=': 'ixor', '<<=': 'ilshift',
    '>>=': 'irshift', '++': 'inc', '--': 'dec', '<<': 'lshift', '>>': 'rshift',
    '&&': 'land', '||': 'lor', '!': 'lnot', '~': 'bnot',
    '&': 'band', '|': 'bor', '+': 'add', '-': 'sub', '*': 'mul', '/': 'div',
    '%': 'mod', '<': 'lt', '>': 'gt', '=': 'assign', '()': 'call'
}
| |
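# Sort operators by decreasing token length so that, e.g., 'operator<<=' is
# rewritten before its substrings 'operator<<' and 'operator='.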
| CPP_OPERATORS = OrderedDict( |
| sorted(CPP_OPERATORS.items(), key=lambda t: -len(t[0]))) |
| |
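# Bound the number of libclang parses running in parallel by the CPU count.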
| job_count = cpu_count() |
| job_semaphore = Semaphore(job_count) |
| |
| |
| class NoFilenamesError(ValueError): |
| pass |
| |
| |
def d(s):
    # libclang's Python bindings sometimes return bytes rather than str;
    # normalize everything to str.
    return s if isinstance(s, str) else s.decode('utf8')
| |
| |
| def sanitize_name(name): |
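    """Turn a qualified C++ name into a flat ``__doc_*`` identifier (operators
    renamed, template arguments stripped, non-alphanumerics mapped to '_')."""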
| name = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name) |
| for k, v in CPP_OPERATORS.items(): |
| name = name.replace('operator%s' % k, 'operator_%s' % v) |
| name = re.sub('<.*>', '', name) |
| name = ''.join([ch if ch.isalnum() else '_' for ch in name]) |
| name = re.sub('_$', '', re.sub('_+', '_', name)) |
| return '__doc_' + name |
| |
| |
| def process_comment(comment): |
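    """Strip C++ comment markers and translate Doxygen/HTML markup into plain,
    re-flowed text suitable for a Python docstring."""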
| result = '' |
| |
| # Remove C++ comment syntax |
| leading_spaces = float('inf') |
| for s in comment.expandtabs(tabsize=4).splitlines(): |
| s = s.strip() |
| if s.startswith('/*'): |
| s = s[2:].lstrip('*') |
| elif s.endswith('*/'): |
| s = s[:-2].rstrip('*') |
| elif s.startswith('///'): |
| s = s[3:] |
| if s.startswith('*'): |
| s = s[1:] |
| if len(s) > 0: |
| leading_spaces = min(leading_spaces, len(s) - len(s.lstrip())) |
| result += s + '\n' |
| |
| if leading_spaces != float('inf'): |
| result2 = "" |
| for s in result.splitlines(): |
| result2 += s[leading_spaces:] + '\n' |
| result = result2 |
| |
| # Doxygen tags |
    cpp_group = r'([\w:]+)'
    param_group = r'([\[\w:\]]+)'
| |
| s = result |
| s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s) |
| s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s) |
| s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s) |
| s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s) |
| s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s) |
| s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s) |
| s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group), |
| r'\n\n$Parameter ``\2``:\n\n', s) |
| s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group), |
| r'\n\n$Template parameter ``\2``:\n\n', s) |
| |
| for in_, out_ in { |
| 'return': 'Returns', |
| 'author': 'Author', |
| 'authors': 'Authors', |
| 'copyright': 'Copyright', |
| 'date': 'Date', |
| 'remark': 'Remark', |
| 'sa': 'See also', |
| 'see': 'See also', |
| 'extends': 'Extends', |
| 'throw': 'Throws', |
| 'throws': 'Throws' |
| }.items(): |
| s = re.sub(r'\\%s\s*' % in_, r'\n\n$%s:\n\n' % out_, s) |
| |
| s = re.sub(r'\\details\s*', r'\n\n', s) |
| s = re.sub(r'\\brief\s*', r'', s) |
| s = re.sub(r'\\short\s*', r'', s) |
| s = re.sub(r'\\ref\s*', r'', s) |
| |
| s = re.sub(r'\\code\s?(.*?)\s?\\endcode', |
| r"```\n\1\n```\n", s, flags=re.DOTALL) |
| |
| # HTML/TeX tags |
| s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL) |
| s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL) |
| s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL) |
| s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL) |
| s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL) |
| s = re.sub(r'<li>', r'\n\n* ', s) |
| s = re.sub(r'</?ul>', r'', s) |
| s = re.sub(r'</li>', r'\n\n', s) |
| |
| s = s.replace('``true``', '``True``') |
| s = s.replace('``false``', '``False``') |
| |
| # Re-flow text |
| wrapper = textwrap.TextWrapper() |
| wrapper.expand_tabs = True |
| wrapper.replace_whitespace = True |
| wrapper.drop_whitespace = True |
| wrapper.width = 70 |
| wrapper.initial_indent = wrapper.subsequent_indent = '' |
| |
| result = '' |
| in_code_segment = False |
| for x in re.split(r'(```)', s): |
| if x == '```': |
| if not in_code_segment: |
| result += '```\n' |
| else: |
| result += '\n```\n\n' |
| in_code_segment = not in_code_segment |
| elif in_code_segment: |
| result += x.strip() |
| else: |
| for y in re.split(r'(?: *\n *){2,}', x): |
| wrapped = wrapper.fill(re.sub(r'\s+', ' ', y).strip()) |
| if len(wrapped) > 0 and wrapped[0] == '$': |
| result += wrapped[1:] + '\n' |
| wrapper.initial_indent = \ |
| wrapper.subsequent_indent = ' ' * 4 |
| else: |
| if len(wrapped) > 0: |
| result += wrapped + '\n\n' |
| wrapper.initial_indent = wrapper.subsequent_indent = '' |
| return result.rstrip().lstrip('\n') |
| |
| |
| def extract(filename, node, prefix, output): |
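    """Recursively visit ``node`` and append a (name, filename, comment) tuple
    to ``output`` for every declaration of interest found in ``filename``."""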
| if not (node.location.file is None or |
| os.path.samefile(d(node.location.file.name), filename)): |
| return 0 |
| if node.kind in RECURSE_LIST: |
| sub_prefix = prefix |
| if node.kind not in PREFIX_BLACKLIST: |
| if len(sub_prefix) > 0: |
| sub_prefix += '_' |
| sub_prefix += d(node.spelling) |
| for i in node.get_children(): |
| extract(filename, i, sub_prefix, output) |
| if node.kind in PRINT_LIST: |
| comment = d(node.raw_comment) if node.raw_comment is not None else '' |
| comment = process_comment(comment) |
| sub_prefix = prefix |
| if len(sub_prefix) > 0: |
| sub_prefix += '_' |
| if len(node.spelling) > 0: |
| name = sanitize_name(sub_prefix + d(node.spelling)) |
| output.append((name, filename, comment)) |
| |
| |
| class ExtractionThread(Thread): |
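    """Parse a single header with libclang in a worker thread; the number of
    concurrently running jobs is bounded by ``job_semaphore``."""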
| def __init__(self, filename, parameters, output): |
| Thread.__init__(self) |
| self.filename = filename |
| self.parameters = parameters |
| self.output = output |
| job_semaphore.acquire() |
| |
| def run(self): |
| print('Processing "%s" ..' % self.filename, file=sys.stderr) |
| try: |
| index = cindex.Index( |
| cindex.conf.lib.clang_createIndex(False, True)) |
| tu = index.parse(self.filename, self.parameters) |
| extract(self.filename, tu.cursor, '', self.output) |
| finally: |
| job_semaphore.release() |
| |
| |
| def read_args(args): |
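    """Split ``args`` into clang parameters and header filenames, adding
    platform-specific include/sysroot defaults where they can be detected."""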
| parameters = [] |
| filenames = [] |
| if "-x" not in args: |
| parameters.extend(['-x', 'c++']) |
| if not any(it.startswith("-std=") for it in args): |
| parameters.append('-std=c++11') |
| |
| if platform.system() == 'Darwin': |
| dev_path = '/Applications/Xcode.app/Contents/Developer/' |
| lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/' |
| sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs' |
| libclang = lib_dir + 'libclang.dylib' |
| |
| if os.path.exists(libclang): |
| cindex.Config.set_library_path(os.path.dirname(libclang)) |
| |
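        # Use whichever SDK is listed first under the SDKs directory as the sysroot.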
| if os.path.exists(sdk_dir): |
| sysroot_dir = os.path.join(sdk_dir, next(os.walk(sdk_dir))[1][0]) |
| parameters.append('-isysroot') |
| parameters.append(sysroot_dir) |
| elif platform.system() == 'Linux': |
| # clang doesn't find its own base includes by default on Linux, |
| # but different distros install them in different paths. |
| # Try to autodetect, preferring the highest numbered version. |
        def clang_folder_version(dirname):
            return [int(ver) for ver in re.findall(r'(?<!lib)(?<!\d)\d+', dirname)]
| clang_include_dir = max(( |
| path |
| for libdir in ['lib64', 'lib', 'lib32'] |
| for path in glob('/usr/%s/clang/*/include' % libdir) |
| if os.path.isdir(path) |
| ), default=None, key=clang_folder_version) |
| if clang_include_dir: |
| parameters.extend(['-isystem', clang_include_dir]) |
| |
| for item in args: |
| if item.startswith('-'): |
| parameters.append(item) |
| else: |
| filenames.append(item) |
| |
| if len(filenames) == 0: |
| raise NoFilenamesError("args parameter did not contain any filenames") |
| |
| return parameters, filenames |
| |
| |
| def extract_all(args): |
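    """Spawn one extraction thread per header and return the collected
    (name, filename, comment) tuples once all jobs have finished."""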
| parameters, filenames = read_args(args) |
| output = [] |
| for filename in filenames: |
| thr = ExtractionThread(filename, parameters, output) |
| thr.start() |
| |
| print('Waiting for jobs to finish ..', file=sys.stderr) |
| for i in range(job_count): |
| job_semaphore.acquire() |
| |
| return output |
| |
| |
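# The emitted header is meant to be #included from the binding code, where the
# DOC(...) macro defined below selects the matching __doc_* variable, e.g.
# (namespace/class/method names are illustrative only):
#
#     m.def("run", &demo::Engine::run, DOC(demo, Engine, run));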
| def write_header(comments, out_file=sys.stdout): |
| print('''/* |
| This file contains docstrings for the Python bindings. |
| Do not edit! These were automatically extracted by mkdoc.py |
| */ |
| |
| #define __EXPAND(x) x |
| #define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...) COUNT |
| #define __VA_SIZE(...) __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1)) |
| #define __CAT1(a, b) a ## b |
| #define __CAT2(a, b) __CAT1(a, b) |
| #define __DOC1(n1) __doc_##n1 |
| #define __DOC2(n1, n2) __doc_##n1##_##n2 |
| #define __DOC3(n1, n2, n3) __doc_##n1##_##n2##_##n3 |
| #define __DOC4(n1, n2, n3, n4) __doc_##n1##_##n2##_##n3##_##n4 |
| #define __DOC5(n1, n2, n3, n4, n5) __doc_##n1##_##n2##_##n3##_##n4##_##n5 |
| #define __DOC6(n1, n2, n3, n4, n5, n6) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6 |
| #define __DOC7(n1, n2, n3, n4, n5, n6, n7) __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7 |
| #define DOC(...) __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__)) |
| |
| #if defined(__GNUG__) |
| #pragma GCC diagnostic push |
| #pragma GCC diagnostic ignored "-Wunused-variable" |
| #endif |
| ''', file=out_file) |
| |
| |
| name_ctr = 1 |
| name_prev = None |
    for name, _, comment in sorted(comments, key=lambda x: (x[0], x[1])):
| if name == name_prev: |
| name_ctr += 1 |
| name = name + "_%i" % name_ctr |
| else: |
| name_prev = name |
| name_ctr = 1 |
| print('\nstatic const char *%s =%sR"doc(%s)doc";' % |
| (name, '\n' if '\n' in comment else ' ', comment), file=out_file) |
| |
| print(''' |
| #if defined(__GNUG__) |
| #pragma GCC diagnostic pop |
| #endif |
| ''', file=out_file) |
| |
| |
| def mkdoc(args): |
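    """Handle the ``-o`` option, extract all comments, and write the generated
    header to the output file (or stdout when no ``-o`` is given)."""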
| args = list(args) |
| out_path = None |
| for idx, arg in enumerate(args): |
| if arg.startswith("-o"): |
| args.remove(arg) |
| try: |
| out_path = arg[2:] or args.pop(idx) |
| except IndexError: |
                print("-o flag requires an argument", file=sys.stderr)
                sys.exit(-1)
| break |
| |
| comments = extract_all(args) |
| |
| if out_path: |
| try: |
| with open(out_path, 'w') as out_file: |
| write_header(comments, out_file) |
| except: |
| # In the event of an error, don't leave a partially-written |
| # output file. |
| try: |
| os.unlink(out_path) |
            except OSError:
| pass |
| raise |
| else: |
| write_header(comments) |
| |
| |
| if __name__ == '__main__': |
| try: |
| mkdoc(sys.argv[1:]) |
| except NoFilenamesError: |
        print('Syntax: %s [-o <output file>] [-I<path> ..] '
              '[.. a list of header files ..]' % sys.argv[0],
              file=sys.stderr)
        sys.exit(-1)