| #!/usr/bin/env python3 |
| |
| import os |
| import re |
| import sys |
| |
| from file_types import lang_type, find_files |
| |
# Editor mode line such as "-*- mode:python -*-"; skipped when it is the
# first line of a file.  Raw string so that "\*" is not treated as an
# (invalid) string escape.
mode_line = re.compile(r'(-\*- *mode:.* *-\*-)')
shell_comment = re.compile(r'^\s*#')
lisp_comment = re.compile(r';')
cpp_comment = re.compile(r'//')
c_comment_start = re.compile(r'/\*')
c_comment_end = re.compile(r'\*/')

def _line_comment_blocks(lines, comment_re, skip_shebang):
    """Yield (start, end) index pairs for runs of line comments.

    A run is a maximal sequence of consecutive lines on which comment_re
    matches.  Scanning stops at the first non-blank, non-comment line seen
    while no run is open.  When skip_shebang is true, a first line starting
    with '#!' is ignored; a first line carrying an editor mode line is
    always ignored.
    """
    start = None
    for i, line in enumerate(lines):
        if i == 0:
            if skip_shebang and line.startswith('#!'):
                continue
            if mode_line.search(line):
                continue

        if comment_re.search(line):
            if start is None:
                start = i
        elif start is None:
            if line.strip():
                return
        else:
            yield start, i - 1
            start = None

    # A comment run that reaches the end of the file is still a block.
    if start is not None:
        yield start, len(lines) - 1

def _c_family_blocks(lines):
    """Yield (start, end) index pairs for leading /* */ and // comments.

    mode is None outside a comment, 'C' inside a /* */ comment and 'CPP'
    inside a run of // lines.  Within a // run, a line whose text starts
    with "Copyright" closes the current block and opens a new one.
    """
    start = None
    mode = None
    for i, line in enumerate(lines):
        if i == 0 and mode_line.search(line):
            continue

        if mode == 'C':
            assert start is not None, 'on line %d' % (i + 1)
            if c_comment_end.search(line):
                yield start, i
                mode = None
            continue

        cpp_match = cpp_comment.search(line)
        c_match = c_comment_start.search(line)

        if cpp_match:
            assert not c_match, 'on line %d' % (i + 1)
            if line[:cpp_match.start()].strip():
                # Code precedes the comment, so the header is over.  Emit
                # the // run collected so far instead of dropping it.
                if mode == 'CPP':
                    yield start, i - 1
                return
            if mode is None:
                mode = 'CPP'
                start = i
            else:
                text = line[cpp_match.end():].lstrip()
                if text.startswith("Copyright"):
                    yield start, i - 1
                    start = i
            continue

        if mode == 'CPP':
            assert start is not None, 'on line %d' % (i + 1)
            if not line.strip():
                # Blank lines do not terminate a // run.
                continue
            yield start, i - 1
            mode = None
            if not c_match:
                return

        if c_match:
            assert mode is None, 'on line %d' % (i + 1)
            if c_comment_end.search(line, c_match.end()):
                # The comment opens and closes on this one line.
                yield i, i
                continue
            mode = 'C'
            start = i

        if mode is None and line.strip():
            return

    # A // run that reaches the end of the file is still a block.  An
    # unterminated /* comment is not reported.
    if mode == 'CPP':
        yield start, len(lines) - 1

def find_copyright_block(lines, lang_type):
    """Generate the comment blocks found at the top of a source file.

    lines is a sequence of strings (one per source line) and lang_type
    names the comment syntax to use.  Each generated item is a
    (start, end) pair of inclusive indices into lines.

    Raises AttributeError (on first iteration, since this is a generator)
    when lang_type is not one of the handled languages.  The C-family
    scanner uses assertions for inputs it cannot handle, e.g. a line
    containing both '//' and '/*'.
    """
    if lang_type in ('python', 'make', 'shell', 'perl', 'scons'):
        yield from _line_comment_blocks(lines, shell_comment, True)

    elif lang_type in ('lisp', ):
        yield from _line_comment_blocks(lines, lisp_comment, False)

    elif lang_type in ('C', 'C++', 'swig', 'isa', 'asm', 'slicc',
                       'lex', 'yacc'):
        yield from _c_family_blocks(lines)

    else:
        raise AttributeError("Could not handle language %s" % lang_type)
| |
date_range_re = re.compile(r'([0-9]{4})\s*-\s*([0-9]{4})')
def process_dates(dates):
    """Expand a comma-separated list of years and year ranges into a set.

    dates is a string such as "2003-2005, 2007".  Each comma-separated
    entry is either a four-digit range "YYYY-YYYY", which contributes
    every year from the first to the last inclusive, or a single integer
    year.  Entries that are neither are ignored.  Returns a set of ints.
    """
    years = set()
    for token in dates.split(','):
        token = token.strip()
        span = date_range_re.match(token)
        if span is not None:
            first, last = (int(year) for year in span.groups())
            years.update(range(first, last + 1))
            continue

        try:
            years.add(int(token))
        except ValueError:
            # Not a year at all (empty or non-numeric entry): skip it.
            continue

    return years
| |
# Groups: the "(c)" marker, the raw date list, and the owner text.  The
# letter class is spelled A-Za-z because the range A-z would also accept
# the punctuation characters [ \ ] ^ _ and `.
copyright_re = \
    re.compile(r'Copyright (\([cC]\)) ([-, 0-9]+)[\s*#/]*([A-Za-z,. -]+)',
               re.DOTALL)

# First author line ("Authors: Name") and continuation lines (a bare
# name after optional comment characters).
authors_re = re.compile(r'^[\s*#/]*Authors:\s*([A-Za-z .]+)\s*$')
more_authors_re = re.compile(r'^[\s*#/]*([A-Za-z .]+)\s*$')

# Every owner string seen by get_data() so far.
all_owners = set()

def _collect_authors(lines, start, end):
    """Return (authors, end) for the comment block lines[start:end+1].

    Authors are gathered from the first "Authors:" line and the name-only
    lines that follow it.  The first line after that which is not a name
    stops the collection and may move end earlier, to the next blank line
    or to just before the next '//' line that carries text.
    """
    authors = []
    for i in range(start, end + 1):
        line = lines[i]
        if not authors:
            match = authors_re.search(line)
            if match:
                authors.append(match.group(1).strip())
            continue

        match = more_authors_re.search(line)
        if match:
            authors.append(match.group(1).strip())
            continue

        # The author list is over; work out where the block really ends.
        for j in range(i, end + 1):
            text = lines[j].strip()
            if not text:
                end = j
                break
            if text.startswith('//'):
                text = text[2:].lstrip()
                if text:
                    end = j - 1
                    break
        break

    return authors, end

def get_data(lang_type, lines):
    """Extract copyright information from the header comments of a file.

    lang_type selects the comment syntax and lines is the list of source
    lines; both are passed on to find_copyright_block().  Blocks without
    a "Copyright (c) <dates> <owner>" notice are skipped.

    Returns a list of (owner, dates, authors, start, end) tuples, where
    dates is the set of years produced by process_dates(), authors is a
    list of names and start/end are line indices.  Each owner is also
    added to the module-level all_owners set.
    """
    data = []
    for start, end in find_copyright_block(lines, lang_type):
        joined = ''.join(lines[start:end+1])
        match = copyright_re.search(joined)
        if not match:
            continue

        dates = match.group(2).strip()
        owner = match.group(3).strip()

        all_owners.add(owner)
        try:
            dates = process_dates(dates)
        except Exception:
            # Show what was being parsed before propagating the error.
            print(dates)
            print(owner)
            raise

        authors, end = _collect_authors(lines, start, end)
        data.append((owner, dates, authors, start, end))

    return data
| |
def datestr(dates):
    """Format a collection of years as a compact comma-separated string.

    dates is any iterable of ints.  Consecutive years are collapsed into
    "first-last" ranges, e.g. {2003, 2004, 2005, 2007} gives
    "2003-2005,2007".  An empty collection gives the empty string.
    """
    years = sorted(dates)
    if not years:
        return ''

    ranges = []
    first = last = years[0]
    for year in years[1:]:
        if year == last + 1:
            last = year
            continue
        ranges.append((first, last))
        first = last = year
    ranges.append((first, last))

    return ','.join('%d' % lo if lo == hi else '%d-%d' % (lo, hi)
                    for lo, hi in ranges)
| |
# Lists the options accepted by the getopt string "ci:v" used by the
# script entry point.
usage_str = """usage:
    %s [-c] [-i <ignore>] [-v] <file-or-directory> ..."""

def usage(exitcode):
    """Print the usage message and optionally exit.

    The message is printed with sys.argv[0] as the program name.  When
    exitcode is not None the process exits with that status via
    sys.exit(); otherwise the function returns normally.
    """
    print(usage_str % sys.argv[0])
    if exitcode is not None:
        sys.exit(exitcode)
| |
# Script entry point: scan the files and directories given on the command
# line and print one merged "Copyright (c) <years> <owner>" line per owner.
if __name__ == '__main__':
    import getopt

    show_counts = False
    # NOTE(review): values given with -i are collected here but nothing in
    # this script reads them afterwards.
    ignore = set()
    verbose = False
    try:
        opts, args = getopt.getopt(sys.argv[1:], "ci:v")
    except getopt.GetoptError:
        usage(1)

    for o, a in opts:
        if o == '-c':
            show_counts = True
        elif o == '-i':
            ignore.add(a)
        elif o == '-v':
            verbose = True

    files = []

    for base in args:
        if os.path.isfile(base):
            files.append((base, lang_type(base)))
        elif os.path.isdir(base):
            files += find_files(base)
        else:
            raise AttributeError("can't access '%s'" % base)

    copyrights = {}
    counts = {}

    for filename, lang in files:
        # open() replaces the Python 2 only file() builtin.  Undecodable
        # bytes are replaced so one oddly encoded file cannot abort the
        # whole scan.
        with open(filename, 'r', errors='replace') as f:
            lines = f.readlines()
        if not lines:
            continue

        lines = [line.rstrip('\r\n') for line in lines]

        # Re-detect the language now that the first line is available.
        lt = lang_type(filename, lines[0])
        try:
            data = get_data(lt, lines)
        except Exception as e:
            # Files that cannot be parsed are skipped; they are reported
            # only in verbose mode.
            if verbose:
                if len(e.args) == 1:
                    e.args = ('%s (%s)' % (e, filename), )
                print("could not parse %s: %s" % (filename, e))
            continue

        for owner, dates, authors, start, end in data:
            copyrights.setdefault(owner, set()).update(dates)
            counts[owner] = counts.get(owner, 0) + 1

    # Most frequent owner first.  Ties are broken by owner name so the
    # order is deterministic; the year sets are never compared.
    ranked = sorted(copyrights, key=lambda o: (counts[o], o), reverse=True)

    for owner in ranked:
        label = owner
        if show_counts:
            label = '%s (%s files)' % (owner, counts[owner])
        print('Copyright (c) %s %s' % (datestr(copyrights[owner]), label))