blob: 0bd0ef3a51a384282bc63a0cc494ec6fbceabb0e [file] [log] [blame] [edit]
#!/usr/bin/env python3
import os
import re
import sys
from file_types import lang_type, find_files
# Editor mode line such as "-*- mode:c++ -*-"; tolerated on the first line.
mode_line = re.compile(r"(-\*- *mode:.* *-\*-)")
# Comment leaders / delimiters for the supported language families.
shell_comment = re.compile(r"^\s*#")
lisp_comment = re.compile(r";")
cpp_comment = re.compile(r"//")
c_comment_start = re.compile(r"/\*")
c_comment_end = re.compile(r"\*/")


def _line_comment_blocks(lines, comment_re, allow_shebang):
    """Yield (start, end) for each run of comment lines in the file header.

    A line counts as a comment when ``comment_re.search`` matches it.  The
    scan stops at the first non-blank, non-comment line that is reached
    while no comment run is open.
    """
    start = None
    for i, line in enumerate(lines):
        if i == 0 and (
            (allow_shebang and line.startswith("#!")) or mode_line.search(line)
        ):
            continue

        if comment_re.search(line):
            if start is None:
                start = i
        elif start is None:
            if line.strip():
                return
        else:
            # First non-comment line after a run closes that run.
            yield start, i - 1
            start = None

    # A run that extends to the last line must not be dropped.
    if start is not None:
        yield start, len(lines) - 1


def _c_family_comment_blocks(lines):
    """Yield (start, end) for header comment blocks using /* */ or //."""
    start = None
    # None: outside a comment, "C": inside /* ... */, "CPP": in a run of //.
    mode = None
    for i, line in enumerate(lines):
        if i == 0 and mode_line.search(line):
            continue

        if mode == "C":
            assert start is not None, "on line %d" % (i + 1)
            if c_comment_end.search(line):
                yield start, i
                mode = None
            continue

        cpp_match = cpp_comment.search(line)
        c_match = c_comment_start.search(line)

        if cpp_match:
            # A line holding both "//" and "/*" is not handled.
            assert not c_match, "on line %d" % (i + 1)
            if line[: cpp_match.start()].strip():
                # Code precedes the "//": the header is over.  Emit the run
                # collected so far instead of silently losing it.
                if mode == "CPP":
                    yield start, i - 1
                return
            if mode is None:
                mode = "CPP"
                start = i
            elif line[cpp_match.end() :].lstrip().startswith("Copyright"):
                # A new "Copyright" line inside a // run starts a new block.
                yield start, i - 1
                start = i
            continue

        if mode == "CPP":
            assert start is not None, "on line %d" % (i + 1)
            if not line.strip():
                # Blank lines do not terminate a // run.
                continue
            yield start, i - 1
            mode = None
            if not c_match:
                return

        if c_match:
            assert mode is None, "on line %d" % (i + 1)
            start = i
            if c_comment_end.search(line, c_match.end()):
                # Comment opens and closes on the same line.
                yield start, i
            else:
                mode = "C"
            continue

        if line.strip():
            # Plain code outside any comment: the header is over.
            return

    # A // run that extends to the last line must not be dropped.  An
    # unterminated /* comment is deliberately not reported.
    if mode == "CPP":
        yield start, len(lines) - 1


def find_copyright_block(lines, lang_type):
    """Generate the comment blocks found at the top of a source file.

    Args:
        lines: the file's lines, in order.
        lang_type: language name selecting the comment syntax
            ("python", "make", "shell", "perl", "scons", "lisp", "C",
            "C++", "swig", "isa", "asm", "slicc", "lex" or "yacc").

    Yields:
        (start, end) pairs of 0-based, inclusive line indices, one per
        comment block.

    Raises:
        AttributeError: for an unsupported ``lang_type``.  Because this is
            a generator, the error surfaces when iteration starts.
        AssertionError: when a line mixes "//" and "/*" in the header.
    """
    if lang_type in ("python", "make", "shell", "perl", "scons"):
        yield from _line_comment_blocks(lines, shell_comment, True)
    elif lang_type in ("lisp",):
        yield from _line_comment_blocks(lines, lisp_comment, False)
    elif lang_type in (
        "C",
        "C++",
        "swig",
        "isa",
        "asm",
        "slicc",
        "lex",
        "yacc",
    ):
        yield from _c_family_comment_blocks(lines)
    else:
        raise AttributeError(f"Could not handle language {lang_type}")
# Two four-digit years separated by a dash, e.g. "2001-2005".
date_range_re = re.compile(r"([0-9]{4})\s*-\s*([0-9]{4})")


def process_dates(dates):
    """Expand a comma-separated list of years and year ranges.

    Args:
        dates: text such as "2001-2003, 2005".

    Returns:
        A set of ints holding every year named or covered by a range.
        Fields that are neither a range nor an integer are skipped.
    """
    years = set()
    for field in dates.split(","):
        field = field.strip()
        match = date_range_re.match(field)
        if match:
            # Sort the endpoints so a reversed range ("2005-2003") still
            # contributes its years instead of expanding to nothing.
            first, last = sorted(int(year) for year in match.groups())
            years.update(range(first, last + 1))
            continue

        try:
            years.add(int(field))
        except ValueError:
            # Best effort: empty fields (trailing commas) and stray text
            # are ignored rather than treated as errors.
            pass

    return years
# "Copyright (c) <years> <owner>".  Groups: the "(c)" marker, the raw year
# list, and the owner name.  Letters are spelled A-Za-z because the range
# A-z also covers the punctuation between "Z" and "a" ([ \ ] ^ _ `).
copyright_re = re.compile(
    r"Copyright (\([cC]\)) ([-, 0-9]+)[\s*#/]*([A-Za-z,. -]+)", re.DOTALL
)
# "Authors: <name>" after optional comment leader characters.
authors_re = re.compile(r"^[\s*#/]*Authors:\s*([A-Za-z .]+)\s*$")
# A continuation line holding only a further author name.
more_authors_re = re.compile(r"^[\s*#/]*([A-Za-z .]+)\s*$")
# Every owner string seen so far.
all_owners = set()
def get_data(lang_type, lines):
    """Extract copyright records from the header comments of one file.

    Args:
        lang_type: language name passed on to ``find_copyright_block``.
        lines: the file's lines, in order.

    Returns:
        A list of ``(owner, dates, authors, start, end)`` tuples, one per
        comment block that contains a copyright statement.  ``dates`` is
        the set returned by ``process_dates``; ``start`` and ``end`` are
        0-based inclusive line indices.  Each owner is also added to the
        module-level ``all_owners`` set.
    """
    data = []
    for start, end in find_copyright_block(lines, lang_type):
        # Join with newlines so text on adjacent lines cannot fuse into a
        # single word when the lines carry no comment leader.
        joined = "\n".join(lines[start : end + 1])
        match = copyright_re.search(joined)
        if not match:
            continue

        _marker, dates, owner = match.groups()
        dates = dates.strip()
        owner = owner.strip()

        all_owners.add(owner)
        try:
            dates = process_dates(dates)
        except Exception:
            # Show what was being parsed before propagating the error.
            print(dates)
            print(owner)
            raise

        authors = []
        for i in range(start, end + 1):
            line = lines[i]
            if not authors:
                # Still looking for the "Authors:" line.
                match = authors_re.search(line)
                if match:
                    authors.append(match.group(1).strip())
                continue

            match = more_authors_re.search(line)
            if not match:
                # The author list is over.  Trim ``end`` to the first blank
                # line, or to just before the first "//" line that carries
                # text, then stop collecting authors.
                for j in range(i, end + 1):
                    line = lines[j].strip()
                    if not line:
                        end = j
                        break
                    if line.startswith("//"):
                        line = line[2:].lstrip()
                        if line:
                            end = j - 1
                            break
                break
            authors.append(match.group(1).strip())

        data.append((owner, dates, authors, start, end))

    return data
def datestr(dates):
    """Format years as a compact, comma-separated list of ranges.

    Consecutive years collapse into "first-last"; isolated years stand
    alone, e.g. {2001, 2002, 2003, 2005} -> "2001-2003,2005".

    Args:
        dates: iterable of int years, in any order.

    Returns:
        The formatted string, or "" when ``dates`` is empty.
    """
    years = sorted(set(dates))
    if not years:
        # Nothing to format; avoids indexing into an empty list.
        return ""

    ranges = []
    first = last = years[0]
    for year in years[1:]:
        if year == last + 1:
            last = year
            continue
        ranges.append((first, last))
        first = last = year
    ranges.append((first, last))

    return ",".join(
        "%d" % lo if lo == hi else "%d-%d" % (lo, hi) for lo, hi in ranges
    )
# Lists every option the command-line parser accepts ("ci:v") and reflects
# that several file or directory arguments may be given.
usage_str = """usage:
%s [-c] [-i <ignore>] [-v] <file-or-directory>..."""


def usage(exitcode):
    """Print the usage message and optionally exit.

    Args:
        exitcode: process exit status, or None to return to the caller
            after printing.
    """
    print(usage_str % sys.argv[0])
    if exitcode is not None:
        sys.exit(exitcode)
if __name__ == "__main__":
    import getopt

    # Script entry point: scan the given files/directories and print one
    # merged "Copyright (c) <years> <owner>" line per distinct owner.
    show_counts = False
    # NOTE(review): values given with -i are collected here, but nothing
    # below reads them.
    ignore = set()
    verbose = False
    try:
        opts, args = getopt.getopt(sys.argv[1:], "ci:v")
    except getopt.GetoptError:
        usage(1)

    for o, a in opts:
        if o == "-c":
            show_counts = True
        if o == "-i":
            ignore.add(a)
        if o == "-v":
            verbose = True

    files = []
    for base in args:
        if os.path.isfile(base):
            files += [(base, lang_type(base))]
        elif os.path.isdir(base):
            files += find_files(base)
        else:
            raise AttributeError(f"can't access '{base}'")

    copyrights = {}
    counts = {}

    for filename, _lang in files:
        # open() replaces the Python 2-only file() builtin; the context
        # manager closes the handle, and errors="replace" keeps one badly
        # encoded file from aborting the whole scan.
        with open(filename, encoding="utf-8", errors="replace") as f:
            lines = [line.rstrip("\r\n") for line in f]
        if not lines:
            continue

        # Re-detect the language with the first line available.
        lt = lang_type(filename, lines[0])
        try:
            data = get_data(lt, lines)
        except Exception as e:
            # Best effort: an unparseable file is skipped, not fatal.
            if verbose:
                if len(e.args) == 1:
                    e.args = (f"{e} ({filename})",)
                print(f"could not parse {filename}: {e}")
            continue

        for owner, dates, _authors, _start, _end in data:
            copyrights.setdefault(owner, set()).update(dates)
            counts[owner] = counts.get(owner, 0) + 1

    # Most frequent owners first.  Ties are broken by owner name so the
    # sort never has to compare the year sets with each other.
    report = sorted(
        ((counts[owner], owner, dates) for owner, dates in copyrights.items()),
        key=lambda entry: entry[:2],
        reverse=True,
    )

    for count, owner, dates in report:
        if show_counts:
            owner = f"{owner} ({count} files)"
        print(f"Copyright (c) {datestr(dates)} {owner}")