| #!/usr/bin/env python3 |
| # Run with directory arguments from any directory, with no special setup |
| # required. |
| |
| import os |
| from pathlib import Path |
| import re |
| import sys |
| from typing import Sequence |
| |
# When True, warn_verbose() also reports files that do_file() skips on
# purpose (short, auto-generated, or public domain files).
VERBOSE = False

# Distinct copyright notices collected so far. extract_copyright_at() adds to
# this set and main() prints its contents in sorted order.
copyrights = set()
| |
| |
def warn(s):
    """Write a single warning line to standard error.

    Args:
        s: The message to report. It is rendered with ``%s``, so any object
            with a string representation is accepted.
    """
    # Wrap the argument in a one-element tuple: with a bare ``% s`` a tuple
    # message would be unpacked as the format arguments (raising TypeError
    # or printing only part of it) instead of being printed as-is.
    sys.stderr.write("warning: %s\n" % (s,))
| |
| |
def warn_verbose(s):
    """Emit a warning via warn(), but only when VERBOSE is enabled.

    Args:
        s: The message to pass on to warn().
    """
    if not VERBOSE:
        return
    warn(s)
| |
| |
def is_interesting(path_str: str) -> bool:
    """Report whether a file should be scanned for copyright notices.

    The check is case-insensitive: the path is lower-cased before its suffix
    and file name are examined.

    Args:
        path_str: Path of the candidate file.

    Returns:
        False when the file has one of the skipped extensions or is named
        "notice", "readme" or "pylintrc"; True otherwise.
    """
    lowered = Path(path_str.lower())
    skipped_suffixes = frozenset((
        ".bp",
        ".map",
        ".md",
        ".mk",
        ".py",
        ".pyc",
        ".swp",
        ".txt",
    ))
    skipped_names = ("notice", "readme", "pylintrc")
    return not (lowered.suffix in skipped_suffixes
                or lowered.name in skipped_names)
| |
| |
def is_auto_generated(content):
    """Report whether file content carries a known "generated file" marker.

    Args:
        content: Full text of the file.

    Returns:
        True if any of the known generator banners occurs in ``content``.
    """
    generated_markers = (
        "Generated by gensyscalls.py",
        "generated by genserv.py",
        "This header was automatically generated from a Linux kernel header",
    )
    return any(marker in content for marker in generated_markers)
| |
| |
def is_copyright_end(line: str, first_line_was_hash: bool) -> bool:
    """Report whether ``line`` terminates a copyright header.

    Args:
        line: The line being examined.
        first_line_was_hash: True when the header started on a line
            beginning with "#"; for such headers an empty line also ends
            the header.

    Returns:
        True if the line is an empty line of a "#"-style header or contains
        any of the known terminator strings.
    """
    # An empty line closes a "#"-style header.
    if first_line_was_hash and not line:
        return True

    terminators = (
        " $FreeBSD: ",
        "$Citrus$",
        "$FreeBSD$",
        "*/",
        "From: @(#)",
        # OpenBSD likes to say where stuff originally came from:
        "Original version ID:",
        "\t$Citrus: ",
        "\t$NetBSD: ",
        "\t$OpenBSD: ",
        "\t@(#)",
        "\tcitrus Id: ",
        "\tfrom: @(#)",
        "from OpenBSD:",
    )
    return any(marker in line for marker in terminators)
| |
| |
def extract_copyright_at(lines: Sequence[str], i: int) -> int:
    """Extract the copyright header around line ``i`` into ``copyrights``.

    For headers that do not start with "#", the start is found by backing up
    to the line after the nearest preceding line containing "/*" (or to the
    top of the file). The header then runs forward until is_copyright_end()
    reports a terminating line; that terminating line is not part of the
    notice. Comment markup is stripped and the cleaned text is added to the
    module-level ``copyrights`` set.

    Args:
        lines: All lines of the file.
        i: Index of the line that triggered the extraction.

    Returns:
        The index at which the caller should resume scanning. It is always
        greater than the ``i`` passed in, so callers are guaranteed to make
        progress.
    """
    first = i
    first_line_was_hash = lines[first].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    start = first
    if not first_line_was_hash:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines) and not is_copyright_end(lines[i],
                                                  first_line_was_hash):
        i += 1

    end = i

    # Trim trailing cruft. Stopping at `start` is enough: anything at or
    # below it yields an empty slice anyway.
    trailing_cruft = {
        " *", " * ===================================================="
    }
    while end > start and lines[end - 1] in trailing_cruft:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in lines[start:end]:
        line = line.replace("\t", " ")
        line = line.replace("/* ", "")
        line = re.sub(r"^ \* ", "", line)
        line = line.replace("** ", "")
        line = line.replace("# ", "")
        if "SPDX-License-Identifier:" in line:
            continue
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line in {"#", " *", "**", "-"}:
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail. The emptiness checks matter: the
    # extraction can legitimately produce nothing (for example when the
    # triggering line is itself a terminator, or every line was skipped),
    # and indexing an empty list would raise IndexError.
    while clean_lines and clean_lines[0] == "":
        clean_lines.pop(0)
    while clean_lines and clean_lines[-1] == "":
        clean_lines.pop()

    # Do not record an empty notice.
    if clean_lines:
        copyrights.add("\n".join(clean_lines))

    # If the triggering line was itself a terminator, `i` never moved;
    # returning it unchanged would make the caller re-scan the same line
    # forever. Always step past the triggering line.
    return max(i, first + 1)
| |
| |
def do_file(path: str) -> None:
    """Scan one file and collect every copyright notice found in it.

    The file is decoded as UTF-8, falling back to ISO-8859-1 (with a
    warning) when it is not valid UTF-8. Files of four lines or fewer,
    auto-generated files and public domain files are skipped; a file with no
    "Copyright" text that is not public domain produces a warning.

    Args:
        path: Path of the file to scan.
    """
    raw = Path(path).read_bytes()
    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        # ISO-8859-1 maps every byte to a character, so this cannot fail.
        content = raw.decode("iso-8859-1")

    lines = content.split("\n")

    if len(lines) <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if is_auto_generated(content):
        warn_verbose("ignoring auto-generated file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn_verbose("ignoring public domain file %s" % path)
            return
        warn('no copyright notice found in "%s" (%d lines)' %
             (path, len(lines)))
        return

    # Manually iterate because extract_copyright_at tells us how many lines to
    # skip.
    i = 0
    while i < len(lines):
        line = lines[i]
        if "Copyright" in line and "@(#) Copyright" not in line:
            # Never resume at or before the current line: if the extractor
            # hands back the same index (the triggering line was itself a
            # terminator) this loop would otherwise never finish.
            i = max(extract_copyright_at(lines, i), i + 1)
        else:
            i += 1
| |
| |
def do_dir(arg):
    """Scan every interesting file below a directory, in sorted order.

    Args:
        arg: Root directory to walk. ".git" directories are not entered.
    """
    for directory, sub_directories, filenames in os.walk(arg):
        # os.walk only honours changes made to this list in place. Rebinding
        # the name to a sorted copy has no effect on traversal, so prune
        # ".git" and sort through a slice assignment instead.
        sub_directories[:] = sorted(
            name for name in sub_directories if name != ".git")

        for filename in sorted(filenames):
            path = os.path.join(directory, filename)
            if is_interesting(path):
                do_file(path)
| |
| |
def main() -> None:
    """Collect notices from the command-line paths and print them.

    Each argument is scanned as a directory tree or as a single file; with
    no arguments the current directory is scanned. The distinct notices are
    then printed in sorted order, each followed by a separator line.
    """
    targets = sys.argv[1:] or ["."]

    for target in targets:
        scan = do_dir if os.path.isdir(target) else do_file
        scan(target)

    separator = "-" * 67
    for notice in sorted(copyrights):
        print(notice)
        print()
        print(separator)
        print()
| |
| |
# Run the tool only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()