Elliott Hughes | 6b586e7 | 2021-04-15 13:39:08 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
Dan Albert | ffa5cbe | 2021-02-03 16:44:37 -0800 | [diff] [blame] | 2 | # Run with directory arguments from any directory, with no special setup |
| 3 | # required. |
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 4 | |
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 5 | import os |
Dan Albert | ffa5cbe | 2021-02-03 16:44:37 -0800 | [diff] [blame] | 6 | from pathlib import Path |
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 7 | import re |
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 8 | import sys |
Dan Albert | ffa5cbe | 2021-02-03 16:44:37 -0800 | [diff] [blame] | 9 | from typing import Sequence |
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 10 | |
# When True, warn_verbose() forwards its messages to warn(); when False they
# are dropped.
VERBOSE = False

# Every distinct notice text collected by extract_copyright_at(), shared
# across all scanned files and printed in sorted order by main().
copyrights = set()
| 14 | |
| 15 | |
def warn(s):
    """Write ``s`` to standard error as a single "warning: ..." line."""
    message = "warning: %s\n" % s
    sys.stderr.write(message)
| 18 | |
Dan Albert | ffa5cbe | 2021-02-03 16:44:37 -0800 | [diff] [blame] | 19 | |
def warn_verbose(s):
    """Emit ``s`` through warn(), but only when VERBOSE is enabled."""
    if not VERBOSE:
        return
    warn(s)
| 23 | |
Dan Albert | ffa5cbe | 2021-02-03 16:44:37 -0800 | [diff] [blame] | 24 | |
def is_interesting(path_str: str) -> bool:
    """Return True if the file at ``path_str`` should be scanned for notices.

    The path is lower-cased first, so every comparison below is
    case-insensitive. A file is skipped when its extension is on the
    exclusion list, when its name is one of a few known non-source files, or
    when it looks like an editor backup (name ends in "~").
    """
    skipped_suffixes = (
        ".bp",
        ".map",
        ".md",
        ".mk",
        ".py",
        ".pyc",
        ".swp",
        ".txt",
        ".xml",
    )
    skipped_names = ("notice", "readme", "pylintrc")

    candidate = Path(path_str.lower())
    boring = (
        candidate.suffix in skipped_suffixes
        or candidate.name in skipped_names
        # Backup files for some editors.
        or candidate.match("*~")
    )
    return not boring
| 46 | |
Dan Albert | ffa5cbe | 2021-02-03 16:44:37 -0800 | [diff] [blame] | 47 | |
def is_copyright_end(line: str, first_line_was_hash: bool) -> bool:
    """Return True if ``line`` terminates a copyright header.

    For headers that started on a "#" line, an empty line ends the header.
    Otherwise (and additionally) a line ends the header when it contains any
    of the known terminator substrings, such as a closing "*/" or a version
    control ID tag.
    """
    # An empty line closes a "#"-style header.
    if first_line_was_hash and not line:
        return True

    terminators = (
        " $FreeBSD: ",
        "$Citrus$",
        "$FreeBSD$",
        "*/",
        "From: @(#)",
        # OpenBSD likes to say where stuff originally came from:
        "Original version ID:",
        "\t$Citrus: ",
        "\t$NetBSD: ",
        "\t$OpenBSD: ",
        "\t@(#)",
        "\tcitrus Id: ",
        "\tfrom: @(#)",
        "from OpenBSD:",
    )
    return any(marker in line for marker in terminators)
| 73 | |
| 74 | |
def extract_copyright_at(lines: Sequence[str], i: int) -> int:
    """Extract the notice around line ``i`` and add it to ``copyrights``.

    Args:
        lines: The file's contents, one entry per line.
        i: Index of a line that contains a copyright statement.

    Returns:
        The index at which the caller should resume scanning. This is always
        greater than the ``i`` passed in, so a caller looping on the result
        is guaranteed to make progress.

    The header is taken to run from just below the nearest preceding line
    containing "/*" (or from ``i`` itself when that line starts with "#") up
    to, but not including, the first line for which is_copyright_end() is
    true. Comment decoration is stripped and the remaining text is stored in
    the module-level ``copyrights`` set. If no text remains, nothing is
    stored and a warning is emitted instead.
    """
    first_index = i
    first_line_was_hash = lines[i].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    # "#" headers start where they are; otherwise walk upwards until the line
    # above contains "/*" or we reach the top of the file.
    start = i
    if not first_line_was_hash:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines) and not is_copyright_end(lines[i],
                                                  first_line_was_hash):
        i += 1

    end = i

    # If the copyright line is itself a terminator (e.g. it also contains
    # "*/"), the scan above does not move. Returning the same index would
    # make the caller revisit this line forever, so always step past it.
    resume_at = max(i, first_index + 1)

    # Trim trailing cruft.
    cruft = {" *", " * ===================================================="}
    while end > start and lines[end - 1] in cruft:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in lines[start:end]:
        line = line.replace("\t", " ")
        line = line.replace("/* ", "")
        line = re.sub(r"^ \* ", "", line)
        line = line.replace("** ", "")
        line = line.replace("# ", "")
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line in {"#", " *", "**", "-"}:
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail. The emptiness checks matter: the
    # list can be empty (or all blank) when the header had no usable text.
    while clean_lines and clean_lines[0] == "":
        del clean_lines[0]
    while clean_lines and clean_lines[-1] == "":
        del clean_lines[-1]

    if clean_lines:
        copyrights.add("\n".join(clean_lines))
    else:
        # Don't drop a notice silently; let the operator inspect the file.
        warn("no notice text could be extracted around line %d" %
             (first_index + 1))

    return resume_at
| 130 | |
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 131 | |
def do_file(path: str) -> None:
    """Scan the file at ``path`` and record every copyright notice in it.

    The file is decoded as UTF-8, falling back to ISO-8859-1 (with a warning)
    when it is not valid UTF-8. Files of four lines or fewer are ignored.
    Files with no "Copyright" text are reported with a warning, unless they
    mention "public domain", in which case they are ignored (reported only
    in verbose mode). Extracted notices are accumulated by
    extract_copyright_at().
    """
    raw = Path(path).read_bytes()
    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        content = raw.decode("iso-8859-1")

    lines = content.split("\n")

    if len(lines) <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn_verbose("ignoring public domain file %s" % path)
            return
        warn('no copyright notice found in "%s" (%d lines)' %
             (path, len(lines)))
        return

    # Manually iterate because extract_copyright_at tells us how many lines to
    # skip.
    i = 0
    while i < len(lines):
        if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
            # Never accept an index that fails to advance: if the extractor
            # hands back the line we gave it, this loop would spin forever.
            i = max(extract_copyright_at(lines, i), i + 1)
        else:
            i += 1
| 162 | |
| 163 | |
def do_dir(arg):
    """Walk the directory tree rooted at ``arg`` and scan each relevant file.

    ".git" directories are not descended into. Sub-directories and files are
    visited in sorted order so that the sequence of scanned files (and of any
    warnings) is deterministic. Each file accepted by is_interesting() is
    handed to do_file().
    """
    for directory, sub_directories, filenames in os.walk(arg):
        # os.walk only honours changes made to this list in place, so both
        # the pruning and the ordering must mutate it rather than rebind the
        # name to a new list.
        if ".git" in sub_directories:
            sub_directories.remove(".git")
        sub_directories.sort()

        for filename in sorted(filenames):
            path = os.path.join(directory, filename)
            if is_interesting(path):
                do_file(path)
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 174 | |
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 175 | |
def main() -> None:
    """Scan the paths given on the command line and print collected notices.

    With no arguments the current directory is scanned. Directory arguments
    are walked recursively; any other argument is treated as a single file.
    Afterwards every distinct notice is printed in sorted order, each
    followed by a blank line, a 67-dash separator and another blank line.
    """
    targets = sys.argv[1:] or ["."]

    for target in targets:
        handler = do_dir if os.path.isdir(target) else do_file
        handler(target)

    separator = "-" * 67
    for notice in sorted(copyrights):
        print(notice)
        print()
        print(separator)
        print()
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 192 | |
Dan Albert | ffa5cbe | 2021-02-03 16:44:37 -0800 | [diff] [blame] | 193 | |
# Run the scan only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()