Elliott Hughes | 6b586e7 | 2021-04-15 13:39:08 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
Dan Albert | ffa5cbe | 2021-02-03 16:44:37 -0800 | [diff] [blame] | 2 | # Run with directory arguments from any directory, with no special setup |
| 3 | # required. |
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 4 | |
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 5 | import os |
Dan Albert | ffa5cbe | 2021-02-03 16:44:37 -0800 | [diff] [blame] | 6 | from pathlib import Path |
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 7 | import re |
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 8 | import sys |
Dan Albert | ffa5cbe | 2021-02-03 16:44:37 -0800 | [diff] [blame] | 9 | from typing import Sequence |
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 10 | |
# When true, warn_verbose() forwards its messages to warn(); otherwise those
# messages are dropped.
VERBOSE = False

# Every distinct cleaned-up copyright notice collected so far. Filled in by
# extract_copyright_at() and printed in sorted order by main().
copyrights = set()
| 14 | |
| 15 | |
def warn(s):
    """Write *s* to standard error as a single "warning: ..." line."""
    message = "warning: %s\n" % s
    sys.stderr.write(message)
| 18 | |
Dan Albert | ffa5cbe | 2021-02-03 16:44:37 -0800 | [diff] [blame] | 19 | |
def warn_verbose(s):
    """Pass *s* on to warn(), but only when the module-level VERBOSE is set."""
    if not VERBOSE:
        return
    warn(s)
| 23 | |
Dan Albert | ffa5cbe | 2021-02-03 16:44:37 -0800 | [diff] [blame] | 24 | |
def is_interesting(path_str: str) -> bool:
    """Return whether the file at *path_str* should be scanned for notices.

    The path is lower-cased before any comparison, so the suffix and name
    checks below are case-insensitive.
    """
    candidate = Path(path_str.lower())
    skipped_suffixes = (
        ".bp",
        ".map",
        ".md",
        ".mk",
        ".py",
        ".pyc",
        ".swp",
        ".txt",
    )
    skipped_names = ("notice", "readme", "pylintrc")
    return not (
        candidate.suffix in skipped_suffixes
        or candidate.name in skipped_names
        # Backup files for some editors.
        or candidate.match("*~")
    )
| 45 | |
Dan Albert | ffa5cbe | 2021-02-03 16:44:37 -0800 | [diff] [blame] | 46 | |
def is_auto_generated(content):
    """Return True if *content* contains one of the known generator markers."""
    markers = (
        "Generated by gensyscalls.py",
        "generated by genserv.py",
        "This header was automatically generated from a Linux kernel header",
    )
    return any(marker in content for marker in markers)
| 53 | |
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 54 | |
def is_copyright_end(line: str, first_line_was_hash: bool) -> bool:
    """Return whether *line* terminates a copyright header.

    Args:
        line: The candidate line.
        first_line_was_hash: Whether the header being read started with "#".
            For such headers an empty line also ends the notice.
    """
    if first_line_was_hash and not line:
        return True

    terminators = (
        " $FreeBSD: ",
        "$Citrus$",
        "$FreeBSD$",
        "*/",
        "From: @(#)",
        # OpenBSD likes to say where stuff originally came from:
        "Original version ID:",
        "\t$Citrus: ",
        "\t$NetBSD: ",
        "\t$OpenBSD: ",
        "\t@(#)",
        "\tcitrus Id: ",
        "\tfrom: @(#)",
        "from OpenBSD:",
    )
    return any(terminator in line for terminator in terminators)
| 80 | |
| 81 | |
def extract_copyright_at(lines: Sequence[str], i: int) -> int:
    """Extract the copyright notice around ``lines[i]`` into ``copyrights``.

    Unless ``lines[i]`` starts with "#", the start of the notice is found by
    backing up until the previous line contains "/*" (or the start of the
    file is reached). The notice then runs forward from ``i`` until
    is_copyright_end() reports a terminator or the lines run out. Comment
    decoration is stripped and the resulting text is added to the
    module-level ``copyrights`` set; nothing is added if no text remains.

    Args:
        lines: All lines of the file being scanned.
        i: Index of the line on which the caller spotted a copyright.

    Returns:
        The index from which the caller should resume scanning. This is
        always greater than the ``i`` passed in, so a caller looping on the
        result is guaranteed to make progress even when ``lines[i]`` is
        itself a terminator (e.g. a one-line ``/* Copyright ... */``).
    """
    first_index = i
    first_line_was_hash = lines[i].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    start = i
    if not first_line_was_hash:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines) and not is_copyright_end(lines[i],
                                                  first_line_was_hash):
        i += 1

    end = i

    # Trim trailing cruft. Going below `start` would only yield an empty
    # slice, so stop there.
    cruft = {" *", " * ===================================================="}
    while end > start and lines[end - 1] in cruft:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in lines[start:end]:
        line = line.replace("\t", " ")
        line = line.replace("/* ", "")
        line = re.sub(r"^ \* ", "", line)
        line = line.replace("** ", "")
        line = line.replace("# ", "")
        if "SPDX-License-Identifier:" in line:
            continue
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line in {"#", " *", "**", "-"}:
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail. The bounds checks matter: the
    # list can be empty or entirely blank, and indexing it unconditionally
    # would raise IndexError.
    head = 0
    tail = len(clean_lines)
    while head < tail and clean_lines[head] == "":
        head += 1
    while tail > head and clean_lines[tail - 1] == "":
        tail -= 1
    clean_lines = clean_lines[head:tail]

    # Don't record an empty notice.
    if clean_lines:
        copyrights.add("\n".join(clean_lines))

    # If lines[first_index] was itself a terminator, `i` never moved; step
    # past it so the caller cannot loop forever on the same line.
    return max(i, first_index + 1)
| 139 | |
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 140 | |
def do_file(path: str) -> None:
    """Scan the file at *path* and record each copyright notice it contains.

    The file is decoded as UTF-8, falling back to ISO-8859-1 (with a
    warning) if that fails. Files of four lines or fewer, auto-generated
    files, and files without the word "Copyright" are skipped; the last
    case produces a warning unless the file mentions "public domain", in
    which case the message is only emitted in verbose mode.
    """
    data = Path(path).read_bytes()
    try:
        content = data.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        content = data.decode("iso-8859-1")

    lines = content.split("\n")
    line_count = len(lines)

    if line_count <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if is_auto_generated(content):
        warn_verbose("ignoring auto-generated file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn_verbose("ignoring public domain file %s" % path)
        else:
            warn('no copyright notice found in "%s" (%d lines)' %
                 (path, line_count))
        return

    # Manually iterate because extract_copyright_at tells us how many lines to
    # skip.
    index = 0
    while index < line_count:
        current = lines[index]
        if "Copyright" not in current or "@(#) Copyright" in current:
            index += 1
            continue
        index = extract_copyright_at(lines, index)
| 175 | |
| 176 | |
def do_dir(arg):
    """Process every interesting file below the directory *arg*.

    Walks the tree with os.walk(), skipping any ".git" directory, and calls
    do_file() on each file for which is_interesting() is true. Directories
    and files are visited in sorted order.
    """
    for directory, sub_directories, filenames in os.walk(arg):
        if ".git" in sub_directories:
            sub_directories.remove(".git")
        # Sort in place: os.walk() only honours changes made to the list
        # object it handed us, so rebinding the name to sorted(...) would
        # leave the traversal order untouched.
        sub_directories.sort()

        for filename in sorted(filenames):
            path = os.path.join(directory, filename)
            if is_interesting(path):
                do_file(path)
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 187 | |
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 188 | |
def main() -> None:
    """Process each command-line argument, then print the collected notices.

    Arguments that are directories go to do_dir(); anything else goes to
    do_file(). With no arguments, the current directory is processed. Each
    notice in ``copyrights`` is printed in sorted order, followed by a blank
    line, a 67-dash separator and another blank line.
    """
    targets = sys.argv[1:] or ["."]

    for target in targets:
        handler = do_dir if os.path.isdir(target) else do_file
        handler(target)

    separator = "-" * 67
    for notice in sorted(copyrights):
        print(notice)
        print()
        print(separator)
        print()
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 205 | |
Dan Albert | ffa5cbe | 2021-02-03 16:44:37 -0800 | [diff] [blame] | 206 | |
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()