blob: 034a3b3008b4f71f4b8b6fbbf06ba557636b0a82 [file] [log] [blame]
#!/usr/bin/env python3
# Run with directory arguments from any directory, with no special setup
# required.
Elliott Hughes387d4b72012-08-09 15:17:46 -07004
Elliott Hughes387d4b72012-08-09 15:17:46 -07005import os
Dan Albertffa5cbe2021-02-03 16:44:37 -08006from pathlib import Path
Elliott Hughes387d4b72012-08-09 15:17:46 -07007import re
Elliott Hughes387d4b72012-08-09 15:17:46 -07008import sys
Dan Albertffa5cbe2021-02-03 16:44:37 -08009from typing import Sequence
Elliott Hughes387d4b72012-08-09 15:17:46 -070010
# When True, warn_verbose() forwards its messages to warn(); when False they
# are dropped.
VERBOSE = False

# Distinct copyright notice texts collected so far. extract_copyright_at()
# adds to this set and main() prints its contents in sorted order.
copyrights = set()
14
15
def warn(s):
    """Write ``s`` to standard error as a single ``warning: ...`` line."""
    message = "warning: %s\n" % s
    sys.stderr.write(message)
18
Dan Albertffa5cbe2021-02-03 16:44:37 -080019
def warn_verbose(s):
    """Pass ``s`` on to ``warn`` only when the module-level VERBOSE flag is set."""
    if not VERBOSE:
        return
    warn(s)
23
Dan Albertffa5cbe2021-02-03 16:44:37 -080024
def is_interesting(path_str: str) -> bool:
    """Decide whether a file should be scanned for copyright notices.

    The path is lower-cased before any comparison, so all checks are
    case-insensitive.

    Args:
        path_str: Path of the candidate file.

    Returns:
        False for files with a skipped extension, for the well-known
        non-source file names, and for editor backup files ending in "~";
        True for everything else.
    """
    skipped_suffixes = (
        ".bp",
        ".map",
        ".md",
        ".mk",
        ".py",
        ".pyc",
        ".swp",
        ".txt",
        ".xml",
    )
    skipped_names = ("notice", "readme", "pylintrc")

    candidate = Path(path_str.lower())
    is_skipped = (
        candidate.suffix in skipped_suffixes
        or candidate.name in skipped_names
        # Backup files for some editors.
        or candidate.match("*~")
    )
    return not is_skipped
46
Dan Albertffa5cbe2021-02-03 16:44:37 -080047
def is_copyright_end(line: str, first_line_was_hash: bool) -> bool:
    """Report whether ``line`` terminates a copyright header.

    Args:
        line: The line being examined.
        first_line_was_hash: True when the header began on a line starting
            with "#"; in that case an empty line also ends the header.

    Returns:
        True if the line is an empty line in a "#" header or contains any of
        the known terminator markers, False otherwise.
    """
    if first_line_was_hash and line == "":
        return True

    terminators = (
        " $FreeBSD: ",
        "$Citrus$",
        "$FreeBSD$",
        "*/",
        "From: @(#)",
        # OpenBSD likes to say where stuff originally came from:
        "Original version ID:",
        "\t$Citrus: ",
        "\t$NetBSD: ",
        "\t$OpenBSD: ",
        "\t@(#)",
        "\tcitrus Id: ",
        "\tfrom: @(#)",
        "from OpenBSD:",
    )
    return any(marker in line for marker in terminators)
73
74
def extract_copyright_at(lines: Sequence[str], i: int) -> int:
    """Record the copyright notice that surrounds ``lines[i]``.

    The notice runs from the start of the enclosing comment down to the first
    line that ``is_copyright_end`` recognises. Comment decoration is stripped
    from the text, and the result is added to the module-level ``copyrights``
    set unless nothing but blank lines remains.

    Args:
        lines: The file's contents, split into lines.
        i: Index of the line that triggered the extraction.

    Returns:
        The index at which the caller should resume scanning. This is always
        greater than ``i``, so a caller looping on the return value is
        guaranteed to make progress.
    """
    first_line_was_hash = lines[i].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    # Not for "#" headers, and not when the block comment opens on this very
    # line, in which case nothing above it belongs to the notice.
    start = i
    if not first_line_was_hash and "/*" not in lines[i]:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    stop = i
    while stop < len(lines) and not is_copyright_end(
            lines[stop], first_line_was_hash):
        stop += 1

    # A terminator on the triggering line itself (for example a one-line
    # "/* Copyright ... */") would otherwise yield an empty notice and hand
    # the same index back to the caller. Treat that line as the whole notice
    # and step past it.
    single_line_notice = stop == i
    if single_line_notice:
        stop = i + 1

    # Trim trailing cruft.
    end = stop
    while end > start and lines[end - 1] in {
            " *", " * ===================================================="
    }:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in lines[start:end]:
        line = line.replace("\t", "    ")
        line = line.replace("/* ", "")
        if single_line_notice:
            # The closing delimiter shares the line with the notice text.
            line = line.replace("*/", "")
        line = re.sub(r"^ \* ", "", line)
        line = line.replace("** ", "")
        line = line.replace("# ", "")
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line in {"#", " *", "**", "-"}:
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail. Indices are used so an all-blank
    # (or empty) notice cannot index past the ends of the list.
    head = 0
    tail = len(clean_lines)
    while head < tail and clean_lines[head] == "":
        head += 1
    while tail > head and clean_lines[tail - 1] == "":
        tail -= 1

    if head < tail:
        copyrights.add("\n".join(clean_lines[head:tail]))

    return stop
130
Elliott Hughes387d4b72012-08-09 15:17:46 -0700131
def do_file(path: str) -> None:
    """Scan one file and record every copyright notice found in it.

    The file is decoded as UTF-8; if that fails a warning is issued and it is
    decoded as ISO-8859-1 instead. Nothing is recorded for files that split
    into four lines or fewer, files that never mention "Copyright", or files
    that begin with the Android SPDX header.

    Args:
        path: Path of the file to read.
    """
    raw = Path(path).read_bytes()
    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        content = raw.decode("iso-8859-1")

    lines = content.split("\n")

    if len(lines) <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn_verbose("ignoring public domain file %s" % path)
            return
        warn('no copyright notice found in "%s" (%d lines)' %
             (path, len(lines)))
        return

    # Skip over our own files if they're SPDX licensed.
    # Because we use the // comment style, without this we'd copy the whole
    # source file!
    # Raw strings keep "\(" and "\d" as regex escapes instead of invalid
    # string escapes.
    if re.match(
            r"^// Copyright \(C\) 2\d\d\d The Android Open Source Project\n"
            r"// SPDX-License-Identifier: ", content):
        return

    # Manually iterate because extract_copyright_at tells us how many lines to
    # skip.
    i = 0
    while i < len(lines):
        if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
            # Always move forward, even if the extractor hands back the index
            # it was given, so this loop cannot spin forever.
            i = max(i + 1, extract_copyright_at(lines, i))
        else:
            i += 1
168
169
def do_dir(arg):
    """Process every interesting file beneath the directory ``arg``.

    Directories and files are visited in sorted order, and ``.git``
    directories are not descended into.

    Args:
        arg: Root directory to walk.
    """
    for directory, sub_directories, filenames in os.walk(arg):
        if ".git" in sub_directories:
            sub_directories.remove(".git")
        # Sort in place: os.walk only honours changes made to the list object
        # it yielded, so rebinding the name to a sorted copy has no effect on
        # the traversal order.
        sub_directories.sort()

        for filename in sorted(filenames):
            path = os.path.join(directory, filename)
            if is_interesting(path):
                do_file(path)
Elliott Hughes387d4b72012-08-09 15:17:46 -0700180
Elliott Hughes387d4b72012-08-09 15:17:46 -0700181
def main() -> None:
    """Scan the command-line paths and print the collected notices.

    Each argument is handled by ``do_dir`` if it is a directory and by
    ``do_file`` otherwise; with no arguments the current directory is
    scanned. The collected notices are then printed in sorted order, each
    followed by a blank line, a 67-dash rule and another blank line.
    """
    targets = sys.argv[1:] or ["."]

    for target in targets:
        handler = do_dir if os.path.isdir(target) else do_file
        handler(target)

    separator = "-" * 67
    for notice in sorted(copyrights):
        print(notice)
        print()
        print(separator)
        print()
Elliott Hughes387d4b72012-08-09 15:17:46 -0700198
Dan Albertffa5cbe2021-02-03 16:44:37 -0800199
# Run the scan only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()