#!/usr/bin/env python3
# Run with directory arguments from any directory, with no special setup
# required.
Elliott Hughes387d4b72012-08-09 15:17:46 -07004
Elliott Hughes387d4b72012-08-09 15:17:46 -07005import os
Dan Albertffa5cbe2021-02-03 16:44:37 -08006from pathlib import Path
Elliott Hughes387d4b72012-08-09 15:17:46 -07007import re
Elliott Hughes387d4b72012-08-09 15:17:46 -07008import sys
Dan Albertffa5cbe2021-02-03 16:44:37 -08009from typing import Sequence
Elliott Hughes387d4b72012-08-09 15:17:46 -070010
# When True, warn_verbose() forwards its messages to warn(); when False they
# are silently dropped.
VERBOSE = False

# Every distinct copyright notice extracted so far, across all scanned files.
copyrights = set()
14
15
def warn(s):
    """Write *s* to stderr as one line prefixed with ``warning: ``."""
    message = "warning: %s\n" % s
    sys.stderr.write(message)
18
Dan Albertffa5cbe2021-02-03 16:44:37 -080019
def warn_verbose(s):
    """Pass *s* on to warn(), but only when the VERBOSE flag is set."""
    if not VERBOSE:
        return
    warn(s)
23
Dan Albertffa5cbe2021-02-03 16:44:37 -080024
def is_interesting(path_str: str) -> bool:
    """Return whether the file at *path_str* should be scanned for notices.

    The check is case-insensitive: the path is lower-cased before its suffix
    and file name are examined.
    """
    lowered = Path(path_str.lower())
    skipped_suffixes = (
        ".bp",
        ".map",
        ".md",
        ".mk",
        ".py",
        ".pyc",
        ".swp",
        ".txt",
    )
    skipped_names = ("notice", "readme", "pylintrc")
    # The "*~" pattern catches the backup files that some editors leave behind.
    is_boring = (
        lowered.suffix in skipped_suffixes
        or lowered.name in skipped_names
        or lowered.match("*~")
    )
    return not is_boring
45
Dan Albertffa5cbe2021-02-03 16:44:37 -080046
def is_auto_generated(content):
    """Return True if *content* carries one of the known generator banners."""
    markers = (
        "Generated by gensyscalls.py",
        "generated by genserv.py",
        "This header was automatically generated from a Linux kernel header",
    )
    return any(marker in content for marker in markers)
53
Elliott Hughes387d4b72012-08-09 15:17:46 -070054
def is_copyright_end(line: str, first_line_was_hash: bool) -> bool:
    """Return whether *line* terminates a copyright header.

    Args:
        line: The line being examined.
        first_line_was_hash: True when the header's first line started with
            "#"; in that case an empty line also ends the header.
    """
    if first_line_was_hash and not line:
        return True

    terminators = (
        " $FreeBSD: ",
        "$Citrus$",
        "$FreeBSD$",
        "*/",
        "From: @(#)",
        # OpenBSD likes to say where stuff originally came from:
        "Original version ID:",
        "\t$Citrus: ",
        "\t$NetBSD: ",
        "\t$OpenBSD: ",
        "\t@(#)",
        "\tcitrus Id: ",
        "\tfrom: @(#)",
        "from OpenBSD:",
    )
    return any(marker in line for marker in terminators)
80
81
def extract_copyright_at(lines: Sequence[str], i: int) -> int:
    """Extract the copyright notice around ``lines[i]`` into ``copyrights``.

    Args:
        lines: The file's content, split into lines.
        i: Index of a line that contains "Copyright".

    Returns:
        The index at which the caller should resume scanning: the line that
        terminated the notice (or ``len(lines)``). It is always greater than
        the ``i`` passed in, so a caller looping on the result cannot stall.
    """
    first = i
    first_line_was_hash = lines[i].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    start = i
    if not first_line_was_hash:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines) and not is_copyright_end(lines[i],
                                                  first_line_was_hash):
        i += 1

    # Trim trailing cruft.
    end = i
    while end > 0 and lines[end - 1] in {
            " *", " * ===================================================="
    }:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in lines[start:end]:
        line = line.replace("\t", " ")
        line = line.replace("/* ", "")
        line = re.sub(r"^ \* ", "", line)
        line = line.replace("** ", "")
        line = line.replace("# ", "")
        if "SPDX-License-Identifier:" in line:
            continue
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line in {"#", " *", "**", "-"}:
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail. The emptiness checks matter: the
    # span can be empty or entirely blank (for example when the "Copyright"
    # line is itself a terminator), and indexing would raise IndexError.
    while clean_lines and clean_lines[0] == "":
        del clean_lines[0]
    while clean_lines and clean_lines[-1] == "":
        clean_lines.pop()

    # Only record notices that actually contain text.
    if clean_lines:
        copyrights.add("\n".join(clean_lines))

    # If the "Copyright" line itself ended the header, `i` never moved; step
    # past it so the caller does not re-process the same line forever.
    return max(i, first + 1)
139
Elliott Hughes387d4b72012-08-09 15:17:46 -0700140
def do_file(path: str) -> None:
    """Scan the file at *path* and record the copyright notices it contains.

    The file is decoded as UTF-8, falling back to ISO-8859-1 (with a warning)
    if that fails. Files of four lines or fewer, auto-generated files and
    public-domain files are skipped (reported only in verbose mode). A file
    with no "Copyright" text at all produces a warning.

    Args:
        path: Path of the file to read.
    """
    raw = Path(path).read_bytes()
    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        content = raw.decode("iso-8859-1")

    lines = content.split("\n")

    if len(lines) <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if is_auto_generated(content):
        warn_verbose("ignoring auto-generated file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn_verbose("ignoring public domain file %s" % path)
            return
        warn('no copyright notice found in "%s" (%d lines)' %
             (path, len(lines)))
        return

    # Manually iterate because extract_copyright_at tells us how many lines to
    # skip.
    i = 0
    while i < len(lines):
        if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
            # Always move forward by at least one line: if the extractor hands
            # back the index it was given (the "Copyright" line is itself a
            # header terminator), looping on it would never finish.
            i = max(extract_copyright_at(lines, i), i + 1)
        else:
            i += 1
175
176
def do_dir(arg):
    """Walk the directory tree rooted at *arg* and scan each interesting file.

    ".git" directories are not descended into. Within each directory, files
    are visited in sorted order, and subdirectories are descended into in
    sorted order.

    Args:
        arg: Path of the root directory to walk.
    """
    for directory, sub_directories, filenames in os.walk(arg):
        # os.walk only honors changes made to this list in place; rebinding
        # the name (e.g. `sub_directories = sorted(...)`) has no effect on the
        # traversal.
        if ".git" in sub_directories:
            sub_directories.remove(".git")
        sub_directories.sort()

        for filename in sorted(filenames):
            path = os.path.join(directory, filename)
            if is_interesting(path):
                do_file(path)
Elliott Hughes387d4b72012-08-09 15:17:46 -0700187
Elliott Hughes387d4b72012-08-09 15:17:46 -0700188
def main() -> None:
    """Scan the paths given on the command line and print the notices found.

    With no arguments the current directory is scanned. Directories are
    walked recursively; any other argument is treated as a single file.
    """
    targets = sys.argv[1:]
    if not targets:
        targets = ["."]

    for target in targets:
        handler = do_dir if os.path.isdir(target) else do_file
        handler(target)

    # Each notice is followed by a blank line, a rule and another blank line.
    rule = "-" * 67
    for notice in sorted(copyrights):
        print(notice)
        print()
        print(rule)
        print()


if __name__ == "__main__":
    main()