blob: b6deb9cd26973eda95725b669205358d9d633abc [file] [log] [blame]
Stephen Crane77bb5642017-08-31 15:08:26 -07001#!/usr/bin/env python
Elliott Hughes387d4b72012-08-09 15:17:46 -07002# Run with directory arguments from any directory, with no special setup required.
3
4import ftplib
5import hashlib
6import os
7import re
8import shutil
9import string
10import subprocess
11import sys
12import tarfile
13import tempfile
14
# When True, warn_verbose() messages are emitted; otherwise they are dropped.
VERBOSE = False


def warn(s):
    """Write *s* to stderr as a single "warning: ..." line."""
    message = "warning: %s\n" % s
    sys.stderr.write(message)


def warn_verbose(s):
    """Emit *s* through warn(), but only when VERBOSE is enabled."""
    if not VERBOSE:
        return
    warn(s)
23
def is_interesting(path):
    """Return True if *path* looks like a file worth scanning for copyrights.

    The check is case-insensitive. Build files, documentation, Python
    sources and editor swap files are rejected by extension; NOTICE, README
    and pylintrc files are rejected by name (only when preceded by a "/").
    """
    lowered = path.lower()
    extension = os.path.splitext(lowered)[1]
    if extension in (".bp", ".map", ".md", ".mk", ".py", ".pyc", ".swp", ".txt"):
        return False
    return not lowered.endswith(("/notice", "/readme", "/pylintrc"))
41
def is_auto_generated(content):
    """Return True if *content* carries one of the known generator banners."""
    banners = (
        "Generated by gensyscalls.py",
        "generated by genserv.py",
        "This header was automatically generated from a Linux kernel header",
    )
    return any(banner in content for banner in banners)
48
# Every distinct copyright notice extracted so far (cleaned-up text).
copyrights = set()

# Any of these substrings on a line ends the copyright header being read.
_HEADER_TERMINATORS = (
    "*/",
    "\t@(#)",
    "\tfrom: @(#)",
    "From: @(#)",
    "from OpenBSD:",
    "\tcitrus Id: ",
    "\t$Citrus: ",
    "\t$OpenBSD: ",
    " $FreeBSD: ",
    "\t$NetBSD: ",
    "$FreeBSD$",
    "$Citrus$",
    # OpenBSD likes to say where stuff originally came from:
    "Original version ID:",
)

# Comment-only lines that are dropped from the tail of a header.
_TRAILING_CRUFT = (
    " *",
    " * ====================================================",
)


def extract_copyright_at(lines, i):
    """Extract the copyright header around lines[i] into `copyrights`.

    Args:
        lines: the file's content as a list of lines.
        i: index of a line that mentions "Copyright".

    Returns:
        The index at which the caller should resume scanning. This is the
        line that terminated the header, and is always greater than the *i*
        passed in so that a caller looping on the result makes progress.

    If nothing is left after stripping comment formatting, no notice is
    recorded and a warning is written to stderr instead.
    """
    first = i
    is_hash_comment = lines[i].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    start = i
    if not is_hash_comment:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header. For '#' comments an empty line also terminates it.
    while i < len(lines):
        current = lines[i]
        if is_hash_comment and not current:
            break
        if any(marker in current for marker in _HEADER_TERMINATORS):
            break
        i += 1

    # Trim trailing cruft.
    end = i
    while end > 0 and lines[end - 1] in _TRAILING_CRUFT:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in lines[start:end]:
        line = line.replace("\t", " ")
        line = line.replace("/* ", "")
        line = re.sub(r"^ \* ", "", line)
        line = line.replace("** ", "")
        line = line.replace("# ", "")
        if "SPDX-License-Identifier:" in line:
            continue
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line in ("#", " *", "**", "-"):
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail. The emptiness checks matter:
    # the header may have been reduced to nothing at all.
    while clean_lines and clean_lines[0] == "":
        clean_lines.pop(0)
    while clean_lines and clean_lines[-1] == "":
        clean_lines.pop()

    if clean_lines:
        copyrights.add("\n".join(clean_lines))
    else:
        # Don't drop a notice silently: this tool's output is a license file.
        sys.stderr.write(
            "warning: no copyright text extracted at line %d\n" % (first + 1))

    # If the Copyright line itself contained a terminator the scan above
    # never moved; step past it so the caller cannot loop forever.
    return max(i, first + 1)
119
Elliott Hughes387d4b72012-08-09 15:17:46 -0700120
def do_file(path):
    """Scan one file and record every copyright header found in it.

    The file is decoded as UTF-8, falling back to ISO-8859-1 (with a
    warning) when it is not valid UTF-8. Files of four lines or fewer,
    auto-generated files and public-domain files are skipped; any other
    file without the word "Copyright" produces a warning.
    """
    # Read the raw bytes exactly once and close the file promptly; decoding
    # the bytes ourselves works the same on Python 2 and Python 3.
    with open(path, "rb") as the_file:
        raw = the_file.read()
    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        content = raw.decode("iso-8859-1")

    lines = content.split("\n")

    if len(lines) <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if is_auto_generated(content):
        warn_verbose("ignoring auto-generated file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn_verbose("ignoring public domain file %s" % path)
            return
        warn('no copyright notice found in "%s" (%d lines)' % (path, len(lines)))
        return

    # Manually iterate because extract_copyright_at tells us how many lines to skip.
    i = 0
    while i < len(lines):
        if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
            # Always advance by at least one line so that a call which makes
            # no progress cannot stall the scan.
            i = max(extract_copyright_at(lines, i), i + 1)
        else:
            i += 1
153
154
def do_dir(path):
    """Recursively scan every interesting file under directory *path*.

    ".git" directories are never entered. Sub-directories and files are
    visited in sorted order so that warnings come out in a stable order.
    """
    # Walk the directory we were given, not whatever a global happens to hold.
    for directory, sub_directories, filenames in os.walk(path):
        # os.walk only honours changes made to this list in place, so prune
        # and sort the list object itself rather than rebinding the name.
        if ".git" in sub_directories:
            sub_directories.remove(".git")
        sub_directories.sort()

        for filename in sorted(filenames):
            file_path = os.path.join(directory, filename)
            if is_interesting(file_path):
                do_file(file_path)
Elliott Hughes387d4b72012-08-09 15:17:46 -0700165
Elliott Hughes387d4b72012-08-09 15:17:46 -0700166
# Script driver: scan each command-line argument (default: the current
# directory), then print every distinct copyright notice in sorted order.
args = sys.argv[1:]
if not args:
    args = ["."]

# NOTE: the loop variable is deliberately named `arg`; code elsewhere in
# this file may read it as a global, so do not rename it.
for arg in args:
    if os.path.isdir(arg):
        do_dir(arg)
    else:
        do_file(arg)

# Write UTF-8 bytes directly so the output is identical on Python 2 (where
# sys.stdout accepts byte strings) and Python 3 (where sys.stdout.buffer does).
_out = getattr(sys.stdout, "buffer", sys.stdout)
_separator = b"\n\n-------------------------------------------------------------------\n\n"
for notice in sorted(copyrights):
    _out.write(notice.encode("utf-8") + _separator)

sys.exit(0)