blob: 72184454cf9d8ef147ba078e9861bfe53283bfb8 [file] [log] [blame]
#!/usr/bin/env python
# Run with directory arguments from any directory, with no special setup required.
3
4import ftplib
5import hashlib
6import os
7import re
8import shutil
9import string
10import subprocess
11import sys
12import tarfile
13import tempfile
14
# When True, messages passed to warn_verbose() are emitted as well.
VERBOSE = False


def warn(s):
    """Write `s` to standard error as a single "warning: ..." line."""
    message = "warning: %s\n" % s
    sys.stderr.write(message)
19
def warn_verbose(s):
    """Forward `s` to warn(), but only when the VERBOSE flag is set."""
    if not VERBOSE:
        return
    warn(s)
23
def is_interesting(path):
    """Decide whether the file at `path` should be scanned.

    The check is case-insensitive. A path is rejected when its extension is
    one of a fixed list (build files, scripts, text files, editor swap
    files) or when it ends in "/notice" or "/readme".

    Returns:
        True if the file should be scanned, False otherwise.
    """
    lowered = path.lower()
    _, extension = os.path.splitext(lowered)
    if extension in (".bp", ".map", ".mk", ".py", ".pyc", ".swp", ".txt"):
        return False
    # Only matches when the name is preceded by a directory separator.
    return not lowered.endswith(("/notice", "/readme"))
40
def is_auto_generated(content):
    """Report whether `content` carries one of the known generator banners.

    The match is a case-sensitive substring search over the whole text.
    """
    banners = (
        "Generated by gensyscalls.py",
        "generated by genserv.py",
        "This header was automatically generated from a Linux kernel header",
    )
    for banner in banners:
        if banner in content:
            return True
    return False
47
# Every distinct copyright header found so far, as cleaned-up text.
copyrights = set()

# Substrings that mark the end of a copyright header (mostly version-control
# id lines that BSD-derived sources place right after the license text).
_HEADER_END_MARKERS = (
    "\t@(#)",
    "\tfrom: @(#)",
    "From: @(#)",
    "from OpenBSD:",
    "\tcitrus Id: ",
    "\t$Citrus: ",
    "\t$OpenBSD: ",
    " $FreeBSD: ",
    "\t$NetBSD: ",
    "$FreeBSD$",
    "$Citrus$",
    # OpenBSD likes to say where stuff originally came from:
    "Original version ID:",
)

# Decoration-only lines that are dropped from the tail of a header.
_TRAILING_CRUFT = (
    " *",
    " * ====================================================",
)


def _ends_copyright_header(line, is_hash_comment):
    """Return True if `line` terminates the header being read."""
    if "*/" in line:
        return True
    # A "#"-style comment block ends at the first empty line.
    if is_hash_comment and len(line) == 0:
        return True
    return any(marker in line for marker in _HEADER_END_MARKERS)


def _clean_header_line(line):
    """Strip C/assembler/shell comment decoration from one header line.

    Returns the bare text ("" for a blank comment line), or None when the
    line must be dropped from the header entirely.
    """
    line = line.replace("\t", " ")
    line = line.replace("/* ", "")
    line = re.sub(r"^ \* ", "", line)
    line = line.replace("** ", "")
    line = line.replace("# ", "")
    if "SPDX-License-Identifier:" in line:
        return None
    if line.startswith("++Copyright++"):
        return None
    line = line.replace("--Copyright--", "")
    line = line.rstrip()
    # These come last and take care of "blank" comment lines.
    if line in ("#", " *", "**", "-"):
        return ""
    return line


def extract_copyright_at(lines, i):
    """Extract the copyright header around lines[i] into `copyrights`.

    Args:
        lines: The file's content as a list of lines (no newlines).
        i: Index of a line that mentions "Copyright".

    Returns:
        The index at which the caller should resume scanning. It is always
        greater than `i`, so a caller looping on the result makes progress
        even when lines[i] is itself a terminator.

    Nothing is recorded (and no exception is raised) when the header turns
    out to be empty after cleaning.
    """
    first = i
    is_hash_comment = lines[first].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    # Not for "#" comments, and not when the comment opens on this very line
    # (backing up then would drag in unrelated preceding lines).
    start = first
    if not is_hash_comment and "/*" not in lines[first]:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    if "*/" in lines[first]:
        # The comment closes on the copyright line itself: keep the text up
        # to the closer rather than discarding the notice.
        header = lines[start:first] + [lines[first].split("*/", 1)[0]]
        resume_at = first + 1
    else:
        # Read comment lines until we hit something that terminates a
        # copyright header.
        end = first
        while end < len(lines) and not _ends_copyright_header(lines[end], is_hash_comment):
            end += 1
        resume_at = max(end, first + 1)

        # Trim trailing cruft.
        while end > 0 and lines[end - 1] in _TRAILING_CRUFT:
            end -= 1
        header = lines[start:end]

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for raw_line in header:
        cleaned = _clean_header_line(raw_line)
        if cleaned is not None:
            clean_lines.append(cleaned)

    # Trim blank lines from head and tail (safe on empty/all-blank input).
    head = 0
    tail = len(clean_lines)
    while head < tail and clean_lines[head] == "":
        head += 1
    while tail > head and clean_lines[tail - 1] == "":
        tail -= 1
    clean_lines = clean_lines[head:tail]

    if clean_lines:
        copyrights.add("\n".join(clean_lines))

    return resume_at
118
Elliott Hughes387d4b72012-08-09 15:17:46 -0700119
def do_file(path):
    """Scan one file and record every copyright header found in it.

    The file is decoded as UTF-8, falling back to ISO-8859-1 (with a
    warning) when it is not valid UTF-8. Files of four lines or fewer and
    auto-generated files are skipped with a verbose-only warning; files
    with no "Copyright" text are skipped with a warning.

    Args:
        path: Path of the file to scan.
    """
    # Read the bytes exactly once through the managed handle, so nothing is
    # left open and both decode attempts see the same data. Binary mode
    # makes the explicit decode below valid on any Python version.
    with open(path, "rb") as the_file:
        raw = the_file.read()

    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        content = raw.decode("iso-8859-1")

    lines = content.split("\n")

    if len(lines) <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if is_auto_generated(content):
        warn_verbose("ignoring auto-generated file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn("ignoring public domain file %s" % path)
            return
        warn('no copyright notice found in "%s" (%d lines)' % (path, len(lines)))
        return

    # Manually iterate because extract_copyright_at tells us how many lines to skip.
    i = 0
    while i < len(lines):
        if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
            # Always advance by at least one line so a header that ends on
            # the line where it was found cannot stall the scan.
            i = max(extract_copyright_at(lines, i), i + 1)
        else:
            i += 1
152
153
def do_dir(path):
    """Recursively scan every interesting file below `path`.

    Directories named ".git" are not descended into. Sub-directories and
    files are visited in sorted order.

    Args:
        path: Root directory to walk.
    """
    # Walk the directory we were given, not whatever a module-level loop
    # variable happens to hold.
    for directory, sub_directories, filenames in os.walk(path):
        # Both adjustments must mutate the list os.walk handed us: it
        # consults this same list to decide where to descend next.
        if ".git" in sub_directories:
            sub_directories.remove(".git")
        sub_directories.sort()

        for filename in sorted(filenames):
            file_path = os.path.join(directory, filename)
            if is_interesting(file_path):
                do_file(file_path)
Elliott Hughes387d4b72012-08-09 15:17:46 -0700164
Elliott Hughes387d4b72012-08-09 15:17:46 -0700165
# Scan the paths given on the command line, or the current directory if none.
args = sys.argv[1:] or ["."]

# `arg` is deliberately left as a module-level name so that any code reading
# it as a global keeps working.
for arg in args:
    if not os.path.isdir(arg):
        do_file(arg)
    else:
        do_dir(arg)

# Emit every distinct header in sorted order, each followed by a blank line,
# a separator rule and another blank line.
for notice in sorted(copyrights):
    sys.stdout.write(notice.encode("utf-8") + "\n")
    sys.stdout.write("\n")
    sys.stdout.write("-------------------------------------------------------------------" + "\n")
    sys.stdout.write("\n")

sys.exit(0)