blob: 17429e1322f405d5ecb9316f1fcf6d18cc9bbc97 [file] [log] [blame]
#!/usr/bin/env python
# Run with directory arguments from any directory, with no special setup required.
# Or:
# for i in libc libdl libm linker libstdc++ ; do ./libc/tools/generate-NOTICE.py $i > $i/NOTICE ; done
Elliott Hughes387d4b72012-08-09 15:17:46 -07005
6import ftplib
7import hashlib
8import os
9import re
10import shutil
11import string
12import subprocess
13import sys
14import tarfile
15import tempfile
16
# When True, warn_verbose() messages are emitted; when False they are dropped.
VERBOSE = False


def warn(s):
    """Write `s` to standard error as a single "warning: ..." line."""
    message = "warning: %s\n" % s
    sys.stderr.write(message)


def warn_verbose(s):
    """Forward `s` to warn(), but only when VERBOSE is switched on."""
    if not VERBOSE:
        return
    warn(s)
25
def is_interesting(path):
    """Return True if the file at `path` should be scanned for copyrights.

    The comparison is case-insensitive. Files named "notice" or "readme"
    inside a directory, and files with one of the extensions listed below,
    are not interesting.
    """
    lowered = path.lower()

    # NOTICE and README files are skipped outright.
    for boring_name in ("/notice", "/readme"):
        if lowered.endswith(boring_name):
            return False

    extension = os.path.splitext(lowered)[1]
    return extension not in [
        ".bp",
        ".map",
        ".mk",
        ".py",
        ".pyc",
        ".swp",
        ".txt",
    ]
42
def is_auto_generated(content):
    """Return True if `content` carries one of the known generator banners."""
    banners = (
        "Generated by gensyscalls.py",
        "generated by genserv.py",
        "This header was automatically generated from a Linux kernel header",
    )
    for banner in banners:
        if banner in content:
            return True
    return False
49
# Every distinct copyright header found so far, as cleaned-up text.
copyrights = set()

# A line containing any of these substrings ends the copyright header.
_HEADER_TERMINATORS = (
    "*/",
    "\t@(#)",
    "\tfrom: @(#)",
    "From: @(#)",
    "from OpenBSD:",
    "\tcitrus Id: ",
    "\t$Citrus: ",
    "\t$OpenBSD: ",
    " $FreeBSD: ",
    "\t$NetBSD: ",
    "$FreeBSD$",
    "$Citrus$",
    # OpenBSD likes to say where stuff originally came from:
    "Original version ID:",
)

# Lines that are dropped from the end of a header before it is cleaned up.
_TRAILING_CRUFT = (
    " *",
    " * ====================================================",
)


def _ends_header(line, is_hash_comment):
    """Return True if `line` terminates the copyright header being read."""
    # In '#'-style comments a completely empty line ends the header.
    if is_hash_comment and len(line) == 0:
        return True
    return any(marker in line for marker in _HEADER_TERMINATORS)


def _clean_comment_line(line):
    """Strip comment decoration from `line`; return None to drop the line."""
    line = line.replace("\t", " ")
    line = line.replace("/* ", "")
    line = re.sub(r"^ \* ", "", line)
    line = line.replace("** ", "")
    line = line.replace("# ", "")
    if "SPDX-License-Identifier:" in line:
        return None
    if line.startswith("++Copyright++"):
        return None
    line = line.replace("--Copyright--", "")
    line = line.rstrip()
    # These come last and take care of "blank" comment lines.
    if line in ("#", " *", "**", "-"):
        return ""
    return line


def extract_copyright_at(lines, i):
    """Extract the copyright header around lines[i] into `copyrights`.

    Args:
        lines: the file's content as a list of lines (no trailing newlines).
        i: index of a line that mentions "Copyright".

    Returns:
        The index at which the caller should resume scanning. It is always
        greater than the `i` passed in, so a caller looping on the return
        value is guaranteed to make progress.
    """
    first = i
    is_hash_comment = lines[i].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    start = i
    if not is_hash_comment:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines) and not _ends_header(lines[i], is_hash_comment):
        i += 1

    # Trim trailing cruft.
    end = i
    while end > start and lines[end - 1] in _TRAILING_CRUFT:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in lines[start:end]:
        cleaned = _clean_comment_line(line)
        if cleaned is not None:
            clean_lines.append(cleaned)

    # Trim blank lines from head and tail. The emptiness checks matter: the
    # header can be empty (for example when the terminator sits on the
    # "Copyright" line itself) or consist only of blank lines.
    while clean_lines and clean_lines[0] == "":
        del clean_lines[0]
    while clean_lines and clean_lines[-1] == "":
        del clean_lines[-1]

    if clean_lines:
        copyrights.add("\n".join(clean_lines))

    # If the very first line already contained a terminator, `i` never moved;
    # returning it unchanged would make the caller spin on the same line.
    return max(i, first + 1)
120
Elliott Hughes387d4b72012-08-09 15:17:46 -0700121
def do_file(path):
    """Scan the file at `path` and record every copyright header it contains.

    Short files (four lines or fewer), auto-generated files, and files that
    never mention "Copyright" are skipped, each with a warning (some of them
    only in verbose mode).
    """
    # Read the raw bytes exactly once, through a handle that is closed as
    # soon as the read finishes, then decode explicitly.
    with open(path, "rb") as the_file:
        raw = the_file.read()

    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        # iso-8859-1 maps every byte to a character, so this cannot fail.
        content = raw.decode("iso-8859-1")

    lines = content.split("\n")

    if len(lines) <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if is_auto_generated(content):
        warn_verbose("ignoring auto-generated file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn("ignoring public domain file %s" % path)
            return
        warn('no copyright notice found in "%s" (%d lines)' % (path, len(lines)))
        return

    # Manually iterate because extract_copyright_at tells us how many lines to skip.
    i = 0
    while i < len(lines):
        if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
            # Always move forward by at least one line so the scan terminates
            # even if the extractor hands back the index it was given.
            i = max(extract_copyright_at(lines, i), i + 1)
        else:
            i += 1
154
155
def do_dir(path):
    """Recursively scan every interesting file beneath the directory `path`.

    ".git" directories are not entered. Sub-directories and files are
    visited in sorted order.
    """
    # Walk the directory we were given, not whatever a module-level variable
    # happens to hold.
    for directory, sub_directories, filenames in os.walk(path):
        # os.walk only honours changes made to this list in place.
        if ".git" in sub_directories:
            sub_directories.remove(".git")
        sub_directories.sort()

        for filename in sorted(filenames):
            file_path = os.path.join(directory, filename)
            if is_interesting(file_path):
                do_file(file_path)
Elliott Hughes387d4b72012-08-09 15:17:46 -0700166
Elliott Hughes387d4b72012-08-09 15:17:46 -0700167
# Scan every path named on the command line; with no arguments, scan the
# current directory.
args = sys.argv[1:]
if not args:
    args = ["."]

for arg in args:
    if os.path.isdir(arg):
        do_dir(arg)
    else:
        do_file(arg)

# Emit each distinct notice, UTF-8 encoded and in sorted order, followed by a
# blank line, a separator line, and another blank line. The bytes are written
# directly (to the underlying binary stream when stdout has one) so this works
# whether `print` is a statement or a function.
output = getattr(sys.stdout, "buffer", sys.stdout)
separator = b"-------------------------------------------------------------------"
for notice in sorted(copyrights):
    output.write(notice.encode("utf-8") + b"\n\n" + separator + b"\n\n")

sys.exit(0)