| Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 1 | #!/usr/bin/python | 
|  | 2 | # Run with directory arguments from any directory, with no special setup required. | 
| Elliott Hughes | 965b867 | 2012-09-13 16:51:57 -0700 | [diff] [blame] | 3 | # Or: | 
| Elliott Hughes | 6b2b585 | 2014-12-18 16:27:30 -0800 | [diff] [blame] | 4 | # for i in libc libdl libm linker libstdc++ ; do ./libc/tools/generate-NOTICE.py $i > $i/NOTICE ; done | 
| Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 5 |  | 
|  | 6 | import ftplib | 
|  | 7 | import hashlib | 
|  | 8 | import os | 
|  | 9 | import re | 
|  | 10 | import shutil | 
|  | 11 | import string | 
|  | 12 | import subprocess | 
|  | 13 | import sys | 
|  | 14 | import tarfile | 
|  | 15 | import tempfile | 
|  | 16 |  | 
# Gate for warn_verbose(): its messages are only emitted when this is True.
VERBOSE = False


def warn(s):
    """Write `s` to stderr as a single "warning: ..." line."""
    message = "warning: %s\n" % s
    sys.stderr.write(message)


def warn_verbose(s):
    """Forward `s` to warn(), but only when VERBOSE is enabled."""
    if not VERBOSE:
        return
    warn(s)
|  | 25 |  | 
def is_interesting(path):
    """Return True if `path` should be scanned for copyright headers.

    The comparison is case-insensitive.  Files with a build/script/text
    extension are skipped, as are files named NOTICE or README that sit
    inside a directory (i.e. whose path ends in "/notice" or "/readme").
    """
    lowered = path.lower()
    extension = os.path.splitext(lowered)[1]
    if extension in (".bp", ".map", ".mk", ".py", ".pyc", ".swp", ".txt"):
        return False
    # str.endswith accepts a tuple of alternatives.
    return not lowered.endswith(("/notice", "/readme"))
|  | 42 |  | 
def is_auto_generated(content):
    """Return True if `content` carries one of the known generator banners.

    The match is a case-sensitive substring test against the whole file text.
    """
    banners = (
        "Generated by gensyscalls.py",
        "generated by genserv.py",
        "This header was automatically generated from a Linux kernel header",
    )
    return any(banner in content for banner in banners)
|  | 49 |  | 
# Every distinct copyright notice collected so far, as cleaned-up plain text.
# extract_copyright_at() adds to this set.
copyrights = set()

# Substrings that end a copyright header when they appear in a line.
_HEADER_TERMINATORS = (
    "\t@(#)",
    "\tfrom: @(#)",
    "From: @(#)",
    "from OpenBSD:",
    "\tcitrus Id: ",
    "\t$Citrus: ",
    "\t$OpenBSD: ",
    " $FreeBSD: ",
    "\t$NetBSD: ",
    "$FreeBSD$",
    "$Citrus$",
    # OpenBSD likes to say where stuff originally came from:
    "Original version ID:",
)

# Decoration-only comment lines that are dropped from the end of a header.
_TRAILING_CRUFT = (
    " *",
    " * ====================================================",
)


def extract_copyright_at(lines, i):
    """Extract the copyright header around line `i` into `copyrights`.

    Args:
        lines: the file's text as a list of lines (no trailing newlines).
        i: index of the line in which "Copyright" was spotted.

    Returns:
        The index at which the caller should resume scanning.  This is the
        line that terminated the header, and is always greater than the
        `i` passed in so that a calling loop is guaranteed to make progress.
    """
    first = i
    is_hash_comment = lines[first].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    # "#" comments start at the copyright line.  A C comment starts on the
    # line after the one holding "/*", unless the copyright line itself
    # opens the comment, in which case there is nothing to back up over.
    start = first
    if not is_hash_comment and "/*" not in lines[first]:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines):
        current = lines[i]
        if "*/" in current:
            break
        if is_hash_comment and len(current) == 0:
            break
        if any(marker in current for marker in _HEADER_TERMINATORS):
            break
        i += 1

    # If the terminator sits on the copyright line itself (for example a
    # one-line "/* Copyright ... */" comment), nothing was consumed.
    # Returning the same index would stall the caller's loop forever, so
    # treat that line as the whole tail of the header and step past it.
    ended_on_first_line = (i == first)
    if ended_on_first_line:
        i = first + 1

    end = i

    # Trim trailing cruft, never backing up past the start of the header.
    while end > start and lines[end - 1] in _TRAILING_CRUFT:
        end -= 1

    header = lines[start:end]
    if ended_on_first_line and header:
        # Drop the comment closer (and anything after it) from that line.
        header[-1] = header[-1].split("*/")[0]

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in header:
        line = line.replace("\t", "    ")
        line = line.replace("/* ", "")
        line = re.sub(r"^ \* ", "", line)
        line = line.replace("** ", "")
        line = line.replace("# ", "")
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line in ("#", " *", "**", "-"):
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail (safe when everything is blank).
    head = 0
    tail = len(clean_lines)
    while head < tail and clean_lines[head] == "":
        head += 1
    while tail > head and clean_lines[tail - 1] == "":
        tail -= 1
    clean_lines = clean_lines[head:tail]

    # Only record a notice if some text actually survived the cleanup.
    if clean_lines:
        copyrights.add("\n".join(clean_lines))

    return i
|  | 118 |  | 
| Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 119 |  | 
def do_file(path):
    """Scan one file and record every copyright header it contains.

    The file is decoded as UTF-8, falling back to ISO-8859-1 (with a
    warning) when it is not valid UTF-8.  Files of four lines or fewer,
    auto-generated files, and files without the word "Copyright" are
    skipped; the latter produce a warning.

    Args:
        path: path of the file to read.
    """
    # Read the raw bytes exactly once and close the handle promptly, then
    # decode in memory; the fallback needs no second read.
    with open(path, "rb") as the_file:
        raw = the_file.read()
    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        content = raw.decode("iso-8859-1")

    lines = content.split("\n")

    if len(lines) <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if is_auto_generated(content):
        warn_verbose("ignoring auto-generated file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn("ignoring public domain file %s" % path)
            return
        warn('no copyright notice found in "%s" (%d lines)' % (path, len(lines)))
        return

    # Manually iterate because extract_copyright_at tells us how many lines to skip.
    i = 0
    while i < len(lines):
        if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
            # Always move forward by at least one line, so a header that
            # ends on the copyright line itself cannot stall this loop.
            i = max(extract_copyright_at(lines, i), i + 1)
        else:
            i += 1
|  | 152 |  | 
|  | 153 |  | 
def do_dir(path):
    """Recursively scan every interesting file below the directory `path`.

    ".git" directories are not descended into.  Directories and files are
    visited in sorted order so that warnings come out in a stable order.

    Args:
        path: root directory to walk.
    """
    # Walk the directory we were given, not a module-level variable.
    for directory, sub_directories, filenames in os.walk(path):
        # os.walk only honours changes made to the list in place, so prune
        # and sort the same list object rather than rebinding the name.
        if ".git" in sub_directories:
            sub_directories.remove(".git")
        sub_directories.sort()

        for filename in sorted(filenames):
            file_path = os.path.join(directory, filename)
            if is_interesting(file_path):
                do_file(file_path)
| Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 164 |  | 
| Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 165 |  | 
# Script body: scan each command-line argument (default: the current
# directory), then print every distinct notice followed by a separator.
args = sys.argv[1:]
if len(args) == 0:
    args = ["."]

# NOTE: the loop variable stays a module-level name called `arg`; other code
# in this file may read it, so do not rename it or move it into a function.
for arg in args:
    if os.path.isdir(arg):
        do_dir(arg)
    else:
        do_file(arg)

# Write UTF-8 bytes directly so this works on both Python 2 (where
# sys.stdout takes bytes) and Python 3 (where sys.stdout.buffer does).
_out = getattr(sys.stdout, "buffer", sys.stdout)
for notice in sorted(copyrights):
    _out.write(notice.encode("utf-8"))
    _out.write(b"\n\n-------------------------------------------------------------------\n\n")

sys.exit(0)