Stephen Crane | 77bb564 | 2017-08-31 15:08:26 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python |
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 2 | # Run with directory arguments from any directory, with no special setup required. |
Elliott Hughes | 965b867 | 2012-09-13 16:51:57 -0700 | [diff] [blame] | 3 | # Or: |
Elliott Hughes | 6b2b585 | 2014-12-18 16:27:30 -0800 | [diff] [blame] | 4 | # for i in libc libdl libm linker libstdc++ ; do ./libc/tools/generate-NOTICE.py $i > $i/NOTICE ; done |
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 5 | |
| 6 | import ftplib |
| 7 | import hashlib |
| 8 | import os |
| 9 | import re |
| 10 | import shutil |
| 11 | import string |
| 12 | import subprocess |
| 13 | import sys |
| 14 | import tarfile |
| 15 | import tempfile |
| 16 | |
# Gates warn_verbose(): when False, verbose-only warnings are dropped.
VERBOSE = False


def warn(s):
    """Write `s` to stderr as a single "warning: ..." line."""
    message = "warning: %s\n" % s
    sys.stderr.write(message)


def warn_verbose(s):
    """Forward `s` to warn(), but only when VERBOSE is enabled."""
    if not VERBOSE:
        return
    warn(s)
| 25 | |
def is_interesting(path):
    """Return True if the file at `path` should be scanned for copyrights.

    The check is case-insensitive. A file is skipped when its extension is
    one of the listed build/script/text extensions, or when the file itself
    is named NOTICE or README.
    """
    path = path.lower()
    uninteresting_extensions = (
        ".bp",
        ".map",
        ".mk",
        ".py",
        ".pyc",
        ".swp",
        ".txt",
    )
    if os.path.splitext(path)[1] in uninteresting_extensions:
        return False
    # Compare the basename rather than a "/notice" suffix so that a bare
    # "NOTICE" or "README" (no directory component) is also skipped.
    if os.path.basename(path) in ("notice", "readme"):
        return False
    return True
| 42 | |
def is_auto_generated(content):
    """Return True if `content` carries one of the known generator banners."""
    generator_banners = (
        "Generated by gensyscalls.py",
        "generated by genserv.py",
        "This header was automatically generated from a Linux kernel header",
    )
    return any(banner in content for banner in generator_banners)
| 49 | |
# Every distinct copyright header text collected so far.
copyrights = set()

# Substrings that mark the end of a copyright header (mostly BSD-style
# version-control ID lines that follow the license text).
_HEADER_TERMINATORS = (
    "*/",
    "\t@(#)",
    "\tfrom: @(#)",
    "From: @(#)",
    "from OpenBSD:",
    "\tcitrus Id: ",
    "\t$Citrus: ",
    "\t$OpenBSD: ",
    " $FreeBSD: ",
    "\t$NetBSD: ",
    "$FreeBSD$",
    "$Citrus$",
    # OpenBSD likes to say where stuff originally came from:
    "Original version ID:",
)

# Lines that are dropped when they trail the header.
_TRAILING_CRUFT = (
    " *",
    " * ====================================================",
)

# Comment lines that carry no text once the formatting is removed.
_BLANK_COMMENT_LINES = ("#", " *", "**", "-")

# Raw string: "\*" is not a valid escape in a normal string literal.
_LEADING_STAR = re.compile(r"^ \* ")


def _ends_copyright_header(line, hash_style):
    """Return True if `line` terminates the copyright header being read."""
    if hash_style and len(line) == 0:
        return True
    return any(marker in line for marker in _HEADER_TERMINATORS)


def _clean_header_line(line):
    """Strip comment formatting from `line`; return None to drop the line."""
    line = line.replace("\t", " ")
    line = line.replace("/* ", "")
    line = _LEADING_STAR.sub("", line)
    line = line.replace("** ", "")
    line = line.replace("# ", "")
    if "SPDX-License-Identifier:" in line:
        return None
    if line.startswith("++Copyright++"):
        return None
    line = line.replace("--Copyright--", "")
    line = line.rstrip()
    # These come last and take care of "blank" comment lines.
    if line in _BLANK_COMMENT_LINES:
        return ""
    return line


def extract_copyright_at(lines, i):
    """Record the copyright header around `lines[i]` in `copyrights`.

    `lines[i]` is the line that mentions the copyright. For non-"#"
    comments the header start is found by backing up to the line after the
    nearest preceding "/*" (or to the top of the file). The header then
    runs forward until a terminating line is found.

    Returns the index at which the caller should resume scanning. This is
    always greater than the `i` passed in, so callers make progress.
    """
    first = i
    hash_style = lines[first].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    start = first
    if not hash_style:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines) and not _ends_copyright_header(lines[i], hash_style):
        i += 1

    if i == first:
        # The terminator sits on the copyright line itself, e.g.
        # "/* Copyright ... */". Keep that line (minus the comment closer)
        # and step past it; returning `first` unchanged would make the
        # caller spin on the same line forever.
        if "/*" in lines[first]:
            start = first
        header = lines[start:first] + [lines[first].replace("*/", "")]
        i = first + 1
    else:
        end = i
        # Trim trailing cruft.
        while end > start and lines[end - 1] in _TRAILING_CRUFT:
            end -= 1
        header = lines[start:end]

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for raw_line in header:
        cleaned = _clean_header_line(raw_line)
        if cleaned is not None:
            clean_lines.append(cleaned)

    # Trim blank lines from head and tail (the list may end up empty).
    while clean_lines and clean_lines[0] == "":
        del clean_lines[0]
    while clean_lines and clean_lines[-1] == "":
        del clean_lines[-1]

    if clean_lines:
        copyrights.add("\n".join(clean_lines))

    return i
| 120 | |
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 121 | |
def do_file(path):
    """Scan the file at `path` and record every copyright header it contains.

    The file is decoded as UTF-8, falling back to ISO-8859-1 (with a
    warning) when it is not valid UTF-8. Short files (4 lines or fewer),
    auto-generated files, and files that never mention "Copyright" are
    skipped, each with a warning.
    """
    # Read the bytes once and decode explicitly. This closes the file
    # promptly and behaves the same on Python 2 and Python 3.
    with open(path, "rb") as the_file:
        raw = the_file.read()
    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        content = raw.decode("iso-8859-1")

    lines = content.split("\n")

    if len(lines) <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if is_auto_generated(content):
        warn_verbose("ignoring auto-generated file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn("ignoring public domain file %s" % path)
            return
        warn('no copyright notice found in "%s" (%d lines)' % (path, len(lines)))
        return

    # Manually iterate because extract_copyright_at tells us how many lines to skip.
    i = 0
    while i < len(lines):
        if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
            # Always advance by at least one line so that a header which
            # ends on its own first line cannot stall the scan.
            i = max(i + 1, extract_copyright_at(lines, i))
        else:
            i += 1
| 154 | |
| 155 | |
def do_dir(path):
    """Recursively scan every interesting file under the directory `path`.

    ".git" directories are not descended into. Directories and files are
    visited in sorted order.
    """
    # Walk the directory we were given, not a module-level variable.
    for directory, sub_directories, filenames in os.walk(path):
        if ".git" in sub_directories:
            sub_directories.remove(".git")
        # Sort in place: os.walk only honours changes made to the list
        # object it handed out, not a rebound local name.
        sub_directories.sort()

        for filename in sorted(filenames):
            file_path = os.path.join(directory, filename)
            if is_interesting(file_path):
                do_file(file_path)
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 166 | |
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 167 | |
# Script entry point: scan each argument (default: the current directory)
# and print every distinct copyright header, separated by a dashed rule.
if __name__ == "__main__":
    args = sys.argv[1:] or ["."]

    for arg in args:
        if os.path.isdir(arg):
            do_dir(arg)
        else:
            do_file(arg)

    # Emit UTF-8 bytes directly so the output is byte-for-byte the same on
    # Python 2 (sys.stdout takes bytes) and Python 3 (sys.stdout.buffer).
    out = getattr(sys.stdout, "buffer", sys.stdout)
    separator = b"-------------------------------------------------------------------"
    for notice in sorted(copyrights):
        out.write(notice.encode("utf-8") + b"\n\n" + separator + b"\n\n")

    sys.exit(0)