| Stephen Crane | 77bb564 | 2017-08-31 15:08:26 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python | 
| Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 2 | # Run with directory arguments from any directory, with no special setup required. | 
 | 3 |  | 
 | 4 | import ftplib | 
 | 5 | import hashlib | 
 | 6 | import os | 
 | 7 | import re | 
 | 8 | import shutil | 
 | 9 | import string | 
 | 10 | import subprocess | 
 | 11 | import sys | 
 | 12 | import tarfile | 
 | 13 | import tempfile | 
 | 14 |  | 
# When True, low-priority diagnostics are emitted as well as real warnings.
VERBOSE = False


def warn(s):
    """Write *s* to stderr as a single "warning: ..." line."""
    message = "warning: %s\n" % s
    sys.stderr.write(message)


def warn_verbose(s):
    """Emit *s* through warn(), but only when VERBOSE is enabled."""
    if not VERBOSE:
        return
    warn(s)
 | 23 |  | 
 | 24 | def is_interesting(path): | 
 | 25 |     path = path.lower() | 
 | 26 |     uninteresting_extensions = [ | 
 | 27 |         ".bp", | 
 | 28 |         ".map", | 
| Elliott Hughes | c5db38a | 2020-06-15 17:26:58 -0700 | [diff] [blame] | 29 |         ".md", | 
| Elliott Hughes | aac7c3a | 2017-07-14 10:00:32 -0700 | [diff] [blame] | 30 |         ".mk", | 
 | 31 |         ".py", | 
 | 32 |         ".pyc", | 
 | 33 |         ".swp", | 
 | 34 |         ".txt", | 
 | 35 |     ] | 
 | 36 |     if os.path.splitext(path)[1] in uninteresting_extensions: | 
 | 37 |         return False | 
| Elliott Hughes | c5db38a | 2020-06-15 17:26:58 -0700 | [diff] [blame] | 38 |     if path.endswith("/notice") or path.endswith("/readme") or path.endswith("/pylintrc"): | 
| Elliott Hughes | aac7c3a | 2017-07-14 10:00:32 -0700 | [diff] [blame] | 39 |         return False | 
 | 40 |     return True | 
 | 41 |  | 
 | 42 | def is_auto_generated(content): | 
| Elliott Hughes | 22a0d6f | 2014-03-06 15:10:22 -0800 | [diff] [blame] | 43 |     if "Generated by gensyscalls.py" in content or "generated by genserv.py" in content: | 
| Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 44 |         return True | 
 | 45 |     if "This header was automatically generated from a Linux kernel header" in content: | 
 | 46 |         return True | 
 | 47 |     return False | 
 | 48 |  | 
 | 49 | copyrights = set() | 
 | 50 |  | 
| Elliott Hughes | aac7c3a | 2017-07-14 10:00:32 -0700 | [diff] [blame] | 51 | def extract_copyright_at(lines, i): | 
| Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 52 |     hash = lines[i].startswith("#") | 
 | 53 |  | 
| Elliott Hughes | 261e223 | 2012-08-14 15:04:05 -0700 | [diff] [blame] | 54 |     # Do we need to back up to find the start of the copyright header? | 
 | 55 |     start = i | 
 | 56 |     if not hash: | 
 | 57 |         while start > 0: | 
 | 58 |             if "/*" in lines[start - 1]: | 
 | 59 |                 break | 
 | 60 |             start -= 1 | 
 | 61 |  | 
| Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 62 |     # Read comment lines until we hit something that terminates a | 
 | 63 |     # copyright header. | 
| Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 64 |     while i < len(lines): | 
 | 65 |         if "*/" in lines[i]: | 
 | 66 |             break | 
 | 67 |         if hash and len(lines[i]) == 0: | 
 | 68 |             break | 
 | 69 |         if "\t@(#)" in lines[i] or "\tfrom: @(#)" in lines[i] or "From: @(#)" in lines[i] or "from OpenBSD:" in lines[i]: | 
 | 70 |             break | 
 | 71 |         if "\tcitrus Id: " in lines[i]: | 
 | 72 |             break | 
| Elliott Hughes | bfa582d | 2014-05-05 14:58:17 -0700 | [diff] [blame] | 73 |         if "\t$Citrus: " in lines[i] or "\t$OpenBSD: " in lines[i] or " $FreeBSD: " in lines[i] or "\t$NetBSD: " in lines[i]: | 
| Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 74 |             break | 
 | 75 |         if "$FreeBSD$" in lines[i] or "$Citrus$" in lines[i]: | 
 | 76 |             break | 
| Elliott Hughes | bfa582d | 2014-05-05 14:58:17 -0700 | [diff] [blame] | 77 |         # OpenBSD likes to say where stuff originally came from: | 
 | 78 |         if "Original version ID:" in lines[i]: | 
 | 79 |             break | 
| Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 80 |         i += 1 | 
 | 81 |  | 
 | 82 |     end = i | 
 | 83 |  | 
 | 84 |     # Trim trailing cruft. | 
 | 85 |     while end > 0: | 
 | 86 |         if lines[end - 1] != " *" and lines[end - 1] != " * ====================================================": | 
 | 87 |             break | 
 | 88 |         end -= 1 | 
 | 89 |  | 
 | 90 |     # Remove C/assembler comment formatting, pulling out just the text. | 
 | 91 |     clean_lines = [] | 
 | 92 |     for line in lines[start:end]: | 
 | 93 |         line = line.replace("\t", "    ") | 
 | 94 |         line = line.replace("/* ", "") | 
| Elliott Hughes | 3758a24 | 2014-07-22 21:24:47 -0700 | [diff] [blame] | 95 |         line = re.sub("^ \* ", "", line) | 
| Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 96 |         line = line.replace("** ", "") | 
 | 97 |         line = line.replace("# ", "") | 
| Elliott Hughes | ab52807 | 2018-07-24 00:01:52 +0000 | [diff] [blame] | 98 |         if "SPDX-License-Identifier:" in line: | 
 | 99 |             continue | 
| Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 100 |         if line.startswith("++Copyright++"): | 
 | 101 |             continue | 
 | 102 |         line = line.replace("--Copyright--", "") | 
 | 103 |         line = line.rstrip() | 
 | 104 |         # These come last and take care of "blank" comment lines. | 
 | 105 |         if line == "#" or line == " *" or line == "**" or line == "-": | 
 | 106 |             line = "" | 
 | 107 |         clean_lines.append(line) | 
 | 108 |  | 
 | 109 |     # Trim blank lines from head and tail. | 
 | 110 |     while clean_lines[0] == "": | 
 | 111 |         clean_lines = clean_lines[1:] | 
 | 112 |     while clean_lines[len(clean_lines) - 1] == "": | 
 | 113 |         clean_lines = clean_lines[0:(len(clean_lines) - 1)] | 
 | 114 |  | 
 | 115 |     copyright = "\n".join(clean_lines) | 
 | 116 |     copyrights.add(copyright) | 
 | 117 |  | 
 | 118 |     return i | 
 | 119 |  | 
| Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 120 |  | 
def do_file(path):
    """Scan the file at *path* and record every copyright header it contains.

    Files with four or fewer lines, auto-generated files, and files without
    the word "Copyright" are skipped; the last case produces a warning
    unless the file declares itself public domain (then only a verbose one).
    Content that is not valid UTF-8 is reported and re-read as ISO-8859-1.
    """
    # Read the raw bytes exactly once and close the handle straight away;
    # decoding bytes works the same on Python 2 and Python 3.
    with open(path, "rb") as the_file:
        raw = the_file.read()
    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        # ISO-8859-1 maps every byte to a character, so this cannot fail.
        content = raw.decode("iso-8859-1")

    lines = content.split("\n")

    if len(lines) <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if is_auto_generated(content):
        warn_verbose("ignoring auto-generated file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn_verbose("ignoring public domain file %s" % path)
            return
        warn('no copyright notice found in "%s" (%d lines)' % (path, len(lines)))
        return

    # Manually iterate because extract_copyright_at tells us how many lines to skip.
    i = 0
    while i < len(lines):
        if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
            # Always move forward by at least one line so a header that
            # ends on the line it starts on cannot stall the scan.
            i = max(extract_copyright_at(lines, i), i + 1)
        else:
            i += 1
 | 153 |  | 
 | 154 |  | 
 | 155 | def do_dir(path): | 
| Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 156 |     for directory, sub_directories, filenames in os.walk(arg): | 
 | 157 |         if ".git" in sub_directories: | 
 | 158 |             sub_directories.remove(".git") | 
 | 159 |         sub_directories = sorted(sub_directories) | 
 | 160 |  | 
 | 161 |         for filename in sorted(filenames): | 
 | 162 |             path = os.path.join(directory, filename) | 
| Elliott Hughes | aac7c3a | 2017-07-14 10:00:32 -0700 | [diff] [blame] | 163 |             if is_interesting(path): | 
 | 164 |                 do_file(path) | 
| Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 165 |  | 
| Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 166 |  | 
# Script driver: scan each command-line argument (defaulting to the current
# directory), then print every distinct copyright header in sorted order.
args = sys.argv[1:]
if len(args) == 0:
    args = ["."]

for arg in args:
    if os.path.isdir(arg):
        do_dir(arg)
    else:
        do_file(arg)

# Emit UTF-8 bytes regardless of the locale. Python 3 exposes the underlying
# byte stream as sys.stdout.buffer; Python 2's sys.stdout takes bytes as-is.
output = getattr(sys.stdout, "buffer", sys.stdout)
for notice in sorted(copyrights):
    output.write(notice.encode("utf-8") + b"\n")
    output.write(b"\n")
    output.write(b"-------------------------------------------------------------------\n")
    output.write(b"\n")

sys.exit(0)