Stephen Crane | 77bb564 | 2017-08-31 15:08:26 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python |
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 2 | # Run with directory arguments from any directory, with no special setup required. |
| 3 | |
| 4 | import ftplib |
| 5 | import hashlib |
| 6 | import os |
| 7 | import re |
| 8 | import shutil |
| 9 | import string |
| 10 | import subprocess |
| 11 | import sys |
| 12 | import tarfile |
| 13 | import tempfile |
| 14 | |
# When True, warn_verbose() messages are emitted; otherwise they are dropped.
VERBOSE = False


def warn(s):
    """Write s to stderr as one line prefixed with "warning: "."""
    message = "warning: %s\n" % s
    sys.stderr.write(message)


def warn_verbose(s):
    """Forward s to warn(), but only while the VERBOSE flag is set."""
    if not VERBOSE:
        return
    warn(s)
| 23 | |
def is_interesting(path):
    """Return True if the file at path should be scanned for copyrights.

    The comparison is case-insensitive. Build files, scripts, editor swap
    files and plain-text files are skipped by extension, as are files
    named NOTICE or README that sit inside a directory.
    """
    lowered = path.lower()
    _, extension = os.path.splitext(lowered)
    if extension in (".bp", ".map", ".mk", ".py", ".pyc", ".swp", ".txt"):
        return False
    # Only a path component named exactly "notice"/"readme" is excluded;
    # the leading "/" keeps names like "mynotice" interesting.
    return not lowered.endswith(("/notice", "/readme"))
| 40 | |
def is_auto_generated(content):
    """Return True if content carries one of the known generator banners.

    The match is a case-sensitive substring test against the whole file
    text.
    """
    generator_banners = (
        "Generated by gensyscalls.py",
        "generated by genserv.py",
        "This header was automatically generated from a Linux kernel header",
    )
    return any(banner in content for banner in generator_banners)
| 47 | |
# Every distinct copyright notice collected so far, as cleaned-up text.
copyrights = set()

# Any of these substrings on a line marks the end of a copyright header:
# the close of a C comment, or a version-control / provenance tag.
_HEADER_TERMINATORS = (
    "*/",
    "\t@(#)",
    "\tfrom: @(#)",
    "From: @(#)",
    "from OpenBSD:",
    "\tcitrus Id: ",
    "\t$Citrus: ",
    "\t$OpenBSD: ",
    " $FreeBSD: ",
    "\t$NetBSD: ",
    "$FreeBSD$",
    "$Citrus$",
    # OpenBSD likes to say where stuff originally came from:
    "Original version ID:",
)

# Raw lines that are dropped from the tail of a header before cleaning.
_TRAILING_CRUFT = (
    " *",
    " * ====================================================",
)


def _ends_copyright_header(line, is_hash_comment):
    """Return True if line terminates the copyright header being read."""
    # A "#"-style header has no closing token, so a blank line ends it.
    if is_hash_comment and len(line) == 0:
        return True
    return any(marker in line for marker in _HEADER_TERMINATORS)


def _strip_comment_markup(line):
    """Return line without comment decoration, or None to drop the line."""
    line = line.replace("\t", " ")
    line = line.replace("/* ", "")
    line = re.sub(r"^ \* ", "", line)
    line = line.replace("** ", "")
    line = line.replace("# ", "")
    if "SPDX-License-Identifier:" in line:
        return None
    if line.startswith("++Copyright++"):
        return None
    line = line.replace("--Copyright--", "")
    line = line.rstrip()
    # These come last and take care of "blank" comment lines.
    if line in ("#", " *", "**", "-"):
        line = ""
    return line


def extract_copyright_at(lines, i):
    """Record the copyright header around lines[i] in `copyrights`.

    Args:
        lines: the file's content as a list of lines without newlines.
        i: index of a line that contains "Copyright".

    Returns:
        The index at which the caller should resume scanning: the line
        that terminated the header (or len(lines) if none did), and
        always greater than the `i` passed in so the caller makes
        progress.
    """
    first = i
    is_hash_comment = lines[i].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    # For C-style comments the header starts just after the "/*" line.
    start = i
    if not is_hash_comment:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines) and not _ends_copyright_header(lines[i], is_hash_comment):
        i += 1

    if i == first:
        # The terminator is on the "Copyright" line itself, so the header
        # has no body that excludes it. Previously this raised IndexError
        # or returned without advancing; report it and move on instead.
        sys.stderr.write(
            "warning: copyright header ends on its first line, skipping: %r\n"
            % lines[first])
        return first + 1

    # Trim trailing cruft.
    end = i
    while end > start and lines[end - 1] in _TRAILING_CRUFT:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in lines[start:end]:
        cleaned = _strip_comment_markup(line)
        if cleaned is not None:
            clean_lines.append(cleaned)

    # Trim blank lines from head and tail; the list may end up empty.
    while clean_lines and clean_lines[0] == "":
        del clean_lines[0]
    while clean_lines and clean_lines[-1] == "":
        del clean_lines[-1]

    # Never record an empty notice.
    if clean_lines:
        copyrights.add("\n".join(clean_lines))

    return i
| 118 | |
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 119 | |
def do_file(path):
    """Scan one file and record every copyright header found in it.

    The file is decoded as UTF-8, falling back to ISO-8859-1 (with a
    warning) when it is not valid UTF-8. Files of four lines or fewer and
    auto-generated files are skipped with a verbose-only warning; files
    without the word "Copyright" are skipped with a regular warning.

    Args:
        path: path of the file to read.
    """
    # Read the raw bytes exactly once and close the handle promptly; the
    # bytes are decoded explicitly so this works on Python 2 and 3 alike.
    with open(path, "rb") as the_file:
        raw = the_file.read()

    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        # ISO-8859-1 maps every byte to a character, so this cannot fail.
        content = raw.decode("iso-8859-1")

    lines = content.split("\n")

    if len(lines) <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if is_auto_generated(content):
        warn_verbose("ignoring auto-generated file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn("ignoring public domain file %s" % path)
            return
        warn('no copyright notice found in "%s" (%d lines)' % (path, len(lines)))
        return

    # Manually iterate because extract_copyright_at tells us how many lines to skip.
    i = 0
    while i < len(lines):
        if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
            # Always move forward at least one line, even if the header
            # ended on the very line that triggered the extraction.
            i = max(extract_copyright_at(lines, i), i + 1)
        else:
            i += 1
| 152 | |
| 153 | |
def do_dir(path):
    """Recursively scan every interesting file below the directory path.

    ".git" directories are not entered. Directories and files are visited
    in sorted order.

    Args:
        path: root directory to walk.
    """
    # Walk the directory we were given, not whatever a module-level loop
    # variable happens to hold.
    for directory, sub_directories, filenames in os.walk(path):
        # os.walk only honours changes made to this list in place, so
        # prune and sort it without rebinding the name.
        if ".git" in sub_directories:
            sub_directories.remove(".git")
        sub_directories.sort()

        for filename in sorted(filenames):
            file_path = os.path.join(directory, filename)
            if is_interesting(file_path):
                do_file(file_path)
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 164 | |
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 165 | |
# Script driver: scan each path named on the command line (defaulting to
# the current directory), then print every distinct copyright notice in
# sorted order, each followed by a separator line.
args = sys.argv[1:]
if not args:
    args = ["."]

for arg in args:
    if os.path.isdir(arg):
        do_dir(arg)
    else:
        do_file(arg)

# Emit UTF-8 bytes directly. Python 3 exposes the byte stream as
# sys.stdout.buffer; Python 2's sys.stdout accepts byte strings itself.
out = getattr(sys.stdout, "buffer", sys.stdout)
separator = b"-------------------------------------------------------------------"
for notice in sorted(copyrights):
    # Layout: notice, blank line, separator, blank line.
    out.write(notice.encode("utf-8") + b"\n\n" + separator + b"\n\n")

sys.exit(0)