Stephen Crane | 77bb564 | 2017-08-31 15:08:26 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python |
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 2 | # Run with directory arguments from any directory, with no special setup required. |
| 3 | |
| 4 | import ftplib |
| 5 | import hashlib |
| 6 | import os |
| 7 | import re |
| 8 | import shutil |
| 9 | import string |
| 10 | import subprocess |
| 11 | import sys |
| 12 | import tarfile |
| 13 | import tempfile |
| 14 | |
# Module-wide switch: warn_verbose() only emits anything when this is True.
VERBOSE = False


def warn(s):
    """Write `s` to stderr on its own line, prefixed with "warning: "."""
    message = "warning: %s\n" % s
    sys.stderr.write(message)


def warn_verbose(s):
    """Pass `s` on to warn(), but only when the VERBOSE flag is set."""
    if not VERBOSE:
        return
    warn(s)
| 23 | |
# Lower-case file extensions (dot included) that is_interesting() rejects.
_UNINTERESTING_EXTENSIONS = frozenset([
    ".bp",
    ".map",
    ".md",
    ".mk",
    ".py",
    ".pyc",
    ".swp",
    ".txt",
])

# Lower-case path endings that is_interesting() rejects. The leading "/"
# means a bare file name with no directory component does not match.
_UNINTERESTING_SUFFIXES = ("/notice", "/readme", "/pylintrc")


def is_interesting(path):
    """Return True if the file at `path` should be scanned for copyrights.

    The comparison is case-insensitive. A path is rejected when its
    extension is in _UNINTERESTING_EXTENSIONS or when it ends with one of
    _UNINTERESTING_SUFFIXES; everything else is accepted.
    """
    path = path.lower()
    if os.path.splitext(path)[1] in _UNINTERESTING_EXTENSIONS:
        return False
    # str.endswith accepts a tuple, so one call covers every suffix.
    return not path.endswith(_UNINTERESTING_SUFFIXES)
| 41 | |
def is_auto_generated(content):
    """Return True if `content` contains one of the generated-file markers."""
    generated_markers = (
        "Generated by gensyscalls.py",
        "generated by genserv.py",
        "This header was automatically generated from a Linux kernel header",
    )
    for marker in generated_markers:
        if marker in content:
            return True
    return False
| 48 | |
# Every distinct notice collected so far; extract_copyright_at() adds to it.
copyrights = set()

# A line containing any of these substrings ends the copyright header.
# Besides the comment terminator these are version-control / provenance
# tags (OpenBSD likes to say where stuff originally came from).
_HEADER_TERMINATORS = (
    "*/",
    "\t@(#)",
    "\tfrom: @(#)",
    "From: @(#)",
    "from OpenBSD:",
    "\tcitrus Id: ",
    "\t$Citrus: ",
    "\t$OpenBSD: ",
    " $FreeBSD: ",
    "\t$NetBSD: ",
    "$FreeBSD$",
    "$Citrus$",
    "Original version ID:",
)

# Lines that are dropped when they sit directly before the end of a header.
_TRAILING_CRUFT = (
    " *",
    " * ====================================================",
)

# Matches the " * " that prefixes the lines of a C block comment.
_LEADING_STAR_RE = re.compile(r"^ \* ")


def extract_copyright_at(lines, i):
    """Collect the copyright header around lines[i] into `copyrights`.

    Args:
        lines: the file's content as a list of lines (no trailing newlines).
        i: index of a line that mentions a copyright.

    Returns:
        The index at which the caller should resume scanning: the line that
        terminated the header, or len(lines) if none did. The result is
        always greater than `i`, so a caller looping on it makes progress
        even when the terminator is on line `i` itself.
    """
    first = i
    is_hash_comment = lines[i].startswith("#")

    # Do we need to back up to find the start of the copyright header?
    # For non-'#' comments, walk back until the previous line contains "/*"
    # (or the top of the file is reached).
    start = i
    if not is_hash_comment:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header. A '#' header also ends at the first empty line.
    while i < len(lines):
        line = lines[i]
        if is_hash_comment and not line:
            break
        if any(marker in line for marker in _HEADER_TERMINATORS):
            break
        i += 1

    # Trim trailing cruft.
    end = i
    while end > 0 and lines[end - 1] in _TRAILING_CRUFT:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    # NOTE(review): string literals are kept exactly as they appear in the
    # reviewed listing; if that listing collapsed runs of spaces, confirm
    # the intended tab-expansion width below.
    clean_lines = []
    for line in lines[start:end]:
        line = line.replace("\t", " ")
        line = line.replace("/* ", "")
        line = _LEADING_STAR_RE.sub("", line)
        line = line.replace("** ", "")
        line = line.replace("# ", "")
        if "SPDX-License-Identifier:" in line:
            continue
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line in ("#", " *", "**", "-"):
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail. Work with indices so that an
    # empty or all-blank result cannot raise IndexError and the list is
    # sliced only once.
    head = 0
    tail = len(clean_lines)
    while head < tail and clean_lines[head] == "":
        head += 1
    while tail > head and clean_lines[tail - 1] == "":
        tail -= 1

    # Only record a notice if some text survived the cleaning.
    if head < tail:
        copyrights.add("\n".join(clean_lines[head:tail]))

    return max(i, first + 1)
| 119 | |
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 120 | |
def do_file(path):
    """Scan the file at `path` and collect every copyright header in it.

    The file is decoded as UTF-8, falling back to ISO-8859-1 (with a
    warning) when that fails. Files of four lines or fewer, auto-generated
    files, and files that never mention "Copyright" are skipped; the last
    case produces a warning unless the file mentions "public domain".
    """
    # Read the raw bytes exactly once and close the handle straight away;
    # decoding bytes works the same way on Python 2 and Python 3.
    with open(path, "rb") as the_file:
        raw = the_file.read()
    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        content = raw.decode("iso-8859-1")

    lines = content.split("\n")

    if len(lines) <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if is_auto_generated(content):
        warn_verbose("ignoring auto-generated file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn_verbose("ignoring public domain file %s" % path)
            return
        warn('no copyright notice found in "%s" (%d lines)' % (path, len(lines)))
        return

    # Manually iterate because extract_copyright_at tells us how many lines to skip.
    i = 0
    while i < len(lines):
        if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
            # Always move forward by at least one line so that a header
            # which ends on the line it starts on cannot stall this loop.
            i = max(extract_copyright_at(lines, i), i + 1)
        else:
            i += 1
| 153 | |
| 154 | |
def do_dir(path):
    """Walk the tree rooted at `path` and scan every interesting file.

    ".git" directories are not descended into. Sub-directories and files
    are visited in sorted order.
    """
    for directory, sub_directories, filenames in os.walk(path):
        # os.walk only honours changes made to this list in place, so prune
        # and sort the list object itself rather than rebinding the name.
        if ".git" in sub_directories:
            sub_directories.remove(".git")
        sub_directories.sort()

        for filename in sorted(filenames):
            file_path = os.path.join(directory, filename)
            if is_interesting(file_path):
                do_file(file_path)
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 165 | |
Elliott Hughes | 387d4b7 | 2012-08-09 15:17:46 -0700 | [diff] [blame] | 166 | |
# Script body: scan every path given on the command line (default: the
# current directory), then print each distinct notice followed by a
# separator line.
args = sys.argv[1:] or ["."]

for arg in args:
    if os.path.isdir(arg):
        do_dir(arg)
    else:
        do_file(arg)

# Write encoded bytes directly so the output is identical on Python 2 and
# Python 3: Python 3 exposes the byte stream as sys.stdout.buffer, while
# Python 2's sys.stdout accepts byte strings itself.
out = getattr(sys.stdout, "buffer", sys.stdout)
for notice in sorted(copyrights):
    out.write(notice.encode("utf-8"))
    out.write(b"\n\n")
    out.write(b"-------------------------------------------------------------------\n")
    out.write(b"\n")
out.flush()

sys.exit(0)