#!/usr/bin/python
# Run with directory arguments from any directory, with no special setup required.
# Or:
# for i in libc libdl libm linker libstdc++ libthread_db ; do ./libc/tools/generate-NOTICE.py $i > $i/NOTICE ; done
|  |  | 
|  | import ftplib | 
|  | import hashlib | 
|  | import os | 
|  | import re | 
|  | import shutil | 
|  | import string | 
|  | import subprocess | 
|  | import sys | 
|  | import tarfile | 
|  | import tempfile | 
|  |  | 
def IsUninteresting(path):
    """Return True if `path` names a file that should be skipped entirely.

    The check is case-insensitive and looks only at the end of the path:
    a handful of file extensions, plus a few specific file names that must
    appear as a complete final path component (or components).
    """
    skipped_endings = (
        # File extensions.
        ".mk", ".py", ".pyc", ".txt", ".3",
        # Whole file names; the leading "/" means a bare "NOTICE" with no
        # directory part does not match.
        "/notice", "/readme", "/caveats",
        "/tzdata", "/zoneinfo/generate",
    )
    return path.lower().endswith(skipped_endings)
|  |  | 
def IsAutoGenerated(content):
    """Return True if `content` carries one of the known generated-file banners.

    `content` is the full text of a file; the banners are matched as exact,
    case-sensitive substrings anywhere in it.
    """
    generated_banners = (
        "Generated by gensyscalls.py",
        "generated by genserv.py",
        "This header was automatically generated from a Linux kernel header",
    )
    for banner in generated_banners:
        if banner in content:
            return True
    return False
|  |  | 
# Every distinct copyright notice found so far, as cleaned-up text.
# ExtractCopyrightAt() adds to this set; being a set, duplicates collapse.
copyrights = set()

# Substrings that mark the end of a copyright header even though the
# enclosing comment continues (mostly upstream version-control ID tags).
_HEADER_TERMINATORS = (
    "\t@(#)", "\tfrom: @(#)", "From: @(#)", "from OpenBSD:",
    "\tcitrus Id: ",
    "\t$Citrus: ", "\t$OpenBSD: ", " $FreeBSD: ", "\t$NetBSD: ",
    "$FreeBSD$", "$Citrus$",
    # OpenBSD likes to say where stuff originally came from:
    "Original version ID:",
)

# Lines that are dropped when they are the last lines of a header.
_TRAILING_CRUFT = (
    " *",
    " * ====================================================",
)


def ExtractCopyrightAt(lines, i):
    """Extract the copyright header around lines[i] into `copyrights`.

    Args:
        lines: the file's content as a list of lines (no trailing newlines).
        i: index of a line that contains a copyright statement.

    Returns:
        The index of the line that ended the header (a comment close, a
        blank line after a '#' comment, or one of _HEADER_TERMINATORS), or
        len(lines) if the header ran to the end of the file.

    Side effects:
        Adds the cleaned-up header text to the module-level `copyrights`
        set. If no text could be extracted (for example a one-line
        "/* Copyright ... */" comment, where the closing "*/" stops the
        scan immediately) a warning is written to stderr and nothing is
        added; previously this case raised IndexError.
    """
    first = i
    is_hash_comment = lines[i].startswith("#")

    # For C-style comments, back up to the line just after the nearest
    # preceding "/*" (or to the top of the file if there is none).
    start = i
    if not is_hash_comment:
        while start > 0 and "/*" not in lines[start - 1]:
            start -= 1

    # Read comment lines until we hit something that terminates a
    # copyright header.
    while i < len(lines):
        current = lines[i]
        if "*/" in current:
            break
        if is_hash_comment and len(current) == 0:
            break
        if any(marker in current for marker in _HEADER_TERMINATORS):
            break
        i += 1

    # Trim trailing cruft.
    end = i
    while end > 0 and lines[end - 1] in _TRAILING_CRUFT:
        end -= 1

    # Remove C/assembler comment formatting, pulling out just the text.
    clean_lines = []
    for line in lines[start:end]:
        line = line.replace("\t", "    ")
        line = line.replace("/* ", "")
        line = re.sub(r"^ \* ", "", line)
        line = line.replace("** ", "")
        line = line.replace("# ", "")
        if line.startswith("++Copyright++"):
            continue
        line = line.replace("--Copyright--", "")
        line = line.rstrip()
        # These come last and take care of "blank" comment lines.
        if line == "#" or line == " *" or line == "**" or line == "-":
            line = ""
        clean_lines.append(line)

    # Trim blank lines from head and tail. The emptiness checks matter:
    # the slice above can be empty or consist only of blank lines.
    while clean_lines and clean_lines[0] == "":
        del clean_lines[0]
    while clean_lines and clean_lines[-1] == "":
        del clean_lines[-1]

    if clean_lines:
        copyrights.add("\n".join(clean_lines))
    else:
        # Don't drop a notice silently: tell the operator to look at it.
        sys.stderr.write('warning: could not extract copyright text at line %d\n' % (first + 1))

    return i
|  |  | 
def _ReadSourceText(path):
    """Return the contents of the file at `path` as text.

    The bytes are decoded as UTF-8. If that fails, a warning is written to
    stderr and the same bytes are decoded as ISO-8859-1 instead.
    """
    # Read raw bytes once and close the handle straight away; decoding the
    # bytes explicitly works the same way on Python 2 and Python 3.
    with open(path, 'rb') as source_file:
        raw = source_file.read()
    try:
        return raw.decode('utf-8')
    except UnicodeDecodeError:
        sys.stderr.write('warning: bad UTF-8 in %s\n' % path)
        return raw.decode('iso-8859-1')


# Directories to scan come from the command line; default to the current one.
args = sys.argv[1:]
if not args:
    args = ["."]

for arg in args:
    sys.stderr.write('Searching for source files in "%s"...\n' % arg)

    for directory, sub_directories, filenames in os.walk(arg):
        if ".git" in sub_directories:
            sub_directories.remove(".git")
        # Sort in place: os.walk() only honours changes made to the list
        # object it handed us, so rebinding the name would have no effect.
        sub_directories.sort()

        for filename in sorted(filenames):
            path = os.path.join(directory, filename)
            if IsUninteresting(path):
                continue

            content = _ReadSourceText(path)
            lines = content.split("\n")

            # Files this short are skipped without a warning.
            if len(lines) <= 4:
                continue

            if IsAutoGenerated(content):
                continue

            if "Copyright" not in content:
                # Files that declare themselves public domain are skipped
                # quietly; anything else gets a warning.
                if "public domain" in content.lower():
                    continue
                sys.stderr.write('warning: no copyright notice found in "%s" (%d lines)\n' % (path, len(lines)))
                continue

            # ExtractCopyrightAt() returns the index where the header it
            # consumed ended, so the scan resumes after that header.
            i = 0
            while i < len(lines):
                if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
                    i = ExtractCopyrightAt(lines, i)
                i += 1

# Write UTF-8 bytes directly. Python 3 exposes the byte stream as
# sys.stdout.buffer; on Python 2 sys.stdout itself accepts bytes.
output = getattr(sys.stdout, 'buffer', sys.stdout)
for notice in sorted(copyrights):
    output.write(notice.encode('utf-8'))
    output.write(b'\n\n-------------------------------------------------------------------\n\n')

sys.exit(0)