Allow passing filenames to generate-NOTICE.py.

For the libandroid_support NOTICE file, we need to combine all the files
in that directory, plus the specific files pulled from bionic.

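Also cleaned up some of the Python style: snake_case function names,
double-quoted strings, and a specific UnicodeDecodeError handler in
place of the bare except.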

Bug: N/A
Test: used for libandroid_support
Change-Id: If433e3a0f0478f06d99a9b3556e99dde06a7e5e1
diff --git a/libc/tools/generate-NOTICE.py b/libc/tools/generate-NOTICE.py
index 6573644..d40891c 100755
--- a/libc/tools/generate-NOTICE.py
+++ b/libc/tools/generate-NOTICE.py
@@ -14,17 +14,33 @@
 import tarfile
 import tempfile
 
-def IsUninteresting(path):
-    path = path.lower()
-    if path.endswith(".mk") or path.endswith(".py") or path.endswith(".pyc") or path.endswith(".txt") or path.endswith(".3") or path.endswith(".swp"):
-        return True
-    if path.endswith("/notice") or path.endswith("/readme") or path.endswith("/caveats"):
-        return True
-    if path.endswith("/tzdata") or path.endswith("/zoneinfo/generate"):
-        return True
-    return False
+VERBOSE = False
 
-def IsAutoGenerated(content):
+def warn(s):
+    sys.stderr.write("warning: %s\n" % s)
+
+def warn_verbose(s):
+    if VERBOSE:
+        warn(s)
+
+def is_interesting(path):
+    path = path.lower()
+    uninteresting_extensions = [
+        ".bp",
+        ".map",
+        ".mk",
+        ".py",
+        ".pyc",
+        ".swp",
+        ".txt",
+    ]
+    if os.path.splitext(path)[1] in uninteresting_extensions:
+        return False
+    if path.endswith("/notice") or path.endswith("/readme"):
+        return False
+    return True
+
+def is_auto_generated(content):
     if "Generated by gensyscalls.py" in content or "generated by genserv.py" in content:
         return True
     if "This header was automatically generated from a Linux kernel header" in content:
@@ -33,7 +49,7 @@
 
 copyrights = set()
 
-def ExtractCopyrightAt(lines, i):
+def extract_copyright_at(lines, i):
     hash = lines[i].startswith("#")
 
     # Do we need to back up to find the start of the copyright header?
@@ -100,13 +116,42 @@
 
     return i
 
-args = sys.argv[1:]
-if len(args) == 0:
-    args = [ "." ]
 
-for arg in args:
-    sys.stderr.write('Searching for source files in "%s"...\n' % arg)
+def do_file(path):
+    with open(path, "r") as the_file:
+        try:
+            content = open(path, "r").read().decode("utf-8")
+        except UnicodeDecodeError:
+            warn("bad UTF-8 in %s" % path)
+            content = open(path, "r").read().decode("iso-8859-1")
 
+    lines = content.split("\n")
+
+    if len(lines) <= 4:
+        warn_verbose("ignoring short file %s" % path)
+        return
+
+    if is_auto_generated(content):
+        warn_verbose("ignoring auto-generated file %s" % path)
+        return
+
+    if not "Copyright" in content:
+        if "public domain" in content.lower():
+            warn("ignoring public domain file %s" % path)
+            return
+        warn('no copyright notice found in "%s" (%d lines)' % (path, len(lines)))
+        return
+
+    # Manually iterate because extract_copyright_at tells us how many lines to skip.
+    i = 0
+    while i < len(lines):
+        if "Copyright" in lines[i] and not "@(#) Copyright" in lines[i]:
+            i = extract_copyright_at(lines, i)
+        else:
+            i += 1
+
+
+def do_dir(path):
     for directory, sub_directories, filenames in os.walk(arg):
         if ".git" in sub_directories:
             sub_directories.remove(".git")
@@ -114,45 +159,24 @@
 
         for filename in sorted(filenames):
             path = os.path.join(directory, filename)
-            if IsUninteresting(path):
-                #print "ignoring uninteresting file %s" % path
-                continue
+            if is_interesting(path):
+                do_file(path)
 
-            try:
-                content = open(path, 'r').read().decode('utf-8')
-            except:
-                sys.stderr.write('warning: bad UTF-8 in %s\n' % path)
-                content = open(path, 'r').read().decode('iso-8859-1')
 
-            lines = content.split("\n")
+args = sys.argv[1:]
+if len(args) == 0:
+    args = [ "." ]
 
-            if len(lines) <= 4:
-                #print "ignoring short file %s" % path
-                continue
-
-            if IsAutoGenerated(content):
-                #print "ignoring auto-generated file %s" % path
-                continue
-
-            if not "Copyright" in content:
-                if "public domain" in content.lower():
-                    #print "ignoring public domain file %s" % path
-                    continue
-                sys.stderr.write('warning: no copyright notice found in "%s" (%d lines)\n' % (path, len(lines)))
-                continue
-
-            i = 0
-            while i < len(lines):
-                if "Copyright" in lines[i] and not "@(#) Copyright" in lines[i]:
-                    i = ExtractCopyrightAt(lines, i)
-                i += 1
-
-            #print path
+for arg in args:
+    if os.path.isdir(arg):
+        do_dir(arg)
+    else:
+        do_file(arg)
 
 for copyright in sorted(copyrights):
-    print copyright.encode('utf-8')
+    print copyright.encode("utf-8")
     print
-    print '-------------------------------------------------------------------'
+    print "-------------------------------------------------------------------"
     print
 
 sys.exit(0)