Blame - libc/tools/generate-NOTICE.py - android_bionic

blob: d40891c597d83ee42f9b3043235610fe3845a7ea [file] [log] [blame]

Elliott Hughes	387d4b7	2012-08-09 15:17:46 -0700	[diff] [blame]	1	#!/usr/bin/python
				2	# Run with directory arguments from any directory, with no special setup required.
Elliott Hughes	965b867	2012-09-13 16:51:57 -0700	[diff] [blame]	3	# Or:
Elliott Hughes	6b2b585	2014-12-18 16:27:30 -0800	[diff] [blame]	4	# for i in libc libdl libm linker libstdc++ ; do ./libc/tools/generate-NOTICE.py $i > $i/NOTICE ; done
Elliott Hughes	387d4b7	2012-08-09 15:17:46 -0700	[diff] [blame]	5
				6	import ftplib
				7	import hashlib
				8	import os
				9	import re
				10	import shutil
				11	import string
				12	import subprocess
				13	import sys
				14	import tarfile
				15	import tempfile
				16
# When True, warn_verbose() forwards its message to warn(); when False it
# stays silent.
VERBOSE = False


def warn(s):
    """Write `s` to stderr as a single "warning: ..." line."""
    message = "warning: %s\n" % s
    sys.stderr.write(message)


def warn_verbose(s):
    """Emit `s` through warn(), but only while the VERBOSE flag is set."""
    if not VERBOSE:
        return
    warn(s)
				25
				26	def is_interesting(path):
				27	path = path.lower()
				28	uninteresting_extensions = [
				29	".bp",
				30	".map",
				31	".mk",
				32	".py",
				33	".pyc",
				34	".swp",
				35	".txt",
				36	]
				37	if os.path.splitext(path)[1] in uninteresting_extensions:
				38	return False
				39	if path.endswith("/notice") or path.endswith("/readme"):
				40	return False
				41	return True
				42
				43	def is_auto_generated(content):
Elliott Hughes	22a0d6f	2014-03-06 15:10:22 -0800	[diff] [blame]	44	if "Generated by gensyscalls.py" in content or "generated by genserv.py" in content:
Elliott Hughes	387d4b7	2012-08-09 15:17:46 -0700	[diff] [blame]	45	return True
				46	if "This header was automatically generated from a Linux kernel header" in content:
				47	return True
				48	return False
				49
				50	copyrights = set()
				51
Elliott Hughes	aac7c3a	2017-07-14 10:00:32 -0700	[diff] [blame]	52	def extract_copyright_at(lines, i):
Elliott Hughes	387d4b7	2012-08-09 15:17:46 -0700	[diff] [blame]	53	hash = lines[i].startswith("#")
				54
Elliott Hughes	261e223	2012-08-14 15:04:05 -0700	[diff] [blame]	55	# Do we need to back up to find the start of the copyright header?
				56	start = i
				57	if not hash:
				58	while start > 0:
				59	if "/*" in lines[start - 1]:
				60	break
				61	start -= 1
				62
Elliott Hughes	387d4b7	2012-08-09 15:17:46 -0700	[diff] [blame]	63	# Read comment lines until we hit something that terminates a
				64	# copyright header.
Elliott Hughes	387d4b7	2012-08-09 15:17:46 -0700	[diff] [blame]	65	while i < len(lines):
				66	if "*/" in lines[i]:
				67	break
				68	if hash and len(lines[i]) == 0:
				69	break
				70	if "\t@(#)" in lines[i] or "\tfrom: @(#)" in lines[i] or "From: @(#)" in lines[i] or "from OpenBSD:" in lines[i]:
				71	break
				72	if "\tcitrus Id: " in lines[i]:
				73	break
Elliott Hughes	bfa582d	2014-05-05 14:58:17 -0700	[diff] [blame]	74	if "\t$Citrus: " in lines[i] or "\t$OpenBSD: " in lines[i] or " $FreeBSD: " in lines[i] or "\t$NetBSD: " in lines[i]:
Elliott Hughes	387d4b7	2012-08-09 15:17:46 -0700	[diff] [blame]	75	break
				76	if "$FreeBSD$" in lines[i] or "$Citrus$" in lines[i]:
				77	break
Elliott Hughes	bfa582d	2014-05-05 14:58:17 -0700	[diff] [blame]	78	# OpenBSD likes to say where stuff originally came from:
				79	if "Original version ID:" in lines[i]:
				80	break
Elliott Hughes	387d4b7	2012-08-09 15:17:46 -0700	[diff] [blame]	81	i += 1
				82
				83	end = i
				84
				85	# Trim trailing cruft.
				86	while end > 0:
				87	if lines[end - 1] != " " and lines[end - 1] != " ====================================================":
				88	break
				89	end -= 1
				90
				91	# Remove C/assembler comment formatting, pulling out just the text.
				92	clean_lines = []
				93	for line in lines[start:end]:
				94	line = line.replace("\t", " ")
				95	line = line.replace("/* ", "")
Elliott Hughes	3758a24	2014-07-22 21:24:47 -0700	[diff] [blame]	96	line = re.sub("^ \* ", "", line)
Elliott Hughes	387d4b7	2012-08-09 15:17:46 -0700	[diff] [blame]	97	line = line.replace("** ", "")
				98	line = line.replace("# ", "")
				99	if line.startswith("++Copyright++"):
				100	continue
				101	line = line.replace("--Copyright--", "")
				102	line = line.rstrip()
				103	# These come last and take care of "blank" comment lines.
				104	if line == "#" or line == " " or line == "*" or line == "-":
				105	line = ""
				106	clean_lines.append(line)
				107
				108	# Trim blank lines from head and tail.
				109	while clean_lines[0] == "":
				110	clean_lines = clean_lines[1:]
				111	while clean_lines[len(clean_lines) - 1] == "":
				112	clean_lines = clean_lines[0:(len(clean_lines) - 1)]
				113
				114	copyright = "\n".join(clean_lines)
				115	copyrights.add(copyright)
				116
				117	return i
				118
Elliott Hughes	387d4b7	2012-08-09 15:17:46 -0700	[diff] [blame]	119
def do_file(path):
    """Scan one file and record every copyright notice found in it.

    The file is decoded as UTF-8, falling back to ISO-8859-1 (with a warning)
    when it is not valid UTF-8.  Files of four lines or fewer, auto-generated
    files, and files that never mention "Copyright" are skipped; the last
    case always produces a warning.

    Args:
        path: path of the file to scan.
    """
    # Read the raw bytes exactly once and close the handle straight away.
    # Binary mode keeps the explicit decode below meaningful on both
    # Python 2 and Python 3.
    with open(path, "rb") as the_file:
        raw = the_file.read()

    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        content = raw.decode("iso-8859-1")

    lines = content.split("\n")

    if len(lines) <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if is_auto_generated(content):
        warn_verbose("ignoring auto-generated file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn("ignoring public domain file %s" % path)
            return
        warn('no copyright notice found in "%s" (%d lines)' % (path, len(lines)))
        return

    # Manually iterate because extract_copyright_at tells us how many lines to skip.
    i = 0
    while i < len(lines):
        if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
            # Always move forward by at least one line, so a result equal to
            # the current index cannot stall the loop.
            i = max(extract_copyright_at(lines, i), i + 1)
        else:
            i += 1
				152
				153
				154	def do_dir(path):
Elliott Hughes	387d4b7	2012-08-09 15:17:46 -0700	[diff] [blame]	155	for directory, sub_directories, filenames in os.walk(arg):
				156	if ".git" in sub_directories:
				157	sub_directories.remove(".git")
				158	sub_directories = sorted(sub_directories)
				159
				160	for filename in sorted(filenames):
				161	path = os.path.join(directory, filename)
Elliott Hughes	aac7c3a	2017-07-14 10:00:32 -0700	[diff] [blame]	162	if is_interesting(path):
				163	do_file(path)
Elliott Hughes	387d4b7	2012-08-09 15:17:46 -0700	[diff] [blame]	164
Elliott Hughes	387d4b7	2012-08-09 15:17:46 -0700	[diff] [blame]	165
# Script entry point: scan each argument (the current directory when none is
# given), then print every collected notice followed by a separator.
args = sys.argv[1:]
if not args:
    args = ["."]

for arg in args:
    if os.path.isdir(arg):
        do_dir(arg)
    else:
        do_file(arg)

# Notices are written as UTF-8 bytes.  Python 3 exposes the byte stream as
# sys.stdout.buffer; on Python 2 sys.stdout accepts bytes directly.
_out = getattr(sys.stdout, "buffer", sys.stdout)
for notice in sorted(copyrights):
    _out.write(notice.encode("utf-8") + b"\n")
    _out.write(b"\n")
    _out.write(b"-------------------------------------------------------------------\n")
    _out.write(b"\n")
_out.flush()

sys.exit(0)