Blame - libc/tools/generate-NOTICE.py - android_bionic

blob: 17429e1322f405d5ecb9316f1fcf6d18cc9bbc97 [file] [log] [blame]

Stephen Crane	77bb564	2017-08-31 15:08:26 -0700	[diff] [blame]	1	#!/usr/bin/env python
Elliott Hughes	387d4b7	2012-08-09 15:17:46 -0700	[diff] [blame]	2	# Run with directory arguments from any directory, with no special setup required.
Elliott Hughes	965b867	2012-09-13 16:51:57 -0700	[diff] [blame]	3	# Or:
Elliott Hughes	6b2b585	2014-12-18 16:27:30 -0800	[diff] [blame]	4	# for i in libc libdl libm linker libstdc++ ; do ./libc/tools/generate-NOTICE.py $i > $i/NOTICE ; done
Elliott Hughes	387d4b7	2012-08-09 15:17:46 -0700	[diff] [blame]	5
				6	import ftplib
				7	import hashlib
				8	import os
				9	import re
				10	import shutil
				11	import string
				12	import subprocess
				13	import sys
				14	import tarfile
				15	import tempfile
				16
# When True, warn_verbose() forwards its messages to warn(); otherwise they
# are silently dropped.
VERBOSE = False


def warn(s):
    """Write `s` to stderr as a single "warning: ..." line."""
    message = "warning: %s\n" % s
    sys.stderr.write(message)


def warn_verbose(s):
    """Report `s` through warn(), but only when VERBOSE is enabled."""
    if not VERBOSE:
        return
    warn(s)
				25
				26	def is_interesting(path):
				27	path = path.lower()
				28	uninteresting_extensions = [
				29	".bp",
				30	".map",
				31	".mk",
				32	".py",
				33	".pyc",
				34	".swp",
				35	".txt",
				36	]
				37	if os.path.splitext(path)[1] in uninteresting_extensions:
				38	return False
				39	if path.endswith("/notice") or path.endswith("/readme"):
				40	return False
				41	return True
				42
				43	def is_auto_generated(content):
Elliott Hughes	22a0d6f	2014-03-06 15:10:22 -0800	[diff] [blame]	44	if "Generated by gensyscalls.py" in content or "generated by genserv.py" in content:
Elliott Hughes	387d4b7	2012-08-09 15:17:46 -0700	[diff] [blame]	45	return True
				46	if "This header was automatically generated from a Linux kernel header" in content:
				47	return True
				48	return False
				49
				50	copyrights = set()
				51
Elliott Hughes	aac7c3a	2017-07-14 10:00:32 -0700	[diff] [blame]	52	def extract_copyright_at(lines, i):
Elliott Hughes	387d4b7	2012-08-09 15:17:46 -0700	[diff] [blame]	53	hash = lines[i].startswith("#")
				54
Elliott Hughes	261e223	2012-08-14 15:04:05 -0700	[diff] [blame]	55	# Do we need to back up to find the start of the copyright header?
				56	start = i
				57	if not hash:
				58	while start > 0:
				59	if "/*" in lines[start - 1]:
				60	break
				61	start -= 1
				62
Elliott Hughes	387d4b7	2012-08-09 15:17:46 -0700	[diff] [blame]	63	# Read comment lines until we hit something that terminates a
				64	# copyright header.
Elliott Hughes	387d4b7	2012-08-09 15:17:46 -0700	[diff] [blame]	65	while i < len(lines):
				66	if "*/" in lines[i]:
				67	break
				68	if hash and len(lines[i]) == 0:
				69	break
				70	if "\t@(#)" in lines[i] or "\tfrom: @(#)" in lines[i] or "From: @(#)" in lines[i] or "from OpenBSD:" in lines[i]:
				71	break
				72	if "\tcitrus Id: " in lines[i]:
				73	break
Elliott Hughes	bfa582d	2014-05-05 14:58:17 -0700	[diff] [blame]	74	if "\t$Citrus: " in lines[i] or "\t$OpenBSD: " in lines[i] or " $FreeBSD: " in lines[i] or "\t$NetBSD: " in lines[i]:
Elliott Hughes	387d4b7	2012-08-09 15:17:46 -0700	[diff] [blame]	75	break
				76	if "$FreeBSD$" in lines[i] or "$Citrus$" in lines[i]:
				77	break
Elliott Hughes	bfa582d	2014-05-05 14:58:17 -0700	[diff] [blame]	78	# OpenBSD likes to say where stuff originally came from:
				79	if "Original version ID:" in lines[i]:
				80	break
Elliott Hughes	387d4b7	2012-08-09 15:17:46 -0700	[diff] [blame]	81	i += 1
				82
				83	end = i
				84
				85	# Trim trailing cruft.
				86	while end > 0:
				87	if lines[end - 1] != " " and lines[end - 1] != " ====================================================":
				88	break
				89	end -= 1
				90
				91	# Remove C/assembler comment formatting, pulling out just the text.
				92	clean_lines = []
				93	for line in lines[start:end]:
				94	line = line.replace("\t", " ")
				95	line = line.replace("/* ", "")
Elliott Hughes	3758a24	2014-07-22 21:24:47 -0700	[diff] [blame]	96	line = re.sub("^ \* ", "", line)
Elliott Hughes	387d4b7	2012-08-09 15:17:46 -0700	[diff] [blame]	97	line = line.replace("** ", "")
				98	line = line.replace("# ", "")
Elliott Hughes	ab52807	2018-07-24 00:01:52 +0000	[diff] [blame]	99	if "SPDX-License-Identifier:" in line:
				100	continue
Elliott Hughes	387d4b7	2012-08-09 15:17:46 -0700	[diff] [blame]	101	if line.startswith("++Copyright++"):
				102	continue
				103	line = line.replace("--Copyright--", "")
				104	line = line.rstrip()
				105	# These come last and take care of "blank" comment lines.
				106	if line == "#" or line == " " or line == "*" or line == "-":
				107	line = ""
				108	clean_lines.append(line)
				109
				110	# Trim blank lines from head and tail.
				111	while clean_lines[0] == "":
				112	clean_lines = clean_lines[1:]
				113	while clean_lines[len(clean_lines) - 1] == "":
				114	clean_lines = clean_lines[0:(len(clean_lines) - 1)]
				115
				116	copyright = "\n".join(clean_lines)
				117	copyrights.add(copyright)
				118
				119	return i
				120
Elliott Hughes	387d4b7	2012-08-09 15:17:46 -0700	[diff] [blame]	121
def do_file(path):
    """Scan the file at `path` and record every copyright header it contains.

    Files of four lines or fewer, auto-generated files, and files that do
    not contain "Copyright" are skipped (with a warning where appropriate).
    Content is decoded as UTF-8, falling back to ISO-8859-1 with a warning.
    """
    # Read the raw bytes exactly once and decode them ourselves, so the
    # handle is always closed and the result does not depend on the Python
    # version or the locale's default encoding.
    with open(path, "rb") as the_file:
        raw = the_file.read()
    try:
        content = raw.decode("utf-8")
    except UnicodeDecodeError:
        warn("bad UTF-8 in %s" % path)
        content = raw.decode("iso-8859-1")

    lines = content.split("\n")

    if len(lines) <= 4:
        warn_verbose("ignoring short file %s" % path)
        return

    if is_auto_generated(content):
        warn_verbose("ignoring auto-generated file %s" % path)
        return

    if "Copyright" not in content:
        if "public domain" in content.lower():
            warn("ignoring public domain file %s" % path)
            return
        warn('no copyright notice found in "%s" (%d lines)' % (path, len(lines)))
        return

    # Manually iterate because extract_copyright_at tells us how many lines to skip.
    i = 0
    while i < len(lines):
        if "Copyright" in lines[i] and "@(#) Copyright" not in lines[i]:
            # Never accept a resume index that fails to advance, otherwise
            # the same line would be matched again forever.
            i = max(extract_copyright_at(lines, i), i + 1)
        else:
            i += 1
				154
				155
				156	def do_dir(path):
Elliott Hughes	387d4b7	2012-08-09 15:17:46 -0700	[diff] [blame]	157	for directory, sub_directories, filenames in os.walk(arg):
				158	if ".git" in sub_directories:
				159	sub_directories.remove(".git")
				160	sub_directories = sorted(sub_directories)
				161
				162	for filename in sorted(filenames):
				163	path = os.path.join(directory, filename)
Elliott Hughes	aac7c3a	2017-07-14 10:00:32 -0700	[diff] [blame]	164	if is_interesting(path):
				165	do_file(path)
Elliott Hughes	387d4b7	2012-08-09 15:17:46 -0700	[diff] [blame]	166
Elliott Hughes	387d4b7	2012-08-09 15:17:46 -0700	[diff] [blame]	167
# Scan every path given on the command line; default to the current directory.
args = sys.argv[1:] or ["."]

for arg in args:
    if os.path.isdir(arg):
        do_dir(arg)
    else:
        do_file(arg)

# Emit the notices as UTF-8 bytes. Python 3 exposes the byte stream as
# sys.stdout.buffer; Python 2's sys.stdout accepts byte strings directly.
output = getattr(sys.stdout, "buffer", sys.stdout)
separator = "-------------------------------------------------------------------"
for notice in sorted(copyrights):
    # Each entry is: the notice, a blank line, the separator, a blank line.
    output.write((notice + "\n\n" + separator + "\n\n").encode("utf-8"))

sys.exit(0)