Blame - src/xdiff/xpatience.c - android_external_vim

blob: f78c897ad8c8fee373a9586d0f1701c8ee8dc764 [file] [log] [blame]

Bram Moolenaar	e828b76	2018-09-10 17:51:58 +0200	[diff] [blame]	1	/*
				2	* LibXDiff by Davide Libenzi ( File Differential Library )
				3	* Copyright (C) 2003-2016 Davide Libenzi, Johannes E. Schindelin
				4	*
				5	* This library is free software; you can redistribute it and/or
				6	* modify it under the terms of the GNU Lesser General Public
				7	* License as published by the Free Software Foundation; either
				8	* version 2.1 of the License, or (at your option) any later version.
				9	*
				10	* This library is distributed in the hope that it will be useful,
				11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				13	* Lesser General Public License for more details.
				14	*
				15	* You should have received a copy of the GNU Lesser General Public
				16	* License along with this library; if not, see
				17	* <http://www.gnu.org/licenses/>.
				18	*
				19	* Davide Libenzi <davidel@xmailserver.org>
				20	*
				21	*/
				22	#include "xinclude.h"
Bram Moolenaar	e828b76	2018-09-10 17:51:58 +0200	[diff] [blame]	23
				24	/*
				25	* The basic idea of patience diff is to find lines that are unique in
				26	* both files. These are intuitively the ones that we want to see as
				27	* common lines.
				28	*
				29	* The maximal ordered sequence of such line pairs (where ordered means
				30	* that the order in the sequence agrees with the order of the lines in
				31	* both files) naturally defines an initial set of common lines.
				32	*
				33	* Now, the algorithm tries to extend the set of common lines by growing
				34	* the line ranges where the files have identical lines.
				35	*
				36	* Between those common lines, the patience diff algorithm is applied
				37	* recursively, until no unique line pairs can be found; these line ranges
				38	* are handled by the well-known Myers algorithm.
				39	*/
				40
				41	#define NON_UNIQUE ULONG_MAX
				42
				43	/*
				44	* This is a hash mapping from line hash to line numbers in the first and
				45	* second file.
				46	*/
				47	struct hashmap {
				48	int nr, alloc;
				49	struct entry {
				50	unsigned long hash;
				51	/*
				52	* 0 = unused entry, 1 = first line, 2 = second, etc.
				53	* line2 is NON_UNIQUE if the line is not unique
				54	* in either the first or the second file.
				55	*/
				56	unsigned long line1, line2;
				57	/*
				58	* "next" & "previous" are used for the longest common
				59	* sequence;
				60	* initially, "next" reflects only the order in file1.
				61	*/
				62	struct entry next, previous;
				63
				64	/*
				65	* If 1, this entry can serve as an anchor. See
				66	* Documentation/diff-options.txt for more information.
				67	*/
				68	unsigned anchor : 1;
				69	} entries, first, *last;
Christian Brabandt	ba02e47	2021-08-31 20:46:39 +0200	[diff] [blame]	70	/* were common records found? */
Bram Moolenaar	e828b76	2018-09-10 17:51:58 +0200	[diff] [blame]	71	unsigned long has_matches;
				72	mmfile_t file1, file2;
				73	xdfenv_t *env;
				74	xpparam_t const *xpp;
				75	};
				76
				77	static int is_anchor(xpparam_t const xpp, const char line)
				78	{
Christian Brabandt	ba02e47	2021-08-31 20:46:39 +0200	[diff] [blame]	79	int i;
				80	for (i = 0; i < (int)xpp->anchors_nr; i++) {
Bram Moolenaar	e828b76	2018-09-10 17:51:58 +0200	[diff] [blame]	81	if (!strncmp(line, xpp->anchors[i], strlen(xpp->anchors[i])))
				82	return 1;
				83	}
				84	return 0;
				85	}
				86
Christian Brabandt	ba02e47	2021-08-31 20:46:39 +0200	[diff] [blame]	87	/* The argument "pass" is 1 for the first file, 2 for the second. */
Bram Moolenaar	e828b76	2018-09-10 17:51:58 +0200	[diff] [blame]	88	static void insert_record(xpparam_t const xpp, int line, struct hashmap map,
				89	int pass)
				90	{
				91	xrecord_t **records = pass == 1 ?
				92	map->env->xdf1.recs : map->env->xdf2.recs;
Christian Brabandt	ba02e47	2021-08-31 20:46:39 +0200	[diff] [blame]	93	xrecord_t *record = records[line - 1];
Bram Moolenaar	e828b76	2018-09-10 17:51:58 +0200	[diff] [blame]	94	/*
				95	* After xdl_prepare_env() (or more precisely, due to
				96	* xdl_classify_record()), the "ha" member of the records (AKA lines)
				97	* is _not_ the hash anymore, but a linearized version of it. In
				98	* other words, the "ha" member is guaranteed to start with 0 and
				99	* the second record's ha can only be 0 or 1, etc.
				100	*
				101	* So we multiply ha by 2 in the hope that the hashing was
				102	* "unique enough".
				103	*/
				104	int index = (int)((record->ha << 1) % map->alloc);
				105
				106	while (map->entries[index].line1) {
Christian Brabandt	ba02e47	2021-08-31 20:46:39 +0200	[diff] [blame]	107	if (map->entries[index].hash != record->ha) {
Bram Moolenaar	e828b76	2018-09-10 17:51:58 +0200	[diff] [blame]	108	if (++index >= map->alloc)
				109	index = 0;
				110	continue;
				111	}
				112	if (pass == 2)
				113	map->has_matches = 1;
				114	if (pass == 1 \|\| map->entries[index].line2)
				115	map->entries[index].line2 = NON_UNIQUE;
				116	else
				117	map->entries[index].line2 = line;
				118	return;
				119	}
				120	if (pass == 2)
				121	return;
				122	map->entries[index].line1 = line;
				123	map->entries[index].hash = record->ha;
				124	map->entries[index].anchor = is_anchor(xpp, map->env->xdf1.recs[line - 1]->ptr);
				125	if (!map->first)
				126	map->first = map->entries + index;
				127	if (map->last) {
				128	map->last->next = map->entries + index;
				129	map->entries[index].previous = map->last;
				130	}
				131	map->last = map->entries + index;
				132	map->nr++;
				133	}
				134
				135	/*
				136	* This function has to be called for each recursion into the inter-hunk
				137	* parts, as previously non-unique lines can become unique when being
				138	* restricted to a smaller part of the files.
				139	*
				140	* It is assumed that env has been prepared using xdl_prepare().
				141	*/
				142	static int fill_hashmap(mmfile_t file1, mmfile_t file2,
				143	xpparam_t const xpp, xdfenv_t env,
				144	struct hashmap *result,
				145	int line1, int count1, int line2, int count2)
				146	{
				147	result->file1 = file1;
				148	result->file2 = file2;
				149	result->xpp = xpp;
				150	result->env = env;
				151
Christian Brabandt	ba02e47	2021-08-31 20:46:39 +0200	[diff] [blame]	152	/* We know exactly how large we want the hash map */
Bram Moolenaar	e828b76	2018-09-10 17:51:58 +0200	[diff] [blame]	153	result->alloc = count1 * 2;
				154	result->entries = (struct entry *)
				155	xdl_malloc(result->alloc * sizeof(struct entry));
				156	if (!result->entries)
				157	return -1;
				158	memset(result->entries, 0, result->alloc * sizeof(struct entry));
				159
Christian Brabandt	ba02e47	2021-08-31 20:46:39 +0200	[diff] [blame]	160	/* First, fill with entries from the first file */
Bram Moolenaar	e828b76	2018-09-10 17:51:58 +0200	[diff] [blame]	161	while (count1--)
				162	insert_record(xpp, line1++, result, 1);
				163
Christian Brabandt	ba02e47	2021-08-31 20:46:39 +0200	[diff] [blame]	164	/* Then search for matches in the second file */
Bram Moolenaar	e828b76	2018-09-10 17:51:58 +0200	[diff] [blame]	165	while (count2--)
				166	insert_record(xpp, line2++, result, 2);
				167
				168	return 0;
				169	}
				170
				171	/*
				172	* Find the longest sequence with a smaller last element (meaning a smaller
				173	* line2, as we construct the sequence with entries ordered by line1).
				174	*/
				175	static int binary_search(struct entry **sequence, int longest,
				176	struct entry *entry)
				177	{
				178	int left = -1, right = longest;
				179
				180	while (left + 1 < right) {
				181	int middle = left + (right - left) / 2;
Christian Brabandt	ba02e47	2021-08-31 20:46:39 +0200	[diff] [blame]	182	/* by construction, no two entries can be equal */
Bram Moolenaar	e828b76	2018-09-10 17:51:58 +0200	[diff] [blame]	183	if (sequence[middle]->line2 > entry->line2)
				184	right = middle;
				185	else
				186	left = middle;
				187	}
Christian Brabandt	ba02e47	2021-08-31 20:46:39 +0200	[diff] [blame]	188	/* return the index in "sequence", _not_ the sequence length */
Bram Moolenaar	e828b76	2018-09-10 17:51:58 +0200	[diff] [blame]	189	return left;
				190	}
				191
				192	/*
				193	* The idea is to start with the list of common unique lines sorted by
				194	* the order in file1. For each of these pairs, the longest (partial)
				195	* sequence whose last element's line2 is smaller is determined.
				196	*
				197	* For efficiency, the sequences are kept in a list containing exactly one
				198	* item per sequence length: the sequence with the smallest last
				199	* element (in terms of line2).
				200	*/
				201	static struct entry find_longest_common_sequence(struct hashmap map)
				202	{
Christian Brabandt	ba02e47	2021-08-31 20:46:39 +0200	[diff] [blame]	203	struct entry *sequence = xdl_malloc(map->nr sizeof(struct entry *));
Bram Moolenaar	e828b76	2018-09-10 17:51:58 +0200	[diff] [blame]	204	int longest = 0, i;
				205	struct entry *entry;
Christian Brabandt	ba02e47	2021-08-31 20:46:39 +0200	[diff] [blame]	206
Bram Moolenaar	e828b76	2018-09-10 17:51:58 +0200	[diff] [blame]	207	/*
				208	* If not -1, this entry in sequence must never be overridden.
				209	* Therefore, overriding entries before this has no effect, so
				210	* do not do that either.
				211	*/
				212	int anchor_i = -1;
				213
Bram Moolenaar	707d226	2019-12-04 22:16:54 +0100	[diff] [blame]	214	// Added to silence Coverity.
Bram Moolenaar	5c6f574	2018-09-13 17:32:07 +0200	[diff] [blame]	215	if (sequence == NULL)
				216	return map->first;
				217
Bram Moolenaar	e828b76	2018-09-10 17:51:58 +0200	[diff] [blame]	218	for (entry = map->first; entry; entry = entry->next) {
				219	if (!entry->line2 \|\| entry->line2 == NON_UNIQUE)
				220	continue;
				221	i = binary_search(sequence, longest, entry);
				222	entry->previous = i < 0 ? NULL : sequence[i];
				223	++i;
				224	if (i <= anchor_i)
				225	continue;
				226	sequence[i] = entry;
				227	if (entry->anchor) {
				228	anchor_i = i;
				229	longest = anchor_i + 1;
				230	} else if (i == longest) {
				231	longest++;
				232	}
				233	}
				234
Christian Brabandt	ba02e47	2021-08-31 20:46:39 +0200	[diff] [blame]	235	/* No common unique lines were found */
Bram Moolenaar	e828b76	2018-09-10 17:51:58 +0200	[diff] [blame]	236	if (!longest) {
				237	xdl_free(sequence);
				238	return NULL;
				239	}
				240
Christian Brabandt	ba02e47	2021-08-31 20:46:39 +0200	[diff] [blame]	241	/* Iterate starting at the last element, adjusting the "next" members */
Bram Moolenaar	e828b76	2018-09-10 17:51:58 +0200	[diff] [blame]	242	entry = sequence[longest - 1];
				243	entry->next = NULL;
				244	while (entry->previous) {
				245	entry->previous->next = entry;
				246	entry = entry->previous;
				247	}
				248	xdl_free(sequence);
				249	return entry;
				250	}
				251
				252	static int match(struct hashmap *map, int line1, int line2)
				253	{
				254	xrecord_t *record1 = map->env->xdf1.recs[line1 - 1];
				255	xrecord_t *record2 = map->env->xdf2.recs[line2 - 1];
Christian Brabandt	ba02e47	2021-08-31 20:46:39 +0200	[diff] [blame]	256	return record1->ha == record2->ha;
Bram Moolenaar	e828b76	2018-09-10 17:51:58 +0200	[diff] [blame]	257	}
				258
				259	static int patience_diff(mmfile_t file1, mmfile_t file2,
				260	xpparam_t const xpp, xdfenv_t env,
				261	int line1, int count1, int line2, int count2);
				262
				263	static int walk_common_sequence(struct hashmap map, struct entry first,
				264	int line1, int count1, int line2, int count2)
				265	{
				266	int end1 = line1 + count1, end2 = line2 + count2;
				267	int next1, next2;
				268
				269	for (;;) {
Christian Brabandt	ba02e47	2021-08-31 20:46:39 +0200	[diff] [blame]	270	/* Try to grow the line ranges of common lines */
Bram Moolenaar	e828b76	2018-09-10 17:51:58 +0200	[diff] [blame]	271	if (first) {
				272	next1 = first->line1;
				273	next2 = first->line2;
				274	while (next1 > line1 && next2 > line2 &&
				275	match(map, next1 - 1, next2 - 1)) {
				276	next1--;
				277	next2--;
				278	}
				279	} else {
				280	next1 = end1;
				281	next2 = end2;
				282	}
				283	while (line1 < next1 && line2 < next2 &&
				284	match(map, line1, line2)) {
				285	line1++;
				286	line2++;
				287	}
				288
Christian Brabandt	ba02e47	2021-08-31 20:46:39 +0200	[diff] [blame]	289	/* Recurse */
Bram Moolenaar	e828b76	2018-09-10 17:51:58 +0200	[diff] [blame]	290	if (next1 > line1 \|\| next2 > line2) {
Bram Moolenaar	e828b76	2018-09-10 17:51:58 +0200	[diff] [blame]	291	if (patience_diff(map->file1, map->file2,
				292	map->xpp, map->env,
				293	line1, next1 - line1,
				294	line2, next2 - line2))
				295	return -1;
				296	}
				297
				298	if (!first)
				299	return 0;
				300
				301	while (first->next &&
				302	first->next->line1 == first->line1 + 1 &&
				303	first->next->line2 == first->line2 + 1)
				304	first = first->next;
				305
				306	line1 = first->line1 + 1;
				307	line2 = first->line2 + 1;
				308
				309	first = first->next;
				310	}
				311	}
				312
				313	static int fall_back_to_classic_diff(struct hashmap *map,
				314	int line1, int count1, int line2, int count2)
				315	{
				316	xpparam_t xpp;
Christian Brabandt	ba02e47	2021-08-31 20:46:39 +0200	[diff] [blame]	317
				318	memset(&xpp, 0, sizeof(xpp));
Bram Moolenaar	e828b76	2018-09-10 17:51:58 +0200	[diff] [blame]	319	xpp.flags = map->xpp->flags & ~XDF_DIFF_ALGORITHM_MASK;
				320
				321	return xdl_fall_back_diff(map->env, &xpp,
				322	line1, count1, line2, count2);
				323	}
				324
				325	/*
				326	* Recursively find the longest common sequence of unique lines,
				327	* and if none was found, ask xdl_do_diff() to do the job.
				328	*
				329	* This function assumes that env was prepared with xdl_prepare_env().
				330	*/
				331	static int patience_diff(mmfile_t file1, mmfile_t file2,
				332	xpparam_t const xpp, xdfenv_t env,
				333	int line1, int count1, int line2, int count2)
				334	{
				335	struct hashmap map;
				336	struct entry *first;
				337	int result = 0;
				338
Christian Brabandt	ba02e47	2021-08-31 20:46:39 +0200	[diff] [blame]	339	/* trivial case: one side is empty */
Bram Moolenaar	e828b76	2018-09-10 17:51:58 +0200	[diff] [blame]	340	if (!count1) {
				341	while(count2--)
				342	env->xdf2.rchg[line2++ - 1] = 1;
				343	return 0;
				344	} else if (!count2) {
				345	while(count1--)
				346	env->xdf1.rchg[line1++ - 1] = 1;
				347	return 0;
				348	}
				349
				350	memset(&map, 0, sizeof(map));
				351	if (fill_hashmap(file1, file2, xpp, env, &map,
				352	line1, count1, line2, count2))
				353	return -1;
				354
Christian Brabandt	ba02e47	2021-08-31 20:46:39 +0200	[diff] [blame]	355	/* are there any matching lines at all? */
Bram Moolenaar	e828b76	2018-09-10 17:51:58 +0200	[diff] [blame]	356	if (!map.has_matches) {
				357	while(count1--)
				358	env->xdf1.rchg[line1++ - 1] = 1;
				359	while(count2--)
				360	env->xdf2.rchg[line2++ - 1] = 1;
				361	xdl_free(map.entries);
				362	return 0;
				363	}
				364
				365	first = find_longest_common_sequence(&map);
				366	if (first)
				367	result = walk_common_sequence(&map, first,
				368	line1, count1, line2, count2);
				369	else
				370	result = fall_back_to_classic_diff(&map,
				371	line1, count1, line2, count2);
				372
				373	xdl_free(map.entries);
				374	return result;
				375	}
				376
				377	int xdl_do_patience_diff(mmfile_t file1, mmfile_t file2,
				378	xpparam_t const xpp, xdfenv_t env)
				379	{
				380	if (xdl_prepare_env(file1, file2, xpp, env) < 0)
				381	return -1;
				382
Christian Brabandt	ba02e47	2021-08-31 20:46:39 +0200	[diff] [blame]	383	/* environment is cleaned up in xdl_diff() */
Bram Moolenaar	e828b76	2018-09-10 17:51:58 +0200	[diff] [blame]	384	return patience_diff(file1, file2, xpp, env,
				385	1, env->xdf1.nrec, 1, env->xdf2.nrec);
				386	}