src/linematch.c - android_external_vim - Gitiles

 /* vi:set ts=8 sts=4 sw=4 noet:
  *
  * VIM - Vi IMproved	by Bram Moolenaar
  *
  * Do ":help uganda"  in Vim to read copying and usage conditions.
  * Do ":help credits" in Vim to see a list of people who contributed.
  * See README.txt for an overview of the Vim source code.
  */

 #include "vim.h"

 #define LN_MAX_BUFS 8
 #define LN_DECISION_MAX 255  // pow(2, LN_MAX_BUFS(8)) - 1 = 255

 // struct for running the diff linematch algorithm
 typedef struct diffcmppath_S diffcmppath_T;
 struct diffcmppath_S
 {
     // to keep track of the total score of this path
     int			df_lev_score;
     size_t		df_path_n;	// current index of this path
     int			df_choice_mem[LN_DECISION_MAX + 1];
     int			df_choice[LN_DECISION_MAX];
     // to keep track of this path traveled
     diffcmppath_T	*df_decision[LN_DECISION_MAX];
     size_t		df_optimal_choice;
 };

 static int matching_chars(const mmfile_t *m1, const mmfile_t *m2);
 static size_t unwrap_indexes(const int *values, const int *diff_len, const size_t ndiffs);
 static size_t test_charmatch_paths(diffcmppath_T *node, int lastdecision);

     static size_t
 line_len(const mmfile_t *m)
 {
     char	*s = m->ptr;
     char	*end;

     end = memchr(s, '\n', (size_t)m->size);
     return end ? (size_t)(end - s) : (size_t)m->size;
 }

 #define MATCH_CHAR_MAX_LEN 800

 /// Same as matching_chars but ignore whitespace
 ///
 /// @param s1
 /// @param s2
     static int
 matching_chars_iwhite(const mmfile_t *s1, const mmfile_t *s2)
 {
     // the newly processed strings that will be compared
     // delete the white space characters
     mmfile_t	sp[2];
     char	p[2][MATCH_CHAR_MAX_LEN];

     for (int k = 0; k < 2; k++)
     {
 	const	mmfile_t *s = k == 0 ? s1 : s2;
 	size_t	pi = 0;
 	size_t	slen = MIN(MATCH_CHAR_MAX_LEN - 1, line_len(s));

 	for (size_t i = 0; i <= slen; i++)
 	{
 	    char e = s->ptr[i];

 	    if (e != ' ' && e != '\t')
 	    {
 		p[k][pi] = e;
 		pi++;
 	    }
 	}

 	sp[k].ptr = p[k];
 	sp[k].size = (int)pi;
     }

     return matching_chars(&sp[0], &sp[1]);
 }

 /// Return matching characters between "s1" and "s2" whilst respecting sequence
 /// order.
 /// Consider the case of two strings 'AAACCC' and 'CCCAAA', the
 /// return value from this function will be 3, either to match
 /// the 3 C's, or the 3 A's.
 ///
 /// Examples:
 ///   matching_chars("aabc", "acba")               -> 2  // 'a' and 'b' in common
 ///   matching_chars("123hello567", "he123ll567o") -> 8  // '123', 'll' and '567' in common
 ///   matching_chars("abcdefg", "gfedcba")         -> 1  // all characters in common,
 ///                                                      // but only at most 1 in sequence
 ///
 /// @param m1
 /// @param m2
     static int
 matching_chars(const mmfile_t *m1, const mmfile_t *m2)
 {
     size_t	s1len = MIN(MATCH_CHAR_MAX_LEN - 1, line_len(m1));
     size_t	s2len = MIN(MATCH_CHAR_MAX_LEN - 1, line_len(m2));
     char	*s1 = m1->ptr;
     char	*s2 = m2->ptr;
     int		matrix[2][MATCH_CHAR_MAX_LEN] = { 0 };
     int		icur = 1;  // save space by storing only two rows for i axis

     for (size_t i = 0; i < s1len; i++)
     {
 	icur = (icur == 1 ? 0 : 1);
 	int *e1 = matrix[icur];
 	int *e2 = matrix[!icur];

 	for (size_t j = 0; j < s2len; j++)
 	{
 	    // skip char in s1
 	    if (e2[j + 1] > e1[j + 1])
 		e1[j + 1] = e2[j + 1];
 	    // skip char in s2
 	    if (e1[j] > e1[j + 1])
 		e1[j + 1] = e1[j];
 	    // compare char in s1 and s2
 	    if ((s1[i] == s2[j]) && (e2[j] + 1) > e1[j + 1])
 		e1[j + 1] = e2[j] + 1;
 	}
     }

     return matrix[icur][s2len];
 }

 /// count the matching characters between a variable number of strings "sp"
 /// mark the strings that have already been compared to extract them later
 /// without re-running the character match counting.
 /// @param sp
 /// @param fomvals
 /// @param n
     static int
 count_n_matched_chars(mmfile_t **sp, const size_t n, int iwhite)
 {
     int matched_chars = 0;
     int matched = 0;

     for (size_t i = 0; i < n; i++)
     {
 	for (size_t j = i + 1; j < n; j++)
 	{
 	    if (sp[i]->ptr != NULL && sp[j]->ptr != NULL)
 	    {
 		matched++;
 		// TODO(lewis6991): handle whitespace ignoring higher up in the
 		// stack
 		matched_chars += iwhite ? matching_chars_iwhite(sp[i], sp[j])
 					: matching_chars(sp[i], sp[j]);
 	    }
 	}
     }

     // prioritize a match of 3 (or more lines) equally to a match of 2 lines
     if (matched >= 2)
     {
 	matched_chars *= 2;
 	matched_chars /= matched;
     }

     return matched_chars;
 }

     static mmfile_t
 fastforward_buf_to_lnum(mmfile_t s, linenr_T lnum)
 {
     for (int i = 0; i < lnum - 1; i++)
     {
 	char *line_end;

 	line_end = memchr(s.ptr, '\n', (size_t)s.size);
 	s.size = line_end ? (int)(s.size - (line_end - s.ptr)) : 0;
 	s.ptr = line_end;
 	if (!s.ptr)
 	    break;
 	s.ptr++;
 	s.size--;
     }

     return s;
 }

 /// try all the different ways to compare these lines and use the one that
 /// results in the most matching characters
 /// @param df_iters
 /// @param paths
 /// @param npaths
 /// @param path_idx
 /// @param choice
 /// @param diffcmppath
 /// @param diff_len
 /// @param ndiffs
 /// @param diff_blk
     static void
 try_possible_paths(
     const int		*df_iters,
     const size_t	*paths,
     const int		npaths,
     const int		path_idx,
     int			*choice,
     diffcmppath_T	*diffcmppath,
     const int		*diff_len,
     const size_t	ndiffs,
     const mmfile_t	**diff_blk,
     int			iwhite)
 {
     if (path_idx == npaths)
     {
 	if ((*choice) > 0)
 	{
 	    int from_vals[LN_MAX_BUFS] = { 0 };
 	    const int *to_vals = df_iters;

 	    mmfile_t mm[LN_MAX_BUFS];  // stack memory for current_lines
 	    mmfile_t *current_lines[LN_MAX_BUFS];
 	    for (size_t k = 0; k < ndiffs; k++)
 	    {
 		from_vals[k] = df_iters[k];
 		// get the index at all of the places
 		if ((*choice) & (1 << k))
 		{
 		    from_vals[k]--;
 		    mm[k] = fastforward_buf_to_lnum(*diff_blk[k], df_iters[k]);
 		}
 		else
 		    CLEAR_FIELD(mm[k]);
 		current_lines[k] = &mm[k];
 	    }
 	    size_t unwrapped_idx_from = unwrap_indexes(from_vals, diff_len, ndiffs);
 	    size_t unwrapped_idx_to = unwrap_indexes(to_vals, diff_len, ndiffs);
 	    int matched_chars = count_n_matched_chars(current_lines, ndiffs, iwhite);
 	    int score = diffcmppath[unwrapped_idx_from].df_lev_score + matched_chars;

 	    if (score > diffcmppath[unwrapped_idx_to].df_lev_score)
 	    {
 		diffcmppath[unwrapped_idx_to].df_path_n = 1;
 		diffcmppath[unwrapped_idx_to].df_decision[0] =
 					&diffcmppath[unwrapped_idx_from];
 		diffcmppath[unwrapped_idx_to].df_choice[0] = *choice;
 		diffcmppath[unwrapped_idx_to].df_lev_score = score;
 	    }
 	    else if (score == diffcmppath[unwrapped_idx_to].df_lev_score)
 	    {
 		size_t k = diffcmppath[unwrapped_idx_to].df_path_n++;
 		diffcmppath[unwrapped_idx_to].df_decision[k] =
 					&diffcmppath[unwrapped_idx_from];
 		diffcmppath[unwrapped_idx_to].df_choice[k] = *choice;
 	    }
 	}
 	return;
     }

     size_t bit_place = paths[path_idx];
     *(choice) |= (1 << bit_place);  // set it to 1
     try_possible_paths(df_iters, paths, npaths, path_idx + 1, choice,
 	    diffcmppath, diff_len, ndiffs, diff_blk, iwhite);
     *(choice) &= ~(1 << bit_place);  // set it to 0
     try_possible_paths(df_iters, paths, npaths, path_idx + 1, choice,
 	    diffcmppath, diff_len, ndiffs, diff_blk, iwhite);
 }

 /// unwrap indexes to access n dimensional tensor
 /// @param values
 /// @param diff_len
 /// @param ndiffs
     static size_t
 unwrap_indexes(const int *values, const int *diff_len, const size_t ndiffs)
 {
     size_t num_unwrap_scalar = 1;

     for (size_t k = 0; k < ndiffs; k++)
 	num_unwrap_scalar *= (size_t)diff_len[k] + 1;

     size_t path_idx = 0;
     for (size_t k = 0; k < ndiffs; k++)
     {
 	num_unwrap_scalar /= (size_t)diff_len[k] + 1;

 	int n = values[k];
 	path_idx += num_unwrap_scalar * (size_t)n;
     }

     return path_idx;
 }

 /// populate the values of the linematch algorithm tensor, and find the best
 /// decision for how to compare the relevant lines from each of the buffers at
 /// each point in the tensor
 /// @param df_iters
 /// @param ch_dim
 /// @param diffcmppath
 /// @param diff_len
 /// @param ndiffs
 /// @param diff_blk
     static void
 populate_tensor(
     int			*df_iters,
     const size_t	ch_dim,
     diffcmppath_T	*diffcmppath,
     const int		*diff_len,
     const size_t	ndiffs,
     const mmfile_t	**diff_blk,
     int			iwhite)
 {
     if (ch_dim == ndiffs)
     {
 	int npaths = 0;
 	size_t paths[LN_MAX_BUFS];

 	for (size_t j = 0; j < ndiffs; j++)
 	{
 	    if (df_iters[j] > 0)
 	    {
 		paths[npaths] = j;
 		npaths++;
 	    }
 	}

 	int choice = 0;
 	size_t unwrapper_idx_to = unwrap_indexes(df_iters, diff_len, ndiffs);

 	diffcmppath[unwrapper_idx_to].df_lev_score = -1;
 	try_possible_paths(df_iters, paths, npaths, 0, &choice, diffcmppath,
 					diff_len, ndiffs, diff_blk, iwhite);
 	return;
     }

     for (int i = 0; i <= diff_len[ch_dim]; i++)
     {
 	df_iters[ch_dim] = i;
 	populate_tensor(df_iters, ch_dim + 1, diffcmppath, diff_len,
 		ndiffs, diff_blk, iwhite);
     }
 }

 /// algorithm to find an optimal alignment of lines of a diff block with 2 or
 /// more files. The algorithm is generalized to work for any number of files
 /// which corresponds to another dimension added to the tensor used in the
 /// algorithm
 ///
 /// for questions and information about the linematch algorithm please contact
 /// Jonathon White (jonathonwhite@protonmail.com)
 ///
 /// for explanation, a summary of the algorithm in 3 dimensions (3 files
 ///     compared) follows
 ///
 /// The 3d case (for 3 buffers) of the algorithm implemented when diffopt
 /// 'linematch' is enabled. The algorithm constructs a 3d tensor to
 /// compare a diff between 3 buffers. The dimensions of the tensor are
 /// the length of the diff in each buffer plus 1 A path is constructed by
 /// moving from one edge of the cube/3d tensor to the opposite edge.
 /// Motions from one cell of the cube to the next represent decisions. In
 /// a 3d cube, there are a total of 7 decisions that can be made,
 /// represented by the enum df_path3_choice which is defined in
 /// buffer_defs.h a comparison of buffer 0 and 1 represents a motion
 /// toward the opposite edge of the cube with components along the 0 and
 /// 1 axes.  a comparison of buffer 0, 1, and 2 represents a motion
 /// toward the opposite edge of the cube with components along the 0, 1,
 /// and 2 axes. A skip of buffer 0 represents a motion along only the 0
 /// axis. For each action, a point value is awarded, and the path is
 /// saved for reference later, if it is found to have been the optimal
 /// path. The optimal path has the highest score.  The score is
 /// calculated as the summation of the total characters matching between
 /// all of the lines which were compared. The structure of the algorithm
 /// is that of a dynamic programming problem.  We can calculate a point
 /// i,j,k in the cube as a function of i-1, j-1, and k-1. To find the
 /// score and path at point i,j,k, we must determine which path we want
 /// to use, this is done by looking at the possibilities and choosing
 /// the one which results in the local highest score.  The total highest
 /// scored path is, then in the end represented by the cell in the
 /// opposite corner from the start location.  The entire algorithm
 /// consists of populating the 3d cube with the optimal paths from which
 /// it may have came.
 ///
 /// Optimizations:
 /// As the function to calculate the cell of a tensor at point i,j,k is a
 /// function of the cells at i-1, j-1, k-1, the whole tensor doesn't need
 /// to be stored in memory at once. In the case of the 3d cube, only two
 /// slices (along k and j axis) are stored in memory. For the 2d matrix
 /// (for 2 files), only two rows are stored at a time. The next/previous
 /// slice (or row) is always calculated from the other, and they alternate
 /// at each iteration.
 /// In the 3d case, 3 arrays are populated to memorize the score (matched
 /// characters) of the 3 buffers, so a redundant calculation of the
 /// scores does not occur
 /// @param diff_blk
 /// @param diff_len
 /// @param ndiffs
 /// @param [out] [allocated] decisions
 /// @return the length of decisions
     size_t
 linematch_nbuffers(
     const mmfile_t	**diff_blk,
     const int		*diff_len,
     const size_t	ndiffs,
     int			**decisions,
     int			iwhite)
 {
     assert(ndiffs <= LN_MAX_BUFS);

     size_t memsize = 1;
     size_t memsize_decisions = 0;
     for (size_t i = 0; i < ndiffs; i++)
     {
 	assert(diff_len[i] >= 0);
 	memsize *= (size_t)(diff_len[i] + 1);
 	memsize_decisions += (size_t)diff_len[i];
     }

     // create the flattened path matrix
     diffcmppath_T *diffcmppath = lalloc(sizeof(diffcmppath_T) * memsize, TRUE);
     // allocate memory here
     for (size_t i = 0; i < memsize; i++)
     {
 	diffcmppath[i].df_lev_score = 0;
 	diffcmppath[i].df_path_n = 0;
 	for (size_t j = 0; j < (size_t)pow(2, (double)ndiffs); j++)
 	    diffcmppath[i].df_choice_mem[j] = -1;
     }

     // memory for avoiding repetitive calculations of score
     int df_iters[LN_MAX_BUFS];
     populate_tensor(df_iters, 0, diffcmppath, diff_len, ndiffs, diff_blk,
 		    iwhite);

     const size_t u = unwrap_indexes(diff_len, diff_len, ndiffs);
     diffcmppath_T *startNode = &diffcmppath[u];

     *decisions = lalloc(sizeof(int) * memsize_decisions, TRUE);
     size_t n_optimal = 0;
     test_charmatch_paths(startNode, 0);
     while (startNode->df_path_n > 0)
     {
 	size_t j = startNode->df_optimal_choice;
 	(*decisions)[n_optimal++] = startNode->df_choice[j];
 	startNode = startNode->df_decision[j];
     }
     // reverse array
     for (size_t i = 0; i < (n_optimal / 2); i++)
     {
 	int tmp = (*decisions)[i];
 	(*decisions)[i] = (*decisions)[n_optimal - 1 - i];
 	(*decisions)[n_optimal - 1 - i] = tmp;
     }

     vim_free(diffcmppath);

     return n_optimal;
 }

 // returns the minimum amount of path changes from start to end
     static size_t
 test_charmatch_paths(diffcmppath_T *node, int lastdecision)
 {
     // memoization
     if (node->df_choice_mem[lastdecision] == -1)
     {
 	if (node->df_path_n == 0)
 	    // we have reached the end of the tree
 	    node->df_choice_mem[lastdecision] = 0;
 	else
 	{
 	    // the minimum amount of turns required to reach the end
 	    size_t minimum_turns = SIZE_MAX;
 	    for (size_t i = 0; i < node->df_path_n; i++)
 	    {
 		// recurse
 		size_t t = test_charmatch_paths(node->df_decision[i],
 							node->df_choice[i]) +
 		    (lastdecision != node->df_choice[i] ? 1 : 0);
 		if (t < minimum_turns)
 		{
 		    node->df_optimal_choice = i;
 		    minimum_turns = t;
 		}
 	    }
 	    node->df_choice_mem[lastdecision] = (int)minimum_turns;
 	}
     }

     return (size_t)node->df_choice_mem[lastdecision];
 }
	/* vi:set ts=8 sts=4 sw=4 noet:
	*
	* VIM - Vi IMproved by Bram Moolenaar
	*
	* Do ":help uganda" in Vim to read copying and usage conditions.
	* Do ":help credits" in Vim to see a list of people who contributed.
	* See README.txt for an overview of the Vim source code.
	*/

	#include "vim.h"

	#define LN_MAX_BUFS 8
	#define LN_DECISION_MAX 255 // pow(2, LN_MAX_BUFS(8)) - 1 = 255

	// struct for running the diff linematch algorithm
	typedef struct diffcmppath_S diffcmppath_T;
	struct diffcmppath_S
	{
	// to keep track of the total score of this path
	int df_lev_score;
	size_t df_path_n; // current index of this path
	int df_choice_mem[LN_DECISION_MAX + 1];
	int df_choice[LN_DECISION_MAX];
	// to keep track of this path traveled
	diffcmppath_T *df_decision[LN_DECISION_MAX];
	size_t df_optimal_choice;
	};

	static int matching_chars(const mmfile_t m1, const mmfile_t m2);
	static size_t unwrap_indexes(const int values, const int diff_len, const size_t ndiffs);
	static size_t test_charmatch_paths(diffcmppath_T *node, int lastdecision);

	static size_t
	line_len(const mmfile_t *m)
	{
	char *s = m->ptr;
	char *end;

	end = memchr(s, '\n', (size_t)m->size);
	return end ? (size_t)(end - s) : (size_t)m->size;
	}

	#define MATCH_CHAR_MAX_LEN 800

	/// Same as matching_chars but ignore whitespace
	///
	/// @param s1
	/// @param s2
	static int
	matching_chars_iwhite(const mmfile_t s1, const mmfile_t s2)
	{
	// the newly processed strings that will be compared
	// delete the white space characters
	mmfile_t sp[2];
	char p[2][MATCH_CHAR_MAX_LEN];

	for (int k = 0; k < 2; k++)
	{
	const mmfile_t *s = k == 0 ? s1 : s2;
	size_t pi = 0;
	size_t slen = MIN(MATCH_CHAR_MAX_LEN - 1, line_len(s));

	for (size_t i = 0; i <= slen; i++)
	{
	char e = s->ptr[i];

	if (e != ' ' && e != '\t')
	{
	p[k][pi] = e;
	pi++;
	}
	}

	sp[k].ptr = p[k];
	sp[k].size = (int)pi;
	}

	return matching_chars(&sp[0], &sp[1]);
	}

	/// Return matching characters between "s1" and "s2" whilst respecting sequence
	/// order.
	/// Consider the case of two strings 'AAACCC' and 'CCCAAA', the
	/// return value from this function will be 3, either to match
	/// the 3 C's, or the 3 A's.
	///
	/// Examples:
	/// matching_chars("aabc", "acba") -> 2 // 'a' and 'b' in common
	/// matching_chars("123hello567", "he123ll567o") -> 8 // '123', 'll' and '567' in common
	/// matching_chars("abcdefg", "gfedcba") -> 1 // all characters in common,
	/// // but only at most 1 in sequence
	///
	/// @param m1
	/// @param m2
	static int
	matching_chars(const mmfile_t m1, const mmfile_t m2)
	{
	size_t s1len = MIN(MATCH_CHAR_MAX_LEN - 1, line_len(m1));
	size_t s2len = MIN(MATCH_CHAR_MAX_LEN - 1, line_len(m2));
	char *s1 = m1->ptr;
	char *s2 = m2->ptr;
	int matrix[2][MATCH_CHAR_MAX_LEN] = { 0 };
	int icur = 1; // save space by storing only two rows for i axis

	for (size_t i = 0; i < s1len; i++)
	{
	icur = (icur == 1 ? 0 : 1);
	int *e1 = matrix[icur];
	int *e2 = matrix[!icur];

	for (size_t j = 0; j < s2len; j++)
	{
	// skip char in s1
	if (e2[j + 1] > e1[j + 1])
	e1[j + 1] = e2[j + 1];
	// skip char in s2
	if (e1[j] > e1[j + 1])
	e1[j + 1] = e1[j];
	// compare char in s1 and s2
	if ((s1[i] == s2[j]) && (e2[j] + 1) > e1[j + 1])
	e1[j + 1] = e2[j] + 1;
	}
	}

	return matrix[icur][s2len];
	}

	/// count the matching characters between a variable number of strings "sp"
	/// mark the strings that have already been compared to extract them later
	/// without re-running the character match counting.
	/// @param sp
	/// @param fomvals
	/// @param n
	static int
	count_n_matched_chars(mmfile_t **sp, const size_t n, int iwhite)
	{
	int matched_chars = 0;
	int matched = 0;

	for (size_t i = 0; i < n; i++)
	{
	for (size_t j = i + 1; j < n; j++)
	{
	if (sp[i]->ptr != NULL && sp[j]->ptr != NULL)
	{
	matched++;
	// TODO(lewis6991): handle whitespace ignoring higher up in the
	// stack
	matched_chars += iwhite ? matching_chars_iwhite(sp[i], sp[j])
	: matching_chars(sp[i], sp[j]);
	}
	}
	}

	// prioritize a match of 3 (or more lines) equally to a match of 2 lines
	if (matched >= 2)
	{
	matched_chars *= 2;
	matched_chars /= matched;
	}

	return matched_chars;
	}

	static mmfile_t
	fastforward_buf_to_lnum(mmfile_t s, linenr_T lnum)
	{
	for (int i = 0; i < lnum - 1; i++)
	{
	char *line_end;

	line_end = memchr(s.ptr, '\n', (size_t)s.size);
	s.size = line_end ? (int)(s.size - (line_end - s.ptr)) : 0;
	s.ptr = line_end;
	if (!s.ptr)
	break;
	s.ptr++;
	s.size--;
	}

	return s;
	}

	/// try all the different ways to compare these lines and use the one that
	/// results in the most matching characters
	/// @param df_iters
	/// @param paths
	/// @param npaths
	/// @param path_idx
	/// @param choice
	/// @param diffcmppath
	/// @param diff_len
	/// @param ndiffs
	/// @param diff_blk
	static void
	try_possible_paths(
	const int *df_iters,
	const size_t *paths,
	const int npaths,
	const int path_idx,
	int *choice,
	diffcmppath_T *diffcmppath,
	const int *diff_len,
	const size_t ndiffs,
	const mmfile_t **diff_blk,
	int iwhite)
	{
	if (path_idx == npaths)
	{
	if ((*choice) > 0)
	{
	int from_vals[LN_MAX_BUFS] = { 0 };
	const int *to_vals = df_iters;

	mmfile_t mm[LN_MAX_BUFS]; // stack memory for current_lines
	mmfile_t *current_lines[LN_MAX_BUFS];
	for (size_t k = 0; k < ndiffs; k++)
	{
	from_vals[k] = df_iters[k];
	// get the index at all of the places
	if ((*choice) & (1 << k))
	{
	from_vals[k]--;
	mm[k] = fastforward_buf_to_lnum(*diff_blk[k], df_iters[k]);
	}
	else
	CLEAR_FIELD(mm[k]);
	current_lines[k] = &mm[k];
	}
	size_t unwrapped_idx_from = unwrap_indexes(from_vals, diff_len, ndiffs);
	size_t unwrapped_idx_to = unwrap_indexes(to_vals, diff_len, ndiffs);
	int matched_chars = count_n_matched_chars(current_lines, ndiffs, iwhite);
	int score = diffcmppath[unwrapped_idx_from].df_lev_score + matched_chars;

	if (score > diffcmppath[unwrapped_idx_to].df_lev_score)
	{
	diffcmppath[unwrapped_idx_to].df_path_n = 1;
	diffcmppath[unwrapped_idx_to].df_decision[0] =
	&diffcmppath[unwrapped_idx_from];
	diffcmppath[unwrapped_idx_to].df_choice[0] = *choice;
	diffcmppath[unwrapped_idx_to].df_lev_score = score;
	}
	else if (score == diffcmppath[unwrapped_idx_to].df_lev_score)
	{
	size_t k = diffcmppath[unwrapped_idx_to].df_path_n++;
	diffcmppath[unwrapped_idx_to].df_decision[k] =
	&diffcmppath[unwrapped_idx_from];
	diffcmppath[unwrapped_idx_to].df_choice[k] = *choice;
	}
	}
	return;
	}

	size_t bit_place = paths[path_idx];
	*(choice) \|= (1 << bit_place); // set it to 1
	try_possible_paths(df_iters, paths, npaths, path_idx + 1, choice,
	diffcmppath, diff_len, ndiffs, diff_blk, iwhite);
	*(choice) &= ~(1 << bit_place); // set it to 0
	try_possible_paths(df_iters, paths, npaths, path_idx + 1, choice,
	diffcmppath, diff_len, ndiffs, diff_blk, iwhite);
	}

	/// unwrap indexes to access n dimensional tensor
	/// @param values
	/// @param diff_len
	/// @param ndiffs
	static size_t
	unwrap_indexes(const int values, const int diff_len, const size_t ndiffs)
	{
	size_t num_unwrap_scalar = 1;

	for (size_t k = 0; k < ndiffs; k++)
	num_unwrap_scalar *= (size_t)diff_len[k] + 1;

	size_t path_idx = 0;
	for (size_t k = 0; k < ndiffs; k++)
	{
	num_unwrap_scalar /= (size_t)diff_len[k] + 1;

	int n = values[k];
	path_idx += num_unwrap_scalar * (size_t)n;
	}

	return path_idx;
	}

	/// populate the values of the linematch algorithm tensor, and find the best
	/// decision for how to compare the relevant lines from each of the buffers at
	/// each point in the tensor
	/// @param df_iters
	/// @param ch_dim
	/// @param diffcmppath
	/// @param diff_len
	/// @param ndiffs
	/// @param diff_blk
	static void
	populate_tensor(
	int *df_iters,
	const size_t ch_dim,
	diffcmppath_T *diffcmppath,
	const int *diff_len,
	const size_t ndiffs,
	const mmfile_t **diff_blk,
	int iwhite)
	{
	if (ch_dim == ndiffs)
	{
	int npaths = 0;
	size_t paths[LN_MAX_BUFS];

	for (size_t j = 0; j < ndiffs; j++)
	{
	if (df_iters[j] > 0)
	{
	paths[npaths] = j;
	npaths++;
	}
	}

	int choice = 0;
	size_t unwrapper_idx_to = unwrap_indexes(df_iters, diff_len, ndiffs);

	diffcmppath[unwrapper_idx_to].df_lev_score = -1;
	try_possible_paths(df_iters, paths, npaths, 0, &choice, diffcmppath,
	diff_len, ndiffs, diff_blk, iwhite);
	return;
	}

	for (int i = 0; i <= diff_len[ch_dim]; i++)
	{
	df_iters[ch_dim] = i;
	populate_tensor(df_iters, ch_dim + 1, diffcmppath, diff_len,
	ndiffs, diff_blk, iwhite);
	}
	}

	/// algorithm to find an optimal alignment of lines of a diff block with 2 or
	/// more files. The algorithm is generalized to work for any number of files
	/// which corresponds to another dimension added to the tensor used in the
	/// algorithm
	///
	/// for questions and information about the linematch algorithm please contact
	/// Jonathon White (jonathonwhite@protonmail.com)
	///
	/// for explanation, a summary of the algorithm in 3 dimensions (3 files
	/// compared) follows
	///
	/// The 3d case (for 3 buffers) of the algorithm implemented when diffopt
	/// 'linematch' is enabled. The algorithm constructs a 3d tensor to
	/// compare a diff between 3 buffers. The dimensions of the tensor are
	/// the length of the diff in each buffer plus 1 A path is constructed by
	/// moving from one edge of the cube/3d tensor to the opposite edge.
	/// Motions from one cell of the cube to the next represent decisions. In
	/// a 3d cube, there are a total of 7 decisions that can be made,
	/// represented by the enum df_path3_choice which is defined in
	/// buffer_defs.h a comparison of buffer 0 and 1 represents a motion
	/// toward the opposite edge of the cube with components along the 0 and
	/// 1 axes. a comparison of buffer 0, 1, and 2 represents a motion
	/// toward the opposite edge of the cube with components along the 0, 1,
	/// and 2 axes. A skip of buffer 0 represents a motion along only the 0
	/// axis. For each action, a point value is awarded, and the path is
	/// saved for reference later, if it is found to have been the optimal
	/// path. The optimal path has the highest score. The score is
	/// calculated as the summation of the total characters matching between
	/// all of the lines which were compared. The structure of the algorithm
	/// is that of a dynamic programming problem. We can calculate a point
	/// i,j,k in the cube as a function of i-1, j-1, and k-1. To find the
	/// score and path at point i,j,k, we must determine which path we want
	/// to use, this is done by looking at the possibilities and choosing
	/// the one which results in the local highest score. The total highest
	/// scored path is, then in the end represented by the cell in the
	/// opposite corner from the start location. The entire algorithm
	/// consists of populating the 3d cube with the optimal paths from which
	/// it may have came.
	///
	/// Optimizations:
	/// As the function to calculate the cell of a tensor at point i,j,k is a
	/// function of the cells at i-1, j-1, k-1, the whole tensor doesn't need
	/// to be stored in memory at once. In the case of the 3d cube, only two
	/// slices (along k and j axis) are stored in memory. For the 2d matrix
	/// (for 2 files), only two rows are stored at a time. The next/previous
	/// slice (or row) is always calculated from the other, and they alternate
	/// at each iteration.
	/// In the 3d case, 3 arrays are populated to memorize the score (matched
	/// characters) of the 3 buffers, so a redundant calculation of the
	/// scores does not occur
	/// @param diff_blk
	/// @param diff_len
	/// @param ndiffs
	/// @param [out] [allocated] decisions
	/// @return the length of decisions
	size_t
	linematch_nbuffers(
	const mmfile_t **diff_blk,
	const int *diff_len,
	const size_t ndiffs,
	int **decisions,
	int iwhite)
	{
	assert(ndiffs <= LN_MAX_BUFS);

	size_t memsize = 1;
	size_t memsize_decisions = 0;
	for (size_t i = 0; i < ndiffs; i++)
	{
	assert(diff_len[i] >= 0);
	memsize *= (size_t)(diff_len[i] + 1);
	memsize_decisions += (size_t)diff_len[i];
	}

	// create the flattened path matrix
	diffcmppath_T diffcmppath = lalloc(sizeof(diffcmppath_T) memsize, TRUE);
	// allocate memory here
	for (size_t i = 0; i < memsize; i++)
	{
	diffcmppath[i].df_lev_score = 0;
	diffcmppath[i].df_path_n = 0;
	for (size_t j = 0; j < (size_t)pow(2, (double)ndiffs); j++)
	diffcmppath[i].df_choice_mem[j] = -1;
	}

	// memory for avoiding repetitive calculations of score
	int df_iters[LN_MAX_BUFS];
	populate_tensor(df_iters, 0, diffcmppath, diff_len, ndiffs, diff_blk,
	iwhite);

	const size_t u = unwrap_indexes(diff_len, diff_len, ndiffs);
	diffcmppath_T *startNode = &diffcmppath[u];

	decisions = lalloc(sizeof(int) memsize_decisions, TRUE);
	size_t n_optimal = 0;
	test_charmatch_paths(startNode, 0);
	while (startNode->df_path_n > 0)
	{
	size_t j = startNode->df_optimal_choice;
	(*decisions)[n_optimal++] = startNode->df_choice[j];
	startNode = startNode->df_decision[j];
	}
	// reverse array
	for (size_t i = 0; i < (n_optimal / 2); i++)
	{
	int tmp = (*decisions)[i];
	(decisions)[i] = (decisions)[n_optimal - 1 - i];
	(*decisions)[n_optimal - 1 - i] = tmp;
	}

	vim_free(diffcmppath);

	return n_optimal;
	}

	// returns the minimum amount of path changes from start to end
	static size_t
	test_charmatch_paths(diffcmppath_T *node, int lastdecision)
	{
	// memoization
	if (node->df_choice_mem[lastdecision] == -1)
	{
	if (node->df_path_n == 0)
	// we have reached the end of the tree
	node->df_choice_mem[lastdecision] = 0;
	else
	{
	// the minimum amount of turns required to reach the end
	size_t minimum_turns = SIZE_MAX;
	for (size_t i = 0; i < node->df_path_n; i++)
	{
	// recurse
	size_t t = test_charmatch_paths(node->df_decision[i],
	node->df_choice[i]) +
	(lastdecision != node->df_choice[i] ? 1 : 0);
	if (t < minimum_turns)
	{
	node->df_optimal_choice = i;
	minimum_turns = t;
	}
	}
	node->df_choice_mem[lastdecision] = (int)minimum_turns;
	}
	}

	return (size_t)node->df_choice_mem[lastdecision];
	}