patch 9.1.0071: Need a diff() Vim script function
Problem: Need a diff() Vim script function
Solution: Add the diff() Vim script function using the
xdiff internal diff library, add support for
"unified" and "indices" mode.
(Yegappan Lakshmanan)
fixes: #4241
closes: #12321
Signed-off-by: Yegappan Lakshmanan <yegappan@yahoo.com>
Signed-off-by: Christian Brabandt <cb@256bit.org>
diff --git a/runtime/doc/builtin.txt b/runtime/doc/builtin.txt
index 5001205..19b9bc9 100644
--- a/runtime/doc/builtin.txt
+++ b/runtime/doc/builtin.txt
@@ -1,4 +1,4 @@
-*builtin.txt* For Vim version 9.1. Last change: 2024 Jan 29
+*builtin.txt* For Vim version 9.1. Last change: 2024 Feb 01
VIM REFERENCE MANUAL by Bram Moolenaar
@@ -147,6 +147,8 @@
deletebufline({buf}, {first} [, {last}])
Number delete lines from buffer {buf}
did_filetype() Number |TRUE| if FileType autocmd event used
+diff({fromlist}, {tolist} [, {options}])
+ String or List diff two Lists of strings
diff_filler({lnum}) Number diff filler lines about {lnum}
diff_hlID({lnum}, {col}) Number diff highlighting at {lnum}/{col}
digraph_get({chars}) String get the |digraph| of {chars}
@@ -2046,6 +2048,67 @@
editing another buffer to set 'filetype' and load a syntax
file.
+diff({fromlist}, {tolist} [, {options}]) *diff()*
+ Returns a String or a List containing the diff between the
+ strings in {fromlist} and {tolist}. Uses the Vim internal
+ diff library to compute the diff.
+
+ *E106*
+ The optional "output" item in {options} specifies the returned
+ diff format. The following values are supported:
+ indices Return a List with the starting index and
+ the count of the strings in {fromlist} and
+ {tolist} for each diff hunk.
+ unified Return the unified diff output as a String.
+ This is the default.
+
+ If the "output" item in {options} is "indices", then a List is
+ returned. Each List item contains a Dict with the following
+ items for each diff hunk:
+ from_idx start index in {fromlist} for this diff hunk.
+ from_count number of strings in {fromlist} that are
+ added/removed/modified in this diff hunk.
+ to_idx start index in {tolist} for this diff hunk.
+ to_count number of strings in {tolist} that are
+ added/removed/modified in this diff hunk.
+
+ The {options} Dict argument also specifies diff options
+ (similar to 'diffopt') and supports the following items:
+ iblank ignore changes where lines are all
+ blank.
+ icase ignore changes in case of text.
+ iwhite ignore changes in amount of white
+ space.
+ iwhiteall ignore all white space changes.
+ iwhiteeol ignore white space changes at end of
+ line.
+ indent-heuristic use the indent heuristic for the
+ internal diff library.
+ algorithm String specifying the diff algorithm to
+ use. Supported values are
+ "myers", "minimal", "patience" and
+ "histogram".
+ For more information about these options, refer to 'diffopt'.
+
+ Returns an empty List or String if {fromlist} and {tolist} are
+ identical.
+
+ Examples:
+ :echo diff(['abc'], ['xxx'])
+ @@ -1 +1 @@
+ -abc
+ +xxx
+
+ :echo diff(['abc'], ['xxx'], {'output': 'indices'})
+ [{'from_idx': 0, 'from_count': 1, 'to_idx': 0, 'to_count': 1}]
+ :echo diff(readfile('oldfile'), readfile('newfile'))
+ :echo diff(getbufline(5, 1, '$'), getbufline(6, 1, '$'))
+
+ For more examples, refer to |diff-func-examples|
+
+ Can also be used as a |method|: >
+ GetFromList->diff(to_list)
+<
diff_filler({lnum}) *diff_filler()*
Returns the number of filler lines above line {lnum}.
These are the lines that were inserted at this point in
diff --git a/runtime/doc/diff.txt b/runtime/doc/diff.txt
index 91b0047..05dd4a6 100644
--- a/runtime/doc/diff.txt
+++ b/runtime/doc/diff.txt
@@ -1,4 +1,4 @@
-*diff.txt* For Vim version 9.1. Last change: 2023 Apr 04
+*diff.txt* For Vim version 9.1. Last change: 2024 Feb 01
VIM REFERENCE MANUAL by Bram Moolenaar
@@ -476,4 +476,43 @@
option was set, thus script-local items are available.
+DIFF FUNCTION EXAMPLES *diff-func-examples*
+
+Some examples for using the |diff()| function to compute the diff indices
+between two Lists of strings are below.
+>
+ " some lines are changed
+ :echo diff(['abc', 'def', 'ghi'], ['abx', 'rrr', 'xhi'], {'output': 'indices'})
+ [{'from_idx': 0, 'from_count': 3, 'to_idx': 0, 'to_count': 3}]
+
+ " few lines added at the beginning
+ :echo diff(['ghi'], ['abc', 'def', 'ghi'], {'output': 'indices'})
+ [{'from_idx': 0, 'from_count': 0, 'to_idx': 0, 'to_count': 2}]
+
+ " few lines removed from the beginning
+ :echo diff(['abc', 'def', 'ghi'], ['ghi'], {'output': 'indices'})
+ [{'from_idx': 0, 'from_count': 2, 'to_idx': 0, 'to_count': 0}]
+
+ " few lines added in the middle
+ :echo diff(['abc', 'jkl'], ['abc', 'def', 'ghi', 'jkl'], {'output': 'indices'})
+ [{'from_idx': 1, 'from_count': 0, 'to_idx': 1, 'to_count': 2}]
+
+ " few lines removed in the middle
+ :echo diff(['abc', 'def', 'ghi', 'jkl'], ['abc', 'jkl'], {'output': 'indices'})
+ [{'from_idx': 1, 'from_count': 2, 'to_idx': 1, 'to_count': 0}]
+
+ " few lines added at the end
+ :echo diff(['abc'], ['abc', 'def', 'ghi'], {'output': 'indices'})
+ [{'from_idx': 1, 'from_count': 0, 'to_idx': 1, 'to_count': 2}]
+
+ " few lines removed from the end
+ :echo diff(['abc', 'def', 'ghi'], ['abc'], {'output': 'indices'})
+ [{'from_idx': 1, 'from_count': 2, 'to_idx': 1, 'to_count': 0}]
+
+ " disjointed changes
+ :echo diff(['ab', 'def', 'ghi', 'jkl'], ['abc', 'def', 'ghi', 'jk'], {'output': 'indices'})
+ [{'from_idx': 0, 'from_count': 1, 'to_idx': 0, 'to_count': 1},
+ {'from_idx': 3, 'from_count': 1, 'to_idx': 3, 'to_count': 1}]
+<
+
vim:tw=78:ts=8:noet:ft=help:norl:
diff --git a/runtime/doc/tags b/runtime/doc/tags
index 7074aab..ebde436 100644
--- a/runtime/doc/tags
+++ b/runtime/doc/tags
@@ -4137,6 +4137,7 @@
E1057 vim9.txt /*E1057*
E1058 vim9.txt /*E1058*
E1059 vim9.txt /*E1059*
+E106 builtin.txt /*E106*
E1060 vim9.txt /*E1060*
E1061 vim9.txt /*E1061*
E1062 eval.txt /*E1062*
@@ -6759,7 +6760,9 @@
dict-modification eval.txt /*dict-modification*
did_filetype() builtin.txt /*did_filetype()*
diff diff.txt /*diff*
+diff() builtin.txt /*diff()*
diff-diffexpr diff.txt /*diff-diffexpr*
+diff-func-examples diff.txt /*diff-func-examples*
diff-mode diff.txt /*diff-mode*
diff-options diff.txt /*diff-options*
diff-original-file diff.txt /*diff-original-file*
diff --git a/runtime/doc/todo.txt b/runtime/doc/todo.txt
index bb58597..953ae47 100644
--- a/runtime/doc/todo.txt
+++ b/runtime/doc/todo.txt
@@ -1,4 +1,4 @@
-*todo.txt* For Vim version 9.1. Last change: 2024 Jan 14
+*todo.txt* For Vim version 9.1. Last change: 2024 Feb 01
VIM REFERENCE MANUAL by Bram Moolenaar
@@ -956,9 +956,6 @@
scrolls back. Should allow for this scrolling, like 'scrolloff' does when
using CTRL-E. (Yee Cheng Chin, #3721)
-Add function to make use of internal diff, working on two lists and returning
-unified diff (list of lines).
-
When splitting a window with few text lines, the relative cursor position is
kept, which means part of the text isn't displayed. Better show all the text
when possible. (Dylan Lloyd, #3973)
diff --git a/runtime/doc/usr_41.txt b/runtime/doc/usr_41.txt
index 0990a06..0ca19be 100644
--- a/runtime/doc/usr_41.txt
+++ b/runtime/doc/usr_41.txt
@@ -1,4 +1,4 @@
-*usr_41.txt* For Vim version 9.1. Last change: 2024 Jan 13
+*usr_41.txt* For Vim version 9.1. Last change: 2024 Feb 01
VIM USER MANUAL - by Bram Moolenaar
@@ -1368,6 +1368,7 @@
changenr() return number of most recent change
cscope_connection() check if a cscope connection exists
did_filetype() check if a FileType autocommand was used
+ diff() diff two Lists of strings
eventhandler() check if invoked by an event handler
getpid() get process ID of Vim
getscriptinfo() get list of sourced vim scripts
diff --git a/src/diff.c b/src/diff.c
index 9b8c816..cb894ca 100644
--- a/src/diff.c
+++ b/src/diff.c
@@ -42,6 +42,10 @@
static long diff_algorithm = 0;
+#define DIFF_INTERNAL_OUTPUT_UNIFIED 1
+#define DIFF_INTERNAL_OUTPUT_INDICES 2
+static int diff_internal_output_fmt = DIFF_INTERNAL_OUTPUT_INDICES;
+
#define LBUFLEN 50 // length of line in diff file
static int diff_a_works = MAYBE; // TRUE when "diff -a" works, FALSE when it
@@ -97,7 +101,8 @@
static diff_T *diff_alloc_new(tabpage_T *tp, diff_T *dprev, diff_T *dp);
static int parse_diff_ed(char_u *line, diffhunk_T *hunk);
static int parse_diff_unified(char_u *line, diffhunk_T *hunk);
-static int xdiff_out(long start_a, long count_a, long start_b, long count_b, void *priv);
+static int xdiff_out_indices(long start_a, long count_a, long start_b, long count_b, void *priv);
+static int xdiff_out_unified(void *priv, mmbuffer_t *mb, int nbuf);
#define FOR_ALL_DIFFBLOCKS_IN_TAB(tp, dp) \
for ((dp) = (tp)->tp_first_diff; (dp) != NULL; (dp) = (dp)->df_next)
@@ -1142,7 +1147,10 @@
emit_cfg.ctxlen = 0; // don't need any diff_context here
emit_cb.priv = &diffio->dio_diff;
- emit_cfg.hunk_func = xdiff_out;
+ if (diff_internal_output_fmt == DIFF_INTERNAL_OUTPUT_INDICES)
+ emit_cfg.hunk_func = xdiff_out_indices;
+ else
+ emit_cb.out_line = xdiff_out_unified;
if (xdl_diff(&diffio->dio_orig.din_mmfile,
&diffio->dio_new.din_mmfile,
¶m, &emit_cfg, &emit_cb) < 0)
@@ -3327,10 +3335,10 @@
/*
* Callback function for the xdl_diff() function.
- * Stores the diff output in a grow array.
+ * Stores the diff output (indices) in a grow array.
*/
static int
-xdiff_out(
+xdiff_out_indices(
long start_a,
long count_a,
long start_b,
@@ -3357,6 +3365,25 @@
return 0;
}
+/*
+ * Output callback for xdl_diff(), used for the unified diff format.
+ * Appends the text of each of the "nbuf" buffers in "mb" to the grow array
+ * of the diffout_T passed in "priv".  Always returns 0.
+ */
+    static int
+xdiff_out_unified(
+        void        *priv,
+        mmbuffer_t  *mb,
+        int         nbuf)
+{
+    garray_T    *gap = &((diffout_T *)priv)->dout_ga;
+    int         idx = 0;
+
+    while (idx < nbuf)
+    {
+        ga_concat_len(gap, (char_u *)mb[idx].ptr, mb[idx].size);
+        ++idx;
+    }
+
+    return 0;
+}
+
#endif // FEAT_DIFF
#if defined(FEAT_EVAL) || defined(PROTO)
@@ -3439,4 +3466,205 @@
#endif
}
+/*
+ * Parse the options Dict "opts" passed as the third argument of diff().
+ *
+ * The diff flags are OR-ed into "diffopts", the diff algorithm (plus the
+ * indent heuristic flag) is stored in "diffalgo" and the requested output
+ * format in "diff_output_fmt".  An item that is missing from the Dict leaves
+ * the corresponding value untouched.
+ *
+ * The "algorithm" and "output" values must match a supported name exactly; a
+ * value that only starts with one (e.g. "unifiedX") is not accepted.  An
+ * unrecognized "algorithm" is ignored, an unrecognized "output" gives E106.
+ *
+ * Returns OK on success, FAIL (after giving an error message) when the
+ * "output" item is not supported.
+ */
+    static int
+parse_diff_optarg(
+    typval_T    *opts,
+    int         *diffopts,
+    long        *diffalgo,
+    int         *diff_output_fmt)
+{
+    dict_T      *d = opts->vval.v_dict;
+
+    char_u *algo = dict_get_string(d, "algorithm", FALSE);
+    if (algo != NULL)
+    {
+        if (STRCMP(algo, "myers") == 0)
+            *diffalgo = 0;
+        else if (STRCMP(algo, "minimal") == 0)
+            *diffalgo = XDF_NEED_MINIMAL;
+        else if (STRCMP(algo, "patience") == 0)
+            *diffalgo = XDF_PATIENCE_DIFF;
+        else if (STRCMP(algo, "histogram") == 0)
+            *diffalgo = XDF_HISTOGRAM_DIFF;
+    }
+
+    char_u *output_fmt = dict_get_string(d, "output", FALSE);
+    if (output_fmt != NULL)
+    {
+        if (STRCMP(output_fmt, "unified") == 0)
+            *diff_output_fmt = DIFF_INTERNAL_OUTPUT_UNIFIED;
+        else if (STRCMP(output_fmt, "indices") == 0)
+            *diff_output_fmt = DIFF_INTERNAL_OUTPUT_INDICES;
+        else
+        {
+            semsg(_(e_unsupported_diff_output_format_str), output_fmt);
+            return FAIL;
+        }
+    }
+
+    // Boolean items; each one that is set adds its flag.
+    if (dict_get_bool(d, "iblank", FALSE))
+        *diffopts |= DIFF_IBLANK;
+    if (dict_get_bool(d, "icase", FALSE))
+        *diffopts |= DIFF_ICASE;
+    if (dict_get_bool(d, "iwhite", FALSE))
+        *diffopts |= DIFF_IWHITE;
+    if (dict_get_bool(d, "iwhiteall", FALSE))
+        *diffopts |= DIFF_IWHITEALL;
+    if (dict_get_bool(d, "iwhiteeol", FALSE))
+        *diffopts |= DIFF_IWHITEEOL;
+    // Must come after "algorithm": it is added to the value stored above.
+    if (dict_get_bool(d, "indent-heuristic", FALSE))
+        *diffalgo |= XDF_INDENT_HEURISTIC;
+
+    return OK;
+}
+
+/*
+ * Concatenate the strings of List "l", each followed by a NL, and store the
+ * result in "din->din_mmfile.ptr" and its length (not counting the
+ * terminating NUL) in "din->din_mmfile.size".
+ * When "icase" is non-zero a lower-cased copy of every string is used.
+ */
+    static void
+list_to_diffin(list_T *l, diffin_T *din, int icase)
+{
+    garray_T    ga;
+    listitem_T  *li;
+
+    // The array holds bytes, thus the item size is 1.  Using a larger item
+    // size would multiply the amount of memory allocated for the text.
+    ga_init2(&ga, 1, 512);
+
+    FOR_ALL_LIST_ITEMS(l, li)
+    {
+        char_u  *str = tv_get_string(&li->li_tv);
+        char_u  *lower = NULL;
+
+        if (icase)
+        {
+            lower = strlow_save(str);
+            if (lower == NULL)
+                continue;       // no lower-cased copy available: skip item
+            str = lower;
+        }
+        ga_concat(&ga, str);
+        ga_concat(&ga, (char_u *)NL_STR);
+        if (lower != NULL)
+            vim_free(lower);
+    }
+
+    // Terminate the text with a NUL that is not included in the length.
+    // Reserve room for it first, the array may be exactly full.
+    if (ga.ga_len > 0 && ga_grow(&ga, 1) == OK)
+        ((char *)ga.ga_data)[ga.ga_len] = NUL;
+
+    din->din_mmfile.ptr = (char *)ga.ga_data;
+    din->din_mmfile.size = ga.ga_len;
+}
+
+/*
+ * Build a Dict describing diff "hunk": the zero-based start index and the
+ * count for both sides, stored as "from_idx", "from_count", "to_idx" and
+ * "to_count".
+ * Returns NULL when the Dict cannot be allocated.
+ */
+    static dict_T *
+get_diff_hunk_indices(diffhunk_T *hunk)
+{
+    dict_T      *d = dict_alloc();
+
+    if (d != NULL)
+    {
+        // The hunk line numbers start at one, the returned indices at zero.
+        dict_add_number(d, "from_idx", hunk->lnum_orig - 1);
+        dict_add_number(d, "from_count", hunk->count_orig);
+        dict_add_number(d, "to_idx", hunk->lnum_new - 1);
+        dict_add_number(d, "to_count", hunk->count_new);
+    }
+
+    return d;
+}
+
+/*
+ * "diff()" function
+ *
+ * argvars[0] and argvars[1] are the two Lists of strings to compare, the
+ * optional argvars[2] is a Dict with options, see parse_diff_optarg().
+ * With the "indices" output format "rettv" is set to a List with one Dict
+ * per diff hunk, otherwise to a String holding the unified diff.
+ *
+ * The global diff flags, diff algorithm and internal output format are
+ * changed while computing the diff.  They are restored before returning,
+ * also when parsing the options fails.
+ */
+    void
+f_diff(typval_T *argvars UNUSED, typval_T *rettv UNUSED)
+{
+#ifdef FEAT_DIFF
+    diffio_T    dio;
+
+    if (check_for_nonnull_list_arg(argvars, 0) == FAIL
+            || check_for_nonnull_list_arg(argvars, 1) == FAIL
+            || check_for_opt_nonnull_dict_arg(argvars, 2) == FAIL)
+        return;
+
+    CLEAR_FIELD(dio);
+    dio.dio_internal = TRUE;
+    ga_init2(&dio.dio_diff.dout_ga, sizeof(char *), 1000);
+
+    list_T *orig_list = argvars[0].vval.v_list;
+    list_T *new_list = argvars[1].vval.v_list;
+
+    // Save the diff settings and restore them after getting the diff.
+    int         save_diff_flags = diff_flags;
+    long        save_diff_algorithm = diff_algorithm;
+    int         save_diff_output_fmt = diff_internal_output_fmt;
+    diff_flags = DIFF_INTERNAL;
+    diff_algorithm = 0;
+    diff_internal_output_fmt = DIFF_INTERNAL_OUTPUT_UNIFIED;
+    if (argvars[2].v_type != VAR_UNKNOWN
+            && parse_diff_optarg(&argvars[2], &diff_flags, &diff_algorithm,
+                                        &diff_internal_output_fmt) == FAIL)
+    {
+        // Put back all three settings, not only the output format: the
+        // flags and the algorithm were reset above and the algorithm may
+        // have been changed by the parser before it gave the error.
+        diff_flags = save_diff_flags;
+        diff_algorithm = save_diff_algorithm;
+        diff_internal_output_fmt = save_diff_output_fmt;
+        return;
+    }
+
+    // Concatenate the List of strings into a single string using newline
+    // separator. Internal diff library expects a single string.
+    list_to_diffin(orig_list, &dio.dio_orig, diff_flags & DIFF_ICASE);
+    list_to_diffin(new_list, &dio.dio_new, diff_flags & DIFF_ICASE);
+
+    // Compute the diff
+    int diff_status = diff_file(&dio);
+
+    if (diff_status == FAIL)
+        goto done;
+
+    int         hunk_idx = 0;
+    dict_T      *hunk_dict;
+
+    if (diff_internal_output_fmt == DIFF_INTERNAL_OUTPUT_INDICES)
+    {
+        if (rettv_list_alloc(rettv) != OK)
+            goto done;
+        list_T *l = rettv->vval.v_list;
+
+        // Process each diff hunk: the grow array holds pointers to the hunks.
+        diffhunk_T *hunk = NULL;
+        while (hunk_idx < dio.dio_diff.dout_ga.ga_len)
+        {
+            hunk = ((diffhunk_T **)dio.dio_diff.dout_ga.ga_data)[hunk_idx++];
+
+            hunk_dict = get_diff_hunk_indices(hunk);
+            if (hunk_dict == NULL)
+                goto done;
+
+            list_append_dict(l, hunk_dict);
+        }
+    }
+    else
+    {
+        // The grow array holds the text of the unified diff; terminate it
+        // and return a copy.
+        ga_append(&dio.dio_diff.dout_ga, NUL);
+        rettv->v_type = VAR_STRING;
+        rettv->vval.v_string =
+            vim_strsave((char_u *)dio.dio_diff.dout_ga.ga_data);
+    }
+
+done:
+    clear_diffin(&dio.dio_new);
+    // What the grow array contains depends on the output format.
+    if (diff_internal_output_fmt == DIFF_INTERNAL_OUTPUT_INDICES)
+        clear_diffout(&dio.dio_diff);
+    else
+        ga_clear(&dio.dio_diff.dout_ga);
+    clear_diffin(&dio.dio_orig);
+    // Restore the diff settings.
+    diff_flags = save_diff_flags;
+    diff_algorithm = save_diff_algorithm;
+    diff_internal_output_fmt = save_diff_output_fmt;
+#endif
+}
+
#endif
diff --git a/src/errors.h b/src/errors.h
index 351ced9..0099431 100644
--- a/src/errors.h
+++ b/src/errors.h
@@ -258,8 +258,9 @@
EXTERN char e_using_loadkeymap_not_in_sourced_file[]
INIT(= N_("E105: Using :loadkeymap not in a sourced file"));
#endif
-// E106 unused
#ifdef FEAT_EVAL
+EXTERN char e_unsupported_diff_output_format_str[]
+ INIT(= N_("E106: Unsupported diff output format: %s"));
EXTERN char e_missing_parenthesis_str[]
INIT(= N_("E107: Missing parentheses: %s"));
EXTERN char e_no_such_variable_str[]
diff --git a/src/evalfunc.c b/src/evalfunc.c
index 9b3bdf7..67a1427 100644
--- a/src/evalfunc.c
+++ b/src/evalfunc.c
@@ -1148,6 +1148,7 @@
static argcheck_T arg3_buffer_string_any[] = {arg_buffer, arg_string, arg_any};
static argcheck_T arg3_buffer_string_dict[] = {arg_buffer, arg_string, arg_dict_any};
static argcheck_T arg3_dict_number_number[] = {arg_dict_any, arg_number, arg_number};
+static argcheck_T arg3_diff[] = {arg_list_string, arg_list_string, arg_dict_any};
static argcheck_T arg3_list_string_dict[] = {arg_list_any, arg_string, arg_dict_any};
static argcheck_T arg3_lnum_number_bool[] = {arg_lnum, arg_number, arg_bool};
static argcheck_T arg3_number[] = {arg_number, arg_number, arg_number};
@@ -1950,6 +1951,8 @@
ret_number_bool, f_deletebufline},
{"did_filetype", 0, 0, 0, NULL,
ret_number_bool, f_did_filetype},
+ {"diff", 2, 3, FEARG_1, arg3_diff,
+ ret_list_dict_any, f_diff},
{"diff_filler", 1, 1, FEARG_1, arg1_lnum,
ret_number, f_diff_filler},
{"diff_hlID", 2, 2, FEARG_1, arg2_lnum_number,
diff --git a/src/proto/diff.pro b/src/proto/diff.pro
index d14907e..9c34dcf 100644
--- a/src/proto/diff.pro
+++ b/src/proto/diff.pro
@@ -30,4 +30,5 @@
linenr_T diff_lnum_win(linenr_T lnum, win_T *wp);
void f_diff_filler(typval_T *argvars, typval_T *rettv);
void f_diff_hlID(typval_T *argvars, typval_T *rettv);
+void f_diff(typval_T *argvars, typval_T *rettv);
/* vim: set ft=c : */
diff --git a/src/proto/typval.pro b/src/proto/typval.pro
index c9845e0..b6ea131 100644
--- a/src/proto/typval.pro
+++ b/src/proto/typval.pro
@@ -26,6 +26,7 @@
int check_for_dict_arg(typval_T *args, int idx);
int check_for_nonnull_dict_arg(typval_T *args, int idx);
int check_for_opt_dict_arg(typval_T *args, int idx);
+int check_for_opt_nonnull_dict_arg(typval_T *args, int idx);
int check_for_chan_or_job_arg(typval_T *args, int idx);
int check_for_opt_chan_or_job_arg(typval_T *args, int idx);
int check_for_job_arg(typval_T *args, int idx);
diff --git a/src/testdir/test_diffmode.vim b/src/testdir/test_diffmode.vim
index 64a8818..2211ba05a 100644
--- a/src/testdir/test_diffmode.vim
+++ b/src/testdir/test_diffmode.vim
Binary files differ
diff --git a/src/typval.c b/src/typval.c
index af96b31..62958f6 100644
--- a/src/typval.c
+++ b/src/typval.c
@@ -623,6 +623,16 @@
|| check_for_dict_arg(args, idx) != FAIL) ? OK : FAIL;
}
+/*
+ * Check the optional Dict argument "args[idx]".
+ * Returns OK when the argument was not given (VAR_UNKNOWN) or when
+ * check_for_nonnull_dict_arg() accepts it, FAIL otherwise.
+ */
+    int
+check_for_opt_nonnull_dict_arg(typval_T *args, int idx)
+{
+    if (args[idx].v_type == VAR_UNKNOWN)
+        return OK;
+    if (check_for_nonnull_dict_arg(args, idx) != FAIL)
+        return OK;
+    return FAIL;
+}
+
#if defined(FEAT_JOB_CHANNEL) || defined(PROTO)
/*
* Give an error and return FAIL unless "args[idx]" is a channel or a job.
diff --git a/src/version.c b/src/version.c
index 75038dd..ad3d04b 100644
--- a/src/version.c
+++ b/src/version.c
@@ -705,6 +705,8 @@
static int included_patches[] =
{ /* Add new patch number below this line */
/**/
+ 71,
+/**/
70,
/**/
69,