patch 9.1.1476: no easy way to deduplicate text
Problem: no easy way to deduplicate text
Solution: add the :uniq ex command
(Hirohito Higashi)
closes: #17538
Signed-off-by: Hirohito Higashi <h.east.727@gmail.com>
Signed-off-by: Christian Brabandt <cb@256bit.org>
diff --git a/src/ex_cmds.c b/src/ex_cmds.c
index d3d757b..cd2c304 100644
--- a/src/ex_cmds.c
+++ b/src/ex_cmds.c
@@ -644,6 +644,237 @@
}
/*
+ * ":uniq" - remove repeated lines in the range eap->line1 .. eap->line2.
+ *
+ * Every line is compared with the key of the previous line that was kept, so
+ * only consecutive repeats are detected.  Flags accepted in eap->arg:
+ *   i       sets "sort_ic"
+ *   l       sets "sort_lc"
+ *           NOTE(review): both are presumably consulted by string_compare();
+ *           confirm there.
+ *   r       compare only the text matched by /pattern/
+ *   u       keep only the lines that are not repeated
+ *   /pat/   without "r": compare the text that follows the match
+ * With "!" (eap->forceit) one copy of every repeated line is kept and the
+ * lines that are not repeated are deleted; "u" is ignored in that case.
+ *
+ * The buffer is modified in place with ml_delete(); the range is saved for
+ * undo first.  When interrupted, the lines deleted so far stay deleted and
+ * marks and the display are still updated for them.
+ */
     void
+ex_uniq(exarg_T *eap)
+{
+    regmatch_T  regmatch;
+    int         len;
+    linenr_T    lnum;
+    long        maxlen = 0;
+    linenr_T    count = eap->line2 - eap->line1 + 1;
+    char_u      *p;
+    char_u      *s;
+    char_u      save_c = NUL;       // character overwritten by the NUL
+    int         keep_only_unique = FALSE;
+    int         keep_only_not_unique = eap->forceit ? TRUE : FALSE;
+    long        deleted = 0;
+    colnr_T     start_col;
+    colnr_T     end_col;
+    int         change_occurred = FALSE;    // Buffer contents changed.
+    int         match_continue = FALSE;     // inside a run of equal lines
+    int         next_is_unmatch = FALSE;    // force "no match" for next line
+    int         is_match;
+    linenr_T    done_lnum = eap->line1 - 1; // last line known to be kept
+    linenr_T    delete_lnum = 0;            // line to delete, 0 for none
+
+    // Uniq one line is really quick!
+    if (count <= 1)
+        return;
+
+    if (u_save((linenr_T)(eap->line1 - 1), (linenr_T)(eap->line2 + 1)) == FAIL)
+        return;
+    sortbuf1 = NULL;
+    regmatch.regprog = NULL;
+
+    sort_abort = sort_ic = sort_lc = sort_rx = sort_nr = 0;
+    sort_flt = 0;
+
+    // Parse the flags and the optional pattern.
+    for (p = eap->arg; *p != NUL; ++p)
+    {
+        if (VIM_ISWHITE(*p))
+            ;
+        else if (*p == 'i')
+            sort_ic = TRUE;
+        else if (*p == 'l')
+            sort_lc = TRUE;
+        else if (*p == 'r')
+            sort_rx = TRUE;
+        else if (*p == 'u')
+        {
+            // 'u' is only valid when '!' is not given.
+            if (!keep_only_not_unique)
+                keep_only_unique = TRUE;
+        }
+        else if (*p == '"')     // comment start
+            break;
+        else if (eap->nextcmd == NULL && check_nextcmd(p) != NULL)
+        {
+            eap->nextcmd = check_nextcmd(p);
+            break;
+        }
+        else if (!ASCII_ISALPHA(*p) && regmatch.regprog == NULL)
+        {
+            // "*p" is the pattern delimiter, "s" will point at the closing
+            // delimiter, which is overwritten to terminate the pattern.
+            s = skip_regexp_err(p + 1, *p, TRUE);
+            if (s == NULL)
+                goto uniqend;
+            *s = NUL;
+            // Use last search pattern if uniq pattern is empty.
+            if (s == p + 1)
+            {
+                if (last_search_pat() == NULL)
+                {
+                    emsg(_(e_no_previous_regular_expression));
+                    goto uniqend;
+                }
+                regmatch.regprog = vim_regcomp(last_search_pat(), RE_MAGIC);
+            }
+            else
+                regmatch.regprog = vim_regcomp(p + 1, RE_MAGIC);
+            if (regmatch.regprog == NULL)
+                goto uniqend;
+            p = s;              // continue after the regexp
+            regmatch.rm_ic = p_ic;
+        }
+        else
+        {
+            semsg(_(e_invalid_argument_str), p);
+            goto uniqend;
+        }
+    }
+
+    // Find the length of the longest line in the range: "sortbuf1" holds the
+    // compare key of the previously kept line and a key is never longer than
+    // the line it was taken from.
+    for (lnum = eap->line1; lnum <= eap->line2; ++lnum)
+    {
+        len = ml_get_len(lnum);
+        if (maxlen < len)
+            maxlen = len;
+
+        if (got_int)
+            goto uniqend;
+    }
+
+    // Allocate a buffer that can hold the longest line.
+    sortbuf1 = alloc(maxlen + 1);
+    if (sortbuf1 == NULL)
+        goto uniqend;
+
+    // Delete lines according to options.  "i" is the offset of the current
+    // line in the (shrinking) range; after a deletion it is moved back so
+    // that the line that took the place of the deleted one is visited.
+    for (linenr_T i = 0; i < count; ++i)
+    {
+        linenr_T get_lnum = eap->line1 + i;
+
+        s = ml_get(get_lnum);
+        len = ml_get_len(get_lnum);
+
+        // Work out which part of the line is the compare key.
+        start_col = 0;
+        end_col = len;
+        if (regmatch.regprog != NULL && vim_regexec(&regmatch, s, 0))
+        {
+            if (sort_rx)
+            {
+                start_col = (colnr_T)(regmatch.startp[0] - s);
+                end_col = (colnr_T)(regmatch.endp[0] - s);
+            }
+            else
+                start_col = (colnr_T)(regmatch.endp[0] - s);
+        }
+        else if (regmatch.regprog != NULL)
+            end_col = 0;
+
+        // Temporarily terminate the key inside the line; the character is
+        // put back below, before any line is deleted.
+        if (end_col > 0)
+        {
+            save_c = s[end_col];
+            s[end_col] = NUL;
+        }
+
+        // The first line of the range has nothing to be compared with.
+        is_match = i > 0 ? !string_compare(&s[start_col], sortbuf1) : FALSE;
+        delete_lnum = 0;
+        if (next_is_unmatch)
+        {
+            is_match = FALSE;
+            next_is_unmatch = FALSE;
+        }
+
+        if (!keep_only_unique && !keep_only_not_unique)
+        {
+            // Default: keep the first line of a run, delete the repeats.
+            if (is_match)
+                delete_lnum = get_lnum;
+            else
+                STRCPY(sortbuf1, &s[start_col]);
+        }
+        else if (keep_only_not_unique)
+        {
+            if (is_match)
+            {
+                done_lnum = get_lnum - 1;
+                delete_lnum = get_lnum;
+                match_continue = TRUE;
+            }
+            else
+            {
+                // The previous line was not repeated: delete it.  The last
+                // line of the range cannot be followed by a repeat either.
+                if (i > 0 && !match_continue && get_lnum - 1 > done_lnum)
+                {
+                    delete_lnum = get_lnum - 1;
+                    next_is_unmatch = TRUE;
+                }
+                else if (i >= count - 1)
+                    delete_lnum = get_lnum;
+                match_continue = FALSE;
+                STRCPY(sortbuf1, &s[start_col]);
+            }
+        }
+        else // keep_only_unique
+        {
+            if (is_match)
+            {
+                // On the first repeat delete the line that started the run,
+                // after that delete the repeats themselves.
+                if (!match_continue)
+                    delete_lnum = get_lnum - 1;
+                else
+                    delete_lnum = get_lnum;
+                match_continue = TRUE;
+            }
+            else
+            {
+                // "i" was moved back to the first line of the range while
+                // still inside a run: this is the last line of that run.
+                if (i == 0 && match_continue)
+                    delete_lnum = get_lnum;
+                match_continue = FALSE;
+                STRCPY(sortbuf1, &s[start_col]);
+            }
+        }
+
+        if (end_col > 0)
+            s[end_col] = save_c;
+
+        if (delete_lnum > 0)
+        {
+            ml_delete(delete_lnum);
+            i -= get_lnum - delete_lnum + 1;
+            count--;
+            deleted++;
+            change_occurred = TRUE;
+        }
+
+        // When interrupted leave the loop but still do the bookkeeping
+        // below for the lines that have already been deleted.
+        fast_breakcheck();
+        if (got_int)
+            break;
+    }
+
+    // Adjust marks for deleted lines and prepare for displaying.  Only do
+    // this when something was deleted: with MAXLNUM mark_adjust() removes
+    // the marks in the given range, which would hit the last line of the
+    // range even though it was not touched.
+    if (deleted > 0)
+    {
+        mark_adjust(eap->line2 - deleted, eap->line2, (long)MAXLNUM, -deleted);
+        msgmore(-deleted);
+    }
+
+    if (change_occurred)
+        changed_lines(eap->line1, 0, eap->line2 + 1, -deleted);
+
+    curwin->w_cursor.lnum = eap->line1;
+    beginline(BL_WHITE | BL_FIX);
+
+uniqend:
+    vim_free(sortbuf1);
+    vim_regfree(regmatch.regprog);
+    if (got_int)
+        emsg(_(e_interrupted));
+}
+
+/*
* :move command - move lines line1-line2 to line dest
*
* return FAIL for failure, OK otherwise