patch 9.1.1476: no easy way to deduplicate text
Problem: no easy way to deduplicate text
Solution: add the :uniq ex command
(Hirohito Higashi)
closes: #17538
Signed-off-by: Hirohito Higashi <h.east.727@gmail.com>
Signed-off-by: Christian Brabandt <cb@256bit.org>
diff --git a/runtime/doc/builtin.txt b/runtime/doc/builtin.txt
index 8c79f56..8bd914b 100644
--- a/runtime/doc/builtin.txt
+++ b/runtime/doc/builtin.txt
@@ -1,4 +1,4 @@
-*builtin.txt* For Vim version 9.1. Last change: 2025 Jun 07
+*builtin.txt* For Vim version 9.1. Last change: 2025 Jun 23
VIM REFERENCE MANUAL by Bram Moolenaar
@@ -12143,6 +12143,7 @@
:let newlist = uniq(copy(mylist))
< The default compare function uses the string representation of
each item. For the use of {func} and {dict} see |sort()|.
+ For deduplicating text in the current buffer see |:uniq|.
Returns zero if {list} is not a |List|.
diff --git a/runtime/doc/change.txt b/runtime/doc/change.txt
index 6bf9e2c..65e4e72 100644
--- a/runtime/doc/change.txt
+++ b/runtime/doc/change.txt
@@ -1,4 +1,4 @@
-*change.txt* For Vim version 9.1. Last change: 2025 May 28
+*change.txt* For Vim version 9.1. Last change: 2025 Jun 23
VIM REFERENCE MANUAL by Bram Moolenaar
@@ -20,6 +20,7 @@
5. Copying and moving text |copy-move|
6. Formatting text |formatting|
7. Sorting text |sorting|
+8. Deduplicating text |deduplicating|
For inserting text see |insert.txt|.
@@ -1895,6 +1896,7 @@
Vim has a sorting function and a sorting command. The sorting function can be
found here: |sort()|, |uniq()|.
+Also see |:uniq|.
*:sor* *:sort*
:[range]sor[t][!] [b][f][i][l][n][o][r][u][x] [/{pattern}/]
@@ -1904,7 +1906,7 @@
With [!] the order is reversed.
With [i] case is ignored.
-
+ *:sort-l*
With [l] sort uses the current collation locale.
Implementation details: strcoll() is used to compare
strings. See |:language| to check or set the collation
@@ -1937,13 +1939,14 @@
With [b] sorting is done on the first binary number in
the line (after or inside a {pattern} match).
-
+ *:sort-u* *:sort-uniq*
With [u] (u stands for unique) only keep the first of
a sequence of identical lines (ignoring case when [i]
is used). Without this flag, a sequence of identical
lines will be kept in their original order.
Note that leading and trailing white space may cause
lines to be different.
+ To only remove adjacent duplicate lines, use |:uniq|.
When /{pattern}/ is specified and there is no [r] flag
the text matched with {pattern} is skipped, so that
@@ -1990,4 +1993,56 @@
process you may end up with duplicated lines. This also depends on the system
library function used.
+==============================================================================
+8. Deduplicating text *deduplicating* *unique*
+
+Vim has a deduplicating function and a deduplicating command. The
+deduplicating function can be found here: |uniq()|.
+Also see |:sort-uniq|.
+
+ *:uni* *:uniq*
+:[range]uni[q][!] [i][l][r][u] [/{pattern}/]
+ Remove duplicate lines that are adjacent to each other
+ in [range]. When no range is given, all lines are
+ processed.
+
+ With [i] case is ignored when comparing lines.
+
+ With [l] comparison uses the current collation locale.
+ See |:sort-l| for more details.
+
+ With [r] comparison is done on the text that matches
+ /{pattern}/ instead of the full line.
+
+ When /{pattern}/ is specified and [r] is not used, the
+ text matched with {pattern} is skipped and comparison
+ is done on what comes after the match.
+ 'ignorecase' applies to the pattern, but 'smartcase'
+ is not used.
+
+ Instead of the slash any non-letter can be used.
+
+ For example, to remove adjacent duplicate lines based
+ on the first comma-separated field: >
+ :uniq r /[^,]*,/
+< Or to keep only unique lines ignoring the first 5
+ characters: >
+ :uniq u /.\{5}/
+< If {pattern} is empty (e.g. // is used), the last
+ search pattern is used.
+
+ With [u] only keep lines that do not repeat, i.e. have
+ no identical line directly before or after them.
+
+ With [!] only keep the first line of each sequence
+ of adjacent duplicate lines.
+
+ If both [!] and [u] are given, [u] is ignored and [!]
+ takes effect.
+
+ Note that lines which differ in leading or trailing
+ white space, or are not adjacent, are not duplicates.
+ To remove all duplicates regardless of position, use
+ |:sort-u| or external tools.
+
vim:tw=78:ts=8:noet:ft=help:norl:
diff --git a/runtime/doc/index.txt b/runtime/doc/index.txt
index ff27994..11c0efd 100644
--- a/runtime/doc/index.txt
+++ b/runtime/doc/index.txt
@@ -1,4 +1,4 @@
-*index.txt* For Vim version 9.1. Last change: 2025 Jun 02
+*index.txt* For Vim version 9.1. Last change: 2025 Jun 23
VIM REFERENCE MANUAL by Bram Moolenaar
@@ -1740,6 +1740,7 @@
|:unabbreviate| :una[bbreviate] remove abbreviation
|:unhide| :unh[ide] open a window for each loaded file in the
buffer list
+|:uniq| :uni[q] remove adjacent duplicate lines
|:unlet| :unl[et] delete variable
|:unlockvar| :unlo[ckvar] unlock variables
|:unmap| :unm[ap] remove mapping
diff --git a/runtime/doc/tags b/runtime/doc/tags
index afa4a05..a8a0804 100644
--- a/runtime/doc/tags
+++ b/runtime/doc/tags
@@ -3331,6 +3331,9 @@
:so repeat.txt /*:so*
:sor change.txt /*:sor*
:sort change.txt /*:sort*
+:sort-l change.txt /*:sort-l*
+:sort-u change.txt /*:sort-u*
+:sort-uniq change.txt /*:sort-uniq*
:source repeat.txt /*:source*
:source! repeat.txt /*:source!*
:source-range repeat.txt /*:source-range*
@@ -3565,6 +3568,8 @@
:undolist undo.txt /*:undolist*
:unh windows.txt /*:unh*
:unhide windows.txt /*:unhide*
+:uni change.txt /*:uni*
+:uniq change.txt /*:uniq*
:unl eval.txt /*:unl*
:unlet eval.txt /*:unlet*
:unlet-$ eval.txt /*:unlet-$*
@@ -6864,6 +6869,7 @@
debugger.txt debugger.txt /*debugger.txt*
dec-mouse options.txt /*dec-mouse*
decada_members ft_ada.txt /*decada_members*
+deduplicating change.txt /*deduplicating*
deepcopy() builtin.txt /*deepcopy()*
default-constructor vim9class.txt /*default-constructor*
defaults.vim starting.txt /*defaults.vim*
@@ -11015,6 +11021,7 @@
undotree() builtin.txt /*undotree()*
unicode mbyte.txt /*unicode*
uniq() builtin.txt /*uniq()*
+unique change.txt /*unique*
unix os_unix.txt /*unix*
unlisted-buffer windows.txt /*unlisted-buffer*
up-down-motions motion.txt /*up-down-motions*
diff --git a/runtime/doc/version9.txt b/runtime/doc/version9.txt
index cd72d73..606ec79 100644
--- a/runtime/doc/version9.txt
+++ b/runtime/doc/version9.txt
@@ -1,4 +1,4 @@
-*version9.txt* For Vim version 9.1. Last change: 2025 Jun 16
+*version9.txt* For Vim version 9.1. Last change: 2025 Jun 23
VIM REFERENCE MANUAL by Bram Moolenaar
@@ -41756,6 +41756,7 @@
|:pbuffer| Edit buffer [N] from the buffer list in the preview
window
|:redrawtabpanel| Force updating the 'tabpanel'.
+|:uniq| Deduplicate text in the current buffer.
Options: ~
diff --git a/src/ex_cmdidxs.h b/src/ex_cmdidxs.h
index 1f01c2f..92176ce 100644
--- a/src/ex_cmdidxs.h
+++ b/src/ex_cmdidxs.h
@@ -26,11 +26,11 @@
/* s */ 406,
/* t */ 476,
/* u */ 523,
- /* v */ 534,
- /* w */ 555,
- /* x */ 569,
- /* y */ 579,
- /* z */ 580
+ /* v */ 535,
+ /* w */ 556,
+ /* x */ 570,
+ /* y */ 580,
+ /* z */ 581
};
/*
@@ -61,7 +61,7 @@
/* r */ { 0, 0, 0, 0, 0, 0, 0, 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 20, 0, 0, 0, 0 },
/* s */ { 2, 6, 15, 0, 19, 23, 0, 25, 26, 0, 0, 29, 31, 35, 39, 41, 0, 50, 0, 51, 0, 64, 65, 0, 66, 0 },
/* t */ { 2, 0, 19, 0, 24, 26, 0, 27, 0, 29, 0, 30, 34, 37, 39, 40, 0, 41, 43, 0, 44, 0, 0, 0, 46, 0 },
- /* u */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ /* u */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
/* v */ { 1, 0, 0, 0, 2, 0, 0, 0, 5, 0, 0, 0, 12, 15, 0, 0, 0, 0, 18, 0, 19, 0, 0, 0, 0, 0 },
/* w */ { 2, 0, 0, 0, 0, 0, 0, 3, 4, 0, 0, 0, 0, 8, 0, 9, 10, 0, 0, 0, 12, 13, 0, 0, 0, 0 },
/* x */ { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 5, 0, 0, 0, 7, 0, 0, 8, 0, 0, 0, 0, 0 },
@@ -69,4 +69,4 @@
/* z */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
};
-static const int command_count = 597;
+static const int command_count = 598;
diff --git a/src/ex_cmds.c b/src/ex_cmds.c
index d3d757b..cd2c304 100644
--- a/src/ex_cmds.c
+++ b/src/ex_cmds.c
@@ -644,6 +644,237 @@
}
/*
+ * ":uniq".
+ */
+    void
+ex_uniq(exarg_T *eap)
+{
+    regmatch_T regmatch;
+    int len;
+    linenr_T lnum;
+    long maxlen = 0; // length of the longest line in the range
+    linenr_T count = eap->line2 - eap->line1 + 1; // lines of the range left
+    char_u *p;
+    char_u *s;
+    char_u save_c; // temporary character storage
+    int keep_only_unique = FALSE; // [u]: drop every line that has a duplicate
+    int keep_only_not_unique = eap->forceit ? TRUE : FALSE; // [!] was given
+    long deleted = 0; // number of lines deleted so far
+    colnr_T start_col; // start of the text to compare
+    colnr_T end_col; // end of the text to compare (exclusive)
+    int change_occurred = FALSE; // Buffer contents changed.
+
+    // Uniq one line is really quick!
+    if (count <= 1)
+        return;
+
+    if (u_save((linenr_T)(eap->line1 - 1), (linenr_T)(eap->line2 + 1)) == FAIL)
+        return;
+    sortbuf1 = NULL;
+    regmatch.regprog = NULL;
+
+    sort_abort = sort_ic = sort_lc = sort_rx = sort_nr = 0;
+    sort_flt = 0;
+
+    for (p = eap->arg; *p != NUL; ++p)
+    {
+        if (VIM_ISWHITE(*p))
+            ;
+        else if (*p == 'i')
+            sort_ic = TRUE;
+        else if (*p == 'l')
+            sort_lc = TRUE;
+        else if (*p == 'r')
+            sort_rx = TRUE;
+        else if (*p == 'u')
+        {
+            // 'u' is only valid when '!' is not given.
+            if (!keep_only_not_unique)
+                keep_only_unique = TRUE;
+        }
+        else if (*p == '"') // comment start
+            break;
+        else if (eap->nextcmd == NULL && check_nextcmd(p) != NULL)
+        {
+            eap->nextcmd = check_nextcmd(p);
+            break;
+        }
+        else if (!ASCII_ISALPHA(*p) && regmatch.regprog == NULL)
+        {
+            s = skip_regexp_err(p + 1, *p, TRUE);
+            if (s == NULL)
+                goto uniqend;
+            *s = NUL;
+            // Use last search pattern if uniq pattern is empty.
+            if (s == p + 1)
+            {
+                if (last_search_pat() == NULL)
+                {
+                    emsg(_(e_no_previous_regular_expression));
+                    goto uniqend;
+                }
+                regmatch.regprog = vim_regcomp(last_search_pat(), RE_MAGIC);
+            }
+            else
+                regmatch.regprog = vim_regcomp(p + 1, RE_MAGIC);
+            if (regmatch.regprog == NULL)
+                goto uniqend;
+            p = s; // continue after the regexp
+            regmatch.rm_ic = p_ic;
+        }
+        else
+        {
+            semsg(_(e_invalid_argument_str), p);
+            goto uniqend;
+        }
+    }
+
+    // Find the length of the longest line in the range, so that a single
+    // buffer ("sortbuf1") can hold the text to compare of any line.
+    // Which part of a line is compared (the whole line, the text matching
+    // the pattern or the text after the match) is only worked out in the
+    // main loop below; that part is never longer than the line itself.
+    // Stop when interrupted.
+    for (lnum = eap->line1; lnum <= eap->line2; ++lnum)
+    {
+        s = ml_get(lnum);
+        len = ml_get_len(lnum);
+        if (maxlen < len)
+            maxlen = len;
+
+        if (got_int)
+            goto uniqend;
+    }
+
+    // Allocate a buffer that can hold the longest line.
+    sortbuf1 = alloc(maxlen + 1);
+    if (sortbuf1 == NULL)
+        goto uniqend;
+
+    // Delete lines according to options.
+    int match_continue = FALSE; // inside a group of lines equal to "sortbuf1"
+    int next_is_unmatch = FALSE; // treat the next line visited as not matching
+    int have_prev = FALSE; // "sortbuf1" holds the text of an earlier line
+    int is_match;
+    linenr_T done_lnum = eap->line1 - 1; // [!]: first line of the latest group
+    linenr_T delete_lnum = 0; // line to delete in this pass, 0 for none
+    for (linenr_T i = 0; i < count; ++i)
+    {
+        linenr_T get_lnum = eap->line1 + i;
+
+        s = ml_get(get_lnum);
+        len = ml_get_len(get_lnum);
+
+        start_col = 0;
+        end_col = len;
+        if (regmatch.regprog != NULL && vim_regexec(&regmatch, s, 0))
+        {
+            if (sort_rx)
+            {
+                start_col = (colnr_T)(regmatch.startp[0] - s);
+                end_col = (colnr_T)(regmatch.endp[0] - s);
+            }
+            else
+                start_col = (colnr_T)(regmatch.endp[0] - s);
+        }
+        else if (regmatch.regprog != NULL)
+            end_col = 0; // no match: compare the whole line
+        if (end_col > 0)
+        {
+            save_c = s[end_col];
+            s[end_col] = NUL;
+        }
+
+        // After deleting line1 "i" is 0 again, thus "i > 0" can't be used here.
+        is_match = have_prev ? !string_compare(&s[start_col], sortbuf1) : FALSE;
+        have_prev = TRUE;
+        delete_lnum = 0;
+        if (next_is_unmatch)
+        {
+            is_match = FALSE;
+            next_is_unmatch = FALSE;
+        }
+
+        if (!keep_only_unique && !keep_only_not_unique)
+        {
+            if (is_match)
+                delete_lnum = get_lnum;
+            else
+                STRCPY(sortbuf1, &s[start_col]);
+        }
+        else if (keep_only_not_unique)
+        {
+            if (is_match)
+            {
+                done_lnum = get_lnum - 1;
+                delete_lnum = get_lnum;
+                match_continue = TRUE;
+            }
+            else
+            {
+                if (i > 0 && !match_continue && get_lnum - 1 > done_lnum)
+                {
+                    delete_lnum = get_lnum - 1;
+                    next_is_unmatch = TRUE;
+                }
+                else if (i >= count - 1)
+                    delete_lnum = get_lnum;
+                match_continue = FALSE;
+                STRCPY(sortbuf1, &s[start_col]);
+            }
+        }
+        else // keep_only_unique
+        {
+            if (is_match)
+            {
+                if (!match_continue)
+                    delete_lnum = get_lnum - 1;
+                else
+                    delete_lnum = get_lnum;
+                match_continue = TRUE;
+            }
+            else
+            {
+                match_continue = FALSE;
+                STRCPY(sortbuf1, &s[start_col]);
+            }
+        }
+
+        if (end_col > 0)
+            s[end_col] = save_c;
+
+        if (delete_lnum > 0)
+        {
+            ml_delete(delete_lnum);
+            i -= get_lnum - delete_lnum + 1;
+            count--;
+            deleted++;
+            change_occurred = TRUE;
+        }
+
+        fast_breakcheck();
+        if (got_int)
+            goto uniqend;
+    }
+
+    // Adjust marks for deleted lines and prepare for displaying.
+    mark_adjust(eap->line2 - deleted, eap->line2, (long)MAXLNUM, -deleted);
+    msgmore(-deleted);
+
+    if (change_occurred)
+        changed_lines(eap->line1, 0, eap->line2 + 1, -deleted);
+
+    curwin->w_cursor.lnum = eap->line1;
+    beginline(BL_WHITE | BL_FIX);
+
+uniqend:
+    vim_free(sortbuf1);
+    vim_regfree(regmatch.regprog);
+    if (got_int)
+        emsg(_(e_interrupted));
+}
+
+/*
* :move command - move lines line1-line2 to line dest
*
* return FAIL for failure, OK otherwise
diff --git a/src/ex_cmds.h b/src/ex_cmds.h
index 2bbf5ef..3cb8a18 100644
--- a/src/ex_cmds.h
+++ b/src/ex_cmds.h
@@ -1700,6 +1700,9 @@
EXCMD(CMD_unhide, "unhide", ex_buffer_all,
EX_RANGE|EX_COUNT|EX_TRLBAR,
ADDR_OTHER),
+EXCMD(CMD_uniq, "uniq", ex_uniq,
+ EX_RANGE|EX_DFLALL|EX_WHOLEFOLD|EX_BANG|EX_EXTRA|EX_NOTRLCOM|EX_MODIFY,
+ ADDR_LINES),
EXCMD(CMD_unlet, "unlet", ex_unlet,
EX_BANG|EX_EXTRA|EX_NEEDARG|EX_SBOXOK|EX_CMDWIN|EX_LOCK_OK,
ADDR_NONE),
diff --git a/src/ex_getln.c b/src/ex_getln.c
index a956023..36775ba 100644
--- a/src/ex_getln.c
+++ b/src/ex_getln.c
@@ -276,7 +276,8 @@
else if (*cmd == 's' && cmd[1] == 'n')
magic_overruled = OPTION_MAGIC_OFF;
}
- else if (STRNCMP(cmd, "sort", MAX(p - cmd, 3)) == 0)
+ else if (STRNCMP(cmd, "sort", MAX(p - cmd, 3)) == 0
+ || STRNCMP(cmd, "uniq", MAX(p - cmd, 3)) == 0)
{
// skip over ! and flags
if (*p == '!')
diff --git a/src/proto/ex_cmds.pro b/src/proto/ex_cmds.pro
index c083cec..36cb6bd 100644
--- a/src/proto/ex_cmds.pro
+++ b/src/proto/ex_cmds.pro
@@ -2,6 +2,7 @@
void do_ascii(exarg_T *eap);
void ex_align(exarg_T *eap);
void ex_sort(exarg_T *eap);
+void ex_uniq(exarg_T *eap);
int do_move(linenr_T line1, linenr_T line2, linenr_T dest);
void ex_copy(linenr_T line1, linenr_T line2, linenr_T n);
void free_prev_shellcmd(void);
diff --git a/src/testdir/Make_all.mak b/src/testdir/Make_all.mak
index ef86a7d..1cbede6 100644
--- a/src/testdir/Make_all.mak
+++ b/src/testdir/Make_all.mak
@@ -331,6 +331,7 @@
test_trycatch \
test_tuple \
test_undo \
+ test_uniq \
test_unlet \
test_user_func \
test_usercommands \
@@ -586,6 +587,7 @@
test_trycatch.res \
test_tuple.res \
test_undo.res \
+ test_uniq.res \
test_user_func.res \
test_usercommands.res \
test_vartabs.res \
diff --git a/src/testdir/test_uniq.vim b/src/testdir/test_uniq.vim
new file mode 100644
index 0000000..a60fd49
--- /dev/null
+++ b/src/testdir/test_uniq.vim
@@ -0,0 +1,612 @@
+" Tests for the ":uniq" command.
+
+source check.vim
+
+" Table-driven tests for ":uniq" (flags, [!], patterns) plus error cases.
+func Test_uniq_cmd()
+ let tests = [
+ \ {
+ \ 'name' : 'Alphabetical uniq #1',
+ \ 'cmd' : '%uniq',
+ \ 'input' : [
+ \ 'abc',
+ \ 'ab',
+ \ 'a',
+ \ 'a321',
+ \ 'a123',
+ \ 'a123',
+ \ 'a123',
+ \ 'a123',
+ \ 'a122',
+ \ 'a123',
+ \ 'b321',
+ \ 'c123d',
+ \ ' 123b',
+ \ 'c321d',
+ \ 'b322b',
+ \ 'b321',
+ \ 'b321b'
+ \ ],
+ \ 'expected' : [
+ \ 'abc',
+ \ 'ab',
+ \ 'a',
+ \ 'a321',
+ \ 'a123',
+ \ 'a122',
+ \ 'a123',
+ \ 'b321',
+ \ 'c123d',
+ \ ' 123b',
+ \ 'c321d',
+ \ 'b322b',
+ \ 'b321',
+ \ 'b321b'
+ \ ]
+ \ },
+ \ {
+ \ 'name' : 'Alphabetical uniq #2',
+ \ 'cmd' : '%uniq',
+ \ 'input' : [
+ \ 'abc',
+ \ 'abc',
+ \ 'abc',
+ \ 'ab',
+ \ 'a',
+ \ 'a321',
+ \ 'a122',
+ \ 'b321',
+ \ 'a123',
+ \ 'a123',
+ \ 'c123d',
+ \ ' 123b',
+ \ 'c321d',
+ \ 'b322b',
+ \ 'b321',
+ \ 'b321b'
+ \ ],
+ \ 'expected' : [
+ \ 'abc',
+ \ 'ab',
+ \ 'a',
+ \ 'a321',
+ \ 'a122',
+ \ 'b321',
+ \ 'a123',
+ \ 'c123d',
+ \ ' 123b',
+ \ 'c321d',
+ \ 'b322b',
+ \ 'b321',
+ \ 'b321b'
+ \ ]
+ \ },
+ \ {
+ \ 'name' : 'alphabetical, uniqed input',
+ \ 'cmd' : 'uniq',
+ \ 'input' : [
+ \ 'a',
+ \ 'b',
+ \ 'c',
+ \ ],
+ \ 'expected' : [
+ \ 'a',
+ \ 'b',
+ \ 'c',
+ \ ]
+ \ },
+ \ {
+ \ 'name' : 'alphabetical, uniqed input, unique at end',
+ \ 'cmd' : 'uniq',
+ \ 'input' : [
+ \ 'aa',
+ \ 'bb',
+ \ 'cc',
+ \ 'cc',
+ \ ],
+ \ 'expected' : [
+ \ 'aa',
+ \ 'bb',
+ \ 'cc',
+ \ ]
+ \ },
+ \ {
+ \ 'name' : 'uniq one line buffer',
+ \ 'cmd' : 'uniq',
+ \ 'input' : [
+ \ 'single line'
+ \ ],
+ \ 'expected' : [
+ \ 'single line'
+ \ ]
+ \ },
+ \ {
+ \ 'name' : 'uniq ignoring case',
+ \ 'cmd' : '%uniq i',
+ \ 'input' : [
+ \ 'BB',
+ \ 'Cc',
+ \ 'cc',
+ \ 'Cc',
+ \ 'aa'
+ \ ],
+ \ 'expected' : [
+ \ 'BB',
+ \ 'Cc',
+ \ 'aa'
+ \ ]
+ \ },
+ \ {
+ \ 'name' : 'uniq not uniqued #1',
+ \ 'cmd' : '%uniq!',
+ \ 'input' : [
+ \ 'aa',
+ \ 'cc',
+ \ 'cc',
+ \ 'cc',
+ \ 'bb',
+ \ 'aa',
+ \ 'yyy',
+ \ 'yyy',
+ \ 'zz'
+ \ ],
+ \ 'expected' : [
+ \ 'cc',
+ \ 'yyy',
+ \ ]
+ \ },
+ \ {
+ \ 'name' : 'uniq not uniqued #2',
+ \ 'cmd' : '%uniq!',
+ \ 'input' : [
+ \ 'aa',
+ \ 'aa',
+ \ 'bb',
+ \ 'cc',
+ \ 'cc',
+ \ 'cc',
+ \ 'yyy',
+ \ 'yyy',
+ \ 'zz'
+ \ ],
+ \ 'expected' : [
+ \ 'aa',
+ \ 'cc',
+ \ 'yyy',
+ \ ]
+ \ },
+ \ {
+ \ 'name' : 'uniq not uniqued ("u" is ignored)',
+ \ 'cmd' : '%uniq! u',
+ \ 'input' : [
+ \ 'aa',
+ \ 'cc',
+ \ 'cc',
+ \ 'cc',
+ \ 'bb',
+ \ 'aa',
+ \ 'yyy',
+ \ 'yyy',
+ \ 'zz'
+ \ ],
+ \ 'expected' : [
+ \ 'cc',
+ \ 'yyy',
+ \ ]
+ \ },
+ \ {
+ \ 'name' : 'uniq not uniqued, ignoring case',
+ \ 'cmd' : '%uniq! i',
+ \ 'input' : [
+ \ 'aa',
+ \ 'cc',
+ \ 'cc',
+ \ 'Cc',
+ \ 'bb',
+ \ 'aa',
+ \ 'yyy',
+ \ 'yyy',
+ \ 'zz'
+ \ ],
+ \ 'expected' : [
+ \ 'cc',
+ \ 'yyy',
+ \ ]
+ \ },
+ \ {
+ \ 'name' : 'uniq only unique #1',
+ \ 'cmd' : '%uniq u',
+ \ 'input' : [
+ \ 'aa',
+ \ 'cc',
+ \ 'cc',
+ \ 'cc',
+ \ 'bb',
+ \ 'aa',
+ \ 'yyy',
+ \ 'yyy',
+ \ 'zz'
+ \ ],
+ \ 'expected' : [
+ \ 'aa',
+ \ 'bb',
+ \ 'aa',
+ \ 'zz'
+ \ ]
+ \ },
+ \ {
+ \ 'name' : 'uniq only unique #2',
+ \ 'cmd' : '%uniq u',
+ \ 'input' : [
+ \ 'aa',
+ \ 'aa',
+ \ 'bb',
+ \ 'cc',
+ \ 'cc',
+ \ 'cc',
+ \ 'yyy',
+ \ 'yyy',
+ \ 'zz'
+ \ ],
+ \ 'expected' : [
+ \ 'bb',
+ \ 'zz'
+ \ ]
+ \ },
+ \ {
+ \ 'name' : 'uniq only unique, ignoring case',
+ \ 'cmd' : '%uniq ui',
+ \ 'input' : [
+ \ 'aa',
+ \ 'cc',
+ \ 'Cc',
+ \ 'cc',
+ \ 'bb',
+ \ 'aa',
+ \ 'yyy',
+ \ 'yyy',
+ \ 'zz'
+ \ ],
+ \ 'expected' : [
+ \ 'aa',
+ \ 'bb',
+ \ 'aa',
+ \ 'zz'
+ \ ]
+ \ },
+ \ {
+ \ 'name' : 'uniq on first 2 charscters',
+ \ 'cmd' : '%uniq r /^../',
+ \ 'input' : [
+ \ 'aa',
+ \ 'cc',
+ \ 'cc1',
+ \ 'cc2',
+ \ 'bb',
+ \ 'aa',
+ \ 'yyy',
+ \ 'yyy2',
+ \ 'zz'
+ \ ],
+ \ 'expected' : [
+ \ 'aa',
+ \ 'cc',
+ \ 'bb',
+ \ 'aa',
+ \ 'yyy',
+ \ 'zz'
+ \ ]
+ \ },
+ \ {
+ \ 'name' : 'uniq on after 2 charscters',
+ \ 'cmd' : '%uniq /^../',
+ \ 'input' : [
+ \ '11aa',
+ \ '11cc',
+ \ '13cc',
+ \ '13cc',
+ \ '13bb',
+ \ '13aa',
+ \ '12yyy',
+ \ '11yyy',
+ \ '11zz'
+ \ ],
+ \ 'expected' : [
+ \ '11aa',
+ \ '11cc',
+ \ '13bb',
+ \ '13aa',
+ \ '12yyy',
+ \ '11zz'
+ \ ]
+ \ },
+ \ {
+ \ 'name' : 'uniq on first 2 charscters, not uniqued',
+ \ 'cmd' : '%uniq! r /^../',
+ \ 'input' : [
+ \ 'aa',
+ \ 'cc',
+ \ 'cc1',
+ \ 'cc2',
+ \ 'bb',
+ \ 'aa',
+ \ 'yyy',
+ \ 'yyy2',
+ \ 'zz'
+ \ ],
+ \ 'expected' : [
+ \ 'cc',
+ \ 'yyy'
+ \ ]
+ \ },
+ \ {
+ \ 'name' : 'uniq on after 2 charscters, not uniqued',
+ \ 'cmd' : '%uniq! /^../',
+ \ 'input' : [
+ \ '11aa',
+ \ '11cc',
+ \ '13cc',
+ \ '13cc',
+ \ '13bb',
+ \ '13aa',
+ \ '12yyy',
+ \ '11yyy',
+ \ '11zz'
+ \ ],
+ \ 'expected' : [
+ \ '11cc',
+ \ '12yyy'
+ \ ]
+ \ },
+ \ {
+ \ 'name' : 'uniq on first 2 charscters, only unique',
+ \ 'cmd' : '%uniq ru /^../',
+ \ 'input' : [
+ \ 'aa',
+ \ 'cc',
+ \ 'cc1',
+ \ 'cc2',
+ \ 'bb',
+ \ 'aa',
+ \ 'yyy',
+ \ 'yyy2',
+ \ 'zz'
+ \ ],
+ \ 'expected' : [
+ \ 'aa',
+ \ 'bb',
+ \ 'aa',
+ \ 'zz'
+ \ ]
+ \ },
+ \ {
+ \ 'name' : 'uniq on after 2 charscters, only unique',
+ \ 'cmd' : '%uniq u /^../',
+ \ 'input' : [
+ \ '11aa',
+ \ '11cc',
+ \ '13cc',
+ \ '13cc',
+ \ '13bb',
+ \ '13aa',
+ \ '12yyy',
+ \ '11yyy',
+ \ '11zz'
+ \ ],
+ \ 'expected' : [
+ \ '11aa',
+ \ '13bb',
+ \ '13aa',
+ \ '11zz'
+ \ ]
+ \ }
+ \ ]
+
+ " This does not appear to work correctly on Mac.
+ if !has('mac')
+ if v:collate =~? '^\(en\|fr\)_ca.utf-\?8$'
+ " With [l] alone this input is expected to stay unchanged: no two
+ " adjacent lines are identical.
+ let tests += [
+ \ {
+ \ 'name' : 'uniq with locale ' .. v:collate,
+ \ 'cmd' : '%uniq l',
+ \ 'input' : [
+ \ 'A',
+ \ 'a',
+ \ 'À',
+ \ 'à',
+ \ 'E',
+ \ 'e',
+ \ 'É',
+ \ 'é',
+ \ 'È',
+ \ 'è',
+ \ 'O',
+ \ 'o',
+ \ 'Ô',
+ \ 'ô',
+ \ 'œ',
+ \ 'Z',
+ \ 'z'
+ \ ],
+ \ 'expected' : [
+ \ 'A',
+ \ 'a',
+ \ 'À',
+ \ 'à',
+ \ 'E',
+ \ 'e',
+ \ 'É',
+ \ 'é',
+ \ 'È',
+ \ 'è',
+ \ 'O',
+ \ 'o',
+ \ 'Ô',
+ \ 'ô',
+ \ 'œ',
+ \ 'Z',
+ \ 'z'
+ \ ]
+ \ },
+ \ ]
+ elseif v:collate =~? '^\(en\|es\|de\|fr\|it\|nl\).*\.utf-\?8$'
+ " With these locales and [li] only the identical adjacent pair 'à', 'à'
+ " is expected to be removed; lines differing in case or accent stay.
+ let tests += [
+ \ {
+ \ 'name' : 'uniq with locale ' .. v:collate,
+ \ 'cmd' : '%uniq li',
+ \ 'input' : [
+ \ 'A',
+ \ 'À',
+ \ 'a',
+ \ 'à',
+ \ 'à',
+ \ 'E',
+ \ 'È',
+ \ 'É',
+ \ 'o',
+ \ 'O',
+ \ 'Ô',
+ \ 'e',
+ \ 'è',
+ \ 'é',
+ \ 'ô',
+ \ 'Œ',
+ \ 'œ',
+ \ 'z',
+ \ 'Z'
+ \ ],
+ \ 'expected' : [
+ \ 'A',
+ \ 'À',
+ \ 'a',
+ \ 'à',
+ \ 'E',
+ \ 'È',
+ \ 'É',
+ \ 'o',
+ \ 'O',
+ \ 'Ô',
+ \ 'e',
+ \ 'è',
+ \ 'é',
+ \ 'ô',
+ \ 'Œ',
+ \ 'œ',
+ \ 'z',
+ \ 'Z'
+ \ ]
+ \ },
+ \ ]
+ endif
+ endif
+
+ for t in tests
+ enew!
+ call append(0, t.input)
+ $delete _
+ setlocal nomodified
+ execute t.cmd
+
+ call assert_equal(t.expected, getline(1, '$'), t.name)
+
+ " 'modified' must be set only when ":uniq" really changed the buffer, so
+ " it is checked against whether the expected text differs from the input.
+ if t.input == t.expected
+ call assert_false(&modified, t.name . ': &mod is not correct')
+ else
+ call assert_true(&modified, t.name . ': &mod is not correct')
+ endif
+ endfor
+
+ " Error cases; ":uniq" returns early for a single line, so use two lines.
+ call setline(1, ['line1', 'line2'])
+ call assert_fails('uniq no', 'E475:')
+ call assert_fails('uniq c', 'E475:')
+ call assert_fails('uniq #pat%', 'E654:')
+ call assert_fails('uniq /\%(/', 'E53:')
+ call assert_fails('333uniq', 'E16:')
+ call assert_fails('1,999uniq', 'E16:')
+
+ enew!
+endfunc
+
+func Test_uniq_cmd_report()
+  let lines = repeat([1], 3) + repeat([2], 3) + repeat([3], 3)
+  " 'report' left alone: the removal of six lines is reported
+  enew!
+  call append(0, lines)
+  $delete _
+  setlocal nomodified
+  let output = execute('%uniq')
+  call assert_equal([1,2,3], map(getline(1, '$'), 'v:val+0'))
+  call assert_match("6 fewer lines", output)
+  " 'report' set to 10, above the six removed lines: no message
+  enew!
+  call append(0, lines)
+  $delete _
+  setlocal nomodified report=10
+  let output = execute('%uniq')
+  call assert_equal([1,2,3], map(getline(1, '$'), 'v:val+0'))
+  call assert_equal("", output)
+  " 'report' reset to its Vim default, :uniq run from within :global
+  enew!
+  call append(0, lines)
+  $delete _
+  setlocal report&vim nomodified
+  let output = execute('1g/^/%uniq')
+  call assert_equal([1,2,3], map(getline(1, '$'), 'v:val+0'))
+  " the message is produced by the :g command, not by :uniq itself
+  call assert_match("6 fewer lines", output)
+  enew!
+endfunc
+
+" Test that :uniq accepts a following "| {cmd}" and a trailing comment
+func Test_uniq_followed_by_cmd()
+  new
+  let marker = ''
+  call setline(1, ['cc', 'aa', 'bb'])
+  %uniq | let marker = "uniqcmdtest"
+  call assert_equal("uniqcmdtest", marker)
+  call assert_equal(['cc', 'aa', 'bb'], getline(1, '$'))
+  " A double quote after :uniq starts a comment, it is not taken as a flag
+  call setline(1, ['3b', '3b', '3b', '1c', '2a'])
+  %uniq " uniq alphabetically
+  call assert_equal(['3b', '1c', '2a'], getline(1, '$'))
+  bw!
+endfunc
+
+" Test what happens to marks inside the range when :uniq deletes lines
+func Test_uniq_with_marks()
+  new
+  call setline(1, ['cc', 'cc', 'aa', 'bb', 'bb', 'bb', 'bb'])
+  for [name, lnum] in [['c', 1], ['a', 4], ['b', 7]]
+    call setpos("'" .. name, [0, lnum, 0, 0])
+  endfor
+  %uniq
+  call assert_equal(['cc', 'aa', 'bb'], getline(1, '$'))
+  for [name, lnum] in [['c', 1], ['a', 0], ['b', 0]]
+    call assert_equal(lnum, line("'" .. name))
+  endfor
+  bw!
+endfunc
+
+" Test that one :undo reverts :uniq and resets 'modified'
+func Test_uniq_undo()
+  let before = ['cc', 'cc', 'aa', 'bb', 'bb', 'bb', 'bb', 'aa']
+  new
+  call writefile(before, 'XfileUniq', 'D')
+  edit XfileUniq
+  uniq
+  call assert_true(&modified)
+  call assert_equal(['cc', 'aa', 'bb', 'aa'], getline(1, '$'))
+  undo
+  call assert_false(&modified)
+  call assert_equal(before, getline(1, '$'))
+  bw!
+endfunc
+
+" vim: shiftwidth=2 sts=2 expandtab