patch 9.1.1476: no easy way to deduplicate text

Problem:  no easy way to deduplicate text
Solution: add the :uniq ex command
          (Hirohito Higashi)

closes: #17538

Signed-off-by: Hirohito Higashi <h.east.727@gmail.com>
Signed-off-by: Christian Brabandt <cb@256bit.org>
diff --git a/src/testdir/test_uniq.vim b/src/testdir/test_uniq.vim
new file mode 100644
index 0000000..a60fd49
--- /dev/null
+++ b/src/testdir/test_uniq.vim
@@ -0,0 +1,612 @@
+" Tests for the ":uniq" command.
+
+source check.vim
+
+" Tests for the ":uniq" command.
+func Test_uniq_cmd()
+  " Table-driven: each entry is run on a fresh buffer and the result compared.
+  let tests = [
+        \ {
+        \    'name' : 'Alphabetical uniq #1',
+        \    'cmd' : '%uniq',
+        \    'input' : [
+        \       'abc',
+        \       'ab',
+        \       'a',
+        \       'a321',
+        \       'a123',
+        \       'a123',
+        \       'a123',
+        \       'a123',
+        \       'a122',
+        \       'a123',
+        \       'b321',
+        \       'c123d',
+        \       ' 123b',
+        \       'c321d',
+        \       'b322b',
+        \       'b321',
+        \       'b321b'
+        \    ],
+        \    'expected' : [
+        \       'abc',
+        \       'ab',
+        \       'a',
+        \       'a321',
+        \       'a123',
+        \       'a122',
+        \       'a123',
+        \       'b321',
+        \       'c123d',
+        \       ' 123b',
+        \       'c321d',
+        \       'b322b',
+        \       'b321',
+        \       'b321b'
+        \    ]
+        \ },
+        \ {
+        \    'name' : 'Alphabetical uniq #2',
+        \    'cmd' : '%uniq',
+        \    'input' : [
+        \       'abc',
+        \       'abc',
+        \       'abc',
+        \       'ab',
+        \       'a',
+        \       'a321',
+        \       'a122',
+        \       'b321',
+        \       'a123',
+        \       'a123',
+        \       'c123d',
+        \       ' 123b',
+        \       'c321d',
+        \       'b322b',
+        \       'b321',
+        \       'b321b'
+        \    ],
+        \    'expected' : [
+        \       'abc',
+        \       'ab',
+        \       'a',
+        \       'a321',
+        \       'a122',
+        \       'b321',
+        \       'a123',
+        \       'c123d',
+        \       ' 123b',
+        \       'c321d',
+        \       'b322b',
+        \       'b321',
+        \       'b321b'
+        \    ]
+        \ },
+        \ {
+        \    'name' : 'alphabetical, uniqed input',
+        \    'cmd' : 'uniq',
+        \    'input' : [
+        \       'a',
+        \       'b',
+        \       'c',
+        \    ],
+        \    'expected' : [
+        \       'a',
+        \       'b',
+        \       'c',
+        \    ]
+        \ },
+        \ {
+        \    'name' : 'alphabetical, uniqed input, unique at end',
+        \    'cmd' : 'uniq',
+        \    'input' : [
+        \       'aa',
+        \       'bb',
+        \       'cc',
+        \       'cc',
+        \    ],
+        \    'expected' : [
+        \       'aa',
+        \       'bb',
+        \       'cc',
+        \    ]
+        \ },
+        \ {
+        \    'name' : 'uniq one line buffer',
+        \    'cmd' : 'uniq',
+        \    'input' : [
+        \       'single line'
+        \    ],
+        \    'expected' : [
+        \       'single line'
+        \    ]
+        \ },
+        \ {
+        \    'name' : 'uniq ignoring case',
+        \    'cmd' : '%uniq i',
+        \    'input' : [
+        \       'BB',
+        \       'Cc',
+        \       'cc',
+        \       'Cc',
+        \       'aa'
+        \    ],
+        \    'expected' : [
+        \       'BB',
+        \       'Cc',
+        \       'aa'
+        \    ]
+        \ },
+        \ {
+        \    'name' : 'uniq not uniqued #1',
+        \    'cmd' : '%uniq!',
+        \    'input' : [
+        \       'aa',
+        \       'cc',
+        \       'cc',
+        \       'cc',
+        \       'bb',
+        \       'aa',
+        \       'yyy',
+        \       'yyy',
+        \       'zz'
+        \    ],
+        \    'expected' : [
+        \       'cc',
+        \       'yyy',
+        \    ]
+        \ },
+        \ {
+        \    'name' : 'uniq not uniqued #2',
+        \    'cmd' : '%uniq!',
+        \    'input' : [
+        \       'aa',
+        \       'aa',
+        \       'bb',
+        \       'cc',
+        \       'cc',
+        \       'cc',
+        \       'yyy',
+        \       'yyy',
+        \       'zz'
+        \    ],
+        \    'expected' : [
+        \       'aa',
+        \       'cc',
+        \       'yyy',
+        \    ]
+        \ },
+        \ {
+        \    'name' : 'uniq not uniqued ("u" is ignored)',
+        \    'cmd' : '%uniq! u',
+        \    'input' : [
+        \       'aa',
+        \       'cc',
+        \       'cc',
+        \       'cc',
+        \       'bb',
+        \       'aa',
+        \       'yyy',
+        \       'yyy',
+        \       'zz'
+        \    ],
+        \    'expected' : [
+        \       'cc',
+        \       'yyy',
+        \    ]
+        \ },
+        \ {
+        \    'name' : 'uniq not uniqued, ignoring case',
+        \    'cmd' : '%uniq! i',
+        \    'input' : [
+        \       'aa',
+        \       'cc',
+        \       'cc',
+        \       'Cc',
+        \       'bb',
+        \       'aa',
+        \       'yyy',
+        \       'yyy',
+        \       'zz'
+        \    ],
+        \    'expected' : [
+        \       'cc',
+        \       'yyy',
+        \    ]
+        \ },
+        \ {
+        \    'name' : 'uniq only unique #1',
+        \    'cmd' : '%uniq u',
+        \    'input' : [
+        \       'aa',
+        \       'cc',
+        \       'cc',
+        \       'cc',
+        \       'bb',
+        \       'aa',
+        \       'yyy',
+        \       'yyy',
+        \       'zz'
+        \    ],
+        \    'expected' : [
+        \       'aa',
+        \       'bb',
+        \       'aa',
+        \       'zz'
+        \    ]
+        \ },
+        \ {
+        \    'name' : 'uniq only unique #2',
+        \    'cmd' : '%uniq u',
+        \    'input' : [
+        \       'aa',
+        \       'aa',
+        \       'bb',
+        \       'cc',
+        \       'cc',
+        \       'cc',
+        \       'yyy',
+        \       'yyy',
+        \       'zz'
+        \    ],
+        \    'expected' : [
+        \       'bb',
+        \       'zz'
+        \    ]
+        \ },
+        \ {
+        \    'name' : 'uniq only unique, ignoring case',
+        \    'cmd' : '%uniq ui',
+        \    'input' : [
+        \       'aa',
+        \       'cc',
+        \       'Cc',
+        \       'cc',
+        \       'bb',
+        \       'aa',
+        \       'yyy',
+        \       'yyy',
+        \       'zz'
+        \    ],
+        \    'expected' : [
+        \       'aa',
+        \       'bb',
+        \       'aa',
+        \       'zz'
+        \    ]
+        \ },
+        \ {
+        \    'name' : 'uniq on first 2 charscters',
+        \    'cmd' : '%uniq r /^../',
+        \    'input' : [
+        \       'aa',
+        \       'cc',
+        \       'cc1',
+        \       'cc2',
+        \       'bb',
+        \       'aa',
+        \       'yyy',
+        \       'yyy2',
+        \       'zz'
+        \    ],
+        \    'expected' : [
+        \       'aa',
+        \       'cc',
+        \       'bb',
+        \       'aa',
+        \       'yyy',
+        \       'zz'
+        \    ]
+        \ },
+        \ {
+        \    'name' : 'uniq on after 2 charscters',
+        \    'cmd' : '%uniq /^../',
+        \    'input' : [
+        \       '11aa',
+        \       '11cc',
+        \       '13cc',
+        \       '13cc',
+        \       '13bb',
+        \       '13aa',
+        \       '12yyy',
+        \       '11yyy',
+        \       '11zz'
+        \    ],
+        \    'expected' : [
+        \       '11aa',
+        \       '11cc',
+        \       '13bb',
+        \       '13aa',
+        \       '12yyy',
+        \       '11zz'
+        \    ]
+        \ },
+        \ {
+        \    'name' : 'uniq on first 2 charscters, not uniqued',
+        \    'cmd' : '%uniq! r /^../',
+        \    'input' : [
+        \       'aa',
+        \       'cc',
+        \       'cc1',
+        \       'cc2',
+        \       'bb',
+        \       'aa',
+        \       'yyy',
+        \       'yyy2',
+        \       'zz'
+        \    ],
+        \    'expected' : [
+        \       'cc',
+        \       'yyy'
+        \    ]
+        \ },
+        \ {
+        \    'name' : 'uniq on after 2 charscters, not uniqued',
+        \    'cmd' : '%uniq! /^../',
+        \    'input' : [
+        \       '11aa',
+        \       '11cc',
+        \       '13cc',
+        \       '13cc',
+        \       '13bb',
+        \       '13aa',
+        \       '12yyy',
+        \       '11yyy',
+        \       '11zz'
+        \    ],
+        \    'expected' : [
+        \       '11cc',
+        \       '12yyy'
+        \    ]
+        \ },
+        \ {
+        \    'name' : 'uniq on first 2 charscters, only unique',
+        \    'cmd' : '%uniq ru /^../',
+        \    'input' : [
+        \       'aa',
+        \       'cc',
+        \       'cc1',
+        \       'cc2',
+        \       'bb',
+        \       'aa',
+        \       'yyy',
+        \       'yyy2',
+        \       'zz'
+        \    ],
+        \    'expected' : [
+        \       'aa',
+        \       'bb',
+        \       'aa',
+        \       'zz'
+        \    ]
+        \ },
+        \ {
+        \    'name' : 'uniq on after 2 charscters, only unique',
+        \    'cmd' : '%uniq u /^../',
+        \    'input' : [
+        \       '11aa',
+        \       '11cc',
+        \       '13cc',
+        \       '13cc',
+        \       '13bb',
+        \       '13aa',
+        \       '12yyy',
+        \       '11yyy',
+        \       '11zz'
+        \    ],
+        \    'expected' : [
+        \       '11aa',
+        \       '13bb',
+        \       '13aa',
+        \       '11zz'
+        \    ]
+        \ }
+        \ ]
+
+  " Locale-dependent cases; skipped on Mac, where this does not appear to work.
+  if !has('mac')
+    if v:collate =~? '^\(en\|fr\)_ca.utf-\?8$'
+      " With "l" every line in this list is expected to compare different
+      " from its neighbour, so the buffer must come out unchanged.
+      let tests += [
+        \ {
+        \    'name' : 'uniq with locale ' .. v:collate,
+        \    'cmd' : '%uniq l',
+        \    'input' : [
+        \     'A',
+        \     'a',
+        \     'À',
+        \     'à',
+        \     'E',
+        \     'e',
+        \     'É',
+        \     'é',
+        \     'È',
+        \     'è',
+        \     'O',
+        \     'o',
+        \     'Ô',
+        \     'ô',
+        \     'œ',
+        \     'Z',
+        \     'z'
+        \    ],
+        \    'expected' : [
+        \     'A',
+        \     'a',
+        \     'À',
+        \     'à',
+        \     'E',
+        \     'e',
+        \     'É',
+        \     'é',
+        \     'È',
+        \     'è',
+        \     'O',
+        \     'o',
+        \     'Ô',
+        \     'ô',
+        \     'œ',
+        \     'Z',
+        \     'z'
+        \    ]
+        \ },
+        \ ]
+    elseif v:collate =~? '^\(en\|es\|de\|fr\|it\|nl\).*\.utf-\?8$'
+      " With "li" only the identical neighbours ('à', 'à') are expected to
+      " collapse; letters differing in accent or case are all expected to stay.
+      let tests += [
+        \ {
+        \    'name' : 'uniq with locale ' .. v:collate,
+        \    'cmd' : '%uniq li',
+        \    'input' : [
+        \     'A',
+        \     'À',
+        \     'a',
+        \     'à',
+        \     'à',
+        \     'E',
+        \     'È',
+        \     'É',
+        \     'o',
+        \     'O',
+        \     'Ô',
+        \     'e',
+        \     'è',
+        \     'é',
+        \     'ô',
+        \     'Œ',
+        \     'œ',
+        \     'z',
+        \     'Z'
+        \    ],
+        \    'expected' : [
+        \     'A',
+        \     'À',
+        \     'a',
+        \     'à',
+        \     'E',
+        \     'È',
+        \     'É',
+        \     'o',
+        \     'O',
+        \     'Ô',
+        \     'e',
+        \     'è',
+        \     'é',
+        \     'ô',
+        \     'Œ',
+        \     'œ',
+        \     'z',
+        \     'Z'
+        \    ]
+        \ },
+        \ ]
+    endif
+  endif
+
+  for t in tests
+    enew!
+    call append(0, t.input)
+    $delete _
+    setlocal nomodified
+    execute t.cmd
+
+    call assert_equal(t.expected, getline(1, '$'), t.name)
+    " 'modified' must be set only when ":uniq" really changed the buffer; it
+    " has to stay off when the expected text is identical to the input.
+    if t.input == t.expected
+      call assert_false(&modified, t.name .. ': &mod is not correct')
+    else
+      call assert_true(&modified, t.name .. ': &mod is not correct')
+    endif
+  endfor
+
+  " Error cases; the buffer needs at least two lines for these checks.
+  call setline(1, ['line1', 'line2'])
+  call assert_fails('uniq no', 'E475:')
+  call assert_fails('uniq c', 'E475:')
+  call assert_fails('uniq #pat%', 'E654:')
+  call assert_fails('uniq /\%(/', 'E53:')
+  call assert_fails('333uniq', 'E16:')
+  call assert_fails('1,999uniq', 'E16:')
+
+  enew!
+endfunc
+
+func Test_uniq_cmd_report()
+  " Check the "fewer lines" message of :uniq against the 'report' option.
+  let text = repeat([1], 3) + repeat([2], 3) + repeat([3], 3)
+  " 1) 'report' untouched: removing six lines is announced
+  enew!
+  call append(0, text)
+  $delete _
+  setlocal nomodified
+  let out = execute('%uniq')
+  call assert_equal([1,2,3], map(getline(1, '$'), 'v:val+0'))
+  call assert_match("6 fewer lines", out)
+  " 2) 'report' raised to 10: nothing is printed
+  enew!
+  call append(0, text)
+  $delete _
+  setlocal nomodified report=10
+  let out = execute('%uniq')
+  call assert_equal([1,2,3], map(getline(1, '$'), 'v:val+0'))
+  call assert_equal("", out)
+  " 3) 'report' back at its Vim default, :uniq run from within :g;
+  "    the output comes from the :g command, not from the :uniq
+  enew!
+  call append(0, text)
+  $delete _
+  setlocal report&vim nomodified
+  let out = execute('1g/^/%uniq')
+  call assert_equal([1,2,3], map(getline(1, '$'), 'v:val+0'))
+  call assert_match("6 fewer lines", out)
+  enew!
+endfunc
+
+" Test for a :uniq command followed by another command
+func Test_uniq_followed_by_cmd()
+  new
+  let var = ''
+  call setline(1, ['cc', 'aa', 'bb'])
+  %uniq | let var = "uniqcmdtest"
+  call assert_equal("uniqcmdtest", var)
+  call assert_equal(['cc', 'aa', 'bb'], getline(1, '$'))
+  " Same check with a comment after :uniq; the text must still be deduplicated
+  call setline(1, ['3b', '3b', '3b', '1c', '2a'])
+  %uniq " uniq alphabetically
+  call assert_equal(['3b', '1c', '2a'], getline(1, '$'))
+  bw!
+endfunc
+
+" Test for retaining marks across a :uniq
+func Test_uniq_with_marks()
+  " Asserted below: 'c still reads line 1, 'a and 'b read line 0 after :uniq
+  new | call setline(1, ['cc', 'cc', 'aa', 'bb', 'bb', 'bb', 'bb'])
+  for [name, lnum] in [['c', 1], ['a', 4], ['b', 7]]
+    call setpos("'" .. name, [0, lnum, 0, 0])
+  endfor
+  %uniq
+  call assert_equal(['cc', 'aa', 'bb'], getline(1, '$'))
+  for [name, lnum] in [['c', 1], ['a', 0], ['b', 0]]
+    call assert_equal(lnum, line("'" .. name))
+  endfor
+  bw!
+endfunc
+
+" Test for undo after a :uniq
+func Test_uniq_undo()
+  " :uniq on a freshly loaded file, then undo: text and 'modified' must revert
+  let before = ['cc', 'cc', 'aa', 'bb', 'bb', 'bb', 'bb', 'aa']
+  call writefile(before, 'XfileUniq', 'D')
+  new | edit XfileUniq
+  uniq
+  call assert_equal(['cc', 'aa', 'bb', 'aa'], getline(1, '$'))
+  call assert_true(&modified)
+  undo
+  call assert_equal(before, getline(1, '$'))
+  call assert_false(&modified)
+  bw!
+endfunc
+
+" vim: shiftwidth=2 sts=2 expandtab