runtime(html): Optionally fold tags with the "expr" method

Tag folding poses a few difficulties.  Many elements, e.g.
"blockquote", are always delimited by start and end tags;
end tags for some elements, e.g. "p", can be omitted in
certain contexts; void elements, e.g. "hr", have no end tag.
Although the rules for supporting omissible end tags are
ad-hoc and involved, they apply to elements in scope.
Assuming syntactical wellformedness, an end tag can be
associated with its nearest matching start tag discoverable
in scope and towards the beginning of a file, whereas all
unbalanced tags and inlined tags can be disregarded.

For example:
------------------------------------------------------------
<!DOCTYPE html>
<html lang="en">		<!-- >1 : 1 -->
  <body>			<!-- >2 : 2 -->
    <p>Paragraph #1.		<!--  = : 2 -->
    <p>				<!-- >3 : 3 -->
      Paragraph #2.		<!--  = : 3 -->
    </p>			<!-- <3 : 3 -->
    <p>Paragraph #3.</p>	<!--  = : 2 -->
  </body>			<!-- <2 : 2 -->
</html>				<!-- <1 : 1 -->
------------------------------------------------------------

(HTML comments here, "<!-- ... -->", record two values for
each folded line that are separated by ":", a value obtained
from "&foldexpr" and a value obtained from "foldlevel()".)

Innermost foldable tags will be flattened.  For example:
------------------------------------------------------------
<!DOCTYPE html>
<html lang="en">		<!-- >1 : 1 -->
  <body>			<!-- >2 : 2 -->
    <div class="block">		<!-- >3 : 3 -->
      <pre><code>		<!-- >4 : 4 -->
[CODE SNIPPET]			<!--  = : 4 -->
      </code></pre>		<!-- <4 : 4 -->
    </div>			<!-- <3 : 3 -->
  </body>			<!-- <2 : 2 -->
</html>				<!-- <1 : 1 -->
------------------------------------------------------------

No fold will be requested for the "<code>"-"</code>" tag
pair, nor reflected by "&foldexpr", because such a fold
would claim the same lines that the enclosing fold of the
"<pre>"-"</pre>" tag pair already claims.

Run-on folded tags may confuse Vim.  When a file such as:
------------------------------------------------------------
<!DOCTYPE html>
<html lang="en">		<!-- >1 : 1 -->
  <body>			<!-- >2 : 2 -->
    <div class="block">		<!-- >3 : 3 -->
      <pre>			<!-- >4 : 4 -->
	<code>			<!-- >5 : 5 -->
[CODE SNIPPET #1]		<!--  = : 5 -->
	</code>			<!-- <5 : 5 -->
      </pre>			<!-- <4 : 4 -->
    </div>			<!-- <3 : 3 -->
				<!--  = : 3 -->
    <div class="block">		<!-- >3 : 3 -->
      <pre>			<!-- >4 : 4 -->
	<code>			<!-- >5 : 5 -->
[CODE SNIPPET #2]		<!--  = : 5 -->
	</code>			<!-- <5 : 5 -->
      </pre>			<!-- <4 : 4 -->
    </div>			<!-- <3 : 3 -->
  </body>			<!-- <2 : 2 -->
</html>				<!-- <1 : 1 -->
------------------------------------------------------------

is reformatted as follows:
------------------------------------------------------------
<!DOCTYPE html>
<html lang="en">		<!-- >1 : 1 -->
  <body>			<!-- >2 : 2 -->
    <div class="block">		<!-- >3 : 3 -->
      <pre>			<!-- >4 : 4 -->
	<code>			<!-- >5 : 5 -->
[CODE SNIPPET #1]		<!--  = : 5 -->
	</code>			<!-- <5 : 5 -->
      </pre>			<!-- <4 : 4 -->
    </div><div class="block"><pre><code> <!-- <3 : 3 -->
[CODE SNIPPET #2]		<!--  = : 2 ? -->
	</code>			<!-- <5 : 2 ? -->
      </pre>			<!-- <4 : 2 ? -->
    </div>			<!-- <3 : 2 ? -->
  </body>			<!-- <2 : 2 -->
</html>				<!-- <1 : 1 -->
------------------------------------------------------------

"&foldexpr" values will not be used as is for the lines
between (and including) "[CODE SNIPPET #2]" and "</div>".
(Cf. v9.1.0002.)

With syntax highlighting in effect, tag folding using the
"fold-expr" method can be enabled with:
------------------------------------------------------------
	let g:html_expr_folding = 1
------------------------------------------------------------

By default, tag folding will be redone from scratch after
each occurrence of a TextChanged or an InsertLeave event.
Such frequency may not be desired, especially for large
files, and this recomputation can be disabled with:
------------------------------------------------------------
	let g:html_expr_folding_without_recomputation = 1
        doautocmd FileType
------------------------------------------------------------

To force another recomputation, do:
------------------------------------------------------------
	unlet! b:foldsmap
	normal zx
------------------------------------------------------------

References:
https://web.archive.org/web/20250328105626/https://html.spec.whatwg.org/multipage/syntax.html#optional-tags
https://en.wikipedia.org/wiki/Dangling_else

closes: #17141

Signed-off-by: Aliaksei Budavei <0x000c70@gmail.com>
Signed-off-by: Christian Brabandt <cb@256bit.org>
diff --git a/runtime/autoload/htmlfold.vim b/runtime/autoload/htmlfold.vim
new file mode 100644
index 0000000..76ccaef
--- /dev/null
+++ b/runtime/autoload/htmlfold.vim
@@ -0,0 +1,192 @@
+" HTML folding script, :h ft-html-plugin
+" Latest Change: 2025 May 10
+" Original Author: Aliaksei Budavei <0x000c70@gmail.com>
+
+function! htmlfold#MapBalancedTags() abort
+  " Map lines of balanced multi-line tag pairs to ">N" and "<N" strings.
+  " Only a "<name" or "</name" prefix is matched, capturing the name, so that
+  " a start tag whose attributes span lines needs a single "getline()" call.
+  let tag = '\m\c</\=\([0-9A-Za-z-]\+\)'
+  let names = []
+  let pairs = []
+  let ends = []
+  let pos = getpos('.')
+
+  try
+    call cursor(1, 1)
+    let [lnum, cnum] = searchpos(tag, 'cnW')
+
+    " Pair up nearest non-inlined tags in scope; new start tags go to index 0.
+    while lnum > 0
+      let name_attr = synIDattr(synID(lnum, cnum, 0), 'name')
+
+      if name_attr ==# 'htmlTag' || name_attr ==# 'htmlScriptTag'
+	let name = get(matchlist(getline(lnum), tag, (cnum - 1)), 1, '')
+
+	if !empty(name)
+	  call insert(names, tolower(name), 0)
+	  call insert(pairs, [lnum, -1], 0)
+	endif
+      elseif name_attr ==# 'htmlEndTag'
+	let name = get(matchlist(getline(lnum), tag, (cnum - 1)), 1, '')
+
+	if !empty(name)
+	  let idx = index(names, tolower(name))
+
+	  if idx >= 0
+	    " Keep -1 for inlined balanced tags, as for opened-only tags.
+	    if pairs[idx][0] != lnum
+	      let pairs[idx][1] = lnum
+	      call add(ends, lnum)
+	    endif
+
+	    " Claim a pair, along with any start tags opened after it.
+	    let names[: idx] = repeat([''], (idx + 1))
+	  endif
+	endif
+      endif
+
+      " Advance the cursor, at "<", past "</a", "<a>", etc.
+      call cursor(lnum, (cnum + 3))
+      let [lnum, cnum] = searchpos(tag, 'cnW')
+    endwhile
+  finally
+    call setpos('.', pos)
+  endtry
+
+  if empty(ends)
+    return {}
+  endif
+
+  let folds = {}
+  let pending_end = ends[0]
+  let level = 0
+
+  while !empty(pairs)
+    let [start, end] = remove(pairs, -1)
+
+    if end < 0
+      continue
+    endif
+
+    if start >= pending_end
+      " Mark a sibling tag: drop the end line that it starts at or after.
+      call remove(ends, 0)
+
+      while start >= ends[0]
+	" Mark a parent tag: each further end line passed closes a level.
+	call remove(ends, 0)
+	let level -= 1
+      endwhile
+
+      let pending_end = ends[0]
+    else
+      " Mark a child tag: it starts before the earliest pending end line.
+      let level += 1
+    endif
+
+    " Flatten the innermost inlined folds: keep the first value set for a line.
+    let folds[start] = get(folds, start, ('>' . level))
+    let folds[end] = get(folds, end, ('<' . level))
+  endwhile
+
+  return folds
+endfunction
+
+" Without Vim9 script support, keep the legacy function (":help vim9-mix").
+if !has("vim9script")
+  finish
+endif
+
+def! g:htmlfold#MapBalancedTags(): dict<string>
+  # Map lines of balanced multi-line tag pairs to ">N" and "<N" strings.
+  # Only a "<name" or "</name" prefix is matched, capturing the name, so that
+  # a start tag whose attributes span lines needs a single "getline()" call.
+  const tag: string = '\m\c</\=\([0-9A-Za-z-]\+\)'
+  var names: list<string> = []
+  var pairs: list<list<number>> = []
+  var ends: list<number> = []
+  const pos: list<number> = getpos('.')
+
+  try
+    cursor(1, 1)
+    var [lnum: number, cnum: number] = searchpos(tag, 'cnW')
+
+    # Pair up nearest non-inlined tags in scope; new start tags go to index 0.
+    while lnum > 0
+      const name_attr: string = synIDattr(synID(lnum, cnum, 0), 'name')
+
+      if name_attr ==# 'htmlTag' || name_attr ==# 'htmlScriptTag'
+	const name: string = get(matchlist(getline(lnum), tag, (cnum - 1)), 1, '')
+
+	if !empty(name)
+	  insert(names, tolower(name), 0)
+	  insert(pairs, [lnum, -1], 0)
+	endif
+      elseif name_attr ==# 'htmlEndTag'
+	const name: string = get(matchlist(getline(lnum), tag, (cnum - 1)), 1, '')
+
+	if !empty(name)
+	  const idx: number = index(names, tolower(name))
+
+	  if idx >= 0
+	    # Keep -1 for inlined balanced tags, as for opened-only tags.
+	    if pairs[idx][0] != lnum
+	      pairs[idx][1] = lnum
+	      add(ends, lnum)
+	    endif
+
+	    # Claim a pair, along with any start tags opened after it.
+	    names[: idx] = repeat([''], (idx + 1))
+	  endif
+	endif
+      endif
+
+      # Advance the cursor, at "<", past "</a", "<a>", etc.
+      cursor(lnum, (cnum + 3))
+      [lnum, cnum] = searchpos(tag, 'cnW')
+    endwhile
+  finally
+    setpos('.', pos)
+  endtry
+
+  if empty(ends)
+    return {}
+  endif
+
+  var folds: dict<string> = {}
+  var pending_end: number = ends[0]
+  var level: number = 0
+
+  while !empty(pairs)
+    const [start: number, end: number] = remove(pairs, -1)
+
+    if end < 0
+      continue
+    endif
+
+    if start >= pending_end
+      # Mark a sibling tag: drop the end line that it starts at or after.
+      remove(ends, 0)
+
+      while start >= ends[0]
+	# Mark a parent tag: each further end line passed closes a level.
+	remove(ends, 0)
+	level -= 1
+      endwhile
+
+      pending_end = ends[0]
+    else
+      # Mark a child tag: it starts before the earliest pending end line.
+      level += 1
+    endif
+
+    # Flatten the innermost inlined folds: keep the first value set for a line.
+    folds[start] = get(folds, start, ('>' .. level))
+    folds[end] = get(folds, end, ('<' .. level))
+  endwhile
+
+  return folds
+enddef
+
+" vim: fdm=syntax sw=2 ts=8 noet