blob: c2b3c0c58e8fcc791cfe1b72a511832d087c1bf0 [file] [log] [blame]
" Vim indent file
" Language:		DTD (Document Type Definition for XML)
" Maintainer:		Doug Kearns <dougkearns@gmail.com>
" Previous Maintainer:	Nikolai Weibull <now@bitwi.se>
" Last Change:		24 Sep 2021
Bram Moolenaare37d50a2008-08-06 17:06:04 +00006
" Buffer-local indent configuration: indentation is computed by
" GetDTDIndent(), and a re-indent is triggered by CTRL-F, by opening a line
" (o / O) and by typing ">".  'smartindent' is switched off for the buffer.
setlocal indentexpr=GetDTDIndent() indentkeys=!^F,o,O,> nosmartindent

" Command string that resets the three options set above.
let b:undo_indent = "setl inde< indk< si<"

" Everything below only defines script-wide functions; skip it when
" GetDTDIndent() has already been defined by an earlier load.
if exists("*GetDTDIndent") | finish | endif
16
" Remember the current 'cpoptions' and switch to the Vim default while the
" rest of the script is sourced; the saved value is put back at the end of
" the script.
let s:cpo_save = &cpoptions
set cpoptions&vim

" Default token pattern used by the lexer: a run of non-whitespace characters
" anchored at the current position.
" TODO: Needs to be adjusted to stop at [, <, and ].
let s:token_pattern = '^[^[:space:]]\+'
22
" Read a single token from a:input.
"
" Arguments:
"   input  String to tokenize.
"   start  Byte index in a:input at which to begin.
"   ...    Optional pattern describing a token; s:token_pattern is used when
"          it is omitted.
"
" Whitespace (including newlines) at a:start is skipped first.  Returns
" [token, index-after-token] on success, or ["", a:start] when no token
" matches, so that the caller's position is left unchanged.
function s:lex1(input, start, ...)
  let l:regex = a:0 > 0 ? a:1 : s:token_pattern

  " Skip leading whitespace, then try to match the token right there.
  let l:from = matchend(a:input, '^\_s*', a:start)
  let l:to = l:from == -1 ? -1 : matchend(a:input, l:regex, l:from)
  if l:to == -1
    return ["", a:start]
  endif

  return [strpart(a:input, l:from, l:to - l:from), l:to]
endfunction
36
" Read the next token from a:input, skipping over "-- ... --" comments.
"
" Arguments are the same as for s:lex1().  Whenever the token read is "--",
" tokens are consumed until the closing "--" and the token following it is
" read instead; this repeats for consecutive comments.  If the input runs out
" (or nothing matches) inside a comment, the failing s:lex1() result is
" returned as is.
"
" Returns [token, index-after-token], with token == "" on failure.
function s:lex(input, start, ...)
  let l:regex = a:0 > 0 ? a:1 : s:token_pattern
  let [l:tok, l:pos] = s:lex1(a:input, a:start, l:regex)

  while l:tok == '--'
    " Inside a comment: swallow tokens up to and including the closing "--".
    while 1
      let [l:tok, l:pos] = s:lex1(a:input, l:pos, l:regex)
      if l:tok == ""
        return [l:tok, l:pos]
      elseif l:tok == '--'
        break
      endif
    endwhile

    " First token after the comment; it may open another comment.
    let [l:tok, l:pos] = s:lex1(a:input, l:pos, l:regex)
  endwhile

  return [l:tok, l:pos]
endfunction
52
" Scan a parenthesized group whose opening "(" has just been consumed.
"
" Arguments:
"   line  Text being parsed (may contain "\n" separating buffer lines).
"   end   Index just past the already-consumed opening parenthesis.
"
" Returns [-1, index] when the group is closed by its matching ")", where
" index is the position just past it.  Otherwise returns [column, index]
" where column is the offset of the innermost still-open "(" from the start
" of its line (counted from the preceding "\n"), i.e. the indent that places
" text right after that parenthesis.
function s:indent_to_innermost_parentheses(line, end)
  let l:pos = a:end
  " Stack of indexes of the currently open parentheses.
  let l:open = [l:pos - 1]

  while 1
    let [l:tok, l:pos] = s:lex(a:line, l:pos, '^\%([(),|]\|[A-Za-z0-9_-]\+\|#P\=CDATA\|%[A-Za-z0-9_-]\+;\)[?*+]\=')
    if l:tok == ""
      break
    endif

    let l:first = l:tok[0]
    if l:first == '('
      call add(l:open, l:pos - 1)
    elseif l:first == ')'
      if len(l:open) == 1
        " The outermost group has been closed.
        return [-1, l:pos]
      endif
      call remove(l:open, -1)
    endif
  endwhile

  let l:innermost = l:open[-1]
  return [l:innermost - strridx(a:line, "\n", l:innermost), l:pos]
endfunction
70
71" TODO: Line and end could be script global (think OO members).
" 'indentexpr' callback: compute the indent for line v:lnum of a DTD.
"
" The nearest preceding "<!" is located and the text from there up to (but
" not including) the current line is tokenized to find out how far into the
" declaration we are.
"
" Returns:
"   0                        for the first line of the buffer;
"   indent(v:lnum - 1)       when no "<!" is found searching backwards;
"   -1                       (keep the current indent) for unterminated
"                            comments and for declarations not handled here;
"   otherwise the indent of the line holding "<!", possibly increased by a
"   number of 'shiftwidth' levels, or the column just past the innermost
"   unclosed parenthesis.
"
" Note: search() moves the cursor to the "<!" that was found.
function GetDTDIndent()
  if v:lnum == 1
    return 0
  endif

  " Begin by searching back for the nearest <!.  From here, depending on what
  " follows immediately after, parse to where we're at to determine what to
  " do.
  " NOTE(review): the search itself does not skip matches inside comments.
  if search('<!', 'bceW') == 0
    return indent(v:lnum - 1)
  endif
  let lnum = line('.')
  let col = col('.')
  let indent = indent('.')
  let line = lnum == v:lnum ? getline(lnum) : join(getline(lnum, v:lnum - 1), "\n")

  let [declaration, end] = s:lex1(line, col)
  if declaration == ""
    return indent + shiftwidth()
  elseif declaration == '--'
    " We're looking at a comment.  Now, simply determine if the comment is
    " terminated or not.  If it isn't, let Vim take care of that using
    " 'comments' and 'autoindent'.  Otherwise, indent to the first line's
    " level.
    while declaration != ""
      let [declaration, end] = s:lex(line, end)
      if declaration == "-->"
        return indent
      endif
    endwhile
    return -1
  elseif declaration == 'ELEMENT'
    " Check for element name.  If none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + shiftwidth()
    endif

    " Check for token following element name.  This can be a specification of
    " whether the start or end tag may be omitted.  If nothing is found,
    " indent one level.
    let [token, end] = s:lex(line, end, '^\%([-O(]\|ANY\|EMPTY\)')
    let n = 0
    while token =~ '[-O]' && n < 2
      let [token, end] = s:lex(line, end, '^\%([-O(]\|ANY\|EMPTY\)')
      let n += 1
    endwhile
    if token == ""
      return indent + shiftwidth()
    endif

    " Next comes the content model.  If the token we've found isn't a
    " parenthesis it must be either ANY, EMPTY or some random junk.  Either
    " way, we're done indenting this element, so set it to that of the first
    " line so that the terminating ">" winds up having the same indentation.
    if token != '('
      return indent
    endif

    " Now go through the content model.  We need to keep track of the nesting
    " of parentheses.  As soon as we hit 0 we're done.  If that happens we
    " must have a complete content model.  Thus set indentation to be the same
    " as that of the first line so that the terminating ">" winds up having
    " the same indentation.  Otherwise, we'll indent to the innermost
    " parentheses not yet matched.
    let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
    if indent_of_innermost != -1
      return indent_of_innermost
    endif

    " Finally, look for any additions and/or exceptions to the content model.
    " This is defined by a "+" or "-" followed by another content model
    " declaration.
    " TODO: Can the "-" be separated by whitespace from the "("?
    let seen = { '+(': 0, '-(': 0 }
    while 1
      let [additions_exceptions, end] = s:lex(line, end, '^[+-](')
      if additions_exceptions != '+(' && additions_exceptions != '-('
        let [token, end] = s:lex(line, end)
        if token == '>'
          return indent
        endif
        " Use the first line's indent when the current line starts with ">"
        " or when both an addition and an exception have already been seen;
        " otherwise indent one level.
        " TODO: Should use s:lex here on getline(v:lnum) and check for >.
        return getline(v:lnum) =~ '^\s*>' || count(values(seen), 0) == 0 ? indent : (indent + shiftwidth())
      endif

      " If we've seen an addition or exception already and this is of the same
      " kind, the user is writing a broken DTD.  Time to bail.
      if seen[additions_exceptions]
        return indent
      endif
      let seen[additions_exceptions] = 1

      let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
      if indent_of_innermost != -1
        return indent_of_innermost
      endif
    endwhile
  elseif declaration == 'ATTLIST'
    " Check for element name.  If none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + shiftwidth()
    endif

    " Check for any number of attributes.
    while 1
      " Check for attribute name.  If none exists, indent one level, unless
      " the current line is a lone ">", in which case we indent to the same
      " level as the first line.  Otherwise, if the attribute name is ">", we
      " have actually hit the end of the attribute list, in which case we
      " indent to the same level as the first line.
      let [name, end] = s:lex(line, end)
      if name == ""
        " TODO: Should use s:lex here on getline(v:lnum) and check for >.
        return getline(v:lnum) =~ '^\s*>' ? indent : (indent + shiftwidth())
      elseif name == ">"
        return indent
      endif

      " Check for attribute value declaration.  If none exists, indent two
      " levels.  Otherwise, if it's an enumerated value, check for nested
      " parentheses and indent to the innermost one if we don't reach the end
      " of the list.  Otherwise, just continue with looking for the default
      " attribute value.
      " TODO: Do validation of keywords
      " (CDATA|NMTOKEN|NMTOKENS|ID|IDREF|IDREFS|ENTITY|ENTITIES)?
      let [value, end] = s:lex(line, end, '^\%((\|[^[:space:]]\+\)')
      if value == ""
        return indent + shiftwidth() * 2
      elseif value == 'NOTATION'
        " If this is an enumerated value based on notations, read another
        " token for the actual value.  If it doesn't exist, indent three
        " levels.
        " TODO: If validating according to above, value must be equal to '('.
        let [value, end] = s:lex(line, end, '^\%((\|[^[:space:]]\+\)')
        if value == ""
          return indent + shiftwidth() * 3
        endif
      endif

      if value == '('
        let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
        if indent_of_innermost != -1
          return indent_of_innermost
        endif
      endif

      " Finally look for the attribute's default value.  If none exists,
      " indent two levels.  The value may be a literal in double or single
      " quotes (XML allows both for attribute values) or one of the
      " #REQUIRED / #IMPLIED / #FIXED keywords.
      let [default, end] = s:lex(line, end, '^\%("\_[^"]*"\|''\_[^'']*''\|#\(REQUIRED\|IMPLIED\|FIXED\)\)')
      if default == ""
        return indent + shiftwidth() * 2
      elseif default == '#FIXED'
        " We need to look for the fixed value, again in either kind of
        " quotes.  If none exists, indent three levels.
        let [default, end] = s:lex(line, end, '^\%("\_[^"]*"\|''\_[^'']*''\)')
        if default == ""
          return indent + shiftwidth() * 3
        endif
      endif
    endwhile
  elseif declaration == 'ENTITY'
    " Check for entity name.  If none exists, indent one level.  Otherwise, if
    " the name actually turns out to be a percent sign, "%", this is a
    " parameter entity.  Read another token to determine the entity name and,
    " again, if none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + shiftwidth()
    elseif name == '%'
      let [name, end] = s:lex(line, end)
      if name == ""
        return indent + shiftwidth()
      endif
    endif

    " Now check for the entity value.  If none exists, indent one level.  If
    " it does exist, indent to same level as first line, as we're now done
    " with this entity.
    "
    " The entity value can be a string in single or double quotes (no escapes
    " to worry about, as entities are used instead).  However, it can also be
    " that this is an external unparsed entity.  In that case we have to look
    " further for (possibly) a public ID and a URI followed by the NDATA
    " keyword and the actual notation name.  For the public ID and URI, indent
    " two levels, if they don't exist.  If the NDATA keyword doesn't exist,
    " indent one level.  Otherwise, if the actual notation name doesn't exist,
    " indent two levels.  If it does, indent to same level as first line, as
    " we're now done with this entity.
    let [value, end] = s:lex(line, end)
    if value == ""
      return indent + shiftwidth()
    elseif value == 'SYSTEM' || value == 'PUBLIC'
      let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
      if quoted_string == ""
        return indent + shiftwidth() * 2
      endif

      if value == 'PUBLIC'
        let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
        if quoted_string == ""
          return indent + shiftwidth() * 2
        endif
      endif

      let [ndata, end] = s:lex(line, end)
      if ndata == ""
        return indent + shiftwidth()
      endif

      let [name, end] = s:lex(line, end)
      return name == "" ? (indent + shiftwidth() * 2) : indent
    else
      return indent
    endif
  elseif declaration == 'NOTATION'
    " Check for notation name.  If none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + shiftwidth()
    endif

    " Now check for the external ID.  If none exists, indent one level.
    let [id, end] = s:lex(line, end)
    if id == ""
      return indent + shiftwidth()
    elseif id == 'SYSTEM' || id == 'PUBLIC'
      let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
      if quoted_string == ""
        return indent + shiftwidth() * 2
      endif

      if id == 'PUBLIC'
        " The second literal is optional here, so a closing ">" is accepted
        " in its place.
        let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\|>\)')
        if quoted_string == ""
          " TODO: Should use s:lex here on getline(v:lnum) and check for >.
          return getline(v:lnum) =~ '^\s*>' ? indent : (indent + shiftwidth() * 2)
        elseif quoted_string == '>'
          return indent
        endif
      endif
    endif

    return indent
  endif

  " TODO: Processing directives could be indented I suppose.  But perhaps it's
  " just as well to let the user decide how to indent them (perhaps extending
  " this function to include proper support for whatever processing directive
  " language they want to use).

  " Conditional sections are simply passed along to let Vim decide what to do
  " (and hence the user).
  return -1
endfunction
326
" Put back the 'cpoptions' value saved at the top of the script and drop the
" temporary variable.
let &cpoptions = s:cpo_save | unlet s:cpo_save