" Vim indent file
" Language:             DTD (Document Type Definition for XML)
" Maintainer:           Doug Kearns <dougkearns@gmail.com>
" Previous Maintainer:  Nikolai Weibull <now@bitwi.se>
" Last Change:          24 Sep 2021

" Only load this indent file when no other was loaded.
if exists("b:did_indent") | finish | endif
let b:did_indent = 1

" Buffer-local indent options.  b:undo_indent holds the command that resets
" exactly these three options.
setlocal indentexpr=GetDTDIndent() indentkeys=!^F,o,O,> nosmartindent
let b:undo_indent = "setl inde< indk< si<"

" Skip the rest of the script when GetDTDIndent() is already defined.
if exists("*GetDTDIndent") | finish | endif

" Save 'cpoptions' and switch to the Vim default while the remainder of the
" script is sourced.
let s:cpo_save = &cpo
set cpo&vim

" Default token pattern: a run of non-whitespace characters.
" TODO: Needs to be adjusted to stop at [, <, and ].
let s:token_pattern = '^[^[:space:]]\+'

" Read a single token from a:input, starting at byte offset a:start.
"
" Leading whitespace (including newlines) is skipped first.  The token is
" whatever the pattern matches at that point; the optional third argument
" overrides the default s:token_pattern.
"
" Returns [token, offset-just-past-token].  When nothing matches, returns
" ["", a:start], i.e. the position is left unchanged.
function s:lex1(input, start, ...)
  if a:0 > 0
    let regex = a:1
  else
    let regex = s:token_pattern
  endif

  " Step over any whitespace in front of the token.
  let from = matchend(a:input, '^\_s*', a:start)
  if from == -1
    return ["", a:start]
  endif

  let to = matchend(a:input, regex, from)
  if to == -1
    return ["", a:start]
  endif

  return [strpart(a:input, from, to - from), to]
endfunction

" Read the next token from a:input at byte offset a:start, skipping over
" “-- ... --” comments.
"
" Tokens are read with s:lex1(), using the optional third argument as the
" pattern (default: s:token_pattern).  Whenever the token read is “--”,
" everything up to and including the next “--” token is discarded and the
" token after that is examined instead.
"
" Returns the [token, offset] pair of the first token outside a comment.  If
" the input runs out inside a comment, the failing pair from s:lex1() (with an
" empty token) is returned.
function s:lex(input, start, ...)
  let regex = a:0 > 0 ? a:1 : s:token_pattern
  let [tok, pos] = s:lex1(a:input, a:start, regex)

  while tok == '--'
    " Inside a comment: consume tokens until the closing “--” or until no
    " further token can be read.
    while 1
      let [tok, pos] = s:lex1(a:input, pos, regex)
      if tok == "" || tok == '--'
        break
      endif
    endwhile

    " Unterminated comment.
    if tok == ""
      return [tok, pos]
    endif

    " First token after the comment; it may open another comment.
    let [tok, pos] = s:lex1(a:input, pos, regex)
  endwhile

  return [tok, pos]
endfunction

" Scan a parenthesised group and work out how to indent inside it.
"
" a:line  text being lexed; may hold several buffer lines joined by "\n".
" a:end   byte offset just past an opening “(” (so a:end - 1 is that “(”).
"
" Tokens are read with s:lex() while a stack of the byte offsets of the
" currently open parentheses is maintained.
"
" Returns [indent, offset]:
"   * [-1, offset] when the parenthesis opened before a:end gets closed;
"     offset is then just past the closing “)”.
"   * otherwise, once no further token can be read, indent is the display
"     width of the text from the start of the line holding the innermost
"     unclosed “(” up to and including that “(”.
"
" The width is measured with strdisplaywidth() rather than as a byte count, so
" tabs and multibyte characters in front of the parenthesis are counted as the
" columns they occupy.  This assumes a:line begins at the start of a buffer
" line, so that tab stops line up.
function s:indent_to_innermost_parentheses(line, end)
  let token = '('
  let end = a:end
  let parentheses = [end - 1]
  while token != ""
    let [token, end] = s:lex(a:line, end, '^\%([(),|]\|[A-Za-z0-9_-]\+\|#P\=CDATA\|%[A-Za-z0-9_-]\+;\)[?*+]\=')
    if token[0] == '('
      call add(parentheses, end - 1)
    elseif token[0] == ')'
      if len(parentheses) == 1
        return [-1, end]
      endif
      call remove(parentheses, -1)
    endif
  endwhile

  " Start of the line that contains the innermost open parenthesis:
  " strridx() yields -1 when there is no preceding newline, giving offset 0.
  let innermost = parentheses[-1]
  let bol = strridx(a:line, "\n", innermost) + 1
  return [strdisplaywidth(strpart(a:line, bol, innermost - bol + 1)), end]
endfunction

" 'indentexpr' function: compute the indent of line v:lnum in a DTD.
"
" The function searches backwards from the cursor for the nearest “<!”, takes
" the text from that line up to (but not including) v:lnum, or just the line
" itself when the “<!” is on v:lnum, and lexes it as a declaration (comment,
" ELEMENT, ATTLIST, ENTITY or NOTATION).  How far the declaration has got
" decides the result.
"
" Returns the indent in columns, or -1 to leave the decision to Vim.
" Note that search() moves the cursor.
"
" TODO: Line and end could be script global (think OO members).
function GetDTDIndent()
  if v:lnum == 1
    return 0
  endif

  " Begin by searching back for the nearest “<!”.  From there, depending on
  " what follows immediately after, parse to where we’re at to determine what
  " to do.
  " NOTE: the search itself does not check whether the match lies inside a
  " comment.
  if search('<!', 'bceW') == 0
    return indent(v:lnum - 1)
  endif
  " The 'e' flag leaves the cursor on the “!”, so the 1-based col('.') used as
  " a 0-based byte offset points just past the “<!”.
  let lnum = line('.')
  let col = col('.')
  let indent = indent('.')
  let line = lnum == v:lnum ? getline(lnum) : join(getline(lnum, v:lnum - 1), "\n")

  let [declaration, end] = s:lex1(line, col)
  if declaration == ""
    return indent + shiftwidth()
  elseif declaration == '--'
    " We’re looking at a comment.  Now, simply determine if the comment is
    " terminated or not.  If it isn’t, let Vim take care of that using
    " 'comments' and 'autoindent'.  Otherwise, indent to the first line’s
    " level.
    while declaration != ""
      let [declaration, end] = s:lex(line, end)
      if declaration == "-->"
        return indent
      endif
    endwhile
    return -1
  elseif declaration == 'ELEMENT'
    " Check for element name.  If none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + shiftwidth()
    endif

    " Check for token following element name.  This can be a specification of
    " whether the start or end tag may be omitted.  If nothing is found, indent
    " one level.
    let [token, end] = s:lex(line, end, '^\%([-O(]\|ANY\|EMPTY\)')
    let n = 0
    while token =~ '[-O]' && n < 2
      let [token, end] = s:lex(line, end, '^\%([-O(]\|ANY\|EMPTY\)')
      let n += 1
    endwhile
    if token == ""
      return indent + shiftwidth()
    endif

    " Next comes the content model.  If the token we’ve found isn’t a
    " parenthesis it must be either ANY, EMPTY or some random junk.  Either
    " way, we’re done indenting this element, so set it to that of the first
    " line so that the terminating “>” winds up having the same indentation.
    if token != '('
      return indent
    endif

    " Now go through the content model.  We need to keep track of the nesting
    " of parentheses.  As soon as we hit 0 we’re done.  If that happens we must
    " have a complete content model.  Thus set indentation to be the same as
    " that of the first line so that the terminating “>” winds up having the
    " same indentation.  Otherwise, we’ll indent to the innermost parentheses
    " not yet matched.
    let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
    if indent_of_innermost != -1
      return indent_of_innermost
    endif

    " Finally, look for any additions and/or exceptions to the content model.
    " This is defined by a “+” or “-” followed by another content model
    " declaration.
    " TODO: Can the “-” be separated by whitespace from the “(”?
    let seen = { '+(': 0, '-(': 0 }
    while 1
      let [additions_exceptions, end] = s:lex(line, end, '^[+-](')
      if additions_exceptions != '+(' && additions_exceptions != '-('
        let [token, end] = s:lex(line, end)
        if token == '>'
          return indent
        endif
        " TODO: Should use s:lex here on getline(v:lnum) and check for >.
        return getline(v:lnum) =~ '^\s*>' || count(values(seen), 0) == 0 ? indent : (indent + shiftwidth())
      endif

      " If we’ve seen an addition or exception already and this is of the same
      " kind, the user is writing a broken DTD.  Time to bail.
      if seen[additions_exceptions]
        return indent
      endif
      let seen[additions_exceptions] = 1

      let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
      if indent_of_innermost != -1
        return indent_of_innermost
      endif
    endwhile
  elseif declaration == 'ATTLIST'
    " Check for element name.  If none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + shiftwidth()
    endif

    " Check for any number of attributes.
    while 1
      " Check for attribute name.  If none exists, indent one level, unless the
      " current line is a lone “>”, in which case we indent to the same level
      " as the first line.  Otherwise, if the attribute name is “>”, we have
      " actually hit the end of the attribute list, in which case we indent to
      " the same level as the first line.
      let [name, end] = s:lex(line, end)
      if name == ""
        " TODO: Should use s:lex here on getline(v:lnum) and check for >.
        return getline(v:lnum) =~ '^\s*>' ? indent : (indent + shiftwidth())
      elseif name == ">"
        return indent
      endif

      " Check for attribute value declaration.  If none exists, indent two
      " levels.  Otherwise, if it’s an enumerated value, check for nested
      " parentheses and indent to the innermost one if we don’t reach the end
      " of the list.  Otherwise, just continue with looking for the default
      " attribute value.
      " TODO: Do validation of keywords
      " (CDATA|NMTOKEN|NMTOKENS|ID|IDREF|IDREFS|ENTITY|ENTITIES)?
      let [value, end] = s:lex(line, end, '^\%((\|[^[:space:]]\+\)')
      if value == ""
        return indent + shiftwidth() * 2
      elseif value == 'NOTATION'
        " If this is an enumerated value based on notations, read another
        " token for the actual value.  If it doesn’t exist, indent three
        " levels.
        " TODO: If validating according to above, value must be equal to '('.
        let [value, end] = s:lex(line, end, '^\%((\|[^[:space:]]\+\)')
        if value == ""
          return indent + shiftwidth() * 3
        endif
      endif

      if value == '('
        let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
        if indent_of_innermost != -1
          return indent_of_innermost
        endif
      endif

      " Finally look for the attribute’s default value.  If none exists,
      " indent two levels.  The value may be quoted with either double or
      " single quotes and may span lines.
      let [default, end] = s:lex(line, end, '^\%("\_[^"]*"\|''\_[^'']*''\|#\(REQUIRED\|IMPLIED\|FIXED\)\)')
      if default == ""
        return indent + shiftwidth() * 2
      elseif default == '#FIXED'
        " We need to look for the fixed value, again in either quote style.
        " If none exists, indent three levels.
        let [default, end] = s:lex(line, end, '^\%("\_[^"]*"\|''\_[^'']*''\)')
        if default == ""
          return indent + shiftwidth() * 3
        endif
      endif
    endwhile
  elseif declaration == 'ENTITY'
    " Check for entity name.  If none exists, indent one level.  Otherwise, if
    " the name actually turns out to be a percent sign, “%”, this is a
    " parameter entity.  Read another token to determine the entity name and,
    " again, if none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + shiftwidth()
    elseif name == '%'
      let [name, end] = s:lex(line, end)
      if name == ""
        return indent + shiftwidth()
      endif
    endif

    " Now check for the entity value.  If none exists, indent one level.  If it
    " does exist, indent to same level as first line, as we’re now done with
    " this entity.
    "
    " The entity value can be a string in single or double quotes (no escapes
    " to worry about, as entities are used instead).  However, it can also be
    " that this is an external unparsed entity.  In that case we have to look
    " further for (possibly) a public ID and an URI followed by the NDATA
    " keyword and the actual notation name.  For the public ID and URI, indent
    " two levels, if they don’t exist.  If the NDATA keyword doesn’t exist,
    " indent one level.  Otherwise, if the actual notation name doesn’t exist,
    " indent two levels.  If it does, indent to same level as first line, as
    " we’re now done with this entity.
    let [value, end] = s:lex(line, end)
    if value == ""
      return indent + shiftwidth()
    elseif value == 'SYSTEM' || value == 'PUBLIC'
      let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
      if quoted_string == ""
        return indent + shiftwidth() * 2
      endif

      if value == 'PUBLIC'
        let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
        if quoted_string == ""
          return indent + shiftwidth() * 2
        endif
      endif

      let [ndata, end] = s:lex(line, end)
      if ndata == ""
        return indent + shiftwidth()
      endif

      let [name, end] = s:lex(line, end)
      return name == "" ? (indent + shiftwidth() * 2) : indent
    else
      return indent
    endif
  elseif declaration == 'NOTATION'
    " Check for notation name.  If none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + shiftwidth()
    endif

    " Now check for the external ID.  If none exists, indent one level.
    let [id, end] = s:lex(line, end)
    if id == ""
      return indent + shiftwidth()
    elseif id == 'SYSTEM' || id == 'PUBLIC'
      let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
      if quoted_string == ""
        return indent + shiftwidth() * 2
      endif

      if id == 'PUBLIC'
        let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\|>\)')
        if quoted_string == ""
          " TODO: Should use s:lex here on getline(v:lnum) and check for >.
          return getline(v:lnum) =~ '^\s*>' ? indent : (indent + shiftwidth() * 2)
        elseif quoted_string == '>'
          return indent
        endif
      endif
    endif

    return indent
  endif

  " TODO: Processing directives could be indented I suppose.  But perhaps it’s
  " just as well to let the user decide how to indent them (perhaps extending
  " this function to include proper support for whatever processing directive
  " language they want to use).

  " Conditional sections are simply passed along to let Vim decide what to do
  " (and hence the user).
  return -1
endfunction

" Put back the 'cpoptions' value saved earlier in this script and drop the
" temporary variable.
let &cpo = s:cpo_save | unlet s:cpo_save