" Vim indent file
" Language: DTD (Document Type Definition for XML)
" Maintainer: Nikolai Weibull <now@bitwi.se>
" Latest Revision: 2010-09-21
Bram Moolenaare37d50a2008-08-06 17:06:04 +00005
" Buffer-local indent settings.  These are applied every time the file is
" sourced, i.e. once per DTD buffer.
setlocal indentexpr=GetDTDIndent()
setlocal indentkeys=!^F,o,O,>
setlocal nosmartindent

" The functions below only need to be defined once.  Bail out *before*
" 'cpoptions' is touched: resetting it first and then finishing here would
" skip the restore at the end of the file and leave the user's 'cpoptions'
" clobbered for the rest of the session.
if exists("*GetDTDIndent")
  finish
endif

" Use the Vim default 'cpoptions' while defining the functions; the saved
" value is restored at the end of the file.
let s:cpo_save = &cpo
set cpo&vim

" Default token pattern used by the lexer: a run of non-whitespace.
" TODO: Needs to be adjusted to stop at [, <, and ].
let s:token_pattern = '^[^[:space:]]\+'
19
" Read a single token from a:input, beginning at byte index a:start.
"
" Leading whitespace (including newlines) is skipped first.  The token must
" then match the optional third argument, a pattern anchored with ^; when
" it is omitted, s:token_pattern is used.
"
" Returns [token, index just past the token] on success, or ["", a:start]
" when nothing matches.
function s:lex1(input, start, ...)
  if a:0 > 0
    let token_re = a:1
  else
    let token_re = s:token_pattern
  endif

  " Position of the first non-blank byte at or after a:start.
  let tok_start = matchend(a:input, '^\_s*', a:start)
  " Only try to match a token if skipping the whitespace succeeded.
  let tok_end = tok_start == -1 ? -1 : matchend(a:input, token_re, tok_start)
  if tok_end == -1
    return ["", a:start]
  endif

  return [strpart(a:input, tok_start, tok_end - tok_start), tok_end]
endfunction
33
" Read the next token from a:input starting at byte index a:start, skipping
" over any '-- ... --' comment groups that come first.
"
" Takes the same optional pattern argument as s:lex1() and returns the same
" [token, end] pair.  If a comment group is opened but never closed, the
" result is the failed read inside that group, i.e. an empty token.
function s:lex(input, start, ...)
  if a:0 > 0
    let token_re = a:1
  else
    let token_re = s:token_pattern
  endif

  let [tok, pos] = s:lex1(a:input, a:start, token_re)
  while tok == '--'
    " Inside a comment: consume tokens up to and including the closing '--'.
    while 1
      let [tok, pos] = s:lex1(a:input, pos, token_re)
      if tok == "" || tok == '--'
        break
      endif
    endwhile

    " Ran out of input before the comment was closed.
    if tok == ""
      return [tok, pos]
    endif

    " Comment closed; read what follows it (possibly another comment).
    let [tok, pos] = s:lex1(a:input, pos, token_re)
  endwhile

  return [tok, pos]
endfunction
49
" Scan a parenthesized group in a:line.  a:end is the byte index just past
" the opening '(' that the caller has already consumed.
"
" Returns [-1, end] if that opening parenthesis gets closed, where end is
" the index just past the closing ')'.  Otherwise, once no further token
" can be read, returns [column, end], where column is the offset of the
" innermost still-open '(' from the newline preceding it in a:line.
function s:indent_to_innermost_parentheses(line, end)
  let item_re = '^\%([(),|]\|[A-Za-z0-9_-]\+\|#PCDATA\|%[A-Za-z0-9_-]\+;\)[?*+]\='
  let pos = a:end
  " Stack of byte indexes of the currently open parentheses; the bottom
  " entry is the one the caller consumed.
  let open_parens = [pos - 1]

  while 1
    let [tok, pos] = s:lex(a:line, pos, item_re)
    if tok == ""
      break
    endif

    let first_char = tok[0]
    if first_char == '('
      call add(open_parens, pos - 1)
    elseif first_char == ')'
      if len(open_parens) == 1
        " The outermost group is complete.
        return [-1, pos]
      endif
      call remove(open_parens, -1)
    endif
  endwhile

  let innermost = open_parens[-1]
  return [innermost - strridx(a:line, "\n", innermost), pos]
endfunction
67
68" TODO: Line and end could be script global (think OO members).
" 'indentexpr' function for DTD files: compute the indent for line v:lnum.
"
" Searches backwards for the nearest '<!', lexes the text from there up to
" (but not including) the line being indented, and picks the indent from
" how far the declaration (comment, ELEMENT, ATTLIST, ENTITY or NOTATION)
" has progressed.
"
" Returns the indent in columns, or -1 to leave the decision to Vim (used
" for unterminated comments and for anything that isn't one of the
" declarations above).
function GetDTDIndent()
  if v:lnum == 1
    return 0
  endif

  " Width of one indentation level.  Reading 'shiftwidth' directly yields 0
  " when the user has set it to zero to make it follow 'tabstop', which
  " would collapse every level; shiftwidth() returns the effective value.
  " Fall back to the raw option on Vims that lack the function.
  let sw = exists('*shiftwidth') ? shiftwidth() : &sw

  " Begin by searching back for a <!.  From here, depending on what follows
  " immediately after, parse to where we're at to determine what to do.
  " NOTE(review): the search itself does not skip a <! that sits inside a
  " comment.
  if search('<!', 'bceW') == 0
    return indent(v:lnum - 1)
  endif
  let lnum = line('.')
  let col = col('.')
  let indent = indent('.')
  " Text of the declaration so far: everything from the line holding the <!
  " up to the line before the one being indented.
  let line = lnum == v:lnum ? getline(lnum) : join(getline(lnum, v:lnum - 1), "\n")

  let [declaration, end] = s:lex1(line, col)
  if declaration == ""
    return indent + sw
  elseif declaration == '--'
    " We're looking at a comment.  Now, simply determine if the comment is
    " terminated or not.  If it isn't, let Vim take care of that using
    " 'comments' and 'autoindent'.  Otherwise, indent to the first line's
    " level.
    while declaration != ""
      let [declaration, end] = s:lex(line, end)
      if declaration == "-->"
        return indent
      endif
    endwhile
    return -1
  elseif declaration == 'ELEMENT'
    " Check for element name.  If none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + sw
    endif

    " Check for token following element name.  This can be a specification
    " of whether the start or end tag may be omitted.  If nothing is found,
    " indent one level.
    let [token, end] = s:lex(line, end, '^\%([-O(]\|ANY\|EMPTY\)')
    let n = 0
    while token =~ '[-O]' && n < 2
      let [token, end] = s:lex(line, end, '^\%([-O(]\|ANY\|EMPTY\)')
      let n += 1
    endwhile
    if token == ""
      return indent + sw
    endif

    " Next comes the content model.  If the token we've found isn't a
    " parenthesis it must be either ANY, EMPTY or some random junk.  Either
    " way, we're done indenting this element, so set it to that of the first
    " line so that the terminating '>' winds up having the same indentation.
    if token != '('
      return indent
    endif

    " Now go through the content model.  We need to keep track of the
    " nesting of parentheses.  As soon as we hit 0 we're done.  If that
    " happens we must have a complete content model.  Thus set indentation
    " to be the same as that of the first line so that the terminating '>'
    " winds up having the same indentation.  Otherwise, we'll indent to the
    " innermost parentheses not yet matched.
    let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
    if indent_of_innermost != -1
      return indent_of_innermost
    endif

    " Finally, look for any additions and/or exceptions to the content
    " model.  This is defined by a '+' or '-' followed by another content
    " model declaration.
    " TODO: Can the '-' be separated by whitespace from the '('?
    let seen = { '+(': 0, '-(': 0 }
    while 1
      let [additions_exceptions, end] = s:lex(line, end, '^[+-](')
      if additions_exceptions != '+(' && additions_exceptions != '-('
        let [token, end] = s:lex(line, end)
        if token == '>'
          return indent
        endif
        " TODO: Should use s:lex here on getline(v:lnum) and check for >.
        " Stay at the first line's level if the current line starts with '>'
        " or both an addition and an exception have been seen; otherwise
        " indent one level.
        return getline(v:lnum) =~ '^\s*>' || count(values(seen), 0) == 0 ? indent : (indent + sw)
      endif

      " If we've seen an addition or exception already and this is of the
      " same kind, the user is writing a broken DTD.  Time to bail.
      if seen[additions_exceptions]
        return indent
      endif
      let seen[additions_exceptions] = 1

      let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
      if indent_of_innermost != -1
        return indent_of_innermost
      endif
    endwhile
  elseif declaration == 'ATTLIST'
    " Check for element name.  If none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + sw
    endif

    " Check for any number of attributes.
    while 1
      " Check for attribute name.  If none exists, indent one level, unless
      " the current line is a lone '>', in which case we indent to the same
      " level as the first line.  Otherwise, if the attribute name is '>',
      " we have actually hit the end of the attribute list, in which case we
      " indent to the same level as the first line.
      let [name, end] = s:lex(line, end)
      if name == ""
        " TODO: Should use s:lex here on getline(v:lnum) and check for >.
        return getline(v:lnum) =~ '^\s*>' ? indent : (indent + sw)
      elseif name == ">"
        return indent
      endif

      " Check for attribute value declaration.  If none exists, indent two
      " levels.  Otherwise, if it's an enumerated value, check for nested
      " parentheses and indent to the innermost one if we don't reach the
      " end of the list.  Otherwise, just continue with looking for the
      " default attribute value.
      " TODO: Do validation of keywords
      " (CDATA|NMTOKEN|NMTOKENS|ID|IDREF|IDREFS|ENTITY|ENTITIES)?
      let [value, end] = s:lex(line, end, '^\%((\|[^[:space:]]\+\)')
      if value == ""
        return indent + sw * 2
      elseif value == 'NOTATION'
        " If this is an enumerated value based on notations, read another
        " token for the actual value.  If it doesn't exist, indent three
        " levels.
        " TODO: If validating according to above, value must be equal to '('.
        let [value, end] = s:lex(line, end, '^\%((\|[^[:space:]]\+\)')
        if value == ""
          return indent + sw * 3
        endif
      endif

      if value == '('
        let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
        if indent_of_innermost != -1
          return indent_of_innermost
        endif
      endif

      " Finally look for the attribute's default value.  If none exists,
      " indent two levels.
      let [default, end] = s:lex(line, end, '^\%("\_[^"]*"\|#\(REQUIRED\|IMPLIED\|FIXED\)\)')
      if default == ""
        return indent + sw * 2
      elseif default == '#FIXED'
        " We need to look for the fixed value.  If none exists, indent three
        " levels.
        let [default, end] = s:lex(line, end, '^"\_[^"]*"')
        if default == ""
          return indent + sw * 3
        endif
      endif
    endwhile
  elseif declaration == 'ENTITY'
    " Check for entity name.  If none exists, indent one level.  Otherwise,
    " if the name actually turns out to be a percent sign, '%', this is a
    " parameter entity.  Read another token to determine the entity name
    " and, again, if none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + sw
    elseif name == '%'
      let [name, end] = s:lex(line, end)
      if name == ""
        return indent + sw
      endif
    endif

    " Now check for the entity value.  If none exists, indent one level.  If
    " it does exist, indent to same level as first line, as we're now done
    " with this entity.
    "
    " The entity value can be a string in single or double quotes (no
    " escapes to worry about, as entities are used instead).  However, it
    " can also be that this is an external unparsed entity.  In that case we
    " have to look further for (possibly) a public ID and an URI followed by
    " the NDATA keyword and the actual notation name.  For the public ID and
    " URI, indent two levels, if they don't exist.  If the NDATA keyword
    " doesn't exist, indent one level.  Otherwise, if the actual notation
    " name doesn't exist, indent two levels.  If it does, indent to same
    " level as first line, as we're now done with this entity.
    let [value, end] = s:lex(line, end)
    if value == ""
      return indent + sw
    elseif value == 'SYSTEM' || value == 'PUBLIC'
      let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
      if quoted_string == ""
        return indent + sw * 2
      endif

      if value == 'PUBLIC'
        let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
        if quoted_string == ""
          return indent + sw * 2
        endif
      endif

      let [ndata, end] = s:lex(line, end)
      if ndata == ""
        return indent + sw
      endif

      let [name, end] = s:lex(line, end)
      return name == "" ? (indent + sw * 2) : indent
    else
      return indent
    endif
  elseif declaration == 'NOTATION'
    " Check for notation name.  If none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + sw
    endif

    " Now check for the external ID.  If none exists, indent one level.
    let [id, end] = s:lex(line, end)
    if id == ""
      return indent + sw
    elseif id == 'SYSTEM' || id == 'PUBLIC'
      let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
      if quoted_string == ""
        return indent + sw * 2
      endif

      if id == 'PUBLIC'
        let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\|>\)')
        if quoted_string == ""
          " TODO: Should use s:lex here on getline(v:lnum) and check for >.
          return getline(v:lnum) =~ '^\s*>' ? indent : (indent + sw * 2)
        elseif quoted_string == '>'
          return indent
        endif
      endif
    endif

    return indent
  endif

  " TODO: Processing directives could be indented I suppose.  But perhaps
  " it's just as well to let the user decide how to indent them (perhaps
  " extending this function to include proper support for whatever
  " processing directive language they want to use).

  " Conditional sections are simply passed along to let Vim decide what to
  " do (and hence the user).
  return -1
endfunction
323
" Restore the 'cpoptions' value saved at the top of the file and drop the
" temporary so it does not linger as a script variable.
let &cpo = s:cpo_save
unlet s:cpo_save