Bram Moolenaar | e37d50a | 2008-08-06 17:06:04 +0000 | [diff] [blame] | 1 | " Vim indent file |
Bram Moolenaar | 6e64922 | 2021-10-04 21:32:54 +0100 | [diff] [blame] | 2 | " Language: DTD (Document Type Definition for XML) |
| 3 | " Maintainer: Doug Kearns <dougkearns@gmail.com> |
| 4 | " Previous Maintainer: Nikolai Weibull <now@bitwi.se> |
| 5 | " Last Change: 24 Sep 2021 |
Bram Moolenaar | e37d50a | 2008-08-06 17:06:04 +0000 | [diff] [blame] | 6 | |
" Do nothing if an indent script has already been applied to this buffer.
if exists('b:did_indent') | finish | endif
let b:did_indent = 1

" Buffer-local indent options: GetDTDIndent() computes the indent.
setlocal indentexpr=GetDTDIndent() indentkeys=!^F,o,O,> nosmartindent

" Command string that resets the three options set above.
let b:undo_indent = 'setl inde< indk< si<'

" Skip the definitions below if GetDTDIndent() already exists.
if exists('*GetDTDIndent') | finish | endif

" Remember 'cpoptions' and switch to the Vim default for the rest of the
" script.
let s:cpo_save = &cpo
set cpo&vim

" Default token pattern: a run of non-whitespace characters.
" TODO: Needs to be adjusted to stop at [, <, and ].
let s:token_pattern = '^[^[:space:]]\+'
| 28 | |
" Read a single token from a:input, beginning at byte offset a:start.
" Whitespace (newlines included) in front of the token is skipped.  An
" optional third argument gives the pattern a token must match; without it
" s:token_pattern is used.
" Returns [token, offset just past the token].  When nothing matches the
" result is ["", a:start], i.e. the offset is left where the caller had it.
function s:lex1(input, start, ...)
  let tokpat = a:0 > 0 ? a:1 : s:token_pattern
  let miss = ["", a:start]

  " Step over leading whitespace.
  let first = matchend(a:input, '^\_s*', a:start)
  if first == -1
    return miss
  endif

  " Match the token itself right after the whitespace.
  let past = matchend(a:input, tokpat, first)
  if past == -1
    return miss
  endif

  return [strpart(a:input, first, past - first), past]
endfunction
| 42 | |
" Like s:lex1(), but skips over "-- ... --" comments: whenever the token read
" is "--", tokens are discarded up to and including the next "--" and the
" token after that is considered instead.
" Returns [token, offset just past the token].  If the input runs out while
" inside a comment, the failing s:lex1() result (empty token) is returned.
function s:lex(input, start, ...)
  let tokpat = a:0 > 0 ? a:1 : s:token_pattern
  let [tok, pos] = s:lex1(a:input, a:start, tokpat)

  while tok == '--'
    " Inside a comment: discard tokens until the closing "--" shows up.
    while 1
      let [tok, pos] = s:lex1(a:input, pos, tokpat)
      if tok == ""
        " Unterminated comment.
        return [tok, pos]
      endif
      if tok == '--'
        break
      endif
    endwhile

    " First token after the comment; it may open another comment.
    let [tok, pos] = s:lex1(a:input, pos, tokpat)
  endwhile

  return [tok, pos]
endfunction
| 58 | |
" Scan a parenthesised group in a:line.  a:end is the byte offset just past
" an opening "(" that the caller has already consumed.
"
" Returns a two-item list:
"   [-1, offset]      the initial "(" was closed; offset is just past the
"                     matching ")".
"   [indent, offset]  lexing stopped while at least one "(" was still open;
"                     indent is the display width of the text from the start
"                     of that parenthesis' line up to and including the
"                     innermost unmatched "(".
function s:indent_to_innermost_parentheses(line, end)
  let token = '('
  let end = a:end
  " Stack holding the byte offsets of the parentheses that are still open.
  let parentheses = [end - 1]
  while token != ""
    let [token, end] = s:lex(a:line, end, '^\%([(),|]\|[A-Za-z0-9_-]\+\|#P\=CDATA\|%[A-Za-z0-9_-]\+;\)[?*+]\=')
    if token[0] == '('
      call add(parentheses, end - 1)
    elseif token[0] == ')'
      if len(parentheses) == 1
        " The outermost parenthesis was just closed.
        return [-1, end]
      endif
      call remove(parentheses, -1)
    endif
  endwhile

  " An indent is measured in screen columns, so convert the text in front of
  " (and including) the innermost open parenthesis to its display width
  " instead of using the byte offset.  The two only agree when that text
  " contains no tabs and no multibyte or wide characters.
  let open = parentheses[-1]
  let bol = strridx(a:line, "\n", open) + 1
  return [strdisplaywidth(strpart(a:line, bol, open - bol + 1)), end]
endfunction
| 76 | |
| 77 | " TODO: Line and end could be script global (think OO members). |
" Indent expression for DTD files ('indentexpr').
"
" Finds the nearest preceding "<!", lexes the declaration from there up to the
" line being indented (v:lnum) and derives the indent from how far the
" declaration has got.  Returns the indent for v:lnum, or -1 to let Vim decide.
" Note that the cursor is moved by the search.
function GetDTDIndent()
  if v:lnum == 1
    return 0
  endif

  " Begin by searching back for the nearest <!.  (No check is made whether it
  " is inside a comment.)  From here, depending on what follows immediately
  " after, parse to where we’re at to determine what to do.
  if search('<!', 'bceW') == 0
    return indent(v:lnum - 1)
  endif
  let lnum = line('.')
  " The cursor is on the “!”, so its 1-based column is the 0-based offset of
  " the character following it.
  let col = col('.')
  let indent = indent('.')
  let line = lnum == v:lnum ? getline(lnum) : join(getline(lnum, v:lnum - 1), "\n")

  let [declaration, end] = s:lex1(line, col)
  if declaration == ""
    return indent + shiftwidth()
  elseif declaration == '--'
    " We’re looking at a comment.  Now, simply determine if the comment is
    " terminated or not.  If it isn’t, let Vim take care of that using
    " 'comments' and 'autoindent'.  Otherwise, indent to the first line’s
    " level.
    while declaration != ""
      let [declaration, end] = s:lex(line, end)
      if declaration == "-->"
        return indent
      endif
    endwhile
    return -1
  elseif declaration == 'ELEMENT'
    " Check for element name.  If none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + shiftwidth()
    endif

    " Check for token following element name.  This can be a specification of
    " whether the start or end tag may be omitted.  If nothing is found, indent
    " one level.
    let [token, end] = s:lex(line, end, '^\%([-O(]\|ANY\|EMPTY\)')
    let n = 0
    while token =~ '[-O]' && n < 2
      let [token, end] = s:lex(line, end, '^\%([-O(]\|ANY\|EMPTY\)')
      let n += 1
    endwhile
    if token == ""
      return indent + shiftwidth()
    endif

    " Next comes the content model.  If the token we’ve found isn’t a
    " parenthesis it must be either ANY, EMPTY or some random junk.  Either
    " way, we’re done indenting this element, so set it to that of the first
    " line so that the terminating “>” winds up having the same indentation.
    if token != '('
      return indent
    endif

    " Now go through the content model.  We need to keep track of the nesting
    " of parentheses.  As soon as we hit 0 we’re done.  If that happens we must
    " have a complete content model.  Thus set indentation to be the same as
    " that of the first line so that the terminating “>” winds up having the
    " same indentation.  Otherwise, we’ll indent to the innermost parentheses
    " not yet matched.
    let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
    if indent_of_innermost != -1
      return indent_of_innermost
    endif

    " Finally, look for any additions and/or exceptions to the content model.
    " This is defined by a “+” or “-” followed by another content model
    " declaration.
    " TODO: Can the “-” be separated by whitespace from the “(”?
    let seen = { '+(': 0, '-(': 0 }
    while 1
      let [additions_exceptions, end] = s:lex(line, end, '^[+-](')
      if additions_exceptions != '+(' && additions_exceptions != '-('
        let [token, end] = s:lex(line, end)
        if token == '>'
          return indent
        endif
        " TODO: Should use s:lex here on getline(v:lnum) and check for >.
        return getline(v:lnum) =~ '^\s*>' || count(values(seen), 0) == 0 ? indent : (indent + shiftwidth())
      endif

      " If we’ve seen an addition or exception already and this is of the same
      " kind, the user is writing a broken DTD.  Time to bail.
      if seen[additions_exceptions]
        return indent
      endif
      let seen[additions_exceptions] = 1

      let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
      if indent_of_innermost != -1
        return indent_of_innermost
      endif
    endwhile
  elseif declaration == 'ATTLIST'
    " Check for element name.  If none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + shiftwidth()
    endif

    " Check for any number of attributes.
    while 1
      " Check for attribute name.  If none exists, indent one level, unless the
      " current line is a lone “>”, in which case we indent to the same level
      " as the first line.  Otherwise, if the attribute name is “>”, we have
      " actually hit the end of the attribute list, in which case we indent to
      " the same level as the first line.
      let [name, end] = s:lex(line, end)
      if name == ""
        " TODO: Should use s:lex here on getline(v:lnum) and check for >.
        return getline(v:lnum) =~ '^\s*>' ? indent : (indent + shiftwidth())
      elseif name == ">"
        return indent
      endif

      " Check for attribute value declaration.  If none exists, indent two
      " levels.  Otherwise, if it’s an enumerated value, check for nested
      " parentheses and indent to the innermost one if we don’t reach the end
      " of the list.  Otherwise, just continue with looking for the default
      " attribute value.
      " TODO: Do validation of keywords
      " (CDATA|NMTOKEN|NMTOKENS|ID|IDREF|IDREFS|ENTITY|ENTITIES)?
      let [value, end] = s:lex(line, end, '^\%((\|[^[:space:]]\+\)')
      if value == ""
        return indent + shiftwidth() * 2
      elseif value == 'NOTATION'
        " If this is an enumerated value based on notations, read another
        " token for the actual value.  If it doesn’t exist, indent three
        " levels.
        " TODO: If validating according to above, value must be equal to '('.
        let [value, end] = s:lex(line, end, '^\%((\|[^[:space:]]\+\)')
        if value == ""
          return indent + shiftwidth() * 3
        endif
      endif

      if value == '('
        let [indent_of_innermost, end] = s:indent_to_innermost_parentheses(line, end)
        if indent_of_innermost != -1
          return indent_of_innermost
        endif
      endif

      " Finally look for the attribute’s default value.  If none exists,
      " indent two levels.
      let [default, end] = s:lex(line, end, '^\%("\_[^"]*"\|#\(REQUIRED\|IMPLIED\|FIXED\)\)')
      if default == ""
        return indent + shiftwidth() * 2
      elseif default == '#FIXED'
        " We need to look for the fixed value.  If none exists, indent three
        " levels.
        let [default, end] = s:lex(line, end, '^"\_[^"]*"')
        if default == ""
          return indent + shiftwidth() * 3
        endif
      endif
    endwhile
  elseif declaration == 'ENTITY'
    " Check for entity name.  If none exists, indent one level.  Otherwise, if
    " the name actually turns out to be a percent sign, “%”, this is a
    " parameter entity.  Read another token to determine the entity name and,
    " again, if none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + shiftwidth()
    elseif name == '%'
      let [name, end] = s:lex(line, end)
      if name == ""
        return indent + shiftwidth()
      endif
    endif

    " Now check for the entity value.  If none exists, indent one level.  If it
    " does exist, indent to same level as first line, as we’re now done with
    " this entity.
    "
    " The entity value can be a string in single or double quotes (no escapes
    " to worry about, as entities are used instead).  However, it can also be
    " that this is an external entity.  In that case we have to look further
    " for (possibly) a public ID and a URI, optionally followed by the NDATA
    " keyword and the actual notation name.  For the public ID and URI, indent
    " two levels, if they don’t exist.  If neither the NDATA keyword nor the
    " terminating “>” exists, indent one level (unless the current line is a
    " lone “>”).  If the “>” follows the URI directly, the declaration is
    " complete, so indent to same level as first line.  Otherwise, if the
    " actual notation name doesn’t exist, indent two levels.  If it does,
    " indent to same level as first line, as we’re now done with this entity.
    let [value, end] = s:lex(line, end)
    if value == ""
      return indent + shiftwidth()
    elseif value == 'SYSTEM' || value == 'PUBLIC'
      let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
      if quoted_string == ""
        return indent + shiftwidth() * 2
      endif

      if value == 'PUBLIC'
        let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
        if quoted_string == ""
          return indent + shiftwidth() * 2
        endif
      endif

      let [ndata, end] = s:lex(line, end)
      if ndata == ""
        " TODO: Should use s:lex here on getline(v:lnum) and check for >.
        return getline(v:lnum) =~ '^\s*>' ? indent : (indent + shiftwidth())
      elseif ndata == '>'
        " External entity without NDATA: the declaration is already
        " terminated, so there is no notation name to wait for.
        return indent
      endif

      let [name, end] = s:lex(line, end)
      return name == "" ? (indent + shiftwidth() * 2) : indent
    else
      return indent
    endif
  elseif declaration == 'NOTATION'
    " Check for notation name.  If none exists, indent one level.
    let [name, end] = s:lex(line, end)
    if name == ""
      return indent + shiftwidth()
    endif

    " Now check for the external ID.  If none exists, indent one level.
    let [id, end] = s:lex(line, end)
    if id == ""
      return indent + shiftwidth()
    elseif id == 'SYSTEM' || id == 'PUBLIC'
      let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\)')
      if quoted_string == ""
        return indent + shiftwidth() * 2
      endif

      if id == 'PUBLIC'
        let [quoted_string, end] = s:lex(line, end, '\%("[^"]\+"\|''[^'']\+''\|>\)')
        if quoted_string == ""
          " TODO: Should use s:lex here on getline(v:lnum) and check for >.
          return getline(v:lnum) =~ '^\s*>' ? indent : (indent + shiftwidth() * 2)
        elseif quoted_string == '>'
          return indent
        endif
      endif
    endif

    return indent
  endif

  " TODO: Processing directives could be indented I suppose.  But perhaps it’s
  " just as well to let the user decide how to indent them (perhaps extending
  " this function to include proper support for whatever processing directive
  " language they want to use).

  " Conditional sections are simply passed along to let Vim decide what to do
  " (and hence the user).
  return -1
endfunction
| 332 | |
" Put back the 'cpoptions' value saved before the functions were defined and
" drop the temporary script variable.
let &cpo = s:cpo_save | unlet s:cpo_save