Bram Moolenaar | 299d8e5 | 2022-02-13 20:32:02 +0000 | [diff] [blame] | 1 | vim9script |
| 2 | |
| 3 | # Vim function for detecting a filetype from the file contents. |
| 4 | # Invoked from "scripts.vim" in 'runtimepath' |
| 5 | # |
| 6 | # Maintainer: Bram Moolenaar <Bram@vim.org> |
| 7 | # Last Change: 2022 Feb 13 |
| 8 | |
# Entry point: inspect the first line of the current buffer and hand it to the
# detector that fits.  A line that begins with "#!" goes to
# DetectFromHashBang(); every other line goes to DetectFromText().
export def DetectFiletype()
  const first = getline(1)

  # Compare the first two characters in one go.
  if first[0 : 1] == '#!'
    DetectFromHashBang(first)
    return
  endif

  DetectFromText(first)
enddef
| 19 | |
| 20 | # Called for a script that has "#!" in the first line. |
def DetectFromHashBang(firstline: string)
  # Work out which interpreter a "#!" line names and set 'filetype' of the
  # current buffer to match.
  #
  #   firstline  text of the first buffer line; expected to start with "#!"
  #
  # Besides the argument, line 3 of the current buffer is read (for the
  # "exec wish" idiom).  When the interpreter name matches none of the
  # patterns below, 'filetype' is left untouched.
  var line1 = firstline

  # Check for a line like "#!/usr/bin/env {options} bash".  Turn it into
  # "#!/usr/bin/bash" to make matching easier.
  # Recognize only a few {options} that are commonly used.
  if line1 =~ '^#!\s*\S*\<env\s'
    # Drop every VAR=value assignment.
    line1 = substitute(line1, '\S\+=\S\+', '', 'g')
    # Drop the first occurrence of a known env option.
    line1 = substitute(line1, '\(-[iS]\|--ignore-environment\|--split-string\)', '', '')
    # Drop the word "env" and the white space that follows it.
    line1 = substitute(line1, '\<env\s\+', '', '')
  endif

  # Get the program name.
  # Only accept spaces in PC style paths: "#!c:/program files/perl [args]".
  # If the word env is used, use the first word after the space:
  # "#!/usr/bin/env perl [path/args]"
  # If there is no path use the first word: "#!perl [path/args]".
  # Otherwise get the last word after a slash: "#!/usr/bin/perl [path/args]".
  var name: string
  if line1 =~ '^#!\s*\a:[/\\]'
    name = substitute(line1, '^#!.*[/\\]\(\i\+\).*', '\1', '')
  elseif line1 =~ '^#!.*\<env\>'
    name = substitute(line1, '^#!.*\<env\>\s\+\(\i\+\).*', '\1', '')
  elseif line1 =~ '^#!\s*[^/\\ ]*\>\([^/\\]\|$\)'
    name = substitute(line1, '^#!\s*\([^/\\ ]*\>\).*', '\1', '')
  else
    name = substitute(line1, '^#!\s*\S*[/\\]\(\i\+\).*', '\1', '')
  endif

  # tcl scripts may have #!/bin/sh in the first line and "exec wish" in the
  # third line.  Suggested by Steven Atkinson.
  if getline(3) =~ '^exec wish'
    name = 'wish'
  endif

  # The order of the checks below matters: several patterns are not anchored,
  # so the first one that matches wins.

  # Bourne-like shell scripts: bash bash2 dash ksh ksh93 sh
  # (The pattern used to contain an empty alternative, "\|\|", which could
  # never match; "dash" now takes that slot.)
  if name =~ '^\(bash\d*\|dash\|ksh\d*\|sh\)\>'
    call dist#ft#SetFileTypeSH(line1)

  # csh scripts; g:filetype_csh lets the user pick another filetype
  elseif name =~ '^csh\>'
    if exists("g:filetype_csh")
      call dist#ft#SetFileTypeShell(g:filetype_csh)
    else
      call dist#ft#SetFileTypeShell("csh")
    endif

  # tcsh scripts
  elseif name =~ '^tcsh\>'
    call dist#ft#SetFileTypeShell("tcsh")

  # Z shell scripts
  elseif name =~ '^zsh\>'
    set ft=zsh

  # TCL scripts
  elseif name =~ '^\(tclsh\|wish\|expectk\|itclsh\|itkwish\)\>'
    set ft=tcl

  # Expect scripts
  elseif name =~ '^expect\>'
    set ft=expect

  # Gnuplot scripts
  elseif name =~ '^gnuplot\>'
    set ft=gnuplot

  # Makefiles
  elseif name =~ 'make\>'
    set ft=make

  # Pike
  elseif name =~ '^pike\%(\>\|[0-9]\)'
    set ft=pike

  # Lua
  elseif name =~ 'lua'
    set ft=lua

  # Perl
  elseif name =~ 'perl'
    set ft=perl

  # PHP
  elseif name =~ 'php'
    set ft=php

  # Python
  elseif name =~ 'python'
    set ft=python

  # Groovy
  elseif name =~ '^groovy\>'
    set ft=groovy

  # Raku
  elseif name =~ 'raku'
    set ft=raku

  # Ruby
  elseif name =~ 'ruby'
    set ft=ruby

  # JavaScript
  elseif name =~ 'node\(js\)\=\>\|js\>' || name =~ 'rhino\>'
    set ft=javascript

  # BC calculator
  elseif name =~ '^bc\>'
    set ft=bc

  # sed
  elseif name =~ 'sed\>'
    set ft=sed

  # OCaml-scripts
  elseif name =~ 'ocaml'
    set ft=ocaml

  # Awk scripts; also finds "gawk"
  elseif name =~ 'awk\>'
    set ft=awk

  # Website MetaLanguage
  elseif name =~ 'wml'
    set ft=wml

  # Scheme scripts
  elseif name =~ 'scheme'
    set ft=scheme

  # CFEngine scripts
  elseif name =~ 'cfengine'
    set ft=cfengine

  # Erlang scripts
  elseif name =~ 'escript'
    set ft=erlang

  # Haskell
  elseif name =~ 'haskell'
    set ft=haskell

  # Scala
  elseif name =~ 'scala\>'
    set ft=scala

  # Clojure
  elseif name =~ 'clojure'
    set ft=clojure

  # Free Pascal
  elseif name =~ 'instantfpc\>'
    set ft=pascal

  # Fennel
  elseif name =~ 'fennel\>'
    set ft=fennel

  # MikroTik RouterOS script
  elseif name =~ 'rsc\>'
    set ft=routeros

  # Fish shell
  elseif name =~ 'fish\>'
    set ft=fish

  # Gforth
  elseif name =~ 'gforth\>'
    set ft=forth

  # Icon
  elseif name =~ 'icon\>'
    set ft=icon

  # Guile (a Scheme implementation, hence filetype "scheme")
  elseif name =~ 'guile'
    set ft=scheme

  endif
enddef
| 202 | |
| 203 | |
| 204 | # Called for a script that does not have "#!" in the first line. |
def DetectFromText(line1: string)
  # Guess the filetype of the current buffer from its leading text when the
  # first line is not a "#!" line, and set 'filetype' on a match.
  #
  #   line1  text of the first buffer line
  #
  # Lines 2 to 5 are fetched up front; a few checks additionally look at
  # lines 6 and 9, the last line, or the buffer's file name.  The checks are
  # tried in order and the first hit wins.
  const second = getline(2)
  const third = getline(3)
  const fourth = getline(4)
  const fifth = getline(5)

  # The first five lines glued together, each one preceded by a newline, so
  # that a single pattern can look for something at the start of any of them.
  const head = "\n" .. line1 .. "\n" .. second .. "\n" .. third ..
        "\n" .. fourth .. "\n" .. fifth

  # First-line signatures of the various diff formats.
  const diff_first = '^\(diff\>\|Only in \|\d\+\(,\d\+\)\=[cda]\d\+\>\|# It was generated by makepatch \|Index:\s\+\f\+\r\=$\|===== \f\+ \d\+\.\d\+ vs edited\|==== //\f\+#\d\+\|# HG changeset patch\)'

  # File names that look like a locale definition:
  # en_US, fr_FR@euro, en_US.UTF-8, ...
  const locale_name = '\a\a_\a\a\($\|[.@]\)\|i18n$\|POSIX$\|translit_'

  # Section headers found in a locale definition file.
  const locale_section = '^LC_\(IDENTIFICATION\|CTYPE\|COLLATE\|MONETARY\|NUMERIC\|TIME\|MESSAGES\|PAPER\|TELEPHONE\|MEASUREMENT\|NAME\|ADDRESS\)$'

  # Bourne-like shell scripts: sh ksh bash bash2
  if line1 =~ '^:$'
    call dist#ft#SetFileTypeSH(line1)

  # Z shell scripts
  elseif line1 =~ '^#compdef\>'
      || line1 =~ '^#autoload\>'
      || head =~ '\n\s*emulate\s\+\%(-[LR]\s\+\)\=[ckz]\=sh\>'
    set ft=zsh

  # ELM Mail files
  elseif line1 =~ '^From \([a-zA-Z][a-zA-Z_0-9\.=-]*\(@[^ ]*\)\=\|-\) .* \(19\|20\)\d\d$'
    set ft=mail

  # Mason
  elseif line1 =~ '^<[%&].*>'
    set ft=mason

  # Vim scripts (the first line must be '" vim' for this to trigger)
  elseif line1 =~ '^" *[vV]im$'
    set ft=vim

  # libcxx and libstdc++ standard library headers such as "iostream" have no
  # extension; recognize the Emacs file mode instead.
  elseif line1 =~? '-\*-.*C++.*-\*-'
    set ft=cpp

  # MOO
  elseif line1 =~ '^\*\* LambdaMOO Database, Format Version \%([1-3]\>\)\@!\d\+ \*\*$'
    set ft=moo

  # Diff file:
  # - "diff" in first line (context diff)
  # - "Only in " in first line
  # - "--- " in first line and "+++ " in second line (unified diff).
  # - "*** " in first line and "--- " in second line (context diff).
  # - "# It was generated by makepatch " in the first-line pattern
  #   (makepatch diff)
  # - "Index: <filename>" in the first line (CVS file)
  # - "=== ", line of "=", "---", "+++ " (SVK diff)
  # - "=== ", "--- ", "+++ " (bzr diff, common case)
  # - "=== (removed|added|renamed|modified)" (bzr diff, alternative)
  # - "# HG changeset patch" in first line (Mercurial export format)
  elseif line1 =~ diff_first
      || (line1 =~ '^--- ' && second =~ '^+++ ')
      || (line1 =~ '^\* looking for ' && second =~ '^\* comparing to ')
      || (line1 =~ '^\*\*\* ' && second =~ '^--- ')
      || (line1 =~ '^=== ' && ((second =~ '^=\{66\}' && third =~ '^--- ' && fourth =~ '^+++') || (second =~ '^--- ' && third =~ '^+++ ')))
      || (line1 =~ '^=== \(removed\|added\|renamed\|modified\)')
    set ft=diff

  # PostScript Files (the first line must be %!PS, like a2ps output)
  elseif line1 =~ '^%![ \t]*PS'
    set ft=postscr

  # M4 scripts: guess from a line among the first five starting with "dnl".
  elseif line1 =~ '^\s*dnl\>'
      || second =~ '^\s*dnl\>'
      || third =~ '^\s*dnl\>'
      || fourth =~ '^\s*dnl\>'
      || fifth =~ '^\s*dnl\>'
    set ft=m4

  # AmigaDos scripts
  elseif $TERM == "amiga" && (line1 =~ "^;" || line1 =~? '^\.bra')
    set ft=amiga

  # SiCAD scripts (the first line must be procn or procd)
  elseif line1 =~? '^ *proc[nd] *$'
    set ft=sicad

  # Purify log files start with "**** Purify"
  elseif line1 =~ '^\*\*\*\* Purify'
    set ft=purifylog

  # XML
  elseif line1 =~ '<?\s*xml.*?>'
    set ft=xml

  # XHTML (e.g.: PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN")
  elseif line1 =~ '\<DTD\s\+XHTML\s'
    set ft=xhtml

  # HTML (e.g.: <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN")
  # Avoid "doctype html", used by slim.
  elseif line1 =~? '<!DOCTYPE\s\+html\>'
    set ft=html

  # PDF
  elseif line1 =~ '^%PDF-'
    set ft=pdf

  # XXD output
  elseif line1 =~ '^\x\{7}: \x\{2} \=\x\{2} \=\x\{2} \=\x\{2} '
    set ft=xxd

  # RCS/CVS log output
  elseif line1 =~ '^RCS file:' || second =~ '^RCS file:'
    set ft=rcslog

  # CVS commit
  elseif second =~ '^CVS:' || getline("$") =~ '^CVS: '
    set ft=cvs

  # Prescribe
  elseif line1 =~ '^!R!'
    set ft=prescribe

  # Send-pr
  elseif line1 =~ '^SEND-PR:'
    set ft=sendpr

  # SNNS files
  elseif line1 =~ '^SNNS network definition file'
    set ft=snnsnet
  elseif line1 =~ '^SNNS pattern definition file'
    set ft=snnspat
  elseif line1 =~ '^SNNS result file'
    set ft=snnsres

  # Virata
  elseif line1 =~ '^%.\{-}[Vv]irata'
      || second =~ '^%.\{-}[Vv]irata'
      || third =~ '^%.\{-}[Vv]irata'
      || fourth =~ '^%.\{-}[Vv]irata'
      || fifth =~ '^%.\{-}[Vv]irata'
    set ft=virata

  # Strace
  elseif line1 =~ '[0-9:.]* *execve(' || line1 =~ '^__libc_start_main'
    set ft=strace

  # VSE JCL
  elseif line1 =~ '^\* $$ JOB\>' || line1 =~ '^// *JOB\>'
    set ft=vsejcl

  # TAK and SINDA
  # NOTE(review): the spacing inside the SINDA literal below is reproduced as
  # found; confirm it matches the real banner text (runs of blanks may have
  # been collapsed at some point).
  elseif fourth =~ 'K & K Associates' || second =~ 'TAK 2000'
    set ft=takout
  elseif third =~ 'S Y S T E M S I M P R O V E D '
    set ft=sindaout
  elseif getline(6) =~ 'Run Date: '
    set ft=takcmp
  elseif getline(9) =~ 'Node File 1'
    set ft=sindacmp

  # DNS zone files
  elseif (line1 .. second .. third .. fourth) =~ '^; <<>> DiG [0-9.]\+.* <<>>\|$ORIGIN\|$TTL\|IN\s\+SOA'
    set ft=bindzone

  # BAAN
  elseif (line1 =~ '|\*\{1,80}' && second =~ 'VRC ')
      || (second =~ '|\*\{1,80}' && third =~ 'VRC ')
    set ft=baan

  # Valgrind
  elseif line1 =~ '^==\d\+== valgrind' || third =~ '^==\d\+== Using valgrind'
    set ft=valgrind

  # Go docs
  elseif line1 =~ '^PACKAGE DOCUMENTATION$'
    set ft=godoc

  # Renderman Interface Bytestream
  elseif line1 =~ '^##RenderMan'
    set ft=rib

  # Scheme scripts
  elseif line1 =~ 'exec\s\+\S*scheme' || second =~ 'exec\s\+\S*scheme'
    set ft=scheme

  # Git output
  elseif line1 =~ '^\(commit\|tree\|object\) \x\{40,\}\>\|^tag \S\+$'
    set ft=git

  # Gprof (gnu profiler)
  elseif line1 == 'Flat profile:'
      && second == ''
      && third =~ '^Each sample counts as .* seconds.$'
    set ft=gprof

  # Erlang terms
  # (See also: http://www.gnu.org/software/emacs/manual/html_node/emacs/Choosing-Modes.html#Choosing-Modes)
  elseif line1 =~? '-\*-.*erlang.*-\*-'
    set ft=erlang

  # YAML
  elseif line1 =~ '^%YAML'
    set ft=yaml

  # MikroTik RouterOS script
  elseif line1 =~ '^#.*by RouterOS.*$'
    set ft=routeros

  # Sed scripts
  # "#ncomment" is allowed but most likely a false positive, so require a
  # space before any trailing comment text.
  elseif line1 =~ '^#n\%($\|\s\)'
    set ft=sed

  else
    const last = line("$")

    # Step over leading lines that start with "? ", never going past the last
    # line of the buffer.
    var idx = 1
    while getline(idx) =~ "^? " && idx < last
      idx += 1
    endwhile

    if getline(idx) =~ '^Index:\s\+\f\+$'
      # CVS diff
      set ft=diff

    # locale input files: Formal Definitions of Cultural Conventions
    # filename must be like en_US, fr_FR@euro or en_US.UTF-8
    elseif expand("%") =~ locale_name
      # Look at lines 1..99 at most, and never at the last line of the buffer.
      for n in range(1, min([99, last - 1]))
        if getline(n) =~ locale_section
          setf fdcc
          break
        endif
      endfor
    endif
  endif
enddef