vim9script

# Vim function for detecting a filetype from the file contents.
# Invoked from "scripts.vim" in 'runtimepath'
#
# Maintainer:	Bram Moolenaar <Bram@vim.org>
# Last Change:	2023 Apr 17

# Entry point: look at the first line of the current buffer and hand it to
# the detector that fits, depending on whether it starts with "#!".
export def DetectFiletype()
  var first = getline(1)

  if first[0 : 1] == '#!'
    # Script with an interpreter line.
    DetectFromHashBang(first)
    return
  endif

  # Anything else is recognized from its text.
  DetectFromText(first)
enddef
19
# Called for a script that has "#!" in the first line.
#
# Works out the interpreter name from the "#!" line and sets 'filetype' for
# the current buffer when the interpreter is recognized.  Nothing is set
# when it is not.
#
# firstline: text of the first line of the buffer, starting with "#!".
# Line 3 of the current buffer is also inspected (Tcl "exec wish" idiom).
def DetectFromHashBang(firstline: string)
  var line1 = firstline

  # Check for a line like "#!/usr/bin/env {options} bash".  Turn it into
  # "#!/usr/bin/bash" to make matching easier.
  # Recognize only a few {options} that are commonly used.  Every pattern is
  # anchored on the surrounding white space, so that text inside the path or
  # the program name is never removed.
  if line1 =~ '^#!\s*\S*\<env\s'
    # "--split-string=prog" and "--split-string prog": drop the option only,
    # keep the program that follows it.
    line1 = substitute(line1, '\s\zs--split-string[ \t=]', '', '')
    # "NAME=value" environment assignments.
    line1 = substitute(line1, '\s\zs[A-Za-z0-9_]\+=\S*\ze\s', '', 'g')
    # "-i", "-S", combined forms such as "-iS", and "--ignore-environment".
    line1 = substitute(line1, '\s\zs\%(-[iS]\+\|--ignore-environment\)\ze\s', '', 'g')
    line1 = substitute(line1, '\<env\s\+', '', '')
  endif

  # Get the program name.
  # Only accept spaces in PC style paths: "#!c:/program files/perl [args]".
  # If the word env is used, use the first word after the space:
  # "#!/usr/bin/env perl [path/args]"
  # If there is no path use the first word: "#!perl [path/args]".
  # Otherwise get the last word after a slash: "#!/usr/bin/perl [path/args]".
  var name: string
  if line1 =~ '^#!\s*\a:[/\\]'
    name = substitute(line1, '^#!.*[/\\]\(\i\+\).*', '\1', '')
  elseif line1 =~ '^#!.*\<env\>'
    name = substitute(line1, '^#!.*\<env\>\s\+\(\i\+\).*', '\1', '')
  elseif line1 =~ '^#!\s*[^/\\ ]*\>\([^/\\]\|$\)'
    name = substitute(line1, '^#!\s*\([^/\\ ]*\>\).*', '\1', '')
  else
    name = substitute(line1, '^#!\s*\S*[/\\]\(\i\+\).*', '\1', '')
  endif

  # tcl scripts may have #!/bin/sh in the first line and "exec wish" in the
  # third line.  Suggested by Steven Atkinson.
  if getline(3) =~ '^exec wish'
    name = 'wish'
  endif

  # The shells need more than a plain 'filetype' assignment, handle them
  # first.

  # Bourne-like shell scripts: bash bash2 dash ksh ksh93 sh
  if name =~ '^\(bash\d*\|dash\|ksh\d*\|sh\)\>'
    call dist#ft#SetFileTypeSH(line1)
    return
  endif

  # csh scripts; g:filetype_csh overrules the default
  if name =~ '^csh\>'
    if exists("g:filetype_csh")
      call dist#ft#SetFileTypeShell(g:filetype_csh)
    else
      call dist#ft#SetFileTypeShell("csh")
    endif
    return
  endif

  # tcsh scripts
  if name =~ '^tcsh\>'
    call dist#ft#SetFileTypeShell("tcsh")
    return
  endif

  # All other interpreters: [pattern for the program name, filetype].
  # The order matters, the first pattern that matches wins.
  const rules: list<list<string>> = [
    ['^zsh\>', 'zsh'],
    ['^\(tclsh\|wish\|expectk\|itclsh\|itkwish\)\>', 'tcl'],
    ['^expect\>', 'expect'],
    ['^gnuplot\>', 'gnuplot'],
    ['make\>', 'make'],
    ['^pike\%(\>\|[0-9]\)', 'pike'],
    ['lua', 'lua'],
    ['perl', 'perl'],
    ['php', 'php'],
    ['python', 'python'],
    ['^groovy\>', 'groovy'],
    ['raku', 'raku'],
    ['ruby', 'ruby'],
    ['node\(js\)\=\>\|js\>', 'javascript'],
    ['rhino\>', 'javascript'],
    ['^bc\>', 'bc'],
    ['sed\>', 'sed'],
    ['ocaml', 'ocaml'],
    ['awk\>', 'awk'],
    ['wml', 'wml'],
    ['scheme', 'scheme'],
    ['cfengine', 'cfengine'],
    ['escript', 'erlang'],
    ['haskell', 'haskell'],
    ['scala\>', 'scala'],
    ['clojure', 'clojure'],
    ['instantfpc\>', 'pascal'],
    ['fennel\>', 'fennel'],
    ['rsc\>', 'routeros'],
    ['fish\>', 'fish'],
    ['gforth\>', 'forth'],
    ['icon\>', 'icon'],
    ['guile', 'scheme'],
  ]
  for [pat, ftype] in rules
    if name =~ pat
      execute 'set ft=' .. ftype
      return
    endif
  endfor
enddef
202
203
# Called for a script that does not have "#!" in the first line.
#
# Guesses the filetype from the first few lines of the current buffer and
# sets 'filetype' when something is recognized.  The checks are tried in
# order; the first one that matches decides.
#
# line1: text of the first line of the buffer.
def DetectFromText(line1: string)
  var second = getline(2)
  var third = getline(3)
  var fourth = getline(4)
  var fifth = getline(5)

  # Patterns that are tried on each of the first five lines.
  var m4pat = '^\s*dnl\>'
  var viratapat = '^%.\{-}[Vv]irata'

  # Bourne-like shell scripts: sh ksh bash bash2
  if line1 =~ '^:$'
    call dist#ft#SetFileTypeSH(line1)

  # Z shell scripts: "#compdef", "#autoload" or an "emulate" command at the
  # start of one of the first five lines.
  elseif line1 =~ '^#compdef\>' || line1 =~ '^#autoload\>'
      || ("\n" .. join([line1, second, third, fourth, fifth], "\n"))
          =~ '\n\s*emulate\s\+\%(-[LR]\s\+\)\=[ckz]\=sh\>'
    set ft=zsh

  # ELM Mail files
  elseif line1 =~ '^From \([a-zA-Z][a-zA-Z_0-9\.=-]*\(@[^ ]*\)\=\|-\) .* \(19\|20\)\d\d$'
    set ft=mail

  # Mason
  elseif line1 =~ '^<[%&].*>'
    set ft=mason

  # Vim scripts (the first line must be '" vim' to trigger this)
  elseif line1 =~ '^" *[vV]im$'
    set ft=vim

  # libcxx and libstdc++ standard library headers like "iostream" have no
  # extension, recognize the Emacs file mode instead.
  elseif line1 =~? '-\*-.*C++.*-\*-'
    set ft=cpp

  # MOO
  elseif line1 =~ '^\*\* LambdaMOO Database, Format Version \%([1-3]\>\)\@!\d\+ \*\*$'
    set ft=moo

  # Diff file:
  # - "diff" in first line (context diff)
  # - "Only in " in first line
  # - "--- " in first line and "+++ " in second line (unified diff).
  # - "*** " in first line and "--- " in second line (context diff).
  # - "# It was generated by makepatch " in the second line (makepatch diff).
  # - "Index: <filename>" in the first line (CVS file)
  # - "=== ", line of "=", "---", "+++ " (SVK diff)
  # - "=== ", "--- ", "+++ " (bzr diff, common case)
  # - "=== (removed|added|renamed|modified)" (bzr diff, alternative)
  # - "# HG changeset patch" in first line (Mercurial export format)
  elseif line1 =~ '^\(diff\>\|Only in \|\d\+\(,\d\+\)\=[cda]\d\+\>\|# It was generated by makepatch \|Index:\s\+\f\+\r\=$\|===== \f\+ \d\+\.\d\+ vs edited\|==== //\f\+#\d\+\|# HG changeset patch\)'
      || (line1 =~ '^--- ' && second =~ '^+++ ')
      || (line1 =~ '^\* looking for ' && second =~ '^\* comparing to ')
      || (line1 =~ '^\*\*\* ' && second =~ '^--- ')
      || (line1 =~ '^=== '
          && ((second =~ '^=\{66\}' && third =~ '^--- ' && fourth =~ '^+++')
              || (second =~ '^--- ' && third =~ '^+++ ')))
      || line1 =~ '^=== \(removed\|added\|renamed\|modified\)'
    set ft=diff

  # PostScript Files (must have %!PS as the first line, like a2ps output)
  elseif line1 =~ '^%![ \t]*PS'
    set ft=postscr

  # M4 scripts: guess from a line starting with "dnl" in the first five.
  elseif line1 =~ m4pat || second =~ m4pat || third =~ m4pat
      || fourth =~ m4pat || fifth =~ m4pat
    set ft=m4

  # AmigaDos scripts
  elseif $TERM == "amiga" && (line1 =~ "^;" || line1 =~? '^\.bra')
    set ft=amiga

  # SiCAD scripts (the first line must be procn or procd to trigger this)
  elseif line1 =~? '^ *proc[nd] *$'
    set ft=sicad

  # Purify log files start with "**** Purify"
  elseif line1 =~ '^\*\*\*\* Purify'
    set ft=purifylog

  # XML
  elseif line1 =~ '<?\s*xml.*?>'
    set ft=xml

  # XHTML (e.g.: PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN")
  elseif line1 =~ '\<DTD\s\+XHTML\s'
    set ft=xhtml

  # HTML (e.g.: <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN")
  # Avoid "doctype html", used by slim.
  elseif line1 =~? '<!DOCTYPE\s\+html\>'
    set ft=html

  # PDF
  elseif line1 =~ '^%PDF-'
    set ft=pdf

  # XXD output
  elseif line1 =~ '^\x\{7}: \x\{2} \=\x\{2} \=\x\{2} \=\x\{2} '
    set ft=xxd

  # RCS/CVS log output
  elseif line1 =~ '^RCS file:' || second =~ '^RCS file:'
    set ft=rcslog

  # CVS commit
  elseif second =~ '^CVS:' || getline("$") =~ '^CVS: '
    set ft=cvs

  # Prescribe
  elseif line1 =~ '^!R!'
    set ft=prescribe

  # Send-pr
  elseif line1 =~ '^SEND-PR:'
    set ft=sendpr

  # SNNS files: network, pattern and result files
  elseif line1 =~ '^SNNS network definition file'
    set ft=snnsnet
  elseif line1 =~ '^SNNS pattern definition file'
    set ft=snnspat
  elseif line1 =~ '^SNNS result file'
    set ft=snnsres

  # Virata: marker after a "%" at the start of one of the first five lines
  elseif line1 =~ viratapat || second =~ viratapat || third =~ viratapat
      || fourth =~ viratapat || fifth =~ viratapat
    set ft=virata

  # Strace
  # inaccurate fast match first, then use accurate slow match
  elseif (line1 =~ 'execve(' && line1 =~ '^[0-9:.]* *execve(')
      || line1 =~ '^__libc_start_main'
    set ft=strace

  # VSE JCL
  elseif line1 =~ '^\* $$ JOB\>' || line1 =~ '^// *JOB\>'
    set ft=vsejcl

  # TAK and SINDA
  # NOTE(review): these literals depend on exact spacing; runs of spaces may
  # have been collapsed when this text was copied -- confirm against real
  # TAK/SINDA output before relying on them.
  elseif fourth =~ 'K & K Associates' || second =~ 'TAK 2000'
    set ft=takout
  elseif third =~ 'S Y S T E M S I M P R O V E D '
    set ft=sindaout
  elseif getline(6) =~ 'Run Date: '
    set ft=takcmp
  elseif getline(9) =~ 'Node File 1'
    set ft=sindacmp

  # DNS zone files
  elseif line1 .. second .. third .. fourth =~ '^; <<>> DiG [0-9.]\+.* <<>>\|$ORIGIN\|$TTL\|IN\s\+SOA'
    set ft=bindzone

  # BAAN
  elseif (line1 =~ '|\*\{1,80}' && second =~ 'VRC ')
      || (second =~ '|\*\{1,80}' && third =~ 'VRC ')
    set ft=baan

  # Valgrind
  elseif line1 =~ '^==\d\+== valgrind' || third =~ '^==\d\+== Using valgrind'
    set ft=valgrind

  # Go docs
  elseif line1 =~ '^PACKAGE DOCUMENTATION$'
    set ft=godoc

  # Renderman Interface Bytestream
  elseif line1 =~ '^##RenderMan'
    set ft=rib

  # Scheme scripts
  elseif line1 =~ 'exec\s\+\S*scheme' || second =~ 'exec\s\+\S*scheme'
    set ft=scheme

  # Git output
  elseif line1 =~ '^\(commit\|tree\|object\) \x\{40,\}\>\|^tag \S\+$'
    set ft=git

  # Gprof (gnu profiler)
  elseif line1 == 'Flat profile:' && empty(second)
      && third =~ '^Each sample counts as .* seconds.$'
    set ft=gprof

  # Erlang terms
  # (See also: http://www.gnu.org/software/emacs/manual/html_node/emacs/Choosing-Modes.html#Choosing-Modes)
  elseif line1 =~? '-\*-.*erlang.*-\*-'
    set ft=erlang

  # YAML
  elseif line1 =~ '^%YAML'
    set ft=yaml

  # MikroTik RouterOS script
  elseif line1 =~ '^#.*by RouterOS.*$'
    set ft=routeros

  # Sed scripts
  # #ncomment is allowed but most likely a false positive so require a space
  # before any trailing comment text
  elseif line1 =~ '^#n\%($\|\s\)'
    set ft=sed

  else
    var last = line("$")

    # Skip over leading "? " lines, but never move past the last line.
    var row = 1
    while getline(row) =~ "^? " && row < last
      row += 1
    endwhile

    if getline(row) =~ '^Index:\s\+\f\+$'
      # CVS diff
      set ft=diff

    # locale input files: Formal Definitions of Cultural Conventions
    # filename must be like en_US, fr_FR@euro or en_US.UTF-8
    elseif expand("%") =~ '\a\a_\a\a\($\|[.@]\)\|i18n$\|POSIX$\|translit_'
      # Look for a category header in at most the first 99 lines; the last
      # line of the buffer is not inspected.
      for n in range(1, min([99, last - 1]))
        if getline(n) =~ '^LC_\(IDENTIFICATION\|CTYPE\|COLLATE\|MONETARY\|NUMERIC\|TIME\|MESSAGES\|PAPER\|TELEPHONE\|MEASUREMENT\|NAME\|ADDRESS\)$'
          setf fdcc
          break
        endif
      endfor
    endif
  endif
enddef