blob: 86070a0af9077eb663cf953dc90f7f023be864a3 [file] [log] [blame]
vim9script

# Vim function for detecting a filetype from the file contents.
# Invoked from "scripts.vim" in 'runtimepath'
#
# Maintainer: Bram Moolenaar <Bram@vim.org>
# Last Change: 2022 Feb 13
8
# Entry point: inspect the first line of the current buffer and hand it to the
# detector for "#!" scripts or, failing that, to the text-based detector.
export def DetectFiletype()
  var first = getline(1)

  # A script interpreter line starts with exactly the two characters "#!".
  if first[0 : 1] == '#!'
    DetectFromHashBang(first)
    return
  endif

  # Anything else is guessed from the text of the first few lines.
  DetectFromText(first)
enddef
19
# Called for a script that has "#!" in the first line.
# "firstline" is that first line.  The interpreter name is extracted from it
# and mapped to a filetype; when nothing matches the filetype is left alone.
def DetectFromHashBang(firstline: string)
  var shebang = firstline

  # Check for a line like "#!/usr/bin/env {options} bash".  Turn it into
  # "#!/usr/bin/bash" to make matching easier.
  # Recognize only a few {options} that are commonly used.
  # The steps are applied in order: drop VAR=value assignments (all of them),
  # drop the first recognized option, then drop the word "env" itself.
  if shebang =~ '^#!\s*\S*\<env\s'
    for [strip, flags] in [
        ['\S\+=\S\+', 'g'],
        ['\(-[iS]\|--ignore-environment\|--split-string\)', ''],
        ['\<env\s\+', ''],
        ]
      shebang = substitute(shebang, strip, '', flags)
    endfor
  endif

  # Pick the pattern whose first group captures the program name.
  # Only accept spaces in PC style paths: "#!c:/program files/perl [args]".
  # If the word env is used, use the first word after the space:
  #     "#!/usr/bin/env perl [path/args]"
  # If there is no path use the first word: "#!perl [path/args]".
  # Otherwise get the last word after a slash: "#!/usr/bin/perl [path/args]".
  var extract: string
  if shebang =~ '^#!\s*\a:[/\\]'
    extract = '^#!.*[/\\]\(\i\+\).*'
  elseif shebang =~ '^#!.*\<env\>'
    extract = '^#!.*\<env\>\s\+\(\i\+\).*'
  elseif shebang =~ '^#!\s*[^/\\ ]*\>\([^/\\]\|$\)'
    extract = '^#!\s*\([^/\\ ]*\>\).*'
  else
    extract = '^#!\s*\S*[/\\]\(\i\+\).*'
  endif

  # tcl scripts may have #!/bin/sh in the first line and "exec wish" in the
  # third line; that takes precedence over whatever the first line names.
  # Suggested by Steven Atkinson.
  var name = getline(3) =~ '^exec wish'
        ? 'wish'
        : substitute(shebang, extract, '\1', '')

  # Bourne-like shell scripts: bash bash2 ksh ksh93 sh
  # The (possibly rewritten) "#!" line is passed on to the helper.
  if name =~ '^\(bash\d*\|\|ksh\d*\|sh\)\>'
    call dist#ft#SetFileTypeSH(shebang)
    return
  endif

  # csh scripts: g:filetype_csh, when it exists, overrides the default "csh".
  if name =~ '^csh\>'
    call dist#ft#SetFileTypeShell(exists("g:filetype_csh") ? g:filetype_csh : "csh")
    return
  endif

  # tcsh scripts
  if name =~ '^tcsh\>'
    call dist#ft#SetFileTypeShell("tcsh")
    return
  endif

  # All remaining interpreters map a name pattern straight to a filetype.
  # The order matters: the first pattern that matches wins.
  # Notes on some entries:
  # - 'awk\>' also finds "gawk"
  # - 'wml' is Website MetaLanguage
  # - 'escript' is used for Erlang scripts
  # - 'instantfpc\>' is Free Pascal
  # - 'rsc\>' is a MikroTik RouterOS script
  # - JavaScript has two patterns, both giving "javascript"
  const by_name: list<list<string>> = [
    ['^zsh\>', 'zsh'],
    ['^\(tclsh\|wish\|expectk\|itclsh\|itkwish\)\>', 'tcl'],
    ['^expect\>', 'expect'],
    ['^gnuplot\>', 'gnuplot'],
    ['make\>', 'make'],
    ['^pike\%(\>\|[0-9]\)', 'pike'],
    ['lua', 'lua'],
    ['perl', 'perl'],
    ['php', 'php'],
    ['python', 'python'],
    ['^groovy\>', 'groovy'],
    ['raku', 'raku'],
    ['ruby', 'ruby'],
    ['node\(js\)\=\>\|js\>', 'javascript'],
    ['rhino\>', 'javascript'],
    ['^bc\>', 'bc'],
    ['sed\>', 'sed'],
    ['ocaml', 'ocaml'],
    ['awk\>', 'awk'],
    ['wml', 'wml'],
    ['scheme', 'scheme'],
    ['cfengine', 'cfengine'],
    ['escript', 'erlang'],
    ['haskell', 'haskell'],
    ['scala\>', 'scala'],
    ['clojure', 'clojure'],
    ['instantfpc\>', 'pascal'],
    ['fennel\>', 'fennel'],
    ['rsc\>', 'routeros'],
    ['fish\>', 'fish'],
    ['gforth\>', 'forth'],
    ['icon\>', 'icon'],
    ]

  for [pat, ftype] in by_name
    if name =~ pat
      execute 'set ft=' .. ftype
      return
    endif
  endfor
enddef
198
199
# Called for a script that does not have "#!" in the first line.
# "line1" is the first line of the buffer.  The checks below run in a fixed
# order against the first few lines (and a handful of later ones); the first
# check that matches sets the filetype and ends the function.
def DetectFromText(line1: string)
  var second = getline(2)
  var third = getline(3)
  var fourth = getline(4)
  var fifth = getline(5)

  # Bourne-like shell scripts: sh ksh bash bash2
  if line1 =~ '^:$'
    call dist#ft#SetFileTypeSH(line1)
    return
  endif

  # Z shell scripts: "#compdef", "#autoload" or an "emulate" command in one
  # of the first five lines.  The lines are joined with a leading "\n" so the
  # pattern can anchor on the start of any of them.
  var head5 = "\n" .. join([line1, second, third, fourth, fifth], "\n")
  if line1 =~ '^#compdef\>'
      || line1 =~ '^#autoload\>'
      || head5 =~ '\n\s*emulate\s\+\%(-[LR]\s\+\)\=[ckz]\=sh\>'
    set ft=zsh
    return
  endif

  # ELM Mail files
  if line1 =~ '^From \([a-zA-Z][a-zA-Z_0-9\.=-]*\(@[^ ]*\)\=\|-\) .* \(19\|20\)\d\d$'
    set ft=mail
    return
  endif

  # Mason
  if line1 =~ '^<[%&].*>'
    set ft=mason
    return
  endif

  # Vim scripts (must have '" vim' as the first line to trigger this)
  if line1 =~ '^" *[vV]im$'
    set ft=vim
    return
  endif

  # libcxx and libstdc++ standard library headers like "iostream" do not have
  # an extension, recognize the Emacs file mode.
  if line1 =~? '-\*-.*C++.*-\*-'
    set ft=cpp
    return
  endif

  # MOO
  if line1 =~ '^\*\* LambdaMOO Database, Format Version \%([1-3]\>\)\@!\d\+ \*\*$'
    set ft=moo
    return
  endif

  # Diff file:
  # - "diff" in first line (context diff)
  # - "Only in " in first line
  # - "--- " in first line and "+++ " in second line (unified diff).
  # - "*** " in first line and "--- " in second line (context diff).
  # - "# It was generated by makepatch " in the second line (makepatch diff).
  # - "Index: <filename>" in the first line (CVS file)
  # - "=== ", line of "=", "---", "+++ " (SVK diff)
  # - "=== ", "--- ", "+++ " (bzr diff, common case)
  # - "=== (removed|added|renamed|modified)" (bzr diff, alternative)
  # - "# HG changeset patch" in first line (Mercurial export format)
  if line1 =~ '^\(diff\>\|Only in \|\d\+\(,\d\+\)\=[cda]\d\+\>\|# It was generated by makepatch \|Index:\s\+\f\+\r\=$\|===== \f\+ \d\+\.\d\+ vs edited\|==== //\f\+#\d\+\|# HG changeset patch\)'
      || (line1 =~ '^--- ' && second =~ '^+++ ')
      || (line1 =~ '^\* looking for ' && second =~ '^\* comparing to ')
      || (line1 =~ '^\*\*\* ' && second =~ '^--- ')
      || (line1 =~ '^=== ' && ((second =~ '^=\{66\}' && third =~ '^--- ' && fourth =~ '^+++') || (second =~ '^--- ' && third =~ '^+++ ')))
      || (line1 =~ '^=== \(removed\|added\|renamed\|modified\)')
    set ft=diff
    return
  endif

  # PostScript Files (must have %!PS as the first line, like a2ps output)
  if line1 =~ '^%![ \t]*PS'
    set ft=postscr
    return
  endif

  # M4 scripts: Guess there is a line that starts with "dnl".
  if line1 =~ '^\s*dnl\>'
      || second =~ '^\s*dnl\>'
      || third =~ '^\s*dnl\>'
      || fourth =~ '^\s*dnl\>'
      || fifth =~ '^\s*dnl\>'
    set ft=m4
    return
  endif

  # AmigaDos scripts: only considered when $TERM is "amiga".
  if $TERM == "amiga" && (line1 =~ "^;" || line1 =~? '^\.bra')
    set ft=amiga
    return
  endif

  # SiCAD scripts (must have procn or procd as the first line to trigger this)
  if line1 =~? '^ *proc[nd] *$'
    set ft=sicad
    return
  endif

  # Purify log files start with "**** Purify"
  if line1 =~ '^\*\*\*\* Purify'
    set ft=purifylog
    return
  endif

  # XML
  if line1 =~ '<?\s*xml.*?>'
    set ft=xml
    return
  endif

  # XHTML (e.g.: PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN")
  if line1 =~ '\<DTD\s\+XHTML\s'
    set ft=xhtml
    return
  endif

  # HTML (e.g.: <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN")
  # Avoid "doctype html", used by slim.
  if line1 =~? '<!DOCTYPE\s\+html\>'
    set ft=html
    return
  endif

  # PDF
  if line1 =~ '^%PDF-'
    set ft=pdf
    return
  endif

  # XXD output
  if line1 =~ '^\x\{7}: \x\{2} \=\x\{2} \=\x\{2} \=\x\{2} '
    set ft=xxd
    return
  endif

  # RCS/CVS log output
  if line1 =~ '^RCS file:' || second =~ '^RCS file:'
    set ft=rcslog
    return
  endif

  # CVS commit: marker in the second line or in the last line of the buffer.
  if second =~ '^CVS:' || getline("$") =~ '^CVS: '
    set ft=cvs
    return
  endif

  # Prescribe
  if line1 =~ '^!R!'
    set ft=prescribe
    return
  endif

  # Send-pr
  if line1 =~ '^SEND-PR:'
    set ft=sendpr
    return
  endif

  # SNNS files: network, pattern and result files each get their own type.
  if line1 =~ '^SNNS network definition file'
    set ft=snnsnet
    return
  endif
  if line1 =~ '^SNNS pattern definition file'
    set ft=snnspat
    return
  endif
  if line1 =~ '^SNNS result file'
    set ft=snnsres
    return
  endif

  # Virata
  if line1 =~ '^%.\{-}[Vv]irata'
      || second =~ '^%.\{-}[Vv]irata'
      || third =~ '^%.\{-}[Vv]irata'
      || fourth =~ '^%.\{-}[Vv]irata'
      || fifth =~ '^%.\{-}[Vv]irata'
    set ft=virata
    return
  endif

  # Strace
  if line1 =~ '[0-9:.]* *execve(' || line1 =~ '^__libc_start_main'
    set ft=strace
    return
  endif

  # VSE JCL
  if line1 =~ '^\* $$ JOB\>' || line1 =~ '^// *JOB\>'
    set ft=vsejcl
    return
  endif

  # TAK and SINDA
  # NOTE(review): the spacing inside these literal patterns is copied as
  # found; runs of spaces may have been collapsed - confirm against the
  # reference runtime file.
  if fourth =~ 'K & K Associates' || second =~ 'TAK 2000'
    set ft=takout
    return
  endif
  if third =~ 'S Y S T E M S I M P R O V E D '
    set ft=sindaout
    return
  endif
  if getline(6) =~ 'Run Date: '
    set ft=takcmp
    return
  endif
  if getline(9) =~ 'Node File 1'
    set ft=sindacmp
    return
  endif

  # DNS zone files: matched against the first four lines glued together.
  if line1 .. second .. third .. fourth =~ '^; <<>> DiG [0-9.]\+.* <<>>\|$ORIGIN\|$TTL\|IN\s\+SOA'
    set ft=bindzone
    return
  endif

  # BAAN: a "|***" banner line directly followed by a line containing "VRC ".
  if (line1 =~ '|\*\{1,80}' && second =~ 'VRC ')
      || (second =~ '|\*\{1,80}' && third =~ 'VRC ')
    set ft=baan
    return
  endif

  # Valgrind
  if line1 =~ '^==\d\+== valgrind' || third =~ '^==\d\+== Using valgrind'
    set ft=valgrind
    return
  endif

  # Go docs
  if line1 =~ '^PACKAGE DOCUMENTATION$'
    set ft=godoc
    return
  endif

  # Renderman Interface Bytestream
  if line1 =~ '^##RenderMan'
    set ft=rib
    return
  endif

  # Scheme scripts
  if line1 =~ 'exec\s\+\S*scheme' || second =~ 'exec\s\+\S*scheme'
    set ft=scheme
    return
  endif

  # Git output
  if line1 =~ '^\(commit\|tree\|object\) \x\{40,\}\>\|^tag \S\+$'
    set ft=git
    return
  endif

  # Gprof (gnu profiler)
  if line1 == 'Flat profile:'
      && second == ''
      && third =~ '^Each sample counts as .* seconds.$'
    set ft=gprof
    return
  endif

  # Erlang terms
  # (See also: http://www.gnu.org/software/emacs/manual/html_node/emacs/Choosing-Modes.html#Choosing-Modes)
  if line1 =~? '-\*-.*erlang.*-\*-'
    set ft=erlang
    return
  endif

  # YAML
  if line1 =~ '^%YAML'
    set ft=yaml
    return
  endif

  # MikroTik RouterOS script
  if line1 =~ '^#.*by RouterOS.*$'
    set ft=routeros
    return
  endif

  # Sed scripts
  # #ncomment is allowed but most likely a false positive so require a space
  # before any trailing comment text
  if line1 =~ '^#n\%($\|\s\)'
    set ft=sed
    return
  endif

  # CVS diff: skip leading "? " lines, then look for "Index: <filename>".
  var lnum = 1
  while getline(lnum) =~ "^? " && lnum < line("$")
    lnum += 1
  endwhile
  if getline(lnum) =~ '^Index:\s\+\f\+$'
    set ft=diff
    return
  endif

  # locale input files: Formal Definitions of Cultural Conventions
  # filename must be like en_US, fr_FR@euro or en_US.UTF-8
  if expand("%") !~ '\a\a_\a\a\($\|[.@]\)\|i18n$\|POSIX$\|translit_'
    return
  endif

  # Look for an "LC_..." section header in lines 1 up to 99, never including
  # the last line of the buffer.
  var limit = min([100, line("$")])
  for idx in range(1, limit - 1)
    if getline(idx) =~ '^LC_\(IDENTIFICATION\|CTYPE\|COLLATE\|MONETARY\|NUMERIC\|TIME\|MESSAGES\|PAPER\|TELEPHONE\|MEASUREMENT\|NAME\|ADDRESS\)$'
      setf fdcc
      break
    endif
  endfor
enddef