patch 8.2.2400: Vim9: compiled functions are not profiled

Problem:    Vim9: compiled functions are not profiled.
Solution:   Add initial changes to profile compiled functions.  Fix that a
            script-local function was hard to debug.
diff --git a/src/debugger.c b/src/debugger.c
index f745761..4e8a173 100644
--- a/src/debugger.c
+++ b/src/debugger.c
@@ -864,7 +864,7 @@
  */
     static linenr_T
 debuggy_find(
-    int		file,	    // TRUE for a file, FALSE for a function
+    int		is_file,    // TRUE for a file, FALSE for a function
     char_u	*fname,	    // file or function name
     linenr_T	after,	    // after this line number
     garray_T	*gap,	    // either &dbg_breakp or &prof_ga
@@ -873,20 +873,25 @@
     struct debuggy *bp;
     int		i;
     linenr_T	lnum = 0;
-    char_u	*name = fname;
+    char_u	*name = NULL;
+    char_u	*short_name = fname;
     int		prev_got_int;
 
     // Return quickly when there are no breakpoints.
     if (gap->ga_len == 0)
 	return (linenr_T)0;
 
-    // Replace K_SNR in function name with "<SNR>".
-    if (!file && fname[0] == K_SPECIAL)
+    // For a script-local function remove the prefix, so that
+    // "profile func Func" matches "Func" in any script.  Otherwise it's very
+    // difficult to profile/debug a script-local function.  It may match a
+    // function in the wrong script, but that is much better than not being
+    // able to profile/debug a function in a script with unknown ID.
+    // Also match a script-specific name.
+    if (!is_file && fname[0] == K_SPECIAL)
     {
+	short_name = vim_strchr(fname, '_') + 1;
 	name = alloc(STRLEN(fname) + 3);
-	if (name == NULL)
-	    name = fname;
-	else
+	if (name != NULL)
 	{
 	    STRCPY(name, "<SNR>");
 	    STRCPY(name + 5, fname + 3);
@@ -898,8 +903,8 @@
 	// Skip entries that are not useful or are for a line that is beyond
 	// an already found breakpoint.
 	bp = &DEBUGGY(gap, i);
-	if (((bp->dbg_type == DBG_FILE) == file &&
-		bp->dbg_type != DBG_EXPR && (
+	if (((bp->dbg_type == DBG_FILE) == is_file
+		    && bp->dbg_type != DBG_EXPR && (
 #ifdef FEAT_PROFILE
 		gap == &prof_ga ||
 #endif
@@ -910,7 +915,10 @@
 	    // while matching should abort it.
 	    prev_got_int = got_int;
 	    got_int = FALSE;
-	    if (vim_regexec_prog(&bp->dbg_prog, FALSE, name, (colnr_T)0))
+	    if ((name != NULL
+		   && vim_regexec_prog(&bp->dbg_prog, FALSE, name, (colnr_T)0))
+		    || vim_regexec_prog(&bp->dbg_prog, FALSE,
+						       short_name, (colnr_T)0))
 	    {
 		lnum = bp->dbg_lnum;
 		if (fp != NULL)
diff --git a/src/ex_cmds.h b/src/ex_cmds.h
index 30f5221..01c6156 100644
--- a/src/ex_cmds.h
+++ b/src/ex_cmds.h
@@ -489,7 +489,7 @@
 	EX_BANG|EX_EXTRA|EX_TRLBAR|EX_CMDWIN|EX_LOCK_OK,
 	ADDR_NONE),
 EXCMD(CMD_disassemble,	"disassemble",	ex_disassemble,
-	EX_EXTRA|EX_NEEDARG|EX_TRLBAR|EX_CMDWIN|EX_LOCK_OK,
+	EX_BANG|EX_EXTRA|EX_NEEDARG|EX_TRLBAR|EX_CMDWIN|EX_LOCK_OK,
 	ADDR_NONE),
 EXCMD(CMD_djump,	"djump",	ex_findpat,
 	EX_BANG|EX_RANGE|EX_DFLALL|EX_WHOLEFOLD|EX_EXTRA,
diff --git a/src/ex_docmd.c b/src/ex_docmd.c
index b9ae13d..3dec65e 100644
--- a/src/ex_docmd.c
+++ b/src/ex_docmd.c
@@ -866,7 +866,7 @@
 	    if (do_profiling == PROF_YES)
 	    {
 		if (getline_is_func)
-		    func_line_start(real_cookie);
+		    func_line_start(real_cookie, SOURCING_LNUM);
 		else if (getline_equal(fgetline, cookie, getsourceline))
 		    script_line_start();
 	    }
diff --git a/src/profiler.c b/src/profiler.c
index 7896897..46ab3e3 100644
--- a/src/profiler.c
+++ b/src/profiler.c
@@ -555,6 +555,51 @@
 }
 
 /*
+ * When calling a function: may initialize for profiling.
+ */
+    void
+profile_may_start_func(profinfo_T *info, ufunc_T *fp, funccall_T *fc)
+{
+    if (do_profiling == PROF_YES)
+    {
+	if (!fp->uf_profiling && has_profiling(FALSE, fp->uf_name, NULL))
+	{
+	    info->pi_started_profiling = TRUE;
+	    func_do_profile(fp);
+	}
+	if (fp->uf_profiling
+		    || (fc->caller != NULL && fc->caller->func->uf_profiling))
+	{
+	    ++fp->uf_tm_count;
+	    profile_start(&info->pi_call_start);
+	    profile_zero(&fp->uf_tm_children);
+	}
+	script_prof_save(&info->pi_wait_start);
+    }
+}
+
+/*
+ * After calling a function: may handle profiling.  profile_may_start_func()
+ * must have been called previously.
+ */
+    void
+profile_may_end_func(profinfo_T *info, ufunc_T *fp, funccall_T *fc)
+{
+    profile_end(&info->pi_call_start);
+    profile_sub_wait(&info->pi_wait_start, &info->pi_call_start);
+    profile_add(&fp->uf_tm_total, &info->pi_call_start);
+    profile_self(&fp->uf_tm_self, &info->pi_call_start, &fp->uf_tm_children);
+    if (fc->caller != NULL && fc->caller->func->uf_profiling)
+    {
+	profile_add(&fc->caller->func->uf_tm_children, &info->pi_call_start);
+	profile_add(&fc->caller->func->uf_tml_children, &info->pi_call_start);
+    }
+    if (info->pi_started_profiling)
+	// make a ":profdel func" stop profiling the function
+	fp->uf_profiling = FALSE;
+}
+
+/*
  * Prepare profiling for entering a child or something else that is not
  * counted for the script/function itself.
  * Should always be called in pair with prof_child_exit().
@@ -597,15 +642,14 @@
  * until later and we need to store the time now.
  */
     void
-func_line_start(void *cookie)
+func_line_start(void *cookie, long lnum)
 {
     funccall_T	*fcp = (funccall_T *)cookie;
     ufunc_T	*fp = fcp->func;
 
-    if (fp->uf_profiling && SOURCING_LNUM >= 1
-				      && SOURCING_LNUM <= fp->uf_lines.ga_len)
+    if (fp->uf_profiling && lnum >= 1 && lnum <= fp->uf_lines.ga_len)
     {
-	fp->uf_tml_idx = SOURCING_LNUM - 1;
+	fp->uf_tml_idx = lnum - 1;
 	// Skip continuation lines.
 	while (fp->uf_tml_idx > 0 && FUNCLINE(fp, fp->uf_tml_idx) == NULL)
 	    --fp->uf_tml_idx;
diff --git a/src/proto/profiler.pro b/src/proto/profiler.pro
index 5e0b6a2..b2070df 100644
--- a/src/proto/profiler.pro
+++ b/src/proto/profiler.pro
@@ -19,9 +19,11 @@
 void prof_inchar_exit(void);
 int prof_def_func(void);
 void func_do_profile(ufunc_T *fp);
+void profile_may_start_func(profinfo_T *info, ufunc_T *fp, funccall_T *fc);
+void profile_may_end_func(profinfo_T *info, ufunc_T *fp, funccall_T *fc);
 void prof_child_enter(proftime_T *tm);
 void prof_child_exit(proftime_T *tm);
-void func_line_start(void *cookie);
+void func_line_start(void *cookie, long lnum);
 void func_line_exec(void *cookie);
 void func_line_end(void *cookie);
 void script_do_profile(scriptitem_T *si);
diff --git a/src/proto/vim9compile.pro b/src/proto/vim9compile.pro
index d1d1b0c..04c3c5e 100644
--- a/src/proto/vim9compile.pro
+++ b/src/proto/vim9compile.pro
@@ -3,6 +3,7 @@
 int check_compare_types(exprtype_T type, typval_T *tv1, typval_T *tv2);
 int use_typecheck(type_T *actual, type_T *expected);
 int need_type(type_T *actual, type_T *expected, int offset, int arg_idx, cctx_T *cctx, int silent, int actual_is_const);
+int func_needs_compiling(ufunc_T *ufunc, int profile);
 int get_script_item_idx(int sid, char_u *name, int check_writable, cctx_T *cctx);
 imported_T *find_imported(char_u *name, size_t len, cctx_T *cctx);
 imported_T *find_imported_in_script(char_u *name, size_t len, int sid);
@@ -15,7 +16,7 @@
 int assignment_len(char_u *p, int *heredoc);
 void vim9_declare_error(char_u *name);
 int check_vim9_unlet(char_u *name);
-int compile_def_function(ufunc_T *ufunc, int check_return_type, cctx_T *outer_cctx);
+int compile_def_function(ufunc_T *ufunc, int check_return_type, int profiling, cctx_T *outer_cctx);
 void set_function_type(ufunc_T *ufunc);
 void delete_instr(isn_T *isn);
 void unlink_def_function(ufunc_T *ufunc);
diff --git a/src/structs.h b/src/structs.h
index d6bf667..b092b45 100644
--- a/src/structs.h
+++ b/src/structs.h
@@ -1577,7 +1577,7 @@
 #if defined(FEAT_EVAL) || defined(PROTO)
 typedef struct funccall_S funccall_T;
 
-// values used for "uf_dfunc_idx"
+// values used for "uf_def_status"
 typedef enum {
     UF_NOT_COMPILED,
     UF_TO_BE_COMPILED,
@@ -1899,6 +1899,13 @@
 } sn_prl_T;
 
 #  define PRL_ITEM(si, idx)	(((sn_prl_T *)(si)->sn_prl_ga.ga_data)[(idx)])
+
+typedef struct {
+    int		pi_started_profiling;
+    proftime_T	pi_wait_start;
+    proftime_T	pi_call_start;
+} profinfo_T;
+
 # endif
 #else
 // dummy typedefs for use in function prototypes
diff --git a/src/testdir/test_profile.vim b/src/testdir/test_profile.vim
index 7bac118..9c7cb5c 100644
--- a/src/testdir/test_profile.vim
+++ b/src/testdir/test_profile.vim
@@ -7,22 +7,27 @@
 source screendump.vim
 
 func Test_profile_func()
+  call RunProfileFunc('func', 'let', 'let')
+  call RunProfileFunc('def', 'var', '')
+endfunc
+
+func RunProfileFunc(command, declare, assign)
   let lines =<< trim [CODE]
     profile start Xprofile_func.log
     profile func Foo*
-    func! Foo1()
-    endfunc
-    func! Foo2()
-      let l:count = 100
-      while l:count > 0
-        let l:count = l:count - 1
+    XXX Foo1()
+    endXXX
+    XXX Foo2()
+      DDD counter = 100
+      while counter > 0
+        AAA counter = counter - 1
       endwhile
       sleep 1m
-    endfunc
-    func! Foo3()
-    endfunc
-    func! Bar()
-    endfunc
+    endXXX
+    XXX Foo3()
+    endXXX
+    XXX Bar()
+    endXXX
     call Foo1()
     call Foo1()
     profile pause
@@ -37,6 +42,10 @@
     delfunc Foo3
   [CODE]
 
+  call map(lines, {k, v -> substitute(v, 'XXX', a:command, '') })
+  call map(lines, {k, v -> substitute(v, 'DDD', a:declare, '') })
+  call map(lines, {k, v -> substitute(v, 'AAA', a:assign, '') })
+
   call writefile(lines, 'Xprofile_func.vim')
   call system(GetVimCommand()
     \ . ' -es --clean'
@@ -69,10 +78,10 @@
   call assert_match('^ Self time:\s\+\d\+\.\d\+$',                 lines[12])
   call assert_equal('',                                            lines[13])
   call assert_equal('count  total (s)   self (s)',                 lines[14])
-  call assert_match('^\s*1\s\+.*\slet l:count = 100$',             lines[15])
-  call assert_match('^\s*101\s\+.*\swhile l:count > 0$',           lines[16])
-  call assert_match('^\s*100\s\+.*\s  let l:count = l:count - 1$', lines[17])
-  call assert_match('^\s*101\s\+.*\sendwhile$',                    lines[18])
+  call assert_match('^\s*1\s\+.*\s\(let\|var\) counter = 100$',    lines[15])
+  call assert_match('^\s*101\s\+.*\swhile counter > 0$',           lines[16])
+  call assert_match('^\s*100\s\+.*\s  \(let\)\= counter = counter - 1$', lines[17])
+  call assert_match('^\s*10[01]\s\+.*\sendwhile$',                 lines[18])
   call assert_match('^\s*1\s\+.\+sleep 1m$',                       lines[19])
   call assert_equal('',                                            lines[20])
   call assert_equal('FUNCTIONS SORTED ON TOTAL TIME',              lines[21])
diff --git a/src/testdir/test_vim9_disassemble.vim b/src/testdir/test_vim9_disassemble.vim
index f40bcf0..9a3401f 100644
--- a/src/testdir/test_vim9_disassemble.vim
+++ b/src/testdir/test_vim9_disassemble.vim
@@ -1842,6 +1842,27 @@
         res)
 enddef
 
+def s:Profiled(): string
+  echo "profiled"
+  return "done"
+enddef
+
+def Test_profiled()
+  var res = execute('disass! s:Profiled')
+  assert_match('<SNR>\d*_Profiled\_s*' ..
+        'echo "profiled"\_s*' ..
+        '\d PROFILE START line 1\_s*' ..
+        '\d PUSHS "profiled"\_s*' ..
+        '\d ECHO 1\_s*' ..
+        '\d PROFILE END\_s*' ..
+        'return "done"\_s*' ..
+        '\d PROFILE START line 2\_s*' ..
+        '\d PUSHS "done"\_s*' ..
+        '\d RETURN\_s*' ..
+        '\d PROFILE END',
+        res)
+enddef
+
 def s:SilentReturn(): string
   silent return "done"
 enddef
diff --git a/src/userfunc.c b/src/userfunc.c
index 5372260..9216685 100644
--- a/src/userfunc.c
+++ b/src/userfunc.c
@@ -1601,12 +1601,14 @@
     char_u	numbuf[NUMBUFLEN];
     char_u	*name;
 #ifdef FEAT_PROFILE
-    proftime_T	wait_start;
-    proftime_T	call_start;
-    int		started_profiling = FALSE;
+    profinfo_T	profile_info;
 #endif
     ESTACK_CHECK_DECLARATION
 
+#ifdef FEAT_PROFILE
+    CLEAR_FIELD(profile_info);
+#endif
+
     // If depth of calling is getting too high, don't execute the function.
     if (funcdepth_increment() == FAIL)
     {
@@ -1635,8 +1637,16 @@
     if (fp->uf_def_status != UF_NOT_COMPILED)
     {
 	// Execute the function, possibly compiling it first.
+#ifdef FEAT_PROFILE
+	profile_may_start_func(&profile_info, fp, fc);
+#endif
 	call_def_function(fp, argcount, argvars, funcexe->partial, rettv);
 	funcdepth_decrement();
+#ifdef FEAT_PROFILE
+	if (do_profiling == PROF_YES && (fp->uf_profiling
+		    || (fc->caller != NULL && fc->caller->func->uf_profiling)))
+	    profile_may_end_func(&profile_info, fp, fc);
+#endif
 	current_funccal = fc->caller;
 	free_funccal(fc);
 	return;
@@ -1849,22 +1859,7 @@
 	--no_wait_return;
     }
 #ifdef FEAT_PROFILE
-    if (do_profiling == PROF_YES)
-    {
-	if (!fp->uf_profiling && has_profiling(FALSE, fp->uf_name, NULL))
-	{
-	    started_profiling = TRUE;
-	    func_do_profile(fp);
-	}
-	if (fp->uf_profiling
-		    || (fc->caller != NULL && fc->caller->func->uf_profiling))
-	{
-	    ++fp->uf_tm_count;
-	    profile_start(&call_start);
-	    profile_zero(&fp->uf_tm_children);
-	}
-	script_prof_save(&wait_start);
-    }
+    profile_may_start_func(&profile_info, fp, fc);
 #endif
 
     save_current_sctx = current_sctx;
@@ -1902,20 +1897,7 @@
 #ifdef FEAT_PROFILE
     if (do_profiling == PROF_YES && (fp->uf_profiling
 		    || (fc->caller != NULL && fc->caller->func->uf_profiling)))
-    {
-	profile_end(&call_start);
-	profile_sub_wait(&wait_start, &call_start);
-	profile_add(&fp->uf_tm_total, &call_start);
-	profile_self(&fp->uf_tm_self, &call_start, &fp->uf_tm_children);
-	if (fc->caller != NULL && fc->caller->func->uf_profiling)
-	{
-	    profile_add(&fc->caller->func->uf_tm_children, &call_start);
-	    profile_add(&fc->caller->func->uf_tml_children, &call_start);
-	}
-	if (started_profiling)
-	    // make a ":profdel func" stop profiling the function
-	    fp->uf_profiling = FALSE;
-    }
+	profile_may_end_func(&profile_info, fp, fc);
 #endif
 
     // when being verbose, mention the return value
@@ -1964,7 +1946,7 @@
     current_sctx = save_current_sctx;
 #ifdef FEAT_PROFILE
     if (do_profiling == PROF_YES)
-	script_prof_restore(&wait_start);
+	script_prof_restore(&profile_info.pi_wait_start);
 #endif
     if (using_sandbox)
 	--sandbox;
@@ -3982,7 +3964,7 @@
 
 /*
  * :defcompile - compile all :def functions in the current script that need to
- * be compiled.  Except dead functions.
+ * be compiled.  Except dead functions.  Doesn't do profiling.
  */
     void
 ex_defcompile(exarg_T *eap UNUSED)
@@ -4002,7 +3984,7 @@
 		    && ufunc->uf_def_status == UF_TO_BE_COMPILED
 		    && (ufunc->uf_flags & FC_DEAD) == 0)
 	    {
-		compile_def_function(ufunc, FALSE, NULL);
+		compile_def_function(ufunc, FALSE, FALSE, NULL);
 
 		if (func_hashtab.ht_changed != changed)
 		{
@@ -4698,7 +4680,7 @@
 	    SOURCING_LNUM = fcp->linenr;
 #ifdef FEAT_PROFILE
 	    if (do_profiling == PROF_YES)
-		func_line_start(cookie);
+		func_line_start(cookie, SOURCING_LNUM);
 #endif
 	}
     }
diff --git a/src/version.c b/src/version.c
index 55ce713..41858ba 100644
--- a/src/version.c
+++ b/src/version.c
@@ -751,6 +751,8 @@
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
+    2400,
+/**/
     2399,
 /**/
     2398,
diff --git a/src/vim9.h b/src/vim9.h
index 8d4faa3..9446c15 100644
--- a/src/vim9.h
+++ b/src/vim9.h
@@ -152,6 +152,9 @@
     ISN_CMDMOD,	    // set cmdmod
     ISN_CMDMOD_REV, // undo ISN_CMDMOD
 
+    ISN_PROF_START, // start a line for profiling
+    ISN_PROF_END,   // end a line for profiling
+
     ISN_UNPACK,	    // unpack list into items, uses isn_arg.unpack
     ISN_SHUFFLE,    // move item on stack up or down
     ISN_DROP	    // pop stack and discard value
@@ -366,8 +369,12 @@
 				    // was compiled.
 
     garray_T	df_def_args_isn;    // default argument instructions
+
+    // After compiling "df_instr" and/or "df_instr_prof" is not NULL.
     isn_T	*df_instr;	    // function body to be executed
-    int		df_instr_count;
+    int		df_instr_count;	    // size of "df_instr"
+    isn_T	*df_instr_prof;		// like "df_instr" with profiling
+    int		df_instr_prof_count;	// size of "df_instr_prof"
 
     int		df_varcount;	    // number of local variables
     int		df_has_closure;	    // one if a closure was created
diff --git a/src/vim9compile.c b/src/vim9compile.c
index ae5cb12..5a7da12 100644
--- a/src/vim9compile.c
+++ b/src/vim9compile.c
@@ -123,6 +123,8 @@
     char_u	*ctx_line_start;    // start of current line or NULL
     garray_T	ctx_instr;	    // generated instructions
 
+    int		ctx_profiling;	    // when TRUE generate ISN_PROF_START
+
     garray_T	ctx_locals;	    // currently visible local variables
     int		ctx_locals_count;   // total number of local variables
 
@@ -1693,6 +1695,29 @@
 }
 
 /*
+ * Return TRUE if "ufunc" should be compiled, taking into account whether
+ * "profile" indicates profiling is to be done.
+ */
+    int
+func_needs_compiling(ufunc_T *ufunc, int profile)
+{
+    switch (ufunc->uf_def_status)
+    {
+	case UF_NOT_COMPILED: return FALSE;
+	case UF_TO_BE_COMPILED: return TRUE;
+	case UF_COMPILED:
+	{
+	    dfunc_T *dfunc = ((dfunc_T *)def_functions.ga_data)
+							 + ufunc->uf_dfunc_idx;
+
+	    return profile ? dfunc->df_instr_prof == NULL
+			   : dfunc->df_instr == NULL;
+	}
+	case UF_COMPILING: return FALSE;
+    }
+}
+
+/*
  * Generate an ISN_DCALL or ISN_UCALL instruction.
  * Return FAIL if the number of arguments is wrong.
  */
@@ -1744,10 +1769,10 @@
 		return FAIL;
 	    }
 	}
-	if (ufunc->uf_def_status == UF_TO_BE_COMPILED)
-	    if (compile_def_function(ufunc, ufunc->uf_ret_type == NULL, NULL)
-								       == FAIL)
-		return FAIL;
+	if (func_needs_compiling(ufunc, cctx->ctx_profiling)
+		&& compile_def_function(ufunc, ufunc->uf_ret_type == NULL,
+					    cctx->ctx_profiling, NULL) == FAIL)
+	    return FAIL;
     }
 
     if ((isn = generate_instr(cctx,
@@ -2063,6 +2088,19 @@
     return OK;
 }
 
+    static void
+may_generate_prof_end(cctx_T *cctx, int prof_lnum)
+{
+    if (cctx->ctx_profiling && prof_lnum >= 0)
+    {
+	int save_lnum = cctx->ctx_lnum;
+
+	cctx->ctx_lnum = prof_lnum;
+	generate_instr(cctx, ISN_PROF_END);
+	cctx->ctx_lnum = save_lnum;
+    }
+}
+
 /*
  * Reserve space for a local variable.
  * Return the variable or NULL if it failed.
@@ -2575,9 +2613,10 @@
 	return FAIL;
 
     // Need to compile any default values to get the argument types.
-    if (ufunc->uf_def_status == UF_TO_BE_COMPILED)
-	if (compile_def_function(ufunc, TRUE, NULL) == FAIL)
-	    return FAIL;
+    if (func_needs_compiling(ufunc, cctx->ctx_profiling)
+	    && compile_def_function(ufunc, TRUE, cctx->ctx_profiling, NULL)
+								       == FAIL)
+	return FAIL;
     return generate_PUSHFUNC(cctx, ufunc->uf_name, ufunc->uf_func_type);
 }
 
@@ -3070,7 +3109,7 @@
     clear_tv(&rettv);
 
     // Compile the function into instructions.
-    compile_def_function(ufunc, TRUE, cctx);
+    compile_def_function(ufunc, TRUE, cctx->ctx_profiling, cctx);
 
     clear_evalarg(&evalarg, NULL);
 
@@ -5047,8 +5086,9 @@
 	r = eap->skip ? OK : FAIL;
 	goto theend;
     }
-    if (ufunc->uf_def_status == UF_TO_BE_COMPILED
-	    && compile_def_function(ufunc, TRUE, cctx) == FAIL)
+    if (func_needs_compiling(ufunc, cctx->ctx_profiling)
+	    && compile_def_function(ufunc, TRUE, cctx->ctx_profiling, cctx)
+								       == FAIL)
     {
 	func_ptr_unref(ufunc);
 	goto theend;
@@ -7101,7 +7141,11 @@
     if (scope == NULL)
 	return NULL;
 
+    // "endwhile" jumps back here, one before when profiling
     scope->se_u.se_while.ws_top_label = instr->ga_len;
+    if (cctx->ctx_profiling && ((isn_T *)instr->ga_data)[instr->ga_len - 1]
+						   .isn_type == ISN_PROF_START)
+	--scope->se_u.se_while.ws_top_label;
 
     // compile "expr"
     if (compile_expr0(&p, cctx) == FAIL)
@@ -7134,6 +7178,9 @@
     cctx->ctx_scope = scope->se_outer;
     unwind_locals(cctx, scope->se_local_count);
 
+    // count the endwhile before jumping
+    may_generate_prof_end(cctx, cctx->ctx_lnum);
+
     // At end of ":for" scope jump back to the FOR instruction.
     generate_JUMP(cctx, JUMP_ALWAYS, scope->se_u.se_while.ws_top_label);
 
@@ -7794,13 +7841,18 @@
  * When "check_return_type" is set then set ufunc->uf_ret_type to the type of
  * the return statement (used for lambda).  When uf_ret_type is already set
  * then check that it matches.
+ * When "profiling" is true add ISN_PROF_START instructions.
  * "outer_cctx" is set for a nested function.
  * This can be used recursively through compile_lambda(), which may reallocate
  * "def_functions".
  * Returns OK or FAIL.
  */
     int
-compile_def_function(ufunc_T *ufunc, int check_return_type, cctx_T *outer_cctx)
+compile_def_function(
+	ufunc_T	    *ufunc,
+	int	    check_return_type,
+	int	    profiling,
+	cctx_T	    *outer_cctx)
 {
     char_u	*line = NULL;
     char_u	*p;
@@ -7813,6 +7865,7 @@
     int		save_estack_compiling = estack_compiling;
     int		do_estack_push;
     int		new_def_function = FALSE;
+    int		prof_lnum = -1;
 
     // When using a function that was compiled before: Free old instructions.
     // The index is reused.  Otherwise add a new entry in "def_functions".
@@ -7832,6 +7885,8 @@
     ufunc->uf_def_status = UF_COMPILING;
 
     CLEAR_FIELD(cctx);
+
+    cctx.ctx_profiling = profiling;
     cctx.ctx_ufunc = ufunc;
     cctx.ctx_lnum = -1;
     cctx.ctx_outer = outer_cctx;
@@ -7932,22 +7987,35 @@
 	{
 	    line = next_line_from_context(&cctx, FALSE);
 	    if (cctx.ctx_lnum >= ufunc->uf_lines.ga_len)
+	    {
 		// beyond the last line
+		may_generate_prof_end(&cctx, prof_lnum);
 		break;
+	    }
 	}
 
 	CLEAR_FIELD(ea);
 	ea.cmdlinep = &line;
 	ea.cmd = skipwhite(line);
 
+	if (*ea.cmd == '#')
+	{
+	    // "#" starts a comment
+	    line = (char_u *)"";
+	    continue;
+	}
+
+	if (cctx.ctx_profiling && cctx.ctx_lnum != prof_lnum)
+	{
+	    may_generate_prof_end(&cctx, prof_lnum);
+
+	    prof_lnum = cctx.ctx_lnum;
+	    generate_instr(&cctx, ISN_PROF_START);
+	}
+
 	// Some things can be recognized by the first character.
 	switch (*ea.cmd)
 	{
-	    case '#':
-		// "#" starts a comment
-		line = (char_u *)"";
-		continue;
-
 	    case '}':
 		{
 		    // "}" ends a block scope
@@ -8308,8 +8376,16 @@
 							 + ufunc->uf_dfunc_idx;
 	dfunc->df_deleted = FALSE;
 	dfunc->df_script_seq = current_sctx.sc_seq;
-	dfunc->df_instr = instr->ga_data;
-	dfunc->df_instr_count = instr->ga_len;
+	if (cctx.ctx_profiling)
+	{
+	    dfunc->df_instr_prof = instr->ga_data;
+	    dfunc->df_instr_prof_count = instr->ga_len;
+	}
+	else
+	{
+	    dfunc->df_instr = instr->ga_data;
+	    dfunc->df_instr_count = instr->ga_len;
+	}
 	dfunc->df_varcount = cctx.ctx_locals_count;
 	dfunc->df_has_closure = cctx.ctx_has_closure;
 	if (cctx.ctx_outer_used)
@@ -8586,6 +8662,8 @@
 	case ISN_OPNR:
 	case ISN_PCALL:
 	case ISN_PCALL_END:
+	case ISN_PROF_END:
+	case ISN_PROF_START:
 	case ISN_PUSHBOOL:
 	case ISN_PUSHF:
 	case ISN_PUSHNR:
diff --git a/src/vim9execute.c b/src/vim9execute.c
index 232c84e..e494885 100644
--- a/src/vim9execute.c
+++ b/src/vim9execute.c
@@ -294,7 +294,7 @@
     // Set execution state to the start of the called function.
     ectx->ec_dfunc_idx = cdf_idx;
     ectx->ec_instr = dfunc->df_instr;
-    entry = estack_push_ufunc(dfunc->df_ufunc, 1);
+    entry = estack_push_ufunc(ufunc, 1);
     if (entry != NULL)
     {
 	// Set the script context to the script where the function was defined.
@@ -645,9 +645,10 @@
     int		error;
     int		idx;
     int		did_emsg_before = did_emsg;
+    int		profiling = do_profiling == PROF_YES && ufunc->uf_profiling;
 
-    if (ufunc->uf_def_status == UF_TO_BE_COMPILED
-	    && compile_def_function(ufunc, FALSE, NULL) == FAIL)
+    if (func_needs_compiling(ufunc, profiling)
+		&& compile_def_function(ufunc, FALSE, profiling, NULL) == FAIL)
 	return FAIL;
     if (ufunc->uf_def_status == UF_COMPILED)
     {
@@ -1130,6 +1131,7 @@
     int		save_did_emsg_def = did_emsg_def;
     int		trylevel_at_start = trylevel;
     int		orig_funcdepth;
+    int		profiling = do_profiling == PROF_YES && ufunc->uf_profiling;
 
 // Get pointer to item in the stack.
 #define STACK_TV(idx) (((typval_T *)ectx.ec_stack.ga_data) + idx)
@@ -1142,8 +1144,9 @@
 #define STACK_TV_VAR(idx) (((typval_T *)ectx.ec_stack.ga_data) + ectx.ec_frame_idx + STACK_FRAME_SIZE + idx)
 
     if (ufunc->uf_def_status == UF_NOT_COMPILED
-	    || (ufunc->uf_def_status == UF_TO_BE_COMPILED
-			  && compile_def_function(ufunc, FALSE, NULL) == FAIL))
+	    || (func_needs_compiling(ufunc, profiling)
+			 && compile_def_function(ufunc, FALSE, profiling, NULL)
+								      == FAIL))
     {
 	if (did_emsg_cumul + did_emsg == did_emsg_before)
 	    semsg(_(e_function_is_not_compiled_str),
@@ -1155,7 +1158,7 @@
 	// Check the function was really compiled.
 	dfunc_T	*dfunc = ((dfunc_T *)def_functions.ga_data)
 							 + ufunc->uf_dfunc_idx;
-	if (dfunc->df_instr == NULL)
+	if ((profiling ? dfunc->df_instr_prof : dfunc->df_instr) == NULL)
 	{
 	    iemsg("using call_def_function() on not compiled function");
 	    return FAIL;
@@ -1294,7 +1297,7 @@
 	    ++ectx.ec_stack.ga_len;
 	}
 
-	ectx.ec_instr = dfunc->df_instr;
+	ectx.ec_instr = profiling ? dfunc->df_instr_prof : dfunc->df_instr;
     }
 
     // Following errors are in the function, not the caller.
@@ -3495,6 +3498,26 @@
 		}
 		break;
 
+	    case ISN_PROF_START:
+	    case ISN_PROF_END:
+		{
+		    funccall_T cookie;
+		    ufunc_T	    *cur_ufunc =
+				    (((dfunc_T *)def_functions.ga_data)
+						 + ectx.ec_dfunc_idx)->df_ufunc;
+
+		    cookie.func = cur_ufunc;
+		    if (iptr->isn_type == ISN_PROF_START)
+		    {
+			func_line_start(&cookie, iptr->isn_lnum);
+			// if we get here the instruction is executed
+			func_line_exec(&cookie);
+		    }
+		    else
+			func_line_end(&cookie);
+		}
+		break;
+
 	    case ISN_SHUFFLE:
 		{
 		    typval_T	tmp_tv;
@@ -3642,6 +3665,7 @@
     ufunc_T	*ufunc;
     dfunc_T	*dfunc;
     isn_T	*instr;
+    int		instr_count;
     int		current;
     int		line_idx = 0;
     int		prev_current = 0;
@@ -3677,8 +3701,8 @@
 	semsg(_(e_cannot_find_function_str), eap->arg);
 	return;
     }
-    if (ufunc->uf_def_status == UF_TO_BE_COMPILED
-	    && compile_def_function(ufunc, FALSE, NULL) == FAIL)
+    if (func_needs_compiling(ufunc, eap->forceit)
+	    && compile_def_function(ufunc, FALSE, eap->forceit, NULL) == FAIL)
 	return;
     if (ufunc->uf_def_status != UF_COMPILED)
     {
@@ -3691,8 +3715,10 @@
 	msg((char *)ufunc->uf_name);
 
     dfunc = ((dfunc_T *)def_functions.ga_data) + ufunc->uf_dfunc_idx;
-    instr = dfunc->df_instr;
-    for (current = 0; current < dfunc->df_instr_count; ++current)
+    instr = eap->forceit ? dfunc->df_instr_prof : dfunc->df_instr;
+    instr_count = eap->forceit ? dfunc->df_instr_prof_count
+						       : dfunc->df_instr_count;
+    for (current = 0; current < instr_count; ++current)
     {
 	isn_T	    *iptr = &instr[current];
 	char	    *line;
@@ -4319,6 +4345,14 @@
 		}
 	    case ISN_CMDMOD_REV: smsg("%4d CMDMOD_REV", current); break;
 
+	    case ISN_PROF_START:
+		 smsg("%4d PROFILE START line %d", current, iptr->isn_lnum);
+		 break;
+
+	    case ISN_PROF_END:
+		smsg("%4d PROFILE END", current);
+		break;
+
 	    case ISN_UNPACK: smsg("%4d UNPACK %d%s", current,
 			iptr->isn_arg.unpack.unp_count,
 			iptr->isn_arg.unpack.unp_semicolon ? " semicolon" : "");
diff --git a/src/vim9type.c b/src/vim9type.c
index 81b0db2..1374cee 100644
--- a/src/vim9type.c
+++ b/src/vim9type.c
@@ -336,7 +336,8 @@
 	    // May need to get the argument types from default values by
 	    // compiling the function.
 	    if (ufunc->uf_def_status == UF_TO_BE_COMPILED
-			    && compile_def_function(ufunc, TRUE, NULL) == FAIL)
+			    && compile_def_function(ufunc, TRUE, FALSE, NULL)
+								       == FAIL)
 		return NULL;
 	    if (ufunc->uf_func_type == NULL)
 		set_function_type(ufunc);