| Colin Cross | 9c5366b | 2010-04-13 19:48:59 -0700 | [diff] [blame] | 1 | /* | 
|  | 2 | * Copyright (C) 2010 The Android Open Source Project | 
|  | 3 | * | 
|  | 4 | * Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | 5 | * you may not use this file except in compliance with the License. | 
|  | 6 | * You may obtain a copy of the License at | 
|  | 7 | * | 
|  | 8 | *      http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 9 | * | 
|  | 10 | * Unless required by applicable law or agreed to in writing, software | 
|  | 11 | * distributed under the License is distributed on an "AS IS" BASIS, | 
|  | 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | 13 | * See the License for the specific language governing permissions and | 
|  | 14 | * limitations under the License. | 
|  | 15 | */ | 
|  | 16 |  | 
|  | 17 | #include <stdio.h> | 
|  | 18 | #include <errno.h> | 
|  | 19 | #include <signal.h> | 
|  | 20 | #include <unistd.h> | 
|  | 21 | #include <fcntl.h> | 
|  | 22 | #include <sys/types.h> | 
|  | 23 | #include <sys/socket.h> | 
|  | 24 | #include <sys/wait.h> | 
|  | 25 | #include <cutils/sockets.h> | 
|  | 26 | #include <sys/reboot.h> | 
|  | 27 |  | 
|  | 28 | #include "init.h" | 
| Colin Cross | ed8a7d8 | 2010-04-19 17:05:34 -0700 | [diff] [blame] | 29 | #include "list.h" | 
| Colin Cross | 3899e9f | 2010-04-13 20:35:46 -0700 | [diff] [blame] | 30 | #include "util.h" | 
| Colin Cross | ed8a7d8 | 2010-04-19 17:05:34 -0700 | [diff] [blame] | 31 | #include "log.h" | 
| Colin Cross | 9c5366b | 2010-04-13 19:48:59 -0700 | [diff] [blame] | 32 |  | 
/*
 * Two ends of the socketpair created in signal_init().  The SIGCHLD handler
 * writes to signal_fd; handle_signal() reads from signal_recv_fd, which is
 * also what get_signal_fd() hands out.  Both stay -1 until signal_init()
 * succeeds in creating the pair.
 */
static int signal_fd = -1;
static int signal_recv_fd = -1;

/*
 * SIGCHLD handler: queue a single byte on signal_fd so the reader of
 * signal_recv_fd learns that a child changed state.
 *
 * The byte is the first byte of the signal number's in-memory representation;
 * handle_signal() discards whatever it reads, so only its arrival matters.
 *
 * errno is saved and restored because the handler can interrupt code that is
 * about to inspect errno, and write() may overwrite it (e.g. with EBADF while
 * signal_fd is still -1, or EAGAIN when the non-blocking socket is full).
 */
static void sigchld_handler(int s)
{
    int saved_errno = errno;

    /* Best effort: there is nothing safe to do here if the write fails. */
    write(signal_fd, &s, 1);

    errno = saved_errno;
}
|  | 40 |  | 
| Colin Cross | 9c5366b | 2010-04-13 19:48:59 -0700 | [diff] [blame] | 41 | #define CRITICAL_CRASH_THRESHOLD    4       /* if we crash >4 times ... */ | 
|  | 42 | #define CRITICAL_CRASH_WINDOW       (4*60)  /* ... in 4 minutes, goto recovery*/ | 
|  | 43 |  | 
|  | 44 | static int wait_for_one_process(int block) | 
|  | 45 | { | 
|  | 46 | pid_t pid; | 
|  | 47 | int status; | 
|  | 48 | struct service *svc; | 
|  | 49 | struct socketinfo *si; | 
|  | 50 | time_t now; | 
|  | 51 | struct listnode *node; | 
|  | 52 | struct command *cmd; | 
|  | 53 |  | 
|  | 54 | while ( (pid = waitpid(-1, &status, block ? 0 : WNOHANG)) == -1 && errno == EINTR ); | 
|  | 55 | if (pid <= 0) return -1; | 
|  | 56 | INFO("waitpid returned pid %d, status = %08x\n", pid, status); | 
|  | 57 |  | 
|  | 58 | svc = service_find_by_pid(pid); | 
|  | 59 | if (!svc) { | 
|  | 60 | ERROR("untracked pid %d exited\n", pid); | 
|  | 61 | return 0; | 
|  | 62 | } | 
|  | 63 |  | 
|  | 64 | NOTICE("process '%s', pid %d exited\n", svc->name, pid); | 
|  | 65 |  | 
|  | 66 | if (!(svc->flags & SVC_ONESHOT)) { | 
|  | 67 | kill(-pid, SIGKILL); | 
|  | 68 | NOTICE("process '%s' killing any children in process group\n", svc->name); | 
|  | 69 | } | 
|  | 70 |  | 
|  | 71 | /* remove any sockets we may have created */ | 
|  | 72 | for (si = svc->sockets; si; si = si->next) { | 
|  | 73 | char tmp[128]; | 
|  | 74 | snprintf(tmp, sizeof(tmp), ANDROID_SOCKET_DIR"/%s", si->name); | 
|  | 75 | unlink(tmp); | 
|  | 76 | } | 
|  | 77 |  | 
|  | 78 | svc->pid = 0; | 
|  | 79 | svc->flags &= (~SVC_RUNNING); | 
|  | 80 |  | 
|  | 81 | /* oneshot processes go into the disabled state on exit */ | 
|  | 82 | if (svc->flags & SVC_ONESHOT) { | 
|  | 83 | svc->flags |= SVC_DISABLED; | 
|  | 84 | } | 
|  | 85 |  | 
|  | 86 | /* disabled processes do not get restarted automatically */ | 
|  | 87 | if (svc->flags & SVC_DISABLED) { | 
|  | 88 | notify_service_state(svc->name, "stopped"); | 
|  | 89 | return 0; | 
|  | 90 | } | 
|  | 91 |  | 
|  | 92 | now = gettime(); | 
|  | 93 | if (svc->flags & SVC_CRITICAL) { | 
|  | 94 | if (svc->time_crashed + CRITICAL_CRASH_WINDOW >= now) { | 
|  | 95 | if (++svc->nr_crashed > CRITICAL_CRASH_THRESHOLD) { | 
|  | 96 | ERROR("critical process '%s' exited %d times in %d minutes; " | 
|  | 97 | "rebooting into recovery mode\n", svc->name, | 
|  | 98 | CRITICAL_CRASH_THRESHOLD, CRITICAL_CRASH_WINDOW / 60); | 
|  | 99 | sync(); | 
|  | 100 | __reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, | 
|  | 101 | LINUX_REBOOT_CMD_RESTART2, "recovery"); | 
|  | 102 | return 0; | 
|  | 103 | } | 
|  | 104 | } else { | 
|  | 105 | svc->time_crashed = now; | 
|  | 106 | svc->nr_crashed = 1; | 
|  | 107 | } | 
|  | 108 | } | 
|  | 109 |  | 
|  | 110 | svc->flags |= SVC_RESTARTING; | 
|  | 111 |  | 
|  | 112 | /* Execute all onrestart commands for this service. */ | 
|  | 113 | list_for_each(node, &svc->onrestart.commands) { | 
|  | 114 | cmd = node_to_item(node, struct command, clist); | 
|  | 115 | cmd->func(cmd->nargs, cmd->args); | 
|  | 116 | } | 
|  | 117 | notify_service_state(svc->name, "restarting"); | 
|  | 118 | return 0; | 
|  | 119 | } | 
|  | 120 |  | 
|  | 121 | void handle_signal(void) | 
|  | 122 | { | 
|  | 123 | char tmp[32]; | 
|  | 124 |  | 
|  | 125 | /* we got a SIGCHLD - reap and restart as needed */ | 
|  | 126 | read(signal_recv_fd, tmp, sizeof(tmp)); | 
|  | 127 | while (!wait_for_one_process(0)) | 
|  | 128 | ; | 
|  | 129 | } | 
|  | 130 |  | 
| Colin Cross | 12541c6 | 2010-04-16 20:28:11 -0700 | [diff] [blame] | 131 | void signal_init(void) | 
|  | 132 | { | 
|  | 133 | int s[2]; | 
|  | 134 |  | 
|  | 135 | struct sigaction act; | 
|  | 136 |  | 
|  | 137 | act.sa_handler = sigchld_handler; | 
|  | 138 | act.sa_flags = SA_NOCLDSTOP; | 
|  | 139 | act.sa_mask = 0; | 
|  | 140 | act.sa_restorer = NULL; | 
|  | 141 | sigaction(SIGCHLD, &act, 0); | 
|  | 142 |  | 
|  | 143 | /* create a signalling mechanism for the sigchld handler */ | 
|  | 144 | if (socketpair(AF_UNIX, SOCK_STREAM, 0, s) == 0) { | 
|  | 145 | signal_fd = s[0]; | 
|  | 146 | signal_recv_fd = s[1]; | 
|  | 147 | fcntl(s[0], F_SETFD, FD_CLOEXEC); | 
|  | 148 | fcntl(s[0], F_SETFL, O_NONBLOCK); | 
|  | 149 | fcntl(s[1], F_SETFD, FD_CLOEXEC); | 
|  | 150 | fcntl(s[1], F_SETFL, O_NONBLOCK); | 
|  | 151 | } | 
|  | 152 |  | 
|  | 153 | handle_signal(); | 
|  | 154 | } | 
|  | 155 |  | 
| Colin Cross | 9c5366b | 2010-04-13 19:48:59 -0700 | [diff] [blame] | 156 | int get_signal_fd() | 
|  | 157 | { | 
|  | 158 | return signal_recv_fd; | 
|  | 159 | } |