- A+
1 /* 2 * This creates a new process as a copy of the old one, 3 * but does not actually start it yet. 4 * 5 * It copies the registers, and all the appropriate 6 * parts of the process environment (as per the clone 7 * flags). The actual kick-off is left to the caller. 8 */ 9 struct task_struct *copy_process(unsigned long clone_flags, 10 unsigned long stack_start, 11 struct pt_regs *regs, 12 unsigned long stack_size, 13 int __user *parent_tidptr, 14 int __user *child_tidptr) 15 { 16 int retval; 17 struct task_struct *p = NULL; 18 19 if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) 20 return ERR_PTR(-EINVAL); 21 22 /* 23 * Thread groups must share signals as well, and detached threads 24 * can only be started up within the thread group. 25 */ 26 if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND)) 27 return ERR_PTR(-EINVAL); 28 29 /* 30 * Shared signal handlers imply shared VM. By way of the above, 31 * thread groups also imply shared VM. Blocking this case allows 32 * for various simplifications in other code. 33 */ 34 if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM)) 35 return ERR_PTR(-EINVAL); 36 37 /* 38 * CLONE_DETACHED must match CLONE_THREAD: it's a historical 39 * thing. 
40 */ 41 if (!(clone_flags & CLONE_DETACHED) != !(clone_flags & CLONE_THREAD)) { 42 /* Warn about the old no longer supported case so that we see it */ 43 if (clone_flags & CLONE_THREAD) { 44 static int count; 45 if (count < 5) { 46 count++; 47 printk(KERN_WARNING "%s trying to use CLONE_THREAD without CLONE_DETACHn", current->comm); 48 } 49 } 50 return ERR_PTR(-EINVAL); 51 } 52 53 retval = security_task_create(clone_flags); 54 if (retval) 55 goto fork_out; 56 57 retval = -ENOMEM; 58 p = dup_task_struct(current); 59 if (!p) 60 goto fork_out; 61 62 retval = -EAGAIN; 63 if (atomic_read(&p->user->processes) >= 64 p->rlim[RLIMIT_NPROC].rlim_cur) { 65 if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && 66 p->user != &root_user) 67 goto bad_fork_free; 68 } 69 70 atomic_inc(&p->user->__count); 71 atomic_inc(&p->user->processes); 72 73 /* 74 * If multiple threads are within copy_process(), then this check 75 * triggers too late. This doesn't hurt, the check is only there 76 * to stop root fork bombs. 77 */ 78 if (nr_threads >= max_threads) 79 goto bad_fork_cleanup_count; 80 81 if (!try_module_get(p->thread_info->exec_domain->module)) 82 goto bad_fork_cleanup_count; 83 84 if (p->binfmt && !try_module_get(p->binfmt->module)) 85 goto bad_fork_cleanup_put_domain; 86 87 #ifdef CONFIG_PREEMPT 88 /* 89 * schedule_tail drops this_rq()->lock so we compensate with a count 90 * of 1. Also, we want to start with kernel preemption disabled. 
91 */ 92 p->thread_info->preempt_count = 1; 93 #endif 94 p->did_exec = 0; 95 p->state = TASK_UNINTERRUPTIBLE; 96 97 copy_flags(clone_flags, p); 98 if (clone_flags & CLONE_IDLETASK) 99 p->pid = 0; 100 else { 101 p->pid = alloc_pidmap(); 102 if (p->pid == -1) 103 goto bad_fork_cleanup; 104 } 105 retval = -EFAULT; 106 if (clone_flags & CLONE_PARENT_SETTID) 107 if (put_user(p->pid, parent_tidptr)) 108 goto bad_fork_cleanup; 109 110 p->proc_dentry = NULL; 111 112 INIT_LIST_HEAD(&p->run_list); 113 114 INIT_LIST_HEAD(&p->children); 115 INIT_LIST_HEAD(&p->sibling); 116 INIT_LIST_HEAD(&p->posix_timers); 117 init_waitqueue_head(&p->wait_chldexit); 118 p->vfork_done = NULL; 119 spin_lock_init(&p->alloc_lock); 120 spin_lock_init(&p->switch_lock); 121 spin_lock_init(&p->proc_lock); 122 123 clear_tsk_thread_flag(p, TIF_SIGPENDING); 124 init_sigpending(&p->pending); 125 126 p->it_real_value = p->it_virt_value = p->it_prof_value = 0; 127 p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0; 128 init_timer(&p->real_timer); 129 p->real_timer.data = (unsigned long) p; 130 131 p->leader = 0; /* session leadership doesn't inherit */ 132 p->tty_old_pgrp = 0; 133 p->utime = p->stime = 0; 134 p->cutime = p->cstime = 0; 135 p->array = NULL; 136 p->lock_depth = -1; /* -1 = no lock */ 137 p->start_time = get_jiffies_64(); 138 p->security = NULL; 139 p->io_context = NULL; 140 141 retval = -ENOMEM; 142 if ((retval = security_task_alloc(p))) 143 goto bad_fork_cleanup; 144 /* copy all the process information */ 145 if ((retval = copy_semundo(clone_flags, p))) 146 goto bad_fork_cleanup_security; 147 if ((retval = copy_files(clone_flags, p))) 148 goto bad_fork_cleanup_semundo; 149 if ((retval = copy_fs(clone_flags, p))) 150 goto bad_fork_cleanup_files; 151 if ((retval = copy_sighand(clone_flags, p))) 152 goto bad_fork_cleanup_fs; 153 if ((retval = copy_signal(clone_flags, p))) 154 goto bad_fork_cleanup_sighand; 155 if ((retval = copy_mm(clone_flags, p))) 156 goto bad_fork_cleanup_signal; 157 if 
((retval = copy_namespace(clone_flags, p))) 158 goto bad_fork_cleanup_mm; 159 retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); 160 if (retval) 161 goto bad_fork_cleanup_namespace; 162 163 p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL; 164 /* 165 * Clear TID on mm_release()? 166 */ 167 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; 168 169 /* 170 * Syscall tracing should be turned off in the child regardless 171 * of CLONE_PTRACE. 172 */ 173 clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE); 174 175 /* Our parent execution domain becomes current domain 176 These must match for thread signalling to apply */ 177 178 p->parent_exec_id = p->self_exec_id; 179 180 /* ok, now we should be set up.. */ 181 p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL); 182 p->pdeath_signal = 0; 183 184 /* 185 * Share the timeslice between parent and child, thus the 186 * total amount of pending timeslices in the system doesn't change, 187 * resulting in more scheduling fairness. 188 */ 189 local_irq_disable(); 190 p->time_slice = (current->time_slice + 1) >> 1; 191 /* 192 * The remainder of the first timeslice might be recovered by 193 * the parent if the child exits early enough. 194 */ 195 p->first_time_slice = 1; 196 current->time_slice >>= 1; 197 p->timestamp = sched_clock(); 198 if (!current->time_slice) { 199 /* 200 * This case is rare, it happens when the parent has only 201 * a single jiffy left from its timeslice. Taking the 202 * runqueue lock is not a problem. 203 */ 204 current->time_slice = 1; 205 preempt_disable(); 206 scheduler_tick(0, 0); 207 local_irq_enable(); 208 preempt_enable(); 209 } else 210 local_irq_enable(); 211 /* 212 * Ok, add it to the run-queues and make it 213 * visible to the rest of the system. 214 * 215 * Let it rip! 
216 */ 217 p->tgid = p->pid; 218 p->group_leader = p; 219 INIT_LIST_HEAD(&p->ptrace_children); 220 INIT_LIST_HEAD(&p->ptrace_list); 221 222 /* Need tasklist lock for parent etc handling! */ 223 write_lock_irq(&tasklist_lock); 224 /* 225 * Check for pending SIGKILL! The new thread should not be allowed 226 * to slip out of an OOM kill. (or normal SIGKILL.) 227 */ 228 if (sigismember(¤t->pending.signal, SIGKILL)) { 229 write_unlock_irq(&tasklist_lock); 230 retval = -EINTR; 231 goto bad_fork_cleanup_namespace; 232 } 233 234 /* CLONE_PARENT re-uses the old parent */ 235 if (clone_flags & CLONE_PARENT) 236 p->real_parent = current->real_parent; 237 else 238 p->real_parent = current; 239 p->parent = p->real_parent; 240 241 if (clone_flags & CLONE_THREAD) { 242 spin_lock(¤t->sighand->siglock); 243 /* 244 * Important: if an exit-all has been started then 245 * do not create this new thread - the whole thread 246 * group is supposed to exit anyway. 247 */ 248 if (current->signal->group_exit) { 249 spin_unlock(¤t->sighand->siglock); 250 write_unlock_irq(&tasklist_lock); 251 retval = -EAGAIN; 252 goto bad_fork_cleanup_namespace; 253 } 254 p->tgid = current->tgid; 255 p->group_leader = current->group_leader; 256 257 if (current->signal->group_stop_count > 0) { 258 /* 259 * There is an all-stop in progress for the group. 260 * We ourselves will stop as soon as we check signals. 261 * Make the new thread part of that group stop too. 
262 */ 263 current->signal->group_stop_count++; 264 set_tsk_thread_flag(p, TIF_SIGPENDING); 265 } 266 267 spin_unlock(¤t->sighand->siglock); 268 } 269 270 SET_LINKS(p); 271 if (p->ptrace & PT_PTRACED) 272 __ptrace_link(p, current->parent); 273 274 attach_pid(p, PIDTYPE_PID, p->pid); 275 if (thread_group_leader(p)) { 276 attach_pid(p, PIDTYPE_TGID, p->tgid); 277 attach_pid(p, PIDTYPE_PGID, process_group(p)); 278 attach_pid(p, PIDTYPE_SID, p->session); 279 if (p->pid) 280 __get_cpu_var(process_counts)++; 281 } else 282 link_pid(p, p->pids + PIDTYPE_TGID, &p->group_leader->pids[PIDTYPE_TGID].pid); 283 284 nr_threads++; 285 write_unlock_irq(&tasklist_lock); 286 retval = 0; 287 288 fork_out: 289 if (retval) 290 return ERR_PTR(retval); 291 return p; 292 293 bad_fork_cleanup_namespace: 294 exit_namespace(p); 295 bad_fork_cleanup_mm: 296 exit_mm(p); 297 bad_fork_cleanup_signal: 298 exit_signal(p); 299 bad_fork_cleanup_sighand: 300 exit_sighand(p); 301 bad_fork_cleanup_fs: 302 exit_fs(p); /* blocking */ 303 bad_fork_cleanup_files: 304 exit_files(p); /* blocking */ 305 bad_fork_cleanup_semundo: 306 exit_sem(p); 307 bad_fork_cleanup_security: 308 security_task_free(p); 309 bad_fork_cleanup: 310 if (p->pid > 0) 311 free_pidmap(p->pid); 312 if (p->binfmt) 313 module_put(p->binfmt->module); 314 bad_fork_cleanup_put_domain: 315 module_put(p->thread_info->exec_domain->module); 316 bad_fork_cleanup_count: 317 atomic_dec(&p->user->processes); 318 free_uid(p->user); 319 bad_fork_free: 320 free_task(p); 321 goto fork_out; 322 }
17行:struct task_struct 结构体包含了进程相关的所有属性和信息(也叫进程控制块, Process Control Block, PCB)。包含:进程属性相关信息,进程间关系,进程调度信息,内存管理信息,文件管理信息,信号处理相关信息,资源限制相关信息。
19,26,34,41行:检查flags标记位, clone_flags 是在调用do_fork时的入参,不同的函数调用,参数不同。(通常对应的是不同的系统调用,fork,vfork)
53行:安全性检查,询问Linux Security Module(LSM)看当前任务是否可以创建一个新任务。
58行:为进程分配物理页面。其中调用 (alloc_task_struct,alloc_thread_info再调用__get_free_pages申请物理页面)
63,64行:检查进程资源限制。user指针指向user_struct 结构体,一个用户通常有多个进程,这些进程共享同一个结构体。rlim指向资源限制结构体。
97行:复制flags, CLONE_IDLETASK代表0号进程。如果不是0号进程,申请pid。
101行:pid循环使用,使用pid位图来管理。默认pid最大值是32767,在64位系统中,用户可以通过写/proc/sys/kernel/pid_max文件,扩展到4194303。
142行:LSM Linux安全模块(后续学习)
145行:复制IPC信息。通过get_undo_list申请IPC结构体内存空间(是一个链表,并将链表放入undo_list中,将支持ipc的进程链接到一起。不支持ipc则设置为NULL.)
147行:复制已打开文件的控制结构,只有在CLONE_FILES标记位为0时才进行,否则共享父进程的结构。共享和复制的区别在于,如果是共享,子进程对文件操作会影响到父进程(比如lseek())。
149行:复制进程目录,权限等信息。(与copy_files() 类似)
151,153行:复制信号相关的数据。
155行:复制内存相关的数据。(内存相关的比较复杂,后续需要深究。)
157行:复制命名空间。(参考:https://cloud.tencent.com/developer/article/2129136)
159行:拷贝进程堆栈。
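163,167行:set_child_tid 和 clear_child_tid 保存用户空间指针 child_tidptr(仅当设置了 CLONE_CHILD_SETTID / CLONE_CHILD_CLEARTID 标记时才保存,否则为NULL)。当新进程开始执行时,将该进程的pid写入 set_child_tid 指向的地址;clear_child_tid 指向的地址则在 mm_release() 时被清零。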
178行:parent_exec_id 是父进程的执行域, self_exec_id 是本进程的执行域。
181,182行:exit_signal 是当前进程退出时向父进程发出的信号,pdeath_signal是父进程退出时,向子进程发出的信号。
190,196行:time_slice 是时间片。将当前进程的时间片分成两份,一份给当前进程,一份给子进程。
197行:获取进程时间戳。
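217-220行:初始化线程组信息(tgid 默认等于自己的pid,group_leader 默认指向自己,若设置了CLONE_THREAD,会在254,255行改为父进程所在的线程组),并初始化ptrace相关的两个链表。真正将进程链入进程队列是在270行。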
235-239行:设置父进程,考虑到被调试的情况,需要parent 和 real_parent。
270行:将子进程的task_struct 链入到内核的进程队列中。
274-282行:处理进程关系(还没搞清楚)