Clean up forking and cloning.

The kernel now maintains the pthread_internal_t::tid field for us.
__clone was only used in one place, so inline it there rather than
leave such a dangerous function lying around. Also rename files to
match their content and remove some useless #includes.

Change-Id: I24299fb4a940e394de75f864ee36fdabbd9438f9
diff --git a/libc/arch-x86_64/bionic/clone.S b/libc/arch-x86_64/bionic/__bionic_clone.S
similarity index 91%
rename from libc/arch-x86_64/bionic/clone.S
rename to libc/arch-x86_64/bionic/__bionic_clone.S
index b37416b..309c365 100644
--- a/libc/arch-x86_64/bionic/clone.S
+++ b/libc/arch-x86_64/bionic/__bionic_clone.S
@@ -40,8 +40,13 @@
         movq    %rax, -8(%rsi)  # Write 'arg'.
 
         subq    $16, %rsi
+
+        # Translate to the kernel calling convention and swap the 'tls' and 'child_tid' arguments:
+        # the x86-64 clone syscall takes them in the opposite order to our other architectures and __bionic_clone.
         movq    %r8, %r10
         movq    %rcx, %r8
+
+        # Make the system call.
         movl    $__NR_clone, %eax
         syscall
         testl   %eax, %eax