Fix x86 __memset_chk.

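The stack offsets assumed we'd pushed %ebx, but __memset_chk read its
arguments without running ENTRANCE first. Unlike x86-64 we can't fall
through into memset, but we can jump past its ENTRANCE and length load
to avoid repeating work we've already done.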

Change-Id: I7e5f9be50983ed6178dd0c0534042da7ea30ad0c
diff --git a/libc/arch-x86/atom/string/sse2-memset-atom.S b/libc/arch-x86/atom/string/sse2-memset-atom.S
index 30fb3f1..e03cd1a 100644
--- a/libc/arch-x86/atom/string/sse2-memset-atom.S
+++ b/libc/arch-x86/atom/string/sse2-memset-atom.S
@@ -113,11 +113,12 @@
 #endif
 
 ENTRY(__memset_chk)
-  movl LEN(%esp), %ecx
-  cmpl %ecx, CHK_DST_LEN(%esp)
-  jbe memset
+  ENTRANCE  /* Same prologue memset runs before it reads LEN(%esp). */
 
-  jmp __memset_chk_fail
+  movl LEN(%esp), %ecx  /* %ecx = LEN argument. */
+  cmpl CHK_DST_LEN(%esp), %ecx  /* AT&T order: flags reflect %ecx - CHK_DST_LEN. */
+  ja __memset_chk_fail  /* Unsigned compare: LEN > CHK_DST_LEN branches to __memset_chk_fail. */
+  jmp L(memset_length_loaded)  /* Join memset after its ENTRANCE and LEN load; %ecx already holds LEN. */
 END(__memset_chk)
 
 	.section .text.sse2,"ax",@progbits
@@ -126,6 +127,7 @@
 	ENTRANCE
 
 	movl	LEN(%esp), %ecx
+L(memset_length_loaded):
 	movzbl	CHR(%esp), %eax
 	movb	%al, %ah
 	/* Fill the whole EAX with pattern.  */