Unified sysroot: kill arch-specific include dirs.

<machine/asm.h> was for internal use only.

<machine/fenv.h> is quite large, but can live in <bits/...>.
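
One plausible arrangement (the bits/ names below are illustrative, not
taken from this change) is for the single public header to select the
per-architecture copy:

    /* <fenv.h>, sketch only */
    #if defined(__mips__)
    #  include <bits/fenv_mips.h>  /* hypothetical file name */
    #elif defined(__arm__)
    #  include <bits/fenv_arm.h>   /* hypothetical file name */
    #endif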

<machine/regdef.h> is trivially replaced by writing $x instead of x in
our assembly sources.
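
For example, the first memset.S hunk below rewrites

    sltu    t2, a3, a2

as

    sltu    $t2, $a3, $a2

so the register names are the ones the assembler understands natively
rather than macros pulled in from <machine/regdef.h>.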

<machine/setjmp.h> is trivially inlined into <setjmp.h>.

<sgidefs.h> is unused.

Bug: N/A
Test: builds
Change-Id: Id05dbab43a2f9537486efb8f27a5ef167b055815
diff --git a/libc/arch-mips/string/memset.S b/libc/arch-mips/string/memset.S
index 7ea6753..85ba2e9 100644
--- a/libc/arch-mips/string/memset.S
+++ b/libc/arch-mips/string/memset.S
@@ -209,12 +209,12 @@
 LEAF(__memset_chk)
 #endif
 	.set	noreorder
-        sltu    t2, a3, a2
-        beq     t2, zero, memset
+        sltu    $t2, $a3, $a2
+        beq     $t2, $zero, memset
         nop
-        .cpsetup t9, t8, __memset_chk
-        LA      t9, __memset_chk_fail
-        jr      t9
+        .cpsetup $t9, $t8, __memset_chk
+        LA      $t9, __memset_chk_fail
+        jr      $t9
         nop
         .set	reorder
 END(__memset_chk)
@@ -229,41 +229,41 @@
 	.set	noreorder
 /* If the size is less than 2*NSIZE (8 or 16), go to L(lastb).  Regardless of
    size, copy dst pointer to v0 for the return value.  */
-	slti	t2,a2,(2 * NSIZE)
-	bne	t2,zero,L(lastb)
-	move	v0,a0
+	slti	$t2,$a2,(2 * NSIZE)
+	bne	$t2,$zero,L(lastb)
+	move	$v0,$a0
 
 /* If memset value is not zero, we copy it to all the bytes in a 32 or 64
    bit word.  */
-	beq	a1,zero,L(set0)		/* If memset value is zero no smear  */
-	PTR_SUBU a3,zero,a0
+	beq	$a1,$zero,L(set0)		/* If memset value is zero no smear  */
+	PTR_SUBU $a3,$zero,$a0
 	nop
 
 	/* smear byte into 32 or 64 bit word */
 #if ((__mips == 64) || (__mips == 32)) && (__mips_isa_rev >= 2)
 # ifdef USE_DOUBLE
-	dins	a1, a1, 8, 8        /* Replicate fill byte into half-word.  */
-	dins	a1, a1, 16, 16      /* Replicate fill byte into word.       */
-	dins	a1, a1, 32, 32      /* Replicate fill byte into dbl word.   */
+	dins	$a1, $a1, 8, 8        /* Replicate fill byte into half-word.  */
+	dins	$a1, $a1, 16, 16      /* Replicate fill byte into word.       */
+	dins	$a1, $a1, 32, 32      /* Replicate fill byte into dbl word.   */
 # else
-	ins	a1, a1, 8, 8        /* Replicate fill byte into half-word.  */
-	ins	a1, a1, 16, 16      /* Replicate fill byte into word.       */
+	ins	$a1, $a1, 8, 8        /* Replicate fill byte into half-word.  */
+	ins	$a1, $a1, 16, 16      /* Replicate fill byte into word.       */
 # endif
 #else
 # ifdef USE_DOUBLE
-        and     a1,0xff
-	dsll	t2,a1,8
-	or	a1,t2
-	dsll	t2,a1,16
-	or	a1,t2
-	dsll	t2,a1,32
-	or	a1,t2
+        and     $a1,0xff
+	dsll	$t2,$a1,8
+	or	$a1,$t2
+	dsll	$t2,$a1,16
+	or	$a1,$t2
+	dsll	$t2,$a1,32
+	or	$a1,$t2
 # else
-        and     a1,0xff
-	sll	t2,a1,8
-	or	a1,t2
-	sll	t2,a1,16
-	or	a1,t2
+        and     $a1,0xff
+	sll	$t2,$a1,8
+	or	$a1,$t2
+	sll	$t2,$a1,16
+	or	$a1,$t2
 # endif
 #endif
 
@@ -271,16 +271,16 @@
    aligned.  If it is already aligned just jump to L(aligned).  */
 L(set0):
 #ifndef R6_CODE
-	andi	t2,a3,(NSIZE-1)		/* word-unaligned address?          */
-	beq	t2,zero,L(aligned)	/* t2 is the unalignment count      */
-	PTR_SUBU a2,a2,t2
-	C_STHI	a1,0(a0)
-	PTR_ADDU a0,a0,t2
+	andi	$t2,$a3,(NSIZE-1)		/* word-unaligned address?          */
+	beq	$t2,$zero,L(aligned)	/* t2 is the unalignment count      */
+	PTR_SUBU $a2,$a2,$t2
+	C_STHI	$a1,0($a0)
+	PTR_ADDU $a0,$a0,$t2
 #else /* R6_CODE */
-	andi	t2,a0,(NSIZE-1)
-	lapc	t9,L(atable)
-	PTR_LSA	t9,t2,t9,2
-	jrc	t9
+	andi	$t2,$a0,(NSIZE-1)
+	lapc	$t9,L(atable)
+	PTR_LSA	$t9,$t2,$t9,2
+	jrc	$t9
 L(atable):
 	bc	L(aligned)
 # ifdef USE_DOUBLE
@@ -293,24 +293,24 @@
 	bc	L(lb2)
 	bc	L(lb1)
 L(lb7):
-	sb	a1,6(a0)
+	sb	$a1,6($a0)
 L(lb6):
-	sb	a1,5(a0)
+	sb	$a1,5($a0)
 L(lb5):
-	sb	a1,4(a0)
+	sb	$a1,4($a0)
 L(lb4):
-	sb	a1,3(a0)
+	sb	$a1,3($a0)
 L(lb3):
-	sb	a1,2(a0)
+	sb	$a1,2($a0)
 L(lb2):
-	sb	a1,1(a0)
+	sb	$a1,1($a0)
 L(lb1):
-	sb	a1,0(a0)
+	sb	$a1,0($a0)
 
-	li	t9,NSIZE
-	subu	t2,t9,t2
-	PTR_SUBU a2,a2,t2
-	PTR_ADDU a0,a0,t2
+	li	$t9,NSIZE
+	subu	$t2,$t9,$t2
+	PTR_SUBU $a2,$a2,$t2
+	PTR_ADDU $a0,$a0,$t2
 #endif /* R6_CODE */
 
 L(aligned):
@@ -320,11 +320,11 @@
    byte stores into one 8 byte store).  We know there are at least 4 bytes
    left to store or we would have jumped to L(lastb) earlier in the code.  */
 #ifdef DOUBLE_ALIGN
-	andi	t2,a3,4
-	beq	t2,zero,L(double_aligned)
-	PTR_SUBU a2,a2,t2
-	sw	a1,0(a0)
-	PTR_ADDU a0,a0,t2
+	andi	$t2,$a3,4
+	beq	$t2,$zero,L(double_aligned)
+	PTR_SUBU $a2,$a2,$t2
+	sw	$a1,0($a0)
+	PTR_ADDU $a0,$a0,$t2
 L(double_aligned):
 #endif
 
@@ -333,10 +333,10 @@
    chunks are copied and a3 to the dest pointer after all the 64/128 byte
    chunks have been copied.  We will loop, incrementing a0 until it equals
    a3.  */
-	andi	t8,a2,NSIZEDMASK /* any whole 64-byte/128-byte chunks? */
-	beq	a2,t8,L(chkw)	 /* if a2==t8, no 64-byte/128-byte chunks */
-	PTR_SUBU a3,a2,t8	 /* subtract from a2 the reminder */
-	PTR_ADDU a3,a0,a3	 /* Now a3 is the final dst after loop */
+	andi	$t8,$a2,NSIZEDMASK /* any whole 64-byte/128-byte chunks? */
+	beq	$a2,$t8,L(chkw)	 /* if a2==t8, no 64-byte/128-byte chunks */
+	PTR_SUBU $a3,$a2,$t8	 /* subtract from a2 the reminder */
+	PTR_ADDU $a3,$a0,$a3	 /* Now a3 is the final dst after loop */
 
 /* When in the loop we may prefetch with the 'prepare to store' hint,
    in this case the a0+x should not be past the "t0-32" address.  This
@@ -345,68 +345,68 @@
    will use "prefetch hint,128(a0)", so "t0-160" is the limit.  */
 #if defined(USE_PREFETCH) \
     && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
-	PTR_ADDU t0,a0,a2		/* t0 is the "past the end" address */
-	PTR_SUBU t9,t0,PREFETCH_LIMIT	/* t9 is the "last safe pref" address */
+	PTR_ADDU $t0,$a0,$a2		/* t0 is the "past the end" address */
+	PTR_SUBU $t9,$t0,PREFETCH_LIMIT	/* t9 is the "last safe pref" address */
 #endif
 #if defined(USE_PREFETCH) \
     && (PREFETCH_STORE_HINT != PREFETCH_HINT_PREPAREFORSTORE)
-	PREFETCH_FOR_STORE (1, a0)
-	PREFETCH_FOR_STORE (2, a0)
-	PREFETCH_FOR_STORE (3, a0)
+	PREFETCH_FOR_STORE (1, $a0)
+	PREFETCH_FOR_STORE (2, $a0)
+	PREFETCH_FOR_STORE (3, $a0)
 #endif
 
 L(loop16w):
 #if defined(USE_PREFETCH) \
     && (PREFETCH_STORE_HINT == PREFETCH_HINT_PREPAREFORSTORE)
-	sltu	v1,t9,a0		/* If a0 > t9 don't use next prefetch */
-	bgtz	v1,L(skip_pref)
+	sltu	$v1,$t9,$a0		/* If a0 > t9 don't use next prefetch */
+	bgtz	$v1,L(skip_pref)
 	nop
 #endif
 #ifndef R6_CODE
-	PREFETCH_FOR_STORE (4, a0)
-	PREFETCH_FOR_STORE (5, a0)
+	PREFETCH_FOR_STORE (4, $a0)
+	PREFETCH_FOR_STORE (5, $a0)
 #else
-	PREFETCH_FOR_STORE (2, a0)
+	PREFETCH_FOR_STORE (2, $a0)
 #endif
 L(skip_pref):
-	C_ST	a1,UNIT(0)(a0)
-	C_ST	a1,UNIT(1)(a0)
-	C_ST	a1,UNIT(2)(a0)
-	C_ST	a1,UNIT(3)(a0)
-	C_ST	a1,UNIT(4)(a0)
-	C_ST	a1,UNIT(5)(a0)
-	C_ST	a1,UNIT(6)(a0)
-	C_ST	a1,UNIT(7)(a0)
-	C_ST	a1,UNIT(8)(a0)
-	C_ST	a1,UNIT(9)(a0)
-	C_ST	a1,UNIT(10)(a0)
-	C_ST	a1,UNIT(11)(a0)
-	C_ST	a1,UNIT(12)(a0)
-	C_ST	a1,UNIT(13)(a0)
-	C_ST	a1,UNIT(14)(a0)
-	C_ST	a1,UNIT(15)(a0)
-	PTR_ADDIU a0,a0,UNIT(16)	/* adding 64/128 to dest */
-	bne	a0,a3,L(loop16w)
+	C_ST	$a1,UNIT(0)($a0)
+	C_ST	$a1,UNIT(1)($a0)
+	C_ST	$a1,UNIT(2)($a0)
+	C_ST	$a1,UNIT(3)($a0)
+	C_ST	$a1,UNIT(4)($a0)
+	C_ST	$a1,UNIT(5)($a0)
+	C_ST	$a1,UNIT(6)($a0)
+	C_ST	$a1,UNIT(7)($a0)
+	C_ST	$a1,UNIT(8)($a0)
+	C_ST	$a1,UNIT(9)($a0)
+	C_ST	$a1,UNIT(10)($a0)
+	C_ST	$a1,UNIT(11)($a0)
+	C_ST	$a1,UNIT(12)($a0)
+	C_ST	$a1,UNIT(13)($a0)
+	C_ST	$a1,UNIT(14)($a0)
+	C_ST	$a1,UNIT(15)($a0)
+	PTR_ADDIU $a0,$a0,UNIT(16)	/* adding 64/128 to dest */
+	bne	$a0,$a3,L(loop16w)
 	nop
-	move	a2,t8
+	move	$a2,$t8
 
 /* Here we have dest word-aligned but less than 64-bytes or 128 bytes to go.
    Check for a 32(64) byte chunk and copy if if there is one.  Otherwise
    jump down to L(chk1w) to handle the tail end of the copy.  */
 L(chkw):
-	andi	t8,a2,NSIZEMASK	/* is there a 32-byte/64-byte chunk.  */
+	andi	$t8,$a2,NSIZEMASK	/* is there a 32-byte/64-byte chunk.  */
 				/* the t8 is the reminder count past 32-bytes */
-	beq	a2,t8,L(chk1w)/* when a2==t8, no 32-byte chunk */
+	beq	$a2,$t8,L(chk1w)/* when a2==t8, no 32-byte chunk */
 	nop
-	C_ST	a1,UNIT(0)(a0)
-	C_ST	a1,UNIT(1)(a0)
-	C_ST	a1,UNIT(2)(a0)
-	C_ST	a1,UNIT(3)(a0)
-	C_ST	a1,UNIT(4)(a0)
-	C_ST	a1,UNIT(5)(a0)
-	C_ST	a1,UNIT(6)(a0)
-	C_ST	a1,UNIT(7)(a0)
-	PTR_ADDIU a0,a0,UNIT(8)
+	C_ST	$a1,UNIT(0)($a0)
+	C_ST	$a1,UNIT(1)($a0)
+	C_ST	$a1,UNIT(2)($a0)
+	C_ST	$a1,UNIT(3)($a0)
+	C_ST	$a1,UNIT(4)($a0)
+	C_ST	$a1,UNIT(5)($a0)
+	C_ST	$a1,UNIT(6)($a0)
+	C_ST	$a1,UNIT(7)($a0)
+	PTR_ADDIU $a0,$a0,UNIT(8)
 
 /* Here we have less than 32(64) bytes to set.  Set up for a loop to
    copy one word (or double word) at a time.  Set a2 to count how many
@@ -414,27 +414,27 @@
    copied and a3 to the dest pointer after all the (d)word chunks have
    been copied.  We will loop, incrementing a0 until a0 equals a3.  */
 L(chk1w):
-	andi	a2,t8,(NSIZE-1)	/* a2 is the reminder past one (d)word chunks */
-	beq	a2,t8,L(lastb)
-	PTR_SUBU a3,t8,a2	/* a3 is count of bytes in one (d)word chunks */
-	PTR_ADDU a3,a0,a3	/* a3 is the dst address after loop */
+	andi	$a2,$t8,(NSIZE-1)	/* a2 is the reminder past one (d)word chunks */
+	beq	$a2,$t8,L(lastb)
+	PTR_SUBU $a3,$t8,$a2	/* a3 is count of bytes in one (d)word chunks */
+	PTR_ADDU $a3,$a0,$a3	/* a3 is the dst address after loop */
 
 /* copying in words (4-byte or 8 byte chunks) */
 L(wordCopy_loop):
-	PTR_ADDIU a0,a0,UNIT(1)
-	bne	a0,a3,L(wordCopy_loop)
-	C_ST	a1,UNIT(-1)(a0)
+	PTR_ADDIU $a0,$a0,UNIT(1)
+	bne	$a0,$a3,L(wordCopy_loop)
+	C_ST	$a1,UNIT(-1)($a0)
 
 /* Copy the last 8 (or 16) bytes */
 L(lastb):
-	blez	a2,L(leave)
-	PTR_ADDU a3,a0,a2       /* a3 is the last dst address */
+	blez	$a2,L(leave)
+	PTR_ADDU $a3,$a0,$a2       /* a3 is the last dst address */
 L(lastbloop):
-	PTR_ADDIU a0,a0,1
-	bne	a0,a3,L(lastbloop)
-	sb	a1,-1(a0)
+	PTR_ADDIU $a0,$a0,1
+	bne	$a0,$a3,L(lastbloop)
+	sb	$a1,-1($a0)
 L(leave):
-	j	ra
+	j	$ra
 	nop
 
 	.set	at
diff --git a/libc/arch-mips/string/strcmp.S b/libc/arch-mips/string/strcmp.S
index e1faf2d..4791a0d 100644
--- a/libc/arch-mips/string/strcmp.S
+++ b/libc/arch-mips/string/strcmp.S
@@ -100,18 +100,18 @@
    instructions so that the nop is not needed but testing showed that this
    code is actually faster (based on glibc strcmp test).  */
 #define BYTECMP01(OFFSET) \
-    lbu v0, OFFSET(a0); \
-    lbu v1, OFFSET(a1); \
-    beq v0, zero, L(bexit01); \
+    lbu $v0, OFFSET($a0); \
+    lbu $v1, OFFSET($a1); \
+    beq $v0, $zero, L(bexit01); \
     nop; \
-    bne v0, v1, L(bexit01)
+    bne $v0, $v1, L(bexit01)
 
 #define BYTECMP89(OFFSET) \
-    lbu t8, OFFSET(a0); \
-    lbu t9, OFFSET(a1); \
-    beq t8, zero, L(bexit89); \
+    lbu $t8, OFFSET($a0); \
+    lbu $t9, OFFSET($a1); \
+    beq $t8, $zero, L(bexit89); \
     nop;    \
-    bne t8, t9, L(bexit89)
+    bne $t8, $t9, L(bexit89)
 
 /* Allow the routine to be named something else if desired.  */
 #ifndef STRCMP_NAME
@@ -126,46 +126,46 @@
     .set    nomips16
     .set    noreorder
 
-    andi t1, a1, (NSIZE - 1)
-    beqz t1, L(exitalign)
-    or   t0, zero, NSIZE
-    SUBU t1, t0, t1 #process (NSIZE - 1) bytes at max
+    andi $t1, $a1, (NSIZE - 1)
+    beqz $t1, L(exitalign)
+    or   $t0, $zero, NSIZE
+    SUBU $t1, $t0, $t1 #process (NSIZE - 1) bytes at max
 
 L(alignloop): #do by bytes until a1 aligned
     BYTECMP01(0)
-    SUBU t1, t1, 0x1
-    PTR_ADDIU a0, a0, 0x1
-    bnez  t1, L(alignloop)
-    PTR_ADDIU a1, a1, 0x1
+    SUBU $t1, $t1, 0x1
+    PTR_ADDIU $a0, $a0, 0x1
+    bnez  $t1, L(alignloop)
+    PTR_ADDIU $a1, $a1, 0x1
 
 L(exitalign):
 
 /* string a1 is NSIZE byte aligned at this point. */
 
-    lui t8, 0x0101
-    ori t8, 0x0101
-    lui t9, 0x7f7f
-    ori t9, 0x7f7f
+    lui $t8, 0x0101
+    ori $t8, 0x0101
+    lui $t9, 0x7f7f
+    ori $t9, 0x7f7f
 #if __mips64
-    dsll t1, t8, 32
-    or  t8, t1
-    dsll t1, t9, 32
-    or  t9, t1
+    dsll $t1, $t8, 32
+    or  $t8, $t1
+    dsll $t1, $t9, 32
+    or  $t9, $t1
 #endif
 
-    andi t2, a0, (NSIZE - 1) #check if a0 aligned
-    SUBU t3, t0, t2 #t3 will be used as shifter
-    bnez t2, L(uloopenter)
-    SUBU a2, a0, t2 #bring back a0 to aligned position
+    andi $t2, $a0, (NSIZE - 1) #check if a0 aligned
+    SUBU $t3, $t0, $t2 #t3 will be used as shifter
+    bnez $t2, L(uloopenter)
+    SUBU $a2, $a0, $t2 #bring back a0 to aligned position
 
 #define STRCMPW(OFFSET) \
-    LW   v0, OFFSET(a0); \
-    LW   v1, OFFSET(a1); \
-    SUBU t0, v0, t8; \
-    bne  v0, v1, L(worddiff); \
-    nor  t1, v0, t9; \
-    and  t0, t0, t1; \
-    bne  t0, zero, L(returnzero);\
+    LW   $v0, OFFSET($a0); \
+    LW   $v1, OFFSET($a1); \
+    SUBU $t0, $v0, $t8; \
+    bne  $v0, $v1, L(worddiff); \
+    nor  $t1, $v0, $t9; \
+    and  $t0, $t0, $t1; \
+    bne  $t0, $zero, L(returnzero);\
 
 L(wordloop):
     STRCMPW(0 * NSIZE)
@@ -183,34 +183,34 @@
     STRCMPW(6 * NSIZE)
     DELAY_READ
     STRCMPW(7 * NSIZE)
-    PTR_ADDIU a0, a0, (8 * NSIZE)
+    PTR_ADDIU $a0, $a0, (8 * NSIZE)
     b   L(wordloop)
-    PTR_ADDIU a1, a1, (8 * NSIZE)
+    PTR_ADDIU $a1, $a1, (8 * NSIZE)
 
 #define USTRCMPW(OFFSET) \
-    LW  v1, OFFSET(a1); \
-    SUBU    t0, v0, t8; \
-    nor t1, v0, t9; \
-    and t0, t0, t1; \
-    bne t0, zero, L(worddiff); \
-    SRL v0, t2; \
-    LW  a3, (OFFSET + NSIZE)(a2); \
-    SUBU    t0, v1, t8; \
-    SLL t1, a3, t3; \
-    or v0, v0, t1; \
-    bne v0, v1, L(worddiff); \
-    nor t1, v1, t9; \
-    and t0, t0, t1; \
-    bne t0, zero, L(returnzero); \
-    move v0, a3;\
+    LW  $v1, OFFSET($a1); \
+    SUBU    $t0, $v0, $t8; \
+    nor $t1, $v0, $t9; \
+    and $t0, $t0, $t1; \
+    bne $t0, $zero, L(worddiff); \
+    SRL $v0, $t2; \
+    LW  $a3, (OFFSET + NSIZE)($a2); \
+    SUBU    $t0, $v1, $t8; \
+    SLL $t1, $a3, $t3; \
+    or $v0, $v0, $t1; \
+    bne $v0, $v1, L(worddiff); \
+    nor $t1, $v1, $t9; \
+    and $t0, $t0, $t1; \
+    bne $t0, $zero, L(returnzero); \
+    move $v0, $a3;\
 
 L(uloopenter):
-    LW  v0, 0(a2)
-    SLL t2, 3  #multiply by 8
-    SLL t3, 3  #multiply by 8
-    li  a3, -1 #all 1s
-    SRL a3, t3
-    or v0, a3 #replace with all 1s if zeros in unintented read
+    LW  $v0, 0($a2)
+    SLL $t2, 3  #multiply by 8
+    SLL $t3, 3  #multiply by 8
+    li  $a3, -1 #all 1s
+    SRL $a3, $t3
+    or $v0, $a3 #replace with all 1s if zeros in unintended read
 
 L(uwordloop):
     USTRCMPW(0 * NSIZE)
@@ -221,114 +221,114 @@
     USTRCMPW(5 * NSIZE)
     USTRCMPW(6 * NSIZE)
     USTRCMPW(7 * NSIZE)
-    PTR_ADDIU a2, a2, (8 * NSIZE)
+    PTR_ADDIU $a2, $a2, (8 * NSIZE)
     b   L(uwordloop)
-    PTR_ADDIU a1, a1, (8 * NSIZE)
+    PTR_ADDIU $a1, $a1, (8 * NSIZE)
 
 L(returnzero):
-    j   ra
-    move    v0, zero
+    j   $ra
+    move    $v0, $zero
 
 #if __mips_isa_rev > 1
 #define EXT_COMPARE01(POS) \
-    EXT t0, v0, POS, 8; \
-    beq t0, zero, L(wexit01); \
-    EXT t1, v1, POS, 8; \
-    bne t0, t1, L(wexit01)
+    EXT $t0, $v0, POS, 8; \
+    beq $t0, $zero, L(wexit01); \
+    EXT $t1, $v1, POS, 8; \
+    bne $t0, $t1, L(wexit01)
 #define EXT_COMPARE89(POS) \
-    EXT t8, v0, POS, 8; \
-    beq t8, zero, L(wexit89); \
-    EXT t9, v1, POS, 8; \
-    bne t8, t9, L(wexit89)
+    EXT $t8, $v0, POS, 8; \
+    beq $t8, $zero, L(wexit89); \
+    EXT $t9, $v1, POS, 8; \
+    bne $t8, $t9, L(wexit89)
 #else
 #define EXT_COMPARE01(POS) \
-    SRL  t0, v0, POS; \
-    SRL  t1, v1, POS; \
-    andi t0, t0, 0xff; \
-    beq  t0, zero, L(wexit01); \
-    andi t1, t1, 0xff; \
-    bne  t0, t1, L(wexit01)
+    SRL  $t0, $v0, POS; \
+    SRL  $t1, $v1, POS; \
+    andi $t0, $t0, 0xff; \
+    beq  $t0, $zero, L(wexit01); \
+    andi $t1, $t1, 0xff; \
+    bne  $t0, $t1, L(wexit01)
 #define EXT_COMPARE89(POS) \
-    SRL  t8, v0, POS; \
-    SRL  t9, v1, POS; \
-    andi t8, t8, 0xff; \
-    beq  t8, zero, L(wexit89); \
-    andi t9, t9, 0xff; \
-    bne  t8, t9, L(wexit89)
+    SRL  $t8, $v0, POS; \
+    SRL  $t9, $v1, POS; \
+    andi $t8, $t8, 0xff; \
+    beq  $t8, $zero, L(wexit89); \
+    andi $t9, $t9, 0xff; \
+    bne  $t8, $t9, L(wexit89)
 #endif
 
 L(worddiff):
 #ifdef USE_CLZ
-    SUBU    t0, v0, t8
-    nor t1, v0, t9
-    and t1, t0, t1
-    xor t0, v0, v1
-    or  t0, t0, t1
+    SUBU    $t0, $v0, $t8
+    nor $t1, $v0, $t9
+    and $t1, $t0, $t1
+    xor $t0, $v0, $v1
+    or  $t0, $t0, $t1
 # if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-    wsbh    t0, t0
-    rotr    t0, t0, 16
+    wsbh    $t0, $t0
+    rotr    $t0, $t0, 16
 # endif
-    clz t1, t0
-    and t1, 0xf8
+    clz $t1, $t0
+    and $t1, 0xf8
 # if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-    neg t1
-    addu    t1, 24
+    neg $t1
+    addu    $t1, 24
 # endif
-    rotrv   v0, v0, t1
-    rotrv   v1, v1, t1
-    and v0, v0, 0xff
-    and v1, v1, 0xff
-    j   ra
-    SUBU    v0, v0, v1
+    rotrv   $v0, $v0, $t1
+    rotrv   $v1, $v1, $t1
+    and $v0, $v0, 0xff
+    and $v1, $v1, 0xff
+    j   $ra
+    SUBU    $v0, $v0, $v1
 #else /* USE_CLZ */
 # if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-    andi    t0, v0, 0xff
-    beq t0, zero, L(wexit01)
-    andi    t1, v1, 0xff
-    bne t0, t1, L(wexit01)
+    andi    $t0, $v0, 0xff
+    beq $t0, $zero, L(wexit01)
+    andi    $t1, $v1, 0xff
+    bne $t0, $t1, L(wexit01)
     EXT_COMPARE89(8)
     EXT_COMPARE01(16)
 #ifndef __mips64
-    SRL t8, v0, 24
-    SRL t9, v1, 24
+    SRL $t8, $v0, 24
+    SRL $t9, $v1, 24
 #else
     EXT_COMPARE89(24)
     EXT_COMPARE01(32)
     EXT_COMPARE89(40)
     EXT_COMPARE01(48)
-    SRL t8, v0, 56
-    SRL t9, v1, 56
+    SRL $t8, $v0, 56
+    SRL $t9, $v1, 56
 #endif
 
 # else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
 #ifdef __mips64
-    SRL t0, v0, 56
-    beq t0, zero, L(wexit01)
-    SRL t1, v1, 56
-    bne t0, t1, L(wexit01)
+    SRL $t0, $v0, 56
+    beq $t0, $zero, L(wexit01)
+    SRL $t1, $v1, 56
+    bne $t0, $t1, L(wexit01)
     EXT_COMPARE89(48)
     EXT_COMPARE01(40)
     EXT_COMPARE89(32)
     EXT_COMPARE01(24)
 #else
-    SRL t0, v0, 24
-    beq t0, zero, L(wexit01)
-    SRL t1, v1, 24
-    bne t0, t1, L(wexit01)
+    SRL $t0, $v0, 24
+    beq $t0, $zero, L(wexit01)
+    SRL $t1, $v1, 24
+    bne $t0, $t1, L(wexit01)
 #endif
     EXT_COMPARE89(16)
     EXT_COMPARE01(8)
 
-    andi    t8, v0, 0xff
-    andi    t9, v1, 0xff
+    andi    $t8, $v0, 0xff
+    andi    $t9, $v1, 0xff
 # endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
 
 L(wexit89):
-    j   ra
-    SUBU    v0, t8, t9
+    j   $ra
+    SUBU    $v0, $t8, $t9
 L(wexit01):
-    j   ra
-    SUBU    v0, t0, t1
+    j   $ra
+    SUBU    $v0, $t0, $t1
 #endif /* USE_CLZ */
 
 L(byteloop):
@@ -340,16 +340,16 @@
     BYTECMP89(5)
     BYTECMP01(6)
     BYTECMP89(7)
-    PTR_ADDIU a0, a0, 8
+    PTR_ADDIU $a0, $a0, 8
     b   L(byteloop)
-    PTR_ADDIU a1, a1, 8
+    PTR_ADDIU $a1, $a1, 8
 
 L(bexit01):
-    j   ra
-    SUBU    v0, v0, v1
+    j   $ra
+    SUBU    $v0, $v0, $v1
 L(bexit89):
-    j   ra
-    SUBU    v0, t8, t9
+    j   $ra
+    SUBU    $v0, $t8, $t9
 
     .set    at
     .set    reorder
diff --git a/libc/arch-mips/string/strncmp.S b/libc/arch-mips/string/strncmp.S
index 4867c44..49250a0 100644
--- a/libc/arch-mips/string/strncmp.S
+++ b/libc/arch-mips/string/strncmp.S
@@ -102,18 +102,18 @@
    instructions so that the nop is not needed but testing showed that this
    code is actually faster (based on glibc strcmp test).  */
 #define BYTECMP01(OFFSET) \
-    lbu v0, OFFSET(a0); \
-    lbu v1, OFFSET(a1); \
-    beq v0, zero, L(bexit01); \
+    lbu $v0, OFFSET($a0); \
+    lbu $v1, OFFSET($a1); \
+    beq $v0, $zero, L(bexit01); \
     nop; \
-    bne v0, v1, L(bexit01)
+    bne $v0, $v1, L(bexit01)
 
 #define BYTECMP89(OFFSET) \
-    lbu t8, OFFSET(a0); \
-    lbu t9, OFFSET(a1); \
-    beq t8, zero, L(bexit89); \
+    lbu $t8, OFFSET($a0); \
+    lbu $t9, OFFSET($a1); \
+    beq $t8, $zero, L(bexit89); \
     nop;    \
-    bne t8, t9, L(bexit89)
+    bne $t8, $t9, L(bexit89)
 
 /* Allow the routine to be named something else if desired.  */
 #ifndef STRNCMP_NAME
@@ -128,34 +128,34 @@
     .set    nomips16
     .set    noreorder
 
-    srl t0, a2, (2 + NSIZE / 4)
-    beqz  t0, L(byteloop) #process by bytes if less than (2 * NSIZE)
-    andi t1, a1, (NSIZE - 1)
-    beqz  t1, L(exitalign)
-    or   t0, zero, NSIZE
-    SUBU t1, t0, t1 #process (NSIZE - 1) bytes at max
-    SUBU a2, a2, t1 #dec count by t1
+    srl $t0, $a2, (2 + NSIZE / 4)
+    beqz  $t0, L(byteloop) #process by bytes if less than (2 * NSIZE)
+    andi $t1, $a1, (NSIZE - 1)
+    beqz  $t1, L(exitalign)
+    or   $t0, $zero, NSIZE
+    SUBU $t1, $t0, $t1 #process (NSIZE - 1) bytes at max
+    SUBU $a2, $a2, $t1 #dec count by t1
 
 L(alignloop): #do by bytes until a1 aligned
     BYTECMP01(0)
-    SUBU t1, t1, 0x1
-    PTR_ADDIU a0, a0, 0x1
-    bne  t1, zero, L(alignloop)
-    PTR_ADDIU a1, a1, 0x1
+    SUBU $t1, $t1, 0x1
+    PTR_ADDIU $a0, $a0, 0x1
+    bne  $t1, $zero, L(alignloop)
+    PTR_ADDIU $a1, $a1, 0x1
 
 L(exitalign):
 
 /* string a1 is NSIZE byte aligned at this point. */
 #ifndef __mips1
-    lui t8, 0x0101
-    ori t8, 0x0101
-    lui t9, 0x7f7f
-    ori t9, 0x7f7f
+    lui $t8, 0x0101
+    ori $t8, 0x0101
+    lui $t9, 0x7f7f
+    ori $t9, 0x7f7f
 #if __mips64
-    dsll t0, t8, 32
-    or  t8, t0
-    dsll t1, t9, 32
-    or  t9, t1
+    dsll $t0, $t8, 32
+    or  $t8, $t0
+    dsll $t1, $t9, 32
+    or  $t9, $t1
 #endif
 #endif
 
@@ -164,25 +164,25 @@
    remainings archs need to implemented with unaligned instructions */
 
 #if __mips1
-    andi t0, a0, (NSIZE - 1)
-    bne  t0, zero, L(byteloop)
+    andi $t0, $a0, (NSIZE - 1)
+    bne  $t0, $zero, L(byteloop)
 #elif __mips_isa_rev < 6
-    andi t0, a0, (NSIZE - 1)
-    bne  t0, zero, L(uwordloop)
+    andi $t0, $a0, (NSIZE - 1)
+    bne  $t0, $zero, L(uwordloop)
 #endif
 
 #define STRCMPW(OFFSET) \
-    LW   v0, (OFFSET)(a0); \
-    LW   v1, (OFFSET)(a1); \
-    SUBU t0, v0, t8; \
-    bne  v0, v1, L(worddiff); \
-    nor  t1, v0, t9; \
-    and  t0, t0, t1; \
-    bne  t0, zero, L(returnzero);\
+    LW   $v0, (OFFSET)($a0); \
+    LW   $v1, (OFFSET)($a1); \
+    SUBU $t0, $v0, $t8; \
+    bne  $v0, $v1, L(worddiff); \
+    nor  $t1, $v0, $t9; \
+    and  $t0, $t0, $t1; \
+    bne  $t0, $zero, L(returnzero);\
 
 L(wordloop):
-    SUBU t1, a2, (8 * NSIZE)
-    bltz t1, L(onewords)
+    SUBU $t1, $a2, (8 * NSIZE)
+    bltz $t1, L(onewords)
     STRCMPW(0 * NSIZE)
     DELAY_READ
     STRCMPW(1 * NSIZE)
@@ -198,34 +198,34 @@
     STRCMPW(6 * NSIZE)
     DELAY_READ
     STRCMPW(7 * NSIZE)
-    SUBU a2, a2, (8 * NSIZE)
-    PTR_ADDIU a0, a0, (8 * NSIZE)
+    SUBU $a2, $a2, (8 * NSIZE)
+    PTR_ADDIU $a0, $a0, (8 * NSIZE)
     b   L(wordloop)
-    PTR_ADDIU a1, a1, (8 * NSIZE)
+    PTR_ADDIU $a1, $a1, (8 * NSIZE)
 
 L(onewords):
-    SUBU t1, a2, NSIZE
-    bltz t1, L(byteloop)
+    SUBU $t1, $a2, NSIZE
+    bltz $t1, L(byteloop)
     STRCMPW(0)
-    SUBU a2, a2, NSIZE
-    PTR_ADDIU a0, a0, NSIZE
+    SUBU $a2, $a2, NSIZE
+    PTR_ADDIU $a0, $a0, NSIZE
     b   L(onewords)
-    PTR_ADDIU a1, a1, NSIZE
+    PTR_ADDIU $a1, $a1, NSIZE
 
 #if __mips_isa_rev < 6 && !__mips1
 #define USTRCMPW(OFFSET) \
-    LWR v0, (OFFSET)(a0); \
-    LWL v0, (OFFSET + NSIZE - 1)(a0); \
-    LW  v1, (OFFSET)(a1); \
-    SUBU    t0, v0, t8; \
-    bne v0, v1, L(worddiff); \
-    nor t1, v0, t9; \
-    and t0, t0, t1; \
-    bne t0, zero, L(returnzero);\
+    LWR $v0, (OFFSET)($a0); \
+    LWL $v0, (OFFSET + NSIZE - 1)($a0); \
+    LW  $v1, (OFFSET)($a1); \
+    SUBU    $t0, $v0, $t8; \
+    bne $v0, $v1, L(worddiff); \
+    nor $t1, $v0, $t9; \
+    and $t0, $t0, $t1; \
+    bne $t0, $zero, L(returnzero);\
 
 L(uwordloop):
-    SUBU t1, a2, (8 * NSIZE)
-    bltz t1, L(uonewords)
+    SUBU $t1, $a2, (8 * NSIZE)
+    bltz $t1, L(uonewords)
     USTRCMPW(0 * NSIZE)
     DELAY_READ
     USTRCMPW(1 * NSIZE)
@@ -241,154 +241,154 @@
     USTRCMPW(6 * NSIZE)
     DELAY_READ
     USTRCMPW(7 * NSIZE)
-    SUBU a2, a2, (8 * NSIZE)
-    PTR_ADDIU a0, a0, (8 * NSIZE)
+    SUBU $a2, $a2, (8 * NSIZE)
+    PTR_ADDIU $a0, $a0, (8 * NSIZE)
     b   L(uwordloop)
-    PTR_ADDIU a1, a1, (8 * NSIZE)
+    PTR_ADDIU $a1, $a1, (8 * NSIZE)
 
 L(uonewords):
-    SUBU t1, a2, NSIZE
-    bltz t1, L(byteloop)
+    SUBU $t1, $a2, NSIZE
+    bltz $t1, L(byteloop)
     USTRCMPW(0)
-    SUBU a2, a2, NSIZE
-    PTR_ADDIU a0, a0, NSIZE
+    SUBU $a2, $a2, NSIZE
+    PTR_ADDIU $a0, $a0, NSIZE
     b   L(uonewords)
-    PTR_ADDIU a1, a1, NSIZE
+    PTR_ADDIU $a1, $a1, NSIZE
 
 #endif
 
 L(returnzero):
-    j   ra
-    move    v0, zero
+    j   $ra
+    move    $v0, $zero
 
 #if __mips_isa_rev > 1
 #define EXT_COMPARE01(POS) \
-    EXT t0, v0, POS, 8; \
-    beq t0, zero, L(wexit01); \
-    EXT t1, v1, POS, 8; \
-    bne t0, t1, L(wexit01)
+    EXT $t0, $v0, POS, 8; \
+    beq $t0, $zero, L(wexit01); \
+    EXT $t1, $v1, POS, 8; \
+    bne $t0, $t1, L(wexit01)
 #define EXT_COMPARE89(POS) \
-    EXT t8, v0, POS, 8; \
-    beq t8, zero, L(wexit89); \
-    EXT t9, v1, POS, 8; \
-    bne t8, t9, L(wexit89)
+    EXT $t8, $v0, POS, 8; \
+    beq $t8, $zero, L(wexit89); \
+    EXT $t9, $v1, POS, 8; \
+    bne $t8, $t9, L(wexit89)
 #else
 #define EXT_COMPARE01(POS) \
-    SRL  t0, v0, POS; \
-    SRL  t1, v1, POS; \
-    andi t0, t0, 0xff; \
-    beq  t0, zero, L(wexit01); \
-    andi t1, t1, 0xff; \
-    bne  t0, t1, L(wexit01)
+    SRL  $t0, $v0, POS; \
+    SRL  $t1, $v1, POS; \
+    andi $t0, $t0, 0xff; \
+    beq  $t0, $zero, L(wexit01); \
+    andi $t1, $t1, 0xff; \
+    bne  $t0, $t1, L(wexit01)
 #define EXT_COMPARE89(POS) \
-    SRL  t8, v0, POS; \
-    SRL  t9, v1, POS; \
-    andi t8, t8, 0xff; \
-    beq  t8, zero, L(wexit89); \
-    andi t9, t9, 0xff; \
-    bne  t8, t9, L(wexit89)
+    SRL  $t8, $v0, POS; \
+    SRL  $t9, $v1, POS; \
+    andi $t8, $t8, 0xff; \
+    beq  $t8, $zero, L(wexit89); \
+    andi $t9, $t9, 0xff; \
+    bne  $t8, $t9, L(wexit89)
 #endif
 
 L(worddiff):
 #ifdef USE_CLZ
-    SUBU    t0, v0, t8
-    nor t1, v0, t9
-    and t1, t0, t1
-    xor t0, v0, v1
-    or  t0, t0, t1
+    SUBU    $t0, $v0, $t8
+    nor $t1, $v0, $t9
+    and $t1, $t0, $t1
+    xor $t0, $v0, $v1
+    or  $t0, $t0, $t1
 # if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-    wsbh    t0, t0
-    rotr    t0, t0, 16
+    wsbh    $t0, $t0
+    rotr    $t0, $t0, 16
 # endif
-    clz t1, t0
-    and t1, 0xf8
+    clz $t1, $t0
+    and $t1, 0xf8
 # if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-    neg t1
-    addu    t1, 24
+    neg $t1
+    addu    $t1, 24
 # endif
-    rotrv   v0, v0, t1
-    rotrv   v1, v1, t1
-    and v0, v0, 0xff
-    and v1, v1, 0xff
-    j   ra
-    SUBU    v0, v0, v1
+    rotrv   $v0, $v0, $t1
+    rotrv   $v1, $v1, $t1
+    and $v0, $v0, 0xff
+    and $v1, $v1, 0xff
+    j   $ra
+    SUBU    $v0, $v0, $v1
 #else /* USE_CLZ */
 # if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-    andi    t0, v0, 0xff
-    beq t0, zero, L(wexit01)
-    andi    t1, v1, 0xff
-    bne t0, t1, L(wexit01)
+    andi    $t0, $v0, 0xff
+    beq $t0, $zero, L(wexit01)
+    andi    $t1, $v1, 0xff
+    bne $t0, $t1, L(wexit01)
     EXT_COMPARE89(8)
     EXT_COMPARE01(16)
 #ifndef __mips64
-    SRL t8, v0, 24
-    SRL t9, v1, 24
+    SRL $t8, $v0, 24
+    SRL $t9, $v1, 24
 #else
     EXT_COMPARE89(24)
     EXT_COMPARE01(32)
     EXT_COMPARE89(40)
     EXT_COMPARE01(48)
-    SRL t8, v0, 56
-    SRL t9, v1, 56
+    SRL $t8, $v0, 56
+    SRL $t9, $v1, 56
 #endif
 
 # else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
 #ifdef __mips64
-    SRL t0, v0, 56
-    beq t0, zero, L(wexit01)
-    SRL t1, v1, 56
-    bne t0, t1, L(wexit01)
+    SRL $t0, $v0, 56
+    beq $t0, $zero, L(wexit01)
+    SRL $t1, $v1, 56
+    bne $t0, $t1, L(wexit01)
     EXT_COMPARE89(48)
     EXT_COMPARE01(40)
     EXT_COMPARE89(32)
     EXT_COMPARE01(24)
 #else
-    SRL t0, v0, 24
-    beq t0, zero, L(wexit01)
-    SRL t1, v1, 24
-    bne t0, t1, L(wexit01)
+    SRL $t0, $v0, 24
+    beq $t0, $zero, L(wexit01)
+    SRL $t1, $v1, 24
+    bne $t0, $t1, L(wexit01)
 #endif
     EXT_COMPARE89(16)
     EXT_COMPARE01(8)
 
-    andi    t8, v0, 0xff
-    andi    t9, v1, 0xff
+    andi    $t8, $v0, 0xff
+    andi    $t9, $v1, 0xff
 # endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
 
 L(wexit89):
-    j   ra
-    SUBU    v0, t8, t9
+    j   $ra
+    SUBU    $v0, $t8, $t9
 L(wexit01):
-    j   ra
-    SUBU    v0, t0, t1
+    j   $ra
+    SUBU    $v0, $t0, $t1
 #endif /* USE_CLZ */
 
 L(byteloop):
-    beq a2, zero, L(returnzero)
-    SUBU a2, a2, 1
+    beq $a2, $zero, L(returnzero)
+    SUBU $a2, $a2, 1
     BYTECMP01(0)
     nop
-    beq a2, zero, L(returnzero)
-    SUBU a2, a2, 1
+    beq $a2, $zero, L(returnzero)
+    SUBU $a2, $a2, 1
     BYTECMP89(1)
     nop
-    beq a2, zero, L(returnzero)
-    SUBU a2, a2, 1
+    beq $a2, $zero, L(returnzero)
+    SUBU $a2, $a2, 1
     BYTECMP01(2)
     nop
-    beq a2, zero, L(returnzero)
-    SUBU a2, a2, 1
+    beq $a2, $zero, L(returnzero)
+    SUBU $a2, $a2, 1
     BYTECMP89(3)
-    PTR_ADDIU a0, a0, 4
+    PTR_ADDIU $a0, $a0, 4
     b   L(byteloop)
-    PTR_ADDIU a1, a1, 4
+    PTR_ADDIU $a1, $a1, 4
 
 L(bexit01):
-    j   ra
-    SUBU    v0, v0, v1
+    j   $ra
+    SUBU    $v0, $v0, $v1
 L(bexit89):
-    j   ra
-    SUBU    v0, t8, t9
+    j   $ra
+    SUBU    $v0, $t8, $t9
 
     .set    at
     .set    reorder