| Andy McFadden | 4ce737f | 2011-02-04 14:45:57 -0800 | [diff] [blame] | 1 | /* | 
 | 2 |  * Copyright (C) 2011 The Android Open Source Project | 
 | 3 |  * | 
 | 4 |  * Licensed under the Apache License, Version 2.0 (the "License"); | 
 | 5 |  * you may not use this file except in compliance with the License. | 
 | 6 |  * You may obtain a copy of the License at | 
 | 7 |  * | 
 | 8 |  *      http://www.apache.org/licenses/LICENSE-2.0 | 
 | 9 |  * | 
 | 10 |  * Unless required by applicable law or agreed to in writing, software | 
 | 11 |  * distributed under the License is distributed on an "AS IS" BASIS, | 
 | 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
 | 13 |  * See the License for the specific language governing permissions and | 
 | 14 |  * limitations under the License. | 
 | 15 |  */ | 
 | 16 |  | 
 | 17 | #include <stdlib.h> | 
| Carl Shapiro | 2cc2b2b | 2011-03-21 20:01:03 -0700 | [diff] [blame] | 18 | #include <stdint.h> | 
| Andy McFadden | 4ce737f | 2011-02-04 14:45:57 -0800 | [diff] [blame] | 19 | #include <assert.h> | 
 | 20 |  | 
 | 21 | /* | 
 | 22 |  * Works like memmove(), except: | 
 | 23 |  * - if all arguments are at least 32-bit aligned, we guarantee that we | 
 | 24 |  *   will use operations that preserve atomicity of 32-bit values | 
 | 25 |  * - if not, we guarantee atomicity of 16-bit values | 
 | 26 |  * | 
 | 27 |  * If all three arguments are not at least 16-bit aligned, the behavior | 
 | 28 |  * of this function is undefined.  (We could remove this restriction by | 
 | 29 |  * testing for unaligned values and punting to memmove(), but that's | 
 | 30 |  * not currently useful.) | 
 | 31 |  * | 
 | 32 |  * TODO: add loop for 64-bit alignment | 
 | 33 |  * TODO: use __builtin_prefetch | 
 | 34 |  * TODO: write an ARM-optimized version | 
 | 35 |  */ | 
 | 36 | void _memmove_words(void* dest, const void* src, size_t n) | 
 | 37 | { | 
 | 38 |     assert((((uintptr_t) dest | (uintptr_t) src | n) & 0x01) == 0); | 
 | 39 |  | 
 | 40 |     char* d = (char*) dest; | 
 | 41 |     const char* s = (const char*) src; | 
 | 42 |     size_t copyCount; | 
 | 43 |  | 
 | 44 |     /* | 
 | 45 |      * If the source and destination pointers are the same, this is | 
 | 46 |      * an expensive no-op.  Testing for an empty move now allows us | 
 | 47 |      * to skip a check later. | 
 | 48 |      */ | 
 | 49 |     if (n == 0 || d == s) | 
 | 50 |         return; | 
 | 51 |  | 
 | 52 |     /* | 
 | 53 |      * Determine if the source and destination buffers will overlap if | 
 | 54 |      * we copy data forward (i.e. *dest++ = *src++). | 
 | 55 |      * | 
 | 56 |      * It's okay if the destination buffer starts before the source and | 
 | 57 |      * there is some overlap, because the reader is always ahead of the | 
 | 58 |      * writer. | 
 | 59 |      */ | 
 | 60 |     if (__builtin_expect((d < s) || ((size_t)(d - s) >= n), 1)) { | 
 | 61 |         /* | 
 | 62 |          * Copy forward.  We prefer 32-bit loads and stores even for 16-bit | 
 | 63 |          * data, so sort that out. | 
 | 64 |          */ | 
 | 65 |         if ((((uintptr_t) d | (uintptr_t) s) & 0x03) != 0) { | 
 | 66 |             /* | 
 | 67 |              * Not 32-bit aligned.  Two possibilities: | 
 | 68 |              * (1) Congruent, we can align to 32-bit by copying one 16-bit val | 
 | 69 |              * (2) Non-congruent, we can do one of: | 
 | 70 |              *   a. copy whole buffer as a series of 16-bit values | 
 | 71 |              *   b. load/store 32 bits, using shifts to ensure alignment | 
 | 72 |              *   c. just copy the as 32-bit values and assume the CPU | 
 | 73 |              *      will do a reasonable job | 
 | 74 |              * | 
 | 75 |              * We're currently using (a), which is suboptimal. | 
 | 76 |              */ | 
 | 77 |             if ((((uintptr_t) d ^ (uintptr_t) s) & 0x03) != 0) { | 
 | 78 |                 copyCount = n; | 
 | 79 |             } else { | 
 | 80 |                 copyCount = 2; | 
 | 81 |             } | 
 | 82 |             n -= copyCount; | 
 | 83 |             copyCount /= sizeof(uint16_t); | 
 | 84 |  | 
 | 85 |             while (copyCount--) { | 
 | 86 |                 *(uint16_t*)d = *(uint16_t*)s; | 
 | 87 |                 d += sizeof(uint16_t); | 
 | 88 |                 s += sizeof(uint16_t); | 
 | 89 |             } | 
 | 90 |         } | 
 | 91 |  | 
 | 92 |         /* | 
 | 93 |          * Copy 32-bit aligned words. | 
 | 94 |          */ | 
 | 95 |         copyCount = n / sizeof(uint32_t); | 
 | 96 |         while (copyCount--) { | 
 | 97 |             *(uint32_t*)d = *(uint32_t*)s; | 
 | 98 |             d += sizeof(uint32_t); | 
 | 99 |             s += sizeof(uint32_t); | 
 | 100 |         } | 
 | 101 |  | 
 | 102 |         /* | 
 | 103 |          * Check for leftovers.  Either we finished exactly, or we have | 
 | 104 |          * one remaining 16-bit chunk. | 
 | 105 |          */ | 
 | 106 |         if ((n & 0x02) != 0) { | 
 | 107 |             *(uint16_t*)d = *(uint16_t*)s; | 
 | 108 |         } | 
 | 109 |     } else { | 
 | 110 |         /* | 
 | 111 |          * Copy backward, starting at the end. | 
 | 112 |          */ | 
 | 113 |         d += n; | 
 | 114 |         s += n; | 
 | 115 |  | 
 | 116 |         if ((((uintptr_t) d | (uintptr_t) s) & 0x03) != 0) { | 
 | 117 |             /* try for 32-bit alignment */ | 
 | 118 |             if ((((uintptr_t) d ^ (uintptr_t) s) & 0x03) != 0) { | 
 | 119 |                 copyCount = n; | 
 | 120 |             } else { | 
 | 121 |                 copyCount = 2; | 
 | 122 |             } | 
 | 123 |             n -= copyCount; | 
 | 124 |             copyCount /= sizeof(uint16_t); | 
 | 125 |  | 
 | 126 |             while (copyCount--) { | 
 | 127 |                 d -= sizeof(uint16_t); | 
 | 128 |                 s -= sizeof(uint16_t); | 
 | 129 |                 *(uint16_t*)d = *(uint16_t*)s; | 
 | 130 |             } | 
 | 131 |         } | 
 | 132 |  | 
 | 133 |         /* copy 32-bit aligned words */ | 
 | 134 |         copyCount = n / sizeof(uint32_t); | 
 | 135 |         while (copyCount--) { | 
 | 136 |             d -= sizeof(uint32_t); | 
 | 137 |             s -= sizeof(uint32_t); | 
 | 138 |             *(uint32_t*)d = *(uint32_t*)s; | 
 | 139 |         } | 
 | 140 |  | 
 | 141 |         /* copy leftovers */ | 
 | 142 |         if ((n & 0x02) != 0) { | 
 | 143 |             d -= sizeof(uint16_t); | 
 | 144 |             s -= sizeof(uint16_t); | 
 | 145 |             *(uint16_t*)d = *(uint16_t*)s; | 
 | 146 |         } | 
 | 147 |     } | 
 | 148 | } |