Pierre Ossman | 82c7f31 | 2009-03-09 13:21:27 +0000 | [diff] [blame] | 1 | ; |
| 2 | ; jsimdext.inc - common declarations |
| 3 | ; |
| 4 | ; Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
| 5 | ; |
| 6 | ; Based on |
| 7 | ; x86 SIMD extension for IJG JPEG library - version 1.02 |
| 8 | ; |
| 9 | ; Copyright (C) 1999-2006, MIYASAKA Masaru. |
| 10 | ; |
| 11 | ; This software is provided 'as-is', without any express or implied |
| 12 | ; warranty. In no event will the authors be held liable for any damages |
| 13 | ; arising from the use of this software. |
| 14 | ; |
| 15 | ; Permission is granted to anyone to use this software for any purpose, |
| 16 | ; including commercial applications, and to alter it and redistribute it |
| 17 | ; freely, subject to the following restrictions: |
| 18 | ; |
| 19 | ; 1. The origin of this software must not be misrepresented; you must not |
| 20 | ; claim that you wrote the original software. If you use this software |
| 21 | ; in a product, an acknowledgment in the product documentation would be |
| 22 | ; appreciated but is not required. |
| 23 | ; 2. Altered source versions must be plainly marked as such, and must not be |
| 24 | ; misrepresented as being the original software. |
| 25 | ; 3. This notice may not be removed or altered from any source distribution. |
| 26 | ; |
| 27 | ; [TAB8] |
| 28 | |
| 29 | ; ========================================================================== |
| 30 | ; System-dependent configurations |
| 31 | |
; Per-object-format configuration. One branch is chosen by the symbol passed
; with -D on the nasm command line (WIN32, OBJ32, ELF, AOUT or MACHO, tested
; in that order); if none is defined the final %else branch is used.
; Every branch defines SEG_TEXT and SEG_CONST (the operands for SECTION).
; Only ELF, AOUT and MACHO define GOT_SYMBOL; the PIC section further down
; does "%undef PIC" when GOT_SYMBOL is missing. Only ELF defines EXTN here;
; all other branches get the underscore-prefixing default defined later.
| 32 | %ifdef WIN32 ; ----(nasm -fwin32 -DWIN32 ...)-------- |
| 33 | ; * Microsoft Visual C++ |
| 34 | ; * MinGW (Minimalist GNU for Windows) |
| 35 | ; * CygWin |
| 36 | ; * LCC-Win32 |
| 37 | |
| 38 | ; -- segment definition -- |
| 39 | ; |
| 40 | %define SEG_TEXT .text align=16 public use32 class=CODE |
| 41 | %define SEG_CONST .rdata align=16 public use32 class=CONST |
| 42 | |
| 43 | %elifdef OBJ32 ; ----(nasm -fobj -DOBJ32 ...)---------- |
| 44 | ; * Borland C++ (Win32) |
| 45 | |
| 46 | ; -- segment definition -- |
| 47 | ; |
| 48 | %define SEG_TEXT .text align=16 public use32 class=CODE |
| 49 | %define SEG_CONST .data align=16 public use32 class=DATA |
| 50 | |
DRC | 246c3d9 | 2009-06-25 20:38:31 +0000 | [diff] [blame] | 51 | %elifdef ELF ; ----(nasm -felf[64] -DELF ...)------------ |
Pierre Ossman | 82c7f31 | 2009-03-09 13:21:27 +0000 | [diff] [blame] | 52 | ; * Linux |
| 53 | ; * *BSD family Unix using elf format |
| 54 | ; * Unix System V, including Solaris x86, UnixWare and SCO Unix |
| 55 | |
| 56 | ; -- segment definition -- |
| 57 | ; |
; When __x86_64__ is defined the section attributes are reduced to
; "progbits align=16"; otherwise the explicit alloc/exec/write flags are given.
DRC | 246c3d9 | 2009-06-25 20:38:31 +0000 | [diff] [blame] | 58 | %ifdef __x86_64__ |
| 59 | %define SEG_TEXT .text progbits align=16 |
| 60 | %define SEG_CONST .rodata progbits align=16 |
| 61 | %else |
Pierre Ossman | 82c7f31 | 2009-03-09 13:21:27 +0000 | [diff] [blame] | 62 | %define SEG_TEXT .text progbits alloc exec nowrite align=16 |
| 63 | %define SEG_CONST .rodata progbits alloc noexec nowrite align=16 |
DRC | 246c3d9 | 2009-06-25 20:38:31 +0000 | [diff] [blame] | 64 | %endif |
Pierre Ossman | 82c7f31 | 2009-03-09 13:21:27 +0000 | [diff] [blame] | 65 | |
| 66 | ; To make the code position-independent, append -DPIC to the commandline |
| 67 | ; |
| 68 | %define GOT_SYMBOL _GLOBAL_OFFSET_TABLE_ ; ELF supports PIC |
| 69 | %define EXTN(name) name ; foo() -> foo |
| 70 | |
| 71 | %elifdef AOUT ; ----(nasm -faoutb/aout -DAOUT ...)---- |
| 72 | ; * Older Linux using a.out format (nasm -f aout -DAOUT ...) |
| 73 | ; * *BSD family Unix using a.out format (nasm -f aoutb -DAOUT ...) |
| 74 | |
| 75 | ; -- segment definition -- |
| 76 | ; |
| 77 | %define SEG_TEXT .text |
| 78 | %define SEG_CONST .data |
| 79 | |
| 80 | ; To make the code position-independent, append -DPIC to the commandline |
| 81 | ; |
| 82 | %define GOT_SYMBOL __GLOBAL_OFFSET_TABLE_ ; BSD-style a.out supports PIC |
| 83 | |
| 84 | %elifdef MACHO ; ----(nasm -fmacho -DMACHO ...)-------- |
| 85 | ; * NeXTstep/OpenStep/Rhapsody/Darwin/MacOS X (Mach-O format) |
| 86 | |
| 87 | ; -- segment definition -- |
| 88 | ; |
| 89 | %define SEG_TEXT .text ;align=16 ; nasm doesn't accept align=16. why? |
| 90 | %define SEG_CONST .rodata align=16 |
| 91 | |
| 92 | ; The generation of position-independent code (PIC) is the default on Darwin. |
| 93 | ; |
; PIC is forced on here regardless of the command line, and GOT_SYMBOL is set
; to the marker value _MACHO_PIC_ that the PIC section tests with %ifidn.
| 94 | %define PIC |
| 95 | %define GOT_SYMBOL _MACHO_PIC_ ; Mach-O style code-relative addressing |
| 96 | |
| 97 | %else ; ----(Other case)---------------------- |
| 98 | |
| 99 | ; -- segment definition -- |
| 100 | ; |
| 101 | %define SEG_TEXT .text |
| 102 | %define SEG_CONST .data |
| 103 | |
| 104 | %endif ; ---------------------------------------------- |
| 105 | |
| 106 | ; ========================================================================== |
| 107 | |
| 108 | ; -------------------------------------------------------------------------- |
| 109 | ; Common types |
| 110 | ; |
; Each logical type gets three names: the NASM size keyword (e.g. POINTER),
; its size in bytes (SIZEOF_*) and its width in bits (*_BIT).
; The SIZEOF_* / *_BIT aliases refer to SIZEOF_BYTE..SIZEOF_OWORD and
; BYTE_BIT..OWORD_BIT, which are only defined at the end of this block. That
; forward reference is fine because NASM expands the body of a %define when
; the macro is used, not when it is defined.
; POINTER is qword when __x86_64__ is defined and dword otherwise.
DRC | 246c3d9 | 2009-06-25 20:38:31 +0000 | [diff] [blame] | 111 | %ifdef __x86_64__ |
| 112 | %define POINTER qword ; general pointer type |
| 113 | %define SIZEOF_POINTER SIZEOF_QWORD ; sizeof(POINTER) |
| 114 | %define POINTER_BIT QWORD_BIT ; sizeof(POINTER)*BYTE_BIT |
| 115 | %else |
Pierre Ossman | 82c7f31 | 2009-03-09 13:21:27 +0000 | [diff] [blame] | 116 | %define POINTER dword ; general pointer type |
| 117 | %define SIZEOF_POINTER SIZEOF_DWORD ; sizeof(POINTER) |
| 118 | %define POINTER_BIT DWORD_BIT ; sizeof(POINTER)*BYTE_BIT |
DRC | 246c3d9 | 2009-06-25 20:38:31 +0000 | [diff] [blame] | 119 | %endif |
Pierre Ossman | 82c7f31 | 2009-03-09 13:21:27 +0000 | [diff] [blame] | 120 | |
Pierre Ossman | 3e0e2de | 2009-03-09 13:25:30 +0000 | [diff] [blame] | 121 | %define INT dword ; signed integer type |
| 122 | %define SIZEOF_INT SIZEOF_DWORD ; sizeof(INT) |
| 123 | %define INT_BIT DWORD_BIT ; sizeof(INT)*BYTE_BIT |
| 124 | |
Pierre Ossman | 2c2e54b | 2009-03-09 13:28:10 +0000 | [diff] [blame] | 125 | %define FP32 dword ; IEEE754 single |
| 126 | %define SIZEOF_FP32 SIZEOF_DWORD ; sizeof(FP32) |
| 127 | %define FP32_BIT DWORD_BIT ; sizeof(FP32)*BYTE_BIT |
| 128 | |
Pierre Ossman | 3e0e2de | 2009-03-09 13:25:30 +0000 | [diff] [blame] | 129 | %define MMWORD qword ; int64 (MMX register) |
| 130 | %define SIZEOF_MMWORD SIZEOF_QWORD ; sizeof(MMWORD) |
| 131 | %define MMWORD_BIT QWORD_BIT ; sizeof(MMWORD)*BYTE_BIT |
| 132 | |
Pierre Ossman | 0d37c57 | 2009-03-09 13:31:56 +0000 | [diff] [blame] | 133 | ; NASM is buggy and doesn't properly handle operand sizes for SSE |
| 134 | ; instructions, so for now we have to define XMMWORD as blank. |
; Consequence: "XMMWORD [mem]" expands to a plain "[mem]" with no size keyword,
; while SIZEOF_XMMWORD / XMMWORD_BIT still carry the real 16-byte / 128-bit size.
| 135 | %define XMMWORD ; int128 (SSE register) |
| 136 | %define SIZEOF_XMMWORD SIZEOF_OWORD ; sizeof(XMMWORD) |
| 137 | %define XMMWORD_BIT OWORD_BIT ; sizeof(XMMWORD)*BYTE_BIT |
| 138 | |
Pierre Ossman | 7469386 | 2009-03-09 13:34:17 +0000 | [diff] [blame] | 139 | ; Similar hacks for when we load a dword or MMWORD into an xmm# register |
; (both expand to nothing, so the memory operand is left unsized)
| 140 | %define XMM_DWORD |
| 141 | %define XMM_MMWORD |
| 142 | |
; Base size constants (bytes), used by the SIZEOF_* aliases above.
Pierre Ossman | 3e0e2de | 2009-03-09 13:25:30 +0000 | [diff] [blame] | 143 | %define SIZEOF_BYTE 1 ; sizeof(BYTE) |
| 144 | %define SIZEOF_WORD 2 ; sizeof(WORD) |
| 145 | %define SIZEOF_DWORD 4 ; sizeof(DWORD) |
| 146 | %define SIZEOF_QWORD 8 ; sizeof(QWORD) |
Pierre Ossman | 0d37c57 | 2009-03-09 13:31:56 +0000 | [diff] [blame] | 147 | %define SIZEOF_OWORD 16 ; sizeof(OWORD) |
Pierre Ossman | 3e0e2de | 2009-03-09 13:25:30 +0000 | [diff] [blame] | 148 | |
; Base width constants (bits), used by the *_BIT aliases above.
| 149 | %define BYTE_BIT 8 ; CHAR_BIT in C |
| 150 | %define WORD_BIT 16 ; sizeof(WORD)*BYTE_BIT |
| 151 | %define DWORD_BIT 32 ; sizeof(DWORD)*BYTE_BIT |
| 152 | %define QWORD_BIT 64 ; sizeof(QWORD)*BYTE_BIT |
Pierre Ossman | 0d37c57 | 2009-03-09 13:31:56 +0000 | [diff] [blame] | 153 | %define OWORD_BIT 128 ; sizeof(OWORD)*BYTE_BIT |
Pierre Ossman | 3e0e2de | 2009-03-09 13:25:30 +0000 | [diff] [blame] | 154 | |
Pierre Ossman | 82c7f31 | 2009-03-09 13:21:27 +0000 | [diff] [blame] | 155 | ; -------------------------------------------------------------------------- |
| 156 | ; External Symbol Name |
| 157 | ; |
; Fallback symbol decoration: EXTN(foo) pastes a leading underscore onto the
; name. It only takes effect when the platform section has not already
; defined EXTN (the ELF branch defines it as the undecorated name).
| 158 | %ifndef EXTN |
| 159 | %define EXTN(name) _ %+ name ; foo() -> _foo |
| 160 | %endif |
| 161 | |
| 162 | ; -------------------------------------------------------------------------- |
| 163 | ; Macros for position-independent code (PIC) support |
| 164 | ; |
; PIC can only be honoured when the platform section defined GOT_SYMBOL.
; Otherwise any -DPIC from the command line is dropped here, so the empty
; non-PIC versions of the macros below are selected.
| 165 | %ifndef GOT_SYMBOL |
| 166 | %undef PIC |
| 167 | %endif |
| 168 | |
; PIC helpers. The same five names are defined in every configuration:
;   GOTOFF(got,sym) - address expression for sym, relative to the base held in got
;   get_GOT r       - load the PIC base into r
;   pushpic x       - push x
;   poppic  x       - pop x
;   movpic  a, b    - mov a, b
; Without PIC, GOTOFF(got,sym) is simply (sym) and the four macros expand to
; nothing, so callers can use them unconditionally.
| 169 | %ifdef PIC ; ------------------------------------------- |
| 170 | |
| 171 | %ifidn GOT_SYMBOL,_MACHO_PIC_ ; -------------------- |
| 172 | |
| 173 | ; At present, nasm doesn't seem to support PIC generation for Mach-O. |
| 174 | ; The PIC support code below is a little tricky. |
| 175 | |
; const_base marks the current position in SEG_CONST and serves as the PIC
; base for this file. Note that the SECTION directive below is not undone
; here, so the constant section stays selected after it.
| 176 | SECTION SEG_CONST |
| 177 | const_base: |
| 178 | |
| 179 | %define GOTOFF(got,sym) (got) + (sym) - const_base |
| 180 | |
; get_GOT (Mach-O): %1 = run-time address of const_base.
; Steps: the call/ret pair loads the return address into ecx, the add turns
; it into the run-time address of %%ref, and the hand-encoded lea adds the
; assembler-computed distance (const_base - %%ref), which is borrowed from
; the rel32 field of a "jmp near const_base" that is never executed as a jump.
; Clobbers ecx (also when %1 is ebx) and the flags; ebp is saved/restored.
| 181 | %imacro get_GOT 1 |
| 182 | ; NOTE: this macro destroys ecx register. |
| 183 | call %%geteip |
| 184 | add ecx, byte (%%ref - $) |
| 185 | jmp short %%adjust |
| 186 | %%geteip: |
| 187 | mov ecx, POINTER [esp] |
| 188 | ret |
| 189 | %%adjust: |
| 190 | push ebp |
| 191 | xor ebp,ebp ; ebp = 0 |
| 192 | %ifidni %1,ebx ; (%1 == ebx) |
| 193 | ; db 0x8D,0x9C + jmp near const_base = |
| 194 | ; lea ebx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,9C,E9,(offset32) |
| 195 | db 0x8D,0x9C ; 8D,9C |
| 196 | jmp near const_base ; E9,(const_base-%%ref) |
| 197 | %%ref: |
| 198 | %else ; (%1 != ebx) |
| 199 | ; db 0x8D,0x8C + jmp near const_base = |
| 200 | ; lea ecx, [ecx+ebp*8+(const_base-%%ref)] ; 8D,8C,E9,(offset32) |
| 201 | db 0x8D,0x8C ; 8D,8C |
| 202 | jmp near const_base ; E9,(const_base-%%ref) |
| 203 | %%ref: mov %1, ecx |
| 204 | %endif ; (%1 == ebx) |
| 205 | pop ebp |
| 206 | %endmacro |
| 207 | |
| 208 | %else ; GOT_SYMBOL != _MACHO_PIC_ ---------------- |
| 209 | |
| 210 | %define GOTOFF(got,sym) (got) + (sym) wrt ..gotoff |
| 211 | |
; get_GOT (ELF / a.out): %1 = address of the global offset table.
; The call/ret pair loads the return address (the address of the add
; instruction) into %1; the "wrt ..gotpc" expression then converts it into
; the address of GOT_SYMBOL. The add modifies the flags.
; NOTE(review): the return address is read through [esp], i.e. 32-bit stack
; addressing - presumably this path is only assembled for 32-bit targets;
; confirm before enabling PIC together with __x86_64__.
| 212 | %imacro get_GOT 1 |
| 213 | extern GOT_SYMBOL |
| 214 | call %%geteip |
| 215 | add %1, GOT_SYMBOL + $$ - $ wrt ..gotpc |
| 216 | jmp short %%done |
| 217 | %%geteip: |
| 218 | mov %1, POINTER [esp] |
| 219 | ret |
| 220 | %%done: |
| 221 | %endmacro |
| 222 | |
| 223 | %endif ; GOT_SYMBOL == _MACHO_PIC_ ---------------- |
| 224 | |
; With PIC enabled these are thin wrappers around push / pop / mov.
| 225 | %imacro pushpic 1.nolist |
| 226 | push %1 |
| 227 | %endmacro |
| 228 | %imacro poppic 1.nolist |
| 229 | pop %1 |
| 230 | %endmacro |
| 231 | %imacro movpic 2.nolist |
| 232 | mov %1,%2 |
| 233 | %endmacro |
| 234 | |
| 235 | %else ; !PIC ----------------------------------------- |
| 236 | |
; Non-PIC build: symbols are addressed directly and the helper macros emit
; no code at all.
| 237 | %define GOTOFF(got,sym) (sym) |
| 238 | |
| 239 | %imacro get_GOT 1.nolist |
| 240 | %endmacro |
| 241 | %imacro pushpic 1.nolist |
| 242 | %endmacro |
| 243 | %imacro poppic 1.nolist |
| 244 | %endmacro |
| 245 | %imacro movpic 2.nolist |
| 246 | %endmacro |
| 247 | |
| 248 | %endif ; PIC ----------------------------------------- |
| 249 | |
| 250 | ; -------------------------------------------------------------------------- |
| 251 | ; Align the next instruction on {2,4,8,16,..}-byte boundary. |
| 252 | ; ".balign n,,m" in GNU as |
| 253 | ; |
; MSKLE(x,y): assembly-time mask comparing the low 16 bits of x and y.
;             Evaluates to an all-ones mask when x <= y and to 0 when x > y,
;             so "MSKLE(..) & count" keeps or cancels a repeat count.
; FILLB(b,n): number of bytes from position b up to the next n-byte boundary
;             of the current section (0 if b is already aligned); n must be a
;             power of two because the remainder is taken with "& (n-1)".
;
; alignx n [, max]
;   Pads code up to the next n-byte boundary. max defaults to 0xFFFF; if the
;   padding required at the start of the macro exceeds max, every "times"
;   count below is masked to 0 and nothing is emitted.
;   - Padding of 16 bytes or more is emitted entirely as single-byte nops.
;   - Shorter padding is built from the filler instructions below, tried from
;     the longest encoding to the shortest; each "times" line re-evaluates
;     FILLB($,n), i.e. the bytes still missing after the previous lines.
;   NOTE(review): the multi-byte fillers are encodings that write a 32-bit
;   register with its own value (lea/mov). In 64-bit mode a 32-bit register
;   write clears the upper half of the 64-bit register, so these are
;   presumably only safe there if the padding is never executed - confirm
;   against the call sites.
| 254 | %define MSKLE(x,y) (~(((y) & 0xFFFF) - ((x) & 0xFFFF)) >> 16) |
| 255 | %define FILLB(b,n) (($$-(b)) & ((n)-1)) |
| 256 | |
| 257 | %imacro alignx 1-2.nolist 0xFFFF |
| 258 | %%bs: times MSKLE(FILLB(%%bs,%1),%2) & MSKLE(16,FILLB($,%1)) & FILLB($,%1) \ |
| 259 | db 0x90 ; nop |
| 260 | times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/9 \ |
| 261 | db 0x8D,0x9C,0x23,0x00,0x00,0x00,0x00 ; lea ebx,[ebx+0x00000000] |
| 262 | times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/7 \ |
| 263 | db 0x8D,0xAC,0x25,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000] |
| 264 | times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/6 \ |
| 265 | db 0x8D,0xAD,0x00,0x00,0x00,0x00 ; lea ebp,[ebp+0x00000000] |
| 266 | times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/4 \ |
| 267 | db 0x8D,0x6C,0x25,0x00 ; lea ebp,[ebp+0x00] |
| 268 | times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/3 \ |
| 269 | db 0x8D,0x6D,0x00 ; lea ebp,[ebp+0x00] |
| 270 | times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/2 \ |
| 271 | db 0x8B,0xED ; mov ebp,ebp |
| 272 | times MSKLE(FILLB(%%bs,%1),%2) & FILLB($,%1)/1 \ |
| 273 | db 0x90 ; nop |
| 274 | %endmacro |
| 275 | |
| 276 | ; Align the next data on {2,4,8,16,..}-byte boundary. |
| 277 | ; |
; alignz n
;   Pads with zero bytes up to the next n-byte boundary, using NASM's
;   standard "align" macro with "db 0" as the fill. Intended for data, where
;   the instruction fillers emitted by alignx would be inappropriate.
| 278 | %imacro alignz 1.nolist |
| 279 | align %1, db 0 ; filling zeros |
| 280 | %endmacro |
| 281 | |
; Argument shuffling helpers, defined only when __x86_64__ is set.
;
; collect_args
;   Pushes r10..r15 (six 8-byte pushes, so rsp moves down by 48 bytes), then
;   copies rdi, rsi, rdx, rcx, r8, r9 - the six System V AMD64 integer
;   argument registers, in argument order - into r10, r11, r12, r13, r14, r15.
;   The source registers themselves are left unchanged.
;
; uncollect_args
;   Pops r15..r10 in the reverse order of the pushes above, restoring the
;   values those registers had before collect_args. Must be paired with
;   collect_args at the same stack depth.
DRC | 246c3d9 | 2009-06-25 20:38:31 +0000 | [diff] [blame] | 282 | %ifdef __x86_64__ |
| 283 | %imacro collect_args 0 |
| 284 | push r10 |
| 285 | push r11 |
| 286 | push r12 |
| 287 | push r13 |
| 288 | push r14 |
| 289 | push r15 |
| 290 | mov r10, rdi |
| 291 | mov r11, rsi |
| 292 | mov r12, rdx |
| 293 | mov r13, rcx |
| 294 | mov r14, r8 |
| 295 | mov r15, r9 |
| 296 | %endmacro |
| 297 | |
| 298 | %imacro uncollect_args 0 |
| 299 | pop r15 |
| 300 | pop r14 |
| 301 | pop r13 |
| 302 | pop r12 |
| 303 | pop r11 |
| 304 | pop r10 |
| 305 | %endmacro |
| 306 | |
| 307 | %endif |
Pierre Ossman | 82c7f31 | 2009-03-09 13:21:27 +0000 | [diff] [blame] | 308 | |
| 309 | ; -------------------------------------------------------------------------- |
| 310 | ; Defines picked up from the C headers |
| 311 | ; |
Pierre Ossman | 39170cf | 2009-03-16 13:34:18 +0000 | [diff] [blame] | 312 | %include "jsimdcfg.inc" |
Pierre Ossman | 82c7f31 | 2009-03-09 13:21:27 +0000 | [diff] [blame] | 313 | |
| 314 | ; -------------------------------------------------------------------------- |