improve GLES jumptables

In the common case this saves one instruction per jump
(which will help with the i-cache).

This change also gets rid of the "use slow TLS" option,
which was useless. As a result, architectures that don't have
assembly bindings will now perform much better.

Change-Id: I31be6c06ad2136b50ef3a1ac14682d7812ad40d2
diff --git a/opengl/libs/EGL/egl.cpp b/opengl/libs/EGL/egl.cpp
index 6ac8724..86637dc 100644
--- a/opengl/libs/EGL/egl.cpp
+++ b/opengl/libs/EGL/egl.cpp
@@ -230,9 +230,6 @@
 
 static void early_egl_init(void)
 {
-#if !USE_FAST_TLS_KEY
-    pthread_key_create(&gGLWrapperKey, NULL);
-#endif
 #if EGL_TRACE
     pthread_key_create(&gGLTraceKey, NULL);
     initEglTraceLevel();
@@ -341,42 +338,11 @@
 
 // ----------------------------------------------------------------------------
 
-#if USE_FAST_TLS_KEY
-
-// We have a dedicated TLS slot in bionic
-static inline gl_hooks_t const * volatile * get_tls_hooks() {
-    volatile void *tls_base = __get_tls();
-    gl_hooks_t const * volatile * tls_hooks =
-            reinterpret_cast<gl_hooks_t const * volatile *>(tls_base);
-    return tls_hooks;
-}
-
 void setGlThreadSpecific(gl_hooks_t const *value) {
     gl_hooks_t const * volatile * tls_hooks = get_tls_hooks();
     tls_hooks[TLS_SLOT_OPENGL_API] = value;
 }
 
-gl_hooks_t const* getGlThreadSpecific() {
-    gl_hooks_t const * volatile * tls_hooks = get_tls_hooks();
-    gl_hooks_t const* hooks = tls_hooks[TLS_SLOT_OPENGL_API];
-    if (hooks) return hooks;
-    return &gHooksNoContext;
-}
-
-#else
-
-void setGlThreadSpecific(gl_hooks_t const *value) {
-    pthread_setspecific(gGLWrapperKey, value);
-}
-
-gl_hooks_t const* getGlThreadSpecific() {
-    gl_hooks_t const* hooks =  static_cast<gl_hooks_t*>(pthread_getspecific(gGLWrapperKey));
-    if (hooks) return hooks;
-    return &gHooksNoContext;
-}
-
-#endif
-
 // ----------------------------------------------------------------------------
 // GL / EGL hooks
 // ----------------------------------------------------------------------------
diff --git a/opengl/libs/EGL/eglApi.cpp b/opengl/libs/EGL/eglApi.cpp
index 2bc9851..0358fcc 100644
--- a/opengl/libs/EGL/eglApi.cpp
+++ b/opengl/libs/EGL/eglApi.cpp
@@ -849,9 +849,7 @@
             }
 
             if (found) {
-#if USE_FAST_TLS_KEY
                 addr = gExtensionForwarders[slot];
-#endif
                 sGLExtentionMap.add(name, addr);
                 sGLExtentionSlot++;
             }
diff --git a/opengl/libs/EGL/getProcAddress.cpp b/opengl/libs/EGL/getProcAddress.cpp
index c160aa0..add2a79 100644
--- a/opengl/libs/EGL/getProcAddress.cpp
+++ b/opengl/libs/EGL/getProcAddress.cpp
@@ -34,9 +34,7 @@
 #undef GL_EXTENSION_LIST
 #undef GET_TLS
 
-#if USE_FAST_TLS_KEY
-
-    #if defined(__arm__)
+#if defined(__arm__)
 
     #define GET_TLS(reg) "mrc p15, 0, " #reg ", c13, c0, 3 \n"
 
@@ -58,7 +56,7 @@
             :                                                   \
             );
 
-    #elif defined(__mips__)
+#elif defined(__mips__)
 
         #define API_ENTRY(_api) __attribute__((noinline)) _api
 
@@ -88,27 +86,21 @@
                                           ext.extensions[_api]))    \
                 :                                                   \
             );
+#endif
 
-    #else
-        #error Unsupported architecture
-    #endif
-
+#if defined(CALL_GL_EXTENSION_API)
     #define GL_EXTENSION_NAME(_n)   __glExtFwd##_n
 
     #define GL_EXTENSION(_n)                         \
         void API_ENTRY(GL_EXTENSION_NAME(_n))() {    \
             CALL_GL_EXTENSION_API(_n);               \
         }
-
-
 #else
+        #define GL_EXTENSION_NAME(_n) NULL
 
-    #define GL_EXTENSION_NAME(_n) NULL
+        #define GL_EXTENSION(_n)
 
-    #define GL_EXTENSION(_n)
-
-    #warning "eglGetProcAddress() partially supported"
-
+        #warning "eglGetProcAddress() partially supported"
 #endif
 
 
diff --git a/opengl/libs/GLES2/gl2.cpp b/opengl/libs/GLES2/gl2.cpp
index fad2176..3134e56 100644
--- a/opengl/libs/GLES2/gl2.cpp
+++ b/opengl/libs/GLES2/gl2.cpp
@@ -40,13 +40,11 @@
 #undef CALL_GL_API
 #undef CALL_GL_API_RETURN
 
-#if USE_FAST_TLS_KEY
-
-  #if defined(__arm__)
+#if defined(__arm__) && !USE_SLOW_BINDING
 
     #define GET_TLS(reg) "mrc p15, 0, " #reg ", c13, c0, 3 \n"
 
-    #define API_ENTRY(_api) __attribute__((naked)) _api
+    #define API_ENTRY(_api) __attribute__((noinline)) _api
 
     #define CALL_GL_API(_api, ...)                              \
          asm volatile(                                          \
@@ -54,15 +52,13 @@
             "ldr   r12, [r12, %[tls]] \n"                       \
             "cmp   r12, #0            \n"                       \
             "ldrne pc,  [r12, %[api]] \n"                       \
-            "mov   r0, #0             \n"                       \
-            "bx    lr                 \n"                       \
             :                                                   \
             : [tls] "J"(TLS_SLOT_OPENGL_API*4),                 \
               [api] "J"(__builtin_offsetof(gl_hooks_t, gl._api))    \
             :                                                   \
             );
 
-  #elif defined(__mips__)
+#elif defined(__mips__) && !USE_SLOW_BINDING
 
     #define API_ENTRY(_api) __attribute__((noinline)) _api
 
@@ -94,30 +90,21 @@
             :                                                    \
             );
 
-  #else
-
-    #error Unsupported architecture
-
-  #endif
-
-    #define CALL_GL_API_RETURN(_api, ...) \
-        CALL_GL_API(_api, __VA_ARGS__) \
-        return 0; // placate gcc's warnings. never reached.
-
 #else
 
     #define API_ENTRY(_api) _api
 
     #define CALL_GL_API(_api, ...)                                       \
         gl_hooks_t::gl_t const * const _c = &getGlThreadSpecific()->gl;  \
-        _c->_api(__VA_ARGS__);
-
-    #define CALL_GL_API_RETURN(_api, ...)                                \
-        gl_hooks_t::gl_t const * const _c = &getGlThreadSpecific()->gl;  \
-        return _c->_api(__VA_ARGS__)
+        if (_c) return _c->_api(__VA_ARGS__);
 
 #endif
 
+#define CALL_GL_API_RETURN(_api, ...) \
+    CALL_GL_API(_api, __VA_ARGS__) \
+    return 0;
+
+
 
 extern "C" {
 #include "gl3_api.in"
@@ -139,7 +126,8 @@
 {
     const GLubyte * ret = egl_get_string_for_current_context(name);
     if (ret == NULL) {
-        ret = __glGetString(name);
+        gl_hooks_t::gl_t const * const _c = &getGlThreadSpecific()->gl;
+        ret = _c->glGetString(name);
     }
     return ret;
 }
diff --git a/opengl/libs/GLES_CM/gl.cpp b/opengl/libs/GLES_CM/gl.cpp
index a5bbdc6..18ef6f9 100644
--- a/opengl/libs/GLES_CM/gl.cpp
+++ b/opengl/libs/GLES_CM/gl.cpp
@@ -31,9 +31,6 @@
 
 using namespace android;
 
-// set this to 1 for crude GL debugging
-#define CHECK_FOR_GL_ERRORS     0
-
 // ----------------------------------------------------------------------------
 // extensions for the framework
 // ----------------------------------------------------------------------------
@@ -95,13 +92,11 @@
 #undef CALL_GL_API
 #undef CALL_GL_API_RETURN
 
-#if USE_FAST_TLS_KEY && !CHECK_FOR_GL_ERRORS
-
-  #if defined(__arm__)
+#if defined(__arm__) && !USE_SLOW_BINDING
 
     #define GET_TLS(reg) "mrc p15, 0, " #reg ", c13, c0, 3 \n"
 
-    #define API_ENTRY(_api) __attribute__((naked)) _api
+    #define API_ENTRY(_api) __attribute__((noinline)) _api
 
     #define CALL_GL_API(_api, ...)                              \
          asm volatile(                                          \
@@ -109,15 +104,13 @@
             "ldr   r12, [r12, %[tls]] \n"                       \
             "cmp   r12, #0            \n"                       \
             "ldrne pc,  [r12, %[api]] \n"                       \
-            "mov   r0, #0             \n"                       \
-            "bx    lr                 \n"                       \
             :                                                   \
             : [tls] "J"(TLS_SLOT_OPENGL_API*4),                 \
               [api] "J"(__builtin_offsetof(gl_hooks_t, gl._api))    \
             :                                                   \
             );
 
-  #elif defined(__mips__)
+#elif defined(__mips__) && !USE_SLOW_BINDING
 
     #define API_ENTRY(_api) __attribute__((noinline)) _api
 
@@ -149,43 +142,20 @@
             :                                                    \
             );
 
-  #else
-    #error Unsupported architecture
-  #endif
-
-    #define CALL_GL_API_RETURN(_api, ...) \
-        CALL_GL_API(_api, __VA_ARGS__) \
-        return 0; // placate gcc's warnings. never reached.
-
 #else
 
-    #if CHECK_FOR_GL_ERRORS
-    
-        #define CHECK_GL_ERRORS(_api) \
-            do { GLint err = glGetError(); \
-                ALOGE_IF(err != GL_NO_ERROR, "%s failed (0x%04X)", #_api, err); \
-            } while(false);
-
-    #else
-
-        #define CHECK_GL_ERRORS(_api) do { } while(false);
-
-    #endif
-
-
     #define API_ENTRY(_api) _api
 
-    #define CALL_GL_API(_api, ...)                                      \
-        gl_hooks_t::gl_t const * const _c = &getGlThreadSpecific()->gl; \
-        _c->_api(__VA_ARGS__);                                          \
-        CHECK_GL_ERRORS(_api)
-
-    #define CALL_GL_API_RETURN(_api, ...)                               \
-        gl_hooks_t::gl_t const * const _c = &getGlThreadSpecific()->gl; \
-        return _c->_api(__VA_ARGS__)
+    #define CALL_GL_API(_api, ...)                                       \
+        gl_hooks_t::gl_t const * const _c = &getGlThreadSpecific()->gl;  \
+        if (_c) return _c->_api(__VA_ARGS__);
 
 #endif
 
+#define CALL_GL_API_RETURN(_api, ...) \
+    CALL_GL_API(_api, __VA_ARGS__) \
+    return 0;
+
 
 extern "C" {
 #include "gl_api.in"
@@ -202,11 +172,11 @@
 
 extern "C" const GLubyte * __glGetString(GLenum name);
 
-const GLubyte * glGetString(GLenum name)
-{
+const GLubyte * glGetString(GLenum name) {
     const GLubyte * ret = egl_get_string_for_current_context(name);
     if (ret == NULL) {
-        ret = __glGetString(name);
+        gl_hooks_t::gl_t const * const _c = &getGlThreadSpecific()->gl;
+        ret = _c->glGetString(name);
     }
     return ret;
 }
diff --git a/opengl/libs/hooks.h b/opengl/libs/hooks.h
index b2a684c..4b43198 100644
--- a/opengl/libs/hooks.h
+++ b/opengl/libs/hooks.h
@@ -32,13 +32,11 @@
 #include <GLES3/gl3.h>
 #include <GLES3/gl3ext.h>
 
-#if !defined(__arm__) && !defined(__mips__)
-#define USE_SLOW_BINDING            1
-#else
-#define USE_SLOW_BINDING            0
-#endif
+// set to 1 for debugging
+#define USE_SLOW_BINDING    0
+
 #undef NELEM
-#define NELEM(x)                    (sizeof(x)/sizeof(*(x)))
+#define NELEM(x)            (sizeof(x)/sizeof(*(x)))
 
 // maximum number of GL extensions that can be used simultaneously in
 // a given process. this limitation exists because we need to have
@@ -47,15 +45,7 @@
 #define MAX_NUMBER_OF_GL_EXTENSIONS 256
 
 
-#if defined(HAVE_ANDROID_OS) && !USE_SLOW_BINDING && __OPTIMIZE__
-#define USE_FAST_TLS_KEY            1
-#else
-#define USE_FAST_TLS_KEY            0
-#endif
-
-#if USE_FAST_TLS_KEY
-#   include <bionic_tls.h>  /* special private C library header */
-#endif
+#include <bionic_tls.h>  /* special private C library header */
 
 // ----------------------------------------------------------------------------
 namespace android {
@@ -84,7 +74,20 @@
 #undef EGL_ENTRY
 
 EGLAPI void setGlThreadSpecific(gl_hooks_t const *value);
-EGLAPI gl_hooks_t const* getGlThreadSpecific();
+
+// We have a dedicated TLS slot in bionic
+inline gl_hooks_t const * volatile * get_tls_hooks() {
+    volatile void *tls_base = __get_tls();
+    gl_hooks_t const * volatile * tls_hooks =
+            reinterpret_cast<gl_hooks_t const * volatile *>(tls_base);
+    return tls_hooks;
+}
+
+inline EGLAPI gl_hooks_t const* getGlThreadSpecific() {
+    gl_hooks_t const * volatile * tls_hooks = get_tls_hooks();
+    gl_hooks_t const* hooks = tls_hooks[TLS_SLOT_OPENGL_API];
+    return hooks;
+}
 
 // ----------------------------------------------------------------------------
 }; // namespace android