changeset 449:d56536ef28e8

Improve render_elements’ speed a lot, and fix it in some corner cases. Thanks liori!
author Emmanuel Gil Peyrot <linkmauve@linkmauve.fr>
date Sat, 03 Aug 2013 15:49:11 +0200
parents 3bc37791f0a2
children 2a352118c55a
files pytouhou/ui/background.pyx pytouhou/ui/renderer.pxd pytouhou/ui/renderer.pyx pytouhou/ui/sprite.pyx pytouhou/ui/texture.pyx setup.py
diffstat 6 files changed, 88 insertions(+), 42 deletions(-) [+]
line wrap: on
line diff
--- a/pytouhou/ui/background.pyx
+++ b/pytouhou/ui/background.pyx
@@ -90,7 +90,8 @@ cdef class BackgroundRenderer:
 
                 nb_vertices += 4
 
-        self.texture, self.blendfunc = key
+        self.texture = key % MAX_TEXTURES
+        self.blendfunc = key // MAX_TEXTURES
         self.nb_vertices = nb_vertices
         self.vertex_buffer = <Vertex*> realloc(vertex_buffer, nb_vertices * sizeof(Vertex))
 
--- a/pytouhou/ui/renderer.pxd
+++ b/pytouhou/ui/renderer.pxd
@@ -1,3 +1,5 @@
+from cpython cimport PyObject
+
 cdef struct Vertex:
     int x, y, z
     float u, v
@@ -9,4 +11,8 @@ cdef class Renderer:
     cdef unsigned int vbo
     cdef Vertex *vertex_buffer
 
+    cdef unsigned short *indices[2][MAX_TEXTURES]
+    cdef unsigned short last_indices[2 * MAX_TEXTURES]
+    cdef PyObject *elements[640*3]
+
     cpdef render_elements(self, elements)
--- a/pytouhou/ui/renderer.pyx
+++ b/pytouhou/ui/renderer.pyx
@@ -13,9 +13,7 @@
 ##
 
 from libc.stdlib cimport malloc, free
-from itertools import chain
-
-from struct import pack
+from libc.string cimport memset
 
 from pytouhou.lib.opengl cimport \
          (glVertexPointer, glTexCoordPointer, glColorPointer,
@@ -26,16 +24,32 @@ from pytouhou.lib.opengl cimport \
           GL_ONE_MINUS_SRC_ALPHA, GL_ONE, GL_TEXTURE_2D, GL_TRIANGLES,
           glGenBuffers)
 
+from pytouhou.game.element cimport Element
 from .sprite cimport get_sprite_rendering_data
 from .texture import TextureManager
 
 
-MAX_ELEMENTS = 640*4*3
+DEF MAX_ELEMENTS = 640*4*3
+
+
+cdef long find_objects(Renderer self, object elements):
+    # Don’t type element as Element, or else the overriding of objects won’t work.
+    cdef Element obj
+    cdef long i = 0
+    for element in elements:
+        for obj in element.objects:
+            sprite = obj.sprite
+            if sprite and sprite.visible:
+                # warning: only a borrowed pointer is stored, no reference is taken—this assumes the object stays alive until render_elements has read it back
+                self.elements[i] = <PyObject*>obj
+                i += 1
+                if i >= 640*3-4:
+                    return i
+    return i
 
 
 cdef class Renderer:
     def __cinit__(self):
-        # Allocate buffers
         self.vertex_buffer = <Vertex*> malloc(MAX_ELEMENTS * sizeof(Vertex))
 
 
@@ -44,42 +58,63 @@ cdef class Renderer:
 
 
     def __init__(self, resource_loader):
-        self.texture_manager = TextureManager(resource_loader)
+        self.texture_manager = TextureManager(resource_loader, self)
 
         if not self.use_fixed_pipeline:
             glGenBuffers(1, &self.vbo)
 
 
-    cpdef render_elements(self, elements):
-        cdef unsigned short nb_vertices = 0, nb_indices, *new_indices
+    def add_texture(self, int texture):
+        for i in xrange(2):
+            self.indices[i][texture] = <unsigned short*> malloc(65536 * sizeof(unsigned short))
+
 
-        indices_by_texture = {}
+    def remove_texture(self, int texture):
+        for i in xrange(2):
+            free(self.indices[i][texture])
+
 
-        objects = chain(*[element.objects for element in elements])
-        for element in objects:
-            if nb_vertices >= MAX_ELEMENTS - 4:
-                break
+    cpdef render_elements(self, elements):
+        cdef int key
+        cdef int x1, y1, z1, x2, y2, z2, x3, y3, z3, x4, y4, z4, ox, oy
+        cdef float left, right, bottom, top
+        cdef unsigned char r, g, b, a
 
+        nb_vertices = 0
+        memset(self.last_indices, 0, sizeof(self.last_indices))
+
+        nb_elements = find_objects(self, elements)
+        for element_idx in xrange(nb_elements):
+            element = <object>self.elements[element_idx]
             sprite = element.sprite
-            if sprite and sprite.visible:
-                ox, oy = element.x, element.y
-                key, (vertices, uvs, colors) = get_sprite_rendering_data(sprite)
-                rec = indices_by_texture.setdefault(key, [])
+            ox, oy = element.x, element.y
+            key, (vertices, uvs, colors) = get_sprite_rendering_data(sprite)
+
+            blendfunc = key // MAX_TEXTURES
+            texture = key % MAX_TEXTURES
+
+            rec = self.indices[blendfunc][texture]
+            next_indice = self.last_indices[key]
 
-                # Pack data in buffer
-                x1, x2, x3, x4, y1, y2, y3, y4, z1, z2, z3, z4 = vertices
-                left, right, bottom, top = uvs
-                r, g, b, a = colors
-                self.vertex_buffer[nb_vertices] = Vertex(x1 + ox, y1 + oy, z1, left, bottom, r, g, b, a)
-                self.vertex_buffer[nb_vertices+1] = Vertex(x2 + ox, y2 + oy, z2, right, bottom, r, g, b, a)
-                self.vertex_buffer[nb_vertices+2] = Vertex(x3 + ox, y3 + oy, z3, right, top, r, g, b, a)
-                self.vertex_buffer[nb_vertices+3] = Vertex(x4 + ox, y4 + oy, z4, left, top, r, g, b, a)
+            # Pack data in buffer
+            x1, x2, x3, x4, y1, y2, y3, y4, z1, z2, z3, z4 = vertices
+            left, right, bottom, top = uvs
+            r, g, b, a = colors
+            self.vertex_buffer[nb_vertices] = Vertex(x1 + ox, y1 + oy, z1, left, bottom, r, g, b, a)
+            self.vertex_buffer[nb_vertices+1] = Vertex(x2 + ox, y2 + oy, z2, right, bottom, r, g, b, a)
+            self.vertex_buffer[nb_vertices+2] = Vertex(x3 + ox, y3 + oy, z3, right, top, r, g, b, a)
+            self.vertex_buffer[nb_vertices+3] = Vertex(x4 + ox, y4 + oy, z4, left, top, r, g, b, a)
 
-                # Add indices
-                index = nb_vertices
-                rec.extend((index, index + 1, index + 2, index + 2, index + 3, index))
+            # Add indices
+            rec[next_indice] = nb_vertices
+            rec[next_indice+1] = nb_vertices + 1
+            rec[next_indice+2] = nb_vertices + 2
+            rec[next_indice+3] = nb_vertices + 2
+            rec[next_indice+4] = nb_vertices + 3
+            rec[next_indice+5] = nb_vertices
+            self.last_indices[key] += 6
 
-                nb_vertices += 4
+            nb_vertices += 4
 
         if nb_vertices == 0:
             return
@@ -100,18 +135,17 @@ cdef class Renderer:
             glVertexAttribPointer(2, 4, GL_UNSIGNED_BYTE, True, sizeof(Vertex), <void*>20)
             glEnableVertexAttribArray(2)
 
-        for (texture, blendfunc), indices in indices_by_texture.items():
+        for key in xrange(2 * MAX_TEXTURES):
+            nb_indices = self.last_indices[key]
+            if not nb_indices:
+                continue
 
-            #TODO: find a more elegent way.
-            nb_indices = len(indices)
-            new_indices = <unsigned short*> malloc(nb_indices * sizeof(unsigned short))
-            for i in xrange(nb_indices):
-                new_indices[i] = indices[i]
+            blendfunc = key // MAX_TEXTURES
+            texture = key % MAX_TEXTURES
 
             glBlendFunc(GL_SRC_ALPHA, (GL_ONE_MINUS_SRC_ALPHA, GL_ONE)[blendfunc])
             glBindTexture(GL_TEXTURE_2D, texture)
-            glDrawElements(GL_TRIANGLES, nb_indices, GL_UNSIGNED_SHORT, new_indices)
-            free(new_indices)
+            glDrawElements(GL_TRIANGLES, nb_indices, GL_UNSIGNED_SHORT, self.indices[blendfunc][texture])
 
         if not self.use_fixed_pipeline:
             glBindBuffer(GL_ARRAY_BUFFER, 0)
--- a/pytouhou/ui/sprite.pyx
+++ b/pytouhou/ui/sprite.pyx
@@ -67,7 +67,7 @@ cpdef object get_sprite_rendering_data(S
            ty * y_1 + toy,
            (ty + th) * y_1 + toy)
 
-    key = sprite.anm.texture, sprite.blendfunc
+    key = MAX_TEXTURES * sprite.blendfunc + <long>sprite.anm.texture
     r, g, b = sprite.color
     values = tuple([x for x in vertmat.data[:12]]), uvs, (r, g, b, sprite.alpha)
     sprite._rendering_data = key, values
--- a/pytouhou/ui/texture.pyx
+++ b/pytouhou/ui/texture.pyx
@@ -29,21 +29,25 @@ class TextureId(int):
     def __del__(self):
         cdef GLuint texture = self
         glDeleteTextures(1, &texture)
+        self.renderer.remove_texture(self)
 
 
 class TextureManager(object):
-    def __init__(self, loader=None):
+    def __init__(self, loader=None, renderer=None):
         self.loader = loader
+        self.renderer = renderer
 
 
     def load(self, anm_list):
-        for anm in anm_list:
+        for anm in sorted(anm_list, key=lambda x: x[0].first_name.endswith('ascii.png')):
             for entry in anm:
                 if not hasattr(entry, 'texture'):
                     texture = decode_png(self.loader, entry.first_name, entry.secondary_name)
                     entry.texture = load_texture(texture)
                 elif not isinstance(entry.texture, TextureId):
                     entry.texture = load_texture(entry.texture)
+                self.renderer.add_texture(entry.texture)
+                entry.texture.renderer = self.renderer
 
 
 cdef decode_png(loader, first_name, secondary_name):
--- a/setup.py
+++ b/setup.py
@@ -64,6 +64,7 @@ setup(name='PyTouhou',
       packages=packages,
       ext_modules=cythonize(extensions, nthreads=4,
                             compiler_directives={'infer_types': True,
-                                                 'infer_types.verbose': True}),
+                                                 'infer_types.verbose': True},
+                            compile_time_env={'MAX_TEXTURES': 1024}),
       scripts=['eosd', 'anmviewer'],
       **extra)