DRW: Fix hair OSX workaround having a cap limit
author Clément Foucault <foucault.clem@gmail.com>
Thu, 7 Mar 2019 02:22:43 +0000 (03:22 +0100)
committer Clément Foucault <foucault.clem@gmail.com>
Thu, 7 Mar 2019 02:22:57 +0000 (03:22 +0100)
This is still a dirty workaround.

Note that we are drawing the whole set of points multiple times. While this
is inefficient, the main bottleneck is CPU transformation.

source/blender/draw/intern/draw_hair.c
source/blender/draw/modes/shaders/common_hair_refine_vert.glsl

index 7c086c5ec93f2f997810cb09fb097c8e90f83c34..09d0fa06f22791cdbf1258c37178c877c9957425 100644 (file)
@@ -58,6 +58,8 @@ typedef struct ParticleRefineCall {
 } ParticleRefineCall;
 
 static ParticleRefineCall *g_tf_calls = NULL;
+static int g_tf_id_offset;
+static int g_tf_target_width;
 static int g_tf_target_height;
 #endif
 
@@ -204,6 +206,8 @@ static DRWShadingGroup *drw_shgroup_create_hair_procedural_ex(
                pr_call->vert_len = final_points_len;
                g_tf_calls = pr_call;
                DRW_shgroup_uniform_int(tf_shgrp, "targetHeight", &g_tf_target_height, 1);
+               DRW_shgroup_uniform_int(tf_shgrp, "targetWidth", &g_tf_target_width, 1);
+               DRW_shgroup_uniform_int(tf_shgrp, "idOffset", &g_tf_id_offset, 1);
 #endif
 
                DRW_shgroup_uniform_texture(tf_shgrp, "hairPointBuffer", hair_cache->point_tex);
@@ -255,9 +259,13 @@ void DRW_hair_update(void)
        }
 
        /* Create target Texture / Framebuffer */
-       int height = (1 + max_size / 8192);
-       GPUTexture *tex = DRW_texture_pool_query_2D(8192, height, GPU_RGBA32F, (void *)DRW_hair_update);
+       /* Don't use max size as it can be really heavy and fail.
+        * Do chunks of maximum 2048 * 2048 hair points. */
+       int width = 2048;
+       int height = min_ii(width, 1 + max_size / width);
+       GPUTexture *tex = DRW_texture_pool_query_2D(width, height, GPU_RGBA32F, (void *)DRW_hair_update);
        g_tf_target_height = height;
+       g_tf_target_width = width;
 
        GPUFrameBuffer *fb = NULL;
        GPU_framebuffer_ensure_config(&fb, {
@@ -265,18 +273,30 @@ void DRW_hair_update(void)
                GPU_ATTACHMENT_TEXTURE(tex),
        });
 
-       float *data = MEM_mallocN(sizeof(float) * 4 * 8192 * height, "tf fallback buffer");
+       float *data = MEM_mallocN(sizeof(float) * 4 * width * height, "tf fallback buffer");
 
        GPU_framebuffer_bind(fb);
        while (g_tf_calls != NULL) {
                ParticleRefineCall *pr_call = g_tf_calls;
                g_tf_calls = g_tf_calls->next;
-               DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp);
-               /* Readback result to main memory. */
-               GPU_framebuffer_read_color(fb, 0, 0, 8192, height, 4, 0, data);
-               /* Upload back to VBO. */
-               GPU_vertbuf_use(pr_call->vbo);
-               glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(float) * 4 * pr_call->vert_len, data);
+
+               g_tf_id_offset = 0;
+               while (pr_call->vert_len > 0) {
+                       int max_read_px_len = min_ii(width * height, pr_call->vert_len);
+
+                       DRW_draw_pass_subset(g_tf_pass, pr_call->shgrp, pr_call->shgrp);
+                       /* Readback result to main memory. */
+                       GPU_framebuffer_read_color(fb, 0, 0, width, height, 4, 0, data);
+                       /* Upload back to VBO. */
+                       GPU_vertbuf_use(pr_call->vbo);
+                       glBufferSubData(GL_ARRAY_BUFFER,
+                                       sizeof(float) * 4 * g_tf_id_offset,
+                                       sizeof(float) * 4 * max_read_px_len,
+                                       data);
+
+                       g_tf_id_offset += max_read_px_len;
+                       pr_call->vert_len -= max_read_px_len;
+               }
 
                MEM_freeN(pr_call);
        }
index 5d21171bbf3fd972d4ba7cc449655c99c7f795c2..c193c307e018c0f093ebf381e2b158d0644f9e11 100644 (file)
@@ -45,7 +45,9 @@ vec4 interp_data(vec4 v0, vec4 v1, vec4 v2, vec4 v3, vec4 w)
 }
 
 #ifdef TF_WORKAROUND
+uniform int targetWidth;
 uniform int targetHeight;
+uniform int idOffset;
 #endif
 
 void main(void)
@@ -58,8 +60,9 @@ void main(void)
        finalColor = interp_data(data0, data1, data2, data3, weights);
 
 #ifdef TF_WORKAROUND
-       gl_Position.x = ((float(gl_VertexID % 8192) + 0.5) / 8192.0) * 2.0 - 1.0;
-       gl_Position.y = ((float(gl_VertexID / 8192) + 0.5) / float(targetHeight)) * 2.0 - 1.0;
+       int id = gl_VertexID - idOffset;
+       gl_Position.x = ((float(id % targetWidth) + 0.5) / float(targetWidth)) * 2.0 - 1.0;
+       gl_Position.y = ((float(id / targetWidth) + 0.5) / float(targetHeight)) * 2.0 - 1.0;
        gl_Position.z = 0.0;
        gl_Position.w = 1.0;