Make sure we don't perform any implicit address space conversion.
A bit annoying, but less intrusive approaches (like using temp private
variable in .cl kernel) do not work correct here.
Using generic address space will help from code side here, but will
be somewhat slower due to extra things happening as far as i know.
for(int order = 0; order < 10; order++) {
/* Sample microfacet height and normal */
for(int order = 0; order < 10; order++) {
/* Sample microfacet height and normal */
- if(!mf_sample_height(wr, &hr, &C1_r, &G1_r, &lambda_r, lcg_step_float(lcg_state)))
+ if(!mf_sample_height(wr, &hr, &C1_r, &G1_r, &lambda_r, lcg_step_float_addrspace(lcg_state)))
- float3 wm = mf_sample_vndf(-wr, alpha, make_float2(lcg_step_float(lcg_state), lcg_step_float(lcg_state)));
+ float3 wm = mf_sample_vndf(-wr, alpha, make_float2(lcg_step_float_addrspace(lcg_state),
+ lcg_step_float_addrspace(lcg_state)));
#ifdef MF_MULTI_DIFFUSE
if(order == 0) {
#ifdef MF_MULTI_DIFFUSE
if(order == 0) {
/* Bounce from the microfacet. */
#ifdef MF_MULTI_GLASS
bool next_outside;
/* Bounce from the microfacet. */
#ifdef MF_MULTI_GLASS
bool next_outside;
- wr = mf_sample_phase_glass(-wr, outside? eta: 1.0f/eta, wm, lcg_step_float(lcg_state), &next_outside);
+ wr = mf_sample_phase_glass(-wr, outside? eta: 1.0f/eta, wm, lcg_step_float_addrspace(lcg_state), &next_outside);
if(!next_outside) {
outside = !outside;
wr = -wr;
hr = -hr;
}
#elif defined(MF_MULTI_DIFFUSE)
if(!next_outside) {
outside = !outside;
wr = -wr;
hr = -hr;
}
#elif defined(MF_MULTI_DIFFUSE)
- wr = mf_sample_phase_diffuse(wm, lcg_step_float(lcg_state), lcg_step_float(lcg_state));
+ wr = mf_sample_phase_diffuse(wm,
+ lcg_step_float_addrspace(lcg_state),
+ lcg_step_float_addrspace(lcg_state));
#else /* MF_MULTI_GLOSSY */
wr = mf_sample_phase_glossy(-wr, n, k, &throughput, wm);
#endif
#else /* MF_MULTI_GLOSSY */
wr = mf_sample_phase_glossy(-wr, n, k, &throughput, wm);
#endif
int order;
for(order = 0; order < 10; order++) {
/* Sample microfacet height. */
int order;
for(order = 0; order < 10; order++) {
/* Sample microfacet height. */
- if(!mf_sample_height(wr, &hr, &C1_r, &G1_r, &lambda_r, lcg_step_float(lcg_state))) {
+ if(!mf_sample_height(wr, &hr, &C1_r, &G1_r, &lambda_r, lcg_step_float_addrspace(lcg_state))) {
/* The random walk has left the surface. */
*wo = outside? wr: -wr;
return throughput;
}
/* Sample microfacet normal. */
/* The random walk has left the surface. */
*wo = outside? wr: -wr;
return throughput;
}
/* Sample microfacet normal. */
- float3 wm = mf_sample_vndf(-wr, alpha, make_float2(lcg_step_float(lcg_state), lcg_step_float(lcg_state)));
+ float3 wm = mf_sample_vndf(-wr, alpha, make_float2(lcg_step_float_addrspace(lcg_state),
+ lcg_step_float_addrspace(lcg_state)));
/* First-bounce color is already accounted for in mix weight. */
if(order > 0)
/* First-bounce color is already accounted for in mix weight. */
if(order > 0)
/* Bounce from the microfacet. */
#ifdef MF_MULTI_GLASS
bool next_outside;
/* Bounce from the microfacet. */
#ifdef MF_MULTI_GLASS
bool next_outside;
- wr = mf_sample_phase_glass(-wr, outside? eta: 1.0f/eta, wm, lcg_step_float(lcg_state), &next_outside);
+ wr = mf_sample_phase_glass(-wr, outside? eta: 1.0f/eta, wm, lcg_step_float_addrspace(lcg_state), &next_outside);
if(!next_outside) {
hr = -hr;
wr = -wr;
outside = !outside;
}
#elif defined(MF_MULTI_DIFFUSE)
if(!next_outside) {
hr = -hr;
wr = -wr;
outside = !outside;
}
#elif defined(MF_MULTI_DIFFUSE)
- wr = mf_sample_phase_diffuse(wm, lcg_step_float(lcg_state), lcg_step_float(lcg_state));
+ wr = mf_sample_phase_diffuse(wm,
+ lcg_step_float_addrspace(lcg_state),
+ lcg_step_float_addrspace(lcg_state));
#else /* MF_MULTI_GLOSSY */
wr = mf_sample_phase_glossy(-wr, n, k, &throughput, wm);
#endif
#else /* MF_MULTI_GLOSSY */
wr = mf_sample_phase_glossy(-wr, n, k, &throughput, wm);
#endif
/* Linear Congruential Generator */
/* Linear Congruential Generator */
-ccl_device uint lcg_step_uint(ccl_addr_space uint *rng)
+ccl_device uint lcg_step_uint(uint *rng)
{
/* implicit mod 2^32 */
*rng = (1103515245*(*rng) + 12345);
return *rng;
}
{
/* implicit mod 2^32 */
*rng = (1103515245*(*rng) + 12345);
return *rng;
}
-ccl_device float lcg_step_float(ccl_addr_space uint *rng)
+ccl_device float lcg_step_float(uint *rng)
{
/* implicit mod 2^32 */
*rng = (1103515245*(*rng) + 12345);
{
/* implicit mod 2^32 */
*rng = (1103515245*(*rng) + 12345);
return lcg_init(*rng + state->rng_offset + state->sample*scramble);
}
return lcg_init(*rng + state->rng_offset + state->sample*scramble);
}
+/* TODO(sergey): For until we can use generic address space from OpenCL 2.0. */
+
+ccl_device_inline uint lcg_state_init_addrspace(ccl_addr_space RNG *rng,
+ const ccl_addr_space PathState *state,
+ uint scramble)
+{
+ return lcg_init(*rng + state->rng_offset + state->sample*scramble);
+}
+
+ccl_device float lcg_step_float_addrspace(ccl_addr_space uint *rng)
+{
+ /* implicit mod 2^32 */
+ *rng = (1103515245*(*rng) + 12345);
+ return (float)*rng * (1.0f/(float)0xFFFFFFFF);
+}
+
-ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, RNG *rng,
+ccl_device void shader_eval_surface(KernelGlobals *kg, ShaderData *sd, ccl_addr_space RNG *rng,
ccl_addr_space PathState *state, float randb, int path_flag, ShaderContext ctx)
{
ccl_fetch(sd, num_closure) = 0;
ccl_addr_space PathState *state, float randb, int path_flag, ShaderContext ctx)
{
ccl_fetch(sd, num_closure) = 0;
}
if(rng && (ccl_fetch(sd, flag) & SD_BSDF_NEEDS_LCG)) {
}
if(rng && (ccl_fetch(sd, flag) & SD_BSDF_NEEDS_LCG)) {
- ccl_fetch(sd, lcg_state) = lcg_state_init(rng, state, 0xb4bc3953);
+ ccl_fetch(sd, lcg_state) = lcg_state_init_addrspace(rng, state, 0xb4bc3953);