ccl_device_inline float3 fabs(const float3& a);
ccl_device_inline float3 mix(const float3& a, const float3& b, float t);
ccl_device_inline float3 rcp(const float3& a);
+ccl_device_inline float3 sqrt(const float3& a);
#endif /* !__KERNEL_OPENCL__ */
+ccl_device_inline float min3(float3 a);
ccl_device_inline float max3(float3 a);
ccl_device_inline float len(const float3 a);
ccl_device_inline float len_squared(const float3 a);
ccl_device_inline float3 operator*(const float f, const float3& a)
{
- /* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */
-#if defined(__KERNEL_SSE__) && 0
+#if defined(__KERNEL_SSE__)
return float3(_mm_mul_ps(_mm_set1_ps(f), a.m128));
#else
return make_float3(a.x*f, a.y*f, a.z*f);
ccl_device_inline float3 operator/(const float f, const float3& a)
{
- /* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */
-#if defined(__KERNEL_SSE__) && 0
- __m128 rc = _mm_rcp_ps(a.m128);
- return float3(_mm_mul_ps(_mm_set1_ps(f),rc));
+#if defined(__KERNEL_SSE__)
+ return float3(_mm_div_ps(_mm_set1_ps(f), a.m128));
#else
return make_float3(f / a.x, f / a.y, f / a.z);
#endif
ccl_device_inline float3 operator/(const float3& a, const float3& b)
{
- /* TODO(sergey): Currently disabled, gives speedup but causes precision issues. */
-#if defined(__KERNEL_SSE__) && 0
- __m128 rc = _mm_rcp_ps(b.m128);
- return float3(_mm_mul_ps(a, rc));
+#if defined(__KERNEL_SSE__)
+ return float3(_mm_div_ps(a.m128, b.m128));
#else
return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
#endif
#endif
}
+ccl_device_inline float3 sqrt(const float3& a)
+{
+#ifdef __KERNEL_SSE__
+ return float3(_mm_sqrt_ps(a));
+#else
+ return make_float3(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z));
+#endif
+}
+
ccl_device_inline float3 mix(const float3& a, const float3& b, float t)
{
return a + t*(b - a);
ccl_device_inline float3 rcp(const float3& a)
{
#ifdef __KERNEL_SSE__
- const float4 r(_mm_rcp_ps(a.m128));
- return float3(_mm_sub_ps(_mm_add_ps(r, r),
- _mm_mul_ps(_mm_mul_ps(r, r), a)));
+ /* Don't use _mm_rcp_ps due to poor precision. */
+ return float3(_mm_div_ps(_mm_set_ps1(1.0f), a.m128));
#else
return make_float3(1.0f/a.x, 1.0f/a.y, 1.0f/a.z);
#endif
}
#endif /* !__KERNEL_OPENCL__ */
+ccl_device_inline float min3(float3 a)
+{
+ return min(min(a.x, a.y), a.z);
+}
+
ccl_device_inline float max3(float3 a)
{
return max(max(a.x, a.y), a.z);
#endif
}
+ccl_device_inline bool isfinite3_safe(float3 v)
+{
+ return isfinite_safe(v.x) && isfinite_safe(v.y) && isfinite_safe(v.z);
+}
+
ccl_device_inline float3 ensure_finite3(float3 v)
{
- if(!isfinite_safe(v.x)) v.x = 0.0;
- if(!isfinite_safe(v.y)) v.y = 0.0;
- if(!isfinite_safe(v.z)) v.z = 0.0;
+ if(!isfinite_safe(v.x)) v.x = 0.0f;
+ if(!isfinite_safe(v.y)) v.y = 0.0f;
+ if(!isfinite_safe(v.z)) v.z = 0.0f;
return v;
}