blob: b45f5cd71ddc85a3fe924c9348e77a315d32bb14 [file] [log] [blame]
/*! \file Triangle.hxx defines most typical triangle intersection code... */
#ifndef RTTL_TRIANGLE_HXX
#define RTTL_TRIANGLE_HXX
#include "../common/RTInclude.hxx"
#include "../common/RTBox.hxx"
#include "../common/RTRay.hxx"
using namespace RTTL;
// todo: use RTVec<3,sse_f> instead of explicit sse code, common origin case
template <int N, int LAYOUT, int MULTIPLE_ORIGINS, int SHADOW_RAYS>
_INLINE void IntersectTriangleMoellerTrumbore(const RTVec3f &v0,
const RTVec3f &v1,
const RTVec3f &v2,
const int triangleID,
const int shaderID,
RayPacket<N, LAYOUT, MULTIPLE_ORIGINS, SHADOW_RAYS> &packet,
const int first_active,
const int last_active)
{
#if 0
// only works once the v's are all sse_f's ...
const sse_f ax = splat4<0>(v0);
const sse_f ay = splat4<1>(v0);
const sse_f az = splat4<2>(v0);
const sse_f e0 = v1 - v0;
const sse_f e0_x = splat4<0>(e0);
const sse_f e0_y = splat4<1>(e0);
const sse_f e0_z = splat4<2>(e0);
const sse_f e1 = v2 - v0;
const sse_f e1_x = splat4<0>(e1);
const sse_f e1_y = splat4<1>(e1);
const sse_f e1_z = splat4<2>(e1);
#else
const sse_f ax = convert(v0[0]);
const sse_f ay = convert(v0[1]);
const sse_f az = convert(v0[2]);
const sse_f bx = convert(v1[0]);
const sse_f by = convert(v1[1]);
const sse_f bz = convert(v1[2]);
const sse_f cx = convert(v2[0]);
const sse_f cy = convert(v2[1]);
const sse_f cz = convert(v2[2]);
const sse_f e0_x = bx - ax;
const sse_f e0_y = by - ay;
const sse_f e0_z = bz - az;
const sse_f e1_x = cx - ax;
const sse_f e1_y = cy - ay;
const sse_f e1_z = cz - az;
#endif
const sse_i id = convert(triangleID);
const sse_i shader_id = convert(shaderID);
sse_f offset_x, offset_y, offset_z;
if (!MULTIPLE_ORIGINS)
{
offset_x = packet.originX(0) - ax;
offset_y = packet.originY(0) - ay;
offset_z = packet.originZ(0) - az;
}
for (int i = first_active; i < last_active; i++) {
if (MULTIPLE_ORIGINS)
{
offset_x = packet.originX(i) - ax;
offset_y = packet.originY(i) - ay;
offset_z = packet.originZ(i) - az;
}
const sse_f pvec_x = (packet.directionY(i) * e1_z) - (packet.directionZ(i) * e1_y);
const sse_f pvec_y = (packet.directionZ(i) * e1_x) - (packet.directionX(i) * e1_z);
const sse_f pvec_z = (packet.directionX(i) * e1_y) - (packet.directionY(i) * e1_x);
const sse_f det = (e0_x*pvec_x) + (e0_y*pvec_y) + (e0_z*pvec_z);
const sse_f inv_det = rcp(det);
const sse_f u = ((offset_x*pvec_x) + (offset_y*pvec_y) + (offset_z*pvec_z)) * inv_det;
const sse_f zero = _mm_setzero_ps();
const sse_f one = convert(1.0f);
const sse_f u_mask = _mm_and_ps(_mm_cmple_ps(zero,u),_mm_cmplt_ps(u,one));
if (_mm_movemask_ps(u_mask) == 0x0) continue;
const sse_f qvec_x = (offset_y*e0_z) - (offset_z*e0_y);
const sse_f qvec_y = (offset_z*e0_x) - (offset_x*e0_z);
const sse_f qvec_z = (offset_x*e0_y) - (offset_y*e0_x);
const sse_f v = ((packet.directionX(i)*qvec_x) + (packet.directionY(i)*qvec_y) + (packet.directionZ(i)*qvec_z)) * inv_det;
const sse_f v_mask = _mm_and_ps(_mm_cmple_ps(zero,v),_mm_cmple_ps(v,one));
if (_mm_movemask_ps(v_mask) == 0x0) continue;
const sse_f uv = u + v;
const sse_f uv_mask = _mm_and_ps(_mm_cmplt_ps(zero,uv),_mm_cmple_ps(uv,one));
const sse_f t = ((e1_x*qvec_x)+(e1_y*qvec_y)+(e1_z*qvec_z))*inv_det;
const sse_f t_mask = _mm_and_ps(_mm_cmplt_ps(packet.minDistance(i),t),
_mm_cmplt_ps(t,packet.maxDistance(i)));
const sse_f mask = u_mask & v_mask & uv_mask & t_mask;
if (_mm_movemask_ps(mask) == 0x0) continue;
packet.u(i) = _mm_blendv_ps(u,packet.u(i),mask);
packet.v(i) = _mm_blendv_ps(v,packet.v(i),mask);
packet.id(i) = _mm_blendv_epi8(id,packet.id(i),_mm_castps_si128(mask));
packet.shaderID(i) = _mm_blendv_epi8(shader_id,packet.shaderID(i),_mm_castps_si128(mask));
packet.maxDistance(i) = _mm_blendv_ps(t,packet.maxDistance(i),mask);
}
}
/*! intersect given triangle using moeller-trumbore algo. intersect
only the ray packlets (i.e., four-blocks of rays) as given by the
packID[packIDs] array */
template <int N, int LAYOUT, int MULTIPLE_ORIGINS, int SHADOW_RAYS>
_INLINE void IntersectTriangleMoellerTrumbore(const RTVec3f &v0,
const RTVec3f &v1,
const RTVec3f &v2,
const int triangleID,
const int shaderID,
RayPacket<N, LAYOUT, MULTIPLE_ORIGINS, SHADOW_RAYS> &packet,
const int packID[],
const int packIDs)
{
const sse_f ax = convert(v0[0]);
const sse_f ay = convert(v0[1]);
const sse_f az = convert(v0[2]);
const sse_f bx = convert(v1[0]);
const sse_f by = convert(v1[1]);
const sse_f bz = convert(v1[2]);
const sse_f cx = convert(v2[0]);
const sse_f cy = convert(v2[1]);
const sse_f cz = convert(v2[2]);
const sse_f e0_x = bx - ax;
const sse_f e0_y = by - ay;
const sse_f e0_z = bz - az;
const sse_f e1_x = cx - ax;
const sse_f e1_y = cy - ay;
const sse_f e1_z = cz - az;
const sse_i id = convert(triangleID);
const sse_i shader_id = convert(shaderID);
sse_f offset_x, offset_y, offset_z;
if (!MULTIPLE_ORIGINS)
{
offset_x = packet.originX(0) - ax;
offset_y = packet.originY(0) - ay;
offset_z = packet.originZ(0) - az;
}
for (int j=0; j<packIDs; j++)
{
const int i = packID[j];
if (MULTIPLE_ORIGINS)
{
offset_x = packet.originX(i) - ax;
offset_y = packet.originY(i) - ay;
offset_z = packet.originZ(i) - az;
}
const sse_f pvec_x = (packet.directionY(i) * e1_z) - (packet.directionZ(i) * e1_y);
const sse_f pvec_y = (packet.directionZ(i) * e1_x) - (packet.directionX(i) * e1_z);
const sse_f pvec_z = (packet.directionX(i) * e1_y) - (packet.directionY(i) * e1_x);
const sse_f det = (e0_x*pvec_x) + (e0_y*pvec_y) + (e0_z*pvec_z);
const sse_f inv_det = rcp(det);
const sse_f u = ((offset_x*pvec_x) + (offset_y*pvec_y) + (offset_z*pvec_z)) * inv_det;
const sse_f zero = _mm_setzero_ps();
const sse_f one = convert(1.0f);
const sse_f u_mask = _mm_and_ps(_mm_cmple_ps(zero,u),_mm_cmplt_ps(u,one));
if (_mm_movemask_ps(u_mask) == 0x0) continue;
const sse_f qvec_x = (offset_y*e0_z) - (offset_z*e0_y);
const sse_f qvec_y = (offset_z*e0_x) - (offset_x*e0_z);
const sse_f qvec_z = (offset_x*e0_y) - (offset_y*e0_x);
const sse_f v = ((packet.directionX(i)*qvec_x) + (packet.directionY(i)*qvec_y) + (packet.directionZ(i)*qvec_z)) * inv_det;
const sse_f v_mask = _mm_and_ps(_mm_cmple_ps(zero,v),_mm_cmple_ps(v,one));
if (_mm_movemask_ps(v_mask) == 0x0) continue;
const sse_f uv = u + v;
const sse_f uv_mask = _mm_and_ps(_mm_cmplt_ps(zero,uv),_mm_cmple_ps(uv,one));
const sse_f t = ((e1_x*qvec_x)+(e1_y*qvec_y)+(e1_z*qvec_z))*inv_det;
const sse_f t_mask = _mm_and_ps(_mm_cmplt_ps(packet.minDistance(i),t),
_mm_cmplt_ps(t,packet.maxDistance(i)));
const sse_f mask = u_mask & v_mask & uv_mask & t_mask;
if (_mm_movemask_ps(mask) == 0x0) continue;
packet.u(i) = _mm_blendv_ps(u,packet.u(i),mask);
packet.v(i) = _mm_blendv_ps(v,packet.v(i),mask);
packet.id(i) = _mm_blendv_epi8(id,packet.id(i),_mm_castps_si128(mask));
packet.shaderID(i) = _mm_blendv_epi8(shader_id,packet.shaderID(i),_mm_castps_si128(mask));
packet.maxDistance(i) = _mm_blendv_ps(t,packet.maxDistance(i),mask);
}
}
#endif