Hehe, wow this thread was sort of hijacked. But yeah, as hybrid said, it is just a simple math problem. Here is the solution(as answered by Dorth):
And I don't know if this counts, but a lot of companies use the SPU Intrinsics library, which is virtually a one-to-one mapping of assembly, but much easier to read and write. In fact, if you get a job at some places you will be using SPU intrinsics a heck of a lot, and it improves code (if you know how to) very much.
Code: Select all
/*
* Copyright (c) 2007, Insomniac Games
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the Insomniac Games nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY <copyright holder> ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL <copyright holder> BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* -----------------------------------------------------------------------------
*
* a simple root finding algorithm is used to find the value of 't' that
* satisfies:
* d|D(t)|^2/dt = 0
* where:
* |D(t)| = |p(t)-b(t)|
* where p(t) is a point on the line parameterized by t:
* p(t) = p1 + t*(p2-p1)
* and b(t) is that same point clipped to the boundary of the box. in box-
* relative coordinates d|D(t)|^2/dt is the sum of three x,y,z components
* each of which looks like this:
*
* t_lo /
* ______/ -->t
* / t_hi
* /
*
* t_lo and t_hi are the t values where the line passes through the planes
* corresponding to the sides of the box. the algorithm computes d|D(t)|^2/dt
* in a piecewise fashion from t=0 to t=1, stopping at the point where
* d|D(t)|^2/dt crosses from negative to positive.
*/
float DistanceLA(const PrimLineseg *linesegA, const PrimAABB *aabbB, Result *result) {
qword linesegA_m_vert0 = *((qword *)( &linesegA->m_vert0) );
qword linesegA_m_vert1 = *((qword *)( &linesegA->m_vert1) );
qword aabbB_m_center = *((qword *)( &aabbB->m_center) );
qword aabbB_m_radii = *((qword *)( &aabbB->m_radii) );
qword result_m_ipA = *((qword *)( &result->m_ipA) );
qword result_m_ipB = *((qword *)( &result->m_ipB) );
// find the line segment point first end point in the coordinate frame of the AABB
qword line_v0_offset = si_fs( linesegA_m_vert0, aabbB_m_center );
// find the line segment vector from the first point to the last
qword line_vector = si_fs( linesegA_m_vert1, linesegA_m_vert0 );
const qword zero = si_from_int( 0 );
const qword one = si_from_int( 1 );
const qword neg_one = si_from_int( -1 );
const qword flt_half = si_from_float( 0.5f );
const qword flt_one = si_from_float( 1.f );
const qword flt_two = si_from_float( 2.f );
const qword flt_neg_one = si_from_float( -1.f );
const qword broadcast_x = si_ila( 0x10203);
qword mask_lv_lt_zero = si_fcgt( zero, line_vector );
qword sign = si_selb( flt_one, flt_neg_one, mask_lv_lt_zero );
qword line_vector_mirrored = si_fm( line_vector, sign );
qword line_v0_offset_mirrored = si_fm( line_v0_offset, sign );
// domain (odd calls this region) is -1,0,+1 depending on which side of the box planes each
// coordinate is on. t_anchor (ode calls this tanchor) is the next t value at which there is a
// transition, or the last one if there are no more.
qword neg_aabbB_m_radii = si_fm( aabbB_m_radii, flt_neg_one );
qword mask_lvm_gt_zero = si_fcgt( line_vector_mirrored, zero );
qword mask_lv0om_lt_neg_rad = si_fcgt( neg_aabbB_m_radii, line_v0_offset_mirrored );
qword recip_lvm = si_frest( line_vector_mirrored );
recip_lvm = si_fi( line_vector_mirrored, recip_lvm);
qword t_anchor1 = si_fs( neg_aabbB_m_radii, line_v0_offset_mirrored );
t_anchor1 = si_fm( t_anchor1, recip_lvm );
// reused later...
qword t_anchor2 = si_fs( aabbB_m_radii, line_v0_offset_mirrored );
t_anchor2 = si_fm( t_anchor2, recip_lvm );
qword t_anchor3 = si_selb( t_anchor2, t_anchor1, mask_lv0om_lt_neg_rad );
qword t_anchor = si_selb( flt_two, t_anchor3, mask_lvm_gt_zero );
qword lv0om_gt_rad_temp = si_fcgt( line_v0_offset_mirrored, aabbB_m_radii );
qword lv0om_gt_rad = si_shli( lv0om_gt_rad_temp, 0x1f );
qword domain_tmp = si_selb( lv0om_gt_rad, neg_one, mask_lv0om_lt_neg_rad );
qword domain = si_selb( zero, domain_tmp, mask_lvm_gt_zero );
// square the line segment vector
qword line_vector_mirrored_2 = si_fm( line_vector_mirrored, line_vector_mirrored );
// compute deriv, the solution to d|d|^2/dt for t=0. if it's >= 0 then p1 is the closest point
qword lvm2_mul_ta = si_fm( line_vector_mirrored_2, t_anchor );
qword mask_domain_eq_zero = si_ceq( domain, zero );
qword deriv_mult = si_selb( flt_neg_one, zero, mask_domain_eq_zero );
qword deriv = si_fm( lvm2_mul_ta, deriv_mult );
qword deriv_y = si_shlqbyi( deriv, 0x04 );
qword deriv_z = si_shlqbyi( deriv, 0x08 );
deriv = si_fa( deriv, deriv_y );
deriv = si_fa( deriv, deriv_z );
qword mask_deriv_gt_zero = si_fcgt( deriv, zero );
qword mask_deriv_eq_zero = si_fceq( deriv, zero );
qword solution = si_or( mask_deriv_gt_zero, mask_deriv_eq_zero );
// set initial t value
qword t = zero;
qword done = solution;
while( !si_to_uint(done) )
{
qword mask_t_lt_one;
// t_next is the next t value, next_t in ode
// find the point on the line that is at the next clip plane boundary
qword t_next = flt_one;
qword mask_0_anchor_gt_t = si_fcgt( t_anchor, t );
qword mask_0_anchor_lt_one = si_fcgt( flt_one, t_anchor );
qword mask_0_anchor_lt_tn = si_fcgt( t_next, t_anchor );
qword mask_0_combined = si_and( mask_0_anchor_gt_t, mask_0_anchor_lt_one );
mask_0_combined = si_and( mask_0_combined, mask_0_anchor_lt_tn );
t_next = si_selb( t_next, t_anchor, mask_0_combined );
qword anchor_y = si_shlqbyi( t_anchor, 0x04 );
qword mask_1_anchor_gt_t = si_fcgt( anchor_y, t );
qword mask_1_anchor_lt_one = si_fcgt( flt_one, anchor_y );
qword mask_1_anchor_lt_tn = si_fcgt( t_next, anchor_y );
qword mask_1_combined = si_and( mask_1_anchor_gt_t, mask_1_anchor_lt_one );
mask_1_combined = si_and( mask_1_combined, mask_1_anchor_lt_tn );
t_next = si_selb( t_next, anchor_y, mask_1_combined );
qword anchor_z = si_shlqbyi( t_anchor, 0x08 );
qword mask_2_anchor_gt_t = si_fcgt( anchor_z, t );
qword mask_2_anchor_lt_one = si_fcgt( flt_one, anchor_z );
qword mask_2_anchor_lt_tn = si_fcgt( t_next, anchor_z );
qword mask_2_combined = si_and( mask_2_anchor_gt_t, mask_2_anchor_lt_one );
mask_2_combined = si_and( mask_2_combined, mask_2_anchor_lt_tn );
t_next = si_selb( t_next, anchor_z, mask_2_combined );
// compute the deriv = d|d|^2/dt for the next t
t_next = si_shufb( t_next, t_next, broadcast_x);
qword mask_domain_eq_zero = si_ceqi( domain, 0x0 );
qword next_sub_anchor = si_fs( t_next, t_anchor );
qword mult_lvm2 = si_fm( line_vector_mirrored_2, next_sub_anchor );
qword deriv_mult = si_selb( flt_one, zero, mask_domain_eq_zero );
qword deriv_next = si_fm( mult_lvm2, deriv_mult );
qword deriv_next_y = si_shlqbyi( deriv_next, 0x04 );
qword deriv_next_z = si_shlqbyi( deriv_next, 0x08 );
deriv_next = si_fa( deriv_next, deriv_next_y );
deriv_next = si_fa( deriv_next, deriv_next_z );
// if the sign of d|d|^2/dt has changed, solution = the crossover point
qword mask_dn_gt_zero = si_fcgt( deriv_next, zero );
qword mask_dn_eq_zero = si_fceq( deriv_next, zero );
qword mask_gte_zero = si_or( mask_dn_gt_zero, mask_dn_eq_zero );
if( si_to_uint(mask_gte_zero) )
{
qword numer = si_fs( deriv_next, deriv );
qword denom = si_fs( t_next, t );
qword recip_denom = si_frest( denom );
recip_denom = si_fi( denom, recip_denom );
qword derivs = si_fm( numer, recip_denom );
qword recip_derivs = si_frest( derivs );
recip_derivs = si_fi( derivs, recip_derivs );
qword ratio = si_fm( deriv, recip_derivs );
t = si_fs( t, ratio );
solution = one;
break;
}
// advance to the next anchor point / region
qword mask_anchor_eq_tn = si_fceq( t_anchor, t_next );
t_anchor = si_selb( t_anchor, t_anchor2, mask_anchor_eq_tn );
qword domain_add_amt = si_selb( zero, one, mask_anchor_eq_tn );
domain = si_a( domain, domain_add_amt );
t = t_next;
deriv = deriv_next;
// we're done when t >= 1
qword mask_t_gt_one = si_fcgt( t, flt_one );
qword mask_t_eq_one = si_fceq( t, flt_one );
done = si_or( mask_t_gt_one, mask_t_eq_one );
}
// p1 is the closest point
solution = si_andc( one, solution );
t = si_shufb( t, t, broadcast_x);
t = si_selb( t, flt_one, solution );
// compute closest point on the line
// note: line_vector=p2-p1
result_m_ipA = si_fma( line_vector, t, linesegA_m_vert0 );
// compute closest point on the box
qword scale = si_fma( t, line_vector_mirrored, line_v0_offset_mirrored );
line_v0_offset_mirrored = si_fm( sign, scale );
mask_lv0om_lt_neg_rad = si_fcgt( neg_aabbB_m_radii, line_v0_offset_mirrored );
line_v0_offset_mirrored = si_selb( line_v0_offset_mirrored, neg_aabbB_m_radii, mask_lv0om_lt_neg_rad );
qword mask_lv0om_gt_rad = si_fcgt( line_v0_offset_mirrored, aabbB_m_radii );
qword mask_combined = si_andc( mask_lv0om_gt_rad, mask_lv0om_lt_neg_rad );
line_v0_offset_mirrored = si_selb( line_v0_offset_mirrored, aabbB_m_radii, mask_combined );
result_m_ipB = si_fa( line_v0_offset_mirrored, aabbB_m_center );
// return the distance between the closest points
qword delta = si_fs( result_m_ipB, result_m_ipA);
qword delta_sqr = si_fm( delta, delta);
qword delta_sqr_y = si_shlqbyi( delta_sqr, 0x04 );
qword delta_sqr_z = si_shlqbyi( delta_sqr, 0x08 );
qword len_sqr = si_fa( delta_sqr, delta_sqr_y );
len_sqr = si_fa( len_sqr, delta_sqr_z );
qword est = si_frsqest( len_sqr );
est = si_fi( len_sqr, est );
qword est_sqr = si_fm( est, est );
qword len = si_fnms( len_sqr, est_sqr, flt_one );
// perform single Newton-Raphson step to increase accuracy
qword nr = si_fm( est, flt_half );
len = si_fma( len, nr, est );
// we have reciprocal square root, so now multiply to give 'normal' square root
len = si_fm( len, len_sqr );
// handle the case where our input component is zero
qword mask_eq_zero = si_fcgt( len_sqr, zero );
len = si_selb( zero, len, mask_eq_zero );
// convert results to output format
*((qword *)(&result->m_ipA)) = result_m_ipA;
*((qword *)(&result->m_ipB)) = result_m_ipB;
return si_to_float(len);
}