Skip to content

Commit 8acf608

Browse files
committed
engine: optimize traces
1 parent 0235b1e commit 8acf608

File tree

5 files changed

+30
-30
lines changed

5 files changed

+30
-30
lines changed

engine/cmodel.cpp

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -873,9 +873,9 @@ bool IntersectRayWithBoxBrush( TraceInfo_t *pTraceInfo, const cbrush_t *pBrush,
873873
FPExceptionDisabler hideExceptions;
874874

875875
// Load the ray/box parameters into SIMD registers (all loads below are aligned)
876-
fltx4 start = LoadUnaligned3SIMD(pTraceInfo->m_start.Base());
877-
fltx4 extents = LoadUnaligned3SIMD(pTraceInfo->m_extents.Base());
878-
fltx4 delta = LoadUnaligned3SIMD(pTraceInfo->m_delta.Base());
876+
fltx4 start = LoadAlignedSIMD(pTraceInfo->m_start.Base());
877+
fltx4 extents = LoadAlignedSIMD(pTraceInfo->m_extents.Base());
878+
fltx4 delta = LoadAlignedSIMD(pTraceInfo->m_delta.Base());
879879
fltx4 boxMins = LoadAlignedSIMD( pBox->mins.Base() );
880880
fltx4 boxMaxs = LoadAlignedSIMD( pBox->maxs.Base() );
881881

@@ -899,7 +899,7 @@ bool IntersectRayWithBoxBrush( TraceInfo_t *pTraceInfo, const cbrush_t *pBrush,
899899

900900
fltx4 crossPlane = OrSIMD(XorSIMD(startOutMins,endOutMins), XorSIMD(startOutMaxs,endOutMaxs));
901901
// now build the per-axis interval of t for intersections
902-
fltx4 invDelta = LoadUnaligned3SIMD(pTraceInfo->m_invDelta.Base());
902+
fltx4 invDelta = LoadAlignedSIMD(pTraceInfo->m_invDelta.Base());
903903
fltx4 tmins = MulSIMD( offsetMinsExpanded, invDelta );
904904
fltx4 tmaxs = MulSIMD( offsetMaxsExpanded, invDelta );
905905
// now sort the interval per axis
@@ -1037,9 +1037,9 @@ bool IntersectRayWithBox( const Ray_t &ray, const VectorAligned &inInvDelta, con
10371037
pTrace->fraction = 1.0f;
10381038

10391039
// Load the ray/box parameters into SIMD registers (all loads below are aligned)
1040-
fltx4 start = LoadUnaligned3SIMD(ray.m_Start.Base());
1041-
fltx4 extents = LoadUnaligned3SIMD(ray.m_Extents.Base());
1042-
fltx4 delta = LoadUnaligned3SIMD(ray.m_Delta.Base());
1040+
fltx4 start = LoadAlignedSIMD(ray.m_Start.Base());
1041+
fltx4 extents = LoadAlignedSIMD(ray.m_Extents.Base());
1042+
fltx4 delta = LoadAlignedSIMD(ray.m_Delta.Base());
10431043
fltx4 boxMins = LoadAlignedSIMD( inBoxMins.Base() );
10441044
fltx4 boxMaxs = LoadAlignedSIMD( inBoxMaxs.Base() );
10451045

@@ -1372,9 +1372,9 @@ void FASTCALL CM_ClipBoxToBrush( TraceInfo_t * RESTRICT pTraceInfo, const cbrush
13721372

13731373
inline bool IsTraceBoxIntersectingBoxBrush( TraceInfo_t *pTraceInfo, cboxbrush_t *pBox )
13741374
{
1375-
fltx4 start = LoadUnaligned3SIMD(pTraceInfo->m_start.Base());
1376-
fltx4 mins = LoadUnaligned3SIMD(pTraceInfo->m_mins.Base());
1377-
fltx4 maxs = LoadUnaligned3SIMD(pTraceInfo->m_maxs.Base());
1375+
fltx4 start = LoadAlignedSIMD(pTraceInfo->m_start.Base());
1376+
fltx4 mins = LoadAlignedSIMD(pTraceInfo->m_mins.Base());
1377+
fltx4 maxs = LoadAlignedSIMD(pTraceInfo->m_maxs.Base());
13781378

13791379
fltx4 boxMins = LoadAlignedSIMD( pBox->mins.Base() );
13801380
fltx4 boxMaxs = LoadAlignedSIMD( pBox->maxs.Base() );
@@ -1569,15 +1569,15 @@ void FASTCALL CM_TraceToLeaf( TraceInfo_t * RESTRICT pTraceInfo, int ndxLeaf, fl
15691569
if (IsX360())
15701570
{
15711571
// set up some relatively constant variables we'll use in the loop below
1572-
fltx4 traceStart = LoadUnaligned3SIMD(pTraceInfo->m_start.Base());
1573-
fltx4 traceDelta = LoadUnaligned3SIMD(pTraceInfo->m_delta.Base());
1574-
fltx4 traceInvDelta = LoadUnaligned3SIMD(pTraceInfo->m_invDelta.Base());
1572+
fltx4 traceStart = LoadAlignedSIMD(pTraceInfo->m_start.Base());
1573+
fltx4 traceDelta = LoadAlignedSIMD(pTraceInfo->m_delta.Base());
1574+
fltx4 traceInvDelta = LoadAlignedSIMD(pTraceInfo->m_invDelta.Base());
15751575
static const fltx4 vecEpsilon = {DISPCOLL_DIST_EPSILON,DISPCOLL_DIST_EPSILON,DISPCOLL_DIST_EPSILON,DISPCOLL_DIST_EPSILON};
15761576
// only used in !IS_POINT version:
15771577
fltx4 extents;
15781578
if (!IS_POINT)
15791579
{
1580-
extents = LoadUnaligned3SIMD(pTraceInfo->m_extents.Base());
1580+
extents = LoadAlignedSIMD(pTraceInfo->m_extents.Base());
15811581
}
15821582

15831583
// TODO: this loop probably ought to be unrolled so that we can make a more efficient

engine/cmodel_private.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,13 @@ struct TraceInfo_t
4242
m_nCheckDepth = -1;
4343
}
4444

45-
Vector m_start;
46-
Vector m_end;
47-
Vector m_mins;
48-
Vector m_maxs;
49-
Vector m_extents;
50-
Vector m_delta;
51-
Vector m_invDelta;
45+
VectorAligned m_start;
46+
VectorAligned m_end;
47+
VectorAligned m_mins;
48+
VectorAligned m_maxs;
49+
VectorAligned m_extents;
50+
VectorAligned m_delta;
51+
VectorAligned m_invDelta;
5252

5353
trace_t m_trace;
5454
trace_t m_stabTrace;

engine/gl_rsurf.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4934,7 +4934,7 @@ static bool EnumerateLeafInBox_R(mnode_t * RESTRICT node, const EnumLeafBoxInfo_
49344934
*/
49354935

49364936
// take advantage of high throughput/high latency
4937-
fltx4 planeNormal = LoadUnaligned3SIMD( plane->normal.Base() );
4937+
fltx4 planeNormal = LoadAlignedSIMD( plane->normal.Base() );
49384938
fltx4 vecBoxMin = LoadAlignedSIMD(pInfo->m_vecBoxMin);
49394939
fltx4 vecBoxMax = LoadAlignedSIMD(pInfo->m_vecBoxMax);
49404940
fltx4 cornermin, cornermax;

engine/spatialpartition.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -987,7 +987,7 @@ class CPartitionVisitor
987987
int m_iTree;
988988
};
989989

990-
990+
/*
991991
class CIntersectPoint : public CPartitionVisitor
992992
{
993993
public:
@@ -1009,7 +1009,7 @@ class CIntersectPoint : public CPartitionVisitor
10091009
private:
10101010
fltx4 m_f4Point;
10111011
};
1012-
1012+
*/
10131013

10141014
class CIntersectBox : public CPartitionVisitor
10151015
{
@@ -1040,8 +1040,8 @@ class CIntersectRay : public CPartitionVisitor
10401040
public:
10411041
CIntersectRay( CVoxelTree *pPartition, const Ray_t &ray, const Vector &vecInvDelta ) : CPartitionVisitor( pPartition )
10421042
{
1043-
m_f4Start = LoadUnaligned3SIMD( ray.m_Start.Base() );
1044-
m_f4Delta = LoadUnaligned3SIMD( ray.m_Delta.Base() );
1043+
m_f4Start = LoadAlignedSIMD( ray.m_Start.Base() );
1044+
m_f4Delta = LoadAlignedSIMD( ray.m_Delta.Base() );
10451045
m_f4InvDelta = LoadUnaligned3SIMD( vecInvDelta.Base() );
10461046
}
10471047

@@ -1069,10 +1069,10 @@ class CIntersectSweptBox : public CPartitionVisitor
10691069
public:
10701070
CIntersectSweptBox( CVoxelTree *pPartition, const Ray_t &ray, const Vector &vecInvDelta ) : CPartitionVisitor( pPartition )
10711071
{
1072-
m_f4Start = LoadUnaligned3SIMD( ray.m_Start.Base() );
1073-
m_f4Delta = LoadUnaligned3SIMD( ray.m_Delta.Base() );
1072+
m_f4Start = LoadAlignedSIMD( ray.m_Start.Base() );
1073+
m_f4Delta = LoadAlignedSIMD( ray.m_Delta.Base() );
1074+
m_f4Extents = LoadAlignedSIMD( ray.m_Extents.Base() );
10741075
m_f4InvDelta = LoadUnaligned3SIMD( vecInvDelta.Base() );
1075-
m_f4Extents = LoadUnaligned3SIMD( ray.m_Extents.Base() );
10761076
}
10771077

10781078
bool Intersects( const float *pMins, const float *pMaxs ) const

public/mathlib/mathlib.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ inline T clamp( T const &val, T const &minVal, T const &maxVal )
114114
// FIXME: this should move to a different file
115115
struct cplane_t
116116
{
117-
Vector normal;
117+
VectorAligned normal;
118118
float dist;
119119
byte type; // for fast side tests
120120
byte signbits; // signx + (signy<<1) + (signz<<2)

0 commit comments

Comments
 (0)