d3d10/include/directxmath/directxmathmisc.inl
Zack Buhman 7985ad3434 remove dependency on d3dx
This is mostly because it is not possible to statically link d3dx10.

Coincidentally, the directxmath API appears to be superior to d3dxmath in most
ways.
2026-01-13 12:50:02 -06:00

2426 lines
78 KiB
C++

//-------------------------------------------------------------------------------------
// DirectXMathMisc.inl -- SIMD C++ Math library
//
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
//
// http://go.microsoft.com/fwlink/?LinkID=615560
//-------------------------------------------------------------------------------------
#pragma once
/****************************************************************************
*
* Quaternion
*
****************************************************************************/
//------------------------------------------------------------------------------
// Comparison operations
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
inline bool XM_CALLCONV XMQuaternionEqual
(
FXMVECTOR Q1,
FXMVECTOR Q2
) noexcept
{
return XMVector4Equal(Q1, Q2);
}
//------------------------------------------------------------------------------
inline bool XM_CALLCONV XMQuaternionNotEqual
(
FXMVECTOR Q1,
FXMVECTOR Q2
) noexcept
{
return XMVector4NotEqual(Q1, Q2);
}
//------------------------------------------------------------------------------
inline bool XM_CALLCONV XMQuaternionIsNaN(FXMVECTOR Q) noexcept
{
return XMVector4IsNaN(Q);
}
//------------------------------------------------------------------------------
inline bool XM_CALLCONV XMQuaternionIsInfinite(FXMVECTOR Q) noexcept
{
return XMVector4IsInfinite(Q);
}
//------------------------------------------------------------------------------
inline bool XM_CALLCONV XMQuaternionIsIdentity(FXMVECTOR Q) noexcept
{
return XMVector4Equal(Q, g_XMIdentityR3.v);
}
//------------------------------------------------------------------------------
// Computation operations
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionDot
(
FXMVECTOR Q1,
FXMVECTOR Q2
) noexcept
{
return XMVector4Dot(Q1, Q2);
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionMultiply
(
FXMVECTOR Q1,
FXMVECTOR Q2
) noexcept
{
// Returns the product Q2*Q1 (which is the concatenation of a rotation Q1 followed by the rotation Q2)
// [ (Q2.w * Q1.x) + (Q2.x * Q1.w) + (Q2.y * Q1.z) - (Q2.z * Q1.y),
// (Q2.w * Q1.y) - (Q2.x * Q1.z) + (Q2.y * Q1.w) + (Q2.z * Q1.x),
// (Q2.w * Q1.z) + (Q2.x * Q1.y) - (Q2.y * Q1.x) + (Q2.z * Q1.w),
// (Q2.w * Q1.w) - (Q2.x * Q1.x) - (Q2.y * Q1.y) - (Q2.z * Q1.z) ]
#if defined(_XM_NO_INTRINSICS_)
XMVECTORF32 Result = { { {
(Q2.vector4_f32[3] * Q1.vector4_f32[0]) + (Q2.vector4_f32[0] * Q1.vector4_f32[3]) + (Q2.vector4_f32[1] * Q1.vector4_f32[2]) - (Q2.vector4_f32[2] * Q1.vector4_f32[1]),
(Q2.vector4_f32[3] * Q1.vector4_f32[1]) - (Q2.vector4_f32[0] * Q1.vector4_f32[2]) + (Q2.vector4_f32[1] * Q1.vector4_f32[3]) + (Q2.vector4_f32[2] * Q1.vector4_f32[0]),
(Q2.vector4_f32[3] * Q1.vector4_f32[2]) + (Q2.vector4_f32[0] * Q1.vector4_f32[1]) - (Q2.vector4_f32[1] * Q1.vector4_f32[0]) + (Q2.vector4_f32[2] * Q1.vector4_f32[3]),
(Q2.vector4_f32[3] * Q1.vector4_f32[3]) - (Q2.vector4_f32[0] * Q1.vector4_f32[0]) - (Q2.vector4_f32[1] * Q1.vector4_f32[1]) - (Q2.vector4_f32[2] * Q1.vector4_f32[2])
} } };
return Result.v;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
static const XMVECTORF32 ControlWZYX = { { { 1.0f, -1.0f, 1.0f, -1.0f } } };
static const XMVECTORF32 ControlZWXY = { { { 1.0f, 1.0f, -1.0f, -1.0f } } };
static const XMVECTORF32 ControlYXWZ = { { { -1.0f, 1.0f, 1.0f, -1.0f } } };
float32x2_t Q2L = vget_low_f32(Q2);
float32x2_t Q2H = vget_high_f32(Q2);
float32x4_t Q2X = vdupq_lane_f32(Q2L, 0);
float32x4_t Q2Y = vdupq_lane_f32(Q2L, 1);
float32x4_t Q2Z = vdupq_lane_f32(Q2H, 0);
XMVECTOR vResult = vmulq_lane_f32(Q1, Q2H, 1);
// Mul by Q1WZYX
float32x4_t vTemp = vrev64q_f32(Q1);
vTemp = vcombine_f32(vget_high_f32(vTemp), vget_low_f32(vTemp));
Q2X = vmulq_f32(Q2X, vTemp);
vResult = vmlaq_f32(vResult, Q2X, ControlWZYX);
// Mul by Q1ZWXY
vTemp = vrev64q_u32(vTemp);
Q2Y = vmulq_f32(Q2Y, vTemp);
vResult = vmlaq_f32(vResult, Q2Y, ControlZWXY);
// Mul by Q1YXWZ
vTemp = vrev64q_u32(vTemp);
vTemp = vcombine_f32(vget_high_f32(vTemp), vget_low_f32(vTemp));
Q2Z = vmulq_f32(Q2Z, vTemp);
vResult = vmlaq_f32(vResult, Q2Z, ControlYXWZ);
return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
static const XMVECTORF32 ControlWZYX = { { { 1.0f, -1.0f, 1.0f, -1.0f } } };
static const XMVECTORF32 ControlZWXY = { { { 1.0f, 1.0f, -1.0f, -1.0f } } };
static const XMVECTORF32 ControlYXWZ = { { { -1.0f, 1.0f, 1.0f, -1.0f } } };
// Copy to SSE registers and use as few as possible for x86
XMVECTOR Q2X = Q2;
XMVECTOR Q2Y = Q2;
XMVECTOR Q2Z = Q2;
XMVECTOR vResult = Q2;
// Splat with one instruction
vResult = XM_PERMUTE_PS(vResult, _MM_SHUFFLE(3, 3, 3, 3));
Q2X = XM_PERMUTE_PS(Q2X, _MM_SHUFFLE(0, 0, 0, 0));
Q2Y = XM_PERMUTE_PS(Q2Y, _MM_SHUFFLE(1, 1, 1, 1));
Q2Z = XM_PERMUTE_PS(Q2Z, _MM_SHUFFLE(2, 2, 2, 2));
// Retire Q1 and perform Q1*Q2W
vResult = _mm_mul_ps(vResult, Q1);
XMVECTOR Q1Shuffle = Q1;
// Shuffle the copies of Q1
Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle, _MM_SHUFFLE(0, 1, 2, 3));
// Mul by Q1WZYX
Q2X = _mm_mul_ps(Q2X, Q1Shuffle);
Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle, _MM_SHUFFLE(2, 3, 0, 1));
// Flip the signs on y and z
vResult = XM_FMADD_PS(Q2X, ControlWZYX, vResult);
// Mul by Q1ZWXY
Q2Y = _mm_mul_ps(Q2Y, Q1Shuffle);
Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle, _MM_SHUFFLE(0, 1, 2, 3));
// Flip the signs on z and w
Q2Y = _mm_mul_ps(Q2Y, ControlZWXY);
// Mul by Q1YXWZ
Q2Z = _mm_mul_ps(Q2Z, Q1Shuffle);
// Flip the signs on x and w
Q2Y = XM_FMADD_PS(Q2Z, ControlYXWZ, Q2Y);
vResult = _mm_add_ps(vResult, Q2Y);
return vResult;
#endif
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionLengthSq(FXMVECTOR Q) noexcept
{
return XMVector4LengthSq(Q);
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionReciprocalLength(FXMVECTOR Q) noexcept
{
return XMVector4ReciprocalLength(Q);
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionLength(FXMVECTOR Q) noexcept
{
return XMVector4Length(Q);
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionNormalizeEst(FXMVECTOR Q) noexcept
{
return XMVector4NormalizeEst(Q);
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionNormalize(FXMVECTOR Q) noexcept
{
return XMVector4Normalize(Q);
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionConjugate(FXMVECTOR Q) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTORF32 Result = { { {
-Q.vector4_f32[0],
-Q.vector4_f32[1],
-Q.vector4_f32[2],
Q.vector4_f32[3]
} } };
return Result.v;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
static const XMVECTORF32 NegativeOne3 = { { { -1.0f, -1.0f, -1.0f, 1.0f } } };
return vmulq_f32(Q, NegativeOne3.v);
#elif defined(_XM_SSE_INTRINSICS_)
static const XMVECTORF32 NegativeOne3 = { { { -1.0f, -1.0f, -1.0f, 1.0f } } };
return _mm_mul_ps(Q, NegativeOne3);
#endif
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionInverse(FXMVECTOR Q) noexcept
{
const XMVECTOR Zero = XMVectorZero();
XMVECTOR L = XMVector4LengthSq(Q);
XMVECTOR Conjugate = XMQuaternionConjugate(Q);
XMVECTOR Control = XMVectorLessOrEqual(L, g_XMEpsilon.v);
XMVECTOR Result = XMVectorDivide(Conjugate, L);
Result = XMVectorSelect(Result, Zero, Control);
return Result;
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionLn(FXMVECTOR Q) noexcept
{
static const XMVECTORF32 OneMinusEpsilon = { { { 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f } } };
XMVECTOR QW = XMVectorSplatW(Q);
XMVECTOR Q0 = XMVectorSelect(g_XMSelect1110.v, Q, g_XMSelect1110.v);
XMVECTOR ControlW = XMVectorInBounds(QW, OneMinusEpsilon.v);
XMVECTOR Theta = XMVectorACos(QW);
XMVECTOR SinTheta = XMVectorSin(Theta);
XMVECTOR S = XMVectorDivide(Theta, SinTheta);
XMVECTOR Result = XMVectorMultiply(Q0, S);
Result = XMVectorSelect(Q0, Result, ControlW);
return Result;
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionExp(FXMVECTOR Q) noexcept
{
XMVECTOR Theta = XMVector3Length(Q);
XMVECTOR SinTheta, CosTheta;
XMVectorSinCos(&SinTheta, &CosTheta, Theta);
XMVECTOR S = XMVectorDivide(SinTheta, Theta);
XMVECTOR Result = XMVectorMultiply(Q, S);
const XMVECTOR Zero = XMVectorZero();
XMVECTOR Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon.v);
Result = XMVectorSelect(Result, Q, Control);
Result = XMVectorSelect(CosTheta, Result, g_XMSelect1110.v);
return Result;
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionSlerp
(
FXMVECTOR Q0,
FXMVECTOR Q1,
float t
) noexcept
{
XMVECTOR T = XMVectorReplicate(t);
return XMQuaternionSlerpV(Q0, Q1, T);
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionSlerpV
(
FXMVECTOR Q0,
FXMVECTOR Q1,
FXMVECTOR T
) noexcept
{
assert((XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)));
// Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega)
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
const XMVECTORF32 OneMinusEpsilon = { { { 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f } } };
XMVECTOR CosOmega = XMQuaternionDot(Q0, Q1);
const XMVECTOR Zero = XMVectorZero();
XMVECTOR Control = XMVectorLess(CosOmega, Zero);
XMVECTOR Sign = XMVectorSelect(g_XMOne.v, g_XMNegativeOne.v, Control);
CosOmega = XMVectorMultiply(CosOmega, Sign);
Control = XMVectorLess(CosOmega, OneMinusEpsilon);
XMVECTOR SinOmega = XMVectorNegativeMultiplySubtract(CosOmega, CosOmega, g_XMOne.v);
SinOmega = XMVectorSqrt(SinOmega);
XMVECTOR Omega = XMVectorATan2(SinOmega, CosOmega);
XMVECTOR SignMask = XMVectorSplatSignMask();
XMVECTOR V01 = XMVectorShiftLeft(T, Zero, 2);
SignMask = XMVectorShiftLeft(SignMask, Zero, 3);
V01 = XMVectorXorInt(V01, SignMask);
V01 = XMVectorAdd(g_XMIdentityR0.v, V01);
XMVECTOR InvSinOmega = XMVectorReciprocal(SinOmega);
XMVECTOR S0 = XMVectorMultiply(V01, Omega);
S0 = XMVectorSin(S0);
S0 = XMVectorMultiply(S0, InvSinOmega);
S0 = XMVectorSelect(V01, S0, Control);
XMVECTOR S1 = XMVectorSplatY(S0);
S0 = XMVectorSplatX(S0);
S1 = XMVectorMultiply(S1, Sign);
XMVECTOR Result = XMVectorMultiply(Q0, S0);
Result = XMVectorMultiplyAdd(Q1, S1, Result);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
static const XMVECTORF32 OneMinusEpsilon = { { { 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f } } };
static const XMVECTORU32 SignMask2 = { { { 0x80000000, 0x00000000, 0x00000000, 0x00000000 } } };
XMVECTOR CosOmega = XMQuaternionDot(Q0, Q1);
const XMVECTOR Zero = XMVectorZero();
XMVECTOR Control = XMVectorLess(CosOmega, Zero);
XMVECTOR Sign = XMVectorSelect(g_XMOne, g_XMNegativeOne, Control);
CosOmega = _mm_mul_ps(CosOmega, Sign);
Control = XMVectorLess(CosOmega, OneMinusEpsilon);
XMVECTOR SinOmega = _mm_mul_ps(CosOmega, CosOmega);
SinOmega = _mm_sub_ps(g_XMOne, SinOmega);
SinOmega = _mm_sqrt_ps(SinOmega);
XMVECTOR Omega = XMVectorATan2(SinOmega, CosOmega);
XMVECTOR V01 = XM_PERMUTE_PS(T, _MM_SHUFFLE(2, 3, 0, 1));
V01 = _mm_and_ps(V01, g_XMMaskXY);
V01 = _mm_xor_ps(V01, SignMask2);
V01 = _mm_add_ps(g_XMIdentityR0, V01);
XMVECTOR S0 = _mm_mul_ps(V01, Omega);
S0 = XMVectorSin(S0);
S0 = _mm_div_ps(S0, SinOmega);
S0 = XMVectorSelect(V01, S0, Control);
XMVECTOR S1 = XMVectorSplatY(S0);
S0 = XMVectorSplatX(S0);
S1 = _mm_mul_ps(S1, Sign);
XMVECTOR Result = _mm_mul_ps(Q0, S0);
S1 = _mm_mul_ps(S1, Q1);
Result = _mm_add_ps(Result, S1);
return Result;
#endif
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionSquad
(
FXMVECTOR Q0,
FXMVECTOR Q1,
FXMVECTOR Q2,
GXMVECTOR Q3,
float t
) noexcept
{
XMVECTOR T = XMVectorReplicate(t);
return XMQuaternionSquadV(Q0, Q1, Q2, Q3, T);
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionSquadV
(
FXMVECTOR Q0,
FXMVECTOR Q1,
FXMVECTOR Q2,
GXMVECTOR Q3,
HXMVECTOR T
) noexcept
{
assert((XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)));
XMVECTOR TP = T;
const XMVECTOR Two = XMVectorSplatConstant(2, 0);
XMVECTOR Q03 = XMQuaternionSlerpV(Q0, Q3, T);
XMVECTOR Q12 = XMQuaternionSlerpV(Q1, Q2, T);
TP = XMVectorNegativeMultiplySubtract(TP, TP, TP);
TP = XMVectorMultiply(TP, Two);
XMVECTOR Result = XMQuaternionSlerpV(Q03, Q12, TP);
return Result;
}
//------------------------------------------------------------------------------
_Use_decl_annotations_
inline void XM_CALLCONV XMQuaternionSquadSetup
(
XMVECTOR* pA,
XMVECTOR* pB,
XMVECTOR* pC,
FXMVECTOR Q0,
FXMVECTOR Q1,
FXMVECTOR Q2,
GXMVECTOR Q3
) noexcept
{
assert(pA);
assert(pB);
assert(pC);
XMVECTOR LS12 = XMQuaternionLengthSq(XMVectorAdd(Q1, Q2));
XMVECTOR LD12 = XMQuaternionLengthSq(XMVectorSubtract(Q1, Q2));
XMVECTOR SQ2 = XMVectorNegate(Q2);
XMVECTOR Control1 = XMVectorLess(LS12, LD12);
SQ2 = XMVectorSelect(Q2, SQ2, Control1);
XMVECTOR LS01 = XMQuaternionLengthSq(XMVectorAdd(Q0, Q1));
XMVECTOR LD01 = XMQuaternionLengthSq(XMVectorSubtract(Q0, Q1));
XMVECTOR SQ0 = XMVectorNegate(Q0);
XMVECTOR LS23 = XMQuaternionLengthSq(XMVectorAdd(SQ2, Q3));
XMVECTOR LD23 = XMQuaternionLengthSq(XMVectorSubtract(SQ2, Q3));
XMVECTOR SQ3 = XMVectorNegate(Q3);
XMVECTOR Control0 = XMVectorLess(LS01, LD01);
XMVECTOR Control2 = XMVectorLess(LS23, LD23);
SQ0 = XMVectorSelect(Q0, SQ0, Control0);
SQ3 = XMVectorSelect(Q3, SQ3, Control2);
XMVECTOR InvQ1 = XMQuaternionInverse(Q1);
XMVECTOR InvQ2 = XMQuaternionInverse(SQ2);
XMVECTOR LnQ0 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ0));
XMVECTOR LnQ2 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ2));
XMVECTOR LnQ1 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, Q1));
XMVECTOR LnQ3 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, SQ3));
const XMVECTOR NegativeOneQuarter = XMVectorSplatConstant(-1, 2);
XMVECTOR ExpQ02 = XMVectorMultiply(XMVectorAdd(LnQ0, LnQ2), NegativeOneQuarter);
XMVECTOR ExpQ13 = XMVectorMultiply(XMVectorAdd(LnQ1, LnQ3), NegativeOneQuarter);
ExpQ02 = XMQuaternionExp(ExpQ02);
ExpQ13 = XMQuaternionExp(ExpQ13);
*pA = XMQuaternionMultiply(Q1, ExpQ02);
*pB = XMQuaternionMultiply(SQ2, ExpQ13);
*pC = SQ2;
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionBaryCentric
(
FXMVECTOR Q0,
FXMVECTOR Q1,
FXMVECTOR Q2,
float f,
float g
) noexcept
{
float s = f + g;
XMVECTOR Result;
if ((s < 0.00001f) && (s > -0.00001f))
{
Result = Q0;
}
else
{
XMVECTOR Q01 = XMQuaternionSlerp(Q0, Q1, s);
XMVECTOR Q02 = XMQuaternionSlerp(Q0, Q2, s);
Result = XMQuaternionSlerp(Q01, Q02, g / s);
}
return Result;
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionBaryCentricV
(
FXMVECTOR Q0,
FXMVECTOR Q1,
FXMVECTOR Q2,
GXMVECTOR F,
HXMVECTOR G
) noexcept
{
assert((XMVectorGetY(F) == XMVectorGetX(F)) && (XMVectorGetZ(F) == XMVectorGetX(F)) && (XMVectorGetW(F) == XMVectorGetX(F)));
assert((XMVectorGetY(G) == XMVectorGetX(G)) && (XMVectorGetZ(G) == XMVectorGetX(G)) && (XMVectorGetW(G) == XMVectorGetX(G)));
const XMVECTOR Epsilon = XMVectorSplatConstant(1, 16);
XMVECTOR S = XMVectorAdd(F, G);
XMVECTOR Result;
if (XMVector4InBounds(S, Epsilon))
{
Result = Q0;
}
else
{
XMVECTOR Q01 = XMQuaternionSlerpV(Q0, Q1, S);
XMVECTOR Q02 = XMQuaternionSlerpV(Q0, Q2, S);
XMVECTOR GS = XMVectorReciprocal(S);
GS = XMVectorMultiply(G, GS);
Result = XMQuaternionSlerpV(Q01, Q02, GS);
}
return Result;
}
//------------------------------------------------------------------------------
// Transformation operations
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionIdentity() noexcept
{
return g_XMIdentityR3.v;
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionRotationRollPitchYaw
(
float Pitch,
float Yaw,
float Roll
) noexcept
{
XMVECTOR Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f);
XMVECTOR Q = XMQuaternionRotationRollPitchYawFromVector(Angles);
return Q;
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionRotationRollPitchYawFromVector
(
FXMVECTOR Angles // <Pitch, Yaw, Roll, 0>
) noexcept
{
static const XMVECTORF32 Sign = { { { 1.0f, -1.0f, -1.0f, 1.0f } } };
XMVECTOR HalfAngles = XMVectorMultiply(Angles, g_XMOneHalf.v);
XMVECTOR SinAngles, CosAngles;
XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles);
XMVECTOR P0 = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X>(SinAngles, CosAngles);
XMVECTOR Y0 = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y>(SinAngles, CosAngles);
XMVECTOR R0 = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z>(SinAngles, CosAngles);
XMVECTOR P1 = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X>(CosAngles, SinAngles);
XMVECTOR Y1 = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y>(CosAngles, SinAngles);
XMVECTOR R1 = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z>(CosAngles, SinAngles);
XMVECTOR Q1 = XMVectorMultiply(P1, Sign.v);
XMVECTOR Q0 = XMVectorMultiply(P0, Y0);
Q1 = XMVectorMultiply(Q1, Y1);
Q0 = XMVectorMultiply(Q0, R0);
XMVECTOR Q = XMVectorMultiplyAdd(Q1, R1, Q0);
return Q;
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionRotationNormal
(
FXMVECTOR NormalAxis,
float Angle
) noexcept
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
XMVECTOR N = XMVectorSelect(g_XMOne.v, NormalAxis, g_XMSelect1110.v);
float SinV, CosV;
XMScalarSinCos(&SinV, &CosV, 0.5f * Angle);
XMVECTOR Scale = XMVectorSet(SinV, SinV, SinV, CosV);
return XMVectorMultiply(N, Scale);
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR N = _mm_and_ps(NormalAxis, g_XMMask3);
N = _mm_or_ps(N, g_XMIdentityR3);
XMVECTOR Scale = _mm_set_ps1(0.5f * Angle);
XMVECTOR vSine;
XMVECTOR vCosine;
XMVectorSinCos(&vSine, &vCosine, Scale);
Scale = _mm_and_ps(vSine, g_XMMask3);
vCosine = _mm_and_ps(vCosine, g_XMMaskW);
Scale = _mm_or_ps(Scale, vCosine);
N = _mm_mul_ps(N, Scale);
return N;
#endif
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionRotationAxis
(
FXMVECTOR Axis,
float Angle
) noexcept
{
assert(!XMVector3Equal(Axis, XMVectorZero()));
assert(!XMVector3IsInfinite(Axis));
XMVECTOR Normal = XMVector3Normalize(Axis);
XMVECTOR Q = XMQuaternionRotationNormal(Normal, Angle);
return Q;
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMQuaternionRotationMatrix(FXMMATRIX M) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTORF32 q;
float r22 = M.m[2][2];
if (r22 <= 0.f) // x^2 + y^2 >= z^2 + w^2
{
float dif10 = M.m[1][1] - M.m[0][0];
float omr22 = 1.f - r22;
if (dif10 <= 0.f) // x^2 >= y^2
{
float fourXSqr = omr22 - dif10;
float inv4x = 0.5f / sqrtf(fourXSqr);
q.f[0] = fourXSqr * inv4x;
q.f[1] = (M.m[0][1] + M.m[1][0]) * inv4x;
q.f[2] = (M.m[0][2] + M.m[2][0]) * inv4x;
q.f[3] = (M.m[1][2] - M.m[2][1]) * inv4x;
}
else // y^2 >= x^2
{
float fourYSqr = omr22 + dif10;
float inv4y = 0.5f / sqrtf(fourYSqr);
q.f[0] = (M.m[0][1] + M.m[1][0]) * inv4y;
q.f[1] = fourYSqr * inv4y;
q.f[2] = (M.m[1][2] + M.m[2][1]) * inv4y;
q.f[3] = (M.m[2][0] - M.m[0][2]) * inv4y;
}
}
else // z^2 + w^2 >= x^2 + y^2
{
float sum10 = M.m[1][1] + M.m[0][0];
float opr22 = 1.f + r22;
if (sum10 <= 0.f) // z^2 >= w^2
{
float fourZSqr = opr22 - sum10;
float inv4z = 0.5f / sqrtf(fourZSqr);
q.f[0] = (M.m[0][2] + M.m[2][0]) * inv4z;
q.f[1] = (M.m[1][2] + M.m[2][1]) * inv4z;
q.f[2] = fourZSqr * inv4z;
q.f[3] = (M.m[0][1] - M.m[1][0]) * inv4z;
}
else // w^2 >= z^2
{
float fourWSqr = opr22 + sum10;
float inv4w = 0.5f / sqrtf(fourWSqr);
q.f[0] = (M.m[1][2] - M.m[2][1]) * inv4w;
q.f[1] = (M.m[2][0] - M.m[0][2]) * inv4w;
q.f[2] = (M.m[0][1] - M.m[1][0]) * inv4w;
q.f[3] = fourWSqr * inv4w;
}
}
return q.v;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
static const XMVECTORF32 XMPMMP = { { { +1.0f, -1.0f, -1.0f, +1.0f } } };
static const XMVECTORF32 XMMPMP = { { { -1.0f, +1.0f, -1.0f, +1.0f } } };
static const XMVECTORF32 XMMMPP = { { { -1.0f, -1.0f, +1.0f, +1.0f } } };
static const XMVECTORU32 Select0110 = { { { XM_SELECT_0, XM_SELECT_1, XM_SELECT_1, XM_SELECT_0 } } };
static const XMVECTORU32 Select0010 = { { { XM_SELECT_0, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0 } } };
XMVECTOR r0 = M.r[0];
XMVECTOR r1 = M.r[1];
XMVECTOR r2 = M.r[2];
XMVECTOR r00 = vdupq_lane_f32(vget_low_f32(r0), 0);
XMVECTOR r11 = vdupq_lane_f32(vget_low_f32(r1), 1);
XMVECTOR r22 = vdupq_lane_f32(vget_high_f32(r2), 0);
// x^2 >= y^2 equivalent to r11 - r00 <= 0
XMVECTOR r11mr00 = vsubq_f32(r11, r00);
XMVECTOR x2gey2 = vcleq_f32(r11mr00, g_XMZero);
// z^2 >= w^2 equivalent to r11 + r00 <= 0
XMVECTOR r11pr00 = vaddq_f32(r11, r00);
XMVECTOR z2gew2 = vcleq_f32(r11pr00, g_XMZero);
// x^2 + y^2 >= z^2 + w^2 equivalent to r22 <= 0
XMVECTOR x2py2gez2pw2 = vcleq_f32(r22, g_XMZero);
// (4*x^2, 4*y^2, 4*z^2, 4*w^2)
XMVECTOR t0 = vmulq_f32(XMPMMP, r00);
XMVECTOR x2y2z2w2 = vmlaq_f32(t0, XMMPMP, r11);
x2y2z2w2 = vmlaq_f32(x2y2z2w2, XMMMPP, r22);
x2y2z2w2 = vaddq_f32(x2y2z2w2, g_XMOne);
// (r01, r02, r12, r11)
t0 = vextq_f32(r0, r0, 1);
XMVECTOR t1 = vextq_f32(r1, r1, 1);
t0 = vcombine_f32(vget_low_f32(t0), vrev64_f32(vget_low_f32(t1)));
// (r10, r20, r21, r10)
t1 = vextq_f32(r2, r2, 3);
XMVECTOR r10 = vdupq_lane_f32(vget_low_f32(r1), 0);
t1 = vbslq_f32(Select0110, t1, r10);
// (4*x*y, 4*x*z, 4*y*z, unused)
XMVECTOR xyxzyz = vaddq_f32(t0, t1);
// (r21, r20, r10, r10)
t0 = vcombine_f32(vrev64_f32(vget_low_f32(r2)), vget_low_f32(r10));
// (r12, r02, r01, r12)
XMVECTOR t2 = vcombine_f32(vrev64_f32(vget_high_f32(r0)), vrev64_f32(vget_low_f32(r0)));
XMVECTOR t3 = vdupq_lane_f32(vget_high_f32(r1), 0);
t1 = vbslq_f32(Select0110, t2, t3);
// (4*x*w, 4*y*w, 4*z*w, unused)
XMVECTOR xwywzw = vsubq_f32(t0, t1);
xwywzw = vmulq_f32(XMMPMP, xwywzw);
// (4*x*x, 4*x*y, 4*x*z, 4*x*w)
t0 = vextq_f32(xyxzyz, xyxzyz, 3);
t1 = vbslq_f32(Select0110, t0, x2y2z2w2);
t2 = vdupq_lane_f32(vget_low_f32(xwywzw), 0);
XMVECTOR tensor0 = vbslq_f32(g_XMSelect1110, t1, t2);
// (4*y*x, 4*y*y, 4*y*z, 4*y*w)
t0 = vbslq_f32(g_XMSelect1011, xyxzyz, x2y2z2w2);
t1 = vdupq_lane_f32(vget_low_f32(xwywzw), 1);
XMVECTOR tensor1 = vbslq_f32(g_XMSelect1110, t0, t1);
// (4*z*x, 4*z*y, 4*z*z, 4*z*w)
t0 = vextq_f32(xyxzyz, xyxzyz, 1);
t1 = vcombine_f32(vget_low_f32(t0), vrev64_f32(vget_high_f32(xwywzw)));
XMVECTOR tensor2 = vbslq_f32(Select0010, x2y2z2w2, t1);
// (4*w*x, 4*w*y, 4*w*z, 4*w*w)
XMVECTOR tensor3 = vbslq_f32(g_XMSelect1110, xwywzw, x2y2z2w2);
// Select the row of the tensor-product matrix that has the largest
// magnitude.
t0 = vbslq_f32(x2gey2, tensor0, tensor1);
t1 = vbslq_f32(z2gew2, tensor2, tensor3);
t2 = vbslq_f32(x2py2gez2pw2, t0, t1);
// Normalize the row. No division by zero is possible because the
// quaternion is unit-length (and the row is a nonzero multiple of
// the quaternion).
t0 = XMVector4Length(t2);
return XMVectorDivide(t2, t0);
#elif defined(_XM_SSE_INTRINSICS_)
static const XMVECTORF32 XMPMMP = { { { +1.0f, -1.0f, -1.0f, +1.0f } } };
static const XMVECTORF32 XMMPMP = { { { -1.0f, +1.0f, -1.0f, +1.0f } } };
static const XMVECTORF32 XMMMPP = { { { -1.0f, -1.0f, +1.0f, +1.0f } } };
XMVECTOR r0 = M.r[0]; // (r00, r01, r02, 0)
XMVECTOR r1 = M.r[1]; // (r10, r11, r12, 0)
XMVECTOR r2 = M.r[2]; // (r20, r21, r22, 0)
// (r00, r00, r00, r00)
XMVECTOR r00 = XM_PERMUTE_PS(r0, _MM_SHUFFLE(0, 0, 0, 0));
// (r11, r11, r11, r11)
XMVECTOR r11 = XM_PERMUTE_PS(r1, _MM_SHUFFLE(1, 1, 1, 1));
// (r22, r22, r22, r22)
XMVECTOR r22 = XM_PERMUTE_PS(r2, _MM_SHUFFLE(2, 2, 2, 2));
// x^2 >= y^2 equivalent to r11 - r00 <= 0
// (r11 - r00, r11 - r00, r11 - r00, r11 - r00)
XMVECTOR r11mr00 = _mm_sub_ps(r11, r00);
XMVECTOR x2gey2 = _mm_cmple_ps(r11mr00, g_XMZero);
// z^2 >= w^2 equivalent to r11 + r00 <= 0
// (r11 + r00, r11 + r00, r11 + r00, r11 + r00)
XMVECTOR r11pr00 = _mm_add_ps(r11, r00);
XMVECTOR z2gew2 = _mm_cmple_ps(r11pr00, g_XMZero);
// x^2 + y^2 >= z^2 + w^2 equivalent to r22 <= 0
XMVECTOR x2py2gez2pw2 = _mm_cmple_ps(r22, g_XMZero);
// (4*x^2, 4*y^2, 4*z^2, 4*w^2)
XMVECTOR t0 = XM_FMADD_PS(XMPMMP, r00, g_XMOne);
XMVECTOR t1 = _mm_mul_ps(XMMPMP, r11);
XMVECTOR t2 = XM_FMADD_PS(XMMMPP, r22, t0);
XMVECTOR x2y2z2w2 = _mm_add_ps(t1, t2);
// (r01, r02, r12, r11)
t0 = _mm_shuffle_ps(r0, r1, _MM_SHUFFLE(1, 2, 2, 1));
// (r10, r10, r20, r21)
t1 = _mm_shuffle_ps(r1, r2, _MM_SHUFFLE(1, 0, 0, 0));
// (r10, r20, r21, r10)
t1 = XM_PERMUTE_PS(t1, _MM_SHUFFLE(1, 3, 2, 0));
// (4*x*y, 4*x*z, 4*y*z, unused)
XMVECTOR xyxzyz = _mm_add_ps(t0, t1);
// (r21, r20, r10, r10)
t0 = _mm_shuffle_ps(r2, r1, _MM_SHUFFLE(0, 0, 0, 1));
// (r12, r12, r02, r01)
t1 = _mm_shuffle_ps(r1, r0, _MM_SHUFFLE(1, 2, 2, 2));
// (r12, r02, r01, r12)
t1 = XM_PERMUTE_PS(t1, _MM_SHUFFLE(1, 3, 2, 0));
// (4*x*w, 4*y*w, 4*z*w, unused)
XMVECTOR xwywzw = _mm_sub_ps(t0, t1);
xwywzw = _mm_mul_ps(XMMPMP, xwywzw);
// (4*x^2, 4*y^2, 4*x*y, unused)
t0 = _mm_shuffle_ps(x2y2z2w2, xyxzyz, _MM_SHUFFLE(0, 0, 1, 0));
// (4*z^2, 4*w^2, 4*z*w, unused)
t1 = _mm_shuffle_ps(x2y2z2w2, xwywzw, _MM_SHUFFLE(0, 2, 3, 2));
// (4*x*z, 4*y*z, 4*x*w, 4*y*w)
t2 = _mm_shuffle_ps(xyxzyz, xwywzw, _MM_SHUFFLE(1, 0, 2, 1));
// (4*x*x, 4*x*y, 4*x*z, 4*x*w)
XMVECTOR tensor0 = _mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2, 0, 2, 0));
// (4*y*x, 4*y*y, 4*y*z, 4*y*w)
XMVECTOR tensor1 = _mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3, 1, 1, 2));
// (4*z*x, 4*z*y, 4*z*z, 4*z*w)
XMVECTOR tensor2 = _mm_shuffle_ps(t2, t1, _MM_SHUFFLE(2, 0, 1, 0));
// (4*w*x, 4*w*y, 4*w*z, 4*w*w)
XMVECTOR tensor3 = _mm_shuffle_ps(t2, t1, _MM_SHUFFLE(1, 2, 3, 2));
// Select the row of the tensor-product matrix that has the largest
// magnitude.
t0 = _mm_and_ps(x2gey2, tensor0);
t1 = _mm_andnot_ps(x2gey2, tensor1);
t0 = _mm_or_ps(t0, t1);
t1 = _mm_and_ps(z2gew2, tensor2);
t2 = _mm_andnot_ps(z2gew2, tensor3);
t1 = _mm_or_ps(t1, t2);
t0 = _mm_and_ps(x2py2gez2pw2, t0);
t1 = _mm_andnot_ps(x2py2gez2pw2, t1);
t2 = _mm_or_ps(t0, t1);
// Normalize the row. No division by zero is possible because the
// quaternion is unit-length (and the row is a nonzero multiple of
// the quaternion).
t0 = XMVector4Length(t2);
return _mm_div_ps(t2, t0);
#endif
}
//------------------------------------------------------------------------------
// Conversion operations
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
_Use_decl_annotations_
inline void XM_CALLCONV XMQuaternionToAxisAngle
(
XMVECTOR* pAxis,
float* pAngle,
FXMVECTOR Q
) noexcept
{
assert(pAxis);
assert(pAngle);
*pAxis = Q;
*pAngle = 2.0f * XMScalarACos(XMVectorGetW(Q));
}
/****************************************************************************
*
* Plane
*
****************************************************************************/
//------------------------------------------------------------------------------
// Comparison operations
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
inline bool XM_CALLCONV XMPlaneEqual
(
FXMVECTOR P1,
FXMVECTOR P2
) noexcept
{
return XMVector4Equal(P1, P2);
}
//------------------------------------------------------------------------------
inline bool XM_CALLCONV XMPlaneNearEqual
(
FXMVECTOR P1,
FXMVECTOR P2,
FXMVECTOR Epsilon
) noexcept
{
XMVECTOR NP1 = XMPlaneNormalize(P1);
XMVECTOR NP2 = XMPlaneNormalize(P2);
return XMVector4NearEqual(NP1, NP2, Epsilon);
}
//------------------------------------------------------------------------------
inline bool XM_CALLCONV XMPlaneNotEqual
(
FXMVECTOR P1,
FXMVECTOR P2
) noexcept
{
return XMVector4NotEqual(P1, P2);
}
//------------------------------------------------------------------------------
inline bool XM_CALLCONV XMPlaneIsNaN(FXMVECTOR P) noexcept
{
return XMVector4IsNaN(P);
}
//------------------------------------------------------------------------------
inline bool XM_CALLCONV XMPlaneIsInfinite(FXMVECTOR P) noexcept
{
return XMVector4IsInfinite(P);
}
//------------------------------------------------------------------------------
// Computation operations
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMPlaneDot
(
FXMVECTOR P,
FXMVECTOR V
) noexcept
{
return XMVector4Dot(P, V);
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMPlaneDotCoord
(
FXMVECTOR P,
FXMVECTOR V
) noexcept
{
// Result = P[0] * V[0] + P[1] * V[1] + P[2] * V[2] + P[3]
XMVECTOR V3 = XMVectorSelect(g_XMOne.v, V, g_XMSelect1110.v);
XMVECTOR Result = XMVector4Dot(P, V3);
return Result;
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMPlaneDotNormal
(
FXMVECTOR P,
FXMVECTOR V
) noexcept
{
return XMVector3Dot(P, V);
}
//------------------------------------------------------------------------------
// XMPlaneNormalizeEst uses a reciprocal estimate and
// returns QNaN on zero and infinite vectors.
inline XMVECTOR XM_CALLCONV XMPlaneNormalizeEst(FXMVECTOR P) noexcept
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
XMVECTOR Result = XMVector3ReciprocalLengthEst(P);
return XMVectorMultiply(P, Result);
#elif defined(_XM_SSE4_INTRINSICS_)
XMVECTOR vTemp = _mm_dp_ps(P, P, 0x7f);
XMVECTOR vResult = _mm_rsqrt_ps(vTemp);
return _mm_mul_ps(vResult, P);
#elif defined(_XM_SSE_INTRINSICS_)
// Perform the dot product
XMVECTOR vDot = _mm_mul_ps(P, P);
// x=Dot.y, y=Dot.z
XMVECTOR vTemp = XM_PERMUTE_PS(vDot, _MM_SHUFFLE(2, 1, 2, 1));
// Result.x = x+y
vDot = _mm_add_ss(vDot, vTemp);
// x=Dot.z
vTemp = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(1, 1, 1, 1));
// Result.x = (x+y)+z
vDot = _mm_add_ss(vDot, vTemp);
// Splat x
vDot = XM_PERMUTE_PS(vDot, _MM_SHUFFLE(0, 0, 0, 0));
// Get the reciprocal
vDot = _mm_rsqrt_ps(vDot);
// Get the reciprocal
vDot = _mm_mul_ps(vDot, P);
return vDot;
#endif
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMPlaneNormalize(FXMVECTOR P) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
float fLengthSq = sqrtf((P.vector4_f32[0] * P.vector4_f32[0]) + (P.vector4_f32[1] * P.vector4_f32[1]) + (P.vector4_f32[2] * P.vector4_f32[2]));
// Prevent divide by zero
if (fLengthSq > 0)
{
fLengthSq = 1.0f / fLengthSq;
}
XMVECTORF32 vResult = { { {
P.vector4_f32[0] * fLengthSq,
P.vector4_f32[1] * fLengthSq,
P.vector4_f32[2] * fLengthSq,
P.vector4_f32[3] * fLengthSq
} } };
return vResult.v;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
XMVECTOR vLength = XMVector3ReciprocalLength(P);
return XMVectorMultiply(P, vLength);
#elif defined(_XM_SSE4_INTRINSICS_)
XMVECTOR vLengthSq = _mm_dp_ps(P, P, 0x7f);
// Prepare for the division
XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
// Failsafe on zero (Or epsilon) length planes
// If the length is infinity, set the elements to zero
vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity);
// Reciprocal mul to perform the normalization
vResult = _mm_div_ps(P, vResult);
// Any that are infinity, set to zero
vResult = _mm_and_ps(vResult, vLengthSq);
return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
// Perform the dot product on x,y and z only
XMVECTOR vLengthSq = _mm_mul_ps(P, P);
XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(2, 1, 2, 1));
vLengthSq = _mm_add_ss(vLengthSq, vTemp);
vTemp = XM_PERMUTE_PS(vTemp, _MM_SHUFFLE(1, 1, 1, 1));
vLengthSq = _mm_add_ss(vLengthSq, vTemp);
vLengthSq = XM_PERMUTE_PS(vLengthSq, _MM_SHUFFLE(0, 0, 0, 0));
// Prepare for the division
XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
// Failsafe on zero (Or epsilon) length planes
// If the length is infinity, set the elements to zero
vLengthSq = _mm_cmpneq_ps(vLengthSq, g_XMInfinity);
// Reciprocal mul to perform the normalization
vResult = _mm_div_ps(P, vResult);
// Any that are infinity, set to zero
vResult = _mm_and_ps(vResult, vLengthSq);
return vResult;
#endif
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMPlaneIntersectLine
(
FXMVECTOR P,
FXMVECTOR LinePoint1,
FXMVECTOR LinePoint2
) noexcept
{
XMVECTOR V1 = XMVector3Dot(P, LinePoint1);
XMVECTOR V2 = XMVector3Dot(P, LinePoint2);
XMVECTOR D = XMVectorSubtract(V1, V2);
XMVECTOR VT = XMPlaneDotCoord(P, LinePoint1);
VT = XMVectorDivide(VT, D);
XMVECTOR Point = XMVectorSubtract(LinePoint2, LinePoint1);
Point = XMVectorMultiplyAdd(Point, VT, LinePoint1);
const XMVECTOR Zero = XMVectorZero();
XMVECTOR Control = XMVectorNearEqual(D, Zero, g_XMEpsilon.v);
return XMVectorSelect(Point, g_XMQNaN.v, Control);
}
//------------------------------------------------------------------------------
_Use_decl_annotations_
inline void XM_CALLCONV XMPlaneIntersectPlane
(
XMVECTOR* pLinePoint1,
XMVECTOR* pLinePoint2,
FXMVECTOR P1,
FXMVECTOR P2
) noexcept
{
assert(pLinePoint1);
assert(pLinePoint2);
XMVECTOR V1 = XMVector3Cross(P2, P1);
XMVECTOR LengthSq = XMVector3LengthSq(V1);
XMVECTOR V2 = XMVector3Cross(P2, V1);
XMVECTOR P1W = XMVectorSplatW(P1);
XMVECTOR Point = XMVectorMultiply(V2, P1W);
XMVECTOR V3 = XMVector3Cross(V1, P1);
XMVECTOR P2W = XMVectorSplatW(P2);
Point = XMVectorMultiplyAdd(V3, P2W, Point);
XMVECTOR LinePoint1 = XMVectorDivide(Point, LengthSq);
XMVECTOR LinePoint2 = XMVectorAdd(LinePoint1, V1);
XMVECTOR Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon.v);
*pLinePoint1 = XMVectorSelect(LinePoint1, g_XMQNaN.v, Control);
*pLinePoint2 = XMVectorSelect(LinePoint2, g_XMQNaN.v, Control);
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMPlaneTransform
(
FXMVECTOR P,
FXMMATRIX M
) noexcept
{
XMVECTOR W = XMVectorSplatW(P);
XMVECTOR Z = XMVectorSplatZ(P);
XMVECTOR Y = XMVectorSplatY(P);
XMVECTOR X = XMVectorSplatX(P);
XMVECTOR Result = XMVectorMultiply(W, M.r[3]);
Result = XMVectorMultiplyAdd(Z, M.r[2], Result);
Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
Result = XMVectorMultiplyAdd(X, M.r[0], Result);
return Result;
}
//------------------------------------------------------------------------------
_Use_decl_annotations_
inline XMFLOAT4* XM_CALLCONV XMPlaneTransformStream
(
XMFLOAT4* pOutputStream,
size_t OutputStride,
const XMFLOAT4* pInputStream,
size_t InputStride,
size_t PlaneCount,
FXMMATRIX M
) noexcept
{
return XMVector4TransformStream(pOutputStream,
OutputStride,
pInputStream,
InputStride,
PlaneCount,
M);
}
//------------------------------------------------------------------------------
// Conversion operations
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMPlaneFromPointNormal
(
FXMVECTOR Point,
FXMVECTOR Normal
) noexcept
{
XMVECTOR W = XMVector3Dot(Point, Normal);
W = XMVectorNegate(W);
return XMVectorSelect(W, Normal, g_XMSelect1110.v);
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMPlaneFromPoints
(
FXMVECTOR Point1,
FXMVECTOR Point2,
FXMVECTOR Point3
) noexcept
{
XMVECTOR V21 = XMVectorSubtract(Point1, Point2);
XMVECTOR V31 = XMVectorSubtract(Point1, Point3);
XMVECTOR N = XMVector3Cross(V21, V31);
N = XMVector3Normalize(N);
XMVECTOR D = XMPlaneDotNormal(N, Point1);
D = XMVectorNegate(D);
XMVECTOR Result = XMVectorSelect(D, N, g_XMSelect1110.v);
return Result;
}
/****************************************************************************
*
* Color
*
****************************************************************************/
//------------------------------------------------------------------------------
// Comparison operations
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
inline bool XM_CALLCONV XMColorEqual
(
FXMVECTOR C1,
FXMVECTOR C2
) noexcept
{
return XMVector4Equal(C1, C2);
}
//------------------------------------------------------------------------------
inline bool XM_CALLCONV XMColorNotEqual
(
FXMVECTOR C1,
FXMVECTOR C2
) noexcept
{
return XMVector4NotEqual(C1, C2);
}
//------------------------------------------------------------------------------
inline bool XM_CALLCONV XMColorGreater
(
FXMVECTOR C1,
FXMVECTOR C2
) noexcept
{
return XMVector4Greater(C1, C2);
}
//------------------------------------------------------------------------------
inline bool XM_CALLCONV XMColorGreaterOrEqual
(
FXMVECTOR C1,
FXMVECTOR C2
) noexcept
{
return XMVector4GreaterOrEqual(C1, C2);
}
//------------------------------------------------------------------------------
inline bool XM_CALLCONV XMColorLess
(
FXMVECTOR C1,
FXMVECTOR C2
) noexcept
{
return XMVector4Less(C1, C2);
}
//------------------------------------------------------------------------------
inline bool XM_CALLCONV XMColorLessOrEqual
(
FXMVECTOR C1,
FXMVECTOR C2
) noexcept
{
return XMVector4LessOrEqual(C1, C2);
}
//------------------------------------------------------------------------------
inline bool XM_CALLCONV XMColorIsNaN(FXMVECTOR C) noexcept
{
return XMVector4IsNaN(C);
}
//------------------------------------------------------------------------------
inline bool XM_CALLCONV XMColorIsInfinite(FXMVECTOR C) noexcept
{
return XMVector4IsInfinite(C);
}
//------------------------------------------------------------------------------
// Computation operations
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMColorNegative(FXMVECTOR vColor) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTORF32 vResult = { { {
1.0f - vColor.vector4_f32[0],
1.0f - vColor.vector4_f32[1],
1.0f - vColor.vector4_f32[2],
vColor.vector4_f32[3]
} } };
return vResult.v;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
XMVECTOR vTemp = veorq_u32(vColor, g_XMNegate3);
return vaddq_f32(vTemp, g_XMOne3);
#elif defined(_XM_SSE_INTRINSICS_)
// Negate only x,y and z.
XMVECTOR vTemp = _mm_xor_ps(vColor, g_XMNegate3);
// Add 1,1,1,0 to -x,-y,-z,w
return _mm_add_ps(vTemp, g_XMOne3);
#endif
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMColorModulate
(
FXMVECTOR C1,
FXMVECTOR C2
) noexcept
{
return XMVectorMultiply(C1, C2);
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMColorAdjustSaturation
(
FXMVECTOR vColor,
float fSaturation
) noexcept
{
// Luminance = 0.2125f * C[0] + 0.7154f * C[1] + 0.0721f * C[2];
// Result = (C - Luminance) * Saturation + Luminance;
const XMVECTORF32 gvLuminance = { { { 0.2125f, 0.7154f, 0.0721f, 0.0f } } };
#if defined(_XM_NO_INTRINSICS_)
float fLuminance = (vColor.vector4_f32[0] * gvLuminance.f[0]) + (vColor.vector4_f32[1] * gvLuminance.f[1]) + (vColor.vector4_f32[2] * gvLuminance.f[2]);
XMVECTOR vResult;
vResult.vector4_f32[0] = ((vColor.vector4_f32[0] - fLuminance) * fSaturation) + fLuminance;
vResult.vector4_f32[1] = ((vColor.vector4_f32[1] - fLuminance) * fSaturation) + fLuminance;
vResult.vector4_f32[2] = ((vColor.vector4_f32[2] - fLuminance) * fSaturation) + fLuminance;
vResult.vector4_f32[3] = vColor.vector4_f32[3];
return vResult;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
XMVECTOR vLuminance = XMVector3Dot(vColor, gvLuminance);
XMVECTOR vResult = vsubq_f32(vColor, vLuminance);
vResult = vmlaq_n_f32(vLuminance, vResult, fSaturation);
return vbslq_f32(g_XMSelect1110, vResult, vColor);
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR vLuminance = XMVector3Dot(vColor, gvLuminance);
// Splat fSaturation
XMVECTOR vSaturation = _mm_set_ps1(fSaturation);
// vResult = ((vColor-vLuminance)*vSaturation)+vLuminance;
XMVECTOR vResult = _mm_sub_ps(vColor, vLuminance);
vResult = XM_FMADD_PS(vResult, vSaturation, vLuminance);
// Retain w from the source color
vLuminance = _mm_shuffle_ps(vResult, vColor, _MM_SHUFFLE(3, 2, 2, 2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w
vResult = _mm_shuffle_ps(vResult, vLuminance, _MM_SHUFFLE(3, 0, 1, 0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w
return vResult;
#endif
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMColorAdjustContrast
(
FXMVECTOR vColor,
float fContrast
) noexcept
{
// Result = (vColor - 0.5f) * fContrast + 0.5f;
#if defined(_XM_NO_INTRINSICS_)
XMVECTORF32 vResult = { { {
((vColor.vector4_f32[0] - 0.5f) * fContrast) + 0.5f,
((vColor.vector4_f32[1] - 0.5f) * fContrast) + 0.5f,
((vColor.vector4_f32[2] - 0.5f) * fContrast) + 0.5f,
vColor.vector4_f32[3] // Leave W untouched
} } };
return vResult.v;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
XMVECTOR vResult = vsubq_f32(vColor, g_XMOneHalf.v);
vResult = vmlaq_n_f32(g_XMOneHalf.v, vResult, fContrast);
return vbslq_f32(g_XMSelect1110, vResult, vColor);
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR vScale = _mm_set_ps1(fContrast); // Splat the scale
XMVECTOR vResult = _mm_sub_ps(vColor, g_XMOneHalf); // Subtract 0.5f from the source (Saving source)
vResult = XM_FMADD_PS(vResult, vScale, g_XMOneHalf);
// Retain w from the source color
vScale = _mm_shuffle_ps(vResult, vColor, _MM_SHUFFLE(3, 2, 2, 2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w
vResult = _mm_shuffle_ps(vResult, vScale, _MM_SHUFFLE(3, 0, 1, 0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w
return vResult;
#endif
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMColorRGBToHSL(FXMVECTOR rgb) noexcept
{
XMVECTOR r = XMVectorSplatX(rgb);
XMVECTOR g = XMVectorSplatY(rgb);
XMVECTOR b = XMVectorSplatZ(rgb);
XMVECTOR min = XMVectorMin(r, XMVectorMin(g, b));
XMVECTOR max = XMVectorMax(r, XMVectorMax(g, b));
XMVECTOR l = XMVectorMultiply(XMVectorAdd(min, max), g_XMOneHalf);
XMVECTOR d = XMVectorSubtract(max, min);
XMVECTOR la = XMVectorSelect(rgb, l, g_XMSelect1110);
if (XMVector3Less(d, g_XMEpsilon))
{
// Achromatic, assume H and S of 0
return XMVectorSelect(la, g_XMZero, g_XMSelect1100);
}
else
{
XMVECTOR s, h;
XMVECTOR d2 = XMVectorAdd(min, max);
if (XMVector3Greater(l, g_XMOneHalf))
{
// d / (2-max-min)
s = XMVectorDivide(d, XMVectorSubtract(g_XMTwo, d2));
}
else
{
// d / (max+min)
s = XMVectorDivide(d, d2);
}
if (XMVector3Equal(r, max))
{
// Red is max
h = XMVectorDivide(XMVectorSubtract(g, b), d);
}
else if (XMVector3Equal(g, max))
{
// Green is max
h = XMVectorDivide(XMVectorSubtract(b, r), d);
h = XMVectorAdd(h, g_XMTwo);
}
else
{
// Blue is max
h = XMVectorDivide(XMVectorSubtract(r, g), d);
h = XMVectorAdd(h, g_XMFour);
}
h = XMVectorDivide(h, g_XMSix);
if (XMVector3Less(h, g_XMZero))
h = XMVectorAdd(h, g_XMOne);
XMVECTOR lha = XMVectorSelect(la, h, g_XMSelect1100);
return XMVectorSelect(s, lha, g_XMSelect1011);
}
}
//------------------------------------------------------------------------------
namespace Internal
{
inline XMVECTOR XM_CALLCONV XMColorHue2Clr(FXMVECTOR p, FXMVECTOR q, FXMVECTOR h) noexcept
{
static const XMVECTORF32 oneSixth = { { { 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f } } };
static const XMVECTORF32 twoThirds = { { { 2.0f / 3.0f, 2.0f / 3.0f, 2.0f / 3.0f, 2.0f / 3.0f } } };
XMVECTOR t = h;
if (XMVector3Less(t, g_XMZero))
t = XMVectorAdd(t, g_XMOne);
if (XMVector3Greater(t, g_XMOne))
t = XMVectorSubtract(t, g_XMOne);
if (XMVector3Less(t, oneSixth))
{
// p + (q - p) * 6 * t
XMVECTOR t1 = XMVectorSubtract(q, p);
XMVECTOR t2 = XMVectorMultiply(g_XMSix, t);
return XMVectorMultiplyAdd(t1, t2, p);
}
if (XMVector3Less(t, g_XMOneHalf))
return q;
if (XMVector3Less(t, twoThirds))
{
// p + (q - p) * 6 * (2/3 - t)
XMVECTOR t1 = XMVectorSubtract(q, p);
XMVECTOR t2 = XMVectorMultiply(g_XMSix, XMVectorSubtract(twoThirds, t));
return XMVectorMultiplyAdd(t1, t2, p);
}
return p;
}
} // namespace Internal
inline XMVECTOR XM_CALLCONV XMColorHSLToRGB(FXMVECTOR hsl) noexcept
{
static const XMVECTORF32 oneThird = { { { 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f } } };
XMVECTOR s = XMVectorSplatY(hsl);
XMVECTOR l = XMVectorSplatZ(hsl);
if (XMVector3NearEqual(s, g_XMZero, g_XMEpsilon))
{
// Achromatic
return XMVectorSelect(hsl, l, g_XMSelect1110);
}
else
{
XMVECTOR h = XMVectorSplatX(hsl);
XMVECTOR q;
if (XMVector3Less(l, g_XMOneHalf))
{
q = XMVectorMultiply(l, XMVectorAdd(g_XMOne, s));
}
else
{
q = XMVectorSubtract(XMVectorAdd(l, s), XMVectorMultiply(l, s));
}
XMVECTOR p = XMVectorSubtract(XMVectorMultiply(g_XMTwo, l), q);
XMVECTOR r = DirectX::Internal::XMColorHue2Clr(p, q, XMVectorAdd(h, oneThird));
XMVECTOR g = DirectX::Internal::XMColorHue2Clr(p, q, h);
XMVECTOR b = DirectX::Internal::XMColorHue2Clr(p, q, XMVectorSubtract(h, oneThird));
XMVECTOR rg = XMVectorSelect(g, r, g_XMSelect1000);
XMVECTOR ba = XMVectorSelect(hsl, b, g_XMSelect1110);
return XMVectorSelect(ba, rg, g_XMSelect1100);
}
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMColorRGBToHSV(FXMVECTOR rgb) noexcept
{
XMVECTOR r = XMVectorSplatX(rgb);
XMVECTOR g = XMVectorSplatY(rgb);
XMVECTOR b = XMVectorSplatZ(rgb);
XMVECTOR min = XMVectorMin(r, XMVectorMin(g, b));
XMVECTOR v = XMVectorMax(r, XMVectorMax(g, b));
XMVECTOR d = XMVectorSubtract(v, min);
XMVECTOR s = (XMVector3NearEqual(v, g_XMZero, g_XMEpsilon)) ? g_XMZero : XMVectorDivide(d, v);
if (XMVector3Less(d, g_XMEpsilon))
{
// Achromatic, assume H of 0
XMVECTOR hv = XMVectorSelect(v, g_XMZero, g_XMSelect1000);
XMVECTOR hva = XMVectorSelect(rgb, hv, g_XMSelect1110);
return XMVectorSelect(s, hva, g_XMSelect1011);
}
else
{
XMVECTOR h;
if (XMVector3Equal(r, v))
{
// Red is max
h = XMVectorDivide(XMVectorSubtract(g, b), d);
if (XMVector3Less(g, b))
h = XMVectorAdd(h, g_XMSix);
}
else if (XMVector3Equal(g, v))
{
// Green is max
h = XMVectorDivide(XMVectorSubtract(b, r), d);
h = XMVectorAdd(h, g_XMTwo);
}
else
{
// Blue is max
h = XMVectorDivide(XMVectorSubtract(r, g), d);
h = XMVectorAdd(h, g_XMFour);
}
h = XMVectorDivide(h, g_XMSix);
XMVECTOR hv = XMVectorSelect(v, h, g_XMSelect1000);
XMVECTOR hva = XMVectorSelect(rgb, hv, g_XMSelect1110);
return XMVectorSelect(s, hva, g_XMSelect1011);
}
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMColorHSVToRGB(FXMVECTOR hsv) noexcept
{
XMVECTOR h = XMVectorSplatX(hsv);
XMVECTOR s = XMVectorSplatY(hsv);
XMVECTOR v = XMVectorSplatZ(hsv);
XMVECTOR h6 = XMVectorMultiply(h, g_XMSix);
XMVECTOR i = XMVectorFloor(h6);
XMVECTOR f = XMVectorSubtract(h6, i);
// p = v* (1-s)
XMVECTOR p = XMVectorMultiply(v, XMVectorSubtract(g_XMOne, s));
// q = v*(1-f*s)
XMVECTOR q = XMVectorMultiply(v, XMVectorSubtract(g_XMOne, XMVectorMultiply(f, s)));
// t = v*(1 - (1-f)*s)
XMVECTOR t = XMVectorMultiply(v, XMVectorSubtract(g_XMOne, XMVectorMultiply(XMVectorSubtract(g_XMOne, f), s)));
auto ii = static_cast<int>(XMVectorGetX(XMVectorMod(i, g_XMSix)));
XMVECTOR _rgb;
switch (ii)
{
case 0: // rgb = vtp
{
XMVECTOR vt = XMVectorSelect(t, v, g_XMSelect1000);
_rgb = XMVectorSelect(p, vt, g_XMSelect1100);
}
break;
case 1: // rgb = qvp
{
XMVECTOR qv = XMVectorSelect(v, q, g_XMSelect1000);
_rgb = XMVectorSelect(p, qv, g_XMSelect1100);
}
break;
case 2: // rgb = pvt
{
XMVECTOR pv = XMVectorSelect(v, p, g_XMSelect1000);
_rgb = XMVectorSelect(t, pv, g_XMSelect1100);
}
break;
case 3: // rgb = pqv
{
XMVECTOR pq = XMVectorSelect(q, p, g_XMSelect1000);
_rgb = XMVectorSelect(v, pq, g_XMSelect1100);
}
break;
case 4: // rgb = tpv
{
XMVECTOR tp = XMVectorSelect(p, t, g_XMSelect1000);
_rgb = XMVectorSelect(v, tp, g_XMSelect1100);
}
break;
default: // rgb = vpq
{
XMVECTOR vp = XMVectorSelect(p, v, g_XMSelect1000);
_rgb = XMVectorSelect(q, vp, g_XMSelect1100);
}
break;
}
return XMVectorSelect(hsv, _rgb, g_XMSelect1110);
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMColorRGBToYUV(FXMVECTOR rgb) noexcept
{
static const XMVECTORF32 Scale0 = { { { 0.299f, -0.147f, 0.615f, 0.0f } } };
static const XMVECTORF32 Scale1 = { { { 0.587f, -0.289f, -0.515f, 0.0f } } };
static const XMVECTORF32 Scale2 = { { { 0.114f, 0.436f, -0.100f, 0.0f } } };
XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero);
XMVECTOR clr = XMVector3Transform(rgb, M);
return XMVectorSelect(rgb, clr, g_XMSelect1110);
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMColorYUVToRGB(FXMVECTOR yuv) noexcept
{
static const XMVECTORF32 Scale1 = { { { 0.0f, -0.395f, 2.032f, 0.0f } } };
static const XMVECTORF32 Scale2 = { { { 1.140f, -0.581f, 0.0f, 0.0f } } };
XMMATRIX M(g_XMOne, Scale1, Scale2, g_XMZero);
XMVECTOR clr = XMVector3Transform(yuv, M);
return XMVectorSelect(yuv, clr, g_XMSelect1110);
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMColorRGBToYUV_HD(FXMVECTOR rgb) noexcept
{
static const XMVECTORF32 Scale0 = { { { 0.2126f, -0.0997f, 0.6150f, 0.0f } } };
static const XMVECTORF32 Scale1 = { { { 0.7152f, -0.3354f, -0.5586f, 0.0f } } };
static const XMVECTORF32 Scale2 = { { { 0.0722f, 0.4351f, -0.0564f, 0.0f } } };
XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero);
XMVECTOR clr = XMVector3Transform(rgb, M);
return XMVectorSelect(rgb, clr, g_XMSelect1110);
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMColorYUVToRGB_HD(FXMVECTOR yuv) noexcept
{
static const XMVECTORF32 Scale1 = { { { 0.0f, -0.2153f, 2.1324f, 0.0f } } };
static const XMVECTORF32 Scale2 = { { { 1.2803f, -0.3806f, 0.0f, 0.0f } } };
XMMATRIX M(g_XMOne, Scale1, Scale2, g_XMZero);
XMVECTOR clr = XMVector3Transform(yuv, M);
return XMVectorSelect(yuv, clr, g_XMSelect1110);
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMColorRGBToXYZ(FXMVECTOR rgb) noexcept
{
static const XMVECTORF32 Scale0 = { { { 0.4887180f, 0.1762044f, 0.0000000f, 0.0f } } };
static const XMVECTORF32 Scale1 = { { { 0.3106803f, 0.8129847f, 0.0102048f, 0.0f } } };
static const XMVECTORF32 Scale2 = { { { 0.2006017f, 0.0108109f, 0.9897952f, 0.0f } } };
static const XMVECTORF32 Scale = { { { 1.f / 0.17697f, 1.f / 0.17697f, 1.f / 0.17697f, 0.0f } } };
XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero);
XMVECTOR clr = XMVectorMultiply(XMVector3Transform(rgb, M), Scale);
return XMVectorSelect(rgb, clr, g_XMSelect1110);
}
inline XMVECTOR XM_CALLCONV XMColorXYZToRGB(FXMVECTOR xyz) noexcept
{
static const XMVECTORF32 Scale0 = { { { 2.3706743f, -0.5138850f, 0.0052982f, 0.0f } } };
static const XMVECTORF32 Scale1 = { { { -0.9000405f, 1.4253036f, -0.0146949f, 0.0f } } };
static const XMVECTORF32 Scale2 = { { { -0.4706338f, 0.0885814f, 1.0093968f, 0.0f } } };
static const XMVECTORF32 Scale = { { { 0.17697f, 0.17697f, 0.17697f, 0.0f } } };
XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero);
XMVECTOR clr = XMVector3Transform(XMVectorMultiply(xyz, Scale), M);
return XMVectorSelect(xyz, clr, g_XMSelect1110);
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMColorXYZToSRGB(FXMVECTOR xyz) noexcept
{
static const XMVECTORF32 Scale0 = { { { 3.2406f, -0.9689f, 0.0557f, 0.0f } } };
static const XMVECTORF32 Scale1 = { { { -1.5372f, 1.8758f, -0.2040f, 0.0f } } };
static const XMVECTORF32 Scale2 = { { { -0.4986f, 0.0415f, 1.0570f, 0.0f } } };
static const XMVECTORF32 Cutoff = { { { 0.0031308f, 0.0031308f, 0.0031308f, 0.0f } } };
static const XMVECTORF32 Exp = { { { 1.0f / 2.4f, 1.0f / 2.4f, 1.0f / 2.4f, 1.0f } } };
XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero);
XMVECTOR lclr = XMVector3Transform(xyz, M);
XMVECTOR sel = XMVectorGreater(lclr, Cutoff);
// clr = 12.92 * lclr for lclr <= 0.0031308f
XMVECTOR smallC = XMVectorMultiply(lclr, g_XMsrgbScale);
// clr = (1+a)*pow(lclr, 1/2.4) - a for lclr > 0.0031308 (where a = 0.055)
XMVECTOR largeC = XMVectorSubtract(XMVectorMultiply(g_XMsrgbA1, XMVectorPow(lclr, Exp)), g_XMsrgbA);
XMVECTOR clr = XMVectorSelect(smallC, largeC, sel);
return XMVectorSelect(xyz, clr, g_XMSelect1110);
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMColorSRGBToXYZ(FXMVECTOR srgb) noexcept
{
static const XMVECTORF32 Scale0 = { { { 0.4124f, 0.2126f, 0.0193f, 0.0f } } };
static const XMVECTORF32 Scale1 = { { { 0.3576f, 0.7152f, 0.1192f, 0.0f } } };
static const XMVECTORF32 Scale2 = { { { 0.1805f, 0.0722f, 0.9505f, 0.0f } } };
static const XMVECTORF32 Cutoff = { { { 0.04045f, 0.04045f, 0.04045f, 0.0f } } };
static const XMVECTORF32 Exp = { { { 2.4f, 2.4f, 2.4f, 1.0f } } };
XMVECTOR sel = XMVectorGreater(srgb, Cutoff);
// lclr = clr / 12.92
XMVECTOR smallC = XMVectorDivide(srgb, g_XMsrgbScale);
// lclr = pow( (clr + a) / (1+a), 2.4 )
XMVECTOR largeC = XMVectorPow(XMVectorDivide(XMVectorAdd(srgb, g_XMsrgbA), g_XMsrgbA1), Exp);
XMVECTOR lclr = XMVectorSelect(smallC, largeC, sel);
XMMATRIX M(Scale0, Scale1, Scale2, g_XMZero);
XMVECTOR clr = XMVector3Transform(lclr, M);
return XMVectorSelect(srgb, clr, g_XMSelect1110);
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMColorRGBToSRGB(FXMVECTOR rgb) noexcept
{
static const XMVECTORF32 Cutoff = { { { 0.0031308f, 0.0031308f, 0.0031308f, 1.f } } };
static const XMVECTORF32 Linear = { { { 12.92f, 12.92f, 12.92f, 1.f } } };
static const XMVECTORF32 Scale = { { { 1.055f, 1.055f, 1.055f, 1.f } } };
static const XMVECTORF32 Bias = { { { 0.055f, 0.055f, 0.055f, 0.f } } };
static const XMVECTORF32 InvGamma = { { { 1.0f / 2.4f, 1.0f / 2.4f, 1.0f / 2.4f, 1.f } } };
XMVECTOR V = XMVectorSaturate(rgb);
XMVECTOR V0 = XMVectorMultiply(V, Linear);
XMVECTOR V1 = XMVectorSubtract(XMVectorMultiply(Scale, XMVectorPow(V, InvGamma)), Bias);
XMVECTOR select = XMVectorLess(V, Cutoff);
V = XMVectorSelect(V1, V0, select);
return XMVectorSelect(rgb, V, g_XMSelect1110);
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMColorSRGBToRGB(FXMVECTOR srgb) noexcept
{
static const XMVECTORF32 Cutoff = { { { 0.04045f, 0.04045f, 0.04045f, 1.f } } };
static const XMVECTORF32 ILinear = { { { 1.f / 12.92f, 1.f / 12.92f, 1.f / 12.92f, 1.f } } };
static const XMVECTORF32 Scale = { { { 1.f / 1.055f, 1.f / 1.055f, 1.f / 1.055f, 1.f } } };
static const XMVECTORF32 Bias = { { { 0.055f, 0.055f, 0.055f, 0.f } } };
static const XMVECTORF32 Gamma = { { { 2.4f, 2.4f, 2.4f, 1.f } } };
XMVECTOR V = XMVectorSaturate(srgb);
XMVECTOR V0 = XMVectorMultiply(V, ILinear);
XMVECTOR V1 = XMVectorPow(XMVectorMultiply(XMVectorAdd(V, Bias), Scale), Gamma);
XMVECTOR select = XMVectorGreater(V, Cutoff);
V = XMVectorSelect(V0, V1, select);
return XMVectorSelect(srgb, V, g_XMSelect1110);
}
/****************************************************************************
*
* Miscellaneous
*
****************************************************************************/
//------------------------------------------------------------------------------
inline bool XMVerifyCPUSupport() noexcept
{
#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
int CPUInfo[4] = { -1 };
#if defined(__clang__) || defined(__GNUC__)
__cpuid(0, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
#else
__cpuid(CPUInfo, 0);
#endif
#ifdef __AVX2__
if (CPUInfo[0] < 7)
return false;
#else
if (CPUInfo[0] < 1)
return false;
#endif
#if defined(__clang__) || defined(__GNUC__)
__cpuid(1, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
#else
__cpuid(CPUInfo, 1);
#endif
#if defined(__AVX2__) || defined(_XM_AVX2_INTRINSICS_)
// The compiler can emit FMA3 instructions even without explicit intrinsics use
if ((CPUInfo[2] & 0x38081001) != 0x38081001)
return false; // No F16C/AVX/OSXSAVE/SSE4.1/FMA3/SSE3 support
#elif defined(_XM_FMA3_INTRINSICS_) && defined(_XM_F16C_INTRINSICS_)
if ((CPUInfo[2] & 0x38081001) != 0x38081001)
return false; // No F16C/AVX/OSXSAVE/SSE4.1/FMA3/SSE3 support
#elif defined(_XM_FMA3_INTRINSICS_)
if ((CPUInfo[2] & 0x18081001) != 0x18081001)
return false; // No AVX/OSXSAVE/SSE4.1/FMA3/SSE3 support
#elif defined(_XM_F16C_INTRINSICS_)
if ((CPUInfo[2] & 0x38080001) != 0x38080001)
return false; // No F16C/AVX/OSXSAVE/SSE4.1/SSE3 support
#elif defined(__AVX__) || defined(_XM_AVX_INTRINSICS_)
if ((CPUInfo[2] & 0x18080001) != 0x18080001)
return false; // No AVX/OSXSAVE/SSE4.1/SSE3 support
#elif defined(_XM_SSE4_INTRINSICS_)
if ((CPUInfo[2] & 0x80001) != 0x80001)
return false; // No SSE3/SSE4.1 support
#elif defined(_XM_SSE3_INTRINSICS_)
if (!(CPUInfo[2] & 0x1))
return false; // No SSE3 support
#endif
// The x64 processor model requires SSE2 support, but no harm in checking
if ((CPUInfo[3] & 0x6000000) != 0x6000000)
return false; // No SSE2/SSE support
#if defined(__AVX2__) || defined(_XM_AVX2_INTRINSICS_)
#if defined(__clang__) || defined(__GNUC__)
__cpuid_count(7, 0, CPUInfo[0], CPUInfo[1], CPUInfo[2], CPUInfo[3]);
#else
__cpuidex(CPUInfo, 7, 0);
#endif
if (!(CPUInfo[1] & 0x20))
return false; // No AVX2 support
#endif
return true;
#elif defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
// ARM-NEON support is required for the Windows on ARM platform
return true;
#else
// No intrinsics path always supported
return true;
#endif
}
//------------------------------------------------------------------------------
inline XMVECTOR XM_CALLCONV XMFresnelTerm
(
FXMVECTOR CosIncidentAngle,
FXMVECTOR RefractionIndex
) noexcept
{
assert(!XMVector4IsInfinite(CosIncidentAngle));
// Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) where
// c = CosIncidentAngle
// g = sqrt(c^2 + RefractionIndex^2 - 1)
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
XMVECTOR G = XMVectorMultiplyAdd(RefractionIndex, RefractionIndex, g_XMNegativeOne.v);
G = XMVectorMultiplyAdd(CosIncidentAngle, CosIncidentAngle, G);
G = XMVectorAbs(G);
G = XMVectorSqrt(G);
XMVECTOR S = XMVectorAdd(G, CosIncidentAngle);
XMVECTOR D = XMVectorSubtract(G, CosIncidentAngle);
XMVECTOR V0 = XMVectorMultiply(D, D);
XMVECTOR V1 = XMVectorMultiply(S, S);
V1 = XMVectorReciprocal(V1);
V0 = XMVectorMultiply(g_XMOneHalf.v, V0);
V0 = XMVectorMultiply(V0, V1);
XMVECTOR V2 = XMVectorMultiplyAdd(CosIncidentAngle, S, g_XMNegativeOne.v);
XMVECTOR V3 = XMVectorMultiplyAdd(CosIncidentAngle, D, g_XMOne.v);
V2 = XMVectorMultiply(V2, V2);
V3 = XMVectorMultiply(V3, V3);
V3 = XMVectorReciprocal(V3);
V2 = XMVectorMultiplyAdd(V2, V3, g_XMOne.v);
XMVECTOR Result = XMVectorMultiply(V0, V2);
Result = XMVectorSaturate(Result);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
// G = sqrt(abs((RefractionIndex^2-1) + CosIncidentAngle^2))
XMVECTOR G = _mm_mul_ps(RefractionIndex, RefractionIndex);
XMVECTOR vTemp = _mm_mul_ps(CosIncidentAngle, CosIncidentAngle);
G = _mm_sub_ps(G, g_XMOne);
vTemp = _mm_add_ps(vTemp, G);
// max((0-vTemp),vTemp) == abs(vTemp)
// The abs is needed to deal with refraction and cosine being zero
G = _mm_setzero_ps();
G = _mm_sub_ps(G, vTemp);
G = _mm_max_ps(G, vTemp);
// Last operation, the sqrt()
G = _mm_sqrt_ps(G);
// Calc G-C and G+C
XMVECTOR GAddC = _mm_add_ps(G, CosIncidentAngle);
XMVECTOR GSubC = _mm_sub_ps(G, CosIncidentAngle);
// Perform the term (0.5f *(g - c)^2) / (g + c)^2
XMVECTOR vResult = _mm_mul_ps(GSubC, GSubC);
vTemp = _mm_mul_ps(GAddC, GAddC);
vResult = _mm_mul_ps(vResult, g_XMOneHalf);
vResult = _mm_div_ps(vResult, vTemp);
// Perform the term ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1)
GAddC = _mm_mul_ps(GAddC, CosIncidentAngle);
GSubC = _mm_mul_ps(GSubC, CosIncidentAngle);
GAddC = _mm_sub_ps(GAddC, g_XMOne);
GSubC = _mm_add_ps(GSubC, g_XMOne);
GAddC = _mm_mul_ps(GAddC, GAddC);
GSubC = _mm_mul_ps(GSubC, GSubC);
GAddC = _mm_div_ps(GAddC, GSubC);
GAddC = _mm_add_ps(GAddC, g_XMOne);
// Multiply the two term parts
vResult = _mm_mul_ps(vResult, GAddC);
// Clamp to 0.0 - 1.0f
vResult = _mm_max_ps(vResult, g_XMZero);
vResult = _mm_min_ps(vResult, g_XMOne);
return vResult;
#endif
}
//------------------------------------------------------------------------------
inline bool XMScalarNearEqual
(
float S1,
float S2,
float Epsilon
) noexcept
{
float Delta = S1 - S2;
return (fabsf(Delta) <= Epsilon);
}
//------------------------------------------------------------------------------
// Modulo the range of the given angle such that -XM_PI <= Angle < XM_PI
inline float XMScalarModAngle(float Angle) noexcept
{
// Note: The modulo is performed with unsigned math only to work
// around a precision error on numbers that are close to PI
// Normalize the range from 0.0f to XM_2PI
Angle = Angle + XM_PI;
// Perform the modulo, unsigned
float fTemp = fabsf(Angle);
fTemp = fTemp - (XM_2PI * static_cast<float>(static_cast<int32_t>(fTemp / XM_2PI)));
// Restore the number to the range of -XM_PI to XM_PI-epsilon
fTemp = fTemp - XM_PI;
// If the modulo'd value was negative, restore negation
if (Angle < 0.0f)
{
fTemp = -fTemp;
}
return fTemp;
}
//------------------------------------------------------------------------------
inline float XMScalarSin(float Value) noexcept
{
// Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
float quotient = XM_1DIV2PI * Value;
if (Value >= 0.0f)
{
quotient = static_cast<float>(static_cast<int>(quotient + 0.5f));
}
else
{
quotient = static_cast<float>(static_cast<int>(quotient - 0.5f));
}
float y = Value - XM_2PI * quotient;
// Map y to [-pi/2,pi/2] with sin(y) = sin(Value).
if (y > XM_PIDIV2)
{
y = XM_PI - y;
}
else if (y < -XM_PIDIV2)
{
y = -XM_PI - y;
}
// 11-degree minimax approximation
float y2 = y * y;
return (((((-2.3889859e-08f * y2 + 2.7525562e-06f) * y2 - 0.00019840874f) * y2 + 0.0083333310f) * y2 - 0.16666667f) * y2 + 1.0f) * y;
}
//------------------------------------------------------------------------------
inline float XMScalarSinEst(float Value) noexcept
{
// Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
float quotient = XM_1DIV2PI * Value;
if (Value >= 0.0f)
{
quotient = static_cast<float>(static_cast<int>(quotient + 0.5f));
}
else
{
quotient = static_cast<float>(static_cast<int>(quotient - 0.5f));
}
float y = Value - XM_2PI * quotient;
// Map y to [-pi/2,pi/2] with sin(y) = sin(Value).
if (y > XM_PIDIV2)
{
y = XM_PI - y;
}
else if (y < -XM_PIDIV2)
{
y = -XM_PI - y;
}
// 7-degree minimax approximation
float y2 = y * y;
return (((-0.00018524670f * y2 + 0.0083139502f) * y2 - 0.16665852f) * y2 + 1.0f) * y;
}
//------------------------------------------------------------------------------
inline float XMScalarCos(float Value) noexcept
{
// Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
float quotient = XM_1DIV2PI * Value;
if (Value >= 0.0f)
{
quotient = static_cast<float>(static_cast<int>(quotient + 0.5f));
}
else
{
quotient = static_cast<float>(static_cast<int>(quotient - 0.5f));
}
float y = Value - XM_2PI * quotient;
// Map y to [-pi/2,pi/2] with cos(y) = sign*cos(x).
float sign;
if (y > XM_PIDIV2)
{
y = XM_PI - y;
sign = -1.0f;
}
else if (y < -XM_PIDIV2)
{
y = -XM_PI - y;
sign = -1.0f;
}
else
{
sign = +1.0f;
}
// 10-degree minimax approximation
float y2 = y * y;
float p = ((((-2.6051615e-07f * y2 + 2.4760495e-05f) * y2 - 0.0013888378f) * y2 + 0.041666638f) * y2 - 0.5f) * y2 + 1.0f;
return sign * p;
}
//------------------------------------------------------------------------------
inline float XMScalarCosEst(float Value) noexcept
{
// Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
float quotient = XM_1DIV2PI * Value;
if (Value >= 0.0f)
{
quotient = static_cast<float>(static_cast<int>(quotient + 0.5f));
}
else
{
quotient = static_cast<float>(static_cast<int>(quotient - 0.5f));
}
float y = Value - XM_2PI * quotient;
// Map y to [-pi/2,pi/2] with cos(y) = sign*cos(x).
float sign;
if (y > XM_PIDIV2)
{
y = XM_PI - y;
sign = -1.0f;
}
else if (y < -XM_PIDIV2)
{
y = -XM_PI - y;
sign = -1.0f;
}
else
{
sign = +1.0f;
}
// 6-degree minimax approximation
float y2 = y * y;
float p = ((-0.0012712436f * y2 + 0.041493919f) * y2 - 0.49992746f) * y2 + 1.0f;
return sign * p;
}
//------------------------------------------------------------------------------
_Use_decl_annotations_
inline void XMScalarSinCos
(
float* pSin,
float* pCos,
float Value
) noexcept
{
assert(pSin);
assert(pCos);
// Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
float quotient = XM_1DIV2PI * Value;
if (Value >= 0.0f)
{
quotient = static_cast<float>(static_cast<int>(quotient + 0.5f));
}
else
{
quotient = static_cast<float>(static_cast<int>(quotient - 0.5f));
}
float y = Value - XM_2PI * quotient;
// Map y to [-pi/2,pi/2] with sin(y) = sin(Value).
float sign;
if (y > XM_PIDIV2)
{
y = XM_PI - y;
sign = -1.0f;
}
else if (y < -XM_PIDIV2)
{
y = -XM_PI - y;
sign = -1.0f;
}
else
{
sign = +1.0f;
}
float y2 = y * y;
// 11-degree minimax approximation
*pSin = (((((-2.3889859e-08f * y2 + 2.7525562e-06f) * y2 - 0.00019840874f) * y2 + 0.0083333310f) * y2 - 0.16666667f) * y2 + 1.0f) * y;
// 10-degree minimax approximation
float p = ((((-2.6051615e-07f * y2 + 2.4760495e-05f) * y2 - 0.0013888378f) * y2 + 0.041666638f) * y2 - 0.5f) * y2 + 1.0f;
*pCos = sign * p;
}
//------------------------------------------------------------------------------
_Use_decl_annotations_
inline void XMScalarSinCosEst
(
float* pSin,
float* pCos,
float Value
) noexcept
{
assert(pSin);
assert(pCos);
// Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
float quotient = XM_1DIV2PI * Value;
if (Value >= 0.0f)
{
quotient = static_cast<float>(static_cast<int>(quotient + 0.5f));
}
else
{
quotient = static_cast<float>(static_cast<int>(quotient - 0.5f));
}
float y = Value - XM_2PI * quotient;
// Map y to [-pi/2,pi/2] with sin(y) = sin(Value).
float sign;
if (y > XM_PIDIV2)
{
y = XM_PI - y;
sign = -1.0f;
}
else if (y < -XM_PIDIV2)
{
y = -XM_PI - y;
sign = -1.0f;
}
else
{
sign = +1.0f;
}
float y2 = y * y;
// 7-degree minimax approximation
*pSin = (((-0.00018524670f * y2 + 0.0083139502f) * y2 - 0.16665852f) * y2 + 1.0f) * y;
// 6-degree minimax approximation
float p = ((-0.0012712436f * y2 + 0.041493919f) * y2 - 0.49992746f) * y2 + 1.0f;
*pCos = sign * p;
}
//------------------------------------------------------------------------------
inline float XMScalarASin(float Value) noexcept
{
// Clamp input to [-1,1].
bool nonnegative = (Value >= 0.0f);
float x = fabsf(Value);
float omx = 1.0f - x;
if (omx < 0.0f)
{
omx = 0.0f;
}
float root = sqrtf(omx);
// 7-degree minimax approximation
float result = ((((((-0.0012624911f * x + 0.0066700901f) * x - 0.0170881256f) * x + 0.0308918810f) * x - 0.0501743046f) * x + 0.0889789874f) * x - 0.2145988016f) * x + 1.5707963050f;
result *= root; // acos(|x|)
// acos(x) = pi - acos(-x) when x < 0, asin(x) = pi/2 - acos(x)
return (nonnegative ? XM_PIDIV2 - result : result - XM_PIDIV2);
}
//------------------------------------------------------------------------------
inline float XMScalarASinEst(float Value) noexcept
{
// Clamp input to [-1,1].
bool nonnegative = (Value >= 0.0f);
float x = fabsf(Value);
float omx = 1.0f - x;
if (omx < 0.0f)
{
omx = 0.0f;
}
float root = sqrtf(omx);
// 3-degree minimax approximation
float result = ((-0.0187293f * x + 0.0742610f) * x - 0.2121144f) * x + 1.5707288f;
result *= root; // acos(|x|)
// acos(x) = pi - acos(-x) when x < 0, asin(x) = pi/2 - acos(x)
return (nonnegative ? XM_PIDIV2 - result : result - XM_PIDIV2);
}
//------------------------------------------------------------------------------
inline float XMScalarACos(float Value) noexcept
{
// Clamp input to [-1,1].
bool nonnegative = (Value >= 0.0f);
float x = fabsf(Value);
float omx = 1.0f - x;
if (omx < 0.0f)
{
omx = 0.0f;
}
float root = sqrtf(omx);
// 7-degree minimax approximation
float result = ((((((-0.0012624911f * x + 0.0066700901f) * x - 0.0170881256f) * x + 0.0308918810f) * x - 0.0501743046f) * x + 0.0889789874f) * x - 0.2145988016f) * x + 1.5707963050f;
result *= root;
// acos(x) = pi - acos(-x) when x < 0
return (nonnegative ? result : XM_PI - result);
}
//------------------------------------------------------------------------------
inline float XMScalarACosEst(float Value) noexcept
{
// Clamp input to [-1,1].
bool nonnegative = (Value >= 0.0f);
float x = fabsf(Value);
float omx = 1.0f - x;
if (omx < 0.0f)
{
omx = 0.0f;
}
float root = sqrtf(omx);
// 3-degree minimax approximation
float result = ((-0.0187293f * x + 0.0742610f) * x - 0.2121144f) * x + 1.5707288f;
result *= root;
// acos(x) = pi - acos(-x) when x < 0
return (nonnegative ? result : XM_PI - result);
}