Output:
Matrix:
| 1 2 3 4 |
| 5 6 7 8 |
| 8 7 6 5 |
| 4 3 2 1 |
Matrix, transposed:
| 1 5 8 4 |
| 2 6 7 3 |
| 3 7 6 2 |
| 4 8 5 1 |
Code:
/*Please refactor identifiers, as you like*/
/*
 * Compiler selection for the alignment attribute macro okN(p).
 *
 * Define eCG (GCC-compatible syntax) or eCV (Visual C++ syntax) before this
 * point to force a choice; otherwise the compiler is detected from its
 * predefined macros so the file builds unmodified on either toolchain.
 */
#if !defined(eCG) && !defined(eCV)
#  if defined(_MSC_VER) && !defined(__clang__)
#    define eCV /*VC++*/
#  elif defined(__GNUC__) || defined(__clang__)
#    define eCG /*G++ (and other compilers accepting __attribute__)*/
#  endif
#endif

/* okN(p): marks a type as aligned to p bytes. */
#if defined(eCG)
#  define okN(p) __attribute__((aligned(p)))
#elif defined(eCV)
#  define okN(p) __declspec(align(p))
#else
#  error "Unknown compiler: define eCG (GCC-style) or eCV (VC++-style) before this point."
#endif
#include<emmintrin.h>
#include<iostream>
/*
 * 4x4 matrix of floats, 16-byte aligned so each column can be moved with
 * aligned SSE loads and stores.
 *
 * Storage is column-major: element (row r, column c), both 1-based, lives at
 * m[((c-1)<<2)+(r-1)]. The accessors Nrc() expose exactly that mapping.
 */
struct alignas(0x10) tR4x4{
    float m[0x10];/*Column-major, not row-major -- OpenGL FTW*/

/* Generates the read-only and the mutable accessor for element (r,c). */
#define TR4X4_ACCESSOR(r,c)\
    float N##r##c()const noexcept{return m[((c-1)<<2)+(r-1)];}\
    float&N##r##c()noexcept{return m[((c-1)<<2)+(r-1)];}
    TR4X4_ACCESSOR(1,1)TR4X4_ACCESSOR(1,2)TR4X4_ACCESSOR(1,3)TR4X4_ACCESSOR(1,4)
    TR4X4_ACCESSOR(2,1)TR4X4_ACCESSOR(2,2)TR4X4_ACCESSOR(2,3)TR4X4_ACCESSOR(2,4)
    TR4X4_ACCESSOR(3,1)TR4X4_ACCESSOR(3,2)TR4X4_ACCESSOR(3,3)TR4X4_ACCESSOR(3,4)
    TR4X4_ACCESSOR(4,1)TR4X4_ACCESSOR(4,2)TR4X4_ACCESSOR(4,3)TR4X4_ACCESSOR(4,4)
#undef TR4X4_ACCESSOR

    /*
     * Builds the matrix from its 16 elements, listed row by row (pRC is the
     * element at row R, column C). The initialiser below reorders them into
     * the column-major layout of m.
     */
    tR4x4(
        float p11,float p12,float p13,float p14,
        float p21,float p22,float p23,float p24,
        float p31,float p32,float p33,float p34,
        float p41,float p42,float p43,float p44
    )noexcept
        :m{p11,p21,p31,p41,
           p12,p22,p32,p42,
           p13,p23,p33,p43,
           p14,p24,p34,p44}{}

    /*
     * Builds the matrix from four 128-bit registers, one per column
     * (a = column 1 ... d = column 4). The bits are stored unchanged; the
     * casts only reinterpret the register type and generate no instructions.
     */
    tR4x4(__m128i const&a,__m128i const&b,__m128i const&c,__m128i const&d)noexcept{
        _mm_store_ps(m+0x0,_mm_castsi128_ps(a));
        _mm_store_ps(m+0x4,_mm_castsi128_ps(b));
        _mm_store_ps(m+0x8,_mm_castsi128_ps(c));
        _mm_store_ps(m+0xC,_mm_castsi128_ps(d));
    }

#if 0/*Without SIMD SSE2*/
    tR4x4 kTp()const noexcept{return tR4x4(
        N11(),N21(),N31(),N41(),
        N12(),N22(),N32(),N42(),
        N13(),N23(),N33(),N43(),
        N14(),N24(),N34(),N44());}
#endif

    /*
     * Returns the transpose of this matrix; *this is not modified.
     *
     * Two rounds of 32-bit interleaves turn the four columns into the four
     * rows. Columns are read with aligned loads from m (no cast of `this`),
     * and the float-domain unpack intrinsics are used since the data is float.
     */
    tR4x4 kTp()const noexcept{
        __m128 const col1=_mm_load_ps(m+0x0);/* m11 m21 m31 m41 */
        __m128 const col2=_mm_load_ps(m+0x4);/* m12 m22 m32 m42 */
        __m128 const col3=_mm_load_ps(m+0x8);/* m13 m23 m33 m43 */
        __m128 const col4=_mm_load_ps(m+0xC);/* m14 m24 m34 m44 */

        __m128 const lo13=_mm_unpacklo_ps(col1,col3);/* m11 m13 m21 m23 */
        __m128 const lo24=_mm_unpacklo_ps(col2,col4);/* m12 m14 m22 m24 */
        __m128 const hi13=_mm_unpackhi_ps(col1,col3);/* m31 m33 m41 m43 */
        __m128 const hi24=_mm_unpackhi_ps(col2,col4);/* m32 m34 m42 m44 */

        return tR4x4(
            _mm_castps_si128(_mm_unpacklo_ps(lo13,lo24)),/* m11 m12 m13 m14 */
            _mm_castps_si128(_mm_unpackhi_ps(lo13,lo24)),/* m21 m22 m23 m24 */
            _mm_castps_si128(_mm_unpacklo_ps(hi13,hi24)),/* m31 m32 m33 m34 */
            _mm_castps_si128(_mm_unpackhi_ps(hi13,hi24)));/* m41 m42 m43 m44 */
    }

    /* Text output of the matrix, one "| a b c d |" line per row. */
    friend std::ostream&operator<<(std::ostream&,tR4x4 const&)noexcept;
};
std::ostream&operator<<(std::ostream&q,tR4x4 const&p)noexcept{q
<<"| "<<p.N11()<<" "<<p.N12()<<" "<<p.N13()<<" "<<p.N14()<<" |\n"
<<"| "<<p.N21()<<" "<<p.N22()<<" "<<p.N23()<<" "<<p.N24()<<" |\n"
<<"| "<<p.N31()<<" "<<p.N32()<<" "<<p.N33()<<" "<<p.N34()<<" |\n"
<<"| "<<p.N41()<<" "<<p.N42()<<" "<<p.N43()<<" "<<p.N44()<<" |\n";return q;}
void f()noexcept{
tR4x4 l(
1.F,2.F,3.F,4.F,
5.F,6.F,7.F,8.F,
8.F,7.F,6.F,5.F,
4.F,3.F,2.F,1.F);
std::cout<<"Matrix:\n"<<l<<std::endl;
std::cout<<"Matrix, transposed:\n"<<l.kTp()<<std::endl;}
/* Program entry point: runs the demo in f() and exits with status 0. */
int main()
{
    f();
    return 0;
}
No comments:
Post a Comment