Output:
Matrix:
| 1  2  3  4 |
| 5  6  7  8 |
| 8  7  6  5 |
| 4  3  2  1 |
Matrix, transposed:
| 1  5  8  4 |
| 2  6  7  3 |
| 3  7  6  2 |
| 4  8  5  1 |
Code:
/*Please refactor identifiers, as you like*/
/* Compiler-family selection.
   eCG = GCC-compatible front end (g++, clang++), eCV = Microsoft Visual C++.
   The family is detected from the compiler's predefined macros, so the file no
   longer has to be edited by hand; a definition of eCG or eCV supplied from
   outside (e.g. on the command line) is respected and suppresses detection. */
#if !defined(eCG)&&!defined(eCV)
#if defined(__GNUC__)||defined(__clang__)
#define eCG/*G++*/
#elif defined(_MSC_VER)
#define eCV/*VC++*/
#endif
#endif
/* okN(p): alignment specifier requesting p-byte alignment for the type whose
   declaration it decorates (written between `struct` and the type name). */
#ifdef eCG
#define okN(p)__attribute__((aligned(p)))
#elif defined eCV
#define okN(p)__declspec(align(p))
#else
/* Unknown compiler: fall back to the standard C++11 specifier so okN is
   always defined. */
#define okN(p)alignas(p)
#endif
#include<emmintrin.h>
#include<iostream>
/* 4x4 single-precision matrix.
   Storage is column-major and the object is 16-byte aligned, so every column
   occupies exactly one 128-bit SSE2 register slot inside `m`. */
struct alignas(16) tR4x4{
    float m[0x10];/*Column-major, not row-major -- OpenGL FTW*/

    /* Element accessors Nrc(): r = row, c = column, both 1-based.
       Column-major layout places element (r,c) at m[(c-1)*4+(r-1)].
       Each element gets a by-value const getter and a mutable reference. */
#define o_(a,b)\
    float N##a##b()const noexcept{return m[((b-1)<<2)+(a-1)];}\
    float&N##a##b()noexcept{return m[((b-1)<<2)+(a-1)];}
    o_(1,1)o_(1,2)o_(1,3)o_(1,4)
    o_(2,1)o_(2,2)o_(2,3)o_(2,4)
    o_(3,1)o_(3,2)o_(3,3)o_(3,4)
    o_(4,1)o_(4,2)o_(4,3)o_(4,4)
#undef o_

    /* Builds the matrix from its 16 elements. The parameters are listed row by
       row (pRC = row R, column C); the initializer below reorders them into
       the column-major storage order. */
    tR4x4(
        float p11,float p12,float p13,float p14,
        float p21,float p22,float p23,float p24,
        float p31,float p32,float p33,float p34,
        float p41,float p42,float p43,float p44
    )noexcept
        :m{
            p11,p21,p31,p41,/*column 1*/
            p12,p22,p32,p42,/*column 2*/
            p13,p23,p33,p43,/*column 3*/
            p14,p24,p34,p44 /*column 4*/
        }{}

    /* Builds the matrix from four 128-bit registers copied bit-for-bit into
       storage: a fills m[0..3], b m[4..7], c m[8..11], d m[12..15]
       (i.e. one register per column). */
    tR4x4(__m128i const&a,__m128i const&b,__m128i const&c,__m128i const&d)noexcept{
        /* Aligned stores are valid because the struct is 16-byte aligned and
           `m` is its first member. */
        __m128i*const columns=reinterpret_cast<__m128i*>(m);
        _mm_store_si128(columns+0,a);
        _mm_store_si128(columns+1,b);
        _mm_store_si128(columns+2,c);
        _mm_store_si128(columns+3,d);
    }

#if 0/*Without SIMD SSE2*/
    tR4x4 kTp()const noexcept{return tR4x4(
        N11(),N21(),N31(),N41(),
        N12(),N22(),N32(),N42(),
        N13(),N23(),N33(),N43(),
        N14(),N24(),N34(),N44());}
#endif

    /* Returns the transpose of this matrix; *this is left untouched.
       The 32-bit lanes are shuffled as opaque integers with two rounds of
       SSE2 unpack instructions, so the float bit patterns are moved verbatim. */
    tR4x4 kTp()const noexcept{
        __m128i const*const columns=reinterpret_cast<__m128i const*>(m);
        __m128i const
            c1=_mm_load_si128(columns+0),
            c2=_mm_load_si128(columns+1),
            c3=_mm_load_si128(columns+2),
            c4=_mm_load_si128(columns+3),
            /* Round 1: interleave columns 1/3 and 2/4.
               l1 = (c1[0],c3[0],c1[1],c3[1]), l3 = (c1[2],c3[2],c1[3],c3[3]),
               l2 and l4 are the same for columns 2/4. */
            l1=_mm_unpacklo_epi32(c1,c3),
            l2=_mm_unpacklo_epi32(c2,c4),
            l3=_mm_unpackhi_epi32(c1,c3),
            l4=_mm_unpackhi_epi32(c2,c4);
        /* Round 2: interleaving l1/l2 (and l3/l4) yields the original rows
           (c1[i],c2[i],c3[i],c4[i]), which become the columns of the result. */
        return tR4x4(
            _mm_unpacklo_epi32(l1,l2),
            _mm_unpackhi_epi32(l1,l2),
            _mm_unpacklo_epi32(l3,l4),
            _mm_unpackhi_epi32(l3,l4));
    }

    /* Stream output of the matrix, one "| a  b  c  d |" line per row. */
    friend std::ostream&operator<<(std::ostream&,tR4x4 const&)noexcept;
};
/* Writes the matrix to `q` as four text lines, one per row, each formatted as
   "| a  b  c  d |" and terminated by '\n'. Returns `q` to allow chaining. */
std::ostream&operator<<(std::ostream&q,tR4x4 const&p)noexcept{
    /* Gather the elements row by row so the printing loop is uniform. */
    float const rows[4][4]={
        {p.N11(),p.N12(),p.N13(),p.N14()},
        {p.N21(),p.N22(),p.N23(),p.N24()},
        {p.N31(),p.N32(),p.N33(),p.N34()},
        {p.N41(),p.N42(),p.N43(),p.N44()}
    };
    for(auto const&row:rows){
        q<<"| "<<row[0]<<"  "<<row[1]<<"  "<<row[2]<<"  "<<row[3]<<" |\n";
    }
    return q;
}
void f()noexcept{
tR4x4 l(
1.F,2.F,3.F,4.F,
5.F,6.F,7.F,8.F,
8.F,7.F,6.F,5.F,
4.F,3.F,2.F,1.F);
std::cout<<"Matrix:\n"<<l<<std::endl;
std::cout<<"Matrix, transposed:\n"<<l.kTp()<<std::endl;}
/* Program entry point: runs the transpose demo and reports success. */
int main()
{
    f();
    return 0;
}
 
No comments:
Post a Comment