21 template<
class T,
int Dim>
23 static_assert(Dim == 2 || Dim == 4 || Dim == 8,
"Dimension must be 2, 4, or 8.");
24 static_assert(std::is_same<T, float>::value
25 || std::is_same<T, double>::value
26 || std::is_same<T, int32_t>::value
27 || std::is_same<T, int64_t>::value,
28 "Type must be float, double, in32 or int64.");
34 for (
int i = 0; i < Dim; ++i)
35 res.
v[i] = lhs.
v[i] * rhs.
v[i];
41 for (
int i = 0; i < Dim; ++i)
42 res.
v[i] = lhs.
v[i] / rhs.
v[i];
48 for (
int i = 0; i < Dim; ++i)
49 res.
v[i] = lhs.
v[i] + rhs.
v[i];
55 for (
int i = 0; i < Dim; ++i)
56 res.
v[i] = lhs.
v[i] - rhs.
v[i];
62 for (
int i = 0; i < Dim; ++i)
63 res.
v[i] = lhs.
v[i] * rhs;
69 for (
int i = 0; i < Dim; ++i)
70 res.
v[i] = lhs.
v[i] / rhs;
76 for (
int i = 0; i < Dim; ++i)
77 res.
v[i] = lhs.
v[i] + rhs;
83 for (
int i = 0; i < Dim; ++i)
84 res.
v[i] = lhs.
v[i] - rhs;
94 for (
int i = 0; i < Dim; ++i)
99 template<
class... Args>
100 static inline Simd set(Args... args) {
102 static_assert(
sizeof...(Args) == Dim,
"Number of arguments must be equal to dimension.");
103 T table[] = {T(args)...};
104 for (
int i = 0; i < Dim; ++i)
109 template<
int Count = Dim>
111 static_assert(Count <= Dim,
"Number of elements to dot must be smaller or equal to dimension.");
112 static_assert(0 < Count,
"Count must not be zero.");
113 T sum = lhs.
v[0] * rhs.
v[0];
114 for (
int i = 1; i < Count; ++i)
115 sum += lhs.
v[i] * rhs.
v[i];
119 template<
int i0,
int i1>
121 static_assert(Dim == 2,
"Only for 2-way simd.");
123 ret.
v[1] = arg.
v[i0];
124 ret.
v[0] = arg.
v[i1];
128 template<
int i0,
int i1,
int i2,
int i3>
130 static_assert(Dim == 4,
"Only for 4-way simd.");
132 ret.
v[3] = arg.
v[i0];
133 ret.
v[2] = arg.
v[i1];
134 ret.
v[1] = arg.
v[i2];
135 ret.
v[0] = arg.
v[i3];
139 template<
int i0,
int i1,
int i2,
int i3,
int i4,
int i5,
int i6,
int i7>
141 static_assert(Dim == 8,
"Only for 8-way simd.");
143 ret.
v[7] = arg.
v[i0];
144 ret.
v[6] = arg.
v[i1];
145 ret.
v[5] = arg.
v[i2];
146 ret.
v[4] = arg.
v[i3];
147 ret.
v[3] = arg.
v[i4];
148 ret.
v[2] = arg.
v[i5];
149 ret.
v[1] = arg.
v[i6];
150 ret.
v[0] = arg.
v[i7];
158 #if defined(__SSE2__) || _M_IX86_FP >= 2 || _M_X64
static Simd mul(const Simd &lhs, T rhs)
Definition: Simd.hpp:60
static Simd mad(const Simd &a, const Simd &b, const Simd &c)
Definition: Simd.hpp:88
static Simd spread(T value)
Definition: Simd.hpp:92
static Simd add(const Simd &lhs, T rhs)
Definition: Simd.hpp:74
T v[Dim]
Definition: Simd.hpp:23
static Simd div(const Simd &lhs, T rhs)
Definition: Simd.hpp:67
static Simd shuffle(Simd arg)
Definition: Simd.hpp:140
static T dot(const Simd &lhs, const Simd &rhs)
Definition: Simd.hpp:110
static Simd div(const Simd &lhs, const Simd &rhs)
Definition: Simd.hpp:39
Definition: Approx.hpp:11
static Simd mul(const Simd &lhs, const Simd &rhs)
Definition: Simd.hpp:32
static Simd add(const Simd &lhs, const Simd &rhs)
Definition: Simd.hpp:46
static Simd sub(const Simd &lhs, T rhs)
Definition: Simd.hpp:81
static Simd sub(const Simd &lhs, const Simd &rhs)
Definition: Simd.hpp:53
Accepts 2-, 4- or 8-dimensional parameters of type float, double, int32_t or int64_t. Uses SSE2 or AVX acceleration if enabled in t...
Definition: Simd.hpp:22
static Simd shuffle(Simd arg)
Definition: Simd.hpp:129
static Simd shuffle(Simd arg)
Definition: Simd.hpp:120