test/zserio/FloatUtilTest.cpp
Line | Count | Source |
1 | | #include <array> |
2 | | |
3 | | #include "gtest/gtest.h" |
4 | | #include "zserio/FloatUtil.h" |
5 | | |
6 | | namespace zserio |
7 | | { |
8 | | |
9 | | class FloatUtilTest : public ::testing::Test /* fixture: raw bit-pattern builders plus conversion check helpers used by the tests in this file */ |
10 | | { |
11 | | protected: |
12 | | uint16_t createFloat16Value(uint16_t sign, uint16_t exponent, uint16_t significand) /* ORs sign, exponent and significand together at the FLOAT16 bit positions; the fields are not masked */ |
13 | 23 | { |
14 | 23 | return static_cast<uint16_t>((static_cast<uint32_t>(sign) << FLOAT16_SIGN_BIT_POSITION) | |
15 | 23 | (static_cast<uint32_t>(exponent) << FLOAT16_EXPONENT_BIT_POSITION) | significand); |
16 | 23 | } |
17 | | |
18 | | uint32_t createFloat32Value(uint32_t sign, uint32_t exponent, uint32_t significand) /* same packing with the FLOAT32 bit positions */ |
19 | 35 | { |
20 | 35 | return (sign << FLOAT32_SIGN_BIT_POSITION) | (exponent << FLOAT32_EXPONENT_BIT_POSITION) | significand; |
21 | 35 | } |
22 | | |
23 | | uint64_t createFloat64Value(uint64_t sign, uint64_t exponent, uint64_t significand) /* same packing with the FLOAT64 bit positions */ |
24 | 16 | { |
25 | 16 | return (sign << FLOAT64_SIGN_BIT_POSITION) | (exponent << FLOAT64_EXPONENT_BIT_POSITION) | significand; |
26 | 16 | } |
27 | | |
28 | | void checkFloat16ToFloat32Conversion(uint16_t float16Value, uint32_t expectedFloat32Value) /* converts with convertUInt16ToFloat and compares the result, passed through convertFloatToUInt32, with the expected integer; NOTE: a fatal ASSERT failure returns only from this helper */ |
29 | 7 | { |
30 | 7 | const float float32 = convertUInt16ToFloat(float16Value); |
31 | 7 | ASSERT_EQ(expectedFloat32Value, convertFloatToUInt32(float32)); |
32 | 7 | } |
33 | | |
34 | | void checkFloat16ToFloat32Conversion(uint16_t float16Value, float expectedFloat32) /* overload comparing float values with ASSERT_EQ, i.e. with operator== */ |
35 | 4 | { |
36 | 4 | ASSERT_EQ(expectedFloat32, convertUInt16ToFloat(float16Value)); |
37 | 4 | } |
38 | | |
39 | | void checkFloat32ToFloat16Conversion(uint32_t float32Value, uint16_t expectedFloat16Value) /* turns the integer into a float with convertUInt32ToFloat, then checks the convertFloatToUInt16 result */ |
40 | 12 | { |
41 | 12 | const float float32 = convertUInt32ToFloat(float32Value); |
42 | 12 | ASSERT_EQ(expectedFloat16Value, convertFloatToUInt16(float32)); |
43 | 12 | } |
44 | | |
45 | | void checkFloat32ToFloat16Conversion(float float32, uint16_t expectedFloat16Value) /* overload taking the input directly as a float */ |
46 | 4 | { |
47 | 4 | ASSERT_EQ(expectedFloat16Value, convertFloatToUInt16(float32)); |
48 | 4 | } |
49 | | |
50 | | struct TestFloat32Element /* one table row: three fields for createFloat32Value and the float they are expected to encode */ |
51 | | { |
52 | | uint32_t sign; |
53 | | uint32_t exponent; |
54 | | uint32_t significand; |
55 | | float expectedFloat; |
56 | | }; |
57 | | |
58 | | struct TestFloat64Element /* one table row: three fields for createFloat64Value and the double they are expected to encode */ |
59 | | { |
60 | | uint64_t sign; |
61 | | uint64_t exponent; |
62 | | uint64_t significand; |
63 | | double expectedDouble; |
64 | | }; |
65 | | |
66 | | static const std::array<TestFloat32Element, 8> TEST_FLOAT32_DATA; /* shared by the two 32-bit conversion tests */ |
67 | | static const std::array<TestFloat64Element, 8> TEST_FLOAT64_DATA; /* shared by the two 64-bit conversion tests */ |
68 | | |
69 | | private: |
70 | | static const uint16_t FLOAT16_SIGN_BIT_POSITION; /* shift amounts used by the create*Value helpers above */ |
71 | | static const uint16_t FLOAT16_EXPONENT_BIT_POSITION; |
72 | | |
73 | | static const uint32_t FLOAT32_SIGN_BIT_POSITION; |
74 | | static const uint32_t FLOAT32_EXPONENT_BIT_POSITION; |
75 | | |
76 | | static const uint64_t FLOAT64_SIGN_BIT_POSITION; |
77 | | static const uint64_t FLOAT64_EXPONENT_BIT_POSITION; |
78 | | }; |
79 | | |
80 | | const std::array<FloatUtilTest::TestFloat32Element, 8> FloatUtilTest::TEST_FLOAT32_DATA = { /* rows are {sign, biased exponent (127 encodes 2^0), significand, expected float} */ |
81 | | TestFloat32Element{0, 0, UINT32_C(0), 0.0F}, |
82 | | TestFloat32Element{1, 0, UINT32_C(0), -0.0F}, |
83 | | TestFloat32Element{0, 127, UINT32_C(0), +1.0F}, |
84 | | TestFloat32Element{1, 127, UINT32_C(0), -1.0F}, |
85 | | // 2^1 (1 + 2^-1 + 2^-2) |
86 | | TestFloat32Element{0, 128, UINT32_C(0x600000), 3.5F}, |
87 | | // 2^-1 (1 + 2^-1 + 2^-2) |
88 | | TestFloat32Element{0, 126, UINT32_C(0x600000), 0.875F}, |
89 | | // 2^3 (1 + 2^-3 + 2^-4 + 2^-5 + 2^-6) |
90 | | TestFloat32Element{0, 130, UINT32_C(0x1E0000), 9.875F}, |
91 | | // 2^-1 (1 + 2^-3 + 2^-4 + 2^-5 + 2^-6) - exponent 126 is 2^-1, and 0.5 * 1.234375 = 0.6171875 |
92 | | TestFloat32Element{0, 126, UINT32_C(0x1E0000), 0.6171875F}, |
93 | | }; |
94 | | |
95 | | const std::array<FloatUtilTest::TestFloat64Element, 8> FloatUtilTest::TEST_FLOAT64_DATA = { /* rows are {sign, biased exponent (1023 encodes 2^0), significand, expected double} */ |
96 | | TestFloat64Element{0, 0, UINT64_C(0), 0.0}, |
97 | | TestFloat64Element{1, 0, UINT64_C(0), -0.0}, |
98 | | TestFloat64Element{0, 1023, UINT64_C(0), +1.0}, |
99 | | TestFloat64Element{1, 1023, UINT64_C(0), -1.0}, |
100 | | // 2^1 (1 + 2^-1 + 2^-2) |
101 | | TestFloat64Element{0, 1024, UINT64_C(0xC000000000000), 3.5}, |
102 | | // 2^-1 (1 + 2^-1 + 2^-2) |
103 | | TestFloat64Element{0, 1022, UINT64_C(0xC000000000000), 0.875}, |
104 | | // 2^3 (1 + 2^-3 + 2^-4 + 2^-5 + 2^-6) |
105 | | TestFloat64Element{0, 1026, UINT64_C(0x3C00000000000), 9.875}, |
106 | | // 2^-1 (1 + 2^-3 + 2^-4 + 2^-5 + 2^-6) - exponent 1022 is 2^-1, and 0.5 * 1.234375 = 0.6171875 |
107 | | TestFloat64Element{0, 1022, UINT64_C(0x3C00000000000), 0.6171875}, |
108 | | }; |
109 | | |
110 | | const uint16_t FloatUtilTest::FLOAT16_SIGN_BIT_POSITION = UINT16_C(15); // shift that places the sign bit of a 16-bit float |
111 | | const uint16_t FloatUtilTest::FLOAT16_EXPONENT_BIT_POSITION = UINT16_C(10); // shift that places the exponent field above a 10-bit significand |
112 | | |
113 | | const uint32_t FloatUtilTest::FLOAT32_SIGN_BIT_POSITION = UINT32_C(31); // literal macro now matches the declared uint32_t type |
114 | | const uint32_t FloatUtilTest::FLOAT32_EXPONENT_BIT_POSITION = UINT32_C(23); // exponent field sits above a 23-bit significand |
115 | | |
116 | | const uint64_t FloatUtilTest::FLOAT64_SIGN_BIT_POSITION = UINT64_C(63); // literal macro now matches the declared uint64_t type |
117 | | const uint64_t FloatUtilTest::FLOAT64_EXPONENT_BIT_POSITION = UINT64_C(52); // exponent field sits above a 52-bit significand |
118 | | |
119 | | TEST_F(FloatUtilTest, convertUInt16ToFloat) /* checks 16-bit to 32-bit float conversion for zeros, infinities, NaNs, normal and subnormal inputs */ |
120 | 1 | { |
121 | | // plus zero |
122 | 1 | const uint16_t float16ValuePlusZero = createFloat16Value(0, 0, 0); // +0.0 |
123 | 1 | checkFloat16ToFloat32Conversion(float16ValuePlusZero, 0.0F); |
124 | | |
125 | | // minus zero - NOTE(review): this uses the float overload, and -0.0F == 0.0F, so the sign bit is not actually verified here |
126 | 1 | const uint16_t float16ValueMinusZero = createFloat16Value(1, 0, 0); // -0.0 |
127 | 1 | checkFloat16ToFloat32Conversion(float16ValueMinusZero, -0.0F); |
128 | | |
129 | | // plus infinity |
130 | 1 | const uint16_t float16ValuePlusInfinity = createFloat16Value(0, 0x1F, 0); // +INF |
131 | 1 | const uint32_t float32ValuePlusInfinity = createFloat32Value(0, 0xFF, 0); // +INF |
132 | 1 | checkFloat16ToFloat32Conversion(float16ValuePlusInfinity, float32ValuePlusInfinity); |
133 | | |
134 | | // minus infinity |
135 | 1 | const uint16_t float16ValueMinusInfinity = createFloat16Value(1, 0x1F, 0); // -INF |
136 | 1 | const uint32_t float32ValueMinusInfinity = createFloat32Value(1, 0xFF, 0); // -INF |
137 | 1 | checkFloat16ToFloat32Conversion(float16ValueMinusInfinity, float32ValueMinusInfinity); |
138 | | |
139 | | // quiet NaN, sign bit clear (0x3FF << 13 == 0x7FE000, so the payload is expected to be carried over shifted) |
140 | 1 | const uint16_t float16ValueQuietNan = createFloat16Value(0, 0x1F, 0x3FF); // +NaN |
141 | 1 | const uint32_t float32ValueQuietNan = createFloat32Value(0, 0xFF, 0x7FE000); // +NaN |
142 | 1 | checkFloat16ToFloat32Conversion(float16ValueQuietNan, float32ValueQuietNan); |
143 | | |
144 | | // NaN with the sign bit set - same significand as above, so despite the variable names this is still a quiet NaN under the usual IEEE 754-2008 encoding, not a signaling one |
145 | 1 | const uint16_t float16ValueSignalingNan = createFloat16Value(1, 0x1F, 0x3FF); // -NaN |
146 | 1 | const uint32_t float32ValueSignalingNan = createFloat32Value(1, 0xFF, 0x7FE000); // -NaN |
147 | 1 | checkFloat16ToFloat32Conversion(float16ValueSignalingNan, float32ValueSignalingNan); |
148 | | |
149 | | // normal numbers |
150 | 1 | const uint16_t float16ValueOne = createFloat16Value(0, 15, 0); // 1.0 |
151 | 1 | checkFloat16ToFloat32Conversion(float16ValueOne, 1.0F); |
152 | | |
153 | 1 | const uint16_t float16ValueOnePlus = createFloat16Value(0, 15, 0x01); // 1.0 + 2^-10 |
154 | 1 | const uint32_t float32ValueOnePlus = createFloat32Value(0, 127, 0x2000); // 1.0 + 2^-10 |
155 | 1 | checkFloat16ToFloat32Conversion(float16ValueOnePlus, float32ValueOnePlus); |
156 | | |
157 | 1 | const uint16_t float16ValueMax = createFloat16Value(0, 30, 0x3FF); // 2^15 (1 + 2^-1 + ... + 2^-10) |
158 | 1 | checkFloat16ToFloat32Conversion(float16ValueMax, 65504.0F); |
159 | | |
160 | | // subnormal numbers (expected to come out as normal 32-bit values) |
161 | 1 | const uint16_t float16ValueMinSubnormal = createFloat16Value(0, 0, 1); // 2^-14 (2^-10) |
162 | 1 | const uint32_t float32ValueMinSubnormal = createFloat32Value(0, 103, 0); // 2^-24 |
163 | 1 | checkFloat16ToFloat32Conversion(float16ValueMinSubnormal, float32ValueMinSubnormal); |
164 | | |
165 | 1 | const uint16_t float16ValueMaxSubnormal = createFloat16Value(0, 0, 0x3FF); // 2^-14 (2^-1 + ... + 2^-10) |
166 | | // 2^-15 (1 + 2^-1 + ... + 2^-9) |
167 | 1 | const uint32_t float32ValueMaxSubnormal = createFloat32Value(0, 112, 0x7FC000); |
168 | 1 | checkFloat16ToFloat32Conversion(float16ValueMaxSubnormal, float32ValueMaxSubnormal); |
169 | 1 | } |
170 | | |
171 | | TEST_F(FloatUtilTest, convertFloatToUInt16) /* checks 32-bit to 16-bit float conversion, including underflow, overflow and rounding cases */ |
172 | 1 | { |
173 | | // plus zero |
174 | 1 | const uint16_t float16ValuePlusZero = createFloat16Value(0, 0, 0); // +0.0 |
175 | 1 | checkFloat32ToFloat16Conversion(0.0F, float16ValuePlusZero); |
176 | | |
177 | | // minus zero (compared as a 16-bit integer, so the sign bit is checked) |
178 | 1 | const uint16_t float16ValueMinusZero = createFloat16Value(1, 0, 0); // -0.0 |
179 | 1 | checkFloat32ToFloat16Conversion(-0.0F, float16ValueMinusZero); |
180 | | |
181 | | // plus infinity |
182 | 1 | const uint32_t float32ValuePlusInfinity = createFloat32Value(0, 0xFF, 0); // +INF |
183 | 1 | const uint16_t float16ValuePlusInfinity = createFloat16Value(0, 0x1F, 0); // +INF |
184 | 1 | checkFloat32ToFloat16Conversion(float32ValuePlusInfinity, float16ValuePlusInfinity); |
185 | | |
186 | | // minus infinity |
187 | 1 | const uint32_t float32ValueMinusInfinity = createFloat32Value(1, 0xFF, 0); // -INF |
188 | 1 | const uint16_t float16ValueMinusInfinity = createFloat16Value(1, 0x1F, 0); // -INF |
189 | 1 | checkFloat32ToFloat16Conversion(float32ValueMinusInfinity, float16ValueMinusInfinity); |
190 | | |
191 | | // quiet NaN, sign bit clear (0x7FE000 >> 13 == 0x3FF) |
192 | 1 | const uint32_t float32ValueQuietNan = createFloat32Value(0, 0xFF, 0x7FE000); // +NaN |
193 | 1 | const uint16_t float16ValueQuietNan = createFloat16Value(0, 0x1F, 0x3FF); // +NaN |
194 | 1 | checkFloat32ToFloat16Conversion(float32ValueQuietNan, float16ValueQuietNan); |
195 | | |
196 | | // NaN with the sign bit set - same significand as above, so despite the variable names this is still a quiet NaN under the usual IEEE 754-2008 encoding, not a signaling one |
197 | 1 | const uint32_t float32ValueSignalingNan = createFloat32Value(1, 0xFF, 0x7FE000); // -NaN |
198 | 1 | const uint16_t float16ValueSignalingNan = createFloat16Value(1, 0x1F, 0x3FF); // -NaN |
199 | 1 | checkFloat32ToFloat16Conversion(float32ValueSignalingNan, float16ValueSignalingNan); |
200 | | |
201 | | // normal numbers |
202 | 1 | const uint16_t float16ValueOne = createFloat16Value(0, 15, 0); // 1.0 |
203 | 1 | checkFloat32ToFloat16Conversion(1.0F, float16ValueOne); |
204 | | |
205 | 1 | const uint32_t float32ValueOnePlus = createFloat32Value(0, 127, 0x2000); // 1.0 + 2^-10 |
206 | 1 | const uint16_t float16ValueOnePlus = createFloat16Value(0, 15, 0x01); // 1.0 + 2^-10 |
207 | 1 | checkFloat32ToFloat16Conversion(float32ValueOnePlus, float16ValueOnePlus); |
208 | | |
209 | 1 | const uint16_t float16ValueMax = createFloat16Value(0, 30, 0x3FF); // 2^15 (1 + 2^-1 + ... + 2^-10) |
210 | 1 | checkFloat32ToFloat16Conversion(65504.0F, float16ValueMax); |
211 | | |
212 | | // normal numbers converted to zero (2^-25 is half of the smallest 16-bit subnormal, 2^-24) |
213 | 1 | const uint32_t float32ValueUnderflow = createFloat32Value(0, 102, 0); // 2^-25 |
214 | 1 | checkFloat32ToFloat16Conversion(float32ValueUnderflow, float16ValuePlusZero); |
215 | | |
216 | | // normal numbers converted to subnormal numbers |
217 | 1 | const uint32_t float32ValueMinUnderflow = createFloat32Value(0, 103, 1); // 2^-24 (1 + 2^-23) |
218 | 1 | const uint16_t float16ValueMinSubnormal = createFloat16Value(0, 0, 1); // 2^-24 |
219 | 1 | checkFloat32ToFloat16Conversion(float32ValueMinUnderflow, float16ValueMinSubnormal); |
220 | | |
221 | | // normal numbers converted to subnormal numbers with rounding (input is 2.5 * 2^-24, expected 3 * 2^-24: the exact tie is rounded up) |
222 | 1 | const uint32_t float32ValueMinUnderflowRounding = createFloat32Value(0, 104, 0x200000); // 2^-23 (1 + 2^-2) |
223 | 1 | const uint16_t float16ValueMinSubnormalRounding = createFloat16Value(0, 0, 0x3); // 2^-14 (2^-9 + 2^-10) |
224 | 1 | checkFloat32ToFloat16Conversion(float32ValueMinUnderflowRounding, float16ValueMinSubnormalRounding); |
225 | | |
226 | | // normal numbers converted to infinity |
227 | 1 | const uint32_t float32ValueOverflow = createFloat32Value(0, 144, 0); // 2^17 |
228 | 1 | checkFloat32ToFloat16Conversion(float32ValueOverflow, float16ValuePlusInfinity); |
229 | | |
230 | | // normal numbers converted with rounding (2^-11 is exactly half of the last 16-bit significand step; the tie is expected to round up) |
231 | 1 | const uint32_t float32ValueRounding = createFloat32Value(0, 127, 0x401000); // 1 + 2^-1 + 2^-11 |
232 | 1 | const uint16_t float16ValueRounding = createFloat16Value(0, 15, 0x201); // 1 + 2^-1 + 2^-10 |
233 | 1 | checkFloat32ToFloat16Conversion(float32ValueRounding, float16ValueRounding); |
234 | | |
235 | | // subnormal numbers (32-bit subnormals are expected to convert to plus zero) |
236 | 1 | const uint32_t float32ValueMinSubnormal = createFloat32Value(0, 0, 1); // 2^-126 (2^-23) |
237 | 1 | checkFloat32ToFloat16Conversion(float32ValueMinSubnormal, float16ValuePlusZero); |
238 | | |
239 | | // 2^-126 (2^-1 + ... + 2^-23) |
240 | 1 | const uint32_t float32ValueMaxSubnormal = createFloat32Value(0, 0, 0x007FFFFF); |
241 | 1 | checkFloat32ToFloat16Conversion(float32ValueMaxSubnormal, float16ValuePlusZero); |
242 | 1 | } |
243 | | |
244 | | TEST_F(FloatUtilTest, convertUInt32ToFloat) /* for every TEST_FLOAT32_DATA row: builds the 32-bit pattern and expects convertUInt32ToFloat to yield the row's float */ |
245 | 1 | { |
246 | 1 | for (TestFloat32Element testElement : TEST_FLOAT32_DATA) |
247 | 8 | { |
248 | 8 | const uint32_t float32Value = |
249 | 8 | createFloat32Value(testElement.sign, testElement.exponent, testElement.significand); |
250 | 8 | const float convertedFloat = convertUInt32ToFloat(float32Value); |
251 | | |
252 | 8 | ASSERT_EQ(testElement.expectedFloat, convertedFloat); /* NOTE(review): float == treats -0.0F and 0.0F as equal, so the sign of the -0.0F row is not checked here */ |
253 | 8 | } |
254 | 1 | } |
255 | | |
256 | | TEST_F(FloatUtilTest, convertFloatToUInt32) /* for every TEST_FLOAT32_DATA row: expects convertFloatToUInt32 of the row's float to equal the pattern built from its fields */ |
257 | 1 | { |
258 | 1 | for (TestFloat32Element testElement : TEST_FLOAT32_DATA) |
259 | 8 | { |
260 | 8 | const uint32_t convertedFloatValue = convertFloatToUInt32(testElement.expectedFloat); |
261 | 8 | const uint32_t expectedFloatValue = |
262 | 8 | createFloat32Value(testElement.sign, testElement.exponent, testElement.significand); |
263 | | |
264 | 8 | ASSERT_EQ(expectedFloatValue, convertedFloatValue); /* integer comparison, so the sign bit of the -0.0F row is checked */ |
265 | 8 | } |
266 | 1 | } |
267 | | |
268 | | TEST_F(FloatUtilTest, convertUInt64ToDouble) /* for every TEST_FLOAT64_DATA row: builds the 64-bit pattern and expects convertUInt64ToDouble to yield the row's double */ |
269 | 1 | { |
270 | 1 | for (TestFloat64Element testElement : TEST_FLOAT64_DATA) |
271 | 8 | { |
272 | 8 | const uint64_t float64Value = |
273 | 8 | createFloat64Value(testElement.sign, testElement.exponent, testElement.significand); |
274 | 8 | const double convertedDouble = convertUInt64ToDouble(float64Value); |
275 | | |
276 | 8 | ASSERT_EQ(testElement.expectedDouble, convertedDouble); /* NOTE(review): double == treats -0.0 and 0.0 as equal, so the sign of the -0.0 row is not checked here */ |
277 | 8 | } |
278 | 1 | } |
279 | | |
280 | | TEST_F(FloatUtilTest, convertDoubleToUInt64) /* for every TEST_FLOAT64_DATA row: expects convertDoubleToUInt64 of the row's double to equal the pattern built from its fields */ |
281 | 1 | { |
282 | 1 | for (TestFloat64Element testElement : TEST_FLOAT64_DATA) |
283 | 8 | { |
284 | 8 | const uint64_t convertedDoubleValue = convertDoubleToUInt64(testElement.expectedDouble); |
285 | 8 | const uint64_t expectedDoubleValue = |
286 | 8 | createFloat64Value(testElement.sign, testElement.exponent, testElement.significand); |
287 | | |
288 | 8 | ASSERT_EQ(expectedDoubleValue, convertedDoubleValue); /* integer comparison, so the sign bit of the -0.0 row is checked */ |
289 | 8 | } |
290 | 1 | } |
291 | | |
292 | | } // namespace zserio |