Line | Count | Source (jump to first uncovered line) |
1 | | #ifndef ZSERIO_JSON_DECODER_H_INC |
2 | | #define ZSERIO_JSON_DECODER_H_INC |
3 | | |
4 | | #include <cerrno> |
5 | | #include <cmath> |
6 | | #include <cstdlib> |
7 | | #include <cstring> |
8 | | #include <utility> |
9 | | |
10 | | #include "zserio/AllocatorHolder.h" |
11 | | #include "zserio/AnyHolder.h" |
12 | | #include "zserio/CppRuntimeException.h" |
13 | | #include "zserio/String.h" |
14 | | #include "zserio/StringView.h" |
15 | | |
16 | | namespace zserio |
17 | | { |
18 | | |
19 | | /** |
20 | | * JSON value decoder. |
21 | | */ |
22 | | template <typename ALLOC = std::allocator<uint8_t>> |
23 | | class BasicJsonDecoder : public AllocatorHolder<ALLOC> |
24 | | { |
25 | | public: |
26 | | using AllocatorHolder<ALLOC>::get_allocator; |
27 | | |
28 | | /** |
29 | | * Decoder result value. |
30 | | */ |
31 | | struct DecoderResult |
32 | | { |
33 | | /** |
34 | | * Constructor used for decoder failure. |
35 | | * |
36 | | * \param numRead Number of processed characters. |
37 | | * \param allocator Allocator to use. |
38 | | */ |
39 | | DecoderResult(size_t numRead, const ALLOC& allocator) : |
40 | | numReadChars(numRead), |
41 | | value(allocator), |
42 | | integerOverflow(false) |
43 | 136 | {} |
44 | | |
45 | | /** |
46 | | * Constructor for decoder success. |
47 | | * |
48 | | * \param numRead Number of processed characters. |
49 | | * \param decodedValue Value decoded from JSON stream. |
50 | | * \param allocator Allocator to use. |
51 | | */ |
52 | | template <typename T> |
53 | | DecoderResult(size_t numRead, T&& decodedValue, const ALLOC& allocator) : |
54 | | numReadChars(numRead), |
55 | | value(std::forward<T>(decodedValue), allocator), |
56 | | integerOverflow(false) |
57 | 20.3k | {} |
58 | | |
59 | | /** |
60 | | * Constructor used for integer decoder. |
61 | | * |
62 | | * \param numRead Number of processed characters. |
63 | | * \param decodedValue Value decoded from JSON stream. |
64 | | * \param overflow True in case of integer overflow. |
65 | | * \param allocator Allocator to use. |
66 | | */ |
67 | | template <typename T> |
68 | | DecoderResult(size_t numRead, T&& decodedValue, bool overflow, const ALLOC& allocator) : |
69 | | numReadChars(numRead), |
70 | | value(createValue(decodedValue, overflow, allocator)), |
71 | | integerOverflow(overflow) |
72 | 4.11k | {} |
73 | | |
74 | | size_t numReadChars; /**< Number of processed characters. */ |
75 | | AnyHolder<ALLOC> value; /**< Decoded value. Empty on failure. */ |
76 | | bool integerOverflow; /**< True if decoded value was bigger than UINT64_MAX or was not in interval |
77 | | <INT64_MIN, INT64_MAX>. */ |
78 | | |
79 | | private: |
80 | | template <typename T> |
81 | | AnyHolder<ALLOC> createValue(T&& decodedValue, bool overflow, const ALLOC& allocator) |
82 | 4.11k | { |
83 | 4.11k | return overflow ? AnyHolder<ALLOC>(allocator)2 |
84 | 4.11k | : AnyHolder<ALLOC>(std::forward<T>(decodedValue), allocator)4.11k ; |
85 | 4.11k | } |
86 | | }; |
87 | | |
88 | | /** |
89 | | * Empty constructor. |
90 | | */ |
91 | | BasicJsonDecoder() : |
92 | | AllocatorHolder<ALLOC>(ALLOC()) |
93 | 12 | {} |
94 | | |
95 | | /** |
96 | | * Constructor from given allocator. |
97 | | * |
98 | | * \param allocator Allocator to use. |
99 | | */ |
100 | | explicit BasicJsonDecoder(const ALLOC& allocator) : |
101 | | AllocatorHolder<ALLOC>(allocator) |
102 | 93 | {} |
103 | | |
104 | | /** |
105 | | * Decodes the JSON value from the input. |
106 | | * |
107 | | * \param input Input to decode from. |
108 | | * |
109 | | * \return Decoder result. |
110 | | */ |
111 | | DecoderResult decodeValue(StringView input) |
112 | 24.5k | { |
113 | 24.5k | if (input.empty()) |
114 | 1 | { |
115 | 1 | return DecoderResult(0, get_allocator()); |
116 | 1 | } |
117 | | |
118 | 24.5k | switch (input[0]) |
119 | 24.5k | { |
120 | 9 | case 'n': |
121 | 9 | return decodeLiteral(input, "null"_sv, nullptr); |
122 | 7 | case 't': |
123 | 7 | return decodeLiteral(input, "true"_sv, true); |
124 | 4 | case 'f': |
125 | 4 | return decodeLiteral(input, "false"_sv, false); |
126 | 4 | case 'N': |
127 | 4 | return decodeLiteral(input, "NaN"_sv, static_cast<double>(NAN)); |
128 | 4 | case 'I': |
129 | 4 | return decodeLiteral(input, "Infinity"_sv, static_cast<double>(INFINITY)); |
130 | 16.3k | case '"': |
131 | 16.3k | return decodeString(input); |
132 | 24 | case '-': |
133 | 24 | if (input.size() > 1 && input[1] == 'I'23 ) |
134 | 5 | { |
135 | 5 | return decodeLiteral(input, "-Infinity"_sv, -static_cast<double>(INFINITY)); |
136 | 5 | } |
137 | 19 | return decodeNumber(input); |
138 | 8.13k | default: |
139 | 8.13k | return decodeNumber(input); |
140 | 24.5k | } |
141 | 24.5k | } |
142 | | |
143 | | private: |
144 | | template <typename T> |
145 | | DecoderResult decodeLiteral(StringView input, StringView literal, T&& value); |
146 | | DecoderResult decodeString(StringView input); |
147 | | static bool decodeUnicodeEscape( |
148 | | StringView input, StringView::const_iterator& inputIt, string<ALLOC>& value); |
149 | | static char decodeHex(char character); |
150 | | size_t checkNumber(StringView input, bool& isDouble, bool& isSigned); |
151 | | DecoderResult decodeNumber(StringView input); |
152 | | DecoderResult decodeSigned(StringView input); |
153 | | DecoderResult decodeUnsigned(StringView input); |
154 | | DecoderResult decodeDouble(StringView input, size_t numChars); |
155 | | }; |
156 | | |
157 | | template <typename ALLOC> |
158 | | template <typename T> |
159 | | typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeLiteral( |
160 | | StringView input, StringView literal, T&& value) |
161 | 33 | { |
162 | 33 | StringView::const_iterator literalIt = literal.begin(); |
163 | 33 | StringView::const_iterator inputIt = input.begin(); |
164 | 174 | while (inputIt != input.end() && literalIt != literal.end()162 ) |
165 | 148 | { |
166 | 148 | if (*inputIt++ != *literalIt++) |
167 | 7 | { |
168 | | // failure, not decoded |
169 | 7 | return DecoderResult(static_cast<size_t>(inputIt - input.begin()), get_allocator()); |
170 | 7 | } |
171 | 148 | } |
172 | | |
173 | 26 | if (literalIt != literal.end()) |
174 | 6 | { |
175 | | // short input, not decoded |
176 | 6 | return DecoderResult(input.size(), get_allocator()); |
177 | 6 | } |
178 | | |
179 | | // success |
180 | 20 | return DecoderResult(literal.size(), std::forward<T>(value), get_allocator()); |
181 | 26 | } |
182 | | |
183 | | template <typename ALLOC> |
184 | | typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeString(StringView input) |
185 | 16.3k | { |
186 | 16.3k | StringView::const_iterator inputIt = input.begin() + 1; // we know that at the beginning is '"' |
187 | 16.3k | string<ALLOC> value(get_allocator()); |
188 | | |
189 | 83.1k | while (inputIt != input.end()) |
190 | 83.1k | { |
191 | 83.1k | if (*inputIt == '\\') |
192 | 24 | { |
193 | 24 | ++inputIt; |
194 | 24 | if (inputIt == input.end()) |
195 | 1 | { |
196 | | // wrong escape, not decoded |
197 | 1 | return DecoderResult(static_cast<size_t>(inputIt - input.begin()), get_allocator()); |
198 | 1 | } |
199 | | |
200 | 23 | char nextChar = *inputIt; |
201 | 23 | switch (nextChar) |
202 | 23 | { |
203 | 2 | case '\\': |
204 | 4 | case '"': |
205 | 4 | value.push_back(nextChar); |
206 | 4 | ++inputIt; |
207 | 4 | break; |
208 | 1 | case 'b': |
209 | 1 | value.push_back('\b'); |
210 | 1 | ++inputIt; |
211 | 1 | break; |
212 | 1 | case 'f': |
213 | 1 | value.push_back('\f'); |
214 | 1 | ++inputIt; |
215 | 1 | break; |
216 | 2 | case 'n': |
217 | 2 | value.push_back('\n'); |
218 | 2 | ++inputIt; |
219 | 2 | break; |
220 | 1 | case 'r': |
221 | 1 | value.push_back('\r'); |
222 | 1 | ++inputIt; |
223 | 1 | break; |
224 | 2 | case 't': |
225 | 2 | value.push_back('\t'); |
226 | 2 | ++inputIt; |
227 | 2 | break; |
228 | 11 | case 'u': // unicode escape |
229 | 11 | { |
230 | 11 | ++inputIt; |
231 | 11 | if (!decodeUnicodeEscape(input, inputIt, value)) |
232 | 10 | { |
233 | | // unsupported unicode escape, not decoded |
234 | 10 | return DecoderResult(static_cast<size_t>(inputIt - input.begin()), get_allocator()); |
235 | 10 | } |
236 | 1 | break; |
237 | 11 | } |
238 | 1 | default: |
239 | 1 | ++inputIt; |
240 | | // unknown escape, not decoded |
241 | 1 | return DecoderResult(static_cast<size_t>(inputIt - input.begin()), get_allocator()); |
242 | 23 | } |
243 | 23 | } |
244 | 83.0k | else if (*inputIt == '"') |
245 | 16.3k | { |
246 | 16.3k | ++inputIt; |
247 | | // successfully decoded |
248 | 16.3k | return DecoderResult( |
249 | 16.3k | static_cast<size_t>(inputIt - input.begin()), std::move(value), get_allocator()); |
250 | 16.3k | } |
251 | 66.7k | else |
252 | 66.7k | { |
253 | 66.7k | value.push_back(*inputIt++); |
254 | 66.7k | } |
255 | 83.1k | } |
256 | | |
257 | | // unterminated string, not decoded |
258 | 2 | return DecoderResult(input.size(), get_allocator()); |
259 | 16.3k | } |
260 | | |
261 | | template <typename ALLOC> |
262 | | bool BasicJsonDecoder<ALLOC>::decodeUnicodeEscape( |
263 | | StringView input, StringView::const_iterator& inputIt, string<ALLOC>& value) |
264 | 11 | { |
265 | | // TODO[Mi-L@]: Simplified just to decode what zserio encodes, for complex solution we could use |
266 | | // std::wstring_convert but it's deprecated in C++17. |
267 | 11 | if (inputIt == input.end() || *inputIt++ != '0'10 ) |
268 | 2 | { |
269 | 2 | return false; |
270 | 2 | } |
271 | 9 | if (inputIt == input.end() || *inputIt++ != '0'8 ) |
272 | 2 | { |
273 | 2 | return false; |
274 | 2 | } |
275 | | |
276 | 7 | if (inputIt == input.end()) |
277 | 1 | { |
278 | 1 | return false; |
279 | 1 | } |
280 | 6 | const char char1 = decodeHex(*inputIt++); |
281 | 6 | if (char1 == -1) |
282 | 2 | { |
283 | 2 | return false; |
284 | 2 | } |
285 | | |
286 | 4 | if (inputIt == input.end()) |
287 | 1 | { |
288 | 1 | return false; |
289 | 1 | } |
290 | 3 | const char char2 = decodeHex(*inputIt++); |
291 | 3 | if (char2 == -1) |
292 | 2 | { |
293 | 2 | return false; |
294 | 2 | } |
295 | | |
296 | 1 | value.push_back(static_cast<char>((static_cast<uint32_t>(char1) << 4U) | static_cast<uint32_t>(char2))); |
297 | 1 | return true; |
298 | 3 | } |
299 | | |
300 | | template <typename ALLOC> |
301 | | char BasicJsonDecoder<ALLOC>::decodeHex(char character) |
302 | 9 | { |
303 | 9 | if (character >= '0' && character <= '9'8 ) |
304 | 3 | { |
305 | 3 | return static_cast<char>(character - '0'); |
306 | 3 | } |
307 | 6 | else if (character >= 'a' && character <= 'f'2 ) |
308 | 1 | { |
309 | 1 | return static_cast<char>(character - 'a' + 10); |
310 | 1 | } |
311 | 5 | else if (character >= 'A' && character <= 'F'4 ) |
312 | 1 | { |
313 | 1 | return static_cast<char>(character - 'A' + 10); |
314 | 1 | } |
315 | | |
316 | 4 | return -1; |
317 | 9 | } |
318 | | |
319 | | template <typename ALLOC> |
320 | | size_t BasicJsonDecoder<ALLOC>::checkNumber(StringView input, bool& isDouble, bool& isSigned) |
321 | 8.15k | { |
322 | 8.15k | StringView::const_iterator inputIt = input.begin(); |
323 | 8.15k | bool acceptExpSign = false; |
324 | 8.15k | bool isScientificDouble = false; |
325 | 8.15k | isDouble = false; |
326 | | |
327 | 8.15k | if (*inputIt == '-') // we know that at the beginning is at least one character |
328 | 19 | { |
329 | 19 | ++inputIt; |
330 | 19 | isSigned = true; |
331 | 19 | } |
332 | 8.13k | else |
333 | 8.13k | { |
334 | 8.13k | isSigned = false; |
335 | 8.13k | } |
336 | | |
337 | 56.6k | while (inputIt != input.end()) |
338 | 56.6k | { |
339 | 56.6k | if (acceptExpSign) |
340 | 4.01k | { |
341 | 4.01k | acceptExpSign = false; |
342 | 4.01k | if (*inputIt == '+' || *inputIt == '-'4.01k ) |
343 | 16 | { |
344 | 16 | ++inputIt; |
345 | 16 | continue; |
346 | 16 | } |
347 | 4.01k | } |
348 | | |
349 | 56.5k | if (*inputIt >= '0' && *inputIt <= '9'48.4k ) |
350 | 44.4k | { |
351 | 44.4k | ++inputIt; |
352 | 44.4k | continue; |
353 | 44.4k | } |
354 | | |
355 | 12.1k | if ((*inputIt == 'e' || *inputIt == 'E'8.15k ) && !isScientificDouble4.02k ) |
356 | 4.02k | { |
357 | 4.02k | isDouble = true; |
358 | 4.02k | isScientificDouble = true; |
359 | 4.02k | acceptExpSign = true; |
360 | 4.02k | ++inputIt; |
361 | 4.02k | continue; |
362 | 4.02k | } |
363 | | |
364 | 8.13k | if (*inputIt == '.' && !isDouble14 ) |
365 | 13 | { |
366 | 13 | isDouble = true; |
367 | 13 | ++inputIt; |
368 | 13 | continue; |
369 | 13 | } |
370 | | |
371 | 8.12k | break; // end of a number |
372 | 8.13k | } |
373 | | |
374 | 8.15k | const size_t numberLen = static_cast<size_t>(inputIt - input.begin()); |
375 | 8.15k | if (isSigned && numberLen == 119 ) |
376 | 3 | { |
377 | 3 | return 0; // single minus is not a number |
378 | 3 | } |
379 | | |
380 | 8.15k | return numberLen; |
381 | 8.15k | } |
382 | | |
383 | | template <typename ALLOC> |
384 | | typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeNumber(StringView input) |
385 | 8.15k | { |
386 | 8.15k | bool isDouble = false; |
387 | 8.15k | bool isSigned = false; |
388 | 8.15k | const size_t numChars = checkNumber(input, isDouble, isSigned); |
389 | 8.15k | if (numChars == 0) |
390 | 7 | { |
391 | 7 | return DecoderResult(1, get_allocator()); |
392 | 7 | } |
393 | | |
394 | | // for decodeSigned and decodeUnsigned, we know that all numChars will be processed because checkNumber |
395 | | // already checked this |
396 | 8.14k | if (isDouble) |
397 | 4.03k | { |
398 | 4.03k | return decodeDouble(input, numChars); |
399 | 4.03k | } |
400 | 4.11k | else if (isSigned) |
401 | 9 | { |
402 | 9 | return decodeSigned(input); |
403 | 9 | } |
404 | 4.11k | else |
405 | 4.11k | { |
406 | 4.11k | return decodeUnsigned(input); |
407 | 4.11k | } |
408 | 8.14k | } |
409 | | |
410 | | template <typename ALLOC> |
411 | | typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeSigned(StringView input) |
412 | 9 | { |
413 | 9 | char* pEnd = nullptr; |
414 | 9 | errno = 0; // no library function sets its value back to zero once changed |
415 | 9 | const int64_t value = std::strtoll(input.begin(), &pEnd, 10); |
416 | | |
417 | 9 | const bool overflow = (errno == ERANGE); |
418 | | |
419 | 9 | return DecoderResult(static_cast<size_t>(pEnd - input.begin()), value, overflow, get_allocator()); |
420 | 9 | } |
421 | | |
422 | | template <typename ALLOC> |
423 | | typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeUnsigned(StringView input) |
424 | 4.11k | { |
425 | 4.11k | char* pEnd = nullptr; |
426 | 4.11k | errno = 0; // no library function sets its value back to zero once changed |
427 | 4.11k | const uint64_t value = std::strtoull(input.begin(), &pEnd, 10); |
428 | | |
429 | 4.11k | const bool overflow = (errno == ERANGE); |
430 | | |
431 | 4.11k | return DecoderResult(static_cast<size_t>(pEnd - input.begin()), value, overflow, get_allocator()); |
432 | 4.11k | } |
433 | | |
434 | | template <typename ALLOC> |
435 | | typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeDouble( |
436 | | StringView input, size_t numChars) |
437 | 4.03k | { |
438 | 4.03k | char* pEnd = nullptr; |
439 | 4.03k | const double value = std::strtod(input.begin(), &pEnd); |
440 | 4.03k | if (static_cast<size_t>(pEnd - input.begin()) != numChars) |
441 | 8 | { |
442 | 8 | return DecoderResult(numChars, get_allocator()); |
443 | 8 | } |
444 | | |
445 | 4.02k | return DecoderResult(numChars, value, get_allocator()); |
446 | 4.03k | } |
447 | | |
448 | | } // namespace zserio |
449 | | |
450 | | #endif // ZSERIO_JSON_DECODER_H_INC |