Line | Count | Source (jump to first uncovered line) |
1 | | #ifndef ZSERIO_JSON_DECODER_H_INC |
2 | | #define ZSERIO_JSON_DECODER_H_INC |
3 | | |
4 | | #include <cerrno> |
5 | | #include <cmath> |
6 | | #include <cstdlib> |
7 | | #include <cstring> |
8 | | #include <utility> |
9 | | |
10 | | #include "zserio/AllocatorHolder.h" |
11 | | #include "zserio/AnyHolder.h" |
12 | | #include "zserio/CppRuntimeException.h" |
13 | | #include "zserio/String.h" |
14 | | #include "zserio/StringView.h" |
15 | | |
16 | | namespace zserio |
17 | | { |
18 | | |
19 | | /** |
20 | | * JSON value decoder. |
21 | | */ |
22 | | template <typename ALLOC = std::allocator<uint8_t>> |
23 | | class BasicJsonDecoder : public AllocatorHolder<ALLOC> |
24 | | { |
25 | | public: |
26 | | using AllocatorHolder<ALLOC>::get_allocator; |
27 | | |
28 | | /** |
29 | | * Decoder result value. |
30 | | */ |
31 | | struct DecoderResult |
32 | | { |
33 | | /** |
34 | | * Constructor used for decoder failure. |
35 | | * |
36 | | * \param numRead Number of processed characters. |
37 | | * \param allocator Allocator to use. |
38 | | */ |
39 | | DecoderResult(size_t numRead, const ALLOC& allocator) : |
40 | | numReadChars(numRead), |
41 | | value(allocator), |
42 | | integerOverflow(false) |
43 | 136 | {} |
44 | | |
45 | | /** |
46 | | * Constructor for decoder success. |
47 | | * |
48 | | * \param numRead Number of processed characters. |
49 | | * \param decodedValue Value decoded from JSON stream. |
50 | | * \param allocator Allocator to use. |
51 | | */ |
52 | | template <typename T> |
53 | | DecoderResult(size_t numRead, T&& decodedValue, const ALLOC& allocator) : |
54 | | numReadChars(numRead), |
55 | | value(std::forward<T>(decodedValue), allocator), |
56 | | integerOverflow(false) |
57 | 20.3k | {} |
58 | | |
59 | | /** |
60 | | * Constructor used for integer decoder. |
61 | | * |
62 | | * \param numRead Number of processed characters. |
63 | | * \param decodedValue Value decoded from JSON stream. |
64 | | * \param overflow True in case of integer overflow. |
65 | | * \param allocator Allocator to use. |
66 | | */ |
67 | | template <typename T> |
68 | | DecoderResult(size_t numRead, T&& decodedValue, bool overflow, const ALLOC& allocator) : |
69 | | numReadChars(numRead), |
70 | | value(createValue(decodedValue, overflow, allocator)), |
71 | | integerOverflow(overflow) |
72 | 4.11k | {} |
73 | | |
74 | | size_t numReadChars; /**< Number of processed characters. */ |
75 | | AnyHolder<ALLOC> value; /**< Decoded value. Empty on failure. */ |
76 | | bool integerOverflow; /**< True if decoded value was bigger than UINT64_MAX or was not in interval |
77 | | <INT64_MIN, INT64_MAX>. */ |
78 | | |
79 | | private: |
80 | | template <typename T> |
81 | | AnyHolder<ALLOC> createValue(T&& decodedValue, bool overflow, const ALLOC& allocator) |
82 | 4.11k | { |
83 | 4.11k | return overflow ? AnyHolder<ALLOC>(allocator)2 |
84 | 4.11k | : AnyHolder<ALLOC>(std::forward<T>(decodedValue), allocator)4.11k ; |
85 | 4.11k | } |
86 | | }; |
87 | | |
88 | | /** |
89 | | * Empty constructor. |
90 | | */ |
91 | | BasicJsonDecoder() : |
92 | | AllocatorHolder<ALLOC>(ALLOC()) |
93 | 12 | {} |
94 | | |
95 | | /** |
96 | | * Constructor from given allocator. |
97 | | * |
98 | | * \param allocator Allocator to use. |
99 | | */ |
100 | | explicit BasicJsonDecoder(const ALLOC& allocator) : |
101 | | AllocatorHolder<ALLOC>(allocator) |
102 | 93 | {} |
103 | | |
104 | | /** |
105 | | * Decodes the JSON value from the input. |
106 | | * |
107 | | * \param input Input to decode from. |
108 | | * |
109 | | * \return Decoder result. |
110 | | */ |
111 | | DecoderResult decodeValue(StringView input) |
112 | 24.5k | { |
113 | 24.5k | if (input.empty()) |
114 | 1 | return DecoderResult(0, get_allocator()); |
115 | | |
116 | 24.5k | switch (input[0]) |
117 | 24.5k | { |
118 | 9 | case 'n': |
119 | 9 | return decodeLiteral(input, "null"_sv, nullptr); |
120 | 7 | case 't': |
121 | 7 | return decodeLiteral(input, "true"_sv, true); |
122 | 4 | case 'f': |
123 | 4 | return decodeLiteral(input, "false"_sv, false); |
124 | 4 | case 'N': |
125 | 4 | return decodeLiteral(input, "NaN"_sv, static_cast<double>(NAN)); |
126 | 4 | case 'I': |
127 | 4 | return decodeLiteral(input, "Infinity"_sv, static_cast<double>(INFINITY)); |
128 | 16.3k | case '"': |
129 | 16.3k | return decodeString(input); |
130 | 24 | case '-': |
131 | 24 | if (input.size() > 1 && input[1] == 'I'23 ) |
132 | 5 | return decodeLiteral(input, "-Infinity"_sv, -static_cast<double>(INFINITY)); |
133 | 19 | return decodeNumber(input); |
134 | 8.13k | default: |
135 | 8.13k | return decodeNumber(input); |
136 | 24.5k | } |
137 | 24.5k | } |
138 | | |
139 | | private: |
140 | | template <typename T> |
141 | | DecoderResult decodeLiteral(StringView input, StringView literal, T&& value); |
142 | | DecoderResult decodeString(StringView input); |
143 | | static bool decodeUnicodeEscape( |
144 | | StringView input, StringView::const_iterator& inputIt, string<ALLOC>& value); |
145 | | static char decodeHex(char character); |
146 | | size_t checkNumber(StringView input, bool& isDouble, bool& isSigned); |
147 | | DecoderResult decodeNumber(StringView input); |
148 | | DecoderResult decodeSigned(StringView input); |
149 | | DecoderResult decodeUnsigned(StringView input); |
150 | | DecoderResult decodeDouble(StringView input, size_t numChars); |
151 | | }; |
152 | | |
153 | | template <typename ALLOC> |
154 | | template <typename T> |
155 | | typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeLiteral( |
156 | | StringView input, StringView literal, T&& value) |
157 | 33 | { |
158 | 33 | StringView::const_iterator literalIt = literal.begin(); |
159 | 33 | StringView::const_iterator inputIt = input.begin(); |
160 | 174 | while (inputIt != input.end() && literalIt != literal.end()162 ) |
161 | 148 | { |
162 | 148 | if (*inputIt++ != *literalIt++) |
163 | 7 | { |
164 | | // failure, not decoded |
165 | 7 | return DecoderResult(static_cast<size_t>(inputIt - input.begin()), get_allocator()); |
166 | 7 | } |
167 | 148 | } |
168 | | |
169 | 26 | if (literalIt != literal.end()) |
170 | 6 | { |
171 | | // short input, not decoded |
172 | 6 | return DecoderResult(input.size(), get_allocator()); |
173 | 6 | } |
174 | | |
175 | | // success |
176 | 20 | return DecoderResult(literal.size(), std::forward<T>(value), get_allocator()); |
177 | 26 | } |
178 | | |
179 | | template <typename ALLOC> |
180 | | typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeString(StringView input) |
181 | 16.3k | { |
182 | 16.3k | StringView::const_iterator inputIt = input.begin() + 1; // we know that at the beginning is '"' |
183 | 16.3k | string<ALLOC> value(get_allocator()); |
184 | | |
185 | 83.1k | while (inputIt != input.end()) |
186 | 83.1k | { |
187 | 83.1k | if (*inputIt == '\\') |
188 | 24 | { |
189 | 24 | ++inputIt; |
190 | 24 | if (inputIt == input.end()) |
191 | 1 | { |
192 | | // wrong escape, not decoded |
193 | 1 | return DecoderResult(static_cast<size_t>(inputIt - input.begin()), get_allocator()); |
194 | 1 | } |
195 | | |
196 | 23 | char nextChar = *inputIt; |
197 | 23 | switch (nextChar) |
198 | 23 | { |
199 | 2 | case '\\': |
200 | 4 | case '"': |
201 | 4 | value.push_back(nextChar); |
202 | 4 | ++inputIt; |
203 | 4 | break; |
204 | 1 | case 'b': |
205 | 1 | value.push_back('\b'); |
206 | 1 | ++inputIt; |
207 | 1 | break; |
208 | 1 | case 'f': |
209 | 1 | value.push_back('\f'); |
210 | 1 | ++inputIt; |
211 | 1 | break; |
212 | 2 | case 'n': |
213 | 2 | value.push_back('\n'); |
214 | 2 | ++inputIt; |
215 | 2 | break; |
216 | 1 | case 'r': |
217 | 1 | value.push_back('\r'); |
218 | 1 | ++inputIt; |
219 | 1 | break; |
220 | 2 | case 't': |
221 | 2 | value.push_back('\t'); |
222 | 2 | ++inputIt; |
223 | 2 | break; |
224 | 11 | case 'u': // unicode escape |
225 | 11 | { |
226 | 11 | ++inputIt; |
227 | 11 | if (!decodeUnicodeEscape(input, inputIt, value)) |
228 | 10 | { |
229 | | // unsupported unicode escape, not decoded |
230 | 10 | return DecoderResult(static_cast<size_t>(inputIt - input.begin()), get_allocator()); |
231 | 10 | } |
232 | 1 | break; |
233 | 11 | } |
234 | 1 | default: |
235 | 1 | ++inputIt; |
236 | | // unknown escape, not decoded |
237 | 1 | return DecoderResult(static_cast<size_t>(inputIt - input.begin()), get_allocator()); |
238 | 23 | } |
239 | 23 | } |
240 | 83.0k | else if (*inputIt == '"') |
241 | 16.3k | { |
242 | 16.3k | ++inputIt; |
243 | | // successfully decoded |
244 | 16.3k | return DecoderResult( |
245 | 16.3k | static_cast<size_t>(inputIt - input.begin()), std::move(value), get_allocator()); |
246 | 16.3k | } |
247 | 66.7k | else |
248 | 66.7k | { |
249 | 66.7k | value.push_back(*inputIt++); |
250 | 66.7k | } |
251 | 83.1k | } |
252 | | |
253 | | // unterminated string, not decoded |
254 | 2 | return DecoderResult(input.size(), get_allocator()); |
255 | 16.3k | } |
256 | | |
257 | | template <typename ALLOC> |
258 | | bool BasicJsonDecoder<ALLOC>::decodeUnicodeEscape( |
259 | | StringView input, StringView::const_iterator& inputIt, string<ALLOC>& value) |
260 | 11 | { |
261 | | // TODO[Mi-L@]: Simplified just to decode what zserio encodes, for complex solution we could use |
262 | | // std::wstring_convert but it's deprecated in C++17. |
263 | 11 | if (inputIt == input.end() || *inputIt++ != '0'10 ) |
264 | 2 | return false; |
265 | 9 | if (inputIt == input.end() || *inputIt++ != '0'8 ) |
266 | 2 | return false; |
267 | | |
268 | 7 | if (inputIt == input.end()) |
269 | 1 | return false; |
270 | 6 | const char char1 = decodeHex(*inputIt++); |
271 | 6 | if (char1 == -1) |
272 | 2 | return false; |
273 | | |
274 | 4 | if (inputIt == input.end()) |
275 | 1 | return false; |
276 | 3 | const char char2 = decodeHex(*inputIt++); |
277 | 3 | if (char2 == -1) |
278 | 2 | return false; |
279 | | |
280 | 1 | value.push_back(static_cast<char>((static_cast<uint32_t>(char1) << 4U) | static_cast<uint32_t>(char2))); |
281 | 1 | return true; |
282 | 3 | } |
283 | | |
284 | | template <typename ALLOC> |
285 | | char BasicJsonDecoder<ALLOC>::decodeHex(char character) |
286 | 9 | { |
287 | 9 | if (character >= '0' && character <= '9'8 ) |
288 | 3 | return static_cast<char>(character - '0'); |
289 | 6 | else if (character >= 'a' && character <= 'f'2 ) |
290 | 1 | return static_cast<char>(character - 'a' + 10); |
291 | 5 | else if (character >= 'A' && character <= 'F'4 ) |
292 | 1 | return static_cast<char>(character - 'A' + 10); |
293 | | |
294 | 4 | return -1; |
295 | 9 | } |
296 | | |
297 | | template <typename ALLOC> |
298 | | size_t BasicJsonDecoder<ALLOC>::checkNumber(StringView input, bool& isDouble, bool& isSigned) |
299 | 8.15k | { |
300 | 8.15k | StringView::const_iterator inputIt = input.begin(); |
301 | 8.15k | bool acceptExpSign = false; |
302 | 8.15k | bool isScientificDouble = false; |
303 | 8.15k | isDouble = false; |
304 | | |
305 | 8.15k | if (*inputIt == '-') // we know that at the beginning is at least one character |
306 | 19 | { |
307 | 19 | ++inputIt; |
308 | 19 | isSigned = true; |
309 | 19 | } |
310 | 8.13k | else |
311 | 8.13k | { |
312 | 8.13k | isSigned = false; |
313 | 8.13k | } |
314 | | |
315 | 56.6k | while (inputIt != input.end()) |
316 | 56.6k | { |
317 | 56.6k | if (acceptExpSign) |
318 | 4.01k | { |
319 | 4.01k | acceptExpSign = false; |
320 | 4.01k | if (*inputIt == '+' || *inputIt == '-'4.01k ) |
321 | 16 | { |
322 | 16 | ++inputIt; |
323 | 16 | continue; |
324 | 16 | } |
325 | 4.01k | } |
326 | | |
327 | 56.5k | if (*inputIt >= '0' && *inputIt <= '9'48.4k ) |
328 | 44.4k | { |
329 | 44.4k | ++inputIt; |
330 | 44.4k | continue; |
331 | 44.4k | } |
332 | | |
333 | 12.1k | if ((*inputIt == 'e' || *inputIt == 'E'8.15k ) && !isScientificDouble4.02k ) |
334 | 4.02k | { |
335 | 4.02k | isDouble = true; |
336 | 4.02k | isScientificDouble = true; |
337 | 4.02k | acceptExpSign = true; |
338 | 4.02k | ++inputIt; |
339 | 4.02k | continue; |
340 | 4.02k | } |
341 | | |
342 | 8.13k | if (*inputIt == '.' && !isDouble14 ) |
343 | 13 | { |
344 | 13 | isDouble = true; |
345 | 13 | ++inputIt; |
346 | 13 | continue; |
347 | 13 | } |
348 | | |
349 | 8.12k | break; // end of a number |
350 | 8.13k | } |
351 | | |
352 | 8.15k | const size_t numberLen = static_cast<size_t>(inputIt - input.begin()); |
353 | 8.15k | if (isSigned && numberLen == 119 ) |
354 | 3 | return 0; // single minus is not a number |
355 | | |
356 | 8.15k | return numberLen; |
357 | 8.15k | } |
358 | | |
359 | | template <typename ALLOC> |
360 | | typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeNumber(StringView input) |
361 | 8.15k | { |
362 | 8.15k | bool isDouble = false; |
363 | 8.15k | bool isSigned = false; |
364 | 8.15k | const size_t numChars = checkNumber(input, isDouble, isSigned); |
365 | 8.15k | if (numChars == 0) |
366 | 7 | return DecoderResult(1, get_allocator()); |
367 | | |
368 | | // for decodeSigned and decodeUnsigned, we know that all numChars will be processed because checkNumber |
369 | | // already checked this |
370 | 8.14k | if (isDouble) |
371 | 4.03k | return decodeDouble(input, numChars); |
372 | 4.11k | else if (isSigned) |
373 | 9 | return decodeSigned(input); |
374 | 4.11k | else |
375 | 4.11k | return decodeUnsigned(input); |
376 | 8.14k | } |
377 | | |
378 | | template <typename ALLOC> |
379 | | typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeSigned(StringView input) |
380 | 9 | { |
381 | 9 | char* pEnd = nullptr; |
382 | 9 | errno = 0; // no library function sets its value back to zero once changed |
383 | 9 | const int64_t value = std::strtoll(input.begin(), &pEnd, 10); |
384 | | |
385 | 9 | const bool overflow = (errno == ERANGE); |
386 | | |
387 | 9 | return DecoderResult(static_cast<size_t>(pEnd - input.begin()), value, overflow, get_allocator()); |
388 | 9 | } |
389 | | |
390 | | template <typename ALLOC> |
391 | | typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeUnsigned(StringView input) |
392 | 4.11k | { |
393 | 4.11k | char* pEnd = nullptr; |
394 | 4.11k | errno = 0; // no library function sets its value back to zero once changed |
395 | 4.11k | const uint64_t value = std::strtoull(input.begin(), &pEnd, 10); |
396 | | |
397 | 4.11k | const bool overflow = (errno == ERANGE); |
398 | | |
399 | 4.11k | return DecoderResult(static_cast<size_t>(pEnd - input.begin()), value, overflow, get_allocator()); |
400 | 4.11k | } |
401 | | |
402 | | template <typename ALLOC> |
403 | | typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeDouble( |
404 | | StringView input, size_t numChars) |
405 | 4.03k | { |
406 | 4.03k | char* pEnd = nullptr; |
407 | 4.03k | const double value = std::strtod(input.begin(), &pEnd); |
408 | 4.03k | if (static_cast<size_t>(pEnd - input.begin()) != numChars) |
409 | 8 | return DecoderResult(numChars, get_allocator()); |
410 | | |
411 | 4.02k | return DecoderResult(numChars, value, get_allocator()); |
412 | 4.03k | } |
413 | | |
414 | | } // namespace zserio |
415 | | |
416 | | #endif // ZSERIO_JSON_DECODER_H_INC |