Coverage Report

Created: 2024-04-30 09:35

src/zserio/JsonDecoder.h
Line
Count
Source (jump to first uncovered line)
1
#ifndef ZSERIO_JSON_DECODER_H_INC
2
#define ZSERIO_JSON_DECODER_H_INC
3
4
#include <cerrno>
5
#include <cmath>
6
#include <cstdlib>
7
#include <cstring>
8
#include <utility>
9
10
#include "zserio/AllocatorHolder.h"
11
#include "zserio/AnyHolder.h"
12
#include "zserio/CppRuntimeException.h"
13
#include "zserio/String.h"
14
#include "zserio/StringView.h"
15
16
namespace zserio
17
{
18
19
/**
20
 * JSON value decoder.
21
 */
22
template <typename ALLOC = std::allocator<uint8_t>>
23
class BasicJsonDecoder : public AllocatorHolder<ALLOC>
24
{
25
public:
26
    using AllocatorHolder<ALLOC>::get_allocator;
27
28
    /**
29
     * Decoder result value.
30
     */
31
    struct DecoderResult
32
    {
33
        /**
34
         * Constructor used for decoder failure.
35
         *
36
         * \param numRead Number of processed characters.
37
         * \param allocator Allocator to use.
38
         */
39
        DecoderResult(size_t numRead, const ALLOC& allocator) :
40
                numReadChars(numRead),
41
                value(allocator),
42
                integerOverflow(false)
43
136
        {}
44
45
        /**
46
         * Constructor for decoder success.
47
         *
48
         * \param numRead Number of processed characters.
49
         * \param decodedValue Value decoded from JSON stream.
50
         * \param allocator Allocator to use.
51
         */
52
        template <typename T>
53
        DecoderResult(size_t numRead, T&& decodedValue, const ALLOC& allocator) :
54
                numReadChars(numRead),
55
                value(std::forward<T>(decodedValue), allocator),
56
                integerOverflow(false)
57
20.3k
        {}
58
59
        /**
60
         * Constructor used for integer decoder.
61
         *
62
         * \param numRead Number of processed characters.
63
         * \param decodedValue Value decoded from JSON stream.
64
         * \param overflow True in case of integer overflow.
65
         * \param allocator Allocator to use.
66
         */
67
        template <typename T>
68
        DecoderResult(size_t numRead, T&& decodedValue, bool overflow, const ALLOC& allocator) :
69
                numReadChars(numRead),
70
                value(createValue(decodedValue, overflow, allocator)),
71
                integerOverflow(overflow)
72
4.11k
        {}
73
74
        size_t numReadChars; /**< Number of processed characters. */
75
        AnyHolder<ALLOC> value; /**< Decoded value. Empty on failure. */
76
        bool integerOverflow; /**< True if decoded value was bigger than UINT64_MAX or was not in interval
77
                                   <INT64_MIN, INT64_MAX>. */
78
79
    private:
80
        template <typename T>
81
        AnyHolder<ALLOC> createValue(T&& decodedValue, bool overflow, const ALLOC& allocator)
82
4.11k
        {
83
4.11k
            return overflow ? 
AnyHolder<ALLOC>(allocator)2
84
4.11k
                            : 
AnyHolder<ALLOC>(std::forward<T>(decodedValue), allocator)4.11k
;
85
4.11k
        }
86
    };
87
88
    /**
89
     * Empty constructor.
90
     */
91
    BasicJsonDecoder() :
92
            AllocatorHolder<ALLOC>(ALLOC())
93
12
    {}
94
95
    /**
96
     * Constructor from given allocator.
97
     *
98
     * \param allocator Allocator to use.
99
     */
100
    explicit BasicJsonDecoder(const ALLOC& allocator) :
101
            AllocatorHolder<ALLOC>(allocator)
102
93
    {}
103
104
    /**
105
     * Decodes the JSON value from the input.
106
     *
107
     * \param input Input to decode from.
108
     *
109
     * \return Decoder result.
110
     */
111
    DecoderResult decodeValue(StringView input)
112
24.5k
    {
113
24.5k
        if (input.empty())
114
1
            return DecoderResult(0, get_allocator());
115
116
24.5k
        switch (input[0])
117
24.5k
        {
118
9
        case 'n':
119
9
            return decodeLiteral(input, "null"_sv, nullptr);
120
7
        case 't':
121
7
            return decodeLiteral(input, "true"_sv, true);
122
4
        case 'f':
123
4
            return decodeLiteral(input, "false"_sv, false);
124
4
        case 'N':
125
4
            return decodeLiteral(input, "NaN"_sv, static_cast<double>(NAN));
126
4
        case 'I':
127
4
            return decodeLiteral(input, "Infinity"_sv, static_cast<double>(INFINITY));
128
16.3k
        case '"':
129
16.3k
            return decodeString(input);
130
24
        case '-':
131
24
            if (input.size() > 1 && 
input[1] == 'I'23
)
132
5
                return decodeLiteral(input, "-Infinity"_sv, -static_cast<double>(INFINITY));
133
19
            return decodeNumber(input);
134
8.13k
        default:
135
8.13k
            return decodeNumber(input);
136
24.5k
        }
137
24.5k
    }
138
139
private:
140
    template <typename T>
141
    DecoderResult decodeLiteral(StringView input, StringView literal, T&& value);
142
    DecoderResult decodeString(StringView input);
143
    static bool decodeUnicodeEscape(
144
            StringView input, StringView::const_iterator& inputIt, string<ALLOC>& value);
145
    static char decodeHex(char character);
146
    size_t checkNumber(StringView input, bool& isDouble, bool& isSigned);
147
    DecoderResult decodeNumber(StringView input);
148
    DecoderResult decodeSigned(StringView input);
149
    DecoderResult decodeUnsigned(StringView input);
150
    DecoderResult decodeDouble(StringView input, size_t numChars);
151
};
152
153
template <typename ALLOC>
154
template <typename T>
155
typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeLiteral(
156
        StringView input, StringView literal, T&& value)
157
33
{
158
33
    StringView::const_iterator literalIt = literal.begin();
159
33
    StringView::const_iterator inputIt = input.begin();
160
174
    while (inputIt != input.end() && 
literalIt != literal.end()162
)
161
148
    {
162
148
        if (*inputIt++ != *literalIt++)
163
7
        {
164
            // failure, not decoded
165
7
            return DecoderResult(static_cast<size_t>(inputIt - input.begin()), get_allocator());
166
7
        }
167
148
    }
168
169
26
    if (literalIt != literal.end())
170
6
    {
171
        // short input, not decoded
172
6
        return DecoderResult(input.size(), get_allocator());
173
6
    }
174
175
    // success
176
20
    return DecoderResult(literal.size(), std::forward<T>(value), get_allocator());
177
26
}
178
179
template <typename ALLOC>
180
typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeString(StringView input)
181
16.3k
{
182
16.3k
    StringView::const_iterator inputIt = input.begin() + 1; // we know that at the beginning is '"'
183
16.3k
    string<ALLOC> value(get_allocator());
184
185
83.1k
    while (inputIt != input.end())
186
83.1k
    {
187
83.1k
        if (*inputIt == '\\')
188
24
        {
189
24
            ++inputIt;
190
24
            if (inputIt == input.end())
191
1
            {
192
                // wrong escape, not decoded
193
1
                return DecoderResult(static_cast<size_t>(inputIt - input.begin()), get_allocator());
194
1
            }
195
196
23
            char nextChar = *inputIt;
197
23
            switch (nextChar)
198
23
            {
199
2
            case '\\':
200
4
            case '"':
201
4
                value.push_back(nextChar);
202
4
                ++inputIt;
203
4
                break;
204
1
            case 'b':
205
1
                value.push_back('\b');
206
1
                ++inputIt;
207
1
                break;
208
1
            case 'f':
209
1
                value.push_back('\f');
210
1
                ++inputIt;
211
1
                break;
212
2
            case 'n':
213
2
                value.push_back('\n');
214
2
                ++inputIt;
215
2
                break;
216
1
            case 'r':
217
1
                value.push_back('\r');
218
1
                ++inputIt;
219
1
                break;
220
2
            case 't':
221
2
                value.push_back('\t');
222
2
                ++inputIt;
223
2
                break;
224
11
            case 'u': // unicode escape
225
11
                {
226
11
                    ++inputIt;
227
11
                    if (!decodeUnicodeEscape(input, inputIt, value))
228
10
                    {
229
                        // unsupported unicode escape, not decoded
230
10
                        return DecoderResult(static_cast<size_t>(inputIt - input.begin()), get_allocator());
231
10
                    }
232
1
                    break;
233
11
                }
234
1
            default:
235
1
                ++inputIt;
236
                // unknown escape, not decoded
237
1
                return DecoderResult(static_cast<size_t>(inputIt - input.begin()), get_allocator());
238
23
            }
239
23
        }
240
83.0k
        else if (*inputIt == '"')
241
16.3k
        {
242
16.3k
            ++inputIt;
243
            // successfully decoded
244
16.3k
            return DecoderResult(
245
16.3k
                    static_cast<size_t>(inputIt - input.begin()), std::move(value), get_allocator());
246
16.3k
        }
247
66.7k
        else
248
66.7k
        {
249
66.7k
            value.push_back(*inputIt++);
250
66.7k
        }
251
83.1k
    }
252
253
    // unterminated string, not decoded
254
2
    return DecoderResult(input.size(), get_allocator());
255
16.3k
}
256
257
template <typename ALLOC>
258
bool BasicJsonDecoder<ALLOC>::decodeUnicodeEscape(
259
        StringView input, StringView::const_iterator& inputIt, string<ALLOC>& value)
260
11
{
261
    // TODO[Mi-L@]: Simplified just to decode what zserio encodes, for complex solution we could use
262
    //              std::wstring_convert but it's deprecated in C++17.
263
11
    if (inputIt == input.end() || 
*inputIt++ != '0'10
)
264
2
        return false;
265
9
    if (inputIt == input.end() || 
*inputIt++ != '0'8
)
266
2
        return false;
267
268
7
    if (inputIt == input.end())
269
1
        return false;
270
6
    const char char1 = decodeHex(*inputIt++);
271
6
    if (char1 == -1)
272
2
        return false;
273
274
4
    if (inputIt == input.end())
275
1
        return false;
276
3
    const char char2 = decodeHex(*inputIt++);
277
3
    if (char2 == -1)
278
2
        return false;
279
280
1
    value.push_back(static_cast<char>((static_cast<uint32_t>(char1) << 4U) | static_cast<uint32_t>(char2)));
281
1
    return true;
282
3
}
283
284
template <typename ALLOC>
285
char BasicJsonDecoder<ALLOC>::decodeHex(char character)
286
9
{
287
9
    if (character >= '0' && 
character <= '9'8
)
288
3
        return static_cast<char>(character - '0');
289
6
    else if (character >= 'a' && 
character <= 'f'2
)
290
1
        return static_cast<char>(character - 'a' + 10);
291
5
    else if (character >= 'A' && 
character <= 'F'4
)
292
1
        return static_cast<char>(character - 'A' + 10);
293
294
4
    return -1;
295
9
}
296
297
template <typename ALLOC>
298
size_t BasicJsonDecoder<ALLOC>::checkNumber(StringView input, bool& isDouble, bool& isSigned)
299
8.15k
{
300
8.15k
    StringView::const_iterator inputIt = input.begin();
301
8.15k
    bool acceptExpSign = false;
302
8.15k
    bool isScientificDouble = false;
303
8.15k
    isDouble = false;
304
305
8.15k
    if (*inputIt == '-') // we know that at the beginning is at least one character
306
19
    {
307
19
        ++inputIt;
308
19
        isSigned = true;
309
19
    }
310
8.13k
    else
311
8.13k
    {
312
8.13k
        isSigned = false;
313
8.13k
    }
314
315
56.6k
    while (inputIt != input.end())
316
56.6k
    {
317
56.6k
        if (acceptExpSign)
318
4.01k
        {
319
4.01k
            acceptExpSign = false;
320
4.01k
            if (*inputIt == '+' || 
*inputIt == '-'4.01k
)
321
16
            {
322
16
                ++inputIt;
323
16
                continue;
324
16
            }
325
4.01k
        }
326
327
56.5k
        if (*inputIt >= '0' && 
*inputIt <= '9'48.4k
)
328
44.4k
        {
329
44.4k
            ++inputIt;
330
44.4k
            continue;
331
44.4k
        }
332
333
12.1k
        if ((*inputIt == 'e' || 
*inputIt == 'E'8.15k
) &&
!isScientificDouble4.02k
)
334
4.02k
        {
335
4.02k
            isDouble = true;
336
4.02k
            isScientificDouble = true;
337
4.02k
            acceptExpSign = true;
338
4.02k
            ++inputIt;
339
4.02k
            continue;
340
4.02k
        }
341
342
8.13k
        if (*inputIt == '.' && 
!isDouble14
)
343
13
        {
344
13
            isDouble = true;
345
13
            ++inputIt;
346
13
            continue;
347
13
        }
348
349
8.12k
        break; // end of a number
350
8.13k
    }
351
352
8.15k
    const size_t numberLen = static_cast<size_t>(inputIt - input.begin());
353
8.15k
    if (isSigned && 
numberLen == 119
)
354
3
        return 0; // single minus is not a number
355
356
8.15k
    return numberLen;
357
8.15k
}
358
359
template <typename ALLOC>
360
typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeNumber(StringView input)
361
8.15k
{
362
8.15k
    bool isDouble = false;
363
8.15k
    bool isSigned = false;
364
8.15k
    const size_t numChars = checkNumber(input, isDouble, isSigned);
365
8.15k
    if (numChars == 0)
366
7
        return DecoderResult(1, get_allocator());
367
368
    // for decodeSigned and decodeUnsigned, we know that all numChars will be processed because checkNumber
369
    // already checked this
370
8.14k
    if (isDouble)
371
4.03k
        return decodeDouble(input, numChars);
372
4.11k
    else if (isSigned)
373
9
        return decodeSigned(input);
374
4.11k
    else
375
4.11k
        return decodeUnsigned(input);
376
8.14k
}
377
378
template <typename ALLOC>
379
typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeSigned(StringView input)
380
9
{
381
9
    char* pEnd = nullptr;
382
9
    errno = 0; // no library function sets its value back to zero once changed
383
9
    const int64_t value = std::strtoll(input.begin(), &pEnd, 10);
384
385
9
    const bool overflow = (errno == ERANGE);
386
387
9
    return DecoderResult(static_cast<size_t>(pEnd - input.begin()), value, overflow, get_allocator());
388
9
}
389
390
template <typename ALLOC>
391
typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeUnsigned(StringView input)
392
4.11k
{
393
4.11k
    char* pEnd = nullptr;
394
4.11k
    errno = 0; // no library function sets its value back to zero once changed
395
4.11k
    const uint64_t value = std::strtoull(input.begin(), &pEnd, 10);
396
397
4.11k
    const bool overflow = (errno == ERANGE);
398
399
4.11k
    return DecoderResult(static_cast<size_t>(pEnd - input.begin()), value, overflow, get_allocator());
400
4.11k
}
401
402
template <typename ALLOC>
403
typename BasicJsonDecoder<ALLOC>::DecoderResult BasicJsonDecoder<ALLOC>::decodeDouble(
404
        StringView input, size_t numChars)
405
4.03k
{
406
4.03k
    char* pEnd = nullptr;
407
4.03k
    const double value = std::strtod(input.begin(), &pEnd);
408
4.03k
    if (static_cast<size_t>(pEnd - input.begin()) != numChars)
409
8
        return DecoderResult(numChars, get_allocator());
410
411
4.02k
    return DecoderResult(numChars, value, get_allocator());
412
4.03k
}
413
414
} // namespace zserio
415
416
#endif // ZSERIO_JSON_DECODER_H_INC