Coverage Report

Created: 2024-07-18 11:41

src/zserio/JsonTokenizer.h
Line
Count
Source (jump to first uncovered line)
1
#ifndef ZSERIO_JSON_TOKENIZER_H_INC
2
#define ZSERIO_JSON_TOKENIZER_H_INC
3
4
#include <array>
5
#include <istream>
6
#include <memory>
7
8
#include "zserio/AnyHolder.h"
9
#include "zserio/CppRuntimeException.h"
10
#include "zserio/JsonDecoder.h"
11
#include "zserio/Types.h"
12
13
namespace zserio
14
{
15
16
/**
 * Kinds of tokens produced by the Json Tokenizer.
 */
enum class JsonToken : int8_t
{
    UNKNOWN = -1, /**< Token which could not be classified. */
    BEGIN_OF_FILE = 0, /**< Initial state of a tokenizer with non-empty input. */
    END_OF_FILE = 1, /**< No more input is available. */
    BEGIN_OBJECT = 2, /**< Character '{'. */
    END_OBJECT = 3, /**< Character '}'. */
    BEGIN_ARRAY = 4, /**< Character '['. */
    END_ARRAY = 5, /**< Character ']'. */
    KEY_SEPARATOR = 6, /**< Character ':'. */
    ITEM_SEPARATOR = 7, /**< Character ','. */
    VALUE = 8 /**< Any decoded JSON value. */
};
32
33
/**
34
 * Exception used to distinguish exceptions from the JsonParser.
35
 */
36
class JsonParserException : public CppRuntimeException
37
{
38
public:
39
    using CppRuntimeException::CppRuntimeException;
40
};
41
42
/**
43
 * Allows to append JsonToken to CppRuntimeException.
44
 *
45
 * \param exception Exception to modify.
46
 * \param token JSON Token to append.
47
 *
48
 * \return Reference to the exception to allow operator chaining.
49
 */
50
CppRuntimeException& operator<<(CppRuntimeException& exception, JsonToken token);
51
52
/**
53
 * Json Tokenizer used by Json Parser.
54
 */
55
template <typename ALLOC = std::allocator<uint8_t>>
56
class BasicJsonTokenizer
57
{
58
public:
59
    /**
60
     * Constructor.
61
     *
62
     * \param in Input stream to tokenize.
63
     * \param allocator Allocator to use.
64
     */
65
    BasicJsonTokenizer(std::istream& in, const ALLOC& allocator) :
66
            m_buffer(),
67
            m_in(in),
68
            m_decoder(allocator),
69
            m_decoderResult(0, allocator),
70
            m_content(readContent(allocator)),
71
            m_value(allocator)
72
93
    {
73
93
        m_token = m_content.empty() ? 
JsonToken::END_OF_FILE1
:
JsonToken::BEGIN_OF_FILE92
;
74
93
    }
75
76
    /**
77
     * Move to the next token.
78
     *
79
     * \return Next token.
80
     * \throw JsonParserException In case that tokenizing fails - i.e. unknown token is reached.
81
     */
82
    JsonToken next();
83
84
    /**
85
     * Gets current token.
86
     *
87
     * \return Current token.
88
     */
89
    JsonToken getToken() const
90
1.87k
    {
91
1.87k
        return m_token;
92
1.87k
    }
93
94
    /**
95
     * Gets current value.
96
     *
97
     * Any holder can be either unset - i.e. beginning or end of the input,
98
     * or it can hold one of the types defined in IObserver::visitValue.
99
     *
100
     * \return Current value as an AnyHolder.
101
     */
102
    const AnyHolder<ALLOC>& getValue() const
103
48.4k
    {
104
48.4k
        return m_value;
105
48.4k
    }
106
107
    /**
108
     * Gets line number of the current token.
109
     *
110
     * \return Line number.
111
     */
112
    size_t getLine() const
113
48.0k
    {
114
48.0k
        return m_lineNumber;
115
48.0k
    }
116
117
    /**
118
     * Gets column number of the current token.
119
     *
120
     * \return Column number.
121
     */
122
    size_t getColumn() const
123
48.0k
    {
124
48.0k
        return m_tokenColumnNumber;
125
48.0k
    }
126
127
private:
128
    string<ALLOC> readContent(const ALLOC& allocator);
129
130
    bool decodeNext();
131
    bool skipWhitespaces();
132
133
    template <typename T>
134
    void setToken(JsonToken token, T&& value);
135
    void setToken(JsonToken token, AnyHolder<ALLOC>&& value);
136
    void setToken(JsonToken token);
137
    void setPosition(size_t newPos, size_t newColumnNumber);
138
    void setTokenValue();
139
140
    static constexpr size_t BUFFER_SIZE = 64 * 1024;
141
    std::array<char, BUFFER_SIZE> m_buffer;
142
143
    std::istream& m_in;
144
    BasicJsonDecoder<ALLOC> m_decoder;
145
    typename BasicJsonDecoder<ALLOC>::DecoderResult m_decoderResult;
146
    string<ALLOC> m_content;
147
    size_t m_lineNumber = 1;
148
    size_t m_columnNumber = 1;
149
    size_t m_tokenColumnNumber = 1;
150
    size_t m_pos = 0;
151
    JsonToken m_token;
152
    AnyHolder<ALLOC> m_value;
153
};
154
155
template <typename ALLOC>
156
JsonToken BasicJsonTokenizer<ALLOC>::next()
157
49.1k
{
158
49.2k
    while (!decodeNext())
159
60
    {
160
60
        string<ALLOC> newContent = readContent(m_content.get_allocator());
161
60
        if (newContent.empty())
162
57
        {
163
57
            if (m_token == JsonToken::END_OF_FILE)
164
52
            {
165
52
                m_tokenColumnNumber = m_columnNumber;
166
52
            }
167
5
            else
168
5
            {
169
                // stream is finished but last token is not EOF => value must be at the end
170
5
                setTokenValue();
171
5
            }
172
173
57
            return m_token;
174
57
        }
175
176
3
        m_content = m_content.substr(m_pos) + newContent;
177
3
        m_pos = 0;
178
3
    }
179
180
49.1k
    return m_token;
181
49.1k
}
182
183
template <typename ALLOC>
184
string<ALLOC> BasicJsonTokenizer<ALLOC>::readContent(const ALLOC& allocator)
185
153
{
186
153
    const size_t count = static_cast<size_t>(m_in.rdbuf()->sgetn(m_buffer.data(), BUFFER_SIZE));
187
153
    return string<ALLOC>(m_buffer.data(), count, allocator);
188
153
}
189
190
template <typename ALLOC>
191
bool BasicJsonTokenizer<ALLOC>::decodeNext()
192
49.2k
{
193
49.2k
    if (!skipWhitespaces())
194
52
    {
195
52
        return false;
196
52
    }
197
198
49.1k
    m_tokenColumnNumber = m_columnNumber;
199
200
49.1k
    const char nextChar = m_content[m_pos];
201
49.1k
    switch (nextChar)
202
49.1k
    {
203
164
    case '{':
204
164
        setToken(JsonToken::BEGIN_OBJECT, nextChar);
205
164
        setPosition(m_pos + 1, m_columnNumber + 1);
206
164
        break;
207
96
    case '}':
208
96
        setToken(JsonToken::END_OBJECT, nextChar);
209
96
        setPosition(m_pos + 1, m_columnNumber + 1);
210
96
        break;
211
44
    case '[':
212
44
        setToken(JsonToken::BEGIN_ARRAY, nextChar);
213
44
        setPosition(m_pos + 1, m_columnNumber + 1);
214
44
        break;
215
38
    case ']':
216
38
        setToken(JsonToken::END_ARRAY, nextChar);
217
38
        setPosition(m_pos + 1, m_columnNumber + 1);
218
38
        break;
219
12.2k
    case ':':
220
12.2k
        setToken(JsonToken::KEY_SEPARATOR, nextChar);
221
12.2k
        setPosition(m_pos + 1, m_columnNumber + 1);
222
12.2k
        break;
223
12.1k
    case ',':
224
12.1k
        setToken(JsonToken::ITEM_SEPARATOR, nextChar);
225
12.1k
        setPosition(m_pos + 1, m_columnNumber + 1);
226
12.1k
        break;
227
24.4k
    default:
228
24.4k
        m_decoderResult = m_decoder.decodeValue(StringView(m_content.data()).substr(m_pos));
229
24.4k
        if (m_pos + m_decoderResult.numReadChars >= m_content.size())
230
8
        {
231
8
            return false; // we are at the end of chunk => read more
232
8
        }
233
234
24.4k
        setTokenValue();
235
24.4k
        break;
236
49.1k
    }
237
238
49.1k
    return true;
239
49.1k
}
240
241
template <typename ALLOC>
242
bool BasicJsonTokenizer<ALLOC>::skipWhitespaces()
243
49.2k
{
244
125k
    while (true)
245
125k
    {
246
125k
        if (m_pos >= m_content.size())
247
51
        {
248
51
            setToken(JsonToken::END_OF_FILE);
249
51
            return false;
250
51
        }
251
252
125k
        const char nextChar = m_content[m_pos];
253
125k
        switch (nextChar)
254
125k
        {
255
63.4k
        case ' ':
256
63.4k
        case '\t':
257
63.4k
            setPosition(m_pos + 1, m_columnNumber + 1);
258
63.4k
            break;
259
12.4k
        case '\n':
260
12.4k
            m_lineNumber++;
261
12.4k
            setPosition(m_pos + 1, 1);
262
12.4k
            break;
263
3
        case '\r':
264
3
            if (m_pos + 1 >= m_content.size())
265
1
            {
266
1
                setToken(JsonToken::END_OF_FILE);
267
1
                return false;
268
1
            }
269
2
            m_lineNumber++;
270
2
            setPosition(m_pos + (m_content[m_pos + 1] == '\n' ? 
21
:
11
), 1);
271
2
            break;
272
49.1k
        default:
273
49.1k
            return true;
274
125k
        }
275
125k
    }
276
49.2k
}
277
278
template <typename ALLOC>
279
template <typename T>
280
void BasicJsonTokenizer<ALLOC>::setToken(JsonToken token, T&& value)
281
24.6k
{
282
24.6k
    m_token = token;
283
24.6k
    m_value.set(std::forward<T>(value));
284
24.6k
}
285
286
template <typename ALLOC>
287
void BasicJsonTokenizer<ALLOC>::setToken(JsonToken token, AnyHolder<ALLOC>&& value)
288
24.4k
{
289
24.4k
    m_token = token;
290
24.4k
    m_value = std::move(value);
291
24.4k
}
292
293
template <typename ALLOC>
294
void BasicJsonTokenizer<ALLOC>::setToken(JsonToken token)
295
52
{
296
52
    m_token = token;
297
52
    m_value.reset();
298
52
}
299
300
template <typename ALLOC>
301
void BasicJsonTokenizer<ALLOC>::setPosition(size_t newPos, size_t newColumnNumber)
302
125k
{
303
125k
    m_pos = newPos;
304
125k
    m_columnNumber = newColumnNumber;
305
125k
}
306
307
template <typename ALLOC>
308
void BasicJsonTokenizer<ALLOC>::setTokenValue()
309
24.4k
{
310
24.4k
    if (!m_decoderResult.value.hasValue())
311
2
    {
312
2
        throw JsonParserException("JsonTokenizer:")
313
2
                << m_lineNumber << ":" << m_tokenColumnNumber << ": "
314
2
                << (m_decoderResult.integerOverflow ? 
"Value is outside of the 64-bit integer range!"1
315
2
                                                    : 
"Unknown token!"1
);
316
2
    }
317
318
24.4k
    setToken(JsonToken::VALUE, std::move(m_decoderResult.value));
319
24.4k
    setPosition(m_pos + m_decoderResult.numReadChars, m_columnNumber + m_decoderResult.numReadChars);
320
24.4k
}
321
322
} // namespace zserio
323
324
#endif // ZSERIO_JSON_TOKENIZER_H_INC