GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: src/zserio/JsonTokenizer.h Lines: 98 98 100.0 %
Date: 2023-12-13 14:51:09 Branches: 93 178 52.2 %

Line Branch Exec Source
1
#ifndef ZSERIO_JSON_TOKENIZER_H_INC
2
#define ZSERIO_JSON_TOKENIZER_H_INC
3
4
#include <memory>
5
#include <istream>
6
#include <array>
7
8
#include "zserio/AnyHolder.h"
9
#include "zserio/CppRuntimeException.h"
10
#include "zserio/JsonDecoder.h"
11
#include "zserio/Types.h"
12
13
namespace zserio
14
{
15
16
/**
 * Kinds of tokens reported by the JSON tokenizer.
 */
enum class JsonToken : int8_t
{
    UNKNOWN = -1, /**< Token which is not known. */
    BEGIN_OF_FILE = 0, /**< Initial token when the input has some content and nothing was tokenized yet. */
    END_OF_FILE = 1, /**< No more content is available in the input. */
    BEGIN_OBJECT = 2, /**< The '{' character. */
    END_OBJECT = 3, /**< The '}' character. */
    BEGIN_ARRAY = 4, /**< The '[' character. */
    END_ARRAY = 5, /**< The ']' character. */
    KEY_SEPARATOR = 6, /**< The ':' character. */
    ITEM_SEPARATOR = 7, /**< The ',' character. */
    VALUE = 8 /**< Any value successfully decoded by the JSON decoder. */
};
32
33
34
/**
35
 * Exception used to distinguish exceptions from the JsonParser.
36
 */
37
47
class JsonParserException : public CppRuntimeException
38
{
39
public:
40
9
    using CppRuntimeException::CppRuntimeException;
41
};
42
43
/**
44
 * Appends a JsonToken to a CppRuntimeException.
45
 *
46
 * \param exception Exception to modify.
47
 * \param token JSON Token to append.
48
 *
49
 * \return Reference to the exception to allow operator chaining.
50
 */
51
CppRuntimeException& operator<<(CppRuntimeException& exception, JsonToken token);
52
53
/**
54
 * Json Tokenizer used by Json Parser.
55
 */
56
template <typename ALLOC = std::allocator<uint8_t>>
57
91
class BasicJsonTokenizer
58
{
59
public:
60
    /**
61
     * Constructor.
62
     *
63
     * \param in Input stream to tokenize.
64
     * \param allocator Allocator to use.
65
     */
66
91
    BasicJsonTokenizer(std::istream& in, const ALLOC& allocator) :
67
            m_buffer(), m_in(in), m_decoder(allocator), m_decoderResult(0, allocator),
68


91
            m_content(readContent(allocator)), m_value(allocator)
69
    {
70

91
        m_token = m_content.empty() ? JsonToken::END_OF_FILE : JsonToken::BEGIN_OF_FILE;
71
91
    }
72
73
    /**
74
     * Move to the next token.
75
     *
76
     * \return Next token.
77
     * \throw JsonParserException In case that tokenizing fails - i.e. unknown token is reached.
78
     */
79
    JsonToken next();
80
81
    /**
82
     * Gets current token.
83
     *
84
     * \return Current token.
85
     */
86
1741
    JsonToken getToken() const { return m_token; }
87
88
    /**
89
     * Gets current value.
90
     *
91
     * Any holder can be either unset - i.e. beginning or end of the input,
92
     * or it can hold one of the types defined in IObserver::visitValue.
93
     *
94
     * \return Current value as an AnyHolder.
95
     */
96
48425
    const AnyHolder<ALLOC>& getValue() const { return m_value; }
97
98
    /**
99
     * Gets line number of the current token.
100
     *
101
     * \return Line number.
102
     */
103
48056
    size_t getLine() const { return m_lineNumber; }
104
105
    /**
106
     * Gets column number of the current token.
107
     *
108
     * \return Column number.
109
     */
110
48056
    size_t getColumn() const { return m_tokenColumnNumber; }
111
112
private:
113
    string<ALLOC> readContent(const ALLOC& allocator);
114
115
    bool decodeNext();
116
    bool skipWhitespaces();
117
118
    template <typename T>
119
    void setToken(JsonToken token, T&& value);
120
    void setToken(JsonToken token, AnyHolder<ALLOC>&& value);
121
    void setToken(JsonToken token);
122
    void setPosition(size_t newPos, size_t newColumnNumber);
123
    void setTokenValue();
124
125
    static constexpr size_t BUFFER_SIZE = 64 * 1024;
126
    std::array<char, BUFFER_SIZE> m_buffer;
127
128
    std::istream& m_in;
129
    BasicJsonDecoder<ALLOC> m_decoder;
130
    typename BasicJsonDecoder<ALLOC>::DecoderResult m_decoderResult;
131
    string<ALLOC> m_content;
132
    size_t m_lineNumber = 1;
133
    size_t m_columnNumber = 1;
134
    size_t m_tokenColumnNumber = 1;
135
    size_t m_pos = 0;
136
    JsonToken m_token;
137
    AnyHolder<ALLOC> m_value;
138
};
139
140
template <typename ALLOC>
141
49103
JsonToken BasicJsonTokenizer<ALLOC>::next()
142
{
143

49106
    while (!decodeNext())
144
    {
145

61
        string<ALLOC> newContent = readContent(m_content.get_allocator());
146

58
        if (newContent.empty())
147
        {
148

55
            if (m_token == JsonToken::END_OF_FILE)
149
            {
150
50
                m_tokenColumnNumber = m_columnNumber;
151
            }
152
            else
153
            {
154
                // stream is finished but last token is not EOF => value must be at the end
155

5
                setTokenValue();
156
            }
157
158
55
            return m_token;
159
        }
160
161



3
        m_content = m_content.substr(m_pos) + newContent;
162

3
        m_pos = 0;
163
    }
164
165
49043
    return m_token;
166
}
167
168
template <typename ALLOC>
169
149
string<ALLOC> BasicJsonTokenizer<ALLOC>::readContent(const ALLOC& allocator)
170
{
171
149
    const size_t count = static_cast<size_t>(m_in.rdbuf()->sgetn(m_buffer.data(), BUFFER_SIZE));
172

149
    return string<ALLOC>(m_buffer.data(), count, allocator);
173
}
174
175
template <typename ALLOC>
176
49103
bool BasicJsonTokenizer<ALLOC>::decodeNext()
177
{
178


49103
    if (!skipWhitespaces())
179
50
        return false;
180
181
49053
    m_tokenColumnNumber = m_columnNumber;
182
183
49053
    const char nextChar = m_content[m_pos];
184



49053
    switch (nextChar)
185
    {
186
    case '{':
187

156
        setToken(JsonToken::BEGIN_OBJECT, nextChar);
188
156
        setPosition(m_pos + 1, m_columnNumber + 1);
189
156
        break;
190
    case '}':
191

88
        setToken(JsonToken::END_OBJECT, nextChar);
192
88
        setPosition(m_pos + 1, m_columnNumber + 1);
193
88
        break;
194
    case '[':
195

40
        setToken(JsonToken::BEGIN_ARRAY, nextChar);
196
40
        setPosition(m_pos + 1, m_columnNumber + 1);
197
40
        break;
198
    case ']':
199

34
        setToken(JsonToken::END_ARRAY, nextChar);
200
34
        setPosition(m_pos + 1, m_columnNumber + 1);
201
34
        break;
202
    case ':':
203

12221
        setToken(JsonToken::KEY_SEPARATOR, nextChar);
204
12221
        setPosition(m_pos + 1, m_columnNumber + 1);
205
12221
        break;
206
    case ',':
207

12098
        setToken(JsonToken::ITEM_SEPARATOR, nextChar);
208
12098
        setPosition(m_pos + 1, m_columnNumber + 1);
209
12098
        break;
210
    default:
211



24416
        m_decoderResult = m_decoder.decodeValue(StringView(m_content.data()).substr(m_pos));
212

24416
        if (m_pos + m_decoderResult.numReadChars >= m_content.size())
213
8
            return false; // we are at the end of chunk => read more
214
215

24408
        setTokenValue();
216
24406
        break;
217
    }
218
219
49043
    return true;
220
}
221
222
template <typename ALLOC>
223
124615
bool BasicJsonTokenizer<ALLOC>::skipWhitespaces()
224
{
225
75512
    while (true)
226
    {
227

124615
        if (m_pos >= m_content.size())
228
        {
229
49
            setToken(JsonToken::END_OF_FILE);
230
49
            return false;
231
        }
232
233
124566
        const char nextChar = m_content[m_pos];
234


124566
        switch (nextChar)
235
        {
236
        case ' ':
237
        case '\t':
238
63114
            setPosition(m_pos + 1, m_columnNumber + 1);
239
63114
            break;
240
        case '\n':
241
12396
            m_lineNumber++;
242
12396
            setPosition(m_pos + 1, 1);
243
12396
            break;
244
        case '\r':
245

3
            if (m_pos + 1 >= m_content.size())
246
            {
247
1
                setToken(JsonToken::END_OF_FILE);
248
1
                return false;
249
            }
250
2
            m_lineNumber++;
251

2
            setPosition(m_pos + (m_content[m_pos + 1] == '\n' ? 2 : 1), 1);
252
2
            break;
253
        default:
254
49053
            return true;
255
        }
256
    }
257
}
258
259
template <typename ALLOC>
260
template <typename T>
261
24637
void BasicJsonTokenizer<ALLOC>::setToken(JsonToken token, T&& value)
262
{
263
24637
    m_token = token;
264
24637
    m_value.set(std::forward<T>(value));
265
24637
}
266
267
template <typename ALLOC>
268
24411
void BasicJsonTokenizer<ALLOC>::setToken(JsonToken token, AnyHolder<ALLOC>&& value)
269
{
270
24411
    m_token = token;
271
24411
    m_value = std::move(value);
272
24411
}
273
274
template <typename ALLOC>
275
50
void BasicJsonTokenizer<ALLOC>::setToken(JsonToken token)
276
{
277
50
    m_token = token;
278
50
    m_value.reset();
279
50
}
280
281
template <typename ALLOC>
282
124560
void BasicJsonTokenizer<ALLOC>::setPosition(size_t newPos, size_t newColumnNumber)
283
{
284
124560
    m_pos = newPos;
285
124560
    m_columnNumber = newColumnNumber;
286
124560
}
287
288
template <typename ALLOC>
289
24413
void BasicJsonTokenizer<ALLOC>::setTokenValue()
290
{
291

24413
    if (!m_decoderResult.value.hasValue())
292
    {
293







6
        throw JsonParserException("JsonTokenizer:") << m_lineNumber << ":" << m_tokenColumnNumber << ": " <<
294
                (m_decoderResult.integerOverflow
295
2
                        ? "Value is outside of the 64-bit integer range!"
296
4
                        : "Unknown token!");
297
    }
298
299
24411
    setToken(JsonToken::VALUE, std::move(m_decoderResult.value));
300
24411
    setPosition(m_pos + m_decoderResult.numReadChars, m_columnNumber + m_decoderResult.numReadChars);
301
24411
}
302
303
} // namespace zserio
304
305
#endif // ZSERIO_JSON_TOKENIZER_H_INC