test/zserio/JsonTokenizerTest.cpp
#include "gtest/gtest.h"

#include <sstream>

#include "zserio/JsonTokenizer.h"

namespace zserio
{

using JsonTokenizer = BasicJsonTokenizer<>;
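// Convenience alias using the tokenizer's default allocator; the tests below
// pass a std::allocator<uint8_t> to the constructor explicitly.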

TEST(JsonTokenizerTest, tokens)
{
    std::stringstream str("{\"array\":\n[\n{\"key\":\n10}]}");
    auto tokenizer = JsonTokenizer(str, std::allocator<uint8_t>());
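    // Note: object keys ("array", "key") are reported as ordinary VALUE tokens;
    // pairing a VALUE with the following KEY_SEPARATOR is left to the parser.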
    ASSERT_EQ(JsonToken::BEGIN_OBJECT, tokenizer.next());
    ASSERT_EQ('{', tokenizer.getValue().get<char>());
    ASSERT_EQ(JsonToken::VALUE, tokenizer.next());
    ASSERT_EQ("array", tokenizer.getValue().get<string<>>());
    ASSERT_EQ(JsonToken::KEY_SEPARATOR, tokenizer.next());
    ASSERT_EQ(':', tokenizer.getValue().get<char>());
    ASSERT_EQ(JsonToken::BEGIN_ARRAY, tokenizer.next());
    ASSERT_EQ('[', tokenizer.getValue().get<char>());
    ASSERT_EQ(JsonToken::BEGIN_OBJECT, tokenizer.next());
    ASSERT_EQ('{', tokenizer.getValue().get<char>());
    ASSERT_EQ(JsonToken::VALUE, tokenizer.next());
    ASSERT_EQ("key", tokenizer.getValue().get<string<>>());
    ASSERT_EQ(JsonToken::KEY_SEPARATOR, tokenizer.next());
    ASSERT_EQ(':', tokenizer.getValue().get<char>());
    ASSERT_EQ(JsonToken::VALUE, tokenizer.next());
    ASSERT_EQ(10, tokenizer.getValue().get<uint64_t>());
    ASSERT_EQ(JsonToken::END_OBJECT, tokenizer.next());
    ASSERT_EQ('}', tokenizer.getValue().get<char>());
    ASSERT_EQ(JsonToken::END_ARRAY, tokenizer.next());
    ASSERT_EQ(']', tokenizer.getValue().get<char>());
    ASSERT_EQ(JsonToken::END_OBJECT, tokenizer.next());
    ASSERT_EQ('}', tokenizer.getValue().get<char>());
    ASSERT_EQ(JsonToken::END_OF_FILE, tokenizer.next());
}

TEST(JsonTokenizerTest, lineColumn)
{
    std::stringstream str("\n\t{\r   \"key\" \r\n\t :\n10}\r");
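    // The input mixes all three newline conventions; the assertions below show
    // that '\n', a lone '\r' and the "\r\n" pair each advance the line counter
    // by one, while '\t' advances the column by one.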
    JsonTokenizer tokenizer(str, std::allocator<uint8_t>());

    ASSERT_EQ(JsonToken::BEGIN_OBJECT, tokenizer.next());
    ASSERT_EQ('{', tokenizer.getValue().get<char>());
    ASSERT_EQ(2, tokenizer.getLine());
    ASSERT_EQ(2, tokenizer.getColumn());

    ASSERT_EQ(JsonToken::VALUE, tokenizer.next());
    ASSERT_EQ("key", tokenizer.getValue().get<string<>>());
    ASSERT_EQ(3, tokenizer.getLine());
    ASSERT_EQ(4, tokenizer.getColumn());

    ASSERT_EQ(JsonToken::KEY_SEPARATOR, tokenizer.next());
    ASSERT_EQ(':', tokenizer.getValue().get<char>());
    ASSERT_EQ(4, tokenizer.getLine());
    ASSERT_EQ(3, tokenizer.getColumn());

    ASSERT_EQ(JsonToken::VALUE, tokenizer.next());
    ASSERT_EQ(10, tokenizer.getValue().get<uint64_t>());
    ASSERT_EQ(5, tokenizer.getLine());
    ASSERT_EQ(1, tokenizer.getColumn());

    ASSERT_EQ(JsonToken::END_OBJECT, tokenizer.next());
    ASSERT_EQ('}', tokenizer.getValue().get<char>());
    ASSERT_EQ(5, tokenizer.getLine());
    ASSERT_EQ(3, tokenizer.getColumn());

    ASSERT_EQ(JsonToken::END_OF_FILE, tokenizer.next());
    ASSERT_FALSE(tokenizer.getValue().hasValue());
    ASSERT_EQ(5, tokenizer.getLine());
    ASSERT_EQ(4, tokenizer.getColumn());
}

TEST(JsonTokenizerTest, longInputSplitInNumber)
{
    std::stringstream str;
    str << "{\n"; // 2 chars
    for (size_t i = 0; i < 4000; ++i) // 20 x 4000 > 65534 to check reading by chunks
    {
        // BUFFER_SIZE is 65536, thus 65534 % 20 gives the position within the string below
        // where the buffer will be split => 14, which is somewhere in the middle of the number
        //                 |->   <-|
        str << "  \"key\": 100000000,\n"; // 20 chars
    }
    str << "  \"key\": 100000000\n";
    str << '}';
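    // Total input: 2 + 4000 * 20 + 19 + 1 = 80022 chars, i.e. more than one
    // 65536-byte buffer, so at least one refill happens in the middle of a number.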

    JsonTokenizer tokenizer(str, std::allocator<uint8_t>());

    ASSERT_EQ(JsonToken::BEGIN_OBJECT, tokenizer.next());
    ASSERT_EQ('{', tokenizer.getValue().get<char>());
    ASSERT_EQ(1, tokenizer.getLine());
    ASSERT_EQ(1, tokenizer.getColumn());

    size_t i = 0;
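    // i is declared outside the loop so the assertions for the final line
    // (the one without the trailing comma) can reuse its end value below.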
    for (; i < 4000; ++i)
    {
        ASSERT_EQ(JsonToken::VALUE, tokenizer.next()) << "i=" << i;
        ASSERT_EQ("key", tokenizer.getValue().get<string<>>()) << "i=" << i;
        ASSERT_EQ(1 + i + 1, tokenizer.getLine()) << "i=" << i;
        ASSERT_EQ(3, tokenizer.getColumn()) << "i=" << i;

        ASSERT_EQ(JsonToken::KEY_SEPARATOR, tokenizer.next()) << "i=" << i;
        ASSERT_EQ(':', tokenizer.getValue().get<char>()) << "i=" << i;
        ASSERT_EQ(1 + i + 1, tokenizer.getLine()) << "i=" << i;
        ASSERT_EQ(8, tokenizer.getColumn()) << "i=" << i;

        ASSERT_EQ(JsonToken::VALUE, tokenizer.next()) << "i=" << i;
        ASSERT_EQ(100000000, tokenizer.getValue().get<uint64_t>()) << "i=" << i;
        ASSERT_EQ(1 + i + 1, tokenizer.getLine()) << "i=" << i;
        ASSERT_EQ(10, tokenizer.getColumn()) << "i=" << i;

        ASSERT_EQ(JsonToken::ITEM_SEPARATOR, tokenizer.next()) << "i=" << i;
        ASSERT_EQ(',', tokenizer.getValue().get<char>()) << "i=" << i;
        ASSERT_EQ(1 + i + 1, tokenizer.getLine()) << "i=" << i;
        ASSERT_EQ(19, tokenizer.getColumn()) << "i=" << i;
    }

    ASSERT_EQ(JsonToken::VALUE, tokenizer.next()) << "i=" << i;
    ASSERT_EQ("key", tokenizer.getValue().get<string<>>()) << "i=" << i;
    ASSERT_EQ(1 + i + 1, tokenizer.getLine()) << "i=" << i;
    ASSERT_EQ(3, tokenizer.getColumn()) << "i=" << i;

    ASSERT_EQ(JsonToken::KEY_SEPARATOR, tokenizer.next()) << "i=" << i;
    ASSERT_EQ(':', tokenizer.getValue().get<char>()) << "i=" << i;
    ASSERT_EQ(1 + i + 1, tokenizer.getLine()) << "i=" << i;
    ASSERT_EQ(8, tokenizer.getColumn()) << "i=" << i;

    ASSERT_EQ(JsonToken::VALUE, tokenizer.next()) << "i=" << i;
    ASSERT_EQ(100000000, tokenizer.getValue().get<uint64_t>()) << "i=" << i;
    ASSERT_EQ(1 + i + 1, tokenizer.getLine()) << "i=" << i;
    ASSERT_EQ(10, tokenizer.getColumn()) << "i=" << i;

    ASSERT_EQ(JsonToken::END_OBJECT, tokenizer.next());
    ASSERT_EQ(1 + i + 2, tokenizer.getLine());
    ASSERT_EQ(1, tokenizer.getColumn());
}

TEST(JsonTokenizerTest, longInputSplitInString)
{
    std::stringstream str;
    str << "{\n"; // 2 chars
    for (size_t i = 0; i < 4000; ++i) // 20 x 4000 > 65534 to check reading by chunks
    {
        // BUFFER_SIZE is 65536, thus 65534 % 20 gives the position within the string below
        // where the buffer will be split => 14, which is somewhere in the middle of the quoted string
        //                 |->     <-|
        str << "  \"key\": \"1000000\",\n"; // 20 chars
    }
    str << "  \"key\": \"1000000\"\n";
    str << '}';
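    // Again 80022 chars in total, but now the buffer boundary falls inside
    // a quoted string value instead of a number.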

    JsonTokenizer tokenizer(str, std::allocator<uint8_t>());

    ASSERT_EQ(JsonToken::BEGIN_OBJECT, tokenizer.next());
    ASSERT_EQ('{', tokenizer.getValue().get<char>());
    ASSERT_EQ(1, tokenizer.getLine());
    ASSERT_EQ(1, tokenizer.getColumn());

    size_t i = 0;
    for (; i < 4000; ++i)
    {
        ASSERT_EQ(JsonToken::VALUE, tokenizer.next()) << "i=" << i;
        ASSERT_EQ("key", tokenizer.getValue().get<string<>>()) << "i=" << i;
        ASSERT_EQ(1 + i + 1, tokenizer.getLine()) << "i=" << i;
        ASSERT_EQ(3, tokenizer.getColumn()) << "i=" << i;

        ASSERT_EQ(JsonToken::KEY_SEPARATOR, tokenizer.next()) << "i=" << i;
        ASSERT_EQ(':', tokenizer.getValue().get<char>()) << "i=" << i;
        ASSERT_EQ(1 + i + 1, tokenizer.getLine()) << "i=" << i;
        ASSERT_EQ(8, tokenizer.getColumn()) << "i=" << i;

        ASSERT_EQ(JsonToken::VALUE, tokenizer.next()) << "i=" << i;
        ASSERT_EQ("1000000", tokenizer.getValue().get<string<>>()) << "i=" << i;
        ASSERT_EQ(1 + i + 1, tokenizer.getLine()) << "i=" << i;
        ASSERT_EQ(10, tokenizer.getColumn()) << "i=" << i;

        ASSERT_EQ(JsonToken::ITEM_SEPARATOR, tokenizer.next()) << "i=" << i;
        ASSERT_EQ(',', tokenizer.getValue().get<char>()) << "i=" << i;
        ASSERT_EQ(1 + i + 1, tokenizer.getLine()) << "i=" << i;
        ASSERT_EQ(19, tokenizer.getColumn()) << "i=" << i;
    }

    ASSERT_EQ(JsonToken::VALUE, tokenizer.next()) << "i=" << i;
    ASSERT_EQ("key", tokenizer.getValue().get<string<>>()) << "i=" << i;
    ASSERT_EQ(1 + i + 1, tokenizer.getLine()) << "i=" << i;
    ASSERT_EQ(3, tokenizer.getColumn()) << "i=" << i;

    ASSERT_EQ(JsonToken::KEY_SEPARATOR, tokenizer.next()) << "i=" << i;
    ASSERT_EQ(':', tokenizer.getValue().get<char>()) << "i=" << i;
    ASSERT_EQ(1 + i + 1, tokenizer.getLine()) << "i=" << i;
    ASSERT_EQ(8, tokenizer.getColumn()) << "i=" << i;

    ASSERT_EQ(JsonToken::VALUE, tokenizer.next()) << "i=" << i;
    ASSERT_EQ("1000000", tokenizer.getValue().get<string<>>()) << "i=" << i;
    ASSERT_EQ(1 + i + 1, tokenizer.getLine()) << "i=" << i;
    ASSERT_EQ(10, tokenizer.getColumn()) << "i=" << i;

    ASSERT_EQ(JsonToken::END_OBJECT, tokenizer.next());
    ASSERT_EQ(1 + i + 2, tokenizer.getLine());
    ASSERT_EQ(1, tokenizer.getColumn()) << "i=" << i;
}

TEST(JsonTokenizerTest, longInputSplitInDoubleAfterE)
{
    std::stringstream str;
    str << "{\n"; // 2 chars
    for (size_t i = 0; i < 4000; ++i) // 20 x 4000 > 65534 to check reading by chunks
    {
        // BUFFER_SIZE is 65536, thus 65534 % 20 gives the position within the string below
        // where the buffer will be split => 14, which is right after the 'e' of the number
        //                    |->   <-|
        str << "  \"key\":    1e5   ,\n"; // 20 chars
    }
    str << "  \"key\":    1e5   \n";
    str << '}';
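    // Again 80022 chars in total; the extra padding around "1e5" places the
    // buffer boundary directly after the 'e' of the exponent.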

    JsonTokenizer tokenizer(str, std::allocator<uint8_t>());

    ASSERT_EQ(JsonToken::BEGIN_OBJECT, tokenizer.next());
    ASSERT_EQ('{', tokenizer.getValue().get<char>());
    ASSERT_EQ(1, tokenizer.getLine());
    ASSERT_EQ(1, tokenizer.getColumn());

    size_t i = 0;
    for (; i < 4000; ++i)
    {
        ASSERT_EQ(JsonToken::VALUE, tokenizer.next()) << "i=" << i;
        ASSERT_EQ("key", tokenizer.getValue().get<string<>>()) << "i=" << i;
        ASSERT_EQ(1 + i + 1, tokenizer.getLine()) << "i=" << i;
        ASSERT_EQ(3, tokenizer.getColumn()) << "i=" << i;

        ASSERT_EQ(JsonToken::KEY_SEPARATOR, tokenizer.next()) << "i=" << i;
        ASSERT_EQ(':', tokenizer.getValue().get<char>()) << "i=" << i;
        ASSERT_EQ(1 + i + 1, tokenizer.getLine()) << "i=" << i;
        ASSERT_EQ(8, tokenizer.getColumn()) << "i=" << i;

        ASSERT_EQ(JsonToken::VALUE, tokenizer.next()) << "i=" << i;
        ASSERT_EQ(1e5, tokenizer.getValue().get<double>()) << "i=" << i;
        ASSERT_EQ(1 + i + 1, tokenizer.getLine()) << "i=" << i;
        ASSERT_EQ(13, tokenizer.getColumn()) << "i=" << i;

        ASSERT_EQ(JsonToken::ITEM_SEPARATOR, tokenizer.next()) << "i=" << i;
        ASSERT_EQ(',', tokenizer.getValue().get<char>()) << "i=" << i;
        ASSERT_EQ(1 + i + 1, tokenizer.getLine()) << "i=" << i;
        ASSERT_EQ(19, tokenizer.getColumn()) << "i=" << i;
    }

    ASSERT_EQ(JsonToken::VALUE, tokenizer.next()) << "i=" << i;
    ASSERT_EQ("key", tokenizer.getValue().get<string<>>()) << "i=" << i;
    ASSERT_EQ(1 + i + 1, tokenizer.getLine()) << "i=" << i;
    ASSERT_EQ(3, tokenizer.getColumn()) << "i=" << i;

    ASSERT_EQ(JsonToken::KEY_SEPARATOR, tokenizer.next()) << "i=" << i;
    ASSERT_EQ(':', tokenizer.getValue().get<char>()) << "i=" << i;
    ASSERT_EQ(1 + i + 1, tokenizer.getLine()) << "i=" << i;
    ASSERT_EQ(8, tokenizer.getColumn()) << "i=" << i;

    ASSERT_EQ(JsonToken::VALUE, tokenizer.next()) << "i=" << i;
    ASSERT_EQ(1e5, tokenizer.getValue().get<double>()) << "i=" << i;
    ASSERT_EQ(1 + i + 1, tokenizer.getLine()) << "i=" << i;
    ASSERT_EQ(13, tokenizer.getColumn()) << "i=" << i;

    ASSERT_EQ(JsonToken::END_OBJECT, tokenizer.next());
    ASSERT_EQ(1 + i + 2, tokenizer.getLine());
    ASSERT_EQ(1, tokenizer.getColumn()) << "i=" << i;
}

TEST(JsonTokenizerTest, unknownToken)
{
    std::stringstream str("\\\n");
    JsonTokenizer tokenizer(str, std::allocator<uint8_t>());
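    // A backslash cannot start any JSON token, so next() is expected to throw
    // a JsonParserException carrying the 1-based line:column position.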
    ASSERT_THROW({
        try
        {
            tokenizer.next();
        }
        catch (const JsonParserException& e)
        {
            ASSERT_STREQ("JsonTokenizer:1:1: Unknown token!", e.what());
            throw;
        }
    }, JsonParserException);
}

TEST(JsonTokenizerTest, cppRuntimeExceptionOperator)
{
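    // Exercises the CppRuntimeException-style operator<< overload that appends
    // the symbolic name of a JsonToken to the exception message.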
    ASSERT_STREQ("UNKNOWN", (JsonParserException() << (JsonToken::UNKNOWN)).what());
    ASSERT_STREQ("BEGIN_OF_FILE", (JsonParserException() << (JsonToken::BEGIN_OF_FILE)).what());
    ASSERT_STREQ("END_OF_FILE", (JsonParserException() << (JsonToken::END_OF_FILE)).what());
    ASSERT_STREQ("BEGIN_OBJECT", (JsonParserException() << (JsonToken::BEGIN_OBJECT)).what());
    ASSERT_STREQ("END_OBJECT", (JsonParserException() << (JsonToken::END_OBJECT)).what());
    ASSERT_STREQ("BEGIN_ARRAY", (JsonParserException() << (JsonToken::BEGIN_ARRAY)).what());
    ASSERT_STREQ("END_ARRAY", (JsonParserException() << (JsonToken::END_ARRAY)).what());
    ASSERT_STREQ("KEY_SEPARATOR", (JsonParserException() << (JsonToken::KEY_SEPARATOR)).what());
    ASSERT_STREQ("ITEM_SEPARATOR", (JsonParserException() << (JsonToken::ITEM_SEPARATOR)).what());
    ASSERT_STREQ("VALUE", (JsonParserException() << (JsonToken::VALUE)).what());
}

} // namespace zserio