145
|
1 //Written in the D programming language
|
|
2
|
|
3 /**
|
|
4 * Implements functionality to read Comma Separated Values and its variants
|
|
5 * from an input range of $(D dchar).
|
|
6 *
|
|
7 * Comma Separated Values provide a simple means to transfer and store
|
|
8 * tabular data. It has been common for programs to use their own
|
|
9 * variant of the CSV format. This parser will loosely follow the
|
|
10 * $(HTTP tools.ietf.org/html/rfc4180, RFC-4180). CSV input should adhere
|
|
11 * to the following criteria (differences from RFC-4180 in parentheses):
|
|
12 *
|
|
13 * $(UL
|
|
14 * $(LI A record is separated by a new line (CRLF,LF,CR))
|
|
15 * $(LI A final record may end with a new line)
|
|
16 * $(LI A header may be provided as the first record in input)
|
|
17 * $(LI A record has fields separated by a comma (customizable))
|
|
18 * $(LI A field containing new lines, commas, or double quotes
|
|
19 * should be enclosed in double quotes (customizable))
|
|
20 * $(LI Double quotes in a field are escaped with a double quote)
|
|
21 * $(LI Each record should contain the same number of fields)
|
|
22 * )
|
|
23 *
|
|
24 * Example:
|
|
25 *
|
|
26 * -------
|
|
27 * import std.algorithm;
|
|
28 * import std.array;
|
|
29 * import std.csv;
|
|
30 * import std.stdio;
|
|
31 * import std.typecons;
|
|
32 *
|
|
33 * void main()
|
|
34 * {
|
|
35 * auto text = "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n";
|
|
36 *
|
|
37 * foreach (record; csvReader!(Tuple!(string, string, int))(text))
|
|
38 * {
|
|
39 * writefln("%s works as a %s and earns $%d per year",
|
|
40 * record[0], record[1], record[2]);
|
|
41 * }
|
|
42 *
|
|
43 * // To read the same string from the file "filename.csv":
|
|
44 *
|
|
45 * auto file = File("filename.csv", "r");
|
|
46 * foreach (record;
|
|
47 * file.byLine.joiner("\n").csvReader!(Tuple!(string, string, int)))
|
|
48 * {
|
|
49 * writefln("%s works as a %s and earns $%d per year",
|
|
50 * record[0], record[1], record[2]);
|
|
51 * }
|
|
52 *
|
|
53 * }
|
|
54 * -------
|
|
55 *
|
|
56 * When an input contains a header the $(D Contents) can be specified as an
|
|
57 * associative array. Pass null to signify that a header is present.
|
|
58 *
|
|
59 * -------
|
|
60 * auto text = "Name,Occupation,Salary\r"
|
|
61 * "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n";
|
|
62 *
|
|
63 * foreach (record; csvReader!(string[string])
|
|
64 * (text, null))
|
|
65 * {
|
|
66 * writefln("%s works as a %s and earns $%s per year.",
|
|
67 * record["Name"], record["Occupation"],
|
|
68 * record["Salary"]);
|
|
69 * }
|
|
70 * -------
|
|
71 *
|
|
72 * This module allows content to be iterated by record stored in a struct,
|
|
73 * class, associative array, or as a range of fields. Upon detection of an
|
|
74 * error a CSVException is thrown (can be disabled). csvNextToken has been
|
|
75 * made public to allow for attempted recovery.
|
|
76 *
|
|
77 * Disabling exceptions will lift many restrictions specified above. A quote
|
|
78 * can appear in a field if the field was not quoted. If in a quoted field any
|
|
79 * quote by itself, not at the end of a field, will end processing for that
|
|
80 * field. The field is ended when there is no input, even if the quote was not
|
|
81 * closed.
|
|
82 *
|
|
83 * See_Also:
|
|
84 * $(HTTP en.wikipedia.org/wiki/Comma-separated_values, Wikipedia
|
|
85 * Comma-separated values)
|
|
86 *
|
|
87 * Copyright: Copyright 2011
|
|
88 * License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
|
|
89 * Authors: Jesse Phillips
|
|
90 * Source: $(PHOBOSSRC std/_csv.d)
|
|
91 */
|
|
92 module std.csv;
|
|
93
|
|
94 import std.conv;
|
|
95 import std.exception; // basicExceptionCtors
|
|
96 import std.range.primitives;
|
|
97 import std.traits;
|
|
98
|
|
/**
 * Base exception for CSV processing errors; records the row and column at
 * which the problem was detected.
 *
 * Both $(D row) and $(D col) are numbered from one and refer to the position
 * in the input itself, not to any header supplied by the caller. A failure to
 * match the header is reported specially; see
 * $(LREF HeaderMismatchException) for details.
 *
 * When a type conversion fails, the originating
 * $(REF ConvException, std,conv) is stored in the $(D next) field.
 */
class CSVException : Exception
{
    /// Position of the error; both stay zero unless set by the thrower.
    size_t row, col;

    // FIXME: Use std.exception.basicExceptionCtors here once bug #11500 is fixed

    /// Creates an exception from a message and an optional chained cause.
    this(string msg, string file = __FILE__, size_t line = __LINE__,
         Throwable next = null) @nogc @safe pure nothrow
    {
        super(msg, file, line, next);
    }

    /// Creates an exception with the chained cause given right after the message.
    this(string msg, Throwable next, string file = __FILE__,
         size_t line = __LINE__) @nogc @safe pure nothrow
    {
        super(msg, file, line, next);
    }

    /// Creates an exception that also records where in the input it occurred.
    this(string msg, size_t row, size_t col, Throwable next = null,
         string file = __FILE__, size_t line = __LINE__) @nogc @safe pure nothrow
    {
        super(msg, next, file, line);
        this.row = row;
        this.col = col;
    }

    /// Renders the exception as "(Row: R, Col: C) message".
    override string toString() @safe pure const
    {
        string location = "(Row: " ~ to!string(row);
        location ~= ", Col: " ~ to!string(col) ~ ") ";
        return location ~ msg;
    }
}
|
|
143
|
|
// CSVException: cause chaining, row/col storage and their presence in toString.
@safe pure unittest
{
    import std.string;

    auto cause = new Exception("Foobar");
    auto chained = new CSVException("args", cause);
    assert(chained.next is cause);

    size_t expectedRow = 13;
    size_t expectedCol = 37;

    auto located = new CSVException("argv", expectedRow, expectedCol);
    assert(located.row == expectedRow);
    assert(located.col == expectedCol);

    // Both coordinates must show up in the rendered message.
    string rendered = located.toString();
    assert(rendered.indexOf("13") != -1);
    assert(rendered.indexOf("37") != -1);
}
|
|
162
|
|
/**
 * Thrown when a token turns out to be incomplete: a quote appears inside an
 * unquoted field, data continues after a closing quote, or the input ran out
 * before a quoted field was closed.
 */
class IncompleteCellException : CSVException
{
    /**
     * The data read from the input before the problem was found.
     *
     * $(LREF csvReader) populates this field. $(LREF csvNextToken) does not,
     * because that data has already been fed to the output range.
     */
    dstring partialData;

    mixin basicExceptionCtors;
}
|
|
181
|
|
// IncompleteCellException keeps the chained cause passed to its constructor.
@safe pure unittest
{
    auto cause = new Exception("Foobar");
    auto incomplete = new IncompleteCellException("args", cause);
    assert(incomplete.next is cause);
}
|
|
188
|
|
/**
 * Thrown when the requested header cannot be reconciled with the input. The
 * exact conditions depend on the type of $(D Contents).
 *
 * Structure, Class, and Associative Array
 * $(UL
 *     $(LI A header is provided but a matching column is not found)
 * )
 *
 * Other
 * $(UL
 *     $(LI A header is provided but a matching column is not found)
 *     $(LI The order did not match that found in the input)
 * )
 *
 * A row and column are not meaningful when a column named in the header is
 * missing from the data, so both row and col are zero in that case.
 * Otherwise row is always one and col is the first instance found in header
 * that occurred before the previous, starting at one.
 */
class HeaderMismatchException : CSVException
{
    mixin basicExceptionCtors;
}
|
|
213
|
|
// HeaderMismatchException keeps the chained cause passed to its constructor.
@safe pure unittest
{
    auto cause = new Exception("Foobar");
    auto mismatch = new HeaderMismatchException("args", cause);
    assert(mismatch.next is cause);
}
|
|
220
|
|
/**
 * Selects what happens when an error is detected in the input.
 *
 * With exceptions disabled the following rules apply:
 * $(UL
 *     $(LI A quote can appear in a field if the field was not quoted.)
 *     $(LI In a quoted field, any quote by itself that is not at the end of
 *          the field ends processing for that field.)
 *     $(LI The field is ended when there is no more input, even if the quote
 *          was not closed.)
 *     $(LI If the given header does not match the order in the input, the
 *          content is returned as it is found in the input.)
 *     $(LI Columns of the given header that are not found in the input are
 *          ignored.)
 * )
 */
enum Malformed
{
    /// No exceptions are thrown due to incorrect CSV.
    ignore,
    /// Use exceptions when input has incorrect CSV.
    throwException
}
|
|
242
|
|
243 /**
|
|
244 * Returns an input range for iterating over records found in $(D
|
|
245 * input).
|
|
246 *
|
|
247 * The $(D Contents) of the input can be provided if all the records are the
|
|
248 * same type such as all integer data:
|
|
249 *
|
|
250 * -------
|
|
251 * string str = `76,26,22`;
|
|
252 * int[] ans = [76,26,22];
|
|
253 * auto records = csvReader!int(str);
|
|
254 *
|
|
255 * foreach (record; records)
|
|
256 * {
|
|
257 * assert(equal(record, ans));
|
|
258 * }
|
|
259 * -------
|
|
260 *
|
|
261 * Example using a struct with modified delimiter:
|
|
262 *
|
|
263 * -------
|
|
264 * string str = "Hello;65;63.63\nWorld;123;3673.562";
|
|
265 * struct Layout
|
|
266 * {
|
|
267 * string name;
|
|
268 * int value;
|
|
269 * double other;
|
|
270 * }
|
|
271 *
|
|
272 * auto records = csvReader!Layout(str,';');
|
|
273 *
|
|
274 * foreach (record; records)
|
|
275 * {
|
|
276 * writeln(record.name);
|
|
277 * writeln(record.value);
|
|
278 * writeln(record.other);
|
|
279 * }
|
|
280 * -------
|
|
281 *
|
|
282 * Specifying $(D ErrorLevel) as Malformed.ignore will lift restrictions
|
|
283 * on the format. This example shows that an exception is not thrown when
|
|
284 * finding a quote in a field not quoted.
|
|
285 *
|
|
286 * -------
|
|
287 * string str = "A \" is now part of the data";
|
|
288 * auto records = csvReader!(string,Malformed.ignore)(str);
|
|
289 * auto record = records.front;
|
|
290 *
|
|
291 * assert(record.front == str);
|
|
292 * -------
|
|
293 *
|
|
294 * Returns:
|
|
295 * An input range R as defined by
|
|
296 * $(REF isInputRange, std,range,primitives). When $(D Contents) is a
|
|
297 * struct, class, or an associative array, the element type of R is
|
|
298 * $(D Contents), otherwise the element type of R is itself a range with
|
|
299 * element type $(D Contents).
|
|
300 *
|
|
301 * Throws:
|
|
302 * $(LREF CSVException) When a quote is found in an unquoted field,
|
|
303 * data continues after a closing quote, the quoted field was not
|
|
304 * closed before data was empty, a conversion failed, or when the row's
|
|
305 * length does not match the previous length.
|
|
306 *
|
|
307 * $(LREF HeaderMismatchException) when a header is provided but a
|
|
308 * matching column is not found or the order did not match that found in
|
|
309 * the input. Read the exception documentation for specific details of
|
|
310 * when the exception is thrown for different types of $(D Contents).
|
|
311 */
|
|
auto csvReader(Contents = string,
               Malformed ErrorLevel = Malformed.throwException,
               Range, Separator = char)
              (Range input, Separator delimiter = ',', Separator quote = '"')
if (isInputRange!Range && is(Unqual!(ElementType!Range) == dchar)
    && isSomeChar!(Separator)
    && !is(Contents T : T[U], U : string))
{
    // No header is involved in this overload, so string[] merely fills the
    // Header parameter and the header-less constructor is selected.
    alias Reader = CsvReader!(Contents, ErrorLevel, Range,
                              Unqual!(ElementType!Range), string[]);

    return Reader(input, delimiter, quote);
}
|
|
322
|
|
323 /**
|
|
324 * An optional $(D header) can be provided. The first record will be read in
|
|
325 * as the header. If $(D Contents) is a struct then the header provided is
|
|
326 * expected to correspond to the fields in the struct. When $(D Contents) is
|
|
327 * not a type which can contain the entire record, the $(D header) must be
|
|
328 * provided in the same order as the input or an exception is thrown.
|
|
329 *
|
|
330 * Read only column "b":
|
|
331 *
|
|
332 * -------
|
|
333 * string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
|
|
334 * auto records = csvReader!int(str, ["b"]);
|
|
335 *
|
|
336 * auto ans = [[65],[123]];
|
|
337 * foreach (record; records)
|
|
338 * {
|
|
339 * assert(equal(record, ans.front));
|
|
340 * ans.popFront();
|
|
341 * }
|
|
342 * -------
|
|
343 *
|
|
344 * Read from header of different order:
|
|
345 *
|
|
346 * -------
|
|
347 * string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
|
|
348 * struct Layout
|
|
349 * {
|
|
350 * int value;
|
|
351 * double other;
|
|
352 * string name;
|
|
353 * }
|
|
354 *
|
|
355 * auto records = csvReader!Layout(str, ["b","c","a"]);
|
|
356 * -------
|
|
357 *
|
|
358 * The header can also be left empty if the input contains a header but
|
|
359 * all columns should be iterated. The header from the input can always
|
|
360 * be accessed from the header field.
|
|
361 *
|
|
362 * -------
|
|
363 * string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
|
|
364 * auto records = csvReader(str, null);
|
|
365 *
|
|
366 * assert(records.header == ["a","b","c"]);
|
|
367 * -------
|
|
368 *
|
|
369 * Returns:
|
|
370 * An input range R as defined by
|
|
371 * $(REF isInputRange, std,range,primitives). When $(D Contents) is a
|
|
372 * struct, class, or an associative array, the element type of R is
|
|
373 * $(D Contents), otherwise the element type of R is itself a range with
|
|
374 * element type $(D Contents).
|
|
375 *
|
|
376 * The returned range provides a header field for accessing the header
|
|
377 * from the input in array form.
|
|
378 *
|
|
379 * -------
|
|
380 * string str = "a,b,c\nHello,65,63.63";
|
|
381 * auto records = csvReader(str, ["a"]);
|
|
382 *
|
|
383 * assert(records.header == ["a","b","c"]);
|
|
384 * -------
|
|
385 *
|
|
386 * Throws:
|
|
387 * $(LREF CSVException) When a quote is found in an unquoted field,
|
|
388 * data continues after a closing quote, the quoted field was not
|
|
389 * closed before data was empty, a conversion failed, or when the row's
|
|
390 * length does not match the previous length.
|
|
391 *
|
|
392 * $(LREF HeaderMismatchException) when a header is provided but a
|
|
393 * matching column is not found or the order did not match that found in
|
|
394 * the input. Read the exception documentation for specific details of
|
|
395 * when the exception is thrown for different types of $(D Contents).
|
|
396 */
|
|
auto csvReader(Contents = string,
               Malformed ErrorLevel = Malformed.throwException,
               Range, Header, Separator = char)
              (Range input, Header header,
               Separator delimiter = ',', Separator quote = '"')
if (isInputRange!Range && is(Unqual!(ElementType!Range) == dchar)
    && isSomeChar!(Separator)
    && isForwardRange!Header
    && isSomeString!(ElementType!Header))
{
    // The caller's header range type is passed straight through to the reader.
    alias Reader = CsvReader!(Contents, ErrorLevel, Range,
                              Unqual!(ElementType!Range), Header);

    return Reader(input, header, delimiter, quote);
}
|
|
411
|
|
///
auto csvReader(Contents = string,
               Malformed ErrorLevel = Malformed.throwException,
               Range, Header, Separator = char)
              (Range input, Header header,
               Separator delimiter = ',', Separator quote = '"')
if (isInputRange!Range && is(Unqual!(ElementType!Range) == dchar)
    && isSomeChar!(Separator)
    && is(Header : typeof(null)))
{
    // A null header is forwarded as an empty string[]: the first record is
    // still read as the header, but no columns are singled out.
    alias Reader = CsvReader!(Contents, ErrorLevel, Range,
                              Unqual!(ElementType!Range), string[]);

    return Reader(input, cast(string[]) null, delimiter, quote);
}
|
|
426
|
|
// Test standard iteration over input: three records of two cells each,
// including escaped quotes, a quoted cell spanning two lines and an empty
// trailing cell.
@safe pure unittest
{
    string input = `one,"two ""quoted"""` ~ "\n\"three\nnew line\",\nfive,six";

    int cellCount;
    foreach (row; csvReader(input))
        foreach (cell; row)
            ++cellCount;

    assert(cellCount == 6);
}
|
|
443
|
|
// Test newline on last record: the range is exhausted after two records.
@safe pure unittest
{
    string input = "one,two\nthree,four\n";
    auto rows = csvReader(input);

    foreach (_; 0 .. 2)
        rows.popFront();

    assert(rows.empty);
}
|
|
453
|
|
// Test shorter row length: with Malformed.ignore a row that lacks the second
// column yields a struct whose `value` is 0.
@safe pure unittest
{
    struct Layout
    {
        string name;
        int value;
    }

    wstring input = "one,1\ntwo\nthree"w;

    Layout[3] expected = [Layout("one", 1), Layout("two", 0), Layout("three", 0)];

    size_t seen;
    foreach (row; csvReader!(Layout, Malformed.ignore)(input))
    {
        assert(expected[seen].name == row.name);
        assert(expected[seen].value == row.value);
        ++seen;
    }
}
|
|
482
|
|
// Test shorter row length exception: with the default error level, iterating
// input whose rows have inconsistent lengths throws a CSVException.
@safe pure unittest
{
    import std.exception;

    struct A
    {
        string a,b,c;
    }

    auto inputs = ["one,1\ntwo",
                   "one\ntwo,2,二\nthree,3,三",
                   "one\ntwo,2\nthree,3",
                   "one,1\ntwo\nthree,3"];

    foreach (input; inputs)
    {
        // Construction happens outside assertThrown: only iteration may throw.
        auto rows = csvReader!A(input);
        assertThrown!CSVException(
            ()
            {
                foreach (row; rows) { }
            }());
    }
}
|
|
504
|
|
505
|
|
// Test structure conversion interface with unicode: a wstring input holding a
// character outside the BMP is converted field by field into a struct.
@safe pure unittest
{
    import std.math : abs;

    struct Layout
    {
        string name;
        int value;
        double other;
    }

    wstring input = "\U00010143Hello,65,63.63\nWorld,123,3673.562"w;

    Layout[2] expected = [Layout("\U00010143Hello", 65, 63.63),
                          Layout("World", 123, 3673.562)];

    int seen;
    foreach (row; csvReader!Layout(input))
    {
        assert(expected[seen].name == row.name);
        assert(expected[seen].value == row.value);
        // Floating point fields are compared with a small tolerance.
        assert(abs(expected[seen].other - row.other) < 0.00001);
        ++seen;
    }
    assert(seen == expected.length);
}
|
|
539
|
|
// Test input conversion interface: every cell of the record is converted to int.
@safe pure unittest
{
    import std.algorithm;

    string input = `76,26,22`;
    int[] expected = [76,26,22];

    foreach (row; csvReader!int(input))
        assert(equal(row, expected));
}
|
|
553
|
|
// Test struct & header interface and same unicode: the header lists the input
// columns in the order of the struct's fields (b -> value, c -> other,
// a -> name).
@safe unittest
{
    import std.math : abs;

    struct Layout
    {
        int value;
        double other;
        string name;
    }

    string input = "a,b,c\nHello,65,63.63\n➊➋➂❹,123,3673.562";
    auto rows = csvReader!Layout(input, ["b","c","a"]);

    Layout[2] expected = [Layout(65, 63.63, "Hello"),
                          Layout(123, 3673.562, "➊➋➂❹")];

    int seen;
    foreach (row; rows)
    {
        assert(expected[seen].name == row.name);
        assert(expected[seen].value == row.value);
        assert(abs(expected[seen].other - row.other) < 0.00001);
        ++seen;
    }
    assert(seen == expected.length);
}
|
|
588
|
|
// Test header interface: selecting a single column, rejecting a header whose
// order contradicts the input, and the relaxed handling under
// Malformed.ignore.
@safe unittest
{
    import std.algorithm;

    string input = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";

    // Only column "b" is requested, so each record holds one int.
    auto numbers = [[65],[123]];
    foreach (row; csvReader!int(input, ["b"]))
    {
        assert(equal(row, numbers.front));
        numbers.popFront();
    }

    // "c" before "b" does not match the input order and must throw.
    try
    {
        csvReader(input, ["c","b"]);
        assert(0);
    }
    catch (HeaderMismatchException e)
    {
        assert(e.col == 2);
    }

    // With Malformed.ignore an out-of-order header is accepted and the cells
    // come back in input order.
    auto lenient = csvReader!(string,Malformed.ignore)
                       (input, ["b","a"], ',', '"');

    auto texts = [["Hello","65"],["World","123"]];
    foreach (row; lenient)
    {
        assert(equal(row, texts.front));
        texts.popFront();
    }

    // Header names missing from the input ("b", "d") are skipped.
    input = "a,c,e\nJoe,Carpenter,300000\nFred,Fly,4";
    lenient = csvReader!(string,Malformed.ignore)
                  (input, ["a","b","c","d"], ',', '"');

    texts = [["Joe","Carpenter"],["Fred","Fly"]];
    foreach (row; lenient)
    {
        assert(equal(row, texts.front));
        texts.popFront();
    }
}
|
|
634
|
|
// Test null header interface: the header row of the input is always exposed
// through `header`, whether a subset of columns or `null` is requested.
@safe unittest
{
    string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";

    // An explicit subset still reports the complete header row.
    auto records = csvReader(str, ["a"]);
    assert(records.header == ["a","b","c"]);

    // Passing null only signals that a header is present; previously this
    // test never exercised the overload it is named after.
    auto allColumns = csvReader(str, null);
    assert(allColumns.header == ["a","b","c"]);
}
|
|
643
|
|
// Test unchecked read: with Malformed.ignore, quotes inside unquoted fields
// are kept as part of the data.
@safe pure unittest
{
    string single = "one \"quoted\"";
    foreach (row; csvReader!(string,Malformed.ignore)(single))
        foreach (cell; row)
            assert(cell == "one \"quoted\"");

    struct Ans
    {
        string a,b;
    }

    string pair = "one \"quoted\",two \"quoted\" end";
    foreach (row; csvReader!(Ans,Malformed.ignore)(pair))
    {
        assert(row.a == "one \"quoted\"");
        assert(row.b == "two \"quoted\" end");
    }
}
|
|
667
|
|
// Test partial data returned: an unclosed quoted field throws, and the
// exception carries the text read so far.
@safe pure unittest
{
    string input = "\"one\nnew line";

    try
    {
        foreach (row; csvReader(input)) { }
        // Reaching this point means no exception was raised.
        assert(0);
    }
    catch (IncompleteCellException incomplete)
    {
        assert(incomplete.partialData == "one\nnew line");
    }
}
|
|
684
|
|
// Test Windows line break: "\r\n" separates records.
@safe pure unittest
{
    string input = "one,two\r\nthree";
    auto rows = csvReader(input);

    // First record: "one", "two".
    auto firstRow = rows.front;
    assert(firstRow.front == "one");
    firstRow.popFront();
    assert(firstRow.front == "two");

    // Second record starts after the CRLF.
    rows.popFront();
    auto secondRow = rows.front;
    assert(secondRow.front == "three");
}
|
|
699
|
|
700
|
|
// Test associative array support with unicode separator: records are keyed by
// the requested header names.
@safe unittest
{
    string input = "1❁2❁3\n34❁65❁63\n34❁65❁63";

    int recordCount;
    foreach (row; csvReader!(string[string])(input,["3","1"],'❁'))
    {
        ++recordCount;
        assert(row["1"] == "34");
        assert(row["3"] == "63");
    }
    assert(recordCount == 2);
}
|
|
716
|
|
// Test restricted range: the reader must work with a range that offers only
// empty/front/popFront, for range-of-cells, Tuple and associative array
// contents.
@safe unittest
{
    import std.typecons;

    struct InputRange
    {
        dstring text;

        this(dstring txt)
        {
            text = txt;
        }

        @property auto empty()
        {
            return text.empty;
        }

        void popFront()
        {
            text.popFront();
        }

        @property dchar front()
        {
            return text[0];
        }
    }

    auto source = InputRange("Name,Occupation,Salary\r"d~
                             "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n"d);

    // Records as ranges of cells.
    foreach (row; csvReader(source, cast(string[]) null))
    {
        foreach (cell; row) { }
    }

    // Records as tuples.
    foreach (row; csvReader!(Tuple!(string, string, int))
                      (source,cast(string[]) null)) { }

    // Records as associative arrays.
    foreach (row; csvReader!(string[string])
                      (source,cast(string[]) null)) { }
}
|
|
755
|
|
// const/immutable dchars: element qualifiers on the input must not matter.
@safe unittest
{
    import std.algorithm.iteration : map;
    import std.array : array;

    const(dchar)[] constInput = "foo,bar\n";
    assert(csvReader(constInput).map!array.array == [["foo", "bar"]]);

    immutable(dchar)[] immutableInput = "foo,bar\n";
    assert(csvReader(immutableInput).map!array.array == [["foo", "bar"]]);
}
|
|
765
|
|
/*
 * Parsing state: the remaining input together with the current row and
 * column.
 *
 * The reader allocates this struct on the heap and hands out a pointer to
 * it, so the state is shared when the structures are passed around.
 */
private pure struct Input(Range, Malformed ErrorLevel)
{
    Range range;
    size_t row, col;

    // Only present when malformed input raises exceptions.
    static if (ErrorLevel == Malformed.throwException)
    {
        size_t rowLength;
    }
}
|
|
777
|
|
/*
 * Range for iterating CSV records.
 *
 * This range is returned by the $(LREF csvReader) functions. It can be
 * created in a similar manner to allow $(D ErrorLevel) to be set to $(LREF
 * Malformed).ignore if best guess processing should take place.
 */
private struct CsvReader(Contents, Malformed ErrorLevel, Range, Separator, Header)
if (isSomeChar!Separator && isInputRange!Range
    && is(Unqual!(ElementType!Range) == dchar)
    && isForwardRange!Header && isSomeString!(ElementType!Header))
{
private:
    // Heap-allocated parsing state, handed by pointer to each record range.
    Input!(Range, ErrorLevel)* _input;
    Separator _separator;
    Separator _quote;
    // Input column index for each requested header column (empty when no
    // header columns were requested).
    size_t[] indices;
    bool _empty;
    static if (is(Contents == struct) || is(Contents == class))
    {
        // Fields are read as strings and converted per member in prime().
        Contents recordContent;
        CsvRecord!(string, ErrorLevel, Range, Separator) recordRange;
    }
    else static if (is(Contents T : T[U], U : string))
    {
        Contents recordContent;
        CsvRecord!(T, ErrorLevel, Range, Separator) recordRange;
    }
    else
        CsvRecord!(Contents, ErrorLevel, Range, Separator) recordRange;
public:
    /**
     * Header from the input in array form.
     *
     * -------
     * string str = "a,b,c\nHello,65,63.63";
     * auto records = csvReader(str, ["a"]);
     *
     * assert(records.header == ["a","b","c"]);
     * -------
     */
    string[] header;

    /**
     * Constructor to initialize the input, delimiter and quote for input
     * without a header.
     *
     * -------
     * string str = `76;^26^;22`;
     * int[] ans = [76,26,22];
     * auto records = CsvReader!(int,Malformed.ignore,string,char,string[])
     *       (str, ';', '^');
     *
     * foreach (record; records)
     * {
     *    assert(equal(record, ans));
     * }
     * -------
     */
    this(Range input, Separator delimiter, Separator quote)
    {
        _input = new Input!(Range, ErrorLevel)(input);
        _separator = delimiter;
        _quote = quote;

        prime();
    }

    /**
     * Constructor to initialize the input, delimiter and quote for input
     * with a header.
     *
     * -------
     * string str = "high;mean;low\n76;^26^;22";
     * auto records = CsvReader!(int,Malformed.ignore,string,char,string[])
     *       (str, ["high","low"], ';', '^');
     *
     * int[] ans = [76,22];
     * foreach (record; records)
     * {
     *    assert(equal(record, ans));
     * }
     * -------
     *
     * Throws:
     *       $(LREF HeaderMismatchException) when a header is provided but a
     *       matching column is not found or the order did not match that found
     *       in the input (non-struct).
     */
    this(Range input, Header colHeaders, Separator delimiter, Separator quote)
    {
        _input = new Input!(Range, ErrorLevel)(input);
        _separator = delimiter;
        _quote = quote;

        // Every requested column starts out as "not found".
        size_t[string] colToIndex;
        foreach (h; colHeaders)
        {
            colToIndex[h] = size_t.max;
        }

        // Read the first record as the header, noting where each requested
        // column sits in the input.
        auto r = CsvRecord!(string, ErrorLevel, Range, Separator)
            (_input, _separator, _quote, indices);

        size_t colIndex;
        foreach (col; r)
        {
            header ~= col;
            auto ptr = col in colToIndex;
            if (ptr)
                *ptr = colIndex;
            colIndex++;
        }
        // The above loop empties the header row.
        recordRange._empty = true;

        indices.length = colToIndex.length;
        size_t i;
        foreach (h; colHeaders)
        {
            immutable index = colToIndex[h];
            static if (ErrorLevel != Malformed.ignore)
                if (index == size_t.max)
                    throw new HeaderMismatchException
                        ("Header not found: " ~ to!string(h));
            indices[i++] = index;
        }

        static if (!is(Contents == struct) && !is(Contents == class))
        {
            static if (is(Contents T : T[U], U : string))
            {
                import std.algorithm.sorting : sort;
                sort(indices);
            }
            else static if (ErrorLevel == Malformed.ignore)
            {
                import std.algorithm.sorting : sort;
                sort(indices);
            }
            else
            {
                import std.algorithm.searching : findAdjacent;
                import std.algorithm.sorting : isSorted;
                if (!isSorted(indices))
                {
                    auto ex = new HeaderMismatchException
                           ("Header in input does not match specified header.");
                    // isSorted failed, so an adjacent pair with a > b exists.
                    // Its second element is the first requested column that
                    // occurs in the input before its predecessor. The result
                    // used to be discarded, so col was always taken from the
                    // first requested header instead.
                    auto outOfOrder = findAdjacent!"a > b"(indices);
                    ex.row = 1;
                    // Indices are zero-based; reported columns start at one.
                    ex.col = outOfOrder[1] + 1;

                    throw ex;
                }
            }
        }

        // Moves past the header's line ending and loads the first data record.
        popFront();
    }

    /**
     * Part of an input range as defined by
     * $(REF isInputRange, std,range,primitives).
     *
     * Returns:
     *      If $(D Contents) is a struct, will be filled with record data.
     *
     *      If $(D Contents) is a class, will be filled with record data.
     *
     *      If $(D Contents) is an associative array, will be filled
     *      with record data.
     *
     *      If $(D Contents) is non-struct, a $(LREF CsvRecord) will be
     *      returned.
     */
    @property auto front()
    {
        assert(!empty);
        static if (is(Contents == struct) || is(Contents == class))
        {
            return recordContent;
        }
        else static if (is(Contents T : T[U], U : string))
        {
            return recordContent;
        }
        else
        {
            return recordRange;
        }
    }

    /**
     * Part of an input range as defined by
     * $(REF isInputRange, std,range,primitives).
     */
    @property bool empty() @safe @nogc pure nothrow const
    {
        return _empty;
    }

    /**
     * Part of an input range as defined by
     * $(REF isInputRange, std,range,primitives).
     *
     * Throws:
     *       $(LREF CSVException) When a quote is found in an unquoted field,
     *       data continues after a closing quote, the quoted field was not
     *       closed before data was empty, a conversion failed, or when the
     *       row's length does not match the previous length.
     */
    void popFront()
    {
        // Consume whatever is left of the current record.
        while (!recordRange.empty)
        {
            recordRange.popFront();
        }

        // Remember the column count of the first completed row.
        static if (ErrorLevel == Malformed.throwException)
            if (_input.rowLength == 0)
                _input.rowLength = _input.col;

        _input.col = 0;

        // Skip a single record separator: CRLF, CR or LF.
        if (!_input.range.empty)
        {
            if (_input.range.front == '\r')
            {
                _input.range.popFront();
                if (!_input.range.empty && _input.range.front == '\n')
                    _input.range.popFront();
            }
            else if (_input.range.front == '\n')
                _input.range.popFront();
        }

        if (_input.range.empty)
        {
            _empty = true;
            return;
        }

        prime();
    }

    // Starts the next record: bumps the row counter, creates a fresh record
    // range and, for struct/class/associative array contents, reads the whole
    // record into recordContent. Conversion failures are rethrown as
    // CSVException with the ConvException chained.
    private void prime()
    {
        if (_empty)
            return;
        _input.row++;
        static if (is(Contents == struct) || is(Contents == class))
        {
            // Struct/class fields are matched against `indices` below, so the
            // record range itself reads every column.
            recordRange = typeof(recordRange)
                                 (_input, _separator, _quote, null);
        }
        else
        {
            recordRange = typeof(recordRange)
                                 (_input, _separator, _quote, indices);
        }

        static if (is(Contents T : T[U], U : string))
        {
            T[U] aa;
            try
            {
                for (; !recordRange.empty; recordRange.popFront())
                {
                    // NOTE(review): relies on the record range keeping
                    // _input.col at the one-based column of the current
                    // cell - confirm against CsvRecord.
                    aa[header[_input.col-1]] = recordRange.front;
                }
            }
            catch (ConvException e)
            {
                throw new CSVException(e.msg, _input.row, _input.col, e);
            }

            recordContent = aa;
        }
        else static if (is(Contents == struct) || is(Contents == class))
        {
            static if (is(Contents == class))
                recordContent = new typeof(recordContent)();
            else
                recordContent = typeof(recordContent).init;
            size_t colIndex;
            try
            {
                for (; !recordRange.empty;)
                {
                    auto colData = recordRange.front;
                    // Runs on normal exit and when a conversion throws, so the
                    // catch block below sees a one-based column number.
                    scope(exit) colIndex++;
                    if (indices.length > 0)
                    {
                        // Header given: field ti takes the column indices[ti].
                        foreach (ti, ToType; Fields!(Contents))
                        {
                            if (indices[ti] == colIndex)
                            {
                                static if (!isSomeString!ToType) skipWS(colData);
                                recordContent.tupleof[ti] = to!ToType(colData);
                            }
                        }
                    }
                    else
                    {
                        // No header: fields are filled in declaration order.
                        foreach (ti, ToType; Fields!(Contents))
                        {
                            if (ti == colIndex)
                            {
                                static if (!isSomeString!ToType) skipWS(colData);
                                recordContent.tupleof[ti] = to!ToType(colData);
                            }
                        }
                    }
                    recordRange.popFront();
                }
            }
            catch (ConvException e)
            {
                throw new CSVException(e.msg, _input.row, colIndex, e);
            }
        }
    }
}
|
|
1101
|
|
@safe pure unittest
{
    import std.algorithm.comparison : equal;

    // Semicolon-separated input with '^' acting as the quote character.
    string input = `76;^26^;22`;
    int[] expected = [76,26,22];

    auto reader = CsvReader!(int,Malformed.ignore,string,char,string[])
        (input, ';', '^');

    foreach (fields; reader)
    {
        assert(equal(fields, expected));
    }
}
|
|
1116
|
|
// Bugzilla 15545: input ending in a lone '\r' must be read without throwing.
// @system due to the catch for Throwable
@system pure unittest
{
    import std.exception : assertNotThrown;

    enum failData = "name, surname, age\nJoe, Joker, 99\r";

    auto records = csvReader(failData);
    assertNotThrown(
        () {
            foreach (entry; records)
            {
            }
        }());
}
|
|
1128
|
|
/*
 * This input range is accessible through $(LREF CsvReader) when the
 * requested $(D Contents) type is neither a structure nor an associative array.
 *
 * It walks the fields of a single record and converts each one to
 * $(D Contents).
 */
private struct CsvRecord(Contents, Malformed ErrorLevel, Range, Separator)
if (!is(Contents == class) && !is(Contents == struct))
{
    import std.array : appender;
private:
    // Parser state received from the caller: the underlying range plus the
    // row, column and expected-row-length counters used below.
    Input!(Range, ErrorLevel)* _input;
    Separator _separator;
    Separator _quote;
    // Most recently converted field; this is what front returns.
    Contents curContentsoken;
    // Scratch buffer collecting the characters of the field being read.
    typeof(appender!(dchar[])()) _front;
    bool _empty;
    // For each requested column, the number of fields to skip in order to
    // reach it from the previously requested one (see the constructor).
    size_t[] _popCount;
public:
    /*
     * Params:
     *      input = Pointer to a character input range
     *      delimiter = Separator for each column
     *      quote = Character used for quotation
     *      indices = An array containing which columns will be returned.
     *             If empty, all columns are returned. List must be in order.
     */
    this(Input!(Range, ErrorLevel)* input, Separator delimiter,
         Separator quote, size_t[] indices)
    {
        _input = input;
        _separator = delimiter;
        _quote = quote;
        _front = appender!(dchar[])();
        _popCount = indices.dup;

        // If a header was given, each call to popFront will need
        // to eliminate so many tokens. This calculates
        // how many will be skipped to get to the next header column
        // by turning the absolute column indices into relative gaps.
        size_t normalizer;
        foreach (ref c; _popCount)
        {
            static if (ErrorLevel == Malformed.ignore)
            {
                // If we are not throwing exceptions
                // a header may not exist, indices are sorted
                // and will be size_t.max if not found.
                if (c == size_t.max)
                    break;
            }
            c -= normalizer;
            normalizer += c + 1;
        }

        prime();
    }

    /**
     * Part of an input range as defined by
     * $(REF isInputRange, std,range,primitives).
     */
    @property Contents front() @safe pure
    {
        assert(!empty);
        return curContentsoken;
    }

    /**
     * Part of an input range as defined by
     * $(REF isInputRange, std,range,primitives).
     */
    @property bool empty() @safe pure nothrow @nogc const
    {
        return _empty;
    }

    /*
     * CsvRecord is complete when input
     * is empty or starts with record break
     */
    private bool recordEnd()
    {
        if (_input.range.empty
            || _input.range.front == '\n'
            || _input.range.front == '\r')
        {
            return true;
        }
        return false;
    }


    /**
     * Part of an input range as defined by
     * $(REF isInputRange, std,range,primitives).
     *
     * Throws:
     *       $(LREF CSVException) When a quote is found in an unquoted field,
     *       data continues after a closing quote, the quoted field was not
     *       closed before data was empty, a conversion failed, or when the
     *       row's length does not match the previous length.
     */
    void popFront()
    {
        static if (ErrorLevel == Malformed.throwException)
            import std.format : format;
        // Skip last of record when header is depleted.
        // (_popCount.ptr is non-null only when column indices were supplied.)
        if (_popCount.ptr && _popCount.empty)
            while (!recordEnd())
            {
                prime(1);
            }

        if (recordEnd())
        {
            _empty = true;
            // A finished record must have exactly the established length.
            static if (ErrorLevel == Malformed.throwException)
                if (_input.rowLength != 0)
                    if (_input.col != _input.rowLength)
                        throw new CSVException(
                           format("Row %s's length %s does not match "~
                                  "previous length of %s.", _input.row,
                                  _input.col, _input.rowLength));
            return;
        }
        else
        {
            // Mid-record it is only an error to have read too many fields.
            static if (ErrorLevel == Malformed.throwException)
                if (_input.rowLength != 0)
                    if (_input.col > _input.rowLength)
                        throw new CSVException(
                           format("Row %s's length %s does not match "~
                                  "previous length of %s.", _input.row,
                                  _input.col, _input.rowLength));
        }

        // Separator is left on the end of input from the last call.
        // This cannot be moved to after the call to csvNextToken as
        // there may be an empty record after it.
        // (recordEnd() returned false above, so the range is not empty here.)
        if (_input.range.front == _separator)
            _input.range.popFront();

        _front.shrinkTo(0);

        prime();
    }

    /*
     * Handles moving to the next skipNum token.
     *
     * Each step counts one column, clears the field buffer, drops a pending
     * separator and reads one token into the buffer.
     */
    private void prime(size_t skipNum)
    {
        foreach (i; 0 .. skipNum)
        {
            _input.col++;
            _front.shrinkTo(0);
            // The input may already be exhausted at this point (for example
            // when a requested column lies beyond the end of a short final
            // record), so only peek at front when a character is available.
            if (!_input.range.empty && _input.range.front == _separator)
                _input.range.popFront();

            try
                csvNextToken!(Range, ErrorLevel, Separator)
                                   (_input.range, _front, _separator, _quote,false);
            catch (IncompleteCellException ice)
            {
                // Attach position and the partial field before rethrowing.
                ice.row = _input.row;
                ice.col = _input.col;
                ice.partialData = _front.data.idup;
                throw ice;
            }
            catch (ConvException e)
            {
                throw new CSVException(e.msg, _input.row, _input.col, e);
            }
        }
    }

    /*
     * Reads the next field, skips ahead to the next requested column when
     * column indices were supplied, and converts the resulting text into
     * curContentsoken.
     */
    private void prime()
    {
        try
        {
            _input.col++;
            csvNextToken!(Range, ErrorLevel, Separator)
                (_input.range, _front, _separator, _quote,false);
        }
        catch (IncompleteCellException ice)
        {
            // Attach position and the partial field before rethrowing.
            ice.row = _input.row;
            ice.col = _input.col;
            ice.partialData = _front.data.idup;
            throw ice;
        }

        auto skipNum = _popCount.empty ? 0 : _popCount.front;
        if (!_popCount.empty)
            _popCount.popFront();

        // size_t.max marks a column that was not found (see the constructor):
        // discard the rest of the record and finish it.
        if (skipNum == size_t.max)
        {
            while (!recordEnd())
                prime(1);
            _empty = true;
            return;
        }

        if (skipNum)
            prime(skipNum);

        auto data = _front.data;
        static if (!isSomeString!Contents) skipWS(data);
        try curContentsoken = to!Contents(data);
        catch (ConvException e)
        {
            throw new CSVException(e.msg, _input.row, _input.col, e);
        }
    }
}
|
|
1343
|
|
/**
 * Lower level control over parsing CSV
 *
 * This function consumes the input. After each call the input will
 * start with either a delimiter or record break (\n, \r\n, \r) which
 * must be removed for subsequent calls.
 *
 * When $(D startQuoted) is set, a record break at the very start of the
 * input is part of the quoted field and is copied to $(D ans); without it,
 * a leading record break ends the (empty) token immediately. A leading
 * $(D quote) character is always consumed as an opening quote.
 *
 * Params:
 *       input = Any CSV input
 *       ans   = The first field in the input
 *       sep   = The character to represent a comma in the specification
 *       quote = The character to represent a quote in the specification
 *       startQuoted = Whether the input should be considered to already be in
 * quotes
 *
 * Throws:
 *       $(LREF IncompleteCellException) When a quote is found in an unquoted
 *       field, data continues after a closing quote, or the quoted field was
 *       not closed before data was empty.
 */
void csvNextToken(Range, Malformed ErrorLevel = Malformed.throwException,
                           Separator, Output)
                          (ref Range input, ref Output ans,
                           Separator sep, Separator quote,
                           bool startQuoted = false)
if (isSomeChar!Separator && isInputRange!Range
    && is(Unqual!(ElementType!Range) == dchar)
    && isOutputRange!(Output, dchar))
{
    bool quoted = startQuoted;
    bool escQuote;
    if (input.empty)
        return;

    // A record break in front of an unquoted token means the token is empty.
    // When the caller says we are already inside quotes the break is field
    // data, so it must fall through to the loop below instead of returning
    // without consuming anything.
    if (!quoted && (input.front == '\n' || input.front == '\r'))
        return;

    if (input.front == quote)
    {
        quoted = true;
        input.popFront();
    }

    while (!input.empty)
    {
        assert(!(quoted && escQuote));
        if (!quoted)
        {
            // When not quoted the token ends at sep
            if (input.front == sep)
                break;
            if (input.front == '\r')
                break;
            if (input.front == '\n')
                break;
        }
        if (!quoted && !escQuote)
        {
            if (input.front == quote)
            {
                // Not quoted, but quote found
                static if (ErrorLevel == Malformed.throwException)
                    throw new IncompleteCellException(
                          "Quote located in unquoted token");
                else static if (ErrorLevel == Malformed.ignore)
                    ans.put(quote);
            }
            else
            {
                // Not quoted, non-quote character
                ans.put(input.front);
            }
        }
        else
        {
            if (input.front == quote)
            {
                // Quoted, quote found
                // By turning off quoted and turning on escQuote
                // I can tell when to add a quote to the string
                // escQuote is turned to false when it escapes a
                // quote or is followed by a non-quote (see outside else).
                // They are mutually exclusive, but provide different
                // information.
                if (escQuote)
                {
                    escQuote = false;
                    quoted = true;
                    ans.put(quote);
                }
                else
                {
                    escQuote = true;
                    quoted = false;
                }
            }
            else
            {
                // Quoted, non-quote character
                if (escQuote)
                {
                    static if (ErrorLevel == Malformed.throwException)
                        throw new IncompleteCellException(
                          "Content continues after end quote, " ~
                          "or needs to be escaped.");
                    else static if (ErrorLevel == Malformed.ignore)
                        break;
                }
                ans.put(input.front);
            }
        }
        input.popFront();
    }

    // Still inside quotes when the data (or line) ran out.
    static if (ErrorLevel == Malformed.throwException)
        if (quoted && (input.empty || input.front == '\n' || input.front == '\r'))
            throw new IncompleteCellException(
                  "Data continues on future lines or trailing quote");

}
|
|
1465
|
|
///
@safe unittest
{
    import std.array : appender;
    import std.range.primitives : popFront;

    string input = "65,63\n123,3673";
    auto token = appender!(char[])();

    // First field: parsing stops in front of the separator.
    csvNextToken(input, token, ',', '"');
    assert(token.data == "65");
    assert(input == ",63\n123,3673");

    // Remove the separator and clear the buffer before the next call;
    // the second field ends at the record break.
    input.popFront();
    token.shrinkTo(0);
    csvNextToken(input, token, ',', '"');
    assert(token.data == "63");
    assert(input == "\n123,3673");

    // Remove the record break and read the first field of the next record.
    input.popFront();
    token.shrinkTo(0);
    csvNextToken(input, token, ',', '"');
    assert(token.data == "123");
    assert(input == ",3673");
}
|
|
1492
|
|
// Test csvNextToken on simplest form and correct format.
@safe pure unittest
{
    import std.array;

    string str = "\U00010143Hello,65,63.63\nWorld,123,3673.562";

    // Token produced by each successive call, and the input left behind it.
    dstring[] tokens = [
        "\U00010143Hello"d, "65"d, "63.63"d, "World"d, "123"d, "3673.562"d
    ];
    string[] remainders = [
        ",65,63.63\nWorld,123,3673.562",
        ",63.63\nWorld,123,3673.562",
        "\nWorld,123,3673.562",
        ",123,3673.562",
        ",3673.562",
        ""
    ];

    auto a = appender!(dchar[])();
    foreach (i, expected; tokens)
    {
        if (i > 0)
        {
            // Drop the separator or record break left by the previous call
            // and start with an empty buffer.
            str.popFront();
            a.shrinkTo(0);
        }
        csvNextToken!string(str,a,',','"');
        assert(a.data == expected);
        assert(str == remainders[i]);
    }
}
|
|
1535
|
|
// Test quoted tokens
@safe pure unittest
{
    import std.array;

    string str = `one,two,"three ""quoted""","",` ~ "\"five\nnew line\"\nsix";

    // Token produced by each successive call, and the input left behind it.
    dstring[] tokens = [
        "one"d, "two"d, "three \"quoted\""d, ""d, "five\nnew line"d, "six"d
    ];
    string[] remainders = [
        `,two,"three ""quoted""","",` ~ "\"five\nnew line\"\nsix",
        `,"three ""quoted""","",` ~ "\"five\nnew line\"\nsix",
        `,"",` ~ "\"five\nnew line\"\nsix",
        ",\"five\nnew line\"\nsix",
        "\nsix",
        ""
    ];

    auto a = appender!(dchar[])();
    foreach (i, expected; tokens)
    {
        if (i > 0)
        {
            // Drop the separator or record break left by the previous call
            // and start with an empty buffer.
            str.popFront();
            a.shrinkTo(0);
        }
        csvNextToken!string(str,a,',','"');
        assert(a.data == expected);
        assert(str == remainders[i]);
    }
}
|
|
1578
|
|
// Test empty data is pulled at end of record.
@safe pure unittest
{
    import std.array;

    string input = "one,";
    auto token = appender!(dchar[])();

    csvNextToken(input, token, ',', '"');
    assert(token.data == "one");
    assert(input == ",");

    // The separator is deliberately left in place: the next call sees it
    // first and reports an empty field.
    token.shrinkTo(0);
    csvNextToken(input, token, ',', '"');
    assert(token.data == "");
}
|
|
1594
|
|
// Test exceptions
@safe pure unittest
{
    import std.array;

    // A quoted field that is never closed: everything is consumed and the
    // partial text stays in the buffer.
    string str = "\"one\nnew line";
    auto a = appender!(dchar[])();
    bool thrown = false;
    try
    {
        csvNextToken(str,a,',','"');
    }
    catch (IncompleteCellException)
    {
        thrown = true;
    }
    assert(thrown);
    assert(a.data == "one\nnew line");
    assert(str == "");

    // A quote inside an unquoted field: parsing stops in front of the quote.
    str = "Hello world\"";
    a = appender!(dchar[])();
    thrown = false;
    try
    {
        csvNextToken(str,a,',','"');
    }
    catch (IncompleteCellException)
    {
        thrown = true;
    }
    assert(thrown);
    assert(a.data == "Hello world");
    assert(str == "\"");

    // With Malformed.ignore the stray quotes are kept as field text.
    str = "one, two \"quoted\" end";
    a = appender!(dchar[])();
    csvNextToken!(string,Malformed.ignore)(str,a,',','"');
    assert(a.data == "one");

    str.popFront();
    a.shrinkTo(0);
    csvNextToken!(string,Malformed.ignore)(str,a,',','"');
    assert(a.data == " two \"quoted\" end");
}
|
|
1639
|
|
// Test modifying token delimiter
@safe pure unittest
{
    import std.array;

    // '|' separates fields and '/' quotes them.
    string str = `one|two|/three "quoted"/|//`;

    dstring[] tokens = ["one"d, "two"d, `three "quoted"`d, ""d];
    // Input left after each of the first three calls; the remainder after
    // the final call is not checked.
    string[] remainders = [
        `|two|/three "quoted"/|//`,
        `|/three "quoted"/|//`,
        `|//`
    ];

    auto a = appender!(dchar[])();
    foreach (i, expected; tokens)
    {
        if (i > 0)
        {
            str.popFront();
            a.shrinkTo(0);
        }
        csvNextToken(str,a, '|','/');
        assert(a.data == expected);
        if (i < remainders.length)
            assert(str == remainders[i]);
    }
}
|
|
1669
|
|
// Bugzilla 8908: numeric fields preceded by whitespace must still convert.
@safe pure unittest
{
    string csv = ` 1.0, 2.0, 3.0
4.0, 5.0, 6.0`;

    static struct Data { real a, b, c; }

    // Struct target: every record arrives as a filled-in Data value.
    size_t rowNo = 0;
    foreach (data; csvReader!Data(csv))
    {
        int[] row = [cast(int) data.a, cast(int) data.b, cast(int) data.c];
        assert(row == (rowNo == 0 ? [1, 2, 3] : [4, 5, 6]));
        ++rowNo;
    }

    // Scalar target: every record is a range of real values.
    rowNo = 0;
    foreach (data; csvReader!real(csv))
    {
        auto first = data.front;
        data.popFront();
        auto second = data.front;
        data.popFront();
        auto third = data.front;

        int[] row = [cast(int) first, cast(int) second, cast(int) third];
        assert(row == (rowNo == 0 ? [1, 2, 3] : [4, 5, 6]));
        ++rowNo;
    }
}
|