annotate libphobos/src/std/csv.d @ 158:494b0b89df80 default tip

...
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Mon, 25 May 2020 18:13:55 +0900
parents 1830386684a0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
145
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1 //Written in the D programming language
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
2
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
3 /**
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
4 * Implements functionality to read Comma Separated Values and its variants
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
5 * from an input range of $(D dchar).
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
6 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
7 * Comma Separated Values provide a simple means to transfer and store
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
8 * tabular data. It has been common for programs to use their own
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
9 * variant of the CSV format. This parser will loosely follow the
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
10 * $(HTTP tools.ietf.org/html/rfc4180, RFC-4180). CSV input should adhere
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
11 * to the following criteria (differences from RFC-4180 in parentheses):
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
12 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
13 * $(UL
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
14 * $(LI A record is separated by a new line (CRLF,LF,CR))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
15 * $(LI A final record may end with a new line)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
16 * $(LI A header may be provided as the first record in input)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
17 * $(LI A record has fields separated by a comma (customizable))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
18 * $(LI A field containing new lines, commas, or double quotes
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
19 * should be enclosed in double quotes (customizable))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
20 * $(LI Double quotes in a field are escaped with a double quote)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
21 * $(LI Each record should contain the same number of fields)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
22 * )
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
23 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
24 * Example:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
25 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
26 * -------
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
27 * import std.algorithm;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
28 * import std.array;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
29 * import std.csv;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
30 * import std.stdio;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
31 * import std.typecons;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
32 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
33 * void main()
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
34 * {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
35 * auto text = "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n";
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
36 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
37 * foreach (record; csvReader!(Tuple!(string, string, int))(text))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
38 * {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
39 * writefln("%s works as a %s and earns $%d per year",
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
40 * record[0], record[1], record[2]);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
41 * }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
42 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
43 * // To read the same string from the file "filename.csv":
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
44 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
45 * auto file = File("filename.csv", "r");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
46 * foreach (record;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
47 * file.byLine.joiner("\n").csvReader!(Tuple!(string, string, int)))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
48 * {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
49 * writefln("%s works as a %s and earns $%d per year",
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
50 * record[0], record[1], record[2]);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
51 * }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
52 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
53 * }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
54 * -------
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
55 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
56 * When an input contains a header the $(D Contents) can be specified as an
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
57 * associative array. Pass null to signify that a header is present.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
58 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
59 * -------
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
60 * auto text = "Name,Occupation,Salary\r"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
61 * "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n";
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
62 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
63 * foreach (record; csvReader!(string[string])
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
64 * (text, null))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
65 * {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
66 * writefln("%s works as a %s and earns $%s per year.",
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
67 * record["Name"], record["Occupation"],
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
68 * record["Salary"]);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
69 * }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
70 * -------
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
71 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
72 * This module allows content to be iterated by record stored in a struct,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
73 * class, associative array, or as a range of fields. Upon detection of an
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
74 * error a CSVException is thrown (can be disabled). csvNextToken has been
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
75 * made public to allow for attempted recovery.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
76 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
77 * Disabling exceptions will lift many restrictions specified above. A quote
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
78 * can appear in a field if the field was not quoted. In a quoted field, any
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
79 * quote by itself, not at the end of a field, will end processing for that
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
80 * field. The field is ended when there is no input, even if the quote was not
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
81 * closed.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
82 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
83 * See_Also:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
84 * $(HTTP en.wikipedia.org/wiki/Comma-separated_values, Wikipedia
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
85 * Comma-separated values)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
86 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
87 * Copyright: Copyright 2011
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
88 * License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
89 * Authors: Jesse Phillips
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
90 * Source: $(PHOBOSSRC std/_csv.d)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
91 */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
92 module std.csv;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
93
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
94 import std.conv;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
95 import std.exception; // basicExceptionCtors
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
96 import std.range.primitives;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
97 import std.traits;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
98
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
99 /**
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
100 * Exception containing the row and column for when an exception was thrown.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
101 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
102 * Numbering of both row and col start at one and corresponds to the location
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
103 * in the file rather than any specified header. Special consideration should
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
104 * be made when there is failure to match the header; see $(LREF
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
105 * HeaderMismatchException) for details.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
106 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
107 * When performing type conversions, $(REF ConvException, std,conv) is stored in
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
108 * the $(D next) field.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
109 */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
110 class CSVException : Exception
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
111 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
112 ///
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
113 size_t row, col;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
114
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
115 // FIXME: Use std.exception.basicExceptionCtors here once bug #11500 is fixed
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
116
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
117 this(string msg, string file = __FILE__, size_t line = __LINE__,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
118 Throwable next = null) @nogc @safe pure nothrow
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
119 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
120 super(msg, file, line, next);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
121 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
122
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
123 this(string msg, Throwable next, string file = __FILE__,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
124 size_t line = __LINE__) @nogc @safe pure nothrow
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
125 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
126 super(msg, file, line, next);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
127 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
128
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
129 this(string msg, size_t row, size_t col, Throwable next = null,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
130 string file = __FILE__, size_t line = __LINE__) @nogc @safe pure nothrow
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
131 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
132 super(msg, next, file, line);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
133 this.row = row;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
134 this.col = col;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
135 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
136
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
137 override string toString() @safe pure const
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
138 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
139 return "(Row: " ~ to!string(row) ~
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
140 ", Col: " ~ to!string(col) ~ ") " ~ msg;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
141 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
142 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
143
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
144 @safe pure unittest
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
145 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
146 import std.string;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
147 auto e1 = new Exception("Foobar");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
148 auto e2 = new CSVException("args", e1);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
149 assert(e2.next is e1);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
150
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
151 size_t r = 13;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
152 size_t c = 37;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
153
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
154 auto e3 = new CSVException("argv", r, c);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
155 assert(e3.row == r);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
156 assert(e3.col == c);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
157
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
158 auto em = e3.toString();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
159 assert(em.indexOf("13") != -1);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
160 assert(em.indexOf("37") != -1);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
161 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
162
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
163 /**
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
164 * Exception thrown when a Token is identified to not be completed: a quote is
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
165 * found in an unquoted field, data continues after a closing quote, or the
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
166 * quoted field was not closed before data was empty.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
167 */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
168 class IncompleteCellException : CSVException
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
169 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
170 /**
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
171 * Data pulled from input before finding a problem
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
172 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
173 * This field is populated when using $(LREF csvReader)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
174 * but not by $(LREF csvNextToken) as this data will have
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
175 * already been fed to the output range.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
176 */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
177 dstring partialData;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
178
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
179 mixin basicExceptionCtors;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
180 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
181
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
182 @safe pure unittest
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
183 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
184 auto e1 = new Exception("Foobar");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
185 auto e2 = new IncompleteCellException("args", e1);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
186 assert(e2.next is e1);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
187 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
188
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
189 /**
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
190 * Exception thrown under different conditions based on the type of $(D
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
191 * Contents).
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
192 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
193 * Structure, Class, and Associative Array
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
194 * $(UL
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
195 * $(LI When a header is provided but a matching column is not found)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
196 * )
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
197 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
198 * Other
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
199 * $(UL
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
200 * $(LI When a header is provided but a matching column is not found)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
201 * $(LI Order did not match that found in the input)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
202 * )
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
203 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
204 * Since a row and column are not meaningful when a column specified by the
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
205 * header is not found in the data, both row and col will be zero. Otherwise
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
206 * row is always one and col is the first instance found in header that
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
207 * occurred before the previous starting at one.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
208 */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
209 class HeaderMismatchException : CSVException
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
210 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
211 mixin basicExceptionCtors;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
212 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
213
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
214 @safe pure unittest
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
215 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
216 auto e1 = new Exception("Foobar");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
217 auto e2 = new HeaderMismatchException("args", e1);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
218 assert(e2.next is e1);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
219 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
220
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
221 /**
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
222 * Determines the behavior for when an error is detected.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
223 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
224 * When exceptions are disabled, the following rules apply:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
225 * $(UL
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
226 * $(LI A quote can appear in a field if the field was not quoted.)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
227 * $(LI In a quoted field, any quote by itself, not at the end of a
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
228 * field, will end processing for that field.)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
229 * $(LI The field is ended when there is no input, even if the quote was
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
230 * not closed.)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
231 * $(LI If the given header does not match the order in the input, the
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
232 * content will return as it is found in the input.)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
233 * $(LI If the given header contains columns not found in the input they
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
234 * will be ignored.)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
235 * )
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
236 */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
237 enum Malformed
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
238 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
239 ignore, /// No exceptions are thrown due to incorrect CSV.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
240 throwException /// Use exceptions when input has incorrect CSV.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
241 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
242
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
243 /**
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
244 * Returns an input range for iterating over records found in $(D
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
245 * input).
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
246 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
247 * The $(D Contents) of the input can be provided if all the records are the
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
248 * same type such as all integer data:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
249 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
250 * -------
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
251 * string str = `76,26,22`;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
252 * int[] ans = [76,26,22];
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
253 * auto records = csvReader!int(str);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
254 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
255 * foreach (record; records)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
256 * {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
257 * assert(equal(record, ans));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
258 * }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
259 * -------
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
260 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
261 * Example using a struct with modified delimiter:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
262 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
263 * -------
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
264 * string str = "Hello;65;63.63\nWorld;123;3673.562";
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
265 * struct Layout
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
266 * {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
267 * string name;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
268 * int value;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
269 * double other;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
270 * }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
271 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
272 * auto records = csvReader!Layout(str,';');
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
273 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
274 * foreach (record; records)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
275 * {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
276 * writeln(record.name);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
277 * writeln(record.value);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
278 * writeln(record.other);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
279 * }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
280 * -------
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
281 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
282 * Specifying $(D ErrorLevel) as Malformed.ignore will lift restrictions
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
283 * on the format. This example shows that an exception is not thrown when
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
284 * finding a quote in a field not quoted.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
285 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
286 * -------
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
287 * string str = "A \" is now part of the data";
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
288 * auto records = csvReader!(string,Malformed.ignore)(str);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
289 * auto record = records.front;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
290 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
291 * assert(record.front == str);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
292 * -------
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
293 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
294 * Returns:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
295 * An input range R as defined by
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
296 * $(REF isInputRange, std,range,primitives). When $(D Contents) is a
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
297 * struct, class, or an associative array, the element type of R is
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
298 * $(D Contents), otherwise the element type of R is itself a range with
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
299 * element type $(D Contents).
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
300 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
301 * Throws:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
302 * $(LREF CSVException) When a quote is found in an unquoted field,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
303 * data continues after a closing quote, the quoted field was not
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
304 * closed before data was empty, a conversion failed, or when the row's
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
305 * length does not match the previous length.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
306 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
307 * $(LREF HeaderMismatchException) when a header is provided but a
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
308 * matching column is not found or the order did not match that found in
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
309 * the input. Read the exception documentation for specific details of
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
310 * when the exception is thrown for different types of $(D Contents).
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
311 */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
312 auto csvReader(Contents = string,Malformed ErrorLevel = Malformed.throwException, Range, Separator = char)(Range input,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
313 Separator delimiter = ',', Separator quote = '"')
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
314 if (isInputRange!Range && is(Unqual!(ElementType!Range) == dchar)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
315 && isSomeChar!(Separator)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
316 && !is(Contents T : T[U], U : string))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
317 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
318 return CsvReader!(Contents,ErrorLevel,Range,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
319 Unqual!(ElementType!Range),string[])
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
320 (input, delimiter, quote);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
321 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
322
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
323 /**
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
324 * An optional $(D header) can be provided. The first record will be read in
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
325 * as the header. If $(D Contents) is a struct then the header provided is
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
326 * expected to correspond to the fields in the struct. When $(D Contents) is
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
327 * not a type which can contain the entire record, the $(D header) must be
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
328 * provided in the same order as the input or an exception is thrown.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
329 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
330 * Read only column "b":
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
331 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
332 * -------
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
333 * string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
334 * auto records = csvReader!int(str, ["b"]);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
335 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
336 * auto ans = [[65],[123]];
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
337 * foreach (record; records)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
338 * {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
339 * assert(equal(record, ans.front));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
340 * ans.popFront();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
341 * }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
342 * -------
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
343 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
344 * Read from header of different order:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
345 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
346 * -------
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
347 * string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
348 * struct Layout
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
349 * {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
350 * int value;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
351 * double other;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
352 * string name;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
353 * }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
354 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
355 * auto records = csvReader!Layout(str, ["b","c","a"]);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
356 * -------
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
357 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
358 * The header can also be left empty if the input contains a header but
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
359 * all columns should be iterated. The header from the input can always
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
360 * be accessed from the header field.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
361 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
362 * -------
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
363 * string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
364 * auto records = csvReader(str, null);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
365 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
366 * assert(records.header == ["a","b","c"]);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
367 * -------
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
368 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
369 * Returns:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
370 * An input range R as defined by
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
371 * $(REF isInputRange, std,range,primitives). When $(D Contents) is a
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
372 * struct, class, or an associative array, the element type of R is
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
373 * $(D Contents), otherwise the element type of R is itself a range with
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
374 * element type $(D Contents).
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
375 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
376 * The returned range provides a header field for accessing the header
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
377 * from the input in array form.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
378 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
379 * -------
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
380 * string str = "a,b,c\nHello,65,63.63";
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
381 * auto records = csvReader(str, ["a"]);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
382 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
383 * assert(records.header == ["a","b","c"]);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
384 * -------
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
385 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
386 * Throws:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
387 * $(LREF CSVException) When a quote is found in an unquoted field,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
388 * data continues after a closing quote, the quoted field was not
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
389 * closed before data was empty, a conversion failed, or when the row's
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
390 * length does not match the previous length.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
391 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
392 * $(LREF HeaderMismatchException) when a header is provided but a
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
393 * matching column is not found or the order did not match that found in
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
394 * the input. Read the exception documentation for specific details of
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
395 * when the exception is thrown for different types of $(D Contents).
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
396 */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
auto csvReader(Contents = string,
               Malformed ErrorLevel = Malformed.throwException,
               Range, Header, Separator = char)
              (Range input, Header header,
               Separator delimiter = ',', Separator quote = '"')
if (isInputRange!Range && is(Unqual!(ElementType!Range) == dchar)
    && isSomeChar!(Separator)
    && isForwardRange!Header
    && isSomeString!(ElementType!Header))
{
    // Unqualified element type of the input; the template constraint above
    // only admits ranges for which this is dchar.
    alias InputChar = Unqual!(ElementType!Range);

    // The reader is parameterised on the input's character type rather than
    // on Separator, so delimiter and quote are converted when forwarded.
    alias Reader = CsvReader!(Contents, ErrorLevel, Range, InputChar, Header);

    return Reader(input, header, delimiter, quote);
}
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
411
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
/**
 * Overload of $(LREF csvReader) chosen when $(D header) is the $(D null)
 * literal. The $(D header) argument itself is not used; an empty
 * $(D string[]) is forwarded to the reader in its place.
 */
auto csvReader(Contents = string,
               Malformed ErrorLevel = Malformed.throwException,
               Range, Header, Separator = char)
              (Range input, Header header,
               Separator delimiter = ',', Separator quote = '"')
if (isInputRange!Range && is(Unqual!(ElementType!Range) == dchar)
    && isSomeChar!(Separator)
    && is(Header : typeof(null)))
{
    // Unqualified element type of the input; constrained to dchar above.
    alias InputChar = Unqual!(ElementType!Range);

    // typeof(null) is not a range, so the reader is instantiated with
    // string[] as its header type and handed a null array of that type.
    alias Reader = CsvReader!(Contents, ErrorLevel, Range, InputChar, string[]);

    return Reader(input, cast(string[]) null, delimiter, quote);
}
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
426
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
// Test standard iteration over input.
@safe pure unittest
{
    // Three records of two cells each: the first has a quoted cell with
    // escaped quotes, the second a quoted cell containing a newline
    // followed by an empty cell.
    string text = `one,"two ""quoted"""` ~ "\n\"three\nnew line\",\nfive,six";

    int cellCount;
    foreach (row; csvReader(text))
        foreach (cell; row)
            ++cellCount;

    assert(cellCount == 6);
}
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
443
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
// Test newline on last record
@safe pure unittest
{
    string text = "one,two\nthree,four\n";
    auto rows = csvReader(text);

    // Step past both records; the trailing newline must not be reported
    // as a third, empty record.
    foreach (_; 0 .. 2)
        rows.popFront();

    assert(rows.empty);
}
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
453
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
// Test shorter row length
@safe pure unittest
{
    wstring str = "one,1\ntwo\nthree"w;
    struct Layout
    {
        string name;
        int value;
    }

    // With Malformed.ignore, rows that lack the second column are expected
    // to leave value at 0.
    Layout[3] ans;
    ans[0].name = "one";
    ans[0].value = 1;
    ans[1].name = "two";
    ans[1].value = 0;
    ans[2].name = "three";
    ans[2].value = 0;

    auto records = csvReader!(Layout,Malformed.ignore)(str);

    size_t count;
    foreach (record; records)
    {
        assert(ans[count].name == record.name);
        assert(ans[count].value == record.value);
        count++;
    }

    // Guard against a vacuous pass: every expected record must be produced.
    assert(count == ans.length);
}
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
482
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
// Test shorter row length exception
@safe pure unittest
{
    import std.exception : assertThrown;

    struct A
    {
        string a,b,c;
    }

    // Each input has at least one row whose column count differs from
    // another row's or from the three fields of A.
    auto malformedInputs = ["one,1\ntwo",
                            "one\ntwo,2,二\nthree,3,三",
                            "one\ntwo,2\nthree,3",
                            "one,1\ntwo\nthree,3"];

    foreach (text; malformedInputs)
    {
        auto rows = csvReader!A(text);

        // Only the iteration is expected to throw, not the construction above.
        assertThrown!CSVException(() {
            foreach (row; rows) { }
        }());
    }
}
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
504
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
505
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
// Test structure conversion interface with unicode.
@safe pure unittest
{
    import std.math : abs;

    struct Layout
    {
        string name;
        int value;
        double other;
    }

    // \U00010143 lies outside the BMP, so the wstring input stores it as a
    // surrogate pair while the expected string name holds it as UTF-8.
    wstring text = "\U00010143Hello,65,63.63\nWorld,123,3673.562"w;

    Layout[2] expected = [Layout("\U00010143Hello", 65, 63.63),
                          Layout("World", 123, 3673.562)];

    size_t seen;
    foreach (row; csvReader!Layout(text))
    {
        const want = expected[seen];
        assert(want.name == row.name);
        assert(want.value == row.value);
        // Floating point column: compare with a small tolerance.
        assert(abs(want.other - row.other) < 0.00001);
        ++seen;
    }
    assert(seen == expected.length);
}
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
539
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
// Test input conversion interface
@safe pure unittest
{
    import std.algorithm.comparison : equal;

    string text = `76,26,22`;
    int[] expected = [76, 26, 22];

    // Contents = int: each record is a range of cells converted to int.
    foreach (row; csvReader!int(text))
        assert(equal(row, expected));
}
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
553
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
// Test struct & header interface and same unicode
@safe unittest
{
    import std.math : abs;

    struct Layout
    {
        int value;
        double other;
        string name;
    }

    string text = "a,b,c\nHello,65,63.63\n➊➋➂❹,123,3673.562";

    // The header names are listed in struct field order (value, other, name),
    // which differs from the column order a,b,c of the input.
    auto rows = csvReader!Layout(text, ["b","c","a"]);

    Layout[2] expected = [Layout(65, 63.63, "Hello"),
                          Layout(123, 3673.562, "➊➋➂❹")];

    size_t seen;
    foreach (row; rows)
    {
        const want = expected[seen];
        assert(want.name == row.name);
        assert(want.value == row.value);
        // Floating point column: compare with a small tolerance.
        assert(abs(want.other - row.other) < 0.00001);
        ++seen;
    }
    assert(seen == expected.length);
}
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
588
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
// Test header interface
@safe unittest
{
    import std.algorithm;

    string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";

    // Select a single column by header name.
    auto records = csvReader!int(str, ["b"]);

    auto ans = [[65],[123]];
    foreach (record; records)
    {
        assert(equal(record, ans.front));
        ans.popFront();
    }
    // Guard against a vacuous pass: all expected records must be consumed.
    assert(ans.length == 0);

    // Requesting columns in an order that differs from the input is
    // reported as a header mismatch.
    try
    {
        csvReader(str, ["c","b"]);
        assert(0);
    }
    catch (HeaderMismatchException e)
    {
        assert(e.col == 2);
    }

    // With Malformed.ignore the same kind of request is accepted and the
    // cells are expected in input column order.
    auto records2 = csvReader!(string,Malformed.ignore)
        (str, ["b","a"], ',', '"');

    auto ans2 = [["Hello","65"],["World","123"]];
    foreach (record; records2)
    {
        assert(equal(record, ans2.front));
        ans2.popFront();
    }
    assert(ans2.length == 0);

    // Requested columns that are absent from the input ("b" and "d") are
    // expected to be skipped when malformed input is ignored.
    str = "a,c,e\nJoe,Carpenter,300000\nFred,Fly,4";
    records2 = csvReader!(string,Malformed.ignore)
        (str, ["a","b","c","d"], ',', '"');

    ans2 = [["Joe","Carpenter"],["Fred","Fly"]];
    foreach (record; records2)
    {
        assert(equal(record, ans2.front));
        ans2.popFront();
    }
    assert(ans2.length == 0);
}
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
634
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
// Test null header interface
@safe unittest
{
    string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";

    // A partial header selection still exposes the full header of the input.
    auto records = csvReader(str, ["a"]);
    assert(records.header == ["a","b","c"]);

    // Passing the null literal selects the overload constrained on
    // typeof(null); the header read from the input must still be available.
    auto recordsNull = csvReader(str, null);
    assert(recordsNull.header == ["a","b","c"]);
}
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
643
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
// Test unchecked read
@safe pure unittest
{
    // With Malformed.ignore, quotes inside an unquoted cell are expected to
    // come back verbatim instead of raising an error.
    string single = "one \"quoted\"";
    foreach (row; csvReader!(string,Malformed.ignore)(single))
        foreach (cell; row)
            assert(cell == "one \"quoted\"");

    struct Ans
    {
        string a,b;
    }

    // Same check through the struct conversion path, with text following
    // the embedded quotes in the second cell.
    string pair = "one \"quoted\",two \"quoted\" end";
    foreach (row; csvReader!(Ans,Malformed.ignore)(pair))
    {
        assert(row.a == "one \"quoted\"");
        assert(row.b == "two \"quoted\" end");
    }
}
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
667
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
// Test partial data returned
@safe pure unittest
{
    // The opening quote is never closed, so the input ends inside a cell.
    string unterminated = "\"one\nnew line";

    bool caught;
    try
    {
        foreach (row; csvReader(unterminated)) {}
    }
    catch (IncompleteCellException ice)
    {
        caught = true;
        // The exception carries what was read of the cell so far.
        assert(ice.partialData == "one\nnew line");
    }
    assert(caught);
}
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
684
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
// Test Windows line break
@safe pure unittest
{
    string text = "one,two\r\nthree";
    auto rows = csvReader(text);

    // First record: both cells before the CRLF.
    auto first = rows.front;
    assert(first.front == "one");
    first.popFront();
    assert(first.front == "two");

    // Second record starts right after the CRLF pair.
    rows.popFront();
    auto second = rows.front;
    assert(second.front == "three");
}
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
699
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
700
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
// Test associative array support with unicode separator
@safe unittest
{
    // '❁' is the column separator; the first line is the header 1,2,3.
    string text = "1❁2❁3\n34❁65❁63\n34❁65❁63";

    int rowsSeen;
    foreach (row; csvReader!(string[string])(text, ["3","1"], '❁'))
    {
        // Each record is keyed by header name.
        assert(row["1"] == "34");
        assert(row["3"] == "63");
        ++rowsSeen;
    }
    assert(rowsSeen == 2);
}
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
716
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
// Test restricted range
@safe unittest
{
    import std.typecons : Tuple;

    // Bare-bones input range of dchar: offers only front, empty and
    // popFront on top of a dstring.
    struct InputRange
    {
        dstring text;

        this(dstring txt)
        {
            text = txt;
        }

        @property dchar front()
        {
            return text[0];
        }

        @property auto empty()
        {
            return text.empty;
        }

        void popFront()
        {
            text.popFront();
        }
    }

    // The three records are terminated by CR, LF and CRLF respectively.
    auto ir = InputRange("Name,Occupation,Salary\r"d~
          "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n"d);

    // ir is passed by value, so every reader below starts from the
    // beginning of the text.

    // Default contents: records are ranges of cells.
    foreach (row; csvReader(ir, cast(string[]) null))
    {
        foreach (cell; row) {}
    }

    // Tuple contents.
    foreach (row; csvReader!(Tuple!(string, string, int))
             (ir, cast(string[]) null))
    {
    }

    // Associative array contents.
    foreach (row; csvReader!(string[string])
             (ir, cast(string[]) null))
    {
    }
}
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
755
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
@safe unittest // const/immutable dchars
{
    import std.algorithm.iteration : map;
    import std.array : array;

    // One record with two cells, regardless of the qualifier on the input's
    // element type.
    auto expected = [["foo", "bar"]];

    const(dchar)[] constInput = "foo,bar\n";
    assert(csvReader(constInput).map!array.array == expected);

    immutable(dchar)[] immutableInput = "foo,bar\n";
    assert(csvReader(immutableInput).map!array.array == expected);
}
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
765
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
766 /*
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
767 * This struct is stored on the heap so its state stays shared when the structures
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
768 * are passed around.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
769 */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
770 private pure struct Input(Range, Malformed ErrorLevel)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
771 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
772 Range range;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
773 size_t row, col;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
774 static if (ErrorLevel == Malformed.throwException)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
775 size_t rowLength;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
776 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
777
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
778 /*
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
779 * Range for iterating CSV records.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
780 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
781 * This range is returned by the $(LREF csvReader) functions. It can be
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
782 * created in a similar manner to allow $(D ErrorLevel) to be set to $(LREF
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
783 * Malformed).ignore if best guess processing should take place.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
784 */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
785 private struct CsvReader(Contents, Malformed ErrorLevel, Range, Separator, Header)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
786 if (isSomeChar!Separator && isInputRange!Range
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
787 && is(Unqual!(ElementType!Range) == dchar)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
788 && isForwardRange!Header && isSomeString!(ElementType!Header))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
789 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
790 private:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
791 Input!(Range, ErrorLevel)* _input;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
792 Separator _separator;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
793 Separator _quote;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
794 size_t[] indices;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
795 bool _empty;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
796 static if (is(Contents == struct) || is(Contents == class))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
797 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
798 Contents recordContent;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
799 CsvRecord!(string, ErrorLevel, Range, Separator) recordRange;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
800 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
801 else static if (is(Contents T : T[U], U : string))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
802 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
803 Contents recordContent;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
804 CsvRecord!(T, ErrorLevel, Range, Separator) recordRange;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
805 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
806 else
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
807 CsvRecord!(Contents, ErrorLevel, Range, Separator) recordRange;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
808 public:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
809 /**
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
810 * Header from the input in array form.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
811 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
812 * -------
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
813 * string str = "a,b,c\nHello,65,63.63";
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
814 * auto records = csvReader(str, ["a"]);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
815 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
816 * assert(records.header == ["a","b","c"]);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
817 * -------
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
818 */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
819 string[] header;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
820
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
821 /**
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
822 * Constructor to initialize the input, delimiter and quote for input
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
823 * without a header.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
824 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
825 * -------
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
826 * string str = `76;^26^;22`;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
827 * int[] ans = [76,26,22];
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
828 * auto records = CsvReader!(int,Malformed.ignore,string,char,string[])
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
829 * (str, ';', '^');
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
830 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
831 * foreach (record; records)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
832 * {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
833 * assert(equal(record, ans));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
834 * }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
835 * -------
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
836 */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
837 this(Range input, Separator delimiter, Separator quote)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
838 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
839 _input = new Input!(Range, ErrorLevel)(input);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
840 _separator = delimiter;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
841 _quote = quote;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
842
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
843 prime();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
844 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
845
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
846 /**
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
847 * Constructor to initialize the input, delimiter and quote for input
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
848 * with a header.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
849 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
850 * -------
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
851 * string str = "high;mean;low\n76;^26^;22";
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
852 * auto records = CsvReader!(int,Malformed.ignore,string,char,string[])
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
853 * (str, ["high","low"], ';', '^');
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
854 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
855 * int[] ans = [76,22];
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
856 * foreach (record; records)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
857 * {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
858 * assert(equal(record, ans));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
859 * }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
860 * -------
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
861 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
862 * Throws:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
863 * $(LREF HeaderMismatchException) when a header is provided but a
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
864 * matching column is not found or the order did not match that found
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
865 * in the input (non-struct).
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
866 */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
867 this(Range input, Header colHeaders, Separator delimiter, Separator quote)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
868 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
869 _input = new Input!(Range, ErrorLevel)(input);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
870 _separator = delimiter;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
871 _quote = quote;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
872
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
873 size_t[string] colToIndex;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
874 foreach (h; colHeaders)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
875 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
876 colToIndex[h] = size_t.max;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
877 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
878
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
879 auto r = CsvRecord!(string, ErrorLevel, Range, Separator)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
880 (_input, _separator, _quote, indices);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
881
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
882 size_t colIndex;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
883 foreach (col; r)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
884 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
885 header ~= col;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
886 auto ptr = col in colToIndex;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
887 if (ptr)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
888 *ptr = colIndex;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
889 colIndex++;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
890 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
891 // The above loop empties the header row.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
892 recordRange._empty = true;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
893
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
894 indices.length = colToIndex.length;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
895 int i;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
896 foreach (h; colHeaders)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
897 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
898 immutable index = colToIndex[h];
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
899 static if (ErrorLevel != Malformed.ignore)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
900 if (index == size_t.max)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
901 throw new HeaderMismatchException
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
902 ("Header not found: " ~ to!string(h));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
903 indices[i++] = index;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
904 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
905
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
906 static if (!is(Contents == struct) && !is(Contents == class))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
907 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
908 static if (is(Contents T : T[U], U : string))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
909 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
910 import std.algorithm.sorting : sort;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
911 sort(indices);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
912 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
913 else static if (ErrorLevel == Malformed.ignore)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
914 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
915 import std.algorithm.sorting : sort;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
916 sort(indices);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
917 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
918 else
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
919 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
920 import std.algorithm.searching : findAdjacent;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
921 import std.algorithm.sorting : isSorted;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
922 if (!isSorted(indices))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
923 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
924 auto ex = new HeaderMismatchException
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
925 ("Header in input does not match specified header.");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
926 findAdjacent!"a > b"(indices);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
927 ex.row = 1;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
928 ex.col = indices.front;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
929
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
930 throw ex;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
931 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
932 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
933 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
934
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
935 popFront();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
936 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
937
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
938 /**
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
939 * Part of an input range as defined by
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
940 * $(REF isInputRange, std,range,primitives).
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
941 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
942 * Returns:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
943 * If $(D Contents) is a struct, will be filled with record data.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
944 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
945 * If $(D Contents) is a class, will be filled with record data.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
946 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
947 * If $(D Contents) is an associative array, will be filled
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
948 * with record data.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
949 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
950 * If $(D Contents) is any other type, a $(LREF CsvRecord) will be
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
951 * returned.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
952 */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
953 @property auto front()
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
954 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
955 assert(!empty);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
956 static if (is(Contents == struct) || is(Contents == class))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
957 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
958 return recordContent;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
959 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
960 else static if (is(Contents T : T[U], U : string))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
961 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
962 return recordContent;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
963 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
964 else
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
965 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
966 return recordRange;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
967 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
968 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
969
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
970 /**
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
971 * Part of an input range as defined by
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
972 * $(REF isInputRange, std,range,primitives).
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
973 */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
974 @property bool empty() @safe @nogc pure nothrow const
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
975 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
976 return _empty;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
977 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
978
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
979 /**
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
980 * Part of an input range as defined by
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
981 * $(REF isInputRange, std,range,primitives).
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
982 *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
983 * Throws:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
984 * $(LREF CSVException) When a quote is found in an unquoted field,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
985 * data continues after a closing quote, the quoted field was not
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
986 * closed before data was empty, a conversion failed, or when the
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
987 * row's length does not match the previous length.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
988 */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
989 void popFront()
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
990 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
991 while (!recordRange.empty)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
992 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
993 recordRange.popFront();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
994 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
995
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
996 static if (ErrorLevel == Malformed.throwException)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
997 if (_input.rowLength == 0)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
998 _input.rowLength = _input.col;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
999
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1000 _input.col = 0;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1001
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1002 if (!_input.range.empty)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1003 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1004 if (_input.range.front == '\r')
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1005 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1006 _input.range.popFront();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1007 if (!_input.range.empty && _input.range.front == '\n')
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1008 _input.range.popFront();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1009 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1010 else if (_input.range.front == '\n')
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1011 _input.range.popFront();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1012 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1013
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1014 if (_input.range.empty)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1015 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1016 _empty = true;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1017 return;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1018 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1019
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1020 prime();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1021 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1022
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1023 private void prime()
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1024 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1025 if (_empty)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1026 return;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1027 _input.row++;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1028 static if (is(Contents == struct) || is(Contents == class))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1029 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1030 recordRange = typeof(recordRange)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1031 (_input, _separator, _quote, null);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1032 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1033 else
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1034 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1035 recordRange = typeof(recordRange)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1036 (_input, _separator, _quote, indices);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1037 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1038
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1039 static if (is(Contents T : T[U], U : string))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1040 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1041 T[U] aa;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1042 try
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1043 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1044 for (; !recordRange.empty; recordRange.popFront())
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1045 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1046 aa[header[_input.col-1]] = recordRange.front;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1047 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1048 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1049 catch (ConvException e)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1050 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1051 throw new CSVException(e.msg, _input.row, _input.col, e);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1052 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1053
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1054 recordContent = aa;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1055 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1056 else static if (is(Contents == struct) || is(Contents == class))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1057 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1058 static if (is(Contents == class))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1059 recordContent = new typeof(recordContent)();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1060 else
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1061 recordContent = typeof(recordContent).init;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1062 size_t colIndex;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1063 try
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1064 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1065 for (; !recordRange.empty;)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1066 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1067 auto colData = recordRange.front;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1068 scope(exit) colIndex++;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1069 if (indices.length > 0)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1070 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1071 foreach (ti, ToType; Fields!(Contents))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1072 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1073 if (indices[ti] == colIndex)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1074 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1075 static if (!isSomeString!ToType) skipWS(colData);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1076 recordContent.tupleof[ti] = to!ToType(colData);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1077 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1078 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1079 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1080 else
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1081 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1082 foreach (ti, ToType; Fields!(Contents))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1083 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1084 if (ti == colIndex)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1085 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1086 static if (!isSomeString!ToType) skipWS(colData);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1087 recordContent.tupleof[ti] = to!ToType(colData);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1088 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1089 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1090 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1091 recordRange.popFront();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1092 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1093 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1094 catch (ConvException e)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1095 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1096 throw new CSVException(e.msg, _input.row, colIndex, e);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1097 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1098 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1099 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1100 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1101
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1102 @safe pure unittest
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1103 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1104 import std.algorithm.comparison : equal;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1105
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1106 string str = `76;^26^;22`;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1107 int[] ans = [76,26,22];
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1108 auto records = CsvReader!(int,Malformed.ignore,string,char,string[])
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1109 (str, ';', '^');
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1110
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1111 foreach (record; records)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1112 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1113 assert(equal(record, ans));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1114 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1115 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1116
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1117 // Bugzilla 15545
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1118 // @system due to the catch for Throwable
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1119 @system pure unittest
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1120 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1121 import std.exception : assertNotThrown;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1122 enum failData =
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1123 "name, surname, age
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1124 Joe, Joker, 99\r";
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1125 auto r = csvReader(failData);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1126 assertNotThrown((){foreach (entry; r){}}());
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1127 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1128
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1129 /*
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1130 * This input range is accessible through $(LREF CsvReader) when the
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1131 * requested $(D Contents) type is not a structure, a class, or an associative array.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1132 */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
// Input range over the fields of one CSV record. Each field is read from the
// shared parser state in *_input and converted to Contents.
private struct CsvRecord(Contents, Malformed ErrorLevel, Range, Separator)
if (!is(Contents == class) && !is(Contents == struct))
{
    import std.array : appender;
private:
    // Parser state shared through a pointer; this struct reads and updates
    // its range, row, col and rowLength members.
    Input!(Range, ErrorLevel)* _input;
    Separator _separator;
    Separator _quote;
    // The converted value returned by front.
    Contents curContentsoken;
    // Raw characters of the token most recently read by csvNextToken.
    typeof(appender!(dchar[])()) _front;
    bool _empty;
    // After the constructor has normalized it: for each selected column, the
    // number of tokens to discard before reaching that column.
    size_t[] _popCount;
public:
    /*
     * Params:
     *      input = Pointer to a character input range
     *      delimiter = Separator for each column
     *      quote = Character used for quotation
     *      indices = An array containing which columns will be returned.
     *             If empty, all columns are returned. List must be in order.
     */
    this(Input!(Range, ErrorLevel)* input, Separator delimiter,
         Separator quote, size_t[] indices)
    {
        _input = input;
        _separator = delimiter;
        _quote = quote;
        _front = appender!(dchar[])();
        // Work on a copy so the caller's index array is left untouched.
        _popCount = indices.dup;

        // If a header was given, each call to popFront will need
        // to eliminate so many tokens. This calculates
        // how many will be skipped to get to the next header column
        // (e.g. indices [0, 2] become skip counts [0, 1]).
        size_t normalizer;
        foreach (ref c; _popCount)
        {
            static if (ErrorLevel == Malformed.ignore)
            {
                // If we are not throwing exceptions
                // a header may not exist, indices are sorted
                // and will be size_t.max if not found.
                if (c == size_t.max)
                    break;
            }
            c -= normalizer;
            normalizer += c + 1;
        }

        prime();
    }

    /**
     * Part of an input range as defined by
     * $(REF isInputRange, std,range,primitives).
     */
    @property Contents front() @safe pure
    {
        assert(!empty);
        return curContentsoken;
    }

    /**
     * Part of an input range as defined by
     * $(REF isInputRange, std,range,primitives).
     */
    @property bool empty() @safe pure nothrow @nogc const
    {
        return _empty;
    }

    /*
     * CsvRecord is complete when input
     * is empty or starts with record break
     */
    private bool recordEnd()
    {
        if (_input.range.empty
           || _input.range.front == '\n'
           || _input.range.front == '\r')
        {
            return true;
        }
        return false;
    }


    /**
     * Part of an input range as defined by
     * $(REF isInputRange, std,range,primitives).
     *
     * Throws:
     *       $(LREF CSVException) When a quote is found in an unquoted field,
     *       data continues after a closing quote, the quoted field was not
     *       closed before data was empty, a conversion failed, or when the
     *       row's length does not match the previous length.
     */
    void popFront()
    {
        static if (ErrorLevel == Malformed.throwException)
            import std.format : format;
        // Skip last of record when header is depleted.
        // A non-null but empty slice means indices were supplied and every
        // one of them has already been popped by prime().
        if (_popCount.ptr && _popCount.empty)
            while (!recordEnd())
            {
                prime(1);
            }

        if (recordEnd())
        {
            _empty = true;
            // At the end of a record the column count must equal the
            // recorded row length, when one has been recorded.
            static if (ErrorLevel == Malformed.throwException)
                if (_input.rowLength != 0)
                    if (_input.col != _input.rowLength)
                        throw new CSVException(
                           format("Row %s's length %s does not match "~
                                  "previous length of %s.", _input.row,
                                  _input.col, _input.rowLength));
            return;
        }
        else
        {
            // Mid-record it is only an error to have read too many columns.
            static if (ErrorLevel == Malformed.throwException)
                if (_input.rowLength != 0)
                    if (_input.col > _input.rowLength)
                        throw new CSVException(
                           format("Row %s's length %s does not match "~
                                  "previous length of %s.", _input.row,
                                  _input.col, _input.rowLength));
        }

        // Separator is left on the end of input from the last call.
        // This cannot be moved to after the call to csvNextToken as
        // there may be an empty record after it.
        // (recordEnd() returned false above, so the range is not empty here.)
        if (_input.range.front == _separator)
            _input.range.popFront();

        _front.shrinkTo(0);

        prime();
    }

    /*
     * Reads skipNum further tokens, discarding all but the last one,
     * which is left in _front.
     */
    private void prime(size_t skipNum)
    {
        foreach (i; 0 .. skipNum)
        {
            _input.col++;
            _front.shrinkTo(0);
            // The previous token may have consumed the rest of the input
            // (a final row shorter than the selected columns), so the range
            // must be checked before looking at its front.
            if (!_input.range.empty && _input.range.front == _separator)
                _input.range.popFront();

            try
                csvNextToken!(Range, ErrorLevel, Separator)
                                   (_input.range, _front, _separator, _quote,false);
            catch (IncompleteCellException ice)
            {
                // Attach the position and the partial field before rethrowing.
                ice.row = _input.row;
                ice.col = _input.col;
                ice.partialData = _front.data.idup;
                throw ice;
            }
            catch (ConvException e)
            {
                throw new CSVException(e.msg, _input.row, _input.col, e);
            }
        }
    }

    /*
     * Reads the next token, skips forward to the next selected column
     * if indices were supplied, and converts the result to Contents.
     */
    private void prime()
    {
        try
        {
            _input.col++;
            csvNextToken!(Range, ErrorLevel, Separator)
                (_input.range, _front, _separator, _quote,false);
        }
        catch (IncompleteCellException ice)
        {
            // Attach the position and the partial field before rethrowing.
            ice.row = _input.row;
            ice.col = _input.col;
            ice.partialData = _front.data.idup;
            throw ice;
        }

        auto skipNum = _popCount.empty ? 0 : _popCount.front;
        if (!_popCount.empty)
            _popCount.popFront();

        // size_t.max marks a selected column that was not found:
        // consume the rest of the record and report the range as empty.
        if (skipNum == size_t.max)
        {
            while (!recordEnd())
                prime(1);
            _empty = true;
            return;
        }

        if (skipNum)
            prime(skipNum);

        auto data = _front.data;
        // Leading whitespace is dropped only for non-string targets.
        static if (!isSomeString!Contents) skipWS(data);
        try curContentsoken = to!Contents(data);
        catch (ConvException e)
        {
            throw new CSVException(e.msg, _input.row, _input.col, e);
        }
    }
}
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1343
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
/**
 * Lower level control over parsing CSV
 *
 * Reads a single field from the front of the input into ans. The input is
 * consumed as it is read. After each call the input will start with either
 * a delimiter or record break (\n, \r\n, \r) which must be removed for
 * subsequent calls.
 *
 * Params:
 *       input = Any CSV input
 *       ans   = The first field in the input
 *       sep   = The character to represent a comma in the specification
 *       quote = The character to represent a quote in the specification
 *       startQuoted = Whether the input should be considered to already be in
 * quotes
 *
 * Throws:
 *       $(LREF IncompleteCellException) When a quote is found in an unquoted
 *       field, data continues after a closing quote, or the quoted field was
 *       not closed before data was empty.
 */
void csvNextToken(Range, Malformed ErrorLevel = Malformed.throwException,
                           Separator, Output)
                          (ref Range input, ref Output ans,
                           Separator sep, Separator quote,
                           bool startQuoted = false)
if (isSomeChar!Separator && isInputRange!Range
    && is(Unqual!(ElementType!Range) == dchar)
    && isOutputRange!(Output, dchar))
{
    // Nothing to read when the input is exhausted or already sits on a
    // record break.
    if (input.empty || input.front == '\n' || input.front == '\r')
        return;

    // inQuotes:     currently inside a quoted section.
    // pendingQuote: a quote was just seen inside a quoted section; it is
    //               either the closing quote or the first half of an
    //               escaped ("doubled") quote.
    // The two flags are never set at the same time.
    bool inQuotes = startQuoted;
    bool pendingQuote = false;

    // A leading quote opens a quoted field and is not part of the data.
    if (input.front == quote)
    {
        inQuotes = true;
        input.popFront();
    }

    for (; !input.empty; input.popFront())
    {
        assert(!(inQuotes && pendingQuote));

        if (inQuotes)
        {
            if (input.front == quote)
            {
                // Decide on the next character whether this closes the
                // field or escapes a quote.
                pendingQuote = true;
                inQuotes = false;
            }
            else
            {
                // Quoted, non-quote character
                ans.put(input.front);
            }
            continue;
        }

        // When not quoted the token ends at sep or at a record break
        if (input.front == sep || input.front == '\r' || input.front == '\n')
            break;

        if (pendingQuote)
        {
            if (input.front == quote)
            {
                // Doubled quote: emit one quote and stay inside the quotes.
                pendingQuote = false;
                inQuotes = true;
                ans.put(quote);
            }
            else
            {
                // The field was closed but more data follows it.
                static if (ErrorLevel == Malformed.throwException)
                    throw new IncompleteCellException(
                          "Content continues after end quote, " ~
                          "or needs to be escaped.");
                else static if (ErrorLevel == Malformed.ignore)
                    break;
                else
                    ans.put(input.front);
            }
        }
        else if (input.front == quote)
        {
            // Not quoted, but quote found
            static if (ErrorLevel == Malformed.throwException)
                throw new IncompleteCellException(
                      "Quote located in unquoted token");
            else static if (ErrorLevel == Malformed.ignore)
                ans.put(quote);
        }
        else
        {
            // Not quoted, non-quote character
            ans.put(input.front);
        }
    }

    // Still inside quotes once reading has stopped: the field was never closed.
    static if (ErrorLevel == Malformed.throwException)
        if (inQuotes && (input.empty || input.front == '\n' || input.front == '\r'))
            throw new IncompleteCellException(
                  "Data continues on future lines or trailing quote");

}
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1465
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
///
@safe unittest
{
    import std.array : appender;
    import std.range.primitives : popFront;

    string csv = "65,63\n123,3673";

    auto field = appender!(char[])();

    // The first field is read; the delimiter stays at the front of the input.
    csvNextToken(csv, field, ',', '"');
    assert(field.data == "65");
    assert(csv == ",63\n123,3673");

    // Drop the delimiter and clear the buffer before reading the next field.
    csv.popFront();
    field.shrinkTo(0);
    csvNextToken(csv, field, ',', '"');
    assert(field.data == "63");
    assert(csv == "\n123,3673");

    // A record break is removed by the caller in the same way.
    csv.popFront();
    field.shrinkTo(0);
    csvNextToken(csv, field, ',', '"');
    assert(field.data == "123");
    assert(csv == ",3673");
}
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1492
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
// Test csvNextToken on simplest form and correct format.
@safe pure unittest
{
    import std.array;

    // Two records of three fields; the first field starts with a character
    // outside the Basic Multilingual Plane.
    string csv = "\U00010143Hello,65,63.63\nWorld,123,3673.562";

    auto field = appender!(dchar[])();

    // Record 1, field 1 (Range given explicitly)
    csvNextToken!string(csv, field, ',', '"');
    assert(field.data == "\U00010143Hello");
    assert(csv == ",65,63.63\nWorld,123,3673.562");

    // Record 1, field 2
    csv.popFront();
    field.shrinkTo(0);
    csvNextToken(csv, field, ',', '"');
    assert(field.data == "65");
    assert(csv == ",63.63\nWorld,123,3673.562");

    // Record 1, field 3: reading stops at the record break
    csv.popFront();
    field.shrinkTo(0);
    csvNextToken(csv, field, ',', '"');
    assert(field.data == "63.63");
    assert(csv == "\nWorld,123,3673.562");

    // Record 2, field 1
    csv.popFront();
    field.shrinkTo(0);
    csvNextToken(csv, field, ',', '"');
    assert(field.data == "World");
    assert(csv == ",123,3673.562");

    // Record 2, field 2
    csv.popFront();
    field.shrinkTo(0);
    csvNextToken(csv, field, ',', '"');
    assert(field.data == "123");
    assert(csv == ",3673.562");

    // Record 2, field 3: the input is fully consumed
    csv.popFront();
    field.shrinkTo(0);
    csvNextToken(csv, field, ',', '"');
    assert(field.data == "3673.562");
    assert(csv == "");
}
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1535
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
// Quoted tokens: a doubled quote inside a quoted field yields a single quote,
// an empty quoted field yields an empty token, and a newline inside quotes
// stays part of the field.
@safe pure unittest
{
    import std.array;

    // One pull from the input: the token expected in the sink and the
    // input expected to remain afterwards.
    static struct Step
    {
        dstring token;
        string remaining;
    }

    immutable string lastTwo = "\"five\nnew line\"\nsix";
    string input = `one,two,"three ""quoted""","",` ~ lastTwo;

    const Step[] steps = [
        Step("one"d, `,two,"three ""quoted""","",` ~ lastTwo),
        Step("two"d, `,"three ""quoted""","",` ~ lastTwo),
        Step("three \"quoted\""d, `,"",` ~ lastTwo),
        Step(""d, "," ~ lastTwo),
        Step("five\nnew line"d, "\nsix"),
        Step("six"d, ""),
    ];

    auto sink = appender!(dchar[])();
    foreach (idx, step; steps)
    {
        if (idx != 0)
        {
            // The previous pull stopped in front of the separator (or record
            // break); step over it and reuse the sink.
            input.popFront();
            sink.shrinkTo(0);
        }

        csvNextToken(input, sink, ',', '"');
        assert(sink.data == step.token);
        assert(input == step.remaining);
    }
}
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1578
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
// A record ending in a separator: pulling again without stepping over the
// separator yields an empty token.
@safe pure unittest
{
    import std.array;

    string input = "one,";
    auto sink = appender!(dchar[])();

    csvNextToken(input, sink, ',', '"');
    assert(sink.data == "one");
    assert(input == ",");

    // The separator is deliberately left at the front of the input here.
    sink.shrinkTo(0);
    csvNextToken(input, sink, ',', '"');
    assert(sink.data.length == 0);
}
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1594
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
// Malformed input: the default error level throws IncompleteCellException,
// while Malformed.ignore accepts a stray quote as ordinary data.
@safe pure unittest
{
    import std.array;

    Appender!(dchar[]) sink;
    bool threw;

    // A quoted field that is never closed: everything is consumed and the
    // partial field is left in the sink when the exception is thrown.
    string input = "\"one\nnew line";
    threw = false;
    try
    {
        sink = appender!(dchar[])();
        csvNextToken(input, sink, ',', '"');
    }
    catch (IncompleteCellException e)
    {
        threw = true;
        assert(sink.data == "one\nnew line");
        assert(input == "");
    }
    assert(threw);

    // A quote showing up in an unquoted field: parsing stops at the quote,
    // which stays in the input.
    input = "Hello world\"";
    threw = false;
    try
    {
        sink = appender!(dchar[])();
        csvNextToken(input, sink, ',', '"');
    }
    catch (IncompleteCellException e)
    {
        threw = true;
        assert(sink.data == "Hello world");
        assert(input == "\"");
    }
    assert(threw);

    // With Malformed.ignore the same kind of input is passed through as is.
    input = "one, two \"quoted\" end";

    sink = appender!(dchar[])();
    csvNextToken!(string, Malformed.ignore)(input, sink, ',', '"');
    assert(sink.data == "one");

    input.popFront();
    sink.shrinkTo(0);
    csvNextToken!(string, Malformed.ignore)(input, sink, ',', '"');
    assert(sink.data == " two \"quoted\" end");
}
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1639
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
// Custom delimiters: '|' separates fields and '/' quotes them, so a double
// quote character inside a field is plain data.
@safe pure unittest
{
    import std.array;

    enum char separator = '|';
    enum char quote = '/';

    // One pull from the input: the token expected in the sink and the
    // input expected to remain afterwards.
    static struct Step
    {
        dstring token;
        string remaining;
    }

    string input = `one|two|/three "quoted"/|//`;

    const Step[] steps = [
        Step("one"d, `|two|/three "quoted"/|//`),
        Step("two"d, `|/three "quoted"/|//`),
        Step(`three "quoted"`d, `|//`),
    ];

    auto sink = appender!(dchar[])();
    foreach (idx, step; steps)
    {
        if (idx != 0)
        {
            // Step over the separator left by the previous pull.
            input.popFront();
            sink.shrinkTo(0);
        }

        csvNextToken(input, sink, separator, quote);
        assert(sink.data == step.token);
        assert(input == step.remaining);
    }

    // The final field is an empty quoted field; only the token is checked.
    input.popFront();
    sink.shrinkTo(0);
    csvNextToken(input, sink, separator, quote);
    assert(sink.data == ""d);
}
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1669
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
// Bugzilla 8908
// Reads numeric fields that carry surrounding whitespace, once into a struct
// per record and once as a range of reals per record. Both passes also check
// the number of records, so the test cannot pass by reading nothing.
@safe pure unittest
{
    // NOTE(review): the second row may carry leading whitespace in the
    // upstream file that was lost in this copy -- confirm.
    string csv = ` 1.0, 2.0, 3.0
4.0, 5.0, 6.0`;

    // Integer value of every field, record by record.
    const int[][] expected = [[1, 2, 3], [4, 5, 6]];

    static struct Data { real a, b, c; }
    size_t i = 0;
    foreach (data; csvReader!Data(csv)) with (data)
    {
        // Reject surplus records instead of indexing past the table.
        assert(i < expected.length);
        int[] row = [cast(int) a, cast(int) b, cast(int) c];
        assert(row == expected[i]);
        ++i;
    }
    // Every expected record must have been produced.
    assert(i == expected.length);

    i = 0;
    foreach (data; csvReader!real(csv))
    {
        assert(i < expected.length);
        // Each record is itself a range of fields; pull the three by hand.
        auto a = data.front; data.popFront();
        auto b = data.front; data.popFront();
        auto c = data.front;
        int[] row = [cast(int) a, cast(int) b, cast(int) c];
        assert(row == expected[i]);
        ++i;
    }
    assert(i == expected.length);
}