1 //Written in the D programming language
2
3 /**
4 * Implements functionality to read Comma Separated Values and its variants
5 * from an $(REF_ALTTEXT input range, isInputRange, std,range,primitives) of `dchar`.
6 *
7 * Comma Separated Values provide a simple means to transfer and store
8 * tabular data. It has been common for programs to use their own
9 * variant of the CSV format. This parser will loosely follow the
10 * $(HTTP tools.ietf.org/html/rfc4180, RFC-4180). CSV input should adhere
11 * to the following criteria (differences from RFC-4180 in parentheses):
12 *
13 * $(UL
14 * $(LI A record is separated by a new line (CRLF,LF,CR))
15 * $(LI A final record may end with a new line)
16 * $(LI A header may be provided as the first record in input)
17 * $(LI A record has fields separated by a comma (customizable))
18 * $(LI A field containing new lines, commas, or double quotes
19 * should be enclosed in double quotes (customizable))
20 * $(LI Double quotes in a field are escaped with a double quote)
21 * $(LI Each record should contain the same number of fields)
22 * )
23 *
24 * Example:
25 *
26 * -------
27 * import std.algorithm;
28 * import std.array;
29 * import std.csv;
30 * import std.stdio;
31 * import std.typecons;
32 *
33 * void main()
34 * {
35 * auto text = "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n";
36 *
37 * foreach (record; csvReader!(Tuple!(string, string, int))(text))
38 * {
39 * writefln("%s works as a %s and earns $%d per year",
40 * record[0], record[1], record[2]);
41 * }
42 *
43 * // To read the same string from the file "filename.csv":
44 *
45 * auto file = File("filename.csv", "r");
46 * foreach (record;
47 * file.byLine.joiner("\n").csvReader!(Tuple!(string, string, int)))
48 * {
49 * writefln("%s works as a %s and earns $%d per year",
50 * record[0], record[1], record[2]);
51 * }
 * }
54 * -------
55 *
 * When an input contains a header the `Contents` can be specified as an
 * associative array. Pass `null` as the header argument to signify that a
 * header is present.
58 *
59 * -------
60 * auto text = "Name,Occupation,Salary\r" ~
61 * "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n";
62 *
63 * foreach (record; csvReader!(string[string])
64 * (text, null))
65 * {
66 * writefln("%s works as a %s and earns $%s per year.",
67 * record["Name"], record["Occupation"],
68 * record["Salary"]);
69 * }
70 *
71 * // To read the same string from the file "filename.csv":
72 *
73 * auto file = File("filename.csv", "r");
74 *
75 * foreach (record; csvReader!(string[string])
76 * (file.byLine.joiner("\n"), null))
77 * {
78 * writefln("%s works as a %s and earns $%s per year.",
79 * record["Name"], record["Occupation"],
80 * record["Salary"]);
81 * }
82 * -------
83 *
 * This module allows content to be iterated by record stored in a struct,
 * class, associative array, or as a range of fields. Upon detection of an
 * error a `CSVException` is thrown (this can be disabled). `csvNextToken`
 * has been made public to allow for attempted recovery.
88 *
89 * Disabling exceptions will lift many restrictions specified above. A quote
90 * can appear in a field if the field was not quoted. If in a quoted field any
91 * quote by itself, not at the end of a field, will end processing for that
92 * field. The field is ended when there is no input, even if the quote was not
93 * closed.
94 *
95 * See_Also:
96 * $(HTTP en.wikipedia.org/wiki/Comma-separated_values, Wikipedia
97 * Comma-separated values)
98 *
99 * Copyright: Copyright 2011
100 * License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
101 * Authors: Jesse Phillips
102 * Source: $(PHOBOSSRC std/csv.d)
103 */
104 module std.csv;
105
106 import std.conv;
107 import std.exception : basicExceptionCtors;
108 import std.range.primitives;
109 import std.traits;
110
/**
 * Base exception for CSV errors; it carries the row and column that were
 * being processed when the exception was thrown.
 *
 * Both `row` and `col` are numbered starting at one and refer to the
 * location in the file rather than to any specified header. A failure to
 * match the header needs special consideration, see $(LREF
 * HeaderMismatchException) for details.
 *
 * When a type conversion fails, the originating
 * $(REF ConvException, std,conv) is stored in the `next` field.
 */
class CSVException : Exception
{
    ///
    size_t row;
    ///
    size_t col;

    // FIXME: Use std.exception.basicExceptionCtors here once
    // https://issues.dlang.org/show_bug.cgi?id=11500 is fixed

    // Message-only form; `row` and `col` keep their default of zero.
    this(string msg, string file = __FILE__, size_t line = __LINE__,
         Throwable next = null) @nogc @safe pure nothrow
    {
        super(msg, file, line, next);
    }

    // Chaining form: `next` is given directly after the message.
    this(string msg, Throwable next, string file = __FILE__,
         size_t line = __LINE__) @nogc @safe pure nothrow
    {
        super(msg, file, line, next);
    }

    // Positional form: additionally records where the error was detected.
    this(string msg, size_t row, size_t col, Throwable next = null,
         string file = __FILE__, size_t line = __LINE__) @nogc @safe pure nothrow
    {
        super(msg, next, file, line);
        this.row = row;
        this.col = col;
    }

    /// Renders the exception as `(Row: R, Col: C) message`.
    override string toString() @safe pure const
    {
        return text("(Row: ", row, ", Col: ", col, ") ", msg);
    }
}
156
///
@safe unittest
{
    import std.algorithm.searching : count;
    import std.exception : collectException;

    // The second row holds two fields while the first row holds three.
    string text = "a,b,c\nHello,65";
    auto thrown = collectException!CSVException(text.csvReader.count);
    assert(thrown.toString ==
        "(Row: 0, Col: 0) Row 2's length 2 does not match previous length of 3.");
}
166
///
@safe unittest
{
    import std.algorithm.searching : count;
    import std.exception : collectException;
    import std.typecons : Tuple;

    alias Row = Tuple!(string, int);

    // The second field of the first row ('b') cannot be converted to int.
    string text = "a,b\nHello,65";
    auto thrown = collectException!CSVException(csvReader!Row(text).count);
    assert(thrown.toString ==
        "(Row: 1, Col: 2) Unexpected 'b' when converting from type string to type int");
}
177
@safe pure unittest
{
    import std.string;

    // The chaining constructor keeps the originating exception.
    auto cause = new Exception("Foobar");
    auto chained = new CSVException("args", cause);
    assert(chained.next is cause);

    // The positional constructor records row and column ...
    size_t expectedRow = 13;
    size_t expectedCol = 37;
    auto located = new CSVException("argv", expectedRow, expectedCol);
    assert(located.row == expectedRow);
    assert(located.col == expectedCol);

    // ... and both numbers show up in the rendered message.
    auto rendered = located.toString();
    assert(rendered.indexOf("13") != -1);
    assert(rendered.indexOf("37") != -1);
}
196
/**
 * Exception thrown when a token is found to be incomplete: a quote appears
 * in an unquoted field, data continues after a closing quote, or a quoted
 * field was still open when the input ran out.
 */
class IncompleteCellException : CSVException
{
    mixin basicExceptionCtors;

    /**
     * Data pulled from the input before the problem was found.
     *
     * This field is populated when using $(LREF csvReader) but not by
     * $(LREF csvNextToken), because in that case the data has already
     * been fed to the output range.
     */
    dstring partialData;
}
215
///
@safe unittest
{
    import std.exception : assertThrown;

    // The quote opened in front of `b` is never closed.
    string text = "a,\"b,c\nHello,65,2.5";
    assertThrown!IncompleteCellException(csvReader(text, ["a","b","c"]));
}
223
@safe pure unittest
{
    // The mixed-in chaining constructor keeps the originating exception.
    auto cause = new Exception("Foobar");
    auto chained = new IncompleteCellException("args", cause);
    assert(chained.next is cause);
}
230
/**
 * Exception whose trigger conditions depend on the type of `Contents`.
 *
 * Structure, Class, and Associative Array
 * $(UL
 *     $(LI When a header is provided but a matching column is not found)
 * )
 *
 * Other
 * $(UL
 *     $(LI When a header is provided but a matching column is not found)
 *     $(LI Order did not match that found in the input)
 * )
 *
 * A row and column carry no meaning when a column named in the header is
 * missing from the data, so in that case both `row` and `col` are zero.
 * Otherwise `row` is always one and `col` is the first instance found in
 * the header that occurred before the previous one, counted from one.
 */
class HeaderMismatchException : CSVException
{
    // Provides the two standard exception constructors.
    mixin basicExceptionCtors;
}
255
///
@safe unittest
{
    import std.exception : assertThrown;

    // The input has no column named "invalid".
    string text = "a,b,c\nHello,65,2.5";
    assertThrown!HeaderMismatchException(csvReader(text, ["b","c","invalid"]));
}
263
@safe pure unittest
{
    // The mixed-in chaining constructor keeps the originating exception.
    auto cause = new Exception("Foobar");
    auto chained = new HeaderMismatchException("args", cause);
    assert(chained.next is cause);
}
270
/**
 * Selects what happens when an error is detected in the input.
 *
 * With exceptions disabled the following rules apply:
 * $(UL
 *     $(LI A quote can appear in a field if the field was not quoted.)
 *     $(LI If in a quoted field any quote by itself, not at the end of a
 *     field, will end processing for that field.)
 *     $(LI The field is ended when there is no input, even if the quote was
 *     not closed.)
 *     $(LI If the given header does not match the order in the input, the
 *     content will return as it is found in the input.)
 *     $(LI If the given header contains columns not found in the input they
 *     will be ignored.)
 * )
 */
enum Malformed
{
    /// No exceptions are thrown due to incorrect CSV.
    ignore,
    /// Use exceptions when input has incorrect CSV.
    throwException
}
292
///
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.searching : count;
    import std.exception : assertThrown;

    // The last field opens a quote that is never closed.
    string text = "a,b,c\nHello,65,\"2.5";
    assertThrown!IncompleteCellException(csvReader(text).count);

    // ignore the exceptions and try to handle invalid CSV
    auto lenient = csvReader!(string, Malformed.ignore)(text, null);
    auto firstLine = lenient.front;
    assert(equal(firstLine, ["Hello", "65", "2.5"]));
}
307
/**
Returns an $(REF_ALTTEXT input range, isInputRange, std,range,primitives)
for iterating over records found in `input`.

An optional `header` can be provided. The first record will then be read in
as the header. If `Contents` is a struct then the header provided is
expected to correspond to the fields in the struct. When `Contents` is
not a type which can contain the entire record, the `header` must be
provided in the same order as the input or an exception is thrown.

Params:
    input = the range of `dchar` to parse
    header = names of the columns to read, or `null` to read every column
             of an input that starts with a header record
    delimiter = character separating the fields of a record
    quote = character used to enclose quoted fields
    allowInconsistentDelimiterCount = pass `true` to allow a variable
             amount of columns per record

Returns:
    An input range R as defined by
    $(REF isInputRange, std,range,primitives). When `Contents` is a
    struct, class, or an associative array, the element type of R is
    `Contents`, otherwise the element type of R is itself a range with
    element type `Contents`.

    If a `header` argument is provided,
    the returned range provides a `header` field for accessing the header
    from the input in array form.

Throws:
    $(LREF CSVException) When a quote is found in an unquoted field,
    data continues after a closing quote, the quoted field was not
    closed before data was empty, a conversion failed, or when the row's
    length does not match the previous length.

    $(LREF HeaderMismatchException) when a header is provided but a
    matching column is not found or the order did not match that found in
    the input. Read the exception documentation for specific details of
    when the exception is thrown for different types of `Contents`.
*/
auto csvReader(Contents = string,
               Malformed ErrorLevel = Malformed.throwException,
               Range, Separator = char)
              (Range input,
               Separator delimiter = ',', Separator quote = '"',
               bool allowInconsistentDelimiterCount = false)
if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar)
    && isSomeChar!(Separator)
    && !is(Contents T : T[U], U : string))
{
    // The reader works on the unqualified element type of the input.
    alias Element = Unqual!(ElementType!Range);
    alias Reader = CsvReader!(Contents, ErrorLevel, Range, Element, string[]);

    return Reader(input, delimiter, quote, allowInconsistentDelimiterCount);
}
351
/// ditto
auto csvReader(Contents = string,
               Malformed ErrorLevel = Malformed.throwException,
               Range, Header, Separator = char)
              (Range input, Header header,
               Separator delimiter = ',', Separator quote = '"',
               bool allowInconsistentDelimiterCount = false)
if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar)
    && isSomeChar!(Separator)
    && isForwardRange!Header
    && isSomeString!(ElementType!Header))
{
    // Overload taking a forward range of column names as the header.
    alias Element = Unqual!(ElementType!Range);
    alias Reader = CsvReader!(Contents, ErrorLevel, Range, Element, Header);

    return Reader(input, header, delimiter, quote,
                  allowInconsistentDelimiterCount);
}
368
/// ditto
auto csvReader(Contents = string,
               Malformed ErrorLevel = Malformed.throwException,
               Range, Header, Separator = char)
              (Range input, Header header,
               Separator delimiter = ',', Separator quote = '"',
               bool allowInconsistentDelimiterCount = false)
if (isInputRange!Range && is(immutable ElementType!Range == immutable dchar)
    && isSomeChar!(Separator)
    && is(Header : typeof(null)))
{
    // Overload for a literal `null` header: forwarded as an empty
    // `string[]` header to the reader.
    alias Element = Unqual!(ElementType!Range);
    alias Reader = CsvReader!(Contents, ErrorLevel, Range, Element, string[]);

    return Reader(input, cast(string[]) null, delimiter, quote,
                  allowInconsistentDelimiterCount);
}
385
386
/**
The `Contents` of the input can be provided if all the records are of the
same type, such as all integer data:
*/
@safe unittest
{
    import std.algorithm.comparison : equal;

    string text = "76,26,22";
    int[][] expected = [[76, 26, 22]];

    auto records = csvReader!int(text);
    assert(equal!equal(records, expected));
}
400
/**
Reading into a struct while using a modified delimiter:
*/
@safe unittest
{
    import std.algorithm.comparison : equal;

    struct Layout
    {
        string name;
        int value;
        double other;
    }

    string text = "Hello;65;2.5\nWorld;123;7.5";
    auto expected = [
        Layout("Hello", 65, 2.5),
        Layout("World", 123, 7.5),
    ];

    auto records = csvReader!Layout(text, ';');
    assert(equal(records, expected));
}
421
/**
Specifying `ErrorLevel` as $(LREF Malformed.ignore) will lift restrictions
on the format. This example shows that no exception is thrown when a quote
is found in a field that is not quoted.
*/
@safe unittest
{
    string text = "A \" is now part of the data";

    auto firstRecord = csvReader!(string, Malformed.ignore)(text).front;
    assert(firstRecord.front == text);
}
435
/// Read only column "b"
@safe unittest
{
    import std.algorithm.comparison : equal;

    string text = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
    int[][] expected = [[65], [123]];

    auto records = csvReader!int(text, ["b"]);
    assert(equal!equal(records, expected));
}
448
/// Read while rearranging the columns by specifying a header with a different order
@safe unittest
{
    import std.algorithm.comparison : equal;

    struct Layout
    {
        int value;
        double other;
        string name;
    }

    string text = "a,b,c\nHello,65,2.5\nWorld,123,7.5";
    auto expected = [
        Layout(65, 2.5, "Hello"),
        Layout(123, 7.5, "World")
    ];

    auto records = csvReader!Layout(text, ["b","c","a"]);
    assert(equal(records, expected));
}
467
/**
The header can also be left empty if the input contains a header row
and all columns should be iterated.
The header from the input can always be accessed from the `header` field.
*/
@safe unittest
{
    string text = "a,b,c\nHello,65,63.63";

    auto records = csvReader(text, null);
    string[] expectedHeader = ["a","b","c"];
    assert(records.header == expectedHeader);
}
480
/**
Handcrafted csv files tend to have a variable amount of columns.

By default `std.csv` will throw if the number of columns on a line
is unequal to the number of columns of the first line.
To allow, or disallow, a variable amount of columns a `bool` can be passed to
all overloads of the `csvReader` function as shown below.
*/
@safe unittest
{
    import std.algorithm.comparison : equal;

    string text = "76,26,22\n1,2\n3,4,5,6";
    int[][] expected = [
        [76, 26, 22],
        [1, 2],
        [3, 4, 5, 6]
    ];

    auto records = csvReader!int(text, ',', '"', true);
    assert(equal!equal(records, expected));
}
502
/// ditto
@safe unittest
{
    import std.algorithm.comparison : equal;

    static struct Three
    {
        int a;
        int b;
        int c;
    }

    string text = "76,26,22\n1,2\n3,4,5,6";
    // Fields missing from a row stay at their default; extra ones are dropped.
    auto expected = [
        Three(76, 26, 22),
        Three(1, 2, 0),
        Three(3, 4, 5)
    ];

    auto records = csvReader!Three(text, ',', '"', true);
    assert(equal(records, expected));
}
524
/// ditto
@safe unittest
{
    import std.algorithm.comparison : equal;

    auto text = "Name,Occupation,Salary\r" ~
        "Joe,Carpenter,300000\nFred,Blacksmith\r\n";

    // The second record has no "Salary" entry.
    auto expected = [
        [ "Name" : "Joe", "Occupation" : "Carpenter", "Salary" : "300000" ],
        [ "Name" : "Fred", "Occupation" : "Blacksmith" ]
    ];

    auto records = csvReader!(string[string])(text, null, ',', '"', true);
    assert(equal(records, expected));
}
540
// Test standard iteration over input.
@safe pure unittest
{
    string str = `one,"two ""quoted"""` ~ "\n\"three\nnew line\",\nfive,six";

    // Count every cell of every record.
    int cells;
    foreach (record; csvReader(str))
        foreach (cell; record)
            ++cells;

    assert(cells == 6);
}
557
// Test newline on last record
@safe pure unittest
{
    string str = "one,two\nthree,four\n";
    auto records = csvReader(str);

    // Step past both records; the trailing newline must not add a third.
    foreach (step; 0 .. 2)
        records.popFront();

    assert(records.empty);
}
567
// Test shorter row length
@safe pure unittest
{
    wstring str = "one,1\ntwo\nthree"w;
    struct Layout
    {
        string name;
        int value;
    }

    // Rows lacking the second field keep the default value of zero.
    Layout[3] ans;
    ans[0].name = "one";
    ans[0].value = 1;
    ans[1].name = "two";
    ans[1].value = 0;
    ans[2].name = "three";
    ans[2].value = 0;

    auto records = csvReader!(Layout,Malformed.ignore)(str);

    int count;
    foreach (record; records)
    {
        assert(ans[count].name == record.name);
        assert(ans[count].value == record.value);
        count++;
    }
    // Guard against the loop passing vacuously on a truncated result.
    assert(count == ans.length);
}
596
// Test shorter row length exception
@safe pure unittest
{
    import std.exception;

    struct A
    {
        string a,b,c;
    }

    // Every input contains rows of differing length.
    auto unevenInputs = [
        "one,1\ntwo",
        "one\ntwo,2,二\nthree,3,三",
        "one\ntwo,2\nthree,3",
        "one,1\ntwo\nthree,3",
    ];

    foreach (input; unevenInputs)
    {
        auto records = csvReader!A(input);
        // Draining the reader must hit the mismatching row.
        assertThrown!CSVException((){ foreach (record; records) { } }());
    }
}
618
619
// Test structure conversion interface with unicode.
@safe pure unittest
{
    import std.math.algebraic : abs;

    struct Layout
    {
        string name;
        int value;
        double other;
    }

    wstring str = "\U00010143Hello,65,63.63\nWorld,123,3673.562"w;

    Layout[2] ans = [
        Layout("\U00010143Hello", 65, 63.63),
        Layout("World", 123, 3673.562),
    ];

    int seen;
    foreach (record; csvReader!Layout(str))
    {
        assert(record.name == ans[seen].name);
        assert(record.value == ans[seen].value);
        assert(abs(ans[seen].other - record.other) < 0.00001);
        ++seen;
    }
    assert(seen == ans.length);
}
653
// Test input conversion interface
@safe pure unittest
{
    import std.algorithm;

    string str = `76,26,22`;
    int[] expected = [76,26,22];

    // Every record read must convert to the expected integers.
    foreach (record; csvReader!int(str))
        assert(equal(record, expected));
}
667
// Test struct & header interface and same unicode
@safe unittest
{
    import std.math.algebraic : abs;

    struct Layout
    {
        int value;
        double other;
        string name;
    }

    string str = "a,b,c\nHello,65,63.63\n➊➋➂❹,123,3673.562";

    // The header maps column "b" to value, "c" to other and "a" to name.
    auto records = csvReader!Layout(str, ["b","c","a"]);

    Layout[2] ans = [
        Layout(65, 63.63, "Hello"),
        Layout(123, 3673.562, "➊➋➂❹"),
    ];

    int seen;
    foreach (record; records)
    {
        assert(record.name == ans[seen].name);
        assert(record.value == ans[seen].value);
        assert(abs(ans[seen].other - record.other) < 0.00001);
        ++seen;
    }
    assert(seen == ans.length);
}
702
// Test header interface
@safe unittest
{
    import std.algorithm;

    string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";

    // Selecting a single column by name.
    auto expectedInts = [[65],[123]];
    foreach (record; csvReader!int(str, ["b"]))
    {
        assert(equal(record, expectedInts.front));
        expectedInts.popFront();
    }

    // A header whose order differs from the input throws.
    try
    {
        csvReader(str, ["c","b"]);
        assert(0);
    }
    catch (HeaderMismatchException e)
    {
        assert(e.col == 2);
    }

    // With Malformed.ignore the columns come back in input order.
    auto lenient = csvReader!(string,Malformed.ignore)
                       (str, ["b","a"], ',', '"');
    auto expectedCells = [["Hello","65"],["World","123"]];
    foreach (record; lenient)
    {
        assert(equal(record, expectedCells.front));
        expectedCells.popFront();
    }

    // Header names missing from the input are ignored.
    str = "a,c,e\nJoe,Carpenter,300000\nFred,Fly,4";
    lenient = csvReader!(string,Malformed.ignore)
                  (str, ["a","b","c","d"], ',', '"');
    expectedCells = [["Joe","Carpenter"],["Fred","Fly"]];
    foreach (record; lenient)
    {
        assert(equal(record, expectedCells.front));
        expectedCells.popFront();
    }
}
748
// Test null header interface
@safe unittest
{
    string str = "a,b,c\nHello,65,63.63\nWorld,123,3673.562";
    string[] expectedHeader = ["a","b","c"];

    // A partial header still exposes the full header found in the input.
    auto records = csvReader(str, ["a"]);
    assert(records.header == expectedHeader);

    // A literal null header reads the header row from the input.
    auto fromNull = csvReader(str, null);
    assert(fromNull.header == expectedHeader);

    // A typed null header behaves the same way.
    auto fromTypedNull = csvReader(str, cast(string[]) null);
    assert(fromTypedNull.header == expectedHeader);
}
757
// Test unchecked read
@safe pure unittest
{
    // Quotes inside an unquoted field are kept as data.
    string str = "one \"quoted\"";
    foreach (record; csvReader!(string,Malformed.ignore)(str))
        foreach (cell; record)
            assert(cell == "one \"quoted\"");

    struct Ans
    {
        string a,b;
    }

    // The same holds when reading into a struct.
    str = "one \"quoted\",two \"quoted\" end";
    foreach (record; csvReader!(Ans,Malformed.ignore)(str))
    {
        assert(record.a == "one \"quoted\"");
        assert(record.b == "two \"quoted\" end");
    }
}
781
// Test partial data returned
@safe pure unittest
{
    // The opening quote is never closed.
    string str = "\"one\nnew line";

    try
    {
        foreach (record; csvReader(str)) { }
        assert(0);
    }
    catch (IncompleteCellException incomplete)
    {
        // Everything read before the input ran out is reported.
        assert(incomplete.partialData == "one\nnew line");
    }
}
798
// Test Windows line break
@safe pure unittest
{
    string str = "one,two\r\nthree";
    auto records = csvReader(str);

    // First record: both fields, terminated by CRLF.
    auto row = records.front;
    assert(row.front == "one");
    row.popFront();
    assert(row.front == "two");

    // Second record starts right after the CRLF.
    records.popFront();
    row = records.front;
    assert(row.front == "three");
}
813
814
// Test associative array support with unicode separator
@safe unittest
{
    string str = "1❁2❁3\n34❁65❁63\n34❁65❁63";

    int seen;
    foreach (record; csvReader!(string[string])(str, ["3","1"], '❁'))
    {
        ++seen;
        assert(record["1"] == "34");
        assert(record["3"] == "63");
    }
    assert(seen == 2);
}
830
// Test restricted range
@safe unittest
{
    import std.typecons;

    // Exposes nothing beyond the three input range primitives.
    struct InputRange
    {
        dstring text;

        this(dstring txt)
        {
            text = txt;
        }

        @property bool empty()
        {
            return text.length == 0;
        }

        void popFront()
        {
            text.popFront();
        }

        @property dchar front()
        {
            return text[0];
        }
    }

    auto ir = InputRange("Name,Occupation,Salary\r"d~
            "Joe,Carpenter,300000\nFred,Blacksmith,400000\r\n"d);

    // Range-of-fields records.
    foreach (record; csvReader(ir, cast(string[]) null))
    {
        foreach (cell; record) { }
    }

    // Tuple records.
    foreach (record; csvReader!(Tuple!(string, string, int))
             (ir, cast(string[]) null)) { }

    // Associative array records.
    foreach (record; csvReader!(string[string])
             (ir, cast(string[]) null)) { }
}
869
@safe unittest // const/immutable dchars
{
    import std.algorithm.iteration : map;
    import std.array : array;

    string[][] expected = [["foo", "bar"]];

    const(dchar)[] constInput = "foo,bar\n";
    assert(csvReader(constInput).map!array.array == expected);

    immutable(dchar)[] immutableInput = "foo,bar\n";
    assert(csvReader(immutableInput).map!array.array == expected);
}
879
/*
 * Parsing state for one CSV input.
 *
 * This struct is stored on the heap for when the structures that refer to
 * it are passed around.
 */
private pure struct Input(Range, Malformed ErrorLevel)
{
    // The underlying input.
    Range range;
    // Position bookkeeping, maintained by the code using this struct.
    size_t row;
    size_t col;
    // Only present when malformed input is reported via exceptions.
    static if (ErrorLevel == Malformed.throwException)
    {
        size_t rowLength;
    }
}
891
892 /*
893 * Range for iterating CSV records.
894 *
895 * This range is returned by the $(LREF csvReader) functions. It can be
896 * created in a similar manner to allow `ErrorLevel` be set to $(LREF
897 * Malformed).ignore if best guess processing should take place.
898 */
899 private struct CsvReader(Contents, Malformed ErrorLevel, Range, Separator, Header)
900 if (isSomeChar!Separator && isInputRange!Range
901 && is(immutable ElementType!Range == immutable dchar)
902 && isForwardRange!Header && isSomeString!(ElementType!Header))
903 {
904 private:
905 Input!(Range, ErrorLevel)* _input;
906 Separator _separator;
907 Separator _quote;
908 size_t[] indices;
909 bool _empty;
910 bool _allowInconsistentDelimiterCount;
911 static if (is(Contents == struct) || is(Contents == class))
912 {
913 Contents recordContent;
914 CsvRecord!(string, ErrorLevel, Range, Separator) recordRange;
915 }
916 else static if (is(Contents T : T[U], U : string))
917 {
918 Contents recordContent;
919 CsvRecord!(T, ErrorLevel, Range, Separator) recordRange;
920 }
921 else
922 CsvRecord!(Contents, ErrorLevel, Range, Separator) recordRange;
923 public:
924 /**
925 * Header from the input in array form.
926 *
927 * -------
928 * string str = "a,b,c\nHello,65,63.63";
929 * auto records = csvReader(str, ["a"]);
930 *
931 * assert(records.header == ["a","b","c"]);
932 * -------
933 */
934 string[] header;
935
936 /**
937 * Constructor to initialize the input, delimiter and quote for input
938 * without a header.
939 *
940 * -------
941 * string str = `76;^26^;22`;
942 * int[] ans = [76,26,22];
943 * auto records = CsvReader!(int,Malformed.ignore,string,char,string[])
944 * (str, ';', '^');
945 *
946 * foreach (record; records)
947 * {
948 * assert(equal(record, ans));
949 * }
950 * -------
951 */
this(Range input,Separator delimiter,Separator quote,bool allowInconsistentDelimiterCount)952 this(Range input, Separator delimiter, Separator quote,
953 bool allowInconsistentDelimiterCount)
954 {
955 _input = new Input!(Range, ErrorLevel)(input);
956 _separator = delimiter;
957 _quote = quote;
958 _allowInconsistentDelimiterCount = allowInconsistentDelimiterCount;
959
960 if (_input.range.empty)
961 {
962 _empty = true;
963 return;
964 }
965
966 prime();
967 }
968
969 /**
970 * Constructor to initialize the input, delimiter and quote for input
971 * with a header.
972 *
973 * -------
974 * string str = `high;mean;low\n76;^26^;22`;
975 * auto records = CsvReader!(int,Malformed.ignore,string,char,string[])
976 * (str, ["high","low"], ';', '^');
977 *
978 * int[] ans = [76,22];
979 * foreach (record; records)
980 * {
981 * assert(equal(record, ans));
982 * }
983 * -------
984 *
985 * Throws:
986 * $(LREF HeaderMismatchException) when a header is provided but a
987 * matching column is not found or the order did not match that found
988 * in the input (non-struct).
989 */
this(Range input,Header colHeaders,Separator delimiter,Separator quote,bool allowInconsistentDelimiterCount)990 this(Range input, Header colHeaders, Separator delimiter, Separator quote,
991 bool allowInconsistentDelimiterCount)
992 {
993 _input = new Input!(Range, ErrorLevel)(input);
994 _separator = delimiter;
995 _quote = quote;
996 _allowInconsistentDelimiterCount = allowInconsistentDelimiterCount;
997
998 if (_input.range.empty)
999 {
1000 _empty = true;
1001 return;
1002 }
1003
1004 size_t[string] colToIndex;
1005 foreach (h; colHeaders)
1006 {
1007 colToIndex[h] = size_t.max;
1008 }
1009
1010 auto r = CsvRecord!(string, ErrorLevel, Range, Separator)
1011 (_input, _separator, _quote, indices,
1012 _allowInconsistentDelimiterCount);
1013
1014 size_t colIndex;
1015 foreach (col; r)
1016 {
1017 header ~= col;
1018 auto ptr = col in colToIndex;
1019 if (ptr)
1020 *ptr = colIndex;
1021 colIndex++;
1022 }
1023 // The above loop empties the header row.
1024 recordRange._empty = true;
1025 recordRange._allowInconsistentDelimiterCount =
1026 allowInconsistentDelimiterCount;
1027
1028 indices.length = colToIndex.length;
1029 int i;
1030 foreach (h; colHeaders)
1031 {
1032 immutable index = colToIndex[h];
1033 static if (ErrorLevel != Malformed.ignore)
1034 if (index == size_t.max)
1035 throw new HeaderMismatchException
1036 ("Header not found: " ~ to!string(h));
1037 indices[i++] = index;
1038 }
1039
1040 static if (!is(Contents == struct) && !is(Contents == class))
1041 {
1042 static if (is(Contents T : T[U], U : string))
1043 {
1044 import std.algorithm.sorting : sort;
1045 sort(indices);
1046 }
1047 else static if (ErrorLevel == Malformed.ignore)
1048 {
1049 import std.algorithm.sorting : sort;
1050 sort(indices);
1051 }
1052 else
1053 {
1054 import std.algorithm.searching : findAdjacent;
1055 import std.algorithm.sorting : isSorted;
1056 if (!isSorted(indices))
1057 {
1058 auto ex = new HeaderMismatchException
1059 ("Header in input does not match specified header.");
1060 findAdjacent!"a > b"(indices);
1061 ex.row = 1;
1062 ex.col = indices.front;
1063
1064 throw ex;
1065 }
1066 }
1067 }
1068
1069 popFront();
1070 }
1071
1072 /**
1073 * Part of an input range as defined by
1074 * $(REF isInputRange, std,range,primitives).
1075 *
1076 * Returns:
1077 * If `Contents` is a struct, will be filled with record data.
1078 *
1079 * If `Contents` is a class, will be filled with record data.
1080 *
1081 * If `Contents` is a associative array, will be filled
1082 * with record data.
1083 *
1084 * If `Contents` is non-struct, a $(LREF CsvRecord) will be
1085 * returned.
1086 */
front()1087 @property auto front()
1088 {
1089 assert(!empty, "Attempting to fetch the front of an empty CsvReader");
1090 static if (is(Contents == struct) || is(Contents == class))
1091 {
1092 return recordContent;
1093 }
1094 else static if (is(Contents T : T[U], U : string))
1095 {
1096 return recordContent;
1097 }
1098 else
1099 {
1100 return recordRange;
1101 }
1102 }
1103
1104 /**
1105 * Part of an input range as defined by
1106 * $(REF isInputRange, std,range,primitives).
1107 */
empty()1108 @property bool empty() @safe @nogc pure nothrow const
1109 {
1110 return _empty;
1111 }
1112
1113 /**
1114 * Part of an input range as defined by
1115 * $(REF isInputRange, std,range,primitives).
1116 *
1117 * Throws:
1118 * $(LREF CSVException) When a quote is found in an unquoted field,
1119 * data continues after a closing quote, the quoted field was not
1120 * closed before data was empty, a conversion failed, or when the
1121 * row's length does not match the previous length.
1122 */
void popFront()
{
    // Drain whatever the caller left unread in the current record.
    for (; !recordRange.empty; recordRange.popFront())
    {
    }

    // The first completed row fixes the expected number of columns.
    static if (ErrorLevel == Malformed.throwException)
    {
        if (_input.rowLength == 0)
            _input.rowLength = _input.col;
    }

    _input.col = 0;

    // Step over exactly one record break: LF, CR or CRLF.
    if (!_input.range.empty)
    {
        auto lead = _input.range.front;
        if (lead == '\n')
        {
            _input.range.popFront();
        }
        else if (lead == '\r')
        {
            _input.range.popFront();
            if (!_input.range.empty && _input.range.front == '\n')
                _input.range.popFront();
        }
    }

    // Nothing after the record break means the reader is finished;
    // otherwise start parsing the next record.
    if (_input.range.empty)
        _empty = true;
    else
        prime();
}
1156
private void prime()
{
    if (_empty)
        return;

    _input.row++;

    enum bool isAggregate = is(Contents == struct) || is(Contents == class);

    // Aggregates map columns onto their fields further down, so their
    // record range is created without a column filter.
    static if (isAggregate)
        size_t[] columnFilter = null;
    else
        auto columnFilter = indices;

    recordRange = typeof(recordRange)
        (_input, _separator, _quote, columnFilter,
         _allowInconsistentDelimiterCount);

    static if (is(Contents T : T[U], U : string))
    {
        // Associative array: key every value by the header name of the
        // column that was just parsed (`_input.col` is 1-based).
        T[U] row;
        try
        {
            while (!recordRange.empty)
            {
                row[header[_input.col-1]] = recordRange.front;
                recordRange.popFront();
            }
        }
        catch (ConvException e)
        {
            throw new CSVException(e.msg, _input.row, _input.col, e);
        }

        recordContent = row;
    }
    else static if (isAggregate)
    {
        // Start from a fresh object for every record.
        static if (is(Contents == class))
            recordContent = new typeof(recordContent)();
        else
            recordContent = typeof(recordContent).init;

        // With explicit indices, field `ti` is fed from column
        // `indices[ti]`; without them, from column `ti`.
        immutable bool remapped = indices.length > 0;
        size_t colIndex;
        try
        {
            while (!recordRange.empty)
            {
                auto colData = recordRange.front;
                // Also runs when a conversion below throws, so the catch
                // handler reports a 1-based column number.
                scope(exit) colIndex++;

                foreach (ti, ToType; Fields!(Contents))
                {
                    immutable size_t wanted = remapped ? indices[ti] : ti;
                    if (wanted == colIndex)
                    {
                        static if (!isSomeString!ToType) skipWS(colData);
                        recordContent.tupleof[ti] = to!ToType(colData);
                    }
                }

                recordRange.popFront();
            }
        }
        catch (ConvException e)
        {
            throw new CSVException(e.msg, _input.row, colIndex, e);
        }
    }
}
1236 }
1237
@safe pure unittest
{
    import std.algorithm.comparison : equal;

    // ';' separates the fields and '^' plays the role of the quote.
    string input = `76;^26^;22`;
    int[] expected = [76, 26, 22];

    auto reader = CsvReader!(int, Malformed.ignore, string, char, string[])
        (input, ';', '^', false);

    foreach (row; reader)
        assert(equal(row, expected));
}
1252
1253 // https://issues.dlang.org/show_bug.cgi?id=15545
1254 // @system due to the catch for Throwable
@system pure unittest
{
    import std.exception : assertNotThrown;

    // Two records; the input ends with a lone '\r'.
    enum failData =
"name, surname, age
Joe, Joker, 99\r";

    auto reader = csvReader(failData);
    assertNotThrown((){ foreach (entry; reader) {} }());
}
1264
1265 /*
 * This input range is accessible through $(LREF CsvReader) when the
 * requested `Contents` type is neither a structure nor an associative array.
1268 */
private struct CsvRecord(Contents, Malformed ErrorLevel, Range, Separator)
if (!is(Contents == class) && !is(Contents == struct))
{
    import std.array : appender;
private:
    // Parser state handed in by the owner: the underlying character range
    // together with the row/column counters and the expected row length.
    Input!(Range, ErrorLevel)* _input;
    Separator _separator;
    Separator _quote;
    // The field currently exposed through `front`, already converted.
    Contents curToken;
    // Scratch buffer receiving the raw characters of the field being read.
    typeof(appender!(dchar[])()) _front;
    bool _empty;
    bool _allowInconsistentDelimiterCount;
    // For each requested column: how many fields have to be discarded to
    // get there from the previously returned column (see the constructor).
    size_t[] _popCount;
public:
    /*
     * Params:
     *      input = Pointer to a character $(REF_ALTTEXT input range, isInputRange, std,range,primitives)
     *      delimiter = Separator for each column
     *      quote = Character used for quotation
     *      indices = An array containing which columns will be returned.
     *             If empty, all columns are returned. List must be in order.
     *      allowInconsistentDelimiterCount = When true, a row whose length
     *             differs from the first row's does not raise an exception.
     */
    this(Input!(Range, ErrorLevel)* input, Separator delimiter,
         Separator quote, size_t[] indices,
         bool allowInconsistentDelimiterCount)
    {
        _input = input;
        _separator = delimiter;
        _quote = quote;

        _front = appender!(dchar[])();
        _popCount = indices.dup;
        _allowInconsistentDelimiterCount = allowInconsistentDelimiterCount;

        // If a header was given, each call to popFront will need
        // to eliminate so many tokens. This calculates
        // how many will be skipped to get to the next header column,
        // e.g. the absolute indices [1, 3] become the skip counts [1, 1].
        size_t normalizer;
        foreach (ref c; _popCount)
        {
            static if (ErrorLevel == Malformed.ignore)
            {
                // If we are not throwing exceptions
                // a header may not exist, indices are sorted
                // and will be size_t.max if not found.
                if (c == size_t.max)
                    break;
            }
            c -= normalizer;
            normalizer += c + 1;
        }

        prime();
    }

    /**
     * Part of an input range as defined by
     * $(REF isInputRange, std,range,primitives).
     */
    @property Contents front() @safe pure
    {
        assert(!empty, "Attempting to fetch the front of an empty CsvRecord");
        return curToken;
    }

    /**
     * Part of an input range as defined by
     * $(REF isInputRange, std,range,primitives).
     */
    @property bool empty() @safe pure nothrow @nogc const
    {
        return _empty;
    }

    /*
     * CsvRecord is complete when input
     * is empty or starts with record break
     */
    private bool recordEnd()
    {
        return _input.range.empty
            || _input.range.front == '\n'
            || _input.range.front == '\r';
    }

    static if (ErrorLevel == Malformed.throwException)
    {
        /*
         * Builds the exception reporting that the current row's column
         * count differs from the length recorded for the first row.
         */
        private CSVException rowLengthMismatch()
        {
            import std.format : format;
            return new CSVException(
               format("Row %s's length %s does not match "~
                      "previous length of %s.", _input.row,
                      _input.col, _input.rowLength));
        }
    }

    /**
     * Part of an input range as defined by
     * $(REF isInputRange, std,range,primitives).
     *
     * Throws:
     *       $(LREF CSVException) When a quote is found in an unquoted field,
     *       data continues after a closing quote, the quoted field was not
     *       closed before data was empty, a conversion failed, or when the
     *       row's length does not match the previous length.
     */
    void popFront()
    {
        // Skip last of record when header is depleted. A null `ptr` means
        // no column selection was given at all, so nothing is skipped then.
        if (_popCount.ptr && _popCount.empty)
            while (!recordEnd())
            {
                prime(1);
            }

        if (recordEnd())
        {
            _empty = true;
            static if (ErrorLevel == Malformed.throwException)
            {
                if (_input.rowLength != 0 && _input.col != _input.rowLength
                    && !_allowInconsistentDelimiterCount)
                {
                    throw rowLengthMismatch();
                }
            }
            return;
        }

        static if (ErrorLevel == Malformed.throwException)
        {
            // More columns than the first row had: either an error or,
            // when inconsistent rows are tolerated, the end of this record.
            if (_input.rowLength != 0 && _input.col > _input.rowLength)
            {
                if (!_allowInconsistentDelimiterCount)
                    throw rowLengthMismatch();

                _empty = true;
                return;
            }
        }

        // Separator is left on the end of input from the last call.
        // This cannot be moved to after the call to csvNextToken as
        // there may be an empty record after it.
        if (_input.range.front == _separator)
            _input.range.popFront();

        _front.shrinkTo(0);

        prime();
    }

    /*
     * Discards the buffered token and reads the next one, `skipNum` times;
     * the last token read stays in `_front`.
     */
    private void prime(size_t skipNum)
    {
        foreach (i; 0 .. skipNum)
        {
            _input.col++;
            _front.shrinkTo(0);
            // A row can be shorter than the requested column index, in
            // which case the input may already be exhausted here; only
            // look at `front` when there is something left to inspect.
            if (!_input.range.empty && _input.range.front == _separator)
                _input.range.popFront();

            try
                csvNextToken!(Range, ErrorLevel, Separator)
                                   (_input.range, _front, _separator, _quote,false);
            catch (IncompleteCellException ice)
            {
                // Attach the position and the text gathered so far.
                ice.row = _input.row;
                ice.col = _input.col;
                ice.partialData = _front.data.idup;
                throw ice;
            }
            catch (ConvException e)
            {
                throw new CSVException(e.msg, _input.row, _input.col, e);
            }
        }
    }

    /*
     * Reads the next field, skips ahead to the next requested column when a
     * column selection is active, and converts the result to `Contents`.
     */
    private void prime()
    {
        try
        {
            _input.col++;
            csvNextToken!(Range, ErrorLevel, Separator)
                (_input.range, _front, _separator, _quote,false);
        }
        catch (IncompleteCellException ice)
        {
            // Attach the position and the text gathered so far.
            ice.row = _input.row;
            ice.col = _input.col;
            ice.partialData = _front.data.idup;
            throw ice;
        }

        auto skipNum = _popCount.empty ? 0 : _popCount.front;
        if (!_popCount.empty)
            _popCount.popFront();

        // size_t.max marks a requested column that was not found in the
        // header: consume the rest of the row and end the record.
        if (skipNum == size_t.max)
        {
            while (!recordEnd())
                prime(1);
            _empty = true;
            return;
        }

        if (skipNum)
            prime(skipNum);

        auto data = _front.data;
        static if (!isSomeString!Contents) skipWS(data);
        try curToken = to!Contents(data);
        catch (ConvException e)
        {
            throw new CSVException(e.msg, _input.row, _input.col, e);
        }
    }
}
1498
1499 /**
1500 * Lower level control over parsing CSV
1501 *
1502 * This function consumes the input. After each call the input will
1503 * start with either a delimiter or record break (\n, \r\n, \r) which
1504 * must be removed for subsequent calls.
1505 *
1506 * Params:
1507 * input = Any CSV input
1508 * ans = The first field in the input
1509 * sep = The character to represent a comma in the specification
1510 * quote = The character to represent a quote in the specification
1511 * startQuoted = Whether the input should be considered to already be in
1512 * quotes
1513 *
1514 * Throws:
1515 * $(LREF IncompleteCellException) When a quote is found in an unquoted
1516 * field, data continues after a closing quote, or the quoted field was
1517 * not closed before data was empty.
1518 */
void csvNextToken(Range, Malformed ErrorLevel = Malformed.throwException,
                           Separator, Output)
                          (ref Range input, ref Output ans,
                           Separator sep, Separator quote,
                           bool startQuoted = false)
if (isSomeChar!Separator && isInputRange!Range
    && is(immutable ElementType!Range == immutable dchar)
    && isOutputRange!(Output, dchar))
{
    // Nothing to read: the input is exhausted or sits on a record break.
    if (input.empty || input.front == '\n' || input.front == '\r')
        return;

    // True while inside a quoted section of the field.
    bool insideQuotes = startQuoted;
    // True right after a quote seen inside a quoted section; that quote
    // either closes the section or starts an escaped (doubled) quote.
    bool pendingQuote;

    if (input.front == quote)
    {
        insideQuotes = true;
        input.popFront();
    }

    for (; !input.empty; input.popFront())
    {
        assert(!(insideQuotes && pendingQuote),
               "Invalid quotation state in csvNextToken");

        dchar c = input.front;

        if (!insideQuotes)
        {
            // Outside quotes a separator or record break ends the token;
            // the terminator itself is left in the input.
            if (c == sep || c == '\r' || c == '\n')
                break;

            if (!pendingQuote)
            {
                if (c != quote)
                {
                    // Plain character of an unquoted field.
                    ans.put(c);
                }
                else
                {
                    // A quote appearing in the middle of an unquoted field.
                    static if (ErrorLevel == Malformed.throwException)
                        throw new IncompleteCellException(
                            "Quote located in unquoted token");
                    else static if (ErrorLevel == Malformed.ignore)
                        ans.put(quote);
                }
                continue;
            }
        }

        // From here on we are either inside quotes or directly behind a
        // quote that may have closed them. The two flags are mutually
        // exclusive but carry different information.
        if (c == quote)
        {
            if (pendingQuote)
            {
                // Second quote of a doubled pair: emit one literal quote
                // and carry on inside the quoted section.
                pendingQuote = false;
                insideQuotes = true;
                ans.put(quote);
            }
            else
            {
                // Possible closing quote; the next character decides.
                pendingQuote = true;
                insideQuotes = false;
            }
        }
        else
        {
            if (pendingQuote)
            {
                // The section was closed but more data follows.
                static if (ErrorLevel == Malformed.throwException)
                    throw new IncompleteCellException(
                        "Content continues after end quote, " ~
                        "or needs to be escaped.");
                else static if (ErrorLevel == Malformed.ignore)
                    break;
            }
            ans.put(c);
        }
    }

    // A quoted section that is still open when the loop ends was never closed.
    static if (ErrorLevel == Malformed.throwException)
        if (insideQuotes && (input.empty || input.front == '\n' || input.front == '\r'))
            throw new IncompleteCellException(
                "Data continues on future lines or trailing quote");

}
1621
1622 ///
@safe unittest
{
    import std.array : appender;
    import std.range.primitives : popFront;

    string input = "65,63\n123,3673";

    auto token = appender!(char[])();

    // The first field is read; its separator stays at the front of the input.
    csvNextToken(input, token, ',', '"');
    assert(token.data == "65");
    assert(input == ",63\n123,3673");

    // Remove the separator and clear the buffer before the next call.
    input.popFront();
    token.shrinkTo(0);
    csvNextToken(input, token, ',', '"');
    assert(token.data == "63");
    assert(input == "\n123,3673");

    // A record break is removed the same way.
    input.popFront();
    token.shrinkTo(0);
    csvNextToken(input, token, ',', '"');
    assert(token.data == "123");
    assert(input == ",3673");
}
1648
1649 // Test csvNextToken on simplest form and correct format.
@safe pure unittest
{
    import std.array;

    string text = "\U00010143Hello,65,63.63\nWorld,123,3673.562";
    auto field = appender!(dchar[])();

    // Drops the delimiter or record break left at the front, clears the
    // buffer and reads the following field.
    void advance() @safe pure
    {
        text.popFront();
        field.shrinkTo(0);
        csvNextToken(text, field, ',', '"');
    }

    csvNextToken!string(text, field, ',', '"');
    assert(field.data == "\U00010143Hello");
    assert(text == ",65,63.63\nWorld,123,3673.562");

    advance();
    assert(field.data == "65");
    assert(text == ",63.63\nWorld,123,3673.562");

    advance();
    assert(field.data == "63.63");
    assert(text == "\nWorld,123,3673.562");

    advance();
    assert(field.data == "World");
    assert(text == ",123,3673.562");

    advance();
    assert(field.data == "123");
    assert(text == ",3673.562");

    advance();
    assert(field.data == "3673.562");
    assert(text == "");
}
1691
1692 // Test quoted tokens
@safe pure unittest
{
    import std.array;

    string text = `one,two,"three ""quoted""","",` ~ "\"five\nnew line\"\nsix";
    auto field = appender!(dchar[])();

    // Drops the delimiter or record break left at the front, clears the
    // buffer and reads the following field.
    void advance() @safe pure
    {
        text.popFront();
        field.shrinkTo(0);
        csvNextToken(text, field, ',', '"');
    }

    csvNextToken!string(text, field, ',', '"');
    assert(field.data == "one");
    assert(text == `,two,"three ""quoted""","",` ~ "\"five\nnew line\"\nsix");

    advance();
    assert(field.data == "two");
    assert(text == `,"three ""quoted""","",` ~ "\"five\nnew line\"\nsix");

    // Doubled quotes inside a quoted field collapse to a single quote.
    advance();
    assert(field.data == "three \"quoted\"");
    assert(text == `,"",` ~ "\"five\nnew line\"\nsix");

    // An empty quoted field.
    advance();
    assert(field.data == "");
    assert(text == ",\"five\nnew line\"\nsix");

    // A quoted field may contain a line break.
    advance();
    assert(field.data == "five\nnew line");
    assert(text == "\nsix");

    advance();
    assert(field.data == "six");
    assert(text == "");
}
1734
1735 // Test empty data is pulled at end of record.
@safe pure unittest
{
    import std.array;

    string text = "one,";
    auto field = appender!(dchar[])();

    csvNextToken(text, field, ',', '"');
    assert(field.data == "one");
    assert(text == ",");

    // Called again with the separator still at the front: nothing is
    // appended to the cleared buffer.
    field.shrinkTo(0);
    csvNextToken(text, field, ',', '"');
    assert(field.data == "");
}
1750
1751 // Test exceptions
@safe pure unittest
{
    import std.array;

    typeof(appender!(dchar[])()) field;

    // A quoted field that is never closed: all input is consumed before
    // the exception is raised.
    string text = "\"one\nnew line";
    try
    {
        field = appender!(dchar[])();
        csvNextToken(text, field, ',', '"');
        assert(0);
    }
    catch (IncompleteCellException)
    {
        assert(field.data == "one\nnew line");
        assert(text == "");
    }

    // A quote inside an unquoted field: parsing stops on the quote.
    text = "Hello world\"";
    try
    {
        field = appender!(dchar[])();
        csvNextToken(text, field, ',', '"');
        assert(0);
    }
    catch (IncompleteCellException)
    {
        assert(field.data == "Hello world");
        assert(text == "\"");
    }

    // With Malformed.ignore the stray quotes are kept as ordinary text.
    text = "one, two \"quoted\" end";

    field = appender!(dchar[])();
    csvNextToken!(string, Malformed.ignore)(text, field, ',', '"');
    assert(field.data == "one");

    text.popFront();
    field.shrinkTo(0);
    csvNextToken!(string, Malformed.ignore)(text, field, ',', '"');
    assert(field.data == " two \"quoted\" end");
}
1795
1796 // Test modifying token delimiter
@safe pure unittest
{
    import std.array;

    // '|' separates the fields and '/' acts as the quote character.
    string text = `one|two|/three "quoted"/|//`;
    auto field = appender!(dchar[])();

    // Drops the delimiter left at the front, clears the buffer and reads
    // the following field.
    void advance() @safe pure
    {
        text.popFront();
        field.shrinkTo(0);
        csvNextToken(text, field, '|', '/');
    }

    csvNextToken(text, field, '|', '/');
    assert(field.data == "one"d);
    assert(text == `|two|/three "quoted"/|//`);

    advance();
    assert(field.data == "two"d);
    assert(text == `|/three "quoted"/|//`);

    // Double quotes are ordinary characters under this configuration.
    advance();
    assert(field.data == `three "quoted"`);
    assert(text == `|//`);

    advance();
    assert(field.data == ""d);
}
1825
1826 // https://issues.dlang.org/show_bug.cgi?id=8908
@safe pure unittest
{
    // Numeric fields preceded by whitespace.
    string csv = ` 1.0, 2.0, 3.0
4.0, 5.0, 6.0`;

    static struct Data { real a, b, c; }

    // Reading into a struct.
    size_t rowNo = 0;
    foreach (data; csvReader!Data(csv))
    {
        int[] got = [cast(int) data.a, cast(int) data.b, cast(int) data.c];
        assert(got == (rowNo == 0 ? [1, 2, 3] : [4, 5, 6]));
        ++rowNo;
    }

    // Reading field by field through the record range.
    rowNo = 0;
    foreach (record; csvReader!real(csv))
    {
        auto first = record.front;
        record.popFront();
        auto second = record.front;
        record.popFront();
        auto third = record.front;

        int[] got = [cast(int) first, cast(int) second, cast(int) third];
        assert(got == (rowNo == 0 ? [1, 2, 3] : [4, 5, 6]));
        ++rowNo;
    }
}
1858
1859 // https://issues.dlang.org/show_bug.cgi?id=21629
@safe pure unittest
{
    import std.typecons : Tuple;

    struct Record
    {
        string a;
        string b;
    }

    // Empty input must give an empty reader for every supported
    // `Contents` kind, with or without a header argument.
    string input = "";
    string[] columns = ["a", "b"];

    assert(csvReader!Record(input).empty, "This should be empty");
    assert(csvReader!Record(input, columns).empty, "This should be empty");
    assert(csvReader!(Tuple!(string, string))(input).empty, "This should be empty");
    assert(csvReader!(string[string])(input, columns).empty, "This should be empty");
    assert(csvReader!(string[string])(input, null).empty, "This should be empty");
    assert(csvReader!(int)(input, null).empty, "This should be empty");
}
1878