1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32 package org.spf4j.io.csv;
33
34 import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
35 import gnu.trove.map.hash.THashMap;
36 import java.io.BufferedReader;
37 import java.io.File;
38 import java.io.IOException;
39 import java.io.InputStreamReader;
40 import java.io.Reader;
41 import java.io.UncheckedIOException;
42 import java.io.Writer;
43 import java.nio.charset.Charset;
44 import java.nio.file.Files;
45 import java.util.ArrayList;
46 import java.util.Iterator;
47 import java.util.List;
48 import java.util.Map;
49 import javax.annotation.CheckReturnValue;
50 import javax.annotation.ParametersAreNonnullByDefault;
51 import org.spf4j.base.Arrays;
52 import org.spf4j.base.CharSequences;
53 import org.spf4j.io.PushbackReader;
54
55
56
57
58
59
60
61
62
63
64 @ParametersAreNonnullByDefault
65 @SuppressFBWarnings("NP_LOAD_OF_KNOWN_NULL_VALUE")
66 public final class CharSeparatedValues {
67
68
69
70
/** Unicode byte order mark; the readers created here drop it when it is the first character read. */
public static final int UTF_BOM = '\uFEFF';

/** Character that delimits the elements of a row. */
private final char separator;

/** Characters whose presence makes an element require the quoted form. */
private final char[] toEscape;

/**
 * Creates a format where elements containing the separator, a line feed,
 * a carriage return or a double quote are written quoted.
 *
 * @param separator the element separator.
 * @throws IllegalArgumentException if the separator is a line feed, a carriage return or a double quote.
 */
public CharSeparatedValues(final char separator) {
  this(separator, new char[0]);
}

/**
 * Creates a format with additional characters that force quoting.
 *
 * @param separator the element separator.
 * @param extraCharsToEscape characters that, besides the separator, line feed, carriage return
 * and double quote, make an element be written quoted.
 * @throws IllegalArgumentException if the separator is a line feed, a carriage return or a double quote.
 */
public CharSeparatedValues(final char separator, final char... extraCharsToEscape) {
  switch (separator) {
    case '\n':
    case '\r':
    case '"':
      throw new IllegalArgumentException("Illegal separator character " + separator);
    default:
      break;
  }
  final int nrExtra = extraCharsToEscape.length;
  final char[] escapes = new char[4 + nrExtra];
  escapes[0] = separator;
  escapes[1] = '\n';
  escapes[2] = '\r';
  escapes[3] = '"';
  System.arraycopy(extraCharsToEscape, 0, escapes, 4, nrExtra);
  this.separator = separator;
  this.toEscape = escapes;
}
96
97 public void writeCsvRow(final Appendable writer, final Object... elems) throws IOException {
98 writeCsvRowNoEOL(writer, elems);
99 writer.append('\n');
100 }
101
102 @SafeVarargs
103 public final String toCsvRowString(final Object... elems) {
104 StringBuilder result = new StringBuilder(elems.length * 8);
105 try {
106 writeCsvRowNoEOL(result, elems);
107 } catch (IOException ex) {
108 throw new UncheckedIOException(ex);
109 }
110 return result.toString();
111 }
112
113 public void writeCsvRowNoEOL(final Appendable writer, final Object... elems) throws IOException {
114 if (elems.length > 0) {
115 int i = 0;
116 Object elem = elems[i++];
117 if (elem != null) {
118 writeCsvElement(elem.toString(), writer);
119 }
120 while (i < elems.length) {
121 writer.append(separator);
122 elem = elems[i++];
123 if (elem != null) {
124 writeCsvElement(elem.toString(), writer);
125 }
126 }
127 }
128 }
129
130 public void writeCsvRow2(final Appendable writer, final Object obj, final Object... elems)
131 throws IOException {
132 if (obj != null) {
133 writeCsvElement(obj.toString(), writer);
134 }
135 for (Object elem : elems) {
136 writer.append(separator);
137 if (elem != null) {
138 writeCsvElement(elem.toString(), writer);
139 }
140 }
141 writer.append('\n');
142 }
143
144 public void writeCsvRow(final Appendable writer, final long... elems) throws IOException {
145 writeCsvRowNoEOL(elems, writer);
146 writer.append('\n');
147 }
148
149 public void writeCsvRowNoEOL(final long[] elems, final Appendable writer) throws IOException {
150 if (elems.length > 0) {
151 int i = 0;
152 writer.append(Long.toString(elems[i++]));
153 while (i < elems.length) {
154 writer.append(separator);
155 writer.append(Long.toString(elems[i++]));
156 }
157 }
158 }
159
160 public void writeCsvRow(final Appendable writer, final Iterable<?> elems) throws IOException {
161 writeCsvRowNoEOL(elems, writer);
162 writer.append('\n');
163 }
164
165 public void writeCsvRowNoEOL(final Iterable<?> elems, final Appendable writer) throws IOException {
166 Iterator<?> it = elems.iterator();
167 if (it.hasNext()) {
168 Object next = it.next();
169 if (next != null) {
170 writeCsvElement(next.toString(), writer);
171 }
172 while (it.hasNext()) {
173 writer.append(separator);
174 next = it.next();
175 if (next != null) {
176 writeCsvElement(next.toString(), writer);
177 }
178 }
179 }
180 }
181
182 public <T> T read(final File file, final Charset charset,
183 final CsvMapHandler<T> handler) throws IOException, CsvParseException {
184 try (BufferedReader br = new BufferedReader(new InputStreamReader(Files.newInputStream(file.toPath()), charset))) {
185 return read(br, handler);
186 }
187 }
188
189 public <T> T read(final File file, final Charset charset,
190 final CsvHandler<T> handler) throws IOException, CsvParseException {
191 try (BufferedReader br = new BufferedReader(new InputStreamReader(Files.newInputStream(file.toPath()), charset))) {
192 return read(br, handler);
193 }
194 }
195
196 public List<Map<String, String>> read(final Reader preader) throws IOException, CsvParseException {
197 return read(preader, new ToListMapHandler());
198 }
199
200 public <T> T read(final Reader preader,
201 final CsvMapHandler<T> handler) throws IOException, CsvParseException {
202 return read(preader, new CsvMapHandler2CsvHandler<>(handler));
203 }
204
205 public List<String> readRow(final Reader reader) throws IOException, CsvParseException {
206 return readRow(reader, new CsvRow2List());
207 }
208
209 public <T> T readRow(final Reader reader, final CsvRowHandler<T> handler) throws IOException, CsvParseException {
210 return read(reader, new OneRowHandler<>(handler));
211 }
212
213 public <T> T read(final Reader preader,
214 final CsvHandler<T> handler) throws IOException, CsvParseException {
215 PushbackReader reader = new PushbackReader(preader);
216 int firstChar = reader.read();
217 if (firstChar != UTF_BOM && firstChar >= 0) {
218 reader.unread(firstChar);
219 }
220 return readNoBom(reader, handler);
221 }
222
223
224
225
226
227
228
229
230
231
232 public <T> T readNoBom(final PushbackReader reader, final CsvHandler<T> handler)
233 throws IOException, CsvParseException {
234 CsvReader r = reader(reader);
235 handler.startRow(0);
236 CsvReader.TokenType token = r.next();
237 while (token != CsvReader.TokenType.END_DOCUMENT) {
238 if (token == CsvReader.TokenType.ELEMENT) {
239 handler.element(r.getElement());
240 token = r.next();
241 } else if (token == CsvReader.TokenType.END_ROW) {
242 handler.endRow();
243 token = r.next();
244 if (token == CsvReader.TokenType.ELEMENT) {
245 handler.startRow(r.currentLineNumber());
246 }
247 }
248 }
249 return handler.eof();
250 }
251
252
253
254
255
256
257
258
259 public Iterable<Iterable<String>> asIterable(final Reader preader) {
260 return () -> {
261 try {
262 return new CsvReader2Iterator(reader(preader));
263 } catch (IOException ex) {
264 throw new UncheckedIOException(ex);
265 }
266 };
267
268 }
269
270
271
272
273
274
275
276 public Iterable<CharSequence> singleRow(final Reader preader) {
277 try {
278 CsvReader reader = reader(preader);
279 return () -> new OneRowIterator(reader);
280 } catch (IOException ex) {
281 throw new UncheckedIOException(ex);
282 }
283 }
284
285 public CsvReader reader(final Reader preader) throws IOException {
286 PushbackReader reader = new PushbackReader(preader);
287 int firstChar = reader.read();
288 if (firstChar != UTF_BOM && firstChar >= 0) {
289 reader.unread(firstChar);
290 }
291 return readerNoBOMILEL(reader);
292 }
293
294
295
296
297
298
299
300
301 @Deprecated
302 public CsvReader readerILEL(final Reader preader) throws IOException {
303 return reader(preader);
304 }
305
306
307
308
309
310
311 public CsvReader readerNoBOM(final PushbackReader reader) {
312 return new CsvReaderImpl(reader);
313 }
314
315
316
317
318
319
320
321 @Deprecated
322 public CsvReader readerNoBOMILEL(final PushbackReader reader) {
323 return new CsvReaderImpl(reader);
324 }
325
326 public CsvWriter writer(final Writer writer) {
327 return new CsvWriterImpl(writer);
328 }
329
330 public void writeCsvElement(final CharSequence elem, final Appendable writer) throws IOException {
331 if (CharSequences.containsAnyChar(elem, toEscape)) {
332 writeQuotedCsvElement(elem, writer);
333 } else {
334 writer.append(elem);
335 }
336 }
337
338 public static void writeQuotedCsvElement(final CharSequence elem, final Appendable writer) throws IOException {
339 writer.append('"');
340 writeQuotedElementContent(elem, 0, elem.length(), writer);
341 writer.append('"');
342 }
343
344 public static void writeQuotedElementContent(final CharSequence elem,
345 final int start, final int end, final Appendable writer) throws IOException {
346 for (int i = start; i < end; i++) {
347 char c = elem.charAt(i);
348 writeQuotedChar(c, writer);
349 }
350 }
351
352 public static void writeQuotedChar(final char c, final Appendable writer) throws IOException {
353 if (c == '"') {
354 writer.append("\"\"");
355 } else {
356 writer.append(c);
357 }
358 }
359
360 public CharSequence toCsvElement(final CharSequence elem) {
361 if (CharSequences.containsAnyChar(elem, toEscape)) {
362 StringBuilder sw = new StringBuilder(elem.length() + 4);
363 try {
364 writeQuotedCsvElement(elem, sw);
365 } catch (IOException ex) {
366 throw new UncheckedIOException(ex);
367 }
368 return sw;
369 } else {
370 return elem;
371 }
372 }
373
374 public String toCsvElement(final String elem) {
375 if (CharSequences.containsAnyChar(elem, toEscape)) {
376 StringBuilder sw = new StringBuilder(elem.length() + 4);
377 try {
378 writeQuotedCsvElement(elem, sw);
379 } catch (IOException ex) {
380 throw new UncheckedIOException(ex);
381 }
382 return sw.toString();
383 } else {
384 return elem;
385 }
386 }
387
388
389
390
391
392
393
394
395
396 @CheckReturnValue
397 public int readCsvElement(final Reader reader, final StringBuilder addElemTo, final long lineNr)
398 throws IOException, CsvParseException {
399 int c = reader.read();
400 if (c < 0) {
401 return c;
402 }
403 if (c == '"') {
404 c = reader.read();
405 while (c >= 0) {
406 if (c == '"') {
407 int c2 = reader.read();
408 if (c2 >= 0) {
409 if (c2 == '"') {
410 addElemTo.append((char) c);
411 } else {
412 return c2;
413 }
414 } else {
415 return c2;
416 }
417 } else {
418 addElemTo.append((char) c);
419 }
420 c = reader.read();
421 }
422 throw new CsvParseException("Escaped CSV element " + addElemTo + " not terminated correctly at " + lineNr);
423 } else {
424 while (c != separator && c != '\n' && c != '\r' && c >= 0) {
425 addElemTo.append((char) c);
426 c = reader.read();
427 }
428 }
429 return c;
430 }
431
432 @Override
433 public String toString() {
434 return "CharSepValues{" + "separator=" + separator + '}';
435 }
436
437 private static class ToListMapHandler implements CsvMapHandler<List<Map<String, String>>> {
438
439 private List<Map<String, String>> result = new ArrayList<>();
440
441 @Override
442 public void row(final Map<String, String> row) {
443 result.add(row);
444 }
445
446 @Override
447 public List<Map<String, String>> eof() {
448 return result;
449 }
450 }
451
452 private static class CsvMapHandler2CsvHandler<T> implements CsvHandler<T> {
453
454 private final CsvMapHandler<T> handler;
455 private boolean first = true;
456 private final List<String> header = new ArrayList<>();
457 private int elemIdx;
458 private Map<String, String> row = null;
459 private long lineNr;
460
461
462 CsvMapHandler2CsvHandler(final CsvMapHandler<T> handler) {
463 this.handler = handler;
464 }
465
466 @Override
467 public void startRow(final long ln) {
468 lineNr = ln;
469 elemIdx = 0;
470 if (!first) {
471 row = new THashMap<>(header.size());
472 }
473 }
474
475 @Override
476 public void element(final CharSequence elem) throws CsvParseException {
477 if (first) {
478 header.add(elem.toString());
479 } else {
480 if (header.size() <= elemIdx) {
481 throw new CsvParseException("Too many elements in row " + row + " at line " + lineNr);
482 }
483 row.put(header.get(elemIdx), elem.toString());
484 }
485 elemIdx++;
486 }
487
488 @Override
489 public void endRow() {
490 if (first) {
491 first = false;
492 } else {
493 handler.row(row);
494 }
495 }
496
497 @Override
498 public T eof() {
499 return handler.eof();
500 }
501 }
502
503 private class CsvReaderImpl implements CsvReader {
504
505 private final PushbackReader reader;
506 private final StringBuilder currentElement = new StringBuilder();
507 private CsvReader.TokenType currentToken;
508 private CsvReader.TokenType nextToken;
509 private long lineNr = 0;
510
511 CsvReaderImpl(final PushbackReader reader) {
512 this.reader = reader;
513 this.currentToken = CsvReader.TokenType.START_DOCUMENT;
514 this.nextToken = null;
515 }
516
517 @SuppressFBWarnings("SF_SWITCH_FALLTHROUGH")
518 private void readNext() throws IOException, CsvParseException {
519
520 switch (currentToken) {
521 case END_DOCUMENT:
522 nextToken = TokenType.END_DOCUMENT;
523 return;
524 case END_ROW:
525
526 int peek = reader.read();
527 if (peek < 0) {
528 currentToken = TokenType.END_DOCUMENT;
529 nextToken = TokenType.END_DOCUMENT;
530 return;
531 }
532 reader.unread(peek);
533 case START_DOCUMENT:
534 case ELEMENT:
535 currentElement.setLength(0);
536 int next = readCsvElement(reader, currentElement, lineNr);
537 currentToken = CsvReader.TokenType.ELEMENT;
538 switch (next) {
539 case '\r':
540 lineNr++;
541 nextToken = CsvReader.TokenType.END_ROW;
542 int c2 = reader.read();
543 if (c2 < 0) {
544 return;
545 }
546 if (c2 != '\n') {
547 reader.unread(c2);
548 }
549 return;
550 case '\n':
551 lineNr++;
552 nextToken = CsvReader.TokenType.END_ROW;
553 c2 = reader.read();
554 if (c2 < 0) {
555 return;
556 }
557 if (c2 != '\r') {
558 reader.unread(c2);
559 break;
560 }
561 break;
562 default:
563 if (next != separator) {
564 if (next < 0) {
565 nextToken = CsvReader.TokenType.END_ROW;
566 } else {
567 throw new CsvParseException("Unexpected character " + next + " at line" + lineNr);
568 }
569 }
570 }
571 return;
572 default:
573 throw new IllegalStateException("Invalid current token " + currentToken);
574
575 }
576
577 }
578
579 @Override
580 public CsvReader.TokenType next() throws IOException, CsvParseException {
581 if (nextToken == null) {
582 readNext();
583 return currentToken;
584 } else {
585 CsvReader.TokenType result = nextToken;
586 if (result != CsvReader.TokenType.END_DOCUMENT) {
587 nextToken = null;
588 }
589 currentToken = result;
590 return result;
591 }
592 }
593
594 @Override
595 public CsvReader.TokenType current() {
596 return currentToken;
597 }
598
599 @Override
600 public CharSequence getElement() {
601 if (currentToken != TokenType.ELEMENT) {
602 throw new IllegalStateException("No current element, current token is " + currentToken);
603 }
604 return currentElement;
605 }
606
607 @Override
608 public long currentLineNumber() {
609 return lineNr;
610 }
611
612 }
613
614 private static class OneRowHandler<T> implements CsvHandler<T> {
615
616 private final CsvRowHandler<T> handler;
617
618
619 OneRowHandler(final CsvRowHandler<T> handler) {
620 this.handler = handler;
621 }
622
623 @Override
624 public void startRow(final long rowNr) {
625 if (rowNr > 0) {
626 throw new IllegalArgumentException("Multiple rows encountered for " + this);
627 }
628 }
629
630 @Override
631 public void element(final CharSequence elem) {
632 handler.element(elem);
633 }
634
635 @Override
636 public T eof() {
637 return handler.eof();
638 }
639
640 }
641
642 private static final class CsvRow2List implements CsvRowHandler<List<String>> {
643
644 private final List<String> result = new ArrayList<>();
645
646 @Override
647 public void element(final CharSequence elem) {
648 result.add(elem.toString());
649 }
650
651 @Override
652 public List<String> eof() {
653 return result;
654 }
655 }
656
657 private class CsvWriterImpl implements CsvWriter {
658
659 private final Writer writer;
660
661 CsvWriterImpl(final Writer writer) {
662 this.writer = writer;
663 }
664 private boolean isStartLine = true;
665
666 @Override
667 public void writeElement(final CharSequence cs) throws IOException {
668 addComma();
669 writeCsvElement(cs, writer);
670 }
671
672 private void addComma() throws IOException {
673 if (isStartLine) {
674 isStartLine = false;
675 } else {
676 writer.append(separator);
677 }
678 }
679
680 @Override
681 public void writeEol() throws IOException {
682 writer.append('\n');
683 isStartLine = true;
684 }
685
686 @Override
687 public void flush() throws IOException {
688 writer.flush();
689 }
690
691
692 @Override
693 public ElementAppendable startQuotedElement() throws IOException {
694 addComma();
695 writer.write('"');
696 return new ElementAppendable() {
697 @Override
698 public Appendable append(final CharSequence csq) throws IOException {
699 writeQuotedElementContent(csq, 0, csq.length(), writer);
700 return this;
701 }
702
703 @Override
704 public Appendable append(final CharSequence csq, final int start, final int end) throws IOException {
705 writeQuotedElementContent(csq, start, end, writer);
706 return this;
707 }
708
709 @Override
710 public Appendable append(final char c) throws IOException {
711 writeQuotedChar(c, writer);
712 return this;
713 }
714
715 @Override
716 public void close() throws IOException {
717 writer.write('"');
718 }
719 };
720 }
721
722 @Override
723 public Appendable startRawElement() throws IOException {
724 addComma();
725 return new Appendable() {
726 @Override
727 public Appendable append(final CharSequence csq) throws IOException {
728 if (CharSequences.containsAnyChar(csq, toEscape)) {
729 throw new IllegalStateException("Attempting to write str containing escapeable seq " + csq);
730 }
731 writer.append(csq);
732 return this;
733 }
734
735 @Override
736 public Appendable append(final CharSequence csq, final int start, final int end) throws IOException {
737 if (CharSequences.containsAnyChar(csq, start, end, toEscape)) {
738 throw new IllegalStateException("Attempting to write str containing escapeable seq " + csq);
739 }
740 writer.append(csq, start, end);
741 return this;
742 }
743
744 @Override
745 public Appendable append(final char c) throws IOException {
746 if (Arrays.search(toEscape, c) >= 0) {
747 throw new IllegalStateException("Attempting to write str containing escapeable seq " + c);
748 }
749 writer.append(c);
750 return this;
751 }
752 };
753 }
754 }
755
756 }