gerbv
csv.c
Go to the documentation of this file.
1 /* csv - read write comma separated value format
2  * Copyright (c) 2003 Michael B. Allen <mba2000 ioplex.com>
3  *
4  * The MIT License
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  */
24 
25 /* We (Juergen Haas and Tomasz Motylewski) execute our rights given above
26  * to distribute and sublicence this file (csv.c) and csv.h, csv_defines.h
27  * under General Public Licence version 2 or any later version.
28  *
29  * This file is derived from libmba : A library of generic C modules
30  * http://www.ioplex.com/~miallen/libmba/dl/libmba-0.8.9.tar.gz
31  */
32 
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif /* HAVE_CONFIG_H */
41 
42 
43 #include <stdlib.h>
44 #include <string.h>
45 #include <stdio.h>
46 #include <ctype.h>
47 #include <errno.h>
48 #include <wchar.h>
49 #include <wctype.h>
50 
51 #include "common.h"
52 #include "gerbv.h"
53 #include "csv.h"
54 #include "csv_defines.h"
/* States of the field-level parser shared by csv_parse_str() and
 * csv_parse_wcs(). */
enum {
    ST_START     = 1,   /* at the beginning of a field, nothing collected yet */
    ST_COLLECT   = 2,   /* gathering field characters (quoted or unquoted) */
    ST_TAILSPACE = 3,   /* whitespace seen after a closing quote */
    ST_END_QUOTE = 4    /* a quote was seen inside a quoted field */
};

/* Wide-character whitespace test. */
#define istspace iswspace
60 
61 
/* Byte-oriented input cursor used by snextch(): reads from a stdio stream
 * when `in` is non-NULL, otherwise from the memory region src[0..sn). */
struct sinput {
    FILE *in;           /* stream to read from, or NULL for the memory source */
    const char *src;    /* next unread byte of the memory source */
    size_t sn;          /* bytes still available at src */
    size_t count;       /* characters handed out so far */
};
68 
69 
/* Wide-character input cursor used by wnextch(); always memory backed. */
struct winput {
    const wchar_t *src; /* next unread wide character */
    size_t sn;          /* wide characters still available at src */
    size_t count;       /* characters handed out so far */
};
75 
76 
77 static int
78 snextch(struct sinput *in)
79 {
80  int ch;
81 
82  if (in->in) {
83  if ((ch = fgetc(in->in)) == EOF) {
84  if (ferror(in->in)) {
85  GERB_MESSAGE("errno:%d", errno);
86  return -1;
87  }
88  return 0;
89  }
90  } else {
91  if (in->sn == 0) {
92  return 0;
93  }
94  ch = (unsigned char) *(in->src)++;
95  in->sn--;
96  }
97  in->count++;
98 
99  return ch;
100 }/* snextch */
101 
102 
103 static int
104 wnextch(struct winput *in)
105 {
106  int ch;
107 
108  if (in->sn == 0) {
109  return 0;
110  }
111  ch = *(in->src)++;
112  in->sn--;
113  in->count++;
114 
115  return ch;
116 }/* wnextch */
117 
118 static int
119 csv_parse_str(struct sinput *in, char *buf, size_t bn, char *row[], int rn, int sep, int flags)
120 {
121  int trim, quotes, ch, state, r, j, t, inquotes;
122 
123  trim = flags & CSV_TRIM;
124  quotes = flags & CSV_QUOTES;
125  state = ST_START;
126  inquotes = 0;
127  ch = r = j = t = 0;
128 
129  memset(row, 0, sizeof(char *) * rn);
130 
131  while (rn && bn && (ch = snextch(in)) > 0) {
132  switch (state) {
133  case ST_START:
134  if (ch != '\n' && ch != sep && isspace(ch)) {
135  if (!trim) {
136  buf[j++] = ch; bn--;
137  t = j;
138  }
139  break;
140  } else if (quotes && ch == '"') {
141  j = t = 0;
142  state = ST_COLLECT;
143  inquotes = 1;
144  break;
145  }
146  state = ST_COLLECT;
147  [[fallthrough]];
148  case ST_COLLECT:
149  if (inquotes) {
150  if (ch == '"') {
151  state = ST_END_QUOTE;
152  break;
153  }
154  } else if (ch == sep || ch == '\n') {
155  row[r++] = buf; rn--;
156  buf[t] = '\0'; bn--;
157  buf += t + 1;
158  j = t = 0;
159 
160  state = ST_START;
161  inquotes = 0;
162  if (ch == '\n') {
163  rn = 0;
164  }
165  break;
166  } else if (quotes && ch == '"') {
167  errno = EILSEQ;
168  GERB_MESSAGE(_("%d: unexpected quote in element"),errno);
169  return -1;
170  }
171  buf[j++] = ch; bn--;
172  if (!trim || isspace(ch) == 0) {
173  t = j;
174  }
175  break;
176  case ST_TAILSPACE:
177  case ST_END_QUOTE:
178  if (ch == sep || ch == '\n') {
179  row[r++] = buf; rn--;
180  buf[j] = '\0'; bn--;
181  buf += j + 1;
182  j = t = 0;
183  state = ST_START;
184  inquotes = 0;
185  if (ch == '\n') {
186  rn = 0;
187  }
188  break;
189  } else if (quotes && ch == '"' && state != ST_TAILSPACE) {
190  buf[j++] = '"'; bn--; /* nope, just an escaped quote */
191  t = j;
192  state = ST_COLLECT;
193  break;
194  } else if (isspace(ch)) {
195  state = ST_TAILSPACE;
196  break;
197  }
198  errno = EILSEQ;
199  GERB_MESSAGE(_("%d: bad end quote in element"), errno);
200  return -1;
201  }
202  }
203  if (ch <= 0) {
204  /* treat EOF as EOL, so the last record is accepted even when
205  \n is not present. Some users parse strings, not lines */
206  if(state == ST_TAILSPACE || state == ST_END_QUOTE
207  || (state == ST_COLLECT && ! inquotes)) {
208  row[r++] = buf; rn--;
209  buf[j] = '\0'; bn--;
210  buf += j + 1;
211  inquotes = 0;
212  rn = 0;
213  } else {
214  // AMSG("");
215  return -1;
216  }
217  }
218  if (bn == 0) {
219  errno = E2BIG;
220  GERB_MESSAGE("E2BIG %d ", errno);
221  return -1;
222  }
223  if (rn) {
224  if (inquotes) {
225  errno = EILSEQ;
226  GERB_MESSAGE("EILSEQ %d ", errno);
227  return -1;
228  }
229  row[r] = buf;
230  buf[t] = '\0';
231  }
232  // return error if we can't read the minimum number of fields
233  if (r < 4) {
234  return -1;
235  }
236  return in->count;
237 }/* csv_parse_str */
238 
239 
240 static int
241 csv_parse_wcs(struct winput *in, wchar_t *buf, size_t bn, wchar_t *row[], int rn, wint_t sep, int flags)
242 {
243  int trim, quotes, state, r, j, t, inquotes;
244  wint_t ch;
245 
246  trim = flags & CSV_TRIM;
247  quotes = flags & CSV_QUOTES;
248  state = ST_START;
249  inquotes = 0;
250  ch = r = j = t = 0;
251 
252  memset(row, 0, sizeof(wchar_t *) * rn);
253 
254  while (rn && bn && (ch = wnextch(in)) > 0) {
255  switch (state) {
256  case ST_START:
257  if (ch != L'\n' && ch != sep && iswspace(ch)) {
258  if (!trim) {
259  buf[j++] = ch; bn--;
260  t = j;
261  }
262  break;
263  } else if (quotes && ch == L'"') {
264  j = t = 0;
265  state = ST_COLLECT;
266  inquotes = 1;
267  break;
268  }
269  state = ST_COLLECT;
270  [[fallthrough]];
271  case ST_COLLECT:
272  if (inquotes) {
273  if (ch == L'"') {
274  state = ST_END_QUOTE;
275  break;
276  }
277  } else if (ch == sep || ch == L'\n') {
278  row[r++] = buf; rn--;
279  buf[t] = L'\0'; bn--;
280  buf += t + 1;
281  j = t = 0;
282  state = ST_START;
283  inquotes = 0;
284  if (ch == L'\n') {
285  rn = 0;
286  }
287  break;
288  } else if (quotes && ch == L'"') {
289  errno = EILSEQ;
290  GERB_MESSAGE(_("%d: unexpected quote in element"), errno);
291  return -1;
292  }
293  buf[j++] = ch; bn--;
294  if (!trim || iswspace(ch) == 0) {
295  t = j;
296  }
297  break;
298  case ST_TAILSPACE:
299  case ST_END_QUOTE:
300  if (ch == sep || ch == L'\n') {
301  row[r++] = buf; rn--;
302  buf[j] = L'\0'; bn--;
303  buf += j + 1;
304  j = t = 0;
305  state = ST_START;
306  inquotes = 0;
307  if (ch == L'\n') {
308  rn = 0;
309  }
310  break;
311  } else if (quotes && ch == L'"' && state != ST_TAILSPACE) {
312  buf[j++] = L'"'; bn--; /* nope, just an escaped quote */
313  t = j;
314  state = ST_COLLECT;
315  break;
316  } else if (iswspace(ch)) {
317  state = ST_TAILSPACE;
318  break;
319  }
320  errno = EILSEQ;
321  GERB_MESSAGE(_("%d: bad end quote in element "), errno);
322  return -1;
323  }
324  }
325  if (ch <= 0) {
326  /* treat EOF as EOL, so the last record is accepted even when
327  \n is not present. Some users parse strings, not lines */
328  if(state == ST_TAILSPACE || state == ST_END_QUOTE
329  || (state == ST_COLLECT && ! inquotes)) {
330  row[r++] = buf; rn--;
331  buf[j] = L'\0'; bn--;
332  buf += j + 1;
333  inquotes = 0;
334  rn = 0;
335  } else {
336  // AMSG("");
337  return -1;
338  }
339  }
340  if (bn == 0) {
341  errno = E2BIG;
342  GERB_MESSAGE("%d", errno);
343  return -1;
344  }
345  if (rn) {
346  if (inquotes) {
347  errno = EILSEQ;
348  GERB_MESSAGE("%d", errno);
349  return -1;
350  }
351  row[r] = buf;
352  buf[t] = L'\0';
353  }
354 
355  return in->count;
356 }/*csv_row_parse_wcs*/
357 
358 
359 int
360 csv_row_parse_wcs(const wchar_t *src, size_t sn, wchar_t *buf, size_t bn, wchar_t *row[], int rn, int sep, int trim)
361 {
362  struct winput input;
363  input.src = src;
364  input.sn = sn;
365  input.count = 0;
366  return csv_parse_wcs(&input, buf, bn, row, rn, (wint_t)sep, trim);
367 }/*csv_row_parse_wcs*/
368 
369 
370 int
371 csv_row_parse_str(const char *src, size_t sn, char *buf, size_t bn, char *row[], int rn, int sep, int trim)
372 {
373  struct sinput input;
374  input.in = NULL;
375  input.src = src;
376  input.sn = sn;
377  input.count = 0;
378  return csv_parse_str(&input, buf, bn, row, rn, sep, trim);
379 }/*csv_row_parse_str*/
380 
381 
382 int
383 csv_row_fread(FILE *in, char *buf, size_t bn, char *row[], int numcols, int sep, int trim)
384 {
385  struct sinput input;
386  input.in = in;
387  input.count = 0;
388  return csv_parse_str(&input, buf, bn, row, numcols, sep, trim);
389 }/*csv_row_fread*/
390 
Header info for the parsing support functions for the pick and place parser.
Sets up internal definitions for handling csv-style files.
The main header file for the libgerbv library.