gerbv  2.10.1-dev~93f1b5
csv.c
Go to the documentation of this file.
1 /* csv - read write comma separated value format
2  * Copyright (c) 2003 Michael B. Allen <mba2000 ioplex.com>
3  *
4  * The MIT License
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  */
24 
25 /* We (Juergen Haas and Tomasz Motylewski) execute our rights given above
26  * to distribute and sublicence this file (csv.c) and csv.h, csv_defines.h
27  * under General Public Licence version 2 or any later version.
28  *
29  * This file is derived from libmba : A library of generic C modules
30  * http://www.ioplex.com/~miallen/libmba/dl/libmba-0.8.9.tar.gz
31  */
32 
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif /* HAVE_CONFIG_H */
41 
42 #include <stdlib.h>
43 #include <string.h>
44 #include <stdio.h>
45 #include <ctype.h>
46 #include <errno.h>
47 #include <wchar.h>
48 #include <wctype.h>
49 
50 #include "common.h"
51 #include "gerbv.h"
52 #include "csv.h"
53 #include "csv_defines.h"
/* States of the field parser shared by csv_parse_str() and csv_parse_wcs(). */
enum {
    ST_START     = 1, /* at the beginning of a field, nothing collected yet */
    ST_COLLECT   = 2, /* collecting the characters of a field */
    ST_TAILSPACE = 3, /* skipping whitespace that follows a closing quote */
    ST_END_QUOTE = 4  /* just saw a '"' while inside a quoted field */
};

/* Generic "is whitespace" name mapped onto the wide-character classifier. */
#define istspace iswspace
59 
/*
 * Byte-oriented character source consumed by snextch().
 * Characters come from the stream when `in` is non-NULL, otherwise from
 * the memory span described by `src` / `sn`.
 */
struct sinput {
    FILE*       in;    /* stream to read with fgetc(), or NULL for memory input */
    const char* src;   /* next unread byte of the memory input */
    size_t      sn;    /* bytes still available at src */
    size_t      count; /* characters handed out so far */
};
66 
/* Wide-character source consumed by wnextch(); memory input only. */
struct winput {
    const wchar_t* src;   /* next unread wide character */
    size_t         sn;    /* wide characters still available at src */
    size_t         count; /* characters handed out so far */
};
72 
73 static int
74 snextch(struct sinput* in) {
75  int ch;
76 
77  if (in->in) {
78  if ((ch = fgetc(in->in)) == EOF) {
79  if (ferror(in->in)) {
80  GERB_MESSAGE("errno:%d", errno);
81  return -1;
82  }
83  return 0;
84  }
85  } else {
86  if (in->sn == 0) {
87  return 0;
88  }
89  ch = (unsigned char)*(in->src)++;
90  in->sn--;
91  }
92  in->count++;
93 
94  return ch;
95 } /* snextch */
96 
97 static int
98 wnextch(struct winput* in) {
99  int ch;
100 
101  if (in->sn == 0) {
102  return 0;
103  }
104  ch = *(in->src)++;
105  in->sn--;
106  in->count++;
107 
108  return ch;
109 } /* wnextch */
110 
111 static int
112 csv_parse_str(struct sinput* in, char* buf, size_t bn, char* row[], int rn, int sep, int flags) {
113  int trim, quotes, ch, state, r, j, t, inquotes;
114 
115  trim = flags & CSV_TRIM;
116  quotes = flags & CSV_QUOTES;
117  state = ST_START;
118  inquotes = 0;
119  ch = r = j = t = 0;
120 
121  memset(row, 0, sizeof(char*) * rn);
122 
123  while (rn && bn && (ch = snextch(in)) > 0) {
124  switch (state) {
125  case ST_START:
126  if (ch != '\n' && ch != sep && isspace(ch)) {
127  if (!trim) {
128  buf[j++] = ch;
129  bn--;
130  t = j;
131  }
132  break;
133  } else if (quotes && ch == '"') {
134  j = t = 0;
135  state = ST_COLLECT;
136  inquotes = 1;
137  break;
138  }
139  state = ST_COLLECT;
140  case ST_COLLECT:
141  if (inquotes) {
142  if (ch == '"') {
143  state = ST_END_QUOTE;
144  break;
145  }
146  } else if (ch == sep || ch == '\n') {
147  row[r++] = buf;
148  rn--;
149  buf[t] = '\0';
150  bn--;
151  buf += t + 1;
152  j = t = 0;
153 
154  state = ST_START;
155  inquotes = 0;
156  if (ch == '\n') {
157  rn = 0;
158  }
159  break;
160  } else if (quotes && ch == '"') {
161  errno = EILSEQ;
162  GERB_MESSAGE(_("%d: unexpected quote in element"), errno);
163  return -1;
164  }
165  buf[j++] = ch;
166  bn--;
167  if (!trim || isspace(ch) == 0) {
168  t = j;
169  }
170  break;
171  case ST_TAILSPACE:
172  case ST_END_QUOTE:
173  if (ch == sep || ch == '\n') {
174  row[r++] = buf;
175  rn--;
176  buf[j] = '\0';
177  bn--;
178  buf += j + 1;
179  j = t = 0;
180  state = ST_START;
181  inquotes = 0;
182  if (ch == '\n') {
183  rn = 0;
184  }
185  break;
186  } else if (quotes && ch == '"' && state != ST_TAILSPACE) {
187  buf[j++] = '"';
188  bn--; /* nope, just an escaped quote */
189  t = j;
190  state = ST_COLLECT;
191  break;
192  } else if (isspace(ch)) {
193  state = ST_TAILSPACE;
194  break;
195  }
196  errno = EILSEQ;
197  GERB_MESSAGE(_("%d: bad end quote in element"), errno);
198  return -1;
199  }
200  }
201  if (ch <= 0) {
202  /* treat EOF as EOL, so the last record is accepted even when
203  \n is not present. Some users parse strings, not lines */
204  if (state == ST_TAILSPACE || state == ST_END_QUOTE || (state == ST_COLLECT && !inquotes)) {
205  row[r++] = buf;
206  rn--;
207  buf[j] = '\0';
208  bn--;
209  buf += j + 1;
210  inquotes = 0;
211  rn = 0;
212  } else {
213  // AMSG("");
214  return -1;
215  }
216  }
217  if (bn == 0) {
218  errno = E2BIG;
219  GERB_MESSAGE("E2BIG %d ", errno);
220  return -1;
221  }
222  if (rn) {
223  if (inquotes) {
224  errno = EILSEQ;
225  GERB_MESSAGE("EILSEQ %d ", errno);
226  return -1;
227  }
228  row[r] = buf;
229  buf[t] = '\0';
230  }
231  // return error if we can't read the minimum number of fields
232  if (r < 4) {
233  return -1;
234  }
235  return in->count;
236 } /* csv_parse_str */
237 
238 static int
239 csv_parse_wcs(struct winput* in, wchar_t* buf, size_t bn, wchar_t* row[], int rn, wint_t sep, int flags) {
240  int trim, quotes, state, r, j, t, inquotes;
241  wint_t ch;
242 
243  trim = flags & CSV_TRIM;
244  quotes = flags & CSV_QUOTES;
245  state = ST_START;
246  inquotes = 0;
247  ch = r = j = t = 0;
248 
249  memset(row, 0, sizeof(wchar_t*) * rn);
250 
251  while (rn && bn && (ch = wnextch(in)) > 0) {
252  switch (state) {
253  case ST_START:
254  if (ch != L'\n' && ch != sep && iswspace(ch)) {
255  if (!trim) {
256  buf[j++] = ch;
257  bn--;
258  t = j;
259  }
260  break;
261  } else if (quotes && ch == L'"') {
262  j = t = 0;
263  state = ST_COLLECT;
264  inquotes = 1;
265  break;
266  }
267  state = ST_COLLECT;
268  case ST_COLLECT:
269  if (inquotes) {
270  if (ch == L'"') {
271  state = ST_END_QUOTE;
272  break;
273  }
274  } else if (ch == sep || ch == L'\n') {
275  row[r++] = buf;
276  rn--;
277  buf[t] = L'\0';
278  bn--;
279  buf += t + 1;
280  j = t = 0;
281  state = ST_START;
282  inquotes = 0;
283  if (ch == L'\n') {
284  rn = 0;
285  }
286  break;
287  } else if (quotes && ch == L'"') {
288  errno = EILSEQ;
289  GERB_MESSAGE(_("%d: unexpected quote in element"), errno);
290  return -1;
291  }
292  buf[j++] = ch;
293  bn--;
294  if (!trim || iswspace(ch) == 0) {
295  t = j;
296  }
297  break;
298  case ST_TAILSPACE:
299  case ST_END_QUOTE:
300  if (ch == sep || ch == L'\n') {
301  row[r++] = buf;
302  rn--;
303  buf[j] = L'\0';
304  bn--;
305  buf += j + 1;
306  j = t = 0;
307  state = ST_START;
308  inquotes = 0;
309  if (ch == L'\n') {
310  rn = 0;
311  }
312  break;
313  } else if (quotes && ch == L'"' && state != ST_TAILSPACE) {
314  buf[j++] = L'"';
315  bn--; /* nope, just an escaped quote */
316  t = j;
317  state = ST_COLLECT;
318  break;
319  } else if (iswspace(ch)) {
320  state = ST_TAILSPACE;
321  break;
322  }
323  errno = EILSEQ;
324  GERB_MESSAGE(_("%d: bad end quote in element "), errno);
325  return -1;
326  }
327  }
328  if (ch <= 0) {
329  /* treat EOF as EOL, so the last record is accepted even when
330  \n is not present. Some users parse strings, not lines */
331  if (state == ST_TAILSPACE || state == ST_END_QUOTE || (state == ST_COLLECT && !inquotes)) {
332  row[r++] = buf;
333  rn--;
334  buf[j] = L'\0';
335  bn--;
336  buf += j + 1;
337  inquotes = 0;
338  rn = 0;
339  } else {
340  // AMSG("");
341  return -1;
342  }
343  }
344  if (bn == 0) {
345  errno = E2BIG;
346  GERB_MESSAGE("%d", errno);
347  return -1;
348  }
349  if (rn) {
350  if (inquotes) {
351  errno = EILSEQ;
352  GERB_MESSAGE("%d", errno);
353  return -1;
354  }
355  row[r] = buf;
356  buf[t] = L'\0';
357  }
358 
359  return in->count;
360 } /*csv_row_parse_wcs*/
361 
362 int
363 csv_row_parse_wcs(const wchar_t* src, size_t sn, wchar_t* buf, size_t bn, wchar_t* row[], int rn, int sep, int trim) {
364  struct winput input;
365  input.src = src;
366  input.sn = sn;
367  input.count = 0;
368  return csv_parse_wcs(&input, buf, bn, row, rn, (wint_t)sep, trim);
369 } /*csv_row_parse_wcs*/
370 
371 int
372 csv_row_parse_str(const char* src, size_t sn, char* buf, size_t bn, char* row[], int rn, int sep, int trim) {
373  struct sinput input;
374  input.in = NULL;
375  input.src = src;
376  input.sn = sn;
377  input.count = 0;
378  return csv_parse_str(&input, buf, bn, row, rn, sep, trim);
379 } /*csv_row_parse_str*/
380 
381 int
382 csv_row_fread(FILE* in, char* buf, size_t bn, char* row[], int numcols, int sep, int trim) {
383  struct sinput input;
384  input.in = in;
385  input.count = 0;
386  return csv_parse_str(&input, buf, bn, row, numcols, sep, trim);
387 } /*csv_row_fread*/
Contains basic defines.
Header info for the parsing support functions for the pick and place parser.
Sets up internal definitions for handling csv-style files.
The main header file for the libgerbv library.