csvtable.c
Go to the documentation of this file.
1 
19 #ifdef STANDALONE
20 #include <sqlite3.h>
21 #else
22 #include <sqlite3ext.h>
23 static SQLITE_EXTENSION_INIT1
24 #endif
25 
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <limits.h>
30 
31 #ifdef _WIN32
32 #include <windows.h>
33 #define strcasecmp _stricmp
34 #define strncasecmp _strnicmp
35 #endif
36 
43 typedef struct csv_file {
44  FILE *f;
45  char *sep;
46  char *quot;
47  int isdos;
48  int maxl;
49  char *line;
50  long pos0;
51  int maxc;
52  int ncols;
53  char **cols;
54 } csv_file;
55 
62 typedef struct csv_guess_fmt {
63  int nlines;
64  int hist[256];
66 
73 typedef struct csv_vtab {
74  sqlite3_vtab vtab;
76  int convert;
77  char coltypes[1];
78 } csv_vtab;
79 
86 typedef struct {
87  sqlite3_vtab_cursor cursor;
88  long pos;
89 } csv_cursor;
90 
96 static void
97 append_free(char **in)
98 {
99  long *p = (long *) *in;
100 
101  if (p) {
102  p -= 2;
103  sqlite3_free(p);
104  *in = 0;
105  }
106 }
107 
117 static char *
118 append(char **in, char const *append, char quote)
119 {
120  long *p = (long *) *in;
121  long len, maxlen, actlen;
122  int i;
123  char *pp;
124  int nappend = append ? strlen(append) : 0;
125 
126  if (p) {
127  p -= 2;
128  maxlen = p[0];
129  actlen = p[1];
130  } else {
131  maxlen = actlen = 0;
132  }
133  len = nappend + actlen;
134  if (quote) {
135  len += 2;
136  for (i = 0; i < nappend; i++) {
137  if (append[i] == quote) {
138  len++;
139  }
140  }
141  } else if (!nappend) {
142  return *in;
143  }
144  if (len >= maxlen - 1) {
145  long *q;
146 
147  maxlen = (len + 0x03ff) & (~0x3ff);
148  q = (long *) sqlite3_realloc(p, maxlen + 1 + 2 * sizeof (long));
149  if (!q) {
150  return 0;
151  }
152  if (!p) {
153  q[1] = 0;
154  }
155  p = q;
156  p[0] = maxlen;
157  *in = (char *) (p + 2);
158  }
159  pp = *in + actlen;
160  if (quote) {
161  *pp++ = quote;
162  for (i = 0; i < nappend; i++) {
163  *pp++ = append[i];
164  if (append[i] == quote) {
165  *pp++ = quote;
166  }
167  }
168  *pp++ = quote;
169  *pp = '\0';
170  } else {
171  if (nappend) {
172  memcpy(pp, append, nappend);
173  pp += nappend;
174  *pp = '\0';
175  }
176  }
177  p[1] = pp - *in;
178  return *in;
179 }
180 
187 static char *
188 unquote(char const *in)
189 {
190  char c, *ret;
191  int i;
192 
193  ret = sqlite3_malloc(strlen(in) + 1);
194  if (ret) {
195  c = in[0];
196  if ((c == '"') || (c == '\'')) {
197  i = strlen(in + 1);
198  if ((i > 0) && (in[i] == c)) {
199  strcpy(ret, in + 1);
200  ret[i - 1] = '\0';
201  return ret;
202  }
203  }
204  strcpy(ret, in);
205  }
206  return ret;
207 }
208 
215 static int
216 maptype(char const *type)
217 {
218  int typelen = type ? strlen(type) : 0;
219 
220  if ((typelen >= 3) &&
221  (strncasecmp(type, "integer", 7) == 0)) {
222  return SQLITE_INTEGER;
223  }
224  if ((typelen >= 6) &&
225  (strncasecmp(type, "double", 6) == 0)) {
226  return SQLITE_FLOAT;
227  }
228  if ((typelen >= 5) &&
229  (strncasecmp(type, "float", 5) == 0)) {
230  return SQLITE_FLOAT;
231  }
232  if ((typelen >= 4) &&
233  (strncasecmp(type, "real", 4) == 0)) {
234  return SQLITE_FLOAT;
235  }
236  return SQLITE_TEXT;
237 }
238 
245 static void
246 conv_names(char **names, int ncols)
247 {
248  int i;
249  char *p, *q;
250  static const char ws[] = "\n\t\r\b\v ";
251 
252  if (!names || ncols <= 0) {
253  return;
254  }
255  for (i = 0; i < ncols; i++) {
256  p = names[i];
257 
258  while (*p) {
259  if (strchr(ws, *p)) {
260  *p++ = '_';
261  q = p;
262  while (*q && strchr(ws, *q)) {
263  ++q;
264  }
265  if (*q && q > p) {
266  strcpy(p, q);
267  }
268  continue;
269  }
270  ++p;
271  }
272  }
273 }
274 
285 static void
286 result_or_bind(sqlite3_context *ctx, sqlite3_stmt *stmt, int idx,
287  char *data, int len, int type)
288 {
289  char *endp;
290 
291  if (!data) {
292  if (ctx) {
293  sqlite3_result_null(ctx);
294  } else {
295  sqlite3_bind_null(stmt, idx);
296  }
297  return;
298  }
299  if (type == SQLITE_INTEGER) {
300  sqlite_int64 val;
301 #if defined(_WIN32) || defined(_WIN64)
302  char endc;
303 
304  if (sscanf(data, "%I64d%c", &val, &endc) == 1) {
305  if (ctx) {
306  sqlite3_result_int64(ctx, val);
307  } else {
308  sqlite3_bind_int64(stmt, idx, val);
309  }
310  return;
311  }
312 #else
313  endp = 0;
314 #ifdef __osf__
315  val = strtol(data, &endp, 0);
316 #else
317  val = strtoll(data, &endp, 0);
318 #endif
319  if (endp && (endp != data) && !*endp) {
320  if (ctx) {
321  sqlite3_result_int64(ctx, val);
322  } else {
323  sqlite3_bind_int64(stmt, idx, val);
324  }
325  return;
326  }
327 #endif
328  } else if (type == SQLITE_FLOAT) {
329  double val;
330 
331  endp = 0;
332  val = strtod(data, &endp);
333  if (endp && (endp != data) && !*endp) {
334  if (ctx) {
335  sqlite3_result_double(ctx, val);
336  } else {
337  sqlite3_bind_double(stmt, idx, val);
338  }
339  return;
340  }
341  }
342  if (ctx) {
343  sqlite3_result_text(ctx, data, len, SQLITE_TRANSIENT);
344  } else {
345  sqlite3_bind_text(stmt, idx, data, len, SQLITE_TRANSIENT);
346  }
347 }
348 
359 static int
360 process_col(sqlite3_context *ctx, sqlite3_stmt *stmt, int idx,
361  char *data, int type, int conv)
362 {
363  char c, *p;
364  const char flchars[] = "Ee+-.,0123456789";
365 
366  if (!data) {
367  goto putdata;
368  }
369 
370  /*
371  * Floating point number test,
372  * converts single comma to dot.
373  */
374  c = data[0];
375  if ((c != '\0') && strchr(flchars + 2, c)) {
376  p = data + 1;
377  while (*p && strchr(flchars, *p)) {
378  ++p;
379  }
380  if (*p == '\0') {
381  char *first = 0;
382  int n = 0;
383 
384  p = data;
385  while (p) {
386  p = strchr(p, ',');
387  if (!p) {
388  break;
389  }
390  if (++n == 1) {
391  first = p;
392  }
393  ++p;
394  }
395  if (first) {
396  *first = '.';
397  goto putdata;
398  }
399  }
400  }
401  if (conv) {
402  char *utf = sqlite3_malloc(strlen(data) * 2 + 2);
403 
404  if (utf) {
405  p = utf;
406  while ((c = *data) != '\0') {
407  if (((conv & 10) == 10) && (c == '\\')) {
408  if (data[1] == 'q') {
409  *p++ = '\'';
410  data += 2;
411  continue;
412  }
413  }
414  if ((conv & 2) && (c == '\\')) {
415  char c2 = data[1];
416 
417  switch (c2) {
418  case '\0':
419  goto convdone;
420  case 'n':
421  *p = '\n';
422  break;
423  case 't':
424  *p = '\t';
425  break;
426  case 'r':
427  *p = '\r';
428  break;
429  case 'f':
430  *p = '\f';
431  break;
432  case 'v':
433  *p = '\v';
434  break;
435  case 'b':
436  *p = '\b';
437  break;
438  case 'a':
439  *p = '\a';
440  break;
441  case '?':
442  *p = '\?';
443  break;
444  case '\'':
445  *p = '\'';
446  break;
447  case '"':
448  *p = '\"';
449  break;
450  case '\\':
451  *p = '\\';
452  break;
453  default:
454  *p++ = c;
455  *p = c2;
456  break;
457  }
458  p++;
459  data += 2;
460  continue;
461  }
462  if ((conv & 1) && (c & 0x80)) {
463  *p++ = 0xc0 | ((c >> 6) & 0x1f);
464  *p++ = 0x80 | (c & 0x3f);
465  } else {
466  *p++ = c;
467  }
468  data++;
469  }
470 convdone:
471  *p = '\0';
472  result_or_bind(ctx, stmt, idx, utf, p - utf, type);
473  sqlite3_free(utf);
474  return SQLITE_OK;
475  } else {
476  if (ctx) {
477  sqlite3_result_error(ctx, "out of memory", -1);
478  }
479  return SQLITE_NOMEM;
480  }
481  }
482 putdata:
483  result_or_bind(ctx, stmt, idx, data, -1, type);
484  return SQLITE_OK;
485 }
486 
495 static csv_file *
496 csv_open(const char *filename, const char *sep, const char *quot)
497 {
498  FILE *f;
499  csv_file *csv;
500 
501 #ifdef _WIN32
502  f = fopen(filename, "rb");
503 #else
504  f = fopen(filename, "r");
505 #endif
506  if (!f) {
507  return 0;
508  }
509  csv = sqlite3_malloc(sizeof (csv_file));
510  if (!csv) {
511 error0:
512  fclose(f);
513  return 0;
514  }
515  csv->f = f;
516  if (sep && sep[0]) {
517  csv->sep = sqlite3_malloc(strlen(sep) + 1);
518  if (!csv->sep) {
519 error1:
520  sqlite3_free(csv);
521  goto error0;
522  }
523  strcpy(csv->sep, sep);
524  } else {
525  csv->sep = 0;
526  }
527  if (quot && quot[0]) {
528  csv->quot = sqlite3_malloc(strlen(quot) + 1);
529  if (!csv->quot) {
530  if (csv->sep) {
531  sqlite3_free(csv->sep);
532  }
533  goto error1;
534  }
535  strcpy(csv->quot, quot);
536  } else {
537  csv->quot = 0;
538  }
539  csv->isdos = 0;
540  csv->maxl = 0;
541  csv->line = 0;
542  csv->pos0 = 0;
543  csv->maxc = 0;
544  csv->ncols = 0;
545  csv->cols = 0;
546  return csv;
547 }
548 
554 static void
556 {
557  if (csv) {
558  if (csv->sep) {
559  sqlite3_free(csv->sep);
560  }
561  if (csv->quot) {
562  sqlite3_free(csv->quot);
563  }
564  if (csv->line) {
565  sqlite3_free(csv->line);
566  }
567  if (csv->cols) {
568  sqlite3_free(csv->cols);
569  }
570  if (csv->f) {
571  fclose(csv->f);
572  }
573  sqlite3_free(csv);
574  }
575 }
576 
583 static int
585 {
586  if (csv && csv->f) {
587  return feof(csv->f);
588  }
589  return 1;
590 }
591 
599 static long
600 csv_seek(csv_file *csv, long pos)
601 {
602  if (csv && csv->f) {
603  return fseek(csv->f, pos, SEEK_SET);
604  }
605  return EOF;
606 }
607 
613 static void
615 {
616  if (csv && csv->f) {
617  csv_seek(csv, csv->pos0);
618  }
619 }
620 
627 static long
629 {
630  if (csv && csv->f) {
631  return ftell(csv->f);
632  }
633  return EOF;
634 }
635 
643 static int
645 {
646  int i, index = 0, inq = 0, c, col;
647  char *p, *sep;
648 
649  if (!csv || !csv->f) {
650  return EOF;
651  }
652  while (1) {
653  c = fgetc(csv->f);
654  if (c == EOF) {
655  if (index > 0) {
656  break;
657  }
658  return EOF;
659  }
660  if (c == '\0') {
661  continue;
662  }
663  if (c == '\r') {
664  int c2 = fgetc(csv->f);
665  c = '\n';
666 
667  if (c2 == '\n') {
668  csv->isdos = 1;
669  } else if (c2 != EOF) {
670  ungetc(c2, csv->f);
671  }
672  }
673  /* check for DOS EOF (Ctrl-Z) */
674  if (csv->isdos && (c == '\032')) {
675  int c2 = fgetc(csv->f);
676 
677  if (c2 == EOF) {
678  if (index > 0) {
679  break;
680  }
681  return EOF;
682  }
683  ungetc(c2, csv->f);
684  }
685  if (index >= csv->maxl - 1) {
686  int n = csv->maxl * 2;
687  char *line;
688 
689  if (n <= 0) {
690  n = 4096;
691  }
692  line = sqlite3_malloc(n);
693  if (!line) {
694  return EOF;
695  }
696  if (csv->line) {
697  memcpy(line, csv->line, index);
698  sqlite3_free(csv->line);
699  }
700  csv->maxl = n;
701  csv->line = line;
702  }
703  csv->line[index++] = c;
704  if (csv->quot && (p = strchr(csv->quot, c))) {
705  if (inq) {
706  if (*p == inq) {
707  inq = 0;
708  }
709  } else {
710  inq = *p;
711  }
712  }
713  if (!inq && (c == '\n')) {
714  break;
715  }
716  }
717  if (guess) {
718  for (i = 0; i < index; i++) {
719  guess->hist[csv->line[i] & 0xFF] += 1;
720  }
721  guess->nlines += 1;
722  csv->ncols = 0;
723  return 0;
724  }
725 
726  for (i = index - 1; i >= 0; i--) {
727  if (csv->line[i] != '\n') {
728  break;
729  }
730  }
731  index = i + 1;
732  csv->line[index] = '\0';
733  i = inq = col = 0;
734  sep = csv->sep ? csv->sep : ";";
735  if (!csv->cols) {
736  int n = 128;
737 
738  csv->cols = sqlite3_malloc(sizeof (char *) * n);
739  if (!csv->cols) {
740  return EOF;
741  }
742  csv->maxc = n;
743  }
744  csv->cols[col++] = csv->line;
745  while (i < index) {
746  if (csv->quot && (p = strchr(csv->quot, csv->line[i]))) {
747  if (inq) {
748  if (*p == inq) {
749  inq = 0;
750  }
751  } else {
752  inq = *p;
753  }
754  }
755  if (!inq && (p = strchr(sep, csv->line[i]))) {
756  p = csv->line + i;
757  *p = '\0';
758  if (col >= csv->maxc) {
759  int n = csv->maxc * 2;
760  char **cols;
761 
762  cols = sqlite3_realloc(csv->cols, sizeof (char *) * n);
763  if (!cols) {
764  return EOF;
765  }
766  csv->cols = cols;
767  csv->maxc = n;
768  }
769  csv->cols[col++] = p + 1;
770  }
771  ++i;
772  }
773  csv->ncols = col;
774 
775  /* strip off quotes */
776  if (csv->quot) {
777  for (i = 0; i < col; i++) {
778  if (*csv->cols[i]) {
779  p = strchr(csv->quot, *csv->cols[i]);
780  if (p) {
781  char *src, *dst;
782 
783  c = *p;
784  csv->cols[i] += 1;
785  sep = csv->cols[i] + strlen(csv->cols[i]) - 1;
786  if ((sep >= csv->cols[i]) && (*sep == c)) {
787  *sep = '\0';
788  }
789  /* collapse quote escape sequences */
790  src = csv->cols[i];
791  dst = 0;
792  while (*src) {
793  if ((*src == c) && (src[1] == c)) {
794  if (!dst) {
795  dst = src;
796  }
797  src++;
798  while (*src) {
799  *dst++ = *src++;
800  if (*src == c) {
801  --src;
802  break;
803  }
804  }
805  }
806  ++src;
807  }
808  if (dst) {
809  *dst++ = '\0';
810  }
811  }
812  }
813  }
814  }
815  return col;
816 }
817 
824 static int
826 {
827  if (csv && csv->cols) {
828  return csv->ncols;
829  }
830  return 0;
831 }
832 
840 static char *
841 csv_coldata(csv_file *csv, int n)
842 {
843  if (csv && csv->cols && (n >= 0) && (n < csv->ncols)) {
844  return csv->cols[n];
845  }
846  return 0;
847 }
848 
855 static int
857 {
858  csv_guess_fmt guess;
859  int i, n;
860  char *p, sep[32], quot[4];
861  const struct {
862  int c;
863  int min;
864  } sep_test[] = {
865  { ',', 2 },
866  { ';', 2 },
867  { '\t', 2 },
868  { ' ', 4 },
869  { '|', 2 }
870  };
871 
872  if (!csv) {
873  return EOF;
874  }
875  memset(&guess, 0, sizeof (guess));
876  csv->pos0 = 0;
877  csv_rewind(csv);
878  for (i = n = 0; i < 10; i++) {
879  n = csv_getline(csv, &guess);
880  if (n == EOF) {
881  break;
882  }
883  }
884  csv_rewind(csv);
885  if (n && !i) {
886  return EOF;
887  }
888  p = quot;
889  n = '"';
890  if (guess.hist[n] > 1) {
891  *p++ = n;
892  }
893  n = '\'';
894  if (guess.hist[n] > 1) {
895  *p++ = n;
896  }
897  *p = '\0';
898  p = sep;
899  for (i = 0; i < sizeof (sep_test) / sizeof (sep_test[0]); i++) {
900  if (guess.hist[sep_test[i].c] > sep_test[i].min * guess.nlines) {
901  *p++ = sep_test[i].c;
902  }
903  }
904  *p = '\0';
905  if (quot[0]) {
906  p = sqlite3_malloc(strlen(quot) + 1);
907  if (p) {
908  strcpy(p, quot);
909  if (csv->quot) {
910  sqlite3_free(csv->quot);
911  }
912  csv->quot = p;
913  } else {
914  return EOF;
915  }
916  }
917  if (sep[0]) {
918  p = sqlite3_malloc(strlen(sep) + 1);
919  if (p) {
920  strcpy(p, sep);
921  if (csv->sep) {
922  sqlite3_free(csv->sep);
923  }
924  csv->sep = p;
925  } else {
926  return EOF;
927  }
928  }
929  return 0;
930 }
931 
965 static int
966 csv_vtab_connect(sqlite3* db, void *aux, int argc, const char * const *argv,
967  sqlite3_vtab **vtabp, char **errp)
968 {
969  csv_file *csv;
970  int rc = SQLITE_ERROR, i, ncnames, row1;
971  char **cnames, *schema = 0, **nargv;
972  csv_vtab *vtab;
973 
974  if (argc < 4) {
975  *errp = sqlite3_mprintf("input file name missing");
976  return SQLITE_ERROR;
977  }
978  nargv = sqlite3_malloc(sizeof (char *) * argc);
979  memset(nargv, 0, sizeof (char *) * argc);
980  for (i = 3; i < argc; i++) {
981  nargv[i] = unquote(argv[i]);
982  }
983  csv = csv_open(nargv[3], (argc > 6) ? nargv[6] : 0,
984  (argc > 7) ? nargv[7] : 0);
985  if (!csv) {
986  *errp = sqlite3_mprintf("unable to open input file");
987 cleanup:
988  append_free(&schema);
989  for (i = 3; i < argc; i++) {
990  if (nargv[i]) {
991  sqlite3_free(nargv[i]);
992  }
993  }
994  return rc;
995  }
996  if (!csv->sep && !csv->quot) {
997  csv_guess(csv);
998  }
999  csv->pos0 = 0;
1000  row1 = 0;
1001  if (argc > 4) {
1002  row1 = strtol(nargv[4], 0, 10);
1003  }
1004  if (row1) {
1005  /* use column names from 1st row */
1006  csv_getline(csv, 0);
1007  if (csv->ncols < 1) {
1008  csv_close(csv);
1009  *errp = sqlite3_mprintf("unable to get column names");
1010  goto cleanup;
1011  }
1012  csv->pos0 = csv_tell(csv);
1013  csv_rewind(csv);
1014  ncnames = csv_ncols(csv);
1015  cnames = csv->cols;
1016  } else if (argc > 8) {
1017  ncnames = argc - 8;
1018  cnames = (char **) nargv + 8;
1019  } else {
1020  /* use number of columns from 1st row */
1021  csv_getline(csv, 0);
1022  if (csv->ncols < 1) {
1023  csv_close(csv);
1024  *errp = sqlite3_mprintf("unable to get column names");
1025  goto cleanup;
1026  }
1027  csv_rewind(csv);
1028  ncnames = csv_ncols(csv);
1029  cnames = 0;
1030  }
1031  vtab = sqlite3_malloc(sizeof(csv_vtab) + ncnames);
1032  if (!vtab) {
1033  csv_close(csv);
1034  *errp = sqlite3_mprintf("out of memory");
1035  goto cleanup;
1036  }
1037  memset(vtab, 0, sizeof (*vtab));
1038  vtab->convert = 0;
1039  if (argc > 5) {
1040  vtab->convert = strtol(nargv[5], 0, 10);
1041  if (row1 && (vtab->convert & 4)) {
1042  conv_names(cnames, ncnames);
1043  }
1044  }
1045  vtab->csv = csv;
1046  append(&schema, "CREATE TABLE x(", 0);
1047  for (i = 0; i < ncnames; i++) {
1048  vtab->coltypes[i] = SQLITE_TEXT;
1049  if (!cnames || !cnames[i]) {
1050  char colname[64];
1051 
1052  sprintf(colname, "column_%d", i + 1);
1053  append(&schema, colname, '"');
1054  } else if (row1 > 0) {
1055  append(&schema, cnames[i], '"');
1056  } else if (row1 < 0) {
1057  append(&schema, cnames[i], '"');
1058  if (i + 8 < argc) {
1059  char *type = nargv[i + 8];
1060 
1061  append(&schema, " ", 0);
1062  append(&schema, type, 0);
1063  vtab->coltypes[i] = maptype(type);
1064  }
1065  } else {
1066  char *type = cnames[i];
1067 
1068  append(&schema, cnames[i], 0);
1069  while (*type && !strchr(" \t", *type)) {
1070  type++;
1071  }
1072  while (*type && strchr(" \t", *type)) {
1073  type++;
1074  }
1075  vtab->coltypes[i] = maptype(type);
1076  }
1077  if (i < ncnames - 1) {
1078  append(&schema, ",", 0);
1079  }
1080  }
1081  append(&schema, ")", 0);
1082  rc = sqlite3_declare_vtab(db, schema);
1083  if (rc != SQLITE_OK) {
1084  csv_close(csv);
1085  sqlite3_free(vtab);
1086  *errp = sqlite3_mprintf("table definition failed, error %d, "
1087  "schema '%s'", rc, schema);
1088  goto cleanup;
1089  }
1090  *vtabp = &vtab->vtab;
1091  *errp = 0;
1092  goto cleanup;
1093 }
1094 
1106 static int
1107 csv_vtab_create(sqlite3* db, void *aux, int argc,
1108  const char *const *argv,
1109  sqlite3_vtab **vtabp, char **errp)
1110 {
1111  return csv_vtab_connect(db, aux, argc, argv, vtabp, errp);
1112 }
1113 
1120 static int
1121 csv_vtab_disconnect(sqlite3_vtab *vtab)
1122 {
1123  csv_vtab *tab = (csv_vtab *) vtab;
1124 
1125  csv_close(tab->csv);
1126  sqlite3_free(tab);
1127  return SQLITE_OK;
1128 }
1129 
1136 static int
1137 csv_vtab_destroy(sqlite3_vtab *vtab)
1138 {
1139  return csv_vtab_disconnect(vtab);
1140 }
1141 
1149 static int
1150 csv_vtab_bestindex(sqlite3_vtab *vtab, sqlite3_index_info *info)
1151 {
1152  return SQLITE_OK;
1153 }
1154 
1162 static int
1163 csv_vtab_open(sqlite3_vtab *vtab, sqlite3_vtab_cursor **cursorp)
1164 {
1165  csv_cursor *cur = sqlite3_malloc(sizeof(*cur));
1166  csv_vtab *tab = (csv_vtab *) vtab;
1167 
1168  if (!cur) {
1169  return SQLITE_ERROR;
1170  }
1171  cur->cursor.pVtab = vtab;
1172  csv_rewind(tab->csv);
1173  cur->pos = csv_tell(tab->csv);
1174  *cursorp = &cur->cursor;
1175  return SQLITE_OK;
1176 }
1177 
1184 static int
1185 csv_vtab_close(sqlite3_vtab_cursor *cursor)
1186 {
1187  sqlite3_free(cursor);
1188  return SQLITE_OK;
1189 }
1190 
1197 static int
1198 csv_vtab_next(sqlite3_vtab_cursor *cursor)
1199 {
1200  csv_cursor *cur = (csv_cursor *) cursor;
1201  csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
1202 
1203  cur->pos = csv_tell(tab->csv);
1204  csv_getline(tab->csv, 0);
1205  return SQLITE_OK;
1206 }
1207 
1218 static int
1219 csv_vtab_filter(sqlite3_vtab_cursor *cursor, int idxNum,
1220  const char *idxStr, int argc, sqlite3_value **argv)
1221 {
1222  csv_cursor *cur = (csv_cursor *) cursor;
1223  csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
1224 
1225  csv_rewind(tab->csv);
1226  return csv_vtab_next(cursor);
1227 }
1228 
1235 static int
1236 csv_vtab_eof(sqlite3_vtab_cursor *cursor)
1237 {
1238  csv_cursor *cur = (csv_cursor *) cursor;
1239  csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
1240 
1241  return csv_eof(tab->csv);
1242 }
1243 
1252 static int
1253 csv_vtab_column(sqlite3_vtab_cursor *cursor, sqlite3_context *ctx, int n)
1254 {
1255  csv_cursor *cur = (csv_cursor *) cursor;
1256  csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
1257  char *data = csv_coldata(tab->csv, n);
1258 
1259  return process_col(ctx, 0, 0, data, tab->coltypes[n], tab->convert);
1260 }
1261 
1269 static int
1270 csv_vtab_rowid(sqlite3_vtab_cursor *cursor, sqlite_int64 *rowidp)
1271 {
1272  csv_cursor *cur = (csv_cursor *) cursor;
1273 
1274  *rowidp = cur->pos;
1275  return SQLITE_OK;
1276 }
1277 
1278 #if (SQLITE_VERSION_NUMBER > 3004000)
1279 
1286 static int
1287 csv_vtab_rename(sqlite3_vtab *vtab, const char *newname)
1288 {
1289  return SQLITE_OK;
1290 }
1291 
1292 #endif
1293 
1298 static const sqlite3_module csv_vtab_mod = {
1299  1, /* iVersion */
1300  csv_vtab_create, /* xCreate */
1301  csv_vtab_connect, /* xConnect */
1302  csv_vtab_bestindex, /* xBestIndex */
1303  csv_vtab_disconnect, /* xDisconnect */
1304  csv_vtab_destroy, /* xDestroy */
1305  csv_vtab_open, /* xOpen */
1306  csv_vtab_close, /* xClose */
1307  csv_vtab_filter, /* xFilter */
1308  csv_vtab_next, /* xNext */
1309  csv_vtab_eof, /* xEof */
1310  csv_vtab_column, /* xColumn */
1311  csv_vtab_rowid, /* xRowid */
1312  0, /* xUpdate */
1313  0, /* xBegin */
1314  0, /* xSync */
1315  0, /* xCommit */
1316  0, /* xRollback */
1317  0, /* xFindFunction */
1318 #if (SQLITE_VERSION_NUMBER > 3004000)
1319  csv_vtab_rename, /* xRename */
1320 #endif
1321 };
1322 
1350 static void
1351 csv_import_func(sqlite3_context *ctx, int argc, sqlite3_value **argv)
1352 {
1353  csv_file *csv;
1354  int rc, i, ncnames, row1, convert = 0, useargs = 0;
1355  char *tname, *fname, *sql = 0, **cnames, *coltypes = 0;
1356  sqlite3 *db = (sqlite3 *) sqlite3_user_data(ctx);
1357  sqlite3_stmt *stmt = 0;
1358 
1359  if (argc < 2) {
1360  sqlite3_result_error(ctx, "need at least 2 arguments", -1);
1361  return;
1362  }
1363  tname = (char *) sqlite3_value_text(argv[0]);
1364  if (!tname) {
1365  sqlite3_result_error(ctx, "table name is NULL", -1);
1366  return;
1367  }
1368  fname = (char *) sqlite3_value_text(argv[1]);
1369  if (!fname) {
1370  sqlite3_result_error(ctx, "file name is NULL", -1);
1371  return;
1372  }
1373  csv = csv_open(fname,
1374  (argc > 4) ? (char *) sqlite3_value_text(argv[4]) : 0,
1375  (argc > 5) ? (char *) sqlite3_value_text(argv[5]) : 0);
1376  if (!csv) {
1377  sqlite3_result_error(ctx, "unable to open input file", -1);
1378 cleanup:
1379  if (stmt) {
1380  sqlite3_finalize(stmt);
1381  }
1382  append_free(&sql);
1383  if (coltypes) {
1384  sqlite3_free(coltypes);
1385  }
1386  if (csv) {
1387  csv_close(csv);
1388  }
1389  return;
1390  }
1391  if (!csv->sep && !csv->quot) {
1392  csv_guess(csv);
1393  }
1394  csv->pos0 = 0;
1395  row1 = 0;
1396  if (argc > 2) {
1397  row1 = sqlite3_value_int(argv[2]);
1398  }
1399  if (row1) {
1400  /* use column names from 1st row */
1401  csv_getline(csv, 0);
1402  if (csv->ncols < 1) {
1403  sqlite3_result_error(ctx, "unable to get column names", -1);
1404  goto cleanup;
1405  }
1406  csv->pos0 = csv_tell(csv);
1407  csv_rewind(csv);
1408  ncnames = csv_ncols(csv);
1409  cnames = csv->cols;
1410  } else if (argc > 6) {
1411  ncnames = argc - 6;
1412  cnames = 0;
1413  useargs = 1;
1414  } else {
1415  /* use number of columns from 1st row */
1416  csv_getline(csv, 0);
1417  if (csv->ncols < 1) {
1418  sqlite3_result_error(ctx, "unable to get column names", -1);
1419  goto cleanup;
1420  }
1421  csv_rewind(csv);
1422  ncnames = csv_ncols(csv);
1423  cnames = 0;
1424  }
1425  convert = 0;
1426  if (argc > 3) {
1427  convert = sqlite3_value_int(argv[3]);
1428  if (row1 && (convert & 4)) {
1429  conv_names(cnames, ncnames);
1430  }
1431  }
1432  /* test if table exists */
1433  append(&sql, "PRAGMA table_info(", 0);
1434  append(&sql, tname, '"');
1435  append(&sql, ")", 0);
1436  if (!sql) {
1437 oom:
1438  sqlite3_result_error(ctx, "out of memory", -1);
1439  goto cleanup;
1440  }
1441  rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
1442  append_free(&sql);
1443  if (rc != SQLITE_OK) {
1444 prepfail:
1445  sqlite3_result_error(ctx, "prepare failed", -1);
1446  goto cleanup;
1447  }
1448  /* find number of colums */
1449  i = 0;
1450  rc = sqlite3_step(stmt);
1451  while (rc == SQLITE_ROW) {
1452  i++;
1453  rc = sqlite3_step(stmt);
1454  }
1455  if (rc != SQLITE_DONE) {
1456 selfail:
1457  sqlite3_result_error(ctx, "select failed", -1);
1458  goto cleanup;
1459  }
1460  if (i > 0) {
1461  /* get column types */
1462  sqlite3_reset(stmt);
1463  ncnames = i;
1464  coltypes = sqlite3_malloc(ncnames);
1465  if (!coltypes) {
1466  goto oom;
1467  }
1468  rc = sqlite3_step(stmt);
1469  i = 0;
1470  while (rc == SQLITE_ROW) {
1471  coltypes[i++] = maptype((char *) sqlite3_column_text(stmt, 2));
1472  rc = sqlite3_step(stmt);
1473  }
1474  if (rc != SQLITE_DONE) {
1475  goto selfail;
1476  }
1477  } else {
1478  /* create new table */
1479  sqlite3_finalize(stmt);
1480  stmt = 0;
1481  coltypes = sqlite3_malloc(ncnames);
1482  if (!coltypes) {
1483  goto oom;
1484  }
1485  append(&sql, "CREATE TABLE ", 0);
1486  append(&sql, tname, '"');
1487  append(&sql, "(", 0);
1488  for (i = 0; i < ncnames; i++) {
1489  char colname[64];
1490 
1491  coltypes[i] = SQLITE_TEXT;
1492  if (useargs) {
1493  char *type = (char *) sqlite3_value_text(argv[i + 6]);
1494 
1495  if (!type) {
1496  goto defcol;
1497  }
1498  append(&sql, type, 0);
1499  while (*type && !strchr(" \t", *type)) {
1500  type++;
1501  }
1502  while (*type && strchr(" \t", *type)) {
1503  type++;
1504  }
1505  coltypes[i] = maptype(type);
1506  } else if (!cnames || !cnames[i]) {
1507 defcol:
1508  sprintf(colname, "column_%d", i + 1);
1509  append(&sql, colname, '"');
1510  } else if (row1 > 0) {
1511  append(&sql, cnames[i], '"');
1512  } else if (row1 < 0) {
1513  append(&sql, cnames[i], '"');
1514  if (i + 6 < argc) {
1515  char *type = (char *) sqlite3_value_text(argv[i + 6]);
1516 
1517  if (type) {
1518  append(&sql, " ", 0);
1519  append(&sql, type, 0);
1520  coltypes[i] = maptype(type);
1521  }
1522  }
1523  }
1524  if (i < ncnames - 1) {
1525  append(&sql, ",", 0);
1526  }
1527  }
1528  append(&sql, ")", 0);
1529  rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
1530  if (rc != SQLITE_OK) {
1531  goto prepfail;
1532  }
1533  rc = sqlite3_step(stmt);
1534  if ((rc != SQLITE_DONE) && (rc != SQLITE_OK)) {
1535  sqlite3_result_error(ctx, "create table failed", -1);
1536  goto cleanup;
1537  }
1538  append_free(&sql);
1539  }
1540  sqlite3_finalize(stmt);
1541  stmt = 0;
1542  /* make INSERT statement */
1543  append(&sql, "INSERT INTO ", 0);
1544  append(&sql, tname, '"');
1545  append(&sql, " VALUES(", 0);
1546  for (i = 0; i < ncnames; i++) {
1547  append(&sql, (i < ncnames - 1) ? "?," : "?)", 0);
1548  }
1549  rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
1550  if (rc != SQLITE_OK) {
1551  goto prepfail;
1552  }
1553  append_free(&sql);
1554  /* import the CSV file */
1555  row1 = 0;
1556  while (csv_getline(csv, 0) != EOF) {
1557  for (i = 0; i < ncnames; i++) {
1558  char *data = csv_coldata(csv, i);
1559 
1560  rc = process_col(0, stmt, i + 1, data, coltypes[i], convert);
1561  if (rc != SQLITE_OK) {
1562  goto inserr;
1563  }
1564  }
1565  rc = sqlite3_step(stmt);
1566  if ((rc != SQLITE_DONE) && (rc != SQLITE_OK)) {
1567  if ((rc != SQLITE_MISMATCH) && (rc != SQLITE_CONSTRAINT)) {
1568 inserr:
1569  sqlite3_result_error(ctx, "insert failed", -1);
1570  goto cleanup;
1571  }
1572  } else {
1573  row1++;
1574  }
1575  sqlite3_reset(stmt);
1576  }
1577  sqlite3_result_int(ctx, row1);
1578  goto cleanup;
1579 }
1580 
1587 #ifndef STANDALONE
1588 static
1589 #endif
1590 int
1591 csv_vtab_init(sqlite3 *db)
1592 {
1593  sqlite3_create_function(db, "import_csv", -1, SQLITE_UTF8,
1594  (void *) db, csv_import_func, 0, 0);
1595  return sqlite3_create_module(db, "csvtable", &csv_vtab_mod, 0);
1596 }
1597 
1598 #ifndef STANDALONE
1599 
1608 int
1609 sqlite3_extension_init(sqlite3 *db, char **errmsg,
1610  const sqlite3_api_routines *api)
1611 {
1612  SQLITE_EXTENSION_INIT2(api);
1613  return csv_vtab_init(db);
1614 }
1615 
1616 #endif
static int csv_eof(csv_file *csv)
Test EOF on CSV file handle.
Definition: csvtable.c:584
static char * append(char **in, char const *append, char quote)
Append a string to dynamically allocated string buffer with optional quoting.
Definition: csvtable.c:118
static int csv_vtab_filter(sqlite3_vtab_cursor *cursor, int idxNum, const char *idxStr, int argc, sqlite3_value **argv)
Filter function for virtual table.
Definition: csvtable.c:1219
int sqlite3_extension_init(sqlite3 *db, char **errmsg, const sqlite3_api_routines *api)
Initializer for SQLite extension load mechanism.
Definition: csvtable.c:1609
static int csv_vtab_init(sqlite3 *db)
Module initializer creating SQLite functions and modules.
Definition: csvtable.c:1591
static void csv_close(csv_file *csv)
Close CSV file handle.
Definition: csvtable.c:555
csv_file * csv
CSV file handle.
Definition: csvtable.c:75
static int csv_guess(csv_file *csv)
Guess CSV layout of CSV file handle.
Definition: csvtable.c:856
static int csv_getline(csv_file *csv, csv_guess_fmt *guess)
Read and process one line of CSV file handle.
Definition: csvtable.c:644
Driver internal structure representing SQL statement (HSTMT).
Definition: sqlite3odbc.h:230
static void csv_rewind(csv_file *csv)
Rewind CSV file handle.
Definition: csvtable.c:614
static void csv_import_func(sqlite3_context *ctx, int argc, sqlite3_value **argv)
Import CSV file as table into database.
Definition: csvtable.c:1351
static int csv_vtab_create(sqlite3 *db, void *aux, int argc, const char *const *argv, sqlite3_vtab **vtabp, char **errp)
Create virtual table.
Definition: csvtable.c:1107
static int process_col(sqlite3_context *ctx, sqlite3_stmt *stmt, int idx, char *data, int type, int conv)
Process one column of the current row.
Definition: csvtable.c:360
static int csv_vtab_destroy(sqlite3_vtab *vtab)
Destroy virtual table.
Definition: csvtable.c:1137
char * sep
column separator characters
Definition: csvtable.c:45
char * quot
text quoting characters
Definition: csvtable.c:46
int hist[256]
Definition: csvtable.c:64
static long csv_seek(csv_file *csv, long pos)
Position CSV file handle.
Definition: csvtable.c:600
static char * unquote(char const *in)
Strip off quotes given string.
Definition: csvtable.c:188
Info to guess CSV layout.
Definition: csvtable.c:62
static char * csv_coldata(csv_file *csv, int n)
Return nth column of current row in CSV file.
Definition: csvtable.c:841
Structure to implement CSV file handle.
Definition: csvtable.c:43
static int csv_vtab_eof(sqlite3_vtab_cursor *cursor)
Return end of table state of virtual table cursor.
Definition: csvtable.c:1236
static const sqlite3_module csv_vtab_mod
SQLite module descriptor.
Definition: csvtable.c:1298
long pos0
file position for rewind
Definition: csvtable.c:50
static int csv_ncols(csv_file *csv)
Return number of columns of current row in CSV file.
Definition: csvtable.c:825
static csv_file * csv_open(const char *filename, const char *sep, const char *quot)
Open CSV file for reading and return handle to it.
Definition: csvtable.c:496
int ncols
number of columns
Definition: csvtable.c:52
long pos
CSV file position.
Definition: csvtable.c:88
static int maptype(char const *type)
Map string to SQLite data type.
Definition: csvtable.c:216
struct csv_vtab csv_vtab
sqlite3_vtab_cursor cursor
SQLite virtual table cursor.
Definition: csvtable.c:87
static int csv_vtab_open(sqlite3_vtab *vtab, sqlite3_vtab_cursor **cursorp)
Open virtual table and return cursor.
Definition: csvtable.c:1163
char coltypes[1]
column types
Definition: csvtable.c:77
char ** cols
column buffer
Definition: csvtable.c:53
FILE * f
CSV file.
Definition: csvtable.c:44
static int csv_vtab_connect(sqlite3 *db, void *aux, int argc, const char *const *argv, sqlite3_vtab **vtabp, char **errp)
Connect to virtual table.
Definition: csvtable.c:966
static int csv_vtab_close(sqlite3_vtab_cursor *cursor)
Close virtual table cursor.
Definition: csvtable.c:1185
static int csv_vtab_rowid(sqlite3_vtab_cursor *cursor, sqlite_int64 *rowidp)
Return current rowid of virtual table cursor.
Definition: csvtable.c:1270
int isdos
true, when DOS format detected
Definition: csvtable.c:47
int convert
convert flags
Definition: csvtable.c:76
#define min(a, b)
Definition: sqlite3odbc.c:225
static int csv_vtab_column(sqlite3_vtab_cursor *cursor, sqlite3_context *ctx, int n)
Return column data of virtual table.
Definition: csvtable.c:1253
Structure to describe a CSV virtual table.
Definition: csvtable.c:73
static long csv_tell(csv_file *csv)
Return current position of CSV file handle.
Definition: csvtable.c:628
Structure to describe CSV virtual table cursor.
Definition: csvtable.c:86
char * line
line buffer
Definition: csvtable.c:49
static void append_free(char **in)
Free dynamically allocated string buffer.
Definition: csvtable.c:97
int maxc
max.
Definition: csvtable.c:51
struct csv_guess_fmt csv_guess_fmt
int maxl
max.
Definition: csvtable.c:48
struct csv_file csv_file
static int csv_vtab_disconnect(sqlite3_vtab *vtab)
Disconnect virtual table.
Definition: csvtable.c:1121
static void result_or_bind(sqlite3_context *ctx, sqlite3_stmt *stmt, int idx, char *data, int len, int type)
Make result data or parameter binding accoring to type.
Definition: csvtable.c:286
static int csv_vtab_bestindex(sqlite3_vtab *vtab, sqlite3_index_info *info)
Determines information for filter function according to constraints.
Definition: csvtable.c:1150
static int csv_vtab_next(sqlite3_vtab_cursor *cursor)
Retrieve next row from virtual table cursor.
Definition: csvtable.c:1198
static void conv_names(char **names, int ncols)
Convert and collapse white space in column names to underscore.
Definition: csvtable.c:246
sqlite3_vtab vtab
SQLite virtual table.
Definition: csvtable.c:74

Generated on Mon Aug 22 2016 by doxygen.
Contact: chw@ch-werner.de