Pass complete concatenated text in end callback.
[gedcom-parse.git] / gedcom / gedcom_lex_common.c
index 045cea22d3ba46e2bb3c64500efafe5c784263d1..902225f2f64b678827152cf8dc210ddf83d12059 100644 (file)
@@ -21,7 +21,7 @@
 /* $Id$ */
 /* $Name$ */
 
-#ifndef IN_LEX
+#if LEX_SECTION == 1
 
 #include "gedcom_internal.h"
 #include "multilex.h"
 #include "gedcom.h"
 #include "gedcom.tabgen.h"
 #include "compat.h"
+#include "utf8.h"
 
 static size_t encoding_width;
 static int current_level = -1;
 static int level_diff=MAXGEDCLEVEL;
 static size_t line_len = 0;
 
-static char ptr_buf[MAXGEDCPTRLEN * UTF_FACTOR + 1];
-static char tag_buf[MAXGEDCTAGLEN * UTF_FACTOR + 1];
-static char str_buf[MAXGEDCLINELEN * UTF_FACTOR + 1];
+static struct conv_buffer* ptr_buffer = NULL;
+static struct conv_buffer* tag_buffer = NULL;
+static struct conv_buffer* str_buffer = NULL;
+
+/* Initial lengths handed to create_conv_buffer() for the pointer, tag and
+   string conversion buffers.  The expansions are parenthesized so that each
+   macro behaves as a single operand inside any larger expression. */
+#define INITIAL_PTR_BUFFER_LEN (MAXGEDCPTRLEN * UTF_FACTOR + 1)
+#define INITIAL_TAG_BUFFER_LEN (MAXGEDCTAGLEN * UTF_FACTOR + 1)
+#define INITIAL_STR_BUFFER_LEN (MAXGEDCLINELEN * UTF_FACTOR + 1)
 
 #ifdef LEXER_TEST 
 YYSTYPE gedcom_lval;
@@ -136,17 +141,14 @@ static void error_unexpected_character(const char* str, char ch)
   gedcom_error(_("Unexpected character: '%s' (0x%02x)"), str, ch);
 }
 
-static void yylex_cleanup()
-{
-  /* fix memory leak in lex */
-  yy_delete_buffer(yy_current_buffer);
-  yy_current_buffer = NULL;
-}
+/* When non-zero, TO_INTERNAL bypasses the iconv conversion and yields the
+   input string unchanged (for input that is already UTF-8 coming from the
+   program) */
+static int dummy_conv = 0;
 
-#else  /* of #ifndef IN_LEX */
+#elif LEX_SECTION == 2
 
 #define TO_INTERNAL(STR,OUTBUF) \
-  to_internal(STR, yyleng, OUTBUF, sizeof(OUTBUF))
+  (dummy_conv ? STR : to_internal(STR, yyleng, OUTBUF))
 
 #define INIT_LINE_LEN \
   line_len = 0;
@@ -164,9 +166,10 @@ static void yylex_cleanup()
 
 #define MKTAGACTION(THETAG)                                                  \
   { CHECK_LINE_LEN;                                                          \
-    gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buf);                   \
+    gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buffer);                \
     gedcom_lval.tag.value  = TAG_##THETAG;                                   \
     BEGIN(NORMAL);                                                           \
+    line_no++;                                                               \
     return TAG_##THETAG;                                                     \
   }
 
@@ -234,10 +237,11 @@ static void yylex_cleanup()
 
 
 #define ACTION_DIGITS                                                         \
-   { int level = atoi(TO_INTERNAL(yytext, str_buf));                          \
+   { int level = atoi(TO_INTERNAL(yytext, str_buffer));                       \
      CHECK_LINE_LEN;                                                          \
      if ((level < 0) || (level > MAXGEDCLEVEL)) {                             \
        error_level_out_of_range();                                            \
+       line_no++;                                                             \
        return BADTOKEN;                                                       \
      }                                                                        \
      level_diff = level - current_level;                                      \
@@ -255,6 +259,7 @@ static void yylex_cleanup()
      else {                                                                   \
        /* should never happen (error to GEDCOM spec) */                       \
        error_level_too_high(level_diff);                                      \
+       line_no++;                                                             \
        return BADTOKEN;                                                       \
      }                                                                        \
    } 
@@ -263,19 +268,21 @@ static void yylex_cleanup()
 #define ACTION_ALPHANUM                                                       \
    { if (strlen(yytext) > MAXGEDCTAGLEN * encoding_width) {                   \
        error_tag_too_long(yytext);                                            \
+       line_no++;                                                             \
        return BADTOKEN;                                                       \
      }                                                                        \
      CHECK_LINE_LEN;                                                          \
-     gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buf);                   \
+     gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buffer);                \
      gedcom_lval.tag.value  = USERTAG;                                        \
      BEGIN(NORMAL);                                                           \
+     line_no++;                                                               \
      return USERTAG;                                                          \
    }
 
 
 #define ACTION_DELIM                                                          \
   { CHECK_LINE_LEN;                                                           \
-    gedcom_lval.string = TO_INTERNAL(yytext, str_buf);                        \
+    gedcom_lval.string = TO_INTERNAL(yytext, str_buffer);                     \
     return DELIM;                                                             \
   }
 
@@ -283,7 +290,7 @@ static void yylex_cleanup()
 #define ACTION_ANY                                                            \
   { char* tmp;                                                                \
     CHECK_LINE_LEN;                                                           \
-    tmp = TO_INTERNAL(yytext, str_buf);                                       \
+    tmp = TO_INTERNAL(yytext, str_buffer);                                    \
     if (!tmp) {                                                               \
       /* Something went wrong during conversion... */                         \
           error_invalid_character(yytext, yytext[0]);                         \
@@ -305,7 +312,7 @@ static void yylex_cleanup()
 
 #define ACTION_ESCAPE                                                         \
   { CHECK_LINE_LEN;                                                           \
-    gedcom_lval.string = TO_INTERNAL(yytext, str_buf);                        \
+    gedcom_lval.string = TO_INTERNAL(yytext, str_buffer);                     \
     return ESCAPE;                                                            \
   }
 
@@ -316,7 +323,7 @@ static void yylex_cleanup()
       error_pointer_too_long(yytext);                                         \
       return BADTOKEN;                                                        \
     }                                                                         \
-    gedcom_lval.string = TO_INTERNAL(yytext, ptr_buf);                        \
+    gedcom_lval.string = TO_INTERNAL(yytext, ptr_buffer);                     \
     return POINTER;                                                           \
   }
 
@@ -330,7 +337,6 @@ static void yylex_cleanup()
 #define ACTION_TERMINATOR                                                     \
   { CHECK_LINE_LEN;                                                           \
     INIT_LINE_LEN;                                                            \
-    line_no++;                                                                \
     BEGIN(INITIAL);                                                           \
   }
 
@@ -347,10 +353,7 @@ static void yylex_cleanup()
     }                                                                         \
     else {                                                                    \
       char* ptr; int size;                                                    \
-      /* Reset our state */                                                   \
-      current_level = -1;                                                     \
-      level_diff = MAXGEDCLEVEL;                                              \
-      /* ... then terminate lex */                                            \
+      /* ... terminate lex */                                                 \
       yyterminate();                                                          \
       /* Get rid of f*cking compiler warning from lex generated code */       \
       /* yyterminate does return(), so program will never come here  */       \
@@ -383,4 +386,50 @@ static void yylex_cleanup()
     return BADTOKEN;                                                          \
   }
 
-#endif /* IN_LEX */
+#elif LEX_SECTION == 3
+
+/* End-of-input hook for the lex-generated scanner: always returns 1, i.e.
+   there is no further input to continue with. */
+int yywrap(void)
+{
+  return 1;
+}
+
+/* Release the three conversion buffers and reset their pointers to NULL.
+   init_conv_buffers() decides whether to allocate by testing for NULL, so
+   leaving the stale pointers in place would make a later initialization
+   reuse freed memory. */
+static void free_conv_buffers(void)
+{
+  free_conv_buffer(ptr_buffer);
+  ptr_buffer = NULL;
+  free_conv_buffer(tag_buffer);
+  tag_buffer = NULL;
+  free_conv_buffer(str_buffer);
+  str_buffer = NULL;
+}
+
+/* Exit-time cleanup, registered with atexit() by yymyinit(): deletes the
+   current lex buffer (lex itself would leak it) and frees the conversion
+   buffers.  Declared with a (void) prototype to match the handler type
+   atexit() expects. */
+static void yylex_cleanup(void)
+{
+  /* fix memory leak in lex */
+  yy_delete_buffer(yy_current_buffer);
+  yy_current_buffer = NULL;
+  free_conv_buffers();
+}
+
+/* Make sure the pointer, tag and string conversion buffers exist.  Each
+   buffer is tested on its own, so a buffer that is still NULL (e.g. because
+   an earlier creation did not succeed) is created on the next call instead
+   of being skipped because ptr_buffer happens to be set. */
+static void init_conv_buffers(void)
+{
+  if (!ptr_buffer)
+    ptr_buffer = create_conv_buffer(INITIAL_PTR_BUFFER_LEN);
+  if (!tag_buffer)
+    tag_buffer = create_conv_buffer(INITIAL_TAG_BUFFER_LEN);
+  if (!str_buffer)
+    str_buffer = create_conv_buffer(INITIAL_STR_BUFFER_LEN);
+}
+
+/* Non-zero once yylex_cleanup has been successfully registered with atexit() */
+static int exitfuncregistered = 0;
+
+/* Prepare the lexer for reading from F: register the exit-time cleanup
+   (retried on each call until atexit() succeeds), make sure the conversion
+   buffers exist, hand the stream to lex and reset the level tracking and
+   the start condition. */
+void yymyinit(FILE *f)
+{
+  if (! exitfuncregistered) {
+    if (atexit(yylex_cleanup) == 0)
+      exitfuncregistered = 1;
+  }
+  init_conv_buffers();
+
+  /* Point lex at the new input stream */
+  yyin = f;
+  yyrestart(f);
+
+  /* Reset our state */
+  current_level = -1;
+  level_diff = MAXGEDCLEVEL;
+  BEGIN(INITIAL);
+}
+
+#endif