Pass encoding terminator to encoding state.
[gedcom-parse.git] / gedcom / gedcom_lex_common.c
index d0d329cab5e031d81a20bcab027e8bed6384ada1..703a9de9d3f21987ec3b22f51d267218603b596e 100644 (file)
@@ -35,14 +35,17 @@ static int current_level = -1;
 static int level_diff=MAXGEDCLEVEL;
 static size_t line_len = 0;
 
-static char ptr_buf[MAXGEDCPTRLEN * UTF_FACTOR + 1];
-static char tag_buf[MAXGEDCTAGLEN * UTF_FACTOR + 1];
-static char str_buf[MAXGEDCLINELEN * UTF_FACTOR + 1];
+/* Conversion buffers for pointer, tag and string tokens.  They start out
+   NULL and are created by init_conv_buffers(). */
+static struct conv_buffer* ptr_buffer = NULL;
+static struct conv_buffer* tag_buffer = NULL;
+static struct conv_buffer* str_buffer = NULL;
+
+/* Initial sizes passed to create_conv_buffer() for the buffers above (the
+   same formulas the fixed arrays used).  Each expansion is parenthesized
+   so it stays a single operand inside any surrounding expression. */
+#define INITIAL_PTR_BUFFER_LEN (MAXGEDCPTRLEN * UTF_FACTOR + 1)
+#define INITIAL_TAG_BUFFER_LEN (MAXGEDCTAGLEN * UTF_FACTOR + 1)
+#define INITIAL_STR_BUFFER_LEN (MAXGEDCLINELEN * UTF_FACTOR + 1)
 
 #ifdef LEXER_TEST 
 YYSTYPE gedcom_lval;
 int line_no = 1;
-int compat_at = 0;
 
 int gedcom_lex();
 
@@ -88,9 +91,10 @@ int test_loop(ENCODING enc, const char* code)
 /* These are defined as functions here, because xgettext has trouble
    extracting the strings out of long pre-processor defined */
 
-static void error_line_too_long()
+static void error_line_too_long(const char *line) /* line is appended to the reported message */
 {
-  gedcom_error(_("Line too long, max %d characters allowed"), MAXGEDCLINELEN); 
+  gedcom_error(_("Line too long, max %d characters allowed: %s"),
+              MAXGEDCLINELEN, line); 
 }
 
 static void error_level_leading_zero()
@@ -136,10 +140,14 @@ static void error_unexpected_character(const char* str, char ch)
   gedcom_error(_("Unexpected character: '%s' (0x%02x)"), str, ch);
 }
 
+/* This is to bypass the iconv conversion (if the input is UTF-8 coming
+   from the program) */
+static int dummy_conv = 0;
+
 #elif LEX_SECTION == 2
 
 #define TO_INTERNAL(STR,OUTBUF) \
-  to_internal(STR, yyleng, OUTBUF, sizeof(OUTBUF))
+  (dummy_conv ? STR : to_internal(STR, yyleng, OUTBUF)) /* non-zero dummy_conv: STR is handed back unconverted */
 
 #define INIT_LINE_LEN \
   line_len = 0;
@@ -148,7 +156,7 @@ static void error_unexpected_character(const char* str, char ch)
   { if (line_len != (size_t)-1) {                                             \
       line_len += strlen(yytext);                                             \
       if (line_len > MAXGEDCLINELEN * encoding_width) {                       \
-        error_line_too_long();                                                \
+        error_line_too_long(yytext);                                          \
         line_len = (size_t)-1;                                                \
         return BADTOKEN;                                                      \
       }                                                                       \
@@ -157,9 +165,10 @@ static void error_unexpected_character(const char* str, char ch)
 
 #define MKTAGACTION(THETAG)                                                  \
   { CHECK_LINE_LEN;                                                          \
-    gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buf);                   \
+    gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buffer);                \
     gedcom_lval.tag.value  = TAG_##THETAG;                                   \
     BEGIN(NORMAL);                                                           \
+    line_no++;  /* count the line once its tag has been lexed */             \
     return TAG_##THETAG;                                                     \
   }
 
@@ -227,10 +236,11 @@ static void error_unexpected_character(const char* str, char ch)
 
 
 #define ACTION_DIGITS                                                         \
-   { int level = atoi(TO_INTERNAL(yytext, str_buf));                          \
+   { int level = atoi(TO_INTERNAL(yytext, str_buffer));                       \
      CHECK_LINE_LEN;                                                          \
      if ((level < 0) || (level > MAXGEDCLEVEL)) {                             \
        error_level_out_of_range();                                            \
+       line_no++;                                                             \
        return BADTOKEN;                                                       \
      }                                                                        \
      level_diff = level - current_level;                                      \
@@ -248,6 +258,7 @@ static void error_unexpected_character(const char* str, char ch)
      else {                                                                   \
        /* should never happen (error to GEDCOM spec) */                       \
        error_level_too_high(level_diff);                                      \
+       line_no++;                                                             \
        return BADTOKEN;                                                       \
      }                                                                        \
    } 
@@ -256,19 +267,21 @@ static void error_unexpected_character(const char* str, char ch)
 #define ACTION_ALPHANUM                                                       \
    { if (strlen(yytext) > MAXGEDCTAGLEN * encoding_width) {                   \
        error_tag_too_long(yytext);                                            \
+       line_no++;  /* an over-long tag still advances line_no */              \
        return BADTOKEN;                                                       \
      }                                                                        \
      CHECK_LINE_LEN;                                                          \
-     gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buf);                   \
+     gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buffer);                \
      gedcom_lval.tag.value  = USERTAG;                                        \
      BEGIN(NORMAL);                                                           \
+     line_no++;  /* user tag accepted: advance line_no */                     \
      return USERTAG;                                                          \
    }
 
 
 #define ACTION_DELIM                                                          \
   { CHECK_LINE_LEN;                                                           \
-    gedcom_lval.string = TO_INTERNAL(yytext, str_buf);                        \
+    gedcom_lval.string = TO_INTERNAL(yytext, str_buffer); /* NOTE(review): result is not NULL-checked here, unlike ACTION_ANY -- confirm */ \
     return DELIM;                                                             \
   }
 
@@ -276,7 +289,7 @@ static void error_unexpected_character(const char* str, char ch)
 #define ACTION_ANY                                                            \
   { char* tmp;                                                                \
     CHECK_LINE_LEN;                                                           \
-    tmp = TO_INTERNAL(yytext, str_buf);                                       \
+    tmp = TO_INTERNAL(yytext, str_buffer);                                    \
     if (!tmp) {                                                               \
       /* Something went wrong during conversion... */                         \
           error_invalid_character(yytext, yytext[0]);                         \
@@ -298,7 +311,7 @@ static void error_unexpected_character(const char* str, char ch)
 
 #define ACTION_ESCAPE                                                         \
   { CHECK_LINE_LEN;                                                           \
-    gedcom_lval.string = TO_INTERNAL(yytext, str_buf);                        \
+    gedcom_lval.string = TO_INTERNAL(yytext, str_buffer); /* NOTE(review): result is not NULL-checked here, unlike ACTION_ANY -- confirm */ \
     return ESCAPE;                                                            \
   }
 
@@ -309,7 +322,7 @@ static void error_unexpected_character(const char* str, char ch)
       error_pointer_too_long(yytext);                                         \
       return BADTOKEN;                                                        \
     }                                                                         \
-    gedcom_lval.string = TO_INTERNAL(yytext, ptr_buf);                        \
+    gedcom_lval.string = TO_INTERNAL(yytext, ptr_buffer);                     \
     return POINTER;                                                           \
   }
 
@@ -323,7 +336,8 @@ static void error_unexpected_character(const char* str, char ch)
 #define ACTION_TERMINATOR                                                     \
   { CHECK_LINE_LEN;                                                           \
     INIT_LINE_LEN;                                                            \
-    line_no++;                                                                \
+    if (line_no == 1)  /* NOTE(review): the tag actions increment line_no before the terminator is lexed; confirm it can still be 1 here */ \
+      set_encoding_terminator(TO_INTERNAL(yytext, str_buffer));               \
     BEGIN(INITIAL);                                                           \
   }
 
@@ -349,7 +363,7 @@ static void error_unexpected_character(const char* str, char ch)
   } 
 
 #define ACTION_NORMAL_AT                                                      \
-  { if (compat_at) {                                                          \
+  { if (compat_mode(C_NO_DOUBLE_AT)) {                                        \
       int i, j;                                                               \
       char *yycopy = strdup(yytext);                                          \
       if (yycopy) {                                                           \
@@ -380,11 +394,28 @@ int yywrap()
   return 1;
 }
 
+/* Release the three conversion buffers and clear their pointers.
+   init_conv_buffers() decides what to create by testing for NULL, so the
+   pointers must not keep the address of a freed buffer; this also mirrors
+   how yylex_cleanup() resets yy_current_buffer after deleting it. */
+static void free_conv_buffers()
+{
+  free_conv_buffer(ptr_buffer);
+  ptr_buffer = NULL;
+  free_conv_buffer(tag_buffer);
+  tag_buffer = NULL;
+  free_conv_buffer(str_buffer);
+  str_buffer = NULL;
+}
+
 static void yylex_cleanup()
 {
   /* fix memory leak in lex */
   yy_delete_buffer(yy_current_buffer);
   yy_current_buffer = NULL;
+  free_conv_buffers();  /* also release the ptr/tag/str conversion buffers */
+}
+
+/* Create whichever conversion buffers do not exist yet.  Each pointer is
+   tested on its own: a buffer that is still NULL (presumably after a
+   failed creation -- confirm what create_conv_buffer() returns on error)
+   is retried on the next call, and an existing buffer is never
+   overwritten just because ptr_buffer happens to be NULL. */
+static void init_conv_buffers()
+{
+  if (!ptr_buffer) {
+    ptr_buffer = create_conv_buffer(INITIAL_PTR_BUFFER_LEN);
+  }
+  if (!tag_buffer) {
+    tag_buffer = create_conv_buffer(INITIAL_TAG_BUFFER_LEN);
+  }
+  if (!str_buffer) {
+    str_buffer = create_conv_buffer(INITIAL_STR_BUFFER_LEN);
+  }
 }
 
 static int exitfuncregistered = 0;
@@ -393,11 +424,13 @@ void yymyinit(FILE *f)
 {
   if (! exitfuncregistered && atexit(yylex_cleanup) == 0)
     exitfuncregistered = 1;
+  init_conv_buffers();  /* create the conversion buffers if that has not happened yet */
   yyin = f;
   yyrestart(f);
   /* Reset our state */
   current_level = -1;
   level_diff = MAXGEDCLEVEL;
+  BEGIN(INITIAL);  /* put the scanner back in its INITIAL start condition for the new input */
 }
 
 #endif