Header fully covered with callbacks.
[gedcom-parse.git] / gedcom_lex_common.c
index 41f2da1bb19653927c4c2a6cc65244a8fe38cf6d..bc7387de4f8797aabaf61916c1fc6cd3437c3cd5 100644 (file)
 
 #ifndef IN_LEX
 
-#include "gedcom.tab.h"
+#include "external.h"
 #include "gedcom.h"
+#include "gedcom.tab.h"
 #include "multilex.h"
 #include "encoding.h"
 
 #define YY_NO_UNPUT
-  
-static int current_level=-1;
+
+static size_t encoding_width;  /* scales the character limits in the length checks; not assigned in this excerpt */
+static int current_level = -1;  /* put back to -1 when the lexer terminates at end of input */
 static int level_diff=MAXGEDCLEVEL;
+static size_t line_len = 0;  /* running length of the current line; (size_t)-1 once "Line too long" was reported */
+
+static char ptr_buf[MAXGEDCPTRLEN * UTF_FACTOR + 1];  /* TO_INTERNAL output buffer used by ACTION_POINTER */
+static char tag_buf[MAXGEDCTAGLEN * UTF_FACTOR + 1];  /* TO_INTERNAL output buffer used by MKTAGACTION and ACTION_ALPHANUM */
+static char str_buf[MAXGEDCLINELEN * UTF_FACTOR + 1];  /* TO_INTERNAL output buffer used by the other actions */
+
 #ifdef LEXER_TEST 
 YYSTYPE gedcom_lval;
 int line_no = 1;
@@ -43,14 +50,14 @@ int test_loop(ENCODING enc, char* code)
   while (tok) {
     switch(tok) {
       case BADTOKEN: printf("BADTOKEN "); break;
-      case OPEN: printf("OPEN(%d) ", gedcom_lval.level); break;
+      case OPEN: printf("OPEN(%d) ", gedcom_lval.number); break;
       case CLOSE: printf("CLOSE "); break;
       case ESCAPE: printf("ESCAPE(%s) ", gedcom_lval.string); break;
       case DELIM: printf("DELIM "); break;
       case ANYCHAR: printf("%s ", gedcom_lval.string); break;
-      case POINTER: printf("POINTER(%s) ", gedcom_lval.pointer); break;
-      case USERTAG: printf("USERTAG(%s) ", gedcom_lval.tag); break;
-      default: printf("TAG(%s) ", gedcom_lval.tag); break;
+      case POINTER: printf("POINTER(%s) ", gedcom_lval.string); break;
+      case USERTAG: printf("USERTAG(%s) ", gedcom_lval.string); break;
+      default: printf("TAG(%s) ", gedcom_lval.string); break;
     }
     tok = gedcom_lex();
   }
@@ -63,17 +70,31 @@ int test_loop(ENCODING enc, char* code)
 
 #else  /* of #ifndef IN_LEX */
 
-char string_buf[MAXGEDCLINELEN+1];
-#define TO_INTERNAL(str) to_internal(str, yyleng) 
+#define TO_INTERNAL(STR,OUTBUF) /* hands STR, the lexer's yyleng, OUTBUF and its size to to_internal() */ \
+  to_internal(STR, yyleng, OUTBUF, sizeof(OUTBUF))  /* OUTBUF must be an array (not a pointer) for sizeof to give its capacity */
+
+#define INIT_LINE_LEN /* restart the per-line length count; this also clears the (size_t)-1 "already reported" marker */ \
+  line_len = 0;
+
+#define CHECK_LINE_LEN                                                        \
+  { /* Add the current token to the running line length and return BADTOKEN  \
+       when the limit is exceeded.  line_len is then set to (size_t)-1, so   \
+       the error is reported only once until line_len is reset. */          \
+    if (line_len != (size_t)-1) {                                             \
+      /* Use the lexer's byte count, as TO_INTERNAL does: strlen() stops at  \
+         the first NUL byte, which input in a multi-byte encoding may        \
+         contain, and would then undercount the token. */                   \
+      line_len += (size_t)yyleng;                                             \
+      if (line_len > MAXGEDCLINELEN * encoding_width) {                       \
+        gedcom_error("Line too long, max %d characters",                      \
+                    MAXGEDCLINELEN);                                         \
+        line_len = (size_t)-1;                                                \
+        return BADTOKEN;                                                      \
+      }                                                                       \
+    }                                                                         \
+  }
 
-#define MKTAGACTION(the_tag)                                                 \
-  { gedcom_lval.tag = TO_INTERNAL(yytext);                                   \
+#define MKTAGACTION(THETAG) /* lexer action that returns token TAG_<THETAG> */ \
+  { CHECK_LINE_LEN;  /* may return BADTOKEN when the line is too long */      \
+    gedcom_lval.string = TO_INTERNAL(yytext, tag_buf);  /* tag_buf is the output buffer */ \
     BEGIN(NORMAL);                                                           \
-    return TAG_##the_tag;                                                    \
+    return TAG_##THETAG;                                                     \
   }
 
-
 /* The GEDCOM level number is converted into a sequence of opening
    and closing brackets.  Simply put, the following GEDCOM fragment:
    
@@ -116,13 +137,19 @@ char string_buf[MAXGEDCLINELEN+1];
      }                                                                        \
      else if (level_diff == 1) {                                              \
        level_diff++;                                                          \
-       gedcom_lval.level = current_level;                                     \
+       gedcom_lval.number = current_level;                                    \
        return OPEN;                                                           \
      }                                                                        \
      else {                                                                   \
        /* out of brackets... */                                               \
      }                                                                        \
-   } 
+   }
+
+
+#define ACTION_INITIAL_WHITESPACE                                             \
+  { CHECK_LINE_LEN;  /* the whitespace still counts toward the line length */ \
+    /* nothing else to do: no token is returned for initial whitespace */     \
+  }
 
 
 #define ACTION_0_DIGITS                                                       \
@@ -132,7 +159,8 @@ char string_buf[MAXGEDCLINELEN+1];
 
 
 #define ACTION_DIGITS                                                         \
-   { int level = atoi(TO_INTERNAL(yytext));                                   \
+   { int level = atoi(TO_INTERNAL(yytext, str_buf));                          \
+     CHECK_LINE_LEN;                                                          \
      if ((level < 0) || (level > MAXGEDCLEVEL)) {                             \
        gedcom_error ("Level number out of range [0..%d]",                     \
                     MAXGEDCLEVEL);                                           \
@@ -147,7 +175,7 @@ char string_buf[MAXGEDCLINELEN+1];
      }                                                                        \
      else if (level_diff == 1) {                                              \
        level_diff++;                                                          \
-       gedcom_lval.level = current_level;                                     \
+       gedcom_lval.number = current_level;                                    \
        return OPEN;                                                           \
      }                                                                        \
      else {                                                                   \
@@ -161,28 +189,31 @@ char string_buf[MAXGEDCLINELEN+1];
 
 
 #define ACTION_ALPHANUM                                                       \
-   { if (strlen(yytext) > MAXGEDCTAGLEN) {                                    \
-       gedcom_error("Tag '%s' too long, max %d chars");                       \
+   { if (strlen(yytext) > MAXGEDCTAGLEN * encoding_width) {                   \
+       gedcom_error("Tag '%s' too long, max %d characters",                   \
+                   yytext, MAXGEDCTAGLEN);                                   \
        return BADTOKEN;                                                       \
      }                                                                        \
-     strncpy(string_buf, yytext, MAXGEDCTAGLEN+1);                            \
-     gedcom_lval.tag = TO_INTERNAL(string_buf);                               \
+     CHECK_LINE_LEN;                                                          \
+     gedcom_lval.string = TO_INTERNAL(yytext, tag_buf);                       \
      BEGIN(NORMAL);                                                           \
      return USERTAG;                                                          \
    }
 
 
 #define ACTION_DELIM                                                          \
-  { gedcom_lval.string = TO_INTERNAL(yytext);                                 \
+  { CHECK_LINE_LEN;  /* may return BADTOKEN when the line is too long */       \
+    gedcom_lval.string = TO_INTERNAL(yytext, str_buf);  /* str_buf is the output buffer */ \
     return DELIM;                                                             \
   }
 
 
 #define ACTION_ANY                                                            \
-  { gedcom_lval.string = TO_INTERNAL(yytext);                                 \
-    /* Due to character conversions, it is possible                           \
-       that the current character will be combined with                       \
-       the next, and so now we don't have a character yet...                  \
+  { CHECK_LINE_LEN;                                                           \
+    gedcom_lval.string = TO_INTERNAL(yytext, str_buf);                        \
+    /* Due to character conversions, it is possible that the current          \
+       character will be combined with the next, and so now we don't have a   \
+       character yet...                                                       \
        In principle, this is only applicable to the 1byte case (e.g. ANSEL),  \
        but it doesn't harm the unicode case.                                  \
     */                                                                        \
@@ -192,13 +223,20 @@ char string_buf[MAXGEDCLINELEN+1];
 
 
 #define ACTION_ESCAPE                                                         \
-  { gedcom_lval.string = TO_INTERNAL(yytext);                                 \
+  { CHECK_LINE_LEN;  /* may return BADTOKEN when the line is too long */       \
+    gedcom_lval.string = TO_INTERNAL(yytext, str_buf);  /* str_buf is the output buffer */ \
     return ESCAPE;                                                            \
   }
 
 
 #define ACTION_POINTER                                                        \
-  { gedcom_lval.pointer = TO_INTERNAL(yytext);                                \
+  { CHECK_LINE_LEN;                                                           \
+    if (strlen(yytext) > MAXGEDCPTRLEN * encoding_width) {                    \
+      gedcom_error("Pointer '%s' too long, max %d characters",                \
+                  yytext, MAXGEDCPTRLEN);                                    \
+      return BADTOKEN;                                                        \
+    }                                                                         \
+    gedcom_lval.string = TO_INTERNAL(yytext, ptr_buf);                        \
     return POINTER;                                                           \
   }
 
@@ -210,7 +248,9 @@ char string_buf[MAXGEDCLINELEN+1];
 */
 
 #define ACTION_TERMINATOR                                                     \
-  { line_no++;                                                                \
+  { CHECK_LINE_LEN;                                                           \
+    INIT_LINE_LEN;                                                            \
+    line_no++;                                                                \
     BEGIN(INITIAL);                                                           \
   }
 
@@ -226,6 +266,10 @@ char string_buf[MAXGEDCLINELEN+1];
       return CLOSE;                                                           \
     }                                                                         \
     else {                                                                    \
+      /* Reset our state */                                                   \
+      current_level = -1;                                                     \
+      level_diff = MAXGEDCLEVEL;                                              \
+      /* ... then terminate lex */                                            \
       yyterminate();                                                          \
     }                                                                         \
   }