Copied from old documentation. Removed all Gedcom_val details.
[gedcom-parse.git] / gedcom / gedcom_lex_common.c
index 779af3bde742a0ab1366c89ecad485000d40b956..06c1f77a8a5fefb51696d6fb7ac63e9b1c7f917c 100644 (file)
 #include "gedcom_internal.h"
 #include "multilex.h"
 #include "encoding.h"
+#include "encoding_state.h"
 #include "gedcom.h"
 #include "gedcom.tabgen.h"
 #include "compat.h"
 
 static size_t encoding_width;
 static int current_level = -1;
-static int level_diff=MAXGEDCLEVEL;
+static int level_diff = MAXGEDCLEVEL;
 static size_t line_len = 0;
+static int tab_space = 0;
+static int current_tag = -1;
 
 static struct conv_buffer* ptr_buffer = NULL;
 static struct conv_buffer* tag_buffer = NULL;
@@ -93,7 +96,8 @@ int test_loop(ENCODING enc, const char* code)
 
 static void error_line_too_long()
 {
-  gedcom_error(_("Line too long, max %d characters allowed"), MAXGEDCLINELEN); 
+  gedcom_error(_("Line too long, max %d characters allowed"),
+              MAXGEDCLINELEN); 
 }
 
 static void error_level_leading_zero()
@@ -134,6 +138,11 @@ static void error_at_character()
   gedcom_error(_("'@' character should be written as '@@' in values"));
 }
 
+static void error_tab_character()
+{
+  gedcom_error(_("Tab character is not allowed in values"));
+}
+
 static void error_unexpected_character(const char* str, char ch)
 {
   gedcom_error(_("Unexpected character: '%s' (0x%02x)"), str, ch);
@@ -154,7 +163,8 @@ static int dummy_conv = 0;
 #define CHECK_LINE_LEN                                                        \
   { if (line_len != (size_t)-1) {                                             \
       line_len += strlen(yytext);                                             \
-      if (line_len > MAXGEDCLINELEN * encoding_width) {                       \
+      if (line_len > MAXGEDCLINELEN * encoding_width                          \
+         && ! compat_long_line(current_level, current_tag)) {                \
         error_line_too_long();                                                \
         line_len = (size_t)-1;                                                \
         return BADTOKEN;                                                      \
@@ -162,13 +172,20 @@ static int dummy_conv = 0;
     }                                                                         \
   }
 
+#define GENERATE_TAB_SPACE  /* yield a single-space DELIM token */            \
+  { gedcom_lval.string = " ";                                                 \
+    tab_space--;  /* NOTE(review): ACTION_BEFORE_REGEXPS decrements too */    \
+    return DELIM;                                                             \
+  }
+
 #define MKTAGACTION(THETAG)                                                  \
   { CHECK_LINE_LEN;                                                          \
     gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buffer);                \
-    gedcom_lval.tag.value  = TAG_##THETAG;                                   \
+    current_tag            = TAG_##THETAG;                                   \
+    gedcom_lval.tag.value  = current_tag;                                    \
     BEGIN(NORMAL);                                                           \
     line_no++;                                                               \
-    return TAG_##THETAG;                                                     \
+    return current_tag;                                                      \
   }
 
 /* The GEDCOM level number is converted into a sequence of opening
@@ -204,10 +221,16 @@ static int dummy_conv = 0;
 
    But because this means that one token is converted into a series
    of tokens, there is some initial code following immediately here
-   that returns "pending" tokens. */
+   that returns "pending" tokens.
+
+   Also, for compatibility, tabs are converted into spaces, which is
+   likewise handled here. */
 
 #define ACTION_BEFORE_REGEXPS                                                 \
-   { if (level_diff < 1) {                                                    \
+   { if (compat_mode(C_TAB_CHARACTER) && tab_space-- > 0) {                   \
+       GENERATE_TAB_SPACE;                                                    \
+     }                                                                        \
+     else if (level_diff < 1) {                                               \
        level_diff++;                                                          \
        return CLOSE;                                                          \
      }                                                                        \
@@ -335,6 +358,8 @@ static int dummy_conv = 0;
 #define ACTION_TERMINATOR                                                     \
   { CHECK_LINE_LEN;                                                           \
     INIT_LINE_LEN;                                                            \
+    if (line_no == 1)                                                         \
+      set_read_encoding_terminator(TO_INTERNAL(yytext, str_buffer));          \
     BEGIN(INITIAL);                                                           \
   }
 
@@ -379,6 +404,17 @@ static int dummy_conv = 0;
     }                                                                         \
   }
 
+#define ACTION_TAB  /* lexer action for a tab character */                    \
+  { if (compat_mode(C_TAB_CHARACTER)) {  /* compat: expand the tab */         \
+      tab_space = 8;  /* queue spaces for ACTION_BEFORE_REGEXPS */            \
+      GENERATE_TAB_SPACE;  /* emits the first space and returns */            \
+    }                                                                         \
+    else {  /* otherwise a tab is rejected */                                 \
+      error_tab_character();                                                  \
+      return BADTOKEN;                                                        \
+    }                                                                         \
+  }
+
 #define ACTION_UNEXPECTED                                                     \
   { error_unexpected_character(yytext, yytext[0]);                            \
     return BADTOKEN;                                                          \