X-Git-Url: https://git.dlugolecki.net.pl/?a=blobdiff_plain;f=gedcom%2Fgedcom_lex_common.c;h=149a8a8bda9630855cb7fed8115ac38bd1fdfcec;hb=eb6f3993afd2330bf4ae536bc8679d791b681dc4;hp=f2fe44afeebf40290175267f3dedefdc4fc33a80;hpb=177218adadc7aa92a1c14e6b8b33080dc77d6e4e;p=gedcom-parse.git

diff --git a/gedcom/gedcom_lex_common.c b/gedcom/gedcom_lex_common.c
index f2fe44a..149a8a8 100644
--- a/gedcom/gedcom_lex_common.c
+++ b/gedcom/gedcom_lex_common.c
@@ -26,6 +26,7 @@
 #include "gedcom_internal.h"
 #include "multilex.h"
 #include "encoding.h"
+#include "encoding_state.h"
 #include "gedcom.h"
 #include "gedcom.tabgen.h"
 #include "compat.h"
@@ -34,6 +35,7 @@ static size_t encoding_width;
 static int current_level = -1;
 static int level_diff=MAXGEDCLEVEL;
 static size_t line_len = 0;
+static int tab_space = 0; /* pending-space counter for tab expansion: set by ACTION_TAB, consumed by GENERATE_TAB_SPACE */
 
 static struct conv_buffer* ptr_buffer = NULL;
 static struct conv_buffer* tag_buffer = NULL;
@@ -46,7 +48,6 @@ static struct conv_buffer* str_buffer = NULL;
 #ifdef LEXER_TEST 
 YYSTYPE gedcom_lval;
 int line_no = 1;
-int compat_at = 0;
 
 int gedcom_lex();
 
@@ -92,9 +93,10 @@ int test_loop(ENCODING enc, const char* code)
 /* These are defined as functions here, because xgettext has trouble
    extracting the strings out of long pre-processor defined */
 
-static void error_line_too_long()
+static void error_line_too_long(const char *line) /* line: text printed after the message via %s */
 {
-  gedcom_error(_("Line too long, max %d characters allowed"), MAXGEDCLINELEN); 
+  gedcom_error(_("Line too long, max %d characters allowed: %s"),
+	       MAXGEDCLINELEN, line); /* NOTE(review): CHECK_LINE_LEN passes yytext here - confirm that is the text wanted */
 }
 
 static void error_level_leading_zero()
@@ -135,6 +137,11 @@ static void error_at_character()
   gedcom_error(_("'@' character should be written as '@@' in values"));
 }
 
+static void error_tab_character() /* reports a tab in a value; called by ACTION_TAB when C_TAB_CHARACTER compat mode is off */
+{
+  gedcom_error(_("Tab character is not allowed in values"));
+}
+
 static void error_unexpected_character(const char* str, char ch)
 {
   gedcom_error(_("Unexpected character: '%s' (0x%02x)"), str, ch);
@@ -156,13 +163,19 @@ static int dummy_conv = 0;
   { if (line_len != (size_t)-1) {                                             \
       line_len += strlen(yytext);                                             \
       if (line_len > MAXGEDCLINELEN * encoding_width) {                       \
-        error_line_too_long();                                                \
+        error_line_too_long(yytext);                                          \
         line_len = (size_t)-1;                                                \
         return BADTOKEN;                                                      \
       }                                                                       \
     }                                                                         \
   }
 
+#define GENERATE_TAB_SPACE /* return one space as a DELIM token */            \
+  { tab_space--;  /* one pending space used up */                             \
+    gedcom_lval.string = " ";                                                 \
+    return DELIM;                                                             \
+  }
+
 #define MKTAGACTION(THETAG)                                                  \
   { CHECK_LINE_LEN;                                                          \
     gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buffer);                \
@@ -205,10 +218,16 @@ static int dummy_conv = 0;
 
    But because this means that one token is converted into a series
    of tokens, there is some initial code following immediately here
-   that returns "pending" tokens. */
+   that returns "pending" tokens.
+
+   Also, in compatibility mode a tab is converted into a run of spaces;
+   the pending spaces are likewise returned by the code below. */
 
 #define ACTION_BEFORE_REGEXPS                                                 \
-   { if (level_diff < 1) {                                                    \
+   { if (compat_mode(C_TAB_CHARACTER) && tab_space > 0) {                     \
+       GENERATE_TAB_SPACE; /* decrements tab_space itself */                  \
+     }                                                                        \
+     else if (level_diff < 1) {                                               \
        level_diff++;                                                          \
        return CLOSE;                                                          \
      }                                                                        \
@@ -336,6 +355,8 @@ static int dummy_conv = 0;
 #define ACTION_TERMINATOR                                                     \
   { CHECK_LINE_LEN;                                                           \
     INIT_LINE_LEN;                                                            \
+    if (line_no == 1) /* line_no 1 (first line? confirm): note terminator */  \
+      set_read_encoding_terminator(TO_INTERNAL(yytext, str_buffer));          \
     BEGIN(INITIAL);                                                           \
   }
 
@@ -361,7 +382,7 @@ static int dummy_conv = 0;
   } 
 
 #define ACTION_NORMAL_AT                                                      \
-  { if (compat_at) {                                                          \
+  { if (compat_mode(C_NO_DOUBLE_AT)) {                                        \
       int i, j;                                                               \
       char *yycopy = strdup(yytext);                                          \
       if (yycopy) {                                                           \
@@ -380,6 +401,17 @@ static int dummy_conv = 0;
     }                                                                         \
   }
 
+#define ACTION_TAB /* lexer action for a tab character */                     \
+  { if (!compat_mode(C_TAB_CHARACTER)) {                                      \
+      /* tabs not tolerated: report an error */                               \
+      error_tab_character();                                                  \
+      return BADTOKEN;                                                        \
+    }                                                                         \
+    /* compat mode: arm the counter, emit the first space now */              \
+    tab_space = 8;                                                            \
+    GENERATE_TAB_SPACE;                                                       \
+  }
+
 #define ACTION_UNEXPECTED                                                     \
   { error_unexpected_character(yytext, yytext[0]);                            \
     return BADTOKEN;                                                          \