X-Git-Url: https://git.dlugolecki.net.pl/?a=blobdiff_plain;f=gedcom%2Fgedcom_lex_common.c;h=149a8a8bda9630855cb7fed8115ac38bd1fdfcec;hb=eb6f3993afd2330bf4ae536bc8679d791b681dc4;hp=f2fe44afeebf40290175267f3dedefdc4fc33a80;hpb=177218adadc7aa92a1c14e6b8b33080dc77d6e4e;p=gedcom-parse.git

Review notes (free text placed ahead of the "diff --git" header).

What the hunks below change in gedcom/gedcom_lex_common.c:
  * encoding_state.h is included and a file-static counter tab_space is
    added (initialised to 0).
  * The LEXER_TEST-only global compat_at is removed; ACTION_NORMAL_AT now
    tests compat_mode(C_NO_DOUBLE_AT) instead.
  * error_line_too_long() takes the offending text and appends it to the
    message; CHECK_LINE_LEN passes yytext.
  * error_tab_character(), GENERATE_TAB_SPACE and ACTION_TAB are added.
    When compat_mode(C_TAB_CHARACTER) is true, ACTION_TAB sets tab_space
    to 8 and returns a DELIM token whose value is " "; the remaining
    spaces are returned as pending tokens by ACTION_BEFORE_REGEXPS.
    Otherwise the tab is reported and BADTOKEN is returned.
  * ACTION_TERMINATOR hands the converted terminator text to
    set_read_encoding_terminator() while line_no == 1.

Correction made in this copy:
  ACTION_BEFORE_REGEXPS used to test "tab_space-- > 0" and then expand
  GENERATE_TAB_SPACE, which decrements tab_space a second time.  Each
  pending space therefore consumed two counts, so a tab produced 5 DELIM
  tokens instead of the 8 that ACTION_TAB asks for, and the counter kept
  being decremented below zero on every later entry while the tab
  compatibility mode was active.  The condition now only reads the
  counter ("tab_space > 0"); the single decrement stays in
  GENERATE_TAB_SPACE.

Caveats:
  * Line breaks and indentation were rebuilt from a whitespace-collapsed
    copy of the patch.  Hunk line counts agree with the headers, but the
    exact spacing of context lines (and the blank first context line of
    the hunk at old line 205) is a best-effort reconstruction -- compare
    against the target file or apply with whitespace tolerance.
  * The "index" line still names the original post-image blob (149a8a8),
    which no longer corresponds to the corrected content.

diff --git a/gedcom/gedcom_lex_common.c b/gedcom/gedcom_lex_common.c
index f2fe44a..149a8a8 100644
--- a/gedcom/gedcom_lex_common.c
+++ b/gedcom/gedcom_lex_common.c
@@ -26,6 +26,7 @@
 #include "gedcom_internal.h"
 #include "multilex.h"
 #include "encoding.h"
+#include "encoding_state.h"
 #include "gedcom.h"
 #include "gedcom.tabgen.h"
 #include "compat.h"
@@ -34,6 +35,7 @@ static size_t encoding_width;
 static int current_level = -1;
 static int level_diff=MAXGEDCLEVEL;
 static size_t line_len = 0;
+static int tab_space = 0;
 
 static struct conv_buffer* ptr_buffer = NULL;
 static struct conv_buffer* tag_buffer = NULL;
@@ -46,7 +48,6 @@ static struct conv_buffer* str_buffer = NULL;
 #ifdef LEXER_TEST
 YYSTYPE gedcom_lval;
 int line_no = 1;
-int compat_at = 0;
 
 int gedcom_lex();
 
@@ -92,9 +93,10 @@ int test_loop(ENCODING enc, const char* code)
 /* These are defined as functions here, because xgettext has trouble
    extracting the strings out of long pre-processor defined */
 
-static void error_line_too_long()
+static void error_line_too_long(const char *line)
 {
-  gedcom_error(_("Line too long, max %d characters allowed"), MAXGEDCLINELEN);
+  gedcom_error(_("Line too long, max %d characters allowed: %s"),
+               MAXGEDCLINELEN, line);
 }
 
 static void error_level_leading_zero()
@@ -135,6 +137,11 @@ static void error_at_character()
   gedcom_error(_("'@' character should be written as '@@' in values"));
 }
 
+static void error_tab_character()
+{
+  gedcom_error(_("Tab character is not allowed in values"));
+}
+
 static void error_unexpected_character(const char* str, char ch)
 {
   gedcom_error(_("Unexpected character: '%s' (0x%02x)"), str, ch);
@@ -156,13 +163,19 @@ static int dummy_conv = 0;
   { if (line_len != (size_t)-1) { \
       line_len += strlen(yytext); \
       if (line_len > MAXGEDCLINELEN * encoding_width) { \
-        error_line_too_long(); \
+        error_line_too_long(yytext); \
         line_len = (size_t)-1; \
         return BADTOKEN; \
       } \
     } \
   }
 
+#define GENERATE_TAB_SPACE \
+  { gedcom_lval.string = " "; \
+    tab_space--; \
+    return DELIM; \
+  }
+
 #define MKTAGACTION(THETAG) \
   { CHECK_LINE_LEN; \
     gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buffer); \
@@ -205,10 +218,16 @@ static int dummy_conv = 0;
 
    But because this means that one token is converted into a series
    of tokens, there is some initial code following immediately here
-   that returns "pending" tokens. */
+   that returns "pending" tokens.
+
+   Also, for compatibility tabs are converted into spaces, which is
+   also handled here */
 
 #define ACTION_BEFORE_REGEXPS \
-   { if (level_diff < 1) { \
+   { if (compat_mode(C_TAB_CHARACTER) && tab_space > 0) { \
+       GENERATE_TAB_SPACE; \
+     } \
+     else if (level_diff < 1) { \
        level_diff++; \
        return CLOSE; \
      } \
@@ -336,6 +355,8 @@ static int dummy_conv = 0;
 #define ACTION_TERMINATOR \
   { CHECK_LINE_LEN; \
     INIT_LINE_LEN; \
+    if (line_no == 1) \
+      set_read_encoding_terminator(TO_INTERNAL(yytext, str_buffer)); \
     BEGIN(INITIAL); \
   }
 
@@ -361,7 +382,7 @@ static int dummy_conv = 0;
   }
 
 #define ACTION_NORMAL_AT \
-  { if (compat_at) { \
+  { if (compat_mode(C_NO_DOUBLE_AT)) { \
       int i, j; \
       char *yycopy = strdup(yytext); \
       if (yycopy) { \
@@ -380,6 +401,17 @@ static int dummy_conv = 0;
     } \
   }
 
+#define ACTION_TAB \
+  { if (compat_mode(C_TAB_CHARACTER)) { \
+      tab_space = 8; \
+      GENERATE_TAB_SPACE; \
+    } \
+    else { \
+      error_tab_character(); \
+      return BADTOKEN; \
+    } \
+  }
+
 #define ACTION_UNEXPECTED \
   { error_unexpected_character(yytext, yytext[0]); \
     return BADTOKEN; \