From: Peter Verthez Date: Mon, 27 Jan 2003 20:13:55 +0000 (+0000) Subject: Handle tab character generated by PAF 5. X-Git-Url: https://git.dlugolecki.net.pl/?a=commitdiff_plain;h=9d87c36948e5cc9c90c14bd09b5007c519ed8b4b;p=gedcom-parse.git Handle tab character generated by PAF 5. --- diff --git a/gedcom/compat.c b/gedcom/compat.c index 3b05680..196113f 100644 --- a/gedcom/compat.c +++ b/gedcom/compat.c @@ -61,7 +61,8 @@ enum _COMPAT { C_PAF5 = 0x0004, C_PAF2 = 0x0008, C_FAMORIG = 0x0010, - C_EASYTREE = 0x0020 + C_EASYTREE = 0x0020, + C_PAF4 = 0x0040 }; struct program_data data[] = { @@ -92,6 +93,7 @@ struct program_data data[] = { - '@' not written as '@@' in values - some 5.5.1 (draft) tags are used: EMAIL, FONE, ROMN - no FAMC field in SLGC + - uses tab character (will be converted to 8 spaces here) - Personal Ancestral File 2: - '@' not written as '@@' in values @@ -107,6 +109,9 @@ struct program_data data[] = { - no submitter link in the header - NOTE doesn't have a value - NOTE.NOTE instead of NOTE.COND + + - Personal Ancestral File 4: + - '@' not written as '@@' in values */ int compat_matrix[] = @@ -117,15 +122,17 @@ int compat_matrix[] = /* C_NO_GEDC */ C_LIFELINES | C_PAF2, /* C_NO_CHAR */ C_LIFELINES, /* C_HEAD_TIME */ C_LIFELINES, - /* C_NO_DOUBLE_AT */ C_LIFELINES | C_PAF5 | C_PAF2 | C_FAMORIG, - /* C_NO_REQUIRED_VALUES */ C_LIFELINES, + /* C_NO_DOUBLE_AT */ C_LIFELINES | C_PAF5 | C_PAF2 | C_FAMORIG + | C_PAF4, + /* C_NO_REQUIRED_VALUES */ C_LIFELINES | C_PAF5, /* C_551_TAGS */ C_PAF5, /* C_NO_SLGC_FAMC */ C_PAF5, /* C_SUBM_COMM */ C_PAF2, - /* C_DOUBLE_DATES_4 */ C_PAF2, + /* C_DOUBLE_DATES_4 */ C_PAF2 | C_PAF5 | C_PAF4, /* C_CONC_NEEDS_SPACE */ C_FAMORIG, /* C_NO_GEDC_FORM */ C_EASYTREE, - /* C_NOTE_NOTE */ C_EASYTREE + /* C_NOTE_NOTE */ C_EASYTREE, + /* C_TAB_CHARACTER */ C_PAF5 }; int compat_state[C_NR_OF_RULES]; @@ -201,6 +208,10 @@ void compute_compatibility() compatibility = C_PAF2; version = 2; } + if (compatibility_version >= 40000 && 
compatibility_version < 50000) { + compatibility = C_PAF4; + version = 4; + } else if (compatibility_version >= 50000) { compatibility = C_PAF5; version = 5; diff --git a/gedcom/compat.h b/gedcom/compat.h index b575469..1d59822 100644 --- a/gedcom/compat.h +++ b/gedcom/compat.h @@ -43,6 +43,7 @@ typedef enum _COMPAT_RULES { C_CONC_NEEDS_SPACE, C_NO_GEDC_FORM, C_NOTE_NOTE, + C_TAB_CHARACTER, C_NR_OF_RULES } Compat_rule; diff --git a/gedcom/gedcom_1byte.lex b/gedcom/gedcom_1byte.lex index 7c47f18..69d6d15 100644 --- a/gedcom/gedcom_1byte.lex +++ b/gedcom/gedcom_1byte.lex @@ -214,6 +214,8 @@ ACTION_BEFORE_REGEXPS {normal_at} ACTION_NORMAL_AT +{tab} ACTION_TAB + . ACTION_UNEXPECTED %% diff --git a/gedcom/gedcom_hilo.lex b/gedcom/gedcom_hilo.lex index 91735f1..8d1997e 100644 --- a/gedcom/gedcom_hilo.lex +++ b/gedcom/gedcom_hilo.lex @@ -217,6 +217,8 @@ ACTION_BEFORE_REGEXPS {normal_at} ACTION_NORMAL_AT +{tab} ACTION_TAB + . ACTION_UNEXPECTED %% diff --git a/gedcom/gedcom_lex_common.c b/gedcom/gedcom_lex_common.c index 68db840..149a8a8 100644 --- a/gedcom/gedcom_lex_common.c +++ b/gedcom/gedcom_lex_common.c @@ -35,6 +35,7 @@ static size_t encoding_width; static int current_level = -1; static int level_diff=MAXGEDCLEVEL; static size_t line_len = 0; +static int tab_space = 0; static struct conv_buffer* ptr_buffer = NULL; static struct conv_buffer* tag_buffer = NULL; @@ -136,6 +137,11 @@ static void error_at_character() gedcom_error(_("'@' character should be written as '@@' in values")); } +static void error_tab_character() +{ + gedcom_error(_("Tab character is not allowed in values")); +} + static void error_unexpected_character(const char* str, char ch) { gedcom_error(_("Unexpected character: '%s' (0x%02x)"), str, ch); @@ -164,6 +170,12 @@ static int dummy_conv = 0; } \ } +#define GENERATE_TAB_SPACE \ + { gedcom_lval.string = " "; \ + tab_space--; \ + return DELIM; \ + } + #define MKTAGACTION(THETAG) \ { CHECK_LINE_LEN; \ gedcom_lval.tag.string = TO_INTERNAL(yytext, 
tag_buffer); \ @@ -206,10 +218,16 @@ static int dummy_conv = 0; But because this means that one token is converted into a series of tokens, there is some initial code following immediately here - that returns "pending" tokens. */ + that returns "pending" tokens. + + Also, for compatibility tabs are converted into spaces, which is + also handled here (GENERATE_TAB_SPACE itself counts tab_space down) */ #define ACTION_BEFORE_REGEXPS \ - { if (level_diff < 1) { \ + { if (compat_mode(C_TAB_CHARACTER) && tab_space > 0) { \ + GENERATE_TAB_SPACE; \ + } \ + else if (level_diff < 1) { \ level_diff++; \ return CLOSE; \ } \ @@ -383,6 +401,17 @@ static int dummy_conv = 0; } \ } +#define ACTION_TAB \ + { if (compat_mode(C_TAB_CHARACTER)) { \ + tab_space = 8; \ + GENERATE_TAB_SPACE; \ + } \ + else { \ + error_tab_character(); \ + return BADTOKEN; \ + } \ + } + #define ACTION_UNEXPECTED \ { error_unexpected_character(yytext, yytext[0]); \ return BADTOKEN; \ diff --git a/gedcom/gedcom_lohi.lex b/gedcom/gedcom_lohi.lex index 1678875..21f84eb 100644 --- a/gedcom/gedcom_lohi.lex +++ b/gedcom/gedcom_lohi.lex @@ -217,6 +217,8 @@ ACTION_BEFORE_REGEXPS {normal_at} ACTION_NORMAL_AT +{tab} ACTION_TAB + . ACTION_UNEXPECTED %%