X-Git-Url: https://git.dlugolecki.net.pl/?a=blobdiff_plain;f=gedcom_hilo.lex;h=2a05b311660b5846f83e8110239c7f0020a1a380;hb=b4557a98f5842835e9e65370fd062b6d65af0b14;hp=e5135dc68111cb8de1073b81a7b0deb52e09d3d2;hpb=0c341b31ca4de86b12b8b98f724b4f746e98a5d2;p=gedcom-parse.git diff --git a/gedcom_hilo.lex b/gedcom_hilo.lex index e5135dc..2a05b31 100644 --- a/gedcom_hilo.lex +++ b/gedcom_hilo.lex @@ -55,98 +55,18 @@ int line_no = 1; %% - /* The GEDCOM level number is converted into a sequence of opening - and closing brackets. Simply put, the following GEDCOM fragment: - - 0 HEAD - 1 SOUR genes - 2 VERS 1.6 - 2 NAME Genes - 1 DATE 07 OCT 2001 - ... - 0 TRLR - - is converted into: - - { HEAD (initial) - { SOUR genes (1 higher: no closing brackets) - { VERS 1.6 (1 higher: no closing brackets) - } { NAME Genes (same level: 1 closing bracket) - } } { DATE 07 OCT 2001 (1 lower: 2 closing brackets) - ... - } { TRLR } - - or more clearly: - - { HEAD - { SOUR genes - { VERS 1.6 } - { NAME Genes } } - { DATE 07 OCT 2001 - ... } - { TRLR } - - But because this means that one token is converted into a series - of tokens, there is some initial code following immediately here - that returns "pending" tokens. */ - %{ -char string_buf[MAXGEDCLINELEN+1]; - -if (level_diff < 1) { - level_diff++; - return CLOSE; -} -else if (level_diff == 1) { - level_diff++; - gedcom_lval.number = current_level; - return OPEN; -} -else { - /* out of brackets... */ -} - -#define TO_INTERNAL(str) to_internal(str, yyleng) - -#define MKTAGACTION(tag) \ - { gedcom_lval.string = TO_INTERNAL(yytext); \ - BEGIN(NORMAL); \ - return TAG_##tag; } +#include "gedcom_lex_common.c" +ACTION_BEFORE_REGEXPS + %} {gen_delim}* /* ignore leading whitespace (also tabs) */ -\x00[0]{digit}+ { gedcom_error ("Level number with leading zero"); - return BADTOKEN; - } +\x00[0]{digit}+ ACTION_0_DIGITS -{digit}+ { int level = atoi(TO_INTERNAL(yytext)); - if ((level < 0) || (level > MAXGEDCLEVEL)) { - gedcom_error ("Level number out of range [0..%d]", - MAXGEDCLEVEL); - return BADTOKEN; - } - level_diff = level - current_level; - BEGIN(EXPECT_TAG); - current_level = level; - if (level_diff < 1) { - level_diff++; - return CLOSE; - } - else if (level_diff == 1) { - level_diff++; - gedcom_lval.number = current_level; - return OPEN; - } - else { - /* should never happen (error to GEDCOM spec) */ - gedcom_error ("GEDCOM level number is %d higher than " - "previous", - level_diff); - return BADTOKEN; - } - } +{digit}+ ACTION_DIGITS \x00A\x00B\x00B\x00R MKTAGACTION(ABBR) \x00A\x00D\x00D\x00R MKTAGACTION(ADDR) @@ -278,57 +198,21 @@ else { \x00W\x00I\x00F\x00E MKTAGACTION(WIFE) \x00W\x00I\x00L\x00L MKTAGACTION(WILL) -{alphanum}+ { if (strlen(yytext) > MAXGEDCTAGLEN) { - gedcom_error("Tag '%s' too long, max %d chars"); - return BADTOKEN; - } - strncpy(string_buf, yytext, MAXGEDCTAGLEN+1); - gedcom_lval.string = TO_INTERNAL(string_buf); - BEGIN(NORMAL); - return USERTAG; - } - -{delim} { gedcom_lval.string = TO_INTERNAL(yytext); - return DELIM; - } - -{any_but_delim} { gedcom_lval.string = TO_INTERNAL(yytext); - return ANYCHAR; - } +{alphanum}+ ACTION_ALPHANUM -{escape}/{non_at} { gedcom_lval.string = TO_INTERNAL(yytext); - return ESCAPE; - } +{delim} ACTION_DELIM -{pointer} { gedcom_lval.string = TO_INTERNAL(yytext); - return POINTER; - } +{any_but_delim} ACTION_ANY - /* Due to the conversion of level numbers into brackets, the - terminator is not important, so no token is returned here. - Although not strictly according to the GEDCOM spec, we'll ignore - whitespace just before the terminator. - */ +{escape}/{non_at} ACTION_ESCAPE -{gen_delim}*{terminator} { line_no++; BEGIN(INITIAL); } +{pointer} ACTION_POINTER - /* Eventually we have to return 1 closing bracket (for the trailer). - We can detect whether we have sent the closing bracket using the - level_diff (at eof, first it is 2, then we increment it ourselves) */ +{gen_delim}*{terminator} ACTION_TERMINATOR -<> { if (level_diff == 2) { - level_diff++; - return CLOSE; - } - else { - yyterminate(); - } - } +<> ACTION_EOF -. { gedcom_error("Unexpected character: '%s' (0x%02x)", - yytext, yytext[0]); - return BADTOKEN; - } +. ACTION_UNEXPECTED %% @@ -354,14 +238,14 @@ int main() while (tok) { switch(tok) { case BADTOKEN: printf("BADTOKEN "); break; - case OPEN: printf("OPEN(%d) ", gedcom_lval.number); break; + case OPEN: printf("OPEN(%d) ", gedcom_lval.level); break; case CLOSE: printf("CLOSE "); break; case ESCAPE: printf("ESCAPE(%s) ", gedcom_lval.string); break; case DELIM: printf("DELIM "); break; case ANYCHAR: printf("%s ", gedcom_lval.string); break; - case POINTER: printf("POINTER(%s) ", gedcom_lval.string); break; - case USERTAG: printf("USERTAG(%s) ", gedcom_lval.string); break; - default: printf("TAG(%s) ", gedcom_lval.string); break; + case POINTER: printf("POINTER(%s) ", gedcom_lval.pointer); break; + case USERTAG: printf("USERTAG(%s) ", gedcom_lval.tag); break; + default: printf("TAG(%s) ", gedcom_lval.tag); break; } tok = gedcom_hilo_lex(); }