From b4557a98f5842835e9e65370fd062b6d65af0b14 Mon Sep 17 00:00:00 2001 From: Peter Verthez Date: Sat, 1 Dec 2001 17:58:55 +0000 Subject: [PATCH] Use of gedcom_lex_common.c. --- gedcom_1byte.lex | 156 ++++++----------------------------------------- gedcom_hilo.lex | 150 ++++++--------------------------------------- gedcom_lohi.lex | 150 ++++++--------------------------------------- 3 files changed, 51 insertions(+), 405 deletions(-) diff --git a/gedcom_1byte.lex b/gedcom_1byte.lex index df4559a..b7c1d16 100644 --- a/gedcom_1byte.lex +++ b/gedcom_1byte.lex @@ -53,98 +53,18 @@ int line_no = 1; %% - /* The GEDCOM level number is converted into a sequence of opening - and closing brackets. Simply put, the following GEDCOM fragment: - - 0 HEAD - 1 SOUR genes - 2 VERS 1.6 - 2 NAME Genes - 1 DATE 07 OCT 2001 - ... - 0 TRLR - - is converted into: - - { HEAD (initial) - { SOUR genes (1 higher: no closing brackets) - { VERS 1.6 (1 higher: no closing brackets) - } { NAME Genes (same level: 1 closing bracket) - } } { DATE 07 OCT 2001 (1 lower: 2 closing brackets) - ... - } { TRLR } - - or more clearly: - - { HEAD - { SOUR genes - { VERS 1.6 } - { NAME Genes } } - { DATE 07 OCT 2001 - ... } - { TRLR } - - But because this means that one token is converted into a series - of tokens, there is some initial code following immediately here - that returns "pending" tokens. */ - %{ -char string_buf[MAXGEDCLINELEN+1]; - -if (level_diff < 1) { - level_diff++; - return CLOSE; -} -else if (level_diff == 1) { - level_diff++; - gedcom_lval.number = current_level; - return OPEN; -} -else { - /* out of brackets... 
*/ -} - -#define TO_INTERNAL(str) to_internal(str, yyleng) - -#define MKTAGACTION(tag) \ - { gedcom_lval.string = TO_INTERNAL(yytext); \ - BEGIN(NORMAL); \ - return TAG_##tag; } +#include "gedcom_lex_common.c" +ACTION_BEFORE_REGEXPS + %} {gen_delim}* /* ignore leading whitespace (also tabs) */ -0{digit}+ { gedcom_error ("Level number with leading zero"); - return BADTOKEN; - } +0{digit}+ ACTION_0_DIGITS -{digit}+ { int level = atoi(TO_INTERNAL(yytext)); - if ((level < 0) || (level > MAXGEDCLEVEL)) { - gedcom_error ("Level number out of range [0..%d]", - MAXGEDCLEVEL); - return BADTOKEN; - } - level_diff = level - current_level; - BEGIN(EXPECT_TAG); - current_level = level; - if (level_diff < 1) { - level_diff++; - return CLOSE; - } - else if (level_diff == 1) { - level_diff++; - gedcom_lval.number = current_level; - return OPEN; - } - else { - /* should never happen (error to GEDCOM spec) */ - gedcom_error ("GEDCOM level number is %d higher than " - "previous", - level_diff); - return BADTOKEN; - } - } +{digit}+ ACTION_DIGITS ABBR MKTAGACTION(ABBR) ADDR MKTAGACTION(ADDR) @@ -276,63 +196,21 @@ else { WIFE MKTAGACTION(WIFE) WILL MKTAGACTION(WILL) -{alphanum}+ { if (strlen(yytext) > MAXGEDCTAGLEN) { - gedcom_error("Tag '%s' too long, max %d chars"); - return BADTOKEN; - } - strncpy(string_buf, yytext, MAXGEDCTAGLEN+1); - gedcom_lval.string = TO_INTERNAL(string_buf); - BEGIN(NORMAL); - return USERTAG; - } - -{delim} { gedcom_lval.string = TO_INTERNAL(yytext); - return DELIM; - } - -{any_but_delim} { gedcom_lval.string = TO_INTERNAL(yytext); - /* Due to character conversions, it is possible - that the current character will be combined with - the next, and so now we don't have a character yet... - This is only applicable to the 1byte case (e.g. ANSEL). 
- */ - if (strlen(gedcom_lval.string) > 0) - return ANYCHAR; - } +{alphanum}+ ACTION_ALPHANUM -{escape}/{non_at} { gedcom_lval.string = TO_INTERNAL(yytext); - return ESCAPE; - } +{delim} ACTION_DELIM -{pointer} { gedcom_lval.string = TO_INTERNAL(yytext); - return POINTER; - } +{any_but_delim} ACTION_ANY - /* Due to the conversion of level numbers into brackets, the - terminator is not important, so no token is returned here. - Although not strictly according to the GEDCOM spec, we'll ignore - whitespace just before the terminator. - */ +{escape}/{non_at} ACTION_ESCAPE -{gen_delim}*{terminator} { line_no++; BEGIN(INITIAL); } +{pointer} ACTION_POINTER - /* Eventually we have to return 1 closing bracket (for the trailer). - We can detect whether we have sent the closing bracket using the - level_diff (at eof, first it is 2, then we increment it ourselves) */ +{gen_delim}*{terminator} ACTION_TERMINATOR -<<EOF>> { if (level_diff == 2) { - level_diff++; - return CLOSE; - } - else { - yyterminate(); - } - } +<<EOF>> ACTION_EOF -. { gedcom_error("Unexpected character: '%s' (0x%02x)", - yytext, yytext[0]); - return BADTOKEN; - } +. 
ACTION_UNEXPECTED %% @@ -357,14 +235,14 @@ int main() while (tok) { switch(tok) { case BADTOKEN: printf("BADTOKEN "); break; - case OPEN: printf("OPEN(%d) ", gedcom_lval.number); break; + case OPEN: printf("OPEN(%d) ", gedcom_lval.level); break; case CLOSE: printf("CLOSE "); break; case ESCAPE: printf("ESCAPE(%s) ", gedcom_lval.string); break; case DELIM: printf("DELIM "); break; case ANYCHAR: printf("%s ", gedcom_lval.string); break; - case POINTER: printf("POINTER(%s) ", gedcom_lval.string); break; - case USERTAG: printf("USERTAG(%s) ", gedcom_lval.string); break; - default: printf("TAG(%s) ", gedcom_lval.string); break; + case POINTER: printf("POINTER(%s) ", gedcom_lval.pointer); break; + case USERTAG: printf("USERTAG(%s) ", gedcom_lval.tag); break; + default: printf("TAG(%s) ", gedcom_lval.tag); break; } tok = gedcom_1byte_lex(); } diff --git a/gedcom_hilo.lex b/gedcom_hilo.lex index e5135dc..2a05b31 100644 --- a/gedcom_hilo.lex +++ b/gedcom_hilo.lex @@ -55,98 +55,18 @@ int line_no = 1; %% - /* The GEDCOM level number is converted into a sequence of opening - and closing brackets. Simply put, the following GEDCOM fragment: - - 0 HEAD - 1 SOUR genes - 2 VERS 1.6 - 2 NAME Genes - 1 DATE 07 OCT 2001 - ... - 0 TRLR - - is converted into: - - { HEAD (initial) - { SOUR genes (1 higher: no closing brackets) - { VERS 1.6 (1 higher: no closing brackets) - } { NAME Genes (same level: 1 closing bracket) - } } { DATE 07 OCT 2001 (1 lower: 2 closing brackets) - ... - } { TRLR } - - or more clearly: - - { HEAD - { SOUR genes - { VERS 1.6 } - { NAME Genes } } - { DATE 07 OCT 2001 - ... } - { TRLR } - - But because this means that one token is converted into a series - of tokens, there is some initial code following immediately here - that returns "pending" tokens. 
*/ - %{ -char string_buf[MAXGEDCLINELEN+1]; - -if (level_diff < 1) { - level_diff++; - return CLOSE; -} -else if (level_diff == 1) { - level_diff++; - gedcom_lval.number = current_level; - return OPEN; -} -else { - /* out of brackets... */ -} - -#define TO_INTERNAL(str) to_internal(str, yyleng) - -#define MKTAGACTION(tag) \ - { gedcom_lval.string = TO_INTERNAL(yytext); \ - BEGIN(NORMAL); \ - return TAG_##tag; } +#include "gedcom_lex_common.c" +ACTION_BEFORE_REGEXPS + %} {gen_delim}* /* ignore leading whitespace (also tabs) */ -\x00[0]{digit}+ { gedcom_error ("Level number with leading zero"); - return BADTOKEN; - } +\x00[0]{digit}+ ACTION_0_DIGITS -{digit}+ { int level = atoi(TO_INTERNAL(yytext)); - if ((level < 0) || (level > MAXGEDCLEVEL)) { - gedcom_error ("Level number out of range [0..%d]", - MAXGEDCLEVEL); - return BADTOKEN; - } - level_diff = level - current_level; - BEGIN(EXPECT_TAG); - current_level = level; - if (level_diff < 1) { - level_diff++; - return CLOSE; - } - else if (level_diff == 1) { - level_diff++; - gedcom_lval.number = current_level; - return OPEN; - } - else { - /* should never happen (error to GEDCOM spec) */ - gedcom_error ("GEDCOM level number is %d higher than " - "previous", - level_diff); - return BADTOKEN; - } - } +{digit}+ ACTION_DIGITS \x00A\x00B\x00B\x00R MKTAGACTION(ABBR) \x00A\x00D\x00D\x00R MKTAGACTION(ADDR) @@ -278,57 +198,21 @@ else { \x00W\x00I\x00F\x00E MKTAGACTION(WIFE) \x00W\x00I\x00L\x00L MKTAGACTION(WILL) -{alphanum}+ { if (strlen(yytext) > MAXGEDCTAGLEN) { - gedcom_error("Tag '%s' too long, max %d chars"); - return BADTOKEN; - } - strncpy(string_buf, yytext, MAXGEDCTAGLEN+1); - gedcom_lval.string = TO_INTERNAL(string_buf); - BEGIN(NORMAL); - return USERTAG; - } - -{delim} { gedcom_lval.string = TO_INTERNAL(yytext); - return DELIM; - } - -{any_but_delim} { gedcom_lval.string = TO_INTERNAL(yytext); - return ANYCHAR; - } +{alphanum}+ ACTION_ALPHANUM -{escape}/{non_at} { gedcom_lval.string = TO_INTERNAL(yytext); - return 
ESCAPE; - } +{delim} ACTION_DELIM -{pointer} { gedcom_lval.string = TO_INTERNAL(yytext); - return POINTER; - } +{any_but_delim} ACTION_ANY - /* Due to the conversion of level numbers into brackets, the - terminator is not important, so no token is returned here. - Although not strictly according to the GEDCOM spec, we'll ignore - whitespace just before the terminator. - */ +{escape}/{non_at} ACTION_ESCAPE -{gen_delim}*{terminator} { line_no++; BEGIN(INITIAL); } +{pointer} ACTION_POINTER - /* Eventually we have to return 1 closing bracket (for the trailer). - We can detect whether we have sent the closing bracket using the - level_diff (at eof, first it is 2, then we increment it ourselves) */ +{gen_delim}*{terminator} ACTION_TERMINATOR -<<EOF>> { if (level_diff == 2) { - level_diff++; - return CLOSE; - } - else { - yyterminate(); - } - } +<<EOF>> ACTION_EOF -. { gedcom_error("Unexpected character: '%s' (0x%02x)", - yytext, yytext[0]); - return BADTOKEN; - } +. ACTION_UNEXPECTED %% @@ -354,14 +238,14 @@ int main() while (tok) { switch(tok) { case BADTOKEN: printf("BADTOKEN "); break; - case OPEN: printf("OPEN(%d) ", gedcom_lval.number); break; + case OPEN: printf("OPEN(%d) ", gedcom_lval.level); break; case CLOSE: printf("CLOSE "); break; case ESCAPE: printf("ESCAPE(%s) ", gedcom_lval.string); break; case DELIM: printf("DELIM "); break; case ANYCHAR: printf("%s ", gedcom_lval.string); break; - case POINTER: printf("POINTER(%s) ", gedcom_lval.string); break; - case USERTAG: printf("USERTAG(%s) ", gedcom_lval.string); break; - default: printf("TAG(%s) ", gedcom_lval.string); break; + case POINTER: printf("POINTER(%s) ", gedcom_lval.pointer); break; + case USERTAG: printf("USERTAG(%s) ", gedcom_lval.tag); break; + default: printf("TAG(%s) ", gedcom_lval.tag); break; } tok = gedcom_hilo_lex(); } diff --git a/gedcom_lohi.lex b/gedcom_lohi.lex index 9b76ac9..e91d4f0 100644 --- a/gedcom_lohi.lex +++ b/gedcom_lohi.lex @@ -55,98 +55,18 @@ int line_no = 1; %% - /* The GEDCOM level 
number is converted into a sequence of opening - and closing brackets. Simply put, the following GEDCOM fragment: - - 0 HEAD - 1 SOUR genes - 2 VERS 1.6 - 2 NAME Genes - 1 DATE 07 OCT 2001 - ... - 0 TRLR - - is converted into: - - { HEAD (initial) - { SOUR genes (1 higher: no closing brackets) - { VERS 1.6 (1 higher: no closing brackets) - } { NAME Genes (same level: 1 closing bracket) - } } { DATE 07 OCT 2001 (1 lower: 2 closing brackets) - ... - } { TRLR } - - or more clearly: - - { HEAD - { SOUR genes - { VERS 1.6 } - { NAME Genes } } - { DATE 07 OCT 2001 - ... } - { TRLR } - - But because this means that one token is converted into a series - of tokens, there is some initial code following immediately here - that returns "pending" tokens. */ - %{ -char string_buf[MAXGEDCLINELEN+1]; - -if (level_diff < 1) { - level_diff++; - return CLOSE; -} -else if (level_diff == 1) { - level_diff++; - gedcom_lval.number = current_level; - return OPEN; -} -else { - /* out of brackets... */ -} - -#define TO_INTERNAL(str) to_internal(str, yyleng) - -#define MKTAGACTION(tag) \ - { gedcom_lval.string = TO_INTERNAL(yytext); \ - BEGIN(NORMAL); \ - return TAG_##tag; } +#include "gedcom_lex_common.c" +ACTION_BEFORE_REGEXPS + %} {gen_delim}* /* ignore leading whitespace (also tabs) */ -\x00[0]{digit}+ { gedcom_error ("Level number with leading zero"); - return BADTOKEN; - } +\x00[0]{digit}+ ACTION_0_DIGITS -{digit}+ { int level = atoi(TO_INTERNAL(yytext)); - if ((level < 0) || (level > MAXGEDCLEVEL)) { - gedcom_error ("Level number out of range [0..%d]", - MAXGEDCLEVEL); - return BADTOKEN; - } - level_diff = level - current_level; - BEGIN(EXPECT_TAG); - current_level = level; - if (level_diff < 1) { - level_diff++; - return CLOSE; - } - else if (level_diff == 1) { - level_diff++; - gedcom_lval.number = current_level; - return OPEN; - } - else { - /* should never happen (error to GEDCOM spec) */ - gedcom_error ("GEDCOM level number is %d higher than " - "previous", - level_diff); - 
return BADTOKEN; - } - } +{digit}+ ACTION_DIGITS A\x00B\x00B\x00R\x00 MKTAGACTION(ABBR) A\x00D\x00D\x00R\x00 MKTAGACTION(ADDR) @@ -278,57 +198,21 @@ else { W\x00I\x00F\x00E\x00 MKTAGACTION(WIFE) W\x00I\x00L\x00L\x00 MKTAGACTION(WILL) -{alphanum}+ { if (strlen(yytext) > MAXGEDCTAGLEN) { - gedcom_error("Tag '%s' too long, max %d chars"); - return BADTOKEN; - } - strncpy(string_buf, yytext, MAXGEDCTAGLEN+1); - gedcom_lval.string = TO_INTERNAL(string_buf); - BEGIN(NORMAL); - return USERTAG; - } - -{delim} { gedcom_lval.string = TO_INTERNAL(yytext); - return DELIM; - } - -{any_but_delim} { gedcom_lval.string = TO_INTERNAL(yytext); - return ANYCHAR; - } +{alphanum}+ ACTION_ALPHANUM -{escape}/{non_at} { gedcom_lval.string = TO_INTERNAL(yytext); - return ESCAPE; - } +{delim} ACTION_DELIM -{pointer} { gedcom_lval.string = TO_INTERNAL(yytext); - return POINTER; - } +{any_but_delim} ACTION_ANY - /* Due to the conversion of level numbers into brackets, the - terminator is not important, so no token is returned here. - Although not strictly according to the GEDCOM spec, we'll ignore - whitespace just before the terminator. - */ +{escape}/{non_at} ACTION_ESCAPE -{gen_delim}*{terminator} { line_no++; BEGIN(INITIAL); } +{pointer} ACTION_POINTER - /* Eventually we have to return 1 closing bracket (for the trailer). - We can detect whether we have sent the closing bracket using the - level_diff (at eof, first it is 2, then we increment it ourselves) */ +{gen_delim}*{terminator} ACTION_TERMINATOR -<<EOF>> { if (level_diff == 2) { - level_diff++; - return CLOSE; - } - else { - yyterminate(); - } - } +<<EOF>> ACTION_EOF -. { gedcom_error("Unexpected character: '%s' (0x%02x)", - yytext, yytext[0]); - return BADTOKEN; - } +. 
ACTION_UNEXPECTED %% @@ -354,14 +238,14 @@ int main() while (tok) { switch(tok) { case BADTOKEN: printf("BADTOKEN "); break; - case OPEN: printf("OPEN(%d) ", gedcom_lval.number); break; + case OPEN: printf("OPEN(%d) ", gedcom_lval.level); break; case CLOSE: printf("CLOSE "); break; case ESCAPE: printf("ESCAPE(%s) ", gedcom_lval.string); break; case DELIM: printf("DELIM "); break; case ANYCHAR: printf("%s ", gedcom_lval.string); break; - case POINTER: printf("POINTER(%s) ", gedcom_lval.string); break; - case USERTAG: printf("USERTAG(%s) ", gedcom_lval.string); break; - default: printf("TAG(%s) ", gedcom_lval.string); break; + case POINTER: printf("POINTER(%s) ", gedcom_lval.pointer); break; + case USERTAG: printf("USERTAG(%s) ", gedcom_lval.tag); break; + default: printf("TAG(%s) ", gedcom_lval.tag); break; } tok = gedcom_lohi_lex(); } -- 2.30.2