Use of gedcom_lex_common.c.
author Peter Verthez <Peter.Verthez@advalvas.be>
Sat, 1 Dec 2001 17:58:55 +0000 (17:58 +0000)
committer Peter Verthez <Peter.Verthez@advalvas.be>
Sat, 1 Dec 2001 17:58:55 +0000 (17:58 +0000)
gedcom_1byte.lex
gedcom_hilo.lex
gedcom_lohi.lex

index df4559abc7da5b45b3f143700a2b6d674e2689dd..b7c1d16875ee03a531a4365efc03cfa2bc5ff44a 100644 (file)
@@ -53,98 +53,18 @@ int line_no = 1;
 
 %%
 
-    /* The GEDCOM level number is converted into a sequence of opening
-       and closing brackets.  Simply put, the following GEDCOM fragment:
-
-         0 HEAD
-        1 SOUR genes
-        2 VERS 1.6
-        2 NAME Genes
-        1 DATE 07 OCT 2001
-        ...
-        0 TRLR
-
-       is converted into:
-
-         { HEAD                     (initial)  
-        { SOUR genes               (1 higher: no closing brackets)
-        { VERS 1.6                 (1 higher: no closing brackets)
-        } { NAME Genes             (same level: 1 closing bracket)
-        } } { DATE 07 OCT 2001     (1 lower: 2 closing brackets)
-        ...
-        } { TRLR }
-
-       or more clearly:
-
-         { HEAD
-          { SOUR genes
-            { VERS 1.6 }
-            { NAME Genes } }
-          { DATE 07 OCT 2001
-        ... }
-        { TRLR }
-
-       But because this means that one token is converted into a series
-       of tokens, there is some initial code following immediately here
-       that returns "pending" tokens. */
-
 %{
-char string_buf[MAXGEDCLINELEN+1];
-if (level_diff < 1) {
-  level_diff++;
-  return CLOSE;
-}
-else if (level_diff == 1) {
-  level_diff++;
-  gedcom_lval.number = current_level;
-  return OPEN;
-}
-else {
-  /* out of brackets... */
-}
-
-#define TO_INTERNAL(str) to_internal(str, yyleng) 
-
-#define MKTAGACTION(tag) \
-  { gedcom_lval.string = TO_INTERNAL(yytext); \
-    BEGIN(NORMAL); \
-    return TAG_##tag; }
+#include "gedcom_lex_common.c"
 
+ACTION_BEFORE_REGEXPS
+  
 %}
 
 <INITIAL>{gen_delim}* /* ignore leading whitespace (also tabs) */
 
-<INITIAL>0{digit}+ { gedcom_error ("Level number with leading zero");
-                     return BADTOKEN;
-                   }
+<INITIAL>0{digit}+    ACTION_0_DIGITS
 
-<INITIAL>{digit}+ { int level = atoi(TO_INTERNAL(yytext));
-                    if ((level < 0) || (level > MAXGEDCLEVEL)) {
-                     gedcom_error ("Level number out of range [0..%d]",
-                                   MAXGEDCLEVEL);
-                     return BADTOKEN;
-                   }
-                    level_diff = level - current_level;
-                   BEGIN(EXPECT_TAG);
-                   current_level = level;
-                   if (level_diff < 1) {
-                     level_diff++;
-                     return CLOSE;
-                   }
-                   else if (level_diff == 1) {
-                     level_diff++;
-                     gedcom_lval.number = current_level;
-                     return OPEN;
-                   }
-                   else {
-                     /* should never happen (error to GEDCOM spec) */
-                     gedcom_error ("GEDCOM level number is %d higher than "
-                                   "previous",
-                                   level_diff);
-                     return BADTOKEN;
-                   }
-                  }
+<INITIAL>{digit}+     ACTION_DIGITS
 
 <EXPECT_TAG>ABBR  MKTAGACTION(ABBR)
 <EXPECT_TAG>ADDR  MKTAGACTION(ADDR)
@@ -276,63 +196,21 @@ else {
 <EXPECT_TAG>WIFE  MKTAGACTION(WIFE)
 <EXPECT_TAG>WILL  MKTAGACTION(WILL)
      
-<EXPECT_TAG>{alphanum}+ { if (strlen(yytext) > MAXGEDCTAGLEN) {
-                            gedcom_error("Tag '%s' too long, max %d chars");
-                            return BADTOKEN;
-                          }
-                          strncpy(string_buf, yytext, MAXGEDCTAGLEN+1);
-                         gedcom_lval.string = TO_INTERNAL(string_buf);
-                         BEGIN(NORMAL);
-                         return USERTAG;
-                        }
-
-{delim}      { gedcom_lval.string = TO_INTERNAL(yytext);
-               return DELIM;
-             }
-
-{any_but_delim} { gedcom_lval.string = TO_INTERNAL(yytext);
-                  /* Due to character conversions, it is possible
-                    that the current character will be combined with
-                    the next, and so now we don't have a character yet...
-                    This is only applicable to the 1byte case (e.g. ANSEL).
-                 */
-                  if (strlen(gedcom_lval.string) > 0) 
-                    return ANYCHAR;
-                }
+<EXPECT_TAG>{alphanum}+   ACTION_ALPHANUM
 
-{escape}/{non_at}  { gedcom_lval.string = TO_INTERNAL(yytext);
-                     return ESCAPE;
-                   }
+{delim}                   ACTION_DELIM
 
-{pointer}    { gedcom_lval.string = TO_INTERNAL(yytext);
-               return POINTER;
-             }
+{any_but_delim}           ACTION_ANY
 
-   /* Due to the conversion of level numbers into brackets, the
-      terminator is not important, so no token is returned here.
-      Although not strictly according to the GEDCOM spec, we'll ignore
-      whitespace just before the terminator.
-   */
+{escape}/{non_at}         ACTION_ESCAPE
 
-{gen_delim}*{terminator} { line_no++; BEGIN(INITIAL); }
+{pointer}                 ACTION_POINTER
 
-   /* Eventually we have to return 1 closing bracket (for the trailer).
-      We can detect whether we have sent the closing bracket using the
-      level_diff (at eof, first it is 2, then we increment it ourselves) */
+{gen_delim}*{terminator}  ACTION_TERMINATOR
 
-<<EOF>> { if (level_diff == 2) {
-           level_diff++;
-            return CLOSE;
-          }
-          else {
-           yyterminate();
-         }
-        } 
+<<EOF>>                   ACTION_EOF
 
-.  { gedcom_error("Unexpected character: '%s' (0x%02x)",
-                 yytext, yytext[0]);
-     return BADTOKEN;
-   }
+.                         ACTION_UNEXPECTED
 
 %%
 
@@ -357,14 +235,14 @@ int main()
   while (tok) {
     switch(tok) {
       case BADTOKEN: printf("BADTOKEN "); break;
-      case OPEN: printf("OPEN(%d) ", gedcom_lval.number); break;
+      case OPEN: printf("OPEN(%d) ", gedcom_lval.level); break;
       case CLOSE: printf("CLOSE "); break;
       case ESCAPE: printf("ESCAPE(%s) ", gedcom_lval.string); break;
       case DELIM: printf("DELIM "); break;
       case ANYCHAR: printf("%s ", gedcom_lval.string); break;
-      case POINTER: printf("POINTER(%s) ", gedcom_lval.string); break;
-      case USERTAG: printf("USERTAG(%s) ", gedcom_lval.string); break;
-      default: printf("TAG(%s) ", gedcom_lval.string); break;
+      case POINTER: printf("POINTER(%s) ", gedcom_lval.pointer); break;
+      case USERTAG: printf("USERTAG(%s) ", gedcom_lval.tag); break;
+      default: printf("TAG(%s) ", gedcom_lval.tag); break;
     }
     tok = gedcom_1byte_lex();
   }
index e5135dc68111cb8de1073b81a7b0deb52e09d3d2..2a05b311660b5846f83e8110239c7f0020a1a380 100644 (file)
@@ -55,98 +55,18 @@ int line_no = 1;
 
 %%
 
-    /* The GEDCOM level number is converted into a sequence of opening
-       and closing brackets.  Simply put, the following GEDCOM fragment:
-
-         0 HEAD
-        1 SOUR genes
-        2 VERS 1.6
-        2 NAME Genes
-        1 DATE 07 OCT 2001
-        ...
-        0 TRLR
-
-       is converted into:
-
-         { HEAD                     (initial)  
-        { SOUR genes               (1 higher: no closing brackets)
-        { VERS 1.6                 (1 higher: no closing brackets)
-        } { NAME Genes             (same level: 1 closing bracket)
-        } } { DATE 07 OCT 2001     (1 lower: 2 closing brackets)
-        ...
-        } { TRLR }
-
-       or more clearly:
-
-         { HEAD
-          { SOUR genes
-            { VERS 1.6 }
-            { NAME Genes } }
-          { DATE 07 OCT 2001
-        ... }
-        { TRLR }
-
-       But because this means that one token is converted into a series
-       of tokens, there is some initial code following immediately here
-       that returns "pending" tokens. */
-
 %{
-char string_buf[MAXGEDCLINELEN+1];
-if (level_diff < 1) {
-  level_diff++;
-  return CLOSE;
-}
-else if (level_diff == 1) {
-  level_diff++;
-  gedcom_lval.number = current_level;
-  return OPEN;
-}
-else {
-  /* out of brackets... */
-}
-
-#define TO_INTERNAL(str) to_internal(str, yyleng) 
-
-#define MKTAGACTION(tag) \
-  { gedcom_lval.string = TO_INTERNAL(yytext); \
-    BEGIN(NORMAL); \
-    return TAG_##tag; }
+#include "gedcom_lex_common.c"
 
+ACTION_BEFORE_REGEXPS
+  
 %}
 
 <INITIAL>{gen_delim}* /* ignore leading whitespace (also tabs) */
 
-<INITIAL>\x00[0]{digit}+ { gedcom_error ("Level number with leading zero");
-                           return BADTOKEN;
-                         }
+<INITIAL>\x00[0]{digit}+ ACTION_0_DIGITS
 
-<INITIAL>{digit}+ { int level = atoi(TO_INTERNAL(yytext));
-                    if ((level < 0) || (level > MAXGEDCLEVEL)) {
-                     gedcom_error ("Level number out of range [0..%d]",
-                                   MAXGEDCLEVEL);
-                     return BADTOKEN;
-                   }
-                    level_diff = level - current_level;
-                   BEGIN(EXPECT_TAG);
-                   current_level = level;
-                   if (level_diff < 1) {
-                     level_diff++;
-                     return CLOSE;
-                   }
-                   else if (level_diff == 1) {
-                     level_diff++;
-                     gedcom_lval.number = current_level;
-                     return OPEN;
-                   }
-                   else {
-                     /* should never happen (error to GEDCOM spec) */
-                     gedcom_error ("GEDCOM level number is %d higher than "
-                                   "previous",
-                                   level_diff);
-                     return BADTOKEN;
-                   }
-                  }
+<INITIAL>{digit}+        ACTION_DIGITS
 
 <EXPECT_TAG>\x00A\x00B\x00B\x00R  MKTAGACTION(ABBR)
 <EXPECT_TAG>\x00A\x00D\x00D\x00R  MKTAGACTION(ADDR)
@@ -278,57 +198,21 @@ else {
 <EXPECT_TAG>\x00W\x00I\x00F\x00E  MKTAGACTION(WIFE)
 <EXPECT_TAG>\x00W\x00I\x00L\x00L  MKTAGACTION(WILL)
      
-<EXPECT_TAG>{alphanum}+ { if (strlen(yytext) > MAXGEDCTAGLEN) {
-                            gedcom_error("Tag '%s' too long, max %d chars");
-                            return BADTOKEN;
-                          }
-                          strncpy(string_buf, yytext, MAXGEDCTAGLEN+1);
-                         gedcom_lval.string = TO_INTERNAL(string_buf);
-                         BEGIN(NORMAL);
-                         return USERTAG;
-                        }
-
-{delim}      { gedcom_lval.string = TO_INTERNAL(yytext);
-               return DELIM;
-             }
-
-{any_but_delim} { gedcom_lval.string = TO_INTERNAL(yytext);
-                  return ANYCHAR;
-                }
+<EXPECT_TAG>{alphanum}+  ACTION_ALPHANUM
 
-{escape}/{non_at}  { gedcom_lval.string = TO_INTERNAL(yytext);
-                     return ESCAPE;
-                   }
+{delim}                  ACTION_DELIM
 
-{pointer}    { gedcom_lval.string = TO_INTERNAL(yytext);
-               return POINTER;
-             }
+{any_but_delim}          ACTION_ANY
 
-   /* Due to the conversion of level numbers into brackets, the
-      terminator is not important, so no token is returned here.
-      Although not strictly according to the GEDCOM spec, we'll ignore
-      whitespace just before the terminator.
-   */
+{escape}/{non_at}        ACTION_ESCAPE
 
-{gen_delim}*{terminator} { line_no++; BEGIN(INITIAL); }
+{pointer}                ACTION_POINTER
 
-   /* Eventually we have to return 1 closing bracket (for the trailer).
-      We can detect whether we have sent the closing bracket using the
-      level_diff (at eof, first it is 2, then we increment it ourselves) */
+{gen_delim}*{terminator} ACTION_TERMINATOR
 
-<<EOF>> { if (level_diff == 2) {
-           level_diff++;
-            return CLOSE;
-          }
-          else {
-           yyterminate();
-         }
-        } 
+<<EOF>>                  ACTION_EOF
 
-.  { gedcom_error("Unexpected character: '%s' (0x%02x)",
-                 yytext, yytext[0]);
-     return BADTOKEN;
-   }
+.                        ACTION_UNEXPECTED
 
 %%
 
@@ -354,14 +238,14 @@ int main()
   while (tok) {
     switch(tok) {
       case BADTOKEN: printf("BADTOKEN "); break;
-      case OPEN: printf("OPEN(%d) ", gedcom_lval.number); break;
+      case OPEN: printf("OPEN(%d) ", gedcom_lval.level); break;
       case CLOSE: printf("CLOSE "); break;
       case ESCAPE: printf("ESCAPE(%s) ", gedcom_lval.string); break;
       case DELIM: printf("DELIM "); break;
       case ANYCHAR: printf("%s ", gedcom_lval.string); break;
-      case POINTER: printf("POINTER(%s) ", gedcom_lval.string); break;
-      case USERTAG: printf("USERTAG(%s) ", gedcom_lval.string); break;
-      default: printf("TAG(%s) ", gedcom_lval.string); break;
+      case POINTER: printf("POINTER(%s) ", gedcom_lval.pointer); break;
+      case USERTAG: printf("USERTAG(%s) ", gedcom_lval.tag); break;
+      default: printf("TAG(%s) ", gedcom_lval.tag); break;
     }
     tok = gedcom_hilo_lex();
   }
index 9b76ac9f19c94e6f618f6867275e67e904ab597f..e91d4f0693d1a40738ad6c1b2cb81f9f38c4548a 100644 (file)
@@ -55,98 +55,18 @@ int line_no = 1;
 
 %%
 
-    /* The GEDCOM level number is converted into a sequence of opening
-       and closing brackets.  Simply put, the following GEDCOM fragment:
-
-         0 HEAD
-        1 SOUR genes
-        2 VERS 1.6
-        2 NAME Genes
-        1 DATE 07 OCT 2001
-        ...
-        0 TRLR
-
-       is converted into:
-
-         { HEAD                     (initial)  
-        { SOUR genes               (1 higher: no closing brackets)
-        { VERS 1.6                 (1 higher: no closing brackets)
-        } { NAME Genes             (same level: 1 closing bracket)
-        } } { DATE 07 OCT 2001     (1 lower: 2 closing brackets)
-        ...
-        } { TRLR }
-
-       or more clearly:
-
-         { HEAD
-          { SOUR genes
-            { VERS 1.6 }
-            { NAME Genes } }
-          { DATE 07 OCT 2001
-        ... }
-        { TRLR }
-
-       But because this means that one token is converted into a series
-       of tokens, there is some initial code following immediately here
-       that returns "pending" tokens. */
-
 %{
-char string_buf[MAXGEDCLINELEN+1];
-if (level_diff < 1) {
-  level_diff++;
-  return CLOSE;
-}
-else if (level_diff == 1) {
-  level_diff++;
-  gedcom_lval.number = current_level;
-  return OPEN;
-}
-else {
-  /* out of brackets... */
-}
-
-#define TO_INTERNAL(str) to_internal(str, yyleng) 
-
-#define MKTAGACTION(tag) \
-  { gedcom_lval.string = TO_INTERNAL(yytext); \
-    BEGIN(NORMAL); \
-    return TAG_##tag; }
+#include "gedcom_lex_common.c"
 
+ACTION_BEFORE_REGEXPS
+  
 %}
 
 <INITIAL>{gen_delim}* /* ignore leading whitespace (also tabs) */
 
-<INITIAL>\x00[0]{digit}+ { gedcom_error ("Level number with leading zero");
-                           return BADTOKEN;
-                         }
+<INITIAL>\x00[0]{digit}+  ACTION_0_DIGITS
 
-<INITIAL>{digit}+ { int level = atoi(TO_INTERNAL(yytext));
-                    if ((level < 0) || (level > MAXGEDCLEVEL)) {
-                     gedcom_error ("Level number out of range [0..%d]",
-                                   MAXGEDCLEVEL);
-                     return BADTOKEN;
-                   }
-                    level_diff = level - current_level;
-                   BEGIN(EXPECT_TAG);
-                   current_level = level;
-                   if (level_diff < 1) {
-                     level_diff++;
-                     return CLOSE;
-                   }
-                   else if (level_diff == 1) {
-                     level_diff++;
-                     gedcom_lval.number = current_level;
-                     return OPEN;
-                   }
-                   else {
-                     /* should never happen (error to GEDCOM spec) */
-                     gedcom_error ("GEDCOM level number is %d higher than "
-                                   "previous",
-                                   level_diff);
-                     return BADTOKEN;
-                   }
-                  }
+<INITIAL>{digit}+         ACTION_DIGITS
 
 <EXPECT_TAG>A\x00B\x00B\x00R\x00  MKTAGACTION(ABBR)
 <EXPECT_TAG>A\x00D\x00D\x00R\x00  MKTAGACTION(ADDR)
@@ -278,57 +198,21 @@ else {
 <EXPECT_TAG>W\x00I\x00F\x00E\x00  MKTAGACTION(WIFE)
 <EXPECT_TAG>W\x00I\x00L\x00L\x00  MKTAGACTION(WILL)
      
-<EXPECT_TAG>{alphanum}+ { if (strlen(yytext) > MAXGEDCTAGLEN) {
-                            gedcom_error("Tag '%s' too long, max %d chars");
-                            return BADTOKEN;
-                          }
-                          strncpy(string_buf, yytext, MAXGEDCTAGLEN+1);
-                         gedcom_lval.string = TO_INTERNAL(string_buf);
-                         BEGIN(NORMAL);
-                         return USERTAG;
-                        }
-
-{delim}      { gedcom_lval.string = TO_INTERNAL(yytext);
-               return DELIM;
-             }
-
-{any_but_delim} { gedcom_lval.string = TO_INTERNAL(yytext);
-                  return ANYCHAR;
-                }
+<EXPECT_TAG>{alphanum}+  ACTION_ALPHANUM
 
-{escape}/{non_at}  { gedcom_lval.string = TO_INTERNAL(yytext);
-                     return ESCAPE;
-                   }
+{delim}                  ACTION_DELIM
 
-{pointer}    { gedcom_lval.string = TO_INTERNAL(yytext);
-               return POINTER;
-             }
+{any_but_delim}          ACTION_ANY
 
-   /* Due to the conversion of level numbers into brackets, the
-      terminator is not important, so no token is returned here.
-      Although not strictly according to the GEDCOM spec, we'll ignore
-      whitespace just before the terminator.
-   */
+{escape}/{non_at}        ACTION_ESCAPE
 
-{gen_delim}*{terminator} { line_no++; BEGIN(INITIAL); }
+{pointer}                ACTION_POINTER
 
-   /* Eventually we have to return 1 closing bracket (for the trailer).
-      We can detect whether we have sent the closing bracket using the
-      level_diff (at eof, first it is 2, then we increment it ourselves) */
+{gen_delim}*{terminator} ACTION_TERMINATOR
 
-<<EOF>> { if (level_diff == 2) {
-           level_diff++;
-            return CLOSE;
-          }
-          else {
-           yyterminate();
-         }
-        } 
+<<EOF>>                  ACTION_EOF
 
-.  { gedcom_error("Unexpected character: '%s' (0x%02x)",
-                 yytext, yytext[0]);
-     return BADTOKEN;
-   }
+.                        ACTION_UNEXPECTED
 
 %%
 
@@ -354,14 +238,14 @@ int main()
   while (tok) {
     switch(tok) {
       case BADTOKEN: printf("BADTOKEN "); break;
-      case OPEN: printf("OPEN(%d) ", gedcom_lval.number); break;
+      case OPEN: printf("OPEN(%d) ", gedcom_lval.level); break;
       case CLOSE: printf("CLOSE "); break;
       case ESCAPE: printf("ESCAPE(%s) ", gedcom_lval.string); break;
       case DELIM: printf("DELIM "); break;
       case ANYCHAR: printf("%s ", gedcom_lval.string); break;
-      case POINTER: printf("POINTER(%s) ", gedcom_lval.string); break;
-      case USERTAG: printf("USERTAG(%s) ", gedcom_lval.string); break;
-      default: printf("TAG(%s) ", gedcom_lval.string); break;
+      case POINTER: printf("POINTER(%s) ", gedcom_lval.pointer); break;
+      case USERTAG: printf("USERTAG(%s) ", gedcom_lval.tag); break;
+      default: printf("TAG(%s) ", gedcom_lval.tag); break;
     }
     tok = gedcom_lohi_lex();
   }