Release 0.9
[gedcom-parse.git] / gedcom.y
index 5416d50b67527e7193bbedb9159ee81dd5f9e86e..45dc37490c0613a53d0e934884f86fc2d994d529 100644 (file)
--- a/gedcom.y
+++ b/gedcom.y
 
 /* General notes:
 
-   - The syntax analysis doesn't handle the contents of the line values
-     or their encoding; this is done in the semantic analysis.
+   - The syntax analysis doesn't handle the contents of the line values;
+     this is done in the semantic analysis.
 
  */
 
@@ -139,9 +139,10 @@ int  fail           = 0;
 int  compat_enabled = 1;
 int  gedcom_high_level_debug = 0; 
 int  compatibility  = 0; 
-MECHANISM error_mechanism=IMMED_FAIL;
-char string_buf[MAXGEDCLINELEN*4+1];
-char *string_buf_ptr;
+/* Selects how HANDLE_ERROR reacts to a reported error; starts as IMMED_FAIL */
+MECHANISM error_mechanism = IMMED_FAIL;
+/* Buffer in which the line_item rules assemble the text of a line value */
+char line_item_buf[MAXGEDCLINELEN * UTF_FACTOR + 1];
+/* Write cursor into line_item_buf: the next position to be filled */
+char *line_item_buf_ptr;
 
 enum _COMPAT {
   C_FTREE = 0x01
@@ -157,64 +158,64 @@ int  check_occurrence(int tag);
 void set_compatibility(char* program);
 int  compat_mode(int flags); 
 
-#define CLEAR_BUFFER(BUF) { memset(BUF, 0, sizeof(BUF)); } 
+/* Zero-fill BUF.  The size is taken from sizeof(BUF), so BUF has to be an
+   array object (not a pointer).  The expansion already ends in a semicolon. */
+#define CLEAR_BUFFER(BUF)                                                     \
+     memset(BUF, 0, sizeof(BUF));
  
-#define HANDLE_ERROR \
-     { \
-       if (error_mechanism == IMMED_FAIL) { \
-        YYABORT; \
-       } \
-       else if (error_mechanism == DEFER_FAIL) { \
-         yyerrok; fail = 1; \
-       } \
-       else if (error_mechanism == IGNORE_ERRORS) { \
-        yyerrok; \
-       } \
+/* React to an error that has just been reported, depending on
+   error_mechanism:
+     IMMED_FAIL    - abort the parse (YYABORT)
+     DEFER_FAIL    - clear the parser's error state (yyerrok), set 'fail'
+     IGNORE_ERRORS - only clear the parser's error state
+   Uses YYABORT/yyerrok, so it can only be expanded inside a grammar action. */
+#define HANDLE_ERROR                                                          \
+     { if (error_mechanism == IMMED_FAIL) {                                   \
+        YYABORT;                                                             \
+       }                                                                      \
+       else if (error_mechanism == DEFER_FAIL) {                              \
+         yyerrok; fail = 1;                                                   \
+       }                                                                      \
+       else if (error_mechanism == IGNORE_ERRORS) {                           \
+        yyerrok;                                                             \
+       }                                                                      \
      }
-#define START(PARENTTAG) \
-     { ++count_level; \
-       set_parenttag(#PARENTTAG); \
-       push_countarray(); \
+/* Enter a new nesting level: increment count_level, record the stringified
+   PARENTTAG via set_parenttag() and call push_countarray(). */
+#define START(PARENTTAG)                                                      \
+     { ++count_level;                                                         \
+       set_parenttag(#PARENTTAG);                                             \
+       push_countarray();                                                     \
      }
-#define CHK(TAG) \
-     { if (!check_occurrence(TAG_##TAG)) { \
-         char* parenttag = get_parenttag(); \
-         gedcom_error("The tag '%s' is mandatory within '%s', but missing", \
-                     #TAG, parenttag); \
-         HANDLE_ERROR; \
-       } \
+/* Report an error (and run HANDLE_ERROR) when check_occurrence(TAG_<TAG>)
+   returns zero; the message names TAG and the current parent tag. */
+#define CHK(TAG)                                                              \
+     { if (!check_occurrence(TAG_##TAG)) {                                    \
+         char* parenttag = get_parenttag();                                   \
+         gedcom_error("The tag '%s' is mandatory within '%s', but missing",   \
+                     #TAG, parenttag);                                       \
+         HANDLE_ERROR;                                                        \
+       }                                                                      \
      }
-#define POP \
-     { pop_countarray(); \
-       --count_level; \
+/* Leave the current nesting level: call pop_countarray() and decrement
+   count_level (the reverse of what START does). */
+#define POP                                                                   \
+     { pop_countarray();                                                      \
+       --count_level;                                                         \
      }
+/* CHECKn: run CHK for each of the n listed tags, then POP.  CHECK0 only
+   pops, and its expansion carries its own semicolon.
+   OCCUR1: call count_tag() for CHILDTAG; the MIN argument is not used in
+   the expansion. */
 #define CHECK0 POP; 
 #define CHECK1(TAG1) { CHK(TAG1); POP; }
-#define CHECK2(TAG1,TAG2) \
+#define CHECK2(TAG1,TAG2)                                                     \
      { CHK(TAG1); CHK(TAG2); POP; }
-#define CHECK3(TAG1,TAG2,TAG3) \
+#define CHECK3(TAG1,TAG2,TAG3)                                                \
      { CHK(TAG1); CHK(TAG2); CHK(TAG3); POP; }
-#define CHECK4(TAG1,TAG2,TAG3,TAG4) \
+#define CHECK4(TAG1,TAG2,TAG3,TAG4)                                           \
      { CHK(TAG1); CHK(TAG2); CHK(TAG3); CHK(TAG4); POP; } 
 #define OCCUR1(CHILDTAG, MIN) { count_tag(TAG_##CHILDTAG); } 
-#define OCCUR2(CHILDTAG, MIN, MAX) \
-     { int num = count_tag(TAG_##CHILDTAG); \
-       if (num > MAX) { \
-         char* parenttag = get_parenttag(); \
-         gedcom_error("The tag '%s' can maximally occur %d " \
-                     "time(s) within '%s'", \
-                     #CHILDTAG, MAX, parenttag); \
-         HANDLE_ERROR; \
-       } \
+/* Call count_tag() for CHILDTAG and report an error (then run HANDLE_ERROR)
+   when the returned number exceeds MAX.  The MIN argument is not used in
+   the expansion. */
+#define OCCUR2(CHILDTAG, MIN, MAX)                                            \
+     { int num = count_tag(TAG_##CHILDTAG);                                   \
+       if (num > MAX) {                                                       \
+         char* parenttag = get_parenttag();                                   \
+         gedcom_error("The tag '%s' can maximally occur %d "                  \
+                     "time(s) within '%s'",                                  \
+                     #CHILDTAG, MAX, parenttag);                             \
+         HANDLE_ERROR;                                                        \
+       }                                                                      \
      }
-#define INVALID_TAG(CHILDTAG) \
-     { char* parenttag = get_parenttag(); \
-       gedcom_error("The tag '%s' is not a valid tag within '%s'", \
-                   CHILDTAG, parenttag); \
-       HANDLE_ERROR; \
+/* Report that CHILDTAG (a string) is not allowed below the current parent
+   tag, then run HANDLE_ERROR. */
+#define INVALID_TAG(CHILDTAG)                                                 \
+     { char* parenttag = get_parenttag();                                     \
+       gedcom_error("The tag '%s' is not a valid tag within '%s'",            \
+                   CHILDTAG, parenttag);                                     \
+       HANDLE_ERROR;                                                          \
      }
-#define INVALID_TOP_TAG(CHILDTAG) \
-     { gedcom_error("The tag '%s' is not a valid top-level tag", \
+/* Report that CHILDTAG (a string) is not allowed as a top-level tag, then
+   run HANDLE_ERROR. */
+#define INVALID_TOP_TAG(CHILDTAG)                                             \
+     { gedcom_error("The tag '%s' is not a valid top-level tag",              \
                    CHILDTAG); \
        HANDLE_ERROR; \
      }
@@ -222,9 +223,7 @@ int  compat_mode(int flags);
 %}
 
 %union {
-  int  level;
-  char *pointer;
-  char *tag;
+  /* Semantic value of the OPEN token */
+  int  number;
+  /* Semantic value of the tokens and nonterminals declared as <string> */
   char *string;
 }
 
@@ -232,145 +231,145 @@ int  compat_mode(int flags);
 %expect 300
 
 %token <string> BADTOKEN
-%token <level> OPEN
+%token <number> OPEN
 %token <string> CLOSE
 %token <string> ESCAPE
 %token <string> DELIM
 %token <string> ANYCHAR
-%token <pointer> POINTER
-%token <tag> USERTAG
-%token <tag> TAG_ABBR
-%token <tag> TAG_ADDR
-%token <tag> TAG_ADR1
-%token <tag> TAG_ADR2
-%token <tag> TAG_ADOP
-%token <tag> TAG_AFN
-%token <tag> TAG_AGE
-%token <tag> TAG_AGNC
-%token <tag> TAG_ALIA
-%token <tag> TAG_ANCE
-%token <tag> TAG_ANCI
-%token <tag> TAG_ANUL
-%token <tag> TAG_ASSO
-%token <tag> TAG_AUTH
-%token <tag> TAG_BAPL
-%token <tag> TAG_BAPM
-%token <tag> TAG_BARM
-%token <tag> TAG_BASM
-%token <tag> TAG_BIRT
-%token <tag> TAG_BLES
-%token <tag> TAG_BLOB
-%token <tag> TAG_BURI
-%token <tag> TAG_CALN
-%token <tag> TAG_CAST
-%token <tag> TAG_CAUS
-%token <tag> TAG_CENS
-%token <tag> TAG_CHAN
-%token <tag> TAG_CHAR
-%token <tag> TAG_CHIL
-%token <tag> TAG_CHR
-%token <tag> TAG_CHRA
-%token <tag> TAG_CITY
-%token <tag> TAG_CONC
-%token <tag> TAG_CONF
-%token <tag> TAG_CONL
-%token <tag> TAG_CONT
-%token <tag> TAG_COPR
-%token <tag> TAG_CORP
-%token <tag> TAG_CREM
-%token <tag> TAG_CTRY
-%token <tag> TAG_DATA
-%token <tag> TAG_DATE
-%token <tag> TAG_DEAT
-%token <tag> TAG_DESC
-%token <tag> TAG_DESI
-%token <tag> TAG_DEST
-%token <tag> TAG_DIV
-%token <tag> TAG_DIVF
-%token <tag> TAG_DSCR
-%token <tag> TAG_EDUC
-%token <tag> TAG_EMIG
-%token <tag> TAG_ENDL
-%token <tag> TAG_ENGA
-%token <tag> TAG_EVEN
-%token <tag> TAG_FAM
-%token <tag> TAG_FAMC
-%token <tag> TAG_FAMF
-%token <tag> TAG_FAMS
-%token <tag> TAG_FCOM
-%token <tag> TAG_FILE
-%token <tag> TAG_FORM
-%token <tag> TAG_GEDC
-%token <tag> TAG_GIVN
-%token <tag> TAG_GRAD
-%token <tag> TAG_HEAD
-%token <tag> TAG_HUSB
-%token <tag> TAG_IDNO
-%token <tag> TAG_IMMI
-%token <tag> TAG_INDI
-%token <tag> TAG_LANG
-%token <tag> TAG_LEGA
-%token <tag> TAG_MARB
-%token <tag> TAG_MARC
-%token <tag> TAG_MARL
-%token <tag> TAG_MARR
-%token <tag> TAG_MARS
-%token <tag> TAG_MEDI
-%token <tag> TAG_NAME
-%token <tag> TAG_NATI
-%token <tag> TAG_NATU
-%token <tag> TAG_NCHI
-%token <tag> TAG_NICK
-%token <tag> TAG_NMR
-%token <tag> TAG_NOTE
-%token <tag> TAG_NPFX
-%token <tag> TAG_NSFX
-%token <tag> TAG_OBJE
-%token <tag> TAG_OCCU
-%token <tag> TAG_ORDI
-%token <tag> TAG_ORDN
-%token <tag> TAG_PAGE
-%token <tag> TAG_PEDI
-%token <tag> TAG_PHON
-%token <tag> TAG_PLAC
-%token <tag> TAG_POST
-%token <tag> TAG_PROB
-%token <tag> TAG_PROP
-%token <tag> TAG_PUBL
-%token <tag> TAG_QUAY
-%token <tag> TAG_REFN
-%token <tag> TAG_RELA
-%token <tag> TAG_RELI
-%token <tag> TAG_REPO
-%token <tag> TAG_RESI
-%token <tag> TAG_RESN
-%token <tag> TAG_RETI
-%token <tag> TAG_RFN
-%token <tag> TAG_RIN
-%token <tag> TAG_ROLE
-%token <tag> TAG_SEX
-%token <tag> TAG_SLGC
-%token <tag> TAG_SLGS
-%token <tag> TAG_SOUR
-%token <tag> TAG_SPFX
-%token <tag> TAG_SSN
-%token <tag> TAG_STAE
-%token <tag> TAG_STAT
-%token <tag> TAG_SUBM
-%token <tag> TAG_SUBN
-%token <tag> TAG_SURN
-%token <tag> TAG_TEMP
-%token <tag> TAG_TEXT
-%token <tag> TAG_TIME
-%token <tag> TAG_TITL
-%token <tag> TAG_TRLR
-%token <tag> TAG_TYPE
-%token <tag> TAG_VERS
-%token <tag> TAG_WIFE
-%token <tag> TAG_WILL
-
-%type <tag> anystdtag
-%type <tag> anytoptag
+/* Pointers, user tags and the standard tags below all use the <string>
+   member of the %union as their semantic value. */
+%token <string> POINTER
+%token <string> USERTAG
+%token <string> TAG_ABBR
+%token <string> TAG_ADDR
+%token <string> TAG_ADR1
+%token <string> TAG_ADR2
+%token <string> TAG_ADOP
+%token <string> TAG_AFN
+%token <string> TAG_AGE
+%token <string> TAG_AGNC
+%token <string> TAG_ALIA
+%token <string> TAG_ANCE
+%token <string> TAG_ANCI
+%token <string> TAG_ANUL
+%token <string> TAG_ASSO
+%token <string> TAG_AUTH
+%token <string> TAG_BAPL
+%token <string> TAG_BAPM
+%token <string> TAG_BARM
+%token <string> TAG_BASM
+%token <string> TAG_BIRT
+%token <string> TAG_BLES
+%token <string> TAG_BLOB
+%token <string> TAG_BURI
+%token <string> TAG_CALN
+%token <string> TAG_CAST
+%token <string> TAG_CAUS
+%token <string> TAG_CENS
+%token <string> TAG_CHAN
+%token <string> TAG_CHAR
+%token <string> TAG_CHIL
+%token <string> TAG_CHR
+%token <string> TAG_CHRA
+%token <string> TAG_CITY
+%token <string> TAG_CONC
+%token <string> TAG_CONF
+%token <string> TAG_CONL
+%token <string> TAG_CONT
+%token <string> TAG_COPR
+%token <string> TAG_CORP
+%token <string> TAG_CREM
+%token <string> TAG_CTRY
+%token <string> TAG_DATA
+%token <string> TAG_DATE
+%token <string> TAG_DEAT
+%token <string> TAG_DESC
+%token <string> TAG_DESI
+%token <string> TAG_DEST
+%token <string> TAG_DIV
+%token <string> TAG_DIVF
+%token <string> TAG_DSCR
+%token <string> TAG_EDUC
+%token <string> TAG_EMIG
+%token <string> TAG_ENDL
+%token <string> TAG_ENGA
+%token <string> TAG_EVEN
+%token <string> TAG_FAM
+%token <string> TAG_FAMC
+%token <string> TAG_FAMF
+%token <string> TAG_FAMS
+%token <string> TAG_FCOM
+%token <string> TAG_FILE
+%token <string> TAG_FORM
+%token <string> TAG_GEDC
+%token <string> TAG_GIVN
+%token <string> TAG_GRAD
+%token <string> TAG_HEAD
+%token <string> TAG_HUSB
+%token <string> TAG_IDNO
+%token <string> TAG_IMMI
+%token <string> TAG_INDI
+%token <string> TAG_LANG
+%token <string> TAG_LEGA
+%token <string> TAG_MARB
+%token <string> TAG_MARC
+%token <string> TAG_MARL
+%token <string> TAG_MARR
+%token <string> TAG_MARS
+%token <string> TAG_MEDI
+%token <string> TAG_NAME
+%token <string> TAG_NATI
+%token <string> TAG_NATU
+%token <string> TAG_NCHI
+%token <string> TAG_NICK
+%token <string> TAG_NMR
+%token <string> TAG_NOTE
+%token <string> TAG_NPFX
+%token <string> TAG_NSFX
+%token <string> TAG_OBJE
+%token <string> TAG_OCCU
+%token <string> TAG_ORDI
+%token <string> TAG_ORDN
+%token <string> TAG_PAGE
+%token <string> TAG_PEDI
+%token <string> TAG_PHON
+%token <string> TAG_PLAC
+%token <string> TAG_POST
+%token <string> TAG_PROB
+%token <string> TAG_PROP
+%token <string> TAG_PUBL
+%token <string> TAG_QUAY
+%token <string> TAG_REFN
+%token <string> TAG_RELA
+%token <string> TAG_RELI
+%token <string> TAG_REPO
+%token <string> TAG_RESI
+%token <string> TAG_RESN
+%token <string> TAG_RETI
+%token <string> TAG_RFN
+%token <string> TAG_RIN
+%token <string> TAG_ROLE
+%token <string> TAG_SEX
+%token <string> TAG_SLGC
+%token <string> TAG_SLGS
+%token <string> TAG_SOUR
+%token <string> TAG_SPFX
+%token <string> TAG_SSN
+%token <string> TAG_STAE
+%token <string> TAG_STAT
+%token <string> TAG_SUBM
+%token <string> TAG_SUBN
+%token <string> TAG_SURN
+%token <string> TAG_TEMP
+%token <string> TAG_TEXT
+%token <string> TAG_TIME
+%token <string> TAG_TITL
+%token <string> TAG_TRLR
+%token <string> TAG_TYPE
+%token <string> TAG_VERS
+%token <string> TAG_WIFE
+%token <string> TAG_WILL
+
+%type <string> anystdtag
+%type <string> anytoptag
 %type <string> line_item
 %type <string> mand_line_item
 %type <string> note_line_item
@@ -433,7 +432,8 @@ head_sub     : head_sour_sect  { OCCUR2(SOUR, 1, 1) }
 /* HEAD.SOUR */
 head_sour_sect : OPEN DELIM TAG_SOUR mand_line_item 
                  { set_compatibility($4);
-                  gedcom_debug_print("===Source: '%s'\n", $4);
+                  gedcom_debug_print("===Source: '%s', '%s'\n",
+                                     $4, $3);
                   START(SOUR)
                 }
                  head_sour_subs
@@ -2125,40 +2125,34 @@ opt_line_item : /* empty */ { }
               ;
 
 line_item   : anychar  { size_t i;
-                        CLEAR_BUFFER(string_buf);
-                         string_buf_ptr = string_buf;
+                        CLEAR_BUFFER(line_item_buf);
+                        line_item_buf_ptr = line_item_buf;
                         /* The following also takes care of '@@' */
                         if (!strncmp($1, "@@", 3))
-                          *string_buf_ptr++ = '@';
+                          *line_item_buf_ptr++ = '@';
                         else
                           for (i=0; i < strlen($1); i++)
-                            *string_buf_ptr++ = $1[i];
-                        $$ = string_buf;
+                            *line_item_buf_ptr++ = $1[i];
+                        $$ = line_item_buf;
                        }
-            | ESCAPE   { CLEAR_BUFFER(string_buf);
-                        string_buf_ptr = string_buf;
+            | ESCAPE   { CLEAR_BUFFER(line_item_buf);
+                        line_item_buf_ptr = line_item_buf;
                         /* For now, ignore escapes */
-                        $$ = string_buf;
+                        $$ = line_item_buf;
                       }
             | line_item anychar
-                  { if (strlen(string_buf) >= MAXGEDCLINELEN) {
-                     gedcom_error("Line too long");
-                     YYERROR;
-                   }
-                   else {
-                     size_t i;
-                     /* The following also takes care of '@@' */
-                     if (!strncmp($2, "@@", 3))
-                       *string_buf_ptr++ = '@';
-                     else
-                       for (i=0; i < strlen($2); i++)
-                         *string_buf_ptr++ = $2[i];
-                     $$ = string_buf;
-                   }
+                  { /* Append the text of $2 to the line item collected so
+                      far in line_item_buf. */
+                   const char *src = $2;
+                   size_t len = strlen(src);
+                   /* Space left in the buffer, keeping one byte for the
+                      terminating NUL */
+                   size_t room = sizeof(line_item_buf) - 1
+                     - (size_t)(line_item_buf_ptr - line_item_buf);
+                   /* '@@' is an escaped at-sign: store a single '@' */
+                   if (!strncmp(src, "@@", 3))
+                     len = 1;
+                   /* Refuse to write past the end of line_item_buf */
+                   if (len > room) {
+                     gedcom_error("Line too long");
+                     YYERROR;
+                   }
+                   memcpy(line_item_buf_ptr, src, len);
+                   line_item_buf_ptr += len;
+                   $$ = line_item_buf;
                  }
             | line_item ESCAPE
                   { /* For now, ignore escapes */
-                   $$ = string_buf;
+                   $$ = line_item_buf;
                  }
             ;
 
@@ -2345,7 +2339,7 @@ anystdtag   : TAG_ABBR
 /* Functions that handle the counting of subtags */
 
 int* count_arrays[MAXGEDCLEVEL+1];
-char tag_stack[MAXGEDCLEVEL+1][MAXSTDTAGLENGTH+1];
+/* One tag name per nesting level; set_parenttag() writes the row selected
+   by count_level. */
+char tag_stack[MAXGEDCLEVEL+1][MAXSTDTAGLEN+1];
 
 void push_countarray()
 {
@@ -2368,7 +2362,7 @@ void push_countarray()
 
+/* Store 'tag' as the parent tag name of the current nesting level, i.e. in
+   tag_stack[count_level].  Names longer than MAXSTDTAGLEN characters are
+   truncated. */
 void set_parenttag(char* tag)
 {
-  strncpy(tag_stack[count_level], tag, MAXSTDTAGLENGTH+1);
+  /* strncpy leaves the destination unterminated when the source fills it
+     completely, so copy at most MAXSTDTAGLEN characters and terminate
+     the row explicitly. */
+  strncpy(tag_stack[count_level], tag, MAXSTDTAGLEN);
+  tag_stack[count_level][MAXSTDTAGLEN] = '\0';
 }
 
 char* get_parenttag()