Check maximum lengths properly.
[gedcom-parse.git] / gedcom.y
index 68b2f488755b53b04c286bebbc3a0f7ddb522e65..5416d50b67527e7193bbedb9159ee81dd5f9e86e 100644 (file)
--- a/gedcom.y
+++ b/gedcom.y
@@ -1,3 +1,12 @@
+/*  This program is free software; you can redistribute it and/or modify  *
+ *  it under the terms of the GNU General Public License as published by  *
+ *  the Free Software Foundation; either version 2 of the License, or     *
+ *  (at your option) any later version.                                   *
+
+ (C) 2001 by The Genes Development Team
+ Original author: Peter Verthez (Peter.Verthez@advalvas.be)
+*/
+
 /* $Id$ */
 /* $Name$ */
 
 
 %{
 #include "gedcom.h"
+#include "multilex.h"
+#include "encoding.h"
 
 int  count_level    = 0;
 int  fail           = 0;
 int  compat_enabled = 1;
+int  gedcom_high_level_debug = 0; /* set to 1 by gedcom_set_debug_level(); gates gedcom_debug_print() */
 int  compatibility  = 0; 
-MECHANISM curr_mechanism=IMMED_FAIL;
-char string_buf[MAXGEDCLINELEN+1];
+MECHANISM error_mechanism=IMMED_FAIL; /* tested by HANDLE_ERROR; changed via gedcom_set_error_handling() */
+char string_buf[MAXGEDCLINELEN*4+1]; /* NOTE(review): presumably *4 leaves room for multi-byte characters - confirm */
 char *string_buf_ptr;
 
 enum _COMPAT {
@@ -149,13 +161,13 @@ int  compat_mode(int flags);
  
 #define HANDLE_ERROR \
      { \
-       if (curr_mechanism == IMMED_FAIL) { \
+       if (error_mechanism == IMMED_FAIL) { /* give up on the first error */ \
         YYABORT; \
        } \
-       else if (curr_mechanism == DEFER_FAIL) { \
+       else if (error_mechanism == DEFER_FAIL) { /* go on, but record the failure in 'fail' */ \
          yyerrok; fail = 1; \
        } \
-       else if (curr_mechanism == IGNORE_ERRORS) { \
+       else if (error_mechanism == IGNORE_ERRORS) { /* go on without recording anything */ \
         yyerrok; \
        } \
      }
@@ -210,6 +222,9 @@ int  compat_mode(int flags);
 %}
 
 %union {
+  int  level;     /* semantic value of the OPEN token */
+  char *pointer;  /* semantic value of the POINTER token */
+  char *tag;      /* semantic value of USERTAG, the TAG_* tokens, anystdtag and anytoptag */
   char *string;
 }
 
@@ -217,145 +232,145 @@ int  compat_mode(int flags);
 %expect 300
 
 %token <string> BADTOKEN
-%token <string> OPEN
+%token <level> OPEN
 %token <string> CLOSE
 %token <string> ESCAPE
 %token <string> DELIM
 %token <string> ANYCHAR
-%token <string> POINTER
-%token <string> USERTAG
-%token <string> TAG_ABBR
-%token <string> TAG_ADDR
-%token <string> TAG_ADR1
-%token <string> TAG_ADR2
-%token <string> TAG_ADOP
-%token <string> TAG_AFN
-%token <string> TAG_AGE
-%token <string> TAG_AGNC
-%token <string> TAG_ALIA
-%token <string> TAG_ANCE
-%token <string> TAG_ANCI
-%token <string> TAG_ANUL
-%token <string> TAG_ASSO
-%token <string> TAG_AUTH
-%token <string> TAG_BAPL
-%token <string> TAG_BAPM
-%token <string> TAG_BARM
-%token <string> TAG_BASM
-%token <string> TAG_BIRT
-%token <string> TAG_BLES
-%token <string> TAG_BLOB
-%token <string> TAG_BURI
-%token <string> TAG_CALN
-%token <string> TAG_CAST
-%token <string> TAG_CAUS
-%token <string> TAG_CENS
-%token <string> TAG_CHAN
-%token <string> TAG_CHAR
-%token <string> TAG_CHIL
-%token <string> TAG_CHR
-%token <string> TAG_CHRA
-%token <string> TAG_CITY
-%token <string> TAG_CONC
-%token <string> TAG_CONF
-%token <string> TAG_CONL
-%token <string> TAG_CONT
-%token <string> TAG_COPR
-%token <string> TAG_CORP
-%token <string> TAG_CREM
-%token <string> TAG_CTRY
-%token <string> TAG_DATA
-%token <string> TAG_DATE
-%token <string> TAG_DEAT
-%token <string> TAG_DESC
-%token <string> TAG_DESI
-%token <string> TAG_DEST
-%token <string> TAG_DIV
-%token <string> TAG_DIVF
-%token <string> TAG_DSCR
-%token <string> TAG_EDUC
-%token <string> TAG_EMIG
-%token <string> TAG_ENDL
-%token <string> TAG_ENGA
-%token <string> TAG_EVEN
-%token <string> TAG_FAM
-%token <string> TAG_FAMC
-%token <string> TAG_FAMF
-%token <string> TAG_FAMS
-%token <string> TAG_FCOM
-%token <string> TAG_FILE
-%token <string> TAG_FORM
-%token <string> TAG_GEDC
-%token <string> TAG_GIVN
-%token <string> TAG_GRAD
-%token <string> TAG_HEAD
-%token <string> TAG_HUSB
-%token <string> TAG_IDNO
-%token <string> TAG_IMMI
-%token <string> TAG_INDI
-%token <string> TAG_LANG
-%token <string> TAG_LEGA
-%token <string> TAG_MARB
-%token <string> TAG_MARC
-%token <string> TAG_MARL
-%token <string> TAG_MARR
-%token <string> TAG_MARS
-%token <string> TAG_MEDI
-%token <string> TAG_NAME
-%token <string> TAG_NATI
-%token <string> TAG_NATU
-%token <string> TAG_NCHI
-%token <string> TAG_NICK
-%token <string> TAG_NMR
-%token <string> TAG_NOTE
-%token <string> TAG_NPFX
-%token <string> TAG_NSFX
-%token <string> TAG_OBJE
-%token <string> TAG_OCCU
-%token <string> TAG_ORDI
-%token <string> TAG_ORDN
-%token <string> TAG_PAGE
-%token <string> TAG_PEDI
-%token <string> TAG_PHON
-%token <string> TAG_PLAC
-%token <string> TAG_POST
-%token <string> TAG_PROB
-%token <string> TAG_PROP
-%token <string> TAG_PUBL
-%token <string> TAG_QUAY
-%token <string> TAG_REFN
-%token <string> TAG_RELA
-%token <string> TAG_RELI
-%token <string> TAG_REPO
-%token <string> TAG_RESI
-%token <string> TAG_RESN
-%token <string> TAG_RETI
-%token <string> TAG_RFN
-%token <string> TAG_RIN
-%token <string> TAG_ROLE
-%token <string> TAG_SEX
-%token <string> TAG_SLGC
-%token <string> TAG_SLGS
-%token <string> TAG_SOUR
-%token <string> TAG_SPFX
-%token <string> TAG_SSN
-%token <string> TAG_STAE
-%token <string> TAG_STAT
-%token <string> TAG_SUBM
-%token <string> TAG_SUBN
-%token <string> TAG_SURN
-%token <string> TAG_TEMP
-%token <string> TAG_TEXT
-%token <string> TAG_TIME
-%token <string> TAG_TITL
-%token <string> TAG_TRLR
-%token <string> TAG_TYPE
-%token <string> TAG_VERS
-%token <string> TAG_WIFE
-%token <string> TAG_WILL
-
-%type <string> anystdtag
-%type <string> anytoptag
+%token <pointer> POINTER
+%token <tag> USERTAG
+%token <tag> TAG_ABBR
+%token <tag> TAG_ADDR
+%token <tag> TAG_ADR1
+%token <tag> TAG_ADR2
+%token <tag> TAG_ADOP
+%token <tag> TAG_AFN
+%token <tag> TAG_AGE
+%token <tag> TAG_AGNC
+%token <tag> TAG_ALIA
+%token <tag> TAG_ANCE
+%token <tag> TAG_ANCI
+%token <tag> TAG_ANUL
+%token <tag> TAG_ASSO
+%token <tag> TAG_AUTH
+%token <tag> TAG_BAPL
+%token <tag> TAG_BAPM
+%token <tag> TAG_BARM
+%token <tag> TAG_BASM
+%token <tag> TAG_BIRT
+%token <tag> TAG_BLES
+%token <tag> TAG_BLOB
+%token <tag> TAG_BURI
+%token <tag> TAG_CALN
+%token <tag> TAG_CAST
+%token <tag> TAG_CAUS
+%token <tag> TAG_CENS
+%token <tag> TAG_CHAN
+%token <tag> TAG_CHAR
+%token <tag> TAG_CHIL
+%token <tag> TAG_CHR
+%token <tag> TAG_CHRA
+%token <tag> TAG_CITY
+%token <tag> TAG_CONC
+%token <tag> TAG_CONF
+%token <tag> TAG_CONL
+%token <tag> TAG_CONT
+%token <tag> TAG_COPR
+%token <tag> TAG_CORP
+%token <tag> TAG_CREM
+%token <tag> TAG_CTRY
+%token <tag> TAG_DATA
+%token <tag> TAG_DATE
+%token <tag> TAG_DEAT
+%token <tag> TAG_DESC
+%token <tag> TAG_DESI
+%token <tag> TAG_DEST
+%token <tag> TAG_DIV
+%token <tag> TAG_DIVF
+%token <tag> TAG_DSCR
+%token <tag> TAG_EDUC
+%token <tag> TAG_EMIG
+%token <tag> TAG_ENDL
+%token <tag> TAG_ENGA
+%token <tag> TAG_EVEN
+%token <tag> TAG_FAM
+%token <tag> TAG_FAMC
+%token <tag> TAG_FAMF
+%token <tag> TAG_FAMS
+%token <tag> TAG_FCOM
+%token <tag> TAG_FILE
+%token <tag> TAG_FORM
+%token <tag> TAG_GEDC
+%token <tag> TAG_GIVN
+%token <tag> TAG_GRAD
+%token <tag> TAG_HEAD
+%token <tag> TAG_HUSB
+%token <tag> TAG_IDNO
+%token <tag> TAG_IMMI
+%token <tag> TAG_INDI
+%token <tag> TAG_LANG
+%token <tag> TAG_LEGA
+%token <tag> TAG_MARB
+%token <tag> TAG_MARC
+%token <tag> TAG_MARL
+%token <tag> TAG_MARR
+%token <tag> TAG_MARS
+%token <tag> TAG_MEDI
+%token <tag> TAG_NAME
+%token <tag> TAG_NATI
+%token <tag> TAG_NATU
+%token <tag> TAG_NCHI
+%token <tag> TAG_NICK
+%token <tag> TAG_NMR
+%token <tag> TAG_NOTE
+%token <tag> TAG_NPFX
+%token <tag> TAG_NSFX
+%token <tag> TAG_OBJE
+%token <tag> TAG_OCCU
+%token <tag> TAG_ORDI
+%token <tag> TAG_ORDN
+%token <tag> TAG_PAGE
+%token <tag> TAG_PEDI
+%token <tag> TAG_PHON
+%token <tag> TAG_PLAC
+%token <tag> TAG_POST
+%token <tag> TAG_PROB
+%token <tag> TAG_PROP
+%token <tag> TAG_PUBL
+%token <tag> TAG_QUAY
+%token <tag> TAG_REFN
+%token <tag> TAG_RELA
+%token <tag> TAG_RELI
+%token <tag> TAG_REPO
+%token <tag> TAG_RESI
+%token <tag> TAG_RESN
+%token <tag> TAG_RETI
+%token <tag> TAG_RFN
+%token <tag> TAG_RIN
+%token <tag> TAG_ROLE
+%token <tag> TAG_SEX
+%token <tag> TAG_SLGC
+%token <tag> TAG_SLGS
+%token <tag> TAG_SOUR
+%token <tag> TAG_SPFX
+%token <tag> TAG_SSN
+%token <tag> TAG_STAE
+%token <tag> TAG_STAT
+%token <tag> TAG_SUBM
+%token <tag> TAG_SUBN
+%token <tag> TAG_SURN
+%token <tag> TAG_TEMP
+%token <tag> TAG_TEXT
+%token <tag> TAG_TIME
+%token <tag> TAG_TITL
+%token <tag> TAG_TRLR
+%token <tag> TAG_TYPE
+%token <tag> TAG_VERS
+%token <tag> TAG_WIFE
+%token <tag> TAG_WILL
+
+%type <tag> anystdtag
+%type <tag> anytoptag
 %type <string> line_item
 %type <string> mand_line_item
 %type <string> note_line_item
@@ -388,13 +403,11 @@ record      : fam_rec
 head_sect    : OPEN DELIM TAG_HEAD
                { START(HEAD) }
                head_subs
-               { if (compat_mode(C_FTREE)) {
-                  CHECK3(SOUR, GEDC, CHAR);
-                }
-                else {
-                  CHECK4(SOUR, SUBM, GEDC, CHAR);
-                }
-               }
+               { if (compat_mode(C_FTREE))  /* in C_FTREE compatibility mode SUBM is left out of the check */
+                  CHECK3(SOUR, GEDC, CHAR)  /* NOTE(review): no ';' here - presumably CHECKn expands to a complete block; confirm */
+                else
+                  CHECK4(SOUR, SUBM, GEDC, CHAR)
+              }
                CLOSE { }
              ;
 
@@ -420,6 +433,7 @@ head_sub     : head_sour_sect  { OCCUR2(SOUR, 1, 1) }
 /* HEAD.SOUR */
 head_sour_sect : OPEN DELIM TAG_SOUR mand_line_item 
                  { set_compatibility($4);
+                  gedcom_debug_print("===Source: '%s'\n", $4);
                   START(SOUR)
                 }
                  head_sour_subs
@@ -440,14 +454,17 @@ head_sour_sub : head_sour_vers_sect  { OCCUR2(VERS, 0, 1) }
 
 head_sour_vers_sect : OPEN DELIM TAG_VERS mand_line_item
                       { START(VERS)} no_std_subs { CHECK0 } CLOSE
-                            { }
+                      { gedcom_debug_print("===Source version: '%s'\n", $4);
+                     }
                     ;
 head_sour_name_sect : OPEN DELIM TAG_NAME mand_line_item
                       { START(NAME) } no_std_subs { CHECK0 } CLOSE
-                            { }
+                      { gedcom_debug_print("===Source name: '%s'\n", $4);
+                     }
                     ;
 head_sour_corp_sect : OPEN DELIM TAG_CORP mand_line_item 
-                      { START(CORP) }
+                      { gedcom_debug_print("===Source corp name: '%s'\n", $4);
+                       START(CORP) }
                       head_sour_corp_subs
                      { CHECK0 }
                       CLOSE
@@ -564,7 +581,8 @@ head_gedc_form_sect : OPEN DELIM TAG_FORM mand_line_item
 
 /* HEAD.CHAR */
 head_char_sect : OPEN DELIM TAG_CHAR mand_line_item 
-                 { START(CHAR) }
+                 { if (open_conv_to_internal($4) == 0) YYERROR;
+                  START(CHAR) }
                  head_char_subs
                 { CHECK0 }
                  CLOSE
@@ -2106,10 +2124,15 @@ opt_line_item : /* empty */ { }
               | DELIM line_item { }
               ;
 
-line_item   : anychar  { CLEAR_BUFFER(string_buf);
+line_item   : anychar  { size_t i;
+                        CLEAR_BUFFER(string_buf);
                          string_buf_ptr = string_buf;
                         /* The following also takes care of '@@' */
-                        *string_buf_ptr++ = $1[0];
+                        if (!strncmp($1, "@@", 3))
+                          *string_buf_ptr++ = '@';
+                        else
+                          for (i=0; i < strlen($1); i++)
+                            *string_buf_ptr++ = $1[i];
                         $$ = string_buf;
                        }
             | ESCAPE   { CLEAR_BUFFER(string_buf);
@@ -2123,8 +2146,13 @@ line_item   : anychar  { CLEAR_BUFFER(string_buf);
                      YYERROR;
                    }
                    else {
+                     size_t i;
                      /* The following also takes care of '@@' */
-                     *string_buf_ptr++ = $2[0];
+                     if (!strncmp($2, "@@", 3))
+                       *string_buf_ptr++ = '@';
+                     else
+                       for (i=0; i < strlen($2); i++)
+                         *string_buf_ptr++ = $2[i];
                      $$ = string_buf;
                    }
                  }
@@ -2375,17 +2403,37 @@ void pop_countarray()
 }
 
 /* Enabling debug mode */
-void gedcom_enable_debug()
+/* level 0: no debugging is switched on (flags that are already set are not cleared) */
+/* level 1: only internal: sets gedcom_high_level_debug, which enables gedcom_debug_print() */
+/* level 2: also bison: additionally sets gedcom_debug (compiled in only if YYDEBUG != 0) */
+void gedcom_set_debug_level(int level)
 {
+  if (level > 0) {
+    gedcom_high_level_debug = 1;
+  }
+  if (level > 1) {
 #if YYDEBUG != 0
-  gedcom_debug = 1;
+    gedcom_debug = 1;
 #endif
+  }
+}
+
+int gedcom_debug_print(char* s, ...)  /* printf-style message, written to stderr only if high-level debugging is on */
+{
+  int res = 0;  /* result when debugging is off; was uninitialized, so the return value was indeterminate */
+  if (gedcom_high_level_debug) {
+    va_list ap;
+    va_start(ap, s);
+    res = vfprintf(stderr, s, ap);  /* characters written, or a negative value on output error */
+    va_end(ap);
+  }
+  return(res);
 }
 
 /* Setting the error mechanism */
 void gedcom_set_error_handling(MECHANISM mechanism)
 {
-  curr_mechanism = mechanism;
+  error_mechanism = mechanism;  /* tested by the HANDLE_ERROR macro */
 }
 
 /* Compatibility handling */
@@ -2413,3 +2461,4 @@ int compat_mode(int compat_flags)
 {
   return (compat_flags & compatibility);
 }
+