Release 0.9
[gedcom-parse.git] / gedcom.y
index b455587196502464fb3ac9da71f9b519f2099b7f..45dc37490c0613a53d0e934884f86fc2d994d529 100644 (file)
--- a/gedcom.y
+++ b/gedcom.y
@@ -1,3 +1,12 @@
+/*  This program is free software; you can redistribute it and/or modify  *
+ *  it under the terms of the GNU General Public License as published by  *
+ *  the Free Software Foundation; either version 2 of the License, or     *
+ *  (at your option) any later version.                                   *
+
+ (C) 2001 by The Genes Development Team
+ Original author: Peter Verthez (Peter.Verthez@advalvas.be)
+*/
+
 /* $Id$ */
 /* $Name$ */
 
 
 /* General notes:
 
-   - The syntax analysis doesn't handle the contents of the line values
-     or their encoding; this is done in the semantic analysis.
+   - The syntax analysis doesn't handle the contents of the line values;
+     this is done in the semantic analysis.
 
  */
 
 %{
 #include "gedcom.h"
+#include "multilex.h"
+#include "encoding.h"
 
 int  count_level    = 0;
 int  fail           = 0;
 int  compat_enabled = 1;
 int  gedcom_high_level_debug = 0; 
 int  compatibility  = 0; 
-MECHANISM error_mechanism=IMMED_FAIL;
-char string_buf[MAXGEDCLINELEN+1];
-char *string_buf_ptr;
+MECHANISM error_mechanism = IMMED_FAIL;
+char line_item_buf[MAXGEDCLINELEN * UTF_FACTOR + 1];
+char *line_item_buf_ptr;
 
 enum _COMPAT {
   C_FTREE = 0x01
@@ -146,64 +158,64 @@ int  check_occurrence(int tag);
 void set_compatibility(char* program);
 int  compat_mode(int flags); 
 
-#define CLEAR_BUFFER(BUF) { memset(BUF, 0, sizeof(BUF)); } 
+#define CLEAR_BUFFER(BUF)                                                     \
+     memset(BUF, 0, sizeof(BUF));
  
-#define HANDLE_ERROR \
-     { \
-       if (error_mechanism == IMMED_FAIL) { \
-        YYABORT; \
-       } \
-       else if (error_mechanism == DEFER_FAIL) { \
-         yyerrok; fail = 1; \
-       } \
-       else if (error_mechanism == IGNORE_ERRORS) { \
-        yyerrok; \
-       } \
+#define HANDLE_ERROR                                                          \
+     { if (error_mechanism == IMMED_FAIL) {                                   \
+        YYABORT;                                                             \
+       }                                                                      \
+       else if (error_mechanism == DEFER_FAIL) {                              \
+         yyerrok; fail = 1;                                                   \
+       }                                                                      \
+       else if (error_mechanism == IGNORE_ERRORS) {                           \
+        yyerrok;                                                             \
+       }                                                                      \
      }
-#define START(PARENTTAG) \
-     { ++count_level; \
-       set_parenttag(#PARENTTAG); \
-       push_countarray(); \
+#define START(PARENTTAG)                                                      \
+     { ++count_level;                                                         \
+       set_parenttag(#PARENTTAG);                                             \
+       push_countarray();                                                     \
      }
-#define CHK(TAG) \
-     { if (!check_occurrence(TAG_##TAG)) { \
-         char* parenttag = get_parenttag(); \
-         gedcom_error("The tag '%s' is mandatory within '%s', but missing", \
-                     #TAG, parenttag); \
-         HANDLE_ERROR; \
-       } \
+#define CHK(TAG)                                                              \
+     { if (!check_occurrence(TAG_##TAG)) {                                    \
+         char* parenttag = get_parenttag();                                   \
+         gedcom_error("The tag '%s' is mandatory within '%s', but missing",   \
+                     #TAG, parenttag);                                       \
+         HANDLE_ERROR;                                                        \
+       }                                                                      \
      }
-#define POP \
-     { pop_countarray(); \
-       --count_level; \
+#define POP                                                                   \
+     { pop_countarray();                                                      \
+       --count_level;                                                         \
      }
 #define CHECK0 POP; 
 #define CHECK1(TAG1) { CHK(TAG1); POP; }
-#define CHECK2(TAG1,TAG2) \
+#define CHECK2(TAG1,TAG2)                                                     \
      { CHK(TAG1); CHK(TAG2); POP; }
-#define CHECK3(TAG1,TAG2,TAG3) \
+#define CHECK3(TAG1,TAG2,TAG3)                                                \
      { CHK(TAG1); CHK(TAG2); CHK(TAG3); POP; }
-#define CHECK4(TAG1,TAG2,TAG3,TAG4) \
+#define CHECK4(TAG1,TAG2,TAG3,TAG4)                                           \
      { CHK(TAG1); CHK(TAG2); CHK(TAG3); CHK(TAG4); POP; } 
 #define OCCUR1(CHILDTAG, MIN) { count_tag(TAG_##CHILDTAG); } 
-#define OCCUR2(CHILDTAG, MIN, MAX) \
-     { int num = count_tag(TAG_##CHILDTAG); \
-       if (num > MAX) { \
-         char* parenttag = get_parenttag(); \
-         gedcom_error("The tag '%s' can maximally occur %d " \
-                     "time(s) within '%s'", \
-                     #CHILDTAG, MAX, parenttag); \
-         HANDLE_ERROR; \
-       } \
+#define OCCUR2(CHILDTAG, MIN, MAX)                                            \
+     { int num = count_tag(TAG_##CHILDTAG);                                   \
+       if (num > MAX) {                                                       \
+         char* parenttag = get_parenttag();                                   \
+         gedcom_error("The tag '%s' can maximally occur %d "                  \
+                     "time(s) within '%s'",                                  \
+                     #CHILDTAG, MAX, parenttag);                             \
+         HANDLE_ERROR;                                                        \
+       }                                                                      \
      }
-#define INVALID_TAG(CHILDTAG) \
-     { char* parenttag = get_parenttag(); \
-       gedcom_error("The tag '%s' is not a valid tag within '%s'", \
-                   CHILDTAG, parenttag); \
-       HANDLE_ERROR; \
+#define INVALID_TAG(CHILDTAG)                                                 \
+     { char* parenttag = get_parenttag();                                     \
+       gedcom_error("The tag '%s' is not a valid tag within '%s'",            \
+                   CHILDTAG, parenttag);                                     \
+       HANDLE_ERROR;                                                          \
      }
-#define INVALID_TOP_TAG(CHILDTAG) \
-     { gedcom_error("The tag '%s' is not a valid top-level tag", \
+#define INVALID_TOP_TAG(CHILDTAG)                                             \
+     { gedcom_error("The tag '%s' is not a valid top-level tag",              \
                    CHILDTAG); \
        HANDLE_ERROR; \
      }
@@ -211,6 +223,7 @@ int  compat_mode(int flags);
 %}
 
 %union {
+  int  number;
   char *string;
 }
 
@@ -218,7 +231,7 @@ int  compat_mode(int flags);
 %expect 300
 
 %token <string> BADTOKEN
-%token <string> OPEN
+%token <number> OPEN
 %token <string> CLOSE
 %token <string> ESCAPE
 %token <string> DELIM
@@ -419,7 +432,8 @@ head_sub     : head_sour_sect  { OCCUR2(SOUR, 1, 1) }
 /* HEAD.SOUR */
 head_sour_sect : OPEN DELIM TAG_SOUR mand_line_item 
                  { set_compatibility($4);
-                  gedcom_debug_print("===Source: '%s'\n", $4);
+                  gedcom_debug_print("===Source: '%s', '%s'\n",
+                                     $4, $3);
                   START(SOUR)
                 }
                  head_sour_subs
@@ -567,7 +581,8 @@ head_gedc_form_sect : OPEN DELIM TAG_FORM mand_line_item
 
 /* HEAD.CHAR */
 head_char_sect : OPEN DELIM TAG_CHAR mand_line_item 
-                 { START(CHAR) }
+                 { if (open_conv_to_internal($4) == 0) YYERROR;
+                  START(CHAR) }
                  head_char_subs
                 { CHECK0 }
                  CLOSE
@@ -2109,31 +2124,35 @@ opt_line_item : /* empty */ { }
               | DELIM line_item { }
               ;
 
-line_item   : anychar  { CLEAR_BUFFER(string_buf);
-                         string_buf_ptr = string_buf;
+line_item   : anychar  { size_t i;
+                        CLEAR_BUFFER(line_item_buf);
+                        line_item_buf_ptr = line_item_buf;
                         /* The following also takes care of '@@' */
-                        *string_buf_ptr++ = $1[0];
-                        $$ = string_buf;
+                        if (!strncmp($1, "@@", 3))
+                          *line_item_buf_ptr++ = '@';
+                        else
+                          for (i=0; i < strlen($1); i++)
+                            *line_item_buf_ptr++ = $1[i];
+                        $$ = line_item_buf;
                        }
-            | ESCAPE   { CLEAR_BUFFER(string_buf);
-                        string_buf_ptr = string_buf;
+            | ESCAPE   { CLEAR_BUFFER(line_item_buf);
+                        line_item_buf_ptr = line_item_buf;
                         /* For now, ignore escapes */
-                        $$ = string_buf;
+                        $$ = line_item_buf;
                       }
             | line_item anychar
-                  { if (strlen(string_buf) >= MAXGEDCLINELEN) {
-                     gedcom_error("Line too long");
-                     YYERROR;
-                   }
-                   else {
-                     /* The following also takes care of '@@' */
-                     *string_buf_ptr++ = $2[0];
-                     $$ = string_buf;
-                   }
+                  { /* '@@' means a literal '@'; keep room for the NUL */
+                   size_t len = strncmp($2, "@@", 3) ? strlen($2) : 1;
+                   size_t used = line_item_buf_ptr - line_item_buf;
+                   if (used + len >= sizeof(line_item_buf)) {
+                     gedcom_error("Line too long"); YYERROR; }
+                   memcpy(line_item_buf_ptr, $2, len);
+                   line_item_buf_ptr += len;
+                   $$ = line_item_buf;
                  }
             | line_item ESCAPE
                   { /* For now, ignore escapes */
-                   $$ = string_buf;
+                   $$ = line_item_buf;
                  }
             ;
 
@@ -2320,7 +2339,7 @@ anystdtag   : TAG_ABBR
 /* Functions that handle the counting of subtags */
 
 int* count_arrays[MAXGEDCLEVEL+1];
-char tag_stack[MAXGEDCLEVEL+1][MAXSTDTAGLENGTH+1];
+char tag_stack[MAXGEDCLEVEL+1][MAXSTDTAGLEN+1];
 
 void push_countarray()
 {
@@ -2343,7 +2362,7 @@ void push_countarray()
 
 void set_parenttag(char* tag)
 {
-  strncpy(tag_stack[count_level], tag, MAXSTDTAGLENGTH+1);
+  strncpy(tag_stack[count_level], tag, MAXSTDTAGLEN); /* keeps final NUL */
 }
 
 char* get_parenttag()
@@ -2436,3 +2455,4 @@ int compat_mode(int compat_flags)
 {
   return (compat_flags & compatibility);
 }
+