Allow running the test file 2 or 3 times, to see whether the state is
[gedcom-parse.git] / gedcom.y
index b455587196502464fb3ac9da71f9b519f2099b7f..336426940b3bc8a9560c6ae18fa4ff0ca689a192 100644 (file)
--- a/gedcom.y
+++ b/gedcom.y
@@ -1,3 +1,12 @@
+/*  This program is free software; you can redistribute it and/or modify  *
+ *  it under the terms of the GNU General Public License as published by  *
+ *  the Free Software Foundation; either version 2 of the License, or     *
+ *  (at your option) any later version.                                   *
+
+ (C) 2001 by The Genes Development Team
+ Original author: Peter Verthez (Peter.Verthez@advalvas.be)
+*/
+
 /* $Id$ */
 /* $Name$ */
 
 
 /* General notes:
 
-   - The syntax analysis doesn't handle the contents of the line values
-     or their encoding; this is done in the semantic analysis.
+   - The syntax analysis doesn't handle the contents of the line values;
+     this is done in the semantic analysis.
 
  */
 
 %{
 #include "gedcom.h"
+#include "multilex.h"
+#include "encoding.h"
 
 int  count_level    = 0;
 int  fail           = 0;
 int  compat_enabled = 1;
 int  gedcom_high_level_debug = 0; 
 int  compatibility  = 0; 
-MECHANISM error_mechanism=IMMED_FAIL;
-char string_buf[MAXGEDCLINELEN+1];
-char *string_buf_ptr;
+MECHANISM error_mechanism = IMMED_FAIL;
+char line_item_buf[MAXGEDCLINELEN * UTF_FACTOR + 1];
+char *line_item_buf_ptr;
 
 enum _COMPAT {
   C_FTREE = 0x01
@@ -211,6 +223,7 @@ int  compat_mode(int flags);
 %}
 
 %union {
+  int  number;
   char *string;
 }
 
@@ -218,7 +231,7 @@ int  compat_mode(int flags);
 %expect 300
 
 %token <string> BADTOKEN
-%token <string> OPEN
+%token <number> OPEN
 %token <string> CLOSE
 %token <string> ESCAPE
 %token <string> DELIM
@@ -419,7 +432,8 @@ head_sub     : head_sour_sect  { OCCUR2(SOUR, 1, 1) }
 /* HEAD.SOUR */
 head_sour_sect : OPEN DELIM TAG_SOUR mand_line_item 
                  { set_compatibility($4);
-                  gedcom_debug_print("===Source: '%s'\n", $4);
+                  gedcom_debug_print("===Source: '%s', '%s'\n",
+                                     $4, $3);
                   START(SOUR)
                 }
                  head_sour_subs
@@ -567,7 +581,8 @@ head_gedc_form_sect : OPEN DELIM TAG_FORM mand_line_item
 
 /* HEAD.CHAR */
 head_char_sect : OPEN DELIM TAG_CHAR mand_line_item 
-                 { START(CHAR) }
+                 { if (open_conv_to_internal($4) == 0) YYERROR;
+                  START(CHAR) }
                  head_char_subs
                 { CHECK0 }
                  CLOSE
@@ -2109,31 +2124,35 @@ opt_line_item : /* empty */ { }
               | DELIM line_item { }
               ;
 
-line_item   : anychar  { CLEAR_BUFFER(string_buf);
-                         string_buf_ptr = string_buf;
+line_item   : anychar  { size_t i;
+                        CLEAR_BUFFER(line_item_buf);
+                        line_item_buf_ptr = line_item_buf;
                         /* The following also takes care of '@@' */
-                        *string_buf_ptr++ = $1[0];
-                        $$ = string_buf;
+                        if (!strncmp($1, "@@", 3))
+                          *line_item_buf_ptr++ = '@';
+                        else
+                          for (i=0; i < strlen($1); i++)
+                            *line_item_buf_ptr++ = $1[i];
+                        $$ = line_item_buf;
                        }
-            | ESCAPE   { CLEAR_BUFFER(string_buf);
-                        string_buf_ptr = string_buf;
+            | ESCAPE   { CLEAR_BUFFER(line_item_buf);
+                        line_item_buf_ptr = line_item_buf;
                         /* For now, ignore escapes */
-                        $$ = string_buf;
+                        $$ = line_item_buf;
                       }
             | line_item anychar
-                  { if (strlen(string_buf) >= MAXGEDCLINELEN) {
-                     gedcom_error("Line too long");
-                     YYERROR;
-                   }
-                   else {
-                     /* The following also takes care of '@@' */
-                     *string_buf_ptr++ = $2[0];
-                     $$ = string_buf;
-                   }
+                  { size_t i;
+                   /* The following also takes care of '@@' */
+                   if (!strncmp($2, "@@", 3))
+                     *line_item_buf_ptr++ = '@';
+                   else
+                     for (i=0; i < strlen($2); i++)
+                       *line_item_buf_ptr++ = $2[i];
+                   $$ = line_item_buf;
                  }
             | line_item ESCAPE
                   { /* For now, ignore escapes */
-                   $$ = string_buf;
+                   $$ = line_item_buf;
                  }
             ;
 
@@ -2320,7 +2339,7 @@ anystdtag   : TAG_ABBR
 /* Functions that handle the counting of subtags */
 
 int* count_arrays[MAXGEDCLEVEL+1];
-char tag_stack[MAXGEDCLEVEL+1][MAXSTDTAGLENGTH+1];
+char tag_stack[MAXGEDCLEVEL+1][MAXSTDTAGLEN+1];
 
 void push_countarray()
 {
@@ -2343,7 +2362,7 @@ void push_countarray()
 
 void set_parenttag(char* tag)
 {
-  strncpy(tag_stack[count_level], tag, MAXSTDTAGLENGTH+1);
+  strncpy(tag_stack[count_level], tag, MAXSTDTAGLEN+1);
 }
 
 char* get_parenttag()
@@ -2436,3 +2455,4 @@ int compat_mode(int compat_flags)
 {
   return (compat_flags & compatibility);
 }
+