X-Git-Url: https://git.dlugolecki.net.pl/?a=blobdiff_plain;f=gedcom%2Fgedcom_lex_common.c;h=1da7da2ee2a8f1eef5487325ba999d1702d8a36f;hb=aa8e5450d4e93e743e848a0ac045e04c94474347;hp=5d734c44689adb6e8fe9ae0b46c9bb15594af1f9;hpb=8093e53a57e174b019f07760f5bf815271ceee9b;p=gedcom-parse.git

diff --git a/gedcom/gedcom_lex_common.c b/gedcom/gedcom_lex_common.c
index 5d734c4..1da7da2 100644
--- a/gedcom/gedcom_lex_common.c
+++ b/gedcom/gedcom_lex_common.c
@@ -1,11 +1,22 @@
-/*  This program is free software; you can redistribute it and/or modify  *
- *  it under the terms of the GNU General Public License as published by  *
- *  the Free Software Foundation; either version 2 of the License, or     *
- *  (at your option) any later version.                                   *
-
- (C) 2001 by The Genes Development Team
- Original author: Peter Verthez (Peter.Verthez@advalvas.be)
-*/
+/* Common lexer code.
+   Copyright (C) 2001, 2002 The Genes Development Team
+   This file is part of the Gedcom parser library.
+   Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
+
+   The Gedcom parser library is free software; you can redistribute it
+   and/or modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The Gedcom parser library is distributed in the hope that it will be
+   useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the Gedcom parser library; if not, write to the
+   Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
 
 /* $Id$ */
 /* $Name$ */
@@ -16,9 +27,8 @@
 #include "multilex.h"
 #include "encoding.h"
 #include "gedcom.h"
-#include "gedcom.tab.h"
-
-#define YY_NO_UNPUT
+#include "gedcom.tabgen.h"
+#include "compat.h"
 
 static size_t encoding_width;
 static int current_level = -1;
@@ -32,14 +42,21 @@ static char str_buf[MAXGEDCLINELEN * UTF_FACTOR + 1];
 #ifdef LEXER_TEST 
 YYSTYPE gedcom_lval;
 int line_no = 1;
+int compat_at = 0;
 
 int gedcom_lex();
 
+void message_handler(Gedcom_msg_type type, char *msg)
+{ /* LEXER_TEST message callback: writes "(<type>) <msg>" to stderr. */
+  fprintf(stderr, "(%d) %s\n", type, msg);
+}
+
 int test_loop(ENCODING enc, char* code)
 {
   int tok, res;
   init_encodings();
   set_encoding_width(enc);
+  gedcom_set_message_handler(message_handler);
   res = open_conv_to_internal(code);
   if (!res) {
     gedcom_error("Unable to open conversion context: %s",
@@ -56,8 +73,8 @@ int test_loop(ENCODING enc, char* code)
       case DELIM: printf("DELIM "); break;
       case ANYCHAR: printf("%s ", gedcom_lval.string); break;
       case POINTER: printf("POINTER(%s) ", gedcom_lval.string); break;
-      case USERTAG: printf("USERTAG(%s) ", gedcom_lval.string); break;
-      default: printf("TAG(%s) ", gedcom_lval.string); break;
+      case USERTAG: printf("USERTAG(%s) ", gedcom_lval.tag.string); break;
+      default: printf("TAG(%s) ", gedcom_lval.tag.string); break;
     }
     tok = gedcom_lex();
   }
@@ -80,7 +97,7 @@ int test_loop(ENCODING enc, char* code)
   { if (line_len != (size_t)-1) {                                             \
       line_len += strlen(yytext);                                             \
       if (line_len > MAXGEDCLINELEN * encoding_width) {                       \
-        gedcom_error("Line too long, max %d characters",                      \
+        gedcom_error(_("Line too long, max %d characters allowed"),           \
 		     MAXGEDCLINELEN);                                         \
         line_len = (size_t)-1;                                                \
         return BADTOKEN;                                                      \
@@ -90,7 +107,8 @@ int test_loop(ENCODING enc, char* code)
 
 #define MKTAGACTION(THETAG)                                                  \
   { CHECK_LINE_LEN;                                                          \
-    gedcom_lval.string = TO_INTERNAL(yytext, tag_buf);                       \
+    gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buf);                   \
+    gedcom_lval.tag.value  = TAG_##THETAG;                                   \
     BEGIN(NORMAL);                                                           \
     return TAG_##THETAG;                                                     \
   }
@@ -153,7 +171,7 @@ int test_loop(ENCODING enc, char* code)
 
 
 #define ACTION_0_DIGITS                                                       \
-   { gedcom_error ("Level number with leading zero");                         \
+   { gedcom_error (_("Level number with leading zero not allowed"));          \
      return BADTOKEN;                                                         \
    } 
 
@@ -162,7 +180,7 @@ int test_loop(ENCODING enc, char* code)
    { int level = atoi(TO_INTERNAL(yytext, str_buf));                          \
      CHECK_LINE_LEN;                                                          \
      if ((level < 0) || (level > MAXGEDCLEVEL)) {                             \
-       gedcom_error ("Level number out of range [0..%d]",                     \
+       gedcom_error (_("Level number out of range [0..%d]"),                  \
 		     MAXGEDCLEVEL);                                           \
        return BADTOKEN;                                                       \
      }                                                                        \
@@ -180,8 +198,7 @@ int test_loop(ENCODING enc, char* code)
      }                                                                        \
      else {                                                                   \
        /* should never happen (error to GEDCOM spec) */                       \
-       gedcom_error ("GEDCOM level number is %d higher than "                 \
-		     "previous",                                              \
+       gedcom_error (_("GEDCOM level number is %d higher than previous"),     \
 		     level_diff);                                             \
        return BADTOKEN;                                                       \
      }                                                                        \
@@ -190,12 +207,13 @@ int test_loop(ENCODING enc, char* code)
 
 #define ACTION_ALPHANUM                                                       \
    { if (strlen(yytext) > MAXGEDCTAGLEN * encoding_width) {                   \
-       gedcom_error("Tag '%s' too long, max %d characters",                   \
+       gedcom_error(_("Tag '%s' too long, max %d characters allowed"),        \
 		    yytext, MAXGEDCTAGLEN);                                   \
        return BADTOKEN;                                                       \
      }                                                                        \
      CHECK_LINE_LEN;                                                          \
-     gedcom_lval.string = TO_INTERNAL(yytext, tag_buf);                       \
+     gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buf);                   \
+     gedcom_lval.tag.value  = USERTAG;                                        \
      BEGIN(NORMAL);                                                           \
      return USERTAG;                                                          \
    }
@@ -209,16 +227,26 @@ int test_loop(ENCODING enc, char* code)
 
 
 #define ACTION_ANY                                                            \
-  { CHECK_LINE_LEN;                                                           \
-    gedcom_lval.string = TO_INTERNAL(yytext, str_buf);                        \
-    /* Due to character conversions, it is possible that the current          \
-       character will be combined with the next, and so now we don't have a   \
-       character yet...                                                       \
-       In principle, this is only applicable to the 1byte case (e.g. ANSEL),  \
-       but it doesn't harm the unicode case.                                  \
-    */                                                                        \
-    if (strlen(gedcom_lval.string) > 0)                                       \
-      return ANYCHAR;                                                         \
+  { char* tmp;  /* conversion result; NULL on failure */                      \
+    CHECK_LINE_LEN;                                                           \
+    tmp = TO_INTERNAL(yytext, str_buf);                                       \
+    if (!tmp) {                                                               \
+      /* Conversion failed; print the byte unsigned (no sign extension) */    \
+          gedcom_error(_("Invalid character for encoding: '%s' (0x%02x)"),    \
+		 yytext, (unsigned char)yytext[0]);                           \
+          return BADTOKEN;                                                    \
+    }                                                                         \
+    else {                                                                    \
+      gedcom_lval.string = tmp;                                               \
+      /* Due to character conversions, it is possible that the current        \
+         character will be combined with the next, and so now we don't have a \
+         character yet...                                                     \
+         In principle, this is only applicable to the 1byte case (e.g. ANSEL),\
+         but it doesn't harm the unicode case.                                \
+      */                                                                      \
+      if (strlen(gedcom_lval.string) > 0)                                     \
+        return ANYCHAR;                                                       \
+    }                                                                         \
   }
 
 
@@ -232,7 +260,7 @@ int test_loop(ENCODING enc, char* code)
 #define ACTION_POINTER                                                        \
   { CHECK_LINE_LEN;                                                           \
     if (strlen(yytext) > MAXGEDCPTRLEN * encoding_width) {                    \
-      gedcom_error("Pointer '%s' too long, max %d characters",                \
+      gedcom_error(_("Pointer '%s' too long, max %d characters allowed"),     \
 		   yytext, MAXGEDCPTRLEN);                                    \
       return BADTOKEN;                                                        \
     }                                                                         \
@@ -266,17 +294,40 @@ int test_loop(ENCODING enc, char* code)
       return CLOSE;                                                           \
     }                                                                         \
     else {                                                                    \
+      char* ptr; int size;                                                    \
       /* Reset our state */                                                   \
       current_level = -1;                                                     \
       level_diff = MAXGEDCLEVEL;                                              \
       /* ... then terminate lex */                                            \
       yyterminate();                                                          \
+      /* Suppress a compiler warning from the lex-generated code */           \
+      /* yyterminate does return(), so program will never come here  */       \
+      yy_flex_realloc(ptr, size);                                             \
     }                                                                         \
   } 
 
+#define ACTION_NORMAL_AT  /* '@' in value: re-scan doubled, or reject */      \
+  { if (compat_at) {   /* compat_at set: push the match back twice */         \
+      int i, j;                                                               \
+      char *yycopy = strdup(yytext);  /* unput() may clobber yytext */        \
+      if (yycopy) {                                                           \
+        for (i = 0; i < 2; i++)  /* 2 passes: text is queued twice */         \
+          for (j = yyleng - 1; j >= 0; --j)  /* last char first */            \
+            unput(yycopy[j]);                                                 \
+        free(yycopy);                                                         \
+      }                                                                       \
+      else {                                                                  \
+        MEMORY_ERROR;  /* strdup() failed */                                  \
+      }                                                                       \
+    }                                                                         \
+    else {                                                                    \
+      gedcom_error(_("'@' character should be written as '@@' in values"));   \
+      return BADTOKEN;                                                        \
+    }                                                                         \
+  }
 
 #define ACTION_UNEXPECTED                                                     \
-  { gedcom_error("Unexpected character: '%s' (0x%02x)",                       \
+  { gedcom_error(_("Unexpected character: '%s' (0x%02x)"),                    \
 		 yytext, yytext[0]);                                          \
     return BADTOKEN;                                                          \
   }