/* Common lexer code.
- Copyright (C) 2001 The Genes Development Team
+ Copyright (C) 2001, 2002 The Genes Development Team
This file is part of the Gedcom parser library.
Contributed by Peter Verthez <Peter.Verthez@advalvas.be>, 2001.
#include "multilex.h"
#include "encoding.h"
#include "gedcom.h"
-#include "gedcom.tab.h"
-
-#define YY_NO_UNPUT
+#include "gedcom.tabgen.h"
+#include "compat.h"
/* Scale factor multiplied with MAXGEDCLINELEN / MAXGEDCTAGLEN / MAXGEDCPTRLEN
   in the length checks of the lexer actions (presumably the number of bytes
   per character of the current encoding -- TODO confirm). */
static size_t encoding_width;
/* Starts at -1; the end-of-input handling in this file also resets it to -1. */
static int current_level = -1;
#ifdef LEXER_TEST
YYSTYPE gedcom_lval;
int line_no = 1;
/* When non-zero, ACTION_NORMAL_AT pushes the matched text back onto the input
   twice instead of reporting an error. */
+int compat_at = 0;
int gedcom_lex();
/* Message handler (appears to belong to the LEXER_TEST section): prints the
   numeric message type in parentheses, then the message text and a newline,
   to stderr.
   The new version passes msg as a "%s" argument instead of using it as the
   format string, so '%' sequences inside msg are printed literally. */
void message_handler(Gedcom_msg_type type, char *msg)
{
- fprintf(stderr, msg);
+ fprintf(stderr, "(%d) %s\n", type, msg);
}
int test_loop(ENCODING enc, char* code)
case DELIM: printf("DELIM "); break;
case ANYCHAR: printf("%s ", gedcom_lval.string); break;
case POINTER: printf("POINTER(%s) ", gedcom_lval.string); break;
- case USERTAG: printf("USERTAG(%s) ", gedcom_lval.string); break;
- default: printf("TAG(%s) ", gedcom_lval.string); break;
+ case USERTAG: printf("USERTAG(%s) ", gedcom_lval.tag.string); break;
+ default: printf("TAG(%s) ", gedcom_lval.tag.string); break;
}
tok = gedcom_lex();
}
{ if (line_len != (size_t)-1) { \
line_len += strlen(yytext); \
if (line_len > MAXGEDCLINELEN * encoding_width) { \
- gedcom_error(_("Line too long, max %d characters"), \
+ gedcom_error(_("Line too long, max %d characters allowed"), \
MAXGEDCLINELEN); \
line_len = (size_t)-1; \
return BADTOKEN; \
/* Lexer action for the tag THETAG.
   Runs CHECK_LINE_LEN, converts yytext into tag_buf with TO_INTERNAL, and
   (in the new version) stores both the converted text and the token value
   TAG_<THETAG> in gedcom_lval.tag.  Then switches to the NORMAL start
   condition and returns TAG_<THETAG>. */
#define MKTAGACTION(THETAG) \
{ CHECK_LINE_LEN; \
- gedcom_lval.string = TO_INTERNAL(yytext, tag_buf); \
+ gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buf); \
+ gedcom_lval.tag.value = TAG_##THETAG; \
BEGIN(NORMAL); \
return TAG_##THETAG; \
}
/* Lexer action for a level number written with a leading zero: reports the
   error through gedcom_error and returns BADTOKEN. */
#define ACTION_0_DIGITS \
- { gedcom_error (_("Level number with leading zero")); \
+ { gedcom_error (_("Level number with leading zero not allowed")); \
return BADTOKEN; \
}
/* Lexer action for an alphanumeric tag that is returned as USERTAG.
   Text whose byte length (strlen) exceeds MAXGEDCTAGLEN * encoding_width is
   reported as too long and yields BADTOKEN.  Otherwise runs CHECK_LINE_LEN,
   converts yytext into tag_buf with TO_INTERNAL, stores the converted text
   and (in the new version) the token value USERTAG in gedcom_lval.tag,
   switches to the NORMAL start condition and returns USERTAG. */
#define ACTION_ALPHANUM \
{ if (strlen(yytext) > MAXGEDCTAGLEN * encoding_width) { \
- gedcom_error(_("Tag '%s' too long, max %d characters"), \
+ gedcom_error(_("Tag '%s' too long, max %d characters allowed"), \
yytext, MAXGEDCTAGLEN); \
return BADTOKEN; \
} \
CHECK_LINE_LEN; \
- gedcom_lval.string = TO_INTERNAL(yytext, tag_buf); \
+ gedcom_lval.tag.string = TO_INTERNAL(yytext, tag_buf); \
+ gedcom_lval.tag.value = USERTAG; \
BEGIN(NORMAL); \
return USERTAG; \
}
#define ACTION_ANY \
- { CHECK_LINE_LEN; \
- gedcom_lval.string = TO_INTERNAL(yytext, str_buf); \
- /* Due to character conversions, it is possible that the current \
- character will be combined with the next, and so now we don't have a \
- character yet... \
- In principle, this is only applicable to the 1byte case (e.g. ANSEL), \
- but it doesn't harm the unicode case. \
- */ \
- if (strlen(gedcom_lval.string) > 0) \
- return ANYCHAR; \
+ { char* tmp; \
+ CHECK_LINE_LEN; \
+ tmp = TO_INTERNAL(yytext, str_buf); \
+ if (!tmp) { \
+ /* Something went wrong during conversion... */ \
+ gedcom_error(_("Invalid character for encoding: '%s' (0x%02x)"), \
+ yytext, yytext[0]); \
+ return BADTOKEN; \
+ } \
+ else { \
+ gedcom_lval.string = tmp; \
+ /* Due to character conversions, it is possible that the current \
+ character will be combined with the next, and so now we don't have a \
+ character yet... \
+ In principle, this is only applicable to the 1byte case (e.g. ANSEL),\
+ but it doesn't harm the unicode case. \
+ */ \
+ if (strlen(gedcom_lval.string) > 0) \
+ return ANYCHAR; \
+ } \
}
#define ACTION_POINTER \
{ CHECK_LINE_LEN; \
if (strlen(yytext) > MAXGEDCPTRLEN * encoding_width) { \
- gedcom_error(_("Pointer '%s' too long, max %d characters"), \
+ gedcom_error(_("Pointer '%s' too long, max %d characters allowed"), \
yytext, MAXGEDCPTRLEN); \
return BADTOKEN; \
} \
return CLOSE; \
} \
else { \
+ char* ptr; int size; \
/* Reset our state */ \
current_level = -1; \
level_diff = MAXGEDCLEVEL; \
/* ... then terminate lex */ \
yyterminate(); \
+ /* Silence a compiler warning from the lex-generated code */ \
+ /* yyterminate does return(), so program will never come here */ \
+ yy_flex_realloc(ptr, size); \
} \
}
+/* Lexer action for a lone '@' inside a value.
+   With compat_at set, the matched text is pushed back onto the input twice
+   (back to front, as unput() requires) so that it is scanned again as a
+   doubled sequence; nothing is returned in that case.  The text is copied
+   first because unput() destroys the contents of yytext.  If the copy
+   cannot be allocated, an error is reported and BADTOKEN is returned rather
+   than dereferencing a NULL pointer.
+   Without compat_at, the lone '@' is reported as an error and BADTOKEN is
+   returned. */
+#define ACTION_NORMAL_AT \
+ { if (compat_at) { \
+ int i, j; \
+ char *yycopy = strdup(yytext); \
+ if (!yycopy) { \
+ gedcom_error(_("Out of memory")); \
+ return BADTOKEN; \
+ } \
+ for (i = 0; i < 2; i++) \
+ for (j = yyleng - 1; j >= 0; --j) \
+ unput(yycopy[j]); \
+ free(yycopy); \
+ } \
+ else { \
+ gedcom_error(_("'@' character should be written as '@@' in values")); \
+ return BADTOKEN; \
+ } \
+ }
#define ACTION_UNEXPECTED \
{ gedcom_error(_("Unexpected character: '%s' (0x%02x)"), \