/* $Id$ */
/* $Name$ */
-#ifndef IN_LEX
+#if LEX_SECTION == 1
#include "gedcom_internal.h"
#include "multilex.h"
#include "encoding.h"
#include "gedcom.h"
-#include "gedcom.tab.h"
-
-#define YY_NO_UNPUT
+#include "gedcom.tabgen.h"
+#include "compat.h"
static size_t encoding_width;
static int current_level = -1;
#ifdef LEXER_TEST
YYSTYPE gedcom_lval;
int line_no = 1;
+int compat_at = 0;
int gedcom_lex();
void message_handler(Gedcom_msg_type type, char *msg)
{
- fprintf(stderr, msg);
+ fprintf(stderr, "(%d) %s\n", type, msg);
}
-int test_loop(ENCODING enc, char* code)
+int test_loop(ENCODING enc, const char* code)
{
int tok, res;
init_encodings();
#endif /* of #ifdef LEXER_TEST */
-#else /* of #ifndef IN_LEX */
+/* These are defined as functions here, because xgettext has trouble
+ extracting the strings out of long pre-processor defines */
+
+/* Report that a line is too long; the message quotes MAXGEDCLINELEN as
+   the maximum number of characters allowed. */
+static void error_line_too_long(void)
+{
+  gedcom_error(_("Line too long, max %d characters allowed"), MAXGEDCLINELEN);
+}
+
+/* Report a level number that was written with a leading zero. */
+static void error_level_leading_zero(void)
+{
+  gedcom_error(_("Level number with leading zero not allowed"));
+}
+
+/* Report a level number outside the range [0..MAXGEDCLEVEL]. */
+static void error_level_out_of_range(void)
+{
+  gedcom_error(_("Level number out of range [0..%d]"), MAXGEDCLEVEL);
+}
+
+/* Report a level number that is level_diff higher than the previous one.
+   level_diff is only used for the text of the message. */
+static void error_level_too_high(int level_diff)
+{
+  gedcom_error(_("GEDCOM level number is %d higher than previous"), level_diff);
+}
+
+/* Report that the given tag is too long; the message quotes the tag and
+   MAXGEDCTAGLEN as the maximum number of characters allowed. */
+static void error_tag_too_long(const char *tag)
+{
+  gedcom_error(_("Tag '%s' too long, max %d characters allowed"), tag,
+               MAXGEDCTAGLEN);
+}
+
+/* Report a character that is invalid for the current encoding.
+   str is the offending text as printed in the message, ch is the byte
+   whose value is shown in hexadecimal. */
+static void error_invalid_character(const char *str, char ch)
+{
+  /* Go through unsigned char first: where plain char is signed, a byte
+     >= 0x80 would otherwise be sign-extended and print as 0xffffffXX. */
+  gedcom_error(_("Invalid character for encoding: '%s' (0x%02x)"), str,
+               (unsigned int)(unsigned char)ch);
+}
+
+/* Report that the given pointer is too long; the message quotes the
+   pointer and MAXGEDCPTRLEN as the maximum number of characters allowed. */
+static void error_pointer_too_long(const char *ptr)
+{
+  gedcom_error(_("Pointer '%s' too long, max %d characters allowed"), ptr,
+               MAXGEDCPTRLEN);
+}
+
+/* Report a single '@' in a value, where '@@' is required. */
+static void error_at_character(void)
+{
+  gedcom_error(_("'@' character should be written as '@@' in values"));
+}
+
+/* Report an unexpected character.
+   str is the offending text as printed in the message, ch is the byte
+   whose value is shown in hexadecimal. */
+static void error_unexpected_character(const char* str, char ch)
+{
+  /* Go through unsigned char first: where plain char is signed, a byte
+     >= 0x80 would otherwise be sign-extended and print as 0xffffffXX. */
+  gedcom_error(_("Unexpected character: '%s' (0x%02x)"), str,
+               (unsigned int)(unsigned char)ch);
+}
+
+/* This is to bypass the iconv conversion (if the input is UTF-8 coming
+ from the program) */
+static int dummy_conv = 0;
+
+#elif LEX_SECTION == 2
#define TO_INTERNAL(STR,OUTBUF) \
- to_internal(STR, yyleng, OUTBUF, sizeof(OUTBUF))
+ (dummy_conv ? STR : to_internal(STR, yyleng, OUTBUF, sizeof(OUTBUF)))
#define INIT_LINE_LEN \
line_len = 0;
{ if (line_len != (size_t)-1) { \
line_len += strlen(yytext); \
if (line_len > MAXGEDCLINELEN * encoding_width) { \
- gedcom_error(_("Line too long, max %d characters allowed"), \
- MAXGEDCLINELEN); \
+ error_line_too_long(); \
line_len = (size_t)-1; \
return BADTOKEN; \
} \
#define ACTION_0_DIGITS \
- { gedcom_error (_("Level number with leading zero not allowed")); \
+ { error_level_leading_zero(); \
return BADTOKEN; \
}
{ int level = atoi(TO_INTERNAL(yytext, str_buf)); \
CHECK_LINE_LEN; \
if ((level < 0) || (level > MAXGEDCLEVEL)) { \
- gedcom_error (_("Level number out of range [0..%d]"), \
- MAXGEDCLEVEL); \
+ error_level_out_of_range(); \
return BADTOKEN; \
} \
level_diff = level - current_level; \
} \
else { \
/* should never happen (error to GEDCOM spec) */ \
- gedcom_error (_("GEDCOM level number is %d higher than previous"), \
- level_diff); \
+ error_level_too_high(level_diff); \
return BADTOKEN; \
} \
}
#define ACTION_ALPHANUM \
{ if (strlen(yytext) > MAXGEDCTAGLEN * encoding_width) { \
- gedcom_error(_("Tag '%s' too long, max %d characters allowed"), \
- yytext, MAXGEDCTAGLEN); \
+ error_tag_too_long(yytext); \
return BADTOKEN; \
} \
CHECK_LINE_LEN; \
#define ACTION_ANY \
- { CHECK_LINE_LEN; \
- gedcom_lval.string = TO_INTERNAL(yytext, str_buf); \
- /* Due to character conversions, it is possible that the current \
- character will be combined with the next, and so now we don't have a \
- character yet... \
- In principle, this is only applicable to the 1byte case (e.g. ANSEL), \
- but it doesn't harm the unicode case. \
- */ \
- if (strlen(gedcom_lval.string) > 0) \
- return ANYCHAR; \
+ { char* tmp; \
+ CHECK_LINE_LEN; \
+ tmp = TO_INTERNAL(yytext, str_buf); \
+ if (!tmp) { \
+ /* Something went wrong during conversion... */ \
+ error_invalid_character(yytext, yytext[0]); \
+ return BADTOKEN; \
+ } \
+ else { \
+ gedcom_lval.string = tmp; \
+ /* Due to character conversions, it is possible that the current \
+ character will be combined with the next, and so now we don't have a \
+ character yet... \
+ In principle, this is only applicable to the 1byte case (e.g. ANSEL),\
+ but it doesn't harm the unicode case. \
+ */ \
+ if (strlen(gedcom_lval.string) > 0) \
+ return ANYCHAR; \
+ } \
}
#define ACTION_POINTER \
{ CHECK_LINE_LEN; \
if (strlen(yytext) > MAXGEDCPTRLEN * encoding_width) { \
- gedcom_error(_("Pointer '%s' too long, max %d characters allowed"), \
- yytext, MAXGEDCPTRLEN); \
+ error_pointer_too_long(yytext); \
return BADTOKEN; \
} \
gedcom_lval.string = TO_INTERNAL(yytext, ptr_buf); \
return CLOSE; \
} \
else { \
- /* Reset our state */ \
- current_level = -1; \
- level_diff = MAXGEDCLEVEL; \
- /* ... then terminate lex */ \
+ char* ptr; int size; \
+ /* ... terminate lex */ \
yyterminate(); \
+ /* Reference yy_flex_realloc to silence a compiler warning from lex generated code */ \
+ /* yyterminate does return(), so program will never come here */ \
+ yy_flex_realloc(ptr, size); \
} \
}
+/* Lexer action for a single '@' in a value.
+   When compat_at is set, the matched text is pushed back onto the input
+   twice with unput(), last character first, and the action does not return
+   a token itself.  The push-back works from a strdup'ed copy of yytext
+   (NOTE(review): presumably because unput() can clobber yytext - confirm
+   against the flex manual); if that copy cannot be allocated, MEMORY_ERROR
+   is invoked instead.
+   When compat_at is not set, the error is reported and BADTOKEN is
+   returned. */
+#define ACTION_NORMAL_AT \
+ { if (compat_at) { \
+ int i, j; \
+ char *yycopy = strdup(yytext); \
+ if (yycopy) { \
+ for (i = 0; i < 2; i++) \
+ for (j = yyleng - 1; j >= 0; --j) \
+ unput(yycopy[j]); \
+ free(yycopy); \
+ } \
+ else { \
+ MEMORY_ERROR; \
+ } \
+ } \
+ else { \
+ error_at_character(); \
+ return BADTOKEN; \
+ } \
+ }
#define ACTION_UNEXPECTED \
- { gedcom_error(_("Unexpected character: '%s' (0x%02x)"), \
- yytext, yytext[0]); \
+ { error_unexpected_character(yytext, yytext[0]); \
return BADTOKEN; \
}
-#endif /* IN_LEX */
+#elif LEX_SECTION == 3
+
+/* End-of-input hook called by the generated lexer.  Always returns 1,
+   which tells lex that there is no further input to switch to. */
+int yywrap(void)
+{
+  return 1;
+}
+
+/* Release the lexer's current input buffer and clear the pointer to it.
+   Registered with atexit() by yymyinit(), hence the (void) prototype. */
+static void yylex_cleanup(void)
+{
+  /* fix memory leak in lex */
+  yy_delete_buffer(yy_current_buffer);
+  yy_current_buffer = NULL;
+}
+
+static int exitfuncregistered = 0;
+
+/* Prepare the lexer for reading from stream f.
+   Registers yylex_cleanup() with atexit() (once it has succeeded it is not
+   registered again), restarts the lexer on f, and resets the level
+   tracking state and the start condition. */
+void yymyinit(FILE *f)
+{
+  if (! exitfuncregistered) {
+    /* Only remember the registration when atexit() reports success */
+    if (atexit(yylex_cleanup) == 0)
+      exitfuncregistered = 1;
+  }
+
+  /* Point the lexer at the new input stream */
+  yyin = f;
+  yyrestart(f);
+
+  /* Reset our state */
+  level_diff = MAXGEDCLEVEL;
+  current_level = -1;
+  BEGIN(INITIAL);
+}
+
+#endif