/* $Id$ */
/* $Name$ */
-char string_buf[MAXGEDCLINELEN+1];
+#ifndef IN_LEX
+
+#include "gedcom_internal.h"
+#include "multilex.h"
+#include "encoding.h"
+#include "gedcom.h"
+#include "gedcom.tab.h"
+
+#define YY_NO_UNPUT
+
+static size_t encoding_width;
+static int current_level = -1;
+static int level_diff=MAXGEDCLEVEL;
+static size_t line_len = 0;
+
+static char ptr_buf[MAXGEDCPTRLEN * UTF_FACTOR + 1];
+static char tag_buf[MAXGEDCTAGLEN * UTF_FACTOR + 1];
+static char str_buf[MAXGEDCLINELEN * UTF_FACTOR + 1];
+
+#ifdef LEXER_TEST
+YYSTYPE gedcom_lval;
+int line_no = 1;
+
+int gedcom_lex();
+
+int test_loop(ENCODING enc, char* code)
+{
+ int tok, res;
+ init_encodings();
+ set_encoding_width(enc);
+ res = open_conv_to_internal(code);
+ if (!res) {
+ gedcom_error("Unable to open conversion context: %s",
+ strerror(errno));
+ return 1;
+ }
+ tok = gedcom_lex();
+ while (tok) {
+ switch(tok) {
+ case BADTOKEN: printf("BADTOKEN "); break;
+ case OPEN: printf("OPEN(%d) ", gedcom_lval.number); break;
+ case CLOSE: printf("CLOSE "); break;
+ case ESCAPE: printf("ESCAPE(%s) ", gedcom_lval.string); break;
+ case DELIM: printf("DELIM "); break;
+ case ANYCHAR: printf("%s ", gedcom_lval.string); break;
+ case POINTER: printf("POINTER(%s) ", gedcom_lval.string); break;
+ case USERTAG: printf("USERTAG(%s) ", gedcom_lval.string); break;
+ default: printf("TAG(%s) ", gedcom_lval.string); break;
+ }
+ tok = gedcom_lex();
+ }
+ printf("\n");
+ close_conv_to_internal();
+ return 0;
+}
-#define TO_INTERNAL(str) to_internal(str, yyleng)
+#endif /* of #ifdef LEXER_TEST */
+
+#else /* of #ifndef IN_LEX */
+
+#define TO_INTERNAL(STR,OUTBUF) \
+ to_internal(STR, yyleng, OUTBUF, sizeof(OUTBUF))
-#define MKTAGACTION(the_tag) \
- { gedcom_lval.tag = TO_INTERNAL(yytext); \
- BEGIN(NORMAL); \
- return TAG_##the_tag; }
+#define INIT_LINE_LEN \
+ line_len = 0;
+#define CHECK_LINE_LEN \
+ { if (line_len != (size_t)-1) { \
+ line_len += strlen(yytext); \
+ if (line_len > MAXGEDCLINELEN * encoding_width) { \
+ gedcom_error("Line too long, max %d characters", \
+ MAXGEDCLINELEN); \
+ line_len = (size_t)-1; \
+ return BADTOKEN; \
+ } \
+ } \
+ }
+
+#define MKTAGACTION(THETAG) \
+ { CHECK_LINE_LEN; \
+ gedcom_lval.string = TO_INTERNAL(yytext, tag_buf); \
+ BEGIN(NORMAL); \
+ return TAG_##THETAG; \
+ }
/* The GEDCOM level number is converted into a sequence of opening
and closing brackets. Simply put, the following GEDCOM fragment:
} \
else if (level_diff == 1) { \
level_diff++; \
- gedcom_lval.level = current_level; \
+ gedcom_lval.number = current_level; \
return OPEN; \
} \
else { \
/* out of brackets... */ \
} \
- }
+ }
+
+
+#define ACTION_INITIAL_WHITESPACE \
+ { CHECK_LINE_LEN; \
+ /* ignore initial whitespace further */ \
+ }
#define ACTION_0_DIGITS \
#define ACTION_DIGITS \
- { int level = atoi(TO_INTERNAL(yytext)); \
+ { int level = atoi(TO_INTERNAL(yytext, str_buf)); \
+ CHECK_LINE_LEN; \
if ((level < 0) || (level > MAXGEDCLEVEL)) { \
gedcom_error ("Level number out of range [0..%d]", \
MAXGEDCLEVEL); \
} \
else if (level_diff == 1) { \
level_diff++; \
- gedcom_lval.level = current_level; \
+ gedcom_lval.number = current_level; \
return OPEN; \
} \
else { \
#define ACTION_ALPHANUM \
- { if (strlen(yytext) > MAXGEDCTAGLEN) { \
- gedcom_error("Tag '%s' too long, max %d chars"); \
+ { if (strlen(yytext) > MAXGEDCTAGLEN * encoding_width) { \
+ gedcom_error("Tag '%s' too long, max %d characters", \
+ yytext, MAXGEDCTAGLEN); \
return BADTOKEN; \
} \
- strncpy(string_buf, yytext, MAXGEDCTAGLEN+1); \
- gedcom_lval.tag = TO_INTERNAL(string_buf); \
+ CHECK_LINE_LEN; \
+ gedcom_lval.string = TO_INTERNAL(yytext, tag_buf); \
BEGIN(NORMAL); \
return USERTAG; \
}
#define ACTION_DELIM \
- { gedcom_lval.string = TO_INTERNAL(yytext); \
+ { CHECK_LINE_LEN; \
+ gedcom_lval.string = TO_INTERNAL(yytext, str_buf); \
return DELIM; \
}
#define ACTION_ANY \
- { gedcom_lval.string = TO_INTERNAL(yytext); \
- /* Due to character conversions, it is possible \
- that the current character will be combined with \
- the next, and so now we don't have a character yet... \
+ { CHECK_LINE_LEN; \
+ gedcom_lval.string = TO_INTERNAL(yytext, str_buf); \
+ /* Due to character conversions, it is possible that the current \
+ character will be combined with the next, and so now we don't have a \
+ character yet... \
In principle, this is only applicable to the 1byte case (e.g. ANSEL), \
but it doesn't harm the unicode case. \
*/ \
#define ACTION_ESCAPE \
- { gedcom_lval.string = TO_INTERNAL(yytext); \
+ { CHECK_LINE_LEN; \
+ gedcom_lval.string = TO_INTERNAL(yytext, str_buf); \
return ESCAPE; \
}
#define ACTION_POINTER \
- { gedcom_lval.pointer = TO_INTERNAL(yytext); \
+ { CHECK_LINE_LEN; \
+ if (strlen(yytext) > MAXGEDCPTRLEN * encoding_width) { \
+ gedcom_error("Pointer '%s' too long, max %d characters", \
+ yytext, MAXGEDCPTRLEN); \
+ return BADTOKEN; \
+ } \
+ gedcom_lval.string = TO_INTERNAL(yytext, ptr_buf); \
return POINTER; \
}
*/
#define ACTION_TERMINATOR \
- { line_no++; \
+ { CHECK_LINE_LEN; \
+ INIT_LINE_LEN; \
+ line_no++; \
BEGIN(INITIAL); \
}
return CLOSE; \
} \
else { \
+ /* Reset our state */ \
+ current_level = -1; \
+ level_diff = MAXGEDCLEVEL; \
+ /* ... then terminate lex */ \
yyterminate(); \
} \
}
yytext, yytext[0]); \
return BADTOKEN; \
}
+
+#endif /* IN_LEX */