From 6e1c05d011f3db288b5fc19cb52f494152392cd7 Mon Sep 17 00:00:00 2001 From: Peter Verthez Date: Sat, 1 Dec 2001 15:27:21 +0000 Subject: [PATCH] Put documentation of parser in CVS. --- doc/images/schema.obj | 251 ++++++++++++++++++++++++++++++++++++++++++ doc/images/schema.png | Bin 0 -> 1771 bytes doc/parser.html | 129 ++++++++++++++++++++++ 3 files changed, 380 insertions(+) create mode 100644 doc/images/schema.obj create mode 100644 doc/images/schema.png create mode 100644 doc/parser.html diff --git a/doc/images/schema.obj b/doc/images/schema.obj new file mode 100644 index 0000000..779c19c --- /dev/null +++ b/doc/images/schema.obj @@ -0,0 +1,251 @@ +%TGIF 4.1.39 +state(0,37,100.000,0,0,0,8,1,9,1,1,0,0,1,0,1,0,'Courier',0,57600,0,3,0,10,0,0,1,1,0,16,0,0,1,1,1,1,1088,1408,1,0,2880,0). +% +% @(#)$Header$ +% %W% +% +unit("1 pixel/pixel"). +color_info(11,65535,0,[ + "magenta", 65535, 0, 65535, 65535, 0, 65535, 1, + "red", 65535, 0, 0, 65535, 0, 0, 1, + "green", 0, 65535, 0, 0, 65535, 0, 1, + "blue", 0, 0, 65535, 0, 0, 65535, 1, + "yellow", 65535, 65535, 0, 65535, 65535, 0, 1, + "pink", 65535, 49344, 52171, 65535, 49344, 52171, 1, + "cyan", 0, 65535, 65535, 0, 65535, 65535, 1, + "CadetBlue", 24415, 40606, 41120, 24415, 40606, 41120, 1, + "white", 65535, 65535, 65535, 65535, 65535, 65535, 1, + "black", 0, 0, 0, 0, 0, 0, 1, + "DarkSlateGray", 12079, 20303, 20303, 12079, 20303, 20303, 1 +]). +script_frac("0.6"). +fg_bg_colors('black','white'). +page(1,"",1,''). +text('black',16,159,1,0,1,54,11,0,9,2,0,0,0,0,2,54,11,0,0,"",0,0,0,0,168,'',[ +minilines(54,11,0,0,0,0,0,[ +mini_line(54,9,2,0,0,0,[ +str_block(0,54,9,2,0,-1,0,0,0,[ +str_seg('black','Courier',0,57600,54,9,2,0,-1,0,0,0,0,0, + "inputfile")]) +]) +])]). +box('black','',112,152,176,176,0,1,1,4,0,0,0,0,0,'1',0,[ +]). 
+text('black',120,159,1,0,1,48,11,7,9,2,0,0,0,0,2,48,11,0,0,"",0,0,0,0,168,'',[ +minilines(48,11,0,0,0,0,0,[ +mini_line(48,9,2,0,0,0,[ +str_block(0,48,9,2,0,-1,0,0,0,[ +str_seg('black','Courier',0,57600,48,9,2,0,-1,0,0,0,0,0, + "multilex")]) +]) +])]). +poly('black','',2,[ + 72,168,112,168],1,1,1,11,0,0,0,0,0,0,0,'1',0,0, + "0","",[ + 0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[ +]). +box('black','',224,192,296,216,0,1,1,28,0,0,0,0,0,'1',0,[ +]). +text('black',232,199,1,0,1,48,11,29,9,2,0,0,0,0,2,48,11,0,0,"",0,0,0,0,208,'',[ +minilines(48,11,0,0,0,0,0,[ +mini_line(48,9,2,0,0,0,[ +str_block(0,48,9,2,0,0,0,0,0,[ +str_seg('black','Courier',0,57600,48,9,2,0,0,0,0,0,0,0, + "lex_lohi")]) +]) +])]). +poly('black','',2,[ + 176,152,224,128],1,1,1,34,0,0,0,0,0,0,0,'1',0,0, + "0","",[ + 0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[ +]). +poly('black','',2,[ + 176,160,224,160],1,1,1,37,0,0,0,0,0,0,0,'1',0,0, + "0","",[ + 0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[ +]). +poly('black','',2,[ + 176,176,224,208],1,1,1,40,0,0,0,0,0,0,0,'1',0,0, + "0","",[ + 0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[ +]). +box('black','',344,152,408,176,0,1,1,43,0,0,0,0,0,'1',0,[ +]). +text('black',360,159,1,0,1,36,11,44,9,2,0,0,0,0,2,36,11,0,0,"",0,0,0,0,168,'',[ +minilines(36,11,0,0,0,0,0,[ +mini_line(36,9,2,0,0,0,[ +str_block(0,36,9,2,0,0,0,0,0,[ +str_seg('black','Courier',0,57600,36,9,2,0,0,0,0,0,0,0, + "parser")]) +]) +])]). +poly('black','',2,[ + 296,120,344,152],1,1,1,48,0,0,0,0,0,0,0,'1',0,0, + "0","",[ + 0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[ +]). +poly('black','',2,[ + 296,160,344,160],1,1,1,49,0,0,0,0,0,0,0,'1',0,0, + "0","",[ + 0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[ +]). +poly('black','',2,[ + 296,200,344,176],1,1,1,50,0,0,0,0,0,0,0,'1',0,0, + "0","",[ + 0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[ +]). +box('black','',224,56,296,80,0,1,1,51,0,0,0,0,0,'1',0,[ +]). 
+text('black',240,63,1,0,1,48,11,52,9,2,0,0,0,0,2,48,11,0,0,"",0,0,0,0,72,'',[ +minilines(48,11,0,0,0,0,0,[ +mini_line(48,9,2,0,0,0,[ +str_block(0,48,9,2,0,0,0,0,0,[ +str_seg('black','Courier',0,57600,48,9,2,0,0,0,0,0,0,0, + "encoding")]) +]) +])]). +poly('black','',3,[ + 144,152,144,64,224,64],1,1,1,58,0,0,5,0,0,0,0,'1',0,0, + "0","",[ + 0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[ +]). +poly('black','',3,[ + 376,152,376,64,296,64],1,1,1,59,0,0,5,0,0,0,0,'1',0,0, + "0","",[ + 0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[ +]). +poly('black','',2,[ + 248,80,248,112],3,1,1,60,0,0,8,0,0,0,0,'1',0,0, + "0","",[ + 0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[ +]). +poly('black','',2,[ + 256,80,256,152],3,1,1,61,0,0,8,0,0,0,0,'1',0,0, + "0","",[ + 0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[ +]). +poly('black','',2,[ + 264,80,264,192],3,1,1,62,0,0,8,0,0,0,0,'1',0,0, + "0","",[ + 0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[ +]). +box('black','',224,112,296,136,2,1,1,20,0,0,0,0,0,'1',0,[ +]). +text('black',232,119,1,0,1,54,11,21,9,2,0,0,0,0,2,54,11,0,0,"",0,0,0,0,128,'',[ +minilines(54,11,0,0,0,0,0,[ +mini_line(54,9,2,0,0,0,[ +str_block(0,54,9,2,0,-1,0,0,0,[ +str_seg('black','Courier',0,57600,54,9,2,0,-1,0,0,0,0,0, + "lex_1byte")]) +]) +])]). +box('black','',224,152,296,176,2,1,1,26,0,0,0,0,0,'1',0,[ +]). +text('black',232,159,1,0,1,48,11,27,9,2,0,0,0,0,2,48,11,0,0,"",0,0,0,0,168,'',[ +minilines(48,11,0,0,0,0,0,[ +mini_line(48,9,2,0,0,0,[ +str_block(0,48,9,2,0,-1,0,0,0,[ +str_seg('black','Courier',0,57600,48,9,2,0,-1,0,0,0,0,0, + "lex_hilo")]) +]) +])]). +text('black',144,55,1,0,1,42,11,75,9,2,2,0,0,0,2,42,11,0,0,"",0,0,0,0,64,'',[ +minilines(42,11,0,0,0,0,0,[ +mini_line(42,9,2,0,0,0,[ +str_block(0,42,9,2,0,0,0,0,0,[ +str_seg('black','Courier',0,57600,42,9,2,0,0,0,0,0,0,0, + "initial")]) +]) +])]). 
+text('black',336,55,1,0,1,36,11,79,9,2,2,0,0,0,2,36,11,0,0,"",0,0,0,0,64,'',[ +minilines(36,11,0,0,0,0,0,[ +mini_line(36,9,2,0,0,0,[ +str_block(0,36,9,2,0,0,0,0,0,[ +str_seg('black','Courier',0,57600,36,9,2,0,0,0,0,0,0,0, + "adjust")]) +]) +])]). +box('black','',472,152,536,176,0,1,1,83,0,0,0,0,0,'1',0,[ +]). +text('black',480,159,1,0,1,48,11,84,9,2,0,0,0,0,2,48,11,0,0,"",0,0,0,0,168,'',[ +minilines(48,11,0,0,0,0,0,[ +mini_line(48,9,2,0,0,0,[ +str_block(0,48,9,2,0,0,0,0,0,[ +str_seg('black','Courier',0,57600,48,9,2,0,0,0,0,0,0,0, + "app_data")]) +]) +])]). +poly('black','',2,[ + 408,160,472,160],1,1,1,86,0,2,0,0,0,0,0,'1',0,0, + "0","",[ + 0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[ +]). +text('black',424,167,1,0,1,30,11,87,9,2,2,0,0,0,2,30,11,0,0,"",0,0,0,0,176,'',[ +minilines(30,11,0,0,0,0,0,[ +mini_line(30,9,2,0,0,0,[ +str_block(0,30,9,2,0,-1,0,0,0,[ +str_seg('black','Courier',0,57600,30,9,2,0,-1,0,0,0,0,0, + "utf-8")]) +]) +])]). +box('black','',584,152,648,176,0,1,1,89,0,0,0,0,0,'1',0,[ +]). +text('black',592,159,1,0,1,42,11,90,9,2,0,0,0,0,2,42,11,0,0,"",0,0,0,0,168,'',[ +minilines(42,11,0,0,0,0,0,[ +mini_line(42,9,2,0,0,0,[ +str_block(0,42,9,2,0,0,0,0,0,[ +str_seg('black','Courier',0,57600,42,9,2,0,0,0,0,0,0,0, + "app_gui")]) +]) +])]). +box('black','',472,80,536,104,0,1,1,94,0,0,0,0,0,'1',0,[ +]). +text('black',480,87,1,0,1,48,11,95,9,2,0,0,0,0,2,48,11,0,0,"",0,0,0,0,96,'',[ +minilines(48,11,0,0,0,0,0,[ +mini_line(48,9,2,0,0,0,[ +str_block(0,48,9,2,0,0,0,0,0,[ +str_seg('black','Courier',0,57600,48,9,2,0,0,0,0,0,0,0, + "encoding")]) +]) +])]). +text('black',472,23,1,0,1,78,11,117,9,2,2,0,0,0,2,78,11,0,0,"",0,0,0,0,32,'',[ +minilines(78,11,0,0,0,0,0,[ +mini_line(78,9,2,0,0,0,[ +str_block(0,78,9,2,0,-3,0,0,0,[ +str_seg('black','Courier',0,57600,78,9,2,0,-3,0,0,0,0,0, + "LANG env.var.")]) +]) +])]). +poly('black','',2,[ + 504,40,504,80],1,1,1,119,0,2,5,0,0,0,0,'1',0,0, + "0","",[ + 0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[ +]). 
+poly('black','',2,[ + 504,104,504,152],3,1,1,127,0,0,8,0,0,0,0,'1',0,0, + "0","",[ + 0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[ +]). +poly('black','',2,[ + 536,160,584,160],1,1,1,129,0,2,0,0,0,0,0,'1',0,0, + "0","",[ + 0,8,3,0,'8','3','0'],[0,8,3,0,'8','3','0'],[ +]). +text('black',544,167,1,0,1,36,11,130,9,2,2,0,0,0,2,36,11,0,0,"",0,0,0,0,176,'',[ +minilines(36,11,0,0,0,0,0,[ +mini_line(36,9,2,0,0,0,[ +str_block(0,36,9,2,0,-1,0,0,0,[ +str_seg('black','Courier',0,57600,36,9,2,0,-1,0,0,0,0,0, + "locale")]) +]) +])]). +box('black','',88,40,416,224,0,1,1,133,3,0,0,0,0,'1',0,[ +]). +text('black',328,23,1,0,1,78,11,141,9,2,0,0,0,0,2,78,11,0,0,"",0,0,0,0,32,'',[ +minilines(78,11,0,0,0,0,0,[ +mini_line(78,9,2,0,0,0,[ +str_block(0,78,9,2,0,0,0,0,0,[ +str_seg('black','Courier',0,57600,78,9,2,0,0,0,0,0,0,0, + "gedcom parser")]) +]) +])]). diff --git a/doc/images/schema.png b/doc/images/schema.png new file mode 100644 index 0000000000000000000000000000000000000000..cff1475467c2bd62a5861cdd0b6cbebfd80eae34 GIT binary patch literal 1771 zcmV;P)h8?^^LaeG-kn*y zV=&*eGWHmOg}gz)j$+a_oJFiXtf=b#qm*B#=kIsEyYJqlf>9=n;`*$~c z7<{(@iw$%7j$RM#lMN5-EWby_ONsLt+gBL z-6J!rsO00fZ^2Kl_wK*hqvMppzWFyz@35w~-28C1|i&7yL6m1c7;jB9}ndoq*Wkx$Ag25y~2KJyHxVCt~ zsY^iT)3cppBY{z+Tbgxy60l#IYK!d^!I}i@l%^bAaJuA!HQapJ&)TJ_enSUbnK;p$ zH@uyT82ru$yI0DmTRLEnoVmFR|BU_0*~{`q{lJ9bqU6jIc1 z$O|MpIHm)x$Ad#HPs|0>oaF4&>z*~AC;A4PWG~MDI zn|iV&0;7Sapa)tI6!HQqeh<$;(BfkFHmYFrQsJjlA9jMntq*e1Lw{#GMNtE*Ucn!x zZ-D_*p$j$vsVT&OqMZP)QeZs;h-1bn(4k{QMF|{~D6pRcNX^CpexS>Pq8$%f%;n3} z0J&r^_vJcOBPuH3z+|!xwXX-RM~$P_8*)&zV?m1r>CcKP2$+q*#)V%hWw?BCLx_%w zKDdqw5z&%@;tRxq77uP{gJCkl4OysAmqZen{2ZGG(GZd;YIIcCK=m0c8bWWOf{GfF zKy?rep|?;$d#kcQLjeR#AZhUcS89OdEF>+OS-^Eth+9`>AhnwaTBHy+3BH0G34~;2 zVlcUAor2fLfDLAA#~2VWTL7h5)M-SBehk*2OW60K;VGny?2lsFmw(+od zuhoX8Foxkykna!0Fa$)|JFFT-(ZGYcr2|4QeFsvtH0#c4bXDEWUB zgHk52C6tmWqZa50rDV#e4AKc76}%ELvVnNM+Je@5bA+-v}1%DY6!Is z3OOnUvq?{JAbL;}Ny74ZWRUyi9X*74AbL;}m0D0W#e#L_dT7v9Y@jMgU6LVShfojP 
zph0#B^}qoQvO}l`dY4238aaU8QNaa2nt6aNQt&-nqX}-1f;l~qOWUMjJr{6j1{1&S z9YN3pVI!FM?TCUdxEg$)a;>Zl@^KHs?^Euk2;&N^1_m0}Ki&>+c+t)8hjs#wn*K8?4TZsr{LW;Q#T*30U& + + + + + Gedcom parser in Genes + + + +
+

Gedcom parser in Genes

+ +
The intention of this page is to provide some explanation + of the gedcom parser, to aid development on and with it.  Currently, + the parser is in a working state, but some parts are still missing, + notably the interface towards applications.  First, some practical issues + of testing with the parser will be explained.
+
+ +

Basic testing
+

+ The parser is located in the "gedcom" subdirectory of the Genes source +code.  You should be able to perform a basic test using the commands:
+ +
make clean
+ make
+ make test

+
+ If everything goes OK, you'll see that some gedcom files are parsed, and + that each parse is successful.  Note that the gedcom files used were +made by Heiner Eichmann + and are an excellent way to test gedcom parsers thoroughly.
+
+ +

Preparing for further testing

+ The basic testing described above doesn't show anything other than "Parse + succeeded", which is nice, but not very interesting.  Some more detailed + tests are possible, via the gedcom_parse program that is generated + by make test.  
+
+ However, since the output that gedcom_parse generates is +in UTF-8 format (more on this later), some preparation is necessary to have +a full view of it.  Basically, you need a terminal that understands and +can display UTF-8 encoded characters, and you need the proper fonts installed + to display them.  I'll give some advice on this here, based on the Red + Hat 7.1 distribution that I use, with glibc 2.2 and XFree86 4.0.x.  Any + other distribution that has the same or newer versions for these components + should give the same results.
+
+ For the first issue, the UTF-8 capable terminal, the safest bet is to +use xterm in its unicode mode (which is supported by the + xterm coming with XFree86 4.0.x).  UTF-8 capabilities +have only recently been added to gnome-terminal, so probably +that is not in your distribution yet (it certainly isn't in Red Hat 7.1).
+
+ For the second issue, you'll need the ISO 10646-1 fonts.  These + also come with XFree86 4.0.x.
+
+ The way to start xterm in unicode mode is then e.g. (put +everything on one line!):
+ +
LANG=en_GB.UTF-8 xterm -bg 'black' -fg 'DarkGrey' -cm + -fn '-Misc-Fixed-Medium-R-SemiCondensed--13-120-75-75-C-60-ISO10646-1'
+
+ This first sets the LANG variable to a locale that uses + UTF-8, and then starts xterm with a proper Unicode font.  Some + sample UTF-8 plain text files can be found + here + .  Just cat them on the command line and see the result.
+
+ +

Testing the parser with debugging

+ Given the UTF-8 capable terminal, you can now let the gedcom_parse + program print the values that it parses.  An example of a command +line is (in the gedcom directory):
+ +
./gedcom_parse -dg t/ulhc.ged
+
+ The -dg option instructs the parser to show its own debug +messages  (see ./gedcom_parse -h for the full set of options). + If everything is OK, you'll see the values from the gedcom file, containing +a lot of special characters.
+
+ For the ANSEL test file (t/ansel.ged), you have to set the +environment variable GCONV_PATH to the ansel subdirectory +of the gedcom directory:
+ +
export GCONV_PATH=./ansel
+ ./gedcom_parse -dg t/ansel.ged
+
+ This is because for the ANSEL character set an extra module is needed for + the iconv library (more on this later).  But again, this should show + a lot of special characters.
+
+ +

Testing the lexers separately

+ The lexers themselves can be tested separately.  For the 1-byte lexer + (i.e. supporting the encodings with 1 byte per character, such as ASCII, + ANSI and ANSEL), the sequence of commands would be:
+ +
make clean
+ make test_1byte
+ cat t/allged.ged | ./test_1byte

+
+ This will show all tokens in the t/allged.ged test file.  With + the lexers you have to make sure that you use the proper lexer for each +test file.  The test_1byte test program is OK for +allged.ged and ansel.ged (the last one again with the +environment variable set); for the uhl*.ged files you need +the test_hilo test program; for the ulh*.ged +files you need the test_lohi program.
+
+ This concludes the testing setup.  Now for some explanations...
+
+ +

Structure of the parser

+ I see the structure of a program using the gedcom parser as follows:
+
+ Gedcom parsing scheme +
+
+
+ TO BE COMPLETED...
+
+
+
+ + + -- 2.30.2