diff -rcN ../gawk-3.1.4/Makefile.in ../gawk-3.1.4.xml/Makefile.in *** ../gawk-3.1.4/Makefile.in 2004-08-02 11:16:30.000000000 +0200 --- ../gawk-3.1.4.xml/Makefile.in 2004-08-11 19:05:12.000000000 +0200 *************** *** 101,107 **** am__objects_1 = array$U.$(OBJEXT) awkgram$U.$(OBJEXT) \ builtin$U.$(OBJEXT) dfa$U.$(OBJEXT) ext$U.$(OBJEXT) \ field$U.$(OBJEXT) gawkmisc$U.$(OBJEXT) getopt$U.$(OBJEXT) \ ! getopt1$U.$(OBJEXT) io$U.$(OBJEXT) main$U.$(OBJEXT) \ msg$U.$(OBJEXT) node$U.$(OBJEXT) random$U.$(OBJEXT) \ re$U.$(OBJEXT) regex$U.$(OBJEXT) replace$U.$(OBJEXT) \ version$U.$(OBJEXT) --- 101,107 ---- am__objects_1 = array$U.$(OBJEXT) awkgram$U.$(OBJEXT) \ builtin$U.$(OBJEXT) dfa$U.$(OBJEXT) ext$U.$(OBJEXT) \ field$U.$(OBJEXT) gawkmisc$U.$(OBJEXT) getopt$U.$(OBJEXT) \ ! getopt1$U.$(OBJEXT) io$U.$(OBJEXT) xml_puller$U.$(OBJEXT) main$U.$(OBJEXT) \ msg$U.$(OBJEXT) node$U.$(OBJEXT) random$U.$(OBJEXT) \ re$U.$(OBJEXT) regex$U.$(OBJEXT) replace$U.$(OBJEXT) \ version$U.$(OBJEXT) *************** *** 124,130 **** @AMDEP_TRUE@ ./$(DEPDIR)/eval_p$U.Po ./$(DEPDIR)/ext$U.Po \ @AMDEP_TRUE@ ./$(DEPDIR)/field$U.Po ./$(DEPDIR)/gawkmisc$U.Po \ @AMDEP_TRUE@ ./$(DEPDIR)/getopt$U.Po ./$(DEPDIR)/getopt1$U.Po \ ! @AMDEP_TRUE@ ./$(DEPDIR)/io$U.Po ./$(DEPDIR)/main$U.Po \ @AMDEP_TRUE@ ./$(DEPDIR)/msg$U.Po ./$(DEPDIR)/node$U.Po \ @AMDEP_TRUE@ ./$(DEPDIR)/profile$U.Po \ @AMDEP_TRUE@ ./$(DEPDIR)/profile_p$U.Po ./$(DEPDIR)/random$U.Po \ --- 124,130 ---- @AMDEP_TRUE@ ./$(DEPDIR)/eval_p$U.Po ./$(DEPDIR)/ext$U.Po \ @AMDEP_TRUE@ ./$(DEPDIR)/field$U.Po ./$(DEPDIR)/gawkmisc$U.Po \ @AMDEP_TRUE@ ./$(DEPDIR)/getopt$U.Po ./$(DEPDIR)/getopt1$U.Po \ ! 
@AMDEP_TRUE@ ./$(DEPDIR)/io$U.Po ./$(DEPDIR)/xml_puller$U.Po ./$(DEPDIR)/main$U.Po \ @AMDEP_TRUE@ ./$(DEPDIR)/msg$U.Po ./$(DEPDIR)/node$U.Po \ @AMDEP_TRUE@ ./$(DEPDIR)/profile$U.Po \ @AMDEP_TRUE@ ./$(DEPDIR)/profile_p$U.Po ./$(DEPDIR)/random$U.Po \ *************** *** 199,209 **** INTLLIBS = @INTLLIBS@ INTLOBJS = @INTLOBJS@ INTL_LIBTOOL_SUFFIX_PREFIX = @INTL_LIBTOOL_SUFFIX_PREFIX@ LDFLAGS = @LDFLAGS@ LIBICONV = @LIBICONV@ LIBINTL = @LIBINTL@ LIBOBJS = @LIBOBJS@ ! LIBS = @LIBS@ LN_S = @LN_S@ LTLIBICONV = @LTLIBICONV@ LTLIBINTL = @LTLIBINTL@ --- 199,209 ---- INTLLIBS = @INTLLIBS@ INTLOBJS = @INTLOBJS@ INTL_LIBTOOL_SUFFIX_PREFIX = @INTL_LIBTOOL_SUFFIX_PREFIX@ LDFLAGS = @LDFLAGS@ LIBICONV = @LIBICONV@ LIBINTL = @LIBINTL@ LIBOBJS = @LIBOBJS@ ! LIBS = @LIBS@ -lexpat LN_S = @LN_S@ LTLIBICONV = @LTLIBICONV@ LTLIBINTL = @LTLIBINTL@ *************** *** 343,348 **** --- 343,350 ---- gettext.h \ io.c \ mbsupport.h \ + xml_puller.c \ + xml_puller.h \ main.c \ msg.c \ node.c \ *************** *** 489,494 **** --- 491,497 ---- @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/getopt$U.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/getopt1$U.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/io$U.Po@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xml_puller$U.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/main$U.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/msg$U.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/node$U.Po@am__quote@ *************** *** 539,544 **** --- 542,549 ---- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if
test -f $(srcdir)/getopt1.c; then echo $(srcdir)/getopt1.c; else echo getopt1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ io_.c: io.c $(ANSI2KNR) $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/io.c; then echo $(srcdir)/io.c; else echo io.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ + xml_puller_.c: xml_puller.c $(ANSI2KNR) + $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/xml_puller.c; then echo $(srcdir)/xml_puller.c; else echo xml_puller.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ main_.c: main.c $(ANSI2KNR) $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/main.c; then echo $(srcdir)/main.c; else echo main.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ msg_.c: msg.c $(ANSI2KNR) *************** *** 561,567 **** $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/version.c; then echo $(srcdir)/version.c; else echo version.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ array_.$(OBJEXT) awkgram_.$(OBJEXT) builtin_.$(OBJEXT) dfa_.$(OBJEXT) \ eval_.$(OBJEXT) eval_p_.$(OBJEXT) ext_.$(OBJEXT) field_.$(OBJEXT) \ ! gawkmisc_.$(OBJEXT) getopt_.$(OBJEXT) getopt1_.$(OBJEXT) io_.$(OBJEXT) \ main_.$(OBJEXT) msg_.$(OBJEXT) node_.$(OBJEXT) profile_.$(OBJEXT) \ profile_p_.$(OBJEXT) random_.$(OBJEXT) re_.$(OBJEXT) regex_.$(OBJEXT) \ replace_.$(OBJEXT) version_.$(OBJEXT) : $(ANSI2KNR) --- 566,572 ---- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/version.c; then echo $(srcdir)/version.c; else echo version.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@ array_.$(OBJEXT) awkgram_.$(OBJEXT) builtin_.$(OBJEXT) dfa_.$(OBJEXT) \ eval_.$(OBJEXT) eval_p_.$(OBJEXT) ext_.$(OBJEXT) field_.$(OBJEXT) \ !
gawkmisc_.$(OBJEXT) getopt_.$(OBJEXT) getopt1_.$(OBJEXT) io_.$(OBJEXT) xml_puller_.$(OBJEXT) \ main_.$(OBJEXT) msg_.$(OBJEXT) node_.$(OBJEXT) profile_.$(OBJEXT) \ profile_p_.$(OBJEXT) random_.$(OBJEXT) re_.$(OBJEXT) regex_.$(OBJEXT) \ replace_.$(OBJEXT) version_.$(OBJEXT) : $(ANSI2KNR) diff -rcN ../gawk-3.1.4/awk.h ../gawk-3.1.4.xml/awk.h *** ../gawk-3.1.4/awk.h 2004-07-26 16:11:05.000000000 +0200 --- ../gawk-3.1.4.xml/awk.h 2004-08-13 10:59:03.763270608 +0200 *************** *** 82,87 **** --- 82,89 ---- #include #endif + #include + /* ----------------- System dependencies (with more includes) -----------*/ /* This section is the messiest one in the file, not a lot that can be done */ *************** *** 424,429 **** --- 426,433 ---- Node_array_ref, /* array passed by ref as parameter */ Node_BINMODE, /* variables recognized in the grammar */ + Node_XMLMODE, + Node_XMLCHARSET, Node_CONVFMT, Node_FIELDWIDTHS, Node_FNR, *************** *** 588,593 **** --- 592,598 ---- ssize_t count; /* amount read last time */ size_t scanoff; /* where we were in the buffer when we had to regrow/refill */ + XML_Puller xml_puller; /* set by iop_alloc when needed */ int flag; # define IOP_IS_TTY 1 # define IOP_IS_INTERNAL 2 *************** *** 595,600 **** --- 600,606 ---- # define IOP_NOFREE_OBJ 8 # define IOP_AT_EOF 16 # define IOP_CLOSED 32 + # define IOP_XML 64 } IOBUF; typedef void (*Func_ptr) P((void)); *************** *** 666,671 **** --- 672,678 ---- extern long NR; extern long FNR; extern int BINMODE; + extern int XMLMODE; extern int IGNORECASE; extern int RS_is_null; extern char *OFS; *************** *** 677,682 **** --- 684,697 ---- ATTRIBUTE_EXPORTED extern int CONVFMTidx; extern int OFMTidx; extern char *TEXTDOMAIN; + extern NODE *XMLMODE_node, *XMLCHARSET_node; + extern NODE *XMLSTARTELEM_node, *XMLENDELEM_node; + extern NODE *XMLCHARDATA_node, *XMLPROCINST_node, *XMLCOMMENT_node; + extern NODE *XMLSTARTCDATA_node, *XMLENDCDATA_node; + extern NODE *XMLVERSION_node, 
*XMLENCODING_node; + extern NODE *XMLSTARTDOCT_node, *XMLENDDOCT_node; + extern NODE *XMLDOCTPUBID_node, *XMLDOCTSYSID_node; + extern NODE *XMLUNPARSED_node; extern NODE *BINMODE_node, *CONVFMT_node, *FIELDWIDTHS_node, *FILENAME_node; extern NODE *FNR_node, *FS_node, *IGNORECASE_node, *NF_node; extern NODE *NR_node, *OFMT_node, *OFS_node, *ORS_node, *RLENGTH_node; *************** *** 1001,1006 **** --- 1016,1023 ---- extern void set_OFMT P((void)); extern void set_CONVFMT P((void)); extern void set_BINMODE P((void)); + extern void set_XMLMODE P((void)); + extern void set_XMLCHARSET P((void)); extern void set_LINT P((void)); extern void set_TEXTDOMAIN P((void)); extern void update_ERRNO P((void)); *************** *** 1071,1076 **** --- 1088,1096 ---- extern int main P((int argc, char **argv)); extern NODE *load_environ P((void)); extern NODE *load_procinfo P((void)); + extern NODE *load_xmlattr P((void)); + extern NODE *load_xmlattrpos P((void)); /* defined in main.c; called from awkgram.y and io.c */ + extern void update_xmlattr P((const char **attributes)); extern int arg_assign P((char *arg, int initing)); /* msg.c */ extern void err P((const char *s, const char *emsg, va_list argp)) ATTRIBUTE_PRINTF(2, 0); diff -rcN ../gawk-3.1.4/awkgram.y ../gawk-3.1.4.xml/awkgram.y *** ../gawk-3.1.4/awkgram.y 2004-07-26 16:11:12.000000000 +0200 --- ../gawk-3.1.4.xml/awkgram.y 2004-08-11 19:05:12.000000000 +0200 *************** *** 3035,3041 **** } else { /* not found */ ! if (! do_traditional && STREQ(name, "PROCINFO")) r = load_procinfo(); else if (STREQ(name, "ENVIRON")) r = load_environ(); --- 3035,3045 ---- } else { /* not found */ ! if (! do_traditional && STREQ(name, "XMLATTR")) ! r = load_xmlattr(); ! else if (! do_traditional && STREQ(name, "XMLATTRPOS")) ! r = load_xmlattrpos(); ! else if (!
do_traditional && STREQ(name, "PROCINFO")) r = load_procinfo(); else if (STREQ(name, "ENVIRON")) r = load_environ(); diff -rcN ../gawk-3.1.4/eval.c ../gawk-3.1.4.xml/eval.c *** ../gawk-3.1.4/eval.c 2004-07-26 16:11:57.000000000 +0200 --- ../gawk-3.1.4.xml/eval.c 2004-08-11 19:05:12.000000000 +0200 *************** *** 272,277 **** --- 272,279 ---- "Node_ahash", "Node_array_ref", "Node_BINMODE", + "Node_XMLMODE", + "Node_XMLCHARSET", "Node_CONVFMT", "Node_FIELDWIDTHS", "Node_FNR", *************** *** 977,982 **** --- 979,986 ---- case Node_OFMT: case Node_CONVFMT: case Node_BINMODE: + case Node_XMLMODE: + case Node_XMLCHARSET: case Node_LINT: case Node_SUBSEP: case Node_TEXTDOMAIN: *************** *** 1951,1956 **** --- 1955,1972 ---- *assign = set_BINMODE; break; + case Node_XMLMODE: + aptr = &(XMLMODE_node->var_value); + if (assign != NULL) + *assign = set_XMLMODE; + break; + + case Node_XMLCHARSET: + aptr = &(XMLCHARSET_node->var_value); + if (assign != NULL) + *assign = set_XMLCHARSET; + break; + case Node_LINT: aptr = &(LINT_node->var_value); if (assign != NULL) *************** *** 2171,2176 **** --- 2187,2225 ---- BINMODE = 0; /* shouldn't happen */ } + /* set_XMLMODE --- set parsing mode */ + + void + set_XMLMODE() + { + static int warned = FALSE; + char *p, *cp, save; + NODE *v; + int digits = FALSE; + + if ((do_lint || do_traditional) && ! 
warned) { + warned = TRUE; + lintwarn(_("`XMLMODE' is a gawk extension")); + } + if (do_traditional) + XMLMODE = 0; + else if ((XMLMODE_node->var_value->flags & NUMBER) != 0) + XMLMODE = (int) force_number(XMLMODE_node->var_value); + else if ((XMLMODE_node->var_value->flags & STRING) != 0) { + /* arbitrary string, assume XML */ + XMLMODE = 1; + warning(_("XMLMODE: arbitrary string value treated as \"1\"")); /* translatable, like the lint message above */ + } else + XMLMODE = 0; /* shouldn't happen */ + } + + /* set_XMLCHARSET --- set the output character set */ + + void + set_XMLCHARSET() + { + } + /* set_OFS --- update OFS related variables when OFS assigned to */ void diff -rcN ../gawk-3.1.4/io.c ../gawk-3.1.4.xml/io.c *** ../gawk-3.1.4/io.c 2004-07-28 15:41:56.000000000 +0200 --- ../gawk-3.1.4.xml/io.c 2004-08-13 11:59:13.867451352 +0200 *************** *** 391,396 **** --- 391,401 ---- else ret = close(iop->fd); + if ((iop->flag & IOP_XML) != 0) { + XML_PullerFree(iop->xml_puller); + iop->xml_puller = NULL; + } + if (ret == -1) warning(_("close of fd %d (`%s') failed (%s)"), iop->fd, iop->name, strerror(errno)); *************** *** 2439,2444 **** --- 2444,2523 ---- } #endif + static void + resetXMLvars(void) + { + unref(XMLSTARTELEM_node->var_value); + XMLSTARTELEM_node->var_value=Nnull_string; + unref(XMLENDELEM_node->var_value); + XMLENDELEM_node->var_value=Nnull_string; + unref(XMLCHARDATA_node->var_value); + XMLCHARDATA_node->var_value=Nnull_string; + unref(XMLPROCINST_node->var_value); + XMLPROCINST_node->var_value=Nnull_string; + unref(XMLCOMMENT_node->var_value); + XMLCOMMENT_node->var_value=Nnull_string; + unref(XMLSTARTCDATA_node->var_value); + XMLSTARTCDATA_node->var_value=Nnull_string; + unref(XMLENDCDATA_node->var_value); + XMLENDCDATA_node->var_value=Nnull_string; + unref(XMLVERSION_node->var_value); + XMLVERSION_node->var_value=Nnull_string; + unref(XMLENCODING_node->var_value); + XMLENCODING_node->var_value=Nnull_string; + unref(XMLSTARTDOCT_node->var_value); +
XMLSTARTDOCT_node->var_value=Nnull_string; + unref(XMLENDDOCT_node->var_value); + XMLENDDOCT_node->var_value=Nnull_string; + unref(XMLDOCTPUBID_node->var_value); + XMLDOCTPUBID_node->var_value=Nnull_string; + unref(XMLDOCTSYSID_node->var_value); + XMLDOCTSYSID_node->var_value=Nnull_string; + unref(XMLUNPARSED_node->var_value); + XMLUNPARSED_node->var_value=Nnull_string; + + do_delete(load_xmlattr(), NULL); + do_delete(load_xmlattrpos(), NULL); + + } + + /* update_xmlattr --- populate the XMLATTR and the XMLATTRPOS array + * Upon invokation, We assume that all previous entries in the + * XMLATTR array are already deleted. + */ + + void + update_xmlattr(const char **attributes) + { + extern NODE *XMLATTR_node; + extern NODE *XMLATTRPOS_node; + char *var, *val; + NODE **aptr; + int i; + + /* Take each attribute and enter it into the XMLATTR array. + * Take each attribute and enter its name into the XMLATTRPOS array. + * attributes[i ] is the pointer to the name of the attribute. + * attributes[i+1] is the pointer to the value of the attribute. + */ + for (i = 0; attributes[i] != NULL && attributes[i+1] != NULL; i += 2) { + /* First, enter this attribute into XMLATTR. */ + var = attributes[i]; + val = attributes[i+1]; + aptr = assoc_lookup(XMLATTR_node, tmp_string(var, strlen(var)), + FALSE); + *aptr = make_string(val, strlen(val)); + (*aptr)->flags |= MAYBE_NUM; + + /* Second, enter this attribute's name into XMLATTRPOS. 
*/ + aptr = assoc_lookup(XMLATTRPOS_node, tmp_number((AWKNUM) 1 + i/2), + FALSE); + *aptr = make_string(var, strlen(var)); + (*aptr)->flags |= MAYBE_NUM; + } + } + + /* iop_alloc --- allocate an IOBUF structure for an open fd */ static IOBUF * *************** *** 2467,2472 **** --- 2546,2574 ---- iop->dataend = NULL; iop->end = iop->buf + iop->size; iop->flag = 0; + if (XMLMODE == 0) { + iop->xml_puller = NULL; + } else { + iop->flag |= IOP_XML; + iop->xml_puller = XML_PullerCreate( + iop->fd, + XMLCHARSET_node->var_value->stptr, + 8192); + if (iop->xml_puller == NULL) + fatal(_("cannot create XML puller")); + XML_PullerEnable (iop->xml_puller, + XML_PULLER_START_ELEMENT | + XML_PULLER_END_ELEMENT | + XML_PULLER_CHARDATA | + XML_PULLER_START_CDATA | + XML_PULLER_END_CDATA | + XML_PULLER_PROC_INST | + XML_PULLER_COMMENT | + XML_PULLER_DECL | + XML_PULLER_START_DOCT | + XML_PULLER_END_DOCT | + XML_PULLER_UNPARSED); + } return iop; } *************** *** 2836,2841 **** --- 2938,3027 ---- return REC_OK; } + /* get_xml_record --- read an XML token from IOP into out, return length of EOF, do not set RT */ + static int + get_xml_record(char **out, /* pointer to pointer to data */ + IOBUF *iop, /* input IOP */ + int *errcode) /* pointer to error variable */ + { + int cnt = 0; + XML_PullerToken token = XML_PullerNext(iop->xml_puller); + + resetXMLvars(); + *out = NULL; + if (token == NULL) { + if (iop->xml_puller->status != XML_STATUS_OK) + warning(_("XML error: %s at line %d\n"), + iop->xml_puller->error, + iop->xml_puller->line); + iop->flag |= IOP_AT_EOF; + } else { + switch (token->kind) { + case XML_PULLER_START_ELEMENT: + XMLSTARTELEM_node->var_value = make_string(token->name, strlen(token->name)); + update_xmlattr(token->attr); + break; + case XML_PULLER_END_ELEMENT: + XMLENDELEM_node->var_value = make_string(token->name, strlen(token->name)); + break; + case XML_PULLER_CHARDATA: + XMLCHARDATA_node->var_value = make_number((AWKNUM) 1); + *out = token->data; + cnt = 
token->number; + break; + case XML_PULLER_START_CDATA: + XMLSTARTCDATA_node->var_value = make_number((AWKNUM) 1); + *out = NULL; + cnt = 0; + break; + case XML_PULLER_END_CDATA: + XMLENDCDATA_node->var_value = make_number((AWKNUM) 1); + *out = NULL; + cnt = 0; + break; + case XML_PULLER_PROC_INST: + XMLPROCINST_node->var_value = make_string(token->name, strlen(token->name)); + *out = token->data; + cnt = strlen(token->data); + break; + case XML_PULLER_COMMENT: + XMLCOMMENT_node->var_value = make_number((AWKNUM) 1); + *out = token->data; + cnt = strlen(token->data); + break; + case XML_PULLER_DECL: + if (token->name != NULL) + XMLVERSION_node->var_value = make_string(token->name, strlen(token->name)); + if (token->data != NULL) + XMLENCODING_node->var_value = make_string(token->data, strlen(token->data)); + /* We choose to ignore token->number ("standalone"). */ + break; + case XML_PULLER_START_DOCT: + if (token->name != NULL) + XMLSTARTDOCT_node->var_value = make_string(token->name, strlen(token->name)); + if (token->attr != NULL) + XMLDOCTPUBID_node->var_value = make_string((char *) token->attr, strlen((char *) token->attr)); + if (token->data != NULL) + XMLDOCTSYSID_node->var_value = make_string(token->data, strlen(token->data)); + *out = NULL; + cnt = 0; + break; + case XML_PULLER_END_DOCT: + XMLENDDOCT_node->var_value = make_number((AWKNUM) 1); + *out = NULL; + cnt = 0; + break; + case XML_PULLER_UNPARSED: + XMLUNPARSED_node->var_value = make_number((AWKNUM) 1); + *out = token->data; + cnt = token->number; + break; + } + } + + return cnt; + } + /* = */ /* get_a_record --- read a record from IOP into out, return length of EOF, set RT */ *************** *** 2854,2859 **** --- 3040,3048 ---- if (at_eof(iop) && no_data_left(iop)) return EOF; + if ((iop->flag & IOP_XML) != 0) + return get_xml_record(out, iop, errcode); + /* = */ if (has_no_data(iop) || no_data_left(iop)) { iop->count = read(iop->fd, iop->buf, iop->readsize); *************** *** 2875,2881 **** } - /* 
= */ state = NOSTATE; for (;;) { --- 3064,3069 ---- diff -rcN ../gawk-3.1.4/main.c ../gawk-3.1.4.xml/main.c *** ../gawk-3.1.4/main.c 2004-07-28 15:42:19.000000000 +0200 --- ../gawk-3.1.4.xml/main.c 2004-08-13 11:03:06.298399664 +0200 *************** *** 52,57 **** --- 52,65 ---- static void init_groupset P((void)); /* These nodes store all the special variables AWK uses */ + NODE *XMLMODE_node, *XMLCHARSET_node, *XMLATTR_node, *XMLATTRPOS_node; + NODE *XMLSTARTELEM_node, *XMLENDELEM_node; + NODE *XMLCHARDATA_node, *XMLPROCINST_node, *XMLCOMMENT_node; + NODE *XMLSTARTCDATA_node, *XMLENDCDATA_node; + NODE *XMLVERSION_node, *XMLENCODING_node; + NODE *XMLSTARTDOCT_node, *XMLENDDOCT_node; + NODE *XMLDOCTPUBID_node, *XMLDOCTSYSID_node; + NODE *XMLUNPARSED_node; NODE *ARGC_node, *ARGIND_node, *ARGV_node, *BINMODE_node, *CONVFMT_node; NODE *ENVIRON_node, *ERRNO_node, *FIELDWIDTHS_node, *FILENAME_node, *FNR_node; NODE *FS_node, *IGNORECASE_node, *NF_node, *NR_node, *OFMT_node, *OFS_node; *************** *** 62,67 **** --- 70,76 ---- long NR; long FNR; int BINMODE; + int XMLMODE; int IGNORECASE; char *OFS; char *ORS; *************** *** 532,537 **** --- 541,552 ---- init_args(optind, argc, (char *) myname, argv); (void) tokexpand(); + /* Set up an empty array of attributes in the XMLATTR array */ + XMLATTR_node = install("XMLATTR", + node((NODE *) NULL, Node_var_array, (NODE *) NULL)); + XMLATTRPOS_node = install("XMLATTRPOS", + node((NODE *) NULL, Node_var_array, (NODE *) NULL)); + /* Read in the program */ if (yyparse() != 0 || errcount != 0) exit(1); *************** *** 797,802 **** --- 812,833 ---- {&BINMODE_node, "BINMODE", Node_BINMODE, NULL, 0, NULL }, {&LINT_node, "LINT", Node_LINT, NULL, 0, NULL }, {&TEXTDOMAIN_node, "TEXTDOMAIN", Node_TEXTDOMAIN, "messages", 0, set_TEXTDOMAIN }, + {&XMLMODE_node, "XMLMODE", Node_XMLMODE, NULL, 0, NULL }, + {&XMLCHARSET_node, "XMLCHARSET", Node_XMLCHARSET, "US-ASCII", 0, set_XMLCHARSET}, + {&XMLSTARTELEM_node, "XMLSTARTELEM", 
Node_var, NULL, 0, NULL }, + {&XMLENDELEM_node, "XMLENDELEM", Node_var, NULL, 0, NULL }, + {&XMLCHARDATA_node, "XMLCHARDATA", Node_var, NULL, 0, NULL }, + {&XMLPROCINST_node, "XMLPROCINST", Node_var, NULL, 0, NULL }, + {&XMLCOMMENT_node, "XMLCOMMENT", Node_var, NULL, 0, NULL }, + {&XMLSTARTCDATA_node, "XMLSTARTCDATA", Node_var, NULL, 0, NULL }, + {&XMLENDCDATA_node, "XMLENDCDATA", Node_var, NULL, 0, NULL }, + {&XMLVERSION_node, "XMLVERSION", Node_var, NULL, 0, NULL }, + {&XMLENCODING_node, "XMLENCODING", Node_var, NULL, 0, NULL }, + {&XMLSTARTDOCT_node, "XMLSTARTDOCT", Node_var, NULL, 0, NULL }, + {&XMLENDDOCT_node, "XMLENDDOCT", Node_var, NULL, 0, NULL }, + {&XMLDOCTPUBID_node, "XMLDOCTPUBID", Node_var, NULL, 0, NULL }, + {&XMLDOCTSYSID_node, "XMLDOCTSYSID", Node_var, NULL, 0, NULL }, + {&XMLUNPARSED_node, "XMLUNPARSED", Node_var, NULL, 0, NULL }, {0, NULL, Node_illegal, NULL, 0, NULL }, }; *************** *** 940,945 **** --- 971,992 ---- return PROCINFO_node; } + /* load_xmlattr --- return a pointer to the XMLATTR array node */ + + NODE * + load_xmlattr(void) + { + return XMLATTR_node; + } + + /* load_xmlattrpos --- return a pointer to the XMLATTRPOS array node */ + + NODE * + load_xmlattrpos(void) + { + return XMLATTRPOS_node; + } + /* arg_assign --- process a command-line assignment */ int diff -rcN ../gawk-3.1.4/profile.c ../gawk-3.1.4.xml/profile.c *** ../gawk-3.1.4/profile.c 2004-07-28 15:47:17.000000000 +0200 --- ../gawk-3.1.4.xml/profile.c 2004-08-11 19:05:12.000000000 +0200 *************** *** 592,597 **** --- 592,605 ---- fprintf(prof_fp, "SUBSEP"); return; + case Node_XMLMODE: + fprintf(prof_fp, "XMLMODE"); + return; + + case Node_XMLCHARSET: + fprintf(prof_fp, "XMLCHARSET"); + return; + case Node_TEXTDOMAIN: fprintf(prof_fp, "TEXTDOMAIN"); return; *************** *** 874,879 **** --- 882,895 ---- fprintf(prof_fp, "BINMODE"); break; + case Node_XMLMODE: + fprintf(prof_fp, "XMLMODE"); + break; + + case Node_XMLCHARSET: + fprintf(prof_fp, "XMLCHARSET"); 
+ break; + case Node_LINT: fprintf(prof_fp, "LINT"); break; *************** *** 1330,1335 **** --- 1346,1353 ---- case Node_var_array: case Node_val: case Node_BINMODE: + case Node_XMLMODE: + case Node_XMLCHARSET: case Node_CONVFMT: case Node_FIELDWIDTHS: case Node_FNR: *************** *** 1367,1372 **** --- 1385,1392 ---- case Node_val: case Node_builtin: case Node_BINMODE: + case Node_XMLMODE: + case Node_XMLCHARSET: case Node_CONVFMT: case Node_FIELDWIDTHS: case Node_FNR: diff -rcN ../gawk-3.1.4/xml_puller.c ../gawk-3.1.4.xml/xml_puller.c *** ../gawk-3.1.4/xml_puller.c 1970-01-01 01:00:00.000000000 +0100 --- ../gawk-3.1.4.xml/xml_puller.c 2004-08-12 18:25:05.000000000 +0200 *************** *** 0 **** --- 1,639 ---- + /* + * xml_puller.c --- routines for reading XML input with expat + */ + + /* + * Copyright (C) 1986, 1988, 1989, 1991-2003 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + + #include + #include + #include + #include + #include + + + static void XML_PullerSetError (XML_Puller puller) + { + /* We want to report the first error, so we forbid overwriting. 
*/ + if (puller->status == XML_STATUS_OK) { + puller->status = XML_STATUS_ERROR; + puller->line = XML_GetCurrentLineNumber(puller->parser); + puller->error = XML_ErrorString(XML_GetErrorCode(puller->parser)); + } + } + + + static char * XML_PullerAllocateAndCheck ( + const char * source, + int length, + int * new_length, + iconv_t converter) + { + size_t ibl = length; + size_t obl = 4 * length; + char * retval = (char *) malloc(obl); + char * input = (char *) source; + char * output = retval; + + if (retval != NULL) { + int actual_length = length; + if (converter == NULL) { + memcpy(retval, source, length); + } else { + /* Multibyte characters which are split upon buffers will + * not occur because expat prevents it. + */ + if (iconv(converter, & input, &ibl, & output, & obl) == (size_t)(-1)) { + /* Conversion failed (E2BIG, EILSEQ or EINVAL). The output + * buffer is only partially written, so fall back to the + * unconverted input; actual_length is still length. + */ + memcpy(retval, source, length); + /* Put the shared converter back into its initial state. */ + (void) iconv(converter, NULL, NULL, NULL, NULL); + } else { + actual_length = 4 * length - obl; + } + } + /* Shrink to fit; keep the old block if realloc cannot. */ + if (actual_length > 0) { + char * shrunk = (char *) realloc(retval, actual_length); + if (shrunk != NULL) retval = shrunk; + } + if (new_length != NULL) * new_length = actual_length; + } + return retval; + } + + + static void XML_PullerInsertTokenData ( + void * userData, + XML_PullerTokenKindType kind, + const char * name, + const char * data, + const char ** attr, + int number + ) + { + XML_Puller puller = (XML_Puller) userData; + XML_PullerToken tok; + int i; + int num_attr; + + if (puller->status != XML_STATUS_OK) + return; + + if (kind == XML_PULLER_CHARDATA) { + if (data == NULL) { + /* The CDATA token is complete. Enlist token below in the switch statement. */ + } else { + /* This is some more CDATA to be appended to puller->cdata.
*/ + char * append = (char *) realloc(puller->cdata, puller->cdata_len + number); + if (append == NULL) { + free(puller->cdata); + puller->cdata = NULL; + puller->cdata_len = 0; + return; + } + memcpy(append + puller->cdata_len, data, number); + puller->cdata = append; + puller->cdata_len += number; + return; + } + } else if (puller->cdata != NULL) { + /* The token to be enlisted is not a CDATA token. + * Before we can enlist it, we must enlist the pending CDATA. + */ + XML_PullerInsertTokenData (userData, XML_PULLER_CHARDATA, NULL, NULL, NULL, 0); + } + + tok = (XML_PullerToken) malloc(sizeof(struct XML_PullerTokenDataType)); + if (tok == NULL) { + XML_PullerSetError(puller); + return; + } + + tok->next = NULL; + tok->kind = kind; + + switch (kind) { + case XML_PULLER_START_ELEMENT: + for (num_attr = 0; attr[2*num_attr] != NULL ; num_attr ++) + ; + tok->attr = (char **) malloc((2*num_attr+1) * sizeof(char *)); + if (tok->attr == NULL) { + free(tok); + XML_PullerSetError(puller); + return; + } + for (i = 0; i < num_attr; i++) { + tok->attr[2*i ] = XML_PullerAllocateAndCheck( + attr[2*i ], strlen(attr[2*i ])+1, NULL, puller->converter); + tok->attr[2*i+1] = XML_PullerAllocateAndCheck( + attr[2*i+1], strlen(attr[2*i+1])+1, NULL, puller->converter); + } + tok->attr[2*num_attr] = NULL; /* FALLTHROUGH: a start element also gets its name copied below */ + case XML_PULLER_END_ELEMENT: + tok->name = XML_PullerAllocateAndCheck(name, strlen(name)+1, NULL, puller->converter); + break; + + case XML_PULLER_CHARDATA: + /* Allocating once more is not a good idea, but we do it because of iconv.
*/ + tok->data = XML_PullerAllocateAndCheck(puller->cdata, puller->cdata_len, + & tok->number, puller->converter); + free(puller->cdata); + puller->cdata = NULL; + puller->cdata_len = 0; + if (tok->data == NULL) { + free(tok); + return; + } + break; + + case XML_PULLER_START_CDATA: + break; + + case XML_PULLER_END_CDATA: + break; + + case XML_PULLER_PROC_INST: + tok->name = XML_PullerAllocateAndCheck(name, strlen(name)+1, NULL, puller->converter); + if (tok->name == NULL) { + free(tok); + return; + } + tok->data = XML_PullerAllocateAndCheck(data, strlen(data)+1, NULL, puller->converter); + if (tok->data == NULL) { + free(tok->name); + free(tok); + return; + } + break; + + case XML_PULLER_COMMENT: + tok->data = XML_PullerAllocateAndCheck(data, strlen(data)+1, NULL, puller->converter); + if (tok->data == NULL) { + free(tok); + return; + } + break; + + case XML_PULLER_DECL: + tok->name = (char *) name; + tok->data = (char *) data; + tok->number = number; + if (tok->name != NULL) { + tok->name = XML_PullerAllocateAndCheck(name, strlen(name)+1, NULL, puller->converter); + if (tok->name == NULL) { + free(tok); + return; + } + } + if (tok->data != NULL) { + tok->data = XML_PullerAllocateAndCheck(data, strlen(data)+1, NULL, puller->converter); + if (tok->data == NULL) { + free(tok->name); + free(tok); + return; + } + } + break; + + case XML_PULLER_START_DOCT: + tok->name = (char *) name; + tok->data = (char *) data; + tok->attr = (char **) attr; + if (tok->name != NULL) { + tok->name = XML_PullerAllocateAndCheck(name, strlen(name)+1, NULL, puller->converter); + if (tok->name == NULL) { + free(tok); + return; + } + } + if (tok->data != NULL) { + tok->data = XML_PullerAllocateAndCheck(data, strlen(data)+1, NULL, puller->converter); + if (tok->data == NULL) { + free(tok->name); + free(tok); + return; + } + } + if (tok->attr != NULL) { + tok->attr = (char **) XML_PullerAllocateAndCheck((char *)attr, strlen((char *)attr)+1, NULL, puller->converter); + if (tok->attr == NULL) { + 
free(tok->data); + free(tok->name); + free(tok); + return; + } + } + break; + + case XML_PULLER_END_DOCT: + break; + + case XML_PULLER_UNPARSED: + tok->data = (char *) data; + tok->number = number; + if (tok->data != NULL) { + tok->data = XML_PullerAllocateAndCheck(data, number, NULL, NULL); + if (tok->data == NULL) { + free(tok); + return; + } + } + break; + } + + /* Append the token to the list of pending tokens. */ + if (puller->tok_head == NULL) + { + puller->tok_head = tok; + } + else + { + puller->tok_tail->next = tok; + } + + puller->tok_tail = tok; + } + + + void XML_PullerFreeTokenData(XML_PullerToken tok) + { + int i; + + if (tok == NULL) + return; + + XML_PullerFreeTokenData(tok->next); + + switch (tok->kind) { + case XML_PULLER_START_ELEMENT: + for (i=0; tok->attr[i] != NULL; i++) + free(tok->attr[i]); + free(tok->attr); + case XML_PULLER_END_ELEMENT: + free(tok->name); + break; + + case XML_PULLER_CHARDATA: + free(tok->data); + break; + + case XML_PULLER_START_CDATA: + break; + + case XML_PULLER_END_CDATA: + break; + + case XML_PULLER_PROC_INST: + free(tok->name); + free(tok->data); + break; + + case XML_PULLER_COMMENT: + free(tok->data); + break; + + case XML_PULLER_DECL: + free(tok->name); + free(tok->data); + break; + + case XML_PULLER_START_DOCT: + free(tok->name); + free(tok->data); + free((char *) tok->attr); + break; + + case XML_PULLER_END_DOCT: + break; + + case XML_PULLER_UNPARSED: + free(tok->data); + break; + + } + + free(tok); + } + + + static void + start_element_handler(void *userData, const char *name, const char **attr) + { + XML_PullerInsertTokenData (userData, XML_PULLER_START_ELEMENT, name, NULL, attr, 0); + } + + + static void + end_element_handler(void *userData, const char *name) + { + XML_PullerInsertTokenData (userData, XML_PULLER_END_ELEMENT, name, NULL, NULL, 0); + } + + + static void + chardata_handler(void *userData, const XML_Char *s, int len) + { + XML_PullerInsertTokenData (userData, XML_PULLER_CHARDATA, NULL, s, NULL, 
len); + } + + + static void + proc_inst_handler(void *userData, const XML_Char *target, const XML_Char *data) + { + XML_PullerInsertTokenData (userData, XML_PULLER_PROC_INST, target, data, NULL, 0); + } + + + static void + comment_handler(void *userData, const XML_Char *data) + { + XML_PullerInsertTokenData (userData, XML_PULLER_COMMENT, NULL, data, NULL, 0); + } + + + static void + start_cdata_handler(void *userData) + { + XML_PullerInsertTokenData (userData, XML_PULLER_START_CDATA, NULL, NULL, NULL, 0); + } + + + static void + end_cdata_handler(void *userData) + { + XML_PullerInsertTokenData (userData, XML_PULLER_END_CDATA, NULL, NULL, NULL, 0); + } + + + static void + decl_handler(void *userData, const XML_Char *version, const XML_Char *encoding, int standalone) + { + XML_PullerInsertTokenData (userData, XML_PULLER_DECL, version, encoding, NULL, standalone); + } + + + static void + start_doct_handler(void *userData, + const XML_Char *doctypeName, + const XML_Char *sysid, + const XML_Char *pubid, + int has_internal_subset) + { + XML_PullerInsertTokenData (userData, XML_PULLER_START_DOCT, doctypeName, sysid, + (const char **) pubid, has_internal_subset); + } + + + static void + end_doct_handler(void *userData) + { + XML_PullerInsertTokenData (userData, XML_PULLER_END_DOCT, NULL, NULL, NULL, 0); + } + + + static void + unparsed_handler(void *userData, const XML_Char *s, int len) + { + XML_PullerInsertTokenData (userData, XML_PULLER_UNPARSED, NULL, s, NULL, len); + } + + + XML_Puller XML_PullerCreate (int filedesc, char * encoding, int buffer_length) + { + XML_Puller puller; + + if (buffer_length < 1) + return NULL; + + puller = (XML_Puller) malloc(sizeof(struct XML_PullerDataType)); + if (puller == NULL) + return NULL; + + puller->buffer = NULL; + puller->buffer_length = buffer_length; + puller->converter = NULL; + puller->to_be_freed = NULL; + puller->cdata = NULL; + puller->cdata_len = 0; + puller->tok_head = NULL; + puller->tok_tail = NULL; + puller->status = 
XML_STATUS_OK; + puller->line = 0; + puller->error = NULL; + puller->filedesc = filedesc; + + if (puller->filedesc < 0) { + free(puller); + return NULL; + } + + puller->buffer = (char *) malloc(puller->buffer_length); + if (puller->buffer == NULL) { + free(puller); + return NULL; + } + + if (encoding != NULL) { + puller->converter = iconv_open(encoding, "utf-8"); + if (puller->converter == (iconv_t) -1) { + free(puller->buffer); + free(puller); + return NULL; + } + } + + puller->parser = XML_ParserCreate(NULL); + if (puller->parser == NULL) { + iconv_close(puller->converter); + free(puller->buffer); + free(puller); + return NULL; + } + + XML_SetUserData(puller->parser, (void *) puller); + + return puller; + } + + + void XML_PullerFree(XML_Puller puller) + { + if (puller == NULL) + return; + + free(puller->buffer); + + if (puller->converter != NULL) + iconv_close(puller->converter); + + if (puller->parser != NULL) + XML_ParserFree(puller->parser); + + XML_PullerFreeTokenData(puller->to_be_freed); + XML_PullerFreeTokenData(puller->tok_head); + + free(puller->cdata); + free(puller); + } + + + void XML_PullerEnable (XML_Puller puller, + XML_PullerTokenKindType enabledTokenKindSet) + { + if (enabledTokenKindSet & XML_PULLER_START_ELEMENT) + XML_SetStartElementHandler(puller->parser, start_element_handler); + + if (enabledTokenKindSet & XML_PULLER_END_ELEMENT) + XML_SetEndElementHandler(puller->parser, end_element_handler); + + if (enabledTokenKindSet & XML_PULLER_CHARDATA) + XML_SetCharacterDataHandler(puller->parser, chardata_handler); + + if (enabledTokenKindSet & XML_PULLER_START_CDATA) + XML_SetStartCdataSectionHandler(puller->parser, start_cdata_handler); + + if (enabledTokenKindSet & XML_PULLER_END_CDATA) + XML_SetEndCdataSectionHandler(puller->parser, end_cdata_handler); + + if (enabledTokenKindSet & XML_PULLER_PROC_INST) + XML_SetProcessingInstructionHandler(puller->parser, proc_inst_handler); + + if (enabledTokenKindSet & XML_PULLER_COMMENT) + 
XML_SetCommentHandler(puller->parser, comment_handler); + + if (enabledTokenKindSet & XML_PULLER_DECL) + XML_SetXmlDeclHandler(puller->parser, decl_handler); + + if (enabledTokenKindSet & XML_PULLER_START_DOCT) + XML_SetStartDoctypeDeclHandler(puller->parser, start_doct_handler); + + if (enabledTokenKindSet & XML_PULLER_END_DOCT) + XML_SetEndDoctypeDeclHandler(puller->parser, end_doct_handler); + + if (enabledTokenKindSet & XML_PULLER_UNPARSED) + XML_SetDefaultHandler(puller->parser, unparsed_handler); + + } + + + /* Remove the expat handler for every token kind whose bit is set in + * disabledTokenKindSet. Each kind clears exactly the handler that + * XML_PullerEnable installs for it. Disabling XML_PULLER_CHARDATA + * also discards the puller->cdata buffer. + */ + void XML_PullerDisable (XML_Puller puller, + XML_PullerTokenKindType disabledTokenKindSet) + { + if (disabledTokenKindSet & XML_PULLER_START_ELEMENT) + XML_SetStartElementHandler(puller->parser, NULL); + + if (disabledTokenKindSet & XML_PULLER_END_ELEMENT) + XML_SetEndElementHandler(puller->parser, NULL); + + if (disabledTokenKindSet & XML_PULLER_CHARDATA) { + free(puller->cdata); + puller->cdata = NULL; + puller->cdata_len = 0; + XML_SetCharacterDataHandler(puller->parser, NULL); + } + + if (disabledTokenKindSet & XML_PULLER_START_CDATA) + XML_SetStartCdataSectionHandler(puller->parser, NULL); + + if (disabledTokenKindSet & XML_PULLER_END_CDATA) + XML_SetEndCdataSectionHandler(puller->parser, NULL); + + if (disabledTokenKindSet & XML_PULLER_PROC_INST) + XML_SetProcessingInstructionHandler(puller->parser, NULL); + + if (disabledTokenKindSet & XML_PULLER_COMMENT) + XML_SetCommentHandler(puller->parser, NULL); + + if (disabledTokenKindSet & XML_PULLER_DECL) + XML_SetXmlDeclHandler(puller->parser, NULL); + + /* The doctype kinds have handlers of their own. The default + * handler belongs to XML_PULLER_UNPARSED alone and must not be + * cleared here. + */ + if (disabledTokenKindSet & XML_PULLER_START_DOCT) + XML_SetStartDoctypeDeclHandler(puller->parser, NULL); + + if (disabledTokenKindSet & XML_PULLER_END_DOCT) + XML_SetEndDoctypeDeclHandler(puller->parser, NULL); + + if (disabledTokenKindSet & XML_PULLER_UNPARSED) + XML_SetDefaultHandler(puller->parser, NULL); + + } + + + /* Deliver the next pending token, reading and parsing more input + * while the list of pending tokens is empty. The token delivered by + * the previous call is freed first. Returns NULL for a NULL puller + * and when no token could be produced. + */ + XML_PullerToken XML_PullerNext (XML_Puller puller) + { + XML_PullerToken tok = NULL; + + if (puller == NULL) + return NULL; + + 
XML_PullerFreeTokenData(puller->to_be_freed); + puller->to_be_freed = NULL; + + /* Read blocks of characters until there is at least one token. */ + while (puller->tok_head == NULL) { + int len; + + /* We check for previous errors as late as here because + * we want to make sure that every correct token can be + * read by the user if he chooses to do so. + */ + if (puller->status != XML_STATUS_OK) + return NULL; + + len = read(puller->filedesc, puller->buffer, puller->buffer_length); + + /* NOTE(review): a failed read() ends the loop without changing + * puller->status. + */ + if (len < 0) + break; + + /* A read of zero bytes is passed on as the final chunk. */ + if (XML_Parse(puller->parser, puller->buffer, len, len == 0) == XML_STATUS_ERROR) { + XML_PullerSetError(puller); + break; + } + + if (len == 0) + break; + } + + if (puller->tok_head != NULL) { + /* Remove the token from the list of pending tokens and deliver it. */ + tok = puller->to_be_freed = puller->tok_head; + puller->tok_head = puller->tok_head->next; + tok->next = NULL; + } + + return tok; + } + + + /* Like XML_PullerNext, but the returned token is not remembered in + * to_be_freed, so a later call of XML_PullerNext will not free it and + * the caller has to free it. Returns NULL for a NULL puller, just as + * XML_PullerNext does. + */ + XML_PullerToken XML_PullerNext_m (XML_Puller puller) + { + XML_PullerToken tok = XML_PullerNext(puller); + + /* XML_PullerNext accepts a NULL puller, so check before the access. */ + if (puller != NULL) + puller->to_be_freed = NULL; + + return tok; + } + + diff -rcN ../gawk-3.1.4/xml_puller.h ../gawk-3.1.4.xml/xml_puller.h *** ../gawk-3.1.4/xml_puller.h 1970-01-01 01:00:00.000000000 +0100 --- ../gawk-3.1.4.xml/xml_puller.h 2004-08-12 17:10:10.000000000 +0200 *************** *** 0 **** --- 1,204 ---- + /* + * xml_puller.h --- a pull-parser API for reading XML input + */ + + /* + * Copyright (C) 1986, 1988, 1989, 1991-2003 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + + #ifndef _XML_PULLER_H + #define _XML_PULLER_H 1 + + #include + #include + + /* We want to use expat for parsing XML files. + * It is quite a good guess that expat might + * be installed as xmlparser.h on systems which + * have an older GNU compiler or none at all. + * But not on RedHat 7.2, which has gcc 2.96 + * and expat.h. + */ + #if __GNUC__ < 3 + #include + #else + #include + #endif + + /* We intend to support older versions of expat. See expat.h. */ + #ifndef XML_STATUS_OK + #define XML_STATUS_OK 1 + #define XML_STATUS_ERROR 0 + #endif + + + /* These are the kinds of token which are returned by + * the XML pull parser. By default, no kind of token + * will be returned. You have to switch on each kind + * you want to see by setting the respective bit in + * the mask. You can switch these bits on and off during + * the parsing process at any time. + * The expat callback functions are not mapped one-to-one + * on the token kinds. For example, all occurrences of + * character data are accumulated before they are returned + * in one token. + */ + typedef int XML_PullerTokenKindType; + #define XML_PULLER_START_ELEMENT 1 + #define XML_PULLER_END_ELEMENT 2 + #define XML_PULLER_CHARDATA 4 + #define XML_PULLER_START_CDATA 8 + #define XML_PULLER_END_CDATA 16 + #define XML_PULLER_PROC_INST 32 + #define XML_PULLER_COMMENT 64 + #define XML_PULLER_DECL 128 + #define XML_PULLER_START_DOCT 256 + #define XML_PULLER_END_DOCT 512 + #define XML_PULLER_UNPARSED 1024 + + + /* The pull parser returns pointers to tokens. 
+ * This is the contents of a token. When such + * a pointer is returned to you, remember that + * its contents was properly allocated and has + * to be freed some time later. By default, you + * do not have to care about memory allocation; + * the pull parser will do it for you. But this + * comfort has a price: The content of one token + * will only be valid until you ask for the next + * token. If you want a token's content to survive + * the succeeding tokens, you can let a different + * function of the pull parser return this token + * to you (see below). + */ + struct XML_PullerTokenDataType { + XML_PullerTokenKindType kind; /* one of the XML_PULLER_* values */ + char * name; + char * data; + char ** attr; + int number; + struct XML_PullerTokenDataType *next; /* next pending token; NULL once delivered */ + }; + typedef struct XML_PullerTokenDataType * XML_PullerToken; + + + /* This is the pull parser's content. You have to + * create one for each file you are going to process. + * Create as many as needed and none of them will + * interfere with another one's data. Do not change + * any of the fields in this struct. + * If, at any time, an error occurs, the "status" field + * will be set to XML_STATUS_ERROR, the "line" field + * contains the number of the line in the source file + * where the error occurred and the "error" field contains + * an explanatory text of the error. In case of flawless + * completion of file reading, the "status" field will + * be set to XML_STATUS_OK. + */ + struct XML_PullerDataType + { + int filedesc; /* descriptor the input is read from */ + char * buffer; /* read buffer of buffer_length bytes */ + int buffer_length; + iconv_t converter; /* NULL when no encoding was requested */ + XML_Parser parser; + volatile XML_PullerToken tok_head; /* first pending token, NULL if none */ + volatile XML_PullerToken tok_tail; /* last pending token */ + XML_PullerToken to_be_freed; /* freed by the next XML_PullerNext call */ + char * cdata; + int cdata_len; + int status; + int line; + const char * error; + }; + typedef struct XML_PullerDataType * XML_Puller; + + + /* These are the functions for creating, destroying and + * using the XML pull parser. You will need one parser + * for each file to process. 
If you pass 0 as the + * file descriptor, standard input will be used. File + * descriptors are not closed upon destruction of the + * pull parser. For the characters returned by the XML + * pull parser, you can choose among all the charactor + * encodings supported by your local iconv library. + * Just pass the name of the encoding or pass NULL if + * you do not care and UTF-8 is good enough. + * Each pull parser is associated with a character buffer + * whose length is determined at time of creation. Remember + * that this buffer length influences the blocking behavior + * of the parser when reading the file. If you need a parser + * with minimum lookahead, use a buffer_length of 1. + */ + XML_Puller XML_PullerCreate (int filedesc, char * encoding, int buffer_length); + + /* Each XML pull parser object has to be destroyed after use, + * in order to properly free all resources allocated to the + * parser instance. + */ + void XML_PullerFree (XML_Puller puller); + + /* This function is probably most important to you. + * No line will be read from the XML file, unless you invoke it. + * It reads the next token from the XML file and passes + * it to you. Remember that the token is actually a pointer + * to a chunk of memory which will be freed upon next invocation + * of this function. So, do not dereference one token after + * having called for its successor. + * The end of the token stream will be indicated by a + * NULL pointer being returned. A NULL pointer is also + * returned in case of an error while reading. So, you + * have to check the above mentioned status indicators + * for detecting the presence and the cause of an error. + * Upon first occurrence of an error, all subsequent invocations + * of this function will return NULL and the status of + * the XML puller will remain unchanged for you to find + * the cause of the first error. 
+ */ + XML_PullerToken XML_PullerNext (XML_Puller puller); + + /* Use this function if you want the XML pull parser not + * to free a specific token at any time. Remember that you are + * responsible for proper deallocation of the token by + * invocation of a function below. You can freely mix + * invocations of this function and the previous one. + * The result will be that some tokens are freed automatically + * and some have to be freed by you. + */ + XML_PullerToken XML_PullerNext_m (XML_Puller puller); + + /* This is the function that actually frees the resources + * allocated to a token. + */ + void XML_PullerFreeToken (XML_PullerToken token); + + /* Use the following two functions to switch on or off the + * production of specific kinds of tokens. If you do not + * invoke the first function, you will never receive a + * token at all from your parser (although the XML file + * is actually read properly). + */ + void XML_PullerEnable (XML_Puller puller, + XML_PullerTokenKindType enabledTokenKindSet); + void XML_PullerDisable (XML_Puller puller, + XML_PullerTokenKindType disabledTokenKindSet); + + #endif /* _XML_PULLER_H */ +