summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthew Turk <satai@gentoo.org>2003-01-03 05:43:19 +0000
committerMatthew Turk <satai@gentoo.org>2003-01-03 05:43:19 +0000
commit8091f15a2a06f83c3824d6e9709f03e42bcaf510 (patch)
tree8d50d21e9f20fef0f7ec12a0a4afa74aca89c255 /app-text/htmltidy/files
parentGreat ebuild and patch for GCC 3.x from Richard Garand <richard@garandnet.net>, (diff)
downloadgentoo-2-8091f15a2a06f83c3824d6e9709f03e42bcaf510.tar.gz
gentoo-2-8091f15a2a06f83c3824d6e9709f03e42bcaf510.tar.bz2
gentoo-2-8091f15a2a06f83c3824d6e9709f03e42bcaf510.zip
Closing 10515 with patch from ferdy@ferdyx.org to convert HTML to
DocBook/XML and DocBook/SGML.
Diffstat (limited to 'app-text/htmltidy/files')
-rw-r--r--app-text/htmltidy/files/digest-htmltidy-2.7.18-r11
-rw-r--r--app-text/htmltidy/files/htmltidy-dbpatch.diff890
2 files changed, 891 insertions, 0 deletions
diff --git a/app-text/htmltidy/files/digest-htmltidy-2.7.18-r1 b/app-text/htmltidy/files/digest-htmltidy-2.7.18-r1
new file mode 100644
index 000000000000..40d794f8de51
--- /dev/null
+++ b/app-text/htmltidy/files/digest-htmltidy-2.7.18-r1
@@ -0,0 +1 @@
+MD5 7694dcdb7d451b17477292d60186f477 tidy_src_020718.tgz 153771
diff --git a/app-text/htmltidy/files/htmltidy-dbpatch.diff b/app-text/htmltidy/files/htmltidy-dbpatch.diff
new file mode 100644
index 000000000000..f3322e273a72
--- /dev/null
+++ b/app-text/htmltidy/files/htmltidy-dbpatch.diff
@@ -0,0 +1,890 @@
+--- /cise/tmp/ppadala/tidy/include/html.h Fri May 31 17:52:04 2002
++++ include/html.h Tue Jul 2 15:04:28 2002
+@@ -653,6 +653,8 @@
+ Lexer *lexer, Node *node);
+ void PPrintXMLTree(Out *fout, uint mode, uint indent,
+ Lexer *lexer, Node *node);
++void PrintSgml(Out *fout, uint mode, uint indent,
++ Lexer *lexer, Node *node);
+ void PFlushLine(Out *out, uint indent);
+ void PCondFlushLine(Out *out, uint indent);
+ void PrintBody(Out *fout, Lexer *lexer, Node *root); /* Feature request #434940 - fix by Dave Raggett/Ignacio Vazquez-Abrams 21 Jun 01 */
+@@ -908,6 +910,8 @@
+ extern Bool XmlOut;
+ extern Bool xHTML;
+ extern Bool HtmlOut; /* Yes means set explicitly. */
++extern Bool DbSgml;
++extern Bool DbXml;
+ extern Bool XmlPi; /* add <?xml?> */
+ extern Bool XmlPIs; /* assume PIs end with ?> as per XML */
+ extern Bool XmlSpace;
+--- /cise/tmp/ppadala/tidy/src/config.c Sun Jul 7 23:24:52 2002
++++ src/config.c Sat Jul 13 18:14:47 2002
+@@ -81,6 +81,8 @@
+ Bool XmlOut = no; /* create output as XML */
+ Bool xHTML = no; /* output extensible HTML */
+ Bool HtmlOut = no; /* output plain-old HTML, even for XHTML input. Yes means set explicitly. */
++Bool DbSgml = no; /* output docbook SGML */
++Bool DbXml = no; /* output docbook XML */
+ Bool XmlPi = no; /* add <?xml?> for XML docs */
+ Bool RawOut = no; /* avoid mapping values > 127 to entities: not used for anything yet */
+ Bool UpperCaseTags = no; /* output tags in upper not lower case */
+--- /cise/tmp/ppadala/tidy/src/lexer.c Sun Jul 7 23:25:47 2002
++++ src/lexer.c Sat Jul 13 18:14:47 2002
+@@ -1674,6 +1674,35 @@
+ return doctype;
+ }
+
++/* Rewrite (or create) the DOCTYPE node of root for DocBook output.  The
++   public identifier is doctype_str when doctype_mode == doctype_user,
++   otherwise empty.  Returns no when NewXhtmlDocTypeNode() fails and also
++   on success (NOTE(review): callers in this patch ignore the result). */
++Bool SetSgmlDocType(Lexer *lexer, Node *root)
++{
++    /* Defaults matter: both were read uninitialised without a user doctype. */
++    char *fpi = "", *sysid = "";
++    Node *doctype;
++
++    if (doctype_mode == doctype_user && doctype_str)
++        fpi = doctype_str;
++
++    doctype = FindDocType(root);
++    if (!doctype) /* The html file doesn't contain doctype */
++        if ( !(doctype = NewXhtmlDocTypeNode( root )) )
++            return no;
++
++    lexer->txtstart = lexer->txtend = lexer->lexsize;
++    AddStringLiteral(lexer, fpi);    /* add public identifier */
++    AddStringLiteral(lexer, sysid);  /* add system identifier */
++    lexer->txtend = lexer->lexsize;
++
++    doctype->start = lexer->txtstart;
++    doctype->end = lexer->txtend;
++
++    return no;
++}
++
+ Bool SetXHTMLDocType(Lexer *lexer, Node *root)
+ {
+ char *fpi = "", *sysid = "", *dtdsub, *name_space = XHTML_NAMESPACE; /* #578005 - fix by Anonymous 05 Jul 02 */
+--- /cise/tmp/ppadala/tidy/src/localize.c Sun Jul 7 23:26:39 2002
++++ src/localize.c Sat Jul 13 18:14:47 2002
+@@ -1054,6 +1054,8 @@
+ tidy_out(out, " -asxml to convert HTML to well formed XHTML\n");
+ tidy_out(out, " -asxhtml to convert HTML to well formed XHTML\n");
+ tidy_out(out, " -ashtml to force XHTML to well formed HTML\n");
++ tidy_out(out, " -dbsgml to convert HTML to Docbook SGML\n");
++ tidy_out(out, " -dbxml to convert HTML to Docbook XML\n");
+ tidy_out(out, " -slides to burst into slides on H2 elements\n");
+
+ /* TRT */
+--- /cise/tmp/ppadala/tidy/src/parser.c Sun Jul 7 23:27:35 2002
++++ src/parser.c Sat Jul 13 18:14:47 2002
+@@ -501,7 +501,7 @@
+ TrimTrailingSpace(lexer, element, text);
+ }
+
+-static Bool DescendantOf(Node *element, Dict *tag)
++Bool DescendantOf(Node *element, Dict *tag)
+ {
+ Node *parent;
+
+--- /cise/tmp/ppadala/tidy/src/pprint.c Thu Jul 18 14:21:27 2002
++++ src/pprint.c Tue Jul 30 13:50:53 2002
+@@ -6,9 +6,9 @@
+
+ CVS Info :
+
+- $Author: satai $
+- $Date: 2003/01/03 05:43:19 $
+- $Revision: 1.1 $
++ $Author: satai $
++ $Date: 2003/01/03 05:43:19 $
++ $Revision: 1.1 $
+
+ */
+
+@@ -36,6 +36,13 @@
+ static void PPrintPhp(Out *fout, uint indent,
+ Lexer *lexer, Node *node);
+
++/* Tag types to distinguish printing: passed to PrintSgmlTag() and its helpers */
++typedef enum {
++ SgmlTagStart, /* emit the opening form of the DocBook tag */
++ SgmlTagEnd /* emit the closing form of the DocBook tag */
++}SgmlTagType;
++
++extern Bool DescendantOf(Node *element, Dict *tag);
+
+ #define NORMAL 0
+ #define PREFORMATTED 1
+@@ -1769,6 +1776,634 @@
+ }
+ }
+
++/* Warn on stderr that an ASP/JSTE/PHP node has no DocBook equivalent;
++   nothing is written to fout.  fputs() replaces fprintf(stderr, str) so
++   the message is never interpreted as a printf format string. */
++void PrintSgmlDefault(Out *fout)
++{   fputs("SGML cannot contain these elements", stderr);
++}
++
++/* Open the DocBook document by emitting the <article> root element. */
++void PrintSgmlBodyStart(Out *fout, uint indent)
++{
++    PPrintString(fout, indent, "<article>");
++}
++
++#define DIGIT(c) ((c) - '0') /* value of an ASCII digit character */
++#define TOTAL_H 6 /* heading levels H1..H6 */
++static Bool seen_h[TOTAL_H] = {no, no, no, no, no, no}; /* sections still open */
++
++/* Yuck ugly. FIXME: SECT() uses whichever startsect is in scope at the call. */
++#define SECT(i) ((i) - startsect)
++static int startsect = 0; /* We are at level 0(H1) initially */
++
++/* Close every open section, deepest first, then the <article> root.  Fixed
++   strings are printed directly: "</simplesect>" (was misspelt "</simpleect>")
++   and "</article>" both overflowed the old 10-byte scratch buffer. */
++void PrintSgmlBodyEnd(Out *fout, uint indent)
++{   char str[16]; /* only ever holds "</sectN>" */
++    int i;
++    for (i = TOTAL_H - 1; i >= 0; --i) {
++        if (seen_h[i] != yes)
++            continue;
++        if (i == TOTAL_H - 1) /* H6 was opened as <simplesect> */
++            PPrintString(fout, indent, "</simplesect>");
++        else {
++            sprintf(str, "</sect%d>", SECT(i) + 1);
++            PPrintString(fout, indent, str);
++        }
++        seen_h[i] = no;
++    }
++    PPrintString(fout, indent, "</article>");
++}
++
++/* Build the id string for the section opened by heading `node`.
++
++   Source of the id, in order of preference: the first <a> child (its name
++   attribute, else its href attribute, else empty); the text of the first
++   text-node child; the empty string.  A leading '#' is dropped, every space
++   becomes '_' and the result is cut to SGML_NAMELEN characters.
++
++   Returns a MemAlloc()ed string; PrintSectTag() releases it with MemFree().
++
++   Fix: an attribute present without a value (value == NULL) used to be
++   passed to wstrlen(); it now yields an empty id. */
++char *GetContent(Lexer *lexer, Node *node)
++{
++    Node *child;
++    char *str, *src, *dst;
++    int size = 0, i;
++
++    /* Find the <a> tag */
++    for (child = node->content;
++         child && child->tag != tag_a;
++         child = child->next)
++        ;
++
++    if (child == NULL) { /* There is no <a> .. </a> tag */
++        /* Discard all elements which are not text nodes */
++        for (child = node->content;
++             child && child->type != TextNode;
++             child = child->next)
++            ;
++    }
++
++    if (child == NULL) { /* There's no TextNode either */
++        str = MemAlloc(1);
++        str[0] = '\0';
++        return str;
++    }
++
++    if (child->type == TextNode) {
++        /* The text lives in the lexer buffer between start and end. */
++        size = child->end - child->start;
++        str = MemAlloc(size + 1);
++        str[size] = '\0';
++        wstrncpy(str, lexer->lexbuf + child->start, size);
++    }
++    else { /* only an <a> element can reach this branch */
++        AttVal *name = GetAttrByName(child, "name");
++        char *value;
++
++        if (name == NULL)
++            name = GetAttrByName(child, "href");
++
++        /* No href or name, or no value: let's take empty id */
++        value = (name != NULL) ? name->value : NULL;
++        if (value != NULL)
++            size = wstrlen(value);
++
++        str = MemAlloc(size + 1);
++        str[size] = '\0';
++        if (value != NULL)
++            wstrncpy(str, value, size);
++    }
++
++#define SGML_NAMELEN 44 /* Maximum id namelength */
++
++    /* Normalise in place.  src runs at most one character ahead of dst, so
++       dropping a leading '#' needs no second buffer. */
++    src = dst = str;
++    if (*src == '#')
++        ++src;
++
++    for (i = 0; *src && i < SGML_NAMELEN; ++i, ++src, ++dst)
++        *dst = (*src == ' ') ? '_' : *src;
++    *dst = '\0';
++
++    return str;
++}
++
++/* Emit the opening section tag and "<title>" for heading `node` ("hN"); the
++   section digit is the heading digit minus the startsect argument. */
++void PrintSectTag( Out *fout, uint indent, Lexer *lexer, Node *node,
++                   uint startsect)
++{   char buf[100];
++    char level = node->element[1];
++    char *id = GetContent(lexer, node);
++
++    if (level != '6')
++        sprintf(buf, "<sect%c id=\"%s\"><title>", SECT(level), id);
++    else /* there's no sect6, so <simplesect> stands in for it */
++        sprintf(buf, "<simplesect id=\"%s\"><title>", id);
++    PPrintString(fout, indent, buf);
++    MemFree(id);
++}
++
++/* yes when the direct parent's element name is 'h' followed by one digit
++   (h1, h2, ...), i.e. the node sits immediately inside a heading. */
++Bool ImmediateDescendantOfHTags(Node *element)
++{
++    char *name = element->parent->element;
++    if (strlen(name) != 2 || name[0] != 'h' || !IsDigit(name[1]))
++        return no;
++    return yes;
++}
++
++/* Emit the opening DocBook markup for <a>.  name -> <para id="..."> (or <anchor id="..."/>
++   inside a <p>); href -> <link linkend> for "#frag", else <ulink url>.  Fixes: the markup is
++   printed piecewise (no 500-byte sprintf buffer), and a missing/valueless href prints nothing. */
++void PrintSgmlLink(Out *fout, uint indent, Node *node)
++{   AttVal *addr = GetAttrByName(node, "name");
++    char *open, *close = "\">", *value;
++    if (ImmediateDescendantOfHTags(node)) /* anchors directly inside a heading print nothing */
++        return;
++    if (addr != NULL) {
++        value = addr->value ? addr->value : ""; /* valueless name: empty id */
++        if (!DescendantOf(node, tag_p))
++            open = "<para id=\"";
++        else { /* We cannot have a <para> inside another <para> */
++            open = "<anchor id=\""; close = "\"/>";
++        }
++    } else {
++        addr = GetAttrByName(node, "href");
++        if (addr == NULL || (value = addr->value) == NULL)
++            return;
++        open = "<ulink url=\"";
++        if (value[0] == '#') { open = "<link linkend=\""; ++value; }
++        if (!DescendantOf(node, tag_p) && node->prev && node->prev->type == TextNode)
++            PPrintString(fout, indent, "<para>"); /* link that follows bare text gets its own <para> */
++    }
++    PPrintString(fout, indent, open);
++    PPrintString(fout, indent, value);
++    PPrintString(fout, indent, close);
++}
++
++/* Close whatever PrintSgmlLink() opened for this <a>: </para> for a named
++   anchor outside a <p> (the <anchor .. /> used inside a <p> needs no end
++   tag), otherwise </link> or </ulink> followed by </para> when <para> was
++   opened.  Fix: an <a> with no name and no usable href is skipped instead of
++   dereferencing a NULL attribute or value. */
++void PrintSgmlLinkEnd(Out *fout, uint indent, Node *node)
++{
++    AttVal *addr = GetAttrByName(node, "name");
++
++    if (ImmediateDescendantOfHTags(node))
++        return;
++
++    if (addr != NULL) {
++        if (!DescendantOf(node, tag_p))
++            PPrintString(fout, indent, "</para>");
++        return;
++    }
++
++    addr = GetAttrByName(node, "href");
++    if (addr == NULL || addr->value == NULL)
++        return;
++    PPrintString(fout, indent, addr->value[0] == '#' ? "</link>" : "</ulink>");
++    if (!DescendantOf(node, tag_p) &&
++        node->prev && node->prev->type == TextNode)
++        PPrintString(fout, indent, "</para>");
++}
++
++
++/* Print tag text `str` ("<name>") as given, or as "</name>" for SgmlTagEnd. */
++void PrintSgmlTagString(Out *fout, uint mode, uint indent, SgmlTagType sgmltag_type, char *str)
++{   PPrintChar(*str, mode | CDATA); /* leading character, with the CDATA mode bit set */
++    if (sgmltag_type == SgmlTagEnd)
++        PPrintChar('/', mode);
++    PPrintString(fout, indent, &str[1]);
++}
++
++/* Open the DocBook list for an HTML list element: <ul> -> <itemizedlist>, <ol> ->
++   <orderedlist>, <dl> -> <variablelist>.  Other tags print nothing; lexer and mode are unused. */
++void PrintSgmlList(Lexer *lexer, Out *fout, uint mode, uint indent, Node *node)
++{
++    char *open = (node->tag == tag_ul) ? "<itemizedlist>"
++               : (node->tag == tag_ol) ? "<orderedlist>"
++               : (node->tag == tag_dl) ? "<variablelist>" : NULL;
++    if (open != NULL)
++        PPrintString(fout, indent, open);
++}
++
++/* Close the DocBook list for an HTML list element: <ul> -> </itemizedlist>, <ol> ->
++   </orderedlist>, <dl> -> </variablelist>.  Other tags print nothing; lexer and mode are unused. */
++void PrintSgmlListEnd(Lexer *lexer, Out *fout, uint mode, uint indent, Node *node)
++{
++    char *close = (node->tag == tag_ul) ? "</itemizedlist>"
++                : (node->tag == tag_ol) ? "</orderedlist>"
++                : (node->tag == tag_dl) ? "</variablelist>" : NULL;
++    if (close != NULL)
++        PPrintString(fout, indent, close);
++}
++
++/* Open a <listitem> for an HTML <li> or <dd>; any other tag prints nothing. */
++void PrintSgmlListItem(Out *fout, uint indent, Node *node)
++{
++    if (node->tag == tag_li || node->tag == tag_dd)
++        PPrintString(fout, indent, "<listitem>");
++}
++
++/* Close a list item: <li> ends the <listitem>; <dd> also ends the <varlistentry>. */
++void PrintSgmlListItemEnd(Out *fout, uint indent, Node *node)
++{   char *close = (node->tag == tag_li) ? "</listitem>"
++                : (node->tag == tag_dd) ? "</listitem></varlistentry>" : NULL;
++    if (close != NULL) PPrintString(fout, indent, close);
++}
++
++/* Emit an <inlinemediaobject> for an <img>; without a usable src nothing is
++   printed (other attributes such as width/height are ignored).  fileref is
++   printed piecewise: sprintf() into 100 bytes overflowed on a long src. */
++void PrintSgmlImage(Out *fout, uint indent, Node *node)
++{   AttVal *addr = GetAttrByName(node, "src");
++    if (addr == NULL || addr->value == NULL)
++        return;
++    PPrintString(fout, indent, "<inlinemediaobject><imageobject>");
++    PCondFlushLine(fout, indent);
++    PPrintString(fout, indent, "<imagedata fileref=\"");
++    PPrintString(fout, indent, addr->value);
++    PPrintString(fout, indent, "\">"); /* NOTE(review): not self-closed; check the -dbxml output */
++    PCondFlushLine(fout, indent);
++    PPrintString(fout, indent, "</imageobject></inlinemediaobject>");
++    PCondFlushLine(fout, indent);
++}
++
++/* Count the columns of a table from its first <tr> child: each <th>/<td>
++   counts once, or colspan times when it has a colspan attribute.
++   Fixes: a table with no direct <tr> child returns 0 instead of walking off
++   the end of the child list, and a colspan that is missing its value or is
++   not a positive number counts as 1. */
++int CountColumns(Node *node)
++{
++    Node *row, *cell;
++    int ncols = 0;
++
++    /* FIXME */
++    /* Perhaps this is not needed, check with HTML standard later */
++    for (row = node->content; row != NULL && row->tag != tag_tr; row = row->next)
++        ;
++    if (row == NULL)
++        return 0;
++
++    /* This can contain th or td's */
++    for (cell = row->content; cell != NULL; cell = cell->next) {
++        if (cell->tag == tag_th || cell->tag == tag_td) {
++            AttVal *colspan = GetAttrByName(cell, "colspan");
++            int span = (colspan && colspan->value) ? atoi(colspan->value) : 1;
++            ncols += (span > 0) ? span : 1;
++        }
++        else
++            fprintf(stderr, "PrintSgml: error in table processing\n");
++    }
++    return ncols;
++}
++
++/* Open an <informaltable> with a <tgroup> sized by CountColumns(node). */
++void PrintSgmlTable(Out *fout, uint indent, Node *node)
++{
++    char tgroup[100];
++
++    PPrintString(fout, indent, "<informaltable>");
++    sprintf(tgroup, "<tgroup cols=\"%d\"><tbody>", CountColumns(node));
++    PPrintString(fout, indent, tgroup);
++}
++
++/* Close the <tbody>, <tgroup> and <informaltable> opened by PrintSgmlTable(). */
++void PrintSgmlTableEnd(Out *fout, uint indent, Node *node)
++{   PPrintString(fout, indent, "</tbody></tgroup></informaltable>");
++}
++
++/* yes when some ancestor of `element` has the element name "address"
++   (compared with wstrcasecmp()); the node itself is not examined. */
++Bool DescendantOfAddress(Node *element)
++{
++    Node *up = element->parent;
++    while (up != null) {
++        if (up->element && wstrcasecmp(up->element, "address") == 0)
++            return yes;
++        up = up->parent;
++    }
++    return no;
++}
++
++/* Translate one HTML element into DocBook markup; sgmltag_type selects the opening or the
++   closing form.  Fixes: implicit-int static, "</simplesect>" overflowing str[10], hN range. */
++void PrintSgmlTag( Out *fout, uint mode, uint indent, Lexer *lexer, Node *node, SgmlTagType sgmltag_type)
++{   static int level = -1; /* index (0 = H1) of the heading opened last; -1 before the first */
++    if(node->tag == tag_html) {
++        if(sgmltag_type == SgmlTagStart)
++            PrintSgmlBodyStart(fout, indent);
++        else if(sgmltag_type == SgmlTagEnd)
++            PrintSgmlBodyEnd(fout, indent);
++    }
++    else if(node->tag == tag_head)
++        PrintSgmlTagString(fout, mode, indent, sgmltag_type,"<articleinfo>");
++    else if(node->tag == tag_title)
++        PrintSgmlTagString(fout, mode, indent, sgmltag_type,"<title>");
++    /* May be we can replace with node->model & CM_LIST */
++    else if(node->tag == tag_ul || node->tag == tag_ol ||
++            node->tag == tag_dl) {
++        if(sgmltag_type == SgmlTagStart)
++            PrintSgmlList(lexer, fout, mode, indent, node);
++        else if(sgmltag_type == SgmlTagEnd)
++            PrintSgmlListEnd(lexer, fout, mode, indent, node);
++    }
++    else if(node->tag == tag_dt) {
++        if(sgmltag_type == SgmlTagStart)
++            PPrintString(fout, indent, "<varlistentry><term>");
++        else if(sgmltag_type == SgmlTagEnd)
++            PPrintString(fout, indent, "</term>");
++    }
++    else if(node->tag == tag_li || node->tag == tag_dd) {
++        if(sgmltag_type == SgmlTagStart)
++            PrintSgmlListItem(fout, indent, node);
++        else if(sgmltag_type == SgmlTagEnd)
++            PrintSgmlListItemEnd(fout, indent, node);
++    }
++    /* Later we should clean this before coming to PrintSgml */
++    else if(node->tag == tag_p &&
++            /* Table <entry> processing */
++            !DescendantOf(node, tag_th) && !DescendantOf(node, tag_td) &&
++            !DescendantOfAddress(node))
++        PrintSgmlTagString(fout, mode, indent, sgmltag_type, "<para>");
++    else if(node->tag == tag_blockquote)
++        PrintSgmlTagString(fout, mode, indent, sgmltag_type, "<blockquote>");
++    else if(node->tag == tag_pre &&
++            /* Table <entry> processing */
++            !DescendantOf(node, tag_th) && !DescendantOf(node, tag_td))
++        PrintSgmlTagString(fout, mode, indent, sgmltag_type,
++                           "<programlisting>");
++    else if(node->tag == tag_a) {
++        if(sgmltag_type == SgmlTagStart)
++            PrintSgmlLink(fout, indent, node);
++        else if(sgmltag_type == SgmlTagEnd)
++            PrintSgmlLinkEnd(fout, indent, node);
++    }
++    /* Table would require more processing */
++    else if(node->tag == tag_table) {
++        if(sgmltag_type == SgmlTagStart)
++            PrintSgmlTable(fout, indent, node);
++        else if(sgmltag_type == SgmlTagEnd)
++            PrintSgmlTableEnd(fout, indent, node);
++    }
++    else if(node->tag == tag_tr)
++        PrintSgmlTagString(fout, mode, indent, sgmltag_type, "<row>");
++    else if(node->tag == tag_td || node->tag == tag_th)
++        PrintSgmlTagString(fout, mode, indent, sgmltag_type, "<entry>");
++    else if(node->tag == tag_img) { /* This is a StartEndTag */
++        if(sgmltag_type == SgmlTagStart)
++            PrintSgmlImage(fout, indent, node);
++    }
++
++    else if(wstrcasecmp(node->element, "cite") == 0)
++        PrintSgmlTagString(fout, mode, indent, sgmltag_type,
++                           "<citation>");
++    /* We should distinguish tag_strong and tag_em later
++       haven't found proper docbook tag for <strong> */
++    else if(node->tag == tag_em || node->tag == tag_strong ||
++            wstrcasecmp(node->element, "address") == 0) {
++        if(sgmltag_type == SgmlTagStart) {
++            if(DescendantOf(node, tag_p) || DescendantOf(node, tag_pre))
++                PPrintString(fout, indent, "<emphasis>");
++            else
++                PPrintString(fout, indent, "<para><emphasis>");
++        }
++        else if(sgmltag_type == SgmlTagEnd) {
++            if(DescendantOf(node, tag_p) || DescendantOf(node, tag_pre))
++                PPrintString(fout, indent, "</emphasis>");
++            else
++                PPrintString(fout, indent, "</emphasis></para>");
++        }
++    }
++    else {
++        if(wstrcasecmp(node->element, "code") == 0 &&
++           !(node->parent->tag == tag_dd ||
++             node->parent->tag == tag_li))
++            PrintSgmlTagString(fout, mode, indent,
++                               sgmltag_type, "<literal>");
++        else if(strlen(node->element) == 2 &&
++                node->element[0] == 'h' &&
++                node->element[1] >= '1' && node->element[1] <= '6') { /* was IsDigit(): h0/h7..h9 indexed outside seen_h[] */
++            if(sgmltag_type == SgmlTagStart) {
++                int sectnum = DIGIT(node->element[1]) - 1;
++                char str[16]; /* longest text stored is "</simplesect>": 13 chars + NUL (was str[10]) */
++                if(seen_h[sectnum] == no)
++                    seen_h[sectnum] = yes;
++                else {
++                    int i = level; /* close deeper sections first, then this level */
++                    while(i > sectnum && seen_h[i] == yes) {
++                        if(i == 5)
++                            sprintf(str, "</simplesect>");
++                        else
++                            sprintf(str, "</sect%d>", SECT(i) + 1);
++                        PPrintString(fout, indent, str);
++                        seen_h[i] = no;
++                        --i;
++                    }
++                    if(sectnum == 5)
++                        sprintf(str, "</simplesect>");
++                    else
++                        sprintf(str, "</sect%d>", SECT(sectnum) + 1);
++                    PPrintString(fout, indent, str);
++                }
++                /* H1 is not the first level
++                   like the curses man2html pages */
++                if(level == -1 && sectnum > 0)
++                    startsect = sectnum;
++
++                PrintSectTag(fout, indent, lexer, node, startsect);
++                level = sectnum;
++            }
++            else
++                PPrintString(fout, indent, "</title>"); /* closes the <title> opened by PrintSectTag() */
++        }
++    }
++}
++
++void PrintSgml( Out *fout, uint mode, uint indent,
++ Lexer *lexer, Node *node)
++{ Node *content;
++ /* Print the tree rooted at node as DocBook markup, recursing into node->content. */
++ if (node == null)
++ return;
++ /* Dispatch on node->type first; element nodes fall through to the last branches. */
++ if (node->type == TextNode) {
++ if(DescendantOf(node, tag_dd) && !DescendantOf(node, tag_a) && /* bare text inside <dd> gets its own <para> */
++ !DescendantOf(node, tag_p) &&
++ /* We have to decide on this table stuff later
++ * <entry> processing is complex */
++ !DescendantOf(node, tag_td) && !DescendantOf(node, tag_th))
++ /* && wstrcasecmp(node->parent->element, "code") != 0)
++ above line may be needed later to properly convert <code> stuff */
++ {
++ PPrintString(fout, indent, "<para>");
++ PPrintText(fout, mode, indent, lexer, node->start, node->end);
++ PPrintString(fout, indent, "</para>");
++ }
++ else {
++ if(DescendantOf(node, tag_style)) /* text under <style> is dropped with a message on stderr */
++ fprintf(stderr, "PrintSgml: skipping style elements\n\n");
++ else
++ PPrintText(fout, mode, indent, lexer, node->start, node->end);
++ }
++ }
++ else if(node->type == CDATATag && EscapeCdata) /* with EscapeCdata set, CDATA goes through PPrintText */
++ PPrintText(fout, mode, indent, lexer, node->start, node->end);
++ else if (node->type == CommentTag)
++ PPrintComment(fout, indent, lexer, node);
++ else if (node->type == RootNode)
++ {
++ for (content = node->content; /* the root prints nothing itself, only its children */
++ content != null;
++ content = content->next)
++ PrintSgml(fout, mode, indent, lexer, content);
++ }
++ else if (node->type == DocTypeTag)
++ PPrintDocType(fout, indent, lexer, node);
++ else if (node->type == CDATATag) /* reached only when EscapeCdata is not set */
++ PPrintCDATA(fout, indent, lexer, node);
++ else if (node->type == SectionTag)
++ PPrintSection(fout, indent, lexer, node);
++ else if (node->type == AspTag ||
++ node->type == JsteTag ||
++ node->type == PhpTag )
++ PrintSgmlDefault(fout); /* writes a message to stderr; nothing goes to fout */
++ else if (node->type == ProcInsTag)
++ PPrintPI(fout, indent, lexer, node);
++ else if (node->type == XmlDecl)// && DbXml May be this is needed
++ PPrintXmlDecl(fout, indent, lexer, node);
++ else if (node->tag->model & CM_EMPTY || /* NOTE(review): node->tag is dereferenced unchecked here, while the ParsePre test below checks it first -- confirm it cannot be null */
++ (node->type == StartEndTag && !xHTML))
++ {
++ if (!(node->tag->model & CM_INLINE))
++ PCondFlushLine(fout, indent);
++ /* only the start form is printed on this path; no children, no end tag */
++ if (MakeClean && node->tag == tag_wbr)
++ PPrintString(fout, indent, " ");
++ else
++ PrintSgmlTag(fout, mode, indent, lexer, node, SgmlTagStart);
++ }
++ else {
++ if (node->type == StartEndTag)
++ node->type = StartTag; /* note: this rewrites the node stored in the tree */
++
++ if (node->tag && node->tag->parser == ParsePre)
++ {
++ PCondFlushLine(fout, indent);
++
++ indent = 0; /* everything below in this branch is printed with indent 0 */
++ PCondFlushLine(fout, indent);
++
++ PrintSgmlTag(fout, mode, indent, lexer, node, SgmlTagStart);
++ PFlushLine(fout, indent);
++
++ for (content = node->content;
++ content != null;
++ content = content->next)
++ PrintSgml(fout, (mode | PREFORMATTED | NOWRAP), /* children get the PREFORMATTED and NOWRAP mode bits */
++ indent, lexer, content);
++
++ PCondFlushLine(fout, indent);
++ PrintSgmlTag(fout, mode, indent, lexer, node, SgmlTagEnd);
++ PFlushLine(fout, indent);
++
++ if (IndentContent == no && node->next != null)
++ PFlushLine(fout, indent);
++ }
++ else if (node->tag->model & CM_INLINE) /* inline element: start tag, children, end tag */
++ { PrintSgmlTag(fout, mode, indent, lexer, node, SgmlTagStart);
++
++ if (ShouldIndent(node))
++ {
++ PCondFlushLine(fout, indent);
++ indent += spaces;
++
++ for (content = node->content;
++ content != null;
++ content = content->next)
++ PrintSgml(fout, mode, indent, lexer, content);
++
++ PCondFlushLine(fout, indent);
++ indent -= spaces; /* restore the caller's indent before the end tag */
++ PCondFlushLine(fout, indent);
++ }
++ else
++ {
++
++ for (content = node->content;
++ content != null;
++ content = content->next)
++ PrintSgml(fout, mode, indent, lexer, content);
++ }
++
++ PrintSgmlTag(fout, mode, indent, lexer, node, SgmlTagEnd);
++ }
++ else /* every other element that has a start and an end tag */
++ { PCondFlushLine(fout, indent);
++ if (SmartIndent && node->prev != null)
++ PFlushLine(fout, indent);
++
++ PrintSgmlTag(fout, mode ,indent, lexer, node, SgmlTagStart);
++ if (ShouldIndent(node))
++ PCondFlushLine(fout, indent);
++ else if (node->tag->model & CM_HTML ||
++ node->tag == tag_noframes ||
++ (node->tag->model & CM_HEAD && !(node->tag == tag_title)))
++ PFlushLine(fout, indent);
++
++ if (ShouldIndent(node))
++ { PCondFlushLine(fout, indent);
++ indent += spaces;
++
++ for (content = node->content;
++ content != null;
++ content = content->next)
++ PrintSgml(fout, mode, indent, lexer, content);
++ PCondFlushLine(fout, indent);
++ indent -= spaces; /* restore the caller's indent before the end tag */
++ PCondFlushLine(fout, indent);
++ }
++ else
++ { Node *last; /* previous sibling already printed, null for the first child */
++ last = null;
++ for (content = node->content;
++ content != null;
++ content = content->next) {
++ /* kludge for naked text before block level tag */
++ if (last && !IndentContent && last->type == TextNode &&
++ content->tag && !(content->tag->model & CM_INLINE) )
++ {
++ /* PFlushLine(fout, indent); */
++ PFlushLine(fout, indent);
++ }
++
++ PrintSgml(fout, mode,
++ (ShouldIndent(node) ? indent+spaces : indent),
++ lexer, content);
++ last = content;
++ }
++ }
++ PrintSgmlTag(fout, mode, indent, lexer, node, SgmlTagEnd);
++ PFlushLine(fout, indent);
++ if (IndentContent == no && /* extra line break after block, list, definition-list and table elements */
++ node->next != null &&
++ HideEndTags == no &&
++ (node->tag->model & (CM_BLOCK|CM_LIST|CM_DEFLIST|CM_TABLE)))
++ PFlushLine(fout, indent);
++ }
++ }
++}
++
+ void PPrintTree(Out *fout, uint mode, uint indent,
+ Lexer *lexer, Node *node)
+ {
+@@ -2034,17 +2669,14 @@
+ PPrintJste(fout, indent, lexer, node);
+ else if (node->type == PhpTag)
+ PPrintPhp(fout, indent, lexer, node);
+- else if ( node->tag->model & CM_EMPTY
+- || (node->type == StartEndTag && !xHTML) )
++ else if (node->tag->model & CM_EMPTY || (node->type == StartEndTag && !xHTML))
+ {
+ PCondFlushLine(fout, indent);
+ PPrintTag(lexer, fout, mode, indent, node);
+ PFlushLine(fout, indent);
+
+- /* CPR: folks don't want so much vertical spacing in XML
+ if (node->next)
+ PFlushLine(fout, indent);
+- */
+ }
+ else /* some kind of container element */
+ {
+@@ -2076,7 +2708,7 @@
+
+ PPrintTag(lexer, fout, mode, indent, node);
+
+- if ( !mixed && node->content )
++ if (!mixed)
+ PFlushLine(fout, indent);
+
+ for (content = node->content;
+@@ -2084,16 +2716,14 @@
+ content = content->next)
+ PPrintXMLTree(fout, mode, cindent, lexer, content);
+
+- if ( !mixed && node->content )
++ if (!mixed)
+ PCondFlushLine(fout, cindent);
+
+ PPrintEndTag(fout, mode, indent, node);
+ PCondFlushLine(fout, indent);
+
+- /* CPR: folks don't want so much vertical spacing in XML
+ if (node->next)
+ PFlushLine(fout, indent);
+- */
+ }
+ }
+
+--- /cise/tmp/ppadala/tidy/src/tab2space.c Wed Feb 6 04:09:37 2002
++++ src/tab2space.c Sat Jul 6 23:50:55 2002
+@@ -2,7 +2,7 @@
+ #include <stdlib.h>
+ #include <string.h>
+
+-#ifndef __BEOS__
++#if !(defined(__BEOS__) || defined(linux))
+ typedef unsigned int uint;
+ #endif
+ typedef unsigned char byte;
+--- /cise/tmp/ppadala/tidy/src/tidy.c Sun Jul 7 23:29:25 2002
++++ src/tidy.c Fri Jul 19 01:22:54 2002
+@@ -1853,6 +1853,10 @@
+ IndentContent = yes;
+ SmartIndent = yes;
+ }
++ else if (wstrcasecmp(arg, "dbsgml") == 0)
++ DbSgml = yes;
++ else if(wstrcasecmp(arg, "dbxml") == 0)
++ DbXml = yes;
+ else if (wstrcasecmp(arg, "omit") == 0)
+ HideEndTags = yes;
+ else if (wstrcasecmp(arg, "upper") == 0)
+@@ -2180,6 +2184,28 @@
+ else
+ {
+ lexer->warnings = 0;
++
++ if (DbSgml || DbXml) { /* DocBook output: force the doctype and the clean-up options it relies on */
++     char *str;
++
++     if(DbSgml)
++         str = "article PUBLIC \"-//OASIS//DTD DocBook V4.1//EN\"";
++     else /* official DocBook XML 4.1.2 identifier (was "DocBk XML V4.1.2 //EN") */
++         str = "article PUBLIC \"-//OASIS//DTD DocBook XML V4.1.2//EN\"";
++
++     EncloseBodyText = yes; /* We want those <p>s */
++     EncloseBlockText = yes;
++     LogicalEmphasis = yes;
++     DropFontTags = yes; /* <font> .. </font> are not needed */
++
++     /* May be this should be decided by user */
++     QuoteMarks = yes;
++
++     doctype_mode = doctype_user;
++     /* TidyDeInit does MemFree(doctype_str) if it's != NULL */
++     doctype_str = MemAlloc(wstrlen(str) + 1); /* + 1: room for the terminating NUL copied by wstrcpy() */
++     wstrcpy(doctype_str, str);
++ }
+
+ document = ParseDocument(lexer);
+
+@@ -2226,6 +2252,10 @@
+ {
+ if (xHTML)
+ SetXHTMLDocType(lexer, document);
++ else if(DbSgml)
++ SetSgmlDocType(lexer, document);
++ else if(DbXml)
++ SetSgmlDocType(lexer, document);
+ else
+ FixDocType(lexer, document);
+
+@@ -2247,7 +2277,7 @@
+ }
+
+ /* ensure presence of initial <?XML version="1.0"?> */
+- if (XmlOut && XmlPi)
++ if ((XmlOut && XmlPi) || DbXml)
+ FixXmlDecl(lexer, document);
+
+ /*
+@@ -2381,9 +2411,12 @@
+ /* Feature request #434940 - fix by Dave Raggett/Ignacio Vazquez-Abrams 21 Jun 01 */
+ else if (BodyOnly)
+ PrintBody(&out, lexer, document);
+- else
+- PPrintTree(&out, null, 0, lexer, document);
+-
++ else {
++ if(DbSgml || DbXml)
++ PrintSgml(&out, null, 0, lexer, document);
++ else
++ PPrintTree(&out, null, 0, lexer, document);
++ }
+ PFlushLine(&out, 0);
+ }
+