| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| |
|
| | #define IN_LIBXML |
| | #include "libxml.h" |
| | #ifdef LIBXML_HTML_ENABLED |
| |
|
| | #include <string.h> |
| | #include <ctype.h> |
| | #include <stdlib.h> |
| |
|
| | #include <libxml/xmlmemory.h> |
| | #include <libxml/HTMLparser.h> |
| | #include <libxml/HTMLtree.h> |
| | #include <libxml/entities.h> |
| | #include <libxml/xmlerror.h> |
| | #include <libxml/parserInternals.h> |
| | #include <libxml/uri.h> |
| |
|
| | #include "private/buf.h" |
| | #include "private/html.h" |
| | #include "private/error.h" |
| | #include "private/html.h" |
| | #include "private/io.h" |
| | #include "private/save.h" |
| | #include "private/tree.h" |
| |
|
| | |
| | |
| | |
| | |
| | |
| |
|
| | typedef struct { |
| | xmlAttrPtr attr; |
| | const xmlChar *attrValue; |
| | htmlMetaEncodingOffsets off; |
| | } htmlMetaEncoding; |
| |
|
| | static htmlNodePtr |
| | htmlFindFirstChild(htmlNodePtr parent, const char *name) { |
| | htmlNodePtr child; |
| |
|
| | for (child = parent->children; child != NULL; child = child->next) { |
| | if ((child->type == XML_ELEMENT_NODE) && |
| | (xmlStrcasecmp(child->name, BAD_CAST name) == 0)) |
| | return(child); |
| | } |
| |
|
| | return(NULL); |
| | } |
| |
|
| | static htmlNodePtr |
| | htmlFindHead(htmlDocPtr doc) { |
| | htmlNodePtr html; |
| |
|
| | if (doc == NULL) |
| | return(NULL); |
| |
|
| | html = htmlFindFirstChild((htmlNodePtr) doc, "html"); |
| | if (html == NULL) |
| | return(NULL); |
| |
|
| | return(htmlFindFirstChild(html, "head")); |
| | } |
| |
|
| | int |
| | htmlParseContentType(const xmlChar *val, htmlMetaEncodingOffsets *off) { |
| | const xmlChar *p = val; |
| |
|
| | while (1) { |
| | size_t start, end; |
| |
|
| | while ((*p != 'c') && (*p != 'C')) { |
| | if (*p == 0) |
| | return(0); |
| | p += 1; |
| | } |
| | p += 1; |
| |
|
| | if (xmlStrncasecmp(p, BAD_CAST "harset", 6) != 0) |
| | continue; |
| |
|
| | p += 6; |
| | while (IS_WS_HTML(*p)) p += 1; |
| |
|
| | if (*p != '=') |
| | continue; |
| |
|
| | p += 1; |
| | while (IS_WS_HTML(*p)) p += 1; |
| |
|
| | if (*p == 0) |
| | return(0); |
| |
|
| | if ((*p == '"') || (*p == '\'')) { |
| | int quote = *p; |
| |
|
| | p += 1; |
| | while (IS_WS_HTML(*p)) p += 1; |
| |
|
| | start = p - val; |
| | end = start; |
| |
|
| | while (*p != quote) { |
| | if (*p == 0) |
| | return(0); |
| | if (!IS_WS_HTML(*p)) |
| | end = p + 1 - val; |
| | p += 1; |
| | } |
| | } else { |
| | start = p - val; |
| |
|
| | while ((*p != 0) && (*p != ';') && (!IS_WS_HTML(*p))) |
| | p += 1; |
| |
|
| | end = p - val; |
| | } |
| |
|
| | off->start = start; |
| | off->end = end; |
| | off->size = p - val + strlen((char *) p); |
| |
|
| | return(1); |
| | } |
| |
|
| | return(0); |
| | } |
| |
|
| | static xmlAttrPtr |
| | htmlFindMetaEncodingAttr(htmlNodePtr elem, int *outIsContentType) { |
| | xmlAttrPtr attr, contentAttr = NULL; |
| | int isContentType = 0; |
| |
|
| | if (xmlStrcasecmp(elem->name, BAD_CAST "meta") != 0) |
| | return(NULL); |
| |
|
| | for (attr = elem->properties; attr != NULL; attr = attr->next) { |
| | if (attr->ns != NULL) |
| | continue; |
| | if (xmlStrcasecmp(attr->name, BAD_CAST "charset") == 0) { |
| | *outIsContentType = 0; |
| | return(attr); |
| | } |
| | if (xmlStrcasecmp(attr->name, BAD_CAST "content") == 0) |
| | contentAttr = attr; |
| | if ((xmlStrcasecmp(attr->name, BAD_CAST "http-equiv") == 0) && |
| | (attr->children != NULL) && |
| | (attr->children->type == XML_TEXT_NODE) && |
| | (attr->children->next == NULL) && |
| | (xmlStrcasecmp(attr->children->content, |
| | BAD_CAST "Content-Type") == 0)) |
| | isContentType = 1; |
| | } |
| |
|
| | if ((isContentType) && (contentAttr != NULL)) { |
| | *outIsContentType = 1; |
| | return(contentAttr); |
| | } |
| |
|
| | return(NULL); |
| | } |
| |
|
| | static int |
| | htmlParseMetaEncoding(htmlNodePtr elem, htmlMetaEncoding *menc) { |
| | xmlAttrPtr attr; |
| | const xmlChar *val = NULL; |
| | int isContentType; |
| |
|
| | if ((elem->type != XML_ELEMENT_NODE) || |
| | (xmlStrcasecmp(elem->name, BAD_CAST "meta") != 0)) |
| | return(0); |
| |
|
| | attr = htmlFindMetaEncodingAttr(elem, &isContentType); |
| | if (attr == NULL) |
| | return(0); |
| |
|
| | if ((attr->children != NULL) && |
| | (attr->children->type == XML_TEXT_NODE) && |
| | (attr->children->next == NULL) && |
| | (attr->children->content != NULL)) |
| | val = attr->children->content; |
| | else |
| | val = BAD_CAST ""; |
| |
|
| |
|
| | if (!isContentType) { |
| | size_t size = strlen((char *) val); |
| | size_t start = 0; |
| | size_t end = size; |
| |
|
| | while ((start < size) && (IS_WS_HTML(val[start]))) |
| | start += 1; |
| |
|
| | while ((end > 0) && (IS_WS_HTML(val[end-1]))) |
| | end -= 1; |
| |
|
| | menc->attr = attr; |
| | menc->attrValue = val; |
| | menc->off.start = start; |
| | menc->off.end = end; |
| | menc->off.size = size; |
| |
|
| | return(1); |
| | } else { |
| | if (htmlParseContentType(val, &menc->off)) { |
| | menc->attr = attr; |
| | menc->attrValue = val; |
| |
|
| | return(1); |
| | } |
| | } |
| |
|
| | return(0); |
| | } |
| |
|
| | static xmlChar * |
| | htmlUpdateMetaEncoding(htmlMetaEncoding *menc, const char *encoding) { |
| | xmlChar *newVal, *p; |
| | size_t size, oldEncSize, newEncSize; |
| |
|
| | |
| | |
| | |
| | if (xmlStrcasecmp(BAD_CAST encoding, BAD_CAST "HTML") == 0) |
| | encoding = "ASCII"; |
| |
|
| | oldEncSize = menc->off.end - menc->off.start; |
| | newEncSize = strlen((char *) encoding); |
| | size = menc->off.size - oldEncSize + newEncSize; |
| | newVal = xmlMalloc(size + 1); |
| | if (newVal == NULL) |
| | return(NULL); |
| |
|
| | p = newVal; |
| | memcpy(p, menc->attrValue, menc->off.start); |
| | p += menc->off.start; |
| | memcpy(p, encoding, newEncSize); |
| | p += newEncSize; |
| | memcpy(p, menc->attrValue + menc->off.end, menc->off.size - menc->off.end); |
| | newVal[size] = 0; |
| |
|
| | return(newVal); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | const xmlChar * |
| | htmlGetMetaEncoding(xmlDoc *doc) { |
| | htmlNodePtr head, node; |
| |
|
| | head = htmlFindHead(doc); |
| | if (head == NULL) |
| | return(NULL); |
| |
|
| | for (node = head->children; node != NULL; node = node->next) { |
| | htmlMetaEncoding menc; |
| |
|
| | if (htmlParseMetaEncoding(node, &menc)) { |
| | |
| | |
| | |
| | |
| | |
| | |
| | return(menc.attrValue + menc.off.start); |
| | } |
| | } |
| |
|
| | return(NULL); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | int |
| | htmlSetMetaEncoding(xmlDoc *doc, const xmlChar *encoding) { |
| | htmlNodePtr head, meta; |
| | int found = 0; |
| |
|
| | if (encoding == NULL) |
| | return(1); |
| |
|
| | head = htmlFindHead(doc); |
| | if (head == NULL) |
| | return(1); |
| |
|
| | for (meta = head->children; meta != NULL; meta = meta->next) { |
| | htmlMetaEncoding menc; |
| |
|
| | if (htmlParseMetaEncoding(meta, &menc)) { |
| | xmlChar *newVal; |
| | int ret; |
| |
|
| | found = 1; |
| |
|
| | newVal = htmlUpdateMetaEncoding(&menc, (char *) encoding); |
| | if (newVal == NULL) |
| | return(-1); |
| | xmlNodeSetContent((xmlNodePtr) menc.attr, NULL); |
| | ret = xmlNodeAddContent((xmlNodePtr) menc.attr, newVal); |
| | xmlFree(newVal); |
| |
|
| | if (ret < 0) |
| | return(-1); |
| | } |
| | } |
| |
|
| | if (found) |
| | return(0); |
| |
|
| | meta = xmlNewDocNode(head->doc, NULL, BAD_CAST "meta", NULL); |
| | if (meta == NULL) |
| | return(-1); |
| |
|
| | if (xmlNewProp(meta, BAD_CAST "charset", encoding) == NULL) { |
| | xmlFreeNode(meta); |
| | return(-1); |
| | } |
| |
|
| | if (head->children == NULL) |
| | xmlAddChild(head, meta); |
| | else |
| | xmlAddPrevSibling(head->children, meta); |
| |
|
| | return(0); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | int |
| | htmlIsBooleanAttr(const xmlChar *name) |
| | { |
| | const char *str = NULL; |
| |
|
| | if (name == NULL) |
| | return(0); |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | switch (name[0] | 0x20) { |
| | case 'c': |
| | name += 1; |
| | switch (name[0] | 0x20) { |
| | case 'h': str = "ecked"; break; |
| | case 'o': str = "mpact"; break; |
| | } |
| | break; |
| | case 'd': |
| | name += 1; |
| | switch (name[0] | 0x20) { |
| | case 'e': |
| | name += 1; |
| | switch (name[0] | 0x20) { |
| | case 'c': str = "lare"; break; |
| | case 'f': str = "er"; break; |
| | } |
| | break; |
| | case 'i': str = "sabled"; break; |
| | } |
| | break; |
| | case 'i': |
| | str = "smap"; |
| | break; |
| | case 'm': |
| | str = "ultiple"; |
| | break; |
| | case 'n': |
| | name += 1; |
| | if ((name[0] | 0x20) != 'o') |
| | break; |
| | name += 1; |
| | switch (name[0] | 0x20) { |
| | case 'h': str = "ref"; break; |
| | case 'r': str = "esize"; break; |
| | case 's': str = "hade"; break; |
| | case 'w': str = "rap"; break; |
| | } |
| | break; |
| | case 'r': |
| | str = "eadonly"; |
| | break; |
| | case 's': |
| | str = "elected"; |
| | break; |
| | } |
| |
|
| | if (str == NULL) |
| | return(0); |
| |
|
| | return(xmlStrcasecmp(name + 1, BAD_CAST str) == 0); |
| | } |
| |
|
| | #ifdef LIBXML_OUTPUT_ENABLED |
| | |
| | |
| | |
| | |
| | |
| |
|
| | static xmlParserErrors |
| | htmlFindOutputEncoder(const char *encoding, xmlCharEncodingHandler **out) { |
| | |
| | |
| | |
| | if (encoding == NULL) |
| | encoding = "HTML"; |
| |
|
| | return(xmlOpenCharEncodingHandler(encoding, 1, out)); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | static size_t |
| | htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc ATTRIBUTE_UNUSED, |
| | xmlNodePtr cur, int format) { |
| | size_t use; |
| | size_t ret; |
| | xmlOutputBufferPtr outbuf; |
| |
|
| | if (cur == NULL) { |
| | return ((size_t) -1); |
| | } |
| | if (buf == NULL) { |
| | return ((size_t) -1); |
| | } |
| | outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer)); |
| | if (outbuf == NULL) |
| | return ((size_t) -1); |
| | memset(outbuf, 0, sizeof(xmlOutputBuffer)); |
| | outbuf->buffer = buf; |
| | outbuf->encoder = NULL; |
| | outbuf->writecallback = NULL; |
| | outbuf->closecallback = NULL; |
| | outbuf->context = NULL; |
| | outbuf->written = 0; |
| |
|
| | use = xmlBufUse(buf); |
| | htmlNodeDumpInternal(outbuf, cur, NULL, format); |
| | if (outbuf->error) |
| | ret = (size_t) -1; |
| | else |
| | ret = xmlBufUse(buf) - use; |
| | xmlFree(outbuf); |
| | return (ret); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | int |
| | htmlNodeDump(xmlBuffer *buf, xmlDoc *doc, xmlNode *cur) { |
| | xmlBufPtr buffer; |
| | size_t ret1; |
| | int ret2; |
| |
|
| | if ((buf == NULL) || (cur == NULL)) |
| | return(-1); |
| |
|
| | xmlInitParser(); |
| | buffer = xmlBufFromBuffer(buf); |
| | if (buffer == NULL) |
| | return(-1); |
| |
|
| | ret1 = htmlBufNodeDumpFormat(buffer, doc, cur, 1); |
| |
|
| | ret2 = xmlBufBackToBuffer(buffer, buf); |
| |
|
| | if ((ret1 == (size_t) -1) || (ret2 < 0)) |
| | return(-1); |
| | return(ret1 > INT_MAX ? INT_MAX : ret1); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | int |
| | htmlNodeDumpFileFormat(FILE *out, xmlDoc *doc ATTRIBUTE_UNUSED, |
| | xmlNode *cur, const char *encoding, int format) { |
| | xmlOutputBufferPtr buf; |
| | xmlCharEncodingHandlerPtr handler; |
| | int ret; |
| |
|
| | xmlInitParser(); |
| |
|
| | |
| | |
| | |
| | if (htmlFindOutputEncoder(encoding, &handler) != XML_ERR_OK) |
| | return(-1); |
| | buf = xmlOutputBufferCreateFile(out, handler); |
| | if (buf == NULL) { |
| | xmlCharEncCloseFunc(handler); |
| | return(-1); |
| | } |
| |
|
| | htmlNodeDumpInternal(buf, cur, NULL, format); |
| |
|
| | ret = xmlOutputBufferClose(buf); |
| | return(ret); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | void |
| | htmlNodeDumpFile(FILE *out, xmlDoc *doc, xmlNode *cur) { |
| | htmlNodeDumpFileFormat(out, doc, cur, NULL, 1); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | void |
| | htmlDocDumpMemoryFormat(xmlDoc *cur, xmlChar**mem, int *size, int format) { |
| | xmlOutputBufferPtr buf; |
| | xmlCharEncodingHandlerPtr handler = NULL; |
| |
|
| | xmlInitParser(); |
| |
|
| | if ((mem == NULL) || (size == NULL)) |
| | return; |
| | *mem = NULL; |
| | *size = 0; |
| | if (cur == NULL) |
| | return; |
| |
|
| | if (htmlFindOutputEncoder((char *) cur->encoding, &handler) != XML_ERR_OK) |
| | return; |
| | buf = xmlAllocOutputBuffer(handler); |
| | if (buf == NULL) { |
| | xmlCharEncCloseFunc(handler); |
| | return; |
| | } |
| |
|
| | htmlDocContentDumpFormatOutput(buf, cur, NULL, format); |
| |
|
| | xmlOutputBufferFlush(buf); |
| |
|
| | if (!buf->error) { |
| | if (buf->conv != NULL) { |
| | *size = xmlBufUse(buf->conv); |
| | *mem = xmlStrndup(xmlBufContent(buf->conv), *size); |
| | } else { |
| | *size = xmlBufUse(buf->buffer); |
| | *mem = xmlStrndup(xmlBufContent(buf->buffer), *size); |
| | } |
| | } |
| |
|
| | xmlOutputBufferClose(buf); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | void |
| | htmlDocDumpMemory(xmlDoc *cur, xmlChar**mem, int *size) { |
| | htmlDocDumpMemoryFormat(cur, mem, size, 1); |
| | } |
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | static void |
| | htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, |
| | const char *encoding ATTRIBUTE_UNUSED) { |
| | xmlDtdPtr cur = doc->intSubset; |
| |
|
| | if (cur == NULL) |
| | return; |
| | xmlOutputBufferWrite(buf, 10, "<!DOCTYPE "); |
| | xmlOutputBufferWriteString(buf, (const char *)cur->name); |
| | if (cur->ExternalID != NULL) { |
| | xmlOutputBufferWrite(buf, 8, " PUBLIC "); |
| | xmlOutputBufferWriteQuotedString(buf, cur->ExternalID); |
| | if (cur->SystemID != NULL) { |
| | xmlOutputBufferWrite(buf, 1, " "); |
| | xmlOutputBufferWriteQuotedString(buf, cur->SystemID); |
| | } |
| | } else if (cur->SystemID != NULL && |
| | xmlStrcmp(cur->SystemID, BAD_CAST "about:legacy-compat")) { |
| | xmlOutputBufferWrite(buf, 8, " SYSTEM "); |
| | xmlOutputBufferWriteQuotedString(buf, cur->SystemID); |
| | } |
| | xmlOutputBufferWrite(buf, 2, ">\n"); |
| | } |
| |
|
| | static void |
| | htmlSerializeUri(xmlOutputBufferPtr buf, const xmlChar *content) { |
| | const xmlChar *tmp = content; |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | while (IS_WS_HTML(*tmp)) tmp++; |
| | if (tmp > content) { |
| | xmlOutputBufferWrite(buf, tmp - content, (char *) content); |
| | content = tmp; |
| | } |
| |
|
| | while (1) { |
| | char escbuf[3]; |
| | const char *repl; |
| | int replSize; |
| | int c = *tmp; |
| |
|
| | while ((c > 0x20) && (c < 0x7F) && (c != '"') && (c != '&')) { |
| | tmp += 1; |
| | c = *tmp; |
| | } |
| |
|
| | if (tmp > content) |
| | xmlOutputBufferWrite(buf, tmp - content, (char *) content); |
| |
|
| | if ((c <= 0x20) || (c >= 0x7F)) { |
| | static const char hex[16] = { |
| | '0', '1', '2', '3', '4', '5', '6', '7', |
| | '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' |
| | }; |
| |
|
| | if (c == 0) |
| | break; |
| |
|
| | escbuf[0] = '%'; |
| | escbuf[1] = hex[(c >> 4) & 0x0F]; |
| | escbuf[2] = hex[c & 0x0F]; |
| | repl = escbuf; |
| | replSize = 3; |
| | } else if (c == '"') { |
| | repl = """; |
| | replSize = 6; |
| | } else { |
| | repl = "&"; |
| | replSize = 5; |
| | } |
| |
|
| | xmlOutputBufferWrite(buf, replSize, repl); |
| | tmp += 1; |
| | content = tmp; |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | static void |
| | htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlAttrPtr cur) { |
| | xmlOutputBufferWrite(buf, 1, " "); |
| |
|
| | if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { |
| | xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); |
| | xmlOutputBufferWrite(buf, 1, ":"); |
| | } |
| | xmlOutputBufferWriteString(buf, (const char *)cur->name); |
| |
|
| | |
| | |
| | |
| | |
| | |
| | if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) { |
| | xmlNodePtr child; |
| | int isUri; |
| |
|
| | xmlOutputBufferWrite(buf, 2, "=\""); |
| |
|
| | |
| | |
| | |
| | |
| | isUri = (cur->ns == NULL) && (cur->parent != NULL) && |
| | (cur->parent->ns == NULL) && |
| | ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) || |
| | (!xmlStrcasecmp(cur->name, BAD_CAST "action")) || |
| | (!xmlStrcasecmp(cur->name, BAD_CAST "src")) || |
| | ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) && |
| | (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a")))); |
| |
|
| | for (child = cur->children; child != NULL; child = child->next) { |
| | if (child->type == XML_TEXT_NODE) { |
| | const xmlChar *content = child->content; |
| |
|
| | if (content == NULL) |
| | continue; |
| |
|
| | if (isUri) { |
| | htmlSerializeUri(buf, content); |
| | } else { |
| | xmlSerializeText(buf, content, SIZE_MAX, |
| | XML_ESCAPE_HTML | XML_ESCAPE_ATTR); |
| | } |
| | } else if (child->type == XML_ENTITY_REF_NODE) { |
| | |
| | xmlOutputBufferWrite(buf, 1, "&"); |
| | xmlOutputBufferWriteString(buf, (char *) child->name); |
| | xmlOutputBufferWrite(buf, 1, ";"); |
| | } |
| | } |
| |
|
| | xmlOutputBufferWrite(buf, 1, "\""); |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | void |
| | htmlNodeDumpInternal(xmlOutputBuffer *buf, xmlNode *cur, |
| | const char *encoding, int format) { |
| | xmlNodePtr root, parent, metaHead = NULL; |
| | xmlAttrPtr attr; |
| | const htmlElemDesc * info; |
| | int isRaw = 0; |
| |
|
| | xmlInitParser(); |
| |
|
| | if ((cur == NULL) || (buf == NULL)) { |
| | return; |
| | } |
| |
|
| | root = cur; |
| | parent = cur->parent; |
| | while (1) { |
| | switch (cur->type) { |
| | case XML_HTML_DOCUMENT_NODE: |
| | case XML_DOCUMENT_NODE: |
| | if (((xmlDocPtr) cur)->intSubset != NULL) { |
| | htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL); |
| | } |
| | if (cur->children != NULL) { |
| | |
| | if (cur->parent == parent) { |
| | parent = cur; |
| | cur = cur->children; |
| | continue; |
| | } |
| | } else { |
| | xmlOutputBufferWrite(buf, 1, "\n"); |
| | } |
| | break; |
| |
|
| | case XML_ELEMENT_NODE: { |
| | htmlMetaEncoding menc; |
| | int isMeta = 0; |
| | int addMeta = 0; |
| |
|
| | |
| | |
| | |
| | |
| | |
| | if ((cur->parent != parent) && (cur->children != NULL)) { |
| | htmlNodeDumpInternal(buf, cur, encoding, format); |
| | break; |
| | } |
| |
|
| | |
| | |
| | |
| | if (cur->ns == NULL) |
| | info = htmlTagLookup(cur->name); |
| | else |
| | info = NULL; |
| |
|
| | if (encoding != NULL) { |
| | isMeta = htmlParseMetaEncoding(cur, &menc); |
| |
|
| | |
| | |
| | |
| | if ((xmlStrcasecmp(BAD_CAST encoding, |
| | BAD_CAST "HTML") != 0) && |
| | (xmlStrcasecmp(cur->name, BAD_CAST "head") == 0) && |
| | (parent != NULL) && |
| | (xmlStrcasecmp(parent->name, BAD_CAST "html") == 0) && |
| | (parent->parent != NULL) && |
| | (parent->parent->parent == NULL) && |
| | (metaHead == NULL)) { |
| | xmlNodePtr n; |
| |
|
| | metaHead = cur; |
| | addMeta = 1; |
| |
|
| | for (n = cur->children; n != NULL; n = n->next) { |
| | int unused; |
| |
|
| | if (htmlFindMetaEncodingAttr(n, &unused) != NULL) { |
| | metaHead = NULL; |
| | addMeta = 0; |
| | break; |
| | } |
| | } |
| | } |
| | } |
| |
|
| | xmlOutputBufferWrite(buf, 1, "<"); |
| | if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { |
| | xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); |
| | xmlOutputBufferWrite(buf, 1, ":"); |
| | } |
| | xmlOutputBufferWriteString(buf, (const char *)cur->name); |
| | if (cur->nsDef) |
| | xmlNsListDumpOutput(buf, cur->nsDef); |
| | attr = cur->properties; |
| | while (attr != NULL) { |
| | if ((!isMeta) || (attr != menc.attr)) { |
| | htmlAttrDumpOutput(buf, attr); |
| | } else { |
| | xmlOutputBufferWrite(buf, 1, " "); |
| | xmlOutputBufferWriteString(buf, (char *) attr->name); |
| |
|
| | xmlOutputBufferWrite(buf, 2, "=\""); |
| | xmlSerializeText(buf, menc.attrValue, menc.off.start, |
| | XML_ESCAPE_HTML | XML_ESCAPE_ATTR); |
| | xmlSerializeText(buf, BAD_CAST encoding, SIZE_MAX, |
| | XML_ESCAPE_HTML | XML_ESCAPE_ATTR); |
| | xmlSerializeText(buf, menc.attrValue + menc.off.end, |
| | menc.off.size - menc.off.end, |
| | XML_ESCAPE_HTML | XML_ESCAPE_ATTR); |
| | xmlOutputBufferWrite(buf, 1, "\""); |
| | } |
| | attr = attr->next; |
| | } |
| |
|
| | if ((info != NULL) && (info->empty)) { |
| | xmlOutputBufferWrite(buf, 1, ">"); |
| | } else if (cur->children == NULL) { |
| | if (addMeta) { |
| | xmlOutputBufferWrite(buf, 16, "><meta charset=\""); |
| | xmlSerializeText(buf, BAD_CAST encoding, SIZE_MAX, |
| | XML_ESCAPE_HTML | XML_ESCAPE_ATTR); |
| | xmlOutputBufferWrite(buf, 4, "\"></"); |
| | } else { |
| | xmlOutputBufferWrite(buf, 3, "></"); |
| | } |
| | if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { |
| | xmlOutputBufferWriteString(buf, |
| | (const char *)cur->ns->prefix); |
| | xmlOutputBufferWrite(buf, 1, ":"); |
| | } |
| | xmlOutputBufferWriteString(buf, (const char *)cur->name); |
| | xmlOutputBufferWrite(buf, 1, ">"); |
| | } else { |
| | xmlOutputBufferWrite(buf, 1, ">"); |
| | if ((format) && |
| | ((addMeta) || |
| | ((info != NULL) && (!info->isinline) && |
| | (cur->children->type != HTML_TEXT_NODE) && |
| | (cur->children->type != HTML_ENTITY_REF_NODE) && |
| | (cur->children != cur->last) && |
| | (cur->name != NULL) && |
| | (cur->name[0] != 'p')))) |
| | xmlOutputBufferWrite(buf, 1, "\n"); |
| | if (addMeta) { |
| | xmlOutputBufferWrite(buf, 15, "<meta charset=\""); |
| | xmlSerializeText(buf, BAD_CAST encoding, SIZE_MAX, |
| | XML_ESCAPE_HTML | XML_ESCAPE_ATTR); |
| | xmlOutputBufferWrite(buf, 2, "\">"); |
| | if ((format) && |
| | (cur->children->type != HTML_TEXT_NODE) && |
| | (cur->children->type != HTML_ENTITY_REF_NODE)) |
| | xmlOutputBufferWrite(buf, 1, "\n"); |
| | } |
| |
|
| | if ((info != NULL) && (info->dataMode >= DATA_RAWTEXT)) |
| | isRaw = 1; |
| |
|
| | parent = cur; |
| | cur = cur->children; |
| | continue; |
| | } |
| |
|
| | if ((format) && (cur->next != NULL) && |
| | (info != NULL) && (!info->isinline)) { |
| | if ((cur->next->type != HTML_TEXT_NODE) && |
| | (cur->next->type != HTML_ENTITY_REF_NODE) && |
| | (parent != NULL) && |
| | (parent->name != NULL) && |
| | (parent->name[0] != 'p')) |
| | xmlOutputBufferWrite(buf, 1, "\n"); |
| | } |
| |
|
| | break; |
| | } |
| |
|
| | case XML_ATTRIBUTE_NODE: |
| | htmlAttrDumpOutput(buf, (xmlAttrPtr) cur); |
| | break; |
| |
|
| | case HTML_TEXT_NODE: |
| | if (cur->content == NULL) |
| | break; |
| | if ((cur->name == (const xmlChar *)xmlStringTextNoenc) || |
| | (isRaw)) { |
| | xmlOutputBufferWriteString(buf, (const char *)cur->content); |
| | } else { |
| | xmlSerializeText(buf, cur->content, SIZE_MAX, XML_ESCAPE_HTML); |
| | } |
| | break; |
| |
|
| | case HTML_COMMENT_NODE: |
| | if (cur->content != NULL) { |
| | xmlOutputBufferWrite(buf, 4, "<!--"); |
| | xmlOutputBufferWriteString(buf, (const char *)cur->content); |
| | xmlOutputBufferWrite(buf, 3, "-->"); |
| | } |
| | break; |
| |
|
| | case HTML_PI_NODE: |
| | if (cur->name != NULL) { |
| | xmlOutputBufferWrite(buf, 2, "<?"); |
| | xmlOutputBufferWriteString(buf, (const char *)cur->name); |
| | if (cur->content != NULL) { |
| | xmlOutputBufferWrite(buf, 1, " "); |
| | xmlOutputBufferWriteString(buf, |
| | (const char *)cur->content); |
| | } |
| | xmlOutputBufferWrite(buf, 1, ">"); |
| | } |
| | break; |
| |
|
| | case HTML_ENTITY_REF_NODE: |
| | xmlOutputBufferWrite(buf, 1, "&"); |
| | xmlOutputBufferWriteString(buf, (const char *)cur->name); |
| | xmlOutputBufferWrite(buf, 1, ";"); |
| | break; |
| |
|
| | case HTML_PRESERVE_NODE: |
| | if (cur->content != NULL) { |
| | xmlOutputBufferWriteString(buf, (const char *)cur->content); |
| | } |
| | break; |
| |
|
| | default: |
| | break; |
| | } |
| |
|
| | while (1) { |
| | if (cur == root) |
| | return; |
| | if (cur->next != NULL) { |
| | cur = cur->next; |
| | break; |
| | } |
| |
|
| | isRaw = 0; |
| |
|
| | cur = parent; |
| | |
| | parent = cur->parent; |
| |
|
| | if ((cur->type == XML_HTML_DOCUMENT_NODE) || |
| | (cur->type == XML_DOCUMENT_NODE)) { |
| | xmlOutputBufferWrite(buf, 1, "\n"); |
| | } else { |
| | if ((format) && (cur->ns == NULL)) |
| | info = htmlTagLookup(cur->name); |
| | else |
| | info = NULL; |
| |
|
| | if ((format) && (info != NULL) && (!info->isinline) && |
| | (cur->last->type != HTML_TEXT_NODE) && |
| | (cur->last->type != HTML_ENTITY_REF_NODE) && |
| | ((cur->children != cur->last) || (cur == metaHead)) && |
| | (cur->name != NULL) && |
| | (cur->name[0] != 'p')) |
| | xmlOutputBufferWrite(buf, 1, "\n"); |
| |
|
| | xmlOutputBufferWrite(buf, 2, "</"); |
| | if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) { |
| | xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix); |
| | xmlOutputBufferWrite(buf, 1, ":"); |
| | } |
| | xmlOutputBufferWriteString(buf, (const char *)cur->name); |
| | xmlOutputBufferWrite(buf, 1, ">"); |
| |
|
| | if ((format) && (info != NULL) && (!info->isinline) && |
| | (cur->next != NULL)) { |
| | if ((cur->next->type != HTML_TEXT_NODE) && |
| | (cur->next->type != HTML_ENTITY_REF_NODE) && |
| | (parent != NULL) && |
| | (parent->name != NULL) && |
| | (parent->name[0] != 'p')) |
| | xmlOutputBufferWrite(buf, 1, "\n"); |
| | } |
| |
|
| | if (cur == metaHead) |
| | metaHead = NULL; |
| | } |
| | } |
| | } |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | void |
| | htmlNodeDumpFormatOutput(xmlOutputBuffer *buf, |
| | xmlDoc *doc ATTRIBUTE_UNUSED, xmlNode *cur, |
| | const char *encoding ATTRIBUTE_UNUSED, int format) { |
| | htmlNodeDumpInternal(buf, cur, NULL, format); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | void |
| | htmlNodeDumpOutput(xmlOutputBuffer *buf, xmlDoc *doc ATTRIBUTE_UNUSED, |
| | xmlNode *cur, const char *encoding ATTRIBUTE_UNUSED) { |
| | htmlNodeDumpInternal(buf, cur, NULL, 1); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | void |
| | htmlDocContentDumpFormatOutput(xmlOutputBuffer *buf, xmlDoc *cur, |
| | const char *encoding ATTRIBUTE_UNUSED, |
| | int format) { |
| | htmlNodeDumpInternal(buf, (xmlNodePtr) cur, NULL, format); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | void |
| | htmlDocContentDumpOutput(xmlOutputBuffer *buf, xmlDoc *cur, |
| | const char *encoding ATTRIBUTE_UNUSED) { |
| | htmlNodeDumpInternal(buf, (xmlNodePtr) cur, NULL, 1); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | int |
| | htmlDocDump(FILE *f, xmlDoc *cur) { |
| | xmlOutputBufferPtr buf; |
| | xmlCharEncodingHandlerPtr handler = NULL; |
| | int ret; |
| |
|
| | xmlInitParser(); |
| |
|
| | if ((cur == NULL) || (f == NULL)) { |
| | return(-1); |
| | } |
| |
|
| | if (htmlFindOutputEncoder((char *) cur->encoding, &handler) != XML_ERR_OK) |
| | return(-1); |
| | buf = xmlOutputBufferCreateFile(f, handler); |
| | if (buf == NULL) { |
| | xmlCharEncCloseFunc(handler); |
| | return(-1); |
| | } |
| | htmlDocContentDumpOutput(buf, cur, NULL); |
| |
|
| | ret = xmlOutputBufferClose(buf); |
| | return(ret); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | int |
| | htmlSaveFile(const char *filename, xmlDoc *cur) { |
| | return(htmlSaveFileFormat(filename, cur, NULL, 1)); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | int |
| | htmlSaveFileFormat(const char *filename, xmlDoc *cur, |
| | const char *encoding, int format) { |
| | xmlOutputBufferPtr buf; |
| | xmlCharEncodingHandlerPtr handler = NULL; |
| | int ret; |
| |
|
| | if ((cur == NULL) || (filename == NULL)) |
| | return(-1); |
| |
|
| | xmlInitParser(); |
| |
|
| | if (htmlFindOutputEncoder(encoding, &handler) != XML_ERR_OK) |
| | return(-1); |
| |
|
| | |
| | |
| | |
| | buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression); |
| | if (buf == NULL) { |
| | xmlCharEncCloseFunc(handler); |
| | return(0); |
| | } |
| |
|
| | htmlDocContentDumpFormatOutput(buf, cur, encoding, format); |
| |
|
| | ret = xmlOutputBufferClose(buf); |
| | return(ret); |
| | } |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | int |
| | htmlSaveFileEnc(const char *filename, xmlDoc *cur, const char *encoding) { |
| | return(htmlSaveFileFormat(filename, cur, encoding, 1)); |
| | } |
| |
|
| | #endif |
| |
|
| | #endif |
| |
|