/*
Copyright (C) 2003 Simon Mourier
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
using System;
using System.Collections;
using System.IO;
using System.Xml;
using System.Xml.XPath;
namespace HtmlAgilityPack
{
/// <summary>
/// Flags that describe the behavior of an Element node.
/// The values are distinct bits and are combined with the bitwise OR operator
/// (for example <c>Empty | Closed</c>), hence the <see cref="FlagsAttribute"/>.
/// </summary>
[Flags]
public enum HtmlElementFlag
{
    /// <summary>
    /// The node is a CDATA node.
    /// </summary>
    CData = 1,

    /// <summary>
    /// The node is empty. META or IMG are example of such nodes.
    /// </summary>
    Empty = 2,

    /// <summary>
    /// The node will automatically be closed during parsing.
    /// </summary>
    Closed = 4,

    /// <summary>
    /// The node can overlap.
    /// </summary>
    CanOverlap = 8
}
/// <summary>
/// Represents the type of a node.
/// </summary>
public enum HtmlNodeType
{
    /// <summary>
    /// The root of a document.
    /// </summary>
    Document = 0,

    /// <summary>
    /// An HTML element.
    /// </summary>
    Element = 1,

    /// <summary>
    /// An HTML comment.
    /// </summary>
    Comment = 2,

    /// <summary>
    /// A text node is always the child of an element or a document node.
    /// </summary>
    Text = 3
}
/// <summary>
/// Represents an HTML node.
/// </summary>
public class HtmlNode: IXPathNavigable
{
/// <summary>
/// Gets the name of a comment node. It is actually defined as '#comment'.
/// </summary>
public static readonly string HtmlNodeTypeNameComment = "#comment";
/// <summary>
/// Gets the name of the document node. It is actually defined as '#document'.
/// </summary>
public static readonly string HtmlNodeTypeNameDocument = "#document";
/// <summary>
/// Gets the name of a text node. It is actually defined as '#text'.
/// </summary>
public static readonly string HtmlNodeTypeNameText = "#text";
/// <summary>
/// Gets a collection of flags that define specific behaviors for specific element nodes.
/// The table contains a DictionaryEntry list with the lowercase tag name as the Key, and a combination of HtmlElementFlags as the Value.
/// </summary>
public static Hashtable ElementsFlags;
// Kind of node (document, element, comment or text); exposed through NodeType.
internal HtmlNodeType _nodetype;
// Sibling and parent links; exposed through NextSibling, PreviousSibling and ParentNode.
internal HtmlNode _nextnode;
internal HtmlNode _prevnode;
internal HtmlNode _parentnode;
// Document this node belongs to; exposed through OwnerDocument.
internal HtmlDocument _ownerdocument;
// Lazily created by the ChildNodes getter; null until first needed.
internal HtmlNodeCollection _childnodes;
// Attribute collection of this node; may be null (see HasAttributes).
internal HtmlAttributeCollection _attributes;
// Position information exposed through Line, LinePosition and StreamPosition.
internal int _line = 0;
internal int _lineposition = 0;
internal int _streamposition = 0;
// Start index and length of the inner HTML inside the owner document's text (see InnerHtml).
internal int _innerstartindex = 0;
internal int _innerlength = 0;
// Start index and length of the outer HTML inside the owner document's text (see OuterHtml).
internal int _outerstartindex = 0;
internal int _outerlength = 0;
// Start index and length of the node name inside the owner document's text (see Name).
internal int _namestartindex = 0;
internal int _namelength = 0;
// Exposed through StartTag. NOTE(review): presumably set by the parser for opening tags - not visible in this class.
internal bool _starttag = false;
// Cached name; when null, the Name getter reads it lazily from the document text.
internal string _name;
// NOTE(review): not used in the visible part of this class; presumably parser bookkeeping - confirm in HtmlDocument.
internal HtmlNode _prevwithsamename = null;
// Node that closes this node. Null means "not closed yet" (see Closed); it may be the node itself.
internal HtmlNode _endnode;
// Dirty flags: when true, InnerHtml / OuterHtml are regenerated from the tree on next access.
internal bool _innerchanged = false;
internal bool _outerchanged = false;
// Cached results of the last InnerHtml / OuterHtml regeneration.
internal string _innerhtml;
internal string _outerhtml;
// Populates ElementsFlags with the default per-tag behavior flags.
// Keys are lowercase tag names, values are HtmlElementFlag combinations.
static HtmlNode()
{
    ElementsFlags = new Hashtable();

    // tags whose content may be anything
    ElementsFlags.Add("script", HtmlElementFlag.CData);
    ElementsFlags.Add("style", HtmlElementFlag.CData);
    ElementsFlags.Add("noxhtml", HtmlElementFlag.CData);

    // tags that can not contain other tags
    ElementsFlags.Add("base", HtmlElementFlag.Empty);
    ElementsFlags.Add("link", HtmlElementFlag.Empty);
    ElementsFlags.Add("meta", HtmlElementFlag.Empty);
    ElementsFlags.Add("isindex", HtmlElementFlag.Empty);
    ElementsFlags.Add("hr", HtmlElementFlag.Empty);
    ElementsFlags.Add("col", HtmlElementFlag.Empty);
    ElementsFlags.Add("img", HtmlElementFlag.Empty);
    ElementsFlags.Add("param", HtmlElementFlag.Empty);
    ElementsFlags.Add("embed", HtmlElementFlag.Empty);
    ElementsFlags.Add("frame", HtmlElementFlag.Empty);
    ElementsFlags.Add("wbr", HtmlElementFlag.Empty);
    ElementsFlags.Add("bgsound", HtmlElementFlag.Empty);
    ElementsFlags.Add("spacer", HtmlElementFlag.Empty);
    ElementsFlags.Add("keygen", HtmlElementFlag.Empty);
    ElementsFlags.Add("area", HtmlElementFlag.Empty);
    ElementsFlags.Add("input", HtmlElementFlag.Empty);
    ElementsFlags.Add("basefont", HtmlElementFlag.Empty);

    //ElementsFlags.Add("form", HtmlElementFlag.CanOverlap | HtmlElementFlag.Empty);
    ElementsFlags.Add("form", HtmlElementFlag.CanOverlap);

    // they sometimes contain, and sometimes they don't...
    ElementsFlags.Add("option", HtmlElementFlag.Empty);

    // tag whose closing tag is equivalent to open tag:
    // <p>bla</p>bla will be transformed into <p>bla</p>bla
    // <p>bla<p>bla will be transformed into <p>bla<p>bla and not
    // <p>bla></p><p>bla</p> or <p>bla<p>bla</p></p>
    // <br> see above
    ElementsFlags.Add("br", HtmlElementFlag.Empty | HtmlElementFlag.Closed);
    ElementsFlags.Add("p", HtmlElementFlag.Empty | HtmlElementFlag.Closed);
}
/// <summary>
/// Determines if an element node is closed.
/// </summary>
/// <param name="name">The name of the element node to check. May not be null.</param>
/// <returns>true if the name is the name of a closed element node, false otherwise.</returns>
public static bool IsClosedElement(string name)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    // Flags are registered under the lowercase tag name.
    object registered = ElementsFlags[name.ToLower()];
    if (registered != null)
    {
        HtmlElementFlag flags = (HtmlElementFlag)registered;
        return (flags & HtmlElementFlag.Closed) == HtmlElementFlag.Closed;
    }
    return false;
}
/// <summary>
/// Determines if an element node can be kept overlapped.
/// </summary>
/// <param name="name">The name of the element node to check. May not be null.</param>
/// <returns>true if the name is the name of an element node that can be kept overlapped, false otherwise.</returns>
public static bool CanOverlapElement(string name)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    // Flags are registered under the lowercase tag name.
    object registered = ElementsFlags[name.ToLower()];
    if (registered != null)
    {
        HtmlElementFlag flags = (HtmlElementFlag)registered;
        return (flags & HtmlElementFlag.CanOverlap) == HtmlElementFlag.CanOverlap;
    }
    return false;
}
/// <summary>
/// Determines if a text corresponds to the closing tag of a node that can be kept overlapped.
/// </summary>
/// <param name="text">The text to check, expected in the form "&lt;/name&gt;". May not be null.</param>
/// <returns>true if the text is a closing tag whose name is flagged CanOverlap, false otherwise.</returns>
public static bool IsOverlappedClosingElement(string text)
{
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }

    // The shortest possible closing tag is "</x>", i.e. 4 characters.
    // The previous "<= 4" test rejected exactly that minimal form.
    if (text.Length < 4)
    {
        return false;
    }

    if ((text[0] != '<') ||
        (text[text.Length - 1] != '>') ||
        (text[1] != '/'))
    {
        return false;
    }

    // Strip the leading "</" and the trailing ">".
    string name = text.Substring(2, text.Length - 3);
    return CanOverlapElement(name);
}
/// <summary>
/// Determines if an element node is a CDATA element node.
/// </summary>
/// <param name="name">The name of the element node to check. May not be null.</param>
/// <returns>true if the name is the name of a CDATA element node, false otherwise.</returns>
public static bool IsCDataElement(string name)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    // Flags are registered under the lowercase tag name.
    object registered = ElementsFlags[name.ToLower()];
    if (registered != null)
    {
        HtmlElementFlag flags = (HtmlElementFlag)registered;
        return (flags & HtmlElementFlag.CData) == HtmlElementFlag.CData;
    }
    return false;
}
/// <summary>
/// Determines if an element node is defined as empty.
/// </summary>
/// <param name="name">The name of the element node to check. May not be null.</param>
/// <returns>true if the name is the name of an empty element node, false otherwise.</returns>
public static bool IsEmptyElement(string name)
{
    if (name == null)
    {
        throw new ArgumentNullException("name");
    }

    if (name.Length == 0)
    {
        return true;
    }

    // Declarations such as "!DOCTYPE ..." never have content.
    if ('!' == name[0])
    {
        return true;
    }

    // Processing instructions such as "?xml ..." never have content
    // (WriteTo relies on this when it emits the trailing '?').
    if ('?' == name[0])
    {
        return true;
    }

    // Flags are registered under the lowercase tag name.
    object flag = ElementsFlags[name.ToLower()];
    if (flag == null)
    {
        return false;
    }
    return (((HtmlElementFlag)flag) & HtmlElementFlag.Empty) != 0;
}
/// <summary>
/// Creates an HTML node from a string representing literal HTML.
/// The text is loaded into a temporary document and the first child of
/// that document's root node is returned.
/// </summary>
/// <param name="html">The HTML text.</param>
/// <returns>The newly created node instance.</returns>
public static HtmlNode CreateNode(string html)
{
    // REVIEW: this is *not* optimum...
    HtmlDocument scratch = new HtmlDocument();
    scratch.LoadHtml(html);

    HtmlNode root = scratch.DocumentNode;
    return root.FirstChild;
}
/// <summary>
/// Copies the specified node, together with the subtree under it, into this node.
/// Equivalent to calling <c>CopyFrom(node, true)</c>.
/// </summary>
/// <param name="node">The node to duplicate. May not be null.</param>
public void CopyFrom(HtmlNode node)
{
    const bool includeSubtree = true;
    this.CopyFrom(node, includeSubtree);
}
/// <summary>
/// Copies the attributes of the specified node into this node and, optionally,
/// replaces this node's children with clones of the specified node's children.
/// </summary>
/// <param name="node">The node to duplicate. May not be null.</param>
/// <param name="deep">true to recursively clone the subtree under the specified node, false to clone only the node itself.</param>
public void CopyFrom(HtmlNode node, bool deep)
{
    if (node == null)
    {
        throw new ArgumentNullException("node");
    }

    // Attributes are always replaced.
    Attributes.RemoveAll();
    if (node.HasAttributes)
    {
        foreach (HtmlAttribute att in node.Attributes)
        {
            SetAttributeValue(att.Name, att.Value);
        }
    }

    // The subtree is copied only for a deep copy. The condition used to be
    // inverted ("!deep"), which contradicted the documented contract and
    // the CopyFrom(node) overload that passes true to copy the subtree.
    if (deep)
    {
        RemoveAllChildren();
        if (node.HasChildNodes)
        {
            foreach (HtmlNode child in node.ChildNodes)
            {
                AppendChild(child.CloneNode(true));
            }
        }
    }
}
// Creates a node of the given type for the given document.
// 'index' is stored as the outer start index; -1 marks a node that was not
// created at a position in the parsed text (see the checks below).
internal HtmlNode(HtmlNodeType type, HtmlDocument ownerdocument, int index)
{
_nodetype = type;
_ownerdocument = ownerdocument;
_outerstartindex = index;
// Comment, document and text nodes get a fixed name and are their own end
// node, so Closed is true for them from the start. Element nodes keep
// _name null (resolved lazily by the Name getter) and _endnode null.
switch(type)
{
case HtmlNodeType.Comment:
_name = HtmlNodeTypeNameComment;
_endnode = this;
break;
case HtmlNodeType.Document:
_name = HtmlNodeTypeNameDocument;
_endnode = this;
break;
case HtmlNodeType.Text:
_name = HtmlNodeTypeNameText;
_endnode = this;
break;
}
// Register still-open nodes with the document, when it tracks opened nodes.
if (_ownerdocument._openednodes != null)
{
if (!Closed)
{
// we use the index as the key
// -1 means the node comes from public
if (-1 != index)
{
_ownerdocument._openednodes.Add(index, this);
}
}
}
// Nodes without a source position (other than comments and text) have no
// text span to read from, so their HTML must be generated from the tree.
if ((-1 == index) && (type != HtmlNodeType.Comment) && (type != HtmlNodeType.Text))
{
// innerhtml and outerhtml must be calculated
_outerchanged = true;
_innerchanged = true;
}
}
// Marks this node as closed by 'endnode' and updates the document's
// bookkeeping and this node's inner/outer text spans accordingly.
// Passing the node itself as 'endnode' closes it without computing spans.
internal void CloseNode(HtmlNode endnode)
{
if (!_ownerdocument.OptionAutoCloseOnEnd)
{
// close all children
if (_childnodes != null)
{
foreach(HtmlNode child in _childnodes)
{
if (child.Closed)
continue;
// create a fake closer node
// (index -1: the closer has no position in the source text)
HtmlNode close = new HtmlNode(NodeType, _ownerdocument, -1);
close._endnode = close;
child.CloseNode(close);
}
}
}
if (!Closed)
{
_endnode = endnode;
// the node is no longer open: drop it from the opened-nodes table
if (_ownerdocument._openednodes != null)
{
_ownerdocument._openednodes.Remove(_outerstartindex);
}
// if this node is the one currently registered under its name in
// _lastnodes, unregister it and let the document refresh its state
HtmlNode self = _ownerdocument._lastnodes[Name] as HtmlNode;
if (self == this)
{
_ownerdocument._lastnodes.Remove(Name);
_ownerdocument.UpdateLastParentNode();
}
// self-closed: there is no separate end tag, hence no inner section
if (endnode == this)
return;
// create an inner section
// (starts right after the opening tag, ends where the end node starts)
_innerstartindex = _outerstartindex + _outerlength;
_innerlength = endnode._outerstartindex - _innerstartindex;
// update full length
// (from the start of the opening tag to the end of the end node)
_outerlength = (endnode._outerstartindex + endnode._outerlength) - _outerstartindex;
}
}
// Gets the node that closes this node: null while the node is not closed,
// possibly the node itself.
internal HtmlNode EndNode
{
    get { return _endnode; }
}
// Returns the value of this node's 'id' attribute, or null when the node
// has no such attribute.
internal string GetId()
{
    HtmlAttribute idAttribute = Attributes["id"];
    return (idAttribute == null) ? null : idAttribute.Value;
}
// Sets the value of this node's 'id' attribute, creating the attribute when
// it does not exist yet, and registers the node under that id in the owner
// document.
internal void SetId(string id)
{
    HtmlAttribute att = Attributes["id"];
    if (att == null)
    {
        // Attach the new attribute to this node. Previously the attribute was
        // created but never appended, so the id was registered in the document
        // while the node itself still had no 'id' attribute (GetId() returned
        // null and the attribute was missing from the written HTML).
        att = Attributes.Append(_ownerdocument.CreateAttribute("id"));
    }
    att.Value = id;
    _ownerdocument.SetIdForNode(this, att.Value);

    // the outer HTML must be regenerated to reflect the new attribute value
    _outerchanged = true;
}
/// <summary>
/// Creates a new XPathNavigator object for navigating this HTML node.
/// </summary>
/// <returns>An XPathNavigator object. The XPathNavigator is positioned on the node from which the method was called. It is not positioned on the root of the document.</returns>
public XPathNavigator CreateNavigator()
{
    HtmlNodeNavigator navigator = new HtmlNodeNavigator(_ownerdocument, this);
    return navigator;
}
/// <summary>
/// Selects the first node that matches the XPath expression, evaluated from this node.
/// </summary>
/// <param name="xpath">The XPath expression. May not be null.</param>
/// <returns>The first HtmlNode that matches the XPath query or a null reference if no matching node was found.</returns>
public HtmlNode SelectSingleNode(string xpath)
{
    if (xpath == null)
    {
        throw new ArgumentNullException("xpath");
    }

    HtmlNodeNavigator start = new HtmlNodeNavigator(_ownerdocument, this);
    XPathNodeIterator matches = start.Select(xpath);
    if (matches.MoveNext())
    {
        HtmlNodeNavigator first = (HtmlNodeNavigator)matches.Current;
        return first.CurrentNode;
    }
    return null;
}
/// <summary>
/// Selects a list of nodes matching the XPath expression, evaluated from this node.
/// </summary>
/// <param name="xpath">The XPath expression. May not be null.</param>
/// <returns>An HtmlNodeCollection containing a collection of nodes matching the XPath query, or null if no node matched the XPath expression.</returns>
public HtmlNodeCollection SelectNodes(string xpath)
{
    // Same argument contract as SelectSingleNode: fail early with a clear
    // exception instead of letting the XPath engine fail on a null expression.
    if (xpath == null)
    {
        throw new ArgumentNullException("xpath");
    }

    HtmlNodeCollection list = new HtmlNodeCollection(null);

    HtmlNodeNavigator nav = new HtmlNodeNavigator(_ownerdocument, this);
    XPathNodeIterator it = nav.Select(xpath);
    while (it.MoveNext())
    {
        HtmlNodeNavigator n = (HtmlNodeNavigator)it.Current;
        list.Add(n.CurrentNode);
    }

    // Callers rely on null (not an empty collection) meaning "no match".
    if (list.Count == 0)
    {
        return null;
    }
    return list;
}
/// <summary>
/// Gets or sets the value of the 'id' HTML attribute. The document must have been parsed using the OptionUseIdAttribute set to true.
/// Both accessors throw when the owner document keeps no id table; the setter also rejects null.
/// </summary>
public string Id
{
    get
    {
        bool idsTracked = (_ownerdocument._nodesid != null);
        if (!idsTracked)
        {
            throw new Exception(HtmlDocument.HtmlExceptionUseIdAttributeFalse);
        }
        return GetId();
    }
    set
    {
        bool idsTracked = (_ownerdocument._nodesid != null);
        if (!idsTracked)
        {
            throw new Exception(HtmlDocument.HtmlExceptionUseIdAttributeFalse);
        }
        if (null == value)
        {
            throw new ArgumentNullException("value");
        }
        SetId(value);
    }
}
/// <summary>
/// Gets the value of this node's start-tag flag.
/// NOTE(review): presumably true when the node was built from an opening tag;
/// the code that sets the flag is not part of this class - confirm in the parser.
/// </summary>
public bool StartTag
{
    get { return _starttag; }
}
/// <summary>
/// Gets the line number of this node in the document.
/// </summary>
public int Line
{
    get { return _line; }
}
/// <summary>
/// Gets the column number of this node in the document.
/// </summary>
public int LinePosition
{
    get { return _lineposition; }
}
/// <summary>
/// Gets the stream position of this node in the document, relative to the start of the document.
/// </summary>
public int StreamPosition
{
    get { return _streamposition; }
}
/// <summary>
/// Gets a value indicating if this node has been closed or not.
/// A node counts as closed as soon as it has an end node assigned.
/// </summary>
public bool Closed
{
    get
    {
        bool hasEndNode = (null != _endnode);
        return hasEndNode;
    }
}
/// <summary>
/// Gets or sets this node's name.
/// When no name has been assigned yet, the getter reads it from the owner
/// document's text, lowercases it and caches the result.
/// </summary>
public string Name
{
    get
    {
        if (_name != null)
        {
            return _name;
        }

        string raw = _ownerdocument._text.Substring(_namestartindex, _namelength);
        _name = raw.ToLower();
        return _name;
    }
    set { _name = value; }
}
/// <summary>
/// Gets the text between the start and end tags of the object.
/// For a text node this is its text, for a comment node its comment; for any
/// other node it is the concatenation of the InnerText of all its children
/// (an empty string when there are none). This property is read-only.
/// </summary>
public virtual string InnerText
{
    get
    {
        if (_nodetype == HtmlNodeType.Text)
        {
            return ((HtmlTextNode)this).Text;
        }

        if (_nodetype == HtmlNodeType.Comment)
        {
            return ((HtmlCommentNode)this).Comment;
        }

        // note: right now, this method is *slow*, because we recompute everything.
        // it could be optimised like innerhtml
        if (!HasChildNodes)
        {
            return string.Empty;
        }

        // Accumulate in a StringBuilder: repeated string concatenation copied
        // the whole accumulated text once per child (quadratic on wide trees).
        // Appending a null child text is a no-op, exactly as "+=" was.
        System.Text.StringBuilder text = new System.Text.StringBuilder();
        foreach (HtmlNode node in ChildNodes)
        {
            text.Append(node.InnerText);
        }
        return text.ToString();
    }
}
/// <summary>
/// Gets or Sets the HTML between the start and end tags of the object.
/// The getter regenerates and caches the HTML when the content has changed,
/// otherwise returns the cached value or the matching slice of the document text.
/// The setter parses the value in a temporary document and replaces this
/// node's children with the parsed nodes.
/// </summary>
public virtual string InnerHtml
{
    get
    {
        if (_innerchanged)
        {
            // content was modified: rebuild from the tree and cache it
            _innerhtml = WriteContentTo();
            _innerchanged = false;
            return _innerhtml;
        }

        if (_innerhtml == null)
        {
            if (_innerstartindex < 0)
            {
                return string.Empty;
            }
            return _ownerdocument._text.Substring(_innerstartindex, _innerlength);
        }
        return _innerhtml;
    }
    set
    {
        HtmlDocument parsed = new HtmlDocument();
        parsed.LoadHtml(value);

        RemoveAllChildren();
        AppendChildren(parsed.DocumentNode.ChildNodes);
    }
}
/// <summary>
/// Gets the object and its content in HTML. This property is read-only.
/// The HTML is regenerated and cached when the node has changed; otherwise the
/// cached value or the matching slice of the document text is returned.
/// </summary>
public virtual string OuterHtml
{
    get
    {
        if (_outerchanged)
        {
            // node was modified: rebuild from the tree and cache it
            _outerhtml = WriteTo();
            _outerchanged = false;
            return _outerhtml;
        }

        if (_outerhtml == null)
        {
            if (_outerstartindex < 0)
            {
                return string.Empty;
            }
            return _ownerdocument._text.Substring(_outerstartindex, _outerlength);
        }
        return _outerhtml;
    }
}
/// <summary>
/// Creates a duplicate of the node, including the subtree under it.
/// </summary>
/// <returns>The cloned node.</returns>
public HtmlNode Clone()
{
    const bool includeSubtree = true;
    return this.CloneNode(includeSubtree);
}
/// <summary>
/// Creates a duplicate of the node, including the subtree under it, and changes its name at the same time.
/// </summary>
/// <param name="newName">The new name of the cloned node. May not be null.</param>
/// <returns>The cloned node.</returns>
public HtmlNode CloneNode(string newName)
{
    const bool includeSubtree = true;
    return this.CloneNode(newName, includeSubtree);
}
/// <summary>
/// Creates a duplicate of the node and changes its name at the same time.
/// </summary>
/// <param name="newName">The new name of the cloned node. May not be null.</param>
/// <param name="deep">true to recursively clone the subtree under the specified node; false to clone only the node itself.</param>
/// <returns>The cloned node.</returns>
public HtmlNode CloneNode(string newName, bool deep)
{
    if (null == newName)
    {
        throw new ArgumentNullException("newName");
    }

    HtmlNode renamed = CloneNode(deep);
    renamed._name = newName;
    return renamed;
}
/// <summary>
/// Creates a duplicate of the node.
/// Comment and text nodes copy only their content; other nodes copy their
/// attributes, their closing-tag attributes and, for a deep clone, their children.
/// </summary>
/// <param name="deep">true to recursively clone the subtree under the specified node; false to clone only the node itself.</param>
/// <returns>The cloned node.</returns>
public HtmlNode CloneNode(bool deep)
{
    HtmlNode copy = _ownerdocument.CreateNode(_nodetype);
    copy._name = Name;

    // comment and text nodes carry only their content
    if (_nodetype == HtmlNodeType.Comment)
    {
        ((HtmlCommentNode)copy).Comment = ((HtmlCommentNode)this).Comment;
        return copy;
    }
    if (_nodetype == HtmlNodeType.Text)
    {
        ((HtmlTextNode)copy).Text = ((HtmlTextNode)this).Text;
        return copy;
    }

    // attributes
    if (HasAttributes)
    {
        foreach (HtmlAttribute source in _attributes)
        {
            HtmlAttribute duplicate = source.Clone();
            copy.Attributes.Append(duplicate);
        }
    }

    // closing attributes
    // NOTE(review): _endnode.CloneNode(false) already runs the attribute loop
    // above for the end node, so the loop below may append each closing
    // attribute a second time - confirm against HtmlAttributeCollection.Append.
    if (HasClosingAttributes)
    {
        copy._endnode = _endnode.CloneNode(false);
        foreach (HtmlAttribute source in _endnode._attributes)
        {
            HtmlAttribute duplicate = source.Clone();
            copy._endnode._attributes.Append(duplicate);
        }
    }

    // child nodes, only for a deep clone
    if (deep && HasChildNodes)
    {
        foreach (HtmlNode child in _childnodes)
        {
            HtmlNode childCopy = child.Clone();
            copy.AppendChild(childCopy);
        }
    }
    return copy;
}
/// <summary>
/// Gets the HTML node immediately following this element.
/// </summary>
public HtmlNode NextSibling
{
    get { return _nextnode; }
}
/// <summary>
/// Gets the node immediately preceding this node.
/// </summary>
public HtmlNode PreviousSibling
{
    get { return _prevnode; }
}
/// <summary>
/// Removes all the children and/or attributes of the current node,
/// including the attributes of a separate closing node when there is one.
/// </summary>
public void RemoveAll()
{
    RemoveAllChildren();

    if (HasAttributes)
    {
        _attributes.Clear();
    }

    // a node that is its own end node has no separate closing attributes
    bool separateEndNode = (_endnode != null) && (_endnode != this);
    if (separateEndNode && (_endnode._attributes != null))
    {
        _endnode._attributes.Clear();
    }

    // cached HTML is now stale
    _outerchanged = true;
    _innerchanged = true;
}
/// <summary>
/// Removes all the children of the current node. Does nothing when the node has no children.
/// </summary>
public void RemoveAllChildren()
{
    if (HasChildNodes)
    {
        if (_ownerdocument.OptionUseIdAttribute)
        {
            // remove nodes from id list
            foreach (HtmlNode child in _childnodes)
            {
                _ownerdocument.SetIdForNode(null, child.GetId());
            }
        }

        _childnodes.Clear();

        // cached HTML is now stale
        _outerchanged = true;
        _innerchanged = true;
    }
}
/// <summary>
/// Removes the specified child node.
/// Throws an ArgumentException when the node is not a child of this node.
/// </summary>
/// <param name="oldChild">The node being removed. May not be null.</param>
/// <returns>The node removed.</returns>
public HtmlNode RemoveChild(HtmlNode oldChild)
{
    if (null == oldChild)
    {
        throw new ArgumentNullException("oldChild");
    }

    // position of the child in our list, -1 when we have no list or it is not ours
    int position = (_childnodes != null) ? _childnodes[oldChild] : -1;
    if (position == -1)
    {
        throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
    }

    _childnodes.Remove(position);
    _ownerdocument.SetIdForNode(null, oldChild.GetId());

    // cached HTML is now stale
    _outerchanged = true;
    _innerchanged = true;
    return oldChild;
}
/// <summary>
/// Removes the specified child node, optionally keeping its children in its place.
/// </summary>
/// <param name="oldChild">The node being removed. May not be null.</param>
/// <param name="keepGrandChildren">true to keep grand children of the node, false otherwise.</param>
/// <returns>The node removed.</returns>
public HtmlNode RemoveChild(HtmlNode oldChild, bool keepGrandChildren)
{
    if (oldChild == null)
    {
        throw new ArgumentNullException("oldChild");
    }

    if ((oldChild._childnodes != null) && keepGrandChildren)
    {
        // get prev sibling
        HtmlNode prev = oldChild.PreviousSibling;

        // reroute grand children to ourselves
        foreach (HtmlNode grandchild in oldChild._childnodes)
        {
            // Advance the insertion point after each insert so the grand
            // children keep their document order. Inserting every one after
            // the same fixed sibling reversed them (a, b, c became c, b, a).
            prev = InsertAfter(grandchild, prev);
        }
    }

    RemoveChild(oldChild);

    // cached HTML is now stale
    _outerchanged = true;
    _innerchanged = true;
    return oldChild;
}
/// <summary>
/// Replaces the child node oldChild with newChild node.
/// A null newChild removes oldChild instead; a null oldChild appends newChild instead.
/// Throws an ArgumentException when oldChild is not a child of this node.
/// </summary>
/// <param name="newChild">The new node to put in the child list.</param>
/// <param name="oldChild">The node being replaced in the list.</param>
/// <returns>The new node when a replacement took place; otherwise the result of the removal or append.</returns>
public HtmlNode ReplaceChild(HtmlNode newChild, HtmlNode oldChild)
{
    if (null == newChild)
    {
        return RemoveChild(oldChild);
    }
    if (null == oldChild)
    {
        return AppendChild(newChild);
    }

    // position of the old child in our list, -1 when we have no list or it is not ours
    int position = (_childnodes != null) ? _childnodes[oldChild] : -1;
    if (position == -1)
    {
        throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
    }

    _childnodes.Replace(position, newChild);

    // keep the document's id table in sync
    _ownerdocument.SetIdForNode(null, oldChild.GetId());
    _ownerdocument.SetIdForNode(newChild, newChild.GetId());

    // cached HTML is now stale
    _outerchanged = true;
    _innerchanged = true;
    return newChild;
}
/// <summary>
/// Inserts the specified node immediately before the specified reference node.
/// A null reference node appends the new node instead.
/// Throws an ArgumentException when refChild is not a child of this node.
/// </summary>
/// <param name="newChild">The node to insert. May not be null.</param>
/// <param name="refChild">The node that is the reference node. The newChild is placed before this node.</param>
/// <returns>The node being inserted.</returns>
public HtmlNode InsertBefore(HtmlNode newChild, HtmlNode refChild)
{
    if (null == newChild)
    {
        throw new ArgumentNullException("newChild");
    }
    if (null == refChild)
    {
        return AppendChild(newChild);
    }
    if (refChild == newChild)
    {
        // inserting a node before itself is a no-op
        return newChild;
    }

    // position of the reference node in our list, -1 when we have no list or it is not ours
    int position = (_childnodes != null) ? _childnodes[refChild] : -1;
    if (position == -1)
    {
        throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
    }

    _childnodes.Insert(position, newChild);
    _ownerdocument.SetIdForNode(newChild, newChild.GetId());

    // cached HTML is now stale
    _outerchanged = true;
    _innerchanged = true;
    return newChild;
}
/// <summary>
/// Inserts the specified node immediately after the specified reference node.
/// A null reference node prepends the new node instead.
/// Throws an ArgumentException when refChild is not a child of this node.
/// </summary>
/// <param name="newChild">The node to insert. May not be null.</param>
/// <param name="refChild">The node that is the reference node. The newNode is placed after the refNode.</param>
/// <returns>The node being inserted.</returns>
public HtmlNode InsertAfter(HtmlNode newChild, HtmlNode refChild)
{
    if (null == newChild)
    {
        throw new ArgumentNullException("newChild");
    }
    if (null == refChild)
    {
        return PrependChild(newChild);
    }
    if (refChild == newChild)
    {
        // inserting a node after itself is a no-op
        return newChild;
    }

    // position of the reference node in our list, -1 when we have no list or it is not ours
    int position = (_childnodes != null) ? _childnodes[refChild] : -1;
    if (position == -1)
    {
        throw new ArgumentException(HtmlDocument.HtmlExceptionRefNotChild);
    }

    _childnodes.Insert(position + 1, newChild);
    _ownerdocument.SetIdForNode(newChild, newChild.GetId());

    // cached HTML is now stale
    _outerchanged = true;
    _innerchanged = true;
    return newChild;
}
/// <summary>
/// Gets the first child of the node, or null when the node has no children.
/// </summary>
public HtmlNode FirstChild
{
    get
    {
        return HasChildNodes ? _childnodes[0] : null;
    }
}
/// <summary>
/// Gets the last child of the node, or null when the node has no children.
/// </summary>
public HtmlNode LastChild
{
    get
    {
        if (HasChildNodes)
        {
            int lastIndex = _childnodes.Count - 1;
            return _childnodes[lastIndex];
        }
        return null;
    }
}
/// <summary>
/// Gets the type of this node.
/// </summary>
public HtmlNodeType NodeType
{
    get { return _nodetype; }
}
/// <summary>
/// Gets the parent of this node (for nodes that can have parents).
/// </summary>
public HtmlNode ParentNode
{
    get { return _parentnode; }
}
/// <summary>
/// Gets the HtmlDocument to which this node belongs.
/// </summary>
public HtmlDocument OwnerDocument
{
    get { return _ownerdocument; }
}
/// <summary>
/// Gets all the children of the node. The collection is created on first access.
/// </summary>
public HtmlNodeCollection ChildNodes
{
    get
    {
        if (_childnodes != null)
        {
            return _childnodes;
        }

        _childnodes = new HtmlNodeCollection(this);
        return _childnodes;
    }
}
/// <summary>
/// Adds the specified node to the beginning of the list of children of this node.
/// </summary>
/// <param name="newChild">The node to add. May not be null.</param>
/// <returns>The node added.</returns>
public HtmlNode PrependChild(HtmlNode newChild)
{
    if (null == newChild)
    {
        throw new ArgumentNullException("newChild");
    }

    HtmlNodeCollection children = ChildNodes;
    children.Prepend(newChild);
    _ownerdocument.SetIdForNode(newChild, newChild.GetId());

    // cached HTML is now stale
    _outerchanged = true;
    _innerchanged = true;
    return newChild;
}
/// <summary>
/// Adds the specified node list to the beginning of the list of children of this node.
/// The nodes keep the relative order they have in the list.
/// </summary>
/// <param name="newChildren">The node list to add. May not be null.</param>
public void PrependChildren(HtmlNodeCollection newChildren)
{
    if (newChildren == null)
    {
        throw new ArgumentNullException("newChildren");
    }

    // Prepend from the last node to the first so the list ends up in its
    // original order at the front. Prepending in forward order reversed it
    // (a, b, c became c, b, a).
    for (int i = newChildren.Count - 1; i >= 0; i--)
    {
        PrependChild(newChildren[i]);
    }
}
/// <summary>
/// Adds the specified node to the end of the list of children of this node.
/// </summary>
/// <param name="newChild">The node to add. May not be null.</param>
/// <returns>The node added.</returns>
public HtmlNode AppendChild(HtmlNode newChild)
{
    if (null == newChild)
    {
        throw new ArgumentNullException("newChild");
    }

    HtmlNodeCollection children = ChildNodes;
    children.Append(newChild);
    _ownerdocument.SetIdForNode(newChild, newChild.GetId());

    // cached HTML is now stale
    _outerchanged = true;
    _innerchanged = true;
    return newChild;
}
/// <summary>
/// Adds the specified node list to the end of the list of children of this node.
/// </summary>
/// <param name="newChildren">The node list to add. May not be null.</param>
public void AppendChildren(HtmlNodeCollection newChildren)
{
    if (newChildren == null)
    {
        // the parameter name reported here was misspelled "newChildrend"
        throw new ArgumentNullException("newChildren");
    }

    foreach (HtmlNode newChild in newChildren)
    {
        AppendChild(newChild);
    }
}
/// <summary>
/// Gets a value indicating whether the current node has any attributes.
/// </summary>
public bool HasAttributes
{
    get
    {
        return (_attributes != null) && (_attributes.Count > 0);
    }
}
/// <summary>
/// Gets a value indicating whether the current node has any attributes on the closing tag.
/// Always false when the node is not closed or is its own end node.
/// </summary>
public bool HasClosingAttributes
{
    get
    {
        if ((_endnode == null) || (_endnode == this))
        {
            return false;
        }

        HtmlAttributeCollection closing = _endnode._attributes;
        return (closing != null) && (closing.Count > 0);
    }
}
/// <summary>
/// Gets a value indicating whether this node has any child nodes.
/// </summary>
public bool HasChildNodes
{
    get
    {
        return (_childnodes != null) && (_childnodes.Count > 0);
    }
}
/// <summary>
/// Helper method to get the value of an attribute of this node. If the attribute is not found, the default value will be returned.
/// </summary>
/// <param name="name">The name of the attribute to get. May not be null.</param>
/// <param name="def">The default value to return if not found.</param>
/// <returns>The value of the attribute if found, the default value if not found.</returns>
public string GetAttributeValue(string name, string def)
{
    if (null == name)
    {
        throw new ArgumentNullException("name");
    }

    if (HasAttributes)
    {
        HtmlAttribute found = Attributes[name];
        if (found != null)
        {
            return found.Value;
        }
    }
    return def;
}
/// <summary>
/// Helper method to get the value of an attribute of this node as an integer.
/// If the attribute is not found, or its value cannot be converted, the default value will be returned.
/// </summary>
/// <param name="name">The name of the attribute to get. May not be null.</param>
/// <param name="def">The default value to return if not found.</param>
/// <returns>The value of the attribute if found and convertible, the default value otherwise.</returns>
public int GetAttributeValue(string name, int def)
{
    if (null == name)
    {
        throw new ArgumentNullException("name");
    }

    if (HasAttributes)
    {
        HtmlAttribute found = Attributes[name];
        if (found != null)
        {
            int converted;
            try
            {
                converted = Convert.ToInt32(found.Value);
            }
            catch
            {
                // best effort: any conversion failure yields the default
                return def;
            }
            return converted;
        }
    }
    return def;
}
/// <summary>
/// Helper method to get the value of an attribute of this node as a boolean.
/// If the attribute is not found, or its value cannot be converted, the default value will be returned.
/// </summary>
/// <param name="name">The name of the attribute to get. May not be null.</param>
/// <param name="def">The default value to return if not found.</param>
/// <returns>The value of the attribute if found and convertible, the default value otherwise.</returns>
public bool GetAttributeValue(string name, bool def)
{
    if (null == name)
    {
        throw new ArgumentNullException("name");
    }

    if (HasAttributes)
    {
        HtmlAttribute found = Attributes[name];
        if (found != null)
        {
            bool converted;
            try
            {
                converted = Convert.ToBoolean(found.Value);
            }
            catch
            {
                // best effort: any conversion failure yields the default
                return def;
            }
            return converted;
        }
    }
    return def;
}
/// <summary>
/// Helper method to set the value of an attribute of this node. If the attribute is not found, it will be created automatically.
/// </summary>
/// <param name="name">The name of the attribute to set. May not be null.</param>
/// <param name="value">The value for the attribute.</param>
/// <returns>The corresponding attribute instance.</returns>
public HtmlAttribute SetAttributeValue(string name, string value)
{
    if (null == name)
    {
        throw new ArgumentNullException("name");
    }

    HtmlAttribute existing = Attributes[name];
    if (existing != null)
    {
        existing.Value = value;
        return existing;
    }

    // not present yet: create it and attach it to this node
    return Attributes.Append(_ownerdocument.CreateAttribute(name, value));
}
/// <summary>
/// Gets the collection of HTML attributes for this node. May not be null.
/// The collection is created on first access and the same instance is
/// returned afterwards.
/// </summary>
public HtmlAttributeCollection Attributes
{
    get
    {
        // Create the collection only when there is none. Testing HasAttributes
        // here replaced an existing *empty* collection on every access, so a
        // reference obtained earlier silently stopped being the node's
        // collection. This mirrors the lazy creation used by ChildNodes.
        if (_attributes == null)
        {
            _attributes = new HtmlAttributeCollection(this);
        }
        return _attributes;
    }
}
/// <summary>
/// Gets the collection of HTML attributes for the closing tag. May not be null.
/// When the node has no closing attributes, a new empty collection is returned on each call.
/// </summary>
public HtmlAttributeCollection ClosingAttributes
{
    get
    {
        if (HasClosingAttributes)
        {
            return _endnode.Attributes;
        }
        return new HtmlAttributeCollection(this);
    }
}
// Writes one attribute, preceded by a space, to the given writer.
// XML output uses the attribute's XML name and HTML-encoded XML value.
// HTML output writes the raw value, writes server-side "<% ... %>" blocks
// as a bare name, and may omit the quotes when value optimization is on
// and the value contains no whitespace.
internal void WriteAttribute(TextWriter outText, HtmlAttribute att)
{
    bool upperCase;
    string label;

    if (_ownerdocument.OptionOutputAsXml)
    {
        upperCase = _ownerdocument.OptionOutputUpperCase;
        label = upperCase ? att.XmlName.ToUpper() : att.XmlName;
        outText.Write(" " + label + "=\"" + HtmlDocument.HtmlEncode(att.XmlValue) + "\"");
        return;
    }

    upperCase = _ownerdocument.OptionOutputUpperCase;
    label = upperCase ? att.Name.ToUpper() : att.Name;

    // an attribute whose name is a "<% ... %>" block is written as is, without a value
    if ((att.Name.Length >= 4) &&
        (att.Name[0] == '<') && (att.Name[1] == '%') &&
        (att.Name[att.Name.Length-1] == '>') && (att.Name[att.Name.Length-2] == '%'))
    {
        outText.Write(" " + label);
        return;
    }

    // quotes are dropped only when optimizing and the value has no LF, CR, TAB or space
    bool quoted = !_ownerdocument.OptionOutputOptimizeAttributeValues
        || (att.Value.IndexOfAny(new Char[]{(char)10, (char)13, (char)9, ' '}) >= 0);

    if (quoted)
    {
        outText.Write(" " + label + "=\"" + att.Value + "\"");
    }
    else
    {
        outText.Write(" " + label + "=" + att.Value);
    }
}
// Writes the attributes of the given node to an XmlWriter, using each
// attribute's XML name and its value. Does nothing for a node without attributes.
internal static void WriteAttributes(XmlWriter writer, HtmlNode node)
{
    if (node.HasAttributes)
    {
        // we use _hashitems to make sure attributes are written only once
        foreach (HtmlAttribute unique in node.Attributes._hashitems.Values)
        {
            writer.WriteAttributeString(unique.XmlName, unique.Value);
        }
    }
}
// Writes this node's attributes to the given writer.
// In XML output mode the opening attributes are written once per name and
// 'closing' is ignored. Otherwise 'closing' selects between the attributes
// of the opening tag (false) and those of a separate end node (true).
// With OptionAddDebuggingAttributes, extra "_closed", "_children" and
// (opening tag only) "_child_N" attributes are emitted.
internal void WriteAttributes(TextWriter outText, bool closing)
{
    if (_ownerdocument.OptionOutputAsXml)
    {
        if (_attributes != null)
        {
            // we use _hashitems to make sure attributes are written only once
            foreach (HtmlAttribute unique in _attributes._hashitems.Values)
            {
                WriteAttribute(outText, unique);
            }
        }
        return;
    }

    if (closing)
    {
        // nothing to write without a separate end node that carries attributes
        if ((_endnode == null) || (_endnode._attributes == null) || (_endnode == this))
        {
            return;
        }

        foreach (HtmlAttribute closingAtt in _endnode._attributes)
        {
            WriteAttribute(outText, closingAtt);
        }

        if (_ownerdocument.OptionAddDebuggingAttributes)
        {
            WriteAttribute(outText, _ownerdocument.CreateAttribute("_closed", Closed.ToString()));
            WriteAttribute(outText, _ownerdocument.CreateAttribute("_children", ChildNodes.Count.ToString()));
        }
        return;
    }

    if (_attributes != null)
    {
        foreach (HtmlAttribute openingAtt in _attributes)
        {
            WriteAttribute(outText, openingAtt);
        }
    }

    if (!_ownerdocument.OptionAddDebuggingAttributes)
    {
        return;
    }

    WriteAttribute(outText, _ownerdocument.CreateAttribute("_closed", Closed.ToString()));
    WriteAttribute(outText, _ownerdocument.CreateAttribute("_children", ChildNodes.Count.ToString()));

    // one "_child_N" attribute per child, carrying the child's name
    int position = 0;
    foreach (HtmlNode child in ChildNodes)
    {
        WriteAttribute(outText, _ownerdocument.CreateAttribute("_child_" + position,
            child.Name));
        position++;
    }
}
// Returns the content of a comment node prepared for XML output: the first
// 4 and the last 3 characters are dropped (assumes the text is delimited by
// the 4-character opening and 3-character closing comment markers - TODO
// confirm against HtmlCommentNode) and every "--" becomes " - -".
internal static string GetXmlComment(HtmlCommentNode comment)
{
    string raw = comment.Comment;
    int innerLength = raw.Length - 7;
    string inner = raw.Substring(4, innerLength);
    return inner.Replace("--", " - -");
}
/// <summary>
/// Saves the current node to the specified TextWriter.
/// Output honors the owner document's options: XML output (encoded text,
/// XML declaration, CDATA sections for CDATA elements, a single wrapping
/// span when there are several root nodes), upper-case names and empty-node syntax.
/// </summary>
/// <param name="outText">The TextWriter to which you want to save.</param>
public void WriteTo(TextWriter outText)
{
    string html;
    switch (_nodetype)
    {
        case HtmlNodeType.Comment:
            html = ((HtmlCommentNode)this).Comment;
            if (_ownerdocument.OptionOutputAsXml)
            {
                // re-wrap the sanitized comment content in comment delimiters
                outText.Write("<!--" + GetXmlComment((HtmlCommentNode)this) + " -->");
            }
            else
            {
                outText.Write(html);
            }
            break;

        case HtmlNodeType.Document:
            if (_ownerdocument.OptionOutputAsXml)
            {
                outText.Write("<?xml version=\"1.0\" encoding=\"" + _ownerdocument.GetOutEncoding().BodyName + "\"?>");

                // check there is a root element
                if (_ownerdocument.DocumentNode.HasChildNodes)
                {
                    int rootnodes = _ownerdocument.DocumentNode._childnodes.Count;
                    if (rootnodes > 0)
                    {
                        // an XML declaration node does not count as a root element
                        HtmlNode xml = _ownerdocument.GetXmlDeclaration();
                        if (xml != null)
                        {
                            rootnodes--;
                        }

                        // XML allows a single root: wrap several roots in one span
                        if (rootnodes > 1)
                        {
                            if (_ownerdocument.OptionOutputUpperCase)
                            {
                                outText.Write("<SPAN>");
                                WriteContentTo(outText);
                                outText.Write("</SPAN>");
                            }
                            else
                            {
                                outText.Write("<span>");
                                WriteContentTo(outText);
                                outText.Write("</span>");
                            }
                            break;
                        }
                    }
                }
            }
            WriteContentTo(outText);
            break;

        case HtmlNodeType.Text:
            html = ((HtmlTextNode)this).Text;
            if (_ownerdocument.OptionOutputAsXml)
            {
                outText.Write(HtmlDocument.HtmlEncode(html));
            }
            else
            {
                outText.Write(html);
            }
            break;

        case HtmlNodeType.Element:
            string name;
            if (_ownerdocument.OptionOutputUpperCase)
            {
                name = Name.ToUpper();
            }
            else
            {
                name = Name;
            }

            if (_ownerdocument.OptionOutputAsXml)
            {
                if (name.Length > 0)
                {
                    if (name[0] == '?')
                    {
                        // forget this one, it's been done at the document level
                        break;
                    }

                    if (name.Trim().Length == 0)
                    {
                        break;
                    }
                    name = HtmlAttribute.GetXmlName(name);
                }
                else
                {
                    break;
                }
            }

            outText.Write("<" + name);
            WriteAttributes(outText, false);

            if (!HasChildNodes)
            {
                if (HtmlNode.IsEmptyElement(Name))
                {
                    if ((_ownerdocument.OptionWriteEmptyNodes) || (_ownerdocument.OptionOutputAsXml))
                    {
                        outText.Write(" />");
                    }
                    else
                    {
                        // processing instructions end with "?>"
                        if (Name.Length > 0)
                        {
                            if (Name[0] == '?')
                            {
                                outText.Write("?");
                            }
                        }
                        outText.Write(">");
                    }
                }
                else
                {
                    // non-empty element without children: explicit closing tag
                    outText.Write("></" + name + ">");
                }
            }
            else
            {
                outText.Write(">");

                bool cdata = false;
                if (_ownerdocument.OptionOutputAsXml)
                {
                    if (HtmlNode.IsCDataElement(Name))
                    {
                        // this code and the following tries to output things as nicely as possible for old browsers.
                        cdata = true;
                        outText.Write("\r\n//<![CDATA[\r\n");
                    }
                }

                if (cdata)
                {
                    if (HasChildNodes)
                    {
                        // child must be a text
                        ChildNodes[0].WriteTo(outText);
                    }
                    outText.Write("\r\n//]]>//\r\n");
                }
                else
                {
                    WriteContentTo(outText);
                }

                outText.Write("</" + name);
                if (!_ownerdocument.OptionOutputAsXml)
                {
                    // closing-tag attributes are not valid XML, HTML output only
                    WriteAttributes(outText, true);
                }
                outText.Write(">");
            }
            break;
    }
}
/// <summary>
/// Saves the current node to the specified XmlWriter.
/// </summary>
/// <param name="writer">The XmlWriter to which you want to save.</param>
public void WriteTo(XmlWriter writer)
{
    // Emits this node, and recursively its children, through the given XmlWriter.
    // Node types other than the four handled below produce no output.
    switch (_nodetype)
    {
        case HtmlNodeType.Comment:
        {
            HtmlCommentNode commentNode = (HtmlCommentNode)this;
            writer.WriteComment(GetXmlComment(commentNode));
            break;
        }

        case HtmlNodeType.Document:
        {
            // The declaration advertises the document's output encoding.
            string encodingName = _ownerdocument.GetOutEncoding().BodyName;
            writer.WriteProcessingInstruction("xml", "version=\"1.0\" encoding=\"" + encodingName + "\"");
            if (HasChildNodes)
            {
                foreach (HtmlNode child in ChildNodes)
                {
                    child.WriteTo(writer);
                }
            }
            break;
        }

        case HtmlNodeType.Text:
        {
            HtmlTextNode textNode = (HtmlTextNode)this;
            writer.WriteString(textNode.Text);
            break;
        }

        case HtmlNodeType.Element:
        {
            string tagName = _ownerdocument.OptionOutputUpperCase ? Name.ToUpper() : Name;
            writer.WriteStartElement(tagName);
            WriteAttributes(writer, this);
            if (HasChildNodes)
            {
                foreach (HtmlNode child in ChildNodes)
                {
                    child.WriteTo(writer);
                }
            }
            writer.WriteEndElement();
            break;
        }
    }
}
/// <summary>
/// Saves all the children of the node to the specified TextWriter.
/// </summary>
/// <param name="outText">The TextWriter to which you want to save.</param>
public void WriteContentTo(TextWriter outText)
{
    // Writes every child in list order; a node without a child list writes nothing.
    if (_childnodes != null)
    {
        foreach (HtmlNode child in _childnodes)
        {
            child.WriteTo(outText);
        }
    }
}
/// <summary>
/// Saves the current node to a string.
/// </summary>
/// <returns>The saved string.</returns>
public string WriteTo()
{
    // Renders this node into an in-memory writer and returns the accumulated text.
    // The using block disposes the writer once the text has been captured.
    using (StringWriter sw = new StringWriter())
    {
        WriteTo(sw);
        sw.Flush();
        return sw.ToString();
    }
}
/// <summary>
/// Saves all the children of the node to a string.
/// </summary>
/// <returns>The saved string.</returns>
public string WriteContentTo()
{
    // Renders the children of this node into an in-memory writer and returns the accumulated text.
    // The using block disposes the writer once the text has been captured.
    using (StringWriter sw = new StringWriter())
    {
        WriteContentTo(sw);
        sw.Flush();
        return sw.ToString();
    }
}
}
/// <summary>
/// Represents a combined list and collection of HTML nodes.
/// </summary>
public class HtmlNodeCollection: IEnumerable
{
    // Backing store; every element is an HtmlNode.
    private ArrayList _items = new ArrayList();

    // Node assigned as the parent of items that are linked into this collection.
    private HtmlNode _parentnode;

    internal HtmlNodeCollection(HtmlNode parentnode)
    {
        _parentnode = parentnode; // may be null
    }

    /// <summary>
    /// Gets the number of elements actually contained in the list.
    /// </summary>
    public int Count
    {
        get
        {
            return _items.Count;
        }
    }

    // Detaches every node (parent and sibling links are reset) and empties the list.
    internal void Clear()
    {
        foreach(HtmlNode node in _items)
        {
            node._parentnode = null;
            node._nextnode = null;
            node._prevnode = null;
        }
        _items.Clear();
    }

    // Removes the node at the given index, links its former neighbours to each other
    // and detaches the removed node.
    internal void Remove(int index)
    {
        HtmlNode next = null;
        HtmlNode prev = null;
        HtmlNode oldnode = (HtmlNode)_items[index];

        if (index > 0)
        {
            prev = (HtmlNode)_items[index-1];
        }

        if (index < (_items.Count-1))
        {
            next = (HtmlNode)_items[index+1];
        }

        _items.RemoveAt(index);

        if (prev != null)
        {
            // sanity check: a node must never become its own sibling
            if (next == prev)
            {
                throw new InvalidProgramException("Unexpected error.");
            }
            prev._nextnode = next;
        }

        if (next != null)
        {
            next._prevnode = prev;
        }

        oldnode._prevnode = null;
        oldnode._nextnode = null;
        oldnode._parentnode = null;
    }

    // Puts the given node at the given index in place of the node currently stored there,
    // links it to the neighbours and detaches the node that was replaced.
    internal void Replace(int index, HtmlNode node)
    {
        HtmlNode next = null;
        HtmlNode prev = null;
        HtmlNode oldnode = (HtmlNode)_items[index];

        if (index>0)
        {
            prev = (HtmlNode)_items[index-1];
        }

        if (index<(_items.Count-1))
        {
            next = (HtmlNode)_items[index+1];
        }

        _items[index] = node;

        if (prev != null)
        {
            // sanity check: a node must never become its own sibling
            if (node == prev)
            {
                throw new InvalidProgramException("Unexpected error.");
            }
            prev._nextnode = node;
        }

        if (next!=null)
        {
            next._prevnode = node;
        }

        node._prevnode = prev;
        if (next == node)
        {
            throw new InvalidProgramException("Unexpected error.");
        }
        node._nextnode = next;
        node._parentnode = _parentnode;

        // When a node is replaced by itself it is still part of the list:
        // resetting its links here would orphan it.
        if (oldnode != node)
        {
            oldnode._prevnode = null;
            oldnode._nextnode = null;
            oldnode._parentnode = null;
        }
    }

    // Inserts the node at the given index and links it between its new neighbours.
    internal void Insert(int index, HtmlNode node)
    {
        HtmlNode next = null;
        HtmlNode prev = null;

        if (index>0)
        {
            prev = (HtmlNode)_items[index-1];
        }

        // the node currently at 'index' becomes the next sibling
        if (index<_items.Count)
        {
            next = (HtmlNode)_items[index];
        }

        _items.Insert(index, node);

        if (prev != null)
        {
            // sanity check: a node must never become its own sibling
            if (node == prev)
            {
                throw new InvalidProgramException("Unexpected error.");
            }
            prev._nextnode = node;
        }

        if (next != null)
        {
            next._prevnode = node;
        }

        node._prevnode = prev;
        if (next == node)
        {
            throw new InvalidProgramException("Unexpected error.");
        }
        node._nextnode = next;
        node._parentnode = _parentnode;
    }

    // Adds the node at the end of the list and links it after the previous last node.
    internal void Append(HtmlNode node)
    {
        HtmlNode last = null;
        if (_items.Count > 0)
        {
            last = (HtmlNode)_items[_items.Count-1];
        }

        _items.Add(node);
        node._prevnode = last;
        node._nextnode = null;
        node._parentnode = _parentnode;
        if (last != null)
        {
            // sanity check: a node must never become its own sibling
            if (last == node)
            {
                throw new InvalidProgramException("Unexpected error.");
            }
            last._nextnode = node;
        }
    }

    // Adds the node at the start of the list and links it before the previous first node.
    internal void Prepend(HtmlNode node)
    {
        HtmlNode first = null;
        if (_items.Count > 0)
        {
            first = (HtmlNode)_items[0];
        }

        _items.Insert(0, node);

        // sanity check: a node must never become its own sibling
        if (node == first)
        {
            throw new InvalidProgramException("Unexpected error.");
        }
        node._nextnode = first;
        node._prevnode = null;
        node._parentnode = _parentnode;
        if (first != null)
        {
            first._prevnode = node;
        }
    }

    // Adds the node to the list only: unlike Append, the parent and sibling
    // links of the node are left untouched.
    internal void Add(HtmlNode node)
    {
        _items.Add(node);
    }

    /// <summary>
    /// Gets the node at the specified index.
    /// </summary>
    public HtmlNode this[int index]
    {
        get
        {
            return _items[index] as HtmlNode;
        }
    }

    // Returns the position of the node in the list (reference comparison), or -1 when it is absent.
    internal int GetNodeIndex(HtmlNode node)
    {
        // TODO: should we rewrite this? what would be the key of a node?
        for(int i=0;i<_items.Count;i++)
        {
            if (node == ((HtmlNode)_items[i]))
            {
                return i;
            }
        }
        return -1;
    }

    /// <summary>
    /// Gets the index of a given node in the list.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">The node is not part of the collection.</exception>
    public int this[HtmlNode node]
    {
        get
        {
            int index = GetNodeIndex(node);
            if (index == -1)
            {
                throw new ArgumentOutOfRangeException("node", "Node \"" + node.CloneNode(false).OuterHtml + "\" was not found in the collection");
            }
            return index;
        }
    }

    /// <summary>
    /// Returns an enumerator that can iterate through the list.
    /// </summary>
    /// <returns>An IEnumerator for the entire list.</returns>
    public HtmlNodeEnumerator GetEnumerator()
    {
        return new HtmlNodeEnumerator(_items);
    }

    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }

    /// <summary>
    /// Represents an enumerator that can iterate through the list.
    /// </summary>
    public class HtmlNodeEnumerator: IEnumerator
    {
        int _index;
        ArrayList _items;

        internal HtmlNodeEnumerator(ArrayList items)
        {
            _items = items;
            _index = -1;
        }

        /// <summary>
        /// Sets the enumerator to its initial position, which is before the first element in the collection.
        /// </summary>
        public void Reset()
        {
            _index = -1;
        }

        /// <summary>
        /// Advances the enumerator to the next element of the collection.
        /// </summary>
        /// <returns>true if the enumerator was successfully advanced to the next element, false if the enumerator has passed the end of the collection.</returns>
        public bool MoveNext()
        {
            _index++;
            return (_index<_items.Count);
        }

        /// <summary>
        /// Gets the current element in the collection.
        /// </summary>
        public HtmlNode Current
        {
            get
            {
                return (HtmlNode)(_items[_index]);
            }
        }

        /// <summary>
        /// Gets the current element in the collection.
        /// </summary>
        object IEnumerator.Current
        {
            get
            {
                return (Current);
            }
        }
    }
}
/// <summary>
/// Represents an HTML text node.
/// </summary>
public class HtmlTextNode: HtmlNode
{
    // Explicitly assigned text; null until one of the setters below has been used.
    private string _text;

    internal HtmlTextNode(HtmlDocument ownerdocument, int index)
        : base(HtmlNodeType.Text, ownerdocument, index)
    {
    }

    /// <summary>
    /// Gets or sets the HTML of the node. For a text node, reading it yields the same value as OuterHtml.
    /// </summary>
    public override string InnerHtml
    {
        get { return OuterHtml; }
        set { _text = value; }
    }

    /// <summary>
    /// Gets the object and its content in HTML: the assigned text when there is one,
    /// the value of the base implementation otherwise.
    /// </summary>
    public override string OuterHtml
    {
        get
        {
            return (_text != null) ? _text : base.OuterHtml;
        }
    }

    /// <summary>
    /// Gets or sets the text of the node. When no text has been assigned,
    /// the outer HTML of the base implementation is returned.
    /// </summary>
    public string Text
    {
        get
        {
            return (_text != null) ? _text : base.OuterHtml;
        }
        set { _text = value; }
    }
}
/// <summary>
/// Represents an HTML comment.
/// </summary>
public class HtmlCommentNode: HtmlNode
{
    // Explicitly assigned comment text; null until one of the setters below has been used.
    private string _comment;

    internal HtmlCommentNode(HtmlDocument ownerdocument, int index):
        base(HtmlNodeType.Comment, ownerdocument, index)
    {
    }

    /// <summary>
    /// Gets or sets the inner HTML of the comment. When no text has been assigned,
    /// the value of the base implementation is returned.
    /// </summary>
    public override string InnerHtml
    {
        get
        {
            if (_comment == null)
            {
                return base.InnerHtml;
            }
            return _comment;
        }
        set
        {
            _comment = value;
        }
    }

    /// <summary>
    /// Gets the object and its content in HTML. When a comment text has been assigned,
    /// it is returned enclosed in comment delimiters; otherwise the value of the base
    /// implementation is returned.
    /// </summary>
    public override string OuterHtml
    {
        get
        {
            if (_comment == null)
            {
                return base.OuterHtml;
            }
            // NOTE(review): the delimiters are added unconditionally, so a text assigned
            // with its own "<!--" / "-->" ends up with doubled delimiters - confirm what
            // callers are expected to pass to the setters.
            return "<!--" + _comment + "-->";
        }
    }

    /// <summary>
    /// Gets or sets the comment text of the node. When no text has been assigned,
    /// the inner HTML of the base implementation is returned.
    /// </summary>
    public string Comment
    {
        get
        {
            if (_comment == null)
            {
                return base.InnerHtml;
            }
            return _comment;
        }
        set
        {
            _comment = value;
        }
    }
}
}