root->innerHtml();
}
/**
* A simple wrapper around the root node.
*
* @param string $name
* @return mixed
*/
public function __get($name)
{
    // Dynamic property reads on this object are forwarded, unchanged,
    // to the root node.
    $rootNode = $this->root;

    return $rootNode->{$name};
}
/**
* Attempts to load the dom from any resource, string, file, or URL.
*
* @param string $str
* @param array $options
* @return $this
*/
public function load($str, $options = array())
{
    // Input containing a line break is never probed against the
    // filesystem; only single-line input can be treated as a path.
    $isSingleLine = (strpos($str, "\n") === false);

    if ($isSingleLine && is_file($str)) {
        // An existing file on disk.
        $dom = $this->loadFromFile($str, $options);
    } elseif (preg_match("/^https?:\/\//i", $str)) {
        // Starts with http:// or https:// (case-insensitive).
        $dom = $this->loadFromUrl($str, $options);
    } else {
        // Anything else is parsed directly as markup.
        $dom = $this->loadStr($str, $options);
    }

    return $dom;
}
/**
* Loads the dom from a document file/url
*
* @param string $file
* @param array $options
* @return $this
*/
public function loadFromFile($file, $options = array())
{
    $content = file_get_contents($file);

    // file_get_contents() reports failure by returning false. Handing that
    // value to loadStr() would quietly parse an empty document, so the
    // failure is surfaced to the caller as a \RuntimeException instead.
    if ($content === false) {
        throw new \RuntimeException('Unable to read content from "'.$file.'".');
    }

    return $this->loadStr($content, $options);
}
/**
* Use a curl interface implementation to attempt to load
* the content from a url.
*
* @param string $url
* @param array $options
* @param CurlInterface $curl
* @return $this
*/
public function loadFromUrl($url, $options = array(), CurlInterface $curl = null)
{
    // Fall back to the default Curl implementation when the caller did
    // not inject one.
    $client = ($curl === null) ? new Curl() : $curl;

    // Fetch the body, then parse it like any other string.
    $body = $client->get($url);

    return $this->loadStr($body, $options);
}
/**
* Parses the html of the given string. Used for load(), loadFromFile(),
* and loadFromUrl().
*
* @param string $str
* @param array $option
* @return $this
*/
public function loadStr($str, $option)
{
    // Build a fresh Options object for this load: the global options are
    // applied first, then the options passed to this call.
    $this->options = new Options;
    $this->options->setOptions($this->globalOptions)
                  ->setOptions($option);

    // Keep the untouched input and its length in bytes.
    $this->rawSize = strlen($str);
    $this->raw     = $str;

    // clean() reads $this->options, so it has to run after they are set.
    $html = $this->clean($str);

    // Length of the cleaned markup. This previously measured $str again,
    // which made size an exact duplicate of rawSize.
    $this->size    = strlen($html);
    $this->content = new Content($html);

    $this->parse();
    $this->detectCharset();

    return $this;
}
/**
* Sets a global options array to be used by all load calls.
*
* @param array $options
* @return $this
*/
public function setOptions(array $options)
{
    // Stored as given; loadStr() applies these to the Options object it
    // creates on every load.
    $this->globalOptions = $options;

    // Fluent interface.
    return $this;
}
/**
* Find elements by css selector on the root node.
*
* @param string $selector
* @param int $nth
* @return array
*/
public function find($selector, $nth = null)
{
    // Guard: throws NotLoadedException when no content has been loaded.
    $this->isLoaded();

    // The actual selector lookup is done by the root node.
    $matches = $this->root->find($selector, $nth);

    return $matches;
}
/**
* Adds the tag (or tags in an array) to the list of tags that will always
* be self closing.
*
* @param string|array $tag
* @return $this
*/
public function addSelfClosingTag($tag)
{
    // Treat a single tag as a one-element list so both input forms share
    // the same append loop.
    $tags = is_array($tag) ? $tag : array($tag);

    foreach ($tags as $tagName) {
        $this->selfClosing[] = $tagName;
    }

    return $this;
}
/**
* Removes the tag (or tags in an array) from the list of tags that will
* always be self closing.
*
* @param string|array $tag
* @return $this
*/
public function removeSelfClosingTag($tag)
{
    // Treat a single tag as a one-element list.
    $tags = is_array($tag) ? $tag : array($tag);

    // Keep only the entries that are not in the removal list.
    // array_diff() preserves the keys of the entries that remain.
    $remaining = array_diff($this->selfClosing, $tags);
    $this->selfClosing = $remaining;

    return $this;
}
/**
* Sets the list of self closing tags to empty.
*
* @return $this
*/
public function clearSelfClosingTags()
{
    // Replace the list with an empty one.
    $noTags = array();
    $this->selfClosing = $noTags;

    return $this;
}
/**
* Simple wrapper function that returns the first child.
*
* @return \PHPHtmlParser\Dom\AbstractNode
*/
public function firstChild()
{
    // Guard: throws NotLoadedException when no content has been loaded.
    $this->isLoaded();

    $child = $this->root->firstChild();

    return $child;
}
/**
* Simple wrapper function that returns the last child.
*
* @return \PHPHtmlParser\Dom\AbstractNode
*/
public function lastChild()
{
    // Guard: throws NotLoadedException when no content has been loaded.
    $this->isLoaded();

    $child = $this->root->lastChild();

    return $child;
}
/**
* Simple wrapper function that returns an element by the
* id.
*
* @param string $id
* @return \PHPHtmlParser\Dom\AbstractNode
*/
public function getElementById($id)
{
    // Guard: throws NotLoadedException when no content has been loaded.
    $this->isLoaded();

    // Build the "#id" selector and request index 0 from find().
    $selector = '#'.$id;

    return $this->find($selector, 0);
}
/**
* Simple wrapper function that returns all elements by
* tag name.
*
* @param string $name
* @return array
*/
public function getElementsByTag($name)
{
    // Guard: throws NotLoadedException when no content has been loaded.
    $this->isLoaded();

    // The tag name is passed to find() unchanged as the selector.
    $elements = $this->find($name);

    return $elements;
}
/**
* Simple wrapper function that returns all elements by
* class name.
*
* @param string $class
* @return array
*/
public function getElementsByClass($class)
{
    // Guard: throws NotLoadedException when no content has been loaded.
    $this->isLoaded();

    // Build the ".class" selector and delegate to find().
    $selector = '.'.$class;

    return $this->find($selector);
}
/**
* Checks if the load methods have been called.
*
* @throws NotLoadedException
*/
protected function isLoaded()
{
    // Content is assigned by loadStr(); once it is non-null there is
    // nothing to complain about.
    if ($this->content !== null) {
        return;
    }

    throw new NotLoadedException('Content is not loaded!');
}
/**
* Cleans the html of any non-html information.
*
* @param string $str
* @return string
*/
protected function clean($str)
{
if ($this->options->get('cleanupInput') != true) {
// skip entire cleanup step
return $str;
}
// remove white space before closing tags
$str = mb_eregi_replace("'\s+>", "'>", $str);
$str = mb_eregi_replace('"\s+>', '">', $str);
// clean out the \n\r
$replace = ' ';
if ($this->options->get('preserveLineBreaks')) {
$replace = '
';
}
$str = str_replace(array("\r\n", "\r", "\n"), $replace, $str);
// strip the doctype
$str = mb_eregi_replace("", '', $str);
// strip out comments
$str = mb_eregi_replace("", '', $str);
// strip out cdata
$str = mb_eregi_replace("", '', $str);
// strip out