root->innerHtml(); }

    /**
     * Magic getter that forwards any property read to the root node.
     *
     * @param string $name Name of the property to read from the root node.
     * @return mixed
     */
    public function __get($name)
    {
        return $this->root->$name;
    }

    /**
     * Attempts to load the dom from any resource, string, file, or URL.
     *
     * The input is dispatched to loadFromFile(), loadFromUrl() or loadStr()
     * depending on what it looks like.
     *
     * @param string $str
     * @param array $options
     * @return $this
     */
    public function load($str, $options = array())
    {
        // Only single-line input is considered a candidate file path.
        if (strpos($str, "\n") === false && is_file($str)) {
            return $this->loadFromFile($str, $options);
        }

        // Anything starting with http:// or https:// is treated as a url.
        if (preg_match("/^https?:\/\//i", $str)) {
            return $this->loadFromUrl($str, $options);
        }

        return $this->loadStr($str, $options);
    }

    /**
     * Loads the dom from a document file/url.
     *
     * The content is read with file_get_contents() and passed to loadStr().
     *
     * @param string $file
     * @param array $options
     * @return $this
     */
    public function loadFromFile($file, $options = array())
    {
        return $this->loadStr(file_get_contents($file), $options);
    }

    /**
     * Use a curl interface implementation to attempt to load
     * the content from a url.
     *
     * @param string $url
     * @param array $options
     * @param CurlInterface $curl Optional; a new Curl instance is used when omitted.
     * @return $this
     */
    public function loadFromUrl($url, $options = array(), CurlInterface $curl = null)
    {
        if (is_null($curl)) {
            // use the default curl interface
            $curl = new Curl;
        }
        $content = $curl->get($url);

        return $this->loadStr($content, $options);
    }

    /**
     * Parses the html of the given string. Used for load(), loadFromFile(),
     * and loadFromUrl().
     *
     * The global options are applied first, then the options given for this
     * call. The raw string and its length are recorded before cleaning; the
     * cleaned html is what gets wrapped in a Content object and parsed.
     *
     * @param string $str
     * @param array $option
     * @return $this
     */
    public function loadStr($str, $option)
    {
        $this->options = new Options;
        $this->options->setOptions($this->globalOptions)
                      ->setOptions($option);

        $this->rawSize = strlen($str);
        $this->raw     = $str;

        $html = $this->clean($str);

        // Measure the cleaned html here; rawSize already holds the length
        // of the untouched input.
        $this->size    = strlen($html);
        $this->content = new Content($html);

        $this->parse();
        $this->detectCharset();

        return $this;
    }

    /**
     * Sets a global options array to be used by all load calls.
*
     * @param array $options
     * @return $this
     */
    public function setOptions(array $options)
    {
        $this->globalOptions = $options;

        return $this;
    }

    /**
     * Runs a css selector against the root node and returns the matches.
     *
     * @param string $selector
     * @param int $nth
     * @return array
     */
    public function find($selector, $nth = null)
    {
        $this->isLoaded();

        $matches = $this->root->find($selector, $nth);

        return $matches;
    }

    /**
     * Registers one tag, or every tag of an array, as always self closing.
     *
     * @param string|array $tag
     * @return $this
     */
    public function addSelfClosingTag($tag)
    {
        $tags = is_array($tag) ? $tag : array($tag);

        foreach ($tags as $name) {
            $this->selfClosing[] = $name;
        }

        return $this;
    }

    /**
     * Drops one tag, or every tag of an array, from the list of tags that
     * are always self closing.
     *
     * @param string|array $tag
     * @return $this
     */
    public function removeSelfClosingTag($tag)
    {
        $tags = is_array($tag) ? $tag : array($tag);

        $this->selfClosing = array_diff($this->selfClosing, $tags);

        return $this;
    }

    /**
     * Empties the list of self closing tags.
     *
     * @return $this
     */
    public function clearSelfClosingTags()
    {
        $this->selfClosing = array();

        return $this;
    }

    /**
     * Convenience accessor for the first child of the root node.
     *
     * @return \PHPHtmlParser\Dom\AbstractNode
     */
    public function firstChild()
    {
        $this->isLoaded();

        $root = $this->root;

        return $root->firstChild();
    }

    /**
     * Convenience accessor for the last child of the root node.
     *
     * @return \PHPHtmlParser\Dom\AbstractNode
     */
    public function lastChild()
    {
        $this->isLoaded();

        $root = $this->root;

        return $root->lastChild();
    }

    /**
     * Convenience lookup of a single element by its id, implemented as a
     * find() on the "#id" selector with index 0.
     *
     * @param string $id
     * @return \PHPHtmlParser\Dom\AbstractNode
     */
    public function getElementById($id)
    {
        $this->isLoaded();

        $selector = '#' . $id;

        return $this->find($selector, 0);
    }

    /**
     * Simple wrapper function that returns all elements by
     * tag name.
*
     * @param string $name
     * @return array
     */
    public function getElementsByTag($name)
    {
        $this->isLoaded();

        $matches = $this->find($name);

        return $matches;
    }

    /**
     * Convenience lookup of all elements carrying the given class,
     * implemented as a find() on the ".class" selector.
     *
     * @param string $class
     * @return array
     */
    public function getElementsByClass($class)
    {
        $this->isLoaded();

        $selector = '.' . $class;

        return $this->find($selector);
    }

    /**
     * Guards against use before any content has been loaded.
     *
     * @throws NotLoadedException When no content has been set yet.
     */
    protected function isLoaded()
    {
        if ($this->content !== null) {
            return;
        }

        throw new NotLoadedException('Content is not loaded!');
    }

    /** * Cleans the html of any none-html information. * * @param string $str * @return string */ protected function clean($str) { if ($this->options->get('cleanupInput') != true) { // skip entire cleanup step return $str; } // remove white space before closing tags $str = mb_eregi_replace("'\s+>", "'>", $str); $str = mb_eregi_replace('"\s+>', '">', $str); // clean out the \n\r $replace = ' '; if ($this->options->get('preserveLineBreaks')) { $replace = ' '; } $str = str_replace(array("\r\n", "\r", "\n"), $replace, $str); // strip the doctype $str = mb_eregi_replace("", '', $str); // strip out comments $str = mb_eregi_replace("", '', $str); // strip out cdata $str = mb_eregi_replace("", '', $str); // strip out