loadHTML( '' . $content . '' );
		libxml_clear_errors();
		libxml_use_internal_errors( $libxml_previous_state );

		if ( ! $result ) {
			return false;
		}

		return $dom;
	}

	/**
	 * Return valid HTML content extracted from the DOMDocument passed as a parameter.
	 *
	 * Only the children of the first `body` element are serialized; each child is
	 * passed through get_content_from_dom_node() and the results are concatenated.
	 *
	 * @see Reciprocal function get_dom_from_content()
	 *
	 * @since 0.2
	 *
	 * @param DOMDocument $dom Represents an HTML document from which to extract HTML content.
	 *
	 * @return string Returns the HTML content represented in the DOMDocument,
	 *                or an empty string when the document has no body element.
	 */
	public static function get_content_from_dom( $dom ) {

		/**
		 * We only want children of the body tag, since we have a subset of HTML.
		 */
		$body = $dom->getElementsByTagName( 'body' )->item( 0 );

		/**
		 * The DOMDocument may contain no body. In which case return nothing.
		 */
		if ( is_null( $body ) ) {
			return '';
		}

		$out = '';
		foreach ( $body->childNodes as $child_node ) {
			$out .= self::get_content_from_dom_node( $dom, $child_node );
		}

		return $out;
	}

	/**
	 * Return valid HTML content extracted from the DOMNode passed as a parameter.
	 *
	 * The node is first normalized with recursive_force_closing_tags(), then
	 * serialized with saveXML(). Any explicit closing tag emitted for a
	 * self-closing element is collapsed back to the `/>` form.
	 *
	 * @see Called by function get_content_from_dom()
	 *
	 * @since 0.6
	 *
	 * @param DOMDocument $dom  Represents an HTML document.
	 * @param DOMNode     $node Represents an HTML element of the $dom from which to extract HTML content.
	 * @return string Returns the HTML content represented in the DOMNode,
	 *                or an empty string when the serialized node is only whitespace.
	 */
	public static function get_content_from_dom_node( $dom, $node ) {
		/**
		 * Self closing tags regex.
		 *
		 * @var string Regular expression to match self-closing tags
		 *      that saveXML() has generated a closing tag for.
		 */
		static $self_closing_tags_regex;

		/*
		 * Most AMP elements need closing tags. To force them, we cannot use
		 * saveHTML (node support is 5.3+) and LIBXML_NOEMPTYTAG results in
		 * issues with self-closing tags like `br` and `hr`. So, we're manually
		 * forcing closing tags.
		 */
		self::recursive_force_closing_tags( $dom, $node );

		/*
		 * Cache this regex so we don't have to recreate it every call.
		 *
		 * The pattern matches the end of an opening tag immediately followed by
		 * the closing tag of a self-closing element, e.g. the `></br>` in
		 * `<br></br>`. Matching a bare `>` here would rewrite every tag in the
		 * output, so the tag alternation must be part of the pattern.
		 */
		if ( ! isset( $self_closing_tags_regex ) ) {
			$self_closing_tags       = implode( '|', self::get_self_closing_tags() );
			$self_closing_tags_regex = "#></({$self_closing_tags})>#i";
		}

		$html = $dom->saveXML( $node );

		// Whitespace just causes unit tests to fail... so whitespace begone.
		if ( '' === trim( $html ) ) {
			return '';
		}

		/*
		 * Some environments (the original note cites Travis with PHP 7.x)
		 * serialize self-closing elements with an explicit closing tag, e.g.
		 * `<br></br>` and `<hr></hr>` instead of `<br/>` and `<hr/>`.
		 * It looks as if LIBXML_NOEMPTYTAG was passed, but as you can see it was not.
		 * The original author (@mikeschinkel) could not reproduce this locally.
		 * Collapse such pairs back to the self-closing form.
		 */
		$html = preg_replace( $self_closing_tags_regex, '/>', $html );

		return $html;
	}

	/**
	 * Create a new node w/attributes (a DOMElement) and add to the passed DOMDocument.
	 *
	 * @since 0.2
	 *
	 * @param DOMDocument $dom        A representation of an HTML document to add the new node to.
	 * @param string      $tag        A valid HTML element tag for the element to be added.
	 * @param string[]    $attributes One of more valid attributes for the new node.
	 *
	 * @return DOMElement|false The DOMElement for the given $tag, or false on failure
	 */
	public static function create_node( $dom, $tag, $attributes ) {
		$node = $dom->createElement( $tag );

		// Honor the documented contract: do not try to set attributes on a failed creation.
		if ( false === $node ) {
			return false;
		}

		self::add_attributes_to_node( $node, $attributes );

		return $node;
	}

	/**
	 * Extract a DOMElement node's HTML element attributes and return as an array.
	 *
	 * @since 0.2
	 *
	 * @param DOMNode $node Represents an HTML element for which to extract attributes.
	 *
	 * @return string[] The attributes for the passed node, keyed by attribute name,
	 *                  or an empty array if it has no attributes.
	 */
	public static function get_node_attributes_as_assoc_array( $node ) {
		$attributes = array();
		if ( ! $node->hasAttributes() ) {
			return $attributes;
		}

		foreach ( $node->attributes as $attribute ) {
			$attributes[ $attribute->nodeName ] = $attribute->nodeValue;
		}

		return $attributes;
	}

	/**
	 * Add one or more HTML element attributes to a node's DOMElement.
	 *
	 * @since 0.2
	 *
	 * @param DOMElement $node       Represents an HTML element.
	 * @param string[]   $attributes One or more attributes for the node's HTML element,
	 *                               as name => value pairs.
	 */
	public static function add_attributes_to_node( $node, $attributes ) {
		foreach ( $attributes as $name => $value ) {
			$node->setAttribute( $name, $value );
		}
	}

	/**
	 * Determines if a DOMElement's node is empty or not.
	 *
	 * @since 0.2
	 *
	 * @param DOMElement $node Represents an HTML element.
	 * @return bool Returns true if the DOMElement has no child nodes and
	 *              the textContent property of the DOMElement is empty;
	 *              Otherwise it returns false.
	 */
	public static function is_node_empty( $node ) {
		return false === $node->hasChildNodes() && empty( $node->textContent );
	}

	/**
	 * Forces HTML element closing tags given a DOMDocument and optional DOMElement
	 *
	 * Self-closing elements have their text content cleared; other empty
	 * elements receive an empty text node child so that they are serialized
	 * with an explicit closing tag; non-empty elements are processed recursively.
	 *
	 * @since 0.2
	 *
	 * @param DOMDocument $dom  Represents HTML document on which to force closing tags.
	 * @param DOMElement  $node Represents HTML element to start closing tags on.
	 *                          If not passed, defaults to first child of body.
	 */
	public static function recursive_force_closing_tags( $dom, $node = null ) {
		if ( is_null( $node ) ) {
			$node = $dom->getElementsByTagName( 'body' )->item( 0 );
		}

		// The document may have no body element, in which case there is nothing to do.
		if ( is_null( $node ) || XML_ELEMENT_NODE !== $node->nodeType ) {
			return;
		}

		if ( self::is_self_closing_tag( $node->nodeName ) ) {
			/*
			 * Ensure there is no text content to accidentally force a child
			 */
			$node->textContent = null;
			return;
		}

		if ( self::is_node_empty( $node ) ) {
			$text_node = $dom->createTextNode( '' );
			$node->appendChild( $text_node );

			return;
		}

		// Walk the children from last to first.
		$num_children = $node->childNodes->length;
		for ( $i = $num_children - 1; $i >= 0; $i -- ) {
			$child = $node->childNodes->item( $i );
			self::recursive_force_closing_tags( $dom, $child );
		}
	}

	/**
	 * Determines if an HTML element tag is validly a self-closing tag per W3C HTML5 specs.
	 *
	 * The comparison against the list from get_self_closing_tags() is strict.
	 *
	 * @since 0.2
	 *
	 * @param string $tag Tag.
	 * @return bool Returns true if a valid self-closing tag, false if not.
	 */
	private static function is_self_closing_tag( $tag ) {
		return in_array( $tag, self::get_self_closing_tags(), true );
	}

	/**
	 * Returns array of self closing tags
	 *
	 * @since 0.6
	 *
	 * @return string[]
	 */
	private static function get_self_closing_tags() {
		/*
		 * As this function is called a lot the static var
		 * prevents having to re-create the array every time.
		 */
		static $self_closing_tags;
		if ( ! isset( $self_closing_tags ) ) {
			/*
			 * https://www.w3.org/TR/html5/syntax.html#serializing-html-fragments
			 * Not all are valid AMP, but we include them for completeness.
			 */
			$self_closing_tags = array(
				'area',
				'base',
				'basefont',
				'bgsound',
				'br',
				'col',
				'embed',
				'frame',
				'hr',
				'img',
				'input',
				'keygen',
				'link',
				'meta',
				'param',
				'source',
				'track',
				'wbr',
			);
		}

		return $self_closing_tags;
	}
}