element.
 *
 * By default, the Airstory API will include a full HTML document, including the <html>, <head>
 * tags, and more. WordPress only needs the actual post contents, so we'll load the provided HTML
 * into DOMDocument, reduce it to the <body>
, then manually strip the opening and closing <body> node.
 *
 * @param string $content The HTML document (or fragment) to reduce to its body contents.
 * @return string The markup found inside the <body> element, or an empty string when the content
 *                is empty, has no <body>, or libxml reported errors while parsing.
 */
function get_body_contents( $content ) {

	// DOMDocument::loadHTML() rejects an empty source (ValueError on PHP 8), so bail out first.
	if ( '' === trim( (string) $content ) ) {
		return '';
	}

	$use_internal = libxml_use_internal_errors( true );
	$doc          = new \DOMDocument( '1.0', 'UTF-8' );

	// NOTE(review): the 'HTML-ENTITIES' target encoding is deprecated as of PHP 8.2 — confirm the
	// PHP versions this plugin needs to support before replacing it.
	$doc->loadHTML( mb_convert_encoding( $content, 'HTML-ENTITIES', 'UTF-8' ) );

	// Will retrieve the entire <body> node.
	$body_node = $doc->getElementsByTagName( 'body' );

	if ( 0 === $body_node->length ) {
		// Restore libxml's previous state before leaving, so this call doesn't leak its settings.
		libxml_clear_errors();
		libxml_use_internal_errors( $use_internal );

		return '';
	}

	$body = $doc->saveHTML( $body_node->item( 0 ) );

	// If an error occurred while parsing the data, return an empty string.
	$errors = libxml_get_errors();

	if ( ! empty( $errors ) ) {
		foreach ( $errors as $error ) {
			// phpcs:disable WordPress.PHP.DevelopmentFunctions.error_log_trigger_error
			trigger_error( esc_html( format_libxml_error( $error ) ), E_USER_WARNING );
			// phpcs:enable WordPress.PHP.DevelopmentFunctions.error_log_trigger_error
		}

		$body = '';
	}

	// Reset the original error handling approach for libxml.
	libxml_clear_errors();
	libxml_use_internal_errors( $use_internal );

	// If the body's empty at this point, no further work is necessary.
	if ( empty( $body ) ) {
		return $body;
	}

	/*
	 * Strip opening and trailing <body> tags (plus any whitespace around them). The opening pattern
	 * is anchored to the start of the string so that only the wrapping tag is removed; whitespace
	 * inside the content itself must be left alone.
	 */
	$body = preg_replace( '/^\s*\<body[^\>]*\>\s*/i', '', $body );
	$body = preg_replace( '/\s*\<\/body\>/i', '', $body );

	return $body;
}
add_filter( 'airstory_before_insert_content', __NAMESPACE__ . '\get_body_contents', 1 );

/**
 * Sideload a single image from a remote URL.
 *
 * @param string $url      The remote URL for the image.
 * @param int    $post_id  Optional. The post the newly-uploaded image should be attached to.
 *                         Default is 0 (unattached).
 * @param array  $metadata Optional. Additional post meta keys to assign once the attachment post
 *                         has been created. These keys and values are assumed to be sanitized.
 *                         Default is an empty array.
 * @return int The ID of the newly-created attachment, or 0 if the URL was invalid, the download
 *             failed, or the file could not be sideloaded.
 */
function sideload_single_image( $url, $post_id = 0, $metadata = array() ) {
	if ( ! filter_var( $url, FILTER_VALIDATE_URL ) ) {
		return 0;
	}

	require_once ABSPATH . 'wp-admin/includes/media.php';
	require_once ABSPATH . 'wp-admin/includes/file.php';
	require_once ABSPATH . 'wp-admin/includes/image.php';

	$tmp_file = download_url( esc_url_raw( $url ) );

	/*
	 * Something went wrong downloading the image. There is no temporary file to clean up in this
	 * case: $tmp_file holds the WP_Error, and passing that object to unlink() raises a TypeError
	 * on PHP 8 that the @ operator does not suppress.
	 */
	if ( is_wp_error( $tmp_file ) ) {
		// phpcs:disable WordPress.PHP.DevelopmentFunctions.error_log_trigger_error
		trigger_error( esc_html( $tmp_file->get_error_message() ), E_USER_WARNING );
		// phpcs:enable WordPress.PHP.DevelopmentFunctions.error_log_trigger_error

		return 0;
	}

	// Build the file name from the URL path only, so a query string or fragment can't end up in it.
	$url_path   = wp_parse_url( $url, PHP_URL_PATH );
	$file_array = array(
		'name'     => basename( $url_path ? $url_path : $url ),
		'tmp_name' => $tmp_file,
	);

	// Sideload the media.
	$image_id = media_handle_sideload( $file_array, $post_id );

	if ( is_wp_error( $image_id ) ) {
		// phpcs:disable WordPress.PHP.DevelopmentFunctions.error_log_trigger_error, Generic.PHP.NoSilencedErrors.Discouraged
		@unlink( $file_array['tmp_name'] );
		trigger_error( esc_html( $image_id->get_error_message() ), E_USER_WARNING );
		// phpcs:enable WordPress.PHP.DevelopmentFunctions.error_log_trigger_error, Generic.PHP.NoSilencedErrors.Discouraged

		return 0;
	}

	/*
	 * Finally, store post meta. We'll always set _airstory_origin (the original image URL), but any
	 * non-empty values in $metadata will also be set.
	 */
	add_post_meta( $image_id, '_airstory_origin', esc_url( $url ) );

	if ( ! empty( $metadata ) ) {
		foreach ( (array) $metadata as $meta_key => $meta_value ) {
			if ( ! empty( $meta_value ) ) {
				update_post_meta( $image_id, $meta_key, $meta_value );
			}
		}
	}

	/**
	 * Fires after an image has been side-loaded into WordPress.
	 *
	 * @param string $url      The remote URL for the image.
	 * @param int    $post_id  The post the newly-uploaded image should be attached to.
	 * @param array  $metadata Additional post meta keys to assign once the attachment post has been
	 *                         created. These keys and values are assumed to be sanitized.
	 */
	do_action( 'airstory_sideload_single_image', $url, $post_id, $metadata );

	return $image_id;
}

/**
 * Sideload media referenced from within the Airstory content.
 *
 * While this could be a good use for DOMDocument, that extension can get rather finicky. As we're
 * only replacing links to https://images.airstory.co, we can safely accomplish this with regex.
 *
 * @param int $post_id The ID of the post to scan for media to sideload.
 * @return int The number of replacements made.
 */
function sideload_all_images( $post_id ) {
	$post = get_post( $post_id );

	// Return early (with "0" replacements) if no matching post was found.
	if ( ! $post ) {
		return 0;
	}

	/*
	 * Use DOMDocument to find all images in the post content.
	 *
	 * To avoid DOMDocument::saveHTML() from destroying the inner contents, we'll temporarily inject
	 * a generic