I have a plugin that does this manually in batch processes via AJAX, and I've been getting a lot of requests for a way to make it automated.
This is the function that loads the post, downloads the images into the uploads directory and attaches them to the post. Then it does a search and replace for the old img urls and replaces them with the new ones. Attach this to all the publish actions and you should be good to go.
/**
 * Imports externally hosted images referenced in a post's content.
 *
 * Every <img> whose src does not already point into the uploads directory
 * is downloaded, attached to the post through the media library, and its
 * URL inside the post content is replaced with the new attachment URL.
 *
 * @param object|int $post The post object, or a value get_post() can resolve.
 *
 * @return object|null The post that was passed in or resolved. It is NOT
 *                     re-read after the content update. When the post cannot
 *                     be resolved, the get_post() result is returned as-is.
 */
function prefix_extract_external_images( $post ) {
	if ( ! is_object( $post ) ) {
		$post = get_post( $post );
	}

	// get_post() may not find the post; bail out instead of dereferencing
	// a non-object below.
	if ( ! is_object( $post ) ) {
		return $post;
	}

	$html = $post->post_content;

	if ( stripos( $html, '<img' ) === false ) {
		return $post;
	}

	$regex = '#<\s*img [^\>]*src\s*=\s*(["\'])(.*?)\1#im';
	if ( ! preg_match_all( $regex, $html, $matches ) ) {
		return $post;
	}

	// The download/sideload helpers live in wp-admin and are not loaded on
	// front-end or cron requests.
	if ( ! function_exists( 'download_url' ) || ! function_exists( 'media_handle_sideload' ) ) {
		require_once ABSPATH . 'wp-admin/includes/file.php';
		require_once ABSPATH . 'wp-admin/includes/media.php';
		require_once ABSPATH . 'wp-admin/includes/image.php';
	}

	$uploads     = wp_upload_dir();
	$uploads_url = $uploads['baseurl'];

	$old = array();
	$new = array();

	// array_unique(): an image used several times is downloaded only once;
	// the replacement below rewrites every occurrence of its URL anyway.
	foreach ( array_unique( $matches[2] ) as $img ) {
		// Compare image source against upload directory
		// to prevent adding same attachment multiple times.
		if ( stripos( $img, $uploads_url ) !== false ) {
			continue;
		}

		// Keep the URL up to a known image extension, which drops any query
		// string. "jpe?g" is tried before "jpe" so ".jpeg" is not truncated
		// to ".jpe". Sources without such an extension are skipped before
		// anything is downloaded.
		if ( ! preg_match( '/[^\?]+\.(jpe?g|jpe|gif|png)/i', $img, $name_match ) ) {
			continue;
		}

		$tmp = download_url( $img );
		if ( is_wp_error( $tmp ) ) {
			// The download failed, so there is no temp file to clean up.
			continue;
		}

		$file_array = array(
			'name'     => basename( $name_match[0] ),
			'tmp_name' => $tmp,
		);

		$id = media_handle_sideload( $file_array, $post->ID );
		if ( is_wp_error( $id ) ) {
			// The file was not stored permanently: remove the temp download.
			if ( file_exists( $tmp ) ) {
				unlink( $tmp );
			}
			continue;
		}

		$url = wp_get_attachment_url( $id );
		// Never swap a working external URL for an empty one.
		if ( ! empty( $url ) ) {
			$old[] = $img;
			$new[] = $url;
		}
	} // end foreach

	if ( empty( $new ) ) {
		return $post;
	}

	$content = str_ireplace( $old, $new, $html );
	if ( ! empty( $content ) ) {
		// wp_update_post() expects slashed data when given an array;
		// without wp_slash() backslashes in the content would be stripped.
		wp_update_post(
			array(
				'ID'           => $post->ID,
				'post_content' => wp_slash( $content ),
			)
		);
	}

	return $post;
} // end function
// Run the importer when a post moves into the "publish" status from one of
// these states. The list variable and the loop must use the same name;
// otherwise the loop iterates an undefined variable and registers nothing.
$action_array = array( 'new_to_publish', 'pending_to_publish', 'draft_to_publish' );
foreach ( $action_array as $action ) {
	add_action( $action, 'prefix_extract_external_images' );
}
Bonus: Set the first image found as the featured image.
Add this right before the return $post
in the function above.
// Use the attachment returned by get_first_attachment() as the post's
// featured image. The loop body does not run when nothing is returned.
foreach ( get_first_attachment( $post->ID ) as $attachment ) {
	set_post_thumbnail( $post->ID, $attachment['ID'] );
}
This function will also be needed by the above code.
/**
 * Queries for the first image attached to a post.
 *
 * @param int $post_id The post id whose image attachments are queried.
 * @return array The get_children() result in ARRAY_A form, limited to one
 *               image attachment. Callers iterate it, so "no attachment"
 *               should be treated as an empty result rather than false.
 */
function get_first_attachment( $post_id ) {
	return get_children(
		array(
			'post_parent'    => $post_id,
			'post_type'      => 'attachment',
			'post_mime_type' => 'image',
			'posts_per_page' => 1,
			// Without an explicit order the query defaults to newest-first,
			// which would pick the most recent attachment, not the first.
			'orderby'        => 'ID',
			'order'          => 'ASC',
		),
		ARRAY_A
	);
}