You can use the code below with $ignore_path = true to test for duplicates before uploading. If this function returns the ID of an attachment, use that attachment instead of re-uploading the file. The check is currently based on the filename only, but you could easily add a CRC comparison to it. I will eventually be adding an MD5 check and will post that at: https://wordpress.stackexchange.com/a/50207/11310
// get the id of attachment if filename matches
// props to Rarst https://wordpress.stackexchange.com/a/7094/11310
/**
 * Find the ID of an existing attachment that corresponds to a URL or filename.
 *
 * The lookup runs in two passes, both keyed on the basename of $url:
 *   1. attachments whose `_wp_attached_file` meta contains the basename are
 *      compared against their full-size image URL;
 *   2. attachments whose `_wp_attachment_metadata` meta contains the basename
 *      are compared against every generated intermediate size.
 *
 * @param string $url         URL to look up. With $ignore_path enabled only its
 *                            basename is used, so a bare filename works too.
 * @param bool   $ignore_path When false, $url must contain the uploads base URL
 *                            and must equal the attachment URL exactly. When
 *                            true, only the filename is compared.
 * @return int|false Attachment ID of the first match, or false when none matches.
 */
function get_attachment_id( $url, $ignore_path = false ) {
    if ( ! $ignore_path ) {
        $dir = wp_upload_dir();
        $dir = trailingslashit( $dir['baseurl'] );
        if ( false === strpos( $url, $dir ) ) {
            return false;
        }
    }

    $file = basename( $url );

    // An empty basename would turn the LIKE search into '%%' and match every
    // attachment, so there is nothing meaningful to look up.
    if ( '' === $file ) {
        return false;
    }

    $query = array(
        'post_type'      => 'attachment',
        'fields'         => 'ids',
        // get_posts() only returns 5 posts by default; inspect every candidate.
        'posts_per_page' => -1,
        'meta_query'     => array(
            array(
                'value'   => $file,
                'compare' => 'LIKE',
            ),
        ),
    );

    // Pass 1: match against the original (full-size) file.
    $query['meta_query'][0]['key'] = '_wp_attached_file';
    foreach ( get_posts( $query ) as $id ) {
        // Returns false for attachments that are not images; skip those
        // instead of feeding false into an array function.
        $src = wp_get_attachment_image_src( $id, 'full' );
        if ( ! is_array( $src ) || ! isset( $src[0] ) ) {
            continue;
        }

        $match = $src[0];
        if ( $url === $match || ( $ignore_path && false !== strpos( $match, $file ) ) ) {
            return $id;
        }
    }

    // Pass 2: match against the generated intermediate sizes.
    $query['meta_query'][0]['key'] = '_wp_attachment_metadata';
    foreach ( get_posts( $query ) as $id ) {
        $meta = wp_get_attachment_metadata( $id );
        // Metadata can be false or lack a 'sizes' entry for non-image files.
        if ( empty( $meta['sizes'] ) || ! is_array( $meta['sizes'] ) ) {
            continue;
        }

        foreach ( $meta['sizes'] as $size => $values ) {
            if ( ! isset( $values['file'] ) || $values['file'] !== $file ) {
                continue;
            }

            // Filename-only mode: the size's filename matching is enough.
            if ( $ignore_path ) {
                return $id;
            }

            $src = wp_get_attachment_image_src( $id, $size );
            if ( is_array( $src ) && isset( $src[0] ) && $url === $src[0] ) {
                return $id;
            }
        }
    }

    return false;
}