Thumbnail + Excerpt = loss in word count

For everybody that is looking for a great excerpt that keep html tags in tact and want the excerpt not to cut off mid sentence and have a true word count, here is the code

 function pietergoosen_custom_wp_trim_excerpt($text) {
global $post;
$raw_excerpt = $text;
    if ( '' == $text ) {

        $text = get_the_content('');
        $text = strip_shortcodes( $text );
        $text = apply_filters('the_content', $text);
        $text = str_replace(']]>', ']]>', $text);

            //Add the allowed HTML tags separated by a comma.
            $allowed_tags = array(
            'head', 'title', 'base', 'link', 'meta', 'style', 'script', 'noscript', 'body', 'section', 'nav',
            'article', 'aside', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'footer', 'address', 'main', 'p', 'hr',
            'pre', 'blockquote', 'ol', 'ul', 'li', 'dl', 'dt', 'dd', 'figure', 'figcaption', 'div', 'a', 'em', 'strong',
            'small', 's', 'cite', 'q', 'dfn', 'abbr', 'data', 'time', 'code', 'var', 'samp', 'kbd', 'sub', 'sup', 'i', 'b',
            'u', 'mark', 'ruby', 'rt', 'rp', 'bdi', 'bdo', 'span', 'br', 'wbr', 'ins', 'del', 'img', 'iframe', 'embed',
            'object', 'param', 'video' ,'audio', 'source', 'track', 'canvas', 'map', 'area', 'svg', 'math', 'table',
            'caption', 'colgroup', 'col', 'tbody', 'thead', 'tfoot', 'tr', 'td', 'th', 'form', 'fieldset', 'legend', 'label',
            'input', 'button', 'select', 'datalist', 'optgroup', 'option', 'textarea', 'keygen', 'output', 'progress', 'meter',
            'details', 'summary', 'menuitem', 'menu'
            );

            $tag_string = '<' . implode('><', $allowed_tags) . '>';

        $text = strip_tags($text, $tag_string);

        //Set the excerpt word count and only break after sentence is complete.
            $excerpt_word_count = 75;
            $excerpt_length = apply_filters('excerpt_length', $excerpt_word_count); 
            $tokens = array();
            $excerptOutput="";
            $count = 0;

            // Divide the string into tokens; HTML tags, or words, followed by any whitespace
            preg_match_all('/(<[^>]+>|[^<>\s]+)\s*/u', $text, $tokens);

            foreach ($tokens[0] as $token) { 

                if ($count >= $excerpt_word_count && preg_match('/[\?\.\!]\s*$/uS', $token)) { 
                // Limit reached, continue until ? . or ! occur at the end
                    $excerptOutput .= trim($token);
                    break;
                }

                // Add words to complete sentence
                $count++;

                // Append what's left of the token
                $excerptOutput .= $token;
            }

        $text = trim(force_balance_tags($excerptOutput));

            $excerpt_end = ' <a href="'. esc_url( get_permalink() ) . '">' . '&nbsp;&raquo;&nbsp;' . sprintf(__( 'Read more about: %s &nbsp;&raquo;', 'pietergoosen' ), get_the_title()) . '</a>'; 
            $excerpt_more = apply_filters('excerpt_more', ' ' . $excerpt_end); 

            $pos = strrpos($text, '</');

        // Add 'Read more' text inside last HTML tag
        $text = substr_replace($text, $excerpt_end, $pos, 0);

    }
    return apply_filters('wp_trim_excerpt', $text, $raw_excerpt);
}

remove_filter('get_the_excerpt', 'wp_trim_excerpt');
add_filter('get_the_excerpt', 'pietergoosen_custom_wp_trim_excerpt'); 

Leave a Comment