Javascript word-count for any given DOM element

Question 1

I'm wondering if there's a way to count the words inside a div for example. Say we have a div like so:

<div id="content">
hello how are you?
</div>

Then have the JS function return an integer of 4.

Is this possible? I have done this with form elements but can't seem to do it for non-form ones.

Any ideas?

g

Question 2

If you know that the DIV is only going to have text in it, you can KISS:

// Quick count for a plain-text DIV: split its markup on single spaces and count the pieces.
var count = document.getElementById('content').innerHTML.split(' ').length;

If the div can have HTML tags in it, you're going to have to traverse its children looking for text nodes:

/**
 * Collect the text contained in a DOM node, descending into child elements.
 *
 * Comment nodes (nodeType 8) are skipped, child elements (nodeType 1) are
 * traversed recursively, and every other child contributes its nodeValue.
 *
 * @param {Node} el - Node whose childNodes are walked.
 * @returns {string} Text of all descendants, concatenated in document order.
 */
function get_text(el) {
  // Declared locally: as an implicit global this assignment throws in strict
  // mode (and in ES modules) and overwrites any global named `ret`.
  let ret = "";
  const length = el.childNodes.length;
  for (let i = 0; i < length; i++) {
    const node = el.childNodes[i];
    if (node.nodeType !== 8) {
      ret += node.nodeType !== 1 ? node.nodeValue : get_text(node);
    }
  }
  return ret;
}
// Gather the element's text with get_text, then count the space-separated pieces.
var words = get_text(document.getElementById('content'));
var count = words.split(' ').length;

This is the same logic that the jQuery library uses to achieve the effect of its text() function. jQuery is a pretty awesome library that in this case is not necessary. However, if you find yourself doing a lot of DOM manipulation or AJAX then you might want to check it out.

EDIT:

As noted by Gumbo in the comments, the way we are splitting the strings above would count two consecutive spaces as a word. If you expect that sort of thing (and even if you don't) it's probably best to avoid it by splitting on a regular expression instead of on a simple space character. Keeping that in mind, instead of doing the above split, you should do something like this:

// Split on runs of whitespace rather than on single spaces.
var count = words.split(/\s+/).length;

The only difference being on what we're passing to the split function.

Question 3

You'll have to get the text node first.

Question 4

This will count tags as words though, which is why I would prefer the text() version provided by jQuery.

Question 5

I know, I was working as soon as I posted it to port text over to plain javascript to provide that as an alternative. Not everyone needs jQuery in their lives. :)

Question 6

+1, I do like that you took the logic for text from the jQuery library :)

Question 7

It would be better to use a regular expression so that multiple whitespace characters are taken into account.

Question 8

Paolo Bergantino's second solution is incorrect for empty strings and for strings that begin or end with whitespace. Here's the fix:

// A falsy s (such as the empty string) gives 0, as does a string made up only of whitespace.
// Otherwise: the whitespace-split piece count, adjusted for the extra pieces
// that leading and trailing whitespace produce.
var count = !s ? 0 : (s.split(/^\s+$/).length === 2 ? 0 : 2 +
 s.split(/\s+/).length - s.split(/^\s+/).length - s.split(/\s+$/).length);

Explanation: If the string is empty, there are zero words; If the string has only whitespaces, there are zero words; Else, count the number of whitespace groups without the ones from the beginning and the end of the string.

Question 9

// Count the runs of non-whitespace characters in string_var.
string_var.match(/[^\s]+/g).length

seems like it's a better method than

// Count the pieces left after splitting string_var on runs of whitespace.
string_var.split(/\s+/).length

At least it won't count "word " as 2 words -- ['word'] rather than ['word', '']. And it doesn't really require any funny add-on logic.

Question 10

Better, but it still counts the empty string '' as 1. Try: string_var.match(/[^\s]+/g).length - 1;

Question 11

Or just use Countable.js to do the hard job ;)

Question 12

/**
 * Gather the data of every text node beneath a DOM node, in document order.
 *
 * @param {Node} hoo - Root node to search; a falsy value yields an empty array.
 * @returns {string[]} One entry per text node (nodeType 3) found under hoo.
 */
document.deepText = function deepText(hoo) {
  let A = [];
  if (hoo) {
    // Walk the sibling chain with a local cursor instead of reassigning the parameter.
    for (let node = hoo.firstChild; node != null; node = node.nextSibling) {
      if (node.nodeType === 3) {
        A.push(node.data);
      } else {
        // Recurse through the function's own name: arguments.callee is
        // forbidden in strict mode.
        A = A.concat(deepText(node));
      }
    }
  }
  return A;
};

I'd be fairly strict about what a word is-

/**
 * Count the words in the text found beneath a DOM node.
 *
 * A word is a run of ASCII letters, apostrophes and hyphens, so digits and
 * other punctuation are not counted.
 *
 * @param {Node} hoo - Root node handed to document.deepText.
 * @returns {number} Number of words found; 0 when there are none.
 */
function countwords(hoo) {
  const text = document.deepText(hoo).join(' ');
  // match() returns null rather than an empty array when nothing matches,
  // so reading .length directly would throw on text without any words.
  const words = text.match(/[A-Za-z\'\-]+/g);
  return words === null ? 0 : words.length;
}
// Show the word count for the whole page body.
alert(countwords(document.body))

Question 13

Or you can do this:

/**
 * Count the words and characters in a form field and optionally alert them.
 *
 * A word is a run of ASCII letters and digits; every other character acts as
 * a separator.
 *
 * @param {{value: string}} this_field - Field whose value is measured.
 * @param {boolean} [show_word_count=true] - Include the word count in the alert.
 * @param {boolean} [show_char_count=false] - Include the character count in the alert.
 * @returns {number} The number of words in the field's value.
 */
function CountWords(this_field, show_word_count, show_char_count) {
  // `== null` matches both undefined and null, so either one selects the default.
  const showWords = show_word_count == null ? true : show_word_count;
  const showChars = show_char_count == null ? false : show_char_count;

  const text = this_field.value;
  const char_count = text.length;

  // Counting alphanumeric runs directly gives the same result as padding,
  // trimming, replacing and splitting; match() yields null when there are none.
  const runs = text.match(/[A-Za-z0-9]+/g);
  const word_count = runs === null ? 0 : runs.length;

  // Kept local: these labels used to leak out as implicit globals.
  const wordOrWords = word_count === 1 ? " word" : " words";
  const charOrChars = char_count === 1 ? " character" : " characters";

  // Logical && (not bitwise &) so any pair of truthy flags shows both counts.
  if (showWords && showChars) {
    alert("Word Count:\n" + " " + word_count + wordOrWords + "\n" + " " + char_count + charOrChars);
  } else if (showWords) {
    alert("Word Count: " + word_count + wordOrWords);
  } else if (showChars) {
    alert("Character Count: " + char_count + charOrChars);
  }
  return word_count;
}

Question 14

The get_text function in Paolo Bergantino's answer didn't work properly for me when two child nodes have no space between them. For example, <h1>heading</h1><p>paragraph</p> would be returned as headingparagraph (notice the lack of a space between the words). Prepending a space to the nodeValue fixes this. That introduces a space at the front of the text, but I found a word count function that trims it off (and it uses several regexps to ensure it counts only words). The word count and edited get_text functions are below:

/**
 * Collect the text contained in a DOM node, descending into child elements.
 *
 * Comment nodes (nodeType 8) are skipped and child elements (nodeType 1) are
 * traversed recursively. Every other child contributes its nodeValue with a
 * space prepended, so text from adjacent elements does not run together.
 *
 * @param {Node} el - Node whose childNodes are walked.
 * @returns {string} Text of all descendants, each piece preceded by a space.
 */
function get_text(el) {
  // Declared locally: as an implicit global this assignment throws in strict
  // mode (and in ES modules) and overwrites any global named `ret`.
  let ret = "";
  const length = el.childNodes.length;
  for (let i = 0; i < length; i++) {
    const node = el.childNodes[i];
    if (node.nodeType !== 8) {
      ret += node.nodeType !== 1 ? ' ' + node.nodeValue : get_text(node);
    }
  }
  return ret;
}
/**
 * Count the words in a string.
 *
 * Whitespace separates words; every character other than an ASCII letter or
 * digit is dropped before counting, so "you?" is one word and "!!!" is none.
 *
 * @param {string} fullStr - Text to count.
 * @returns {number} Number of words; 0 for empty, blank or punctuation-only text.
 */
function wordCount(fullStr) {
  const cleaned = fullStr
    // Turn every whitespace run (tabs included, not just CR/LF) into a space
    // first, so the punctuation strip below cannot glue two words together.
    .replace(/\s+/g, " ")
    .replace(/[^A-Za-z0-9 ]+/g, "")
    .trim();
  // Splitting an empty string yields [""], which would be counted as a word.
  if (cleaned === "") {
    return 0;
  }
  // Removing punctuation can leave consecutive spaces ("a - b"), so split on runs.
  return cleaned.split(/ +/).length;
}

EDIT

kennebec's word counter is really good. But the one I've found includes a number as a word which is what I needed. Still, that's easy to add to kennebec's. But kennebec's text retrieval function will have the same problem.

Question 15

This should account for leading and trailing whitespace:

// Count runs of non-whitespace, so leading and trailing whitespace is ignored
// and an element with no text gives 0. Splitting an empty string yields [""],
// which would be reported as one word.
const wordCount = (document.querySelector('#content').innerText.match(/\S+/g) || []).length;

Question 16

+1 for innerText. The innerText property provides built-in functionality to get text from an HTML element, while the other answers provide custom solutions to the same problem.

Question 17

// Number of non-whitespace runs in string_var, minus one.
string_var.match(/[^\s]+/g).length - 1;

Paolo Bergantino 490k83 gold badges524 silver badges437 bronze badges · Accepted Answer · 2009-04-19 13:38:16Z

If you know that the DIV is only going to have text in it, you can KISS:

// Quick count for a plain-text DIV: split its markup on single spaces and count the pieces.
var count = document.getElementById('content').innerHTML.split(' ').length;

If the div can have HTML tags in it, you're going to have to traverse its children looking for text nodes:

/**
 * Collect the text contained in a DOM node, descending into child elements.
 *
 * Comment nodes (nodeType 8) are skipped, child elements (nodeType 1) are
 * traversed recursively, and every other child contributes its nodeValue.
 *
 * @param {Node} el - Node whose childNodes are walked.
 * @returns {string} Text of all descendants, concatenated in document order.
 */
function get_text(el) {
  // Declared locally: as an implicit global this assignment throws in strict
  // mode (and in ES modules) and overwrites any global named `ret`.
  let ret = "";
  const length = el.childNodes.length;
  for (let i = 0; i < length; i++) {
    const node = el.childNodes[i];
    if (node.nodeType !== 8) {
      ret += node.nodeType !== 1 ? node.nodeValue : get_text(node);
    }
  }
  return ret;
}
// Gather the element's text with get_text, then count the space-separated pieces.
var words = get_text(document.getElementById('content'));
var count = words.split(' ').length;

This is the same logic that the jQuery library uses to achieve the effect of its text() function. jQuery is a pretty awesome library that in this case is not necessary. However, if you find yourself doing a lot of DOM manipulation or AJAX then you might want to check it out.

EDIT:

As noted by Gumbo in the comments, the way we are splitting the strings above would count two consecutive spaces as a word. If you expect that sort of thing (and even if you don't) it's probably best to avoid it by splitting on a regular expression instead of on a simple space character. Keeping that in mind, instead of doing the above split, you should do something like this:

// Split on runs of whitespace rather than on single spaces.
var count = words.split(/\s+/).length;

The only difference being on what we're passing to the split function.

This will count tags as words though, which is why I would prefer the text() version provided by jQuery.
I know, I was working as soon as I posted it to port text over to plain javascript to provide that as an alternative. Not everyone needs jQuery in their lives. :)
+1, I do like that you took the logic for text from the jQuery library :)
It would be better to use a regular expression so that multiple whitespace characters are taken into account.

CollectivesTM on Stack Overflow

Javascript word-count for any given DOM element

9 Answers 9

9 Comments

Comments

1 Comment

Comments

Comments

Comments

Comments

1 Comment

Comments

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Linked

Hot Network Questions

CollectivesTM on Stack Overflow

9 Answers 9

9 Comments

Comments

1 Comment

Comments

Comments

Comments

Comments

1 Comment

Comments

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Linked

Related