I'm wondering if there's a way to count the words inside a div for example. Say we have a div like so:
<div id="content">
hello how are you?
</div>
Then have the JS function return an integer of 4.
Is this possible? I have done this with form elements but can't seem to do it for non-form ones.
Any ideas?
g
9 Answers 9
If you know that the DIV is only going to have text in it, you can KISS:
var count = document.getElementById('content').innerHTML.split(' ').length;
If the div can have HTML tags in it, you're going to have to traverse its children looking for text nodes:
/**
 * Recursively concatenates the text found beneath a DOM node.
 *
 * Comment children (nodeType 8) are skipped, element children (nodeType 1)
 * are descended into, and every other child contributes its nodeValue.
 *
 * @param {Node} el - Node whose childNodes are walked.
 * @returns {string} The collected text, in child order.
 */
function get_text(el) {
  // Declared locally: an undeclared `ret` would create a global shared by
  // every recursive call and throws a ReferenceError in strict mode.
  var ret = "";
  var length = el.childNodes.length;
  for (var i = 0; i < length; i++) {
    var node = el.childNodes[i];
    if (node.nodeType !== 8) {
      ret += node.nodeType !== 1 ? node.nodeValue : get_text(node);
    }
  }
  return ret;
}
var words = get_text(document.getElementById('content'));
var count = words.split(' ').length;
This is the same logic that the jQuery library uses to achieve the effect of its text() function. jQuery is a pretty awesome library that in this case is not necessary. However, if you find yourself doing a lot of DOM manipulation or AJAX then you might want to check it out.
EDIT:
As noted by Gumbo in the comments, the way we are splitting the strings above would count two consecutive spaces as a word. If you expect that sort of thing (and even if you don't) it's probably best to avoid it by splitting on a regular expression instead of on a simple space character. Keeping that in mind, instead of doing the above split, you should do something like this:
var count = words.split(/\s+/).length;
The only difference being on what we're passing to the split function.
9 Comments
Paolo Bergantino's second solution is incorrect for empty strings and for strings that begin or end with whitespace. Here's the fix:
var count = !s ? 0 : (s.split(/^\s+$/).length === 2 ? 0 : 2 +
s.split(/\s+/).length - s.split(/^\s+/).length - s.split(/\s+$/).length);
Explanation: if the string is empty, there are zero words; if the string contains only whitespace, there are zero words; otherwise, count the whitespace groups, excluding any at the beginning and the end of the string, and add one (n words are separated by n - 1 whitespace groups).
Comments
string_var.match(/[^\s]+/g).length
seems like it's a better method than
string_var.split(/\s+/).length
At least it won't count "word " as 2 words -- ['word'] rather than ['word', '']. And it doesn't really require any funny add-on logic.
1 Comment
string_var.match(/[^\s]+/g).length - 1; Or just use Countable.js to do the hard job ;)
Comments
/**
 * Collects the `data` of every text node (nodeType 3) beneath `hoo`,
 * walking firstChild/nextSibling links and recursing into every
 * non-text child.
 *
 * @param {Node} [hoo] - Root node; a falsy value yields an empty array.
 * @returns {string[]} Text-node contents in traversal order.
 */
document.deepText = function deepText(hoo) {
  var A = [];
  if (hoo) {
    for (var node = hoo.firstChild; node != null; node = node.nextSibling) {
      if (node.nodeType === 3) {
        A[A.length] = node.data;
      } else {
        // Recurse through the function's own name: arguments.callee is
        // deprecated and throws a TypeError in strict mode.
        A = A.concat(deepText(node));
      }
    }
  }
  return A;
};
I'd be fairly strict about what a word is-
/**
 * Counts the words beneath a node, where a word is a run of ASCII letters,
 * apostrophes and hyphens.
 *
 * @param {Node} hoo - Node passed to document.deepText to gather its text.
 * @returns {number} Number of matching runs; 0 when there are none.
 */
function countwords(hoo){
  var text = document.deepText(hoo).join(' ');
  // match() returns null (not an empty array) when nothing matches.
  var words = text.match(/[A-Za-z\'\-]+/g);
  return words ? words.length : 0;
}
alert(countwords(document.body))
Comments
Or you can do this:
/**
 * Counts the words and characters in a field's value and reports them via
 * alert().
 *
 * A word is a run of ASCII letters and digits; every other character acts
 * as a separator.
 *
 * @param {{value: string}} this_field - Object whose `value` is measured.
 * @param {*} [show_word_count=true] - Truthy to include the word count in
 *     the alert; null/undefined means true.
 * @param {*} [show_char_count=false] - Truthy to include the character
 *     count in the alert; null/undefined means false.
 * @returns {number} The word count.
 */
function CountWords (this_field, show_word_count, show_char_count) {
  var showWords = show_word_count == null ? true : show_word_count;
  var showChars = show_char_count == null ? false : show_char_count;

  var char_count = this_field.value.length;

  // The appended space guarantees a trailing separator, so the split below
  // always yields exactly one more piece than there are words.
  var fullStr = this_field.value + " ";
  var left_trimmedStr = fullStr.replace(/^[^A-Za-z0-9]+/, "");
  var cleanedStr = left_trimmedStr.replace(/[^A-Za-z0-9]+/g, " ");
  var word_count = cleanedStr.split(" ").length - 1;
  if (fullStr.length < 2) {
    word_count = 0;
  }

  // Declared locally; the undeclared originals leaked globals and throw in
  // strict mode.
  var wordOrWords = word_count == 1 ? " word" : " words";
  var charOrChars = char_count == 1 ? " character" : " characters";

  // Logical AND, not bitwise: `2 & 1` is 0 even though both are truthy.
  if (showWords && showChars) {
    alert ("Word Count:\n" + " " + word_count + wordOrWords + "\n" + " " + char_count + charOrChars);
  } else if (showWords) {
    alert ("Word Count: " + word_count + wordOrWords);
  } else if (showChars) {
    alert ("Character Count: " + char_count + charOrChars);
  }
  return word_count;
}
Comments
The get_text function in Paolo Bergantino's answer didn't work properly for me when two child nodes have no space between them. For example, <h1>heading</h1><p>paragraph</p> would be returned as headingparagraph (notice the lack of a space between the words). Prepending a space to the nodeValue fixes this. That introduces a space at the front of the text, but I found a word count function that trims it off (and it uses several regexps to ensure it counts only words). The word count and edited get_text functions are below:
/**
 * Recursively concatenates the text found beneath a DOM node, prefixing
 * each non-element child's nodeValue with a space so that text from
 * adjacent elements does not run together.
 *
 * Comment children (nodeType 8) are skipped and element children
 * (nodeType 1) are descended into.
 *
 * @param {Node} el - Node whose childNodes are walked.
 * @returns {string} The collected text; each contributed value is preceded
 *     by a single space.
 */
function get_text(el) {
  // Declared locally: an undeclared `ret` would create a global shared by
  // every recursive call and throws a ReferenceError in strict mode.
  var ret = "";
  var length = el.childNodes.length;
  for (var i = 0; i < length; i++) {
    var node = el.childNodes[i];
    if (node.nodeType !== 8) {
      ret += node.nodeType !== 1 ? ' ' + node.nodeValue : get_text(node);
    }
  }
  return ret;
}
/**
 * Counts the words in a string, where a word is a run of ASCII letters and
 * digits separated by whitespace. Other characters are removed before
 * counting, so "don't" is one word.
 *
 * @param {string} fullStr - Text to count.
 * @returns {number} Number of words; 0 when the text contains none.
 */
function wordCount(fullStr) {
  var words = fullStr
    // Turn every whitespace run (tabs included) into one space first, so
    // the strip below cannot glue tab-separated words together.
    .replace(/\s+/g, " ")
    .replace(/[^A-Za-z0-9 ]+/g, "")
    // Matching the words directly avoids split() reporting one empty piece
    // for whitespace-only or punctuation-only input.
    .match(/[A-Za-z0-9]+/g);
  return words ? words.length : 0;
}
EDIT
kennebec's word counter is really good. But the one I've found includes a number as a word which is what I needed. Still, that's easy to add to kennebec's. But kennebec's text retrieval function will have the same problem.
Comments
This should account for preceding & trailing whitespaces
// filter(Boolean) drops the single empty string that split() yields for empty
// or whitespace-only text, so such an element counts 0 words instead of 1.
const wordCount = document.querySelector('#content').innerText.trim().split(/\s+/).filter(Boolean).length;
1 Comment
innerText. The innerText property provides built-in functionality to get text from an HTML element, while the other answers provide custom solutions to the same problem. string_var.match(/[^\s]+/g).length - 1;