2
\$\begingroup\$

I've tomcat running on my Macbook pro and a web application. Making a request to a servlet (below i'm showing post method code) and tipping top command in terminal show a java process with 200/300% cpu usage. The servlet make a request to a mongo database to retrieve 389 documents and then iterate over this documents,process the text with a language detector and Stanford ner classifier, and index it to a local solr server. Is it normal? Can i improve code or something else to reduce cpu load?

this is the code

@Override
protected void doPost(HttpServletRequest req, HttpServletResponse resp)
 throws ServletException, IOException {
 SolrServer server = null;
 try {
 server = new CommonsHttpSolrServer(conf.getString("solr.url"));
 } catch (Exception e) {
 e.printStackTrace();
 }
 if (req.getParameter("delete") != null) {
 try {
 server.deleteByQuery("*:*");
 server.commit();
 } catch (SolrServerException e) {
 e.printStackTrace();
 }
 }
 BasicDBObject findUsersQuery = new BasicDBObject();
 findUsersQuery.put("indexed", false);
 DBCursor cur = uColl.find(findUsersQuery);
 int i = 0;
 while (cur.hasNext()) {
 Collection<SolrInputDocument> usersProfiles = new ArrayList<SolrInputDocument>();
 // String containing comma separated entities like places, organizations
 String namedEntities = "";
 // String containing other terms associated to facebook user
 String bagOfWords = "";
 CrowdUser user = Converter.toObject(CrowdUser.class, cur.next());
 log.debug("Indexing User:" + user.getFacebook().getFacebookID() + " "
 + user.getFacebook().getFacebookUser().getFirstName() + " "
 + user.getFacebook().getFacebookUser().getLastName());
 FacebookUser fu = user.getFacebook().getFacebookUser();
 // Hometown
 if (fu.getHometown() != null) {
 String name = fu.getHometown().getName();
 log.debug("Location:" + name);
 if (name != null) {
 namedEntities += name + " ";
 }
 }
 // Location
 if (fu.getLocation() != null) {
 String name = fu.getLocation().getName();
 log.debug("Hometown:" + name);
 if (name != null) {
 namedEntities += name + " ";
 }
 }
 // Work
 for (FacebookWork work : fu.getWork()) {
 log.debug("Works:");
 String description = work.getDescription();
 String lang = "";
 if (description != null)
 lang = detector.detectLang(description);
 if (description != null && "en".equals(lang)) {
 log.debug("Language detected: " + lang);
 ArrayList<String> result = NERUtils.getNamedEntities(description,
 serializedClassifier);
 namedEntities += result.get(1);
 bagOfWords += result.get(0) + " ";
 }
 if (work.getEmployer() != null && work.getEmployer().getName() != null)
 namedEntities += work.getEmployer().getName() + " ";
 if (work.getPosition() != null && work.getPosition().getName() != null)
 namedEntities += work.getPosition().getName() + " ";
 if (work.getLocation() != null && work.getLocation().getName() != null)
 namedEntities += work.getLocation().getName() + " ";
 }
 // Education
 for (FacebookEducation education : fu.getEducation()) {
 log.debug("Education:");
 if (education.getSchool() != null && education.getSchool().getName() != null)
 namedEntities += education.getSchool().getName() + " ";
 if (education.getDegree() != null && education.getDegree().getName() != null)
 namedEntities += education.getSchool().getName() + " ";
 if (education.getConcentration() != null) {
 for (FacebookDataType concentration : education.getConcentration()) {
 namedEntities += concentration.getName() + " ";
 }
 }
 }
 String about = fu.getAbout();
 if (about != null) {
 String[] strings = about.split("\\n");
 for (String string : strings) {
 log.debug("About: " + string);
 String lang = detector.detectLang(string);
 log.debug("Language detected: " + lang);
 if ("en".equals(lang)) {
 ArrayList<String> result = NERUtils.getNamedEntities(string,
 serializedClassifier);
 namedEntities += result.get(1);
 bagOfWords += result.get(0) + " ";
 }
 }
 }
 String bio = fu.getBio();
 if (bio != null) {
 String[] strings = bio.split("\\n");
 for (String string : strings) {
 log.debug("Bio: " + string);
 String lang = detector.detectLang(string);
 log.debug("Language detected: " + lang);
 if ("en".equals(lang)) {
 ArrayList<String> result = NERUtils.getNamedEntities(string,
 serializedClassifier);
 namedEntities += result.get(1);
 bagOfWords += result.get(0) + " ";
 }
 }
 }
 String quotes = fu.getQuotes();
 if (quotes != null) {
 String[] strings = quotes.split("\\n");
 for (String string : strings) {
 log.debug("Quote: " + string);
 String lang = detector.detectLang(string);
 log.debug("Language detected: " + lang);
 if ("en".equals(lang)) {
 ArrayList<String> result = NERUtils.getNamedEntities(string,
 serializedClassifier);
 namedEntities += result.get(1);
 bagOfWords += result.get(0) + " ";
 }
 }
 }
 String political = fu.getPolitical();
 log.debug("Political : " + political);
 if (political != null)
 namedEntities += political + " ";
 String religion = fu.getReligion();
 log.debug("Religion : " + religion);
 if (religion != null)
 namedEntities += religion + " ";
 if (fu.getWebsite() != null) {
 String HTMLtext = URLUtils.getURLContent(user.getFacebook().getFacebookUser()
 .getWebsite());
 if (HTMLtext != null) {
 String parsedText = Jsoup.parse(HTMLtext).text();
 log.debug("Web Site Content: " + parsedText);
 // log.debug("Language detected: " + detector.detectLang(parsedText));
 ArrayList<String> result = NERUtils.getNamedEntities(parsedText,
 serializedClassifier);
 namedEntities += result.get(1);
 bagOfWords += result.get(0) + " ";
 }
 }
 ArrayList<FacebookDocument> pages = (ArrayList<FacebookDocument>) MongoUtils
 .getDocumentsByFacebookID(dColl, user.getFacebook().getFacebookUser()
 .getFacebookPages_id());
 for (FacebookDocument doc : pages) {
 FacebookPage page = (FacebookPage) doc;
 log.debug("Page: " + page.getFacebook_id() + " " + page.getName());
 if (page.getCategory() != null)
 bagOfWords += page.getCategory() + " ";
 if (page.getDescription() != null) {
 String[] strings = page.getDescription().split("\\n");
 for (String string : strings) {
 log.debug("Page description: " + string);
 String lang = detector.detectLang(string);
 log.debug("Language detected: " + lang);
 if ("en".equals(lang)) {
 string = Jsoup.parse(string).text();
 ArrayList<String> result = NERUtils.getNamedEntities(string,
 serializedClassifier);
 namedEntities += result.get(1);
 bagOfWords += result.get(0) + " ";
 }
 }
 }
 String name = page.getName();
 if (name != null) {
 log.debug("Page name: " + name);
 String lang = detector.detectLang(name);
 log.debug("Language detected: " + lang);
 ArrayList<String> result = NERUtils.getNamedEntities(name,
 serializedClassifier);
 if ("en".equals(lang)) {
 namedEntities += result.get(1);
 bagOfWords += result.get(0) + " ";
 } else {
 namedEntities += result.get(1);
 }
 }
 HashMap<String, String> others = page.getOthers();
 for (String key : others.keySet()) {
 try {
 log.debug("Key: " + key + " Value: " + others.get(key));
 if (key.equals("location")) {
 try {
 ObjectMapper mapper = new ObjectMapper();
 JsonNode location = mapper
 .readValue(others.get(key), JsonNode.class);
 if (location.get("state") != null)
 namedEntities += location.get("state").getTextValue() + " ";
 if (location.get("country") != null)
 namedEntities += location.get("country").getTextValue() + " ";
 if (location.get("city") != null)
 namedEntities += location.get("city").getTextValue() + " ";
 } catch (Exception e) {
 log.debug("Parsing Error");
 }
 } else if (!key.equals("founded") && !key.equals("is_published")
 && !key.equals("hours") && !key.equals("username")) {
 String[] strings = others.get(key).split("\\n");
 for (String string : strings) {
 String lang = detector.detectLang(string);
 log.debug("Language detected: " + lang);
 if ("en".equals(lang)) {
 string = Jsoup.parse(string).text();
 ArrayList<String> result = NERUtils.getNamedEntities(string,
 serializedClassifier);
 namedEntities += result.get(1);
 bagOfWords += result.get(0) + " ";
 }
 }
 }
 } catch (NullPointerException e) {
 e.printStackTrace();
 }
 }
 }
 ArrayList<FacebookDocument> groups = (ArrayList<FacebookDocument>) MongoUtils
 .getDocumentsByFacebookID(dColl, user.getFacebook().getFacebookUser()
 .getFacebookGroups_id());
 for (FacebookDocument doc : groups) {
 FacebookGroup group = (FacebookGroup) doc;
 log.debug("Group: " + group.getFacebook_id() + " " + group.getName());
 if (group.getDescription() != null) {
 String[] strings = group.getDescription().split("\\n");
 for (String string : strings) {
 log.debug("Group description: " + string);
 String lang = detector.detectLang(string);
 log.debug("Language detected: " + lang);
 if ("en".equals(lang)) {
 string = Jsoup.parse(string).text();
 ArrayList<String> result = NERUtils.getNamedEntities(string,
 serializedClassifier);
 namedEntities += result.get(1);
 bagOfWords += result.get(0) + " ";
 }
 }
 }
 String name = group.getName();
 if (name != null) {
 log.debug("Group name: " + name);
 String lang = detector.detectLang(name);
 log.debug("Language detected: " + lang);
 ArrayList<String> result = NERUtils.getNamedEntities(name,
 serializedClassifier);
 if ("en".equals(lang)) {
 namedEntities += result.get(1);
 bagOfWords += result.get(0) + " ";
 } else {
 namedEntities += result.get(1);
 }
 }
 if (group.getDiscussions() != null) {
 String discussion = "";
 for (FacebookDiscussion disc : group.getDiscussions()) {
 String message = disc.getMessage();
 log.debug("Discussion: " + message);
 if (message != null && "en".equals(detector.detectLang(message))) {
 discussion += message + ".\n";
 }
 if (disc.getComments() != null) {
 for (String comment : disc.getComments()) {
 log.debug("Comment:" + comment);
 if ("en".equals(detector.detectLang(comment))) {
 discussion += comment + ".\n";
 }
 }
 }
 }
 ArrayList<String> result = NERUtils.getNamedEntities(discussion,
 serializedClassifier);
 namedEntities += result.get(1);
 bagOfWords += result.get(0) + " ";
 }
 }
 log.debug("NamedEntities: " + namedEntities);
 log.debug("bagOfWords: " + bagOfWords);
 SolrInputDocument doc1 = new SolrInputDocument();
 doc1.addField("id", user.getFacebook().getFacebookID(), 1.0f);
 doc1.addField("name", user.getFacebook().getFacebookUser().getFirstName() + " "
 + user.getFacebook().getFacebookUser().getLastName(), 1.0f);
 doc1.addField("bagofwords", bagOfWords, 1.0f);
 doc1.addField("namedentities", namedEntities);
 usersProfiles.add(doc1);
 try {
 server.add(usersProfiles);
 server.commit();
 user.setIndexed(true);
 MongoUtils.updateUserData(uColl, user);
 } catch (SolrServerException e) {
 // TODO Auto-generated catch block
 e.printStackTrace();
 log.debug("Not committed");
 }
 i++;
 log.debug("Indexed Profile Number:" + i);
 }
}
asked Feb 15, 2012 at 18:02
\$\endgroup\$
5
  • \$\begingroup\$ No, it's not normal to get more CPU usage than the machine can provide (200-300%?). And I don't know about anything else, but please break this up into smaller methods - among other things, it'll allow more targeted profiling. \$\endgroup\$ Commented Feb 15, 2012 at 21:38
  • \$\begingroup\$ @X-Zero: It's normal if you have more than one CPU core. \$\endgroup\$ Commented Feb 15, 2012 at 22:26
  • \$\begingroup\$ @palacsint - On windows at least, total output always maxes out at 100%. Two cores would max out at 50% (of total) each. \$\endgroup\$ Commented Feb 15, 2012 at 22:45
  • \$\begingroup\$ @X-Zero: Yes, you're right. Macbook Pro also could run Windows, but top usually a Unix/Linux command :-) \$\endgroup\$ Commented Feb 15, 2012 at 22:59
  • \$\begingroup\$ yes it's normal on unix, but i'm asking if (in your opinion) it is normal for this kind of code \$\endgroup\$ Commented Feb 16, 2012 at 9:57

1 Answer 1

3
\$\begingroup\$

Some tips:

  1. Use StringBuilder instead of string concatenation. (namedEntities, bagOfWords)

  2. Maybe you should close the cursor at the end of the method.

Some other notes:

  1. Handle exceptions. Why don't you log them with the log object as well? Or should they have proper handling?

  2. Do not reuse variables. The string variable in the following snippet is confusing:

    for (String string : strings) {
     log.debug("Group description: " + string);
     String lang = detector.detectLang(string);
     log.debug("Language detected: " + lang);
     if ("en".equals(lang)) {
     string = Jsoup.parse(string).text();
     ArrayList<String> result = NERUtils.getNamedEntities(string,
     serializedClassifier);
     namedEntities += result.get(1);
     bagOfWords += result.get(0) + " ";
     }
    }
    

    Declare a new variable inside the condition:

    final String string = Jsoup.parse(string).text();
    
  3. Use meaningful variable names. string is not readable as a variable name.

    String[] strings = others.get(key).split("\\n");
    for (String string : strings) {
    

    What does it store? It could be keyLine, for example.

  4. As @X-Zero already mentioned, extract out some methods with descriptive names. It would improve readability a lot.

  5. ArrayList<String> result ...
    

    should be

    List<String> result ...
    

    Type List vs type ArrayList in Java

answered Feb 15, 2012 at 23:04
\$\endgroup\$
0

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.