Java Utility Methods HTML to String

List of utility methods to do HTML to String

  1. HOME
  2. Java
  3. H
  4. HTML to String

Description

The methods for converting HTML to String are organized into topic(s).

Method

String html2Plain(String text)
html Plain
StringBuilder result = new StringBuilder();
for (int i = 0; i < text.length(); i++) {
 if (text.charAt(i) != '&' || i >= text.length() - 2) {
 result.append(text.charAt(i));
 } else {
 i++;
 if (text.charAt(i) != '#') {
 result.append('&');
...
String html2Text(String html)
Cuts all the html tags/comments/styles from the html-text and returns the only printable text.
StringBuilder sb = new StringBuilder();
int state = 0;
int styleStart = html.toLowerCase().indexOf("<style");
int styleEnd = html.toLowerCase().indexOf("/style>");
while (styleStart != -1 && styleEnd > styleStart) {
 html = html.substring(0, styleStart) + html.substring(styleEnd + "/style>".length());
 styleStart = html.toLowerCase().indexOf("<style");
 styleEnd = html.toLowerCase().indexOf("/style>");
...
String htmlToStr(String htmlStr, int max_count)
html To Str
String result = "";
boolean flag = true;
if (htmlStr == null) {
 return null;
char[] a = htmlStr.toCharArray();
int length = a.length;
for (int i = 0; i < length; i++) {
...
String htmlToString(String aS_Text)
html To String
if (aS_Text == null) {
 return null;
StringBuffer l_StringBuffer = new StringBuffer();
int li_length = aS_Text.length();
for (int li_index = 0; li_index < li_length; li_index++) {
 char l_char = aS_Text.charAt(li_index);
 switch (l_char) {
...
String htmlToString(String s)
Unescape HTML escape characters in String.
StringBuilder sb = new StringBuilder(s.length());
int len = s.length();
for (int i = 0; i < len; ++i) {
 char c = s.charAt(i);
 if (c != '&') {
 sb.append(c);
 } else {
 if (i + 2 < len && s.charAt(i + 1) == '#') { 
...
String htmlToString(String string)
html To String
String ans = string.replaceAll("&quot;", "\"");
ans = ans.replaceAll("&amp;", "&");
ans = ans.replaceAll("&lt;", "<");
ans = ans.replaceAll("&gt;", ">");
ans = ans.replaceAll("<.+?>", "");
return ans;
String htmlToText(String html)
Simply removes the <...> tags.
char ch = '\u0000';
int idx = 0;
int len = html.length();
StringBuffer sb = new StringBuffer();
for (int i = 0; i < len; i++) {
 ch = html.charAt(i);
 if (ch != '<') {
 sb.append(ch);
...
String htmlToText(String html)
html To Text
if (html != null) {
 html = html.replace("&aring;", "\u00e5");
 html = html.replace("&auml;", "\u00e4");
 html = html.replace("&ouml;", "\u00f6");
 html = html.replace("&Aring;", "\u00c5");
 html = html.replace("&Auml;", "\u00c4");
 html = html.replace("&Ouml;", "\u00d6");
return html;
String htmlToText(String input)
Converts HTML to plain text, according to the following rules:
  • Replaces any newlines or carriage returns in the source text with single spaces.
    if (input == null)
     input = "";
    input = input.replaceAll("[\r\n]+", " ");
    StringBuilder buf = new StringBuilder(input.trim());
    int openIdx = 0;
    while ((openIdx = buf.indexOf("<", openIdx)) >= 0) {
     int closeIdx = buf.indexOf(">", openIdx);
     if (closeIdx < 0) {
    ...
    
String htmlToText(String sHTML)
html To Text
String sText = "";
if (sHTML != null) {
 int i = 0;
 while (i < sHTML.length()) {
 char c = sHTML.charAt(i);
 switch (c) {
 case '<':
 i++;
...


AltStyle によって変換されたページ (->オリジナル) /