The methods for converting HTML to a String are listed below, organized by topic.
String
html2Plain(String text) html Plain
StringBuilder result = new StringBuilder();
for (int i = 0; i < text.length(); i++) {
if (text.charAt(i) != '&' || i >= text.length() - 2) {
result.append(text.charAt(i));
} else {
i++;
if (text.charAt(i) != '#') {
result.append('&');
...
String
html2Text(String html) Cuts all the HTML tags/comments/styles from the HTML text and returns only the printable text.
StringBuilder sb = new StringBuilder();
int state = 0;
int styleStart = html.toLowerCase().indexOf("<style");
int styleEnd = html.toLowerCase().indexOf("/style>");
while (styleStart != -1 && styleEnd > styleStart) {
html = html.substring(0, styleStart) + html.substring(styleEnd + "/style>".length());
styleStart = html.toLowerCase().indexOf("<style");
styleEnd = html.toLowerCase().indexOf("/style>");
...
String
htmlToStr(String htmlStr, int max_count) html To Str
String result = "";
boolean flag = true;
if (htmlStr == null) {
return null;
char[] a = htmlStr.toCharArray();
int length = a.length;
for (int i = 0; i < length; i++) {
...
String
htmlToString(String aS_Text) html To String
if (aS_Text == null) {
return null;
StringBuffer l_StringBuffer = new StringBuffer();
int li_length = aS_Text.length();
for (int li_index = 0; li_index < li_length; li_index++) {
char l_char = aS_Text.charAt(li_index);
switch (l_char) {
...
String
htmlToString(String s) Unescape HTML escape characters in String.
StringBuilder sb = new StringBuilder(s.length());
int len = s.length();
for (int i = 0; i < len; ++i) {
char c = s.charAt(i);
if (c != '&') {
sb.append(c);
} else {
if (i + 2 < len && s.charAt(i + 1) == '#') {
...
String
htmlToString(String string) html To String
String ans = string.replaceAll("&quot;", "\"");
ans = ans.replaceAll("&amp;", "&");
ans = ans.replaceAll("&lt;", "<");
ans = ans.replaceAll("&gt;", ">");
ans = ans.replaceAll("<.+?>", "");
return ans;
String
htmlToText(String html) Simply removes the <...> tags.
char ch = '\u0000';
int idx = 0;
int len = html.length();
StringBuffer sb = new StringBuffer();
for (int i = 0; i < len; i++) {
ch = html.charAt(i);
if (ch != '<') {
sb.append(ch);
...
String
htmlToText(String html) html To Text
if (html != null) {
html = html.replace("&aring;", "\u00e5");
html = html.replace("&auml;", "\u00e4");
html = html.replace("&ouml;", "\u00f6");
html = html.replace("&Aring;", "\u00c5");
html = html.replace("&Auml;", "\u00c4");
html = html.replace("&Ouml;", "\u00d6");
return html;
String
htmlToText(String input) Converts HTML to plain text, according to the following rules:
- Replaces any newlines or carriage returns in the source text with single spaces.
if (input == null)
input = "";
input = input.replaceAll("[\r\n]+", " ");
StringBuilder buf = new StringBuilder(input.trim());
int openIdx = 0;
while ((openIdx = buf.indexOf("<", openIdx)) >= 0) {
int closeIdx = buf.indexOf(">", openIdx);
if (closeIdx < 0) {
...
String
htmlToText(String sHTML) html To Text
String sText = "";
if (sHTML != null) {
int i = 0;
while (i < sHTML.length()) {
char c = sHTML.charAt(i);
switch (c) {
case '<':
i++;
...