Became Hot Network Question
Get histogram of bytes in any set of files in Java - take II
Intro
This post is the continuation of Get histogram of bytes in any set of files in Java. This time, I have incorporated all (?) the suggestions provided by Alexander Ivanchenko.
The entire repository is in GitHub.
Code
com.github.coderodde.file.util.ByteHistogramApp.java:
package com.github.coderodde.file.util;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
/**
 * Command-line program that prints a byte histogram of its input.
 * <p>
 * Every command-line argument is treated as a file name; the bytes of all
 * named files are accumulated into one shared {@link ByteHistogram}. When no
 * argument is given, the bytes are read from standard input instead.
 * <p>
 * Exit status: {@code -1} if any input file could not be opened, {@code -2}
 * if reading any input failed.
 *
 * @version 1.0.0 (Nov 13, 2024)
 * @since 1.0.0 (Nov 13, 2024)
 */
public final class ByteHistogramApp {

    public static void main(String[] args) {
        final List<InputStream> inputStreamList;

        // Prepare the input streams from which to build the (shared) byte
        // histogram:
        try {
            inputStreamList = getInputStreams(args);
        } catch (final PairException ex) {
            ex.getFileNotFoundException()
              .getExceptionList()
              .forEach((e) -> System.err.println(e.getMessage()));

            ex.getIOException()
              .getExceptionList()
              .forEach((e) -> System.err.println(e.getMessage()));

            System.exit(-1);
            // Unreachable at run time; lets the compiler see that the list is
            // always assigned below.
            return;
        }

        // Once here, we have valid input streams. Request the histogram and
        // print it in the console:
        try {
            System.out.println(processInputStreamList(inputStreamList));
        } catch (final MultipleIOException ex) {
            ex.getExceptionList()
              .forEach((e) -> System.err.println(e.getMessage()));

            System.exit(-2);
        }
    }

    /**
     * Converts the input argument list to the list of input streams.
     * <p>
     * All files are attempted even after a failure, so that every file that
     * cannot be opened is reported. If at least one file fails, every stream
     * that was opened successfully is closed exactly once before throwing.
     *
     * @param args the names of the files to process.
     *
     * @return the input stream list, or a single-element list holding
     *         {@code System.in} if {@code args} is empty.
     *
     * @throws PairException if any file could not be opened. It carries the
     *                       open failures and any failures raised while
     *                       closing the streams that had been opened.
     */
    private static List<InputStream> getInputStreams(final String[] args)
            throws PairException {

        if (args.length == 0) {
            return List.of(System.in);
        }

        final List<InputStream> inputStreamList = new ArrayList<>(args.length);
        final MultipleFileNotFoundException exceptionListFileNotFound =
                new MultipleFileNotFoundException();

        for (final String fileName : args) {
            try {
                inputStreamList.add(new FileInputStream(new File(fileName)));
            } catch (final FileNotFoundException ex) {
                exceptionListFileNotFound.add(ex);
            }
        }

        if (exceptionListFileNotFound.isEmpty()) {
            return inputStreamList;
        }

        // Once here, something went wrong. Release everything we managed to
        // open -- including streams opened after the first failure -- and
        // throw:
        final MultipleIOException exceptionListIO = new MultipleIOException();

        for (final InputStream is : inputStreamList) {
            try {
                is.close();
            } catch (final IOException ioException) {
                exceptionListIO.add(ioException);
            }
        }

        throw new PairException(exceptionListFileNotFound, exceptionListIO);
    }

    /**
     * Builds the shared histogram from the input streams in the argument.
     * <p>
     * A failing stream does not stop the processing of the remaining ones;
     * all failures are collected and thrown together at the end.
     *
     * @param inputStreamList the list of input streams supplying the bytes.
     *
     * @return the shared byte histogram.
     *
     * @throws MultipleIOException if any stream threw.
     */
    private static ByteHistogram
        processInputStreamList(final List<InputStream> inputStreamList)
            throws MultipleIOException {

        final ByteHistogram histogram = new ByteHistogram();
        final MultipleIOException ex = new MultipleIOException();

        for (final InputStream is : inputStreamList) {
            try {
                processInputStream(new BufferedInputStream(is), histogram);
            } catch (final IOException e) {
                // Add the new I/O exception e to ex:
                ex.add(e);
            }
        }

        if (!ex.isEmpty()) {
            // Once here, something went wrong. Throw:
            throw ex;
        }

        return histogram;
    }

    /**
     * Processes the input stream reading bytes from it until end of file is
     * reached. The stream is closed before this method returns, whether it
     * completes normally or with an exception.
     *
     * @param is        the input stream.
     * @param histogram the target histogram.
     *
     * @throws IOException if I/O fails.
     */
    private static void processInputStream(final InputStream is,
                                           final ByteHistogram histogram)
            throws IOException {

        // try-with-resources guarantees the close even when read() throws.
        try (InputStream in = is) {
            int value;

            while ((value = in.read()) != -1) {
                histogram.insert(value);
            }
        }
    }
}
com.github.coderodde.file.util.ByteHistogram.java:
package com.github.coderodde.file.util;
/**
 * This class implements the byte histogram: it counts how many times each of
 * the 256 possible byte values has been inserted and renders the counts as an
 * ASCII-art bar chart.
 *
 * @version 1.0.0 (Nov 13, 2024)
 * @since 1.0.0 (Nov 13, 2024)
 */
public final class ByteHistogram {

    private static final int HISTOGRAM_CAPACITY = 256;
    private static final int SCREEN_WIDTH = 80;

    /**
     * Width of the fixed line prefix {@code "0x.. [.]: "} (10 characters) plus
     * the one-column gap in front of the widest count.
     */
    private static final int LINE_PREAMBLE_WIDTH = 11;

    /** Width of the single space separating the count from the bar. */
    private static final int BAR_SEPARATOR_WIDTH = 1;

    private final long[] data = new long[HISTOGRAM_CAPACITY];

    /**
     * Account the byte {@code b}.
     *
     * @param b the byte to account, as an unsigned value in the range
     *          {@code [0, 255]} (the form returned by
     *          {@code InputStream.read()}). A caller holding a signed
     *          {@code byte} should pass {@code value & 0xFF}.
     *
     * @throws IllegalArgumentException if {@code b} is outside
     *                                  {@code [0, 255]}.
     */
    public void insert(final int b) {
        if (b < 0 || b >= HISTOGRAM_CAPACITY) {
            throw new IllegalArgumentException(
                    "Byte value out of range [0, 255]: " + b);
        }

        data[b]++;
    }

    /**
     * Converts this byte histogram to an ASCII art. The result has one line
     * per byte value, each terminated by {@code '\n'} and at most
     * {@code SCREEN_WIDTH} characters wide.
     *
     * @return ASCII art version of this byte histogram.
     */
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();
        final long maximumCount = computeMaximumCount();
        final int countStringLength = computeCountStringLength(maximumCount);
        final String lineFormat = getLineFormat(countStringLength);

        for (int i = 0; i < data.length; i++) {
            loadLine(sb,
                     lineFormat,
                     countStringLength,
                     i,
                     maximumCount);
        }

        return sb.toString();
    }

    /**
     * Builds the format for printing the lines in the output.
     * <p>
     * The count is right-aligned in a field of {@code countStringLength + 1}
     * characters. A plain width is used instead of the {@code ' '} flag,
     * because that flag adds its space on top of an already full-width
     * number and would shift the widest counts one column to the right.
     *
     * @param countStringLength the count string length in characters.
     *
     * @return the format for printing the lines in the output.
     */
    private static String getLineFormat(final int countStringLength) {
        return String.format("0x%%02x [%%c]: %%%dd %%s\n",
                             countStringLength + 1);
    }

    /**
     * Loads a single line to the total output of this byte histogram.
     *
     * @param sb                the string builder.
     * @param lineFormat        the format of the line.
     * @param countStringLength the length of the count string.
     * @param index             the byte index.
     * @param maximumCount      the maximum count in the histogram.
     */
    private void loadLine(final StringBuilder sb,
                          final String lineFormat,
                          final int countStringLength,
                          final int index,
                          final long maximumCount) {

        // ISO control characters are shown as '?'.
        final char symbol = Character.isISOControl((char) index)
                ? '?'
                : (char) index;

        sb.append(String.format(lineFormat,
                                index,
                                symbol,
                                data[index],
                                computeBarAscii(data[index],
                                                maximumCount,
                                                countStringLength)));
    }

    /**
     * Computes and returns the bar ASCII art. The bar of the maximum count
     * fills the line up to {@code SCREEN_WIDTH}; other bars are scaled
     * proportionally and rounded down.
     *
     * @param count             the count of the line we are processing.
     * @param maximumCount      the maximum count in the byte histogram.
     * @param countStringLength the count string length.
     *
     * @return the bar ASCII art; empty if {@code maximumCount} is zero.
     */
    private static String computeBarAscii(final long count,
                                          final long maximumCount,
                                          final int countStringLength) {
        if (maximumCount <= 0L) {
            // Empty histogram: nothing to scale against.
            return "";
        }

        final int maximumBarLength = SCREEN_WIDTH
                                   - LINE_PREAMBLE_WIDTH
                                   - BAR_SEPARATOR_WIDTH
                                   - countStringLength;

        // double keeps far more significant bits of a long count than float.
        final double ratio = ((double) count) / ((double) maximumCount);
        final int barLength = (int) (ratio * maximumBarLength);
        final StringBuilder sb = new StringBuilder(Math.max(barLength, 0));

        for (int i = 0; i < barLength; i++) {
            sb.append('*');
        }

        return sb.toString();
    }

    /**
     * Computes the maximum count in this byte histogram.
     *
     * @return the maximum count.
     */
    private long computeMaximumCount() {
        long m = 0L;

        for (final long count : data) {
            m = Math.max(m, count);
        }

        return m;
    }

    /**
     * Computes and returns the length of the widest count string.
     *
     * @param maximumCount the maximum count of the byte histogram.
     *
     * @return the number of characters in the decimal representation of
     *         {@code maximumCount}.
     */
    private static int computeCountStringLength(final long maximumCount) {
        return Long.toString(maximumCount).length();
    }
}
Typical output
C:\Users\rodio\OneDrive\Documents\NetBeansProjects\ByteHistogram.java\target\classes>echo Hello World | java com.github.coderodde.file.util.ByteHistogramApp
0x00 [?]: 0
0x01 [?]: 0
0x02 [?]: 0
0x03 [?]: 0
0x04 [?]: 0
0x05 [?]: 0
0x06 [?]: 0
0x07 [?]: 0
0x08 [?]: 0
0x09 [?]: 0
0x0a [?]: 1 **********************
0x0b [?]: 0
0x0c [?]: 0
0x0d [?]: 1 **********************
0x0e [?]: 0
0x0f [?]: 0
0x10 [?]: 0
0x11 [?]: 0
0x12 [?]: 0
0x13 [?]: 0
0x14 [?]: 0
0x15 [?]: 0
0x16 [?]: 0
0x17 [?]: 0
0x18 [?]: 0
0x19 [?]: 0
0x1a [?]: 0
0x1b [?]: 0
0x1c [?]: 0
0x1d [?]: 0
0x1e [?]: 0
0x1f [?]: 0
0x20 [ ]: 2 *********************************************
0x21 [!]: 0
0x22 ["]: 0
0x23 [#]: 0
0x24 [$]: 0
0x25 [%]: 0
0x26 [&]: 0
0x27 [']: 0
0x28 [(]: 0
0x29 [)]: 0
0x2a [*]: 0
0x2b [+]: 0
0x2c [,]: 0
0x2d [-]: 0
0x2e [.]: 0
0x2f [/]: 0
0x30 [0]: 0
0x31 [1]: 0
0x32 [2]: 0
0x33 [3]: 0
0x34 [4]: 0
0x35 [5]: 0
0x36 [6]: 0
0x37 [7]: 0
0x38 [8]: 0
0x39 [9]: 0
0x3a [:]: 0
0x3b [;]: 0
0x3c [<]: 0
0x3d [=]: 0
0x3e [>]: 0
0x3f [?]: 0
0x40 [@]: 0
0x41 [A]: 0
0x42 [B]: 0
0x43 [C]: 0
0x44 [D]: 0
0x45 [E]: 0
0x46 [F]: 0
0x47 [G]: 0
0x48 [H]: 1 **********************
0x49 [I]: 0
0x4a [J]: 0
0x4b [K]: 0
0x4c [L]: 0
0x4d [M]: 0
0x4e [N]: 0
0x4f [O]: 0
0x50 [P]: 0
0x51 [Q]: 0
0x52 [R]: 0
0x53 [S]: 0
0x54 [T]: 0
0x55 [U]: 0
0x56 [V]: 0
0x57 [W]: 1 **********************
0x58 [X]: 0
0x59 [Y]: 0
0x5a [Z]: 0
0x5b [[]: 0
0x5c [\]: 0
0x5d []]: 0
0x5e [^]: 0
0x5f [_]: 0
0x60 [`]: 0
0x61 [a]: 0
0x62 [b]: 0
0x63 [c]: 0
0x64 [d]: 1 **********************
0x65 [e]: 1 **********************
0x66 [f]: 0
0x67 [g]: 0
0x68 [h]: 0
0x69 [i]: 0
0x6a [j]: 0
0x6b [k]: 0
0x6c [l]: 3 ********************************************************************
0x6d [m]: 0
0x6e [n]: 0
0x6f [o]: 2 *********************************************
0x70 [p]: 0
0x71 [q]: 0
0x72 [r]: 1 **********************
0x73 [s]: 0
0x74 [t]: 0
0x75 [u]: 0
0x76 [v]: 0
0x77 [w]: 0
0x78 [x]: 0
0x79 [y]: 0
0x7a [z]: 0
0x7b [{]: 0
0x7c [|]: 0
0x7d [}]: 0
0x7e [~]: 0
0x7f [?]: 0
0x80 [?]: 0
0x81 [?]: 0
0x82 [?]: 0
0x83 [?]: 0
0x84 [?]: 0
0x85 [?]: 0
0x86 [?]: 0
0x87 [?]: 0
0x88 [?]: 0
0x89 [?]: 0
0x8a [?]: 0
0x8b [?]: 0
0x8c [?]: 0
0x8d [?]: 0
0x8e [?]: 0
0x8f [?]: 0
0x90 [?]: 0
0x91 [?]: 0
0x92 [?]: 0
0x93 [?]: 0
0x94 [?]: 0
0x95 [?]: 0
0x96 [?]: 0
0x97 [?]: 0
0x98 [?]: 0
0x99 [?]: 0
0x9a [?]: 0
0x9b [?]: 0
0x9c [?]: 0
0x9d [?]: 0
0x9e [?]: 0
0x9f [?]: 0
0xa0 [ ]: 0
0xa1 [¡]: 0
0xa2 [¢]: 0
0xa3 [£]: 0
0xa4 [¤]: 0
0xa5 [¥]: 0
0xa6 [¦]: 0
0xa7 [§]: 0
0xa8 [¨]: 0
0xa9 [©]: 0
0xaa [ª]: 0
0xab [«]: 0
0xac [¬]: 0
0xad []: 0
0xae [®]: 0
0xaf [¯]: 0
0xb0 [°]: 0
0xb1 [±]: 0
0xb2 [²]: 0
0xb3 [³]: 0
0xb4 [´]: 0
0xb5 [µ]: 0
0xb6 [¶]: 0
0xb7 [·]: 0
0xb8 [¸]: 0
0xb9 [¹]: 0
0xba [º]: 0
0xbb [»]: 0
0xbc [¼]: 0
0xbd [½]: 0
0xbe [¾]: 0
0xbf [¿]: 0
0xc0 [À]: 0
0xc1 [Á]: 0
0xc2 [Â]: 0
0xc3 [Ã]: 0
0xc4 [Ä]: 0
0xc5 [Å]: 0
0xc6 [Æ]: 0
0xc7 [Ç]: 0
0xc8 [È]: 0
0xc9 [É]: 0
0xca [Ê]: 0
0xcb [Ë]: 0
0xcc [Ì]: 0
0xcd [Í]: 0
0xce [Î]: 0
0xcf [Ï]: 0
0xd0 [Ð]: 0
0xd1 [Ñ]: 0
0xd2 [Ò]: 0
0xd3 [Ó]: 0
0xd4 [Ô]: 0
0xd5 [Õ]: 0
0xd6 [Ö]: 0
0xd7 [×]: 0
0xd8 [Ø]: 0
0xd9 [Ù]: 0
0xda [Ú]: 0
0xdb [Û]: 0
0xdc [Ü]: 0
0xdd [Ý]: 0
0xde [Þ]: 0
0xdf [ß]: 0
0xe0 [à]: 0
0xe1 [á]: 0
0xe2 [â]: 0
0xe3 [ã]: 0
0xe4 [ä]: 0
0xe5 [å]: 0
0xe6 [æ]: 0
0xe7 [ç]: 0
0xe8 [è]: 0
0xe9 [é]: 0
0xea [ê]: 0
0xeb [ë]: 0
0xec [ì]: 0
0xed [í]: 0
0xee [î]: 0
0xef [ï]: 0
0xf0 [ð]: 0
0xf1 [ñ]: 0
0xf2 [ò]: 0
0xf3 [ó]: 0
0xf4 [ô]: 0
0xf5 [õ]: 0
0xf6 [ö]: 0
0xf7 [÷]: 0
0xf8 [ø]: 0
0xf9 [ù]: 0
0xfa [ú]: 0
0xfb [û]: 0
0xfc [ü]: 0
0xfd [ý]: 0
0xfe [þ]: 0
0xff [ÿ]: 0
Critique request
Please tell me anything that comes to mind.
lang-java