Skip to main content
Code Review

Return to Question

edited tags
Link
Toby Speight
  • 87.7k
  • 14
  • 104
  • 325
added 3 characters in body
Source Link

The entire repository is on my GitHub.

The entire repository is in GitHub.

The entire repository is on my GitHub.

Became Hot Network Question
Source Link
coderodde
  • 31.7k
  • 15
  • 77
  • 202

Get histogram of bytes in any set of files in Java - take II

Intro

This post is the continuation of Get histogram of bytes in any set of files in Java. This time, I have incorporated all (?) the suggestions provided by Alexander Ivanchenko.

The entire repository is on GitHub.

Code

com.github.coderodde.file.util.ByteHistogramApp.java:

package com.github.coderodde.file.util;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
/**
 * Command-line program that prints a byte histogram of its input.
 *
 * <p>Every command-line argument is treated as a file name, and all named
 * files contribute to one shared histogram. When no arguments are given,
 * the bytes are read from standard input instead.
 *
 * <p>The process exits with status {@code -1} if any file could not be opened
 * and with status {@code -2} if reading any stream failed.
 *
 * @version 1.0.0 (Nov 13, 2024)
 * @since 1.0.0 (Nov 13, 2024)
 */
public final class ByteHistogramApp {

    /**
     * Program entry point.
     *
     * @param args the names of the files to process; may be empty, in which
     *             case standard input is processed.
     */
    public static void main(String[] args) {
        final List<InputStream> inputStreamList;

        // Prepare the input streams from which to build the (shared) byte
        // histogram:
        try {
            inputStreamList = getInputStreams(args);
        } catch (final PairException ex) {

            ex.getFileNotFoundException()
              .getExceptionList()
              .forEach((e) -> System.err.println(e.getMessage()));

            ex.getIOException()
              .getExceptionList()
              .forEach((e) -> System.err.println(e.getMessage()));

            System.exit(-1);
            // Never reached at run time; tells the compiler that the list is
            // definitely assigned below.
            return;
        }

        // Once here, we have valid input streams. Request the histogram and
        // print it in the console:
        try {
            System.out.println(processInputStreamList(inputStreamList));
        } catch (final MultipleIOException ex) {

            ex.getExceptionList()
              .forEach((e) -> System.err.println(e.getMessage()));

            System.exit(-2);
        }
    }

    /**
     * Converts the input argument list to the list of input streams.
     *
     * <p>All files are attempted so that every missing file is reported at
     * once. If at least one file cannot be opened, every stream that was
     * opened successfully is closed before the exception is thrown, so no
     * file handle is leaked.
     *
     * @param args the names of the files to process.
     *
     * @return the input stream list; a single-element list holding
     *         {@code System.in} if {@code args} is empty.
     *
     * @throws PairException if any file could not be opened. It carries the
     *                       collected {@code FileNotFoundException}s and any
     *                       {@code IOException}s raised while closing the
     *                       streams that had been opened.
     */
    private static List<InputStream> getInputStreams(final String[] args)
            throws PairException {

        if (args.length == 0) {
            return List.of(System.in);
        }

        final List<InputStream> inputStreamList = new ArrayList<>(args.length);

        final MultipleFileNotFoundException exceptionListFileNotFound =
                new MultipleFileNotFoundException();

        final MultipleIOException exceptionListIO =
                new MultipleIOException();

        for (final String fileName : args) {
            try {
                inputStreamList.add(new FileInputStream(new File(fileName)));
            } catch (final FileNotFoundException ex) {
                // Remember the failure and keep going, so that all missing
                // files are reported together:
                exceptionListFileNotFound.add(ex);
            }
        }

        if (!exceptionListFileNotFound.isEmpty()) {
            // Something went wrong. Release every stream we managed to open,
            // including those opened after the first failure, then throw:
            for (final InputStream is : inputStreamList) {
                try {
                    is.close();
                } catch (final IOException ioException) {
                    exceptionListIO.add(ioException);
                }
            }

            throw new PairException(exceptionListFileNotFound,
                                    exceptionListIO);
        }

        return inputStreamList;
    }

    /**
     * Builds the shared histogram from the input streams in the argument.
     * A failure on one stream does not prevent the remaining streams from
     * being processed; every stream is closed after it has been handled.
     *
     * @param inputStreamList the list of input streams supplying the bytes.
     *
     * @return the shared byte histogram.
     *
     * @throws MultipleIOException if any stream threw.
     */
    private static ByteHistogram
            processInputStreamList(final List<InputStream> inputStreamList)
            throws MultipleIOException {

        final ByteHistogram histogram = new ByteHistogram();
        final MultipleIOException ex = new MultipleIOException();

        for (final InputStream is : inputStreamList) {
            try {
                processInputStream(new BufferedInputStream(is), histogram);
            } catch (final IOException e) {
                // Add the new I/O exception e to ex:
                ex.add(e);
            }
        }

        if (!ex.isEmpty()) {
            // Once here, something went wrong. Throw:
            throw ex;
        }

        return histogram;
    }

    /**
     * Processes the input stream reading bytes from it until end of file is
     * reached. The stream is closed when this method returns, whether it
     * completes normally or fails.
     *
     * @param is        the input stream.
     * @param histogram the target histogram.
     *
     * @throws IOException if I/O fails.
     */
    private static void processInputStream(final InputStream is,
                                           final ByteHistogram histogram)
            throws IOException {

        // try-with-resources guarantees close() even when read() throws.
        try (final InputStream in = is) {
            int i;

            while ((i = in.read()) != -1) {
                histogram.insert(i);
            }
        }
    }
}

com.github.coderodde.file.util.ByteHistogram.java:

package com.github.coderodde.file.util;
/**
 * This class implements the byte histogram: a table of 256 counters, one per
 * possible unsigned byte value, together with an ASCII-art rendering.
 *
 * @version 1.0.0 (Nov 13, 2024)
 * @since 1.0.0 (Nov 13, 2024)
 */
public final class ByteHistogram {

    /** Number of distinct unsigned byte values. */
    private static final int HISTOGRAM_CAPACITY = 256;

    /** Maximum width, in characters, of a rendered line. */
    private static final int SCREEN_WIDTH = 80;

    /**
     * Characters on a line that are neither count digits nor bar: the
     * ten-character prefix {@code "0x00 [c]: "} plus the single space that
     * separates the count from the bar.
     */
    private static final int LINE_PREAMBLE_WIDTH = 11;

    private final long[] data = new long[HISTOGRAM_CAPACITY];

    /**
     * Account the byte {@code b}.
     *
     * @param b the byte to account, as an unsigned value in the range
     *          {@code [0, 255]} (the form returned by
     *          {@code InputStream.read()}).
     *
     * @throws ArrayIndexOutOfBoundsException if {@code b} is outside of the
     *                                        range {@code [0, 255]}.
     */
    public void insert(final int b) {
        data[b]++;
    }

    /**
     * Converts this byte histogram to an ASCII art. The result consists of
     * 256 lines, one per byte value, each terminated by {@code '\n'}. Counts
     * are right-aligned in a column as wide as the largest count, so all bars
     * start in the same column and no line exceeds the screen width.
     *
     * @return ASCII art version of this byte histogram.
     */
    @Override
    public String toString() {
        final long maximumCount = computeMaximumCount();
        final int countStringLength = computeCountStringLength(maximumCount);
        final String lineFormat = getLineFormat(countStringLength);
        final StringBuilder sb = new StringBuilder();

        for (int i = 0; i < data.length; i++) {
            loadLine(sb,
                     lineFormat,
                     countStringLength,
                     i,
                     maximumCount);
        }

        return sb.toString();
    }

    /**
     * Builds the format for printing the lines in the output.
     *
     * @param countStringLength the count string length in characters.
     *
     * @return the format for printing the lines in the output.
     */
    private static String getLineFormat(final int countStringLength) {
        // Plain width without the ' ' flag: with the flag, a count that
        // already fills the width gets an extra leading space, which pushes
        // that line one column to the right of all the others.
        return String.format("0x%%02x [%%c]: %%%dd %%s\n", countStringLength);
    }

    /**
     * Loads a single line to the total output of this byte histogram.
     *
     * @param sb                the string builder.
     * @param lineFormat        the format of the line.
     * @param countStringLength the length of the count string.
     * @param index             the byte index.
     * @param maximumCount      the maximum count in the histogram.
     */
    private void loadLine(final StringBuilder sb,
                          final String lineFormat,
                          final int countStringLength,
                          final int index,
                          final long maximumCount) {

        // Control characters would corrupt the layout; show '?' instead.
        final char symbol = Character.isISOControl((char) index)
                ? '?'
                : (char) index;

        sb.append(
                String.format(
                        lineFormat,
                        index,
                        symbol,
                        data[index],
                        computeBarAscii(data[index],
                                        maximumCount,
                                        countStringLength)));
    }

    /**
     * Computes and returns the bar ASCII art. The bar length is
     * {@code floor(count * maximumBarLength / maximumCount)}, where the
     * maximum bar length is whatever remains of the screen width after the
     * line preamble and the count column.
     *
     * @param count             the count of the line we are processing.
     * @param maximumCount      the maximum count in the byte histogram.
     * @param countStringLength the count string length.
     *
     * @return the bar ASCII art; empty if the count is zero or the histogram
     *         is empty.
     */
    private static String computeBarAscii(final long count,
                                          final long maximumCount,
                                          final int countStringLength) {

        final int maximumBarLength = SCREEN_WIDTH
                                   - LINE_PREAMBLE_WIDTH
                                   - countStringLength;

        // Empty histogram (maximumCount == 0) or nothing to draw:
        if (count <= 0L || maximumCount <= 0L || maximumBarLength <= 0) {
            return "";
        }

        final long scaled;

        if (count <= Long.MAX_VALUE / maximumBarLength) {
            // Exact integer arithmetic: floating-point division can land
            // just below an integer and truncate the bar one star short.
            scaled = count * maximumBarLength / maximumCount;
        } else {
            // The product would overflow a long; fall back to double.
            scaled = (long) (((double) count / (double) maximumCount)
                             * maximumBarLength);
        }

        final int barLength = (int) Math.min(scaled, maximumBarLength);
        final StringBuilder sb = new StringBuilder(barLength);

        for (int i = 0; i < barLength; i++) {
            sb.append('*');
        }

        return sb.toString();
    }

    /**
     * Computes the maximum count in this byte histogram.
     *
     * @return the maximum count; zero if nothing has been inserted.
     */
    private long computeMaximumCount() {
        long m = 0L;

        for (final long count : data) {
            m = Math.max(m, count);
        }

        return m;
    }

    /**
     * Computes and returns the length of the widest count string.
     *
     * @param maximumLength the maximum count of the byte histogram.
     *
     * @return the widest length of the count string in characters.
     */
    private static int computeCountStringLength(final long maximumLength) {
        return Long.toString(maximumLength).length();
    }
}

Typical output

C:\Users\rodio\OneDrive\Documents\NetBeansProjects\ByteHistogram.java\target\classes>echo Hello World | java com.github.coderodde.file.util.ByteHistogramApp
0x00 [?]: 0
0x01 [?]: 0
0x02 [?]: 0
0x03 [?]: 0
0x04 [?]: 0
0x05 [?]: 0
0x06 [?]: 0
0x07 [?]: 0
0x08 [?]: 0
0x09 [?]: 0
0x0a [?]: 1 **********************
0x0b [?]: 0
0x0c [?]: 0
0x0d [?]: 1 **********************
0x0e [?]: 0
0x0f [?]: 0
0x10 [?]: 0
0x11 [?]: 0
0x12 [?]: 0
0x13 [?]: 0
0x14 [?]: 0
0x15 [?]: 0
0x16 [?]: 0
0x17 [?]: 0
0x18 [?]: 0
0x19 [?]: 0
0x1a [?]: 0
0x1b [?]: 0
0x1c [?]: 0
0x1d [?]: 0
0x1e [?]: 0
0x1f [?]: 0
0x20 [ ]: 2 *********************************************
0x21 [!]: 0
0x22 ["]: 0
0x23 [#]: 0
0x24 [$]: 0
0x25 [%]: 0
0x26 [&]: 0
0x27 [']: 0
0x28 [(]: 0
0x29 [)]: 0
0x2a [*]: 0
0x2b [+]: 0
0x2c [,]: 0
0x2d [-]: 0
0x2e [.]: 0
0x2f [/]: 0
0x30 [0]: 0
0x31 [1]: 0
0x32 [2]: 0
0x33 [3]: 0
0x34 [4]: 0
0x35 [5]: 0
0x36 [6]: 0
0x37 [7]: 0
0x38 [8]: 0
0x39 [9]: 0
0x3a [:]: 0
0x3b [;]: 0
0x3c [<]: 0
0x3d [=]: 0
0x3e [>]: 0
0x3f [?]: 0
0x40 [@]: 0
0x41 [A]: 0
0x42 [B]: 0
0x43 [C]: 0
0x44 [D]: 0
0x45 [E]: 0
0x46 [F]: 0
0x47 [G]: 0
0x48 [H]: 1 **********************
0x49 [I]: 0
0x4a [J]: 0
0x4b [K]: 0
0x4c [L]: 0
0x4d [M]: 0
0x4e [N]: 0
0x4f [O]: 0
0x50 [P]: 0
0x51 [Q]: 0
0x52 [R]: 0
0x53 [S]: 0
0x54 [T]: 0
0x55 [U]: 0
0x56 [V]: 0
0x57 [W]: 1 **********************
0x58 [X]: 0
0x59 [Y]: 0
0x5a [Z]: 0
0x5b [[]: 0
0x5c [\]: 0
0x5d []]: 0
0x5e [^]: 0
0x5f [_]: 0
0x60 [`]: 0
0x61 [a]: 0
0x62 [b]: 0
0x63 [c]: 0
0x64 [d]: 1 **********************
0x65 [e]: 1 **********************
0x66 [f]: 0
0x67 [g]: 0
0x68 [h]: 0
0x69 [i]: 0
0x6a [j]: 0
0x6b [k]: 0
0x6c [l]: 3 ********************************************************************
0x6d [m]: 0
0x6e [n]: 0
0x6f [o]: 2 *********************************************
0x70 [p]: 0
0x71 [q]: 0
0x72 [r]: 1 **********************
0x73 [s]: 0
0x74 [t]: 0
0x75 [u]: 0
0x76 [v]: 0
0x77 [w]: 0
0x78 [x]: 0
0x79 [y]: 0
0x7a [z]: 0
0x7b [{]: 0
0x7c [|]: 0
0x7d [}]: 0
0x7e [~]: 0
0x7f [?]: 0
0x80 [?]: 0
0x81 [?]: 0
0x82 [?]: 0
0x83 [?]: 0
0x84 [?]: 0
0x85 [?]: 0
0x86 [?]: 0
0x87 [?]: 0
0x88 [?]: 0
0x89 [?]: 0
0x8a [?]: 0
0x8b [?]: 0
0x8c [?]: 0
0x8d [?]: 0
0x8e [?]: 0
0x8f [?]: 0
0x90 [?]: 0
0x91 [?]: 0
0x92 [?]: 0
0x93 [?]: 0
0x94 [?]: 0
0x95 [?]: 0
0x96 [?]: 0
0x97 [?]: 0
0x98 [?]: 0
0x99 [?]: 0
0x9a [?]: 0
0x9b [?]: 0
0x9c [?]: 0
0x9d [?]: 0
0x9e [?]: 0
0x9f [?]: 0
0xa0 [ ]: 0
0xa1 [¡]: 0
0xa2 [¢]: 0
0xa3 [£]: 0
0xa4 [¤]: 0
0xa5 [¥]: 0
0xa6 [¦]: 0
0xa7 [§]: 0
0xa8 [ ̈]: 0
0xa9 [©]: 0
0xaa [a]: 0
0xab [«]: 0
0xac [¬]: 0
0xad [­]: 0
0xae [®]: 0
0xaf [ ̄]: 0
0xb0 [°]: 0
0xb1 [±]: 0
0xb2 [2]: 0
0xb3 [3]: 0
0xb4 [ ́]: 0
0xb5 [μ]: 0
0xb6 [¶]: 0
0xb7 [·]: 0
0xb8 [ ̧]: 0
0xb9 [1]: 0
0xba [o]: 0
0xbb [»]: 0
0xbc [¼]: 0
0xbd [½]: 0
0xbe [¾]: 0
0xbf [¿]: 0
0xc0 [À]: 0
0xc1 [Á]: 0
0xc2 [Â]: 0
0xc3 [Ã]: 0
0xc4 [Ä]: 0
0xc5 [Å]: 0
0xc6 [Æ]: 0
0xc7 [Ç]: 0
0xc8 [È]: 0
0xc9 [É]: 0
0xca [Ê]: 0
0xcb [Ë]: 0
0xcc [Ì]: 0
0xcd [Í]: 0
0xce [Î]: 0
0xcf [Ï]: 0
0xd0 [Ð]: 0
0xd1 [Ñ]: 0
0xd2 [Ò]: 0
0xd3 [Ó]: 0
0xd4 [Ô]: 0
0xd5 [Õ]: 0
0xd6 [Ö]: 0
0xd7 [×]: 0
0xd8 [Ø]: 0
0xd9 [Ù]: 0
0xda [Ú]: 0
0xdb [Û]: 0
0xdc [Ü]: 0
0xdd [Ý]: 0
0xde [Þ]: 0
0xdf [ß]: 0
0xe0 [à]: 0
0xe1 [á]: 0
0xe2 [â]: 0
0xe3 [ã]: 0
0xe4 [ä]: 0
0xe5 [å]: 0
0xe6 [æ]: 0
0xe7 [ç]: 0
0xe8 [è]: 0
0xe9 [é]: 0
0xea [ê]: 0
0xeb [ë]: 0
0xec [ì]: 0
0xed [í]: 0
0xee [î]: 0
0xef [ï]: 0
0xf0 [ð]: 0
0xf1 [ñ]: 0
0xf2 [ò]: 0
0xf3 [ó]: 0
0xf4 [ô]: 0
0xf5 [õ]: 0
0xf6 [ö]: 0
0xf7 [÷]: 0
0xf8 [ø]: 0
0xf9 [ù]: 0
0xfa [ú]: 0
0xfb [û]: 0
0xfc [ü]: 0
0xfd [ý]: 0
0xfe [þ]: 0
0xff [ÿ]: 0

Critique request

Please tell me anything that comes to mind.

lang-java

AltStyle によって変換されたページ (->オリジナル) /