001///////////////////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
003// Copyright (C) 2001-2025 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
018///////////////////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.api;
021
022import java.io.BufferedReader;
023import java.io.File;
024import java.io.FileNotFoundException;
025import java.io.IOException;
026import java.io.InputStream;
027import java.io.InputStreamReader;
028import java.io.Reader;
029import java.io.StringReader;
030import java.nio.charset.Charset;
031import java.nio.charset.CharsetDecoder;
032import java.nio.charset.CodingErrorAction;
033import java.nio.charset.UnsupportedCharsetException;
034import java.nio.file.Files;
035import java.util.ArrayList;
036import java.util.Arrays;
037import java.util.List;
038import java.util.regex.Matcher;
039import java.util.regex.Pattern;
040
041import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
042
043/**
044 * Represents the text contents of a file of arbitrary plain text type.
045 *
046 * <p>
047 * This class will be passed to instances of class FileSetCheck by
048 * Checker.
049 * </p>
050 *
051 */
052public final class FileText {
053
054 /**
055 * The number of characters to read in one go.
056 */
057 private static final int READ_BUFFER_SIZE = 1024;
058
059 /**
060 * Regular expression pattern matching all line terminators.
061 */
062 private static final Pattern LINE_TERMINATOR = Pattern.compile("\\n|\\r\\n?");
063
064 // For now, we always keep both full text and lines array.
065 // In the long run, however, the one passed at initialization might be
066 // enough, while the other could be lazily created when requested.
067 // This would save memory but cost CPU cycles.
068
069 /**
070 * The name of the file.
071 * {@code null} if no file name is available for whatever reason.
072 */
073 private final File file;
074
075 /**
076 * The charset used to read the file.
077 * {@code null} if the file was reconstructed from a list of lines.
078 */
079 private final Charset charset;
080
081 /**
082 * The lines of the file, without terminators.
083 */
084 private final String[] lines;
085
086 /**
087 * The full text contents of the file.
088 *
089 * @noinspection FieldMayBeFinal
090 * @noinspectionreason FieldMayBeFinal - field is not final to ease reaching full test coverage.
091 */
092 private String fullText;
093
094 /**
095 * The first position of each line within the full text.
096 */
097 private int[] lineBreaks;
098
099 /**
100 * Copy constructor.
101 *
102 * @param fileText to make copy of
103 */
104 public FileText(FileText fileText) {
105 file = fileText.file;
106 charset = fileText.charset;
107 fullText = fileText.fullText;
108 lines = fileText.lines.clone();
109 if (fileText.lineBreaks != null) {
110 lineBreaks = fileText.lineBreaks.clone();
111 }
112 }
113
114 /**
115 * Compatibility constructor.
116 *
117 * <p>This constructor reconstructs the text of the file by joining
118 * lines with linefeed characters. This process does not restore
119 * the original line terminators and should therefore be avoided.
120 *
121 * @param file the name of the file
122 * @param lines the lines of the text, without terminators
123 * @throws NullPointerException if the lines array is null
124 */
125 public FileText(File file, List<String> lines) {
126 final StringBuilder buf = new StringBuilder(1024);
127 for (final String line : lines) {
128 buf.append(line).append('\n');
129 }
130
131 this.file = file;
132 charset = null;
133 fullText = buf.toString();
134 this.lines = lines.toArray(CommonUtil.EMPTY_STRING_ARRAY);
135 }
136
137 /**
138 * Creates a new file text representation.
139 *
140 * <p>The file will be read using the specified encoding, replacing
141 * malformed input and unmappable characters with the default
142 * replacement character.
143 *
144 * @param file the name of the file
145 * @param charsetName the encoding to use when reading the file
146 * @throws NullPointerException if the text is null
147 * @throws IllegalStateException if the charset is not supported.
148 * @throws IOException if the file could not be read
149 */
150 public FileText(File file, String charsetName) throws IOException {
151 this.file = file;
152
153 // We use our own decoder, to be sure we have complete control
154 // about replacements.
155 final CharsetDecoder decoder;
156 try {
157 charset = Charset.forName(charsetName);
158 decoder = charset.newDecoder();
159 decoder.onMalformedInput(CodingErrorAction.REPLACE);
160 decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
161 }
162 catch (final UnsupportedCharsetException exc) {
163 final String message = "Unsupported charset: " + charsetName;
164 throw new IllegalStateException(message, exc);
165 }
166
167 fullText = readFile(file, decoder);
168
169 // Use the BufferedReader to break down the lines as this
170 // is about 30% faster than using the
171 // LINE_TERMINATOR.split(fullText, -1) method
172 try (BufferedReader reader = new BufferedReader(new StringReader(fullText))) {
173 final ArrayList<String> textLines = new ArrayList<>();
174 while (true) {
175 final String line = reader.readLine();
176 if (line == null) {
177 break;
178 }
179 textLines.add(line);
180 }
181 lines = textLines.toArray(CommonUtil.EMPTY_STRING_ARRAY);
182 }
183 }
184
185 /**
186 * Reads file using specific decoder and returns all its content as a String.
187 *
188 * @param inputFile File to read
189 * @param decoder Charset decoder
190 * @return File's text
191 * @throws IOException Unable to open or read the file
192 * @throws FileNotFoundException when inputFile does not exist
193 */
194 private static String readFile(final File inputFile, final CharsetDecoder decoder)
195 throws IOException {
196 if (!inputFile.exists()) {
197 throw new FileNotFoundException(inputFile.getPath() + " (No such file or directory)");
198 }
199 final StringBuilder buf = new StringBuilder(1024);
200 final InputStream stream = Files.newInputStream(inputFile.toPath());
201 try (Reader reader = new InputStreamReader(stream, decoder)) {
202 final char[] chars = new char[READ_BUFFER_SIZE];
203 while (true) {
204 final int len = reader.read(chars);
205 if (len == -1) {
206 break;
207 }
208 buf.append(chars, 0, len);
209 }
210 }
211 return buf.toString();
212 }
213
214 /**
215 * Retrieves a line of the text by its number.
216 * The returned line will not contain a trailing terminator.
217 *
218 * @param lineNo the number of the line to get, starting at zero
219 * @return the line with the given number
220 */
221 public String get(final int lineNo) {
222 return lines[lineNo];
223 }
224
225 /**
226 * Get the name of the file.
227 *
228 * @return an object containing the name of the file
229 */
230 public File getFile() {
231 return file;
232 }
233
234 /**
235 * Get the character set which was used to read the file.
236 * Will be {@code null} for a file reconstructed from its lines.
237 *
238 * @return the charset used when the file was read
239 */
240 public Charset getCharset() {
241 return charset;
242 }
243
244 /**
245 * Retrieve the full text of the file.
246 *
247 * @return the full text of the file
248 */
249 public CharSequence getFullText() {
250 return fullText;
251 }
252
253 /**
254 * Returns an array of all lines.
255 * {@code text.toLinesArray()} is equivalent to
256 * {@code text.toArray(new String[text.size()])}.
257 *
258 * @return an array of all lines of the text
259 */
260 public String[] toLinesArray() {
261 return lines.clone();
262 }
263
264 /**
265 * Determine line and column numbers in full text.
266 *
267 * @param pos the character position in the full text
268 * @return the line and column numbers of this character
269 */
270 public LineColumn lineColumn(int pos) {
271 final int[] lineBreakPositions = findLineBreaks();
272 int lineNo = Arrays.binarySearch(lineBreakPositions, pos);
273 if (lineNo < 0) {
274 // we have: lineNo = -(insertion point) - 1
275 // we want: lineNo = (insertion point) - 1
276 lineNo = -lineNo - 2;
277 }
278 final int startOfLine = lineBreakPositions[lineNo];
279 final int columnNo = pos - startOfLine;
280 // now we have lineNo and columnNo, both starting at zero.
281 return new LineColumn(lineNo + 1, columnNo);
282 }
283
284 /**
285 * Find positions of line breaks in the full text.
286 *
287 * @return an array giving the first positions of each line.
288 */
289 private int[] findLineBreaks() {
290 if (lineBreaks == null) {
291 final int[] lineBreakPositions = new int[size() + 1];
292 lineBreakPositions[0] = 0;
293 int lineNo = 1;
294 final Matcher matcher = LINE_TERMINATOR.matcher(fullText);
295 while (matcher.find()) {
296 lineBreakPositions[lineNo] = matcher.end();
297 lineNo++;
298 }
299 if (lineNo < lineBreakPositions.length) {
300 lineBreakPositions[lineNo] = fullText.length();
301 }
302 lineBreaks = lineBreakPositions;
303 }
304 return lineBreaks;
305 }
306
307 /**
308 * Counts the lines of the text.
309 *
310 * @return the number of lines in the text
311 */
312 public int size() {
313 return lines.length;
314 }
315
316}