1//===-- DataExtractor.h -----------------------------------------*- C++ -*-===//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//===----------------------------------------------------------------------===//
9#ifndef LLVM_SUPPORT_DATAEXTRACTOR_H
10#define LLVM_SUPPORT_DATAEXTRACTOR_H
19/// An auxiliary type to facilitate extraction of 3-byte entities.
25 int LoIx = IsLittleEndian ? 0 : 2;
// Compile-time check that uint24_t occupies exactly three bytes.
31static_assert(
sizeof(
uint24_t) == 3,
"sizeof(uint24_t) != 3");
33/// Needed by swapByteOrder().
35 return uint24_t(
C.Bytes[2],
C.Bytes[1],
C.Bytes[0]);
43 /// A class representing a position in a DataExtractor, as well as any error
44 /// encountered during extraction. It enables one to extract a sequence of
45 /// values without error-checking and then checking for errors in bulk at the
46 /// end. The class holds an Error object, so failing to check the result of
47 /// the parse will result in a runtime error. The error flag is sticky and
48 /// will cause all subsequent extraction functions to fail without even
49 /// attempting to parse and without updating the Cursor offset. After clearing
50 /// the error flag, one can again use the Cursor object for parsing.
58 /// Construct a cursor for extraction from the given offset.
61 /// Checks whether the cursor is valid (i.e. no errors were encountered). In
62 /// case of errors, this does not clear the error flag -- one must call
63 /// takeError() instead.
64 explicit operator bool() {
// True exactly when Err tests false, i.e. no error is currently held.
return !Err; }
66 /// Return the current position of this Cursor. In the error state this is
67 /// the position of the Cursor before the first error was encountered.
70 /// Set the cursor to the new offset. This does not impact the error state.
73 /// Return error contained inside this Cursor, if any. Clears the internal
78 /// Construct with a buffer that is owned by the caller.
80 /// This constructor allows us to use data that is owned by the
81 /// caller. The data must stay around as long as this object is
84 : Data(Data), IsLittleEndian(IsLittleEndian), AddressSize(AddressSize) {}
89 IsLittleEndian(IsLittleEndian), AddressSize(AddressSize) {}
91 /// Get the data pointed to by this extractor.
93 /// Get the endianness for this extractor.
95 /// Get the address size for this extractor.
97 /// Set the address size for this extractor.
100 /// Extract a C string from \a *offset_ptr.
102 /// Returns a pointer to a C String from the data at the offset
103 /// pointed to by \a offset_ptr. A variable length NULL terminated C
104 /// string will be extracted and the \a offset_ptr will be
105 /// updated with the offset of the byte that follows the NULL
108 /// @param[in,out] OffsetPtr
109 /// A pointer to an offset within the data that will be advanced
110 /// by the appropriate number of bytes if the value is extracted
111 /// correctly. If the offset is out of bounds or there are not
112 /// enough bytes to extract this value, the offset will be left
115 /// @param[in,out] Err
116 /// A pointer to an Error object. Upon return the Error object is set to
117 /// indicate the result (success/failure) of the function. If the Error
118 /// object is already set when calling this function, no extraction is
122 /// A pointer to the C string value in the data. If the offset
123 /// pointed to by \a offset_ptr is out of bounds, or if the
124 /// offset plus the length of the C string is out of bounds,
125 /// NULL will be returned.
130 /// Extract a C string from the location given by the cursor. In case of an
131 /// extraction error, or if the cursor is already in an error state, a
132 /// nullptr is returned.
135 /// Extract a C string from \a *offset_ptr.
137 /// Returns a StringRef for the C String from the data at the offset
138 /// pointed to by \a offset_ptr. A variable length NULL terminated C
139 /// string will be extracted and the \a offset_ptr will be
140 /// updated with the offset of the byte that follows the NULL
143 /// \param[in,out] OffsetPtr
144 /// A pointer to an offset within the data that will be advanced
145 /// by the appropriate number of bytes if the value is extracted
146 /// correctly. If the offset is out of bounds or there are not
147 /// enough bytes to extract this value, the offset will be left
150 /// @param[in,out] Err
151 /// A pointer to an Error object. Upon return the Error object is set to
152 /// indicate the result (success/failure) of the function. If the Error
153 /// object is already set when calling this function, no extraction is
157 /// A StringRef for the C string value in the data. If the offset
158 /// pointed to by \a offset_ptr is out of bounds, or if the
159 /// offset plus the length of the C string is out of bounds,
160 /// a default-initialized StringRef will be returned.
162 Error *Err =
nullptr)
const;
164 /// Extract a C string (as a StringRef) from the location given by the cursor.
165 /// In case of an extraction error, or if the cursor is already in an error
166 /// state, a default-initialized StringRef is returned.
171 /// Extract a fixed length string from \a *OffsetPtr and consume \a Length
174 /// Returns a StringRef for the string from the data at the offset
175 /// pointed to by \a OffsetPtr. A fixed length C string will be extracted
176 /// and the \a OffsetPtr will be advanced by \a Length bytes.
178 /// \param[in,out] OffsetPtr
179 /// A pointer to an offset within the data that will be advanced
180 /// by the appropriate number of bytes if the value is extracted
181 /// correctly. If the offset is out of bounds or there are not
182 /// enough bytes to extract this value, the offset will be left
185 /// \param[in] Length
186 /// The length of the fixed length string to extract. If there are not
187 /// enough bytes in the data to extract the full string, the offset will
188 /// be left unmodified.
190 /// \param[in] TrimChars
191 /// A set of characters to trim from the end of the string. Fixed length
192 /// strings are commonly NULL terminated by one or more zero
193 /// bytes. Some clients have one or more spaces at the end of the string,
194 /// but a good default is to trim the NULL characters.
197 /// A StringRef for the C string value in the data. If the offset
198 /// pointed to by \a OffsetPtr is out of bounds, or if the
199 /// offset plus the length of the C string is out of bounds,
200 /// a default-initialized StringRef will be returned.
205 /// Extract a fixed number of bytes from the specified offset.
207 /// Returns a StringRef for the bytes from the data at the offset
208 /// pointed to by \a OffsetPtr. A fixed number of bytes will be extracted
209 /// and the \a OffsetPtr will be advanced by \a Length bytes.
211 /// \param[in,out] OffsetPtr
212 /// A pointer to an offset within the data that will be advanced
213 /// by the appropriate number of bytes if the value is extracted
214 /// correctly. If the offset is out of bounds or there are not
215 /// enough bytes to extract this value, the offset will be left
218 /// \param[in] Length
219 /// The number of bytes to extract. If there are not enough bytes in the
220 /// data to extract all of the bytes, the offset will be left unmodified.
222 /// @param[in,out] Err
223 /// A pointer to an Error object. Upon return the Error object is set to
224 /// indicate the result (success/failure) of the function. If the Error
225 /// object is already set when calling this function, no extraction is
229 /// A StringRef for the extracted bytes. If the offset pointed to by
230 /// \a OffsetPtr is out of bounds, or if the offset plus the length
231 /// is out of bounds, a default-initialized StringRef will be returned.
233 Error *Err =
nullptr)
const;
235 /// Extract a fixed number of bytes from the location given by the cursor. In
236 /// case of an extraction error, or if the cursor is already in an error
237 /// state, a default-initialized StringRef is returned.
242 /// Extract an unsigned integer of size \a byte_size from \a
245 /// Extract a single unsigned integer value and update the offset
246 /// pointed to by \a offset_ptr. The size of the extracted integer
247 /// is specified by the \a byte_size argument. \a byte_size should
248 /// have a value greater than or equal to one and less than or equal
249 /// to eight since the return value is 64 bits wide. Any
250 /// \a byte_size values less than 1 or greater than 8 will result in
251 /// nothing being extracted, and zero being returned.
253 /// @param[in,out] offset_ptr
254 /// A pointer to an offset within the data that will be advanced
255 /// by the appropriate number of bytes if the value is extracted
256 /// correctly. If the offset is out of bounds or there are not
257 /// enough bytes to extract this value, the offset will be left
260 /// @param[in] byte_size
261 /// The size in bytes of the integer to extract.
263 /// @param[in,out] Err
264 /// A pointer to an Error object. Upon return the Error object is set to
265 /// indicate the result (success/failure) of the function. If the Error
266 /// object is already set when calling this function, no extraction is
270 /// The unsigned integer value that was extracted, or zero on
273 Error *Err =
nullptr)
const;
275 /// Extract an unsigned integer of the given size from the location given by
276 /// the cursor. In case of an extraction error, or if the cursor is already in
277 /// an error state, zero is returned.
282 /// Extract a signed integer of size \a byte_size from \a *offset_ptr.
284 /// Extract a single signed integer value (sign extending if required)
285 /// and update the offset pointed to by \a offset_ptr. The size of
286 /// the extracted integer is specified by the \a byte_size argument.
287 /// \a byte_size should have a value greater than or equal to one
288 /// and less than or equal to eight since the return value is 64
289 /// bits wide. Any \a byte_size values less than 1 or greater than
290 /// 8 will result in nothing being extracted, and zero being returned.
292 /// @param[in,out] offset_ptr
293 /// A pointer to an offset within the data that will be advanced
294 /// by the appropriate number of bytes if the value is extracted
295 /// correctly. If the offset is out of bounds or there are not
296 /// enough bytes to extract this value, the offset will be left
300 /// The size in bytes of the integer to extract.
303 /// The sign extended signed integer value that was extracted,
304 /// or zero on failure.
307 //------------------------------------------------------------------
308 /// Extract a pointer from \a *offset_ptr.
310 /// Extract a single pointer from the data and update the offset
311 /// pointed to by \a offset_ptr. The size of the extracted pointer
312 /// is \a getAddressSize(), so the address size has to be
313 /// set correctly prior to extracting any pointer values.
315 /// @param[in,out] offset_ptr
316 /// A pointer to an offset within the data that will be advanced
317 /// by the appropriate number of bytes if the value is extracted
318 /// correctly. If the offset is out of bounds or there are not
319 /// enough bytes to extract this value, the offset will be left
323 /// The extracted pointer value as a 64-bit integer.
328 /// Extract a pointer-sized unsigned integer from the location given by the
329 /// cursor. In case of an extraction error, or if the cursor is already in
330 /// an error state, zero is returned.
333 /// Extract a uint8_t value from \a *offset_ptr.
335 /// Extract a single uint8_t from the binary data at the offset
336 /// pointed to by \a offset_ptr, and advance the offset on success.
338 /// @param[in,out] offset_ptr
339 /// A pointer to an offset within the data that will be advanced
340 /// by the appropriate number of bytes if the value is extracted
341 /// correctly. If the offset is out of bounds or there are not
342 /// enough bytes to extract this value, the offset will be left
345 /// @param[in,out] Err
346 /// A pointer to an Error object. Upon return the Error object is set to
347 /// indicate the result (success/failure) of the function. If the Error
348 /// object is already set when calling this function, no extraction is
352 /// The extracted uint8_t value.
355 /// Extract a single uint8_t value from the location given by the cursor. In
356 /// case of an extraction error, or if the cursor is already in an error
357 /// state, zero is returned.
360 /// Extract \a count uint8_t values from \a *offset_ptr.
362 /// Extract \a count uint8_t values from the binary data at the
363 /// offset pointed to by \a offset_ptr, and advance the offset on
364 /// success. The extracted values are copied into \a dst.
366 /// @param[in,out] offset_ptr
367 /// A pointer to an offset within the data that will be advanced
368 /// by the appropriate number of bytes if the value is extracted
369 /// correctly. If the offset is out of bounds or there are not
370 /// enough bytes to extract this value, the offset will be left
374 /// A buffer to copy \a count uint8_t values into. \a dst must
375 /// be large enough to hold all requested data.
378 /// The number of uint8_t values to extract.
381 /// \a dst if all values were properly extracted and copied,
386 /// Extract \a Count uint8_t values from the location given by the cursor and
387 /// store them into the destination buffer. In case of an extraction error, or
388 /// if the cursor is already in an error state, a nullptr is returned and the
389 /// destination buffer is left unchanged.
392 /// Extract \a Count uint8_t values from the location given by the cursor and
393 /// store them into the destination vector. The vector is resized to fit the
394 /// extracted data. In case of an extraction error, or if the cursor is
395 /// already in an error state, the destination vector is left unchanged and
396 /// cursor is placed into an error state.
401 // This relies on the fact that getU8 will not attempt to write to the
402 // buffer if isValidOffsetForDataOfSize(C.Offset, Count) is false.
406 /// Extract an int8_t value from \a *OffsetPtr. In case of an extraction error,
407 /// or if error is already set, zero is returned and the offset is left
410 return static_cast<int8_t
>(
getU8(OffsetPtr, Err));
413 /// Extract an int8_t value from \a *OffsetPtr. In case of an extraction error,
414 /// or if the cursor is already in an error state, zero is returned and the
415 /// offset is left unmodified.
418 //------------------------------------------------------------------
419 /// Extract a uint16_t value from \a *offset_ptr.
421 /// Extract a single uint16_t from the binary data at the offset
422 /// pointed to by \a offset_ptr, and update the offset on success.
424 /// @param[in,out] offset_ptr
425 /// A pointer to an offset within the data that will be advanced
426 /// by the appropriate number of bytes if the value is extracted
427 /// correctly. If the offset is out of bounds or there are not
428 /// enough bytes to extract this value, the offset will be left
431 /// @param[in,out] Err
432 /// A pointer to an Error object. Upon return the Error object is set to
433 /// indicate the result (success/failure) of the function. If the Error
434 /// object is already set when calling this function, no extraction is
438 /// The extracted uint16_t value.
439 //------------------------------------------------------------------
442 /// Extract a single uint16_t value from the location given by the cursor. In
443 /// case of an extraction error, or if the cursor is already in an error
444 /// state, zero is returned.
447 /// Extract \a count uint16_t values from \a *offset_ptr.
449 /// Extract \a count uint16_t values from the binary data at the
450 /// offset pointed to by \a offset_ptr, and advance the offset on
451 /// success. The extracted values are copied into \a dst.
453 /// @param[in,out] offset_ptr
454 /// A pointer to an offset within the data that will be advanced
455 /// by the appropriate number of bytes if the value is extracted
456 /// correctly. If the offset is out of bounds or there are not
457 /// enough bytes to extract this value, the offset will be left
461 /// A buffer to copy \a count uint16_t values into. \a dst must
462 /// be large enough to hold all requested data.
465 /// The number of uint16_t values to extract.
468 /// \a dst if all values were properly extracted and copied,
473 /// Extract an int16_t value from \a *OffsetPtr. In case of an extraction
474 /// error, or if error is already set, zero is returned and the offset is left
477 return static_cast<int16_t
>(
getU16(OffsetPtr, Err));
480 /// Extract an int16_t value from \a *OffsetPtr. In case of an extraction
481 /// error, or if the cursor is already in an error state, zero is returned and
482 /// the offset is left unmodified.
485 /// Extract a 24-bit unsigned value from \a *offset_ptr and return it
488 /// Extract 3 bytes from the binary data at the offset pointed to by
489 /// \a offset_ptr, construct a uint32_t from them and update the offset
492 /// @param[in,out] OffsetPtr
493 /// A pointer to an offset within the data that will be advanced
494 /// by the 3 bytes if the value is extracted correctly. If the offset
495 /// is out of bounds or there are not enough bytes to extract this value,
496 /// the offset will be left unmodified.
498 /// @param[in,out] Err
499 /// A pointer to an Error object. Upon return the Error object is set to
500 /// indicate the result (success/failure) of the function. If the Error
501 /// object is already set when calling this function, no extraction is
505 /// The extracted 24-bit value represented in a uint32_t.
508 /// Extract a single 24-bit unsigned value from the location given by the
509 /// cursor. In case of an extraction error, or if the cursor is already in an
510 /// error state, zero is returned.
513 /// Extract a uint32_t value from \a *offset_ptr.
515 /// Extract a single uint32_t from the binary data at the offset
516 /// pointed to by \a offset_ptr, and update the offset on success.
518 /// @param[in,out] offset_ptr
519 /// A pointer to an offset within the data that will be advanced
520 /// by the appropriate number of bytes if the value is extracted
521 /// correctly. If the offset is out of bounds or there are not
522 /// enough bytes to extract this value, the offset will be left
525 /// @param[in,out] Err
526 /// A pointer to an Error object. Upon return the Error object is set to
527 /// indicate the result (success/failure) of the function. If the Error
528 /// object is already set when calling this function, no extraction is
532 /// The extracted uint32_t value.
535 /// Extract a single uint32_t value from the location given by the cursor. In
536 /// case of an extraction error, or if the cursor is already in an error
537 /// state, zero is returned.
540 /// Extract \a count uint32_t values from \a *offset_ptr.
542 /// Extract \a count uint32_t values from the binary data at the
543 /// offset pointed to by \a offset_ptr, and advance the offset on
544 /// success. The extracted values are copied into \a dst.
546 /// @param[in,out] offset_ptr
547 /// A pointer to an offset within the data that will be advanced
548 /// by the appropriate number of bytes if the value is extracted
549 /// correctly. If the offset is out of bounds or there are not
550 /// enough bytes to extract this value, the offset will be left
554 /// A buffer to copy \a count uint32_t values into. \a dst must
555 /// be large enough to hold all requested data.
558 /// The number of uint32_t values to extract.
561 /// \a dst if all values were properly extracted and copied,
566 /// Extract an int32_t value from \a *OffsetPtr. In case of an extraction
567 /// error, or if error is already set, zero is returned and the offset is left
570 return static_cast<int32_t
>(
getU32(OffsetPtr, Err));
573 /// Extract an int32_t value from \a *OffsetPtr. In case of an extraction
574 /// error, or if the cursor is already in an error state, zero is returned and
575 /// the offset is left unmodified.
578 /// Extract a uint64_t value from \a *offset_ptr.
580 /// Extract a single uint64_t from the binary data at the offset
581 /// pointed to by \a offset_ptr, and update the offset on success.
583 /// @param[in,out] offset_ptr
584 /// A pointer to an offset within the data that will be advanced
585 /// by the appropriate number of bytes if the value is extracted
586 /// correctly. If the offset is out of bounds or there are not
587 /// enough bytes to extract this value, the offset will be left
590 /// @param[in,out] Err
591 /// A pointer to an Error object. Upon return the Error object is set to
592 /// indicate the result (success/failure) of the function. If the Error
593 /// object is already set when calling this function, no extraction is
597 /// The extracted uint64_t value.
600 /// Extract a single uint64_t value from the location given by the cursor. In
601 /// case of an extraction error, or if the cursor is already in an error
602 /// state, zero is returned.
605 /// Extract \a count uint64_t values from \a *offset_ptr.
607 /// Extract \a count uint64_t values from the binary data at the
608 /// offset pointed to by \a offset_ptr, and advance the offset on
609 /// success. The extracted values are copied into \a dst.
611 /// @param[in,out] offset_ptr
612 /// A pointer to an offset within the data that will be advanced
613 /// by the appropriate number of bytes if the value is extracted
614 /// correctly. If the offset is out of bounds or there are not
615 /// enough bytes to extract this value, the offset will be left
619 /// A buffer to copy \a count uint64_t values into. \a dst must
620 /// be large enough to hold all requested data.
623 /// The number of uint64_t values to extract.
626 /// \a dst if all values were properly extracted and copied,
631 /// Extract an int64_t value from \a *OffsetPtr. In case of an extraction
632 /// error, or if error is already set, zero is returned and the offset is left
635 return static_cast<int64_t
>(
getU64(OffsetPtr, Err));
638 /// Extract an int64_t value from \a *OffsetPtr. In case of an extraction
639 /// error, or if the cursor is already in an error state, zero is returned and
640 /// the offset is left unmodified.
643 /// Extract a signed LEB128 value from \a *offset_ptr.
645 /// Extracts a signed LEB128 number from this object's data
646 /// starting at the offset pointed to by \a offset_ptr. The offset
647 /// pointed to by \a offset_ptr will be updated with the offset of
648 /// the byte following the last extracted byte.
650 /// @param[in,out] OffsetPtr
651 /// A pointer to an offset within the data that will be advanced
652 /// by the appropriate number of bytes if the value is extracted
653 /// correctly. If the offset is out of bounds or there are not
654 /// enough bytes to extract this value, the offset will be left
657 /// @param[in,out] Err
658 /// A pointer to an Error object. Upon return the Error object is set to
659 /// indicate the result (success/failure) of the function. If the Error
660 /// object is already set when calling this function, no extraction is
664 /// The extracted signed integer value.
667 /// Extract a signed LEB128 value from the location given by the cursor.
668 /// In case of an extraction error, or if the cursor is already in an error
669 /// state, zero is returned.
672 /// Extract an unsigned LEB128 value from \a *offset_ptr.
674 /// Extracts an unsigned LEB128 number from this object's data
675 /// starting at the offset pointed to by \a offset_ptr. The offset
676 /// pointed to by \a offset_ptr will be updated with the offset of
677 /// the byte following the last extracted byte.
679 /// @param[in,out] offset_ptr
680 /// A pointer to an offset within the data that will be advanced
681 /// by the appropriate number of bytes if the value is extracted
682 /// correctly. If the offset is out of bounds or there are not
683 /// enough bytes to extract this value, the offset will be left
686 /// @param[in,out] Err
687 /// A pointer to an Error object. Upon return the Error object is set to
688 /// indicate the result (success/failure) of the function. If the Error
689 /// object is already set when calling this function, no extraction is
693 /// The extracted unsigned integer value.
697 /// Extract an unsigned LEB128 value from the location given by the cursor.
698 /// In case of an extraction error, or if the cursor is already in an error
699 /// state, zero is returned.
702 /// Advance the Cursor position by the given number of bytes. No-op if the
703 /// cursor is in an error state.
706 /// Return true iff the cursor is at the end of the buffer, regardless of the
707 /// error state of the cursor. The only way both eof and error states can be
708 /// true is if one attempts a read while the cursor is at the very end of the
712 /// Test the validity of \a offset.
715 /// \b true if \a offset is a valid offset into the data in this
716 /// object, \b false otherwise.
719 /// Test the availability of \a length bytes of data from \a offset.
722 /// \b true if \a offset is a valid offset and there are \a
723 /// length bytes available at that offset, \b false otherwise.
725 return offset + length >= offset &&
isValidOffset(offset + length - 1);
728 /// Test the availability of enough bytes of data for a pointer from
729 /// \a offset. The size of a pointer is \a getAddressSize().
732 /// \b true if \a offset is a valid offset and there are enough
733 /// bytes for a pointer available at that offset, \b false
739 /// Return the number of bytes in the underlying buffer.
740 size_t size()
const {
// Delegates to the size of the Data member.
return Data.size(); }
743 // Make it possible for subclasses to access these fields without making them
749 /// If it is possible to read \a Size bytes at offset \a Offset, returns \b
750 /// true. Otherwise, returns \b false. If \a E is not nullptr, also sets the
751 /// error object to indicate an error.
// Member template declared here only; no definition is visible in this
// listing. Returns a T and receives the offset and error object by pointer.
754 template <
typename T>
T getU(
uint64_t *OffsetPtr,
Error *Err)
const;
755 template <
typename T>
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Lightweight error class with error context and mandatory checking.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
StringRef - Represent a constant reference to a string, i.e.
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
@ C
The default llvm calling convention, compatible with C.
This is an optimization pass for GlobalISel generic memory operations.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
FunctionAddr VTableAddr Count
uint24_t getSwappedBytes(uint24_t C)
Needed by swapByteOrder().
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
LogicalResult success(bool IsSuccess=true)
Utility function to generate a LogicalResult.
An auxiliary type to facilitate extraction of 3-byte entities.
uint32_t getAsUint32(bool IsLittleEndian) const
Uint24(uint8_t U0, uint8_t U1, uint8_t U2)