Main Page Namespace List Class Hierarchy Alphabetical List Compound List File List Namespace Members Compound Members File Members Related Pages

JRegex.C

Go to the documentation of this file.
00001 /***************************************************************************
00002 *cr
00003 *cr (C) Copyright 1995-2019 The Board of Trustees of the
00004 *cr University of Illinois
00005 *cr All Rights Reserved
00006 *cr
00007 ***************************************************************************/
00008 
00009 /***************************************************************************
00010 * RCS INFORMATION:
00011 *
00012 * $RCSfile: JRegex.C,v $
00013 * $Author: johns $ $Locker: $ $State: Exp $
00014 * $Revision: 1.15 $ $Date: 2024/03/01 02:01:37 $
00015 *
00016 ***************************************************************************
00017 * DESCRIPTION:
00018 * Interface for performing regular expression pattern matching, 
00019 * encapsulating the PCRE regular expression package.
00020 ***************************************************************************/
00021 
00022 //
00023 // Online regex info and testing: 
00024 // https://regex101.com/
00025 //
00026 // PCRE regex library:
00027 // https://www.pcre.org/
00028 //
00029 
00030 #include "JRegex.h"
00031 #include "Inform.h" 
00032 
00033 #if defined(VMDUSEPCRE2)
00034 #define PCRE2_CODE_UNIT_WIDTH 8
00035 #include "pcre2.h"
00036 #else
00037 #include "pcre.h"
00038 #endif
00039 
00040 JRegex::JRegex(const char *pattern, int) {
00041 if (pattern == NULL) {
00042 msgErr << "NULL pattern passed to JRegex!" << sendmsg;
00043 }
00044 else {
00045 #if defined(VMDUSEPCRE2)
00046 //
00047 // PCRE2 API
00048 // https://www.pcre.org/current/doc/html/pcre2api.html
00049 // https://www.pcre.org/current/doc/html/pcre2demo.html
00050 //
00051 int errornumber=0;
00052 size_t erroroffset=0;
00053 md = NULL;
00054 JIT=0;
00055 rpat = pcre2_compile((PCRE2_SPTR) pattern, // the regex pattern string
00056 PCRE2_ZERO_TERMINATED, // C style string
00057 0, // default options
00058 &errornumber, // error number
00059 &erroroffset, // offset to error location
00060 NULL); // use default compile context
00061 
00062 if (rpat == NULL) {
00063 PCRE2_UCHAR errbuf[256];
00064 pcre2_get_error_message(errornumber, errbuf, sizeof(errbuf));
00065 msgWarn << "JRegex: Error in pcre2_compile(), " << sendmsg;
00066 msgWarn << "Error in regex pattern begins with " << pattern+erroroffset
00067 << sendmsg;
00068 } else {
00069 #if 1
00070 //
00071 // enable JIT compilation of the regex, for large selection traversals
00072 // https://www.pcre.org/current/doc/html/pcre2jit.html
00073 //
00074 int rc=0;
00075 rc=pcre2_jit_compile((pcre2_code *) rpat, PCRE2_JIT_COMPLETE); 
00076 if (rc && rc != PCRE2_ERROR_JIT_BADOPTION) 
00077 msgWarn << "JRegex: pcre2_jit_compile() returned an error." << sendmsg;
00078 
00079 // check that JIT succeeded so we can use the fast-path if possible
00080 if (!rc) {
00081 size_t len=0;
00082 pcre2_pattern_info((pcre2_code *) rpat, PCRE2_INFO_JITSIZE, &len);
00083 // msgInfo << "PCRE2 JIT size: " << len << sendmsg;
00084 if (len != 0)
00085 JIT=1;
00086 }
00087 #endif
00088 
00089 // md = pcre2_match_data_create(1, NULL);
00090 md = pcre2_match_data_create_from_pattern((pcre2_code *) rpat, NULL);
00091 }
00092 #else
00093 const char *errptr;
00094 int erroffset;
00095 rpat = vmdpcre_compile(pattern, // the regex pattern string
00096 0, // options
00097 &errptr, // points to error message, if any
00098 &erroffset, // offset to error location
00099 NULL); // Table pointer; NULL for use default
00100 if (rpat == NULL) {
00101 msgWarn << "JRegex: Error in pcre_compile(), " << errptr << sendmsg;
00102 msgWarn << "Error in regex pattern begins with " << pattern+erroffset
00103 << sendmsg;
00104 }
00105 #endif
00106 }
00107 }
00108 
00109 JRegex::~JRegex() {
00110 #if defined(VMDUSEPCRE2)
00111 if (md)
00112 pcre2_match_data_free((pcre2_match_data *) md); 
00113 if (rpat)
00114 pcre2_code_free((pcre2_code *) rpat);
00115 #else
00116 vmdpcre_free(rpat);
00117 #endif
00118 }
00119 
00120 int JRegex::match(const char *str, int len) const {
00121 #if defined(VMDUSEPCRE2)
00122 if (rpat==NULL || md == NULL) {
00123 // msgWarn << "JRegex::match: bad regex pattern, no match" << sendmsg;
00124 return -1;
00125 } 
00126 
00127 int rc=0;
00128 
00129 // if the regular expression has successfully JITted, we call the 
00130 // JIT fast path to avoid overheads
00131 if (JIT) {
00132 rc=pcre2_jit_match((pcre2_code *) rpat, // compiled regex pattern
00133 (PCRE2_SPTR) str, // subject of the search
00134 len, // strlen of str
00135 0, // match starting offset
00136 0, // options
00137 (pcre2_match_data *) md, // match data block
00138 NULL); // match ctx, NULL for defaults
00139 return rc;
00140 }
00141 
00142 rc=pcre2_match((pcre2_code *) rpat, // compiled regex pattern
00143 (PCRE2_SPTR) str, // subject of the search
00144 len, // strlen of str
00145 0, // match starting offset
00146 0, // options
00147 (pcre2_match_data *) md, // match data block
00148 NULL); // match ctx, NULL for defaults
00149 return rc;
00150 #else
00151 if (rpat==NULL) {
00152 // msgWarn << "JRegex::match: bad regex pattern, no match" << sendmsg;
00153 return -1;
00154 } 
00155 int retval;
00156 retval=vmdpcre_exec(rpat, // compiled regex pattern
00157 NULL, // No extra study wisdom
00158 str, // subject of the search
00159 len, // strlen of str
00160 0, // offset at which to start finding substrings
00161 0, // options
00162 NULL, // return vector for location of substrings
00163 0); // size of return vector
00164 return retval;
00165 #endif
00166 }
00167 
00168 int JRegex::search(const char *str, int len, int &length, int start) {
00169 #if defined(VMDUSEPCRE2)
00170 return -1; // not implemented 
00171 #else
00172 if (rpat==NULL) {
00173 // msgWarn << "JRegex::search: bad regex pattern, no match" << sendmsg;
00174 return -1;
00175 } 
00176 int ovec[6], retval;
00177 retval=vmdpcre_exec(rpat, // my regex pattern
00178 NULL, // No extra study wisdom
00179 str, // subject of the search
00180 len, // strlen of str
00181 start,// offset at which to start finding substrings
00182 0, // options
00183 ovec, // return vector for location of substrings
00184 6); // size of return vector
00185 if (retval < 0) 
00186 return retval;
00187 length = ovec[1]-ovec[0]; 
00188 return ovec[0]; 
00189 #endif
00190 }
00191 

Generated on Tue Nov 18 02:47:15 2025 for VMD (current) by doxygen1.2.14 written by Dimitri van Heesch, © 1997-2002

AltStyle によって変換されたページ (->オリジナル) /