Main Page | Namespace List | Class Hierarchy | Alphabetical List | Data Structures | Directories | File List | Namespace Members | Data Fields | Globals
persist » xml

parse.cpp

Go to the documentation of this file.
00001 // See ../../license.txt for license information.
00002 //
00003 // parse.cpp
00004 //
00005 // NOTES
00006 // XML Parser for the persistence framework.
00007 //
00008 // 30-Jun-2003 phamilton Created
00009 //
00010 
00011 #define PERSIST_IN_LIBRARY_SOURCE
00012 
00013 #include "parse.hpp"
00014 #include <iostream>
00015 #include <fstream>
00016 #include "expat-1.95.5/lib/expat.h"
00017 #include "boost/lexical_cast.hpp"
00018 #include "boost/format.hpp"
00019 
00020 using namespace ph::persist::xml;
00021 
// sizes used while streaming a document through the parser:
// progress is reported once per PROGRESS_UNIT bytes, and the input is
// read in chunks of BUFFER_SIZE bytes.
#define BUFFER_SIZE 1024
#define PROGRESS_UNIT 512

// boost::format templates used when reporting errors.
const char XMLFmt_expected[] = "expected %s=\"string\"";
const char XMLFmt_error[] = "file: %s %s at line %d";

// fixed error messages.
const char XMLErr_mismatched_end_tag[] = "mismatched end element tag.";
00031 
00032 bool parse::parse_xml(std::istream *stream, const std::string &streampath, parse *parser, parse_progress *progress)
00033 {
00034 
00035 if (progress)
00036 {
00037 stream->seekg(0, std::ios_base::end);
00038 long len = stream->tellg();
00039 long count = len / PROGRESS_UNIT;
00040 progress->total(count > 0 ? count : 1);
00041 }
00042 
00043 if (progress)
00044 progress->progress(0);
00045 
00046 parser->startparse(streampath);
00047 int done = 0;
00048 long total = 0;
00049 bool parseresult = true;
00050 while (!done)
00051 {
00052 char buf[BUFFER_SIZE];
00053 stream->read(buf, sizeof(buf));
00054 long len = stream->gcount();
00055 done = len < (long)sizeof(buf);
00056 int error = parser->doparse(buf, len, done);
00057 if (error != PARSE_SUCCESS)
00058 {
00059 parseresult = false;
00060 done = 1;
00061 }
00062 total += len;
00063 if (progress)
00064 {
00065 long p = total / PROGRESS_UNIT;
00066 progress->progress(p);
00067 }
00068 }
00069 parser->endparse();
00070 
00071 return parseresult;
00072 }
00073 
00074 void parse::startparse(const std::string &streamname)
00075 /*
00076  Called at the start of a parse. Set's to the expat
00077  data structures.
00078 */
00079 {
00080 assert(_parser == NULL);
00081 _parser = XML_ParserCreate(NULL);
00082 _filename = streamname; // for error messages.
00083 _error = PARSE_SUCCESS;
00084 
00085 XML_SetUserData(_parser, this);
00086 XML_SetElementHandler(_parser, sstartelement_handler, sendelement_handler);
00087 XML_SetCharacterDataHandler(_parser, scdata_handler);
00088 XML_SetCommentHandler(_parser, scomment_handler);
00089 XML_SetDefaultHandler(_parser, sdefault_handler);
00090 }
00091 
00092 int parse::doparse(char *buf, long len, int done)
00093 /*
00094  Called in the middle of a parse. Feed more XML into the
00095  parser.
00096 */
00097 {
00098 try
00099 {
00100 if (XML_Parse(_parser, buf, len, done))
00101 return PARSE_SUCCESS;
00102 }
00103 catch (...)
00104 {
00105 // the only thrown exception is to end the parse.
00106 _error = PARSE_BADXMLTYPE;
00107 return _error;
00108 }
00109 
00110 // for some reason, the string returned is not actually UNICODE! So we convert it to unicode here.
00111 /* TBD
00112  CWStr m;
00113  m.Convert(string);
00114 
00115  XMLError(m.c_str());
00116 */
00117 return _error;
00118 }
00119 
00120 void parse::endparse()
00121 /*
00122  Called at the end of a parse, or to abort a parse.
00123 */
00124 {
00125 finish_handler();
00126 XML_ParserFree(_parser);
00127 _parser = NULL;
00128 }
00129 
00130 int parse::parsestream(std::istream *stream, const std::string &streamname)
00131 /*
00132  Wrapper function to parse a file of XML.
00133 */
00134 {
00135 startparse(streamname);
00136 
00137 int done = 0;
00138 while (!done)
00139 {
00140 char buf[BUFFER_SIZE];
00141 stream->read(buf, sizeof(buf));
00142 long len = stream->gcount();
00143 done = len < (long)sizeof(buf);
00144 int error = doparse(buf, len, done);
00145 if (error != PARSE_SUCCESS)
00146 return _error;
00147 }
00148 endparse();
00149 
00150 return _error;
00151 }
00152 
00153 int parse::doparsefile(const std::string &filename)
00154 /*
00155  Parse a file given a filename of a file on disk.
00156 */
00157 {
00158 int result = PARSE_NOFILE;
00159 std::ifstream f(filename.c_str());
00160 if (f.is_open())
00161 {
00162 result = parsestream(&f, filename);
00163 f.close();
00164 }
00165 return result;
00166 }
00167 
00168 void parse::sstartelement_handler(void *userData, const XML_Char *name, const XML_Char **atts)
00169 {
00170 parse *me = reinterpret_cast<parse *>(userData);
00171 
00172 // push this element.
00173 me->_elementstack.push_back(name);
00174 
00175 xmlstring n(name);
00176 std::vector<xmlstring> a;
00177 if (atts)
00178 for (int i=0; atts[i]; i++)
00179 a.push_back(atts[i]);
00180 me->startelement_handler(n, a);
00181 }
00182 
00183 void parse::sendelement_handler(void *userData, const XML_Char *name)
00184 {
00185 parse *me = reinterpret_cast<parse *>(userData);
00186 
00187 me->endelement_handler(name);
00188 
00189 // pop the element.
00190 if (me->_elementstack.back() == name)
00191 me->_elementstack.pop_back();
00192 else
00193 me->error(XMLErr_mismatched_end_tag);
00194 }
00195 
00196 void parse::scdata_handler(void *userData, const XML_Char *s, int len)
00197 {
00198 parse *me = reinterpret_cast<parse *>(userData);
00199 
00200 // use the length in conversion.
00201 xmlstring ws(s, len);
00202 
00203 me->cdata_handler(ws, len);
00204 }
00205 
00206 void parse::scomment_handler(void *userData, const XML_Char *data)
00207 {
00208 parse *me = reinterpret_cast<parse *>(userData);
00209 
00210 // use the length in conversion.
00211 xmlstring ws(data);
00212 
00213 me->comment_handler(data);
00214 }
00215 
00216 void parse::sdefault_handler(void *userData, const XML_Char *s, int len)
00217 {
00218 parse *me = reinterpret_cast<parse *>(userData);
00219 
00220 // use the length in conversion.
00221 xmlstring ws(s, len);
00222 
00223 me->default_handler(ws, len);
00224 }
00225 
00226 xmlstring parse::attr(const std::vector<xmlstring> &attrs, int index)
00227 {
00228 if ((int)attrs.size() > (index * 2))
00229 return attrs[index * 2];
00230 return S("");
00231 }
00232 
00233 xmlstring parse::attrval(const std::vector<xmlstring> &attrs, int index)
00234 {
00235 if ((int)attrs.size() > ((index * 2) + 1))
00236 return attrs[(index * 2) + 1];
00237 return S("");
00238 }
00239 
00240 xmlstring parse::attr(const std::vector<xmlstring> &attrs, const xmlstring &token)
00241 {
00242 // these are processed in pairs. So a simple for() is best here.
00243 for (int i=0; i < (int)attrs.size(); i++)
00244 {
00245 if (token == attrs[i])
00246 return attrs[i+1];
00247 i++;
00248 }
00249 
00250 return S("");
00251 }
00252 
00253 xmlstring parse::expectedattr(const std::vector<xmlstring> &attrs, const xmlstring &token)
00254 {
00255 xmlstring a = attr(attrs, token);
00256 if (!a.empty())
00257 return a;
00258 
00259 expected_error(token);
00260 return S("");
00261 }
00262 
00263 void parse::expected_error(const xmlstring &token)
00264 {
00265 error(boost::io::str(boost::format(XMLFmt_expected) % boost::lexical_cast<std::string>(token)));
00266 }
00267 
00268 void parse::error(const std::string &s, bool detail)
00269 /*
00270  Default error does a message box.
00271 */
00272 {
00273 if (!_silent)
00274 {
00275 if (_errorhandler)
00276 {
00277 if (detail)
00278 {
00279 // on debian, the direct version of this that uses a stream doesn't seem to work, so just cast to a string
00280 // for now.
00281 *_errorhandler << boost::io::str(boost::format(XMLFmt_error) % _filename % s % XML_GetCurrentLineNumber(_parser)) << std::endl;
00282 }
00283 else
00284 *_errorhandler << s << std::endl;
00285 }
00286 }
00287 
00288 _error = PARSE_XMLERROR;
00289 }
00290 
00291 void parse::error(const std::string &format, const std::string &s1, bool detail)
00292 {
00293 error(boost::io::str(boost::format(format) % s1), detail);
00294 }
00295 
00296 void parse::error(const std::string &format, const std::string &s1, const std::string &s2, bool detail)
00297 {
00298 error(boost::io::str(boost::format(format) % s1 % s2), detail);
00299 }
00300 
00301 // we escape all data with 2 sets of this char...
00302 const char kEscapeChar = '\\';
00303 
00304 // and here are the things that we escape.
00305 static struct { char c; const char *s; } gXMLEncodingTable[] = 
00306 {
00307 { '<', S("lt") },
00308 { '>', S("gt") },
00309 { '&', S("amp") },
00310 { 0, 0 }
00311 };
00312 
00313 // some helper strings. The short header is used as a way of telling whether a particular
00314 // string contains XML or not.
00315 const xmlstring kXMLShortHeader = S("<?xml version=\"1.0\"");
00316 const xmlstring kXMLLongHeader = S("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>");
00317 
00318 bool parse::encodexmldata(const xmlstring &s, xmlstring *news)
00319 {
00320 *news = S("");
00321 for (xmlstring::const_iterator i = s.begin(); i != s.end(); i++)
00322 {
00323 if (*i == kEscapeChar)
00324 {
00325 *news += kEscapeChar;
00326 *news += kEscapeChar;
00327 
00328 }
00329 else
00330 {
00331 int j=0;
00332 while (gXMLEncodingTable[j].c && gXMLEncodingTable[j].c != *i)
00333 j++;
00334 if (gXMLEncodingTable[j].c)
00335 {
00336 *news += kEscapeChar;
00337 *news += gXMLEncodingTable[j].s;
00338 *news += kEscapeChar;
00339 }
00340 else
00341 *news += *i;
00342 }
00343 }
00344 
00345 return s.length() != news->length();
00346 }
00347 
00348 bool parse::decodexmldata(const xmlstring &s, xmlstring *news)
00349 {
00350 // if the string is actually a chunk of XML, then we don't decode (we are already
00351 // decoded).
00352 if (s.substr(0, kXMLShortHeader.length()) == kXMLShortHeader)
00353 return false;
00354 
00355 bool escape = false;
00356 xmlstring escdata;
00357 *news = S("");
00358 for (xmlstring::const_iterator i = s.begin(); i != s.end(); i++)
00359 {
00360 if (escape)
00361 {
00362 if (*i == kEscapeChar)
00363 {
00364 if (escdata == S(""))
00365 *news += kEscapeChar;
00366 else
00367 {
00368 // finished escaping.
00369 int j=0;
00370 while (gXMLEncodingTable[j].c && gXMLEncodingTable[j].s != escdata)
00371 j++;
00372 if (gXMLEncodingTable[j].c)
00373 *news += gXMLEncodingTable[j].c;
00374 else
00375 *news += escdata;
00376 }
00377 escape = false;
00378 }
00379 else
00380 escdata += *i;
00381 }
00382 else if (*i == kEscapeChar)
00383 {
00384 escdata = S("");
00385 escape = true;
00386 }
00387 else
00388 *news += *i;
00389 }
00390 
00391 if (escape)
00392 *news += escdata;
00393 
00394 return s.length() != news->length();
00395 }
00396 

Generated on Wed Apr 5 22:03:25 2006 for cppxmlobj by doxygen 1.4.3

AltStyle によって変換されたページ (->オリジナル) /