OW_XMLParserCore.cpp

Go to the documentation of this file.
00001 /******************************************************************************
00002 * Copyright (C) 2001-2004 Vintela, Inc. All rights reserved.
00003 *
00004 * Redistribution and use in source and binary forms, with or without
00005 * modification, are permitted provided that the following conditions are met:
00006 *
00007 *  - Redistributions of source code must retain the above copyright notice,
00008 *    this list of conditions and the following disclaimer.
00009 *
00010 *  - Redistributions in binary form must reproduce the above copyright notice,
00011 *    this list of conditions and the following disclaimer in the documentation
00012 *    and/or other materials provided with the distribution.
00013 *
00014 *  - Neither the name of Vintela, Inc. nor the names of its
00015 *    contributors may be used to endorse or promote products derived from this
00016 *    software without specific prior written permission.
00017 *
00018 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''
00019 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00020 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00021 * ARE DISCLAIMED. IN NO EVENT SHALL Vintela, Inc. OR THE CONTRIBUTORS
00022 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00023 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00024 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00025 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00026 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00027 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00028 * POSSIBILITY OF SUCH DAMAGE.
00029 *******************************************************************************/
00030 
00035 #include "OW_config.h"
00036 #include "OW_XMLParserCore.hpp"
00037 #include "OW_Format.hpp"
00038 #include "OW_XMLParseException.hpp"
00039 
00040 #include <cctype>
00041 
00042 namespace OW_NAMESPACE
00043 {
00044 
00045 // Note that we don't use the OW_THROW macro in this file to throw
00046 // an XMLParseException.  This is because it needs extra information not
00047 // available with OW_THROW.
00048 
00050 //
00051 // XMLParserCore
00052 //
00054 bool XMLParserCore::next(XMLToken& entry)
00055 {
00056    IstreamBufIterator iterEOF;
00057    if (m_current == iterEOF || *m_current == 0)
00058    {
00059       if (!m_stack.empty())
00060       {
00061          OW_THROWXMLLINE(XMLParseException::UNCLOSED_TAGS, m_line);
00062       }
00063       return false;
00064    }
00065    // if the last tag was a <.../> then set the next token to END_TAG so that
00066    // the caller doesn't need to worry about <.../>, it will look like
00067    // <...></...>
00068    if (m_tagIsEmpty)
00069    {
00070       m_tagIsEmpty = false;
00071       entry.type = XMLToken::END_TAG;
00072       entry.attributeCount = 0;
00073       return true;
00074    }
00075    // Either a "<...>" or content begins next:
00076    if (*m_current == '<')
00077    {
00078       // Skip over any whitespace:
00079       skipWhitespace();
00080       m_current++;
00081       getElement(entry);
00082       if (entry.type == XMLToken::START_TAG)
00083       {
00084          if (m_stack.empty() && m_foundRoot)
00085          {
00086             OW_THROWXMLLINE(XMLParseException::MULTIPLE_ROOTS, m_line);
00087          }
00088          m_foundRoot = true;
00089          if (!m_tagIsEmpty)
00090          {
00091             m_stack.push(entry.text.toString());
00092          }
00093       }
00094       else if (entry.type == XMLToken::END_TAG)
00095       {
00096          if (m_stack.empty())
00097          {
00098             OW_THROWXMLLINE(XMLParseException::START_END_MISMATCH, m_line);
00099          }
00100          if (m_stack.top() != entry.text.toString())
00101          {
00102             OW_THROWXMLLINE(XMLParseException::START_END_MISMATCH, m_line);
00103          }
00104          m_stack.pop();
00105       }
00106       return true;
00107    }
00108    else
00109    {
00110       entry.type = XMLToken::CONTENT;
00111       //bool isSpaces;
00112       //getContent(entry, isSpaces);
00113       getContent(entry);
00114       //if (isSpaces)
00115       //{
00116          // content is entirely white space, so just skip it.
00117       // return next(entry);
00118       //}
00119       return true;
00120    }
00121 }
00122 /*
00123 void XMLParserCore::putBack(XMLToken& entry)
00124 {
00125    _putBackStack.push(entry);
00126 }
00127 */
00128 void XMLParserCore::skipWhitespace()
00129 {
00130    while (isspace(*m_current))
00131    {
00132       if (*m_current == '\n')
00133       {
00134          ++m_line;
00135       }
00136       ++m_current;
00137    }
00138 }
00139 
// Returns true when c may appear inside an element or attribute name as this
// parser accepts them: an alphanumeric character (per isalnum), '_', '-',
// ':' or '.'.
inline bool isNameChar(char c)
{
   // The <cctype> classifiers are undefined for negative arguments other than
   // EOF, so a plain char holding a byte >= 0x80 must go through unsigned char.
   const unsigned char uc = static_cast<unsigned char>(c);
   return std::isalnum(uc) || c == '_' || c == '-' ||
          c == ':' || c == '.';
}
00145 
00146 bool XMLParserCore::getElementName(XMLToken& entry)
00147 {
00148    if (!isalpha(*m_current) && *m_current != '_')
00149    {
00150       OW_THROWXMLLINE(XMLParseException::BAD_START_TAG, m_line);
00151    }
00152    entry.text.reset();
00153    while (isNameChar(*m_current))
00154    {
00155       entry.text += *m_current++;
00156    }
00157    // The next character might be a space:
00158    skipWhitespace();
00159    
00160    if (*m_current == '>')
00161    {
00162       ++m_current;
00163       return true;
00164    }
00165    return false;
00166 }
00167 
00168 bool XMLParserCore::getOpenElementName(XMLToken& entry, bool& openCloseElement)
00169 {
00170    openCloseElement = false;
00171    if (getElementName(entry))
00172    {
00173       return true;
00174    }
00175    if (*m_current == '/')
00176    {
00177       ++m_current;
00178       if (*m_current == '>')
00179       {
00180          openCloseElement = true;
00181          ++m_current;
00182          return true;
00183       }
00184    }
00185    return false;
00186 }
00187 void XMLParserCore::getAttributeNameAndEqual(XMLToken::Attribute& att)
00188 {
00189    if (!isalpha(*m_current) && *m_current != '_')
00190    {
00191       OW_THROWXMLLINEMSG(XMLParseException::BAD_ATTRIBUTE_NAME,
00192          m_line, Format("Expected alpha or _; got %1", *m_current).c_str());
00193    }
00194    att.name.reset();
00195    while (isalnum(*m_current) || *m_current == '_' || *m_current == '-' ||
00196           *m_current == ':' || *m_current == '.')
00197    {
00198       att.name += *m_current++;
00199    }
00200    skipWhitespace();
00201    if (*m_current != '=')
00202    {
00203       OW_THROWXMLLINEMSG(XMLParseException::BAD_ATTRIBUTE_NAME,
00204          m_line, Format("Expected =; got %1", *m_current).c_str());
00205    }
00206    m_current++;
00207    skipWhitespace();
00208 }
00209 void XMLParserCore::getAttributeValue(XMLToken::Attribute& att)
00210 {
00211    // ATTN-B: handle values contained in semiquotes:
00212    if (*m_current != '"' && *m_current != '\'')
00213    {
00214       OW_THROWXMLLINEMSG(XMLParseException::BAD_ATTRIBUTE_VALUE,
00215          m_line, Format("Expecting \" or '; got %1", *m_current).c_str());
00216    }
00217    char startChar = *m_current++;
00218    att.value.reset();
00219    while (*m_current && *m_current != startChar)
00220    {
00221       att.value += *m_current++;
00222    }
00223       
00224    if (*m_current != startChar)
00225    {
00226       OW_THROWXMLLINEMSG(XMLParseException::BAD_ATTRIBUTE_VALUE,
00227          m_line, Format("Expecting %1; Got %2", startChar, static_cast<int>(*m_current)).c_str());
00228    }
00229    ++m_current;
00230 }
00231 void XMLParserCore::getComment()
00232 {
00233    // Now p points to first non-whitespace character beyond "<--" sequence:
00234    for (; *m_current; m_current++)
00235    {
00236       if (*m_current == '-')
00237       {
00238          ++m_current;
00239          if (*m_current == '-')
00240          {
00241             ++m_current;
00242             if (*m_current == '>')
00243             {
00244                ++m_current;
00245                return;
00246             }
00247             else
00248             {
00249                OW_THROWXMLLINE(
00250                      XMLParseException::MINUS_MINUS_IN_COMMENT, m_line);
00251             }
00252          }
00253       }
00254    }
00255    // If it got this far, then the comment is unterminated:
00256    OW_THROWXMLLINE(XMLParseException::UNTERMINATED_COMMENT, m_line);
00257 }
00258 void XMLParserCore::getCData(XMLToken& entry)
00259 {
00260    // At this point m_current points one past "<![CDATA[" sequence:
00261    entry.text.reset();
00262    for (; *m_current; m_current++)
00263    {
00264       if (*m_current == ']')
00265       {
00266          ++m_current;
00267          if (*m_current == ']')
00268          {
00269             ++m_current;
00270             if (*m_current == '>')
00271             {
00272                ++m_current;
00273                return;
00274             }
00275             else
00276             {
00277                entry.text += ']';
00278                entry.text += ']';
00279             }
00280          }
00281          else
00282          {
00283             entry.text += ']';
00284          }
00285       }
00286       if (*m_current == '\n')
00287       {
00288          ++m_line;
00289       }
00290       entry.text += *m_current;
00291    }
00292    // If it got this far, then the cdata is unterminated:
00293    OW_THROWXMLLINE(XMLParseException::UNTERMINATED_CDATA, m_line);
00294 }
00295 void XMLParserCore::getDocType()
00296 {
00297    // Just ignore the DOCTYPE command for now:
00298    for (; *m_current && *m_current != '>'; ++m_current)
00299    {
00300       if (*m_current == '\n')
00301       {
00302          ++m_line;
00303       }
00304    }
00305    if (*m_current != '>')
00306    {
00307       OW_THROWXMLLINE(XMLParseException::UNTERMINATED_DOCTYPE, m_line);
00308    }
00309    m_current++;
00310 }
00311 //void XMLParserCore::getContent(XMLToken& entry, bool& isWhiteSpace)
00312 void XMLParserCore::getContent(XMLToken& entry)
00313 {
00314    entry.text.reset();
00315    //isWhiteSpace = true;
00316    while (*m_current && *m_current != '<')
00317    {
00318       if (*m_current == '\n')
00319       {
00320          ++m_line;
00321       }
00322       //isWhiteSpace &= isspace(*m_current);
00323       //if (isWhiteSpace)
00324       //{
00325       // isWhiteSpace = isspace(*m_current);
00326       //}
00327       entry.text += *m_current++;
00328    }
00329 }
00330 void XMLParserCore::getElement(XMLToken& entry)
00331 {
00332    entry.attributeCount = 0;
00333    entry.text.reset();
00334    //--------------------------------------------------------------------------
00335    // Get the element name (expect one of these: '?', '!', [A-Za-z_])
00336    //--------------------------------------------------------------------------
00337    if (*m_current == '?')
00338    {
00339       entry.type = XMLToken::XML_DECLARATION;
00340       ++m_current;
00341       if (getElementName(entry))
00342       {
00343          return;
00344       }
00345    }
00346    else if (*m_current == '!')
00347    {
00348       m_current++;
00349       // Expect a comment or CDATA:
00350       if (*m_current == '-')
00351       {
00352          ++m_current;
00353          if (*m_current == '-')
00354          {
00355             ++m_current;
00356             entry.type = XMLToken::COMMENT;
00357             getComment();
00358             return;
00359          }
00360       }
00361       else if (*m_current == '[')
00362       {
00363          char string[] = "CDATA[";
00364          char *curChar = string;
00365          m_current++;
00366          while (*curChar)
00367          {
00368             if (*curChar++ != *m_current++)
00369             {
00370                OW_THROWXMLLINE(XMLParseException::EXPECTED_COMMENT_OR_CDATA, m_line);
00371             }
00372          }
00373          entry.type = XMLToken::CDATA;
00374          getCData(entry);
00375          return;
00376       }
00377       else if (*m_current == 'D')
00378       {
00379          char string[] = "OCTYPE";
00380          char *curChar = string;
00381          m_current++;
00382          while (*curChar)
00383          {
00384             if (*curChar++ != *m_current++)
00385             {
00386                OW_THROWXMLLINE(XMLParseException::EXPECTED_COMMENT_OR_CDATA, m_line);
00387             }
00388          }
00389          entry.type = XMLToken::DOCTYPE;
00390          getDocType();
00391          return;
00392       }
00393       OW_THROWXMLLINE(XMLParseException::EXPECTED_COMMENT_OR_CDATA, m_line);
00394    }
00395    else if (*m_current == '/')
00396    {
00397       entry.type = XMLToken::END_TAG;
00398       ++m_current;
00399       if (!getElementName(entry))
00400       {
00401          OW_THROWXMLLINE(XMLParseException::BAD_END_TAG, m_line);
00402       }
00403       return;
00404    }
00405    else if (isalpha(*m_current) || *m_current == '_')
00406    {
00407       entry.type = XMLToken::START_TAG;
00408       bool openCloseElement;
00409       if (getOpenElementName(entry, openCloseElement))
00410       {
00411          if (openCloseElement)
00412          {
00413             entry.type = XMLToken::START_TAG;
00414             m_tagIsEmpty = true;
00415          }
00416          return;
00417       }
00418    }
00419    else
00420       OW_THROWXMLLINE(XMLParseException::BAD_START_TAG, m_line);
00421    //--------------------------------------------------------------------------
00422    // Grab all the attributes:
00423    //--------------------------------------------------------------------------
00424    for (;;)
00425    {
00426       skipWhitespace();
00427       if (entry.type == XMLToken::XML_DECLARATION)
00428       {
00429          if (*m_current == '?')
00430          {
00431             ++m_current;
00432             if (*m_current == '>')
00433             {
00434                ++m_current;
00435                return;
00436             }
00437             else
00438             {
00439                OW_THROWXMLLINEMSG(
00440                   XMLParseException::BAD_ATTRIBUTE_VALUE, m_line,
00441                   Format("Expecting >; Got %1", *m_current).c_str());
00442             }
00443          }
00444       }
00445       else if (entry.type == XMLToken::START_TAG && *m_current == '/')
00446       {
00447          ++m_current;
00448          if (*m_current =='>')
00449          {
00450             entry.type = XMLToken::START_TAG;
00451             m_tagIsEmpty = true;
00452             ++m_current;
00453             return;
00454          }
00455          else
00456          {
00457             OW_THROWXMLLINEMSG(XMLParseException::BAD_ATTRIBUTE_VALUE,
00458                m_line, Format("Expecting >; Got %1", *m_current).c_str());
00459          }
00460       }
00461       else if (*m_current == '>')
00462       {
00463          ++m_current;
00464          return;
00465       }
00466       ++entry.attributeCount;
00467       XMLToken::Attribute& attr = entry.attributes[entry.attributeCount - 1];
00468       getAttributeNameAndEqual(attr);
00469       getAttributeValue(attr);
00470       if (entry.attributeCount == XMLToken::MAX_ATTRIBUTES)
00471       {
00472          OW_THROWXMLLINE(XMLParseException::TOO_MANY_ATTRIBUTES, m_line);
00473       }
00474    }
00475 }
00476 
00477 } // end namespace OW_NAMESPACE
00478 

Generated on Thu Feb 9 08:48:19 2006 for openwbem by  doxygen 1.4.6