00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00035 #include "OW_config.h"
00036 #include "OW_XMLParserCore.hpp"
00037 #include "OW_Format.hpp"
00038 #include "OW_XMLParseException.hpp"
00039 
00040 #include <cctype>
00041 
00042 namespace OW_NAMESPACE
00043 {
00044 
00045 
00046 
00047 
00048 
00050 
00051 
00052 
00054 bool XMLParserCore::next(XMLToken& entry)
00055 {
00056    IstreamBufIterator iterEOF;
00057    if (m_current == iterEOF || *m_current == 0)
00058    {
00059       if (!m_stack.empty())
00060       {
00061          OW_THROWXMLLINE(XMLParseException::UNCLOSED_TAGS, m_line);
00062       }
00063       return false;
00064    }
00065    
00066    
00067    
00068    if (m_tagIsEmpty)
00069    {
00070       m_tagIsEmpty = false;
00071       entry.type = XMLToken::END_TAG;
00072       entry.attributeCount = 0;
00073       return true;
00074    }
00075    
00076    if (*m_current == '<')
00077    {
00078       
00079       skipWhitespace();
00080       m_current++;
00081       getElement(entry);
00082       if (entry.type == XMLToken::START_TAG)
00083       {
00084          if (m_stack.empty() && m_foundRoot)
00085          {
00086             OW_THROWXMLLINE(XMLParseException::MULTIPLE_ROOTS, m_line);
00087          }
00088          m_foundRoot = true;
00089          if (!m_tagIsEmpty)
00090          {
00091             m_stack.push(entry.text.toString());
00092          }
00093       }
00094       else if (entry.type == XMLToken::END_TAG)
00095       {
00096          if (m_stack.empty())
00097          {
00098             OW_THROWXMLLINE(XMLParseException::START_END_MISMATCH, m_line);
00099          }
00100          if (m_stack.top() != entry.text.toString())
00101          {
00102             OW_THROWXMLLINE(XMLParseException::START_END_MISMATCH, m_line);
00103          }
00104          m_stack.pop();
00105       }
00106       return true;
00107    }
00108    else
00109    {
00110       entry.type = XMLToken::CONTENT;
00111       
00112       
00113       getContent(entry);
00114       
00115       
00116          
00117       
00118       
00119       return true;
00120    }
00121 }
00122 
00123 
00124 
00125 
00126 
00127 
00128 void XMLParserCore::skipWhitespace()
00129 {
00130    while (isspace(*m_current))
00131    {
00132       if (*m_current == '\n')
00133       {
00134          ++m_line;
00135       }
00136       ++m_current;
00137    }
00138 }
00139 
// Returns true if `c` may appear inside an element or attribute name as
// accepted by this parser: an alphanumeric character or one of "_-:.".
inline bool isNameChar(char c)
{
   // std::isalnum is only defined for EOF and values representable as
   // unsigned char; convert first so bytes >= 0x80 are safe when char is
   // signed.
   return std::isalnum(static_cast<unsigned char>(c)) != 0
      || c == '_' || c == '-' || c == ':' || c == '.';
}
00145 
00146 bool XMLParserCore::getElementName(XMLToken& entry)
00147 {
00148    if (!isalpha(*m_current) && *m_current != '_')
00149    {
00150       OW_THROWXMLLINE(XMLParseException::BAD_START_TAG, m_line);
00151    }
00152    entry.text.reset();
00153    while (isNameChar(*m_current))
00154    {
00155       entry.text += *m_current++;
00156    }
00157    
00158    skipWhitespace();
00159    
00160    if (*m_current == '>')
00161    {
00162       ++m_current;
00163       return true;
00164    }
00165    return false;
00166 }
00167 
00168 bool XMLParserCore::getOpenElementName(XMLToken& entry, bool& openCloseElement)
00169 {
00170    openCloseElement = false;
00171    if (getElementName(entry))
00172    {
00173       return true;
00174    }
00175    if (*m_current == '/')
00176    {
00177       ++m_current;
00178       if (*m_current == '>')
00179       {
00180          openCloseElement = true;
00181          ++m_current;
00182          return true;
00183       }
00184    }
00185    return false;
00186 }
00187 void XMLParserCore::getAttributeNameAndEqual(XMLToken::Attribute& att)
00188 {
00189    if (!isalpha(*m_current) && *m_current != '_')
00190    {
00191       OW_THROWXMLLINEMSG(XMLParseException::BAD_ATTRIBUTE_NAME,
00192          m_line, Format("Expected alpha or _; got %1", *m_current).c_str());
00193    }
00194    att.name.reset();
00195    while (isalnum(*m_current) || *m_current == '_' || *m_current == '-' ||
00196           *m_current == ':' || *m_current == '.')
00197    {
00198       att.name += *m_current++;
00199    }
00200    skipWhitespace();
00201    if (*m_current != '=')
00202    {
00203       OW_THROWXMLLINEMSG(XMLParseException::BAD_ATTRIBUTE_NAME,
00204          m_line, Format("Expected =; got %1", *m_current).c_str());
00205    }
00206    m_current++;
00207    skipWhitespace();
00208 }
00209 void XMLParserCore::getAttributeValue(XMLToken::Attribute& att)
00210 {
00211    
00212    if (*m_current != '"' && *m_current != '\'')
00213    {
00214       OW_THROWXMLLINEMSG(XMLParseException::BAD_ATTRIBUTE_VALUE,
00215          m_line, Format("Expecting \" or '; got %1", *m_current).c_str());
00216    }
00217    char startChar = *m_current++;
00218    att.value.reset();
00219    while (*m_current && *m_current != startChar)
00220    {
00221       att.value += *m_current++;
00222    }
00223       
00224    if (*m_current != startChar)
00225    {
00226       OW_THROWXMLLINEMSG(XMLParseException::BAD_ATTRIBUTE_VALUE,
00227          m_line, Format("Expecting %1; Got %2", startChar, static_cast<int>(*m_current)).c_str());
00228    }
00229    ++m_current;
00230 }
00231 void XMLParserCore::getComment()
00232 {
00233    
00234    for (; *m_current; m_current++)
00235    {
00236       if (*m_current == '-')
00237       {
00238          ++m_current;
00239          if (*m_current == '-')
00240          {
00241             ++m_current;
00242             if (*m_current == '>')
00243             {
00244                ++m_current;
00245                return;
00246             }
00247             else
00248             {
00249                OW_THROWXMLLINE(
00250                      XMLParseException::MINUS_MINUS_IN_COMMENT, m_line);
00251             }
00252          }
00253       }
00254    }
00255    
00256    OW_THROWXMLLINE(XMLParseException::UNTERMINATED_COMMENT, m_line);
00257 }
00258 void XMLParserCore::getCData(XMLToken& entry)
00259 {
00260    
00261    entry.text.reset();
00262    for (; *m_current; m_current++)
00263    {
00264       if (*m_current == ']')
00265       {
00266          ++m_current;
00267          if (*m_current == ']')
00268          {
00269             ++m_current;
00270             if (*m_current == '>')
00271             {
00272                ++m_current;
00273                return;
00274             }
00275             else
00276             {
00277                entry.text += ']';
00278                entry.text += ']';
00279             }
00280          }
00281          else
00282          {
00283             entry.text += ']';
00284          }
00285       }
00286       if (*m_current == '\n')
00287       {
00288          ++m_line;
00289       }
00290       entry.text += *m_current;
00291    }
00292    
00293    OW_THROWXMLLINE(XMLParseException::UNTERMINATED_CDATA, m_line);
00294 }
00295 void XMLParserCore::getDocType()
00296 {
00297    
00298    for (; *m_current && *m_current != '>'; ++m_current)
00299    {
00300       if (*m_current == '\n')
00301       {
00302          ++m_line;
00303       }
00304    }
00305    if (*m_current != '>')
00306    {
00307       OW_THROWXMLLINE(XMLParseException::UNTERMINATED_DOCTYPE, m_line);
00308    }
00309    m_current++;
00310 }
00311 
00312 void XMLParserCore::getContent(XMLToken& entry)
00313 {
00314    entry.text.reset();
00315    
00316    while (*m_current && *m_current != '<')
00317    {
00318       if (*m_current == '\n')
00319       {
00320          ++m_line;
00321       }
00322       
00323       
00324       
00325       
00326       
00327       entry.text += *m_current++;
00328    }
00329 }
00330 void XMLParserCore::getElement(XMLToken& entry)
00331 {
00332    entry.attributeCount = 0;
00333    entry.text.reset();
00334    
00335    
00336    
00337    if (*m_current == '?')
00338    {
00339       entry.type = XMLToken::XML_DECLARATION;
00340       ++m_current;
00341       if (getElementName(entry))
00342       {
00343          return;
00344       }
00345    }
00346    else if (*m_current == '!')
00347    {
00348       m_current++;
00349       
00350       if (*m_current == '-')
00351       {
00352          ++m_current;
00353          if (*m_current == '-')
00354          {
00355             ++m_current;
00356             entry.type = XMLToken::COMMENT;
00357             getComment();
00358             return;
00359          }
00360       }
00361       else if (*m_current == '[')
00362       {
00363          char string[] = "CDATA[";
00364          char *curChar = string;
00365          m_current++;
00366          while (*curChar)
00367          {
00368             if (*curChar++ != *m_current++)
00369             {
00370                OW_THROWXMLLINE(XMLParseException::EXPECTED_COMMENT_OR_CDATA, m_line);
00371             }
00372          }
00373          entry.type = XMLToken::CDATA;
00374          getCData(entry);
00375          return;
00376       }
00377       else if (*m_current == 'D')
00378       {
00379          char string[] = "OCTYPE";
00380          char *curChar = string;
00381          m_current++;
00382          while (*curChar)
00383          {
00384             if (*curChar++ != *m_current++)
00385             {
00386                OW_THROWXMLLINE(XMLParseException::EXPECTED_COMMENT_OR_CDATA, m_line);
00387             }
00388          }
00389          entry.type = XMLToken::DOCTYPE;
00390          getDocType();
00391          return;
00392       }
00393       OW_THROWXMLLINE(XMLParseException::EXPECTED_COMMENT_OR_CDATA, m_line);
00394    }
00395    else if (*m_current == '/')
00396    {
00397       entry.type = XMLToken::END_TAG;
00398       ++m_current;
00399       if (!getElementName(entry))
00400       {
00401          OW_THROWXMLLINE(XMLParseException::BAD_END_TAG, m_line);
00402       }
00403       return;
00404    }
00405    else if (isalpha(*m_current) || *m_current == '_')
00406    {
00407       entry.type = XMLToken::START_TAG;
00408       bool openCloseElement;
00409       if (getOpenElementName(entry, openCloseElement))
00410       {
00411          if (openCloseElement)
00412          {
00413             entry.type = XMLToken::START_TAG;
00414             m_tagIsEmpty = true;
00415          }
00416          return;
00417       }
00418    }
00419    else
00420       OW_THROWXMLLINE(XMLParseException::BAD_START_TAG, m_line);
00421    
00422    
00423    
00424    for (;;)
00425    {
00426       skipWhitespace();
00427       if (entry.type == XMLToken::XML_DECLARATION)
00428       {
00429          if (*m_current == '?')
00430          {
00431             ++m_current;
00432             if (*m_current == '>')
00433             {
00434                ++m_current;
00435                return;
00436             }
00437             else
00438             {
00439                OW_THROWXMLLINEMSG(
00440                   XMLParseException::BAD_ATTRIBUTE_VALUE, m_line,
00441                   Format("Expecting >; Got %1", *m_current).c_str());
00442             }
00443          }
00444       }
00445       else if (entry.type == XMLToken::START_TAG && *m_current == '/')
00446       {
00447          ++m_current;
00448          if (*m_current =='>')
00449          {
00450             entry.type = XMLToken::START_TAG;
00451             m_tagIsEmpty = true;
00452             ++m_current;
00453             return;
00454          }
00455          else
00456          {
00457             OW_THROWXMLLINEMSG(XMLParseException::BAD_ATTRIBUTE_VALUE,
00458                m_line, Format("Expecting >; Got %1", *m_current).c_str());
00459          }
00460       }
00461       else if (*m_current == '>')
00462       {
00463          ++m_current;
00464          return;
00465       }
00466       ++entry.attributeCount;
00467       XMLToken::Attribute& attr = entry.attributes[entry.attributeCount - 1];
00468       getAttributeNameAndEqual(attr);
00469       getAttributeValue(attr);
00470       if (entry.attributeCount == XMLToken::MAX_ATTRIBUTES)
00471       {
00472          OW_THROWXMLLINE(XMLParseException::TOO_MANY_ATTRIBUTES, m_line);
00473       }
00474    }
00475 }
00476 
00477 } 
00478