Clover coverage report - JBind Project
Coverage timestamp: Fr Mai 28 2004 11:17:36 CEST
file stats: LOC: 217   Methods: 6
NCLOC: 155   Classes: 1
This license of Clover is provided to support the development of JBind only. Please visit http://www.thecortex.net/clover to obtain a licensed version of Clover.
 
 Source file Conditionals Statements Methods TOTAL
InputSourceParser.java 55% 57,7% 100% 58,7%
 1   
 /*
 2   
  * JBind
 3   
  *
 4   
  * Copyright (c) by Stefan Wachter. All rights reserved.
 5   
  *
 6   
  * Usage, modification, and redistribution is subject to license terms that are
 7   
  * available at 'http://www.jbind.org'. The JBind license is like the
 8   
  * 'Apache Software License V 1.1'.
 9   
  */
 10   
 package org.jbind.xml.parser;
 11   
 
 12   
 import java.io.BufferedInputStream;
 13   
 import java.io.InputStream;
 14   
 import java.io.InputStreamReader;
 15   
 import java.io.Reader;
 16   
 import java.net.URL;
 17   
 import java.nio.charset.Charset;
 18   
 import java.util.HashMap;
 19   
 import java.util.Map;
 20   
 
 21   
 import org.jbind.xml.base.InputSourceLocation;
 22   
 import org.jbind.xml.msg.XmlMessages;
 23   
 import org.xml.sax.EntityResolver;
 24   
 import org.xml.sax.InputSource;
 25   
 
 26   
 /**
 27   
  * Parser for an arbitrary input source.
 28   
  */
 29   
 public class InputSourceParser {
 30   
 
 31   
   private Parser myParser = null;
 32   
 
 33   
   private byte[] myStartBytes = new byte[4];
 34   
 
 35   
   private static final Charset myUtf8 = Charset.forName("UTF-8");
 36   
   private static final Charset myUtf16be = Charset.forName("UTF-16BE");
 37   
   private static final Charset myUtf16le = Charset.forName("UTF-16LE");
 38   
 
 39   
   private static final Map ourInternalEntities = new HashMap();
 40   
 
 41   
   static {
 42  3
     ourInternalEntities.put("lt", "<");
 43  3
     ourInternalEntities.put("gt", ">");
 44  3
     ourInternalEntities.put("amp", "&");
 45  3
     ourInternalEntities.put("apos", "'");
 46  3
     ourInternalEntities.put("quot", "\"");
 47   
   }
 48   
 
 49   
 
 50  398
   public InputSourceParser() {}
 51   
 
 52   
   /**
 53   
    * Determines the charset that was used to encode the input stream.
 54   
    *
 55   
    * @param anInputStream <i>(required)</i>. The stream is advanced over any
 56   
    * leading byte order marks after this method returned.
 57   
    * @param anErrorHandler <i>(required)</i>. Used to report errors.
 58   
    * @param anInputSource <i>(required)</i>. The source that is parsed.
 59   
    * @return <i>(required)</i>
 60   
    */
 61  462
   private Charset determineCharset(InputStream anInputStream, IErrorHandler anErrorHandler, InputSource anInputSource) throws Exception {
 62  462
     anInputStream.mark(1024);
 63  462
     if (4 != anInputStream.read(myStartBytes, 0, 4)) {
 64  0
       anErrorHandler.fatalError(XmlMessages.eof(new InputSourceLocation(anInputSource, -1, -1)));
 65  0
       throw new ParsingAbortedException();
 66   
     }
 67   
 
 68  462
     long start = myStartBytes[0] | myStartBytes[1] | myStartBytes[2] | myStartBytes[3];
 69   
 
 70  462
     Charset charset = null;
 71  462
     if (start == 0x3C3F786D) {
 72  0
       charset = myUtf8;
 73  0
       anInputStream.reset();
 74  462
     } else if (start == 0x003C003F) {
 75  0
       charset = myUtf16be;
 76  0
       anInputStream.reset();
 77  462
     } else if (start == 0x3C003F00) {
 78  0
       charset = myUtf16le;
 79  0
       anInputStream.reset();
 80  462
     } else if ((start & 0xFFFFFF00) == 0xEFBBBF00) {
 81  0
       charset = myUtf8;
 82  0
       anInputStream.reset();
 83  0
       anInputStream.read(myStartBytes, 0, 3);
 84  462
     } else if ((start & 0xFFFF0000) == 0xFEFF0000 && (start & 0xFFFF) != 0) {
 85  0
       charset = myUtf16be;
 86  0
       anInputStream.reset();
 87  0
       anInputStream.read(myStartBytes, 0, 2);
 88  462
     } else if ((start & 0xFFFF0000) == 0xFFFE0000 && (start & 0xFFFF) != 0) {
 89  0
       charset = myUtf16le;
 90  0
       anInputStream.reset();
 91  0
       anInputStream.read(myStartBytes, 0, 2);
 92  462
     } else if (start == 0x4C6FA794) {
 93  0
       anErrorHandler.fatalError(XmlMessages.unsupportedEncoding("EBCDIC", new InputSourceLocation(anInputSource, -1, -1)));
 94  0
       throw new ParsingAbortedException();
 95  462
     } else if ((start == 0x3C) || (start == 0x3C00) || (start == 0x3C0000) || (start == 0x3C000000)) {
 96  0
       anErrorHandler.fatalError(XmlMessages.unsupportedEncoding("UCS-4", new InputSourceLocation(anInputSource, -1, -1)));
 97  0
       throw new ParsingAbortedException();
 98   
     } else {
 99  462
       charset = myUtf8;
 100  462
       anInputStream.reset();
 101   
     }
 102  462
     return charset;
 103   
   }
 104   
 
 105  467
   private Parser getParser(Reader aReader) {
 106  467
     if (null == myParser) {
 107  398
       myParser = new Parser(aReader);
 108   
     } else {
 109  69
       myParser.ReInit(aReader);
 110   
     }
 111  467
     return myParser;
 112   
   }
 113   
 
 114  466
   public void parse(InputSource anInputSource, IContentHandler aContentHandler, IErrorHandler anErrorHandler, EntityResolver anEntityResolver) {
 115  466
     try {
 116  466
       doParse(anInputSource, aContentHandler, anErrorHandler, anEntityResolver, true);
 117   
     } catch (ParsingAbortedException e) {
 118   
       // ignore
 119   
     }
 120   
   }
 121   
 
 122  1
   public void xInclude(InputSource anInputSource, IContentHandler aContentHandler, IErrorHandler anErrorHandler, EntityResolver anEntityResolver) throws ParsingAbortedException {
 123  1
     doParse(anInputSource, aContentHandler, anErrorHandler, anEntityResolver, false);
 124   
   }
 125   
 
 126   
   /**
 127   
    * @param aSignalDocumentEvents Determines whether the start and end of an xml document
 128   
    * is signalled to the content handler or not. This functionallity is used
 129   
    * for XIncludes where the start and the end of an included document is not signalled.
 130   
    */
 131  467
   private synchronized void doParse(InputSource anInputSource, IContentHandler aContentHandler, IErrorHandler anErrorHandler, EntityResolver anEntityResolver, boolean aSignalDocumentEvents) throws ParsingAbortedException {
 132  467
     InputSource source = anInputSource;
 133   
 
 134  467
     try {
 135   
 
 136  467
       if ((null == anInputSource.getByteStream()) && (null == anInputSource.getCharacterStream())) {
 137  218
         if (null != anEntityResolver) {
 138  218
           source = anEntityResolver.resolveEntity(anInputSource.getPublicId(), anInputSource.getSystemId());
 139  218
           if (null == source) {
 140  218
             source = anInputSource;
 141   
           }
 142   
         }
 143   
       }
 144   
 
 145  467
       INamespaceContext namespaceContext = new NamespaceContext();
 146   
 
 147  467
       ParserContext parserContext = new ParserContext(source, aContentHandler, anErrorHandler, anEntityResolver, namespaceContext, ourInternalEntities, aSignalDocumentEvents);
 148   
 
 149  467
       if (null != source.getCharacterStream()) {
 150  5
         Parser parser = getParser(source.getCharacterStream());
 151  5
         parser.initParser(parserContext);
 152  5
         parser.document();
 153   
       } else {
 154  462
         InputStream inputStream = source.getByteStream();
 155  462
         if (null == inputStream) {
 156  218
           if (null == source.getSystemId()) {
 157  0
             anErrorHandler.fatalError(XmlMessages.invalidInputSource(new InputSourceLocation(anInputSource, -1, -1)));
 158   
           }
 159  218
           URL url = new URL(source.getSystemId());
 160  218
           inputStream = url.openStream();
 161   
         }
 162   
 
 163  462
         BufferedInputStream in = new BufferedInputStream(inputStream, 4096);
 164   
 
 165  462
         Charset charset = null;
 166  462
         if (null != source.getEncoding()) {
 167  0
           charset = Charset.forName(source.getEncoding());
 168   
         } else {
 169  462
           charset = determineCharset(in, anErrorHandler, source);
 170   
         }
 171   
 
 172  462
         in.mark(1024);
 173  462
         Reader reader = new InputStreamReader(in, charset);
 174  462
         Parser parser = getParser(reader);
 175  462
         parser.initParser(parserContext);
 176   
 
 177  462
         XmlDecl xmlDecl = parser.optionalXmlDecl();
 178  462
         if (null != xmlDecl) {
 179  0
           String encoding = xmlDecl.getEncoding();
 180  0
           if ((null != encoding) && !encoding.equals(charset.name())) {
 181   
             // the declared encoding is different from the used encoding
 182   
             // -> reset the stream and start again with the proper encoding
 183  0
             in.reset();
 184  0
             charset = Charset.forName(encoding);
 185  0
             reader = new InputStreamReader(in, charset);
 186  0
             parser.ReInit(reader);
 187   
           }
 188   
 
 189   
         }
 190   
 
 191  462
         parser.document();
 192   
 
 193   
       }
 194   
 
 195   
     } catch (ParsingAbortedException e) {
 196  0
       throw e;
 197   
     } catch (TokenMgrError e) {
 198  0
       anErrorHandler.fatalError(XmlMessages.wrappedException(e, new InputSourceLocation(source, e.getErrorLine(), e.getErrorColumn())));
 199  0
       throw new ParsingAbortedException();
 200   
     } catch (ParseException e) {
 201  0
       int line = -1;
 202  0
       int column = -1;
 203  0
       if (null != e.currentToken) {
 204  0
         line = e.currentToken.beginLine;
 205  0
         column = e.currentToken.beginColumn;
 206   
       }
 207  0
       anErrorHandler.fatalError(XmlMessages.wrappedException(e, new InputSourceLocation(source, line, column)));
 208  0
       throw new ParsingAbortedException();
 209   
     } catch (Exception e) {
 210  0
       anErrorHandler.exception(e, source);
 211  0
       throw new ParsingAbortedException();
 212   
     }
 213   
 
 214   
   }
 215   
 
 216   
 }
 217