View Javadoc

1   /*
2    * Copyright (c) 2003, Henri Yandell
3    * All rights reserved.
4    * 
5    * Redistribution and use in source and binary forms, with or 
6    * without modification, are permitted provided that the 
7    * following conditions are met:
8    * 
9    * + Redistributions of source code must retain the above copyright notice, 
10   *   this list of conditions and the following disclaimer.
11   * 
12   * + Redistributions in binary form must reproduce the above copyright notice, 
13   *   this list of conditions and the following disclaimer in the documentation 
14   *   and/or other materials provided with the distribution.
15   * 
16   * + Neither the name of XmlWriter nor the names of its contributors 
17   *   may be used to endorse or promote products derived from this software 
18   *   without specific prior written permission.
19   * 
20   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
21   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
30   * POSSIBILITY OF SUCH DAMAGE.
31   */
32  package com.generationjava.io.xml;
33  
/**
 * Static helper methods for lightweight, string-based XML handling.
 *
 * <p>These helpers work by plain text scanning; they are not an XML parser.
 * They assume reasonably well-formed input and know nothing about comments,
 * CDATA sections, processing instructions, numeric character references or
 * single-quoted attribute values.</p>
 *
 * <p>Unless stated otherwise, arguments must not be {@code null}; passing
 * {@code null} results in a {@code NullPointerException}.</p>
 *
 * @author <a href="mailto:bayard@apache.org">Henri Yandell</a>
 * @version 1.0
 */
public final class XmlUtils {

    /**
     * Escapes the five characters that have a predefined XML entity:
     * {@code &}, {@code <}, {@code >}, {@code "} and {@code '}.
     *
     * @param str the raw text to escape
     * @return the text with each special character replaced by its entity
     */
    public static String escapeXml(String str) {
        int sz = str.length();
        StringBuilder buffer = new StringBuilder(sz + 16);
        // Single pass: each input character is examined exactly once, so an
        // ampersand produced by one substitution can never be escaped again.
        for (int i = 0; i < sz; i++) {
            char ch = str.charAt(i);
            switch (ch) {
                case '&':
                    buffer.append("&amp;");
                    break;
                case '<':
                    buffer.append("&lt;");
                    break;
                case '>':
                    buffer.append("&gt;");
                    break;
                case '"':
                    buffer.append("&quot;");
                    break;
                case '\'':
                    buffer.append("&apos;");
                    break;
                default:
                    buffer.append(ch);
                    break;
            }
        }
        return buffer.toString();
    }

    /**
     * Reverses {@link #escapeXml(String)}: replaces the five predefined XML
     * entities with the characters they stand for. Any other entity or
     * character reference is left untouched.
     *
     * @param str the escaped text
     * @return the text with the predefined entities decoded
     */
    public static String unescapeXml(String str) {
        str = str.replace("&lt;", "<");
        str = str.replace("&gt;", ">");
        str = str.replace("&quot;", "\"");
        str = str.replace("&apos;", "'");
        // "&amp;" must be decoded last. Decoding it first would turn
        // "&amp;lt;" into "&lt;" and then, wrongly, into "<".
        str = str.replace("&amp;", "&");
        return str;
    }

    /**
     * Removes anything that looks like a tag from a String, i.e. every run
     * of characters from a {@code <} up to and including the next {@code >}.
     * A {@code >} that is not closing a tag is ordinary text and is kept.
     *
     * @param str the text to strip
     * @return the text without tags
     */
    public static String removeXml(String str) {
        int sz = str.length();
        StringBuilder buffer = new StringBuilder(sz);
        boolean inTag = false;
        for (int i = 0; i < sz; i++) {
            char ch = str.charAt(i);
            if (inTag) {
                // Swallow everything up to and including the closing '>'.
                if (ch == '>') {
                    inTag = false;
                }
            } else if (ch == '<') {
                inTag = true;
            } else {
                buffer.append(ch);
            }
        }
        return buffer.toString();
    }

    /**
     * Returns the text between the first opening {@code <tag ...>} in
     * {@code text} and its matching {@code </tag>}.
     *
     * @param tag the element name, without angle brackets
     * @param text the XML text to search
     * @return the element's content, or the empty string when the element is
     *         not found or has no matching closing tag
     */
    public static String getContent(String tag, String text) {
        int openIdx = XmlUtils.getIndexOpeningTag(tag, text);
        if (openIdx == -1) {
            return "";
        }
        // Work on the text starting at the opening tag so that the closing
        // tag search below sees the element it has to balance.
        String fromTag = text.substring(openIdx);
        int end = XmlUtils.getIndexClosingTag(tag, fromTag);
        int tagEnd = fromTag.indexOf('>');
        if (end == -1 || tagEnd == -1 || tagEnd > end) {
            // No matching closing tag, or the opening tag is never closed
            // with '>' before it: there is no content to return.
            return "";
        }
        return fromTag.substring(tagEnd + 1, end);
    }

    /**
     * Finds the first opening tag with the given name.
     *
     * @param tag the element name, without angle brackets
     * @param text the XML text to search
     * @return the index of the tag's {@code <}, or -1 if there is none
     */
    public static int getIndexOpeningTag(String tag, String text) {
        return getIndexOpeningTag(tag, text, 0);
    }

    /**
     * Finds the first opening tag with the given name at or after
     * {@code start}. A candidate only counts when the name is directly
     * followed by {@code >} or whitespace, so searching for {@code para}
     * does not match {@code <paragraph>}.
     */
    private static int getIndexOpeningTag(String tag, String text, int start) {
        String open = "<" + tag;
        int idx = text.indexOf(open, start);
        while (idx != -1) {
            int after = idx + open.length();
            if (after >= text.length()) {
                // Text ends right after "<tag": not a complete opening tag,
                // and no later candidate can exist.
                return -1;
            }
            if (isTagNameEnd(text.charAt(after))) {
                return idx;
            }
            idx = text.indexOf(open, idx + 1);
        }
        return -1;
    }

    /**
     * Finds the closing tag that matches the first opening tag in
     * {@code text}. Pass in "para" and a string that starts with
     * {@code <para>} and it will return the index of the matching
     * {@code </para>}, taking nested {@code <para>} elements into account.
     * It assumes well-formed xml. Or well enough.
     *
     * @param tag the element name, without angle brackets
     * @param text the XML text to search
     * @return the index of the matching closing tag's {@code <}, or -1
     */
    public static int getIndexClosingTag(String tag, String text) {
        return getIndexClosingTag(tag, text, 0);
    }

    /**
     * Same as {@link #getIndexClosingTag(String, String)}, but starts
     * searching at {@code start}.
     *
     * @param tag the element name, without angle brackets
     * @param text the XML text to search
     * @param start the index to start searching from
     * @return the index of the matching closing tag's {@code <}, or -1 when
     *         there is no closing tag, the tag is never opened before the
     *         first closing tag, or the tags do not balance
     */
    public static int getIndexClosingTag(String tag, String text, int start) {
        String close = "</" + tag + ">";
        int closeSz = close.length();
        int nextCloseIdx = text.indexOf(close, start);
        if (nextCloseIdx == -1) {
            return -1;
        }
        int opened = countOpeningTags(tag, text, start, nextCloseIdx);
        if (opened == 0) {
            return -1;  // tag is never opened
        }
        // Each closing tag found so far balances one opening tag. Keep
        // walking closing tags until as many have been seen as there are
        // opening tags in front of the current one.
        int closed = 1;
        while (opened != closed) {
            int scanFrom = nextCloseIdx + closeSz;
            nextCloseIdx = text.indexOf(close, scanFrom);
            if (nextCloseIdx == -1) {
                return -1;
            }
            // Only the stretch since the previous closing tag is new.
            opened += countOpeningTags(tag, text, scanFrom, nextCloseIdx);
            closed++;
        }
        return nextCloseIdx;
    }

    /**
     * Reads a double-quoted attribute value from the first tag in
     * {@code text}.
     *
     * @param attribute the attribute name
     * @param text the XML text to search
     * @return the unescaped attribute value, or {@code null} if not found
     */
    public static String getAttribute(String attribute, String text) {
        return getAttribute(attribute, text, 0);
    }

    /**
     * Reads a double-quoted attribute value from the tag that ends at the
     * first {@code >} at or after {@code idx}.
     *
     * @param attribute the attribute name
     * @param text the XML text to search
     * @param idx the index to start searching from, normally the position
     *        of the tag's {@code <}
     * @return the unescaped attribute value, or {@code null} when the
     *         attribute is missing, lies beyond the end of the tag, or its
     *         value is not terminated by a quote inside the tag
     */
    public static String getAttribute(String attribute, String text, int idx) {
        int close = text.indexOf('>', idx);
        String prefix = attribute + "=\"";
        int attrIdx = text.indexOf(prefix, idx);
        // Skip hits that are only the tail of a longer attribute name, e.g.
        // uid="..." while looking for "id". A hit at the very start of the
        // searched region is accepted as is.
        while (attrIdx > 0 && attrIdx != idx
                && !Character.isWhitespace(text.charAt(attrIdx - 1))) {
            attrIdx = text.indexOf(prefix, attrIdx + 1);
        }
        if (attrIdx == -1 || attrIdx > close) {
            return null;
        }
        int attrStartIdx = attrIdx + prefix.length();
        int attrCloseIdx = text.indexOf('"', attrStartIdx);
        if (attrCloseIdx == -1 || attrCloseIdx > close) {
            return null;
        }
        return unescapeXml(text.substring(attrStartIdx, attrCloseIdx));
    }

    /**
     * Counts the opening tags named {@code tag} that lie completely inside
     * {@code text[from, to)}, using the same rule as the opening-tag search:
     * the name must be followed by {@code >} or whitespace.
     */
    private static int countOpeningTags(String tag, String text, int from, int to) {
        String open = "<" + tag;
        int openSz = open.length();
        int count = 0;
        int idx = text.indexOf(open, from);
        // The character following the name must itself be inside the range.
        while (idx != -1 && idx + openSz < to) {
            if (isTagNameEnd(text.charAt(idx + openSz))) {
                count++;
            }
            idx = text.indexOf(open, idx + openSz);
        }
        return count;
    }

    /** Tells whether {@code ch} may directly follow an element name in an opening tag. */
    private static boolean isTagNameEnd(char ch) {
        return ch == '>' || Character.isWhitespace(ch);
    }

}