View Javadoc

1   /*
2    * Copyright (c) 2003, Henri Yandell
3    * All rights reserved.
4    * 
5    * Redistribution and use in source and binary forms, with or 
6    * without modification, are permitted provided that the 
7    * following conditions are met:
8    * 
9    * + Redistributions of source code must retain the above copyright notice, 
10   *   this list of conditions and the following disclaimer.
11   * 
12   * + Redistributions in binary form must reproduce the above copyright notice, 
13   *   this list of conditions and the following disclaimer in the documentation 
14   *   and/or other materials provided with the distribution.
15   * 
16   * + Neither the name of Genjava-Core nor the names of its contributors 
17   *   may be used to endorse or promote products derived from this software 
18   *   without specific prior written permission.
19   * 
20   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
21   * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
22   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
23   * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
24   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
25   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
26   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
27   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
28   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
29   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
30   * POSSIBILITY OF SUCH DAMAGE.
31   */
32  package com.generationjava.compare;
33  
34  import java.util.Comparator;
35  
36  /***
37   * A Comparator which deals with alphabet characters 'naturally', but 
38   * deals with numerics numerically. Leading 0's are ignored numerically,
39   * but do come into play if the number is equal. Thus aaa119yyyy comes before 
40   * aaa0119xxxx regardless of x or y.
41   *
42   * The comparison should be very performant as it only ever deals with 
43   * issues at a character level and never tries to consider the 
44   * numerics as numbers.
45   *
46   * @author bayard@generationjava.com
47   */
48  public class NumericStringComparator implements Comparator {
49  
50      public NumericStringComparator() {
51      }
52  
53      public int compare(Object o1, Object o2) {
54  // System.out.println("Comparing: "+o1+" and "+o2);
55          if(o1 == null) {
56              return 1;
57          } else
58          if(o2 == null) {
59              return -1;
60          }
61  
62          String s1 = o1.toString();
63          String s2 = o2.toString();
64  
65          // find the first digit.
66          int idx1 = getFirstDigitIndex(s1);
67          int idx2 = getFirstDigitIndex(s2);
68  
69          if( ( idx1 == -1 )   || 
70              ( idx2 == -1 ) ||
71              ( !s1.substring(0,idx1).equals(s2.substring(0,idx2)) )
72            )
73          {
74  // System.out.println("Shortcutted. ");
75              return s1.compareTo(s2);
76          }
77  
78          // find the last digit
79          int edx1 = getLastDigitIndex(s1, idx1);
80          int edx2 = getLastDigitIndex(s2, idx2);
81  
82          String sub1 = null;
83          String sub2 = null;
84  
85          if(edx1 == -1) {
86              sub1 = s1.substring(idx1);
87          } else {
88              sub1 = s1.substring(idx1, edx1);
89          }
90  
91          if(edx2 == -1) {
92              sub2 = s2.substring(idx2);
93          } else {
94              sub2 = s2.substring(idx2, edx2);
95          }
96  
97          // deal with zeros at start of each number
98          int zero1 = countZeroes(sub1);
99          int zero2 = countZeroes(sub2);
100 
101         sub1 = sub1.substring(zero1);
102         sub2 = sub2.substring(zero2);
103 
104         // if equal, then recurse with the rest of the string
105         // need to deal with zeroes so that 00119 appears after 119
106         if(sub1.equals(sub2)) {
107             int ret = 0;
108             if(zero1 > zero2) {
109                 ret = 1;
110             } else
111             if(zero1 < zero2) {
112                 ret = -1;
113             }
114 // System.out.println("EDXs: "+edx1+" & "+edx2);
115             if(edx1 == -1) {
116                 s1 = "";
117             } else {
118                 s1 = s1.substring(edx1);
119             }
120             if(edx2 == -1) {
121                 s2 = "";
122             } else {
123                 s2 = s2.substring(edx2);
124             }
125 
126             int comp = compare(s1, s2);
127             if(comp != 0) {
128                 ret = comp;
129             }
130 // System.out.println("Dealt with rest of string: "+ret);
131             return ret;
132         } else {
133             // if a numerical string is smaller in length than another
134             // then it must be less. 
135             if(sub1.length() != sub2.length()) {
136 // System.out.println("Ahah, different length. ");
137                 return ( sub1.length() < sub2.length() ) ? -1 : 1;
138             }
139         }
140 
141 
142         // now we get to do the string based numerical thing :)
143         // going to assume that the individual character for the 
144         // number has the right order. ie) '9' > '0'
145         // possibly bad in i18n.
146         char[] chr1 = sub1.toCharArray();
147         char[] chr2 = sub2.toCharArray();
148 
149         int sz = chr1.length;
150         for(int i=0; i<sz; i++) {
151             // this should give better speed
152             if(chr1[i] != chr2[i]) {
153 // System.out.println("Length is different. ");
154                 return (chr1[i] < chr2[i]) ? -1 : 1;
155             }
156         }
157 
158 // System.out.println("Default. Boo. ");
159         return 0;
160     }
161 
162     private int getFirstDigitIndex(String str) {
163         return getFirstDigitIndex(str, 0);
164     }
165     private int getFirstDigitIndex(String str, int start) {
166         return getFirstDigitIndex(str.toCharArray(), start);
167     }
168     private int getFirstDigitIndex(char[] chrs, int start) {
169         int sz = chrs.length;
170 
171         for(int i=start; i<sz; i++) {
172             if(Character.isDigit(chrs[i])) {
173                 return i;
174             }
175         }
176 
177         return -1;
178     }
179 
180     private int getLastDigitIndex(String str, int start) {
181         return getLastDigitIndex(str.toCharArray(), start);
182     }
183     private int getLastDigitIndex(char[] chrs, int start) {
184         int sz = chrs.length;
185 
186         for(int i=start; i<sz; i++) {
187             if(!Character.isDigit(chrs[i])) {
188                 return i;
189             }
190         }
191 
192         return -1;
193     }
194 
195     public int countZeroes(String str) {
196         int count = 0;
197 
198         // assuming str is small...
199         for(int i=0; i<str.length(); i++) {
200             if(str.charAt(i) == '0') {
201                 count++;
202             } else {
203                 break;
204             }
205         }
206 
207         return count;
208     }
209 
210     // UNUSED
211     private boolean containsOnly(String str, char ch) {
212         return containsOnly( str.toCharArray(), ch );
213     }
214     private boolean containsOnly(char[] chrs, char ch) {
215         int sz = chrs.length;
216 
217         for(int i=0; i<sz; i++) {
218             if(chrs[i] != ch) {
219                 return false;
220             }
221         }
222 
223         return true;
224     }
225         
226 }