1 /*
2 * Copyright (c) 2003, Henri Yandell
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or
6 * without modification, are permitted provided that the
7 * following conditions are met:
8 *
9 * + Redistributions of source code must retain the above copyright notice,
10 * this list of conditions and the following disclaimer.
11 *
12 * + Redistributions in binary form must reproduce the above copyright notice,
13 * this list of conditions and the following disclaimer in the documentation
14 * and/or other materials provided with the distribution.
15 *
16 * + Neither the name of Genjava-Core nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32 package com.generationjava.compare;
33
34 import java.util.Comparator;
35
36 /***
37 * A Comparator which deals with alphabet characters 'naturally', but
38 * deals with numerics numerically. Leading 0's are ignored numerically,
39 * but do come into play if the number is equal. Thus aaa119yyyy comes before
40 * aaa0119xxxx regardless of x or y.
41 *
42 * The comparison should be very performant as it only ever deals with
43 * issues at a character level and never tries to consider the
44 * numerics as numbers.
45 *
46 * @author bayard@generationjava.com
47 */
48 public class NumericStringComparator implements Comparator {
49
50 public NumericStringComparator() {
51 }
52
53 public int compare(Object o1, Object o2) {
54 // System.out.println("Comparing: "+o1+" and "+o2);
55 if(o1 == null) {
56 return 1;
57 } else
58 if(o2 == null) {
59 return -1;
60 }
61
62 String s1 = o1.toString();
63 String s2 = o2.toString();
64
65 // find the first digit.
66 int idx1 = getFirstDigitIndex(s1);
67 int idx2 = getFirstDigitIndex(s2);
68
69 if( ( idx1 == -1 ) ||
70 ( idx2 == -1 ) ||
71 ( !s1.substring(0,idx1).equals(s2.substring(0,idx2)) )
72 )
73 {
74 // System.out.println("Shortcutted. ");
75 return s1.compareTo(s2);
76 }
77
78 // find the last digit
79 int edx1 = getLastDigitIndex(s1, idx1);
80 int edx2 = getLastDigitIndex(s2, idx2);
81
82 String sub1 = null;
83 String sub2 = null;
84
85 if(edx1 == -1) {
86 sub1 = s1.substring(idx1);
87 } else {
88 sub1 = s1.substring(idx1, edx1);
89 }
90
91 if(edx2 == -1) {
92 sub2 = s2.substring(idx2);
93 } else {
94 sub2 = s2.substring(idx2, edx2);
95 }
96
97 // deal with zeros at start of each number
98 int zero1 = countZeroes(sub1);
99 int zero2 = countZeroes(sub2);
100
101 sub1 = sub1.substring(zero1);
102 sub2 = sub2.substring(zero2);
103
104 // if equal, then recurse with the rest of the string
105 // need to deal with zeroes so that 00119 appears after 119
106 if(sub1.equals(sub2)) {
107 int ret = 0;
108 if(zero1 > zero2) {
109 ret = 1;
110 } else
111 if(zero1 < zero2) {
112 ret = -1;
113 }
114 // System.out.println("EDXs: "+edx1+" & "+edx2);
115 if(edx1 == -1) {
116 s1 = "";
117 } else {
118 s1 = s1.substring(edx1);
119 }
120 if(edx2 == -1) {
121 s2 = "";
122 } else {
123 s2 = s2.substring(edx2);
124 }
125
126 int comp = compare(s1, s2);
127 if(comp != 0) {
128 ret = comp;
129 }
130 // System.out.println("Dealt with rest of string: "+ret);
131 return ret;
132 } else {
133 // if a numerical string is smaller in length than another
134 // then it must be less.
135 if(sub1.length() != sub2.length()) {
136 // System.out.println("Ahah, different length. ");
137 return ( sub1.length() < sub2.length() ) ? -1 : 1;
138 }
139 }
140
141
142 // now we get to do the string based numerical thing :)
143 // going to assume that the individual character for the
144 // number has the right order. ie) '9' > '0'
145 // possibly bad in i18n.
146 char[] chr1 = sub1.toCharArray();
147 char[] chr2 = sub2.toCharArray();
148
149 int sz = chr1.length;
150 for(int i=0; i<sz; i++) {
151 // this should give better speed
152 if(chr1[i] != chr2[i]) {
153 // System.out.println("Length is different. ");
154 return (chr1[i] < chr2[i]) ? -1 : 1;
155 }
156 }
157
158 // System.out.println("Default. Boo. ");
159 return 0;
160 }
161
162 private int getFirstDigitIndex(String str) {
163 return getFirstDigitIndex(str, 0);
164 }
165 private int getFirstDigitIndex(String str, int start) {
166 return getFirstDigitIndex(str.toCharArray(), start);
167 }
168 private int getFirstDigitIndex(char[] chrs, int start) {
169 int sz = chrs.length;
170
171 for(int i=start; i<sz; i++) {
172 if(Character.isDigit(chrs[i])) {
173 return i;
174 }
175 }
176
177 return -1;
178 }
179
180 private int getLastDigitIndex(String str, int start) {
181 return getLastDigitIndex(str.toCharArray(), start);
182 }
183 private int getLastDigitIndex(char[] chrs, int start) {
184 int sz = chrs.length;
185
186 for(int i=start; i<sz; i++) {
187 if(!Character.isDigit(chrs[i])) {
188 return i;
189 }
190 }
191
192 return -1;
193 }
194
195 public int countZeroes(String str) {
196 int count = 0;
197
198 // assuming str is small...
199 for(int i=0; i<str.length(); i++) {
200 if(str.charAt(i) == '0') {
201 count++;
202 } else {
203 break;
204 }
205 }
206
207 return count;
208 }
209
210 // UNUSED
211 private boolean containsOnly(String str, char ch) {
212 return containsOnly( str.toCharArray(), ch );
213 }
214 private boolean containsOnly(char[] chrs, char ch) {
215 int sz = chrs.length;
216
217 for(int i=0; i<sz; i++) {
218 if(chrs[i] != ch) {
219 return false;
220 }
221 }
222
223 return true;
224 }
225
226 }