001 /*
002 * Java Genetic Algorithm Library (jenetics-1.5.0).
003 * Copyright (c) 2007-2013 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmx.at)
019 */
020 package org.jenetics.util;
021
022 import static java.lang.String.format;
023 import static java.util.Objects.requireNonNull;
024 import static org.jenetics.util.object.eq;
025 import static org.jenetics.util.object.hashCodeOf;
026
027 import java.io.Serializable;
028 import java.util.Arrays;
029 import java.util.Iterator;
030 import java.util.NoSuchElementException;
031 import java.util.regex.PatternSyntaxException;
032
033 import javolution.lang.Immutable;
034
035 /**
036 * This class is used for holding the valid characters of an
037 * {@link org.jenetics.CharacterGene}. It is not a character sequence in the
038 * classical sense. The characters of this sequence are sorted and doesn't
039 * contain duplicate values, like a set.
040 *
041 * [code]
042 * final CharSeq cs1 = new CharSeq("abcdeaafg");
043 * final CharSeq cs2 = new CharSeq("gfedcbabb");
044 * assert(cs1.equals(cs2));
045 * [/code]
046 *
047 * @author <a href="mailto:franz.wilhelmstoetter@gmx.at">Franz Wilhelmstötter</a>
048 * @since 1.0
049 * @version 1.2 — <em>$Date: 2013-12-02 $</em>
050 */
051 public final class CharSeq
052 extends AbstractCharSeq
053 implements
054 CharSequence,
055 ISeq<Character>,
056 Comparable<CharSeq>,
057 Immutable,
058 Serializable
059 {
060 private static final long serialVersionUID = 2L;
061
062 /**
063 * Create a new (distinct) CharSeq from the given {@code characters}. The
064 * given {@link CharSequence} is sorted and duplicate values are removed
065 *
066 * @see #CharSeq(CharSequence)
067 *
068 * @param characters the characters.
069 * @throws NullPointerException if the {@code characters} are {@code null}.
070 */
071 public CharSeq(final char[] characters) {
072 super(distinct(characters.clone()));
073 }
074
075 /**
076 * Create a new (distinct) CharSeq from the given {@code characters}. The
077 * given {@link CharSequence} is sorted and duplicate values are removed.
078 *
079 * @param characters the characters.
080 * @throws NullPointerException if the {@code characters} are {@code null}.
081 */
082 public CharSeq(final CharSequence characters) {
083 this(toCharArray(characters));
084 }
085
086 private static char[] toCharArray(final CharSequence characters) {
087 requireNonNull(characters, "Characters");
088
089 final char[] chars = new char[characters.length()];
090 for (int i = 0; i < characters.length(); ++i) {
091 chars[i] = characters.charAt(i);
092 }
093
094 return chars;
095 }
096
097 private static char[] distinct(final char[] chars) {
098 char[] result = chars;
099
100 if (chars.length > 0) {
101 Arrays.sort(result);
102
103 int nextIndex = 0;
104 int count = 1;
105 char last = result[0];
106
107 for (int i = 1; i < result.length; ++i) {
108 while (nextIndex < result.length && result[nextIndex] == last) {
109 ++nextIndex;
110 }
111 if (nextIndex < result.length) {
112 last = result[nextIndex];
113 result[i] = last;
114 ++count;
115 }
116 }
117
118 char[] array = new char[count];
119 System.arraycopy(result, 0, array, 0, count);
120 result = array;
121 }
122
123 return result;
124 }
125
126 @Override
127 public boolean contains(final Object object) {
128 boolean contains = false;
129 if (object instanceof Character) {
130 contains = contains((Character)object);
131 }
132
133 return contains;
134 }
135
136 /**
137 * Test whether this character set contains the given character {@code c}.
138 *
139 * @param c the character to test.
140 * @return {@code true} if this character set contains the given character,
141 * {@code false} otherwise.
142 * @throws NullPointerException if the given character {@code c} is
143 * {@code null}.
144 */
145 public boolean contains(final Character c) {
146 return contains(c.charValue());
147 }
148
149 /**
150 * Test whether this character set contains the given character {@code c}.
151 *
152 * @param c the character to test.
153 * @return {@code true} if this character set contains the given character,
154 * {@code false} otherwise.
155 */
156 public boolean contains(final char c) {
157 return Arrays.binarySearch(_characters, c) >= 0;
158 }
159
160 @Override
161 public char charAt(int index) {
162 return _characters[index];
163 }
164
165 @Override
166 public int length() {
167 return _characters.length;
168 }
169
170 @Override
171 public CharSeq subSequence(int start, int end) {
172 return new CharSeq(new String(_characters, start, end - start));
173 }
174
175 /**
176 * Test whether this character set is empty.
177 *
178 * @return {@code true} if this character set is empty, {@code false}
179 * otherwise.
180 */
181 public boolean isEmpty() {
182 return _characters.length == 0;
183 }
184
185 @Override
186 public Iterator<Character> iterator() {
187 return new Iterator<Character>() {
188 private int _pos = 0;
189 @Override public boolean hasNext() {
190 return _pos < _characters.length;
191 }
192 @Override public Character next() {
193 if (!hasNext()) {
194 throw new NoSuchElementException(format(
195 "Index %s is out of range [0, %s)",
196 _pos, _characters.length
197 ));
198 }
199 return _characters[_pos++];
200 }
201 @Override public void remove() {
202 throw new UnsupportedOperationException();
203 }
204 };
205 }
206
207 @Override
208 public int hashCode() {
209 return hashCodeOf(getClass()).and(_characters).value();
210 }
211
212 @Override
213 public boolean equals(final Object object) {
214 if (object == this) {
215 return true;
216 }
217 if (!(object instanceof CharSeq)) {
218 return false;
219 }
220
221 final CharSeq ch = (CharSeq)object;
222 return eq(_characters, ch._characters);
223 }
224
225 @Override
226 public int compareTo(final CharSeq set) {
227 int result = 0;
228
229 final int n = Math.min(_characters.length, set._characters.length);
230 for (int i = 0; i < n && result == 0; ++i) {
231 result = _characters[i] - set._characters[i];
232 }
233 if (result == 0) {
234 result = _characters.length - set._characters.length;
235 }
236
237 return result;
238 }
239
240 @Override
241 public String toString() {
242 return new String(_characters);
243 }
244
245 /**
246 * Expands the character range for the given {@code pattern}. E.g
247 * {@code a-zA-Z0-1} will return a string containing all upper and lower
248 * case characters (from a to z) and all digits form 0 to 9.
249 *
250 * @param pattern the {@code pattern} to expand.
251 * @return the expanded pattern.
252 * @throws PatternSyntaxException if the pattern could not be expanded.
253 * @throws NullPointerException if the pattern is {@code null}.
254 */
255 public static String expand(final CharSequence pattern) {
256 requireNonNull(pattern, "Pattern");
257 final StringBuilder out = new StringBuilder();
258
259 for (int i = 0, n = pattern.length(); i < n; ++i) {
260 if (pattern.charAt(i) == '\\') {
261 ++i;
262 if (i < pattern.length()) {
263 out.append(pattern.charAt(i));
264 }
265 } else if (pattern.charAt(i) == '-') {
266 if (i <= 0 || i >= (pattern.length() - 1)) {
267 throw new PatternSyntaxException(
268 "Dangling range operator '-'", pattern.toString(),
269 pattern.length() - 1
270 );
271 }
272
273 final String range = expand(
274 pattern.charAt(i - 1),
275 pattern.charAt(i + 1)
276 );
277 out.append(range);
278
279 ++i;
280 } else if (i + 1 == n || pattern.charAt(i + 1) != '-') {
281 out.append(pattern.charAt(i));
282 }
283 }
284
285 return out.toString();
286 }
287
288 /**
289 * Expands the characters between {@code a} and {@code b}.
290 *
291 * @param a the start character.
292 * @param b the stop character.
293 * @return the expanded characters.
294 */
295 public static String expand(final char a, final char b) {
296 final StringBuilder out = new StringBuilder();
297
298 if (a < b) {
299 char c = a;
300 while (c <= b) {
301 out.append(c);
302 c = (char) (c + 1);
303 }
304 } else if (a > b) {
305 char c = a;
306 while (c >= b) {
307 out.append(c);
308 c = (char) (c - 1);
309 }
310 }
311
312 return out.toString();
313 }
314
315 /**
316 * Expands the character range for the given {@code pattern}. E.g
317 * {@code a-zA-Z0-1} will return a string containing all upper and lower
318 * case characters (from a to z) and all digits form 0 to 9.
319 *
320 * @see #expand(CharSequence)
321 *
322 * @param pattern the {@code pattern} to expand.
323 * @return the expanded pattern.
324 * @throws PatternSyntaxException if the pattern could not be expanded.
325 * @throws NullPointerException if the pattern is {@code null}.
326 */
327 public static CharSeq valueOf(final CharSequence pattern) {
328 return new CharSeq(expand(pattern));
329 }
330
331 /**
332 * Expands the characters between {@code a} and {@code b}.
333 *
334 * @see #expand(char, char)
335 *
336 * @param a the start character.
337 * @param b the stop character.
338 * @return the expanded characters.
339 */
340 public static CharSeq valueOf(final char a, final char b) {
341 return new CharSeq(expand(a, b));
342 }
343
344 /**
345 * Helper method for creating a sequence of characters from the given
346 * {@code CharSequence}. The returned sequence will contain all characters
347 * in the original order.
348 *
349 * @param chars the char sequence to convert.
350 * @return a sequence which will contain all given chars in the original
351 * order.
352 */
353 public static ISeq<Character> toISeq(final CharSequence chars) {
354 final Array<Character> seq = new Array<>(chars.length());
355 for (int i = 0; i < chars.length(); ++i) {
356 seq.set(i, chars.charAt(i));
357 }
358
359 return seq.toISeq();
360 }
361
362 }
363
364
365
|