001 /*
002 * Sonar, open source software quality management tool.
003 * Copyright (C) 2008-2011 SonarSource
004 * mailto:contact AT sonarsource DOT com
005 *
006 * Sonar is free software; you can redistribute it and/or
007 * modify it under the terms of the GNU Lesser General Public
008 * License as published by the Free Software Foundation; either
009 * version 3 of the License, or (at your option) any later version.
010 *
011 * Sonar is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014 * Lesser General Public License for more details.
015 *
016 * You should have received a copy of the GNU Lesser General Public
017 * License along with Sonar; if not, write to the Free Software
018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
019 */
020
021 /**
022 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
023 */
024 package net.sourceforge.pmd.util;
025
026 import java.util.ArrayList;
027 import java.util.Iterator;
028 import java.util.List;
029
/**
 * Static helpers for string replacement, splitting, padding, comparison and
 * XML/HTML escaping.
 */
public class StringUtil {

    /** Shared zero-length array returned by the split methods for null/empty input. */
    public static final String[] EMPTY_STRINGS = new String[0];

    /**
     * True when the system property {@code net.sourceforge.pmd.supportUTF8} is
     * set to {@code yes}; characters above '~' are then written verbatim
     * instead of being turned into character references.
     */
    private static final boolean SUPPORTS_UTF8 =
            System.getProperty("net.sourceforge.pmd.supportUTF8", "no").equals("yes");

    /**
     * Pre-built decimal character references for the code points 126..255.
     * Entry {@code i} holds the reference for code point {@code i + 126}.
     */
    private static final String[] ENTITIES;

    static {
        ENTITIES = new String[256 - 126];
        for (int i = 126; i <= 255; i++) {
            ENTITIES[i - 126] = "&#" + i + ';';
        }
    }

    /**
     * Replaces every occurrence of a character with a string.
     *
     * @param original  the text to scan; must not be null
     * @param oldChar   the character to replace
     * @param newString the replacement; {@code null} is treated as the empty string
     * @return a new string with all occurrences replaced
     * @throws NullPointerException if {@code original} is null
     */
    public static String replaceString(String original, char oldChar, String newString) {

        String fixedNew = newString == null ? "" : newString;

        StringBuilder desc = new StringBuilder(original.length());
        int last = 0;
        int index = original.indexOf(oldChar);
        while (index != -1) {
            desc.append(original, last, index);
            desc.append(fixedNew);
            last = index + 1;
            index = original.indexOf(oldChar, last);
        }
        desc.append(original, last, original.length());
        return desc.toString();
    }

    /**
     * Replaces every occurrence of a substring with another string.
     *
     * @param original  the text to scan; must not be null
     * @param oldString the text to look for; when null or empty there is
     *                  nothing to search for and {@code original} is returned as is
     * @param newString the replacement; {@code null} is treated as the empty string
     * @return the text with all occurrences replaced
     * @throws NullPointerException if {@code original} is null
     */
    public static String replaceString(String original, String oldString, String newString) {

        // An empty search string matches at every position without ever
        // advancing, which used to loop forever.
        if (oldString == null || oldString.length() == 0) {
            if (original == null) {
                throw new NullPointerException("original");
            }
            return original;
        }

        String fixedNew = newString == null ? "" : newString;

        StringBuilder desc = new StringBuilder(original.length());
        int step = oldString.length();
        int last = 0;
        int index = original.indexOf(oldString);
        while (index != -1) {
            desc.append(original, last, index);
            desc.append(fixedNew);
            last = index + step;
            index = original.indexOf(oldString, last);
        }
        desc.append(original, last, original.length());
        return desc.toString();
    }

    /**
     * Appends to a StringBuffer the String src where non-ASCII and
     * XML special chars are escaped. Whether characters above '~' are escaped
     * is controlled by the {@code net.sourceforge.pmd.supportUTF8} system
     * property, read once when this class is loaded.
     *
     * @param buf The destination XML stream
     * @param src The String to append to the stream
     */
    public static void appendXmlEscaped(StringBuffer buf, String src) {
        appendXmlEscaped(buf, src, SUPPORTS_UTF8);
    }

    /**
     * Escapes the characters {@code &}, {@code <} and {@code >} as HTML
     * entities. The ampersand is handled first so that the entities produced
     * for the angle brackets are not escaped a second time.
     *
     * @param string the text to encode; must not be null
     * @return the encoded text
     */
    public static String htmlEncode(String string) {
        String encoded = replaceString(string, '&', "&amp;");
        encoded = replaceString(encoded, '<', "&lt;");
        return replaceString(encoded, '>', "&gt;");
    }

    // TODO - unify the method above with the one below

    /**
     * Appends {@code src} to {@code buf}, escaping the XML special characters
     * and, unless {@code supportUTF8} is set, every character above '~'.
     *
     * @param buf         destination buffer
     * @param src         text to escape
     * @param supportUTF8 when true, characters above '~' are copied verbatim
     */
    private static void appendXmlEscaped(StringBuffer buf, String src, boolean supportUTF8) {
        for (int i = 0; i < src.length(); i++) {
            char c = src.charAt(i);
            if (c > '~') { // 126
                if (supportUTF8) {
                    buf.append(c);
                } else if (c <= 255) {
                    buf.append(ENTITIES[c - 126]);
                } else {
                    // Numeric character reference; a surrogate pair is written
                    // as the single code point it encodes.
                    int codePoint = src.codePointAt(i);
                    if (Character.isSupplementaryCodePoint(codePoint)) {
                        i++; // skip the low surrogate consumed above
                    }
                    buf.append("&#x").append(Integer.toHexString(codePoint)).append(';');
                }
            } else if (c == '&') {
                buf.append("&amp;");
            } else if (c == '"') {
                buf.append("&quot;");
            } else if (c == '<') {
                buf.append("&lt;");
            } else if (c == '>') {
                buf.append("&gt;");
            } else {
                buf.append(c);
            }
        }
    }

    /**
     * Splits the source on every occurrence of the delimiter character.
     * Adjacent, leading and trailing delimiters yield empty strings, so the
     * result always has one more element than there are delimiters.
     *
     * Note: String.split is not a drop-in replacement, since it takes a
     * regular expression and discards trailing empty strings.
     *
     * @param source    String
     * @param delimiter char
     * @return String[] the pieces; an empty array when source is null or empty
     */
    public static String[] substringsOf(String source, char delimiter) {

        if (source == null || source.length() == 0) {
            return EMPTY_STRINGS;
        }

        int length = source.length();
        int delimiterCount = 0;
        for (int i = 0; i < length; i++) {
            if (source.charAt(i) == delimiter) {
                delimiterCount++;
            }
        }

        if (delimiterCount == 0) {
            return new String[] { source };
        }

        String[] results = new String[delimiterCount + 1];
        int offset = 0;
        for (int i = 0; i < results.length; i++) {
            int pos = source.indexOf(delimiter, offset);
            if (pos < 0) {
                pos = length; // last piece runs to the end of the source
            }
            results[i] = source.substring(offset, pos);
            offset = pos + 1;
        }
        return results;
    }

    /**
     * Splits the string on every occurrence of the separator string.
     * Much more efficient than StringTokenizer.
     *
     * @param str       String
     * @param separator String; when null or empty no split is possible and
     *                  the whole string is returned as the single element
     * @return String[] the pieces; an empty array when str is null or empty
     */
    public static String[] substringsOf(String str, String separator) {

        if (str == null || str.length() == 0) {
            return EMPTY_STRINGS;
        }

        // An empty separator matches everywhere without advancing, which used
        // to loop forever.
        if (separator == null || separator.length() == 0) {
            return new String[] { str };
        }

        int index = str.indexOf(separator);
        if (index == -1) {
            return new String[] { str };
        }

        List<String> list = new ArrayList<String>();
        int len = separator.length();
        int currPos = 0;
        while (index != -1) {
            list.add(str.substring(currPos, index));
            currPos = index + len;
            index = str.indexOf(separator, currPos);
        }
        list.add(str.substring(currPos));
        return list.toArray(new String[list.size()]);
    }

    /**
     * Copies the elements returned by the iterator onto the string buffer
     * each delimited by the separator. Nothing is appended when the iterator
     * has no elements.
     *
     * @param sb        StringBuffer
     * @param iter      Iterator
     * @param separator String
     */
    public static void asStringOn(StringBuffer sb, Iterator<?> iter, String separator) {

        if (!iter.hasNext()) {
            return;
        }

        sb.append(iter.next());

        while (iter.hasNext()) {
            sb.append(separator);
            sb.append(iter.next());
        }
    }

    /**
     * Return the length of the shortest string in the array.
     * If any one of them is null, or the array itself is null or empty,
     * then it returns 0.
     *
     * @param strings String[]
     * @return int
     */
    public static int lengthOfShortestIn(String[] strings) {

        if (strings == null || strings.length == 0) {
            return 0;
        }

        int minLength = Integer.MAX_VALUE;
        for (int i = 0; i < strings.length; i++) {
            if (strings[i] == null) {
                return 0;
            }
            minLength = Math.min(minLength, strings[i].length());
        }
        return minLength;
    }

    /**
     * Determine the maximum number of common leading whitespace characters
     * the strings share in the same sequence. Useful for determining how
     * many leading characters can be removed to shift all the text in the
     * strings to the left without misaligning them.
     *
     * @param strings String[]
     * @return int 0 when the array is null or empty, or holds a null or empty string
     */
    public static int maxCommonLeadingWhitespaceForAll(String[] strings) {

        int shortest = lengthOfShortestIn(strings);
        if (shortest == 0) {
            return 0;
        }

        for (int m = 0; m < shortest; m++) {
            char expected = strings[0].charAt(m);
            if (!Character.isWhitespace(expected)) {
                return m;
            }
            // Every other string must carry the very same whitespace character here.
            for (int i = 1; i < strings.length; i++) {
                if (strings[i].charAt(m) != expected) {
                    return m;
                }
            }
        }

        return shortest;
    }

    /**
     * Trims off the leading characters off the strings up to the trimDepth
     * specified. Returns the same array instance if trimDepth = 0.
     *
     * @param strings   the strings to trim; no element may be null
     * @param trimDepth the number of leading characters to drop from each string
     * @return String[]
     * @throws StringIndexOutOfBoundsException if trimDepth is negative or
     *         larger than the length of one of the strings
     */
    public static String[] trimStartOn(String[] strings, int trimDepth) {

        if (trimDepth == 0) {
            return strings;
        }

        String[] results = new String[strings.length];
        for (int i = 0; i < strings.length; i++) {
            results[i] = strings[i].substring(trimDepth);
        }
        return results;
    }

    /**
     * Left pads a string with spaces.
     *
     * @param s      The String to pad; must not be null
     * @param length The desired minimum length of the resulting padded String
     * @return The resulting left padded String; {@code s} itself when it is
     *         already at least {@code length} characters long
     */
    public static String lpad(String s, int length) {
        int missing = length - s.length();
        if (missing <= 0) {
            return s;
        }
        char[] padding = new char[missing];
        java.util.Arrays.fill(padding, ' ');
        return new StringBuilder(length).append(padding).append(s).toString();
    }

    /**
     * Are the two String values the same.
     * The Strings can be optionally trimmed before checking.
     * The Strings can be optionally compared ignoring case.
     * The Strings can have embedded whitespace standardized before comparing.
     * Two null values are treated as equal.
     *
     * @param s1 The first String.
     * @param s2 The second String.
     * @param trim Indicates if the Strings should be trimmed before comparison.
     * @param ignoreCase Indicates if the case of the Strings should ignored during comparison.
     * @param standardizeWhitespace Indicates if the embedded whitespace should be standardized before comparison.
     * @return <code>true</code> if the Strings are the same, <code>false</code> otherwise.
     */
    public static boolean isSame(String s1, String s2, boolean trim, boolean ignoreCase, boolean standardizeWhitespace) {
        if (s1 == s2) {
            // Same reference, which also covers the case where both are null.
            return true;
        }
        if (s1 == null || s2 == null) {
            return false;
        }

        String left = s1;
        String right = s2;
        if (trim) {
            left = left.trim();
            right = right.trim();
        }
        if (standardizeWhitespace) {
            // Replace all whitespace with a standard single space character.
            left = left.replaceAll("\\s+", " ");
            right = right.replaceAll("\\s+", " ");
        }
        return ignoreCase ? left.equalsIgnoreCase(right) : left.equals(right);
    }
}