001 /* 002 * Sonar, open source software quality management tool. 003 * Copyright (C) 2008-2011 SonarSource 004 * mailto:contact AT sonarsource DOT com 005 * 006 * Sonar is free software; you can redistribute it and/or 007 * modify it under the terms of the GNU Lesser General Public 008 * License as published by the Free Software Foundation; either 009 * version 3 of the License, or (at your option) any later version. 010 * 011 * Sonar is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 014 * Lesser General Public License for more details. 015 * 016 * You should have received a copy of the GNU Lesser General Public 017 * License along with Sonar; if not, write to the Free Software 018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 019 */ 020 021 /** 022 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html 023 */ 024 package net.sourceforge.pmd.util; 025 026 import java.util.ArrayList; 027 import java.util.Iterator; 028 import java.util.List; 029 030 public class StringUtil { 031 032 public static final String[] EMPTY_STRINGS = new String[0]; 033 private static final boolean supportsUTF8 = System.getProperty("net.sourceforge.pmd.supportUTF8", "no").equals("yes"); 034 private static final String[] ENTITIES; 035 036 static { 037 ENTITIES = new String[256 - 126]; 038 for (int i = 126; i <= 255; i++) { 039 ENTITIES[i - 126] = "&#" + i + ';'; 040 } 041 } 042 043 public static String replaceString(String original, char oldChar, String newString) { 044 045 String fixedNew = newString == null ? "" : newString; 046 047 StringBuffer desc = new StringBuffer(); 048 int index = original.indexOf(oldChar); 049 int last = 0; 050 while (index != -1) { 051 desc.append(original.substring(last, index)); 052 desc.append(fixedNew); 053 last = index + 1; 054 index = original.indexOf(oldChar, last); 055 } 056 desc.append(original.substring(last)); 057 return desc.toString(); 058 } 059 060 public static String replaceString(String original, String oldString, String newString) { 061 062 String fixedNew = newString == null ? "" : newString; 063 064 StringBuffer desc = new StringBuffer(); 065 int index = original.indexOf(oldString); 066 int last = 0; 067 while (index != -1) { 068 desc.append(original.substring(last, index)); 069 desc.append(fixedNew); 070 last = index + oldString.length(); 071 index = original.indexOf(oldString, last); 072 } 073 desc.append(original.substring(last)); 074 return desc.toString(); 075 } 076 077 /** 078 * Appends to a StringBuffer the String src where non-ASCII and 079 * XML special chars are escaped. 080 * 081 * @param buf The destination XML stream 082 * @param src The String to append to the stream 083 */ 084 public static void appendXmlEscaped(StringBuffer buf, String src) { 085 appendXmlEscaped(buf, src, supportsUTF8); 086 } 087 088 public static String htmlEncode(String string) { 089 String encoded = StringUtil.replaceString(string, '&', "&"); 090 encoded = StringUtil.replaceString(encoded, '<', "<"); 091 return StringUtil.replaceString(encoded, '>', ">"); 092 } 093 094 // TODO - unify the method above with the one below 095 096 private static void appendXmlEscaped(StringBuffer buf, String src, boolean supportUTF8) { 097 char c; 098 for (int i = 0; i < src.length(); i++) { 099 c = src.charAt(i); 100 if (c > '~') {// 126 101 if (!supportUTF8) { 102 if (c <= 255) { 103 buf.append(ENTITIES[c - 126]); 104 } else { 105 buf.append("&u").append(Integer.toHexString(c)).append(';'); 106 } 107 } else { 108 buf.append(c); 109 } 110 } else if (c == '&') 111 buf.append("&"); 112 else if (c == '"') 113 buf.append("""); 114 else if (c == '<') 115 buf.append("<"); 116 else if (c == '>') 117 buf.append(">"); 118 else 119 buf.append(c); 120 } 121 } 122 123 /** 124 * Parses the input source using the delimiter specified. This method is much 125 * faster than using the StringTokenizer or String.split(char) approach and 126 * serves as a replacement for String.split() for JDK1.3 that doesn't have it. 127 * 128 * FIXME - we're on JDK 1.4 now, can we replace this with String.split? 129 * 130 * @param source String 131 * @param delimiter char 132 * @return String[] 133 */ 134 public static String[] substringsOf(String source, char delimiter) { 135 136 if (source == null || source.length() == 0) { 137 return EMPTY_STRINGS; 138 } 139 140 int delimiterCount = 0; 141 int length = source.length(); 142 char[] chars = source.toCharArray(); 143 144 for (int i=0; i<length; i++) { 145 if (chars[i] == delimiter) delimiterCount++; 146 } 147 148 if (delimiterCount == 0) return new String[] { source }; 149 150 String results[] = new String[delimiterCount+1]; 151 152 int i = 0; 153 int offset = 0; 154 155 while (offset <= length) { 156 int pos = source.indexOf(delimiter, offset); 157 if (pos < 0) pos = length; 158 results[i++] = pos == offset ? "" : source.substring(offset, pos); 159 offset = pos + 1; 160 } 161 162 return results; 163 } 164 165 /** 166 * Much more efficient than StringTokenizer. 167 * 168 * @param str String 169 * @param separator char 170 * @return String[] 171 */ 172 public static String[] substringsOf(String str, String separator) { 173 174 if (str == null || str.length() == 0) { 175 return EMPTY_STRINGS; 176 } 177 178 int index = str.indexOf(separator); 179 if (index == -1) { 180 return new String[]{str}; 181 } 182 183 List<String> list = new ArrayList<String>(); 184 int currPos = 0; 185 int len = separator.length(); 186 while (index != -1) { 187 list.add(str.substring(currPos, index)); 188 currPos = index + len; 189 index = str.indexOf(separator, currPos); 190 } 191 list.add(str.substring(currPos)); 192 return list.toArray(new String[list.size()]); 193 } 194 195 196 /** 197 * Copies the elements returned by the iterator onto the string buffer 198 * each delimited by the separator. 199 * 200 * @param sb StringBuffer 201 * @param iter Iterator 202 * @param separator String 203 */ 204 public static void asStringOn(StringBuffer sb, Iterator iter, String separator) { 205 206 if (!iter.hasNext()) return; 207 208 sb.append(iter.next()); 209 210 while (iter.hasNext()) { 211 sb.append(separator); 212 sb.append(iter.next()); 213 } 214 } 215 /** 216 * Return the length of the shortest string in the array. 217 * If any one of them is null then it returns 0. 218 * 219 * @param strings String[] 220 * @return int 221 */ 222 public static int lengthOfShortestIn(String[] strings) { 223 224 int minLength = Integer.MAX_VALUE; 225 226 for (int i=0; i<strings.length; i++) { 227 if (strings[i] == null) return 0; 228 minLength = Math.min(minLength, strings[i].length()); 229 } 230 231 return minLength; 232 } 233 234 /** 235 * Determine the maximum number of common leading whitespace characters 236 * the strings share in the same sequence. Useful for determining how 237 * many leading characters can be removed to shift all the text in the 238 * strings to the left without misaligning them. 239 * 240 * @param strings String[] 241 * @return int 242 */ 243 public static int maxCommonLeadingWhitespaceForAll(String[] strings) { 244 245 int shortest = lengthOfShortestIn(strings); 246 if (shortest == 0) return 0; 247 248 char[] matches = new char[shortest]; 249 250 String str; 251 for (int m=0; m<matches.length; m++) { 252 matches[m] = strings[0].charAt(m); 253 if (!Character.isWhitespace(matches[m])) return m; 254 for (int i=0; i<strings.length; i++) { 255 str = strings[i]; 256 if (str.charAt(m) != matches[m]) return m; 257 } 258 } 259 260 return shortest; 261 } 262 263 /** 264 * Trims off the leading characters off the strings up to the trimDepth 265 * specified. Returns the same strings if trimDepth = 0 266 * 267 * @param strings 268 * @param trimDepth 269 * @return String[] 270 */ 271 public static String[] trimStartOn(String[] strings, int trimDepth) { 272 273 if (trimDepth == 0) return strings; 274 275 String[] results = new String[strings.length]; 276 for (int i=0; i<strings.length; i++) { 277 results[i] = strings[i].substring(trimDepth); 278 } 279 return results; 280 } 281 282 /** 283 * Left pads a string. 284 * @param s The String to pad 285 * @param length The desired minimum length of the resulting padded String 286 * @return The resulting left padded String 287 */ 288 public static String lpad(String s, int length) { 289 String res = s; 290 if (length - s.length() > 0) { 291 char [] arr = new char[length - s.length()]; 292 java.util.Arrays.fill(arr, ' '); 293 res = new StringBuffer(length).append(arr).append(s).toString(); 294 } 295 return res; 296 } 297 298 /** 299 * Are the two String values the same. 300 * The Strings can be optionally trimmed before checking. 301 * The Strings can be optionally compared ignoring case. 302 * The Strings can be have embedded whitespace standardized before comparing. 303 * Two null values are treated as equal. 304 * 305 * @param s1 The first String. 306 * @param s2 The second String. 307 * @param trim Indicates if the Strings should be trimmed before comparison. 308 * @param ignoreCase Indicates if the case of the Strings should ignored during comparison. 309 * @param standardizeWhitespace Indicates if the embedded whitespace should be standardized before comparison. 310 * @return <code>true</code> if the Strings are the same, <code>false</code> otherwise. 311 */ 312 public static boolean isSame(String s1, String s2, boolean trim, boolean ignoreCase, boolean standardizeWhitespace) { 313 if (s1 == s2) { 314 return true; 315 } else if (s1 == null || s2 == null) { 316 return false; 317 } else { 318 if (trim) { 319 s1 = s1.trim(); 320 s2 = s2.trim(); 321 } 322 if (standardizeWhitespace) { 323 // Replace all whitespace with a standard single space character. 324 s1 = s1.replaceAll("\\s+", " "); 325 s2 = s2.replaceAll("\\s+", " "); 326 } 327 return ignoreCase ? s1.equalsIgnoreCase(s2) : s1.equals(s2); 328 } 329 } 330 }