001    /*
002     * Sonar, open source software quality management tool.
003     * Copyright (C) 2008-2011 SonarSource
004     * mailto:contact AT sonarsource DOT com
005     *
006     * Sonar is free software; you can redistribute it and/or
007     * modify it under the terms of the GNU Lesser General Public
008     * License as published by the Free Software Foundation; either
009     * version 3 of the License, or (at your option) any later version.
010     *
011     * Sonar is distributed in the hope that it will be useful,
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014     * Lesser General Public License for more details.
015     *
016     * You should have received a copy of the GNU Lesser General Public
017     * License along with Sonar; if not, write to the Free Software
018     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02
019     */
020    
021    /**
022     * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
023     */
024    package net.sourceforge.pmd.util;
025    
026    import java.util.ArrayList;
027    import java.util.Iterator;
028    import java.util.List;
029    
/**
 * Static helpers for replacing, splitting, escaping, padding and comparing
 * strings.
 *
 * <p>All methods are stateless. Unless a method documents otherwise, passing
 * <code>null</code> for a String argument results in a
 * <code>NullPointerException</code>.</p>
 */
public class StringUtil {

    /** Shared zero-length array returned by the split methods for null/empty input. */
    public static final String[] EMPTY_STRINGS = new String[0];

    /**
     * When true, characters above '~' are written as-is by
     * {@link #appendXmlEscaped(StringBuffer, String)} instead of being
     * replaced by character references. Read once from the system property
     * <code>net.sourceforge.pmd.supportUTF8</code> (enabled by the value "yes").
     */
    private static final boolean SUPPORTS_UTF8 =
            System.getProperty("net.sourceforge.pmd.supportUTF8", "no").equals("yes");

    /** Code of the first character covered by the {@link #ENTITIES} table. */
    private static final int FIRST_ENTITY_CODE = 126;

    /** Code of the last character covered by the {@link #ENTITIES} table. */
    private static final int LAST_ENTITY_CODE = 255;

    /** Precomputed decimal character references ("&#126;" .. "&#255;"). */
    private static final String[] ENTITIES;

    /** Matches one or more whitespace characters; compiled once for {@link #isSame}. */
    private static final java.util.regex.Pattern WHITESPACE_RUN =
            java.util.regex.Pattern.compile("\\s+");

    static {
        ENTITIES = new String[LAST_ENTITY_CODE - FIRST_ENTITY_CODE + 1];
        for (int code = FIRST_ENTITY_CODE; code <= LAST_ENTITY_CODE; code++) {
            ENTITIES[code - FIRST_ENTITY_CODE] = "&#" + code + ';';
        }
    }

    /**
     * Replaces every occurrence of a character with a string.
     *
     * @param original  The string to scan; must not be null
     * @param oldChar   The character to replace
     * @param newString The replacement; <code>null</code> is treated as ""
     * @return The string with all replacements applied
     */
    public static String replaceString(String original, char oldChar, String newString) {

        String replacement = newString == null ? "" : newString;

        int index = original.indexOf(oldChar);
        if (index == -1) {
            return original;
        }

        StringBuilder result = new StringBuilder(original.length());
        int last = 0;
        while (index != -1) {
            result.append(original, last, index);
            result.append(replacement);
            last = index + 1;
            index = original.indexOf(oldChar, last);
        }
        result.append(original, last, original.length());
        return result.toString();
    }

    /**
     * Replaces every non-overlapping occurrence of a substring, scanning
     * from left to right.
     *
     * @param original  The string to scan; must not be null
     * @param oldString The text to look for; when null or empty there is
     *                  nothing to match and <code>original</code> is returned
     * @param newString The replacement; <code>null</code> is treated as ""
     * @return The string with all replacements applied
     */
    public static String replaceString(String original, String oldString, String newString) {

        // An empty search string matches at every index without advancing the
        // scan position, so it must be rejected before entering the loop.
        if (oldString == null || oldString.length() == 0) {
            return original;
        }

        String replacement = newString == null ? "" : newString;

        int index = original.indexOf(oldString);
        if (index == -1) {
            return original;
        }

        StringBuilder result = new StringBuilder(original.length());
        int last = 0;
        while (index != -1) {
            result.append(original, last, index);
            result.append(replacement);
            last = index + oldString.length();
            index = original.indexOf(oldString, last);
        }
        result.append(original, last, original.length());
        return result.toString();
    }

    /**
     * Appends to a StringBuffer the String src where non-ASCII and
     * XML special chars are escaped.
     *
     * <p>The characters &amp;, &quot;, &lt; and &gt; are always replaced by
     * their named entities. Characters above '~' are replaced by numeric
     * character references unless the system property
     * <code>net.sourceforge.pmd.supportUTF8</code> was set to "yes".</p>
     *
     * @param buf The destination XML stream
     * @param src The String to append to the stream
     */
    public static void appendXmlEscaped(StringBuffer buf, String src) {
        appendXmlEscaped(buf, src, SUPPORTS_UTF8);
    }

    /**
     * Escapes the characters &amp;, &lt; and &gt; for inclusion in HTML text.
     * Quote characters are left untouched.
     *
     * @param string The text to encode; must not be null
     * @return The encoded text
     */
    public static String htmlEncode(String string) {
        // '&' must be handled first so the entities added below are not re-escaped.
        String encoded = replaceString(string, '&', "&amp;");
        encoded = replaceString(encoded, '<', "&lt;");
        return replaceString(encoded, '>', "&gt;");
    }

    // TODO - unify the method above with the one below

    /**
     * Worker for {@link #appendXmlEscaped(StringBuffer, String)}.
     *
     * @param buf         The destination XML stream
     * @param src         The String to append to the stream
     * @param supportUTF8 When true, characters above '~' are appended unchanged;
     *                    otherwise they are written as character references
     */
    private static void appendXmlEscaped(StringBuffer buf, String src, boolean supportUTF8) {
        final int length = src.length();
        for (int i = 0; i < length; i++) {
            final char c = src.charAt(i);
            if (c > '~') {
                if (supportUTF8) {
                    buf.append(c);
                } else if (c <= LAST_ENTITY_CODE) {
                    buf.append(ENTITIES[c - FIRST_ENTITY_CODE]);
                } else {
                    int codePoint = c;
                    // A surrogate pair encodes a single code point and must be
                    // written as one reference, not as two separate halves.
                    if (Character.isHighSurrogate(c) && i + 1 < length
                            && Character.isLowSurrogate(src.charAt(i + 1))) {
                        codePoint = Character.toCodePoint(c, src.charAt(i + 1));
                        i++;
                    }
                    // "&#x...;" is the XML hexadecimal character reference syntax.
                    buf.append("&#x").append(Integer.toHexString(codePoint)).append(';');
                }
            } else {
                switch (c) {
                case '&':
                    buf.append("&amp;");
                    break;
                case '"':
                    buf.append("&quot;");
                    break;
                case '<':
                    buf.append("&lt;");
                    break;
                case '>':
                    buf.append("&gt;");
                    break;
                default:
                    buf.append(c);
                    break;
                }
            }
        }
    }

    /**
     * Splits the source on every occurrence of the delimiter character.
     * Empty pieces are kept, including leading and trailing ones, so the
     * result always has one more element than there are delimiters.
     *
     * <p>Note that this differs from <code>String.split(String)</code>,
     * which interprets its argument as a regular expression and drops
     * trailing empty strings.</p>
     *
     * @param source    The text to split; may be null
     * @param delimiter The character to split on
     * @return The pieces; {@link #EMPTY_STRINGS} when source is null or empty
     */
    public static String[] substringsOf(String source, char delimiter) {

        if (source == null || source.length() == 0) {
            return EMPTY_STRINGS;
        }

        final int length = source.length();

        int delimiterCount = 0;
        for (int i = 0; i < length; i++) {
            if (source.charAt(i) == delimiter) {
                delimiterCount++;
            }
        }

        if (delimiterCount == 0) {
            return new String[] { source };
        }

        String[] results = new String[delimiterCount + 1];
        int offset = 0;
        for (int i = 0; i < results.length; i++) {
            int pos = source.indexOf(delimiter, offset);
            if (pos < 0) {
                // Last piece: runs to the end of the source.
                pos = length;
            }
            results[i] = source.substring(offset, pos);
            offset = pos + 1;
        }

        return results;
    }

    /**
     * Splits the string on every non-overlapping occurrence of the separator,
     * scanning from left to right. Empty pieces are kept.
     *
     * @param str       The text to split; may be null
     * @param separator The separator text; must not be null. An empty
     *                  separator never matches, so the whole string is
     *                  returned as the only element
     * @return The pieces; {@link #EMPTY_STRINGS} when str is null or empty
     */
    public static String[] substringsOf(String str, String separator) {

        if (str == null || str.length() == 0) {
            return EMPTY_STRINGS;
        }

        final int separatorLength = separator.length();
        // An empty separator matches at every index without advancing the
        // scan position, so it must be rejected before entering the loop.
        if (separatorLength == 0) {
            return new String[] { str };
        }

        int index = str.indexOf(separator);
        if (index == -1) {
            return new String[] { str };
        }

        List<String> pieces = new ArrayList<String>();
        int currPos = 0;
        while (index != -1) {
            pieces.add(str.substring(currPos, index));
            currPos = index + separatorLength;
            index = str.indexOf(separator, currPos);
        }
        pieces.add(str.substring(currPos));
        return pieces.toArray(new String[pieces.size()]);
    }

    /**
     * Copies the elements returned by the iterator onto the string buffer
     * each delimited by the separator. Nothing is appended when the
     * iterator has no elements.
     *
     * @param sb        The buffer to append to
     * @param iter      The source of the elements
     * @param separator The text placed between consecutive elements
     */
    public static void asStringOn(StringBuffer sb, Iterator<?> iter, String separator) {

        if (!iter.hasNext()) {
            return;
        }

        sb.append(iter.next());

        while (iter.hasNext()) {
            sb.append(separator);
            sb.append(iter.next());
        }
    }

    /**
     * Return the length of the shortest string in the array.
     * If any one of them is null then it returns 0.
     * A null or empty array also yields 0.
     *
     * @param strings The strings to inspect
     * @return The length of the shortest string, or 0 as described above
     */
    public static int lengthOfShortestIn(String[] strings) {

        if (strings == null || strings.length == 0) {
            return 0;
        }

        int minLength = Integer.MAX_VALUE;

        for (int i = 0; i < strings.length; i++) {
            if (strings[i] == null) {
                return 0;
            }
            minLength = Math.min(minLength, strings[i].length());
        }

        return minLength;
    }

    /**
     * Determine the maximum number of common leading whitespace characters
     * the strings share in the same sequence. Useful for determining how
     * many leading characters can be removed to shift all the text in the
     * strings to the left without misaligning them.
     *
     * @param strings The strings to inspect
     * @return The number of identical leading whitespace characters; 0 when
     *         the array is null or empty or contains a null or empty string
     */
    public static int maxCommonLeadingWhitespaceForAll(String[] strings) {

        int shortest = lengthOfShortestIn(strings);
        if (shortest == 0) {
            return 0;
        }

        for (int m = 0; m < shortest; m++) {
            // The first string supplies the reference character for column m.
            char expected = strings[0].charAt(m);
            if (!Character.isWhitespace(expected)) {
                return m;
            }
            for (int i = 0; i < strings.length; i++) {
                if (strings[i].charAt(m) != expected) {
                    return m;
                }
            }
        }

        return shortest;
    }

    /**
     * Trims off the leading characters off the strings up to the trimDepth
     * specified. Returns the same array if trimDepth = 0, otherwise a new
     * array is returned.
     *
     * @param strings   The strings to trim
     * @param trimDepth The number of leading characters to remove from each string
     * @return The trimmed strings
     * @throws StringIndexOutOfBoundsException if trimDepth is negative or
     *         larger than the length of one of the strings
     */
    public static String[] trimStartOn(String[] strings, int trimDepth) {

        if (trimDepth == 0) {
            return strings;
        }

        String[] results = new String[strings.length];
        for (int i = 0; i < strings.length; i++) {
            results[i] = strings[i].substring(trimDepth);
        }
        return results;
    }

    /**
     * Left pads a string with spaces.
     *
     * @param s      The String to pad
     * @param length The desired minimum length of the resulting padded String
     * @return The resulting left padded String; <code>s</code> itself when it
     *         is already at least <code>length</code> characters long
     */
    public static String lpad(String s, int length) {
        int padding = length - s.length();
        if (padding <= 0) {
            return s;
        }
        char[] spaces = new char[padding];
        java.util.Arrays.fill(spaces, ' ');
        return new StringBuilder(length).append(spaces).append(s).toString();
    }

    /**
     * Are the two String values the same.
     * The Strings can be optionally trimmed before checking.
     * The Strings can be optionally compared ignoring case.
     * The Strings can have embedded whitespace standardized before comparing.
     * Two null values are treated as equal.
     *
     * @param s1 The first String.
     * @param s2 The second String.
     * @param trim Indicates if the Strings should be trimmed before comparison.
     * @param ignoreCase Indicates if the case of the Strings should be ignored during comparison.
     * @param standardizeWhitespace Indicates if the embedded whitespace should be standardized before comparison.
     * @return <code>true</code> if the Strings are the same, <code>false</code> otherwise.
     */
    public static boolean isSame(String s1, String s2, boolean trim, boolean ignoreCase, boolean standardizeWhitespace) {
        // Identity check; this is also what makes two nulls compare as equal.
        if (s1 == s2) {
            return true;
        }
        if (s1 == null || s2 == null) {
            return false;
        }

        String left = s1;
        String right = s2;
        if (trim) {
            left = left.trim();
            right = right.trim();
        }
        if (standardizeWhitespace) {
            // Replace each run of whitespace with a single space character.
            left = WHITESPACE_RUN.matcher(left).replaceAll(" ");
            right = WHITESPACE_RUN.matcher(right).replaceAll(" ");
        }
        return ignoreCase ? left.equalsIgnoreCase(right) : left.equals(right);
    }
}