001/*
002 * SonarQube
003 * Copyright (C) 2009-2017 SonarSource SA
004 * mailto:info AT sonarsource DOT com
005 *
006 * This program is free software; you can redistribute it and/or
007 * modify it under the terms of the GNU Lesser General Public
008 * License as published by the Free Software Foundation; either
009 * version 3 of the License, or (at your option) any later version.
010 *
011 * This program is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014 * Lesser General Public License for more details.
015 *
016 * You should have received a copy of the GNU Lesser General Public License
017 * along with this program; if not, write to the Free Software Foundation,
018 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
019 */
020package org.sonar.api.utils;
021
022import java.util.Collections;
023import java.util.HashMap;
024import java.util.Map;
025import java.util.regex.Pattern;
026import javax.annotation.Nullable;
027import javax.annotation.concurrent.ThreadSafe;
028
029import org.apache.commons.lang.StringUtils;
030
031/**
032 * Implementation of Ant-style matching patterns.
033 * Contrary to other implementations (like AntPathMatcher from Spring Framework) it is based on {@link Pattern Java Regular Expressions}.
034 * To increase performance it holds an internal cache of all processed patterns.
035 * <p>
036 * Following rules are applied:
037 * <ul>
038 * <li>? matches single character</li>
039 * <li>* matches zero or more characters</li>
040 * <li>** matches zero or more 'directories'</li>
041 * </ul>
042 * <p>
043 * Some examples of patterns:
044 * <ul>
045 * <li><code>org/T?st.java</code> - matches <code>org/Test.java</code> and also <code>org/Tost.java</code></li>
046 * <li><code>org/*.java</code> - matches all <code>.java</code> files in the <code>org</code> directory,
047 * e.g. <code>org/Foo.java</code> or <code>org/Bar.java</code></li>
048 * <li><code>org/**</code> - matches all files underneath the <code>org</code> directory,
049 * e.g. <code>org/Foo.java</code> or <code>org/foo/bar.jsp</code></li>
050 * <li><code>org/&#42;&#42;/Test.java</code> - matches all <code>Test.java</code> files underneath the <code>org</code> directory,
051 * e.g. <code>org/Test.java</code> or <code>org/foo/Test.java</code> or <code>org/foo/bar/Test.java</code></li>
052 * <li><code>org/&#42;&#42;/*.java</code> - matches all <code>.java</code> files underneath the <code>org</code> directory,
053 * e.g. <code>org/Foo.java</code> or <code>org/foo/Bar.java</code> or <code>org/foo/bar/Baz.java</code></li>
054 * </ul>
055 * <p>
056 * Another implementation, which is also based on Java Regular Expressions, can be found in
057 * <a href="https://github.com/JetBrains/intellij-community/blob/idea/107.743/platform/util/src/com/intellij/openapi/util/io/FileUtil.java#L847">FileUtil</a>
058 * from IntelliJ OpenAPI.
059 * 
060 * @since 1.10
061 */
062@ThreadSafe
063public class WildcardPattern {
064
065  private static final Map<String, WildcardPattern> CACHE = Collections.synchronizedMap(new HashMap<>());
066  private static final String SPECIAL_CHARS = "()[]^$.{}+|";
067
068  private Pattern pattern;
069  private String stringRepresentation;
070
071  protected WildcardPattern(String pattern, String directorySeparator) {
072    this.stringRepresentation = pattern;
073    this.pattern = Pattern.compile(toRegexp(pattern, directorySeparator));
074  }
075
076  private static String toRegexp(String antPattern, String directorySeparator) {
077    final String escapedDirectorySeparator = '\\' + directorySeparator;
078
079    final StringBuilder sb = new StringBuilder(antPattern.length());
080
081    sb.append('^');
082
083    int i = antPattern.startsWith("/") || antPattern.startsWith("\\") ? 1 : 0;
084    while (i < antPattern.length()) {
085      final char ch = antPattern.charAt(i);
086
087      if (SPECIAL_CHARS.indexOf(ch) != -1) {
088        // Escape regexp-specific characters
089        sb.append('\\').append(ch);
090      } else if (ch == '*') {
091        if (i + 1 < antPattern.length() && antPattern.charAt(i + 1) == '*') {
092          // Double asterisk
093          // Zero or more directories
094          if (i + 2 < antPattern.length() && isSlash(antPattern.charAt(i + 2))) {
095            sb.append("(?:.*").append(escapedDirectorySeparator).append("|)");
096            i += 2;
097          } else {
098            sb.append(".*");
099            i += 1;
100          }
101        } else {
102          // Single asterisk
103          // Zero or more characters excluding directory separator
104          sb.append("[^").append(escapedDirectorySeparator).append("]*?");
105        }
106      } else if (ch == '?') {
107        // Any single character excluding directory separator
108        sb.append("[^").append(escapedDirectorySeparator).append("]");
109      } else if (isSlash(ch)) {
110        // Directory separator
111        sb.append(escapedDirectorySeparator);
112      } else {
113        // Single character
114        sb.append(ch);
115      }
116
117      i++;
118    }
119
120    sb.append('$');
121
122    return sb.toString();
123  }
124
125  private static boolean isSlash(char ch) {
126    return ch == '/' || ch == '\\';
127  }
128
129  /**
130   * Returns string representation of this pattern.
131   * 
132   * @since 2.5
133   */
134  @Override
135  public String toString() {
136    return stringRepresentation;
137  }
138
139  /**
140   * Returns true if specified value matches this pattern.
141   */
142  public boolean match(String value) {
143    value = StringUtils.removeStart(value, "/");
144    value = StringUtils.removeEnd(value, "/");
145    return pattern.matcher(value).matches();
146  }
147
148  /**
149   * Returns true if specified value matches one of specified patterns.
150   * 
151   * @since 2.4
152   */
153  public static boolean match(WildcardPattern[] patterns, String value) {
154    for (WildcardPattern pattern : patterns) {
155      if (pattern.match(value)) {
156        return true;
157      }
158    }
159    return false;
160  }
161
162  /**
163   * Creates pattern with "/" as a directory separator.
164   * 
165   * @see #create(String, String)
166   */
167  public static WildcardPattern create(String pattern) {
168    return create(pattern, "/");
169  }
170
171  /**
172   * Creates array of patterns with "/" as a directory separator.
173   * 
174   * @see #create(String, String)
175   */
176  public static WildcardPattern[] create(@Nullable String[] patterns) {
177    if (patterns == null) {
178      return new WildcardPattern[0];
179    }
180    WildcardPattern[] exclusionPAtterns = new WildcardPattern[patterns.length];
181    for (int i = 0; i < patterns.length; i++) {
182      exclusionPAtterns[i] = create(patterns[i]);
183    }
184    return exclusionPAtterns;
185  }
186
187  /**
188   * Creates pattern with specified separator for directories.
189   * <p>
190   * This is used to match Java-classes, i.e. <code>org.foo.Bar</code> against <code>org/**</code>.
191   * <b>However usage of character other than "/" as a directory separator is misleading and should be avoided,
192   * so method {@link #create(String)} is preferred over this one.</b>
193   * 
194   * <p>
195   * Also note that no matter whether forward or backward slashes were used in the <code>antPattern</code>
196   * the returned pattern will use <code>directorySeparator</code>.
197   * Thus to match Windows-style path "dir\file.ext" against pattern "dir/file.ext" normalization should be performed.
198   * 
199   */
200  public static WildcardPattern create(String pattern, String directorySeparator) {
201    String key = pattern + directorySeparator;
202    return CACHE.computeIfAbsent(key, k -> new WildcardPattern(pattern, directorySeparator));
203  }
204}