001    /*
002     * Sonar, open source software quality management tool.
003     * Copyright (C) 2008-2011 SonarSource
004     * mailto:contact AT sonarsource DOT com
005     *
006     * Sonar is free software; you can redistribute it and/or
007     * modify it under the terms of the GNU Lesser General Public
008     * License as published by the Free Software Foundation; either
009     * version 3 of the License, or (at your option) any later version.
010     *
011     * Sonar is distributed in the hope that it will be useful,
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014     * Lesser General Public License for more details.
015     *
016     * You should have received a copy of the GNU Lesser General Public
017     * License along with Sonar; if not, write to the Free Software
018     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02
019     */
020    package org.sonar.colorizer;
021    
import java.util.Collections;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
027    
028    import org.sonar.channel.CodeReader;
029    
030    /**
031     * Detect case-sensitive keywords
032     */
033    public class KeywordsTokenizer extends NotThreadSafeTokenizer {
034    
035      private final String tagBefore;
036      private final String tagAfter;
037      private boolean caseInsensitive = false;
038      private Matcher matcher;
039      private final StringBuilder tmpBuilder = new StringBuilder();
040      private final static String DEFAULT_REGEX = "[a-zA-Z_][a-zA-Z0-9_]*+";
041    
042      private Set<String> keywords = new HashSet<String>();
043    
044      public KeywordsTokenizer(String tagBefore, String tagAfter, Set<String> keywords) {
045        this(tagBefore, tagAfter, keywords, DEFAULT_REGEX);
046      }
047    
048      public KeywordsTokenizer(String tagBefore, String tagAfter, Set<String> keywords, String regex) {
049        this.tagBefore = tagBefore;
050        this.tagAfter = tagAfter;
051        this.keywords = keywords;
052        this.matcher = Pattern.compile(regex).matcher("");
053      }
054    
055      public KeywordsTokenizer(String tagBefore, String tagAfter, String... keywords) {
056        this.tagBefore = tagBefore;
057        this.tagAfter = tagAfter;
058        Collections.addAll(this.keywords, keywords);
059        this.matcher = Pattern.compile(DEFAULT_REGEX).matcher("");
060      }
061    
062      public boolean consume(CodeReader code, HtmlCodeBuilder codeBuilder) {
063        if (code.popTo(matcher, tmpBuilder) > 0) {
064          if (isKeyword(tmpBuilder.toString())) {
065            codeBuilder.appendWithoutTransforming(tagBefore);
066            codeBuilder.append(tmpBuilder);
067            codeBuilder.appendWithoutTransforming(tagAfter);
068          } else {
069            codeBuilder.append(tmpBuilder);
070          }
071          tmpBuilder.delete(0, tmpBuilder.length());
072          return true;
073        }
074        return false;
075      }
076    
077      private boolean isKeyword(String word) {
078        if ( !caseInsensitive && keywords.contains(word)) {
079          return true;
080        } else if (caseInsensitive && keywords.contains(word.toUpperCase())) {
081          return true;
082        }
083        return false;
084      }
085    
086      public void setCaseInsensitive(boolean caseInsensitive) {
087        this.caseInsensitive = caseInsensitive;
088      }
089    
090      public KeywordsTokenizer clone() {
091        KeywordsTokenizer clone = new KeywordsTokenizer(tagBefore, tagAfter, keywords, matcher.pattern().pattern());
092        clone.caseInsensitive = caseInsensitive;
093        return clone;
094      }
095    }