001    /*
002     * Sonar, open source software quality management tool.
003     * Copyright (C) 2008-2012 SonarSource
004     * mailto:contact AT sonarsource DOT com
005     *
006     * Sonar is free software; you can redistribute it and/or
007     * modify it under the terms of the GNU Lesser General Public
008     * License as published by the Free Software Foundation; either
009     * version 3 of the License, or (at your option) any later version.
010     *
011     * Sonar is distributed in the hope that it will be useful,
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014     * Lesser General Public License for more details.
015     *
016     * You should have received a copy of the GNU Lesser General Public
017     * License along with Sonar; if not, write to the Free Software
018     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02
019     */
020    package org.sonar.duplications.token;
021    
022    import java.io.Reader;
023    import java.io.StringReader;
024    
025    import org.sonar.channel.ChannelDispatcher;
026    import org.sonar.channel.CodeReader;
027    import org.sonar.duplications.DuplicationsException;
028    
029    public final class TokenChunker {
030    
031      private final ChannelDispatcher<TokenQueue> channelDispatcher;
032    
033      public static Builder builder() {
034        return new Builder();
035      }
036    
037      private TokenChunker(Builder builder) {
038        this.channelDispatcher = builder.getChannelDispatcher();
039      }
040    
041      public TokenQueue chunk(String sourceCode) {
042        return chunk(new StringReader(sourceCode));
043      }
044    
045      public TokenQueue chunk(Reader reader) {
046        CodeReader code = new CodeReader(reader);
047        TokenQueue queue = new TokenQueue();
048        try {
049          channelDispatcher.consume(code, queue);
050          return queue;
051        } catch (Exception e) {
052          throw new DuplicationsException("Unable to lex source code at line : " + code.getLinePosition() + " and column : " + code.getColumnPosition(), e);
053        }
054      }
055    
056      /**
057       * Note that order is important, e.g.
058       * <code>token("A").ignore("A")</code> for the input string "A" will produce token, whereas
059       * <code>ignore("A").token("A")</code> will not.
060       */
061      public static final class Builder {
062    
063        private ChannelDispatcher.Builder channelDispatcherBuilder = ChannelDispatcher.builder();
064    
065        private Builder() {
066        }
067    
068        public TokenChunker build() {
069          return new TokenChunker(this);
070        }
071    
072        /**
073         * Defines that sequence of characters must be ignored, if it matches specified regular expression.
074         */
075        public Builder ignore(String regularExpression) {
076          channelDispatcherBuilder.addChannel(new BlackHoleTokenChannel(regularExpression));
077          return this;
078        }
079    
080        /**
081         * Defines that sequence of characters, which is matched specified regular expression, is a token.
082         */
083        public Builder token(String regularExpression) {
084          channelDispatcherBuilder.addChannel(new TokenChannel(regularExpression));
085          return this;
086        }
087    
088        /**
089         * Defines that sequence of characters, which is matched specified regular expression, is a token with specified value.
090         */
091        public Builder token(String regularExpression, String normalizationValue) {
092          channelDispatcherBuilder.addChannel(new TokenChannel(regularExpression, normalizationValue));
093          return this;
094        }
095    
096        private ChannelDispatcher<TokenQueue> getChannelDispatcher() {
097          return channelDispatcherBuilder.build();
098        }
099    
100      }
101    
102    }