001/* 002 * Sonar, open source software quality management tool. 003 * Copyright (C) 2008-2012 SonarSource 004 * mailto:contact AT sonarsource DOT com 005 * 006 * Sonar is free software; you can redistribute it and/or 007 * modify it under the terms of the GNU Lesser General Public 008 * License as published by the Free Software Foundation; either 009 * version 3 of the License, or (at your option) any later version. 010 * 011 * Sonar is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 014 * Lesser General Public License for more details. 015 * 016 * You should have received a copy of the GNU Lesser General Public 017 * License along with Sonar; if not, write to the Free Software 018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02 019 */ 020package org.sonar.duplications.token; 021 022import java.io.Reader; 023import java.io.StringReader; 024 025import org.sonar.channel.ChannelDispatcher; 026import org.sonar.channel.CodeReader; 027import org.sonar.duplications.DuplicationsException; 028 029public final class TokenChunker { 030 031 private final ChannelDispatcher<TokenQueue> channelDispatcher; 032 033 public static Builder builder() { 034 return new Builder(); 035 } 036 037 private TokenChunker(Builder builder) { 038 this.channelDispatcher = builder.getChannelDispatcher(); 039 } 040 041 public TokenQueue chunk(String sourceCode) { 042 return chunk(new StringReader(sourceCode)); 043 } 044 045 public TokenQueue chunk(Reader reader) { 046 CodeReader code = new CodeReader(reader); 047 TokenQueue queue = new TokenQueue(); 048 try { 049 channelDispatcher.consume(code, queue); 050 return queue; 051 } catch (Exception e) { 052 throw new DuplicationsException("Unable to lex source code at line : " + code.getLinePosition() + " and column : " + code.getColumnPosition(), e); 053 } 054 } 055 056 /** 057 * Note that order is important, e.g. 058 * <code>token("A").ignore("A")</code> for the input string "A" will produce token, whereas 059 * <code>ignore("A").token("A")</code> will not. 060 */ 061 public static final class Builder { 062 063 private ChannelDispatcher.Builder channelDispatcherBuilder = ChannelDispatcher.builder(); 064 065 private Builder() { 066 } 067 068 public TokenChunker build() { 069 return new TokenChunker(this); 070 } 071 072 /** 073 * Defines that sequence of characters must be ignored, if it matches specified regular expression. 074 */ 075 public Builder ignore(String regularExpression) { 076 channelDispatcherBuilder.addChannel(new BlackHoleTokenChannel(regularExpression)); 077 return this; 078 } 079 080 /** 081 * Defines that sequence of characters, which is matched specified regular expression, is a token. 082 */ 083 public Builder token(String regularExpression) { 084 channelDispatcherBuilder.addChannel(new TokenChannel(regularExpression)); 085 return this; 086 } 087 088 /** 089 * Defines that sequence of characters, which is matched specified regular expression, is a token with specified value. 090 */ 091 public Builder token(String regularExpression, String normalizationValue) { 092 channelDispatcherBuilder.addChannel(new TokenChannel(regularExpression, normalizationValue)); 093 return this; 094 } 095 096 private ChannelDispatcher<TokenQueue> getChannelDispatcher() { 097 return channelDispatcherBuilder.build(); 098 } 099 100 } 101 102}