001 /*
002 * Sonar, open source software quality management tool.
003 * Copyright (C) 2008-2011 SonarSource
004 * mailto:contact AT sonarsource DOT com
005 *
006 * Sonar is free software; you can redistribute it and/or
007 * modify it under the terms of the GNU Lesser General Public
008 * License as published by the Free Software Foundation; either
009 * version 3 of the License, or (at your option) any later version.
010 *
011 * Sonar is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
014 * Lesser General Public License for more details.
015 *
016 * You should have received a copy of the GNU Lesser General Public
017 * License along with Sonar; if not, write to the Free Software
018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02
019 */
020
021 /**
022 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
023 */
024 package org.sonar.duplications.cpd;
025
026 import java.util.ArrayList;
027 import java.util.Collections;
028 import java.util.HashMap;
029 import java.util.HashSet;
030 import java.util.Iterator;
031 import java.util.List;
032 import java.util.Map;
033 import java.util.Set;
034
035 import net.sourceforge.pmd.cpd.TokenEntry;
036
037 public class MatchCollector {
038
039 private MatchAlgorithm ma;
040 private Map<Match.MatchCode, Match> startMap = new HashMap<Match.MatchCode, Match>();
041 private Map<String, List<Match>> fileMap = new HashMap<String, List<Match>>();
042
043 public MatchCollector(MatchAlgorithm ma) {
044 this.ma = ma;
045 }
046
047 public void collect(List<TokenEntry> marks) {
048 // first get a pairwise collection of all maximal matches
049 for (int i = 0; i < marks.size() - 1; i++) {
050 TokenEntry mark1 = marks.get(i);
051 for (int j = i + 1; j < marks.size(); j++) {
052 TokenEntry mark2 = marks.get(j);
053 int diff = mark1.getIndex() - mark2.getIndex();
054 if ( -diff < ma.getMinimumTileSize()) {
055 continue;
056 }
057 if (hasPreviousDupe(mark1, mark2)) {
058 continue;
059 }
060
061 // "match too small" check
062 int dupes = countDuplicateTokens(mark1, mark2);
063 if (dupes < ma.getMinimumTileSize()) {
064 continue;
065 }
066 // is it still too close together
067 if (diff + dupes >= 1) {
068 continue;
069 }
070 determineMatch(mark1, mark2, dupes);
071 }
072 }
073 }
074
075 @SuppressWarnings("PMD.CompareObjectsWithEquals")
076 public List<Match> getMatches() {
077 List<Match> matchList = new ArrayList<Match>(startMap.values());
078 Collections.sort(matchList);
079 Set<Match.MatchCode> matchSet = new HashSet<Match.MatchCode>();
080 Match.MatchCode matchCode = new Match.MatchCode();
081 for (int i = matchList.size(); i > 1; i--) {
082 Match match1 = matchList.get(i - 1);
083 TokenEntry mark1 = match1.getMarkSet().iterator().next();
084 matchSet.clear();
085 matchSet.add(match1.getMatchCode());
086 for (int j = i - 1; j > 0; j--) {
087 Match match2 = matchList.get(j - 1);
088 if (match1.getTokenCount() != match2.getTokenCount()) {
089 break;
090 }
091 TokenEntry mark2 = null;
092 for (Iterator<TokenEntry> iter = match2.getMarkSet().iterator(); iter.hasNext();) {
093 mark2 = iter.next();
094 if (mark2 != mark1) {
095 break;
096 }
097 }
098 int dupes = countDuplicateTokens(mark1, mark2);
099 if (dupes < match1.getTokenCount()) {
100 break;
101 }
102 matchSet.add(match2.getMatchCode());
103 match1.getMarkSet().addAll(match2.getMarkSet());
104 matchList.remove(i - 2);
105 i--;
106 }
107 if (matchSet.size() == 1) {
108 continue;
109 }
110 // prune the mark set
111 Set<TokenEntry> pruned = match1.getMarkSet();
112 boolean done = false;
113 ArrayList<TokenEntry> a1 = new ArrayList<TokenEntry>(match1.getMarkSet());
114 Collections.sort(a1);
115 for (int outer = 0; outer < a1.size() - 1 && !done; outer++) {
116 TokenEntry cmark1 = a1.get(outer);
117 for (int inner = outer + 1; inner < a1.size() && !done; inner++) {
118 TokenEntry cmark2 = a1.get(inner);
119 matchCode.setFirst(cmark1.getIndex());
120 matchCode.setSecond(cmark2.getIndex());
121 if ( !matchSet.contains(matchCode)) {
122 if (pruned.size() > 2) {
123 pruned.remove(cmark2);
124 }
125 if (pruned.size() == 2) {
126 done = true;
127 }
128 }
129 }
130 }
131 }
132 return matchList;
133 }
134
135 /**
136 * A greedy algorithm for determining non-overlapping matches
137 */
138 private void determineMatch(TokenEntry mark1, TokenEntry mark2, int dupes) {
139 Match match = new Match(dupes, mark1, mark2);
140 String fileKey = mark1.getTokenSrcID() + mark2.getTokenSrcID();
141 List<Match> pairMatches = fileMap.get(fileKey);
142 if (pairMatches == null) {
143 pairMatches = new ArrayList<Match>();
144 fileMap.put(fileKey, pairMatches);
145 }
146 boolean add = true;
147 for (int i = 0; i < pairMatches.size(); i++) {
148 Match other = pairMatches.get(i);
149 if (other.getFirstMark().getIndex() + other.getTokenCount() - mark1.getIndex() > 0) {
150 boolean ordered = other.getSecondMark().getIndex() - mark2.getIndex() < 0;
151 if ((ordered && (other.getEndIndex() - mark2.getIndex() > 0))
152 || ( !ordered && (match.getEndIndex() - other.getSecondMark().getIndex()) > 0)) {
153 if (other.getTokenCount() >= match.getTokenCount()) {
154 add = false;
155 break;
156 } else {
157 pairMatches.remove(i);
158 startMap.remove(other.getMatchCode());
159 }
160 }
161 }
162 }
163 if (add) {
164 pairMatches.add(match);
165 startMap.put(match.getMatchCode(), match);
166 }
167 }
168
169 private boolean hasPreviousDupe(TokenEntry mark1, TokenEntry mark2) {
170 if (mark1.getIndex() == 0) {
171 return false;
172 }
173 return !matchEnded(ma.tokenAt( -1, mark1), ma.tokenAt( -1, mark2));
174 }
175
176 private int countDuplicateTokens(TokenEntry mark1, TokenEntry mark2) {
177 int index = 0;
178 while ( !matchEnded(ma.tokenAt(index, mark1), ma.tokenAt(index, mark2))) {
179 index++;
180 }
181 return index;
182 }
183
184 private boolean matchEnded(TokenEntry token1, TokenEntry token2) {
185 return token1.getIdentifier() != token2.getIdentifier() || token1 == TokenEntry.EOF || token2 == TokenEntry.EOF;
186 }
187 }