author     Michael Bien <[email protected]>    2011-01-17 01:11:23 +0100
committer  Michael Bien <[email protected]>    2011-01-17 01:11:23 +0100
commit     239c5694cb08d72c042b8cfaa2609200e87e94d8 (patch)
tree       bc50d07db2d57908bf3f76c0dcc574f19575afdd
parent     5f199d741fa2a4c7307dc018f4aef7d8ab7bc0f3 (diff)
duplicate entries are now ignored.
-rw-r--r--  src/main/java/com/jogamp/hungryharry/FeedAggregator.java  30
1 file changed, 23 insertions, 7 deletions
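
The change to downloadFeeds below boils down to one rule: the first entry seen for a given link wins, and any later entry with the same link is skipped. A minimal standalone sketch of that rule, using plain strings in place of ROME's SyndEntry so it runs without dependencies (DedupExample and dropDuplicates are illustrative names, not part of the patch):

    import java.util.ArrayList;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    public class DedupExample {

        // Keeps the first occurrence of each link and drops later duplicates,
        // mirroring the HashSet-based check the patch adds to downloadFeeds().
        public static List<String> dropDuplicates(List<String> links) {
            Set<String> seen = new HashSet<String>();
            List<String> unique = new ArrayList<String>();
            for (String link : links) {
                if (seen.add(link)) { // add() returns false if the link was already present
                    unique.add(link);
                }
            }
            return unique;
        }

        public static void main(String[] args) {
            List<String> links = new ArrayList<String>();
            links.add("http://example.org/post-1");
            links.add("http://example.org/post-2");
            links.add("http://example.org/post-1"); // duplicate, dropped
            System.out.println(dropDuplicates(links)); // [http://example.org/post-1, http://example.org/post-2]
        }
    }

Note that Set.add already reports whether the element was new, so the contains()/add() pair in the patch collapses into a single call here; the behavior is the same.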
diff --git a/src/main/java/com/jogamp/hungryharry/FeedAggregator.java b/src/main/java/com/jogamp/hungryharry/FeedAggregator.java
index 1750264..cc009f2 100644
--- a/src/main/java/com/jogamp/hungryharry/FeedAggregator.java
+++ b/src/main/java/com/jogamp/hungryharry/FeedAggregator.java
@@ -33,7 +33,9 @@ import java.io.Writer;
 import java.util.Comparator;
 import java.util.Date;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Map;
+import java.util.Set;
 import java.util.logging.Logger;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -52,7 +54,7 @@ import static java.io.File.*;
  *
  */
 public class FeedAggregator {
-    
+
     private static final Logger LOG = Logger.getLogger(FeedAggregator.class.getName());
 
     private final String configFile;
@@ -143,7 +145,8 @@ public class FeedAggregator {
         int max = planet.maxEntriesPerPage;
         int pages = (int) Math.ceil(aggregatedEntries.size() / (float)max);
 
-        System.out.println(pages);
+        LOG.info("pagecount: "+pages);
+
         for(int i = 0; i < pages; i++) {
             List<Map<String, Object>> subList = aggregatedEntries.subList(i * max, Math.min(i * max + max, aggregatedEntries.size()-1));
             generatePage(i, pages, syndFeeds, subList, planet);
@@ -222,21 +225,34 @@ public class FeedAggregator {
         }
     }
 
-    private List<SyndEntry> downloadFeeds(List<Feed> feeds, List<SyndFeed> list) throws IllegalArgumentException {
+    private List<SyndEntry> downloadFeeds(List<Feed> feeds, List<SyndFeed> downloadedFeeds) throws IllegalArgumentException {
 
         FeedFetcherCache feedInfoCache = HashMapFeedInfoCache.getInstance();
         FeedFetcher feedFetcher = new HttpURLFeedFetcher(feedInfoCache);
         List<SyndEntry> collectedEntries = new ArrayList<SyndEntry>();
-        
+
+        Set<String> ids = new HashSet<String>();
+
         for (Config.Feed feed : feeds) {
             LOG.info("downloading "+feed);
             try {
                 SyndFeed inFeed = feedFetcher.retrieveFeed(new URL(feed.url));
-                list.add(inFeed);
-                List entries = inFeed.getEntries();
+                downloadedFeeds.add(inFeed);
+                List<SyndEntry> entries = inFeed.getEntries();
                 LOG.info("downloaded "+entries.size()+ " entries");
-                collectedEntries.addAll(entries);
+
+                //skip duplicates
+                for (SyndEntry entry : entries) {
+                    String uid = entry.getLink();
+                    if(!ids.contains(uid)) {
+                        ids.add(uid);
+                        collectedEntries.add(entry);
+                    }else{
+                        LOG.info("skiping duplicate entry: "+uid);
+                    }
+                }
+
             } catch (Exception ex) {
                 LOG.log(WARNING, "skipping feed", ex);
             }
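
For reference, the paging arithmetic around the relogged hunk at line 145: the page count is ceil(total / max), and page i spans the half-open range [i*max, min(i*max + max, total)). A hedged sketch of those bounds (PagingExample and paginate are illustrative names; note that List.subList treats its upper bound as exclusive, so this sketch caps at size() rather than the size()-1 seen in the unchanged context line, which would leave the last entry off the final page):

    import java.util.ArrayList;
    import java.util.List;

    public class PagingExample {

        // Splits entries into pages of at most max items:
        // pages = ceil(total / max), page i = [i*max, min((i+1)*max, total)).
        public static <T> List<List<T>> paginate(List<T> entries, int max) {
            int pages = (int) Math.ceil(entries.size() / (float) max);
            List<List<T>> result = new ArrayList<List<T>>();
            for (int i = 0; i < pages; i++) {
                int from = i * max;
                int to = Math.min(from + max, entries.size()); // exclusive upper bound
                result.add(entries.subList(from, to));
            }
            return result;
        }

        public static void main(String[] args) {
            List<Integer> items = new ArrayList<Integer>();
            for (int i = 0; i < 7; i++) items.add(i);
            System.out.println(paginate(items, 3)); // [[0, 1, 2], [3, 4, 5], [6]]
        }
    }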