VisClient/org/hfbk/vis/source/SourceWikipedia.java

Go to the documentation of this file.
00001 package org.hfbk.vis.source;
00002 
00003 import java.io.IOException;
00004 import java.net.URL;
00005 import java.util.regex.Matcher;
00006 import java.util.regex.Pattern;
00007 
00008 import org.dronus.graph.Node;
00009 import org.hfbk.util.HTMLUtils;
00010 import org.hfbk.util.HTTPUtils;
00011 
00012 public class SourceWikipedia extends SourceRegExp {
00013         
00014         public SourceWikipedia(URL url) {               
00015                 super(url);
00016                 root=add(keyword,"keycloud",null);              
00017         }
00018 
00019         String url(){
00020                 return "http://www.google.de/search?btnI=yes&q=inurl:wikipedia.org+"+HTTPUtils.encode(keyword);
00021         }
00022         
00023         String matcher(){
00024                 return "(?si)(?:href=\"/wiki/([^:\"]*?)\")|(?:src=\"(http://upload.[^\"]*?)\")";
00025         }
00026         
00027         void buildGraph() throws IOException {          
00028                 
00029                 String page=HTTPUtils.fetch(url(), silent);
00030                 
00031                 page=page.substring(page.indexOf("<!-- start content -->"), page.lastIndexOf("<!-- end content -->"));
00032         
00033                 Pattern p=Pattern.compile(matcher());
00034                 Matcher matcher=p.matcher(page);
00035                 while (matcher.find()){
00036                         String imgMatch=matcher.group(2);
00037                         if(imgMatch!=null) {
00038                                 String imgUrl=imgMatch.replace("thumb/","");
00039                                 imgUrl=imgUrl.substring(0,imgUrl.lastIndexOf('/'));
00040                                 Node img=add(imgUrl,"image",root);
00041                                 add(imgMatch, "thumbnail", img);
00042                         }
00043                         else add(matcher.group(1),"keyword",root);
00044                 }
00045         }
00046         
00047         //unused
00048         void parse(String[] matches) throws IOException {}
00049 }

Generated on Tue Apr 7 17:57:20 2009 for visclient by  doxygen 1.5.1