VisClient/org/hfbk/vis/source/SourceGoogle.java

Go to the documentation of this file.
00001 package org.hfbk.vis.source;
00002 
00003 import java.net.URL;
00004 
00005 import org.dronus.graph.Node;
00006 import org.hfbk.util.HTMLUtils;
00007 import org.hfbk.util.HTTPUtils;
00008 
00009 public class SourceGoogle extends SourceRegExp {
00010         
00011         public SourceGoogle(URL url) {          
00012                 super(url);
00013                 root=add(keyword,"keyword",null);               
00014         }
00015 
00016         String url(){
00017                 return "http://www.google.com/search?q="+HTTPUtils.encode(keyword);
00018         }
00019         
00020         String matcher(){
00021                 return "(?si)class=r><a href=\"(.*?)\".*?>(.*?)</a>.*?<div class=(.*?)>(.*?)<br";
00022         }
00023         
00024         void parse(String[] matches){
00025                 String text=HTMLUtils.clean(matches[4]);
00026                 
00027                 if (text.length()==0) return;
00028                 
00029                 Node textNode=add(text,"text",root);
00030                 add(matches[1], "URL" ,textNode);
00031                 add(HTMLUtils.clean(matches[2]), "headline", textNode); 
00032                 
00033                 //split out keywords
00034                 for(String kw : text.split(" ")){
00035                         //$kw=preg_replace('§[^\wüäöÜÄÖß]§','',$kw);
00036                         if (kw.length()>3) 
00037                                 add(HTMLUtils.removePunctuation(kw),"keyword",textNode);
00038                 }
00039         }       
00040 }

Generated on Tue Apr 7 17:57:20 2009 for visclient by  doxygen 1.5.1