VisClient/org/hfbk/vis/source/SourceYoutube.java

Go to the documentation of this file.
00001 package org.hfbk.vis.source;
00002 
00003 import java.io.IOException;
00004 import java.io.UnsupportedEncodingException;
00005 import java.net.URL;
00006 import java.util.HashSet;
00007 import java.util.regex.Matcher;
00008 import java.util.regex.Pattern;
00009 
00010 import org.dronus.graph.Node;
00011 import org.hfbk.util.HTTPUtils;
00012 import org.hfbk.util.Sleeper;
00013 
00030 public class SourceYoutube extends SourceRegExp {
00031 
00032 Pattern endOfInterestingPart=Pattern.compile(matcher());
00036         HashSet<String> parsed=new HashSet<String>();
00037         
00039         int subfetches=0;
00040         
00041         
00042         public SourceYoutube(URL url) {
00043                 super(url);
00044                 root=add(keyword, "imagefield", null); 
00045                 String start=getParam(url, "start");
00046                 if (start==null) start="0";
00047                 add(""+Integer.parseInt(start)+1, "next", root);
00048         }
00049 
00050         
00051         String matcher() {
00052                 return "(?i)href=\"(/watch\\?v=[^\"]*)\".*?img.*?src=\"([^\"]*)\".*?title=\"[^\"]*\"";
00053         }
00054 
00055         String url() throws UnsupportedEncodingException {
00056                 String start=getParam(url, "start");
00057                 if (start==null) start="";
00058                 return "http://youtube.com/results?search_query="+HTTPUtils.encode(keyword)
00059                         +"&page="+HTTPUtils.encode(start);
00060 
00061         }
00062 
00069         void parse(final String[] matches) throws IOException {
00070                 if (parsed.contains(matches[1]) || parsed.size()>=8) return;
00071                 parsed.add(matches[1]);
00072                 
00073                 // now launch a subfetch to parse the movie page for this search result
00074                 Thread subfetcher=new Thread(){
00075                         public void run(){
00076                                 String playerURL="http://youtube.com"+matches[1];
00077                                 
00078                                 String playerpage=HTTPUtils.fetchUntil(playerURL, endOfInterestingPart, silent);
00079                 
00080                                 Pattern p=Pattern.compile("(?i)\"video_id\": \"(.*?)\".*?\"t\": \"(.*?)\"");
00081                                 Matcher m=p.matcher(playerpage);
00082                                 m.find();
00083                                 String fetchURL="http://www.youtube.com/get_video?video_id="+m.group(1)+"&t="+m.group(2);
00084                 
00085                                 synchronized(graph){
00086                                         Node video=add(fetchURL, "video", root);
00087                                         add(matches[2], "thumbnail", video);
00088                                         add(playerURL, "URL", video);
00089                                 }
00090                                 subfetches--;
00091                         }
00092                 };
00093                 subfetcher.start();
00094                 subfetches++;
00095         }
00096         
00098         void buildGraph() throws IOException {
00099                 super.buildGraph();
00100                 while(subfetches>0)
00101                         Sleeper.sleep(10);              
00102         }
00103 }

Generated on Tue Apr 7 17:57:20 2009 for visclient by  doxygen 1.5.1