Mercurial > hg > Members > nobuyasu > TestNeo4j
view src/wikigraph/LinkConvertGraph.java @ 21:f9ef906676eb draft
modify LinkToNode.java
author | one |
---|---|
date | Tue, 28 Aug 2012 14:04:15 +0900 |
parents | 2c3a10047ec6 |
children | fbf0cf550b06 |
line wrap: on
line source
package wikigraph; import howtouse.TextTagParser; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import org.neo4j.graphdb.GraphDatabaseService; import org.neo4j.graphdb.Node; import org.neo4j.graphdb.Transaction; import org.neo4j.kernel.EmbeddedGraphDatabase; import org.xml.sax.SAXException; public class LinkConvertGraph { private String filename; private FileInputStream fis; private SAXParserFactory factory; private SAXParser parser; private TextTagParser xmlParser; private HashMap<String,HashSet<String>> hash; LinkConvertGraph() throws ParserConfigurationException, SAXException { xmlParser = new TextTagParser(); factory = SAXParserFactory.newInstance(); parser = factory.newSAXParser(); } LinkConvertGraph(final String filename) throws FileNotFoundException, ParserConfigurationException, SAXException { this.filename = filename; fis = new FileInputStream(filename); xmlParser = new TextTagParser(); factory = SAXParserFactory.newInstance(); parser = factory.newSAXParser(); } private void setFilename(final String filename) throws FileNotFoundException { this.filename = filename; this.fis = new FileInputStream(filename); } private void parseXml() throws SAXException, IOException { parser.parse(this.fis, this.xmlParser); hash = xmlParser.getHash(); } private HashMap<String,HashSet<String>> getHash() { return hash; } private void printHash() { for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) { String title = entry.getKey(); System.out.println("title: " + title); for (String link : entry.getValue()) { System.out.println("\t"+link); } System.out.println(); } } private void printHash(FileOutputStream os) throws IOException { for (Map.Entry<String,HashSet<String>> entry : hash.entrySet()) { String title = entry.getKey(); os.write( ("title: " + title + "\n").getBytes()); for (String link : entry.getValue()) { os.write( ("\t"+link+"\n").getBytes()); } os.write( ("\n").getBytes()); os.flush(); } } public static void main(String[] args) { // final String filename = "./resource/article.xml"; final String filename = "/Users/aotokage/testProgram/wiki/ja-pages_current.xml"; LinkConvertGraph lcg; try { lcg = new LinkConvertGraph(filename); lcg.parseXml(); // lcg.printHash(); FileOutputStream fos = new FileOutputStream("./resource/wikilink.log"); lcg.printHash(fos); HashMap<String,HashSet<String>> hash = lcg.getHash(); GraphDatabaseService graphDb = new EmbeddedGraphDatabase("wikiLinkDB"); LinkToNode ltn = new LinkToNode(graphDb); Transaction tx = graphDb. beginTx(); Node delNode = graphDb.getNodeById(0); delNode.delete(); for (Map.Entry<String,HashSet<String>> map : hash.entrySet()) { String pageTitle = map.getKey(); Node pageNode; if ( ltn.getId(pageTitle) == null) { pageNode = ltn.createNodeWithPageTitle(pageTitle); } else { pageNode = ltn.getNode(pageTitle); } for (String linkPageTitle : map.getValue()) { Node linkNode; if ( ltn.getId(linkPageTitle) == null) { linkNode = ltn.createNodeWithPageTitle(linkPageTitle); ltn.setPageRank(linkNode, 0); } else { linkNode = ltn.getNode(linkPageTitle); } ltn.setHasLink(pageNode, linkNode); } } // ltn.printAllNodes(); tx.success(); tx.finish(); graphDb.shutdown(); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (SAXException e) { e.printStackTrace(); } catch (ParserConfigurationException e) { e.printStackTrace(); } catch (IOException e) { System.err.println("Failed to parse xml"); e.printStackTrace(); } } }