annotate src/howtouse/TextTagParser.java @ 16:5c8c554ee3ac draft

hg mv TestSaxParser.java TextTagParser.java
author one
date Thu, 23 Aug 2012 09:03:33 +0900
parents src/howtouse/TestSaxParser.java@fa38ded4c054
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
1 package howtouse;
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
2
10
e98ca9548c78 fix TestSaxParser.java
one
parents: 8
diff changeset
3 import java.util.HashMap;
e98ca9548c78 fix TestSaxParser.java
one
parents: 8
diff changeset
4 import java.util.HashSet;
8
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
5 import java.util.Stack;
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
6
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
7 import org.xml.sax.Attributes;
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
8 import org.xml.sax.SAXException;
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
9 import org.xml.sax.helpers.DefaultHandler;
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
10
16
5c8c554ee3ac hg mv TestSaxParser.java TextTagParser.java
one
parents: 15
diff changeset
11 public class TextTagParser extends DefaultHandler {
8
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
12
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
13 protected Stack<MyObject> stack;
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
14 private MyObject currentObj;
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
15 private Attributes currentAttr;
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
16 private String currentTag;
10
e98ca9548c78 fix TestSaxParser.java
one
parents: 8
diff changeset
17 private String currentTitleName;
8
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
18 final static String TAGNAME_TITLE = "title";
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
19 final static String TAGNAME_TEXT = "text";
10
e98ca9548c78 fix TestSaxParser.java
one
parents: 8
diff changeset
20 WikiLinkParser linkParser = new WikiLinkParser();
e98ca9548c78 fix TestSaxParser.java
one
parents: 8
diff changeset
21
e98ca9548c78 fix TestSaxParser.java
one
parents: 8
diff changeset
22 HashMap<String,HashSet<String>> hash = new HashMap<String,HashSet<String>>();
e98ca9548c78 fix TestSaxParser.java
one
parents: 8
diff changeset
23 HashSet<String> currentLinkHash = new HashSet<String>();
8
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
24
16
5c8c554ee3ac hg mv TestSaxParser.java TextTagParser.java
one
parents: 15
diff changeset
25 public TextTagParser() {
8
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
26 stack = new Stack<MyObject>();
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
27 }
10
e98ca9548c78 fix TestSaxParser.java
one
parents: 8
diff changeset
28
e98ca9548c78 fix TestSaxParser.java
one
parents: 8
diff changeset
29 public HashMap<String,HashSet<String>> getHash() {
e98ca9548c78 fix TestSaxParser.java
one
parents: 8
diff changeset
30 return hash;
e98ca9548c78 fix TestSaxParser.java
one
parents: 8
diff changeset
31 }
8
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
32
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
33 public void startDocument() {
13
84dd614f1414 add LinkConvertGraph.java
one
parents: 12
diff changeset
34 // System.out.println("read start");
8
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
35 }
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
36
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
37 public void startElement(String uri, String localName, String qName,
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
38 Attributes attributes) throws SAXException {
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
39 currentTag = qName;
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
40 if (qName.equals(TAGNAME_TITLE)) {
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
41 currentObj = new MyObject(attributes);
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
42 stack.push(currentObj);
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
43 } else if (qName.equals(TAGNAME_TEXT)) {
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
44 currentObj = new MyObject(attributes);
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
45 stack.push(currentObj);
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
46 } else {
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
47
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
48 }
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
49 }
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
50
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
51 public void characters(char[] ch, int offset, int length) {
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
52
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
53 String value = new String(ch, offset, length);
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
54 if (currentObj != null) {
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
55 currentObj.setValue(currentTag, currentAttr, value);
10
e98ca9548c78 fix TestSaxParser.java
one
parents: 8
diff changeset
56
e98ca9548c78 fix TestSaxParser.java
one
parents: 8
diff changeset
57 if(currentTag.equals(TAGNAME_TITLE)) {
e98ca9548c78 fix TestSaxParser.java
one
parents: 8
diff changeset
58 currentTitleName = value;
e98ca9548c78 fix TestSaxParser.java
one
parents: 8
diff changeset
59 }
e98ca9548c78 fix TestSaxParser.java
one
parents: 8
diff changeset
60 if(currentTag.equals(TAGNAME_TEXT)) {
e98ca9548c78 fix TestSaxParser.java
one
parents: 8
diff changeset
61 HashSet<String> tmpHash = linkParser.parse(value);
e98ca9548c78 fix TestSaxParser.java
one
parents: 8
diff changeset
62 if (tmpHash.size() <= 0) return;
e98ca9548c78 fix TestSaxParser.java
one
parents: 8
diff changeset
63 for (String link: tmpHash) {
e98ca9548c78 fix TestSaxParser.java
one
parents: 8
diff changeset
64 currentLinkHash.add(link);
e98ca9548c78 fix TestSaxParser.java
one
parents: 8
diff changeset
65 }
8
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
66 }
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
67 }
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
68 }
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
69
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
70 public void endElement(String uri, String localName, String qName) {
10
e98ca9548c78 fix TestSaxParser.java
one
parents: 8
diff changeset
71
8
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
72 if (currentObj == null)
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
73 return;
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
74 if (qName.equals(TAGNAME_TITLE)) {
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
75 stack.pop();
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
76 } else if (qName.equals(TAGNAME_TEXT)) {
10
e98ca9548c78 fix TestSaxParser.java
one
parents: 8
diff changeset
77 hash.put(currentTitleName, currentLinkHash);
e98ca9548c78 fix TestSaxParser.java
one
parents: 8
diff changeset
78 currentLinkHash = new HashSet<String>();
8
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
79 stack.pop();
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
80 } else {
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
81
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
82 }
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
83 if (stack.empty())
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
84 currentObj = null;
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
85 else
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
86 currentObj = (MyObject) stack.peek();
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
87 }
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
88
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
89 public void endDocument() {
13
84dd614f1414 add LinkConvertGraph.java
one
parents: 12
diff changeset
90 // System.out.println("end reading file.");
8
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
91 }
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
92
d8d0855bcdfd add CharReader.java , WikiLinkParser.java
one
parents:
diff changeset
93 }