summaryrefslogtreecommitdiffstats
path: root/Algorithms/Part-II/1-WordNet/WordNet.java
diff options
context:
space:
mode:
Diffstat (limited to 'Algorithms/Part-II/1-WordNet/WordNet.java')
-rw-r--r--Algorithms/Part-II/1-WordNet/WordNet.java155
1 files changed, 140 insertions, 15 deletions
diff --git a/Algorithms/Part-II/1-WordNet/WordNet.java b/Algorithms/Part-II/1-WordNet/WordNet.java
index 830ccdf..2a854fa 100644
--- a/Algorithms/Part-II/1-WordNet/WordNet.java
+++ b/Algorithms/Part-II/1-WordNet/WordNet.java
@@ -1,58 +1,183 @@
/* vim: set expandtab tabstop=4 shiftwidth=4 : */
+import java.util.HashMap;
+import java.util.Iterator;
+
public class WordNet
{
// data type space linear in the input size
+ private HashMap<String, Bag<Integer>> wordsToIds;
+ private HashMap<Integer, String> idToString;
+ private Digraph dag;
+
+ private SAP sap;
// constructor takes the name of the two input files
+ // time linearithmic in the input size
public WordNet(String synsets, String hypernyms)
{
- // time linearithmic in the input size
+ int size = 0;
+ wordsToIds = new HashMap<String, Bag<Integer>>();
+ idToString = new HashMap<Integer, String>();
+
+ // load words from synsets
+ In syn = new In(synsets);
+ while (syn.hasNextLine()) {
+ String line = syn.readLine();
+ String[] fields = line.split(",");
+ int id = Integer.parseInt(fields[0]);
+ String[] words = fields[1].split(" ");
+ for (String word : words) {
+ Bag<Integer> bag = wordsToIds.get(word);
+ if (bag == null) {
+ bag = new Bag<Integer>();
+ bag.add(id);
+ wordsToIds.put(word, bag);
+ } else {
+ bag.add(id);
+ }
+ }
+ idToString.put(id, fields[1]);
+ size++;
+ }
+ syn.close();
+
+ dag = new Digraph(size);
+
+ boolean[] roots = new boolean[dag.V()];
+ for (int v = 0; v < dag.V(); v++)
+ roots[v] = true;
- // throw a java.lang.IllegalArgumentException
- // if the input does not correspond to a rooted DAG
+ // load edges from hypernyms
+ In hyp = new In(hypernyms);
+ while (hyp.hasNextLine()) {
+ String line = hyp.readLine();
+ String[] fields = line.split(",");
+ int id = Integer.parseInt(fields[0]);
+ if (fields.length > 1) roots[id] = false;
+ for (int i = 1; i < fields.length; i++) {
+ dag.addEdge(id, Integer.parseInt(fields[i]));
+ }
+ }
+ hyp.close();
+
+ int rootsCount = 0;
+ for (int v = 0; v < dag.V(); v++)
+ if (roots[v]) rootsCount++;
+ if (rootsCount != 1)
+ throw new IllegalArgumentException("roots "+rootsCount);
+
+ checkIndegrees();
+
+ sap = new SAP(dag);
+ }
+
+ private void checkIndegrees()
+ {
+ int[] indegree = new int[dag.V()];
+
+ // compute indegrees
+ for (int v = 0; v < dag.V(); v++) {
+ for (int w : dag.adj(v)) {
+ indegree[w]++;
+ }
+ }
+
+ // initialize queue to contain all vertices with indegree = 0 (leaves)
+ Queue<Integer> queue = new Queue<Integer>();
+ for (int v = 0; v < dag.V(); v++)
+ if (indegree[v] == 0) queue.enqueue(v);
+
+ // BFS and decrease indegrees
+ for (int j = 0; !queue.isEmpty(); j++) {
+ int v = queue.dequeue();
+ for (int w : dag.adj(v)) {
+ indegree[w]--;
+ if (indegree[w] == 0) queue.enqueue(w);
+ }
+ }
+
+ // check that all indegrees are 0
+ for (int v = 0; v < indegree.length; v++) {
+ if (indegree[v] != 0)
+ throw new IllegalArgumentException("indegrees");
+ }
}
// the set of nouns (no duplicates), returned as an Iterable
public Iterable<String> nouns()
{
- return null;
+ if (wordsToIds == null) return null;
+ return wordsToIds.keySet();
}
// is the word a WordNet noun?
public boolean isNoun(String word)
{
- // run in time logarithmic in the number of nouns
-
- return false;
+ if (wordsToIds == null) return false;
+ return wordsToIds.containsKey(word);
}
// distance between nounA and nounB (defined below)
public int distance(String nounA, String nounB)
{
- // run in time linear in the size of the WordNet digraph
+ if (!isNoun(nounA) || !isNoun(nounB))
+ throw new IllegalArgumentException();
+
+ Iterable<Integer> it0 = nounIterable(nounA);
+ Iterable<Integer> it1 = nounIterable(nounB);
- // throw java.lang.IllegalArgumentException
- // unless both of the noun arguments are WordNet nouns
+ return sap.length(it0, it1);
+ }
- return 0;
+ private Iterable<Integer> nounIterable(final String noun) {
+ return new Iterable<Integer>() {
+ public Iterator<Integer> iterator() {
+ return wordsToIds.get(noun).iterator();
+ }
+ };
}
// a synset that is the common ancestor of nounA and nounB
// in a shortest ancestral path
public String sap(String nounA, String nounB)
{
- // run in time linear in the size of the WordNet digraph
+ if (!isNoun(nounA) || !isNoun(nounB))
+ throw new java.lang.IllegalArgumentException();
- // throw java.lang.IllegalArgumentException
- // unless both of the noun arguments are WordNet nouns
+ Iterable<Integer> it0 = nounIterable(nounA);
+ Iterable<Integer> it1 = nounIterable(nounB);
- return "";
+ return idToString.get(sap.ancestor(it0, it1));
}
// for unit testing of this class
public static void main(String[] args)
{
+ WordNet wordNet = new WordNet("./data/synsets.txt", "./data/hypernyms.txt");
+
+ StdOut.println("23 white_marlin, mileage: "
+ + wordNet.distance("white_marlin", "mileage"));
+ StdOut.println("32 Black_Plague, black_marlin: "
+ + wordNet.distance("Black_Plague", "black_marlin"));
+ StdOut.println("32 American_water_spaniel, histology: "
+ + wordNet.distance("American_water_spaniel", "histology"));
+ StdOut.println("32 Brown_Swiss, barrel_roll: "
+ + wordNet.distance("Brown_Swiss", "barrel_roll"));
+ StdOut.println("bedspring Carolus_Linnaeus: "
+ + wordNet.sap("bedspring", "Carolus_Linnaeus"));
+ try {
+ StdOut.println("check roots");
+ new WordNet("./data/synsets.txt", "./data/hypernymsInvalidTwoRoots.txt");
+ StdOut.println("BAD");
+ }
+ catch (IllegalArgumentException e) { StdOut.println(e.getMessage()); }
+ try {
+ StdOut.println("check cycles");
+ new WordNet("./data/synsets.txt", "./data/hypernymsInvalidCycle.txt");
+ StdOut.println("BAD");
+ }
+ catch (IllegalArgumentException e) { StdOut.println(e.getMessage()); }
}
}