/* vim: set expandtab tabstop=4 shiftwidth=4 : */ import java.util.HashMap; import java.util.Iterator; public class WordNet { // data type space linear in the input size private HashMap> wordsToIds; private HashMap idToString; private Digraph dag; private SAP sap; // constructor takes the name of the two input files // time linearithmic in the input size public WordNet(String synsets, String hypernyms) { int size = 0; wordsToIds = new HashMap>(); idToString = new HashMap(); // load words from synsets In syn = new In(synsets); while (syn.hasNextLine()) { String line = syn.readLine(); String[] fields = line.split(","); int id = Integer.parseInt(fields[0]); String[] words = fields[1].split(" "); for (String word : words) { Bag bag = wordsToIds.get(word); if (bag == null) { bag = new Bag(); bag.add(id); wordsToIds.put(word, bag); } else { bag.add(id); } } idToString.put(id, fields[1]); size++; } syn.close(); dag = new Digraph(size); boolean[] roots = new boolean[dag.V()]; for (int v = 0; v < dag.V(); v++) roots[v] = true; // load edges from hypernyms In hyp = new In(hypernyms); while (hyp.hasNextLine()) { String line = hyp.readLine(); String[] fields = line.split(","); int id = Integer.parseInt(fields[0]); if (fields.length > 1) roots[id] = false; for (int i = 1; i < fields.length; i++) { dag.addEdge(id, Integer.parseInt(fields[i])); } } hyp.close(); int rootsCount = 0; for (int v = 0; v < dag.V(); v++) if (roots[v]) rootsCount++; if (rootsCount != 1) throw new IllegalArgumentException("roots "+rootsCount); checkIndegrees(); sap = new SAP(dag); } private void checkIndegrees() { int[] indegree = new int[dag.V()]; // compute indegrees for (int v = 0; v < dag.V(); v++) { for (int w : dag.adj(v)) { indegree[w]++; } } // initialize queue to contain all vertices with indegree = 0 (leaves) Queue queue = new Queue(); for (int v = 0; v < dag.V(); v++) if (indegree[v] == 0) queue.enqueue(v); // BFS and decrease indegrees for (int j = 0; !queue.isEmpty(); j++) { int v = queue.dequeue(); for (int w : dag.adj(v)) { indegree[w]--; if (indegree[w] == 0) queue.enqueue(w); } } // check that all indegrees are 0 for (int v = 0; v < indegree.length; v++) { if (indegree[v] != 0) throw new IllegalArgumentException("indegrees"); } } // the set of nouns (no duplicates), returned as an Iterable public Iterable nouns() { if (wordsToIds == null) return null; return wordsToIds.keySet(); } // is the word a WordNet noun? public boolean isNoun(String word) { if (wordsToIds == null) return false; return wordsToIds.containsKey(word); } // distance between nounA and nounB (defined below) public int distance(String nounA, String nounB) { if (!isNoun(nounA) || !isNoun(nounB)) throw new IllegalArgumentException(); Iterable it0 = nounIterable(nounA); Iterable it1 = nounIterable(nounB); return sap.length(it0, it1); } private Iterable nounIterable(final String noun) { return new Iterable() { public Iterator iterator() { return wordsToIds.get(noun).iterator(); } }; } // a synset that is the common ancestor of nounA and nounB // in a shortest ancestral path public String sap(String nounA, String nounB) { if (!isNoun(nounA) || !isNoun(nounB)) throw new java.lang.IllegalArgumentException(); Iterable it0 = nounIterable(nounA); Iterable it1 = nounIterable(nounB); return idToString.get(sap.ancestor(it0, it1)); } // for unit testing of this class public static void main(String[] args) { WordNet wordNet = new WordNet("./data/synsets.txt", "./data/hypernyms.txt"); StdOut.println("23 white_marlin, mileage: " + wordNet.distance("white_marlin", "mileage")); StdOut.println("32 Black_Plague, black_marlin: " + wordNet.distance("Black_Plague", "black_marlin")); StdOut.println("32 American_water_spaniel, histology: " + wordNet.distance("American_water_spaniel", "histology")); StdOut.println("32 Brown_Swiss, barrel_roll: " + wordNet.distance("Brown_Swiss", "barrel_roll")); StdOut.println("bedspring Carolus_Linnaeus: " + wordNet.sap("bedspring", "Carolus_Linnaeus")); try { StdOut.println("check roots"); new WordNet("./data/synsets.txt", "./data/hypernymsInvalidTwoRoots.txt"); StdOut.println("BAD"); } catch (IllegalArgumentException e) { StdOut.println(e.getMessage()); } try { StdOut.println("check cycles"); new WordNet("./data/synsets.txt", "./data/hypernymsInvalidCycle.txt"); StdOut.println("BAD"); } catch (IllegalArgumentException e) { StdOut.println(e.getMessage()); } } }