#!/local/usr/bin/python
# Evaluation script for Functional Clause Boundary Detection
# Casey Whitelaw, 2005
# casey@it.usyd.edu.au
"""
Calculate the precision, recall and F-measure of detecting functional
clause boundaries.

Since clauses can be nested, each depth of clause is scored separately.
This allows a comparative analysis of a system's performance on different
types of clauses.

usage: eval.py tagged_file

The tagged file should be formatted so that the last two (tab-separated)
columns contain the real tag and the predicted tag, respectively.  Any
previous columns are ignored.  A blank line marks a sentence boundary; a
final sentence that is not followed by a blank line is still scored.
"""

import sys

# Column indices into a per-level counter row [total, proposed, real, correct].
TOTAL = 0  # never incremented by this script; kept so the row layout is stable
PROPOSED = 1
REAL = 2
CORRECT = 3

# Number of nesting levels allocated up front.  Deeper levels are appended on
# demand, so this is no longer a hard limit on clause depth.
MAX_DEPTH = 10


class ClauseLocations:
    """Collect the clause boundary locations of one sentence.

    Tags are fed one token at a time.  Every "(" in a tag opens a clause at
    the current nesting depth and every ")" closes the most recently opened
    one.  A finished clause is recorded as a (depth, start, end) tuple of the
    token indices at which it was opened and closed.
    """

    def __init__(self):
        self.stack = []      # clauses opened but not yet closed
        self.completed = []  # finished (depth, start, end) tuples
        self.cur = 0         # unused by this script; kept for compatibility
        self.index = 0       # index of the token the next feed() describes
        self.depth = 0       # current nesting depth

    def feed(self, tag):
        """Consume the tag of the next token, then advance the token index."""
        for ch in tag:
            if ch == "(":
                # Open a clause at this depth.
                self.stack.append([self.depth, self.index, None])
                self.depth += 1
            elif ch == ")":
                # Close the last open clause; a ")" with nothing open is
                # ignored so the depth never goes negative.
                if self.stack:
                    clause = self.stack.pop()
                    clause[2] = self.index
                    self.completed.append(tuple(clause))
                    self.depth -= 1
        self.index += 1

    def getClauseLocations(self):
        """Return the completed clauses, sorted in place by (depth, start, end).

        Clauses that were opened but never closed are not included.
        """
        self.completed.sort()
        return self.completed


def _counter_row(levels, level):
    """Return the counter row for `level`, appending empty rows if needed."""
    while len(levels) <= level:
        levels.append([0, 0, 0, 0])
    return levels[level]


def _score_sentence(levels, answer, tagged):
    """Add one sentence's real/proposed/correct clause counts to `levels`."""
    proposed_locs = tagged.getClauseLocations()
    proposed_set = set(proposed_locs)
    for clause in answer.getClauseLocations():
        row = _counter_row(levels, clause[0])
        row[REAL] += 1
        # A clause is correct only if depth, start and end all match.
        if clause in proposed_set:
            row[CORRECT] += 1
    for clause in proposed_locs:
        _counter_row(levels, clause[0])[PROPOSED] += 1


def evaluate(lines):
    """Score an iterable of tagged lines.

    Returns a list with one [total, proposed, real, correct] row per nesting
    level (at least MAX_DEPTH rows).

    Raises ValueError if a non-blank line has fewer than two tab-separated
    columns.
    """
    levels = [[0, 0, 0, 0] for _ in range(MAX_DEPTH)]
    answer = ClauseLocations()
    tagged = ClauseLocations()
    for lineno, line in enumerate(lines, 1):
        line = line.strip()
        if not line:
            # Sentence boundary.
            _score_sentence(levels, answer, tagged)
            answer = ClauseLocations()
            tagged = ClauseLocations()
            continue
        fields = line.split("\t")
        if len(fields) < 2:
            raise ValueError(
                "line %d: expected at least two tab-separated columns" % lineno)
        answer.feed(fields[-2])
        tagged.feed(fields[-1])
    # Score a final sentence that was not terminated by a blank line.
    if answer.index:
        _score_sentence(levels, answer, tagged)
    return levels


def _prf(proposed, real, correct):
    """Return (precision, recall, F); any ratio with a zero denominator is 0.0."""
    precision = float(correct) / proposed if proposed else 0.0
    recall = float(correct) / real if real else 0.0
    denominator = precision + recall
    fval = 2 * precision * recall / denominator if denominator else 0.0
    return precision, recall, fval


def format_report(levels):
    """Return the report lines: one per level with real clauses, then overall."""
    report = []
    all_levels = [0, 0, 0, 0]
    for i, row in enumerate(levels):
        total, proposed, real, correct = row
        for j, val in enumerate(row):
            all_levels[j] += val
        # Levels with no real clauses are not reported individually, but
        # their counts still contribute to the overall figures.
        if real == 0:
            continue
        precision, recall, fval = _prf(proposed, real, correct)
        report.append(
            "Level %d clauses: %d clauses, %d identified, %d correct "
            "(%.2f%% precision, %.2f%% recall, F=%.2f)"
            % (i, real, proposed, correct, 100 * precision, 100 * recall, fval))
    total, proposed, real, correct = all_levels
    precision, recall, fval = _prf(proposed, real, correct)
    report.append(
        "Overall: %d clauses, %d identified, %d correct "
        "(%.2f%% precision, %.2f%% recall, F=%.2f)"
        % (real, proposed, correct, 100 * precision, 100 * recall, fval))
    return report


def main(argv=None):
    """Run the evaluation on argv[1]; return the process exit status."""
    if argv is None:
        argv = sys.argv
    if len(argv) != 2:
        sys.stderr.write("usage: eval.py tagged_file\n")
        return 1
    # The context manager closes the file even if a line is malformed.
    with open(argv[1]) as f:
        levels = evaluate(f)
    for line in format_report(levels):
        print(line)
    return 0


if __name__ == "__main__":
    sys.exit(main())