Source code for Assign_GUP.topics


# Copyright (c) 2009 - 2016, UChicago Argonne, LLC.
# See LICENSE file for details.

'''
Support AGUP topics
'''

import agup_data
import xml_utility
from lxml import etree

# value given to a topic when the caller does not supply one
DEFAULT_TOPIC_VALUE = 0.0


class Topics(object):
    '''
    manage the list of AGUP topics (known here as ``key``)

    Each topic is a name (string) with a floating-point value.

    :var dict topics: topic name -> float value
    :var str _topics_string_: sorted topic names joined by a space,
        refreshed whenever a topic is added or removed
    '''

    def __init__(self):
        self.clearAll()

    def __len__(self):
        return len(self.topics)

    def __iter__(self):
        # iterate over the topic names in alphabetical order
        for key in self.inOrder():
            yield key

    def _updateTopicsString(self):
        ''' rebuild the cached, space-joined string of sorted topic names '''
        self._topics_string_ = ' '.join(self.getTopicList())

    def inOrder(self):
        ''' return the topic names, sorted alphabetically '''
        return sorted(self.topics)

    def valueOrder(self):
        '''
        sort by topic values

        :returns [str]: topic names ordered by decreasing value,
            topics of equal value are sub-ordered alphabetically
        '''
        # Sort on the numeric value, not its string form: as text,
        # '1e-05' would be ranked above '1.0'.
        return sorted(
            self.topics,
            key=lambda topic: (-float(self.topics[topic]), topic))

    def exists(self, key):
        ''' Is ``key`` already known? '''
        return key in self.topics

    def add(self, key, value=DEFAULT_TOPIC_VALUE):
        '''
        define a new topic (known here as ``key``)

        :param str key: name of the topic, surrounding whitespace is removed
        :param float value: topic value, must be 0..1 inclusive
        :raises KeyError: if the name is empty or already defined
        :raises ValueError: if ``value`` is out of range
        '''
        # strip *before* the duplicate test so ' foo' cannot
        # silently overwrite an existing 'foo'
        key = key.strip()
        if len(key) == 0:
            raise KeyError('Must give a name for the topic')
        if self.exists(key):
            raise KeyError('This topic is already defined: ' + key)
        checkTopicValueRange(value)
        self.topics[key] = float(value)
        self._updateTopicsString()

    def addTopics(self, key_list):
        '''
        add several topics at once (with default values)

        :param [str] key_list: list of topics (strings) to be added
        '''
        for key in key_list:
            self.add(key)

    def get(self, key):
        '''
        return value of an existing topic (known here as ``key``)

        topic must exist or KeyError exception will be raised
        '''
        if not self.exists(key):
            raise KeyError('This topic is not defined: ' + key)
        return self.topics[key]

    def getTopicList(self):
        ''' return a list of all topics (sorted alphabetically) '''
        return sorted(self.topics.keys())

    def set(self, key, value):
        '''
        set value of an existing topic (known here as ``key``)

        topic must exist or KeyError exception will be raised

        The value is converted to float but, unlike :meth:`add`,
        it is not range-checked here.
        '''
        if not self.exists(key):
            raise KeyError('This topic is not defined: ' + key)
        self.topics[key] = float(value)

    def clearAll(self):
        ''' remove all keys from the list of topics '''
        self.topics = {}
        self._topics_string_ = ''

    def remove(self, key):
        '''
        remove the named topic

        :param str key: topic to be removed
        :raises KeyError: if the topic does not exist
        '''
        if not self.exists(key):
            raise KeyError('Cannot remove (does not exist): ' + key)
        del self.topics[key]
        # keep the cached string in step with the dictionary
        self._updateTopicsString()

    def removeTopics(self, key_list):
        '''
        remove several topics at once

        :param [str] key_list: list of topics (strings) to be removed
        '''
        for key in key_list:
            self.remove(key)

    def compare(self, other_topics_object):
        '''
        compare topics in self.topics with the other_topics_object,
        return True if identical

        compares sorted list of topic names between each object
        (topic values are not compared)

        :param obj other_topics_object: instance of Topics()
        '''
        # Compare the actual names rather than the cached joined string:
        # the string cannot tell 'a b' from 'a', 'b' and goes stale
        # if ``topics`` is modified directly.
        return self.getTopicList() == other_topics_object.getTopicList()

    def diff(self, other_topics_object):
        '''
        differences in list of topics between self.topics and
        other_topics_object

        Comparison assumes that self.topics is the final result.
        Returned result shows topics added and removed from
        *other_topics_object* to obtain current list.

        :param obj other_topics_object: instance of Topics()
        :returns ([],[]): first list is topics added,
            second list is topics removed
        '''
        return diffLists(self.getTopicList(),
                         other_topics_object.getTopicList())

    def dotProduct(self, other):
        r'''
        normalized dot product of Proposal (*self*) and Reviewer (*other*)
        topic strengths, :math:`\vec{p} \cdot \vec{r}`

        :param obj other: instance of Topics()
        :returns: :math:`\sum{\vec{p} \cdot \vec{r}} / \sum{\vec{p}}`
        :raises KeyError: if the two objects do not have the same topics

        * :math:`\vec{p}` is array of topic value strengths for Proposal
        * :math:`\vec{r}` is array of topic value strengths for Reviewer
        '''
        if not self.compare(other):
            msg = 'these two lists of topics are not the same,'
            msg += ' cannot dot product'
            raise KeyError(msg)

        topic_list = self.getTopicList()
        if len(topic_list) == 0:
            return 0.0      # trivial result and avoids div-by-zero error

        props = [self.get(topic) for topic in topic_list]      # proposals
        denominator = sum(props)
        if denominator == 0.0:
            return 0.0      # no proposal strength at all

        rvwrs = [other.get(topic) for topic in topic_list]     # reviewers
        # sum(proposal_weight * reviewer_strength)
        numerator = sum(u * v for u, v in zip(props, rvwrs))
        return numerator / float(denominator)

    def importXml(self, xmlFile, read_values=True):
        '''
        replace the current topics with those read from an XML file

        :param str xmlFile: name of XML file with Topics
        :param bool read_values: import topic values?
        '''
        root_tag = agup_data.AGUP_MASTER_ROOT_TAG
        xsd_file = agup_data.AGUP_XML_SCHEMA_FILE
        doc = xml_utility.readValidXmlDoc(xmlFile, root_tag, xsd_file)
        self.clearAll()
        self.importXmlTopics(doc.getroot(), read_values)

    def importXmlTopics(self, parent_node, read_values=True):
        '''
        make this common code segment reuseable

        Adds every ``Topic`` element found below the ``Topics`` child
        of ``parent_node``.  Nothing is added if there is no such child.

        :param obj parent_node: XML parent node
        :param bool read_values: import topic values?
        '''
        node = parent_node.find('Topics')
        if node is None:
            return
        for subnode in node.findall('Topic'):
            topic = subnode.attrib['name']
            value = DEFAULT_TOPIC_VALUE
            if read_values:
                value = subnode.attrib['value']
            self.add(topic, value)

    def writeXml(self, specified_node, write_values=True):
        '''
        write Topics' data to a specified node in the XML document

        :param obj specified_node: XML node to contain this data
        :param bool write_values: write topic values?
        '''
        from lxml import etree
        node = etree.SubElement(specified_node, 'Topics')
        if self.topics is not None:
            # sorted, so the same topics always produce the same XML
            for topic in sorted(self.topics):
                subnode = etree.SubElement(node, 'Topic')
                subnode.attrib['name'] = topic
                if write_values:
                    subnode.attrib['value'] = str(self.get(topic))
def checkTopicValueRange(value):
    '''
    standard check that a topic value lies in the range 0..1 inclusive

    :param float value: topic value to be checked
        (anything ``float()`` accepts)
    :raises ValueError: if the value is outside 0..1
    '''
    number = float(value)
    # chained comparison on purpose: it is False for NaN, so NaN is rejected
    in_range = 0 <= number <= 1.0
    if in_range:
        return
    raise ValueError('value must be between 0 and 1: given=' + str(value))
def diffLists(new_list, old_list):
    '''
    report what changed going from ``old_list`` to ``new_list``

    assumes each list had only unique entries, no redundancies

    :param [str] new_list: new list of strings to be compared
    :param [str] old_list: old list of strings to be compared
    :returns ([str], [str]): (items added, items removed),
        every item converted with ``str()``
    '''
    added_items = []
    for item in new_list:
        if item not in old_list:
            added_items.append(str(item))

    removed_items = []
    for item in old_list:
        if item not in new_list:
            removed_items.append(str(item))

    return added_items, removed_items
def sortListUnique(the_list):
    '''
    return the items of a list, sorted, with duplicates dropped

    * the items are collected into a set, which keeps one of each

    :param [str] the_list: list of strings to be sorted
    :returns [str]: new sorted list, each item appearing once
    '''
    unique_items = set(the_list)
    return sorted(unique_items)
def synchronizeTopics(a_list, b_list):
    '''
    give both Topics() objects the same set of topic names

    * assumes each topics list had only unique entries, no redundancies
    * modifies objects in place
    * topics are added through ``addTopics()``, nothing is removed

    :param obj a_list: instance of Topics()
    :param obj b_list: instance of Topics()
    '''
    if a_list.compare(b_list):
        return      # names already agree, nothing to do

    # diff() is relative to a_list: "added" names are only in a_list,
    # "removed" names are only in b_list
    only_in_a, only_in_b = a_list.diff(b_list)
    b_list.addTopics(only_in_a)
    a_list.addTopics(only_in_b)