gramps/src/plugins/tool/PatchNames.py
Benny Malengier 6d596ad987 4198: Person view does not remove a row correctly when two people are merged.
This is a major patch by Michael Nauta. It means all writes happen immediately to bsddb, and the bsddb 
rollback is used on a crash. Transaction is no longer used to store changes and do them on commit.
Undo database is set on end. 
At the same time with statement is used throughout for transactions
At the same time, the original bug in merge code should be fixed
Still some issues, that will be ironed out


svn: r16523
2011-01-31 21:54:58 +00:00

556 lines
23 KiB
Python

#
# Gramps - a GTK+/GNOME based genealogy program
#
# Copyright (C) 2000-2006 Donald N. Allingham
# Copyright (C) 2008 Brian G. Matherly
# Copyright (C) 2010 Jakim Friant
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# $Id$
"""Tools/Database Processing/Extract Information from Names"""
#-------------------------------------------------------------------------
#
# python modules
#
#-------------------------------------------------------------------------
import re
#-------------------------------------------------------------------------
#
# gnome/gtk
#
#-------------------------------------------------------------------------
import gtk
import gobject
#-------------------------------------------------------------------------
#
# gramps modules
#
#-------------------------------------------------------------------------
import const
from gui.utils import ProgressMeter
from gui.plug import tool
from QuestionDialog import OkDialog
import ManagedWindow
import GrampsDisplay
import gen.lib
from gen.ggettext import sgettext as _
from glade import Glade
#-------------------------------------------------------------------------
#
# Constants
#
#-------------------------------------------------------------------------
WIKI_HELP_PAGE = '%s_-_Tools' % const.URL_MANUAL_PAGE
WIKI_HELP_SEC = _('manual|Extract_Information_from_Names')
#-------------------------------------------------------------------------
#
# constants
#
#-------------------------------------------------------------------------
# List of possible surname prefixes. Notice that you must run the tool
# multiple times for prefixes such as "van der".
PREFIX_LIST = [
"de", "van", "von", "di", "le", "du", "dela", "della",
"des", "vande", "ten", "da", "af", "den", "das", "dello",
"del", "en", "ein", "el" "et", "les", "lo", "los", "un",
"um", "una", "uno", "der", "ter", "te", "die",
]
CONNECTOR_LIST = ['e', 'y', ]
CONNECTOR_LIST_NONSPLIT = ['de', 'van']
_title_re = re.compile(r"^ ([A-Za-z][A-Za-z]+\.) \s+ (.+) $", re.VERBOSE)
_nick_re = re.compile(r"(.+) \s* [(\"] (.+) [)\"]", re.VERBOSE)
#-------------------------------------------------------------------------
#
# Search each name in the database, and compare the firstname against the
# form of "Name (Nickname)". If it matches, change the first name entry
# to "Name" and add "Nickname" into the nickname field. Also, search for
# surname prefixes. If found, change the name entry and put the prefix in
# the name prefix field.
#
#-------------------------------------------------------------------------
class PatchNames(tool.BatchTool, ManagedWindow.ManagedWindow):
titleid = 1
nickid = 2
pref1id = 3
compid = 4
def __init__(self, dbstate, uistate, options_class, name, callback=None):
self.label = _('Name and title extraction tool')
ManagedWindow.ManagedWindow.__init__(self, uistate, [], self.__class__)
self.set_window(gtk.Window(), gtk.Label(), '')
tool.BatchTool.__init__(self, dbstate, options_class, name)
if self.fail:
return
winprefix = gtk.Dialog("Default prefix and connector settings",
self.uistate.window,
gtk.DIALOG_MODAL|gtk.DIALOG_DESTROY_WITH_PARENT,
(gtk.STOCK_OK, gtk.RESPONSE_ACCEPT))
winprefix.set_has_separator(False)
winprefix.vbox.set_spacing(5)
hboxpref = gtk.HBox()
hboxpref.pack_start(gtk.Label(_('Prefixes to search for:')),
expand=False, padding=5)
self.prefixbox = gtk.Entry()
self.prefixbox.set_text(', '.join(PREFIX_LIST))
hboxpref.pack_start(self.prefixbox)
winprefix.vbox.pack_start(hboxpref)
hboxcon = gtk.HBox()
hboxcon.pack_start(gtk.Label(_('Connectors splitting surnames:')),
expand=False, padding=5)
self.conbox = gtk.Entry()
self.conbox.set_text(', '.join(CONNECTOR_LIST))
hboxcon.pack_start(self.conbox)
winprefix.vbox.pack_start(hboxcon)
hboxconns = gtk.HBox()
hboxconns.pack_start(gtk.Label(_('Connectors not splitting surnames:')),
expand=False, padding=5)
self.connsbox = gtk.Entry()
self.connsbox.set_text(', '.join(CONNECTOR_LIST_NONSPLIT))
hboxconns.pack_start(self.connsbox)
winprefix.vbox.pack_start(hboxconns)
winprefix.show_all()
winprefix.resize(700, 100)
response = winprefix.run()
self.prefix_list = self.prefixbox.get_text().split(',')
self.prefix_list = map(strip, self.prefix_list)
self.prefixbox = None
self.connector_list = self.conbox.get_text().split(',')
self.connector_list = map(strip, self.connector_list)
self.conbox = None
self.connector_list_nonsplit = self.connsbox.get_text().split(',')
self.connector_list_nonsplit = map(strip, self.connector_list_nonsplit)
self.connsbox = None
# Find a prefix in the first_name
self._fn_prefix_re = re.compile("(\S+)\s+(%s)\s*$" % '|'.join(self.prefix_list),
re.IGNORECASE)
# Find a prefix in the surname
self._sn_prefix_re = re.compile("^\s*(%s)\s+(.+)" % '|'.join(self.prefix_list),
re.IGNORECASE)
# Find a connector in the surname
self._sn_con_re = re.compile("^\s*(.+)\s+(%s)\s+(.+)" % '|'.join(self.connector_list),
re.IGNORECASE)
winprefix.destroy()
self.cb = callback
self.handle_to_action = {}
self.progress = ProgressMeter(
_('Extracting Information from Names'), '')
self.progress.set_pass(_('Analyzing names'),
self.db.get_number_of_people())
for person in self.db.iter_people():
key = person.handle
name = person.get_primary_name()
first = name.get_first_name()
sname = name.get_surname()
old_prefix = []
old_surn = []
old_con = []
old_prim = []
old_orig = []
for surn in name.get_surname_list():
old_prefix.append(surn.get_prefix())
old_surn.append(surn.get_surname())
old_con.append(surn.get_connector())
old_prim.append(surn.get_primary())
old_orig.append(surn.get_origintype())
if name.get_title():
old_title = [name.get_title()]
else:
old_title = []
new_title = []
match = _title_re.match(first)
while match:
groups = match.groups()
first = groups[1]
new_title.append(groups[0])
match = _title_re.match(first)
matchnick = _nick_re.match(first)
if new_title:
titleval = (" ".join(old_title+new_title), first)
if key in self.handle_to_action:
self.handle_to_action[key][self.titleid] = titleval
else:
self.handle_to_action[key] = {self.titleid: titleval}
elif matchnick:
# we check for nick, which changes given name like title
groups = matchnick.groups()
nickval = (groups[0], groups[1])
if key in self.handle_to_action:
self.handle_to_action[key][self.nickid] = nickval
else:
self.handle_to_action[key] = {self.nickid: nickval}
else:
# Try to find the name prefix in the given name, also this
# changes given name
match = self._fn_prefix_re.match(first)
if match:
groups = match.groups()
if old_prefix[0]:
# Put the found prefix before the old prefix
new_prefix = " ".join([groups[1], old_prefix[0]])
else:
new_prefix = groups[1]
pref1val = (groups[0], new_prefix, groups[1])
if key in self.handle_to_action:
self.handle_to_action[key][self.pref1id] = pref1val
else:
self.handle_to_action[key] = {self.pref1id: pref1val}
#check for Gedcom import of compound surnames
if len(old_surn) == 1 and old_con[0] == '':
prefixes = old_prefix[0].split(',')
surnames = old_surn[0].split(',')
if len(prefixes) > 1 and len(prefixes) == len(surnames):
#assume a list of prefix and a list of surnames
prefixes = map(strip, prefixes)
surnames = map(strip, surnames)
primaries = [False] * len(prefixes)
primaries[0] = True
origs = []
for ind in range(len(prefixes)):
origs.append(gen.lib.NameOriginType())
origs[0] = old_orig[0]
compoundval = (surnames, prefixes, ['']*len(prefixes),
primaries, origs)
if key in self.handle_to_action:
self.handle_to_action[key][self.compid] = compoundval
else:
self.handle_to_action[key] = {self.compid: compoundval}
#we cannot check compound surnames, so continue the loop
continue
# Next, try to split surname in compounds: prefix surname connector
found = False
new_prefix_list = []
new_surname_list = []
new_connector_list = []
new_prim_list = []
new_orig_list = []
ind = 0
cont = True
for pref, surn, con, prim, orig in zip(old_prefix, old_surn,
old_con, old_prim, old_orig):
surnval = surn.split()
if surnval == []:
new_prefix_list.append(pref)
new_surname_list.append('')
new_connector_list.append(con)
new_prim_list.append(prim)
new_orig_list.append(orig)
cont = False
continue
val = surnval.pop(0)
while cont:
new_prefix_list.append(pref)
new_surname_list.append('')
new_connector_list.append(con)
new_prim_list.append(prim)
new_orig_list.append(orig)
while cont and (val.lower() in self.prefix_list):
found = True
if new_prefix_list[-1]:
new_prefix_list[-1] += ' ' + val
else:
new_prefix_list[-1] = val
try:
val = surnval.pop(0)
except IndexError:
val = ''
cont = False
#after prefix we have a surname
if cont:
new_surname_list[-1] = val
try:
val = surnval.pop(0)
except IndexError:
val = ''
cont = False
#if value after surname indicates continue, then continue
while cont and (val.lower() in self.connector_list_nonsplit):
#add this val to the current surname
new_surname_list[-1] += ' ' + val
try:
val = surnval.pop(0)
except IndexError:
val = ''
cont = False
# if previous is non-splitting connector, then add new val to
# current surname
if cont and (new_surname_list[-1].split()[-1].lower()
in self.connector_list_nonsplit):
new_surname_list[-1] += ' ' + val
try:
val = surnval.pop(0)
except IndexError:
val = ''
cont = False
#if next is a connector, add it to the surname
if cont and val.lower() in self.connector_list:
found = True
if new_connector_list[-1]:
new_connector_list[-1] = ' ' + val
else:
new_connector_list[-1] = val
try:
val = surnval.pop(0)
except IndexError:
val = ''
cont = False
#initialize for a next surname in case there are still
#val
if cont:
found = True # we split surname
pref=''
con = ''
prim = False
orig = gen.lib.NameOriginType()
ind += 1
if found:
compoundval = (new_surname_list, new_prefix_list,
new_connector_list, new_prim_list, new_orig_list)
if key in self.handle_to_action:
self.handle_to_action[key][self.compid] = compoundval
else:
self.handle_to_action[key] = {self.compid: compoundval}
self.progress.step()
if self.handle_to_action:
self.display()
else:
self.progress.close()
self.close()
OkDialog(_('No modifications made'),
_("No titles, nicknames or prefixes were found"))
def build_menu_names(self, obj):
return (self.label, None)
def toggled(self, cell, path_string):
path = tuple(map(int, path_string.split(':')))
row = self.model[path]
row[0] = not row[0]
self.model.row_changed(path, row.iter)
def display(self):
self.top = Glade()
window = self.top.toplevel
self.top.connect_signals({
"destroy_passed_object" : self.close,
"on_ok_clicked" : self.on_ok_clicked,
"on_help_clicked" : self.on_help_clicked,
"on_delete_event" : self.close,
})
self.list = self.top.get_object("list")
self.set_window(window, self.top.get_object('title'), self.label)
self.model = gtk.ListStore(gobject.TYPE_BOOLEAN, gobject.TYPE_STRING,
gobject.TYPE_STRING, gobject.TYPE_STRING,
gobject.TYPE_STRING)
r = gtk.CellRendererToggle()
r.connect('toggled', self.toggled)
c = gtk.TreeViewColumn(_('Select'), r, active=0)
self.list.append_column(c)
c = gtk.TreeViewColumn(_('ID'), gtk.CellRendererText(), text=1)
self.list.append_column(c)
c = gtk.TreeViewColumn(_('Type'), gtk.CellRendererText(), text=2)
self.list.append_column(c)
c = gtk.TreeViewColumn(_('Value'), gtk.CellRendererText(), text=3)
self.list.append_column(c)
c = gtk.TreeViewColumn(_('Current Name'), gtk.CellRendererText(), text=4)
self.list.append_column(c)
self.list.set_model(self.model)
self.nick_hash = {}
self.title_hash = {}
self.prefix1_hash = {}
self.compound_hash = {}
self.progress.set_pass(_('Building display'),
len(self.handle_to_action.keys()))
for key, data in self.handle_to_action.items():
p = self.db.get_person_from_handle(key)
gid = p.get_gramps_id()
if self.nickid in data:
given, nick = data[self.nickid]
handle = self.model.append()
self.model.set_value(handle, 0, 1)
self.model.set_value(handle, 1, gid)
self.model.set_value(handle, 2, _('Nickname'))
self.model.set_value(handle, 3, nick)
self.model.set_value(handle, 4, p.get_primary_name().get_name())
self.nick_hash[key] = handle
if self.titleid in data:
title, given = data[self.titleid]
handle = self.model.append()
self.model.set_value(handle, 0, 1)
self.model.set_value(handle, 1, gid)
self.model.set_value(handle, 2, _('Person|Title'))
self.model.set_value(handle, 3, title)
self.model.set_value(handle, 4, p.get_primary_name().get_name())
self.title_hash[key] = handle
if self.pref1id in data:
given, prefixtotal, new_prefix = data[self.pref1id]
handle = self.model.append()
self.model.set_value(handle, 0, 1)
self.model.set_value(handle, 1, gid)
self.model.set_value(handle, 2, _('Prefix in given name'))
self.model.set_value(handle, 3, prefixtotal)
self.model.set_value(handle, 4, p.get_primary_name().get_name())
self.prefix1_hash[key] = handle
if self.compid in data:
surn_list, pref_list, con_list, prims, origs = data[self.compid]
handle = self.model.append()
self.model.set_value(handle, 0, 1)
self.model.set_value(handle, 1, gid)
self.model.set_value(handle, 2, _('Compound surname'))
newval = ''
for sur, pre, con in zip(surn_list, pref_list, con_list):
if newval:
newval += '-['
else:
newval = '['
newval += pre + ',' + sur
if con:
newval += ',' + con + ']'
else:
newval += ']'
self.model.set_value(handle, 3, newval)
self.model.set_value(handle, 4, p.get_primary_name().get_name())
self.compound_hash[key] = handle
self.progress.step()
self.progress.close()
self.show()
def on_help_clicked(self, obj):
"""Display the relevant portion of GRAMPS manual"""
GrampsDisplay.help(webpage=WIKI_HELP_PAGE, section=WIKI_HELP_SEC)
def on_ok_clicked(self, obj):
with self.db.transaction_begin(_("Extract information from names"),
batch=True) as trans:
self.db.disable_signals()
for key, data in self.handle_to_action.items():
p = self.db.get_person_from_handle(key)
if self.nickid in data:
modelhandle = self.nick_hash[key]
val = self.model.get_value(modelhandle, 0)
if val:
given, nick = data[self.nickid]
name = p.get_primary_name()
name.set_first_name(given.strip())
name.set_nick_name(nick.strip())
if self.titleid in data:
modelhandle = self.title_hash[key]
val = self.model.get_value(modelhandle, 0)
if val:
title, given = data[self.titleid]
name = p.get_primary_name()
name.set_first_name(given.strip())
name.set_title(title.strip())
if self.pref1id in data:
modelhandle = self.prefix1_hash[key]
val = self.model.get_value(modelhandle, 0)
if val:
given, prefixtotal, prefix = data[self.pref1id]
name = p.get_primary_name()
name.set_first_name(given.strip())
oldpref = name.get_surname_list()[0].get_prefix().strip()
if oldpref == '' or oldpref == prefix.strip():
name.get_surname_list()[0].set_prefix(prefix)
else:
name.get_surname_list()[0].set_prefix('%s %s' % (prefix, oldpref))
if self.compid in data:
modelhandle = self.compound_hash[key]
val = self.model.get_value(modelhandle, 0)
if val:
surns, prefs, cons, prims, origs = data[self.compid]
name = p.get_primary_name()
new_surn_list = []
for surn, pref, con, prim, orig in zip(surns, prefs, cons,
prims, origs):
new_surn_list.append(gen.lib.Surname())
new_surn_list[-1].set_surname(surn.strip())
new_surn_list[-1].set_prefix(pref.strip())
new_surn_list[-1].set_connector(con.strip())
new_surn_list[-1].set_primary(prim)
new_surn_list[-1].set_origintype(orig)
name.set_surname_list(new_surn_list)
self.db.commit_person(p, trans)
self.db.enable_signals()
self.db.request_rebuild()
self.close()
self.cb()
class PatchNamesOptions(tool.ToolOptions):
"""
Defines options and provides handling interface.
"""
def __init__(self, name, person_id=None):
tool.ToolOptions.__init__(self, name, person_id)
def strip(arg):
return arg.strip()