This is a major patch by Michael Nauta. It means all writes happen immediately to bsddb, and the bsddb rollback is used on a crash. Transaction is no longer used to store changes and do them on commit. Undo database is set on end. At the same time with statement is used throughout for transactions At the same time, the original bug in merge code should be fixed Still some issues, that will be ironed out svn: r16523
556 lines
23 KiB
Python
556 lines
23 KiB
Python
#
|
|
# Gramps - a GTK+/GNOME based genealogy program
|
|
#
|
|
# Copyright (C) 2000-2006 Donald N. Allingham
|
|
# Copyright (C) 2008 Brian G. Matherly
|
|
# Copyright (C) 2010 Jakim Friant
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
#
|
|
|
|
# $Id$
|
|
|
|
"""Tools/Database Processing/Extract Information from Names"""
|
|
|
|
#-------------------------------------------------------------------------
|
|
#
|
|
# python modules
|
|
#
|
|
#-------------------------------------------------------------------------
|
|
import re
|
|
|
|
#-------------------------------------------------------------------------
|
|
#
|
|
# gnome/gtk
|
|
#
|
|
#-------------------------------------------------------------------------
|
|
import gtk
|
|
import gobject
|
|
|
|
#-------------------------------------------------------------------------
|
|
#
|
|
# gramps modules
|
|
#
|
|
#-------------------------------------------------------------------------
|
|
import const
|
|
from gui.utils import ProgressMeter
|
|
from gui.plug import tool
|
|
from QuestionDialog import OkDialog
|
|
import ManagedWindow
|
|
import GrampsDisplay
|
|
import gen.lib
|
|
from gen.ggettext import sgettext as _
|
|
from glade import Glade
|
|
|
|
#-------------------------------------------------------------------------
|
|
#
|
|
# Constants
|
|
#
|
|
#-------------------------------------------------------------------------
|
|
WIKI_HELP_PAGE = '%s_-_Tools' % const.URL_MANUAL_PAGE
|
|
WIKI_HELP_SEC = _('manual|Extract_Information_from_Names')
|
|
|
|
#-------------------------------------------------------------------------
|
|
#
|
|
# constants
|
|
#
|
|
#-------------------------------------------------------------------------
|
|
|
|
# List of possible surname prefixes. Notice that you must run the tool
|
|
# multiple times for prefixes such as "van der".
|
|
PREFIX_LIST = [
|
|
"de", "van", "von", "di", "le", "du", "dela", "della",
|
|
"des", "vande", "ten", "da", "af", "den", "das", "dello",
|
|
"del", "en", "ein", "el" "et", "les", "lo", "los", "un",
|
|
"um", "una", "uno", "der", "ter", "te", "die",
|
|
]
|
|
|
|
CONNECTOR_LIST = ['e', 'y', ]
|
|
CONNECTOR_LIST_NONSPLIT = ['de', 'van']
|
|
|
|
_title_re = re.compile(r"^ ([A-Za-z][A-Za-z]+\.) \s+ (.+) $", re.VERBOSE)
|
|
_nick_re = re.compile(r"(.+) \s* [(\"] (.+) [)\"]", re.VERBOSE)
|
|
|
|
|
|
#-------------------------------------------------------------------------
|
|
#
|
|
# Search each name in the database, and compare the firstname against the
|
|
# form of "Name (Nickname)". If it matches, change the first name entry
|
|
# to "Name" and add "Nickname" into the nickname field. Also, search for
|
|
# surname prefixes. If found, change the name entry and put the prefix in
|
|
# the name prefix field.
|
|
#
|
|
#-------------------------------------------------------------------------
|
|
|
|
|
|
class PatchNames(tool.BatchTool, ManagedWindow.ManagedWindow):
|
|
titleid = 1
|
|
nickid = 2
|
|
pref1id = 3
|
|
compid = 4
|
|
|
|
def __init__(self, dbstate, uistate, options_class, name, callback=None):
|
|
self.label = _('Name and title extraction tool')
|
|
ManagedWindow.ManagedWindow.__init__(self, uistate, [], self.__class__)
|
|
self.set_window(gtk.Window(), gtk.Label(), '')
|
|
|
|
tool.BatchTool.__init__(self, dbstate, options_class, name)
|
|
if self.fail:
|
|
return
|
|
|
|
winprefix = gtk.Dialog("Default prefix and connector settings",
|
|
self.uistate.window,
|
|
gtk.DIALOG_MODAL|gtk.DIALOG_DESTROY_WITH_PARENT,
|
|
(gtk.STOCK_OK, gtk.RESPONSE_ACCEPT))
|
|
|
|
winprefix.set_has_separator(False)
|
|
winprefix.vbox.set_spacing(5)
|
|
hboxpref = gtk.HBox()
|
|
hboxpref.pack_start(gtk.Label(_('Prefixes to search for:')),
|
|
expand=False, padding=5)
|
|
self.prefixbox = gtk.Entry()
|
|
self.prefixbox.set_text(', '.join(PREFIX_LIST))
|
|
hboxpref.pack_start(self.prefixbox)
|
|
winprefix.vbox.pack_start(hboxpref)
|
|
hboxcon = gtk.HBox()
|
|
hboxcon.pack_start(gtk.Label(_('Connectors splitting surnames:')),
|
|
expand=False, padding=5)
|
|
self.conbox = gtk.Entry()
|
|
self.conbox.set_text(', '.join(CONNECTOR_LIST))
|
|
hboxcon.pack_start(self.conbox)
|
|
winprefix.vbox.pack_start(hboxcon)
|
|
hboxconns = gtk.HBox()
|
|
hboxconns.pack_start(gtk.Label(_('Connectors not splitting surnames:')),
|
|
expand=False, padding=5)
|
|
self.connsbox = gtk.Entry()
|
|
self.connsbox.set_text(', '.join(CONNECTOR_LIST_NONSPLIT))
|
|
hboxconns.pack_start(self.connsbox)
|
|
winprefix.vbox.pack_start(hboxconns)
|
|
winprefix.show_all()
|
|
winprefix.resize(700, 100)
|
|
|
|
response = winprefix.run()
|
|
self.prefix_list = self.prefixbox.get_text().split(',')
|
|
self.prefix_list = map(strip, self.prefix_list)
|
|
self.prefixbox = None
|
|
self.connector_list = self.conbox.get_text().split(',')
|
|
self.connector_list = map(strip, self.connector_list)
|
|
self.conbox = None
|
|
self.connector_list_nonsplit = self.connsbox.get_text().split(',')
|
|
self.connector_list_nonsplit = map(strip, self.connector_list_nonsplit)
|
|
self.connsbox = None
|
|
|
|
# Find a prefix in the first_name
|
|
self._fn_prefix_re = re.compile("(\S+)\s+(%s)\s*$" % '|'.join(self.prefix_list),
|
|
re.IGNORECASE)
|
|
|
|
# Find a prefix in the surname
|
|
self._sn_prefix_re = re.compile("^\s*(%s)\s+(.+)" % '|'.join(self.prefix_list),
|
|
re.IGNORECASE)
|
|
# Find a connector in the surname
|
|
self._sn_con_re = re.compile("^\s*(.+)\s+(%s)\s+(.+)" % '|'.join(self.connector_list),
|
|
re.IGNORECASE)
|
|
winprefix.destroy()
|
|
|
|
self.cb = callback
|
|
self.handle_to_action = {}
|
|
|
|
self.progress = ProgressMeter(
|
|
_('Extracting Information from Names'), '')
|
|
self.progress.set_pass(_('Analyzing names'),
|
|
self.db.get_number_of_people())
|
|
|
|
for person in self.db.iter_people():
|
|
key = person.handle
|
|
name = person.get_primary_name()
|
|
first = name.get_first_name()
|
|
sname = name.get_surname()
|
|
|
|
old_prefix = []
|
|
old_surn = []
|
|
old_con = []
|
|
old_prim = []
|
|
old_orig = []
|
|
for surn in name.get_surname_list():
|
|
old_prefix.append(surn.get_prefix())
|
|
old_surn.append(surn.get_surname())
|
|
old_con.append(surn.get_connector())
|
|
old_prim.append(surn.get_primary())
|
|
old_orig.append(surn.get_origintype())
|
|
|
|
if name.get_title():
|
|
old_title = [name.get_title()]
|
|
else:
|
|
old_title = []
|
|
new_title = []
|
|
|
|
match = _title_re.match(first)
|
|
while match:
|
|
groups = match.groups()
|
|
first = groups[1]
|
|
new_title.append(groups[0])
|
|
match = _title_re.match(first)
|
|
matchnick = _nick_re.match(first)
|
|
|
|
if new_title:
|
|
titleval = (" ".join(old_title+new_title), first)
|
|
if key in self.handle_to_action:
|
|
self.handle_to_action[key][self.titleid] = titleval
|
|
else:
|
|
self.handle_to_action[key] = {self.titleid: titleval}
|
|
elif matchnick:
|
|
# we check for nick, which changes given name like title
|
|
groups = matchnick.groups()
|
|
nickval = (groups[0], groups[1])
|
|
if key in self.handle_to_action:
|
|
self.handle_to_action[key][self.nickid] = nickval
|
|
else:
|
|
self.handle_to_action[key] = {self.nickid: nickval}
|
|
else:
|
|
# Try to find the name prefix in the given name, also this
|
|
# changes given name
|
|
match = self._fn_prefix_re.match(first)
|
|
if match:
|
|
groups = match.groups()
|
|
if old_prefix[0]:
|
|
# Put the found prefix before the old prefix
|
|
new_prefix = " ".join([groups[1], old_prefix[0]])
|
|
else:
|
|
new_prefix = groups[1]
|
|
pref1val = (groups[0], new_prefix, groups[1])
|
|
if key in self.handle_to_action:
|
|
self.handle_to_action[key][self.pref1id] = pref1val
|
|
else:
|
|
self.handle_to_action[key] = {self.pref1id: pref1val}
|
|
|
|
#check for Gedcom import of compound surnames
|
|
if len(old_surn) == 1 and old_con[0] == '':
|
|
prefixes = old_prefix[0].split(',')
|
|
surnames = old_surn[0].split(',')
|
|
if len(prefixes) > 1 and len(prefixes) == len(surnames):
|
|
#assume a list of prefix and a list of surnames
|
|
prefixes = map(strip, prefixes)
|
|
surnames = map(strip, surnames)
|
|
primaries = [False] * len(prefixes)
|
|
primaries[0] = True
|
|
origs = []
|
|
for ind in range(len(prefixes)):
|
|
origs.append(gen.lib.NameOriginType())
|
|
origs[0] = old_orig[0]
|
|
compoundval = (surnames, prefixes, ['']*len(prefixes),
|
|
primaries, origs)
|
|
if key in self.handle_to_action:
|
|
self.handle_to_action[key][self.compid] = compoundval
|
|
else:
|
|
self.handle_to_action[key] = {self.compid: compoundval}
|
|
#we cannot check compound surnames, so continue the loop
|
|
continue
|
|
|
|
# Next, try to split surname in compounds: prefix surname connector
|
|
found = False
|
|
new_prefix_list = []
|
|
new_surname_list = []
|
|
new_connector_list = []
|
|
new_prim_list = []
|
|
new_orig_list = []
|
|
ind = 0
|
|
cont = True
|
|
for pref, surn, con, prim, orig in zip(old_prefix, old_surn,
|
|
old_con, old_prim, old_orig):
|
|
surnval = surn.split()
|
|
if surnval == []:
|
|
new_prefix_list.append(pref)
|
|
new_surname_list.append('')
|
|
new_connector_list.append(con)
|
|
new_prim_list.append(prim)
|
|
new_orig_list.append(orig)
|
|
cont = False
|
|
continue
|
|
val = surnval.pop(0)
|
|
while cont:
|
|
new_prefix_list.append(pref)
|
|
new_surname_list.append('')
|
|
new_connector_list.append(con)
|
|
new_prim_list.append(prim)
|
|
new_orig_list.append(orig)
|
|
|
|
while cont and (val.lower() in self.prefix_list):
|
|
found = True
|
|
if new_prefix_list[-1]:
|
|
new_prefix_list[-1] += ' ' + val
|
|
else:
|
|
new_prefix_list[-1] = val
|
|
try:
|
|
val = surnval.pop(0)
|
|
except IndexError:
|
|
val = ''
|
|
cont = False
|
|
#after prefix we have a surname
|
|
if cont:
|
|
new_surname_list[-1] = val
|
|
try:
|
|
val = surnval.pop(0)
|
|
except IndexError:
|
|
val = ''
|
|
cont = False
|
|
#if value after surname indicates continue, then continue
|
|
while cont and (val.lower() in self.connector_list_nonsplit):
|
|
#add this val to the current surname
|
|
new_surname_list[-1] += ' ' + val
|
|
try:
|
|
val = surnval.pop(0)
|
|
except IndexError:
|
|
val = ''
|
|
cont = False
|
|
# if previous is non-splitting connector, then add new val to
|
|
# current surname
|
|
if cont and (new_surname_list[-1].split()[-1].lower()
|
|
in self.connector_list_nonsplit):
|
|
new_surname_list[-1] += ' ' + val
|
|
try:
|
|
val = surnval.pop(0)
|
|
except IndexError:
|
|
val = ''
|
|
cont = False
|
|
#if next is a connector, add it to the surname
|
|
if cont and val.lower() in self.connector_list:
|
|
found = True
|
|
if new_connector_list[-1]:
|
|
new_connector_list[-1] = ' ' + val
|
|
else:
|
|
new_connector_list[-1] = val
|
|
try:
|
|
val = surnval.pop(0)
|
|
except IndexError:
|
|
val = ''
|
|
cont = False
|
|
#initialize for a next surname in case there are still
|
|
#val
|
|
if cont:
|
|
found = True # we split surname
|
|
pref=''
|
|
con = ''
|
|
prim = False
|
|
orig = gen.lib.NameOriginType()
|
|
ind += 1
|
|
if found:
|
|
compoundval = (new_surname_list, new_prefix_list,
|
|
new_connector_list, new_prim_list, new_orig_list)
|
|
if key in self.handle_to_action:
|
|
self.handle_to_action[key][self.compid] = compoundval
|
|
else:
|
|
self.handle_to_action[key] = {self.compid: compoundval}
|
|
|
|
self.progress.step()
|
|
|
|
if self.handle_to_action:
|
|
self.display()
|
|
else:
|
|
self.progress.close()
|
|
self.close()
|
|
OkDialog(_('No modifications made'),
|
|
_("No titles, nicknames or prefixes were found"))
|
|
|
|
def build_menu_names(self, obj):
|
|
return (self.label, None)
|
|
|
|
def toggled(self, cell, path_string):
|
|
path = tuple(map(int, path_string.split(':')))
|
|
row = self.model[path]
|
|
row[0] = not row[0]
|
|
self.model.row_changed(path, row.iter)
|
|
|
|
def display(self):
|
|
|
|
self.top = Glade()
|
|
window = self.top.toplevel
|
|
self.top.connect_signals({
|
|
"destroy_passed_object" : self.close,
|
|
"on_ok_clicked" : self.on_ok_clicked,
|
|
"on_help_clicked" : self.on_help_clicked,
|
|
"on_delete_event" : self.close,
|
|
})
|
|
|
|
self.list = self.top.get_object("list")
|
|
self.set_window(window, self.top.get_object('title'), self.label)
|
|
|
|
self.model = gtk.ListStore(gobject.TYPE_BOOLEAN, gobject.TYPE_STRING,
|
|
gobject.TYPE_STRING, gobject.TYPE_STRING,
|
|
gobject.TYPE_STRING)
|
|
|
|
r = gtk.CellRendererToggle()
|
|
r.connect('toggled', self.toggled)
|
|
c = gtk.TreeViewColumn(_('Select'), r, active=0)
|
|
self.list.append_column(c)
|
|
|
|
c = gtk.TreeViewColumn(_('ID'), gtk.CellRendererText(), text=1)
|
|
self.list.append_column(c)
|
|
|
|
c = gtk.TreeViewColumn(_('Type'), gtk.CellRendererText(), text=2)
|
|
self.list.append_column(c)
|
|
|
|
c = gtk.TreeViewColumn(_('Value'), gtk.CellRendererText(), text=3)
|
|
self.list.append_column(c)
|
|
|
|
c = gtk.TreeViewColumn(_('Current Name'), gtk.CellRendererText(), text=4)
|
|
self.list.append_column(c)
|
|
|
|
self.list.set_model(self.model)
|
|
|
|
self.nick_hash = {}
|
|
self.title_hash = {}
|
|
self.prefix1_hash = {}
|
|
self.compound_hash = {}
|
|
|
|
self.progress.set_pass(_('Building display'),
|
|
len(self.handle_to_action.keys()))
|
|
|
|
for key, data in self.handle_to_action.items():
|
|
p = self.db.get_person_from_handle(key)
|
|
gid = p.get_gramps_id()
|
|
if self.nickid in data:
|
|
given, nick = data[self.nickid]
|
|
handle = self.model.append()
|
|
self.model.set_value(handle, 0, 1)
|
|
self.model.set_value(handle, 1, gid)
|
|
self.model.set_value(handle, 2, _('Nickname'))
|
|
self.model.set_value(handle, 3, nick)
|
|
self.model.set_value(handle, 4, p.get_primary_name().get_name())
|
|
self.nick_hash[key] = handle
|
|
|
|
if self.titleid in data:
|
|
title, given = data[self.titleid]
|
|
handle = self.model.append()
|
|
self.model.set_value(handle, 0, 1)
|
|
self.model.set_value(handle, 1, gid)
|
|
self.model.set_value(handle, 2, _('Person|Title'))
|
|
self.model.set_value(handle, 3, title)
|
|
self.model.set_value(handle, 4, p.get_primary_name().get_name())
|
|
self.title_hash[key] = handle
|
|
|
|
if self.pref1id in data:
|
|
given, prefixtotal, new_prefix = data[self.pref1id]
|
|
handle = self.model.append()
|
|
self.model.set_value(handle, 0, 1)
|
|
self.model.set_value(handle, 1, gid)
|
|
self.model.set_value(handle, 2, _('Prefix in given name'))
|
|
self.model.set_value(handle, 3, prefixtotal)
|
|
self.model.set_value(handle, 4, p.get_primary_name().get_name())
|
|
self.prefix1_hash[key] = handle
|
|
|
|
if self.compid in data:
|
|
surn_list, pref_list, con_list, prims, origs = data[self.compid]
|
|
handle = self.model.append()
|
|
self.model.set_value(handle, 0, 1)
|
|
self.model.set_value(handle, 1, gid)
|
|
self.model.set_value(handle, 2, _('Compound surname'))
|
|
newval = ''
|
|
for sur, pre, con in zip(surn_list, pref_list, con_list):
|
|
if newval:
|
|
newval += '-['
|
|
else:
|
|
newval = '['
|
|
newval += pre + ',' + sur
|
|
if con:
|
|
newval += ',' + con + ']'
|
|
else:
|
|
newval += ']'
|
|
self.model.set_value(handle, 3, newval)
|
|
self.model.set_value(handle, 4, p.get_primary_name().get_name())
|
|
self.compound_hash[key] = handle
|
|
|
|
self.progress.step()
|
|
|
|
self.progress.close()
|
|
self.show()
|
|
|
|
def on_help_clicked(self, obj):
|
|
"""Display the relevant portion of GRAMPS manual"""
|
|
GrampsDisplay.help(webpage=WIKI_HELP_PAGE, section=WIKI_HELP_SEC)
|
|
|
|
def on_ok_clicked(self, obj):
|
|
with self.db.transaction_begin(_("Extract information from names"),
|
|
batch=True) as trans:
|
|
self.db.disable_signals()
|
|
|
|
for key, data in self.handle_to_action.items():
|
|
p = self.db.get_person_from_handle(key)
|
|
if self.nickid in data:
|
|
modelhandle = self.nick_hash[key]
|
|
val = self.model.get_value(modelhandle, 0)
|
|
if val:
|
|
given, nick = data[self.nickid]
|
|
name = p.get_primary_name()
|
|
name.set_first_name(given.strip())
|
|
name.set_nick_name(nick.strip())
|
|
|
|
if self.titleid in data:
|
|
modelhandle = self.title_hash[key]
|
|
val = self.model.get_value(modelhandle, 0)
|
|
if val:
|
|
title, given = data[self.titleid]
|
|
name = p.get_primary_name()
|
|
name.set_first_name(given.strip())
|
|
name.set_title(title.strip())
|
|
|
|
if self.pref1id in data:
|
|
modelhandle = self.prefix1_hash[key]
|
|
val = self.model.get_value(modelhandle, 0)
|
|
if val:
|
|
given, prefixtotal, prefix = data[self.pref1id]
|
|
name = p.get_primary_name()
|
|
name.set_first_name(given.strip())
|
|
oldpref = name.get_surname_list()[0].get_prefix().strip()
|
|
if oldpref == '' or oldpref == prefix.strip():
|
|
name.get_surname_list()[0].set_prefix(prefix)
|
|
else:
|
|
name.get_surname_list()[0].set_prefix('%s %s' % (prefix, oldpref))
|
|
|
|
if self.compid in data:
|
|
modelhandle = self.compound_hash[key]
|
|
val = self.model.get_value(modelhandle, 0)
|
|
if val:
|
|
surns, prefs, cons, prims, origs = data[self.compid]
|
|
name = p.get_primary_name()
|
|
new_surn_list = []
|
|
for surn, pref, con, prim, orig in zip(surns, prefs, cons,
|
|
prims, origs):
|
|
new_surn_list.append(gen.lib.Surname())
|
|
new_surn_list[-1].set_surname(surn.strip())
|
|
new_surn_list[-1].set_prefix(pref.strip())
|
|
new_surn_list[-1].set_connector(con.strip())
|
|
new_surn_list[-1].set_primary(prim)
|
|
new_surn_list[-1].set_origintype(orig)
|
|
name.set_surname_list(new_surn_list)
|
|
|
|
self.db.commit_person(p, trans)
|
|
|
|
self.db.enable_signals()
|
|
self.db.request_rebuild()
|
|
self.close()
|
|
self.cb()
|
|
|
|
class PatchNamesOptions(tool.ToolOptions):
|
|
"""
|
|
Defines options and provides handling interface.
|
|
"""
|
|
|
|
def __init__(self, name, person_id=None):
|
|
tool.ToolOptions.__init__(self, name, person_id)
|
|
|
|
def strip(arg):
|
|
return arg.strip()
|
|
|