#!/usr/bin/env python
from __future__ import print_function, unicode_literals
import argparse
import csv
import os
import re
import sys
if __name__ == '__main__':
CUR_FILE = os.path.abspath(__file__)
DJANGO_DIR = os.path.abspath(os.path.join(os.path.dirname(CUR_FILE), '..', '..'))
sys.path.append(DJANGO_DIR)
os.environ['DJANGO_SETTINGS_MODULE'] = 'squaresdb.settings'
#pylint:disable=wrong-import-position
import django
django.setup()
from django.core import management
from django.db import transaction
from django.contrib.auth import get_user_model
import reversion as revisions
import squaresdb.membership.models
# Legacy DB format: each field line is "%<code> <value>". Known codes:
# %A Name
# %D Class (from docs: date this item updated)
# %N Email
# %K Frequency
# %T Town
# %P Zip code
# %L Street address
# %S State
# %H Phone number (home?)
# %O MIT affiliation (alum/staff/student) (from docs: other comments)
# %W Phone number (work?)
# %E comments (pronunciation, maiden name, parents phone number, etc.) -- docs mention employer
# %J Reason for removing from signin?
# %Q name for mailings (eg, "John and Jane Doe")
# %U reason no longer receives mailing (commonly, combined with spouse)
# %M Campus mailing address (in a few cases, indicates that the person doesn't
# want to give an address, and %U is set too)
# %I prepaid subscription ("subscription" or number of weeks)
# %C Country (or "Cambridge"...)
# %V Additional address line (mostly FSILG, but some others)
# %B PO Box (1 case)
# (code, CSV column label) pairs, in CSV column order.
# '#' and 'd' are synthetic codes: 'row' is filled in by initial_dict() and
# 'class' is split out of a %D line by _parse_line() when CLASS_RE matches.
FIELDS = (
    ('#', 'row'),
    ('A', 'name'),
    ('d', 'class'),
    ('D', 'last_update_info'),
    ('N', 'email'),
    ('O', 'mitaffil'),
    ('E', 'comments'),
    ('J', 'no_signin_reason'),
    ('U', 'no_mail_reason'),

    # address info
    ('Q', 'mail_name'),
    ('M', 'campus_addr'),
    ('B', 'pobox'),
    ('L', 'street'),
    ('V', 'addr2'),
    ('T', 'town'),
    ('P', 'zip'),
    ('S', 'state'),
    ('C', 'country'),

    # other fields (originally marked as ignored; load_row does read 'frequency')
    ('K', 'frequency'),
    ('I', 'subscription'),
    ('H', 'home'),
    ('W', 'work'),
)

# Field code -> column label lookup.
CODES = {code: label for code, label in FIELDS}

# Column labels only, in FIELDS order (used as the CSV header).
LABELS = [label for _code, label in FIELDS]

# Matches a %D value that starts with a class description, e.g.
# "Fall 2005 PE class, <rest>"; the text after "class" and any following
# " ,.;" characters is captured as 'update'.
CLASS_RE = re.compile(
    r"(?P<class>(spring|fall) \d+ (?P<pe>PE |))class[ ,.;]*(?P<update>.*)",
    re.IGNORECASE,
)
[docs]
def initial_dict(entry):
    """Return a fresh record dict whose only key, 'row', holds *entry*."""
    record = {'row': entry}
    return record
def _parse_line(line, names, data):
    """Fold one ``%<code> <value>`` line into *names* or *data* (both mutated).

    * ``A`` lines append their value to *names*.
    * ``E`` lines may repeat; values are joined with newlines.
    * ``D`` lines are split with CLASS_RE into 'class' and the remaining
      update text when they match; otherwise the whole value is stored.
    * Any other code must appear at most once per record.

    Raises KeyError if the code (second character of *line*) is not in CODES.
    """
    field_code = line[1]
    label = CODES[field_code]
    # The value is everything after the first space.
    value = line.partition(' ')[2]

    if field_code == 'A':
        names.append(value)
        return

    if field_code == 'E':
        if label in data:
            data[label] = data[label] + '\n' + value
        else:
            data[label] = value
        return

    if field_code == 'D':
        class_match = CLASS_RE.match(value)
        if class_match is None:
            data[label] = value
        else:
            data['class'] = class_match.group('class')
            data[label] = class_match.group('update')
        return

    assert label not in data, "Non-unique line: data=%s, line='%s'" % (data, line)
    data[label] = value
[docs]
def _flush_record(entries, names, data):
    """Append one copy of *data* per name in *names* to *entries*."""
    for name in names:
        record = data.copy()
        record['name'] = name
        entries.append(record)


def parse_to_dicts(db_fp):
    """Parse the legacy database text into a list of dicts, one per name.

    *db_fp* is any iterable of text lines. Blank lines separate records.
    The first line is always treated as a separator and its content is
    discarded. Lines starting with ``.\\"`` are comments. Every other line
    must start with ``%`` and is handed to _parse_line().

    A record with several ``%A`` lines yields one dict per name, all sharing
    the record's other fields and 'row' number. The row counter is bumped at
    every separator line, so numbering starts at 2 and consecutive blank
    lines leave gaps.

    Raises AssertionError on a non-blank, non-comment line that is shorter
    than two characters or does not start with ``%``.
    """
    entries = []
    is_first = True
    names = []
    entry = 1
    data = None  # replaced by the first line, before any field is parsed
    for line in db_fp:
        line = line.strip()
        if not line or is_first:
            _flush_record(entries, names, data)
            entry += 1
            data = initial_dict(entry)
            names = []
            is_first = False
        elif line.startswith(r'.\"'):
            pass # comment line
        else:
            assert len(line) >= 2 and line[0] == '%', "unexpected line: %s" % (line, )
            _parse_line(line, names, data)
    # The input may end without a trailing blank line; without this final
    # flush the last record would be silently dropped.
    _flush_record(entries, names, data)
    return entries
[docs]
def dump_dicts(csv_fp, entries):
    """Write *entries* to *csv_fp* as CSV: a LABELS header row, then one row per dict."""
    out = csv.DictWriter(csv_fp, fieldnames=LABELS)
    out.writeheader()
    for record in entries:
        out.writerow(record)
[docs]
def parse_person_type(affil):
    """Map a legacy affiliation string to a 2-tuple of codes.

    load_row() assigns the pair to ``(person.mit_affil_id, person.fee_cat_id)``.
    Any string containing 'alum' maps to ``('alum', 'full')``; the other
    known values must match exactly. The empty string maps to
    ``('none', 'full')``; anything else fails the assertion.
    """
    # Substring test comes first so it wins over the exact-match table.
    if 'alum' in affil:
        return 'alum', 'full'

    exact_matches = {
        # fix "MIT Student" entries before final import
        'MIT undergrad': ('undergrad', 'mit-student'),
        'MIT student': ('undergrad', 'mit-student'),
        'MIT grad student': ('grad', 'mit-student'),
        'student': ('none', 'student'),
        'staff': ('staff', 'full'),
    }
    known = exact_matches.get(affil)
    if known is not None:
        return known

    assert affil == ''
    return 'none', 'full'
[docs]
def load_row(row, system_people):
    """Create the database objects for one CSV row.

    Saves a Person. When the row has a 'class' value, the matching TSClass
    is fetched or created (new ones get ``system_people['cc']`` as
    coordinator) and a TSClassMember links the person to it. Any non-empty
    comment-like columns are combined into a single PersonComment authored
    by the 'importer@SYSTEM' user.

    *row* is a mapping keyed by the CSV column labels; *system_people* must
    contain the key 'cc'.
    """
    models = squaresdb.membership.models

    person = models.Person()
    person.name = row['name']
    person.email = row['email']
    person.level_id = '?'

    if row['class']:
        person.status_id = 'grad'
        tsclass, _created = models.TSClass.objects.get_or_create(
            label=row['class'],
            defaults={'coordinator': system_people['cc']},
        )
    else:
        person.status_id = 'member'
        tsclass = None

    affil_id, fee_cat_id = parse_person_type(row['mitaffil'])
    person.mit_affil_id = affil_id
    person.fee_cat_id = fee_cat_id

    # Blank frequency becomes "never"; "rarely <anything>" collapses to "rarely".
    frequency = row['frequency'] or "never"
    if frequency.startswith('rarely '):
        frequency = 'rarely'
    person.frequency_id = frequency

    # The person must be saved before dependent rows can reference it.
    person.save()

    if tsclass:
        # NOTE(review): pe is always False here, although CLASS_RE captures a
        # "PE " marker inside the class label -- confirm this is intended.
        models.TSClassMember.objects.create(student=person, clas=tsclass, pe=False)

    # (prefix, column) pairs, in the order they appear in the comment body.
    comment_sources = (
        ("Comments: ", 'comments'),
        ("Last update: ", 'last_update_info'),
        ("No signin reason: ", 'no_signin_reason'),
        ("No mail reason: ", 'no_mail_reason'),
    )
    comments = [prefix + row[column] for prefix, column in comment_sources if row[column]]
    if not comments:
        return

    author = get_user_model().objects.get(username='importer@SYSTEM')
    comment = models.PersonComment(author=author, person=person)
    comment.body = '\n\n'.join(comments)
    comment.save()
[docs]
@transaction.atomic
@revisions.create_revision()
def load_csv(csv_fp):
    """Import every row of the CSV file *csv_fp* via load_row().

    The decorators wrap the whole import in ``transaction.atomic`` and a
    reversion revision, which is attributed to the 'importer@SYSTEM' user.
    The Person with email 'squaresdb-placeholder-cc@mit.edu' is looked up
    once and passed to load_row() as ``system_people['cc']``.
    """
    revisions.set_comment("Loading people from CSV file")
    import_user = get_user_model().objects.get(username='importer@SYSTEM')
    revisions.set_user(import_user)

    rows = csv.DictReader(csv_fp)
    people = squaresdb.membership.models.Person.objects
    system_people = {
        'cc': people.get(email='squaresdb-placeholder-cc@mit.edu'),
    }
    for record in rows:
        load_row(record, system_people)
[docs]
def parse_args():
    """Parse the command line (``sys.argv``) and return the argparse namespace.

    Attributes: ``mode`` ('legacy2csv' or 'csv2django'), ``csv`` (required
    path), and ``initial_revs`` (True unless --no-initial-revisions is given).
    """
    cli = argparse.ArgumentParser(description='parse legacy Tech Squares DB')
    cli.add_argument('mode', type=str, choices=('legacy2csv', 'csv2django'))
    cli.add_argument('--csv', type=str, required=True)
    cli.add_argument(
        '--no-initial-revisions',
        action='store_false',
        dest='initial_revs',
    )
    return cli.parse_args()
[docs]
def main():
    """Command-line entry point.

    In 'legacy2csv' mode, parse the legacy database from stdin and write it
    to the --csv file. In 'csv2django' mode, optionally run the
    ``createinitialrevisions`` management command for the 'membership' app,
    then import the --csv file via load_csv().
    """
    args = parse_args()
    if args.mode == 'legacy2csv':
        parsed_db = parse_to_dicts(sys.stdin)
        # newline='' is what the csv module requires for its file objects:
        # the writer emits its own line terminators, and platform newline
        # translation on top of that would produce "\r\r\n" row endings.
        with open(args.csv, 'w', encoding='utf-8', newline='') as csv_fp:
            dump_dicts(csv_fp, parsed_db)
    else:
        assert args.mode == 'csv2django'
        if args.initial_revs:
            print("Creating initial revisions...")
            management.call_command('createinitialrevisions', 'membership',
                                    comment='Initial revision (pre-import)')
            print("Created initial revisions.")
        # newline='' so the csv reader sees the raw line endings, including
        # newlines embedded in quoted fields (the comments column).
        with open(args.csv, 'r', encoding='utf-8', newline='') as csv_fp:
            print("Importing CSV file...")
            load_csv(csv_fp)
# Run the command-line tool only when executed as a script, not on import.
if __name__ == '__main__':
    main()