-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpreprocess.py
107 lines (83 loc) · 3.84 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import csv
import sys
import dataset
import secrets
from utils import *
from config import DB as DB_LOC
def _sanitize(name) -> str:
    """Return *name* lower-cased with spaces, commas, periods and hyphens removed."""
    return str(name).translate(str.maketrans("", "", " ,.-")).lower().rstrip()


def _block_slots(block: str) -> list:
    """Return the teacher-record keys that the CSV ``block`` value maps to.

    A multi-character value is scanned for the letters A-G; "Fri" is
    lower-cased first so its capital F is not mistaken for block F.
    A single-character value is used as the key unchanged.
    """
    if len(block) > 1:
        masked = block.replace("Fri", "fri")
        return [letter for letter in "ABCDEFG" if letter in masked]
    return [block]


def _upsert_teacher(teachers, row, block: str) -> None:
    """Create or update the ``teachers`` record for the teacher named in *row*.

    For every key returned by ``_block_slots(block)`` the record gets
    ``<key> = "<course> <sec>"``.  The companion ``<key>_id`` value is kept
    when it is already truthy and set to 0 otherwise.
    """
    teacher = teachers.find_one(name=row['teacher_name'])
    if teacher is None:
        teacher = {
            'name': row['teacher_name'],
            'first_name': row['first_name'],
            'last_name': row['last_name'],
            'email': row['teacher_email'],
            'office_id': 0,
        }
    elif not teacher.get("email"):
        # Only fill in an email when the stored record has none.
        teacher['email'] = row["teacher_email"]

    for slot in _block_slots(block):
        teacher[slot] = row['course'] + " " + row['sec']
        slot_id = slot + "_id"
        if teacher.get(slot_id):
            log.info(f"Teacher id {teacher.get(slot_id)} preserved")
        else:
            teacher[slot_id] = 0

    # A brand-new record has no "id" key, so the upsert inserts it.
    teachers.upsert(teacher, ["id"])


def import_data(filename: str):
    """Load course rows from a CSV file into the ``courses`` and ``teachers`` tables.

    Each CSV row is read as a dict and is expected to provide the columns
    ``course``, ``sec``, ``student_email``, ``first_name``, ``last_name``,
    ``teacher_email`` and ``block``.  For every row:

    * an existing ``courses`` record with the same course, section and
      student email is looked up; its ``id`` and any non-zero ``meeting_id``
      are carried over, otherwise ``meeting_id`` is set to 0;
    * ``teacher_name`` is built from the title-cased first and last name,
      and ``first_name`` / ``last_name`` are replaced by sanitized forms;
    * rows whose ``block`` is empty are skipped;
    * the teacher record and then the course record are upserted on ``id``.

    Args:
        filename: Path of the CSV file to import.
    """
    # Connecting to the DB
    db = dataset.connect(DB_LOC, engine_kwargs={'pool_recycle': 3600})
    courses = db['courses']
    teachers = db['teachers']

    # newline='' is what the csv module requires so that quoted fields
    # containing line breaks are parsed correctly.
    with open(filename, newline='') as f:
        for row in csv.DictReader(f):
            existing = courses.find_one(
                course=row['course'],
                sec=row['sec'],
                student_email=row['student_email'],
            )
            meeting_id = 0
            if existing:
                row['id'] = existing['id']
                if existing['meeting_id'] != 0:
                    log.info("Preserved Meeting id " + str(existing['meeting_id']))
                    meeting_id = existing['meeting_id']
            row['meeting_id'] = meeting_id

            # Display name is derived before the name parts are sanitized.
            row['teacher_name'] = (
                row['first_name'].rstrip().title()
                + ' '
                + row['last_name'].rstrip().title()
            )
            row['first_name'] = _sanitize(row['first_name'])
            row['last_name'] = _sanitize(row['last_name'])

            block = row['block']
            # NOTE(review): the source's indentation was lost; this assumes a
            # row without a block is skipped entirely (no teacher or course
            # upsert) -- confirm against the original file.
            if block == "":
                continue

            _upsert_teacher(teachers, row, block)
            courses.upsert(dict(row), ["id"])  # upserting info
def upsert_db(db, email):
    """Placeholder with no implementation: ignores *db* and *email*, returns None."""
    return None
if __name__ == "__main__":
    """
    Usage:
    python3 preprocess.py data/courses.csv
    Final argument optional
    """
    # Use the given path only when exactly one argument was supplied;
    # otherwise fall back to the default CSV location.
    csv_path = sys.argv[1] if len(sys.argv) == 2 else "data/courses.csv"
    import_data(csv_path)