Skip to article frontmatterSkip to article content

Design Alterations

After defining a table, modifications to its table definition in the DataJoint table class will have no effect. The definition string is only used at table definition time and is ignored if the corresponding table already exists.

Changes in schema design are often accomplished by schema migration, i.e. defining new tables and migrating the data into them and then dropping the old tables.


import datajoint as dj
schema = dj.schema('university')
schema.spawn_missing_classes()
dj.Diagram(schema)
Loading...

Let’s define table Exam to contain the results of midterm and final exams.

@schema
class Exam(dj.Manual):
    definition = """
    -> Enroll 
    ---
    score  : decimal(5,2)   # percent of total 
    """
Exam()
Loading...
dj.Diagram(schema)
Loading...

Let’s change the definition to record the exam date too

@schema
class Exam(dj.Manual):
    definition = """
    -> Enroll
    ---
    exam_date : date
    score  : decimal(5,2)   # percent of total 
    """

Even though we updated the definition in the class, the change is not reflected on the server:

Exam()
Loading...

You can examine the definition on the server using the describe method:

print(Exam.describe())
-> Enroll
---
score                : decimal(5,2)                 # percent of total

Exam.heading
# student_id : int unsigned # university-wide ID number dept : varchar(6) # abbreviated department name, e.g. BIOL course : int unsigned # course number, e.g. 1010 term_year : year # term : enum('Spring','Summer','Fall') # section : char(1) # --- score : decimal(5,2) # percent of total

One solution is to simply drop the table and declare it again, which is fine when it contains no valuable data. But let’s consider the case when the table is already populated and we wish to keep the existing data.

First, let’s insert some exam entries:

# pick 100 random enrollments from the current term
import random
keys = random.sample(((Enroll - Exam) & CurrentTerm).fetch('KEY'), 100)
# assign random scores
for key in keys:
    Exam.insert1(dict(key, score=random.randint(0,10000)/100))

Now we can alter the Exam table with the new definition:

Exam()
Loading...

We can now use the alter method to apply the new definition:

Exam.alter()
---------------------------------------------------------------------------
OperationalError                          Traceback (most recent call last)
Cell In[18], line 1
----> 1 Exam.alter()

File /opt/conda/lib/python3.13/site-packages/datajoint/table.py:150, in Table.alter(self, prompt, context)
    148     for store in external_stores:
    149         self.connection.schemas[self.database].external[store]
--> 150     self.connection.query(sql)
    151 except AccessError:
    152     # skip if no create privilege
    153     pass

File /opt/conda/lib/python3.13/site-packages/datajoint/connection.py:319, in Connection.query(self, query, args, as_dict, suppress_warnings, reconnect)
    317 cursor = self._conn.cursor(cursor=cursor_class)
    318 try:
--> 319     self._execute_query(cursor, query, args, suppress_warnings)
    320 except errors.LostConnectionError:
    321     if not reconnect:

File /opt/conda/lib/python3.13/site-packages/datajoint/connection.py:275, in Connection._execute_query(cursor, query, args, suppress_warnings)
    273         cursor.execute(query, args)
    274 except client.err.Error as err:
--> 275     raise translate_query_error(err, query)

File /opt/conda/lib/python3.13/site-packages/datajoint/connection.py:273, in Connection._execute_query(cursor, query, args, suppress_warnings)
    270         if suppress_warnings:
    271             # suppress all warnings arising from underlying SQL library
    272             warnings.simplefilter("ignore")
--> 273         cursor.execute(query, args)
    274 except client.err.Error as err:
    275     raise translate_query_error(err, query)

File /opt/conda/lib/python3.13/site-packages/pymysql/cursors.py:153, in Cursor.execute(self, query, args)
    149     pass
    151 query = self.mogrify(query, args)
--> 153 result = self._query(query)
    154 self._executed = query
    155 return result

File /opt/conda/lib/python3.13/site-packages/pymysql/cursors.py:322, in Cursor._query(self, q)
    320 conn = self._get_db()
    321 self._clear_result()
--> 322 conn.query(q)
    323 self._do_get_result()
    324 return self.rowcount

File /opt/conda/lib/python3.13/site-packages/pymysql/connections.py:575, in Connection.query(self, sql, unbuffered)
    573     sql = sql.encode(self.encoding, "surrogateescape")
    574 self._execute_command(COMMAND.COM_QUERY, sql)
--> 575 self._affected_rows = self._read_query_result(unbuffered=unbuffered)
    576 return self._affected_rows

File /opt/conda/lib/python3.13/site-packages/pymysql/connections.py:826, in Connection._read_query_result(self, unbuffered)
    824     result.init_unbuffered_query()
    825 else:
--> 826     result.read()
    827 self._result = result
    828 if result.server_status is not None:

File /opt/conda/lib/python3.13/site-packages/pymysql/connections.py:1203, in MySQLResult.read(self)
   1201 def read(self):
   1202     try:
-> 1203         first_packet = self.connection._read_packet()
   1205         if first_packet.is_ok_packet():
   1206             self._read_ok_packet(first_packet)

File /opt/conda/lib/python3.13/site-packages/pymysql/connections.py:782, in Connection._read_packet(self, packet_type)
    780     if self._result is not None and self._result.unbuffered_active is True:
    781         self._result.unbuffered_active = False
--> 782     packet.raise_for_error()
    783 return packet

File /opt/conda/lib/python3.13/site-packages/pymysql/protocol.py:219, in MysqlPacket.raise_for_error(self)
    217 if DEBUG:
    218     print("errno =", errno)
--> 219 err.raise_mysql_exception(self._data)

File /opt/conda/lib/python3.13/site-packages/pymysql/err.py:150, in raise_mysql_exception(data)
    148 if errorclass is None:
    149     errorclass = InternalError if errno < 1000 else OperationalError
--> 150 raise errorclass(errno, errval)

OperationalError: (1292, "Incorrect date value: '0000-00-00' for column 'exam_date' at row 1")

Oh! New attributes cannot be added to tables with existing data without providing a default value. Let’s update the definition to allow exam_date to be empty (default to null) and alter the table again.

@schema
class Exam(dj.Manual):
    definition = """
    -> Enroll 
    ---
    exam_date = null: date  
    score  : decimal(5,2)   # percent of total 
    """
Exam.alter()
Exam()

Now let’s add some grades for today’s exam:

from datetime import datetime
today = datetime.now().date().isoformat()
today
# pick 20 random enrollments from the current term
keys = random.sample(((Enroll - Exam) & CurrentTerm).fetch('KEY'), 20)
# assign random scores
for key in keys:
    Exam.insert1(dict(key, score=random.randint(0,10000)/100, exam_date=today))
Exam()

Now let’s say we want to rename score into exam_score:

@schema
class Exam(dj.Manual):
    definition = """
    -> Enroll 
    ---
    exam_date = null: date  
    exam_score  : decimal(5,2)   # percent of total 
    """
# Say NO!
Exam.alter()

Note that rather than renaming, alter attempted to drop the old attribute and add the new one.

To rename, we must indicate the old attribute name in curly brackets as the first thing in the attribute comment:

@schema
class Exam(dj.Manual):
    definition = """
    -> Enroll 
    ---
    exam_score  : decimal(5,2)   # percent of total 
    exam_date = null: date  
    """
Exam.alter()
Exam()

The old attribute name is preserved in the comment just in case, but you can remove it with another alter command:

Exam.describe();
@schema
class Exam(dj.Manual):
    definition = """
    -> Enroll 
    ---
    exam_date = null: date  
    exam_score  : decimal(6,3)   # percent of total 
    photocopy: longb  
    """
Exam.alter()
Exam()
Exam.describe();

We can change the data type of an existing attribute as long as the existing data can fit. However, some precision may be lost.

For example, let’s attempt to change the exam_score attribute into a tinyint unsigned. This will result in the rounding of the scores:

@schema
class Exam(dj.Manual):
    definition = """
    -> Enroll 
    ---
    exam_date = null: date  
    exam_score  : tinyint unsigned  # percent of total 
    """
Exam.alter()
Exam()

But if we attmept to change it to decimal(3,2), for example, we may get an error because some values are too large to fit in this data type.

@schema
class Exam(dj.Manual):
    definition = """
    -> Enroll 
    ---
    exam_date = null: date  
    exam_score  : decimal(3,2)   # percent of total 
    """
Exam.alter()
@schema
class Exam(dj.Manual):
    definition = """
    -> Enroll 
    ---
    exam_date = null: date  
    exam_score  : decimal(5,2)   # percent of total 
    """
Exam.alter()
# restored to higher precision but fractional part has been lost:
Exam()

Finally, let’s add a table comment and reorder the attributes:

@schema
class Exam(dj.Manual):
    definition = """
    # Exam taken by a student enrolled in a course section and the grade
    -> Enroll 
    ---
    exam_score  : decimal(5,2)   # percent of total 
    """
Exam.alter()
Exam()
#clean up
Exam.drop()