Several diagramming notations have been used for designing relational schema, as already introduced in the section on Relational Theory.
DataJoint comes with a powerful diagramming notation that becomes useful for both data modeling and for exploring an existing schema.
A DataJoint schema is depicted as a Directed Acyclic Graph (DAG), where nodes are tables and edges represent the foreign key constraints between them.
Let’s start with a simple schema called “projects” depicting employees, projects, and assignments of projects to employees, a many-to-many relationship.
import datajoint as dj
schema = dj.Schema("projects")
@schema
class Employee(dj.Manual):
definition = """
employee_id : int
---
employee_name : varchar(60)
"""
@schema
class Project(dj.Manual):
definition = """
project_code : varchar(8)
---
project_title : varchar(50)
start_date : date
end_date : date
"""
@schema
class Assignment(dj.Manual):
definition = """
-> Employee
-> Project
---
percent_effort : decimal(4,1) unsigned
"""
[2024-11-01 10:50:30,702][INFO]: Connecting root@localhost:3306
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
File /opt/conda/lib/python3.11/site-packages/pymysql/connections.py:649, in Connection.connect(self, sock)
648 try:
--> 649 sock = socket.create_connection(
650 (self.host, self.port), self.connect_timeout, **kwargs
651 )
652 break
File /opt/conda/lib/python3.11/socket.py:863, in create_connection(address, timeout, source_address, all_errors)
862 if not all_errors:
--> 863 raise exceptions[0]
864 raise ExceptionGroup("create_connection failed", exceptions)
File /opt/conda/lib/python3.11/socket.py:848, in create_connection(address, timeout, source_address, all_errors)
847 sock.bind(source_address)
--> 848 sock.connect(sa)
849 # Break explicitly a reference cycle
OSError: [Errno 99] Cannot assign requested address
During handling of the above exception, another exception occurred:
OperationalError Traceback (most recent call last)
Cell In[1], line 3
1 import datajoint as dj
----> 3 schema = dj.Schema("projects")
6 @schema
7 class Employee(dj.Manual):
8 definition = """
9 employee_id : int
10 ---
11 employee_name : varchar(60)
12 """
File /opt/conda/lib/python3.11/site-packages/datajoint/schemas.py:77, in Schema.__init__(self, schema_name, context, connection, create_schema, create_tables, add_objects)
75 self.declare_list = []
76 if schema_name:
---> 77 self.activate(schema_name)
File /opt/conda/lib/python3.11/site-packages/datajoint/schemas.py:119, in Schema.activate(self, schema_name, connection, create_schema, create_tables, add_objects)
117 self.connection = connection
118 if self.connection is None:
--> 119 self.connection = conn()
120 self.database = schema_name
121 if create_schema is not None:
File /opt/conda/lib/python3.11/site-packages/datajoint/connection.py:134, in conn(host, user, password, init_fun, reset, use_tls)
130 init_fun = (
131 init_fun if init_fun is not None else config["connection.init_function"]
132 )
133 use_tls = use_tls if use_tls is not None else config["database.use_tls"]
--> 134 conn.connection = Connection(host, user, password, None, init_fun, use_tls)
135 return conn.connection
File /opt/conda/lib/python3.11/site-packages/datajoint/connection.py:196, in Connection.__init__(self, host, user, password, port, init_fun, use_tls)
194 self._conn = None
195 self._query_cache = None
--> 196 connect_host_hook(self)
197 if self.is_connected:
198 logger.info("Connected {user}@{host}:{port}".format(**self.conn_info))
File /opt/conda/lib/python3.11/site-packages/datajoint/connection.py:53, in connect_host_hook(connection_obj)
49 raise errors.DataJointError(
50 "Connection plugin '{}' not found.".format(plugin_name)
51 )
52 else:
---> 53 connection_obj.connect()
File /opt/conda/lib/python3.11/site-packages/datajoint/connection.py:220, in Connection.connect(self)
218 warnings.filterwarnings("ignore", ".*deprecated.*")
219 try:
--> 220 self._conn = client.connect(
221 init_command=self.init_fun,
222 sql_mode="NO_ZERO_DATE,NO_ZERO_IN_DATE,ERROR_FOR_DIVISION_BY_ZERO,"
223 "STRICT_ALL_TABLES,NO_ENGINE_SUBSTITUTION,ONLY_FULL_GROUP_BY",
224 charset=config["connection.charset"],
225 **{
226 k: v
227 for k, v in self.conn_info.items()
228 if k not in ["ssl_input", "host_input"]
229 },
230 )
231 except client.err.InternalError:
232 self._conn = client.connect(
233 init_command=self.init_fun,
234 sql_mode="NO_ZERO_DATE,NO_ZERO_IN_DATE,ERROR_FOR_DIVISION_BY_ZERO,"
(...)
245 },
246 )
File /opt/conda/lib/python3.11/site-packages/pymysql/connections.py:361, in Connection.__init__(self, user, password, host, database, unix_socket, port, charset, collation, sql_mode, read_default_file, conv, use_unicode, client_flag, cursorclass, init_command, connect_timeout, read_default_group, autocommit, local_infile, max_allowed_packet, defer_connect, auth_plugin_map, read_timeout, write_timeout, bind_address, binary_prefix, program_name, server_public_key, ssl, ssl_ca, ssl_cert, ssl_disabled, ssl_key, ssl_key_password, ssl_verify_cert, ssl_verify_identity, compress, named_pipe, passwd, db)
359 self._sock = None
360 else:
--> 361 self.connect()
File /opt/conda/lib/python3.11/site-packages/pymysql/connections.py:716, in Connection.connect(self, sock)
714 if DEBUG:
715 print(exc.traceback)
--> 716 raise exc
718 # If e is neither DatabaseError or IOError, It's a bug.
719 # But raising AssertionError hides original error.
720 # So just reraise it.
721 raise
OperationalError: (2003, "Can't connect to MySQL server on 'localhost' ([Errno 99] Cannot assign requested address)")
d = dj.Diagram(schema)
d.save('project.svg')