Diagramming - The DataJoint Book

Several diagramming notations have been used for designing relational schema, as already introduced in the section on Relational Theory.

DataJoint comes with a powerful diagramming notation that becomes useful for both data modeling and for exploring an existing schema.

A DataJoint schema is depicted as a Directed Acyclic Graph (DAG), where nodes are tables and edges represent the foreign key constraints between them.

Let’s start with a simple schema called “projects” depicting employees, projects, and assignments of projects to employees, a many-to-many relationship.

import datajoint as dj

schema = dj.Schema("projects")

@schema
class Employee(dj.Manual):
    definition = """
    employee_id : int
    ---
    employee_name : varchar(60)
    """


@schema
class Project(dj.Manual):
    definition = """
    project_code  : varchar(8)
    ---
    project_title : varchar(50)
    start_date : date
    end_date : date
    """

@schema
class Assignment(dj.Manual):
    definition = """
    -> Employee
    -> Project
    ---
    percent_effort : decimal(4,1) unsigned
    """

[2024-11-01 10:50:30,702][INFO]: Connecting root@localhost:3306

---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
File /opt/conda/lib/python3.11/site-packages/pymysql/connections.py:649, in Connection.connect(self, sock)
    648 try:
--> 649     sock = socket.create_connection(
    650         (self.host, self.port), self.connect_timeout, **kwargs
    651     )
    652     break

File /opt/conda/lib/python3.11/socket.py:863, in create_connection(address, timeout, source_address, all_errors)
    862 if not all_errors:
--> 863     raise exceptions[0]
    864 raise ExceptionGroup("create_connection failed", exceptions)

File /opt/conda/lib/python3.11/socket.py:848, in create_connection(address, timeout, source_address, all_errors)
    847     sock.bind(source_address)
--> 848 sock.connect(sa)
    849 # Break explicitly a reference cycle

OSError: [Errno 99] Cannot assign requested address

During handling of the above exception, another exception occurred:

OperationalError                          Traceback (most recent call last)
Cell In[1], line 3
      1 import datajoint as dj
----> 3 schema = dj.Schema("projects")
      6 @schema
      7 class Employee(dj.Manual):
      8     definition = """
      9     employee_id : int
     10     ---
     11     employee_name : varchar(60)
     12     """

File /opt/conda/lib/python3.11/site-packages/datajoint/schemas.py:77, in Schema.__init__(self, schema_name, context, connection, create_schema, create_tables, add_objects)
     75 self.declare_list = []
     76 if schema_name:
---> 77     self.activate(schema_name)

File /opt/conda/lib/python3.11/site-packages/datajoint/schemas.py:119, in Schema.activate(self, schema_name, connection, create_schema, create_tables, add_objects)
    117     self.connection = connection
    118 if self.connection is None:
--> 119     self.connection = conn()
    120 self.database = schema_name
    121 if create_schema is not None:

File /opt/conda/lib/python3.11/site-packages/datajoint/connection.py:134, in conn(host, user, password, init_fun, reset, use_tls)
    130     init_fun = (
    131         init_fun if init_fun is not None else config["connection.init_function"]
    132     )
    133     use_tls = use_tls if use_tls is not None else config["database.use_tls"]
--> 134     conn.connection = Connection(host, user, password, None, init_fun, use_tls)
    135 return conn.connection

File /opt/conda/lib/python3.11/site-packages/datajoint/connection.py:196, in Connection.__init__(self, host, user, password, port, init_fun, use_tls)
    194 self._conn = None
    195 self._query_cache = None
--> 196 connect_host_hook(self)
    197 if self.is_connected:
    198     logger.info("Connected {user}@{host}:{port}".format(**self.conn_info))

File /opt/conda/lib/python3.11/site-packages/datajoint/connection.py:53, in connect_host_hook(connection_obj)
     49         raise errors.DataJointError(
     50             "Connection plugin '{}' not found.".format(plugin_name)
     51         )
     52 else:
---> 53     connection_obj.connect()

File /opt/conda/lib/python3.11/site-packages/datajoint/connection.py:220, in Connection.connect(self)
    218 warnings.filterwarnings("ignore", ".*deprecated.*")
    219 try:
--> 220     self._conn = client.connect(
    221         init_command=self.init_fun,
    222         sql_mode="NO_ZERO_DATE,NO_ZERO_IN_DATE,ERROR_FOR_DIVISION_BY_ZERO,"
    223         "STRICT_ALL_TABLES,NO_ENGINE_SUBSTITUTION,ONLY_FULL_GROUP_BY",
    224         charset=config["connection.charset"],
    225         **{
    226             k: v
    227             for k, v in self.conn_info.items()
    228             if k not in ["ssl_input", "host_input"]
    229         },
    230     )
    231 except client.err.InternalError:
    232     self._conn = client.connect(
    233         init_command=self.init_fun,
    234         sql_mode="NO_ZERO_DATE,NO_ZERO_IN_DATE,ERROR_FOR_DIVISION_BY_ZERO,"
   (...)
    245         },
    246     )

File /opt/conda/lib/python3.11/site-packages/pymysql/connections.py:361, in Connection.__init__(self, user, password, host, database, unix_socket, port, charset, collation, sql_mode, read_default_file, conv, use_unicode, client_flag, cursorclass, init_command, connect_timeout, read_default_group, autocommit, local_infile, max_allowed_packet, defer_connect, auth_plugin_map, read_timeout, write_timeout, bind_address, binary_prefix, program_name, server_public_key, ssl, ssl_ca, ssl_cert, ssl_disabled, ssl_key, ssl_key_password, ssl_verify_cert, ssl_verify_identity, compress, named_pipe, passwd, db)
    359     self._sock = None
    360 else:
--> 361     self.connect()

File /opt/conda/lib/python3.11/site-packages/pymysql/connections.py:716, in Connection.connect(self, sock)
    714     if DEBUG:
    715         print(exc.traceback)
--> 716     raise exc
    718 # If e is neither DatabaseError or IOError, It's a bug.
    719 # But raising AssertionError hides original error.
    720 # So just reraise it.
    721 raise

OperationalError: (2003, "Can't connect to MySQL server on 'localhost' ([Errno 99] Cannot assign requested address)")

d = dj.Diagram(schema)

d.save('project.svg')