Using UUIDs - The DataJoint Book

Universally Unique Identifiers (UUIDs) provide a convenient mechanism for generating unique identifiers for entities inside an information system. UUIDs are standardized by RFC 4122.

A UUID consists of 128 bits (16 bytes), conventionally displayed as a 36-character string of hexadecimal digits hyphenated as 8-4-4-4-12.

For example, e45ba2cc-39db-11e9-8e62-7470fdf23ef1 is a valid UUID.

Python provides the uuid module as part of its standard library.

import uuid

help(uuid.uuid1)

Help on function uuid1 in module uuid:

uuid1(node=None, clock_seq=None)
    Generate a UUID from a host ID, sequence number, and the current time.
    If 'node' is not given, getnode() is used to obtain the hardware
    address.  If 'clock_seq' is given, it is used as the sequence number;
    otherwise a random 14-bit sequence number is chosen.

uuid.uuid1()

UUID('f9a46b8c-6d43-11ee-8c98-0242ac120002')

# use the current hardware address and time
[uuid.uuid1() for _ in range(8)]

[UUID('fa69277e-6d43-11ee-8c98-0242ac120002'),
 UUID('fa6928e6-6d43-11ee-8c98-0242ac120002'),
 UUID('fa6929d6-6d43-11ee-8c98-0242ac120002'),
 UUID('fa692ac6-6d43-11ee-8c98-0242ac120002'),
 UUID('fa692b8e-6d43-11ee-8c98-0242ac120002'),
 UUID('fa692c56-6d43-11ee-8c98-0242ac120002'),
 UUID('fa692d1e-6d43-11ee-8c98-0242ac120002'),
 UUID('fa692de6-6d43-11ee-8c98-0242ac120002')]

# use the current hardware address and time
[uuid.uuid1() for _ in range(5)]

[UUID('faf96eb0-6d43-11ee-8c98-0242ac120002'),
 UUID('faf96fb4-6d43-11ee-8c98-0242ac120002'),
 UUID('faf97054-6d43-11ee-8c98-0242ac120002'),
 UUID('faf970e0-6d43-11ee-8c98-0242ac120002'),
 UUID('faf97162-6d43-11ee-8c98-0242ac120002')]

# use a fixed node (3) and a fixed clock sequence (1); only the timestamp varies
[uuid.uuid1(3, 1) for _ in range(5)]

[UUID('fc601494-6d43-11ee-8001-000000000003'),
 UUID('fc60155a-6d43-11ee-8001-000000000003'),
 UUID('fc6015b2-6d43-11ee-8001-000000000003'),
 UUID('fc6015e3-6d43-11ee-8001-000000000003'),
 UUID('fc60160a-6d43-11ee-8001-000000000003')]

help(uuid.uuid1)

Help on function uuid1 in module uuid:

uuid1(node=None, clock_seq=None)
    Generate a UUID from a host ID, sequence number, and the current time.
    If 'node' is not given, getnode() is used to obtain the hardware
    address.  If 'clock_seq' is given, it is used as the sequence number;
    otherwise a random 14-bit sequence number is chosen.

help(uuid.uuid3)

Help on function uuid3 in module uuid:

uuid3(namespace, name)
    Generate a UUID from the MD5 hash of a namespace UUID and a name.

help(uuid.uuid5)

Help on function uuid5 in module uuid:

uuid5(namespace, name)
    Generate a UUID from the SHA-1 hash of a namespace UUID and a name.

top = uuid.UUID('00000000-0000-0000-0000-000000000000')

top

UUID('00000000-0000-0000-0000-000000000000')

topic = uuid.uuid5(top, 'Neuroscience')
topic

UUID('913e0174-a390-5c08-b50a-623690546dd5')

subject1 = uuid.uuid5(topic, 'Habenula')
subject1

UUID('b5804c3f-57b1-54e3-8176-3b45aa443a97')

top = uuid.UUID('00000000-0000-0000-0000-000000000000')
topic = uuid.uuid5(top, 'Neuroscience')
subject1 = uuid.uuid5(topic, 'Habenula')
subject2 = uuid.uuid5(topic, 'Entorhinal cortex')
subject3 = uuid.uuid5(topic, 'Habenula')

topic = uuid.uuid5(top, 'Philosophy')
subject4 = uuid.uuid5(topic, 'Habenula')

topic, subject1, subject2, subject3, subject4

(UUID('345b4a08-7955-5b86-8646-f0826799afe9'),
 UUID('b5804c3f-57b1-54e3-8176-3b45aa443a97'),
 UUID('58571fff-c6bd-583f-88ac-ef0b8ff2981f'),
 UUID('b5804c3f-57b1-54e3-8176-3b45aa443a97'),
 UUID('6340129b-3a59-5354-aec6-5df769ae2ce7'))

uuid.uuid5(subject4, 'study'*1000000)

UUID('3d9d9035-dec3-5fc8-b66c-38cd8537acbe')

help(uuid.uuid4)

Help on function uuid4 in module uuid:

uuid4()
    Generate a random UUID.

[uuid.uuid4() for _ in range(12)]

[UUID('e97fb242-f513-491b-b628-cdc10745bed7'),
 UUID('6240cec4-bf4d-434e-b284-53722fd2c9b7'),
 UUID('789fcb88-ea0f-4a1e-a670-914a82ba7e07'),
 UUID('efb07c22-1a94-4b5f-bc6e-7653321176c0'),
 UUID('706203b8-de8b-4802-b83b-d6cf1c8e671f'),
 UUID('660d7e08-2e03-49b8-a4c1-5dd124750c69'),
 UUID('3dd8e385-0c68-4ae5-b5f5-c075b2b3aa5a'),
 UUID('d1cc073e-d1d1-449b-82f9-f57b509dfe7d'),
 UUID('8356b5b5-90d0-489c-8fa9-7e6352a3cca6'),
 UUID('32d015c0-c3b1-4f65-a9c7-57ac1b1e6ca1'),
 UUID('a108f427-5ce6-456e-84f8-e4620ed489db'),
 UUID('b4e58cd8-e0c2-47be-ab5b-d839f27bda1b')]

UUIDs in DataJoint

import datajoint as dj
dj.__version__

'0.14.1'

schema = dj.Schema('uuid')

[2023-10-17 23:22:36,428][INFO]: Connecting root@fakeservices.datajoint.io:3306
[2023-10-17 23:22:36,446][INFO]: Connected root@fakeservices.datajoint.io:3306

@schema
class Message(dj.Manual):
    # Manual table of messages keyed by a UUID.
    # Attributes above the `---` separator form the primary key (`message_id`);
    # attributes below it (`message_body`) are dependent, non-key attributes.
    definition = """
    message_id : uuid  # internal message id
    ---
    message_body : varchar(1000) 
    """

print(Message.describe())

message_id           : uuid                         # internal message id
---
message_body         : varchar(1000)

# For the curious: Internally, DataJoint represents UUIDs as BINARY(16)
Message.heading['message_id'].sql

'`message_id` binary(16) NOT NULL COMMENT ":uuid:internal message id"'

Message.insert1((uuid.uuid1(), 'Hello, world!'))
Message.insert1((uuid.uuid1(), 'Cogito ergo sum'))

Message()

Message.insert1((uuid.uuid1(), 'I will be back'))
Message.insert1((uuid.uuid1(), 'Must destroy humans.'))

b = uuid.uuid4().bytes
b

b"\xa4\xfd\xe8\x94\x0f@@\x95\xa7 '5$\xf8\x06\x97"

uuid.UUID(bytes=b)

UUID('a4fde894-0f40-4095-a720-273524f80697')

Message()

Message.insert1((uuid.uuid4(), 'Hasta la vista baby'))

Message()

@schema
class Comment(dj.Manual):
    # Manual table of comments, each attached to one message.
    # `comment_id` (a UUID) is the primary key. The `-> Message` line sits
    # below the `---` separator, so the referenced `message_id` becomes a
    # non-key foreign-key attribute rather than part of the primary key.
    definition = """
    comment_id : uuid
    --- 
    -> Message
    comment_body : varchar(1000)
    """

# For the curious: This is how the table was declared in SQL
print(schema.connection.query('show create table `uuid`.`comment`').fetchall()[0][1])

CREATE TABLE `comment` (
  `comment_id` binary(16) NOT NULL COMMENT ':uuid:',
  `message_id` binary(16) NOT NULL COMMENT ':uuid:internal message id',
  `comment_body` varchar(1000) NOT NULL,
  PRIMARY KEY (`comment_id`),
  KEY `message_id` (`message_id`),
  CONSTRAINT `comment_ibfk_1` FOREIGN KEY (`message_id`) REFERENCES `message` (`message_id`) ON DELETE RESTRICT ON UPDATE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci

dj.Diagram(schema)

keys = Message.fetch('KEY')

keys

[{'message_id': UUID('553e2582-bc00-4c3e-a316-47c754f0677f')},
 {'message_id': UUID('681a997e-6d44-11ee-8c98-0242ac120002')},
 {'message_id': UUID('68214b2a-6d44-11ee-8c98-0242ac120002')},
 {'message_id': UUID('71a3e82e-6d44-11ee-8c98-0242ac120002')},
 {'message_id': UUID('71aa3580-6d44-11ee-8c98-0242ac120002')},
 {'message_id': UUID('82f8a010-6d44-11ee-8c98-0242ac120002')},
 {'message_id': UUID('82ffbb84-6d44-11ee-8c98-0242ac120002')},
 {'message_id': UUID('83f36c84-6d44-11ee-8c98-0242ac120002')},
 {'message_id': UUID('83f890ba-6d44-11ee-8c98-0242ac120002')},
 {'message_id': UUID('84dd3aa8-6d44-11ee-8c98-0242ac120002')},
 {'message_id': UUID('84e27202-6d44-11ee-8c98-0242ac120002')}]

Comment.insert1(dict(keys[0], comment_id=uuid.uuid1(), comment_body="thank you"))

Message * Comment

Message * Comment & keys[0]

Message & keys[1:4]

Comment.insert1(dict(keys[1], comment_id=uuid.uuid1(), comment_body="thank you"))

Comment()

Message & Comment

Message - Comment

Message * Comment

Schema Design

Modeling Relationships

Schema Design

Indexes