Skip to article frontmatterSkip to article content

Using UUIDs

Universally Unique Identifiers (UUIDs) provide a convenient mechanism for generating unique identifiers for entities inside an information system. UUIDs are standardized by RFC 4122.

A UUID consists of 128 bits (16 bytes), conventionally displayed as a 36-character string: 32 hexadecimal digits separated by hyphens into groups of 8-4-4-4-12.

For example, e45ba2cc-39db-11e9-8e62-7470fdf23ef1 is a valid UUID.

Python provides the uuid module as part of its standard library.

import uuid
help(uuid.uuid1)
Help on function uuid1 in module uuid:

uuid1(node=None, clock_seq=None)
    Generate a UUID from a host ID, sequence number, and the current time.
    If 'node' is not given, getnode() is used to obtain the hardware
    address.  If 'clock_seq' is given, it is used as the sequence number;
    otherwise a random 14-bit sequence number is chosen.

uuid.uuid1()
UUID('f9a46b8c-6d43-11ee-8c98-0242ac120002')
# use the current hardware address and time
[uuid.uuid1() for _ in range(8)]
[UUID('fa69277e-6d43-11ee-8c98-0242ac120002'), UUID('fa6928e6-6d43-11ee-8c98-0242ac120002'), UUID('fa6929d6-6d43-11ee-8c98-0242ac120002'), UUID('fa692ac6-6d43-11ee-8c98-0242ac120002'), UUID('fa692b8e-6d43-11ee-8c98-0242ac120002'), UUID('fa692c56-6d43-11ee-8c98-0242ac120002'), UUID('fa692d1e-6d43-11ee-8c98-0242ac120002'), UUID('fa692de6-6d43-11ee-8c98-0242ac120002')]
# use the current hardware address and time
[uuid.uuid1() for _ in range(5)]
[UUID('faf96eb0-6d43-11ee-8c98-0242ac120002'), UUID('faf96fb4-6d43-11ee-8c98-0242ac120002'), UUID('faf97054-6d43-11ee-8c98-0242ac120002'), UUID('faf970e0-6d43-11ee-8c98-0242ac120002'), UUID('faf97162-6d43-11ee-8c98-0242ac120002')]
# use a fixed node (3) and clock sequence (1); the timestamp part still varies
[uuid.uuid1(3, 1) for _ in range(5)]
[UUID('fc601494-6d43-11ee-8001-000000000003'), UUID('fc60155a-6d43-11ee-8001-000000000003'), UUID('fc6015b2-6d43-11ee-8001-000000000003'), UUID('fc6015e3-6d43-11ee-8001-000000000003'), UUID('fc60160a-6d43-11ee-8001-000000000003')]
help(uuid.uuid1)
Help on function uuid1 in module uuid:

uuid1(node=None, clock_seq=None)
    Generate a UUID from a host ID, sequence number, and the current time.
    If 'node' is not given, getnode() is used to obtain the hardware
    address.  If 'clock_seq' is given, it is used as the sequence number;
    otherwise a random 14-bit sequence number is chosen.

help(uuid.uuid3)
Help on function uuid3 in module uuid:

uuid3(namespace, name)
    Generate a UUID from the MD5 hash of a namespace UUID and a name.

help(uuid.uuid5)
Help on function uuid5 in module uuid:

uuid5(namespace, name)
    Generate a UUID from the SHA-1 hash of a namespace UUID and a name.

top = uuid.UUID('00000000-0000-0000-0000-000000000000')
top
UUID('00000000-0000-0000-0000-000000000000')
topic = uuid.uuid5(top, 'Neuroscience')
topic
UUID('913e0174-a390-5c08-b50a-623690546dd5')
subject1 = uuid.uuid5(topic, 'Habenula')
subject1
UUID('b5804c3f-57b1-54e3-8176-3b45aa443a97')
top = uuid.UUID('00000000-0000-0000-0000-000000000000')
topic = uuid.uuid5(top, 'Neuroscience')
subject1 = uuid.uuid5(topic, 'Habenula')
subject2 = uuid.uuid5(topic, 'Entorhinal cortex')
subject3 = uuid.uuid5(topic, 'Habenula')

topic = uuid.uuid5(top, 'Philosophy')
subject4 = uuid.uuid5(topic, 'Habenula')

topic, subject1, subject2, subject3, subject4
(UUID('345b4a08-7955-5b86-8646-f0826799afe9'), UUID('b5804c3f-57b1-54e3-8176-3b45aa443a97'), UUID('58571fff-c6bd-583f-88ac-ef0b8ff2981f'), UUID('b5804c3f-57b1-54e3-8176-3b45aa443a97'), UUID('6340129b-3a59-5354-aec6-5df769ae2ce7'))
uuid.uuid5(subject4, 'study'*1000000)
UUID('3d9d9035-dec3-5fc8-b66c-38cd8537acbe')
help(uuid.uuid4)
Help on function uuid4 in module uuid:

uuid4()
    Generate a random UUID.

[uuid.uuid4() for _ in range(12)]
[UUID('e97fb242-f513-491b-b628-cdc10745bed7'), UUID('6240cec4-bf4d-434e-b284-53722fd2c9b7'), UUID('789fcb88-ea0f-4a1e-a670-914a82ba7e07'), UUID('efb07c22-1a94-4b5f-bc6e-7653321176c0'), UUID('706203b8-de8b-4802-b83b-d6cf1c8e671f'), UUID('660d7e08-2e03-49b8-a4c1-5dd124750c69'), UUID('3dd8e385-0c68-4ae5-b5f5-c075b2b3aa5a'), UUID('d1cc073e-d1d1-449b-82f9-f57b509dfe7d'), UUID('8356b5b5-90d0-489c-8fa9-7e6352a3cca6'), UUID('32d015c0-c3b1-4f65-a9c7-57ac1b1e6ca1'), UUID('a108f427-5ce6-456e-84f8-e4620ed489db'), UUID('b4e58cd8-e0c2-47be-ab5b-d839f27bda1b')]

UUIDs in DataJoint

import datajoint as dj
dj.__version__
'0.14.1'
schema = dj.Schema('uuid')
[2023-10-17 23:22:36,428][INFO]: Connecting root@fakeservices.datajoint.io:3306
[2023-10-17 23:22:36,446][INFO]: Connected root@fakeservices.datajoint.io:3306
@schema
class Message(dj.Manual):
    # Manually populated table of messages.
    # `message_id` (above the `---` separator) is the primary key and is
    # declared with DataJoint's `uuid` type, which is stored as BINARY(16).
    # `message_body` (below the separator) is a secondary attribute.
    definition = """
    message_id : uuid  # internal message id
    ---
    message_body : varchar(1000) 
    """
print(Message.describe())
message_id           : uuid                         # internal message id
---
message_body         : varchar(1000)                

# For the curious: Internally, DataJoint represents uuids as BINARY(16) 
Message.heading['message_id'].sql
'`message_id` binary(16) NOT NULL COMMENT ":uuid:internal message id"'
Message.insert1((uuid.uuid1(), 'Hello, world!'))
Message.insert1((uuid.uuid1(), 'Cogito ergo sum'))
Message()
Loading...
Message.insert1((uuid.uuid1(), 'I will be back'))
Message.insert1((uuid.uuid1(), 'Must destroy humans.'))
b = uuid.uuid4().bytes
b
b"\xa4\xfd\xe8\x94\x0f@@\x95\xa7 '5$\xf8\x06\x97"
uuid.UUID(bytes=b)
UUID('a4fde894-0f40-4095-a720-273524f80697')
Message()
Loading...
Message.insert1((uuid.uuid4(), 'Hasta la vista baby'))
Message()
Loading...
@schema
class Comment(dj.Manual):
    # Manually populated table of comments attached to messages.
    # `comment_id` is the table's own UUID primary key; `-> Message` below the
    # `---` separator adds `message_id` as a non-primary foreign key
    # referencing the Message table.
    definition = """
    comment_id : uuid
    --- 
    -> Message
    comment_body : varchar(1000)
    """
# For the curious: This is how the table was declared in SQL
print(schema.connection.query('show create table `uuid`.`comment`').fetchall()[0][1])
CREATE TABLE `comment` (
  `comment_id` binary(16) NOT NULL COMMENT ':uuid:',
  `message_id` binary(16) NOT NULL COMMENT ':uuid:internal message id',
  `comment_body` varchar(1000) NOT NULL,
  PRIMARY KEY (`comment_id`),
  KEY `message_id` (`message_id`),
  CONSTRAINT `comment_ibfk_1` FOREIGN KEY (`message_id`) REFERENCES `message` (`message_id`) ON DELETE RESTRICT ON UPDATE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci
dj.Diagram(schema)
Loading...
keys = Message.fetch('KEY')
keys
[{'message_id': UUID('553e2582-bc00-4c3e-a316-47c754f0677f')}, {'message_id': UUID('681a997e-6d44-11ee-8c98-0242ac120002')}, {'message_id': UUID('68214b2a-6d44-11ee-8c98-0242ac120002')}, {'message_id': UUID('71a3e82e-6d44-11ee-8c98-0242ac120002')}, {'message_id': UUID('71aa3580-6d44-11ee-8c98-0242ac120002')}, {'message_id': UUID('82f8a010-6d44-11ee-8c98-0242ac120002')}, {'message_id': UUID('82ffbb84-6d44-11ee-8c98-0242ac120002')}, {'message_id': UUID('83f36c84-6d44-11ee-8c98-0242ac120002')}, {'message_id': UUID('83f890ba-6d44-11ee-8c98-0242ac120002')}, {'message_id': UUID('84dd3aa8-6d44-11ee-8c98-0242ac120002')}, {'message_id': UUID('84e27202-6d44-11ee-8c98-0242ac120002')}]
Comment.insert1(dict(keys[0], comment_id=uuid.uuid1(), comment_body="thank you"))
Message * Comment
Loading...
Message * Comment & keys[0]
Loading...
Message & keys[1:4]
Loading...
Comment.insert1(dict(keys[1], comment_id=uuid.uuid1(), comment_body="thank you"))
Comment()
Loading...
Message & Comment
Loading...
Message - Comment
Loading...
Message * Comment
Loading...