Slide 1: Basic Dictionary Operations and Initialization
Python dictionaries serve as versatile hash table implementations, offering constant-time complexity for key-based operations. Understanding fundamental dictionary operations is crucial for efficient data manipulation and storage in Python programming, particularly when dealing with structured data representations.
# Different ways to initialize dictionaries
dict1 = {'name': 'John', 'age': 30} # Direct initialization
dict2 = dict(name='Alice', age=25) # Using dict() constructor
dict3 = dict([('city', 'New York'), ('country', 'USA')]) # From sequence of pairs
# Basic operations
dict1['profession'] = 'Engineer' # Adding new key-value pair
dict1['age'] = 31 # Updating existing value
removed_value = dict1.pop('age') # Removing key-value pair
print(f"Updated dict1: {dict1}")
print(f"Removed value: {removed_value}")
print(f"Keys: {dict1.keys()}")
print(f"Values: {dict1.values()}")Slide 2: Dictionary Comprehension and Advanced Creation
Dictionary comprehensions provide a concise way to create dictionaries using existing iterables. This powerful feature enables sophisticated data transformation and filtering operations while maintaining readable and efficient code structure.
# Dictionary comprehension examples
squares = {x: x**2 for x in range(5)}
filtered_dict = {k: v for k, v in squares.items() if v % 2 == 0}
# Advanced dictionary creation techniques
from collections import defaultdict
from itertools import zip_longest
# Using defaultdict for automatic default values
counts = defaultdict(int)
words = ['apple', 'banana', 'apple', 'cherry']
for word in words:
counts[word] += 1
# Creating dictionary from two lists
keys = ['a', 'b', 'c']
values = [1, 2, 3, 4]
combined = dict(zip_longest(keys, values, fillvalue=None))
print(f"Squares: {squares}")
print(f"Filtered: {filtered_dict}")
print(f"Word counts: {dict(counts)}")
print(f"Combined lists: {combined}")Slide 3: Nested Dictionaries and Deep Operations
When working with complex data structures, nested dictionaries become essential for representing hierarchical relationships. Understanding how to manipulate and traverse nested dictionaries is crucial for handling real-world data structures efficiently.
# Complex nested dictionary operations
def deep_update(d, u):
"""Recursively update nested dictionary."""
for k, v in u.items():
if isinstance(v, dict):
d[k] = deep_update(d.get(k, {}), v)
else:
d[k] = v
return d
# Example nested structure
config = {
'database': {
'host': 'localhost',
'port': 5432,
'credentials': {
'username': 'admin',
'password': 'secret'
}
},
'api': {
'version': '1.0',
'endpoints': ['users', 'posts']
}
}
# Update nested values
updates = {
'database': {
'port': 5433,
'credentials': {
'password': 'new_secret'
}
}
}
updated_config = deep_update(config.copy(), updates)
print(f"Updated config: {updated_config}")Slide 4: Dictionary Performance Optimization
Understanding the performance characteristics of dictionary operations is crucial for optimizing Python applications. This exploration covers time complexity analysis and best practices for efficient dictionary usage in performance-critical scenarios.
import timeit
import sys
from collections import ChainMap
# Memory efficiency comparison
small_dict = dict.fromkeys(range(1000))
large_dict = dict.fromkeys(range(1000000))
# ChainMap for efficient dictionary combining
dict1 = {'a': 1, 'b': 2}
dict2 = {'b': 3, 'c': 4}
chain = ChainMap(dict1, dict2)
# Performance measurement
setup = """
d = dict.fromkeys(range(1000))
key = 999
"""
lookup_time = timeit.timeit('key in d', setup=setup, number=1000000)
print(f"Small dict size: {sys.getsizeof(small_dict)} bytes")
print(f"Large dict size: {sys.getsizeof(large_dict)} bytes")
print(f"Average lookup time: {lookup_time/1000000:.9f} seconds")
print(f"ChainMap result: {dict(chain)}")Slide 5: Dictionary Views and Iteration Patterns
Dictionary views provide a dynamic window into dictionary contents, automatically reflecting changes to the underlying dictionary. Understanding view objects and their behaviors is essential for maintaining data consistency and implementing efficient iteration patterns.
# Working with dictionary views
user_data = {'id': 1, 'name': 'Alice', 'role': 'admin'}
# Create views
keys_view = user_data.keys()
values_view = user_data.values()
items_view = user_data.items()
# Demonstrate dynamic nature of views
print("Initial views:")
print(f"Keys: {keys_view}")
print(f"Values: {values_view}")
print(f"Items: {items_view}")
# Modify dictionary and observe view updates
user_data['email'] = 'alice@example.com'
print("\nViews after modification:")
print(f"Keys: {keys_view}")
print(f"Values: {values_view}")
print(f"Items: {items_view}")
# Efficient iteration patterns
for k, v in items_view:
print(f"{k}: {v}")Slide 6: Dictionary Merging and Update Operations
Dictionary merging operations become crucial when combining data from multiple sources. Python 3.9+ introduced the union operator (|) for dictionaries, providing more elegant ways to merge dictionaries while preserving data integrity.
# Different methods of dictionary merging
defaults = {'timeout': 30, 'retries': 3, 'debug': False}
user_config = {'timeout': 45, 'api_key': 'abc123'}
# Method 1: Using update()
config1 = defaults.copy()
config1.update(user_config)
# Method 2: Using | operator (Python 3.9+)
config2 = defaults | user_config
# Method 3: Using dictionary unpacking
config3 = {**defaults, **user_config}
# Method 4: Using collections.ChainMap
from collections import ChainMap
config4 = dict(ChainMap(user_config, defaults))
print("Merged configurations:")
print(f"Method 1: {config1}")
print(f"Method 2: {config2}")
print(f"Method 3: {config3}")
print(f"Method 4: {config4}")Slide 7: Real-world Application: Cache Implementation
Implementing a custom caching system demonstrates practical dictionary usage in production environments. This example showcases a time-based cache with automatic expiration and cleanup mechanisms.
from time import time
from typing import Any, Dict, Optional
from dataclasses import dataclass
@dataclass
class CacheEntry:
value: Any
timestamp: float
ttl: int # Time to live in seconds
class TimeBasedCache:
def __init__(self, default_ttl: int = 300):
self._cache: Dict[str, CacheEntry] = {}
self.default_ttl = default_ttl
def set(self, key: str, value: Any, ttl: Optional[int] = None) -> None:
self._cache[key] = CacheEntry(
value=value,
timestamp=time(),
ttl=ttl or self.default_ttl
)
def get(self, key: str) -> Optional[Any]:
if key not in self._cache:
return None
entry = self._cache[key]
if time() - entry.timestamp > entry.ttl:
del self._cache[key]
return None
return entry.value
def cleanup(self) -> int:
"""Remove expired entries and return count of removed items."""
current_time = time()
expired = [
k for k, v in self._cache.items()
if current_time - v.timestamp > v.ttl
]
for k in expired:
del self._cache[k]
return len(expired)
# Usage example
cache = TimeBasedCache()
cache.set("user:123", {"name": "John", "age": 30}, ttl=5)
print(f"Cached value: {cache.get('user:123')}")Slide 8: Performance Metrics for Cache Implementation
import time
# Performance testing for TimeBasedCache
def performance_test():
cache = TimeBasedCache()
# Write performance
start_time = time.time()
for i in range(10000):
cache.set(f"key_{i}", f"value_{i}")
write_time = time.time() - start_time
# Read performance
start_time = time.time()
for i in range(10000):
_ = cache.get(f"key_{i}")
read_time = time.time() - start_time
# Memory usage
import sys
memory_usage = sys.getsizeof(cache._cache)
return {
"write_time": write_time,
"read_time": read_time,
"memory_usage": memory_usage,
"items_count": len(cache._cache)
}
results = performance_test()
print("Performance Metrics:")
print(f"Write Time: {results['write_time']:.4f} seconds")
print(f"Read Time: {results['read_time']:.4f} seconds")
print(f"Memory Usage: {results['memory_usage']} bytes")
print(f"Items Count: {results['items_count']}")Slide 9: Dictionary Serialization and Persistence
Understanding dictionary serialization is crucial for data persistence and inter-process communication. This implementation demonstrates various methods of serializing dictionary data while maintaining data integrity and type preservation.
import json
import pickle
import yaml # requires PyYAML
from typing import Dict, Any
import base64
class DictionarySerializer:
@staticmethod
def to_json(data: Dict[str, Any], file_path: str = None) -> str:
json_str = json.dumps(data, indent=2)
if file_path:
with open(file_path, 'w') as f:
f.write(json_str)
return json_str
@staticmethod
def to_pickle(data: Dict[str, Any], file_path: str = None) -> bytes:
pickle_bytes = pickle.dumps(data)
if file_path:
with open(file_path, 'wb') as f:
f.write(pickle_bytes)
return pickle_bytes
@staticmethod
def to_yaml(data: Dict[str, Any], file_path: str = None) -> str:
yaml_str = yaml.dump(data, default_flow_style=False)
if file_path:
with open(file_path, 'w') as f:
f.write(yaml_str)
return yaml_str
# Example usage
complex_dict = {
'name': 'Project X',
'config': {
'version': 2.0,
'enabled_features': ['a', 'b', 'c'],
'binary_data': base64.b64encode(b'Hello World').decode()
},
'metadata': {
'created_at': '2024-01-01',
'is_active': True
}
}
serializer = DictionarySerializer()
print("JSON format:")
print(serializer.to_json(complex_dict))
print("\nPickle size:", len(serializer.to_pickle(complex_dict)))
print("\nYAML format:")
print(serializer.to_yaml(complex_dict))Slide 10: Advanced Dictionary Pattern: Observable Dictionary
Implementation of an observable dictionary pattern that allows monitoring and reacting to dictionary modifications, useful for building reactive systems and maintaining data consistency across components.
from typing import Callable, Dict, Any, Set
from dataclasses import dataclass
from enum import Enum
class DictOperation(Enum):
SET = 'set'
DELETE = 'delete'
CLEAR = 'clear'
@dataclass
class DictEvent:
operation: DictOperation
key: Any = None
value: Any = None
old_value: Any = None
class ObservableDict:
def __init__(self):
self._data: Dict[Any, Any] = {}
self._observers: Set[Callable[[DictEvent], None]] = set()
def subscribe(self, observer: Callable[[DictEvent], None]):
self._observers.add(observer)
return lambda: self._observers.remove(observer)
def _notify(self, event: DictEvent):
for observer in self._observers:
observer(event)
def __setitem__(self, key, value):
old_value = self._data.get(key)
self._data[key] = value
self._notify(DictEvent(
operation=DictOperation.SET,
key=key,
value=value,
old_value=old_value
))
def __getitem__(self, key):
return self._data[key]
def __delitem__(self, key):
old_value = self._data[key]
del self._data[key]
self._notify(DictEvent(
operation=DictOperation.DELETE,
key=key,
old_value=old_value
))
# Example usage
def dict_observer(event: DictEvent):
print(f"Operation: {event.operation.value}")
print(f"Key: {event.key}")
print(f"Value: {event.value}")
print(f"Old Value: {event.old_value}\n")
observable_dict = ObservableDict()
unsubscribe = observable_dict.subscribe(dict_observer)
# Test operations
observable_dict['user'] = 'Alice'
observable_dict['user'] = 'Bob'
del observable_dict['user']
unsubscribe() # Remove observerSlide 11: Real-world Application: Configuration Management System
A practical implementation of a configuration management system using dictionaries, demonstrating inheritance, validation, and environment-specific configurations commonly used in production systems.
from typing import Dict, Any, Optional
import json
import os
from copy import deepcopy
class ConfigurationManager:
def __init__(self, base_config: Dict[str, Any]):
self._base_config = deepcopy(base_config)
self._env_config: Dict[str, Dict[str, Any]] = {}
self._active_config: Optional[Dict[str, Any]] = None
self._env = os.getenv('APP_ENV', 'development')
def add_environment(self, env: str, config: Dict[str, Any]):
"""Add environment-specific configuration."""
merged = deepcopy(self._base_config)
self._deep_merge(merged, config)
self._env_config[env] = merged
def _deep_merge(self, base: Dict[str, Any], override: Dict[str, Any]):
"""Recursively merge configuration dictionaries."""
for key, value in override.items():
if key in base and isinstance(base[key], dict) and isinstance(value, dict):
self._deep_merge(base[key], value)
else:
base[key] = deepcopy(value)
def validate_config(self, config: Dict[str, Any]) -> bool:
"""Validate configuration structure and required fields."""
required_fields = {'database', 'api_keys', 'features'}
return all(
field in config and isinstance(config[field], dict)
for field in required_fields
)
def get_config(self) -> Dict[str, Any]:
"""Get active configuration for current environment."""
if self._active_config is None:
self._active_config = self._env_config.get(
self._env, self._base_config
)
return deepcopy(self._active_config)
def export_config(self, file_path: str):
"""Export current configuration to JSON file."""
with open(file_path, 'w') as f:
json.dump(self.get_config(), f, indent=2)
# Usage example
base_config = {
'database': {
'host': 'localhost',
'port': 5432,
'timeout': 30
},
'api_keys': {
'service_a': None,
'service_b': None
},
'features': {
'cache_enabled': True,
'debug_mode': False
}
}
# Initialize manager
config_manager = ConfigurationManager(base_config)
# Add environment-specific configurations
config_manager.add_environment('production', {
'database': {
'host': 'prod.db.example.com',
'port': 5433
},
'api_keys': {
'service_a': 'prod_key_a',
'service_b': 'prod_key_b'
},
'features': {
'debug_mode': False
}
})
# Get active configuration
active_config = config_manager.get_config()
print(json.dumps(active_config, indent=2))Slide 12: Advanced Dictionary Patterns: Proxy Dictionary
Implementation of a proxy dictionary pattern that allows for controlled access to dictionary operations, useful for implementing access control, logging, and custom behavior injection.
from typing import Dict, Any, Optional, Callable
from datetime import datetime
import logging
class ProxyDict:
def __init__(
self,
data: Dict[str, Any],
access_control: Optional[Callable[[str], bool]] = None,
log_operations: bool = True
):
self._data = data
self._access_control = access_control
self._log_operations = log_operations
self._logger = logging.getLogger(__name__)
def __getitem__(self, key: str) -> Any:
if self._check_access(key, 'read'):
self._log_operation('read', key)
return self._data[key]
raise PermissionError(f"Access denied to key: {key}")
def __setitem__(self, key: str, value: Any):
if self._check_access(key, 'write'):
self._log_operation('write', key, value)
self._data[key] = value
else:
raise PermissionError(f"Write access denied to key: {key}")
def __delitem__(self, key: str):
if self._check_access(key, 'delete'):
self._log_operation('delete', key)
del self._data[key]
else:
raise PermissionError(f"Delete access denied to key: {key}")
def _check_access(self, key: str, operation: str) -> bool:
if self._access_control is None:
return True
return self._access_control(key)
def _log_operation(self, operation: str, key: str, value: Any = None):
if self._log_operations:
timestamp = datetime.now().isoformat()
message = f"{timestamp} - {operation} operation on key '{key}'"
if value is not None:
message += f" with value: {value}"
self._logger.info(message)
# Example usage
def simple_access_control(key: str) -> bool:
return not key.startswith('_')
# Configure logging
logging.basicConfig(level=logging.INFO)
# Create proxy dictionary
sensitive_data = {
'public_key': 'abc123',
'_private_key': 'xyz789',
'user_id': 42
}
proxy = ProxyDict(
sensitive_data,
access_control=simple_access_control,
log_operations=True
)
# Test operations
try:
print(proxy['public_key'])
proxy['user_id'] = 43
print(proxy['_private_key']) # This will raise PermissionError
except PermissionError as e:
print(f"Error: {e}")Slide 13: Memory-Efficient Dictionary Implementation
This implementation demonstrates an advanced memory-efficient dictionary design utilizing slots and weak references, particularly useful when dealing with large-scale data structures in memory-constrained environments.
from weakref import WeakKeyDictionary
from typing import Any, Optional
import sys
class SlottedDict:
__slots__ = ('_storage', '_size', '_weakref_storage')
def __init__(self):
self._storage = {}
self._size = 0
self._weakref_storage = WeakKeyDictionary()
def __setitem__(self, key: Any, value: Any):
if isinstance(key, (str, int, float, bool)):
self._storage[key] = value
else:
self._weakref_storage[key] = value
self._size += 1
def __getitem__(self, key: Any) -> Any:
if isinstance(key, (str, int, float, bool)):
return self._storage[key]
return self._weakref_storage[key]
def memory_usage(self) -> dict:
return {
'main_storage': sys.getsizeof(self._storage),
'weakref_storage': sys.getsizeof(self._weakref_storage),
'total_size': self._size
}
# Performance comparison
normal_dict = {}
slotted_dict = SlottedDict()
# Populate both dictionaries
for i in range(1000):
key = f"key_{i}"
value = f"value_{i}" * 100 # Create large strings
normal_dict[key] = value
slotted_dict[key] = value
# Memory usage comparison
print("Memory Usage Comparison:")
print(f"Normal Dict: {sys.getsizeof(normal_dict)} bytes")
print(f"Slotted Dict: {slotted_dict.memory_usage()}")Slide 14: Real-world Application: Event Aggregation System
This implementation showcases a practical event aggregation system using dictionaries for efficient event processing and analysis in real-time systems.
from collections import defaultdict
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional
import json
class EventAggregator:
def __init__(self, retention_period: timedelta = timedelta(hours=1)):
self._events: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
self._retention_period = retention_period
self._aggregations: Dict[str, Dict[str, Any]] = {}
def add_event(self, event_type: str, data: Dict[str, Any]):
timestamp = datetime.now()
event = {
'timestamp': timestamp,
'data': data
}
self._events[event_type].append(event)
self._clean_old_events(event_type)
self._update_aggregations(event_type)
def _clean_old_events(self, event_type: str):
cutoff = datetime.now() - self._retention_period
self._events[event_type] = [
event for event in self._events[event_type]
if event['timestamp'] > cutoff
]
def _update_aggregations(self, event_type: str):
events = self._events[event_type]
if not events:
return
# Calculate aggregations
self._aggregations[event_type] = {
'count': len(events),
'first_seen': min(e['timestamp'] for e in events),
'last_seen': max(e['timestamp'] for e in events),
'unique_values': len({
json.dumps(e['data'], sort_keys=True)
for e in events
})
}
def get_aggregation(self, event_type: str) -> Optional[Dict[str, Any]]:
return self._aggregations.get(event_type)
# Usage example
aggregator = EventAggregator(retention_period=timedelta(minutes=5))
# Simulate events
events_data = [
('user_login', {'user_id': 1, 'success': True}),
('user_login', {'user_id': 2, 'success': False}),
('api_call', {'endpoint': '/users', 'status': 200}),
('user_login', {'user_id': 1, 'success': True}),
]
for event_type, data in events_data:
aggregator.add_event(event_type, data)
# Get aggregations
for event_type in ('user_login', 'api_call'):
agg = aggregator.get_aggregation(event_type)
print(f"\nAggregation for {event_type}:")
print(json.dumps(agg, default=str, indent=2))Slide 15: Additional Resources
- Python Dictionary Implementation - Internal Workings and Performance Analysis
- Efficient Dictionary-Based Data Structures for Large-Scale Applications
- Memory-Efficient Hash Table Implementations in Dynamic Languages
- Advanced Dictionary Patterns in Modern Software Architecture
- Performance Optimization Techniques for Hash-Based Data Structures