Introduction to Threading
Threading in Python allows you to run multiple threads (smaller units of execution within a process) concurrently. While Python's Global Interpreter Lock (GIL) means threads don't run truly in parallel for CPU-bound tasks, they're excellent for I/O-bound operations.
# Basic threading example
import threading
import time
def print_numbers():
    """Print the integers 0 through 4, pausing one second before each."""
    for number in range(5):
        time.sleep(1)
        print(number)
def print_letters():
    """Print each letter of 'abcde', pausing 1.5 seconds before each."""
    for char in 'abcde':
        time.sleep(1.5)
        print(char)
# Build one worker thread per task
t1 = threading.Thread(target=print_numbers)
t2 = threading.Thread(target=print_letters)
# Launch both workers so their sleeps overlap
for worker in (t1, t2):
    worker.start()
# Block until each worker has finished
for worker in (t1, t2):
    worker.join()
print("Done!")
This example shows two threads running concurrently, with their output interleaving based on their sleep times.
Thread Class
You can create threads by subclassing threading.Thread and overriding the run() method.
# Creating a thread by subclassing Thread
class MyThread(threading.Thread):
    """Thread subclass that announces when it starts and when it exits.

    Args:
        name: Label assigned to the thread's ``name`` and echoed by ``run``.
    """

    def __init__(self, name):
        # super() keeps the initialiser cooperative instead of hard-coding the base class.
        super().__init__()
        self.name = name

    def run(self):
        """Print a start message, sleep two seconds, then print an exit message."""
        print(f"Starting {self.name}")
        time.sleep(2)
        print(f"Exiting {self.name}")
# Create the two named workers
t1 = MyThread("Thread-1")
t2 = MyThread("Thread-2")
# Start both before joining either so they run concurrently
for worker in (t1, t2):
    worker.start()
for worker in (t1, t2):
    worker.join()
print("All threads completed")
Subclassing Thread gives you more control and allows you to maintain state within your thread class.
Thread Synchronization
When threads share data, synchronization mechanisms like Locks, RLocks, Semaphores, and Events prevent race conditions.
# Using a Lock to synchronize threads
# Counter mutated by every worker; `lock` serialises each update to it.
shared_counter = 0
lock = threading.Lock()


def increment_counter():
    """Add 100000 to the module-level ``shared_counter``, one locked step at a time."""
    global shared_counter
    for _step in range(100000):
        # Hold the lock only for the read-modify-write itself.
        with lock:
            shared_counter += 1
# Build the five workers, then launch them
threads = [threading.Thread(target=increment_counter) for _ in range(5)]
for t in threads:
    t.start()
# Wait for all threads to complete
for t in threads:
    t.join()
print(f"Final counter value: {shared_counter}")
# Using an Event to coordinate threads
# Flag the waiting thread blocks on until another thread sets it.
event = threading.Event()


def wait_for_event():
    """Block until the module-level ``event`` is set, printing before and after."""
    print("Waiting for event to start")
    event.wait()
    print("Event triggered, continuing execution")
waiter = threading.Thread(target=wait_for_event)
waiter.start()
# Pause before setting the event so the waiter has time to block on it
time.sleep(3)
event.set()  # Trigger the event
waiter.join()
Proper synchronization is crucial when threads access shared resources to avoid data corruption and race conditions.
Thread Pools
Thread pools manage a pool of worker threads to efficiently execute tasks without the overhead of creating new threads for each task.
# Using ThreadPoolExecutor from concurrent.futures
from concurrent.futures import ThreadPoolExecutor
import urllib.request
# Pages fetched by the thread-pool example
URLs = [
    'https://www.python.org/',
    'https://docs.python.org/',
    'https://pypi.org/',
    'https://github.com/python',
]
def load_url(url, timeout):
    """Fetch ``url`` and return the response body.

    Args:
        url: Address passed to ``urllib.request.urlopen``.
        timeout: Timeout forwarded to ``urlopen``.

    Returns:
        Whatever ``read()`` on the opened response returns.
    """
    response = urllib.request.urlopen(url, timeout=timeout)
    # The response is a context manager; leaving the block closes it.
    with response:
        body = response.read()
    return body
# `concurrent.futures.as_completed` is referenced below, so the package name itself
# must be bound; importing only ThreadPoolExecutor from it does not bind `concurrent`.
import concurrent.futures

# Using a with statement ensures threads are cleaned up promptly
with ThreadPoolExecutor(max_workers=4) as executor:
    # Start the load operations and mark each future with its URL
    future_to_url = {executor.submit(load_url, url, 60): url for url in URLs}
    # Handle each download as soon as it finishes, in completion order
    for future in concurrent.futures.as_completed(future_to_url):
        url = future_to_url[future]
        try:
            data = future.result()
        except Exception as exc:
            # result() re-raises whatever load_url raised; report it and keep going
            print(f'{url} generated an exception: {exc}')
        else:
            print(f'{url} page is {len(data)} bytes')
Thread pools are ideal for I/O-bound tasks where you want to limit the number of concurrent operations.
GIL and Multiprocessing
Python's Global Interpreter Lock (GIL) lets only one thread execute Python bytecode at a time, so threads give little or no speedup for CPU-bound tasks. For true parallelism, use the multiprocessing module.
# Comparing threading and multiprocessing for CPU-bound tasks
import multiprocessing
import math
def compute(start, end):
    """Return the sum of ``math.sqrt(i)`` for every integer ``i`` in ``range(start, end)``.

    Args:
        start: First integer included in the sum.
        end: Integer at which the range stops (excluded).

    Returns:
        The accumulated sum; ``0`` when the range is empty.
    """
    accumulated = 0
    # Add the roots one at a time, left to right.
    for root in map(math.sqrt, range(start, end)):
        accumulated += root
    return accumulated
# Threading version (limited by GIL)
def run_with_threads():
    """Sum square roots over ``range(0, 1000000)`` using four threads.

    The range is split into four chunks of 250000; each chunk is handed to
    ``compute`` in its own thread.

    Returns:
        The sum of the four per-chunk results.
    """
    chunk = 250000
    starts = range(0, 1000000, chunk)
    # One slot per chunk so the final sum always adds the partial results
    # in the same order, regardless of which thread finishes first.
    results = [0] * len(starts)

    def worker(slot, lo, hi):
        results[slot] = compute(lo, hi)

    threads = []
    for slot, lo in enumerate(starts):
        # Pass the bounds through `args`: a lambda closing over the loop
        # variable reads it when the thread runs, by which time the loop may
        # already have advanced to another chunk.
        t = threading.Thread(target=worker, args=(slot, lo, lo + chunk))
        threads.append(t)
        t.start()
    for t in threads:
        t.join()
    return sum(results)
# Multiprocessing version (true parallelism)
def run_with_processes():
    """Sum square roots over ``range(0, 1000000)`` using a pool of four processes.

    Returns:
        The sum of the per-chunk results returned by ``compute``.
    """
    chunk_bounds = [(lo, lo + 250000) for lo in range(0, 1000000, 250000)]
    with multiprocessing.Pool(4) as workers:
        # starmap unpacks each (start, end) pair into compute's two arguments
        partial_sums = workers.starmap(compute, chunk_bounds)
    return sum(partial_sums)
# Time both approaches
if __name__ == "__main__":
    # Guarded because multiprocessing's spawn start method re-imports this
    # module in every worker process; without the guard each worker would try
    # to start the benchmark (and another pool) again.
    start = time.time()
    run_with_threads()
    print(f"Threading took {time.time() - start} seconds")
    start = time.time()
    run_with_processes()
    print(f"Multiprocessing took {time.time() - start} seconds")
For CPU-bound tasks, multiprocessing will typically outperform threading due to Python's GIL limitations.
Python Threading Videos
Master Python threading with these handpicked YouTube tutorials:
Learn the fundamentals of Python threading:
Deep dive into threading techniques:
Understanding Python's GIL and performance implications:
Real-world threading use cases: