Module wtracker.utils.threading_utils
View Source
import queue
import threading
import multiprocessing
from typing import Callable
from tqdm.auto import tqdm
def adjust_num_workers(num_tasks: int, chunk_size: int, num_workers: int = None) -> int:
    """
    Adjust the number of workers based on the number of tasks and chunk size.

    Args:
        num_tasks (int): The number of tasks to be processed.
        chunk_size (int): The size of each processing chunk. Must be positive.
        num_workers (int, optional): The number of workers to use for parallel processing.
            If None, the number of workers is determined automatically.

    Returns:
        int: 0 if the requested (or automatically chosen) number of workers is not positive,
            i.e. multiprocessing should not be used. Otherwise a value of at least 1 that is
            capped by the number of whole chunks and by the number of CPUs.

    Raises:
        ValueError: If `chunk_size` is not positive.
    """
    if chunk_size <= 0:
        # Fail with a clear message instead of a ZeroDivisionError further down.
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size}")

    cpu_count = multiprocessing.cpu_count()

    if num_workers is None:  # if None then choose automatically
        num_workers = round(min(cpu_count / 2, num_tasks / (2 * chunk_size)))

    # A non-positive request means "do not use multiprocessing".
    if num_workers <= 0:
        return 0

    # No point having workers without tasks, nor more workers than cpus.
    num_workers = min(num_workers, num_tasks // chunk_size, cpu_count)

    # Multiprocessing was requested, so always keep at least one worker.
    return max(num_workers, 1)
class TqdmQueue(queue.Queue):
    """
    A subclass of `queue.Queue` that provides progress tracking using `tqdm`.

    Every item put into the queue raises the progress bar's total by one, and every
    call to `task_done()` advances the bar by one.

    Args:
        maxsize (int): The maximum size of the queue (default: 0).
        **kwargs: Additional keyword arguments to be passed to the tqdm progress bar.

    Attributes:
        pbar (tqdm.tqdm): The progress bar object.
        total (int): The total number of items that were put into the queue.

    Example:
        queue = TqdmQueue(maxsize=10)
        queue.put(item)
        queue.task_done()
        queue.join()
    """

    def __init__(self, maxsize: int = 0, **kwargs):
        super().__init__(maxsize=maxsize)
        self.pbar = tqdm(total=1, **kwargs)
        self.total = 0  # Keep our own total tracker so we can update the Progressbar

    def task_done(self):
        """
        Mark the task as done and update the progress bar.

        This method should be called when a task is completed. It updates the progress bar to reflect the completion
        of the task.
        """
        super().task_done()
        self.pbar.update()
        self.pbar.refresh()  # Redraw the progressbar

    def _put(self, item):
        """
        Store the item and grow the progress bar's total by one.
        """
        super()._put(item)
        self.total += 1
        # Only the total is changed. The bar is not reset and the counter is not
        # re-applied, so an update issued concurrently by `task_done()` from
        # another thread cannot be overwritten, and the bar is not restarted on
        # every put.
        self.pbar.total = self.total
        self.pbar.refresh()  # Redraw the progressbar

    def join(self):
        """
        Blocks until all items in the Queue have been gotten and processed, then closes the progress bar.
        """
        super().join()
        self.pbar.close()
class TaskScheduler:
    """
    This class is used to schedule tasks to be executed by a worker thread.

    Tasks are executed one at a time, in the order they were scheduled. The task function
    receives the scheduled parameters as a single tuple argument.

    Args:
        task_func (Callable): The function to be executed by the worker thread.
        maxsize (int, optional): The maximum number of items that can be in the queue.
        tqdm (bool, optional): Whether to use tqdm for progress tracking.
        **tqdm_kwargs: Additional keyword arguments to be passed to the TqdmQueue constructor.
    """

    def __init__(
        self,
        task_func: Callable,
        maxsize: int = 0,
        tqdm: bool = True,
        **tqdm_kwargs,
    ):
        self._queue = TqdmQueue(maxsize, **tqdm_kwargs) if tqdm else queue.Queue(maxsize)
        self._worker_thread = threading.Thread(target=self._worker, args=(self._queue,))
        self._task_func = task_func
        self._closed = False  # set once close() has completed

    def start(self):
        """
        Starts the worker thread.
        """
        self._worker_thread.start()

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def schedule_save(self, *params):
        """
        Schedules a task by putting task parameters into the queue.

        Args:
            *params: The parameters to be passed to the task function (as one tuple).
        """
        self._queue.put(item=params, block=True)

    def _worker(self, q: queue.Queue):
        """
        Worker loop: run the task function on every queued item until `None` is received.
        """
        import traceback  # local import: only needed to report failing tasks

        while True:
            params = q.get(block=True)
            # exit if signaled
            if params is None:
                # The sentinel was counted by put(), so mark it done as well;
                # otherwise a later join() on the queue would block forever.
                q.task_done()
                break
            try:
                self._task_func(params)
            except Exception:
                # Report the failure but keep the worker alive: if the thread died here
                # the remaining tasks would never be marked done and close() would hang.
                traceback.print_exc()
            finally:
                q.task_done()

    def close(self):
        """
        Waits for the queue to empty and then closes the worker thread.

        Calling this method again after it has completed is a no-op.
        """
        if getattr(self, "_closed", False):
            return
        self._queue.join()
        self._queue.put(None)
        self._worker_thread.join()
        self._closed = True
Functions
adjust_num_workers
def adjust_num_workers(
num_tasks: int,
chunk_size: int,
num_workers: int = None
) -> int
Adjust the number of workers based on the number of tasks and chunk size.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
num_tasks | int | The number of tasks to be processed. | None |
chunk_size | int | The size of each processing chunk. | None |
num_workers | int | The number of workers to use for parallel processing. If None, the number of workers is determined automatically. | None |
View Source
def adjust_num_workers(num_tasks: int, chunk_size: int, num_workers: int = None) -> int:
    """
    Adjust the number of workers based on the number of tasks and chunk size.

    Args:
        num_tasks (int): The number of tasks to be processed.
        chunk_size (int): The size of each processing chunk. Must be positive.
        num_workers (int, optional): The number of workers to use for parallel processing.
            If None, the number of workers is determined automatically.

    Returns:
        int: 0 if the requested (or automatically chosen) number of workers is not positive,
            i.e. multiprocessing should not be used. Otherwise a value of at least 1 that is
            capped by the number of whole chunks and by the number of CPUs.

    Raises:
        ValueError: If `chunk_size` is not positive.
    """
    if chunk_size <= 0:
        # Fail with a clear message instead of a ZeroDivisionError further down.
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size}")

    cpu_count = multiprocessing.cpu_count()

    if num_workers is None:  # if None then choose automatically
        num_workers = round(min(cpu_count / 2, num_tasks / (2 * chunk_size)))

    # A non-positive request means "do not use multiprocessing".
    if num_workers <= 0:
        return 0

    # No point having workers without tasks, nor more workers than cpus.
    num_workers = min(num_workers, num_tasks // chunk_size, cpu_count)

    # Multiprocessing was requested, so always keep at least one worker.
    return max(num_workers, 1)
Classes
TaskScheduler
class TaskScheduler(
task_func: Callable,
maxsize: int = 0,
tqdm: bool = True,
**tqdm_kwargs
)
This class is used to schedule tasks to be executed by a worker thread.
Attributes
Name | Type | Description | Default |
---|---|---|---|
task_func | Callable | The function to be executed by the worker thread. | None |
maxsize | int | The maximum number of items that can be in the queue. | 0 |
tqdm | bool | Whether to use tqdm for progress tracking. | True |
**tqdm_kwargs | None | Additional keyword arguments to be passed to the TqdmQueue constructor. | None |
View Source
class TaskScheduler:
    """
    This class is used to schedule tasks to be executed by a worker thread.

    Tasks are executed one at a time, in the order they were scheduled. The task function
    receives the scheduled parameters as a single tuple argument.

    Args:
        task_func (Callable): The function to be executed by the worker thread.
        maxsize (int, optional): The maximum number of items that can be in the queue.
        tqdm (bool, optional): Whether to use tqdm for progress tracking.
        **tqdm_kwargs: Additional keyword arguments to be passed to the TqdmQueue constructor.
    """

    def __init__(
        self,
        task_func: Callable,
        maxsize: int = 0,
        tqdm: bool = True,
        **tqdm_kwargs,
    ):
        self._queue = TqdmQueue(maxsize, **tqdm_kwargs) if tqdm else queue.Queue(maxsize)
        self._worker_thread = threading.Thread(target=self._worker, args=(self._queue,))
        self._task_func = task_func
        self._closed = False  # set once close() has completed

    def start(self):
        """
        Starts the worker thread.
        """
        self._worker_thread.start()

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def schedule_save(self, *params):
        """
        Schedules a task by putting task parameters into the queue.

        Args:
            *params: The parameters to be passed to the task function (as one tuple).
        """
        self._queue.put(item=params, block=True)

    def _worker(self, q: queue.Queue):
        """
        Worker loop: run the task function on every queued item until `None` is received.
        """
        import traceback  # local import: only needed to report failing tasks

        while True:
            params = q.get(block=True)
            # exit if signaled
            if params is None:
                # The sentinel was counted by put(), so mark it done as well;
                # otherwise a later join() on the queue would block forever.
                q.task_done()
                break
            try:
                self._task_func(params)
            except Exception:
                # Report the failure but keep the worker alive: if the thread died here
                # the remaining tasks would never be marked done and close() would hang.
                traceback.print_exc()
            finally:
                q.task_done()

    def close(self):
        """
        Waits for the queue to empty and then closes the worker thread.

        Calling this method again after it has completed is a no-op.
        """
        if getattr(self, "_closed", False):
            return
        self._queue.join()
        self._queue.put(None)
        self._worker_thread.join()
        self._closed = True
Descendants
- wtracker.utils.io_utils.FrameSaver
- wtracker.utils.io_utils.ImageSaver
Methods
close
def close(
self
)
Waits for the queue to empty and then closes the worker thread.
View Source
def close(self):
    """
    Waits for the queue to empty and then closes the worker thread.

    Calling this method again after it has completed is a no-op. Without this
    guard a second call would block forever, because the `None` sentinel put on
    the queue by the first call is never marked as done by the worker.
    """
    if getattr(self, "_closed", False):
        return
    self._queue.join()  # wait until every scheduled task was processed
    self._queue.put(None)  # signal the worker loop to exit
    self._worker_thread.join()
    self._closed = True
schedule_save
def schedule_save(
self,
*params
)
Schedules a task by putting task parameters into the queue.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
*params | None | The parameters to be passed to the task function. | None |
View Source
def schedule_save(self, *params):
    """
    Enqueue one task for the worker thread.

    The positional arguments are packed into a single tuple, which is put on the
    queue; the call blocks while the queue has no free slot.

    Args:
        *params: The parameters to be passed to the task function.
    """
    task_queue = self._queue
    task_queue.put(item=params, block=True)
start
def start(
self
)
Starts the worker thread.
View Source
def start(self):
    """
    Launch the background worker thread held in `self._worker_thread`.
    """
    worker = self._worker_thread
    worker.start()
TqdmQueue
class TqdmQueue(
maxsize: int = 0,
**kwargs
)
A subclass of queue.Queue that provides progress tracking using tqdm.
Attributes
Name | Type | Description | Default |
---|---|---|---|
maxsize | int | The maximum size of the queue (default: 0). | None |
**kwargs | None | Additional keyword arguments to be passed to the tqdm progress bar. | None |
pbar | tqdm.tqdm | The progress bar object. | None |
total | int | The total number of items that were put into the queue. | None |
View Source
class TqdmQueue(queue.Queue):
    """
    A subclass of `queue.Queue` that provides progress tracking using `tqdm`.

    Every item put into the queue raises the progress bar's total by one, and every
    call to `task_done()` advances the bar by one.

    Args:
        maxsize (int): The maximum size of the queue (default: 0).
        **kwargs: Additional keyword arguments to be passed to the tqdm progress bar.

    Attributes:
        pbar (tqdm.tqdm): The progress bar object.
        total (int): The total number of items that were put into the queue.

    Example:
        queue = TqdmQueue(maxsize=10)
        queue.put(item)
        queue.task_done()
        queue.join()
    """

    def __init__(self, maxsize: int = 0, **kwargs):
        super().__init__(maxsize=maxsize)
        self.pbar = tqdm(total=1, **kwargs)
        self.total = 0  # Keep our own total tracker so we can update the Progressbar

    def task_done(self):
        """
        Mark the task as done and update the progress bar.

        This method should be called when a task is completed. It updates the progress bar to reflect the completion
        of the task.
        """
        super().task_done()
        self.pbar.update()
        self.pbar.refresh()  # Redraw the progressbar

    def _put(self, item):
        """
        Store the item and grow the progress bar's total by one.
        """
        super()._put(item)
        self.total += 1
        # Only the total is changed. The bar is not reset and the counter is not
        # re-applied, so an update issued concurrently by `task_done()` from
        # another thread cannot be overwritten, and the bar is not restarted on
        # every put.
        self.pbar.total = self.total
        self.pbar.refresh()  # Redraw the progressbar

    def join(self):
        """
        Blocks until all items in the Queue have been gotten and processed, then closes the progress bar.
        """
        super().join()
        self.pbar.close()
Ancestors (in MRO)
- queue.Queue
Methods
empty
def empty(
self
)
Return True if the queue is empty, False otherwise (not reliable!).
This method is likely to be removed at some point. Use qsize() == 0 as a direct substitute, but be aware that either approach risks a race condition where a queue can grow before the result of empty() or qsize() can be used.
To create code that needs to wait for all queued tasks to be completed, the preferred technique is to use the join() method.
View Source
def empty(self):
    """Tell whether the queue currently holds no items (not reliable!).

    The answer is a snapshot taken under the queue mutex; the queue may grow
    before the caller acts on it, exactly as with `qsize() == 0`. To wait for
    all queued tasks to be completed, prefer the `join()` method.
    """
    with self.mutex:
        current_size = self._qsize()
        return not current_size
full
def full(
self
)
Return True if the queue is full, False otherwise (not reliable!).
This method is likely to be removed at some point. Use qsize() >= n as a direct substitute, but be aware that either approach risks a race condition where a queue can shrink before the result of full() or qsize() can be used.
View Source
def full(self):
    """Tell whether the queue has reached its maximum size (not reliable!).

    An unbounded queue (`maxsize` not positive) is never full. The answer is a
    snapshot taken under the queue mutex; the queue may shrink before the
    caller acts on it, exactly as with `qsize() >= n`.
    """
    with self.mutex:
        limit = self.maxsize
        return limit > 0 and self._qsize() >= limit
get
def get(
self,
block=True,
timeout=None
)
Remove and return an item from the queue.
If optional args 'block' is true and 'timeout' is None (the default), block if necessary until an item is available. If 'timeout' is a non-negative number, it blocks at most 'timeout' seconds and raises the Empty exception if no item was available within that time. Otherwise ('block' is false), return an item if one is immediately available, else raise the Empty exception ('timeout' is ignored in that case).
View Source
def get(self, block=True, timeout=None):
    """Remove and return an item from the queue.

    With `block` true and `timeout` None (the default), wait until an item is
    available. With `block` true and a non-negative `timeout`, wait at most
    that many seconds and raise `Empty` if nothing arrived in time; a negative
    `timeout` raises `ValueError`. With `block` false, return an item only if
    one is immediately available and raise `Empty` otherwise (`timeout` is
    ignored in that case).
    """
    with self.not_empty:
        if block and timeout is None:
            # Wait indefinitely for an item.
            while not self._qsize():
                self.not_empty.wait()
        elif block:
            if timeout < 0:
                raise ValueError("'timeout' must be a non-negative number")
            deadline = time() + timeout
            while not self._qsize():
                time_left = deadline - time()
                if time_left <= 0.0:
                    raise Empty
                self.not_empty.wait(time_left)
        elif not self._qsize():
            # Non-blocking request on an empty queue.
            raise Empty
        fetched = self._get()
        self.not_full.notify()
        return fetched
get_nowait
def get_nowait(
self
)
Remove and return an item from the queue without blocking.
Only get an item if one is immediately available. Otherwise raise the Empty exception.
View Source
def get_nowait(self):
    """Remove and return an item from the queue without blocking.

    An item is returned only if one is immediately available; otherwise the
    Empty exception is raised.
    """
    item = self.get(block=False)
    return item
join
def join(
self
)
Blocks until all items in the Queue have been gotten and processed.
View Source
def join(self):
    """
    Blocks until all items in the Queue have been gotten and processed.

    The waiting is delegated to the parent class `join()`; once it returns,
    the progress bar (`self.pbar`) is closed.
    """
    super().join()  # the parent implementation does the actual waiting
    self.pbar.close()  # the bar is closed as soon as the wait is over
put
def put(
self,
item,
block=True,
timeout=None
)
Put an item into the queue.
If optional args 'block' is true and 'timeout' is None (the default), block if necessary until a free slot is available. If 'timeout' is a non-negative number, it blocks at most 'timeout' seconds and raises the Full exception if no free slot was available within that time. Otherwise ('block' is false), put an item on the queue if a free slot is immediately available, else raise the Full exception ('timeout' is ignored in that case).
View Source
def put(self, item, block=True, timeout=None):
    """Put an item into the queue.

    For a bounded queue: with `block` true and `timeout` None (the default),
    wait until a free slot is available. With `block` true and a non-negative
    `timeout`, wait at most that many seconds and raise `Full` if no slot was
    freed in time; a negative `timeout` raises `ValueError`. With `block`
    false, enqueue only if a slot is immediately free and raise `Full`
    otherwise (`timeout` is ignored in that case).

    For an unbounded queue (`maxsize` not positive) the item is always
    enqueued immediately.
    """
    with self.not_full:
        if self.maxsize > 0:
            if block and timeout is None:
                # Wait indefinitely for a free slot.
                while self._qsize() >= self.maxsize:
                    self.not_full.wait()
            elif block:
                if timeout < 0:
                    raise ValueError("'timeout' must be a non-negative number")
                deadline = time() + timeout
                while self._qsize() >= self.maxsize:
                    time_left = deadline - time()
                    if time_left <= 0.0:
                        raise Full
                    self.not_full.wait(time_left)
            elif self._qsize() >= self.maxsize:
                # Non-blocking request on a full queue.
                raise Full
        self._put(item)
        self.unfinished_tasks += 1
        self.not_empty.notify()
put_nowait
def put_nowait(
self,
item
)
Put an item into the queue without blocking.
Only enqueue the item if a free slot is immediately available. Otherwise raise the Full exception.
View Source
def put_nowait(self, item):
    """Put an item into the queue without blocking.

    The item is enqueued only if a free slot is immediately available;
    otherwise the Full exception is raised.
    """
    outcome = self.put(item, block=False)
    return outcome
qsize
def qsize(
self
)
Return the approximate size of the queue (not reliable!).
View Source
def qsize(self):
    """Return the approximate size of the queue (not reliable!).

    The value is read while holding the queue mutex, but may be outdated by
    the time the caller uses it.
    """
    self.mutex.acquire()
    try:
        return self._qsize()
    finally:
        self.mutex.release()
task_done
def task_done(
self
)
Mark the task as done and update the progress bar.
This method should be called when a task is completed. It updates the progress bar to reflect the completion of the task.
View Source
def task_done(self):
    """
    Mark the task as done and update the progress bar.

    This method should be called when a task is completed. It first calls the
    parent class `task_done()`, then advances the progress bar (`self.pbar`)
    with a default `update()` call and redraws it.
    """
    super().task_done()  # parent bookkeeping happens before the bar is touched
    self.pbar.update()  # called without arguments, i.e. tqdm's default increment
    self.pbar.refresh()  # Redraw the progressbar