Skip to content

Module wtracker.utils.threading_utils

View Source
import queue

import threading

import multiprocessing

from typing import Callable

from tqdm.auto import tqdm

def adjust_num_workers(num_tasks: int, chunk_size: int, num_workers: int = None) -> int:
    """
    Adjust the number of workers based on the number of tasks and chunk size.

    Args:
        num_tasks (int): The number of tasks to be processed.
        chunk_size (int): The size of each processing chunk.
        num_workers (int, optional): The number of workers to use for parallel processing.
            If None, the number of workers is determined automatically as the rounded
            minimum of half the CPU count and ``num_tasks / (2 * chunk_size)``.

    Returns:
        int: The adjusted number of workers. ``0`` means multiprocessing is disabled
        (the requested or automatically chosen value was not positive). Otherwise the
        value is at least 1 and at most the smaller of ``num_tasks // chunk_size`` and
        the CPU count.
    """
    if num_workers is None:  # if None then choose automatically
        num_workers = round(min(multiprocessing.cpu_count() / 2, num_tasks / (2 * chunk_size)))

    # A non-positive worker count means "do not use multiprocessing".
    if num_workers <= 0:
        return 0

    num_workers = min(
        num_workers,
        num_tasks // chunk_size,  # no point having workers without tasks
        multiprocessing.cpu_count(),  # no point having more workers than cpus
    )

    # Multiprocessing was requested, so keep at least one worker even if the caps
    # above brought the count down to zero (or below).
    return max(num_workers, 1)

class TqdmQueue(queue.Queue):
    """
    A subclass of `queue.Queue` that provides progress tracking using `tqdm`.

    The progress bar's total grows by one for every item put into the queue, and the
    bar advances by one for every call to `task_done()`. The bar is closed by `join()`.

    Args:
        maxsize (int): The maximum size of the queue (default: 0).
        **kwargs: Additional keyword arguments to be passed to the tqdm progress bar.

    Attributes:
        pbar (tqdm.tqdm): The progress bar object.
        total (int): The number of items that have been put into the queue so far.

    Example:
        queue = TqdmQueue(maxsize=10)
        queue.put(item)
        queue.task_done()
        queue.join()
    """

    def __init__(self, maxsize: int = 0, **kwargs):
        super().__init__(maxsize=maxsize)
        # total=1 is only a placeholder; the real total is applied on every `_put`.
        self.pbar = tqdm(total=1, **kwargs)
        self.total = 0  # Keep our own total tracker so we can update the Progressbar

    def task_done(self):
        """
        Mark the task as done and update the progress bar.

        This method should be called when a task is completed. It updates the progress bar to reflect the completion
        of the task.

        Raises:
            ValueError: If called more times than there were items placed in the queue
                (raised by `queue.Queue.task_done`; the bar is left untouched in that case).
        """
        # `_put` runs with the queue mutex held and performs a read / reset / re-apply
        # sequence on the bar. Advancing the bar under the same mutex keeps a tick from
        # landing in the middle of that sequence and getting lost.
        with self.mutex:
            if self.unfinished_tasks > 0:
                self.pbar.update()
                self.pbar.refresh()  # Redraw the progressbar
        # Only now release any `join()` waiter, so the bar cannot be closed before the
        # final tick has been drawn.
        super().task_done()

    def _put(self, item):
        # Called by `put()` while the queue mutex is held.
        super()._put(item)
        self.total += 1
        processed = self.pbar.n  # Get current progress to re-apply
        self.pbar.reset(self.total)  # Reset and update total
        self.pbar.update(processed)  # Re-apply progress
        self.pbar.refresh()  # Redraw the progressbar

    def join(self):
        """
        Blocks until all items in the Queue have been gotten and processed, then closes the progress bar.
        """
        super().join()
        self.pbar.close()

class TaskScheduler:
    """
    This class is used to schedule tasks to be executed by a worker thread.

    Tasks are executed one at a time, in the order they were scheduled. Each call to
    `schedule_save(*params)` results in one call ``task_func(params)``, where ``params``
    is the tuple of positional arguments that was scheduled.

    Can be used as a context manager: the worker thread is started on entry and
    `close()` is called on exit.

    Args:
        task_func (Callable): The function to be executed by the worker thread.
            It is called with a single argument: the tuple of scheduled parameters.
        maxsize (int, optional): The maximum number of items that can be in the queue.
        tqdm (bool, optional): Whether to use tqdm for progress tracking.
        **tqdm_kwargs: Additional keyword arguments to be passed to the TqdmQueue constructor.
    """

    def __init__(
        self,
        task_func: Callable,
        maxsize: int = 0,
        tqdm: bool = True,
        **tqdm_kwargs,
    ):
        self._queue = TqdmQueue(maxsize, **tqdm_kwargs) if tqdm else queue.Queue(maxsize)
        self._worker_thread = threading.Thread(target=self._worker, args=(self._queue,))
        self._task_func = task_func

    def start(self):
        """
        Starts the worker thread.
        """
        self._worker_thread.start()

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def schedule_save(self, *params):
        """
        Schedules a task by putting task parameters into the queue.

        Blocks while the queue is full.

        Args:
            *params: The parameters to be passed to the task function. They are handed
                to the task function as one tuple.
        """
        self._queue.put(item=params, block=True)

    def _worker(self, q: queue.Queue):
        """
        Worker loop: run the task function on every queued item until the ``None`` sentinel arrives.

        Args:
            q (queue.Queue): The queue to consume task parameters from.
        """
        import traceback  # local import: only needed to report task failures

        while True:
            params = q.get(block=True)
            try:
                # exit if signaled
                if params is None:
                    break
                self._task_func(params)
            except Exception:
                # A failing task must not kill the worker: report it and keep going,
                # otherwise the remaining items would never be processed.
                traceback.print_exc()
            finally:
                # Every item taken from the queue (including the sentinel) is marked
                # done, so `queue.join()` can never wait on an item nobody will finish.
                q.task_done()

    def close(self):
        """
        Waits for the queue to empty and then closes the worker thread.

        Calling this when the worker thread is not running (never started, or already
        closed) does nothing.
        """
        if not self._worker_thread.is_alive():
            return

        self._queue.join()
        self._queue.put(None)  # sentinel telling the worker loop to exit
        self._worker_thread.join()

Functions

adjust_num_workers

def adjust_num_workers(
    num_tasks: int,
    chunk_size: int,
    num_workers: int = None
) -> int

Adjust the number of workers based on the number of tasks and chunk size.

Parameters:

Name Type Description Default
num_tasks int The number of tasks to be processed. required
chunk_size int The size of each processing chunk. required
num_workers int The number of workers to use for parallel processing.
If None, the number of workers is determined automatically.
None
View Source
def adjust_num_workers(num_tasks: int, chunk_size: int, num_workers: int = None) -> int:

    """

    Adjust the number of workers based on the number of tasks and chunk size.

    Args:

        num_tasks (int): The number of tasks to be processed.

        chunk_size (int): The size of each processing chunk.

        num_workers (int, optional): The number of workers to use for parallel processing.

            If None, the number of workers is determined automatically.

    """

    if num_workers is None:  # if None then choose automatically

        num_workers = min(multiprocessing.cpu_count() / 2, num_tasks / (2 * chunk_size))

        num_workers = round(num_workers)

    use_multiprocessing = num_workers > 0

    num_workers = min(num_workers, num_tasks // chunk_size)  # no point having workers without tasks

    num_workers = min(num_workers, multiprocessing.cpu_count())  # no point having more workers than cpus

    if num_workers < 0:  # make sure value is valid

        num_workers = 0

    if use_multiprocessing:

        num_workers = max(num_workers, 1)

    elif not use_multiprocessing and num_workers == 1:

        num_workers = 0

    return num_workers

Classes

TaskScheduler

class TaskScheduler(
    task_func: Callable,
    maxsize: int = 0,
    tqdm: bool = True,
    **tqdm_kwargs
)

This class is used to schedule tasks to be executed by a worker thread.

Attributes

Name Type Description Default
task_func Callable The function to be executed by the worker thread. None
maxsize int The maximum number of items that can be in the queue. 0
tqdm bool Whether to use tqdm for progress tracking. True
**tqdm_kwargs None Additional keyword arguments to be passed to the TqdmQueue constructor. None
View Source
class TaskScheduler:

    """

    This class is used to schedule tasks to be executed by a worker thread.

    Args:

        task_func (Callable): The function to be executed by the worker thread.

        maxsize (int, optional): The maximum number of items that can be in the queue.

        tqdm (bool, optional): Whether to use tqdm for progress tracking.

        **tqdm_kwargs: Additional keyword arguments to be passed to the TqdmQueue constructor.

    """

    def __init__(

        self,

        task_func: Callable,

        maxsize: int = 0,

        tqdm: bool = True,

        **tqdm_kwargs,

    ):

        self._queue = TqdmQueue(maxsize, **tqdm_kwargs) if tqdm else queue.Queue(maxsize)

        self._worker_thread = threading.Thread(target=self._worker, args=(self._queue,))

        self._task_func = task_func

    def start(self):

        """

        Starts the worker thread.

        """

        self._worker_thread.start()

    def __enter__(self):

        self.start()

        return self

    def __exit__(self, exc_type, exc_value, traceback):

        self.close()

    def schedule_save(self, *params):

        """

        Schedules a task by putting task parameters into the queue.

        Args:

            *params: The parameters to be passed to the task function.

        """

        self._queue.put(item=params, block=True)

    def _worker(self, q: queue.Queue):

        while True:

            params = q.get(block=True)

            # exit if signaled

            if params is None:

                break

            self._task_func(params)

            q.task_done()

    def close(self):

        """

        Waits for the queue to empty and then closes the worker thread.

        """

        self._queue.join()

        self._queue.put(None)

        self._worker_thread.join()

Descendants

  • wtracker.utils.io_utils.FrameSaver
  • wtracker.utils.io_utils.ImageSaver

Methods

close

def close(
    self
)

Waits for the queue to empty and then closes the worker thread.

View Source
    def close(self):

        """

        Waits for the queue to empty and then closes the worker thread.

        """

        self._queue.join()

        self._queue.put(None)

        self._worker_thread.join()

schedule_save

def schedule_save(
    self,
    *params
)

Schedules a task by putting task parameters into the queue.

Parameters:

Name Type Description Default
*params None The parameters to be passed to the task function. None
View Source
    def schedule_save(self, *params):

        """

        Schedules a task by putting task parameters into the queue.

        Args:

            *params: The parameters to be passed to the task function.

        """

        self._queue.put(item=params, block=True)

start

def start(
    self
)

Starts the worker thread.

View Source
    def start(self):

        """

        Starts the worker thread.

        """

        self._worker_thread.start()

TqdmQueue

class TqdmQueue(
    maxsize: int = 0,
    **kwargs
)

A subclass of queue.Queue that provides progress tracking using tqdm.

Attributes

Name Type Description Default
maxsize int The maximum size of the queue (default: 0). 0
**kwargs None Additional keyword arguments to be passed to the tqdm progress bar. None
pbar tqdm.tqdm The progress bar object. None
total int The total number of items put into the queue so far. None
View Source
class TqdmQueue(queue.Queue):

    """

    A subclass of `queue.Queue` that provides progress tracking using `tqdm`.

    Args:

        maxsize (int): The maximum size of the queue (default: 0).

        **kwargs: Additional keyword arguments to be passed to the tqdm progress bar.

    Attributes:

        pbar (tqdm.tqdm): The progress bar object.

        total (int): The total number of items processed.

    Example:

        queue = ProgressQueue(maxsize=10)

        queue.put(item)

        queue.task_done()

        queue.join()

    """

    def __init__(self, maxsize: int = 0, **kwargs):

        super().__init__(maxsize=maxsize)

        self.pbar = tqdm(total=1, **kwargs)

        self.total = 0  # Keep our own total tracker so we can update the Progressbar

    def task_done(self):

        """

        Mark the task as done and update the progress bar.

        This method should be called when a task is completed. It updates the progress bar to reflect the completion

        of the task.

        """

        super().task_done()

        self.pbar.update()

        self.pbar.refresh()  # Redraw the progressbar

    def _put(self, item):

        super()._put(item)

        self.total += 1

        processed = self.pbar.n  # Get current progress to re-apply

        self.pbar.reset(self.total)  # Reset and update total

        self.pbar.update(processed)  # Re-apply progress

        self.pbar.refresh()  # Redraw the progressbar

    def join(self):

        """

        Blocks until all items in the Queue have been gotten and processed.

        """

        super().join()

        self.pbar.close()

Ancestors (in MRO)

  • queue.Queue

Methods

empty

def empty(
    self
)

Return True if the queue is empty, False otherwise (not reliable!).

This method is likely to be removed at some point. Use qsize() == 0 as a direct substitute, but be aware that either approach risks a race condition where a queue can grow before the result of empty() or qsize() can be used.

To create code that needs to wait for all queued tasks to be completed, the preferred technique is to use the join() method.

View Source
    def empty(self):

        '''Return True if the queue is empty, False otherwise (not reliable!).

        This method is likely to be removed at some point.  Use qsize() == 0

        as a direct substitute, but be aware that either approach risks a race

        condition where a queue can grow before the result of empty() or

        qsize() can be used.

        To create code that needs to wait for all queued tasks to be

        completed, the preferred technique is to use the join() method.

        '''

        with self.mutex:

            return not self._qsize()

full

def full(
    self
)

Return True if the queue is full, False otherwise (not reliable!).

This method is likely to be removed at some point. Use qsize() >= n as a direct substitute, but be aware that either approach risks a race condition where a queue can shrink before the result of full() or qsize() can be used.

View Source
    def full(self):

        '''Return True if the queue is full, False otherwise (not reliable!).

        This method is likely to be removed at some point.  Use qsize() >= n

        as a direct substitute, but be aware that either approach risks a race

        condition where a queue can shrink before the result of full() or

        qsize() can be used.

        '''

        with self.mutex:

            return 0 < self.maxsize <= self._qsize()

get

def get(
    self,
    block=True,
    timeout=None
)

Remove and return an item from the queue.

If optional args 'block' is true and 'timeout' is None (the default), block if necessary until an item is available. If 'timeout' is a non-negative number, it blocks at most 'timeout' seconds and raises the Empty exception if no item was available within that time. Otherwise ('block' is false), return an item if one is immediately available, else raise the Empty exception ('timeout' is ignored in that case).

View Source
    def get(self, block=True, timeout=None):

        '''Remove and return an item from the queue.

        If optional args 'block' is true and 'timeout' is None (the default),

        block if necessary until an item is available. If 'timeout' is

        a non-negative number, it blocks at most 'timeout' seconds and raises

        the Empty exception if no item was available within that time.

        Otherwise ('block' is false), return an item if one is immediately

        available, else raise the Empty exception ('timeout' is ignored

        in that case).

        '''

        with self.not_empty:

            if not block:

                if not self._qsize():

                    raise Empty

            elif timeout is None:

                while not self._qsize():

                    self.not_empty.wait()

            elif timeout < 0:

                raise ValueError("'timeout' must be a non-negative number")

            else:

                endtime = time() + timeout

                while not self._qsize():

                    remaining = endtime - time()

                    if remaining <= 0.0:

                        raise Empty

                    self.not_empty.wait(remaining)

            item = self._get()

            self.not_full.notify()

            return item

get_nowait

def get_nowait(
    self
)

Remove and return an item from the queue without blocking.

Only get an item if one is immediately available. Otherwise raise the Empty exception.

View Source
    def get_nowait(self):

        '''Remove and return an item from the queue without blocking.

        Only get an item if one is immediately available. Otherwise

        raise the Empty exception.

        '''

        return self.get(block=False)

join

def join(
    self
)

Blocks until all items in the Queue have been gotten and processed.

View Source
    def join(self):

        """

        Blocks until all items in the Queue have been gotten and processed.

        """

        super().join()

        self.pbar.close()

put

def put(
    self,
    item,
    block=True,
    timeout=None
)

Put an item into the queue.

If optional args 'block' is true and 'timeout' is None (the default), block if necessary until a free slot is available. If 'timeout' is a non-negative number, it blocks at most 'timeout' seconds and raises the Full exception if no free slot was available within that time. Otherwise ('block' is false), put an item on the queue if a free slot is immediately available, else raise the Full exception ('timeout' is ignored in that case).

View Source
    def put(self, item, block=True, timeout=None):

        '''Put an item into the queue.

        If optional args 'block' is true and 'timeout' is None (the default),

        block if necessary until a free slot is available. If 'timeout' is

        a non-negative number, it blocks at most 'timeout' seconds and raises

        the Full exception if no free slot was available within that time.

        Otherwise ('block' is false), put an item on the queue if a free slot

        is immediately available, else raise the Full exception ('timeout'

        is ignored in that case).

        '''

        with self.not_full:

            if self.maxsize > 0:

                if not block:

                    if self._qsize() >= self.maxsize:

                        raise Full

                elif timeout is None:

                    while self._qsize() >= self.maxsize:

                        self.not_full.wait()

                elif timeout < 0:

                    raise ValueError("'timeout' must be a non-negative number")

                else:

                    endtime = time() + timeout

                    while self._qsize() >= self.maxsize:

                        remaining = endtime - time()

                        if remaining <= 0.0:

                            raise Full

                        self.not_full.wait(remaining)

            self._put(item)

            self.unfinished_tasks += 1

            self.not_empty.notify()

put_nowait

def put_nowait(
    self,
    item
)

Put an item into the queue without blocking.

Only enqueue the item if a free slot is immediately available. Otherwise raise the Full exception.

View Source
    def put_nowait(self, item):

        '''Put an item into the queue without blocking.

        Only enqueue the item if a free slot is immediately available.

        Otherwise raise the Full exception.

        '''

        return self.put(item, block=False)

qsize

def qsize(
    self
)

Return the approximate size of the queue (not reliable!).

View Source
    def qsize(self):

        '''Return the approximate size of the queue (not reliable!).'''

        with self.mutex:

            return self._qsize()

task_done

def task_done(
    self
)

Mark the task as done and update the progress bar.

This method should be called when a task is completed. It updates the progress bar to reflect the completion of the task.

View Source
    def task_done(self):

        """

        Mark the task as done and update the progress bar.

        This method should be called when a task is completed. It updates the progress bar to reflect the completion

        of the task.

        """

        super().task_done()

        self.pbar.update()

        self.pbar.refresh()  # Redraw the progressbar