schematic.utils.general

General utils

  1"""General utils"""
  2
  3# pylint: disable=logging-fstring-interpolation
  4
  5import logging
  6import os
  7import pstats
  8from pathlib import Path
  9import tempfile
 10from cProfile import Profile
 11from datetime import datetime, timedelta
 12from functools import wraps
 13from typing import Any, Callable, Optional, Sequence, TypeVar, Union
 14
 15from synapseclient import Synapse  # type: ignore
 16from synapseclient.core import cache  # type: ignore
 17from synapseclient.core.exceptions import SynapseHTTPError  # type: ignore
 18from synapseclient.entity import File, Folder, Project  # type: ignore
 19from synapseclient.table import EntityViewSchema  # type: ignore
 20
 21from schematic.store.synapse_tracker import SynapseEntityTracker
 22
# Module-level logger, named after this module
logger = logging.getLogger(__name__)

# Generic type variable for the items handled by find_duplicates
T = TypeVar("T")

# Matches one or more "syn<digits>" ids, each optionally followed by a comma
SYN_ID_REGEX = r"(syn\d+\,?)+"
# Characters that escape_synapse_path prefixes with "|" in like patterns
LIKE_PATTERN_SPECIAL_CHARS = ["%", "_"]
 29
 30
 31def find_duplicates(_list: list[T]) -> set[T]:
 32    """Find duplicate items in a list"""
 33    return {x for x in _list if _list.count(x) > 1}
 34
 35
 36def dict2list(item: Any) -> Optional[Union[dict, list]]:
 37    """Puts a dictionary into a list
 38
 39    Args:
 40        item (Any): Any type of input
 41
 42    Returns:
 43        Optional[Union[dict, list]]:
 44          If input is a list, return it
 45          If input is a dict, return it in a list
 46          Return None for anything else
 47    """
 48    if isinstance(item, list):
 49        return item
 50    if isinstance(item, dict):
 51        return [item]
 52    return None
 53
 54
 55def str2list(item: Any) -> Optional[list]:
 56    """Puts a string into a list
 57
 58    Args:
 59        item (Any): Any type of input
 60
 61    Returns:
 62        Optional[list]:
 63          If input is a list, return it
 64          If input is a string, return it in a list
 65          Return None for anything else
 66    """
 67    if isinstance(item, str):
 68        return [item]
 69    if isinstance(item, list):
 70        return item
 71    return None
 72
 73
 74X = TypeVar("X")
 75
 76
 77def unlist(seq: Sequence[X]) -> Union[Sequence[X], X]:
 78    """Returns the first item of a sequence
 79
 80    Args:
 81        seq (Sequence[X]): A Sequence of any type
 82
 83    Returns:
 84        Union[Sequence[X], X]:
 85          if sequence is length one, return the first item
 86          otherwise return the sequence
 87    """
 88    if len(seq) == 1:
 89        return seq[0]
 90    return seq
 91
 92
 93def get_dir_size(path: str) -> int:
 94    """
 95    Recursively descend the directory tree rooted at the top and call
 96      .st_size function to calculate size of files in bytes.
 97    Args:
 98        path: path to a folder
 99    return: total size of all the files in a given directory in bytes.
100    """
101    total = 0
102    # Recursively scan directory to find entries
103    with os.scandir(path) as itr:
104        for entry in itr:
105            if entry.is_file():
106                total += entry.stat().st_size
107            elif entry.is_dir():
108                total += get_dir_size(entry.path)
109    return total
110
111
def calculate_datetime(
    minutes: int, input_date: datetime, before_or_after: str = "before"
) -> datetime:
    """Shift a date time backwards or forwards by a number of minutes

    Args:
        minutes (int): number of minutes to shift by
        input_date (datetime): the date time to start from
        before_or_after (str): default to "before". if "before", the result is x minutes
         earlier than input_date. if "after", the result is x minutes later than input_date.

    Raises:
        ValueError: if before_or_after is neither "before" nor "after"

    Returns:
        datetime: the shifted date time
    """
    if before_or_after not in ("before", "after"):
        raise ValueError("Invalid value. Use either 'before' or 'after'.")
    offset = timedelta(minutes=minutes)
    if before_or_after == "before":
        return input_date - offset
    return input_date + offset
133
134
def check_synapse_cache_size(directory: str = "/root/.synapseCache") -> float:
    """Add up the size, in bytes, of every file found under the .synapseCache directory.

    Args:
        directory (str, optional): .synapseCache directory. Defaults to '/root/.synapseCache'

    Returns:
        float: size of .synapsecache directory in bytes
    """
    size_in_bytes = 0
    # rglob("*") walks the whole tree; only regular files contribute to the total
    for entry in Path(directory).rglob("*"):
        if entry.is_file():
            size_in_bytes += entry.stat().st_size
    return size_in_bytes
148
149
def clear_synapse_cache(synapse_cache: cache.Cache, minutes: int) -> int:
    """Purge the synapse cache of entries older than a given number of minutes

    Args:
        synapse_cache: an object of synapseclient Cache.
        minutes (int): the cut-off is this many minutes before the current UTC time;
          it is handed to the cache's purge method as before_date
    Returns:
        int: the value returned by the purge call (number of files that get deleted)
    """
    cutoff = calculate_datetime(
        minutes=minutes, input_date=datetime.utcnow(), before_or_after="before"
    )
    return synapse_cache.purge(before_date=cutoff)
165
166
def entity_type_mapping(
    syn: Synapse,
    entity_id: str,
    synapse_entity_tracker: Optional[SynapseEntityTracker] = None,
) -> str:
    """Look up a Synapse entity and describe what kind of entity it is

    Args:
        syn (Synapse): Synapse object
        entity_id (str): id of an entity
        synapse_entity_tracker: Tracker for a pull-through cache of Synapse entities.
          A new tracker is created when none (or a falsy one) is given.

    Raises:
        SynapseHTTPError: Re-raised, with a clearer message, when the entity can't be fetched

    Returns:
        str: "asset view", "folder", "file" or "project" for the known entity classes,
          otherwise the entity's concreteType
    """
    try:
        tracker = synapse_entity_tracker or SynapseEntityTracker()
        entity = tracker.get(synapse_id=entity_id, syn=syn, download_file=False)
    except SynapseHTTPError as exc:
        message = (
            f"cannot get {entity_id} from asset store. "
            f"Please make sure that {entity_id} exists"
        )
        logger.error(message)
        raise SynapseHTTPError(message) from exc

    # Checked in this order; the first matching class decides the label
    known_types = (
        (EntityViewSchema, "asset view"),
        (Folder, "folder"),
        (File, "file"),
        (Project, "project"),
    )
    for entity_class, label in known_types:
        if isinstance(entity, entity_class):
            return label

    assert entity is not None
    # if there's no matching type, return concreteType
    return entity.concreteType
213
214
def create_temp_folder(path: str, prefix: Optional[str] = None) -> str:
    """Create a uniquely named temporary directory inside the given directory
    Args:
        path(str): a directory path where all the temporary files will live;
          it is created first if it doesn't exist yet
        prefix(str): a prefix to be added to the temporary directory name
    Returns: the pathname of the new directory, as returned by tempfile.mkdtemp.
    """
    parent_is_missing = not os.path.exists(path)
    if parent_is_missing:
        os.makedirs(path, exist_ok=True)

    # mkdtemp picks the unique name and creates the directory under the parent
    return tempfile.mkdtemp(prefix=prefix, dir=path)
228
229
def profile(
    output_file: Optional[str] = None,
    sort_by: Any = "cumulative",
    lines_to_print: Optional[int] = None,
    strip_dirs: bool = False,
) -> Callable:
    """
    A time profiler decorator.
    The function was initially taken from:
    https://towardsdatascience.com/how-to-profile-your-code-in-python-e70c834fad89
    Inspired by and modified the profile decorator of Giampaolo Rodola:
    http://code.activestate.com/recipes/577817-profile-decorator/

    Every call of the decorated function is run under cProfile. When the
    "SECRETS_MANAGER_SECRETS" environment variable is set (running on AWS), the first
    30 lines of the stats, sorted by "cumulative", are printed to the default pstats
    stream and sort_by, lines_to_print and strip_dirs are ignored. Otherwise a text
    report is written to the output file.

    Args:
        output_file (Optional[str], optional):
            Path of the output file. If only name of the file is given, it's
            saved in the current directory.
            If it's None, the name of the decorated function plus ".prof" is used.
            Defaults to None.
        sort_by (str, optional):
            str or SortKey enum or tuple/list of str/SortKey enum
            Sorting criteria for the Stats object.
            For a list of valid string and SortKey refer to:
            https://docs.python.org/3/library/profile.html#pstats.Stats.sort_stats
            Defaults to "cumulative".
        lines_to_print (Optional[int], optional):
            Number of lines to print.
            This is useful in reducing the size of the printout, especially
            that sorting by 'cumulative', the time consuming operations
            are printed toward the top of the file.
            Default (None) is for all the lines.
        strip_dirs (bool, optional):
            Whether to remove the leading path info from file names.
            This is also useful in reducing the size of the printout
            Defaults to False.

    Returns:
        Callable: A decorator; the function it produces profiles each call and
            returns the decorated function's own return value
    """

    def inner(func: Callable) -> Callable:
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            _output_file = output_file or func.__name__ + ".prof"
            profiler = Profile()
            profiler.enable()
            try:
                retval = func(*args, **kwargs)
            finally:
                # Stop profiling even when the decorated function raises, so the
                # profiler is never left enabled after the call
                profiler.disable()
            # Binary stats dump; in the non-AWS branch below the same path is then
            # overwritten with the text report
            profiler.dump_stats(_output_file)

            # if we are running the functions on AWS:
            if "SECRETS_MANAGER_SECRETS" in os.environ:
                p_stats = pstats.Stats(profiler)
                # limit this to 30 line for now otherwise it will be too long for AWS log
                p_stats.sort_stats("cumulative").print_stats(30)
            else:
                with open(_output_file, "w", encoding="utf-8") as fle:
                    p_stats = pstats.Stats(profiler, stream=fle)
                    if strip_dirs:
                        p_stats.strip_dirs()
                    if isinstance(sort_by, (tuple, list)):
                        p_stats.sort_stats(*sort_by)
                    else:
                        p_stats.sort_stats(sort_by)
                    p_stats.print_stats(lines_to_print)  # type: ignore
            return retval

        return wrapper

    return inner
300
301
def normalize_path(path: str, parent_folder: str) -> str:
    """
    Normalizes a path.
    A relative path is first joined onto parent_folder; an absolute path is used as is.

    Args:
        path (str): The path to the file to normalize.
        parent_folder (str): The folder a relative path is resolved against.

    Returns:
        str: The normalized path.
    """
    candidate = path if os.path.isabs(path) else os.path.join(parent_folder, path)
    return os.path.normpath(candidate)
317
318
def create_like_statement(synapse_path: str) -> str:
    """
    Builds the sql like statement used in a Synapse table query to find all files in a folder
    See:
      https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/web/controller/TableExamples.html

    The basic form of the statement is:
      path like '<synapse_path>/%'

    The path is escaped with escape_synapse_path, which uses '|' as the escape character.
      When the input path contains one of the like special characters, the statement has to
      declare that escape character, so "escape '|'" is added at the end:

      path like '<synapse_path>/%' escape '|'


    Args:
        synapse_path (str): The input synapse path to be made into a like statement

    Raises:
        ValueError: If the input path contains a '|' character

    Returns:
        str: A SQL like statement
    """
    if "|" in synapse_path:
        raise ValueError("Pattern can not contain '|' character.")

    escaped_path = escape_synapse_path(synapse_path)
    # The trailing % wildcard matches any file below the input path
    statement = f"path like '{escaped_path}/%'"

    # The escape char only needs declaring when a like special character was escaped
    needs_escape_clause = any(
        special in synapse_path for special in LIKE_PATTERN_SPECIAL_CHARS
    )
    if needs_escape_clause:
        statement += " escape '|'"
    return statement
355
356
def escape_synapse_path(synapse_path: str) -> str:
    """
    Escapes a synapse_path so it can be used inside a Synapse Table Query like pattern
    See:
      https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/web/controller/TableExamples.html

    Like patterns appear in select statements such as:
      select * from syn123 where foo like 'bar%'
    The like pattern is wrapped in single quotes, so a single quote inside the pattern is
      escaped by doubling it:
      select * from syn123 where foo like 'Children''s Hospital'

    The characters in LIKE_PATTERN_SPECIAL_CHARS have a special meaning in like patterns.
      Each of them is prefixed with the '|' escape character, like the documentation shows.


    Args:
        synapse_path (str): The synapse_path that needs to be escaped

    Returns:
        str: The like pattern with problematic characters escaped
    """
    escaped = synapse_path.replace("'", "''")
    for special in LIKE_PATTERN_SPECIAL_CHARS:
        escaped = escaped.replace(special, "|" + special)
    return escaped
logger = <Logger schematic.utils.general (WARNING)>
SYN_ID_REGEX = '(syn\\d+\\,?)+'
LIKE_PATTERN_SPECIAL_CHARS = ['%', '_']
def find_duplicates(_list: list[~T]) -> set[~T]:
32def find_duplicates(_list: list[T]) -> set[T]:
33    """Find duplicate items in a list"""
34    return {x for x in _list if _list.count(x) > 1}

Find duplicate items in a list

def dict2list(item: Any) -> Union[dict, list, NoneType]:
37def dict2list(item: Any) -> Optional[Union[dict, list]]:
38    """Puts a dictionary into a list
39
40    Args:
41        item (Any): Any type of input
42
43    Returns:
44        Optional[Union[dict, list]]:
45          If input is a list, return it
46          If input is a dict, return it in a list
47          Return None for anything else
48    """
49    if isinstance(item, list):
50        return item
51    if isinstance(item, dict):
52        return [item]
53    return None

Puts a dictionary into a list

Arguments:
  • item (Any): Any type of input
Returns:

Optional[Union[dict, list]]: If input is a list, return it If input is a dict, return it in a list Return None for anything else

def str2list(item: Any) -> Optional[list]:
56def str2list(item: Any) -> Optional[list]:
57    """Puts a string into a list
58
59    Args:
60        item (Any): Any type of input
61
62    Returns:
63        Optional[list]:
64          If input is a list, return it
65          If input is a string, return it in a list
66          Return None for anything else
67    """
68    if isinstance(item, str):
69        return [item]
70    if isinstance(item, list):
71        return item
72    return None

Puts a string into a list

Arguments:
  • item (Any): Any type of input
Returns:

Optional[list]: If input is a list, return it If input is a string, return it in a list Return None for anything else

def unlist(seq: Sequence[~X]) -> Union[Sequence[~X], ~X]:
78def unlist(seq: Sequence[X]) -> Union[Sequence[X], X]:
79    """Returns the first item of a sequence
80
81    Args:
82        seq (Sequence[X]): A Sequence of any type
83
84    Returns:
85        Union[Sequence[X], X]:
86          if sequence is length one, return the first item
87          otherwise return the sequence
88    """
89    if len(seq) == 1:
90        return seq[0]
91    return seq

Returns the first item of a sequence

Arguments:
  • seq (Sequence[X]): A Sequence of any type
Returns:

Union[Sequence[X], X]: if sequence is length one, return the first item otherwise return the sequence

def get_dir_size(path: str) -> int:
 94def get_dir_size(path: str) -> int:
 95    """
 96    Recursively descend the directory tree rooted at the top and call
 97      .st_size function to calculate size of files in bytes.
 98    Args:
 99        path: path to a folder
100    return: total size of all the files in a given directory in bytes.
101    """
102    total = 0
103    # Recursively scan directory to find entries
104    with os.scandir(path) as itr:
105        for entry in itr:
106            if entry.is_file():
107                total += entry.stat().st_size
108            elif entry.is_dir():
109                total += get_dir_size(entry.path)
110    return total

Recursively descend the directory tree rooted at the top and call .st_size function to calculate size of files in bytes.

Arguments:
  • path: path to a folder

return: total size of all the files in a given directory in bytes.

def calculate_datetime( minutes: int, input_date: datetime.datetime, before_or_after: str = 'before') -> datetime.datetime:
113def calculate_datetime(
114    minutes: int, input_date: datetime, before_or_after: str = "before"
115) -> datetime:
116    """calculate date time
117
118    Args:
119        input_date (datetime): date time object provided by users
120        minutes (int): number of minutes
121        before_or_after (str): default to "before". if "before", calculate x minutes before
122         current date time. if "after", calculate x minutes after current date time.
123
124    Returns:
125        datetime:  return result of date time calculation
126    """
127    if before_or_after == "before":
128        date_time_result = input_date - timedelta(minutes=minutes)
129    elif before_or_after == "after":
130        date_time_result = input_date + timedelta(minutes=minutes)
131    else:
132        raise ValueError("Invalid value. Use either 'before' or 'after'.")
133    return date_time_result

calculate date time

Arguments:
  • input_date (datetime): date time object provided by users
  • minutes (int): number of minutes
  • before_or_after (str): default to "before". if "before", calculate x minutes before current date time. if "after", calculate x minutes after current date time.
Returns:

datetime: return result of date time calculation

def check_synapse_cache_size(directory: str = '/root/.synapseCache') -> float:
136def check_synapse_cache_size(directory: str = "/root/.synapseCache") -> float:
137    """Calculate size of .synapseCache directory in bytes using pathlib.
138
139    Args:
140        directory (str, optional): .synapseCache directory. Defaults to '/root/.synapseCache'
141
142    Returns:
143        float: size of .synapsecache directory in bytes
144    """
145    total_size = sum(
146        f.stat().st_size for f in Path(directory).rglob("*") if f.is_file()
147    )
148    return total_size

Calculate size of .synapseCache directory in bytes using pathlib.

Arguments:
  • directory (str, optional): .synapseCache directory. Defaults to '/root/.synapseCache'
Returns:

float: size of .synapsecache directory in bytes

def clear_synapse_cache(synapse_cache: synapseclient.core.cache.Cache, minutes: int) -> int:
151def clear_synapse_cache(synapse_cache: cache.Cache, minutes: int) -> int:
152    """clear synapse cache before a certain time
153
154    Args:
155        synapse_cache: an object of synapseclient Cache.
156        minutes (int): all files before this minute will be removed
157    Returns:
158        int: number of files that get deleted
159    """
160    current_date = datetime.utcnow()
161    minutes_earlier = calculate_datetime(
162        input_date=current_date, minutes=minutes, before_or_after="before"
163    )
164    num_of_deleted_files = synapse_cache.purge(before_date=minutes_earlier)
165    return num_of_deleted_files

clear synapse cache before a certain time

Arguments:
  • synapse_cache: an object of synapseclient Cache.
  • minutes (int): all files before this minute will be removed
Returns:

int: number of files that get deleted

def entity_type_mapping( syn: synapseclient.client.Synapse, entity_id: str, synapse_entity_tracker: Optional[schematic.store.synapse_tracker.SynapseEntityTracker] = None) -> str:
168def entity_type_mapping(
169    syn: Synapse,
170    entity_id: str,
171    synapse_entity_tracker: Optional[SynapseEntityTracker] = None,
172) -> str:
173    """Return the entity type of manifest
174
175    Args:
176        syn (Synapse): Synapse object
177        entity_id (str): id of an entity
178        synapse_entity_tracker: Tracker for a pull-through cache of Synapse entities
179
180    Raises:
181        SynapseHTTPError: Re-raised SynapseHTTPError
182
183    Returns:
184        str: type of the manifest being returned
185    """
186    # check the type of entity
187    try:
188        if not synapse_entity_tracker:
189            synapse_entity_tracker = SynapseEntityTracker()
190        entity = synapse_entity_tracker.get(
191            synapse_id=entity_id, syn=syn, download_file=False
192        )
193    except SynapseHTTPError as exc:
194        logger.error(
195            f"cannot get {entity_id} from asset store. Please make sure that {entity_id} exists"
196        )
197        raise SynapseHTTPError(
198            f"cannot get {entity_id} from asset store. Please make sure that {entity_id} exists"
199        ) from exc
200
201    if isinstance(entity, EntityViewSchema):
202        entity_type = "asset view"
203    elif isinstance(entity, Folder):
204        entity_type = "folder"
205    elif isinstance(entity, File):
206        entity_type = "file"
207    elif isinstance(entity, Project):
208        entity_type = "project"
209    else:
210        assert entity is not None
211        # if there's no matching type, return concreteType
212        entity_type = entity.concreteType
213    return entity_type

Return the entity type of manifest

Arguments:
  • syn (Synapse): Synapse object
  • entity_id (str): id of an entity
  • synapse_entity_tracker: Tracker for a pull-through cache of Synapse entities
Raises:
  • SynapseHTTPError: Re-raised SynapseHTTPError
Returns:

str: type of the manifest being returned

def create_temp_folder(path: str, prefix: Optional[str] = None) -> str:
216def create_temp_folder(path: str, prefix: Optional[str] = None) -> str:
217    """This function creates a temporary directory in the specified directory
218    Args:
219        path(str): a directory path where all the temporary files will live
220        prefix(str): a prefix to be added to the temporary directory name
221    Returns: returns the absolute pathname of the new directory.
222    """
223    if not os.path.exists(path):
224        os.makedirs(path, exist_ok=True)
225
226    # Create a temporary directory in the specified directory
227    path = tempfile.mkdtemp(dir=path, prefix=prefix)
228    return path

This function creates a temporary directory in the specified directory

Arguments:
  • path(str): a directory path where all the temporary files will live
  • prefix(str): a prefix to be added to the temporary directory name

Returns: returns the absolute pathname of the new directory.

def profile( output_file: Optional[str] = None, sort_by: Any = 'cumulative', lines_to_print: Optional[int] = None, strip_dirs: bool = False) -> Callable:
231def profile(
232    output_file: Optional[str] = None,
233    sort_by: Any = "cumulative",
234    lines_to_print: Optional[int] = None,
235    strip_dirs: bool = False,
236) -> Callable:
237    """
238    The function was initially taken from:
239    https://towardsdatascience.com/how-to-profile-your-code-in-python-e70c834fad89
240    A time profiler decorator.
241    Inspired by and modified the profile decorator of Giampaolo Rodola:
242    http://code.activestate.com/recipes/577817-profile-decorator/
243
244    Args:
245        output_file (Optional[str], optional):
246            Path of the output file. If only name of the file is given, it's
247            saved in the current directory.
248            If it's None, the name of the decorated function is used.
249            Defaults to None.
250        sort_by (str, optional):
251            str or SortKey enum or tuple/list of str/SortKey enum
252            Sorting criteria for the Stats object.
253            For a list of valid string and SortKey refer to:
254            https://docs.python.org/3/library/profile.html#pstats.Stats.sort_stats
255            Defaults to "cumulative".
256        lines_to_print (Optional[int], optional):
257            Number of lines to print.
258            This is useful in reducing the size of the printout, especially
259            that sorting by 'cumulative', the time consuming operations
260            are printed toward the top of the file.
261            Default (None) is for all the lines.
262        strip_dirs (bool, optional):
263            Whether to remove the leading path info from file names.
264            This is also useful in reducing the size of the printout
265            Defaults to False.
266
267    Returns:
268        Callable: Profile of the decorated function
269    """
270
271    def inner(func: Callable) -> Callable:
272        @wraps(func)
273        def wrapper(*args: Any, **kwargs: Any) -> Callable:
274            _output_file = output_file or func.__name__ + ".prof"
275            profiler = Profile()
276            profiler.enable()
277            retval = func(*args, **kwargs)
278            profiler.disable()
279            profiler.dump_stats(_output_file)
280
281            # if we are running the functions on AWS:
282            if "SECRETS_MANAGER_SECRETS" in os.environ:
283                p_stats = pstats.Stats(profiler)
284                # limit this to 30 line for now otherwise it will be too long for AWS log
285                p_stats.sort_stats("cumulative").print_stats(30)
286            else:
287                with open(_output_file, "w", encoding="utf-8") as fle:
288                    p_stats = pstats.Stats(profiler, stream=fle)
289                    if strip_dirs:
290                        p_stats.strip_dirs()
291                    if isinstance(sort_by, (tuple, list)):
292                        p_stats.sort_stats(*sort_by)
293                    else:
294                        p_stats.sort_stats(sort_by)
295                    p_stats.print_stats(lines_to_print)  # type: ignore
296            return retval
297
298        return wrapper
299
300    return inner

The function was initially taken from: https://towardsdatascience.com/how-to-profile-your-code-in-python-e70c834fad89 A time profiler decorator. Inspired by and modified the profile decorator of Giampaolo Rodola: http://code.activestate.com/recipes/577817-profile-decorator/

Arguments:
  • output_file (Optional[str], optional): Path of the output file. If only name of the file is given, it's saved in the current directory. If it's None, the name of the decorated function is used. Defaults to None.
  • sort_by (str, optional): str or SortKey enum or tuple/list of str/SortKey enum Sorting criteria for the Stats object. For a list of valid string and SortKey refer to: https://docs.python.org/3/library/profile.html#pstats.Stats.sort_stats Defaults to "cumulative".
  • lines_to_print (Optional[int], optional): Number of lines to print. This is useful in reducing the size of the printout, especially that sorting by 'cumulative', the time consuming operations are printed toward the top of the file. Default (None) is for all the lines.
  • strip_dirs (bool, optional): Whether to remove the leading path info from file names. This is also useful in reducing the size of the printout Defaults to False.
Returns:

Callable: Profile of the decorated function

def normalize_path(path: str, parent_folder: str) -> str:
303def normalize_path(path: str, parent_folder: str) -> str:
304    """
305    Normalizes a path.
306    If the path is relative, the parent_folder is added to make it an absolute path.
307
308    Args:
309        path (str): The path to the file to normalize.
310        parent_folder (str): The folder the file is in.
311
312    Returns:
313        str: The normalized path.
314    """
315    if not os.path.isabs(path):
316        path = os.path.join(parent_folder, path)
317    return os.path.normpath(path)

Normalizes a path. If the path is relative, the parent_folder is added to make it an absolute path.

Arguments:
  • path (str): The path to the file to normalize.
  • parent_folder (str): The folder the file is in.
Returns:

str: The normalized path.

def create_like_statement(synapse_path: str) -> str:
320def create_like_statement(synapse_path: str) -> str:
321    """
322    Creates a sql like statement for a Synapse table query
323    See:
324      https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/web/controller/TableExamples.html
325
326    The statement is used to find all files in the folder, should be something like:
327      path like '<synapse_path>/%'
328
329    Certain special characters can also be used in like statements and these need to escaped with
330      a special character of the users choice. This function will use the '|' character like the
331      documentation shows. These need to have the "escape '|'" string added at the end, and
332      it will look like:
333
334      path like '<synapse_path>/%' escape '|'
335
336
337    Args:
338        synapse_path (str): The input synapse path to be made into a like statement
339
340    Raises:
341        ValueError: If the input path contains a '|' character
342
343    Returns:
344        str: A SQL like statement
345    """
346    if "|" in synapse_path:
347        raise ValueError("Pattern can not contain '|' character.")
348    like_pattern = escape_synapse_path(synapse_path)
349    # Adding the % wildcard makes this find any file in the input path
350    like_pattern = f"'{like_pattern}/%'"
351    statement = f"path like {like_pattern}"
352    # If there are any like special characters, the escape char needs to be indicated
353    if any((char in synapse_path for char in LIKE_PATTERN_SPECIAL_CHARS)):
354        statement = f"{statement} escape '|'"
355    return statement

Creates a sql like statement for a Synapse table query

See:

https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/web/controller/TableExamples.html

The statement is used to find all files in the folder, and should look something like: path like '<synapse_path>/%'

Certain special characters can also be used in like statements and these need to be escaped with a special character of the user's choice. This function will use the '|' character like the documentation shows. These need to have the "escape '|'" string added at the end, and it will look like:

path like '<synapse_path>/%' escape '|'

Arguments:
  • synapse_path (str): The input synapse path to be made into a like statement
Raises:
  • ValueError: If the input path contains a '|' character
Returns:

str: A SQL like statement

def escape_synapse_path(synapse_path: str) -> str:
358def escape_synapse_path(synapse_path: str) -> str:
359    """
360    Escapes certain characters in a synapse_path for a Synapse Table Query like statement
361    See:
362      https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/web/controller/TableExamples.html
363
364    Like patterns in appear in select statements such as:
365      select * from syn123 where foo like 'bar%'
366    The like pattern is in single quotes
367    Single quotes must be escaped by using 2x single quotes:
368      select * from syn123 where foo like 'Children''s Hospital'
369
370    Certain special characters can also be used in like statements and these need to escaped with
371      a special character of the users choice. This function will use the '|' character like the
372      documentation shows.
373
374
375    Args:
376        synapse_path (str): The synapse_path that needs to be escaped
377
378    Returns:
379        str: The like pattern with problematic characters escaped
380    """
381    pattern = synapse_path.replace("'", "''")
382    for char in LIKE_PATTERN_SPECIAL_CHARS:
383        pattern = pattern.replace(char, f"|{char}")
384    return pattern

Escapes certain characters in a synapse_path for a Synapse Table Query like statement

See:

https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/web/controller/TableExamples.html

Like patterns appear in select statements such as:

select * from syn123 where foo like 'bar%'

The like pattern is in single quotes Single quotes must be escaped by using 2x single quotes: select * from syn123 where foo like 'Children''s Hospital'

Certain special characters can also be used in like statements and these need to be escaped with a special character of the user's choice. This function will use the '|' character like the documentation shows.

Arguments:
  • synapse_path (str): The synapse_path that needs to be escaped
Returns:

str: The like pattern with problematic characters escaped