schematic.utils.general
General utils
"""General utils"""

# pylint: disable=logging-fstring-interpolation

import logging
import os
import pstats
from pathlib import Path
import tempfile
from cProfile import Profile
from datetime import datetime, timedelta
from functools import wraps
from typing import Any, Callable, Optional, Sequence, TypeVar, Union

from synapseclient import Synapse  # type: ignore
from synapseclient.core import cache  # type: ignore
from synapseclient.core.exceptions import SynapseHTTPError  # type: ignore
from synapseclient.entity import File, Folder, Project  # type: ignore
from synapseclient.table import EntityViewSchema  # type: ignore

from schematic.store.synapse_tracker import SynapseEntityTracker

logger = logging.getLogger(__name__)

T = TypeVar("T")

SYN_ID_REGEX = r"(syn\d+\,?)+"
LIKE_PATTERN_SPECIAL_CHARS = ["%", "_"]


def find_duplicates(_list: list[T]) -> set[T]:
    """Find duplicate items in a list

    Args:
        _list (list[T]): The items to inspect

    Returns:
        set[T]: The items that occur more than once in the input
    """
    try:
        # Tally every item in a single pass; calling list.count() once per item
        # is quadratic in the length of the list.
        occurrences: dict[T, int] = {}
        for item in _list:
            occurrences[item] = occurrences.get(item, 0) + 1
    except TypeError:
        # Unhashable items can not be tallied in a dict. Fall back to the
        # pairwise count, which only hashes items that really are duplicated.
        return {x for x in _list if _list.count(x) > 1}
    return {item for item, count in occurrences.items() if count > 1}


def dict2list(item: Any) -> Optional[Union[dict, list]]:
    """Puts a dictionary into a list

    Args:
        item (Any): Any type of input

    Returns:
        Optional[Union[dict, list]]:
          If input is a list, return it
          If input is a dict, return it in a list
          Return None for anything else
    """
    if isinstance(item, list):
        return item
    if isinstance(item, dict):
        return [item]
    return None


def str2list(item: Any) -> Optional[list]:
    """Puts a string into a list

    Args:
        item (Any): Any type of input

    Returns:
        Optional[list]:
          If input is a list, return it
          If input is a string, return it in a list
          Return None for anything else
    """
    if isinstance(item, str):
        return [item]
    if isinstance(item, list):
        return item
    return None


X = TypeVar("X")


def unlist(seq: Sequence[X]) -> Union[Sequence[X], X]:
    """Returns the first item of a sequence

    Args:
        seq (Sequence[X]): A Sequence of any type

    Returns:
        Union[Sequence[X], X]:
          if sequence is length one, return the first item
          otherwise return the sequence
    """
    if len(seq) == 1:
        return seq[0]
    return seq


def get_dir_size(path: str) -> int:
    """
    Recursively descend the directory tree rooted at the top and call
      .st_size function to calculate size of files in bytes.
    Args:
        path: path to a folder
    return: total size of all the files in a given directory in bytes.
    """
    total = 0
    # Recursively scan directory to find entries
    with os.scandir(path) as itr:
        for entry in itr:
            if entry.is_file():
                total += entry.stat().st_size
            elif entry.is_dir():
                total += get_dir_size(entry.path)
    return total


def calculate_datetime(
    minutes: int, input_date: datetime, before_or_after: str = "before"
) -> datetime:
    """calculate date time

    Args:
        input_date (datetime): date time object provided by users
        minutes (int): number of minutes
        before_or_after (str): default to "before". if "before", calculate x minutes before
         the input date time. if "after", calculate x minutes after the input date time.

    Raises:
        ValueError: If before_or_after is neither "before" nor "after"

    Returns:
        datetime: return result of date time calculation
    """
    if before_or_after == "before":
        date_time_result = input_date - timedelta(minutes=minutes)
    elif before_or_after == "after":
        date_time_result = input_date + timedelta(minutes=minutes)
    else:
        raise ValueError("Invalid value. Use either 'before' or 'after'.")
    return date_time_result


def check_synapse_cache_size(directory: str = "/root/.synapseCache") -> float:
    """Calculate size of .synapseCache directory in bytes using pathlib.

    Args:
        directory (str, optional): .synapseCache directory. Defaults to '/root/.synapseCache'

    Returns:
        float: size of .synapsecache directory in bytes
    """
    total_size = sum(
        f.stat().st_size for f in Path(directory).rglob("*") if f.is_file()
    )
    return total_size


def clear_synapse_cache(synapse_cache: cache.Cache, minutes: int) -> int:
    """clear synapse cache before a certain time

    Args:
        synapse_cache: an object of synapseclient Cache.
        minutes (int): all files before this minute will be removed
    Returns:
        int: number of files that get deleted
    """
    # NOTE(review): a naive UTC timestamp is handed to Cache.purge; confirm that
    # synapseclient accepts timezone-aware values before switching to
    # datetime.now(timezone.utc).
    current_date = datetime.utcnow()
    minutes_earlier = calculate_datetime(
        input_date=current_date, minutes=minutes, before_or_after="before"
    )
    num_of_deleted_files = synapse_cache.purge(before_date=minutes_earlier)
    return num_of_deleted_files


def entity_type_mapping(
    syn: Synapse,
    entity_id: str,
    synapse_entity_tracker: Optional[SynapseEntityTracker] = None,
) -> str:
    """Return the entity type of manifest

    Args:
        syn (Synapse): Synapse object
        entity_id (str): id of an entity
        synapse_entity_tracker: Tracker for a pull-through cache of Synapse entities

    Raises:
        SynapseHTTPError: Re-raised SynapseHTTPError

    Returns:
        str: type of the manifest being returned
    """
    # check the type of entity
    try:
        if not synapse_entity_tracker:
            synapse_entity_tracker = SynapseEntityTracker()
        entity = synapse_entity_tracker.get(
            synapse_id=entity_id, syn=syn, download_file=False
        )
    except SynapseHTTPError as exc:
        logger.error(
            f"cannot get {entity_id} from asset store. Please make sure that {entity_id} exists"
        )
        raise SynapseHTTPError(
            f"cannot get {entity_id} from asset store. Please make sure that {entity_id} exists"
        ) from exc

    if isinstance(entity, EntityViewSchema):
        entity_type = "asset view"
    elif isinstance(entity, Folder):
        entity_type = "folder"
    elif isinstance(entity, File):
        entity_type = "file"
    elif isinstance(entity, Project):
        entity_type = "project"
    else:
        assert entity is not None
        # if there's no matching type, return concreteType
        entity_type = entity.concreteType
    return entity_type


def create_temp_folder(path: str, prefix: Optional[str] = None) -> str:
    """This function creates a temporary directory in the specified directory
    Args:
        path(str): a directory path where all the temporary files will live
        prefix(str): a prefix to be added to the temporary directory name
    Returns: returns the absolute pathname of the new directory.
    """
    if not os.path.exists(path):
        os.makedirs(path, exist_ok=True)

    # Create a temporary directory in the specified directory
    path = tempfile.mkdtemp(dir=path, prefix=prefix)
    return path


def profile(
    output_file: Optional[str] = None,
    sort_by: Any = "cumulative",
    lines_to_print: Optional[int] = None,
    strip_dirs: bool = False,
) -> Callable:
    """
    The function was initially taken from:
    https://towardsdatascience.com/how-to-profile-your-code-in-python-e70c834fad89
    A time profiler decorator.
    Inspired by and modified the profile decorator of Giampaolo Rodola:
    http://code.activestate.com/recipes/577817-profile-decorator/

    Args:
        output_file (Optional[str], optional):
            Path of the output file. If only name of the file is given, it's
            saved in the current directory.
            If it's None, the name of the decorated function is used.
            Defaults to None.
        sort_by (str, optional):
            str or SortKey enum or tuple/list of str/SortKey enum
            Sorting criteria for the Stats object.
            For a list of valid string and SortKey refer to:
            https://docs.python.org/3/library/profile.html#pstats.Stats.sort_stats
            Defaults to "cumulative".
        lines_to_print (Optional[int], optional):
            Number of lines to print.
            This is useful in reducing the size of the printout, especially
            that sorting by 'cumulative', the time consuming operations
            are printed toward the top of the file.
            Default (None) is for all the lines.
        strip_dirs (bool, optional):
            Whether to remove the leading path info from file names.
            This is also useful in reducing the size of the printout
            Defaults to False.

    Returns:
        Callable: Profile of the decorated function
    """

    def inner(func: Callable) -> Callable:
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            _output_file = output_file or func.__name__ + ".prof"
            profiler = Profile()
            profiler.enable()
            try:
                retval = func(*args, **kwargs)
            finally:
                # Switch the profiler off even when func raises; otherwise it
                # stays active for as long as the traceback keeps this frame alive.
                profiler.disable()
            profiler.dump_stats(_output_file)

            # if we are running the functions on AWS:
            if "SECRETS_MANAGER_SECRETS" in os.environ:
                p_stats = pstats.Stats(profiler)
                # limit this to 30 line for now otherwise it will be too long for AWS log
                p_stats.sort_stats("cumulative").print_stats(30)
            else:
                # NOTE(review): this text report replaces the binary dump written
                # to the same path just above.
                with open(_output_file, "w", encoding="utf-8") as fle:
                    p_stats = pstats.Stats(profiler, stream=fle)
                    if strip_dirs:
                        p_stats.strip_dirs()
                    if isinstance(sort_by, (tuple, list)):
                        p_stats.sort_stats(*sort_by)
                    else:
                        p_stats.sort_stats(sort_by)
                    p_stats.print_stats(lines_to_print)  # type: ignore
            return retval

        return wrapper

    return inner


def normalize_path(path: str, parent_folder: str) -> str:
    """
    Normalizes a path.
    If the path is relative, the parent_folder is added to make it an absolute path.

    Args:
        path (str): The path to the file to normalize.
        parent_folder (str): The folder the file is in.

    Returns:
        str: The normalized path.
    """
    if not os.path.isabs(path):
        path = os.path.join(parent_folder, path)
    return os.path.normpath(path)


def create_like_statement(synapse_path: str) -> str:
    """
    Creates a sql like statement for a Synapse table query
    See:
      https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/web/controller/TableExamples.html

    The statement is used to find all files in the folder, should be something like:
      path like '<synapse_path>/%'

    Certain special characters can also be used in like statements and these need to be escaped
      with a special character of the user's choice. This function will use the '|' character
      like the documentation shows. These need to have the "escape '|'" string added at the
      end, and it will look like:

      path like '<synapse_path>/%' escape '|'


    Args:
        synapse_path (str): The input synapse path to be made into a like statement

    Raises:
        ValueError: If the input path contains a '|' character

    Returns:
        str: A SQL like statement
    """
    if "|" in synapse_path:
        raise ValueError("Pattern can not contain '|' character.")
    like_pattern = escape_synapse_path(synapse_path)
    # Adding the % wildcard makes this find any file in the input path
    like_pattern = f"'{like_pattern}/%'"
    statement = f"path like {like_pattern}"
    # If there are any like special characters, the escape char needs to be indicated
    if any((char in synapse_path for char in LIKE_PATTERN_SPECIAL_CHARS)):
        statement = f"{statement} escape '|'"
    return statement


def escape_synapse_path(synapse_path: str) -> str:
    """
    Escapes certain characters in a synapse_path for a Synapse Table Query like statement
    See:
      https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/web/controller/TableExamples.html

    Like patterns appear in select statements such as:
      select * from syn123 where foo like 'bar%'
    The like pattern is in single quotes
    Single quotes must be escaped by using 2x single quotes:
      select * from syn123 where foo like 'Children''s Hospital'

    Certain special characters can also be used in like statements and these need to be escaped
      with a special character of the user's choice. This function will use the '|' character
      like the documentation shows.


    Args:
        synapse_path (str): The synapse_path that needs to be escaped

    Returns:
        str: The like pattern with problematic characters escaped
    """
    pattern = synapse_path.replace("'", "''")
    for char in LIKE_PATTERN_SPECIAL_CHARS:
        pattern = pattern.replace(char, f"|{char}")
    return pattern
def find_duplicates(_list: list[T]) -> set[T]:
    """Find duplicate items in a list

    Args:
        _list (list[T]): The items to inspect

    Returns:
        set[T]: The items that occur more than once in the input
    """
    try:
        # Tally every item in a single pass; calling list.count() once per item
        # is quadratic in the length of the list.
        occurrences: dict[T, int] = {}
        for item in _list:
            occurrences[item] = occurrences.get(item, 0) + 1
    except TypeError:
        # Unhashable items can not be tallied in a dict. Fall back to the
        # pairwise count, which only hashes items that really are duplicated.
        return {x for x in _list if _list.count(x) > 1}
    return {item for item, count in occurrences.items() if count > 1}
Find duplicate items in a list
def dict2list(item: Any) -> Optional[Union[dict, list]]:
    """Wraps a dictionary in a list

    Args:
        item (Any): Any type of input

    Returns:
        Optional[Union[dict, list]]:
          The input itself when it is already a list
          A one-item list holding the input when it is a dict
          None for every other kind of input
    """
    if isinstance(item, dict):
        return [item]
    return item if isinstance(item, list) else None
Puts a dictionary into a list
Arguments:
- item (Any): Any type of input
Returns:
Optional[Union[dict, list]]: If input is a list, return it. If input is a dict, return it in a list. Return None for anything else.
def str2list(item: Any) -> Optional[list]:
    """Wraps a string in a list

    Args:
        item (Any): Any type of input

    Returns:
        Optional[list]:
          The input itself when it is already a list
          A one-item list holding the input when it is a string
          None for every other kind of input
    """
    if isinstance(item, list):
        return item
    return [item] if isinstance(item, str) else None
Puts a string into a list
Arguments:
- item (Any): Any type of input
Returns:
Optional[list]: If input is a list, return it. If input is a string, return it in a list. Return None for anything else.
def unlist(seq: Sequence[X]) -> Union[Sequence[X], X]:
    """Unwraps a sequence that holds exactly one item

    Args:
        seq (Sequence[X]): A Sequence of any type

    Returns:
        Union[Sequence[X], X]:
          the only item when the sequence has a length of one
          the unchanged sequence for every other length
    """
    return seq[0] if len(seq) == 1 else seq
Returns the first item of a sequence
Arguments:
- seq (Sequence[X]): A Sequence of any type
Returns:
Union[Sequence[X], X]: If the sequence is length one, return the first item; otherwise return the sequence.
def get_dir_size(path: str) -> int:
    """
    Adds up the size in bytes of every file below a directory,
      descending into each sub directory it finds.
    Args:
        path: path to a folder
    return: total size of all the files in a given directory in bytes.
    """
    size_in_bytes = 0
    with os.scandir(path) as entries:
        for entry in entries:
            if entry.is_file():
                size_in_bytes += entry.stat().st_size
                continue
            if entry.is_dir():
                # Sub directories contribute the total of their own content
                size_in_bytes += get_dir_size(entry.path)
    return size_in_bytes
Recursively descend the directory tree rooted at the top and call .st_size function to calculate size of files in bytes.
Arguments:
- path: path to a folder
return: total size of all the files in a given directory in bytes.
def calculate_datetime(
    minutes: int, input_date: datetime, before_or_after: str = "before"
) -> datetime:
    """Shifts a date time by a number of minutes

    Args:
        input_date (datetime): date time object provided by users
        minutes (int): number of minutes to shift by
        before_or_after (str): defaults to "before". If "before", the result is x minutes
         before the input date time. If "after", the result is x minutes after it.

    Raises:
        ValueError: If before_or_after is neither "before" nor "after"

    Returns:
        datetime: the shifted date time
    """
    if before_or_after not in ("before", "after"):
        raise ValueError("Invalid value. Use either 'before' or 'after'.")
    offset = timedelta(minutes=minutes)
    if before_or_after == "before":
        return input_date - offset
    return input_date + offset
calculate date time
Arguments:
- input_date (datetime): date time object provided by users
- minutes (int): number of minutes
- before_or_after (str): defaults to "before". If "before", calculate x minutes before the input date time. If "after", calculate x minutes after the input date time.
Returns:
datetime: return result of date time calculation
def check_synapse_cache_size(directory: str = "/root/.synapseCache") -> float:
    """Adds up the size in bytes of every file below the .synapseCache directory.

    Args:
        directory (str, optional): .synapseCache directory. Defaults to '/root/.synapseCache'

    Returns:
        float: size of .synapseCache directory in bytes
    """
    size_in_bytes = 0
    # rglob("*") walks the whole tree; only regular files add to the total
    for entry in Path(directory).rglob("*"):
        if entry.is_file():
            size_in_bytes += entry.stat().st_size
    return size_in_bytes
Calculate size of .synapseCache directory in bytes using pathlib.
Arguments:
- directory (str, optional): .synapseCache directory. Defaults to '/root/.synapseCache'
Returns:
float: size of .synapsecache directory in bytes
def clear_synapse_cache(synapse_cache: cache.Cache, minutes: int) -> int:
    """Purges the synapse cache up to a cutoff that lies a number of minutes in the past

    Args:
        synapse_cache: an object of synapseclient Cache.
        minutes (int): how many minutes before the current UTC time the cutoff lies;
          the cutoff is handed to the cache's purge method as before_date
    Returns:
        int: number of files that get deleted, as reported by the purge
    """
    cutoff = calculate_datetime(
        minutes=minutes, input_date=datetime.utcnow(), before_or_after="before"
    )
    return synapse_cache.purge(before_date=cutoff)
clear synapse cache before a certain time
Arguments:
- synapse_cache: an object of synapseclient Cache.
- minutes (int): all files before this minute will be removed
Returns:
int: number of files that get deleted
def entity_type_mapping(
    syn: Synapse,
    entity_id: str,
    synapse_entity_tracker: Optional[SynapseEntityTracker] = None,
) -> str:
    """Looks up a Synapse entity and names its type

    Args:
        syn (Synapse): Synapse object
        entity_id (str): id of an entity
        synapse_entity_tracker: Tracker for a pull-through cache of Synapse entities.
          A new tracker is created when none (or a falsy one) is given.

    Raises:
        SynapseHTTPError: Re-raised, with a message naming the entity id, when the
          entity can not be fetched

    Returns:
        str: "asset view", "folder", "file" or "project" for the matching entity
          classes, otherwise the concreteType of the entity
    """
    try:
        tracker = synapse_entity_tracker or SynapseEntityTracker()
        entity = tracker.get(synapse_id=entity_id, syn=syn, download_file=False)
    except SynapseHTTPError as exc:
        message = (
            f"cannot get {entity_id} from asset store. "
            f"Please make sure that {entity_id} exists"
        )
        logger.error(message)
        raise SynapseHTTPError(message) from exc

    # Checked top to bottom; the first matching class decides the label
    labels_by_class = (
        (EntityViewSchema, "asset view"),
        (Folder, "folder"),
        (File, "file"),
        (Project, "project"),
    )
    for entity_class, label in labels_by_class:
        if isinstance(entity, entity_class):
            return label

    assert entity is not None
    # No known class matched, so fall back to the concreteType of the entity
    return entity.concreteType
Return the entity type of manifest
Arguments:
- syn (Synapse): Synapse object
- entity_id (str): id of an entity
- synapse_entity_tracker: Tracker for a pull-through cache of Synapse entities
Raises:
- SynapseHTTPError: Re-raised SynapseHTTPError
Returns:
str: type of the manifest being returned
def create_temp_folder(path: str, prefix: Optional[str] = None) -> str:
    """Creates a uniquely named temporary directory inside the given directory
    Args:
        path(str): a directory path where all the temporary files will live;
          it is created first when it does not exist yet
        prefix(str): a prefix to be added to the temporary directory name
    Returns: the pathname of the new directory, as returned by tempfile.mkdtemp
    """
    parent_is_missing = not os.path.exists(path)
    if parent_is_missing:
        os.makedirs(path, exist_ok=True)

    return tempfile.mkdtemp(dir=path, prefix=prefix)
This function creates a temporary directory in the specified directory
Arguments:
- path(str): a directory path where all the temporary files will live
- prefix(str): a prefix to be added to the temporary directory name
Returns: returns the absolute pathname of the new directory.
def profile(
    output_file: Optional[str] = None,
    sort_by: Any = "cumulative",
    lines_to_print: Optional[int] = None,
    strip_dirs: bool = False,
) -> Callable:
    """
    The function was initially taken from:
    https://towardsdatascience.com/how-to-profile-your-code-in-python-e70c834fad89
    A time profiler decorator.
    Inspired by and modified the profile decorator of Giampaolo Rodola:
    http://code.activestate.com/recipes/577817-profile-decorator/

    When the SECRETS_MANAGER_SECRETS environment variable is set, the raw stats are
    dumped to the output file and the top 30 lines (sorted by cumulative time) are
    printed; sort_by, lines_to_print and strip_dirs are not used in that case.
    Otherwise a text report is written to the output file.

    Args:
        output_file (Optional[str], optional):
            Path of the output file. If only name of the file is given, it's
            saved in the current directory.
            If it's None, the name of the decorated function is used.
            Defaults to None.
        sort_by (str, optional):
            str or SortKey enum or tuple/list of str/SortKey enum
            Sorting criteria for the Stats object.
            For a list of valid string and SortKey refer to:
            https://docs.python.org/3/library/profile.html#pstats.Stats.sort_stats
            Defaults to "cumulative".
        lines_to_print (Optional[int], optional):
            Number of lines to print.
            This is useful in reducing the size of the printout, especially
            since sorting by 'cumulative' puts the time consuming operations
            toward the top of the file.
            Default (None) is for all the lines.
        strip_dirs (bool, optional):
            Whether to remove the leading path info from file names.
            This is also useful in reducing the size of the printout.
            Defaults to False.

    Returns:
        Callable: A decorator; the decorated function returns the same value as the
            original and records a profile on every successful call
    """

    def inner(func: Callable) -> Callable:
        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            _output_file = output_file or func.__name__ + ".prof"
            profiler = Profile()
            profiler.enable()
            try:
                retval = func(*args, **kwargs)
            finally:
                # Switch the profiler off even when func raises; otherwise it
                # stays active for as long as the traceback keeps this frame alive.
                profiler.disable()
            profiler.dump_stats(_output_file)

            # if we are running the functions on AWS:
            if "SECRETS_MANAGER_SECRETS" in os.environ:
                p_stats = pstats.Stats(profiler)
                # limit this to 30 line for now otherwise it will be too long for AWS log
                p_stats.sort_stats("cumulative").print_stats(30)
            else:
                # NOTE(review): this text report replaces the binary dump written
                # to the same path just above.
                with open(_output_file, "w", encoding="utf-8") as fle:
                    p_stats = pstats.Stats(profiler, stream=fle)
                    if strip_dirs:
                        p_stats.strip_dirs()
                    if isinstance(sort_by, (tuple, list)):
                        p_stats.sort_stats(*sort_by)
                    else:
                        p_stats.sort_stats(sort_by)
                    p_stats.print_stats(lines_to_print)  # type: ignore
            return retval

        return wrapper

    return inner
The function was initially taken from: https://towardsdatascience.com/how-to-profile-your-code-in-python-e70c834fad89 A time profiler decorator. Inspired by and modified the profile decorator of Giampaolo Rodola: http://code.activestate.com/recipes/577817-profile-decorator/
Arguments:
- output_file (Optional[str], optional): Path of the output file. If only name of the file is given, it's saved in the current directory. If it's None, the name of the decorated function is used. Defaults to None.
- sort_by (str, optional): str or SortKey enum or tuple/list of str/SortKey enum Sorting criteria for the Stats object. For a list of valid string and SortKey refer to: https://docs.python.org/3/library/profile.html#pstats.Stats.sort_stats Defaults to "cumulative".
- lines_to_print (Optional[int], optional): Number of lines to print. This is useful in reducing the size of the printout, especially that sorting by 'cumulative', the time consuming operations are printed toward the top of the file. Default (None) is for all the lines.
- strip_dirs (bool, optional): Whether to remove the leading path info from file names. This is also useful in reducing the size of the printout Defaults to False.
Returns:
Callable: Profile of the decorated function
def normalize_path(path: str, parent_folder: str) -> str:
    """
    Normalizes a path.
    A relative path is joined onto the parent_folder before it is normalized;
    a path that is already absolute is normalized as it is.

    Args:
        path (str): The path to the file to normalize.
        parent_folder (str): The folder the file is in.

    Returns:
        str: The normalized path.
    """
    anchored = path if os.path.isabs(path) else os.path.join(parent_folder, path)
    return os.path.normpath(anchored)
Normalizes a path. If the path is relative, the parent_folder is added to make it an absolute path.
Arguments:
- path (str): The path to the file to normalize.
- parent_folder (str): The folder the file is in.
Returns:
str: The normalized path.
def create_like_statement(synapse_path: str) -> str:
    """
    Builds the sql like statement that selects everything below a path
      in a Synapse table query
    See:
      https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/web/controller/TableExamples.html

    For a path without like special characters the result looks like:
      path like '<synapse_path>/%'

    Like special characters in the path are escaped with the '|' character, as the
      documentation shows. When the path contains any of them, the "escape '|'" string
      is added at the end, so the result looks like:

      path like '<synapse_path>/%' escape '|'


    Args:
        synapse_path (str): The input synapse path to be made into a like statement

    Raises:
        ValueError: If the input path contains a '|' character

    Returns:
        str: A SQL like statement
    """
    if "|" in synapse_path:
        raise ValueError("Pattern can not contain '|' character.")

    escaped_path = escape_synapse_path(synapse_path)
    # The trailing % wildcard matches any file below the input path
    statement = f"path like '{escaped_path}/%'"

    # The escape character only has to be declared when it was actually needed
    has_special_chars = any(
        char in synapse_path for char in LIKE_PATTERN_SPECIAL_CHARS
    )
    if has_special_chars:
        statement += " escape '|'"
    return statement
Creates a sql like statement for a Synapse table query
See:
https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/web/controller/TableExamples.html
The statement is used to find all files in the folder, should be something like:
path like '<synapse_path>/%'
Certain special characters can also be used in like statements, and these need to be escaped with a special character of the user's choice. This function will use the '|' character, as the documentation shows. These need to have the "escape '|'" string added at the end, and it will look like:
path like '<synapse_path>/%' escape '|'
Arguments:
- synapse_path (str): The input synapse path to be made into a like statement
Raises:
- ValueError: If the input path contains a '|' character
Returns:
str: A SQL like statement
def escape_synapse_path(synapse_path: str) -> str:
    """
    Escapes the characters of a synapse_path that have a special meaning in the like
      pattern of a Synapse Table Query
    See:
      https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/web/controller/TableExamples.html

    Like patterns appear in select statements such as:
      select * from syn123 where foo like 'bar%'
    The like pattern is in single quotes, so single quotes inside of it
    are escaped by doubling them:
      select * from syn123 where foo like 'Children''s Hospital'

    Each character listed in LIKE_PATTERN_SPECIAL_CHARS is escaped by putting
      the '|' character in front of it, as the documentation shows.


    Args:
        synapse_path (str): The synapse_path that needs to be escaped

    Returns:
        str: The like pattern with problematic characters escaped
    """
    escaped = synapse_path.replace("'", "''")
    for special_char in LIKE_PATTERN_SPECIAL_CHARS:
        escaped = escaped.replace(special_char, "|" + special_char)
    return escaped
Escapes certain characters in a synapse_path for a Synapse Table Query like statement
See:
https://rest-docs.synapse.org/rest/org/sagebionetworks/repo/web/controller/TableExamples.html
Like patterns appear in select statements such as:
select * from syn123 where foo like 'bar%'
The like pattern is in single quotes Single quotes must be escaped by using 2x single quotes: select * from syn123 where foo like 'Children''s Hospital'
Certain special characters can also be used in like statements, and these need to be escaped with a special character of the user's choice. This function will use the '|' character, as the documentation shows.
Arguments:
- synapse_path (str): The synapse_path that needs to be escaped
Returns:
str: The like pattern with problematic characters escaped