Source code for scilpy.utils.scilpy_bot

# -*- coding: utf-8 -*-
import ast
from colorama import Fore, Style
from importlib.resources import files
import itertools
import multiprocessing
import pathlib
import re
import subprocess

from tqdm import tqdm
import nltk
from nltk.stem import PorterStemmer

from scilpy import SCILPY_HOME

# Total width, in characters, to which titles built by _make_title() are
# centered and padded.
SPACING_LEN = 80

# Single Porter stemmer instance shared by the stemming helpers in this module.
stemmer = PorterStemmer()

# Path to the JSON file containing script information and keywords
VOCAB_FILE_PATH = files("scilpy").joinpath("data/vocabulary.json")


# Object names offered to the user by prompt_user_for_object(). Each name must
# appear only once, otherwise the interactive menu lists it twice.
OBJECTS = [
    'aodf', 'bids', 'bingham', 'btensor', 'bundle',
    'connectivity', 'denoising', 'dki', 'dti', 'dwi',
    'fodf', 'freewater', 'frf', 'gradients', 'header',
    'json', 'labels', 'lesions', 'mti', 'NODDI', 'sh',
    'surface', 'tracking', 'tractogram', 'viz', 'volume',
    'qball', 'rgb'
]


def prompt_user_for_object():
    """
    Interactively ask the user to pick one entry of OBJECTS.

    The available objects are printed as a 1-based numbered menu, then the
    user is asked for a number until a valid one is entered.

    Returns
    -------
    str
        The entry of OBJECTS matching the number typed by the user.
    """
    print("Available objects:")
    for number, name in enumerate(OBJECTS, start=1):
        print(f"{number}. {name}")

    while True:
        try:
            selected = int(
                input("Choose the object you want to work on "
                      "(enter the number): "))
        except ValueError:
            # Not an integer: explain and ask again.
            print("Invalid input. Please enter a number.")
            continue

        if 1 <= selected <= len(OBJECTS):
            return OBJECTS[selected - 1]

        # An integer, but outside the menu range: ask again.
        print(f"Please enter a number between 1 and {len(OBJECTS)}.")
def _make_title(text):
    """
    Returns a formatted title string with centered text and spacing

    Parameters
    ----------
    text : str
        Title text. It is centered in a field of SPACING_LEN characters,
        padded with '='.

    Returns
    -------
    str
        The padded title, wrapped in colorama bright light-blue codes and
        terminated by a style reset.
    """
    return f'{Fore.LIGHTBLUE_EX}{Style.BRIGHT}{text.center(SPACING_LEN, "=")}' \
           f'{Style.RESET_ALL}'


def _get_docstring_from_script_path(script):
    """
    Extract a python file's docstring from a filepath.

    Parameters
    ----------
    script : str
        Path to python file

    Returns
    -------
    docstring : str
        The file's docstring, or an empty string if there was no docstring.
    """
    # Read raw bytes and let ast.parse() decode them: the parser honours a
    # PEP 263 coding cookie or a BOM, whereas text-mode open() would decode
    # with the platform's locale encoding and may fail or garble the text.
    with open(script, 'rb') as reader:
        file_contents = reader.read()
    module = ast.parse(file_contents)
    return ast.get_docstring(module) or ''


def _split_first_sentence(text):
    """
    Split the first sentence from the rest of a string. The sentence ends at
    the first dot that is followed by a space or a newline. If there is no
    such dot, return the full string as the first sentence, and an empty
    string as the remaining text.

    Parameters
    ----------
    text : str
        Text to parse.

    Returns
    -------
    first_sentence : str
        The first sentence (including its final dot), or the full text if no
        sentence end was found.
    remaining : str
        Everything after the first sentence (starting with the space or
        newline that followed the dot), or '' if no sentence end was found.
    """
    candidates = ['. ', '.\n']
    found = [idx for idx in (text.find(candidate) for candidate in candidates)
             if idx != -1]
    if not found:
        return text, ""

    # Keep the dot with the first sentence.
    split_idx = min(found) + 1
    return text[:split_idx], text[split_idx:]


def _stem_word(word):
    """
    Stem a word with the module-level PorterStemmer. Words of three
    characters or fewer are returned unchanged.

    Parameters
    ----------
    word : str
        Word to stem.

    Returns
    -------
    str
        Stemmed word.
    """
    if len(word) <= 3:
        return word
    return stemmer.stem(word)


def _stem_keywords(keywords):
    """
    Stem a list of keywords using PorterStemmer.

    Parameters
    ----------
    keywords : list of str
        Keywords to be stemmed.

    Returns
    -------
    list of str
        Stemmed keywords.
    """
    return [_stem_word(keyword) for keyword in keywords]


def _stem_text(text):
    """
    Stem all words in a text using PorterStemmer.

    Parameters
    ----------
    text : str
        Text to be stemmed. It is split into tokens with nltk.word_tokenize.

    Returns
    -------
    str
        Stemmed tokens joined by single spaces.
    """
    words = nltk.word_tokenize(text)
    return ' '.join([_stem_word(word) for word in words])


def _stem_phrase(phrase):
    """
    Stem all words in a phrase using PorterStemmer.

    Parameters
    ----------
    phrase : str
        Phrase to be stemmed. It is split on whitespace.

    Returns
    -------
    str
        Stemmed words joined by single spaces.
    """
    words = phrase.split()
    return ' '.join([_stem_word(word) for word in words])


def _generate_help_file(args):
    """
    Generate help file for each script

    Parameters
    ----------
    args : tuple
        (script, hidden_dir): path of the script to run, and directory in
        which '<script name>.help' is written. Packed in a single argument so
        the function can be mapped over by a multiprocessing pool.
    """
    script = args[0]
    hidden_dir = args[1]
    help_file = hidden_dir / f'{script.name}.help'

    # Check if help file already exists
    if help_file.exists():
        return

    # Run the script with --h and capture the output
    result = subprocess.run(['python', script, '--h'],
                            capture_output=True, text=True)

    # Save the output to the hidden file
    with open(help_file, 'w') as f:
        f.write(result.stdout)


def _generate_help_files(nbr_cpu=1):
    """
    This function iterates over all Python scripts in the 'cli' directory of
    the scilpy package, runs each script with the '--h' flag to generate help
    text, and saves the output in the '.hidden' directory of SCILPY_HOME.

    By doing this, we can precompute the help outputs for each script,
    which can be useful for faster searches.

    If a help file already exists for a script, the script is skipped,
    and the existing help file is left unchanged.

    The help output is saved in a hidden directory to avoid clutter in
    the main scripts directory.

    Parameters
    ----------
    nbr_cpu : int
        Number of worker processes used to run the scripts.
    """
    scripts_dir = files("scilpy").joinpath("cli")

    # Hidden directory to store help files. parents=True so that a missing
    # SCILPY_HOME directory is created rather than raising.
    hidden_dir = pathlib.Path(SCILPY_HOME) / ".hidden"
    hidden_dir.mkdir(parents=True, exist_ok=True)

    excluded = {'__init__.py', 'scil_search_keywords.py'}
    scripts = [script for script in scripts_dir.glob('*.py')
               if script.name not in excluded]

    # A set gives constant-time membership tests below.
    existing_helps = set(hidden_dir.glob('*.help'))
    scripts_to_regenerate = [
        script for script in scripts
        if hidden_dir / f'{script.name}.help' not in existing_helps]

    # Check if all help files are present
    if not scripts_to_regenerate:
        print("All help files are already generated.")
        return

    with multiprocessing.Pool(processes=nbr_cpu) as pool:
        # list() drains the lazy imap iterator so every job actually runs
        # and tqdm can display progress.
        _ = list(tqdm(pool.imap(_generate_help_file,
                                zip(scripts_to_regenerate,
                                    itertools.repeat(hidden_dir))),
                      total=len(scripts_to_regenerate)))


def _highlight_keywords(text, all_expressions):
    """
    Highlight the stemmed keywords in the given text using colorama.

    Parameters
    ----------
    text : str
        Text to highlight keywords in.
    all_expressions : list of str
        List of all things to highlight.

    Returns
    -------
    str
        Text with highlighted keywords.
    """
    # Function to apply highlighting to the matched word
    def apply_highlight(match):
        return f'{Fore.LIGHTYELLOW_EX}{Style.BRIGHT}{match.group(0)}' \
               f'{Style.RESET_ALL}'

    # Iterate over each keyword in the list
    for kw in all_expressions:
        # Match the keyword, optionally preceded by one word character and
        # optionally followed by an 's' and/or one more word character.
        pattern = re.compile(
            r'\b(\w?' + re.escape(kw) + r's?\w?)\b', re.IGNORECASE)

        # Replace the matched word with its highlighted version
        text = pattern.sub(apply_highlight, text)

    return text


def _get_synonyms(keyword, synonyms_data):
    """
    Get synonyms for a given keyword from the synonyms data.

    Parameters
    ----------
    keyword : str
        Keyword to find synonyms for.
    synonyms_data : iterable of list of str
        Sets of synonyms; each inner list groups words considered
        equivalent.

    Returns
    -------
    list of str
        Lower-cased, de-duplicated content of every synonym set that
        contains the keyword (compared either as is or after stemming).
        The order is not defined.
    """
    keyword = keyword.lower()
    stemmed_keyword = _stem_word(keyword)
    complete_synonyms = []
    for synonym_set in synonyms_data:
        synonym_set = [synonym.lower() for synonym in synonym_set]
        stemmed_synonyms_set = [_stem_word(synonym)
                                for synonym in synonym_set]
        if keyword in synonym_set or stemmed_keyword in stemmed_synonyms_set:
            complete_synonyms.extend(synonym_set)

    return list(set(complete_synonyms))


def _extract_keywords_and_phrases(expressions):
    """
    Extract keywords and phrases from the provided list.

    Parameters
    ----------
    expressions : list of str
        List of keywords and phrases.

    Returns
    -------
    list of str, list of str
        List of individual keywords and list of phrases, both lower-cased
        and de-duplicated.
    """
    keywords_set = set()
    phrases_set = set()

    for expression in expressions:
        # An expression containing a blank space has more than one word:
        # it is a phrase.
        if ' ' in expression:
            phrases_set.add(expression.lower())
        else:
            keywords_set.add(expression.lower())

    return list(keywords_set), list(phrases_set)


def _calculate_score(keywords, phrases, text, filename, suffix=''):
    """
    Calculate a score for how well the text and filename match the keywords.

    Parameters
    ----------
    keywords : list of str
        Keywords to search for.
    phrases : list of str
        Phrases to search for.
    text : str
        Text to search within.
    filename : str
        Filename to search within.
    suffix : str
        String appended to each keyword / phrase to build its key in the
        returned dictionary.

    Returns
    -------
    dict
        Score details: maps `keyword + suffix` to the number of matches found
        in the stemmed text and stemmed filename, and `phrase + suffix` to
        the number of occurrences of the stemmed phrase in the stemmed text.
        Entries with a score of zero are omitted.
    """
    stemmed_text = _stem_text(text.lower())
    stemmed_filename = _stem_text(filename.lower())
    score_details = {}

    def is_match(found_word, keyword):
        # Short keywords must match exactly; longer ones match on their stem.
        if len(keyword) <= 3:
            return found_word == keyword
        return _stem_word(found_word) == _stem_word(keyword)

    for keyword in keywords:
        keyword = keyword.lower()
        # Candidate words: the keyword, optionally preceded by one word
        # character and followed by an 's' and/or one word character.
        # is_match() then filters the candidates.
        keyword_pattern = re.compile(
            r'\b(\w?' + re.escape(keyword) + r's?\w?)\b', re.IGNORECASE)
        found_words = keyword_pattern.findall(stemmed_text) \
            + keyword_pattern.findall(stemmed_filename)

        keyword_score = sum(1 for found_word in found_words
                            if is_match(found_word, keyword))

        if keyword_score > 0:
            score_details[keyword + suffix] = keyword_score

    # Phrases are only searched in the text, not in the filename.
    for phrase in phrases:
        phrase_stemmed = _stem_text(phrase.lower())
        phrase_score = stemmed_text.count(phrase_stemmed)
        if phrase_score > 0:
            score_details[phrase + suffix] = phrase_score

    return score_details
def update_matches_and_scores(scores, filename, score_details):
    """
    Accumulate the score details of one script into the overall scores.

    Parameters
    ----------
    scores : dict
        Maps a filename to a dict of {keyword or phrase: score}. It is
        updated in place.
    filename : str
        The name of the script file being analyzed.
    score_details : dict
        A dictionary containing the scores for the keywords and phrases
        found in the script. Entries with a score of 0 are ignored.

    Returns
    -------
    scores : dict
        The same `scores` dictionary that was passed in, after the update.
    """
    for key, value in score_details.items():
        if value == 0:
            continue

        # Create the per-file entry only once a non-zero score shows up, so
        # files without any match never appear in `scores`.
        file_scores = scores.setdefault(filename, {})
        file_scores[key] = file_scores.get(key, 0) + value

    return scores