Bayesian Selective Inference: Non-informative Priors

ClinGen Variant Curation Interface: A Variant Classification Platform for the Application of Evidence Criteria from ACMG/AMP Guidelines

Christine G. Preston, Matt W. Wright, Rao Madhavrao,

medRxiv 2021.02.12.21251663; doi: https://doi.org/10.1101/2021.02.12.21251663

Add to Selected Citations

\n ''' # Remove space in seach query search_query=search_query.strip().replace(", ", "+").replace(" ", "+").replace(",", "+").split('+') subject_str = ('%20').join(search_query[0].split()) for subject in search_query[1:]: subject_str = subject_str + '%252B' + ('%20').join(subject.split()) # print(subject_str) # Call arXiv API # bio_arXiv_url=f'http://export.arxiv.org/api/query?search_query={search_by}:{search_query}&sortBy={sort_by}&start=0&max_results={max_results}' # "https://api.biorxiv.org" current_dateTime = datetime.now() today = str(current_dateTime)[:10] journal = 'biorxiv' # journals_str = '%20jcode%3Amedrxiv%7C%7Cbiorxiv' bio_arXiv_url = f'https://www.biorxiv.org/search/' # kwd_str = 'abstract_title%3A' + ('%252C%2B').join([search_query[0]] + [('%2B').join(keyword.split()) for keyword in search_query[1:]]) # print(kwd_str) # kwd_str = kwd_str + '%20abstract_title_flags%3Amatch-' + 'all' # bio_arXiv_url += '%20' + kwd_str launch_dates = {"biorxiv": "2013-01-01", "medrxiv": "2019-06-01"} both = False bio_only = True med_only = False if bio_only: print('https://www.biorxiv.org/search/serverless%252Bcomputing%252Bbioinformatics%20jcode%3Abiorxiv%20limit_from%3A2021-06-13%20limit_to%3A2023-02-17%20numresults%3A25%20sort%3Arelevance-rank%20format_result%3Astandard\n bio_only') journal = 'biorxiv' journals_str = f'%20jcode%3A{journal}' if both: # print('https://www.biorxiv.org/search/serverless%252Bcomputing%252Bbioinformatics%20jcode%3Amedrxiv%7C%7Cbiorxiv%20limit_from%3A2022-11-06%20limit_to%3A2023-02-17%20numresults%3A10%20sort%3Arelevance-rank%20format_result%3Astandard\n both') journal = 'biorxiv' journals_str = f'%20jcode%3A{journal}%7C%7Cmedrxiv' if med_only: # print('https://www.biorxiv.org/search/serverless%252Bcomputing%252Bbioinformatics%20jcode%3Amedrxiv%20limit_from%3A2021-06-13%20limit_to%3A2023-02-17%20numresults%3A10%20sort%3Arelevance-rank%20format_result%3Astandard\n med_only') journal = 'medrxiv' journals_str = f'%20jcode%3A{journal}' start_day = launch_dates[journal] bio_arXiv_url += subject_str + journals_str + f'%20limit_from%3A2{start_day}%20limit_to%3A{today}%20numresults%3A{max_results}%20sort%3Arelevance-rank%20format_result%3Astandard' # print(bio_arXiv_url) url_response = requests.post(bio_arXiv_url) html = bs(url_response.text, features='html.parser') pdf_entries = html.find_all(attrs={'class': 'search-result'}) # print(articles) # with urllib.request.urlopen(bio_arXiv_url) as url: # s = url.read() # # Parse the xml data # root = html.fromstring(s) # # Fetch relevant pdf information # pdf_entries = root.xpath("entry") # print(pdf_entries) pdf_titles = [] pdf_authors = [] pdf_urls = [] pdf_categories = [] folder_names = [] pdf_citation = [] pdf_years = [] for i, pdf in enumerate(pdf_entries): # print(pdf.xpath('updated/text()')[0][:4]) # xpath return a list with every ocurrence of the html path. Since we're getting each entry individually, we'll take the first element to avoid an unecessary list # print(pdf) # [article.find('span', attrs={'class': 'highwire-cite-title'}).text.strip() if article.find('span', attrs={'class': 'highwire-cite-title'}) is not None else None for article in articles] pdf_titles.append(pdf.find('span', attrs={'class': 'highwire-cite-title'}).text.strip()) # print(pdf.find('span', attrs={'class': 'highwire-citation-authors'}).text.strip()) pdf_authors.append(pdf.find('span', attrs={'class': 'highwire-citation-authors'}).text.strip().split(', ')) # print(pdf_authors) # print(f'http://www.{journal}.org') pdf_url = pdf.find('a', href=True)['href'] if pdf_url[:4] != 'http': pdf_url = f'http://www.biorxiv.org'+ pdf_url pdf_urls.append(pdf_url) pdf_categories.append(pdf.find('span', attrs={'class': 'highwire-cite-metadata-journal highwire-cite-metadata'}).text.strip()) # print(pdf_categories) folder_names.append(folder_name) pdf_years.append(pdf.find('span', attrs={'class': 'highwire-cite-metadata-pages highwire-cite-metadata'}).text.strip()[:4]) pdf_citation.append(f"{', '.join(pdf_authors[i])}, {pdf_titles[i]}. {pdf_categories[i]} ({pdf_years[i]}), (available at {pdf_urls[i]}).") # print(pdf_citation) # break pdf_info=list(zip(pdf_titles, pdf_urls, pdf_authors, pdf_categories, folder_names, pdf_citation)) # Check number of available files print('Requesting {max_results} files'.format(max_results=max_results)) if len(pdf_urls)