Augment OpenAlex entities with vector representations computed from their associated works' abstracts.
Basic example: where should I post an artificial intelligence preprint?
from vecalex import config, Sources, Subfields
from sklearn.metrics.pairwise import cosine_similarity

# Fetch subfields with AI in their name.
ai_subfields = Subfields().search("artificial intelligence").get()

# Fetch the 25 most cited preprint repositories. Results arrive page by page,
# so flatten the pages into a single list of sources.
repositories = [
    source
    for page in Sources()
    .filter(type="repository")
    .sort(cited_by_count="desc")
    .paginate(n_max=25)
    for source in page
]

# Pairwise similarity between each subfield's "vec" entry and each
# repository's "vec" entry: rows follow ai_subfields, columns follow
# repositories.
subfield_repo_similarities = cosine_similarity(
    [s["vec"] for s in ai_subfields],
    [r["vec"] for r in repositories],
)

# Print top 3 most similar repositories for each subfield.
for i, subfield in enumerate(ai_subfields):
    print(f"Top repositories for subfield {subfield['display_name']}:")
    # argsort is ascending, so the last three indices are the highest
    # similarities; reverse them to list the best match first.
    top3_indices = subfield_repo_similarities[i].argsort()[-3:][::-1]
    for rank, idx in enumerate(top3_indices, start=1):
        repo = repositories[idx]
        sim = subfield_repo_similarities[i][idx]
        print(f" {rank}. {repo['display_name']} (similarity: {sim:.4f})")

# Sample output:
# Top repositories for subfield Artificial Intelligence:
# 1. arXiv (Cornell University) (similarity: 0.8439)
# 2. OPAL (Open@LaTrobe) (La Trobe University) (similarity: 0.5735)
# 3. Munich Personal RePEc Archive (Ludwig Maximilian University of Munich) (similarity: 0.5101)