soft_search package#

Subpackages#

Submodules#

soft_search.constants module#

class soft_search.constants.NSFFields[source]#

Bases: object

Fields that can be passed to the dataset_fields parameter of the get_nsf_dataset function.

Examples

>>> soft_search.nsf.get_nsf_dataset(
...     start_date="2017-01-01",
...     dataset_fields=[NSFFields.id_, NSFFields.abstractText],
... )
abstractText = 'abstractText'#
agency = 'agency'#
awardAgencyCode = 'awardAgencyCode'#
awardee = 'awardee'#
awardeeAddress = 'awardeeAddress'#
awardeeCity = 'awardeeCity'#
awardeeCountryCode = 'awardeeCountryCode'#
awardeeCounty = 'awardeeCounty'#
awardeeDistrictCode = 'awardeeDistrictCode'#
awardeeName = 'awardeeName'#
awardeeStateCode = 'awardeeStateCode'#
awardeeZipCode = 'awardeeZipCode'#
cfdaNumber = 'cfdaNumber'#
coPDPI = 'coPDPI'#
date = 'date'#
dunsNumber = 'dunsNumber'#
estimatedTotalAmt = 'estimatedTotalAmt'#
expDate = 'expDate'#
fundAgencyCode = 'fundAgencyCode'#
fundProgramName = 'fundProgramName'#
fundsObligatedAmt = 'fundsObligatedAmt'#
id_ = 'id'#
parentDunsNumber = 'parentDunsNumber'#
pdPIName = 'pdPIName'#
perfAddress = 'perfAddress'#
perfCity = 'perfCity'#
perfCountryCode = 'perfCountryCode'#
perfCounty = 'perfCounty'#
perfDistrictCode = 'perfDistrictCode'#
perfLocation = 'perfLocation'#
perfStateCode = 'perfStateCode'#
perfZipCode = 'perfZipCode'#
piEmail = 'piEmail'#
piFirstName = 'piFirstName'#
piLastName = 'piLastName'#
piPhone = 'piPhone'#
poEmail = 'poEmail'#
poName = 'poName'#
poPhone = 'poPhone'#
primaryProgram = 'primaryProgram'#
projectOutComesReport = 'projectOutComesReport'#
publicAccessMandate = 'publicAccessMandate'#
publicationConference = 'publicationConference'#
publicationResearch = 'publicationResearch'#
rpp = 'rpp'#
startDate = 'startDate'#
title = 'title'#
transType = 'transType'#
class soft_search.constants.NSFPrograms[source]#

Bases: object

Biological_Sciences = 'BIO'#
Computer_and_Information_Science_and_Engineering = 'CISE'#
Education_and_Human_Resources = 'EHR'#
Engineering = 'ENG'#
Geosciences = 'GEO'#
Integrative_Activities = 'OIA'#
International_Science_and_Engineering = 'OISE'#
Mathematical_and_Physical_Sciences = 'MPS'#
Social_Behavioral_and_Economic_Sciences = 'SBE'#
Technology_Innovation_and_Partnerships = 'TIP'#
class soft_search.constants.PredictionLabels[source]#

Bases: object

SoftwareNotPredicted = 'software-not-predicted'#
SoftwarePredicted = 'software-predicted'#

soft_search.metrics module#

class soft_search.metrics.EvaluationMetrics(model: str, accuracy: float, precision: float, recall: float, f1: float)[source]#

Bases: DataClassJsonMixin

accuracy: float#
f1: float#
model: str#
precision: float#
recall: float#

soft_search.nsf module#

soft_search.nsf.get_nsf_dataset(start_date: str | datetime, end_date: str | datetime | None = None, program_name: str = 'BIO', agency: str = 'NSF', transaction_type: str = 'Grant', dataset_fields: List[str] = ['abstractText', 'agency', 'awardAgencyCode', 'awardee', 'awardeeAddress', 'awardeeCity', 'awardeeCountryCode', 'awardeeCounty', 'awardeeDistrictCode', 'awardeeName', 'awardeeStateCode', 'awardeeZipCode', 'cfdaNumber', 'coPDPI', 'date', 'dunsNumber', 'estimatedTotalAmt', 'expDate', 'fundAgencyCode', 'fundProgramName', 'fundsObligatedAmt', 'id', 'parentDunsNumber', 'pdPIName', 'perfAddress', 'perfCity', 'perfCountryCode', 'perfCounty', 'perfDistrictCode', 'perfLocation', 'perfStateCode', 'perfZipCode', 'piEmail', 'piFirstName', 'piLastName', 'piPhone', 'poEmail', 'poName', 'poPhone', 'primaryProgram', 'projectOutComesReport', 'publicAccessMandate', 'publicationConference', 'publicationResearch', 'rpp', 'startDate', 'title', 'transType'], require_project_outcomes_doc: bool = True) DataFrame[source]#

Fetch an NSF awards dataset. Wraps the NSF Award Search API: https://www.research.gov/common/webapi/awardapisearch-v1.htm.

Parameters:
start_date: Union[str, datetime]

The datetime after which awards were granted. When provided as a string, “MM/DD/YYYY” and “YYYY-MM-DD” formats are accepted.

end_date: Optional[Union[str, datetime]]

The datetime before which awards were granted. When provided as a string, “MM/DD/YYYY” and “YYYY-MM-DD” formats are accepted. Default: None (no end date)

program_name: str

The program to search for awards against. Default: “BIO”

agency: str

The funding agency. Default: “NSF”

transaction_type: str

The award type. Default: “Grant”

dataset_fields: List[str]

The fields to retrieve. Default: All fields available in the soft_search.constants.NSFFields object.

require_project_outcomes_doc: bool

Whether to request only awards that have already returned project outcomes documents. Default: True (request only projects with outcomes)

Returns:
pd.DataFrame

All awards found as a pandas DataFrame.

See also

soft_search.constants.NSFFields

Available dataset fields to request.

soft_search.constants.NSFPrograms

Available programs to request.

Notes

Extensive testing suggests that the NSF Award Search API does not return all of the results available via “Simple Search” or “Advanced Search”.

This function is safe for prototyping, but for research purposes we recommend downloading data files from the “Advanced Search” webpage.

Examples

Get all grants funded by the NSF that have project outcomes under the BIO program from 2017 onward.

>>> from soft_search.nsf import get_nsf_dataset
>>> get_nsf_dataset(start_date="2017-01-01")

Get all grants funded by the NSF that have project outcomes under the BIO program from 2017 onward, but return only the id and abstractText fields.

>>> from soft_search.nsf import get_nsf_dataset
>>> from soft_search.constants import NSFFields
>>> get_nsf_dataset(
...     start_date="2017-01-01",
...     dataset_fields=[
...         NSFFields.id_,
...         NSFFields.abstractText,
...     ]
... )

soft_search.seed module#

soft_search.seed.set_seed(seed: int = 0) None[source]#