diff --git a/docs/sphinx/source/reference/iotools.rst b/docs/sphinx/source/reference/iotools.rst
index 8e2a3034d2..7c71c797d9 100644
--- a/docs/sphinx/source/reference/iotools.rst
+++ b/docs/sphinx/source/reference/iotools.rst
@@ -31,7 +31,6 @@ of sources and file formats relevant to solar energy modeling.
    iotools.get_nsrdb_psm4_conus
    iotools.get_nsrdb_psm4_full_disc
    iotools.read_nsrdb_psm4
-   iotools.parse_nsrdb_psm4
    iotools.get_psm3
    iotools.read_psm3
    iotools.parse_psm3
diff --git a/docs/sphinx/source/whatsnew/v0.12.1.rst b/docs/sphinx/source/whatsnew/v0.12.1.rst
index 9f13e32476..5b8e6c103f 100644
--- a/docs/sphinx/source/whatsnew/v0.12.1.rst
+++ b/docs/sphinx/source/whatsnew/v0.12.1.rst
@@ -23,9 +23,9 @@ Enhancements
   :py:func:`~pvlib.iotools.get_nsrdb_psm4_aggregated`,
   :py:func:`~pvlib.iotools.get_nsrdb_psm4_tmy`,
   :py:func:`~pvlib.iotools.get_nsrdb_psm4_conus`,
-  :py:func:`~pvlib.iotools.get_nsrdb_psm4_full_disc`,
-  :py:func:`~pvlib.iotools.read_nsrdb_psm4`, and
-  :py:func:`~pvlib.iotools.parse_nsrdb_psm4`. (:issue:`2326`, :pull:`2378`)
+  :py:func:`~pvlib.iotools.get_nsrdb_psm4_full_disc`, and
+  :py:func:`~pvlib.iotools.read_nsrdb_psm4`. (:issue:`2326`, :pull:`2378`,
+  :pull:`2445`)
 
 Documentation
 ~~~~~~~~~~~~~
diff --git a/pvlib/iotools/__init__.py b/pvlib/iotools/__init__.py
index d188435879..352044e5cd 100644
--- a/pvlib/iotools/__init__.py
+++ b/pvlib/iotools/__init__.py
@@ -16,7 +16,6 @@
 from pvlib.iotools.psm4 import get_nsrdb_psm4_conus  # noqa: F401
 from pvlib.iotools.psm4 import get_nsrdb_psm4_full_disc  # noqa: F401
 from pvlib.iotools.psm4 import read_nsrdb_psm4  # noqa: F401
-from pvlib.iotools.psm4 import parse_nsrdb_psm4  # noqa: F401
 from pvlib.iotools.pvgis import get_pvgis_tmy, read_pvgis_tmy  # noqa: F401
 from pvlib.iotools.pvgis import read_pvgis_hourly  # noqa: F401
 from pvlib.iotools.pvgis import get_pvgis_hourly  # noqa: F401
diff --git a/pvlib/iotools/psm4.py b/pvlib/iotools/psm4.py
index 119002388c..2c3522b952 100644
--- a/pvlib/iotools/psm4.py
+++ b/pvlib/iotools/psm4.py
@@ -11,6 +11,7 @@
 import requests
 import pandas as pd
 from json import JSONDecodeError
+from pvlib import tools
 
 NSRDB_API_BASE = "https://developer.nrel.gov/api/nsrdb/v2/solar/"
 PSM4_AGG_ENDPOINT = "nsrdb-GOES-aggregated-v4-0-0-download.csv"
@@ -82,7 +83,7 @@ def get_nsrdb_psm4_aggregated(latitude, longitude, api_key, email,
     Aggregated v4 API.
 
     The NSRDB is described in [1]_ and the PSM4 NSRDB GOES Aggregated v4 API is
-    described in [2]_,.
+    described in [2]_.
 
     Parameters
     ----------
@@ -132,7 +133,7 @@ def get_nsrdb_psm4_aggregated(latitude, longitude, api_key, email,
         timeseries data from NREL PSM4
     metadata : dict
         metadata from NREL PSM4 about the record, see
-        :func:`pvlib.iotools.parse_nsrdb_psm4` for fields
+        :func:`pvlib.iotools.read_nsrdb_psm4` for fields
 
     Raises
     ------
@@ -151,19 +152,15 @@ def get_nsrdb_psm4_aggregated(latitude, longitude, api_key, email,
         result in rejected requests.
 
     .. warning:: PSM4 is limited to data found in the NSRDB, please consult
-        the references below for locations with available data. Additionally,
-        querying data with < 30-minute resolution uses a different API endpoint
-        with fewer available fields (see [4]_).
+        the references below for locations with available data.
 
     See Also
     --------
     pvlib.iotools.get_nsrdb_psm4_tmy, pvlib.iotools.get_nsrdb_psm4_conus,
-    pvlib.iotools.get_nsrdb_psm4_full_disc, pvlib.iotools.read_nsrdb_psm4,
-    pvlib.iotools.parse_nsrdb_psm4
+    pvlib.iotools.get_nsrdb_psm4_full_disc, pvlib.iotools.read_nsrdb_psm4
 
     References
     ----------
-
     .. [1] `NREL National Solar Radiation Database (NSRDB)
        <https://nsrdb.nrel.gov/>`_
     .. [2] `NSRDB GOES Aggregated V4.0.0
@@ -213,7 +210,7 @@ def get_nsrdb_psm4_aggregated(latitude, longitude, api_key, email,
     # the CSV is in the response content as a UTF-8 bytestring
     # to use pandas we need to create a file buffer from the response
     fbuf = io.StringIO(response.content.decode('utf-8'))
-    return parse_nsrdb_psm4(fbuf, map_variables)
+    return read_nsrdb_psm4(fbuf, map_variables)
 
 
 def get_nsrdb_psm4_tmy(latitude, longitude, api_key, email, year='tmy',
@@ -225,7 +222,7 @@ def get_nsrdb_psm4_tmy(latitude, longitude, api_key, email, year='tmy',
     TMY v4 API.
 
     The NSRDB is described in [1]_ and the PSM4 NSRDB GOES TMY v4 API is
-    described in [2]_,.
+    described in [2]_.
 
     Parameters
     ----------
@@ -276,7 +273,7 @@ def get_nsrdb_psm4_tmy(latitude, longitude, api_key, email, year='tmy',
         timeseries data from NREL PSM4
     metadata : dict
         metadata from NREL PSM4 about the record, see
-        :func:`pvlib.iotools.parse_nsrdb_psm4` for fields
+        :func:`pvlib.iotools.read_nsrdb_psm4` for fields
 
     Raises
     ------
@@ -295,19 +292,16 @@ def get_nsrdb_psm4_tmy(latitude, longitude, api_key, email, year='tmy',
         result in rejected requests.
 
     .. warning:: PSM4 is limited to data found in the NSRDB, please consult
-        the references below for locations with available data. Additionally,
-        querying data with < 30-minute resolution uses a different API endpoint
-        with fewer available fields (see [4]_).
+        the references below for locations with available data.
 
     See Also
     --------
     pvlib.iotools.get_nsrdb_psm4_aggregated,
     pvlib.iotools.get_nsrdb_psm4_conus, pvlib.iotools.get_nsrdb_psm4_full_disc,
-    pvlib.iotools.read_nsrdb_psm4,pvlib.iotools.parse_nsrdb_psm4
+    pvlib.iotools.read_nsrdb_psm4
 
     References
     ----------
-
     .. [1] `NREL National Solar Radiation Database (NSRDB)
        <https://nsrdb.nrel.gov/>`_
     .. [2] `NSRDB GOES Tmy V4.0.0
@@ -357,7 +351,7 @@ def get_nsrdb_psm4_tmy(latitude, longitude, api_key, email, year='tmy',
     # the CSV is in the response content as a UTF-8 bytestring
     # to use pandas we need to create a file buffer from the response
     fbuf = io.StringIO(response.content.decode('utf-8'))
-    return parse_nsrdb_psm4(fbuf, map_variables)
+    return read_nsrdb_psm4(fbuf, map_variables)
 
 
 def get_nsrdb_psm4_conus(latitude, longitude, api_key, email, year='2023',
@@ -369,7 +363,7 @@ def get_nsrdb_psm4_conus(latitude, longitude, api_key, email, year='2023',
     v4 API.
 
     The NSRDB is described in [1]_ and the PSM4 NSRDB GOES CONUS v4 API is
-    described in [2]_,.
+    described in [2]_.
 
     Parameters
     ----------
@@ -418,7 +412,7 @@ def get_nsrdb_psm4_conus(latitude, longitude, api_key, email, year='2023',
         timeseries data from NREL PSM4
     metadata : dict
         metadata from NREL PSM4 about the record, see
-        :func:`pvlib.iotools.parse_nsrdb_psm4` for fields
+        :func:`pvlib.iotools.read_nsrdb_psm4` for fields
 
     Raises
     ------
@@ -437,19 +431,16 @@ def get_nsrdb_psm4_conus(latitude, longitude, api_key, email, year='2023',
         result in rejected requests.
 
     .. warning:: PSM4 is limited to data found in the NSRDB, please consult
-        the references below for locations with available data. Additionally,
-        querying data with < 30-minute resolution uses a different API endpoint
-        with fewer available fields (see [4]_).
+        the references below for locations with available data.
 
     See Also
     --------
     pvlib.iotools.get_nsrdb_psm4_aggregated, pvlib.iotools.get_nsrdb_psm4_tmy,
     pvlib.iotools.get_nsrdb_psm4_full_disc,
-    pvlib.iotools.read_nsrdb_psm4, pvlib.iotools.parse_nsrdb_psm4
+    pvlib.iotools.read_nsrdb_psm4
 
     References
     ----------
-
     .. [1] `NREL National Solar Radiation Database (NSRDB)
        <https://nsrdb.nrel.gov/>`_
     .. [2] `NSRDB GOES Conus V4.0.0
@@ -499,7 +490,7 @@ def get_nsrdb_psm4_conus(latitude, longitude, api_key, email, year='2023',
     # the CSV is in the response content as a UTF-8 bytestring
     # to use pandas we need to create a file buffer from the response
     fbuf = io.StringIO(response.content.decode('utf-8'))
-    return parse_nsrdb_psm4(fbuf, map_variables)
+    return read_nsrdb_psm4(fbuf, map_variables)
 
 
 def get_nsrdb_psm4_full_disc(latitude, longitude, api_key, email,
@@ -513,7 +504,7 @@ def get_nsrdb_psm4_full_disc(latitude, longitude, api_key, email,
     Disc v4 API.
 
     The NSRDB is described in [1]_ and the PSM4 NSRDB GOES Full Disc v4 API is
-    described in [2]_,.
+    described in [2]_.
 
     Parameters
     ----------
@@ -563,7 +554,7 @@ def get_nsrdb_psm4_full_disc(latitude, longitude, api_key, email,
         timeseries data from NREL PSM4
     metadata : dict
         metadata from NREL PSM4 about the record, see
-        :func:`pvlib.iotools.parse_nsrdb_psm4` for fields
+        :func:`pvlib.iotools.read_nsrdb_psm4` for fields
 
     Raises
     ------
@@ -582,19 +573,16 @@ def get_nsrdb_psm4_full_disc(latitude, longitude, api_key, email,
         result in rejected requests.
 
     .. warning:: PSM4 is limited to data found in the NSRDB, please consult
-        the references below for locations with available data. Additionally,
-        querying data with < 30-minute resolution uses a different API endpoint
-        with fewer available fields (see [4]_).
+        the references below for locations with available data.
 
     See Also
     --------
     pvlib.iotools.get_nsrdb_psm4_aggregated, pvlib.iotools.get_nsrdb_psm4_tmy,
     pvlib.iotools.get_nsrdb_psm4_conus,
-    pvlib.iotools.read_nsrdb_psm4, pvlib.iotools.parse_nsrdb_psm4
+    pvlib.iotools.read_nsrdb_psm4
 
     References
     ----------
-
     .. [1] `NREL National Solar Radiation Database (NSRDB)
        <https://nsrdb.nrel.gov/>`_
     .. [2] `NSRDB GOES Full Disc V4.0.0
@@ -644,19 +632,19 @@ def get_nsrdb_psm4_full_disc(latitude, longitude, api_key, email,
     # the CSV is in the response content as a UTF-8 bytestring
     # to use pandas we need to create a file buffer from the response
     fbuf = io.StringIO(response.content.decode('utf-8'))
-    return parse_nsrdb_psm4(fbuf, map_variables)
+    return read_nsrdb_psm4(fbuf, map_variables)
 
 
-def parse_nsrdb_psm4(fbuf, map_variables=True):
+def read_nsrdb_psm4(filename, map_variables=True):
     """
-    Parse an NSRDB PSM4 weather file (formatted as SAM CSV).
+    Read an NSRDB PSM4 weather file (formatted as SAM CSV).
 
     The NSRDB is described in [1]_ and the SAM CSV format is described in [2]_.
 
     Parameters
     ----------
-    fbuf: file-like object
-        File-like object containing data to read.
+    filename: str, path-like, or buffer
+        Filename or in-memory buffer of a file containing data to read.
     map_variables: bool, default True
         When true, renames columns of the Dataframe to pvlib variable names
         where applicable. See variable :const:`VARIABLE_MAP`.
@@ -726,12 +714,19 @@ def parse_nsrdb_psm4(fbuf, map_variables=True):
     Examples
     --------
     >>> # Read a local PSM4 file:
+    >>> df, metadata = iotools.read_nsrdb_psm4("data.csv")  # doctest: +SKIP
+
+    >>> # Read a file object or an in-memory buffer:
     >>> with open(filename, 'r') as f:  # doctest: +SKIP
-    ...     df, metadata = iotools.parse_nsrdb_psm4(f)  # doctest: +SKIP
+    ...     df, metadata = iotools.read_nsrdb_psm4(f)  # doctest: +SKIP
 
     See Also
     --------
-    pvlib.iotools.read_nsrdb_psm4, pvlib.iotools.get_psm4
+    pvlib.iotools.get_nsrdb_psm4_aggregated
+    pvlib.iotools.get_nsrdb_psm4_tmy
+    pvlib.iotools.get_nsrdb_psm4_conus
+    pvlib.iotools.get_nsrdb_psm4_full_disc
+    pvlib.iotools.read_psm3
 
     References
     ----------
@@ -740,11 +735,26 @@ def parse_nsrdb_psm4(fbuf, map_variables=True):
     .. [2] `Standard Time Series Data File Format
        `_
     """
-    # The first 2 lines of the response are headers with metadata
-    metadata_fields = fbuf.readline().split(',')
-    metadata_fields[-1] = metadata_fields[-1].strip()  # strip trailing newline
-    metadata_values = fbuf.readline().split(',')
-    metadata_values[-1] = metadata_values[-1].strip()  # strip trailing newline
+    with tools._file_context_manager(filename) as fbuf:
+        # The first 2 lines of the response are headers with metadata
+        metadata_fields = fbuf.readline().split(',')
+        metadata_values = fbuf.readline().split(',')
+        # get the column names so we can set the dtypes
+        columns = fbuf.readline().split(',')
+        columns[-1] = columns[-1].strip()  # strip trailing newline
+        # Since the header has so many columns, excel saves blank cols in the
+        # data below the header lines.
+        columns = [col for col in columns if col != '']
+        dtypes = dict.fromkeys(columns, float)
+        dtypes.update({'Year': int, 'Month': int, 'Day': int, 'Hour': int,
+                       'Minute': int, 'Cloud Type': int, 'Fill Flag': int})
+
+        data = pd.read_csv(
+            fbuf, header=None, names=columns, usecols=columns, dtype=dtypes,
+            delimiter=',', lineterminator='\n')  # skip carriage returns \r
+
+    metadata_fields[-1] = metadata_fields[-1].strip()  # trailing newline
+    metadata_values[-1] = metadata_values[-1].strip()  # trailing newline
     metadata = dict(zip(metadata_fields, metadata_values))
     # the response is all strings, so set some metadata types to numbers
     metadata['Local Time Zone'] = int(metadata['Local Time Zone'])
@@ -752,22 +762,9 @@ def parse_nsrdb_psm4(fbuf, map_variables=True):
     metadata['Latitude'] = float(metadata['Latitude'])
     metadata['Longitude'] = float(metadata['Longitude'])
     metadata['Elevation'] = int(metadata['Elevation'])
-    # get the column names so we can set the dtypes
-    columns = fbuf.readline().split(',')
-    columns[-1] = columns[-1].strip()  # strip trailing newline
-    # Since the header has so many columns, excel saves blank cols in the
-    # data below the header lines.
-    columns = [col for col in columns if col != '']
-    dtypes = dict.fromkeys(columns, float)  # all floats except datevec
-    dtypes.update(Year=int, Month=int, Day=int, Hour=int, Minute=int)
-    dtypes['Cloud Type'] = int
-    dtypes['Fill Flag'] = int
-    data = pd.read_csv(
-        fbuf, header=None, names=columns, usecols=columns, dtype=dtypes,
-        delimiter=',', lineterminator='\n')  # skip carriage returns \r
+
     # the response 1st 5 columns are a date vector, convert to datetime
-    dtidx = pd.to_datetime(
-        data[['Year', 'Month', 'Day', 'Hour', 'Minute']])
+    dtidx = pd.to_datetime(data[['Year', 'Month', 'Day', 'Hour', 'Minute']])
     # in USA all timezones are integers
     tz = 'Etc/GMT%+d' % -metadata['Time Zone']
     data.index = pd.DatetimeIndex(dtidx).tz_localize(tz)
@@ -779,41 +776,3 @@ def parse_nsrdb_psm4(fbuf, map_variables=True):
     metadata['altitude'] = metadata.pop('Elevation')
 
     return data, metadata
-
-
-def read_nsrdb_psm4(filename, map_variables=True):
-    """
-    Read an NSRDB PSM4 weather file (formatted as SAM CSV).
-
-    The NSRDB is described in [1]_ and the SAM CSV format is described in [2]_.
-
-    Parameters
-    ----------
-    filename: str or path-like
-        Filename of a file containing data to read.
-    map_variables: bool, default True
-        When true, renames columns of the Dataframe to pvlib variable names
-        where applicable. See variable :const:`VARIABLE_MAP`.
-
-    Returns
-    -------
-    data : pandas.DataFrame
-        timeseries data from NREL PSM4
-    metadata : dict
-        metadata from NREL PSM4 about the record, see
-        :func:`pvlib.iotools.parse_nsrdb_psm4` for fields
-
-    See Also
-    --------
-    pvlib.iotools.parse_nsrdb_psm4, pvlib.iotools.get_psm4
-
-    References
-    ----------
-    .. [1] `NREL National Solar Radiation Database (NSRDB)
-       <https://nsrdb.nrel.gov/>`_
-    .. [2] `Standard Time Series Data File Format
-       `_
-    """
-    with open(str(filename), 'r') as fbuf:
-        content = parse_nsrdb_psm4(fbuf, map_variables)
-    return content
diff --git a/pvlib/tools.py b/pvlib/tools.py
index b08d061676..63406f4e0d 100644
--- a/pvlib/tools.py
+++ b/pvlib/tools.py
@@ -2,6 +2,7 @@
 Collection of functions used in pvlib_python
 """
 
+import contextlib
 import datetime as dt
 import warnings
 
@@ -559,3 +560,29 @@ def normalize_max2one(a):
     except ValueError:  # fails for pandas objects
         res = a.div(a.abs().max(axis=0, skipna=True))
     return res
+
+
+def _file_context_manager(filename_or_object, mode='r'):
+    """
+    Open a filename/path for reading, or pass a file-like object
+    through unchanged.
+
+    Parameters
+    ----------
+    filename_or_object : str, path-like, or file-like object
+        The filename or path to open, or a file-like object to pass through.
+
+    Returns
+    -------
+    context : context manager
+        A file-like object to be used via Python's "with [context] as buffer:"
+        syntax.
+    """
+
+    if hasattr(filename_or_object, "read"):
+        # already a file-like object
+        context = contextlib.nullcontext(filename_or_object)
+    else:
+        # otherwise, assume a filename or path
+        context = open(str(filename_or_object), mode=mode)
+    return context
diff --git a/tests/iotools/test_psm4.py b/tests/iotools/test_psm4.py
index 3efe279a61..6447aed33b 100644
--- a/tests/iotools/test_psm4.py
+++ b/tests/iotools/test_psm4.py
@@ -150,22 +150,22 @@ def test_get_nsrdb_psm4_aggregated_errors(
 
 @pytest.fixture
 def io_input(request):
-    """file-like object for parse_nsrdb_psm4"""
+    """file-like object for read_nsrdb_psm4"""
     with MANUAL_TEST_DATA.open() as f:
         data = f.read()
     obj = StringIO(data)
     return obj
 
 
-def test_parse_nsrdb_psm4(io_input):
-    """test parse_nsrdb_psm4"""
-    data, metadata = psm4.parse_nsrdb_psm4(io_input, map_variables=False)
+def test_read_nsrdb_psm4_buffer(io_input):
+    """test read_nsrdb_psm4 with a file-like object as input"""
+    data, metadata = psm4.read_nsrdb_psm4(io_input, map_variables=False)
     expected = pd.read_csv(YEAR_TEST_DATA)
     assert_psm4_equal(data, metadata, expected)
 
 
-def test_read_nsrdb_psm4():
-    """test read_nsrdb_psm4"""
+def test_read_nsrdb_psm4_path():
+    """test read_nsrdb_psm4 with a file path as input"""
     data, metadata = psm4.read_nsrdb_psm4(MANUAL_TEST_DATA,
                                           map_variables=False)
     expected = pd.read_csv(YEAR_TEST_DATA)
diff --git a/tests/test_tools.py b/tests/test_tools.py
index 013716549b..821b9fec65 100644
--- a/tests/test_tools.py
+++ b/tests/test_tools.py
@@ -1,4 +1,8 @@
 from datetime import datetime
+from io import StringIO
+import os
+from pathlib import Path
+import tempfile
 from zoneinfo import ZoneInfo
 
 import numpy as np
@@ -252,3 +256,29 @@ def test_djd_to_datetime():
     expected = datetime(1974, 6, 22, 23, 30, 15, tzinfo=ZoneInfo("UTC"))
 
     assert tools.djd_to_datetime(djd) == expected
+
+
+def test__file_context_manager():
+    with tempfile.TemporaryDirectory() as td:
+        # make a test file
+        filename = os.path.join(td, 'test.txt')
+        with open(filename, 'w') as fh:
+            fh.write('test content')
+
+        # test with filename as string:
+        with tools._file_context_manager(filename) as obj:
+            assert obj.read() == "test content"
+
+        # test with filename as Path:
+        with tools._file_context_manager(Path(filename)) as obj:
+            assert obj.read() == "test content"
+
+        # test with file object:
+        with open(filename, "r") as f:
+            with tools._file_context_manager(f) as obj:
+                assert obj.read() == "test content"
+
+        # test with buffer:
+        buffer = StringIO("test content")
+        with tools._file_context_manager(buffer) as obj:
+            assert obj.read() == "test content"
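
Note for reviewers: the substantive change in this patch is that read_nsrdb_psm4 now accepts either a filename/path or an already-open file-like object, by routing its input through the new tools._file_context_manager helper. The snippet below is a minimal, self-contained sketch of that path-or-buffer pattern for trying it outside pvlib; the names open_path_or_buffer and read_first_line are illustrative stand-ins rather than pvlib API, and the real read_nsrdb_psm4 additionally parses the PSM4 metadata header, assigns dtypes, and localizes the datetime index.

import contextlib
import io
import tempfile
from pathlib import Path


def open_path_or_buffer(filename_or_object, mode='r'):
    # Same idea as tools._file_context_manager: file-like inputs are passed
    # through untouched (and not closed), while strings/paths are opened.
    if hasattr(filename_or_object, 'read'):
        return contextlib.nullcontext(filename_or_object)
    return open(str(filename_or_object), mode=mode)


def read_first_line(filename):
    # Toy stand-in for read_nsrdb_psm4: accepts a str, a Path, or a buffer.
    with open_path_or_buffer(filename) as fbuf:
        return fbuf.readline().rstrip('\n')


if __name__ == '__main__':
    with tempfile.TemporaryDirectory() as td:
        path = Path(td) / 'data.csv'
        path.write_text('Source,Location ID\nNSRDB,12345\n')
        print(read_first_line(path))        # path-like input
        print(read_first_line(str(path)))   # plain string filename
        print(read_first_line(io.StringIO('a,b\n1,2\n')))  # in-memory buffer

The design point worth noting is that contextlib.nullcontext does not close the wrapped object, so a caller-supplied buffer or open file handle remains usable after the reader returns, while paths opened by the helper itself are closed when the with block exits.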