Skip to content

Commit 6dd7837

Browse files
parse http modified date to compress the cached representation
1 parent a913ca7 commit 6dd7837

File tree

1 file changed

+68
-16
lines changed

1 file changed

+68
-16
lines changed

src/pip/_internal/index/package_finder.py

Lines changed: 68 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
"""Routines related to PyPI, indexes"""
22

3+
import binascii
4+
import datetime
35
import enum
46
import functools
57
import itertools
68
import logging
79
import os
810
import re
11+
import time
912
from hashlib import sha256
1013
from pathlib import Path
1114
from typing import (
@@ -800,48 +803,54 @@ def evaluate_links(
800803

801804
return candidates
802805

803-
@staticmethod
806+
_HTTP_DATE_FORMAT = "%a, %d %b %Y %H:%M:%S %Z"
807+
808+
@classmethod
804809
def _try_load_http_cache_headers(
810+
cls,
805811
etag_path: Path,
806812
date_path: Path,
807813
checksum_path: Path,
808814
project_url: Link,
809815
headers: Dict[str, str],
810-
) -> Tuple[Optional[str], Optional[str], Optional[bytes]]:
816+
) -> Tuple[Optional[str], Optional[datetime.datetime], Optional[bytes]]:
811817
etag: Optional[str] = None
812818
try:
813819
etag = etag_path.read_text()
820+
etag = f'"{etag}"'
814821
logger.debug(
815822
"found cached etag for url %s at %s: %s",
816823
project_url,
817824
etag_path,
818825
etag,
819826
)
820-
headers["If-None-Match"] = f'"{etag}"'
827+
headers["If-None-Match"] = etag
821828
except OSError as e:
822829
logger.debug("no etag found for url %s (%s)", project_url, str(e))
823830

824-
date: Optional[str] = None
831+
date: Optional[datetime.datetime] = None
825832
try:
826-
date = date_path.read_text()
833+
date_bytes = date_path.read_bytes()
834+
date_int = int.from_bytes(date_bytes, byteorder="big", signed=False)
835+
date = datetime.datetime.fromtimestamp(date_int, tz=datetime.timezone.utc)
827836
logger.debug(
828-
"found cached date for url %s at %s: %s",
837+
"found cached date for url %s at %s: '%s'",
829838
project_url,
830839
date_path,
831840
date,
832841
)
833-
headers["If-Modified-Since"] = date
842+
headers["If-Modified-Since"] = date.strftime(cls._HTTP_DATE_FORMAT)
834843
except OSError as e:
835844
logger.debug("no date found for url %s (%s)", project_url, str(e))
836845

837846
checksum: Optional[bytes] = None
838847
try:
839848
checksum = checksum_path.read_bytes()
840849
logger.debug(
841-
"found checksum for url %s at %s: %s",
850+
"found checksum for url %s at %s: '%s'",
842851
project_url,
843852
checksum_path,
844-
checksum,
853+
binascii.b2a_base64(checksum, newline=False).decode("ascii"),
845854
)
846855
except OSError as e:
847856
logger.debug("no checksum found for url %s (%s)", project_url, str(e))
@@ -854,6 +863,11 @@ def _try_load_http_cache_headers(
854863
def _strip_quoted_value(cls, value: str) -> str:
855864
return cls._quoted_value.sub(r"\1", value)
856865

866+
_now_local = datetime.datetime.now().astimezone()
867+
_local_tz = _now_local.tzinfo
868+
assert _local_tz is not None
869+
_local_tz_name = _local_tz.tzname(_now_local)
870+
857871
@classmethod
858872
def _write_http_cache_info(
859873
cls,
@@ -864,7 +878,7 @@ def _write_http_cache_info(
864878
index_response: IndexContent,
865879
prev_etag: Optional[str],
866880
prev_checksum: Optional[bytes],
867-
) -> Tuple[Optional[str], Optional[str], bytes, bool]:
881+
) -> Tuple[Optional[str], Optional[datetime.datetime], bytes, bool]:
868882
hasher = sha256()
869883
hasher.update(index_response.content)
870884
new_checksum = hasher.digest()
@@ -894,18 +908,56 @@ def _write_http_cache_info(
894908
)
895909
assert page_unmodified
896910

897-
new_date: Optional[str] = index_response.date
898-
if new_date is None:
911+
new_date: Optional[datetime.datetime] = None
912+
date_str: Optional[str] = index_response.date
913+
if date_str is None:
899914
logger.debug(
900-
"no date could be parsed from response for url %s", project_url
915+
"no date header was provided in response for url %s", project_url
901916
)
917+
else:
918+
date_str = date_str.strip()
919+
new_time = time.strptime(date_str, cls._HTTP_DATE_FORMAT)
920+
new_date = datetime.datetime.strptime(date_str, cls._HTTP_DATE_FORMAT)
921+
# datetime.strptime() doesn't set tzinfo from the parsed %Z arg, which
922+
# may be any of "UTC", "GMT", or any element of `time.tzname`.
923+
if new_time.tm_zone in ["UTC", "GMT"]:
924+
logger.debug(
925+
"a UTC timezone was found in response for url %s", project_url
926+
)
927+
new_date = new_date.replace(tzinfo=datetime.timezone.utc)
928+
else:
929+
assert new_time.tm_zone in time.tzname, new_time
930+
logger.debug(
931+
"a local timezone %s was found in response for url %s",
932+
new_time.tm_zone,
933+
project_url,
934+
)
935+
if new_time.tm_zone == cls._local_tz_name:
936+
new_date = new_date.replace(tzinfo=cls._local_tz)
937+
else:
938+
logger.debug(
939+
"a local timezone %s had to be discarded in response %s",
940+
new_time.tm_zone,
941+
project_url,
942+
)
943+
new_date = None
944+
945+
if new_date is not None:
946+
timestamp = new_date.timestamp()
947+
# The timestamp will only have second resolution according to the parse
948+
# format string _HTTP_DATE_FORMAT.
949+
assert not (timestamp % 1), (new_date, timestamp)
950+
epoch = int(timestamp)
951+
assert epoch >= 0, (new_date, timestamp, epoch)
952+
date_bytes = epoch.to_bytes(length=4, byteorder="big", signed=False)
953+
date_path.write_bytes(date_bytes)
954+
955+
logger.debug('date "%s" written for url %s', new_date, project_url)
956+
if new_date is None:
902957
try:
903958
date_path.unlink()
904959
except OSError:
905960
pass
906-
else:
907-
logger.debug('date "%s" written for url %s', new_date, project_url)
908-
date_path.write_text(new_date)
909961

910962
return (new_etag, new_date, new_checksum, page_unmodified)
911963

0 commit comments

Comments
 (0)