1
1
"""Routines related to PyPI, indexes"""
2
2
3
+ import binascii
4
+ import datetime
3
5
import enum
4
6
import functools
5
7
import itertools
6
8
import logging
7
9
import os
8
10
import re
11
+ import time
9
12
from hashlib import sha256
10
13
from pathlib import Path
11
14
from typing import (
@@ -800,48 +803,54 @@ def evaluate_links(
800
803
801
804
return candidates
802
805
803
- @staticmethod
806
+ _HTTP_DATE_FORMAT = "%a, %d %b %Y %H:%M:%S %Z"
807
+
808
+ @classmethod
804
809
def _try_load_http_cache_headers (
810
+ cls ,
805
811
etag_path : Path ,
806
812
date_path : Path ,
807
813
checksum_path : Path ,
808
814
project_url : Link ,
809
815
headers : Dict [str , str ],
810
- ) -> Tuple [Optional [str ], Optional [str ], Optional [bytes ]]:
816
+ ) -> Tuple [Optional [str ], Optional [datetime . datetime ], Optional [bytes ]]:
811
817
etag : Optional [str ] = None
812
818
try :
813
819
etag = etag_path .read_text ()
820
+ etag = f'"{ etag } "'
814
821
logger .debug (
815
822
"found cached etag for url %s at %s: %s" ,
816
823
project_url ,
817
824
etag_path ,
818
825
etag ,
819
826
)
820
- headers ["If-None-Match" ] = f'" { etag } "'
827
+ headers ["If-None-Match" ] = etag
821
828
except OSError as e :
822
829
logger .debug ("no etag found for url %s (%s)" , project_url , str (e ))
823
830
824
- date : Optional [str ] = None
831
+ date : Optional [datetime . datetime ] = None
825
832
try :
826
- date = date_path .read_text ()
833
+ date_bytes = date_path .read_bytes ()
834
+ date_int = int .from_bytes (date_bytes , byteorder = "big" , signed = False )
835
+ date = datetime .datetime .fromtimestamp (date_int , tz = datetime .timezone .utc )
827
836
logger .debug (
828
- "found cached date for url %s at %s: %s " ,
837
+ "found cached date for url %s at %s: '%s' " ,
829
838
project_url ,
830
839
date_path ,
831
840
date ,
832
841
)
833
- headers ["If-Modified-Since" ] = date
842
+ headers ["If-Modified-Since" ] = date . strftime ( cls . _HTTP_DATE_FORMAT )
834
843
except OSError as e :
835
844
logger .debug ("no date found for url %s (%s)" , project_url , str (e ))
836
845
837
846
checksum : Optional [bytes ] = None
838
847
try :
839
848
checksum = checksum_path .read_bytes ()
840
849
logger .debug (
841
- "found checksum for url %s at %s: %s " ,
850
+ "found checksum for url %s at %s: '%s' " ,
842
851
project_url ,
843
852
checksum_path ,
844
- checksum ,
853
+ binascii . b2a_base64 ( checksum , newline = False ). decode ( "ascii" ) ,
845
854
)
846
855
except OSError as e :
847
856
logger .debug ("no checksum found for url %s (%s)" , project_url , str (e ))
@@ -854,6 +863,11 @@ def _try_load_http_cache_headers(
854
863
def _strip_quoted_value(cls, value: str) -> str:
    """Return *value* with each ``cls._quoted_value`` match unwrapped.

    Every match of the class-level ``_quoted_value`` pattern is replaced by
    its first capture group; input that does not match comes back unchanged.

    NOTE(review): the pattern itself is defined outside this view --
    presumably it matches a double-quoted string and captures the text
    between the quotes; confirm against its definition.
    """
    pattern = cls._quoted_value
    return pattern.sub(r"\1", value)
856
865
866
# One-time snapshot of the local timezone, taken when this code is first
# executed.  _write_http_cache_info() compares the zone name parsed via "%Z"
# against _local_tz_name and, on a match, attaches _local_tz to the date.
# NOTE(review): astimezone() without an argument returns a fixed-offset
# tzinfo, so this presumably captures only the DST state at snapshot time --
# confirm that is acceptable for long-running processes.
_now_local = datetime.datetime.now().astimezone()
_local_tz = _now_local.tzinfo
# An aware datetime always carries a tzinfo; the assert presumably exists to
# narrow the Optional[tzinfo] type for static checkers.
assert _local_tz is not None
# datetime.tzname() delegates to tzinfo.tzname(self), i.e. the same lookup as
# asking _local_tz for its name at _now_local.
_local_tz_name = _now_local.tzname()
870
+
857
871
@classmethod
858
872
def _write_http_cache_info (
859
873
cls ,
@@ -864,7 +878,7 @@ def _write_http_cache_info(
864
878
index_response : IndexContent ,
865
879
prev_etag : Optional [str ],
866
880
prev_checksum : Optional [bytes ],
867
- ) -> Tuple [Optional [str ], Optional [str ], bytes , bool ]:
881
+ ) -> Tuple [Optional [str ], Optional [datetime . datetime ], bytes , bool ]:
868
882
hasher = sha256 ()
869
883
hasher .update (index_response .content )
870
884
new_checksum = hasher .digest ()
@@ -894,18 +908,56 @@ def _write_http_cache_info(
894
908
)
895
909
assert page_unmodified
896
910
897
- new_date : Optional [str ] = index_response .date
898
- if new_date is None :
911
+ new_date : Optional [datetime .datetime ] = None
912
+ date_str : Optional [str ] = index_response .date
913
+ if date_str is None :
899
914
logger .debug (
900
- "no date could be parsed from response for url %s" , project_url
915
+ "no date header was provided in response for url %s" , project_url
901
916
)
917
+ else :
918
+ date_str = date_str .strip ()
919
+ new_time = time .strptime (date_str , cls ._HTTP_DATE_FORMAT )
920
+ new_date = datetime .datetime .strptime (date_str , cls ._HTTP_DATE_FORMAT )
921
+ # strptime() doesn't set the timezone according to the parsed %Z arg, which
922
+ # may be any of "UTC", "GMT", or any element of `time.tzname`.
923
+ if new_time .tm_zone in ["UTC" , "GMT" ]:
924
+ logger .debug (
925
+ "a UTC timezone was found in response for url %s" , project_url
926
+ )
927
+ new_date = new_date .replace (tzinfo = datetime .timezone .utc )
928
+ else :
929
+ assert new_time .tm_zone in time .tzname , new_time
930
+ logger .debug (
931
+ "a local timezone %s was found in response for url %s" ,
932
+ new_time .tm_zone ,
933
+ project_url ,
934
+ )
935
+ if new_time .tm_zone == cls ._local_tz_name :
936
+ new_date = new_date .replace (tzinfo = cls ._local_tz )
937
+ else :
938
+ logger .debug (
939
+ "a local timezone %s had to be discarded in response %s" ,
940
+ new_time .tm_zone ,
941
+ project_url ,
942
+ )
943
+ new_date = None
944
+
945
+ if new_date is not None :
946
+ timestamp = new_date .timestamp ()
947
+ # The timestamp will only have second resolution according to the parse
948
+ # format string _HTTP_DATE_FORMAT.
949
+ assert not (timestamp % 1 ), (new_date , timestamp )
950
+ epoch = int (timestamp )
951
+ assert epoch >= 0 , (new_date , timestamp , epoch )
952
+ date_bytes = epoch .to_bytes (length = 4 , byteorder = "big" , signed = False )
953
+ date_path .write_bytes (date_bytes )
954
+
955
+ logger .debug ('date "%s" written for url %s' , new_date , project_url )
956
+ if new_date is None :
902
957
try :
903
958
date_path .unlink ()
904
959
except OSError :
905
960
pass
906
- else :
907
- logger .debug ('date "%s" written for url %s' , new_date , project_url )
908
- date_path .write_text (new_date )
909
961
910
962
return (new_etag , new_date , new_checksum , page_unmodified )
911
963
0 commit comments