Skip to content

Commit e9c6ec3

Browse files
add shallow download network utilities
make types pass; add --shallow-wheels cli arg; add news; rename news; make the metadata test pass on windows; use --shallow-wheels unconditionally and remove the cli arg; download all wheels at the end of the run; add a hack to avoid signal() erroring in a background thread; avoid using shallow wheels for non-remote file paths; add --unstable-feature=shallow_wheels!
1 parent acab2ee commit e9c6ec3

22 files changed

+868
-4
lines changed

news/8448.feature

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add a set of utilities in ``pip._internal.network.shallow`` for fetching metadata from remote wheel files without downloading the entire file. Link these utilities into the v2 resolver by adding a new ShallowWheelDistribution AbstractDistribution subclass. Expose this behavior via a --unstable-feature=shallow_wheels command-line option to ``pip download``. This produces a marked performance improvement.

src/pip/_internal/cli/cmdoptions.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -919,7 +919,7 @@ def check_list_path_option(options):
919919
metavar='feature',
920920
action='append',
921921
default=[],
922-
choices=['resolver'],
922+
choices=['resolver', 'shallow_wheels'],
923923
help=SUPPRESS_HELP, # TODO: Enable this when the resolver actually works.
924924
# help='Enable unstable feature(s) that may be backward incompatible.',
925925
) # type: Callable[..., Option]

src/pip/_internal/cli/progress_bars.py

+16-3
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from pip._internal.utils.typing import MYPY_CHECK_RUNNING
1515

1616
if MYPY_CHECK_RUNNING:
17-
from typing import Any, Dict, List
17+
from typing import Any, Dict, List, Optional
1818

1919
try:
2020
from pip._vendor import colorama
@@ -24,6 +24,18 @@
2424
colorama = None
2525

2626

27+
def _signal_unless_backgrounded(signum, handler):
28+
# type: (int, Any) -> Optional[Any]
29+
try:
30+
return signal(signum, handler)
31+
except ValueError:
32+
# FIXME: this otherwise doesn't work when called from a non-main
33+
# thread. This therefore fails if we try to download more than one
34+
# wheel at once via threading, which calls back to Downloader, which
35+
# uses this progress bar.
36+
return None
37+
38+
2739
def _select_progress_class(preferred, fallback):
2840
# type: (Bar, Bar) -> Bar
2941
encoding = getattr(preferred.file, "encoding", None)
@@ -84,7 +96,8 @@ def __init__(self, *args, **kwargs):
8496
**kwargs
8597
)
8698

87-
self.original_handler = signal(SIGINT, self.handle_sigint)
99+
self.original_handler = _signal_unless_backgrounded(
100+
SIGINT, self.handle_sigint)
88101

89102
# If signal() returns None, the previous handler was not installed from
90103
# Python, and we cannot restore it. This probably should not happen,
@@ -103,7 +116,7 @@ def finish(self):
103116
normally, or gets interrupted.
104117
"""
105118
super(InterruptibleMixin, self).finish() # type: ignore
106-
signal(SIGINT, self.original_handler)
119+
_signal_unless_backgrounded(SIGINT, self.original_handler)
107120

108121
def handle_sigint(self, signum, frame): # type: ignore
109122
"""

src/pip/_internal/cli/req_command.py

+1
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,7 @@ def make_requirement_preparer(
231231
finder=finder,
232232
require_hashes=options.require_hashes,
233233
use_user_site=use_user_site,
234+
use_shallow_wheels=('shallow_wheels' in options.unstable_features),
234235
)
235236

236237
@staticmethod

src/pip/_internal/commands/download.py

+1
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ def run(self, options, args):
133133
requirement_set = resolver.resolve(
134134
reqs, check_supported_wheels=True
135135
)
136+
requirement_set.perform_all_final_hydration()
136137

137138
downloaded = ' '.join([req.name # type: ignore
138139
for req in requirement_set.requirements.values()

src/pip/_internal/commands/install.py

+1
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,7 @@ def run(self, options, args):
326326
requirement_set = resolver.resolve(
327327
reqs, check_supported_wheels=not options.target_dir
328328
)
329+
requirement_set.perform_all_final_hydration()
329330

330331
try:
331332
pip_req = requirement_set.get_requirement("pip")

src/pip/_internal/commands/wheel.py

+1
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ def run(self, options, args):
155155
requirement_set = resolver.resolve(
156156
reqs, check_supported_wheels=True
157157
)
158+
requirement_set.perform_all_final_hydration()
158159

159160
reqs_to_build = [
160161
r for r in requirement_set.requirements.values()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
import os
2+
3+
from pip._vendor.pkg_resources import DistInfoDistribution
4+
5+
from pip._internal.distributions.base import AbstractDistribution
6+
from pip._internal.network.shallow.httpfile import Context as HttpContext
7+
from pip._internal.network.shallow.httpfile import Url
8+
from pip._internal.network.shallow.wheel import Context as WheelContext
9+
from pip._internal.network.shallow.wheel import (
10+
ProjectName,
11+
WheelMetadataRequest,
12+
)
13+
from pip._internal.network.shallow.zipfile import Context as ZipContext
14+
from pip._internal.utils.typing import MYPY_CHECK_RUNNING
15+
from pip._internal.utils.wheel import WheelMetadata
16+
17+
if MYPY_CHECK_RUNNING:
18+
from typing import Any
19+
from pip._vendor.pkg_resources import Distribution
20+
from pip._internal.index.package_finder import PackageFinder
21+
from pip._internal.models.link import Link
22+
from pip._internal.network.download import Downloader
23+
from pip._internal.req import InstallRequirement
24+
25+
26+
class DistributionNeedingFinalHydration(DistInfoDistribution):
    """A ``DistInfoDistribution`` that remembers how to fetch its own file.

    On top of the usual distribution arguments it records a link, a
    downloader and an output directory, so that ``finally_hydrate()`` can
    later write the file behind the link into that directory.
    """

    def __init__(self, link, downloader, download_dir, *args, **kwargs):
        # type: (Link, Downloader, str, Any, Any) -> None
        # Everything after the three extra arguments is forwarded untouched
        # to the base distribution.
        super(DistributionNeedingFinalHydration, self).__init__(
            *args, **kwargs
        )
        self.final_link = link
        self.downloader = downloader
        self.download_dir = download_dir

    def finally_hydrate(self):
        # type: () -> None
        """Download ``final_link`` and write it into ``download_dir``.

        The output file is named after the ``filename`` reported by the
        download and is written chunk by chunk in binary mode.
        """
        fetched = self.downloader(self.final_link)
        target_path = os.path.join(self.download_dir, fetched.filename)
        with open(target_path, 'wb') as out_file:
            for piece in fetched.chunks:
                out_file.write(piece)
42+
43+
44+
class ShallowWheelDistribution(AbstractDistribution):
    """Represents a remote wheel whose metadata is fetched on its own.

    No preparation step is performed. The metadata is read through the
    ``pip._internal.network.shallow`` contexts at the moment the
    pkg_resources distribution is requested.
    """

    def __init__(self, req, downloader, download_dir):
        # type: (InstallRequirement, Downloader, str) -> None
        super(ShallowWheelDistribution, self).__init__(req)
        self._downloader = downloader
        self._download_dir = download_dir

    @property
    def _wheel_context(self):
        # type: () -> WheelContext
        # Built anew on every access: an http context over the downloader's
        # session, wrapped by a zip context, wrapped by a wheel context.
        session = self._downloader.get_session()
        return WheelContext(ZipContext(HttpContext(session)))

    def get_pkg_resources_distribution(self):
        # type: () -> Distribution
        """Loads the metadata from the shallow wheel file into memory and
        returns a Distribution that uses it, not relying on the wheel file or
        requirement.

        The returned distribution keeps the link, downloader and download
        directory so the file itself can be fetched afterwards.
        """
        # Wheels are never unnamed.
        assert self.req.name
        assert self.req.link
        link = self.req.link

        name = ProjectName(self.req.name)
        request = WheelMetadataRequest(
            url=Url(link.url),
            project_name=name,
        )
        extracted = self._wheel_context.extract_wheel_metadata(request)
        contents = extracted.contents

        filename = link.filename
        wheel_metadata = WheelMetadata({'METADATA': contents}, filename)

        return DistributionNeedingFinalHydration(
            link=link,
            downloader=self._downloader,
            download_dir=self._download_dir,
            location=filename,
            metadata=wheel_metadata,
            project_name=name.name,
        )

    def prepare_distribution_metadata(self, finder, build_isolation):
        # type: (PackageFinder, bool) -> None
        """Do nothing; both arguments are ignored."""

src/pip/_internal/network/download.py

+4
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,10 @@ def __init__(
183183
self._session = session
184184
self._progress_bar = progress_bar
185185

186+
def get_session(self):
    # type: () -> PipSession
    """Return the session stored on this instance as ``_session``."""
    session = self._session
    return session
189+
186190
def __call__(self, link):
187191
# type: (Link) -> Download
188192
try:

src/pip/_internal/network/shallow/__init__.py

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
"""
2+
Download ranges of files over remote http.
3+
"""
4+
5+
from collections import namedtuple
6+
7+
from pip._vendor import requests
8+
9+
from pip._internal.utils.typing import MYPY_CHECK_RUNNING
10+
from pip._internal.utils.urls import get_url_scheme
11+
12+
if MYPY_CHECK_RUNNING:
13+
from typing import Any, Optional
14+
15+
16+
def url_is_remote(url):
    # type: (str) -> bool
    """Tell whether ``url`` has the ``http`` or ``https`` scheme."""
    scheme = get_url_scheme(url)
    return scheme in ('http', 'https')
19+
20+
21+
class Url(namedtuple('Url', ['url'])):
    """One-field tuple holding a URL string asserted to be remote."""

    def __new__(cls, url):
        # type: (str) -> Url
        # Only http(s) locations are accepted; see url_is_remote().
        assert url_is_remote(url)
        instance = super(Url, cls).__new__(cls, url)
        return instance
27+
28+
29+
class HttpFileRequest(namedtuple('HttpFileRequest', ['url'])):
    """One-field tuple naming the ``url`` that a request is made against."""
31+
32+
33+
class Size(namedtuple('Size', ['size'])):
    """A non-negative integer wrapped in a one-field tuple.

    Addition, subtraction and ordering are defined only against another
    instance of the same class, which is checked with an assertion. The
    results of ``+`` and ``-`` are new ``Size`` values, so a subtraction that
    would go negative fails the constructor's assertion.
    """

    def __new__(cls, size=0):
        # type: (int) -> Size
        assert size >= 0
        return super(Size, cls).__new__(cls, size)

    def _peer_size(self, other):
        # type: (Any) -> int
        # Shared guard for every operator below: insist on a like-typed
        # operand, then hand back its raw integer.
        assert isinstance(other, type(self))
        return other.size

    def __add__(self, other):
        # type: (Any) -> Size
        theirs = self._peer_size(other)
        return Size(self.size + theirs)

    def __sub__(self, other):
        # type: (Any) -> Size
        theirs = self._peer_size(other)
        return Size(self.size - theirs)

    def __lt__(self, other):
        # type: (Any) -> bool
        theirs = self._peer_size(other)
        return self.size < theirs

    def __le__(self, other):
        # type: (Any) -> bool
        theirs = self._peer_size(other)
        return self.size <= theirs

    def __gt__(self, other):
        # type: (Any) -> bool
        theirs = self._peer_size(other)
        return self.size > theirs

    def __ge__(self, other):
        # type: (Any) -> bool
        theirs = self._peer_size(other)
        return self.size >= theirs
68+
69+
70+
class ByteRange(namedtuple('ByteRange', ['start', 'end'])):
    """A ``(start, end)`` pair of sizes where ``end`` is exclusive.

    Construction asserts that ``end`` is not smaller than ``start``.
    """

    def __new__(cls, start, end):
        # type: (Size, Size) -> ByteRange
        assert end >= start
        return super(ByteRange, cls).__new__(cls, start, end)

    def as_bytes_range_header(self):
        # type: () -> str
        """Format this range as the value of an HTTP ``Range`` header."""
        first_byte = self.start.size
        # NB: The byte ranges accepted here are inclusive, so remove one
        # from the end.
        last_byte = self.end.size - 1
        return "bytes={start}-{end}".format(start=first_byte, end=last_byte)

    def size_diff(self):
        # type: () -> Size
        """Return ``end - start``, i.e. how much this range spans."""
        span = self.end - self.start
        return span
87+
88+
89+
class BytesRangeRequest(namedtuple('BytesRangeRequest', ['start', 'end'])):
    """A possibly open-ended range request.

    Either bound may be None. ``get_byte_range()`` turns the request into a
    concrete ``ByteRange`` once the size of the file is known.
    """

    def __new__(cls, start, end):
        # type: (Optional[Size], Optional[Size]) -> BytesRangeRequest
        # The ordering can only be checked when both bounds are present.
        if start is not None and end is not None:
            assert end >= start
        return super(BytesRangeRequest, cls).__new__(cls, start, end)

    def get_byte_range(self, size):
        # type: (Size) -> ByteRange
        """Resolve this request against a file of the given ``size``.

        A missing start becomes 0 and a missing end becomes ``size``; bounds
        that are present are asserted not to exceed ``size``.
        """
        if self.start is not None:
            assert self.start <= size, "???/start={start},size={size}".format(
                start=self.start, size=size)
            lower = self.start.size
        else:
            lower = 0

        if self.end is not None:
            assert self.end <= size
            upper = self.end.size
        else:
            upper = size.size

        return ByteRange(start=Size(lower), end=Size(upper))
112+
113+
114+
class HttpFile(namedtuple('HttpFile', ['url', 'size'])):
    """Two-field tuple pairing a file's ``url`` with its ``size``."""
116+
117+
118+
class Context(object):
    """Issue HEAD and ranged GET requests for remote files over one session."""

    def __init__(self, session=None):
        # type: (Optional[requests.Session]) -> None
        # Fall back to a fresh session when the caller does not supply one.
        self.session = session or requests.Session()

    def head(self, request):
        # type: (HttpFileRequest) -> HttpFile
        """Send a HEAD request for ``request.url`` and report the file size.

        Returns an ``HttpFile`` whose size comes from the ``Content-Length``
        response header.

        Raises whatever ``raise_for_status()`` raises for an error response,
        and AssertionError when the response does not advertise byte-range
        support in ``Accept-Ranges``.
        """
        resp = self.session.head(request.url.url)
        resp.raise_for_status()
        # A server without range support may omit the header entirely. Look
        # it up with a default so that case fails the same assertion as an
        # unsupported value, instead of surfacing as a bare KeyError.
        accept_ranges = resp.headers.get("Accept-Ranges", "")
        assert (
            "bytes" in accept_ranges
        ), "???/bytes was not found in range headers"
        content_length = int(resp.headers["Content-Length"])
        return HttpFile(url=request.url, size=Size(content_length))

    def range_request(self, http_file, request):
        # type: (HttpFile, BytesRangeRequest) -> bytes
        """Fetch the bytes of ``http_file`` selected by ``request``.

        The request is first resolved against the known file size, then sent
        as a GET with a ``Range`` header. The returned bytes are asserted to
        be exactly as long as the resolved range.
        """
        byte_range = request.get_byte_range(http_file.size)
        resp = self.session.get(
            http_file.url.url,
            headers={"Range": byte_range.as_bytes_range_header()})
        resp.raise_for_status()

        if Size(len(resp.content)) == http_file.size:
            # This request for the full URL contents is cached, and we should
            # return just the requested byte range.
            start = byte_range.start.size
            end = byte_range.end.size
            response_bytes = resp.content[start:end]
        else:
            response_bytes = resp.content

        # Whichever branch ran, the result must match the requested length.
        size_diff = byte_range.size_diff()
        assert (
            Size(len(response_bytes)) == size_diff
        ), ("???/response should have been length {}, but got (size {}):\n{!r}"
            .format(size_diff, len(response_bytes), response_bytes))
        return response_bytes

0 commit comments

Comments
 (0)