Skip to content

Implement #624: Use shorter hashes with CPM_SOURCE_CACHE #631

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions cmake/CPM.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,52 @@ function(cpm_package_name_from_git_uri URI RESULT)
endif()
endfunction()


# Find the shortest prefix of a hash that can be used as a cache directory name.
#
# Arguments:
#   source_cache_dir      - directory that holds the `<prefix>.hash` / `<prefix>.lock` files
#   origin_hash           - the full hash to shorten
#   short_hash_output_var - name of the variable in the caller's scope that receives the prefix
#
# Prefixes of length 4, 8, 12, ... 40 are tried in order. E.g., if origin_hash is
# cccb77ae9609d2768ed80dd42cec54f77b1f1455, the following files are checked until one is found
# that is either empty (allowing us to claim it by writing origin_hash into it) or whose
# contents match origin_hash:
#
# - .../cccb.hash
# - .../cccb77ae.hash
# - .../cccb77ae9609.hash
# - .../cccb77ae9609d276.hash
# etc
#
# We will be able to use a short prefix with very high probability, but in the (rare) event
# that the first few characters collide with a different hash, longer and longer prefixes are
# checked. If no candidate can be claimed, the longest prefix that was tried is returned.
function(cpm_get_shortest_hash source_cache_dir origin_hash short_hash_output_var)
  foreach(len RANGE 4 40 4)
    string(SUBSTRING "${origin_hash}" 0 ${len} short_hash)
    # Quote the paths so a cache directory containing `;` is not split into several arguments.
    set(hash_lock "${source_cache_dir}/${short_hash}.lock")
    set(hash_fp "${source_cache_dir}/${short_hash}.hash")

    # Take a lock, so we don't have a race condition with another instance of cmake. The lock is
    # released explicitly below; GUARD FUNCTION ensures it is also released on its own if we
    # leave the function early because of an error.
    file(LOCK "${hash_lock}" GUARD FUNCTION)

    # Load the contents of .../${short_hash}.hash, creating the file first if it is missing.
    file(TOUCH "${hash_fp}")
    file(READ "${hash_fp}" hash_fp_contents)
    # Ignore surrounding whitespace (e.g. a trailing newline added by an editor) so that such a
    # file is still recognised as belonging to this hash.
    string(STRIP "${hash_fp_contents}" hash_fp_contents)

    set(prefix_usable FALSE)
    if("${hash_fp_contents}" STREQUAL "")
      # Nobody owns this prefix yet: claim it by writing the origin hash.
      file(WRITE "${hash_fp}" "${origin_hash}")
      set(prefix_usable TRUE)
    elseif("${hash_fp_contents}" STREQUAL "${origin_hash}")
      # The prefix was already claimed for this very hash.
      set(prefix_usable TRUE)
    endif()

    # Single release point for all three outcomes (claimed, already ours, collision).
    file(LOCK "${hash_lock}" RELEASE)

    if(prefix_usable)
      break()
    endif()
  endforeach()
  set(${short_hash_output_var} "${short_hash}" PARENT_SCOPE)
endfunction()


# Try to infer package name and version from a url
function(cpm_package_name_and_ver_from_url url outName outVer)
if(url MATCHES "[/\\?]([a-zA-Z0-9_\\.-]+)\\.(tar|tar\\.gz|tar\\.bz2|zip|ZIP)(\\?|/|$)")
Expand Down Expand Up @@ -798,9 +844,19 @@ function(CPMAddPackage)
set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${CPM_ARGS_CUSTOM_CACHE_KEY})
elseif(CPM_USE_NAMED_CACHE_DIRECTORIES)
string(SHA1 origin_hash "${origin_parameters};NEW_CACHE_STRUCTURE_TAG")
cpm_get_shortest_hash(
"${CPM_SOURCE_CACHE}/${lower_case_name}" # source cache directory
"${origin_hash}" # Input hash
origin_hash # Computed hash
)
set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${origin_hash}/${CPM_ARGS_NAME})
else()
string(SHA1 origin_hash "${origin_parameters}")
cpm_get_shortest_hash(
"${CPM_SOURCE_CACHE}/${lower_case_name}" # source cache directory
"${origin_hash}" # Input hash
origin_hash # Computed hash
)
set(download_directory ${CPM_SOURCE_CACHE}/${lower_case_name}/${origin_hash})
endif()
# Expand `download_directory` relative path. This is important because EXISTS doesn't work for
Expand Down
13 changes: 13 additions & 0 deletions cmake/testing.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,16 @@ function(ASSERT_NOT_EXISTS file)
message(FATAL_ERROR "assertion failed: file ${file} exists")
endif()
endfunction()

# Assert that `file` exists and that its contents are exactly `content`.
#
# Arguments:
#   file    - path of the file to inspect
#   content - expected contents, compared verbatim (no whitespace trimming)
#
# Prints a STATUS message on success; aborts with FATAL_ERROR if the file is missing or its
# contents differ.
function(ASSERT_CONTENTS_EQUAL file content)
  # Guard clause: FATAL_ERROR stops processing, so nothing below runs for a missing file.
  if(NOT EXISTS "${file}")
    message(FATAL_ERROR "assertion failed: file '${file}' does not exist")
  endif()

  file(READ "${file}" file_content)
  if("${content}" STREQUAL "${file_content}")
    message(STATUS "test passed: '${file}' exists and contains '${content}'")
  else()
    message(FATAL_ERROR "assertion failed: file '${file}' does not contain expected content.")
  endif()
endfunction()
13 changes: 12 additions & 1 deletion test/integration/test_source_cache.rb
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,17 @@ def check_package_cache(name, ver, dir_sha1)
assert_equal ver, package.ver
expected_parent_dir = File.join(@cache_dir, name.downcase)
assert package.src_dir.start_with?(expected_parent_dir), "#{package.src_dir} must be in #{expected_parent_dir}"
assert_equal dir_sha1, File.basename(package.src_dir)

# The hash has been shortened by cpm_get_shortest_hash. The following
# should hold:
# - The short hash should be a prefix of the input hash
# - There should be a file ".../${short_hash}.hash" which matches the full hash
short_hash = File.basename(package.src_dir)
assert dir_sha1.start_with?(short_hash), "short_hash should be a prefix of dir_sha1"

# Check that the full hash is stored in the .hash file
hash_file = "#{package.src_dir}.hash"
assert File.exist?(hash_file), "Hash file #{hash_file} should exist"
assert_equal dir_sha1, File.read(hash_file), "Hash file should contain the full original hash"
end
end
84 changes: 84 additions & 0 deletions test/unit/get_shortest_hash.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)

include(${CPM_PATH}/CPM.cmake)
include(${CPM_PATH}/testing.cmake)

# Build a scratch directory name from the current time (seconds since epoch) and a random hex
# suffix, so that repeated or parallel runs are unlikely to share a directory.
string(RANDOM LENGTH 6 ALPHABET "0123456789abcdef" tmpdir_suffix)
string(TIMESTAMP tmpdir_base "%s" UTC)

# Use an absolute path: if(EXISTS) and if(IS_DIRECTORY) are only well-defined for full paths.
# When run with `cmake -P`, CMAKE_CURRENT_BINARY_DIR is the current working directory.
set(tmp "${CMAKE_CURRENT_BINARY_DIR}/get_shortest_hash-${tmpdir_base}-${tmpdir_suffix}")

if(IS_DIRECTORY "${tmp}")
  message(FATAL_ERROR "Test directory ${tmp} already exists")
endif()

file(MAKE_DIRECTORY "${tmp}")

# Helper: shorten `full_hash` inside ${tmp}, then check that the chosen prefix is
# `expected_short` and that ${tmp}/<expected_short>.hash contains the full hash.
function(check_shortest_hash full_hash expected_short)
  cpm_get_shortest_hash("${tmp}" "${full_hash}" hash)
  assert_equal("${hash}" "${expected_short}")
  assert_contents_equal("${tmp}/${expected_short}.hash" "${full_hash}")
endfunction()

# 1. Sanity check: none of these hash files should exist yet

assert_not_exists("${tmp}/cccb.hash")
assert_not_exists("${tmp}/cccb77ae.hash")
assert_not_exists("${tmp}/cccb77ae9609.hash")
assert_not_exists("${tmp}/cccb77ae9608.hash")
assert_not_exists("${tmp}/cccb77be.hash")

# 2. The directory is empty, so the first hash should get a 4-character prefix

check_shortest_hash("cccb77ae9609d2768ed80dd42cec54f77b1f1455" "cccb")

# 3. Calling the function with new hashes that differ only subtly should result in more
#    characters being used, enough to uniquely identify each hash

check_shortest_hash("cccb77ae9609d2768ed80dd42cec54f77b1f1456" "cccb77ae")
check_shortest_hash("cccb77ae9609d2768ed80dd42cec54f77b1f1457" "cccb77ae9609")
check_shortest_hash("cccb77ae9608d2768ed80dd42cec54f77b1f1455" "cccb77ae9608")
check_shortest_hash("cccb77be9609d2768ed80dd42cec54f77b1f1456" "cccb77be")

# 4. The files created earlier should still exist, and have the same content

assert_contents_equal("${tmp}/cccb.hash" "cccb77ae9609d2768ed80dd42cec54f77b1f1455")
assert_contents_equal("${tmp}/cccb77ae.hash" "cccb77ae9609d2768ed80dd42cec54f77b1f1456")
assert_contents_equal("${tmp}/cccb77ae9609.hash" "cccb77ae9609d2768ed80dd42cec54f77b1f1457")
assert_contents_equal("${tmp}/cccb77ae9608.hash" "cccb77ae9608d2768ed80dd42cec54f77b1f1455")
assert_contents_equal("${tmp}/cccb77be.hash" "cccb77be9609d2768ed80dd42cec54f77b1f1456")

# 5. Confirm idempotence: calling the function again with any of these hashes should produce
#    the same prefix as before (hash lookups work correctly once the .hash files are created)

check_shortest_hash("cccb77ae9609d2768ed80dd42cec54f77b1f1455" "cccb")
check_shortest_hash("cccb77ae9609d2768ed80dd42cec54f77b1f1456" "cccb77ae")
check_shortest_hash("cccb77ae9609d2768ed80dd42cec54f77b1f1457" "cccb77ae9609")
check_shortest_hash("cccb77ae9608d2768ed80dd42cec54f77b1f1455" "cccb77ae9608")
check_shortest_hash("cccb77be9609d2768ed80dd42cec54f77b1f1456" "cccb77be")

# 6. Cleanup - remove the temporary directory that we created

file(REMOVE_RECURSE "${tmp}")