1
Fork 0

Rollup merge of #109124 - ferrocene:pa-compression-mode, r=Mark-Simulacrum

Add `dist.compression-profile` option to control compression speed

PR #108534 reduced the size of compressed archives, but (as expected) it also resulted in much longer compression times and higher memory usage during compression.

It's desirable to keep the status quo (smaller archives but more CI usage), but it should also be configurable so that downstream users don't have to waste that much time on CI. As a data point, this resulted in doubling the time of Ferrocene's dist jobs, and required us to increase the RAM allocation for one of those jobs.

This PR adds a new `config.toml` setting, `dist.compression-profile`. The values can be:

* `fast`: equivalent to the gzip and xz preset of "1"
* `balanced`: equivalent to the gzip and xz preset of "6" (the CLI defaults as far as I'm aware)
* `best`: equivalent to the gzip preset of "9", and our custom xz profile

The default has also been moved back to `balanced`, to try and avoid the compression time regression for downstream users. I don't feel too strongly on the default, and I'm open to changing it.

Also, for the `best` profile the XZ settings do not match the "9" preset used by the CLI, and it might be confusing. Should we create a `custom-rustc-ci`/`ultra` profile for that?

r? ``@Mark-Simulacrum``
This commit is contained in:
nils 2023-03-21 13:00:23 +01:00 committed by GitHub
commit 09b1254eb2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 123 additions and 43 deletions

View file

@ -806,3 +806,9 @@ changelog-seen = 2
# #
# This list must be non-empty. # This list must be non-empty.
#compression-formats = ["gz", "xz"] #compression-formats = ["gz", "xz"]
# How much time should be spent compressing the tarballs. The better the
# compression profile, the longer compression will take.
#
# Available options: fast, balanced, best
#compression-profile = "fast"

View file

@ -191,6 +191,7 @@ pub struct Config {
pub dist_sign_folder: Option<PathBuf>, pub dist_sign_folder: Option<PathBuf>,
pub dist_upload_addr: Option<String>, pub dist_upload_addr: Option<String>,
pub dist_compression_formats: Option<Vec<String>>, pub dist_compression_formats: Option<Vec<String>>,
pub dist_compression_profile: String,
pub dist_include_mingw_linker: bool, pub dist_include_mingw_linker: bool,
// libstd features // libstd features
@ -703,6 +704,7 @@ define_config! {
src_tarball: Option<bool> = "src-tarball", src_tarball: Option<bool> = "src-tarball",
missing_tools: Option<bool> = "missing-tools", missing_tools: Option<bool> = "missing-tools",
compression_formats: Option<Vec<String>> = "compression-formats", compression_formats: Option<Vec<String>> = "compression-formats",
compression_profile: Option<String> = "compression-profile",
include_mingw_linker: Option<bool> = "include-mingw-linker", include_mingw_linker: Option<bool> = "include-mingw-linker",
} }
} }
@ -821,6 +823,7 @@ impl Config {
config.deny_warnings = true; config.deny_warnings = true;
config.bindir = "bin".into(); config.bindir = "bin".into();
config.dist_include_mingw_linker = true; config.dist_include_mingw_linker = true;
config.dist_compression_profile = "fast".into();
// set by build.rs // set by build.rs
config.build = TargetSelection::from_user(&env!("BUILD_TRIPLE")); config.build = TargetSelection::from_user(&env!("BUILD_TRIPLE"));
@ -1308,6 +1311,7 @@ impl Config {
config.dist_sign_folder = t.sign_folder.map(PathBuf::from); config.dist_sign_folder = t.sign_folder.map(PathBuf::from);
config.dist_upload_addr = t.upload_addr; config.dist_upload_addr = t.upload_addr;
config.dist_compression_formats = t.compression_formats; config.dist_compression_formats = t.compression_formats;
set(&mut config.dist_compression_profile, t.compression_profile);
set(&mut config.rust_dist_src, t.src_tarball); set(&mut config.rust_dist_src, t.src_tarball);
set(&mut config.missing_tools, t.missing_tools); set(&mut config.missing_tools, t.missing_tools);
set(&mut config.dist_include_mingw_linker, t.include_mingw_linker) set(&mut config.dist_include_mingw_linker, t.include_mingw_linker)

View file

@ -11,3 +11,7 @@ extended = true
[llvm] [llvm]
# Most users installing from source want to build all parts of the project from source, not just rustc itself. # Most users installing from source want to build all parts of the project from source, not just rustc itself.
download-ci-llvm = false download-ci-llvm = false
[dist]
# Use better compression when preparing tarballs.
compression-profile = "balanced"

View file

@ -318,6 +318,7 @@ impl<'a> Tarball<'a> {
assert!(!formats.is_empty(), "dist.compression-formats can't be empty"); assert!(!formats.is_empty(), "dist.compression-formats can't be empty");
cmd.arg("--compression-formats").arg(formats.join(",")); cmd.arg("--compression-formats").arg(formats.join(","));
} }
cmd.args(&["--compression-profile", &self.builder.config.dist_compression_profile]);
self.builder.run(&mut cmd); self.builder.run(&mut cmd);
// Ensure there are no symbolic links in the tarball. In particular, // Ensure there are no symbolic links in the tarball. In particular,

View file

@ -58,6 +58,7 @@ RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --disable-manage-submodules"
RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --enable-locked-deps" RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --enable-locked-deps"
RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --enable-cargo-native-static" RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --enable-cargo-native-static"
RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --set rust.codegen-units-std=1" RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --set rust.codegen-units-std=1"
RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --set dist.compression-profile=best"
# Only produce xz tarballs on CI. gz tarballs will be generated by the release # Only produce xz tarballs on CI. gz tarballs will be generated by the release
# process by recompressing the existing xz ones. This decreases the storage # process by recompressing the existing xz ones. This decreases the storage

View file

@ -1,7 +1,7 @@
use super::Scripter; use super::Scripter;
use super::Tarballer; use super::Tarballer;
use crate::{ use crate::{
compression::{CompressionFormat, CompressionFormats}, compression::{CompressionFormat, CompressionFormats, CompressionProfile},
util::*, util::*,
}; };
use anyhow::{bail, Context, Result}; use anyhow::{bail, Context, Result};
@ -48,6 +48,10 @@ actor! {
#[clap(value_name = "DIR")] #[clap(value_name = "DIR")]
output_dir: String = "./dist", output_dir: String = "./dist",
/// The profile used to compress the tarball.
#[clap(value_name = "FORMAT", default_value_t)]
compression_profile: CompressionProfile,
/// The formats used to compress the tarball /// The formats used to compress the tarball
#[clap(value_name = "FORMAT", default_value_t)] #[clap(value_name = "FORMAT", default_value_t)]
compression_formats: CompressionFormats, compression_formats: CompressionFormats,
@ -153,6 +157,7 @@ impl Combiner {
.work_dir(self.work_dir) .work_dir(self.work_dir)
.input(self.package_name) .input(self.package_name)
.output(path_to_str(&output)?.into()) .output(path_to_str(&output)?.into())
.compression_profile(self.compression_profile)
.compression_formats(self.compression_formats.clone()); .compression_formats(self.compression_formats.clone());
tarballer.run()?; tarballer.run()?;

View file

@ -4,6 +4,37 @@ use rayon::prelude::*;
use std::{convert::TryFrom, fmt, io::Read, io::Write, path::Path, str::FromStr}; use std::{convert::TryFrom, fmt, io::Read, io::Write, path::Path, str::FromStr};
use xz2::{read::XzDecoder, write::XzEncoder}; use xz2::{read::XzDecoder, write::XzEncoder};
/// Trade-off between compression speed and archive size used when producing tarballs.
///
/// Parsed from and displayed as the lowercase names `fast`, `balanced` and `best`.
/// Derives `PartialEq`/`Eq` so profiles can be compared directly (e.g. in assertions).
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)]
pub enum CompressionProfile {
    /// Quickest compression: gzip's fast level and the xz preset 1.
    Fast,
    /// Middle ground and the default value: gzip level 6 and the xz preset 6.
    #[default]
    Balanced,
    /// Smallest output: gzip's best level and hand-tuned xz (LZMA2) options.
    Best,
}
/// Parses a profile from its lowercase name.
impl FromStr for CompressionProfile {
    type Err = Error;

    /// Accepts exactly `fast`, `balanced` or `best`; any other input is
    /// rejected with an error that echoes the offending value.
    fn from_str(input: &str) -> Result<Self, Error> {
        match input {
            "fast" => Ok(Self::Fast),
            "balanced" => Ok(Self::Balanced),
            "best" => Ok(Self::Best),
            other => anyhow::bail!("invalid compression profile: {other}"),
        }
    }
}
/// Renders a profile as the same lowercase name that `FromStr` accepts.
impl fmt::Display for CompressionProfile {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Pick the textual name first, then emit it with a single write.
        let name = match self {
            CompressionProfile::Fast => "fast",
            CompressionProfile::Balanced => "balanced",
            CompressionProfile::Best => "best",
        };
        f.write_str(name)
    }
}
#[derive(Debug, Copy, Clone)] #[derive(Debug, Copy, Clone)]
pub enum CompressionFormat { pub enum CompressionFormat {
Gz, Gz,
@ -26,7 +57,11 @@ impl CompressionFormat {
} }
} }
pub(crate) fn encode(&self, path: impl AsRef<Path>) -> Result<Box<dyn Encoder>, Error> { pub(crate) fn encode(
&self,
path: impl AsRef<Path>,
profile: CompressionProfile,
) -> Result<Box<dyn Encoder>, Error> {
let mut os = path.as_ref().as_os_str().to_os_string(); let mut os = path.as_ref().as_os_str().to_os_string();
os.push(format!(".{}", self.extension())); os.push(format!(".{}", self.extension()));
let path = Path::new(&os); let path = Path::new(&os);
@ -37,49 +72,64 @@ impl CompressionFormat {
let file = crate::util::create_new_file(path)?; let file = crate::util::create_new_file(path)?;
Ok(match self { Ok(match self {
CompressionFormat::Gz => Box::new(GzEncoder::new(file, flate2::Compression::best())), CompressionFormat::Gz => Box::new(GzEncoder::new(
file,
match profile {
CompressionProfile::Fast => flate2::Compression::fast(),
CompressionProfile::Balanced => flate2::Compression::new(6),
CompressionProfile::Best => flate2::Compression::best(),
},
)),
CompressionFormat::Xz => { CompressionFormat::Xz => {
let mut filters = xz2::stream::Filters::new(); let encoder = match profile {
// the preset is overridden by the other options so it doesn't matter CompressionProfile::Fast => {
let mut lzma_ops = xz2::stream::LzmaOptions::new_preset(9).unwrap(); xz2::stream::MtStreamBuilder::new().threads(6).preset(1).encoder().unwrap()
// This sets the overall dictionary size, which is also how much memory (baseline) }
// is needed for decompression. CompressionProfile::Balanced => {
lzma_ops.dict_size(64 * 1024 * 1024); xz2::stream::MtStreamBuilder::new().threads(6).preset(6).encoder().unwrap()
// Use the best match finder for compression ratio. }
lzma_ops.match_finder(xz2::stream::MatchFinder::BinaryTree4); CompressionProfile::Best => {
lzma_ops.mode(xz2::stream::Mode::Normal); let mut filters = xz2::stream::Filters::new();
// Set nice len to the maximum for best compression ratio // the preset is overridden by the other options so it doesn't matter
lzma_ops.nice_len(273); let mut lzma_ops = xz2::stream::LzmaOptions::new_preset(9).unwrap();
// Set depth to a reasonable value, 0 means auto, 1000 is somwhat high but gives // This sets the overall dictionary size, which is also how much memory (baseline)
// good results. // is needed for decompression.
lzma_ops.depth(1000); lzma_ops.dict_size(64 * 1024 * 1024);
// 2 is the default and does well for most files // Use the best match finder for compression ratio.
lzma_ops.position_bits(2); lzma_ops.match_finder(xz2::stream::MatchFinder::BinaryTree4);
// 0 is the default and does well for most files lzma_ops.mode(xz2::stream::Mode::Normal);
lzma_ops.literal_position_bits(0); // Set nice len to the maximum for best compression ratio
// 3 is the default and does well for most files lzma_ops.nice_len(273);
lzma_ops.literal_context_bits(3); // Set depth to a reasonable value, 0 means auto, 1000 is somwhat high but gives
// good results.
lzma_ops.depth(1000);
// 2 is the default and does well for most files
lzma_ops.position_bits(2);
// 0 is the default and does well for most files
lzma_ops.literal_position_bits(0);
// 3 is the default and does well for most files
lzma_ops.literal_context_bits(3);
filters.lzma2(&lzma_ops); filters.lzma2(&lzma_ops);
let mut builder = xz2::stream::MtStreamBuilder::new(); let mut builder = xz2::stream::MtStreamBuilder::new();
builder.filters(filters); builder.filters(filters);
// On 32-bit platforms limit ourselves to 3 threads, otherwise we exceed memory // On 32-bit platforms limit ourselves to 3 threads, otherwise we exceed memory
// usage this process can take. In the future we'll likely only do super-fast // usage this process can take. In the future we'll likely only do super-fast
// compression in CI and move this heavyweight processing to promote-release (which // compression in CI and move this heavyweight processing to promote-release (which
// is always 64-bit and can run on big-memory machines) but for now this lets us // is always 64-bit and can run on big-memory machines) but for now this lets us
// move forward. // move forward.
if std::mem::size_of::<usize>() == 4 { if std::mem::size_of::<usize>() == 4 {
builder.threads(3); builder.threads(3);
} else { } else {
builder.threads(6); builder.threads(6);
} }
builder.encoder().unwrap()
}
};
let compressor = XzEncoder::new_stream( let compressor = XzEncoder::new_stream(std::io::BufWriter::new(file), encoder);
std::io::BufWriter::new(file),
builder.encoder().unwrap(),
);
Box::new(compressor) Box::new(compressor)
} }
}) })

View file

@ -1,6 +1,6 @@
use super::Scripter; use super::Scripter;
use super::Tarballer; use super::Tarballer;
use crate::compression::CompressionFormats; use crate::compression::{CompressionFormats, CompressionProfile};
use crate::util::*; use crate::util::*;
use anyhow::{bail, format_err, Context, Result}; use anyhow::{bail, format_err, Context, Result};
use std::collections::BTreeSet; use std::collections::BTreeSet;
@ -54,6 +54,10 @@ actor! {
#[clap(value_name = "DIR")] #[clap(value_name = "DIR")]
output_dir: String = "./dist", output_dir: String = "./dist",
/// The profile used to compress the tarball.
#[clap(value_name = "FORMAT", default_value_t)]
compression_profile: CompressionProfile,
/// The formats used to compress the tarball /// The formats used to compress the tarball
#[clap(value_name = "FORMAT", default_value_t)] #[clap(value_name = "FORMAT", default_value_t)]
compression_formats: CompressionFormats, compression_formats: CompressionFormats,
@ -113,6 +117,7 @@ impl Generator {
.work_dir(self.work_dir) .work_dir(self.work_dir)
.input(self.package_name) .input(self.package_name)
.output(path_to_str(&output)?.into()) .output(path_to_str(&output)?.into())
.compression_profile(self.compression_profile)
.compression_formats(self.compression_formats.clone()); .compression_formats(self.compression_formats.clone());
tarballer.run()?; tarballer.run()?;

View file

@ -6,7 +6,7 @@ use tar::{Builder, Header};
use walkdir::WalkDir; use walkdir::WalkDir;
use crate::{ use crate::{
compression::{CombinedEncoder, CompressionFormats}, compression::{CombinedEncoder, CompressionFormats, CompressionProfile},
util::*, util::*,
}; };
@ -25,6 +25,10 @@ actor! {
#[clap(value_name = "DIR")] #[clap(value_name = "DIR")]
work_dir: String = "./workdir", work_dir: String = "./workdir",
/// The profile used to compress the tarball.
#[clap(value_name = "FORMAT", default_value_t)]
compression_profile: CompressionProfile,
/// The formats used to compress the tarball. /// The formats used to compress the tarball.
#[clap(value_name = "FORMAT", default_value_t)] #[clap(value_name = "FORMAT", default_value_t)]
compression_formats: CompressionFormats, compression_formats: CompressionFormats,
@ -38,7 +42,7 @@ impl Tarballer {
let encoder = CombinedEncoder::new( let encoder = CombinedEncoder::new(
self.compression_formats self.compression_formats
.iter() .iter()
.map(|f| f.encode(&tarball_name)) .map(|f| f.encode(&tarball_name, self.compression_profile))
.collect::<Result<Vec<_>>>()?, .collect::<Result<Vec<_>>>()?,
); );