From 568bce00085cc301029d0b39c39cf15b97aa1d3d Mon Sep 17 00:00:00 2001 From: bytedream Date: Sun, 15 Oct 2023 22:39:53 +0200 Subject: [PATCH] Manually implement filename sanitizing to allow the usage of file separators --- Cargo.lock | 11 ----- crunchy-cli-core/Cargo.toml | 1 - crunchy-cli-core/src/archive/command.rs | 2 +- crunchy-cli-core/src/download/command.rs | 2 +- crunchy-cli-core/src/utils/format.rs | 55 ++++++++++++------------ crunchy-cli-core/src/utils/os.rs | 48 +++++++++++++++++++++ 6 files changed, 77 insertions(+), 42 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 09cad3e..409a005 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -401,7 +401,6 @@ dependencies = [ "regex", "reqwest", "rustls-native-certs", - "sanitize-filename", "serde", "serde_json", "serde_plain", @@ -1517,16 +1516,6 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" -[[package]] -name = "sanitize-filename" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ed72fbaf78e6f2d41744923916966c4fbe3d7c74e3037a8ee482f1115572603" -dependencies = [ - "lazy_static", - "regex", -] - [[package]] name = "schannel" version = "0.1.22" diff --git a/crunchy-cli-core/Cargo.toml b/crunchy-cli-core/Cargo.toml index fc298f3..e38cb1a 100644 --- a/crunchy-cli-core/Cargo.toml +++ b/crunchy-cli-core/Cargo.toml @@ -28,7 +28,6 @@ log = { version = "0.4", features = ["std"] } num_cpus = "1.16" regex = "1.9" reqwest = { version = "0.11", default-features = false, features = ["socks"] } -sanitize-filename = "0.5" serde = "1.0" serde_json = "1.0" serde_plain = "1.0" diff --git a/crunchy-cli-core/src/archive/command.rs b/crunchy-cli-core/src/archive/command.rs index bb4794f..3f455ed 100644 --- a/crunchy-cli-core/src/archive/command.rs +++ b/crunchy-cli-core/src/archive/command.rs @@ -173,7 +173,7 @@ impl Execute for Archive { downloader.add_format(download_format) } - let formatted_path = format.format_path((&self.output).into(), true); + let formatted_path = format.format_path((&self.output).into()); let (path, changed) = free_file(formatted_path.clone()); if changed && self.skip_existing { diff --git a/crunchy-cli-core/src/download/command.rs b/crunchy-cli-core/src/download/command.rs index 760bf38..9c9ecb4 100644 --- a/crunchy-cli-core/src/download/command.rs +++ b/crunchy-cli-core/src/download/command.rs @@ -165,7 +165,7 @@ impl Execute for Download { let mut downloader = download_builder.clone().build(); downloader.add_format(download_format); - let formatted_path = format.format_path((&self.output).into(), true); + let formatted_path = format.format_path((&self.output).into()); let (path, changed) = free_file(formatted_path.clone()); if changed && self.skip_existing { diff --git a/crunchy-cli-core/src/utils/format.rs b/crunchy-cli-core/src/utils/format.rs index 618f7d5..f462263 100644 --- a/crunchy-cli-core/src/utils/format.rs +++ b/crunchy-cli-core/src/utils/format.rs @@ -1,6 +1,6 @@ use crate::utils::filter::real_dedup_vec; use crate::utils::log::tab_info; -use crate::utils::os::is_special_file; +use crate::utils::os::{is_special_file, sanitize}; use anyhow::Result; use chrono::Duration; use crunchyroll_rs::media::{Resolution, Stream, Subtitle, VariantData}; @@ -368,46 +368,45 @@ impl Format { } } - /// Formats the given string if it has specific pattern in it. It's possible to sanitize it which - /// removes characters which can cause failures if the output string is used as a file name. - pub fn format_path(&self, path: PathBuf, sanitize: bool) -> PathBuf { - let path = path - .to_string_lossy() - .to_string() - .replace("{title}", &self.title) + /// Formats the given string if it has specific pattern in it. It also sanitizes the filename. + pub fn format_path(&self, path: PathBuf) -> PathBuf { + let mut path = sanitize(path.to_string_lossy(), false); + path = path + .replace("{title}", &sanitize(&self.title, true)) .replace( "{audio}", - &self - .locales - .iter() - .map(|(a, _)| a.to_string()) - .collect::>() - .join("|"), + &sanitize( + self.locales + .iter() + .map(|(a, _)| a.to_string()) + .collect::>() + .join("|"), + true, + ), ) - .replace("{resolution}", &self.resolution.to_string()) - .replace("{series_id}", &self.series_id) - .replace("{series_name}", &self.series_name) - .replace("{season_id}", &self.season_id) - .replace("{season_name}", &self.season_title) + .replace("{resolution}", &sanitize(self.resolution.to_string(), true)) + .replace("{series_id}", &sanitize(&self.series_id, true)) + .replace("{series_name}", &sanitize(&self.series_name, true)) + .replace("{season_id}", &sanitize(&self.season_id, true)) + .replace("{season_name}", &sanitize(&self.season_title, true)) .replace( "{season_number}", - &format!("{:0>2}", self.season_number.to_string()), + &format!("{:0>2}", sanitize(self.season_number.to_string(), true)), ) - .replace("{episode_id}", &self.episode_id) + .replace("{episode_id}", &sanitize(&self.episode_id, true)) .replace( "{episode_number}", - &format!("{:0>2}", self.episode_number.to_string()), + &format!("{:0>2}", sanitize(&self.episode_number, true)), ) .replace( "{relative_episode_number}", - &self.relative_episode_number.unwrap_or_default().to_string(), + &sanitize( + self.relative_episode_number.unwrap_or_default().to_string(), + true, + ), ); - if sanitize { - PathBuf::from(sanitize_filename::sanitize(path)) - } else { - PathBuf::from(path) - } + PathBuf::from(path) } pub fn visual_output(&self, dst: &Path) { diff --git a/crunchy-cli-core/src/utils/os.rs b/crunchy-cli-core/src/utils/os.rs index f6d9ad0..1f76b90 100644 --- a/crunchy-cli-core/src/utils/os.rs +++ b/crunchy-cli-core/src/utils/os.rs @@ -1,4 +1,6 @@ use log::debug; +use regex::{Regex, RegexBuilder}; +use std::borrow::Cow; use std::io::ErrorKind; use std::path::{Path, PathBuf}; use std::process::{Command, Stdio}; @@ -78,3 +80,49 @@ pub fn free_file(mut path: PathBuf) -> (PathBuf, bool) { pub fn is_special_file>(path: P) -> bool { path.as_ref().exists() && !path.as_ref().is_file() && !path.as_ref().is_dir() } + +lazy_static::lazy_static! { + static ref ILLEGAL_RE: Regex = Regex::new(r#"[\?<>:\*\|":]"#).unwrap(); + static ref CONTROL_RE: Regex = Regex::new(r"[\x00-\x1f\x80-\x9f]").unwrap(); + static ref RESERVED_RE: Regex = Regex::new(r"^\.+$").unwrap(); + static ref WINDOWS_RESERVED_RE: Regex = RegexBuilder::new(r"(?i)^(con|prn|aux|nul|com[0-9]|lpt[0-9])(\..*)?$") + .case_insensitive(true) + .build() + .unwrap(); + static ref WINDOWS_TRAILING_RE: Regex = Regex::new(r"[\. ]+$").unwrap(); +} + +/// Sanitizes a filename with the option to include/exclude the path separator from sanitizing. This +/// is based of the implementation of the +/// [`sanitize-filename`](https://crates.io/crates/sanitize-filename) crate. +pub fn sanitize>(path: S, include_path_separator: bool) -> String { + let path = Cow::from(path.as_ref()); + + let path = ILLEGAL_RE.replace_all(&path, ""); + let path = CONTROL_RE.replace_all(&path, ""); + let path = RESERVED_RE.replace(&path, ""); + + let collect = |name: String| { + if name.len() > 255 { + name[..255].to_string() + } else { + name + } + }; + + if cfg!(windows) { + let path = WINDOWS_RESERVED_RE.replace(&path, ""); + let path = WINDOWS_TRAILING_RE.replace(&path, ""); + let mut path = path.to_string(); + if include_path_separator { + path = path.replace(['\\', '/'], ""); + } + collect(path) + } else { + let mut path = path.to_string(); + if include_path_separator { + path = path.replace('/', ""); + } + collect(path) + } +}