Manually implement filename sanitizing to allow the usage of file separators

This commit is contained in:
bytedream 2023-10-15 22:39:53 +02:00
parent bbb5a78765
commit 568bce0008
6 changed files with 77 additions and 42 deletions

11
Cargo.lock generated
View file

@ -401,7 +401,6 @@ dependencies = [
"regex", "regex",
"reqwest", "reqwest",
"rustls-native-certs", "rustls-native-certs",
"sanitize-filename",
"serde", "serde",
"serde_json", "serde_json",
"serde_plain", "serde_plain",
@ -1517,16 +1516,6 @@ version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741"
[[package]]
name = "sanitize-filename"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ed72fbaf78e6f2d41744923916966c4fbe3d7c74e3037a8ee482f1115572603"
dependencies = [
"lazy_static",
"regex",
]
[[package]] [[package]]
name = "schannel" name = "schannel"
version = "0.1.22" version = "0.1.22"

View file

@ -28,7 +28,6 @@ log = { version = "0.4", features = ["std"] }
num_cpus = "1.16" num_cpus = "1.16"
regex = "1.9" regex = "1.9"
reqwest = { version = "0.11", default-features = false, features = ["socks"] } reqwest = { version = "0.11", default-features = false, features = ["socks"] }
sanitize-filename = "0.5"
serde = "1.0" serde = "1.0"
serde_json = "1.0" serde_json = "1.0"
serde_plain = "1.0" serde_plain = "1.0"

View file

@ -173,7 +173,7 @@ impl Execute for Archive {
downloader.add_format(download_format) downloader.add_format(download_format)
} }
let formatted_path = format.format_path((&self.output).into(), true); let formatted_path = format.format_path((&self.output).into());
let (path, changed) = free_file(formatted_path.clone()); let (path, changed) = free_file(formatted_path.clone());
if changed && self.skip_existing { if changed && self.skip_existing {

View file

@ -165,7 +165,7 @@ impl Execute for Download {
let mut downloader = download_builder.clone().build(); let mut downloader = download_builder.clone().build();
downloader.add_format(download_format); downloader.add_format(download_format);
let formatted_path = format.format_path((&self.output).into(), true); let formatted_path = format.format_path((&self.output).into());
let (path, changed) = free_file(formatted_path.clone()); let (path, changed) = free_file(formatted_path.clone());
if changed && self.skip_existing { if changed && self.skip_existing {

View file

@ -1,6 +1,6 @@
use crate::utils::filter::real_dedup_vec; use crate::utils::filter::real_dedup_vec;
use crate::utils::log::tab_info; use crate::utils::log::tab_info;
use crate::utils::os::is_special_file; use crate::utils::os::{is_special_file, sanitize};
use anyhow::Result; use anyhow::Result;
use chrono::Duration; use chrono::Duration;
use crunchyroll_rs::media::{Resolution, Stream, Subtitle, VariantData}; use crunchyroll_rs::media::{Resolution, Stream, Subtitle, VariantData};
@ -368,47 +368,46 @@ impl Format {
} }
} }
/// Formats the given string if it has specific pattern in it. It's possible to sanitize it which /// Formats the given string if it has specific pattern in it. It also sanitizes the filename.
/// removes characters which can cause failures if the output string is used as a file name. pub fn format_path(&self, path: PathBuf) -> PathBuf {
pub fn format_path(&self, path: PathBuf, sanitize: bool) -> PathBuf { let mut path = sanitize(path.to_string_lossy(), false);
let path = path path = path
.to_string_lossy() .replace("{title}", &sanitize(&self.title, true))
.to_string()
.replace("{title}", &self.title)
.replace( .replace(
"{audio}", "{audio}",
&self &sanitize(
.locales self.locales
.iter() .iter()
.map(|(a, _)| a.to_string()) .map(|(a, _)| a.to_string())
.collect::<Vec<String>>() .collect::<Vec<String>>()
.join("|"), .join("|"),
true,
),
) )
.replace("{resolution}", &self.resolution.to_string()) .replace("{resolution}", &sanitize(self.resolution.to_string(), true))
.replace("{series_id}", &self.series_id) .replace("{series_id}", &sanitize(&self.series_id, true))
.replace("{series_name}", &self.series_name) .replace("{series_name}", &sanitize(&self.series_name, true))
.replace("{season_id}", &self.season_id) .replace("{season_id}", &sanitize(&self.season_id, true))
.replace("{season_name}", &self.season_title) .replace("{season_name}", &sanitize(&self.season_title, true))
.replace( .replace(
"{season_number}", "{season_number}",
&format!("{:0>2}", self.season_number.to_string()), &format!("{:0>2}", sanitize(self.season_number.to_string(), true)),
) )
.replace("{episode_id}", &self.episode_id) .replace("{episode_id}", &sanitize(&self.episode_id, true))
.replace( .replace(
"{episode_number}", "{episode_number}",
&format!("{:0>2}", self.episode_number.to_string()), &format!("{:0>2}", sanitize(&self.episode_number, true)),
) )
.replace( .replace(
"{relative_episode_number}", "{relative_episode_number}",
&self.relative_episode_number.unwrap_or_default().to_string(), &sanitize(
self.relative_episode_number.unwrap_or_default().to_string(),
true,
),
); );
if sanitize {
PathBuf::from(sanitize_filename::sanitize(path))
} else {
PathBuf::from(path) PathBuf::from(path)
} }
}
pub fn visual_output(&self, dst: &Path) { pub fn visual_output(&self, dst: &Path) {
info!( info!(

View file

@ -1,4 +1,6 @@
use log::debug; use log::debug;
use regex::{Regex, RegexBuilder};
use std::borrow::Cow;
use std::io::ErrorKind; use std::io::ErrorKind;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::process::{Command, Stdio}; use std::process::{Command, Stdio};
@ -78,3 +80,49 @@ pub fn free_file(mut path: PathBuf) -> (PathBuf, bool) {
pub fn is_special_file<P: AsRef<Path>>(path: P) -> bool { pub fn is_special_file<P: AsRef<Path>>(path: P) -> bool {
path.as_ref().exists() && !path.as_ref().is_file() && !path.as_ref().is_dir() path.as_ref().exists() && !path.as_ref().is_file() && !path.as_ref().is_dir()
} }
// Lazily compiled regular expressions used by `sanitize` below. Each pattern is compiled once on
// first use; the `unwrap()` calls can only fail if a pattern literal itself is invalid.
lazy_static::lazy_static! {
// Characters removed everywhere in the input: `?`, `<`, `>`, `:`, `*`, `|` and `"`. Path
// separators (`/`, `\`) are intentionally absent here; `sanitize` strips them separately.
// NOTE(review): `:` is listed twice in the character class; the duplicate has no effect.
static ref ILLEGAL_RE: Regex = Regex::new(r#"[\?<>:\*\|":]"#).unwrap();
// Code points U+0000..=U+001F and U+0080..=U+009F.
static ref CONTROL_RE: Regex = Regex::new(r"[\x00-\x1f\x80-\x9f]").unwrap();
// An input that consists solely of dots (e.g. `.` or `..`).
static ref RESERVED_RE: Regex = Regex::new(r"^\.+$").unwrap();
// An input that is, as a whole, one of the names `con`, `prn`, `aux`, `nul`, `com0`-`com9` or
// `lpt0`-`lpt9`, optionally followed by a dot and anything else. Matched case-insensitively.
// NOTE(review): case-insensitivity is requested twice (inline `(?i)` and the builder flag);
// either one alone would be sufficient.
static ref WINDOWS_RESERVED_RE: Regex = RegexBuilder::new(r"(?i)^(con|prn|aux|nul|com[0-9]|lpt[0-9])(\..*)?$")
.case_insensitive(true)
.build()
.unwrap();
// A trailing run of dots and/or spaces at the very end of the input.
static ref WINDOWS_TRAILING_RE: Regex = Regex::new(r"[\. ]+$").unwrap();
}
/// Sanitizes a filename with the option to include/exclude the path separator from sanitizing.
/// This is based on the implementation of the
/// [`sanitize-filename`](https://crates.io/crates/sanitize-filename) crate.
///
/// The following steps are applied in order:
/// 1. every match of `ILLEGAL_RE` and `CONTROL_RE` is removed,
/// 2. the first match of `RESERVED_RE` is removed,
/// 3. on Windows only, the first match of `WINDOWS_RESERVED_RE` and then of
///    `WINDOWS_TRAILING_RE` is removed,
/// 4. if `include_path_separator` is `true`, path separators are removed as well (`/` on every
///    platform, additionally `\` on Windows),
/// 5. the result is truncated to at most 255 bytes.
///
/// # Arguments
/// * `path` - the string to sanitize.
/// * `include_path_separator` - when `true`, path separators are stripped too; pass `false` to
///   keep them, e.g. when `path` is a whole path rather than a single path component.
///
/// # Returns
/// The sanitized string. It is never longer than 255 bytes and may be empty.
pub fn sanitize<S: AsRef<str>>(path: S, include_path_separator: bool) -> String {
    // Upper bound, in bytes, for the length of the returned string.
    const MAX_LEN: usize = 255;

    let path = Cow::from(path.as_ref());
    let path = ILLEGAL_RE.replace_all(&path, "");
    let path = CONTROL_RE.replace_all(&path, "");
    let path = RESERVED_RE.replace(&path, "");

    let collect = |mut name: String| {
        if name.len() > MAX_LEN {
            // `MAX_LEN` is a byte offset and may fall into the middle of a multi-byte UTF-8
            // character. Cutting there would panic, so step back to the closest char boundary.
            // Index 0 is always a boundary, hence the loop terminates.
            let mut end = MAX_LEN;
            while !name.is_char_boundary(end) {
                end -= 1;
            }
            name.truncate(end);
        }
        name
    };

    if cfg!(windows) {
        let path = WINDOWS_RESERVED_RE.replace(&path, "");
        let path = WINDOWS_TRAILING_RE.replace(&path, "");
        let mut path = path.to_string();
        if include_path_separator {
            path = path.replace(['\\', '/'], "");
        }
        collect(path)
    } else {
        let mut path = path.to_string();
        if include_path_separator {
            path = path.replace('/', "");
        }
        collect(path)
    }
}