diff --git a/crunchy-cli-core/src/archive/command.rs b/crunchy-cli-core/src/archive/command.rs index bf61fdf..600065a 100644 --- a/crunchy-cli-core/src/archive/command.rs +++ b/crunchy-cli-core/src/archive/command.rs @@ -6,7 +6,7 @@ use crate::utils::download::{ use crate::utils::ffmpeg::FFmpegPreset; use crate::utils::filter::Filter; use crate::utils::format::{Format, SingleFormat}; -use crate::utils::locale::all_locale_in_locales; +use crate::utils::locale::{all_locale_in_locales, resolve_locales, LanguageTagging}; use crate::utils::log::progress; use crate::utils::os::{free_file, has_ffmpeg, is_special_file}; use crate::utils::parse::parse_url; @@ -20,6 +20,7 @@ use crunchyroll_rs::Locale; use log::{debug, warn}; use regex::Regex; use std::fmt::{Display, Formatter}; +use std::iter::zip; use std::ops::Sub; use std::path::{Path, PathBuf}; use std::process::{Command, Stdio}; @@ -31,15 +32,19 @@ pub struct Archive { #[arg(help = format!("Audio languages. Can be used multiple times. \ Available languages are: {}", Locale::all().into_iter().map(|l| l.to_string()).collect::>().join(", ")))] #[arg(long_help = format!("Audio languages. Can be used multiple times. \ - Available languages are:\n {}", Locale::all().into_iter().map(|l| format!("{:<6} → {}", l.to_string(), l.to_human_readable())).collect::>().join("\n ")))] + Available languages are:\n {}\nIETF tagged language codes for the shown available locales can be used too", Locale::all().into_iter().map(|l| format!("{:<6} → {}", l.to_string(), l.to_human_readable())).collect::>().join("\n ")))] #[arg(short, long, default_values_t = vec![Locale::ja_JP, crate::utils::locale::system_locale()])] pub(crate) audio: Vec, + #[arg(skip)] + output_audio_locales: Vec, #[arg(help = format!("Subtitle languages. Can be used multiple times. \ Available languages are: {}", Locale::all().into_iter().map(|l| l.to_string()).collect::>().join(", ")))] #[arg(long_help = format!("Subtitle languages. Can be used multiple times. 
\ - Available languages are: {}", Locale::all().into_iter().map(|l| l.to_string()).collect::>().join(", ")))] + Available languages are: {}\nIETF tagged language codes for the shown available locales can be used too", Locale::all().into_iter().map(|l| l.to_string()).collect::>().join(", ")))] #[arg(short, long, default_values_t = Locale::all())] pub(crate) subtitle: Vec, + #[arg(skip)] + output_subtitle_locales: Vec, #[arg(help = "Name of the output file")] #[arg(long_help = "Name of the output file. \ @@ -95,12 +100,22 @@ pub struct Archive { #[arg(short, long, default_value = "auto")] #[arg(value_parser = MergeBehavior::parse)] pub(crate) merge: MergeBehavior, - #[arg( help = "If the merge behavior is 'auto', only download multiple video tracks if their length difference is higher than the given milliseconds" )] #[arg(long, default_value_t = 200)] pub(crate) merge_auto_tolerance: u32, + #[arg( + long, + help = "Specified which language tagging the audio and subtitle tracks and language specific format options should have. \ + Valid options are: 'default' (how Crunchyroll uses it internally), 'ietf' (according to the IETF standard)" + )] + #[arg( + long_help = "Specified which language tagging the audio and subtitle tracks and language specific format options should have. \ + Valid options are: 'default' (how Crunchyroll uses it internally), 'ietf' (according to the IETF standard; you might run in issues as there are multiple locales which resolve to the same IETF language code, e.g. 'es-LA' and 'es-ES' are both resolving to 'es')" + )] + #[arg(value_parser = LanguageTagging::parse)] + pub(crate) language_tagging: Option, #[arg(help = format!("Presets for converting the video to a specific coding format. 
\ Available presets: \n {}", FFmpegPreset::available_matches_human_readable().join("\n ")))] @@ -217,6 +232,26 @@ impl Execute for Archive { self.audio = all_locale_in_locales(self.audio.clone()); self.subtitle = all_locale_in_locales(self.subtitle.clone()); + if let Some(language_tagging) = &self.language_tagging { + self.audio = resolve_locales(&self.audio); + self.subtitle = resolve_locales(&self.subtitle); + self.output_audio_locales = language_tagging.convert_locales(&self.audio); + self.output_subtitle_locales = language_tagging.convert_locales(&self.subtitle); + } else { + self.output_audio_locales = self + .audio + .clone() + .into_iter() + .map(|l| l.to_string()) + .collect(); + self.output_subtitle_locales = self + .subtitle + .clone() + .into_iter() + .map(|l| l.to_string()) + .collect(); + } + Ok(()) } @@ -259,7 +294,13 @@ impl Execute for Archive { .audio_sort(Some(self.audio.clone())) .subtitle_sort(Some(self.subtitle.clone())) .no_closed_caption(self.no_closed_caption) - .threads(self.threads); + .threads(self.threads) + .audio_locale_output_map( + zip(self.audio.clone(), self.output_audio_locales.clone()).collect(), + ) + .subtitle_locale_output_map( + zip(self.subtitle.clone(), self.output_subtitle_locales.clone()).collect(), + ); for single_formats in single_format_collection.into_iter() { let (download_formats, mut format) = get_format(&self, &single_formats).await?; @@ -275,9 +316,14 @@ impl Execute for Archive { .as_ref() .map_or((&self.output).into(), |so| so.into()), self.universal_output, + self.language_tagging.as_ref(), ) } else { - format.format_path((&self.output).into(), self.universal_output) + format.format_path( + (&self.output).into(), + self.universal_output, + self.language_tagging.as_ref(), + ) }; let (mut path, changed) = free_file(formatted_path.clone()); diff --git a/crunchy-cli-core/src/download/command.rs b/crunchy-cli-core/src/download/command.rs index 08756e0..843f5cd 100644 --- a/crunchy-cli-core/src/download/command.rs 
+++ b/crunchy-cli-core/src/download/command.rs @@ -4,6 +4,7 @@ use crate::utils::download::{DownloadBuilder, DownloadFormat, DownloadFormatMeta use crate::utils::ffmpeg::{FFmpegPreset, SOFTSUB_CONTAINERS}; use crate::utils::filter::Filter; use crate::utils::format::{Format, SingleFormat}; +use crate::utils::locale::{resolve_locales, LanguageTagging}; use crate::utils::log::progress; use crate::utils::os::{free_file, has_ffmpeg, is_special_file}; use crate::utils::parse::parse_url; @@ -14,6 +15,7 @@ use anyhow::Result; use crunchyroll_rs::media::Resolution; use crunchyroll_rs::Locale; use log::{debug, warn}; +use std::collections::HashMap; use std::path::Path; #[derive(Clone, Debug, clap::Parser)] @@ -23,14 +25,18 @@ pub struct Download { #[arg(help = format!("Audio language. Can only be used if the provided url(s) point to a series. \ Available languages are: {}", Locale::all().into_iter().map(|l| l.to_string()).collect::>().join(", ")))] #[arg(long_help = format!("Audio language. Can only be used if the provided url(s) point to a series. \ - Available languages are:\n {}", Locale::all().into_iter().map(|l| format!("{:<6} → {}", l.to_string(), l.to_human_readable())).collect::>().join("\n ")))] + Available languages are:\n {}\nIETF tagged language codes for the shown available locales can be used too", Locale::all().into_iter().map(|l| format!("{:<6} → {}", l.to_string(), l.to_human_readable())).collect::>().join("\n ")))] #[arg(short, long, default_value_t = crate::utils::locale::system_locale())] pub(crate) audio: Locale, + #[arg(skip)] + output_audio_locale: String, #[arg(help = format!("Subtitle language. Available languages are: {}", Locale::all().into_iter().map(|l| l.to_string()).collect::>().join(", ")))] #[arg(long_help = format!("Subtitle language. If set, the subtitle will be burned into the video and cannot be disabled. 
\ - Available languages are: {}", Locale::all().into_iter().map(|l| l.to_string()).collect::>().join(", ")))] + Available languages are: {}\nIETF tagged language codes for the shown available locales can be used too", Locale::all().into_iter().map(|l| l.to_string()).collect::>().join(", ")))] #[arg(short, long)] pub(crate) subtitle: Option, + #[arg(skip)] + output_subtitle_locale: String, #[arg(help = "Name of the output file")] #[arg(long_help = "Name of the output file. \ @@ -75,6 +81,18 @@ pub struct Download { #[arg(value_parser = crate::utils::clap::clap_parse_resolution)] pub(crate) resolution: Resolution, + #[arg( + long, + help = "Specified which language tagging the audio and subtitle tracks and language specific format options should have. \ + Valid options are: 'default' (how Crunchyroll uses it internally), 'ietf' (according to the IETF standard)" + )] + #[arg( + long_help = "Specified which language tagging the audio and subtitle tracks and language specific format options should have. \ + Valid options are: 'default' (how Crunchyroll uses it internally), 'ietf' (according to the IETF standard; you might run in issues as there are multiple locales which resolve to the same IETF language code, e.g. 'es-LA' and 'es-ES' are both resolving to 'es')" + )] + #[arg(value_parser = LanguageTagging::parse)] + pub(crate) language_tagging: Option, + #[arg(help = format!("Presets for converting the video to a specific coding format. \ Available presets: \n {}", FFmpegPreset::available_matches_human_readable().join("\n ")))] #[arg(long_help = format!("Presets for converting the video to a specific coding format. \ @@ -178,6 +196,27 @@ impl Execute for Download { warn!("The '{{resolution}}' format option is deprecated and will be removed in a future version. 
Please use '{{width}}' and '{{height}}' instead") } + if let Some(language_tagging) = &self.language_tagging { + self.audio = resolve_locales(&[self.audio.clone()]).remove(0); + self.subtitle = self + .subtitle + .as_ref() + .map(|s| resolve_locales(&[s.clone()]).remove(0)); + self.output_audio_locale = language_tagging.for_locale(&self.audio); + self.output_subtitle_locale = self + .subtitle + .as_ref() + .map(|s| language_tagging.for_locale(s)) + .unwrap_or_default() + } else { + self.output_audio_locale = self.audio.to_string(); + self.output_subtitle_locale = self + .subtitle + .as_ref() + .map(|s| s.to_string()) + .unwrap_or_default(); + } + Ok(()) } @@ -240,7 +279,16 @@ impl Execute for Download { }) .ffmpeg_preset(self.ffmpeg_preset.clone().unwrap_or_default()) .ffmpeg_threads(self.ffmpeg_threads) - .threads(self.threads); + .threads(self.threads) + .audio_locale_output_map(HashMap::from([( + self.audio.clone(), + self.output_audio_locale.clone(), + )])) + .subtitle_locale_output_map( + self.subtitle.as_ref().map_or(HashMap::new(), |s| { + HashMap::from([(s.clone(), self.output_subtitle_locale.clone())]) + }), + ); for mut single_formats in single_format_collection.into_iter() { // the vec contains always only one item @@ -268,9 +316,14 @@ impl Execute for Download { .as_ref() .map_or((&self.output).into(), |so| so.into()), self.universal_output, + self.language_tagging.as_ref(), ) } else { - format.format_path((&self.output).into(), self.universal_output) + format.format_path( + (&self.output).into(), + self.universal_output, + self.language_tagging.as_ref(), + ) }; let (path, changed) = free_file(formatted_path.clone()); diff --git a/crunchy-cli-core/src/utils/download.rs b/crunchy-cli-core/src/utils/download.rs index c768195..f3dc631 100644 --- a/crunchy-cli-core/src/utils/download.rs +++ b/crunchy-cli-core/src/utils/download.rs @@ -12,7 +12,7 @@ use regex::Regex; use reqwest::Client; use std::borrow::Borrow; use std::cmp::Ordering; -use 
std::collections::BTreeMap; +use std::collections::{BTreeMap, HashMap}; use std::io::Write; use std::path::{Path, PathBuf}; use std::process::{Command, Stdio}; @@ -61,6 +61,8 @@ pub struct DownloadBuilder { no_closed_caption: bool, threads: usize, ffmpeg_threads: Option, + audio_locale_output_map: HashMap, + subtitle_locale_output_map: HashMap, } impl DownloadBuilder { @@ -78,6 +80,8 @@ impl DownloadBuilder { no_closed_caption: false, threads: num_cpus::get(), ffmpeg_threads: None, + audio_locale_output_map: HashMap::new(), + subtitle_locale_output_map: HashMap::new(), } } @@ -99,6 +103,9 @@ impl DownloadBuilder { ffmpeg_threads: self.ffmpeg_threads, formats: vec![], + + audio_locale_output_map: self.audio_locale_output_map, + subtitle_locale_output_map: self.subtitle_locale_output_map, } } } @@ -138,6 +145,9 @@ pub struct Downloader { ffmpeg_threads: Option, formats: Vec, + + audio_locale_output_map: HashMap, + subtitle_locale_output_map: HashMap, } impl Downloader { @@ -426,7 +436,12 @@ impl Downloader { maps.extend(["-map".to_string(), (i + videos.len()).to_string()]); metadata.extend([ format!("-metadata:s:a:{}", i), - format!("language={}", meta.language), + format!( + "language={}", + self.audio_locale_output_map + .get(&meta.language) + .unwrap_or(&meta.language.to_string()) + ), ]); metadata.extend([ format!("-metadata:s:a:{}", i), @@ -457,7 +472,12 @@ impl Downloader { ]); metadata.extend([ format!("-metadata:s:s:{}", i), - format!("language={}", meta.language), + format!( + "language={}", + self.subtitle_locale_output_map + .get(&meta.language) + .unwrap_or(&meta.language.to_string()) + ), ]); metadata.extend([ format!("-metadata:s:s:{}", i), diff --git a/crunchy-cli-core/src/utils/format.rs b/crunchy-cli-core/src/utils/format.rs index 8956a04..7146a55 100644 --- a/crunchy-cli-core/src/utils/format.rs +++ b/crunchy-cli-core/src/utils/format.rs @@ -1,4 +1,5 @@ use crate::utils::filter::real_dedup_vec; +use crate::utils::locale::LanguageTagging; use 
crate::utils::log::tab_info; use crate::utils::os::{is_special_file, sanitize}; use anyhow::Result; @@ -417,7 +418,12 @@ impl Format { } /// Formats the given string if it has specific pattern in it. It also sanitizes the filename. - pub fn format_path(&self, path: PathBuf, universal: bool) -> PathBuf { + pub fn format_path( + &self, + path: PathBuf, + universal: bool, + language_tagging: Option<&LanguageTagging>, + ) -> PathBuf { let path = path .to_string_lossy() .to_string() @@ -427,7 +433,7 @@ impl Format { &sanitize( self.locales .iter() - .map(|(a, _)| a.to_string()) + .map(|(a, _)| language_tagging.map_or(a.to_string(), |t| t.for_locale(a))) .collect::>() .join( &env::var("CRUNCHY_CLI_FORMAT_DELIMITER")
diff --git a/crunchy-cli-core/src/utils/locale.rs b/crunchy-cli-core/src/utils/locale.rs
index 8651078..0827299 100644
--- a/crunchy-cli-core/src/utils/locale.rs
+++ b/crunchy-cli-core/src/utils/locale.rs
@@ -1,4 +1,122 @@
 use crunchyroll_rs::Locale;
+use log::warn;
+
+/// Selects how locales are written to track metadata and language specific format options.
+// `IETF` is an acronym, the upper case variant name is intentional
+#[derive(Clone, Debug)]
+#[allow(clippy::upper_case_acronyms)]
+pub enum LanguageTagging {
+    /// Locale strings of the locales known to Crunchyroll.
+    Default,
+    /// Bare IETF language codes, e.g. `es`.
+    IETF,
+}
+
+impl LanguageTagging {
+    /// Parses the command line value `default` or `ietf`, ignoring the case.
+    ///
+    /// # Errors
+    /// Returns an error message if `s` is none of the valid values.
+    pub fn parse(s: &str) -> Result<Self, String> {
+        match s.to_lowercase().as_str() {
+            "default" => Ok(Self::Default),
+            "ietf" => Ok(Self::IETF),
+            _ => Err(format!("'{}' is not a valid language tagging", s)),
+        }
+    }
+
+    /// Converts every locale with [`LanguageTagging::for_locale`] and keeps their order.
+    pub fn convert_locales(&self, locales: &[Locale]) -> Vec<String> {
+        locales.iter().map(|l| self.for_locale(l)).collect()
+    }
+
+    /// Returns the tag which should be written out for `locale`.
+    pub fn for_locale(&self, locale: &Locale) -> String {
+        let codes = ietf_language_codes();
+        let Some((tag, group)) = codes.iter().find(|(_, l)| l.contains(locale)) else {
+            // if no matching IETF language code was found, just pass it as it is
+            return locale.to_string();
+        };
+
+        match self {
+            // NOTE(review): this is the first locale of the group and not `locale` itself, e.g.
+            // `pt-BR` is written out as `pt-PT`; confirm that this is wanted
+            LanguageTagging::Default => group[0].to_string(),
+            LanguageTagging::IETF => tag.to_string(),
+        }
+    }
+}
+
+/// Replaces bare IETF language codes in `locales` with a locale known to Crunchyroll.
+///
+/// Known locales are kept as they are, an IETF language code resolves to the first locale of
+/// its group. Everything else is kept unchanged and a warning is logged.
+pub fn resolve_locales(locales: &[Locale]) -> Vec<Locale> {
+    let ietf_language_codes = ietf_language_codes();
+    let all_locales = Locale::all();
+
+    let mut resolved = Vec::with_capacity(locales.len());
+    for locale in locales {
+        if all_locales.contains(locale) {
+            resolved.push(locale.clone());
+            continue;
+        }
+
+        let code = locale.to_string();
+        let group = ietf_language_codes
+            .iter()
+            .find(|(tag, _)| *tag == code.as_str())
+            .and_then(|(_, group)| group.split_first());
+        let Some((first, alternatives)) = group else {
+            resolved.push(locale.clone());
+            warn!("Unknown locale '{}'", locale);
+            continue;
+        };
+
+        resolved.push(first.clone());
+        // ignoring `Locale::en_IN` because I think the majority of users which want english
+        // audio / subs want the "actual" english version and not the hindi accent dub
+        let alternatives = alternatives
+            .iter()
+            .filter(|l| **l != Locale::en_IN)
+            .map(|l| format!("'{l}'"))
+            .collect::<Vec<String>>();
+        if !alternatives.is_empty() {
+            warn!("Resolving locale '{}' to '{}', but there are some alternatives: {}. If you want an alternative instead, please write it completely out instead of '{}'", locale, first, alternatives.join(", "), locale);
+        }
+    }
+
+    resolved
+}
+
+/// Returns the IETF language codes together with the locales they stand for.
+///
+/// The first locale of a group is the one its language code resolves to.
+fn ietf_language_codes<'a>() -> Vec<(&'a str, Vec<Locale>)> {
+    vec![
+        ("ar", vec![Locale::ar_ME, Locale::ar_SA]),
+        ("ca", vec![Locale::ca_ES]),
+        ("de", vec![Locale::de_DE]),
+        ("en", vec![Locale::en_US, Locale::en_IN]),
+        ("es", vec![Locale::es_ES, Locale::es_419, Locale::es_LA]),
+        ("fr", vec![Locale::fr_FR]),
+        ("hi", vec![Locale::hi_IN]),
+        ("id", vec![Locale::id_ID]),
+        ("it", vec![Locale::it_IT]),
+        ("ja", vec![Locale::ja_JP]),
+        ("ko", vec![Locale::ko_KR]),
+        ("ms", vec![Locale::ms_MY]),
+        ("pl", vec![Locale::pl_PL]),
+        ("pt", vec![Locale::pt_PT, Locale::pt_BR]),
+        ("ru", vec![Locale::ru_RU]),
+        ("ta", vec![Locale::ta_IN]),
+        ("te", vec![Locale::te_IN]),
+        ("th", vec![Locale::th_TH]),
+        ("tr", vec![Locale::tr_TR]),
+        ("vi", vec![Locale::vi_VN]),
+        ("zh", vec![Locale::zh_CN, Locale::zh_HK, Locale::zh_TW]),
+    ]
+}
 
 /// Return the locale of the system.
 pub fn system_locale() -> Locale {