diff --git a/README.md b/README.md index 7316f57..88c13ee 100644 --- a/README.md +++ b/README.md @@ -462,7 +462,7 @@ The `archive` command lets you download episodes with multiple audios and subtit In the best case, when multiple audio & subtitle tracks are used, there is only one *video* track and all other languages can be stored as audio-only. But, as said, this is not always the case. With the `-m` / `--merge` flag you can define the behaviour when an episodes' video tracks differ in length. - Valid options are `audio` - store one video and all other languages as audio only; `video` - store the video + audio for every language; `auto` - detect if videos differ in length: if so, behave like `video` - otherwise like `audio`. + Valid options are `audio` - store one video and all other languages as audio only; `video` - store the video + audio for every language; `auto` - detect if videos differ in length: if so, behave like `video` - otherwise like `audio`; `sync` - detect if videos differ in length: if so, it tries to find the offset of matching audio parts and removes the offset from the beginning, otherwise it behaves like `audio`. Subtitles will always match the primary audio and video. ```shell @@ -482,15 +482,12 @@ The `archive` command lets you download episodes with multiple audios and subtit Default are `200` milliseconds. -- Sync start +- Sync tolerance - If you want that all videos of the same episode should start at the same time and `--merge` doesn't fit your needs (e.g. one video has an intro, all other doesn't), you might consider using the `--sync-start`. - It tries to sync the timing of all downloaded audios to match one video. - This is done by downloading the first few segments/frames of all video tracks that differ in length and comparing them frame by frame. 
- The flag takes an optional value determines how accurate the syncing is, generally speaking everything over 15 begins to be more inaccurate and everything below 6 is too accurate (and won't succeed). - When the syncing fails, the command is continued as if `--sync-start` wasn't provided for this episode. + Sometimes two video tracks are downloaded with `--merge` set to `sync` because the audio fingerprinting fails to identify matching audio parts (e.g. opening). + To prevent this, you can use the `--sync-tolerance` flag to specify the difference by which two fingerprints are considered equal. - Default is `7.5`. + Default is `6`. - Language tagging diff --git a/crunchy-cli-core/src/archive/command.rs b/crunchy-cli-core/src/archive/command.rs index 3525a4a..87c133e 100644 --- a/crunchy-cli-core/src/archive/command.rs +++ b/crunchy-cli-core/src/archive/command.rs @@ -90,32 +90,31 @@ pub struct Archive { pub(crate) resolution: Resolution, #[arg( - help = "Sets the behavior of the stream merging. Valid behaviors are 'auto', 'audio' and 'video'" + help = "Sets the behavior of the stream merging. Valid behaviors are 'auto', 'sync', 'audio' and 'video'" )] #[arg( long_help = "Because of local restrictions (or other reasons) some episodes with different languages does not have the same length (e.g. when some scenes were cut out). \ With this flag you can set the behavior when handling multiple language. 
- Valid options are 'audio' (stores one video and all other languages as audio only), 'video' (stores the video + audio for every language) and 'auto' (detects if videos differ in length: if so, behave like 'video' else like 'audio')" + Valid options are 'audio' (stores one video and all other languages as audio only), 'video' (stores the video + audio for every language), 'auto' (detects if videos differ in length: if so, behave like 'video' else like 'audio') and 'sync' (detects if videos differ in length: if so, tries to find the offset of matching audio parts and removes it from the beginning, otherwise it behaves like 'audio')" )] #[arg(short, long, default_value = "auto")] #[arg(value_parser = MergeBehavior::parse)] pub(crate) merge: MergeBehavior, #[arg( - help = "If the merge behavior is 'auto', only download multiple video tracks if their length difference is higher than the given milliseconds" + help = "If the merge behavior is 'auto' or 'sync', consider videos to be of equal lengths if the difference in length is smaller than the specified milliseconds" )] #[arg(long, default_value_t = 200)] pub(crate) merge_time_tolerance: u32, - #[arg(help = "Tries to sync the timing of all downloaded audios to match one video")] #[arg( - long_help = "Tries to sync the timing of all downloaded audios to match one video. \ - This is done by downloading the first few segments/frames of all video tracks that differ in length and comparing them frame by frame. \ - The value of this flag determines how accurate the syncing is, generally speaking everything over 15 begins to be more inaccurate and everything below 6 is too accurate (and won't succeed). \ - If you want to provide a custom value to this flag, you have to set it with an equals (e.g. `--sync-start=10` instead of `--sync-start 10`). 
\ - When the syncing fails, the command is continued as if `--sync-start` wasn't provided for this episode - " + help = "If the merge behavior is 'sync', specify the difference by which two fingerprints are considered equal" )] - #[arg(long, require_equals = true, num_args = 0..=1, default_missing_value = "7.5")] - pub(crate) sync_start: Option, + #[arg(long, default_value_t = 6)] + pub(crate) sync_tolerance: u32, + #[arg( + help = "If the merge behavior is 'sync', specify the amount of offset determination runs from which the final offset is calculated" + )] + #[arg(long, default_value_t = 4)] + pub(crate) sync_precision: u32, #[arg( help = "Specified which language tagging the audio and subtitle tracks and language specific format options should have. \ @@ -229,18 +228,10 @@ impl Execute for Archive { } if self.include_chapters + && !matches!(self.merge, MergeBehavior::Sync) && !matches!(self.merge, MergeBehavior::Audio) - && self.sync_start.is_none() { - bail!("`--include-chapters` can only be used if `--merge` is set to 'audio' or `--sync-start` is set") - } - - if !matches!(self.merge, MergeBehavior::Auto) && self.sync_start.is_some() { - bail!("`--sync-start` can only be used if `--merge` is set to `auto`") - } - - if self.sync_start.is_some() && self.ffmpeg_preset.is_none() { - warn!("Using `--sync-start` without `--ffmpeg-preset` might produce worse sync results than with `--ffmpeg-preset` set") + bail!("`--include-chapters` can only be used if `--merge` is set to 'audio' or 'sync'") } self.audio = all_locale_in_locales(self.audio.clone()); @@ -317,7 +308,14 @@ impl Execute for Archive { .audio_sort(Some(self.audio.clone())) .subtitle_sort(Some(self.subtitle.clone())) .no_closed_caption(self.no_closed_caption) - .sync_start_value(self.sync_start) + .sync_tolerance(match self.merge { + MergeBehavior::Sync => Some(self.sync_tolerance), + _ => None, + }) + .sync_precision(match self.merge { + MergeBehavior::Sync => Some(self.sync_precision), + _ => None, + }) 
.threads(self.threads) .audio_locale_output_map( zip(self.audio.clone(), self.output_audio_locales.clone()).collect(), @@ -560,7 +558,7 @@ async fn get_format( }, }, }), - MergeBehavior::Auto => { + MergeBehavior::Auto | MergeBehavior::Sync => { let mut d_formats: Vec<(Duration, DownloadFormat)> = vec![]; for (single_format, video, audio, subtitles) in format_pairs { diff --git a/crunchy-cli-core/src/utils/download.rs b/crunchy-cli-core/src/utils/download.rs index bd7bf3d..67cb66a 100644 --- a/crunchy-cli-core/src/utils/download.rs +++ b/crunchy-cli-core/src/utils/download.rs @@ -2,15 +2,13 @@ use crate::utils::ffmpeg::FFmpegPreset; use crate::utils::filter::real_dedup_vec; use crate::utils::fmt::format_time_delta; use crate::utils::log::progress; -use crate::utils::os::{ - cache_dir, is_special_file, temp_directory, temp_named_pipe, tempdir, tempfile, -}; +use crate::utils::os::{cache_dir, is_special_file, temp_directory, temp_named_pipe, tempfile}; use crate::utils::rate_limit::RateLimiterService; +use crate::utils::sync::{sync_audios, SyncAudio}; use anyhow::{bail, Result}; use chrono::{NaiveTime, TimeDelta}; use crunchyroll_rs::media::{SkipEvents, SkipEventsEvent, StreamData, StreamSegment, Subtitle}; use crunchyroll_rs::Locale; -use image_hasher::{Hasher, HasherConfig, ImageHash}; use indicatif::{ProgressBar, ProgressDrawTarget, ProgressFinish, ProgressStyle}; use log::{debug, warn, LevelFilter}; use regex::Regex; @@ -39,6 +37,7 @@ pub enum MergeBehavior { Video, Audio, Auto, + Sync, } impl MergeBehavior { @@ -47,6 +46,7 @@ impl MergeBehavior { "video" => MergeBehavior::Video, "audio" => MergeBehavior::Audio, "auto" => MergeBehavior::Auto, + "sync" => MergeBehavior::Sync, _ => return Err(format!("'{}' is not a valid merge behavior", s)), }) } @@ -64,7 +64,8 @@ pub struct DownloadBuilder { force_hardsub: bool, download_fonts: bool, no_closed_caption: bool, - sync_start_value: Option, + sync_tolerance: Option, + sync_precision: Option, threads: usize, 
ffmpeg_threads: Option, audio_locale_output_map: HashMap, @@ -84,7 +85,8 @@ impl DownloadBuilder { force_hardsub: false, download_fonts: false, no_closed_caption: false, - sync_start_value: None, + sync_tolerance: None, + sync_precision: None, threads: num_cpus::get(), ffmpeg_threads: None, audio_locale_output_map: HashMap::new(), @@ -106,7 +108,8 @@ impl DownloadBuilder { download_fonts: self.download_fonts, no_closed_caption: self.no_closed_caption, - sync_start_value: self.sync_start_value, + sync_tolerance: self.sync_tolerance, + sync_precision: self.sync_precision, download_threads: self.threads, ffmpeg_threads: self.ffmpeg_threads, @@ -165,7 +168,8 @@ pub struct Downloader { download_fonts: bool, no_closed_caption: bool, - sync_start_value: Option, + sync_tolerance: Option, + sync_precision: Option, download_threads: usize, ffmpeg_threads: Option, @@ -245,6 +249,7 @@ impl Downloader { let mut video_offset = None; let mut audio_offsets = HashMap::new(); let mut subtitle_offsets = HashMap::new(); + let mut raw_audios = vec![]; let mut videos = vec![]; let mut audios = vec![]; let mut subtitles = vec![]; @@ -263,40 +268,32 @@ impl Downloader { .max() .unwrap(); - if self.formats.len() > 1 && self.sync_start_value.is_some() { - let all_segments_count: Vec = self - .formats - .iter() - .map(|f| f.video.0.segments().len()) - .collect(); - let sync_segments = 11.max( - all_segments_count.iter().max().unwrap() - all_segments_count.iter().min().unwrap(), - ); - let mut sync_vids = vec![]; - for (i, format) in self.formats.iter().enumerate() { + // downloads all audios + for (i, format) in self.formats.iter().enumerate() { + for (stream_data, locale) in &format.audios { let path = self - .download_video( - &format.video.0, - format!("Downloading video #{} sync segments", i + 1), - Some(sync_segments), + .download_audio( + stream_data, + format!("{:<1$}", format!("Downloading {} audio", locale), fmt_space), ) .await?; - sync_vids.push(SyncVideo { + 
raw_audios.push(SyncAudio { + format_id: i, path, - length: len_from_segments(&format.video.0.segments()), - available_frames: (len_from_segments( - &format.video.0.segments()[0..sync_segments], - ) - .num_milliseconds() as f64 - * format.video.0.fps().unwrap() - / 1000.0) as u64, - idx: i, + locale: locale.clone(), + video_idx: i, }) } + } + if self.formats.len() > 1 && self.sync_tolerance.is_some() { let _progress_handler = progress!("Syncing video start times (this might take some time)"); - let mut offsets = sync_videos(sync_vids, self.sync_start_value.unwrap())?; + let mut offsets = sync_audios( + &raw_audios, + self.sync_tolerance.unwrap(), + self.sync_precision.unwrap(), + )?; drop(_progress_handler); let mut offset_pre_checked = false; @@ -307,19 +304,14 @@ impl Downloader { .enumerate() .map(|(i, f)| { len_from_segments(&f.video.0.segments()) - - TimeDelta::milliseconds( - tmp_offsets - .get(&i) - .map(|o| (*o as f64 / f.video.0.fps().unwrap() * 1000.0) as i64) - .unwrap_or_default(), - ) + - tmp_offsets.get(&i).map(|o| *o).unwrap_or_default() }) .collect(); let min = formats_with_offset.iter().min().unwrap(); let max = formats_with_offset.iter().max().unwrap(); if max.num_seconds() - min.num_seconds() > 15 { - warn!("Found difference of >15 seconds after sync, skipping applying it"); + warn!("Found difference of >15 seconds after sync, so the application was skipped"); offsets = None; offset_pre_checked = true } @@ -331,7 +323,7 @@ impl Downloader { let mut audio_count: usize = 0; let mut subtitle_count: usize = 0; for (i, format) in self.formats.iter().enumerate() { - let format_fps = format.video.0.fps().unwrap(); + let offset = offsets.get(&i).map(|o| *o).unwrap_or_default(); let format_len = format .video .0 @@ -339,7 +331,7 @@ impl Downloader { .iter() .map(|s| s.length.as_millis()) .sum::() as u64 - - offsets.get(&i).map_or(0, |o| *o); + - offset.num_milliseconds() as u64; if format_len > root_format_length { root_format_idx = i; root_format_length 
= format_len; @@ -347,23 +339,13 @@ impl Downloader { for _ in &format.audios { if let Some(offset) = &offsets.get(&i) { - audio_offsets.insert( - audio_count, - TimeDelta::milliseconds( - (**offset as f64 / format_fps * 1000.0) as i64, - ), - ); + audio_offsets.insert(audio_count, **offset); } audio_count += 1 } for _ in &format.subtitles { if let Some(offset) = &offsets.get(&i) { - subtitle_offsets.insert( - subtitle_count, - TimeDelta::milliseconds( - (**offset as f64 / format_fps * 1000.0) as i64, - ), - ); + subtitle_offsets.insert(subtitle_count, **offset); } subtitle_count += 1 } @@ -390,20 +372,28 @@ impl Downloader { root_format.subtitles.extend(subtitle_append); self.formats = vec![root_format]; - video_offset = offsets.get(&root_format_idx).map(|o| { - TimeDelta::milliseconds( - (*o as f64 / self.formats[0].video.0.fps().unwrap() * 1000.0) as i64, - ) - }) + video_offset = offsets.get(&root_format_idx).map(|o| *o); + for raw_audio in raw_audios.iter_mut() { + raw_audio.video_idx = root_format_idx; + } } else { for format in &mut self.formats { format.metadata.skip_events = None } + if !offset_pre_checked { + warn!("Couldn't find reliable sync positions") + } } + } - if !offset_pre_checked { - warn!("Couldn't find reliable sync positions") - } + // add audio metadata + for raw_audio in raw_audios { + audios.push(FFmpegAudioMeta { + path: raw_audio.path, + locale: raw_audio.locale, + start_time: audio_offsets.get(&raw_audio.format_id).map(|o| *o), + video_idx: raw_audio.video_idx, + }) } // downloads all videos @@ -435,24 +425,6 @@ impl Downloader { }) } - // downloads all audios - for (i, format) in self.formats.iter().enumerate() { - for (j, (stream_data, locale)) in format.audios.iter().enumerate() { - let path = self - .download_audio( - stream_data, - format!("{:<1$}", format!("Downloading {} audio", locale), fmt_space), - ) - .await?; - audios.push(FFmpegAudioMeta { - path, - locale: locale.clone(), - start_time: audio_offsets.get(&j).cloned(), - 
video_idx: i, - }) - } - } - for (i, format) in self.formats.iter().enumerate() { if format.subtitles.is_empty() { continue; @@ -1538,134 +1510,6 @@ async fn ffmpeg_progress( Ok(()) } -struct SyncVideo { - path: TempPath, - length: TimeDelta, - available_frames: u64, - idx: usize, -} - -fn sync_videos(mut sync_videos: Vec, value: f64) -> Result>> { - let mut result = HashMap::new(); - let hasher = HasherConfig::new().preproc_dct().to_hasher(); - let start_frame = 300; - - sync_videos.sort_by_key(|sv| sv.length); - - let sync_base = sync_videos.remove(0); - let sync_hashes = extract_frame_hashes(&sync_base.path, start_frame, 50, &hasher)?; - - for sync_video in sync_videos { - let mut highest_frame_match = f64::INFINITY; - let mut frame = start_frame; - let mut hashes = vec![]; - - loop { - if frame == sync_video.available_frames { - debug!( - "Failed to sync videos, end of stream {} reached (highest frame match: {})", - sync_video.idx + 1, - highest_frame_match - ); - return Ok(None); - } - - hashes.drain(0..(hashes.len() as i32 - sync_hashes.len() as i32).max(0) as usize); - hashes.extend(extract_frame_hashes( - &sync_video.path, - frame, - 300 - hashes.len() as u64, - &hasher, - )?); - - let mut check_frame_windows_result: Vec<(usize, f64)> = - check_frame_windows(&sync_hashes, &hashes) - .into_iter() - .enumerate() - .collect(); - check_frame_windows_result.sort_by(|(_, a), (_, b)| a.partial_cmp(&b).unwrap()); - if check_frame_windows_result[0].1 <= value { - result.insert( - sync_video.idx, - frame + check_frame_windows_result[0].0 as u64 - start_frame, - ); - break; - } else if check_frame_windows_result[0].1 < highest_frame_match { - highest_frame_match = check_frame_windows_result[0].1 - } - - frame = (frame + 300 - sync_hashes.len() as u64).min(sync_video.available_frames) - } - } - - Ok(Some(result)) -} - -fn extract_frame_hashes( - input_file: &Path, - start_frame: u64, - frame_count: u64, - hasher: &Hasher, -) -> Result> { - let frame_dir = 
tempdir(format!( - "{}_sync_frames", - input_file - .file_name() - .unwrap_or_default() - .to_string_lossy() - .trim_end_matches( - &input_file - .file_stem() - .unwrap_or_default() - .to_string_lossy() - .to_string() - ) - ))?; - let extract_output = Command::new("ffmpeg") - .arg("-hide_banner") - .arg("-y") - .args(["-i", input_file.to_string_lossy().to_string().as_str()]) - .args([ - "-vf", - format!( - r#"select=between(n\,{}\,{}),setpts=PTS-STARTPTS,scale=-1:240"#, - start_frame, - start_frame + frame_count - ) - .as_str(), - ]) - .args(["-vframes", frame_count.to_string().as_str()]) - .arg(format!("{}/%03d.jpg", frame_dir.path().to_string_lossy())) - .output()?; - if !extract_output.status.success() { - bail!( - "{}", - String::from_utf8_lossy(extract_output.stderr.as_slice()) - ) - } - - let mut hashes = vec![]; - for file in frame_dir.path().read_dir()? { - let file = file?; - let img = image::open(file.path())?; - hashes.push(hasher.hash_image(&img)) - } - Ok(hashes) -} - -fn check_frame_windows(base_hashes: &[ImageHash], check_hashes: &[ImageHash]) -> Vec { - let mut results = vec![]; - - for i in 0..(check_hashes.len() - base_hashes.len()) { - let check_window = &check_hashes[i..(base_hashes.len() + i)]; - let sum = std::iter::zip(base_hashes, check_window) - .map(|(a, b)| a.dist(b)) - .sum::(); - results.push(sum as f64 / check_window.len() as f64); - } - results -} - fn len_from_segments(segments: &[StreamSegment]) -> TimeDelta { TimeDelta::milliseconds(segments.iter().map(|s| s.length.as_millis()).sum::() as i64) } diff --git a/crunchy-cli-core/src/utils/mod.rs b/crunchy-cli-core/src/utils/mod.rs index 72a0908..6260047 100644 --- a/crunchy-cli-core/src/utils/mod.rs +++ b/crunchy-cli-core/src/utils/mod.rs @@ -11,4 +11,5 @@ pub mod log; pub mod os; pub mod parse; pub mod rate_limit; +pub mod sync; pub mod video; diff --git a/crunchy-cli-core/src/utils/os.rs b/crunchy-cli-core/src/utils/os.rs index b65abc2..a216f87 100644 --- 
a/crunchy-cli-core/src/utils/os.rs +++ b/crunchy-cli-core/src/utils/os.rs @@ -7,7 +7,7 @@ use std::pin::Pin; use std::process::{Command, Stdio}; use std::task::{Context, Poll}; use std::{env, fs, io}; -use tempfile::{Builder, NamedTempFile, TempDir, TempPath}; +use tempfile::{Builder, NamedTempFile, TempPath}; use tokio::io::{AsyncRead, ReadBuf}; pub fn has_ffmpeg() -> bool { @@ -46,22 +46,6 @@ pub fn tempfile>(suffix: S) -> io::Result { Ok(tempfile) } -/// Any tempdir should be created with this function. The prefix and directory of every directory -/// created with this function stays the same which is helpful to query all existing tempdirs and -/// e.g. remove them in a case of ctrl-c. Having one function also good to prevent mistakes like -/// setting the wrong prefix if done manually. -pub fn tempdir>(suffix: S) -> io::Result { - let tempdir = Builder::default() - .prefix(".crunchy-cli_") - .suffix(suffix.as_ref()) - .tempdir_in(temp_directory())?; - debug!( - "Created temporary directory: {}", - tempdir.path().to_string_lossy() - ); - Ok(tempdir) -} - pub fn cache_dir>(name: S) -> io::Result { let cache_dir = temp_directory().join(format!(".crunchy-cli_{}_cache", name.as_ref())); fs::create_dir_all(&cache_dir)?; diff --git a/crunchy-cli-core/src/utils/sync.rs b/crunchy-cli-core/src/utils/sync.rs new file mode 100644 index 0000000..9e89046 --- /dev/null +++ b/crunchy-cli-core/src/utils/sync.rs @@ -0,0 +1,422 @@ +use std::{ + cmp, + collections::{HashMap, HashSet}, + ops::Not, + path::Path, + process::Command, +}; + +use chrono::TimeDelta; +use crunchyroll_rs::Locale; +use log::debug; +use tempfile::TempPath; + +use anyhow::{bail, Result}; + +use super::fmt::format_time_delta; + +pub struct SyncAudio { + pub format_id: usize, + pub path: TempPath, + pub locale: Locale, + pub video_idx: usize, +} + +#[derive(Debug, Clone, Copy)] +struct TimeRange { + start: f64, + end: f64, +} + +pub fn sync_audios( + available_audios: &Vec, + sync_tolerance: u32, + 
sync_precision: u32, +) -> Result>> { + let mut result: HashMap = HashMap::new(); + + let mut sync_audios = vec![]; + let mut chromaprints = HashMap::new(); + let mut formats = HashSet::new(); + for audio in available_audios { + if formats.contains(&audio.format_id) { + continue; + } + formats.insert(audio.format_id); + sync_audios.push((audio.format_id, &audio.path)); + chromaprints.insert( + audio.format_id, + generate_chromaprint( + &audio.path, + &TimeDelta::zero(), + &TimeDelta::zero(), + &TimeDelta::zero(), + )?, + ); + } + sync_audios.sort_by_key(|sync_audio| chromaprints.get(&sync_audio.0).unwrap().len()); + + let base_audio = sync_audios.remove(0); + + let mut start = f64::MAX; + let mut end = f64::MIN; + let mut initial_offsets = HashMap::new(); + for audio in &sync_audios { + debug!( + "Initial comparison of format {} to {}", + audio.0, &base_audio.0 + ); + + let (lhs_ranges, rhs_ranges) = compare_chromaprints( + chromaprints.get(&base_audio.0).unwrap(), + chromaprints.get(&audio.0).unwrap(), + sync_tolerance, + ); + if lhs_ranges.is_empty() || rhs_ranges.is_empty() { + bail!( + "Failed to sync videos, couldn't find matching audio parts between format {} and {}", + base_audio.0 + 1, + audio.0 + 1 + ); + } + let lhs_range = lhs_ranges[0]; + let rhs_range = rhs_ranges[0]; + start = start.min(lhs_range.start); + end = end.max(lhs_range.end); + start = start.min(rhs_range.start); + end = end.max(rhs_range.end); + let offset = TimeDelta::milliseconds(((rhs_range.start - lhs_range.start) * 1000.0) as i64); + initial_offsets.insert(audio.0, TimeDelta::zero().checked_sub(&offset).unwrap()); + debug!( + "Found initial offset of {}ms ({} - {} {}s) ({} - {} {}s) for format {} to {}", + offset.num_milliseconds(), + lhs_range.start, + lhs_range.end, + lhs_range.end - lhs_range.start, + rhs_range.start, + rhs_range.end, + rhs_range.end - rhs_range.start, + audio.0, + base_audio.0 + ); + } + + debug!( + "Found matching audio parts at {} - {}, narrowing search", + 
start, end + ); + + let start = TimeDelta::milliseconds((start * 1000.0) as i64 - 20000); + let end = TimeDelta::milliseconds((end * 1000.0) as i64 + 20000); + + for sync_audio in &sync_audios { + let chromaprint = generate_chromaprint( + &sync_audio.1, + &start, + &end, + initial_offsets.get(&sync_audio.0).unwrap(), + )?; + chromaprints.insert(sync_audio.0, chromaprint); + } + + let mut runs: HashMap = HashMap::new(); + let iterator_range_limits: i64 = 2 ^ sync_precision as i64; + for i in -iterator_range_limits..=iterator_range_limits { + let base_offset = TimeDelta::milliseconds( + ((0.128 / iterator_range_limits as f64 * i as f64) * 1000.0) as i64, + ); + chromaprints.insert( + base_audio.0, + generate_chromaprint(base_audio.1, &start, &end, &base_offset)?, + ); + for audio in &sync_audios { + let initial_offset = initial_offsets.get(&audio.0).map(|o| *o).unwrap(); + let offset = find_offset( + (&base_audio.0, chromaprints.get(&base_audio.0).unwrap()), + &base_offset, + (&audio.0, chromaprints.get(&audio.0).unwrap()), + &initial_offset, + &start, + sync_tolerance, + ); + if offset.is_none() { + continue; + } + let offset = offset.unwrap(); + + result.insert( + audio.0, + result + .get(&audio.0) + .map(|o| *o) + .unwrap_or_default() + .checked_add(&offset) + .unwrap(), + ); + runs.insert( + audio.0, + runs.get(&audio.0).map(|o| *o).unwrap_or_default() + 1, + ); + } + } + let mut result: HashMap = result + .iter() + .map(|(format_id, offset)| { + ( + *format_id, + TimeDelta::milliseconds( + offset.num_milliseconds() / runs.get(format_id).map(|o| *o).unwrap(), + ), + ) + }) + .collect(); + result.insert(base_audio.0, TimeDelta::milliseconds(0)); + + Ok(Some(result)) +} + +fn find_offset( + lhs: (&usize, &Vec), + lhs_shift: &TimeDelta, + rhs: (&usize, &Vec), + rhs_shift: &TimeDelta, + start: &TimeDelta, + sync_tolerance: u32, +) -> Option { + let (lhs_ranges, rhs_ranges) = compare_chromaprints(&lhs.1, &rhs.1, sync_tolerance); + if lhs_ranges.is_empty() || 
rhs_ranges.is_empty() { + return None; + } + let lhs_range = lhs_ranges[0]; + let rhs_range = rhs_ranges[0]; + let offset = rhs_range.end - lhs_range.end; + let offset = TimeDelta::milliseconds((offset * 1000.0) as i64) + .checked_add(&lhs_shift)? + .checked_sub(&rhs_shift)?; + debug!( + "Found offset of {}ms ({} - {} {}s) ({} - {} {}s) for format {} to {}", + offset.num_milliseconds(), + lhs_range.start + start.num_milliseconds() as f64 / 1000.0, + lhs_range.end + start.num_milliseconds() as f64 / 1000.0, + lhs_range.end - lhs_range.start, + rhs_range.start + start.num_milliseconds() as f64 / 1000.0, + rhs_range.end + start.num_milliseconds() as f64 / 1000.0, + rhs_range.end - rhs_range.start, + rhs.0, + lhs.0 + ); + return Some(offset); +} + +fn generate_chromaprint( + input_file: &Path, + start: &TimeDelta, + end: &TimeDelta, + offset: &TimeDelta, +) -> Result> { + let mut ss_argument: &TimeDelta = &start.checked_sub(offset).unwrap(); + let mut offset_argument = &TimeDelta::zero(); + if offset.abs() > *offset { + ss_argument = start; + offset_argument = &offset; + }; + + let mut command = Command::new("ffmpeg"); + command + .arg("-hide_banner") + .arg("-y") + .args(["-ss", format_time_delta(ss_argument).as_str()]); + + if end.is_zero().not() { + command.args(["-to", format_time_delta(end).as_str()]); + } + + command + .args(["-itsoffset", format_time_delta(offset_argument).as_str()]) + .args(["-i", input_file.to_string_lossy().to_string().as_str()]) + .args(["-ac", "2"]) + .args(["-f", "chromaprint"]) + .args(["-fp_format", "raw"]) + .arg("-"); + + let extract_output = command.output()?; + + if !extract_output.status.success() { + bail!( + "{}", + String::from_utf8_lossy(extract_output.stderr.as_slice()) + ); + } + let raw_chromaprint = extract_output.stdout.as_slice(); + let length = raw_chromaprint.len(); + if length % 4 != 0 { + bail!("chromaprint bytes should be a multiple of 4"); + } + let mut chromaprint = Vec::with_capacity(length / 4); + for i in 
0..length / 4 { + chromaprint.push(as_u32_le( + raw_chromaprint[i * 4 + 0..i * 4 + 4].try_into().unwrap(), + )); + } + return Ok(chromaprint); +} + +fn compare_chromaprints( + lhs_chromaprint: &Vec, + rhs_chromaprint: &Vec, + sync_tolerance: u32, +) -> (Vec, Vec) { + let lhs_inverse_index = create_inverse_index(&lhs_chromaprint); + let rhs_inverse_index = create_inverse_index(&rhs_chromaprint); + + let mut possible_shifts = HashSet::new(); + for lhs_pair in lhs_inverse_index { + let original_point = lhs_pair.0; + for i in -2..=2 { + let modified_point = (original_point as i32 + i) as u32; + if rhs_inverse_index.contains_key(&modified_point) { + let rhs_index = rhs_inverse_index.get(&modified_point).map(|o| *o).unwrap(); + possible_shifts.insert(rhs_index as i32 - lhs_pair.1 as i32); + } + } + } + + let mut all_lhs_time_ranges = vec![]; + let mut all_rhs_time_ranges = vec![]; + for shift_amount in possible_shifts { + let time_range_pair = find_time_ranges( + &lhs_chromaprint, + &rhs_chromaprint, + shift_amount, + sync_tolerance, + ); + if time_range_pair.is_none() { + continue; + } + let (mut lhs_time_ranges, mut rhs_time_ranges) = time_range_pair.unwrap(); + let mut lhs_time_ranges: Vec = lhs_time_ranges + .drain(..) + .filter(|time_range| { + (20.0 < (time_range.end - time_range.start)) + && ((time_range.end - time_range.start) < 180.0) + && time_range.end > 0.0 + }) + .collect(); + lhs_time_ranges.sort_by(|a, b| (b.end - b.start).total_cmp(&(a.end - a.start))); + let mut rhs_time_ranges: Vec = rhs_time_ranges + .drain(..) 
+ .filter(|time_range| { + (20.0 < (time_range.end - time_range.start)) + && ((time_range.end - time_range.start) < 180.0) + && time_range.end > 0.0 + }) + .collect(); + rhs_time_ranges.sort_by(|a, b| (b.end - b.start).total_cmp(&(a.end - a.start))); + if lhs_time_ranges.is_empty() || rhs_time_ranges.is_empty() { + continue; + } + + all_lhs_time_ranges.push(lhs_time_ranges[0]); + all_rhs_time_ranges.push(rhs_time_ranges[0]); + } + all_lhs_time_ranges.sort_by(|a, b| (a.end - a.start).total_cmp(&(b.end - b.start))); + all_lhs_time_ranges.reverse(); + all_rhs_time_ranges.sort_by(|a, b| (a.end - a.start).total_cmp(&(b.end - b.start))); + all_rhs_time_ranges.reverse(); + + return (all_lhs_time_ranges, all_rhs_time_ranges); +} + +fn create_inverse_index(chromaprint: &Vec) -> HashMap { + let mut inverse_index = HashMap::with_capacity(chromaprint.capacity()); + for i in 0..chromaprint.capacity() { + inverse_index.insert(chromaprint[i], i); + } + return inverse_index; +} + +fn find_time_ranges( + lhs_chromaprint: &Vec, + rhs_chromaprint: &Vec, + shift_amount: i32, + sync_tolerance: u32, +) -> Option<(Vec, Vec)> { + let mut lhs_shift: i32 = 0; + let mut rhs_shift: i32 = 0; + if shift_amount < 0 { + lhs_shift -= shift_amount; + } else { + rhs_shift += shift_amount; + } + + let mut lhs_matching_timestamps = vec![]; + let mut rhs_matching_timestamps = vec![]; + let upper_limit = + cmp::min(lhs_chromaprint.len(), rhs_chromaprint.len()) as i32 - shift_amount.abs(); + + for i in 0..upper_limit { + let lhs_position = i + lhs_shift; + let rhs_position = i + rhs_shift; + let difference = (lhs_chromaprint[lhs_position as usize] + ^ rhs_chromaprint[rhs_position as usize]) + .count_ones(); + + if difference > sync_tolerance { + continue; + } + + lhs_matching_timestamps.push(lhs_position as f64 * 0.128); + rhs_matching_timestamps.push(rhs_position as f64 * 0.128); + } + lhs_matching_timestamps.push(f64::MAX); + rhs_matching_timestamps.push(f64::MAX); + + let lhs_time_ranges = 
timestamps_to_ranges(lhs_matching_timestamps); + if lhs_time_ranges.is_none() { + return None; + } + let lhs_time_ranges = lhs_time_ranges.unwrap(); + let rhs_time_ranges = timestamps_to_ranges(rhs_matching_timestamps).unwrap(); + + return Some((lhs_time_ranges, rhs_time_ranges)); +} + +fn timestamps_to_ranges(mut timestamps: Vec) -> Option> { + if timestamps.is_empty() { + return None; + } + + timestamps.sort_by(|a, b| a.total_cmp(b)); + + let mut time_ranges = vec![]; + let mut current_range = TimeRange { + start: timestamps[0], + end: timestamps[0], + }; + + for i in 0..timestamps.len() - 1 { + let current = timestamps[i]; + let next = timestamps[i + 1]; + if next - current <= 1.0 { + current_range.end = next; + continue; + } + + time_ranges.push(current_range.clone()); + current_range.start = next; + current_range.end = next; + } + return if time_ranges.len() > 0 { + Some(time_ranges) + } else { + None + }; +} + +fn as_u32_le(array: &[u8; 4]) -> u32 { + #![allow(arithmetic_overflow)] + ((array[0] as u32) << 0) + | ((array[1] as u32) << 8) + | ((array[2] as u32) << 16) + | ((array[3] as u32) << 24) +}