diff --git a/README.md b/README.md index 7316f57..88c13ee 100644 --- a/README.md +++ b/README.md @@ -462,7 +462,7 @@ The `archive` command lets you download episodes with multiple audios and subtit In the best case, when multiple audio & subtitle tracks are used, there is only one *video* track and all other languages can be stored as audio-only. But, as said, this is not always the case. With the `-m` / `--merge` flag you can define the behaviour when an episodes' video tracks differ in length. - Valid options are `audio` - store one video and all other languages as audio only; `video` - store the video + audio for every language; `auto` - detect if videos differ in length: if so, behave like `video` - otherwise like `audio`. + Valid options are `audio` - store one video and all other languages as audio only; `video` - store the video + audio for every language; `auto` - detect if videos differ in length: if so, behave like `video` - otherwise like `audio`; `sync` - detect if videos differ in length: if so, it tries to find the offset of matching audio parts and removes the offset from the beginning, otherwise it behaves like `audio`. Subtitles will always match the primary audio and video. ```shell @@ -482,15 +482,12 @@ The `archive` command lets you download episodes with multiple audios and subtit Default are `200` milliseconds. -- Sync start +- Sync tolerance - If you want that all videos of the same episode should start at the same time and `--merge` doesn't fit your needs (e.g. one video has an intro, all other doesn't), you might consider using the `--sync-start`. - It tries to sync the timing of all downloaded audios to match one video. - This is done by downloading the first few segments/frames of all video tracks that differ in length and comparing them frame by frame. 
- The flag takes an optional value determines how accurate the syncing is, generally speaking everything over 15 begins to be more inaccurate and everything below 6 is too accurate (and won't succeed). - When the syncing fails, the command is continued as if `--sync-start` wasn't provided for this episode. + Sometimes two video tracks are downloaded with `--merge` set to `sync` because the audio fingerprinting fails to identify matching audio parts (e.g. opening). + To prevent this, you can use the `--sync-tolerance` flag to specify the difference by which two fingerprints are considered equal. - Default is `7.5`. + Default is `6`. - Language tagging diff --git a/crunchy-cli-core/src/archive/command.rs b/crunchy-cli-core/src/archive/command.rs index 3525a4a..87c133e 100644 --- a/crunchy-cli-core/src/archive/command.rs +++ b/crunchy-cli-core/src/archive/command.rs @@ -90,32 +90,31 @@ pub struct Archive { pub(crate) resolution: Resolution, #[arg( - help = "Sets the behavior of the stream merging. Valid behaviors are 'auto', 'audio' and 'video'" + help = "Sets the behavior of the stream merging. Valid behaviors are 'auto', 'sync', 'audio' and 'video'" )] #[arg( long_help = "Because of local restrictions (or other reasons) some episodes with different languages does not have the same length (e.g. when some scenes were cut out). \ With this flag you can set the behavior when handling multiple language. 
- Valid options are 'audio' (stores one video and all other languages as audio only), 'video' (stores the video + audio for every language) and 'auto' (detects if videos differ in length: if so, behave like 'video' else like 'audio')" + Valid options are 'audio' (stores one video and all other languages as audio only), 'video' (stores the video + audio for every language), 'auto' (detects if videos differ in length: if so, behave like 'video' else like 'audio') and 'sync' (detects if videos differ in length: if so, tries to find the offset of matching audio parts and removes it from the beginning, otherwise it behaves like 'audio')" )] #[arg(short, long, default_value = "auto")] #[arg(value_parser = MergeBehavior::parse)] pub(crate) merge: MergeBehavior, #[arg( - help = "If the merge behavior is 'auto', only download multiple video tracks if their length difference is higher than the given milliseconds" + help = "If the merge behavior is 'auto' or 'sync', consider videos to be of equal lengths if the difference in length is smaller than the specified milliseconds" )] #[arg(long, default_value_t = 200)] pub(crate) merge_time_tolerance: u32, - #[arg(help = "Tries to sync the timing of all downloaded audios to match one video")] #[arg( - long_help = "Tries to sync the timing of all downloaded audios to match one video. \ - This is done by downloading the first few segments/frames of all video tracks that differ in length and comparing them frame by frame. \ - The value of this flag determines how accurate the syncing is, generally speaking everything over 15 begins to be more inaccurate and everything below 6 is too accurate (and won't succeed). \ - If you want to provide a custom value to this flag, you have to set it with an equals (e.g. `--sync-start=10` instead of `--sync-start 10`). 
\ - When the syncing fails, the command is continued as if `--sync-start` wasn't provided for this episode - " + help = "If the merge behavior is 'sync', specify the difference by which two fingerprints are considered equal" )] - #[arg(long, require_equals = true, num_args = 0..=1, default_missing_value = "7.5")] - pub(crate) sync_start: Option, + #[arg(long, default_value_t = 6)] + pub(crate) sync_tolerance: u32, + #[arg( + help = "If the merge behavior is 'sync', specify the amount of offset determination runs from which the final offset is calculated" + )] + #[arg(long, default_value_t = 4)] + pub(crate) sync_precision: u32, #[arg( help = "Specified which language tagging the audio and subtitle tracks and language specific format options should have. \ @@ -229,18 +228,10 @@ impl Execute for Archive { } if self.include_chapters + && !matches!(self.merge, MergeBehavior::Sync) && !matches!(self.merge, MergeBehavior::Audio) - && self.sync_start.is_none() { - bail!("`--include-chapters` can only be used if `--merge` is set to 'audio' or `--sync-start` is set") - } - - if !matches!(self.merge, MergeBehavior::Auto) && self.sync_start.is_some() { - bail!("`--sync-start` can only be used if `--merge` is set to `auto`") - } - - if self.sync_start.is_some() && self.ffmpeg_preset.is_none() { - warn!("Using `--sync-start` without `--ffmpeg-preset` might produce worse sync results than with `--ffmpeg-preset` set") + bail!("`--include-chapters` can only be used if `--merge` is set to 'audio' or 'sync'") } self.audio = all_locale_in_locales(self.audio.clone()); @@ -317,7 +308,14 @@ impl Execute for Archive { .audio_sort(Some(self.audio.clone())) .subtitle_sort(Some(self.subtitle.clone())) .no_closed_caption(self.no_closed_caption) - .sync_start_value(self.sync_start) + .sync_tolerance(match self.merge { + MergeBehavior::Sync => Some(self.sync_tolerance), + _ => None, + }) + .sync_precision(match self.merge { + MergeBehavior::Sync => Some(self.sync_precision), + _ => None, + }) 
.threads(self.threads) .audio_locale_output_map( zip(self.audio.clone(), self.output_audio_locales.clone()).collect(), @@ -560,7 +558,7 @@ async fn get_format( }, }, }), - MergeBehavior::Auto => { + MergeBehavior::Auto | MergeBehavior::Sync => { let mut d_formats: Vec<(Duration, DownloadFormat)> = vec![]; for (single_format, video, audio, subtitles) in format_pairs { diff --git a/crunchy-cli-core/src/utils/download.rs b/crunchy-cli-core/src/utils/download.rs index bd7bf3d..67cb66a 100644 --- a/crunchy-cli-core/src/utils/download.rs +++ b/crunchy-cli-core/src/utils/download.rs @@ -2,15 +2,13 @@ use crate::utils::ffmpeg::FFmpegPreset; use crate::utils::filter::real_dedup_vec; use crate::utils::fmt::format_time_delta; use crate::utils::log::progress; -use crate::utils::os::{ - cache_dir, is_special_file, temp_directory, temp_named_pipe, tempdir, tempfile, -}; +use crate::utils::os::{cache_dir, is_special_file, temp_directory, temp_named_pipe, tempfile}; use crate::utils::rate_limit::RateLimiterService; +use crate::utils::sync::{sync_audios, SyncAudio}; use anyhow::{bail, Result}; use chrono::{NaiveTime, TimeDelta}; use crunchyroll_rs::media::{SkipEvents, SkipEventsEvent, StreamData, StreamSegment, Subtitle}; use crunchyroll_rs::Locale; -use image_hasher::{Hasher, HasherConfig, ImageHash}; use indicatif::{ProgressBar, ProgressDrawTarget, ProgressFinish, ProgressStyle}; use log::{debug, warn, LevelFilter}; use regex::Regex; @@ -39,6 +37,7 @@ pub enum MergeBehavior { Video, Audio, Auto, + Sync, } impl MergeBehavior { @@ -47,6 +46,7 @@ impl MergeBehavior { "video" => MergeBehavior::Video, "audio" => MergeBehavior::Audio, "auto" => MergeBehavior::Auto, + "sync" => MergeBehavior::Sync, _ => return Err(format!("'{}' is not a valid merge behavior", s)), }) } @@ -64,7 +64,8 @@ pub struct DownloadBuilder { force_hardsub: bool, download_fonts: bool, no_closed_caption: bool, - sync_start_value: Option, + sync_tolerance: Option, + sync_precision: Option, threads: usize, 
ffmpeg_threads: Option, audio_locale_output_map: HashMap, @@ -84,7 +85,8 @@ impl DownloadBuilder { force_hardsub: false, download_fonts: false, no_closed_caption: false, - sync_start_value: None, + sync_tolerance: None, + sync_precision: None, threads: num_cpus::get(), ffmpeg_threads: None, audio_locale_output_map: HashMap::new(), @@ -106,7 +108,8 @@ impl DownloadBuilder { download_fonts: self.download_fonts, no_closed_caption: self.no_closed_caption, - sync_start_value: self.sync_start_value, + sync_tolerance: self.sync_tolerance, + sync_precision: self.sync_precision, download_threads: self.threads, ffmpeg_threads: self.ffmpeg_threads, @@ -165,7 +168,8 @@ pub struct Downloader { download_fonts: bool, no_closed_caption: bool, - sync_start_value: Option, + sync_tolerance: Option, + sync_precision: Option, download_threads: usize, ffmpeg_threads: Option, @@ -245,6 +249,7 @@ impl Downloader { let mut video_offset = None; let mut audio_offsets = HashMap::new(); let mut subtitle_offsets = HashMap::new(); + let mut raw_audios = vec![]; let mut videos = vec![]; let mut audios = vec![]; let mut subtitles = vec![]; @@ -263,40 +268,32 @@ impl Downloader { .max() .unwrap(); - if self.formats.len() > 1 && self.sync_start_value.is_some() { - let all_segments_count: Vec = self - .formats - .iter() - .map(|f| f.video.0.segments().len()) - .collect(); - let sync_segments = 11.max( - all_segments_count.iter().max().unwrap() - all_segments_count.iter().min().unwrap(), - ); - let mut sync_vids = vec![]; - for (i, format) in self.formats.iter().enumerate() { + // downloads all audios + for (i, format) in self.formats.iter().enumerate() { + for (stream_data, locale) in &format.audios { let path = self - .download_video( - &format.video.0, - format!("Downloading video #{} sync segments", i + 1), - Some(sync_segments), + .download_audio( + stream_data, + format!("{:<1$}", format!("Downloading {} audio", locale), fmt_space), ) .await?; - sync_vids.push(SyncVideo { + 
raw_audios.push(SyncAudio { + format_id: i, path, - length: len_from_segments(&format.video.0.segments()), - available_frames: (len_from_segments( - &format.video.0.segments()[0..sync_segments], - ) - .num_milliseconds() as f64 - * format.video.0.fps().unwrap() - / 1000.0) as u64, - idx: i, + locale: locale.clone(), + video_idx: i, }) } + } + if self.formats.len() > 1 && self.sync_tolerance.is_some() { let _progress_handler = progress!("Syncing video start times (this might take some time)"); - let mut offsets = sync_videos(sync_vids, self.sync_start_value.unwrap())?; + let mut offsets = sync_audios( + &raw_audios, + self.sync_tolerance.unwrap(), + self.sync_precision.unwrap(), + )?; drop(_progress_handler); let mut offset_pre_checked = false; @@ -307,19 +304,14 @@ impl Downloader { .enumerate() .map(|(i, f)| { len_from_segments(&f.video.0.segments()) - - TimeDelta::milliseconds( - tmp_offsets - .get(&i) - .map(|o| (*o as f64 / f.video.0.fps().unwrap() * 1000.0) as i64) - .unwrap_or_default(), - ) + - tmp_offsets.get(&i).map(|o| *o).unwrap_or_default() }) .collect(); let min = formats_with_offset.iter().min().unwrap(); let max = formats_with_offset.iter().max().unwrap(); if max.num_seconds() - min.num_seconds() > 15 { - warn!("Found difference of >15 seconds after sync, skipping applying it"); + warn!("Found difference of >15 seconds after sync, so the application was skipped"); offsets = None; offset_pre_checked = true } @@ -331,7 +323,7 @@ impl Downloader { let mut audio_count: usize = 0; let mut subtitle_count: usize = 0; for (i, format) in self.formats.iter().enumerate() { - let format_fps = format.video.0.fps().unwrap(); + let offset = offsets.get(&i).map(|o| *o).unwrap_or_default(); let format_len = format .video .0 @@ -339,7 +331,7 @@ impl Downloader { .iter() .map(|s| s.length.as_millis()) .sum::() as u64 - - offsets.get(&i).map_or(0, |o| *o); + - offset.num_milliseconds() as u64; if format_len > root_format_length { root_format_idx = i; root_format_length 
= format_len; @@ -347,23 +339,13 @@ impl Downloader { for _ in &format.audios { if let Some(offset) = &offsets.get(&i) { - audio_offsets.insert( - audio_count, - TimeDelta::milliseconds( - (**offset as f64 / format_fps * 1000.0) as i64, - ), - ); + audio_offsets.insert(audio_count, **offset); } audio_count += 1 } for _ in &format.subtitles { if let Some(offset) = &offsets.get(&i) { - subtitle_offsets.insert( - subtitle_count, - TimeDelta::milliseconds( - (**offset as f64 / format_fps * 1000.0) as i64, - ), - ); + subtitle_offsets.insert(subtitle_count, **offset); } subtitle_count += 1 } @@ -390,20 +372,28 @@ impl Downloader { root_format.subtitles.extend(subtitle_append); self.formats = vec![root_format]; - video_offset = offsets.get(&root_format_idx).map(|o| { - TimeDelta::milliseconds( - (*o as f64 / self.formats[0].video.0.fps().unwrap() * 1000.0) as i64, - ) - }) + video_offset = offsets.get(&root_format_idx).map(|o| *o); + for raw_audio in raw_audios.iter_mut() { + raw_audio.video_idx = root_format_idx; + } } else { for format in &mut self.formats { format.metadata.skip_events = None } + if !offset_pre_checked { + warn!("Couldn't find reliable sync positions") + } } + } - if !offset_pre_checked { - warn!("Couldn't find reliable sync positions") - } + // add audio metadata + for raw_audio in raw_audios { + audios.push(FFmpegAudioMeta { + path: raw_audio.path, + locale: raw_audio.locale, + start_time: audio_offsets.get(&raw_audio.format_id).map(|o| *o), + video_idx: raw_audio.video_idx, + }) } // downloads all videos @@ -435,24 +425,6 @@ impl Downloader { }) } - // downloads all audios - for (i, format) in self.formats.iter().enumerate() { - for (j, (stream_data, locale)) in format.audios.iter().enumerate() { - let path = self - .download_audio( - stream_data, - format!("{:<1$}", format!("Downloading {} audio", locale), fmt_space), - ) - .await?; - audios.push(FFmpegAudioMeta { - path, - locale: locale.clone(), - start_time: audio_offsets.get(&j).cloned(), - 
video_idx: i, - }) - } - } - for (i, format) in self.formats.iter().enumerate() { if format.subtitles.is_empty() { continue; @@ -1538,134 +1510,6 @@ async fn ffmpeg_progress( Ok(()) } -struct SyncVideo { - path: TempPath, - length: TimeDelta, - available_frames: u64, - idx: usize, -} - -fn sync_videos(mut sync_videos: Vec, value: f64) -> Result>> { - let mut result = HashMap::new(); - let hasher = HasherConfig::new().preproc_dct().to_hasher(); - let start_frame = 300; - - sync_videos.sort_by_key(|sv| sv.length); - - let sync_base = sync_videos.remove(0); - let sync_hashes = extract_frame_hashes(&sync_base.path, start_frame, 50, &hasher)?; - - for sync_video in sync_videos { - let mut highest_frame_match = f64::INFINITY; - let mut frame = start_frame; - let mut hashes = vec![]; - - loop { - if frame == sync_video.available_frames { - debug!( - "Failed to sync videos, end of stream {} reached (highest frame match: {})", - sync_video.idx + 1, - highest_frame_match - ); - return Ok(None); - } - - hashes.drain(0..(hashes.len() as i32 - sync_hashes.len() as i32).max(0) as usize); - hashes.extend(extract_frame_hashes( - &sync_video.path, - frame, - 300 - hashes.len() as u64, - &hasher, - )?); - - let mut check_frame_windows_result: Vec<(usize, f64)> = - check_frame_windows(&sync_hashes, &hashes) - .into_iter() - .enumerate() - .collect(); - check_frame_windows_result.sort_by(|(_, a), (_, b)| a.partial_cmp(&b).unwrap()); - if check_frame_windows_result[0].1 <= value { - result.insert( - sync_video.idx, - frame + check_frame_windows_result[0].0 as u64 - start_frame, - ); - break; - } else if check_frame_windows_result[0].1 < highest_frame_match { - highest_frame_match = check_frame_windows_result[0].1 - } - - frame = (frame + 300 - sync_hashes.len() as u64).min(sync_video.available_frames) - } - } - - Ok(Some(result)) -} - -fn extract_frame_hashes( - input_file: &Path, - start_frame: u64, - frame_count: u64, - hasher: &Hasher, -) -> Result> { - let frame_dir = 
tempdir(format!( - "{}_sync_frames", - input_file - .file_name() - .unwrap_or_default() - .to_string_lossy() - .trim_end_matches( - &input_file - .file_stem() - .unwrap_or_default() - .to_string_lossy() - .to_string() - ) - ))?; - let extract_output = Command::new("ffmpeg") - .arg("-hide_banner") - .arg("-y") - .args(["-i", input_file.to_string_lossy().to_string().as_str()]) - .args([ - "-vf", - format!( - r#"select=between(n\,{}\,{}),setpts=PTS-STARTPTS,scale=-1:240"#, - start_frame, - start_frame + frame_count - ) - .as_str(), - ]) - .args(["-vframes", frame_count.to_string().as_str()]) - .arg(format!("{}/%03d.jpg", frame_dir.path().to_string_lossy())) - .output()?; - if !extract_output.status.success() { - bail!( - "{}", - String::from_utf8_lossy(extract_output.stderr.as_slice()) - ) - } - - let mut hashes = vec![]; - for file in frame_dir.path().read_dir()? { - let file = file?; - let img = image::open(file.path())?; - hashes.push(hasher.hash_image(&img)) - } - Ok(hashes) -} - -fn check_frame_windows(base_hashes: &[ImageHash], check_hashes: &[ImageHash]) -> Vec { - let mut results = vec![]; - - for i in 0..(check_hashes.len() - base_hashes.len()) { - let check_window = &check_hashes[i..(base_hashes.len() + i)]; - let sum = std::iter::zip(base_hashes, check_window) - .map(|(a, b)| a.dist(b)) - .sum::(); - results.push(sum as f64 / check_window.len() as f64); - } - results -} - fn len_from_segments(segments: &[StreamSegment]) -> TimeDelta { TimeDelta::milliseconds(segments.iter().map(|s| s.length.as_millis()).sum::() as i64) } diff --git a/crunchy-cli-core/src/utils/mod.rs b/crunchy-cli-core/src/utils/mod.rs index 72a0908..6260047 100644 --- a/crunchy-cli-core/src/utils/mod.rs +++ b/crunchy-cli-core/src/utils/mod.rs @@ -11,4 +11,5 @@ pub mod log; pub mod os; pub mod parse; pub mod rate_limit; +pub mod sync; pub mod video; diff --git a/crunchy-cli-core/src/utils/os.rs b/crunchy-cli-core/src/utils/os.rs index b65abc2..a216f87 100644 --- 
a/crunchy-cli-core/src/utils/os.rs +++ b/crunchy-cli-core/src/utils/os.rs @@ -7,7 +7,7 @@ use std::pin::Pin; use std::process::{Command, Stdio}; use std::task::{Context, Poll}; use std::{env, fs, io}; -use tempfile::{Builder, NamedTempFile, TempDir, TempPath}; +use tempfile::{Builder, NamedTempFile, TempPath}; use tokio::io::{AsyncRead, ReadBuf}; pub fn has_ffmpeg() -> bool { @@ -46,22 +46,6 @@ pub fn tempfile>(suffix: S) -> io::Result { Ok(tempfile) } -/// Any tempdir should be created with this function. The prefix and directory of every directory -/// created with this function stays the same which is helpful to query all existing tempdirs and -/// e.g. remove them in a case of ctrl-c. Having one function also good to prevent mistakes like -/// setting the wrong prefix if done manually. -pub fn tempdir>(suffix: S) -> io::Result { - let tempdir = Builder::default() - .prefix(".crunchy-cli_") - .suffix(suffix.as_ref()) - .tempdir_in(temp_directory())?; - debug!( - "Created temporary directory: {}", - tempdir.path().to_string_lossy() - ); - Ok(tempdir) -} - pub fn cache_dir>(name: S) -> io::Result { let cache_dir = temp_directory().join(format!(".crunchy-cli_{}_cache", name.as_ref())); fs::create_dir_all(&cache_dir)?; diff --git a/crunchy-cli-core/src/utils/sync.rs b/crunchy-cli-core/src/utils/sync.rs new file mode 100644 index 0000000..9e89046 --- /dev/null +++ b/crunchy-cli-core/src/utils/sync.rs @@ -0,0 +1,422 @@ +use std::{ + cmp, + collections::{HashMap, HashSet}, + ops::Not, + path::Path, + process::Command, +}; + +use chrono::TimeDelta; +use crunchyroll_rs::Locale; +use log::debug; +use tempfile::TempPath; + +use anyhow::{bail, Result}; + +use super::fmt::format_time_delta; + +pub struct SyncAudio { + pub format_id: usize, + pub path: TempPath, + pub locale: Locale, + pub video_idx: usize, +} + +#[derive(Debug, Clone, Copy)] +struct TimeRange { + start: f64, + end: f64, +} + +pub fn sync_audios( + available_audios: &Vec, + sync_tolerance: u32, + 
sync_precision: u32, +) -> Result>> { + let mut result: HashMap = HashMap::new(); + + let mut sync_audios = vec![]; + let mut chromaprints = HashMap::new(); + let mut formats = HashSet::new(); + for audio in available_audios { + if formats.contains(&audio.format_id) { + continue; + } + formats.insert(audio.format_id); + sync_audios.push((audio.format_id, &audio.path)); + chromaprints.insert( + audio.format_id, + generate_chromaprint( + &audio.path, + &TimeDelta::zero(), + &TimeDelta::zero(), + &TimeDelta::zero(), + )?, + ); + } + sync_audios.sort_by_key(|sync_audio| chromaprints.get(&sync_audio.0).unwrap().len()); + + let base_audio = sync_audios.remove(0); + + let mut start = f64::MAX; + let mut end = f64::MIN; + let mut initial_offsets = HashMap::new(); + for audio in &sync_audios { + debug!( + "Initial comparison of format {} to {}", + audio.0, &base_audio.0 + ); + + let (lhs_ranges, rhs_ranges) = compare_chromaprints( + chromaprints.get(&base_audio.0).unwrap(), + chromaprints.get(&audio.0).unwrap(), + sync_tolerance, + ); + if lhs_ranges.is_empty() || rhs_ranges.is_empty() { + bail!( + "Failed to sync videos, couldn't find matching audio parts between format {} and {}", + base_audio.0 + 1, + audio.0 + 1 + ); + } + let lhs_range = lhs_ranges[0]; + let rhs_range = rhs_ranges[0]; + start = start.min(lhs_range.start); + end = end.max(lhs_range.end); + start = start.min(rhs_range.start); + end = end.max(rhs_range.end); + let offset = TimeDelta::milliseconds(((rhs_range.start - lhs_range.start) * 1000.0) as i64); + initial_offsets.insert(audio.0, TimeDelta::zero().checked_sub(&offset).unwrap()); + debug!( + "Found initial offset of {}ms ({} - {} {}s) ({} - {} {}s) for format {} to {}", + offset.num_milliseconds(), + lhs_range.start, + lhs_range.end, + lhs_range.end - lhs_range.start, + rhs_range.start, + rhs_range.end, + rhs_range.end - rhs_range.start, + audio.0, + base_audio.0 + ); + } + + debug!( + "Found matching audio parts at {} - {}, narrowing search", + 
start, end + ); + + let start = TimeDelta::milliseconds((start * 1000.0) as i64 - 20000); + let end = TimeDelta::milliseconds((end * 1000.0) as i64 + 20000); + + for sync_audio in &sync_audios { + let chromaprint = generate_chromaprint( + &sync_audio.1, + &start, + &end, + initial_offsets.get(&sync_audio.0).unwrap(), + )?; + chromaprints.insert(sync_audio.0, chromaprint); + } + + let mut runs: HashMap = HashMap::new(); + let iterator_range_limits: i64 = 2 ^ sync_precision as i64; + for i in -iterator_range_limits..=iterator_range_limits { + let base_offset = TimeDelta::milliseconds( + ((0.128 / iterator_range_limits as f64 * i as f64) * 1000.0) as i64, + ); + chromaprints.insert( + base_audio.0, + generate_chromaprint(base_audio.1, &start, &end, &base_offset)?, + ); + for audio in &sync_audios { + let initial_offset = initial_offsets.get(&audio.0).map(|o| *o).unwrap(); + let offset = find_offset( + (&base_audio.0, chromaprints.get(&base_audio.0).unwrap()), + &base_offset, + (&audio.0, chromaprints.get(&audio.0).unwrap()), + &initial_offset, + &start, + sync_tolerance, + ); + if offset.is_none() { + continue; + } + let offset = offset.unwrap(); + + result.insert( + audio.0, + result + .get(&audio.0) + .map(|o| *o) + .unwrap_or_default() + .checked_add(&offset) + .unwrap(), + ); + runs.insert( + audio.0, + runs.get(&audio.0).map(|o| *o).unwrap_or_default() + 1, + ); + } + } + let mut result: HashMap = result + .iter() + .map(|(format_id, offset)| { + ( + *format_id, + TimeDelta::milliseconds( + offset.num_milliseconds() / runs.get(format_id).map(|o| *o).unwrap(), + ), + ) + }) + .collect(); + result.insert(base_audio.0, TimeDelta::milliseconds(0)); + + Ok(Some(result)) +} + +fn find_offset( + lhs: (&usize, &Vec), + lhs_shift: &TimeDelta, + rhs: (&usize, &Vec), + rhs_shift: &TimeDelta, + start: &TimeDelta, + sync_tolerance: u32, +) -> Option { + let (lhs_ranges, rhs_ranges) = compare_chromaprints(&lhs.1, &rhs.1, sync_tolerance); + if lhs_ranges.is_empty() || 
rhs_ranges.is_empty() { + return None; + } + let lhs_range = lhs_ranges[0]; + let rhs_range = rhs_ranges[0]; + let offset = rhs_range.end - lhs_range.end; + let offset = TimeDelta::milliseconds((offset * 1000.0) as i64) + .checked_add(&lhs_shift)? + .checked_sub(&rhs_shift)?; + debug!( + "Found offset of {}ms ({} - {} {}s) ({} - {} {}s) for format {} to {}", + offset.num_milliseconds(), + lhs_range.start + start.num_milliseconds() as f64 / 1000.0, + lhs_range.end + start.num_milliseconds() as f64 / 1000.0, + lhs_range.end - lhs_range.start, + rhs_range.start + start.num_milliseconds() as f64 / 1000.0, + rhs_range.end + start.num_milliseconds() as f64 / 1000.0, + rhs_range.end - rhs_range.start, + rhs.0, + lhs.0 + ); + return Some(offset); +} + +fn generate_chromaprint( + input_file: &Path, + start: &TimeDelta, + end: &TimeDelta, + offset: &TimeDelta, +) -> Result> { + let mut ss_argument: &TimeDelta = &start.checked_sub(offset).unwrap(); + let mut offset_argument = &TimeDelta::zero(); + if offset.abs() > *offset { + ss_argument = start; + offset_argument = &offset; + }; + + let mut command = Command::new("ffmpeg"); + command + .arg("-hide_banner") + .arg("-y") + .args(["-ss", format_time_delta(ss_argument).as_str()]); + + if end.is_zero().not() { + command.args(["-to", format_time_delta(end).as_str()]); + } + + command + .args(["-itsoffset", format_time_delta(offset_argument).as_str()]) + .args(["-i", input_file.to_string_lossy().to_string().as_str()]) + .args(["-ac", "2"]) + .args(["-f", "chromaprint"]) + .args(["-fp_format", "raw"]) + .arg("-"); + + let extract_output = command.output()?; + + if !extract_output.status.success() { + bail!( + "{}", + String::from_utf8_lossy(extract_output.stderr.as_slice()) + ); + } + let raw_chromaprint = extract_output.stdout.as_slice(); + let length = raw_chromaprint.len(); + if length % 4 != 0 { + bail!("chromaprint bytes should be a multiple of 4"); + } + let mut chromaprint = Vec::with_capacity(length / 4); + for i in 
0..length / 4 { + chromaprint.push(as_u32_le( + raw_chromaprint[i * 4 + 0..i * 4 + 4].try_into().unwrap(), + )); + } + return Ok(chromaprint); +} + +fn compare_chromaprints( + lhs_chromaprint: &Vec, + rhs_chromaprint: &Vec, + sync_tolerance: u32, +) -> (Vec, Vec) { + let lhs_inverse_index = create_inverse_index(&lhs_chromaprint); + let rhs_inverse_index = create_inverse_index(&rhs_chromaprint); + + let mut possible_shifts = HashSet::new(); + for lhs_pair in lhs_inverse_index { + let original_point = lhs_pair.0; + for i in -2..=2 { + let modified_point = (original_point as i32 + i) as u32; + if rhs_inverse_index.contains_key(&modified_point) { + let rhs_index = rhs_inverse_index.get(&modified_point).map(|o| *o).unwrap(); + possible_shifts.insert(rhs_index as i32 - lhs_pair.1 as i32); + } + } + } + + let mut all_lhs_time_ranges = vec![]; + let mut all_rhs_time_ranges = vec![]; + for shift_amount in possible_shifts { + let time_range_pair = find_time_ranges( + &lhs_chromaprint, + &rhs_chromaprint, + shift_amount, + sync_tolerance, + ); + if time_range_pair.is_none() { + continue; + } + let (mut lhs_time_ranges, mut rhs_time_ranges) = time_range_pair.unwrap(); + let mut lhs_time_ranges: Vec = lhs_time_ranges + .drain(..) + .filter(|time_range| { + (20.0 < (time_range.end - time_range.start)) + && ((time_range.end - time_range.start) < 180.0) + && time_range.end > 0.0 + }) + .collect(); + lhs_time_ranges.sort_by(|a, b| (b.end - b.start).total_cmp(&(a.end - a.start))); + let mut rhs_time_ranges: Vec = rhs_time_ranges + .drain(..) 
+ .filter(|time_range| { + (20.0 < (time_range.end - time_range.start)) + && ((time_range.end - time_range.start) < 180.0) + && time_range.end > 0.0 + }) + .collect(); + rhs_time_ranges.sort_by(|a, b| (b.end - b.start).total_cmp(&(a.end - a.start))); + if lhs_time_ranges.is_empty() || rhs_time_ranges.is_empty() { + continue; + } + + all_lhs_time_ranges.push(lhs_time_ranges[0]); + all_rhs_time_ranges.push(rhs_time_ranges[0]); + } + all_lhs_time_ranges.sort_by(|a, b| (a.end - a.start).total_cmp(&(b.end - b.start))); + all_lhs_time_ranges.reverse(); + all_rhs_time_ranges.sort_by(|a, b| (a.end - a.start).total_cmp(&(b.end - b.start))); + all_rhs_time_ranges.reverse(); + + return (all_lhs_time_ranges, all_rhs_time_ranges); +} + +fn create_inverse_index(chromaprint: &Vec) -> HashMap { + let mut inverse_index = HashMap::with_capacity(chromaprint.capacity()); + for i in 0..chromaprint.capacity() { + inverse_index.insert(chromaprint[i], i); + } + return inverse_index; +} + +fn find_time_ranges( + lhs_chromaprint: &Vec, + rhs_chromaprint: &Vec, + shift_amount: i32, + sync_tolerance: u32, +) -> Option<(Vec, Vec)> { + let mut lhs_shift: i32 = 0; + let mut rhs_shift: i32 = 0; + if shift_amount < 0 { + lhs_shift -= shift_amount; + } else { + rhs_shift += shift_amount; + } + + let mut lhs_matching_timestamps = vec![]; + let mut rhs_matching_timestamps = vec![]; + let upper_limit = + cmp::min(lhs_chromaprint.len(), rhs_chromaprint.len()) as i32 - shift_amount.abs(); + + for i in 0..upper_limit { + let lhs_position = i + lhs_shift; + let rhs_position = i + rhs_shift; + let difference = (lhs_chromaprint[lhs_position as usize] + ^ rhs_chromaprint[rhs_position as usize]) + .count_ones(); + + if difference > sync_tolerance { + continue; + } + + lhs_matching_timestamps.push(lhs_position as f64 * 0.128); + rhs_matching_timestamps.push(rhs_position as f64 * 0.128); + } + lhs_matching_timestamps.push(f64::MAX); + rhs_matching_timestamps.push(f64::MAX); + + let lhs_time_ranges = 
timestamps_to_ranges(lhs_matching_timestamps); + if lhs_time_ranges.is_none() { + return None; + } + let lhs_time_ranges = lhs_time_ranges.unwrap(); + let rhs_time_ranges = timestamps_to_ranges(rhs_matching_timestamps).unwrap(); + + return Some((lhs_time_ranges, rhs_time_ranges)); +} + +fn timestamps_to_ranges(mut timestamps: Vec) -> Option> { + if timestamps.is_empty() { + return None; + } + + timestamps.sort_by(|a, b| a.total_cmp(b)); + + let mut time_ranges = vec![]; + let mut current_range = TimeRange { + start: timestamps[0], + end: timestamps[0], + }; + + for i in 0..timestamps.len() - 1 { + let current = timestamps[i]; + let next = timestamps[i + 1]; + if next - current <= 1.0 { + current_range.end = next; + continue; + } + + time_ranges.push(current_range.clone()); + current_range.start = next; + current_range.end = next; + } + return if time_ranges.len() > 0 { + Some(time_ranges) + } else { + None + }; +} + +fn as_u32_le(array: &[u8; 4]) -> u32 { + #![allow(arithmetic_overflow)] + ((array[0] as u32) << 0) + | ((array[1] as u32) << 8) + | ((array[2] as u32) << 16) + | ((array[3] as u32) << 24) +}