feat: progress bar & slight attempt at improving efficiency

This commit is contained in:
ae 2024-10-01 22:38:26 +03:00
parent 01de99663b
commit 2d7a05a732
Signed by: ae
GPG Key ID: 995EFD5C1B532B3E
5 changed files with 127 additions and 40 deletions

60
Cargo.lock generated
View File

@ -206,6 +206,19 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "console"
version = "0.15.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb"
dependencies = [
"encode_unicode",
"lazy_static",
"libc",
"unicode-width",
"windows-sys 0.52.0",
]
[[package]]
name = "core-foundation"
version = "0.9.4"
@ -250,6 +263,7 @@ dependencies = [
"clap",
"colored",
"futures",
"indicatif",
"lazy-regex",
"md-5",
"num",
@ -258,6 +272,12 @@ dependencies = [
"tokio",
]
[[package]]
name = "encode_unicode"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
[[package]]
name = "encoding_rs"
version = "0.8.34"
@ -604,6 +624,28 @@ dependencies = [
"hashbrown",
]
[[package]]
name = "indicatif"
version = "0.17.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3"
dependencies = [
"console",
"instant",
"number_prefix",
"portable-atomic",
"unicode-width",
]
[[package]]
name = "instant"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
dependencies = [
"cfg-if",
]
[[package]]
name = "ipnet"
version = "2.10.0"
@ -821,6 +863,12 @@ dependencies = [
"autocfg",
]
[[package]]
name = "number_prefix"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
[[package]]
name = "object"
version = "0.36.4"
@ -927,6 +975,12 @@ version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2"
[[package]]
name = "portable-atomic"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cc9c68a3f6da06753e9335d63e27f6b9754dd1920d941135b7ea8224f141adb2"
[[package]]
name = "proc-macro2"
version = "1.0.86"
@ -1436,6 +1490,12 @@ dependencies = [
"tinyvec",
]
[[package]]
name = "unicode-width"
version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af"
[[package]]
name = "untrusted"
version = "0.9.0"

View File

@ -9,6 +9,7 @@ base64 = "0.22.1"
clap = { version = "4.5.18", features = ["derive"] }
colored = "2.1.0"
futures = "0.3.30"
indicatif = "0.17.8"
lazy-regex = "3.3.0"
md-5 = "0.10.6"
num = "0.4.3"

View File

@ -3,17 +3,40 @@ use std::{error::Error, path::PathBuf, sync::Arc, time::Duration};
use base64::{engine::general_purpose, Engine};
use colored::Colorize;
use futures::{lock::Mutex, stream, StreamExt};
use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
use md5::{Digest, Md5};
use reqwest::{header::USER_AGENT, Client};
use tokio::{fs::File, io::AsyncWriteExt};
use tokio::{fs::File, io::AsyncWriteExt, time::timeout};
use crate::UA;
const KEEP_ALIVE_TIMEOUT: Duration = Duration::from_secs(5);
const KEEP_ALIVE_TIMEOUT: Duration = Duration::from_secs(30);
const DOWNLOAD_TIMEOUT: Duration = Duration::from_secs(60);
/// Creates (or truncates) the file at `path`, writes the whole of `bytes`
/// into it and flushes the handle before returning.
///
/// Returns the number of bytes written, which on success is always
/// `bytes.len()`.
///
/// # Errors
///
/// Propagates any I/O error raised while creating, writing or flushing the
/// file.
async fn write_file(path: &PathBuf, bytes: &[u8]) -> std::io::Result<usize> {
    let mut file = File::create(path).await?;
    // A single `write` call is allowed to perform a short write and report
    // fewer bytes than requested, which would leave a truncated file on disk.
    // `write_all` keeps writing until the entire buffer has been consumed.
    file.write_all(bytes).await?;
    file.flush().await?;
    Ok(bytes.len())
}
pub async fn concurrent_dl(
images: Vec<(String, PathBuf, String)>,
display_url: String,
) -> Result<(usize, usize), Box<dyn Error>> {
let multi = MultiProgress::new();
let bar = multi.add(ProgressBar::new(images.len() as u64));
bar.set_style(
ProgressStyle::with_template("{spinner} {msg} [{wide_bar:.white/gray}] [{pos}/{len}]")
.unwrap(),
);
bar.set_message(display_url);
let error_bar = multi.add(ProgressBar::new(0));
error_bar.set_style(ProgressStyle::with_template("{wide_msg}").unwrap());
let dl_count = Arc::new(Mutex::new(0));
let sk_count = Arc::new(Mutex::new(0));
@ -27,56 +50,61 @@ pub async fn concurrent_dl(
let client = client.clone();
let (url, path, _expct_md5) = data;
let send_fut = client.get(url).header(USER_AGENT, UA).send();
match send_fut.await {
Ok(res) => match res.bytes().await {
Ok(bytes) => {
let byte_count = bytes.len();
let download_result = timeout(DOWNLOAD_TIMEOUT, async {
let res = client.get(url).header(USER_AGENT, UA).send().await?;
let bytes = res.bytes().await?;
Ok::<_, reqwest::Error>(bytes)
})
.await;
match download_result {
Ok(Ok(bytes)) => {
let mut hasher = Md5::new();
hasher.update(&bytes);
let result = hasher.finalize();
let b64_md5 = general_purpose::STANDARD.encode(result);
// 4chan file attachment hash is always 24 character packed base64 encoded MD5. Truly a fucking state of art CRC.
let _b64_md5 = general_purpose::STANDARD.encode(result);
// TODO: Figure out how the MD5 should be converted before uncommenting the following filtering condition
// if b64_md5 != *expct_md5 {
// eprintln!("{}", format!("File skipped due to mismatched MD5 (expected {expct_md5}, got {b64_md5})").red().bold());
// error_bar.set_message(format!("File skipped due to mismatched MD5 (expected {expct_md5}, got {b64_md5})").red().bold());
// let mut sk_count = sk_count.lock().await;
// *sk_count += 1;
// return;
// }
let mut file = File::create(path).await.unwrap();
file.write_all(&bytes).await.unwrap();
let _n = write_file(path, &bytes).await.unwrap();
let mut dl_count = dl_count.lock().await;
*dl_count += 1;
println!(
"{}",
format!("{b64_md5}: {byte_count} bytes").truecolor(0, 209, 27)
);
}
Err(_) => eprintln!(
Err(_) => {
error_bar.set_message(format!(
"{}",
format!("Failed to convert request from {} to bytes", url)
.red()
.bold()
),
},
Err(_) => eprintln!("{}", format!("Failed to request {}", url).red().bold()),
));
}
Ok(Err(_)) => {
error_bar.set_message(format!(
"{}",
format!("Failed to request {}", url).red().bold()
));
}
}
bar.inc(1);
}))
.buffer_unordered(100)
.collect::<Vec<()>>();
futures.await;
bar.finish();
error_bar.finish();
let dl = *dl_count.lock().await;
let sk = *sk_count.lock().await;

View File

@ -192,7 +192,7 @@ async fn main() {
println!(
"{}",
format!("A total of {dl} files downloaded and {fl} filtered")
format!("\nA total of {dl} files downloaded and {fl} filtered")
.truecolor(252, 156, 12)
.bold()
);

View File

@ -37,10 +37,8 @@ pub async fn dl(
None => parse_url(cfg.target.clone()),
};
println!("{}", format!("Thread JSON URL: {json_url}").blue().bold());
let (images, fl) = parse_json(cfg.clone(), &client, json_url, board).await?;
let (dl, sk) = http::concurrent_dl(images).await?;
let (images, fl) = parse_json(cfg.clone(), &client, json_url.clone(), board).await?;
let (dl, sk) = http::concurrent_dl(images, json_url).await?;
Ok((dl, fl + sk))
}