feat: progress bar & slight attempt at improving efficiency
This commit is contained in:
parent
01de99663b
commit
2d7a05a732
60
Cargo.lock
generated
60
Cargo.lock
generated
@ -206,6 +206,19 @@ dependencies = [
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "console"
|
||||
version = "0.15.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb"
|
||||
dependencies = [
|
||||
"encode_unicode",
|
||||
"lazy_static",
|
||||
"libc",
|
||||
"unicode-width",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "core-foundation"
|
||||
version = "0.9.4"
|
||||
@ -250,6 +263,7 @@ dependencies = [
|
||||
"clap",
|
||||
"colored",
|
||||
"futures",
|
||||
"indicatif",
|
||||
"lazy-regex",
|
||||
"md-5",
|
||||
"num",
|
||||
@ -258,6 +272,12 @@ dependencies = [
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encode_unicode"
|
||||
version = "0.3.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
|
||||
|
||||
[[package]]
|
||||
name = "encoding_rs"
|
||||
version = "0.8.34"
|
||||
@ -604,6 +624,28 @@ dependencies = [
|
||||
"hashbrown",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "indicatif"
|
||||
version = "0.17.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3"
|
||||
dependencies = [
|
||||
"console",
|
||||
"instant",
|
||||
"number_prefix",
|
||||
"portable-atomic",
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "instant"
|
||||
version = "0.1.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ipnet"
|
||||
version = "2.10.0"
|
||||
@ -821,6 +863,12 @@ dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "number_prefix"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
|
||||
|
||||
[[package]]
|
||||
name = "object"
|
||||
version = "0.36.4"
|
||||
@ -927,6 +975,12 @@ version = "0.3.31"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2"
|
||||
|
||||
[[package]]
|
||||
name = "portable-atomic"
|
||||
version = "1.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cc9c68a3f6da06753e9335d63e27f6b9754dd1920d941135b7ea8224f141adb2"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.86"
|
||||
@ -1436,6 +1490,12 @@ dependencies = [
|
||||
"tinyvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-width"
|
||||
version = "0.1.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af"
|
||||
|
||||
[[package]]
|
||||
name = "untrusted"
|
||||
version = "0.9.0"
|
||||
|
@ -9,6 +9,7 @@ base64 = "0.22.1"
|
||||
clap = { version = "4.5.18", features = ["derive"] }
|
||||
colored = "2.1.0"
|
||||
futures = "0.3.30"
|
||||
indicatif = "0.17.8"
|
||||
lazy-regex = "3.3.0"
|
||||
md-5 = "0.10.6"
|
||||
num = "0.4.3"
|
||||
|
98
src/http.rs
98
src/http.rs
@ -3,17 +3,40 @@ use std::{error::Error, path::PathBuf, sync::Arc, time::Duration};
|
||||
use base64::{engine::general_purpose, Engine};
|
||||
use colored::Colorize;
|
||||
use futures::{lock::Mutex, stream, StreamExt};
|
||||
use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
|
||||
use md5::{Digest, Md5};
|
||||
use reqwest::{header::USER_AGENT, Client};
|
||||
use tokio::{fs::File, io::AsyncWriteExt};
|
||||
use tokio::{fs::File, io::AsyncWriteExt, time::timeout};
|
||||
|
||||
use crate::UA;
|
||||
|
||||
const KEEP_ALIVE_TIMEOUT: Duration = Duration::from_secs(5);
|
||||
const KEEP_ALIVE_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
const DOWNLOAD_TIMEOUT: Duration = Duration::from_secs(60);
|
||||
|
||||
async fn write_file(path: &PathBuf, bytes: &[u8]) -> std::io::Result<usize> {
|
||||
let mut file = File::create(path).await?;
|
||||
let bw = file.write(bytes).await?;
|
||||
file.flush().await?;
|
||||
|
||||
Ok(bw)
|
||||
}
|
||||
|
||||
pub async fn concurrent_dl(
|
||||
images: Vec<(String, PathBuf, String)>,
|
||||
display_url: String,
|
||||
) -> Result<(usize, usize), Box<dyn Error>> {
|
||||
let multi = MultiProgress::new();
|
||||
|
||||
let bar = multi.add(ProgressBar::new(images.len() as u64));
|
||||
bar.set_style(
|
||||
ProgressStyle::with_template("{spinner} {msg} [{wide_bar:.white/gray}] [{pos}/{len}]")
|
||||
.unwrap(),
|
||||
);
|
||||
bar.set_message(display_url);
|
||||
|
||||
let error_bar = multi.add(ProgressBar::new(0));
|
||||
error_bar.set_style(ProgressStyle::with_template("{wide_msg}").unwrap());
|
||||
|
||||
let dl_count = Arc::new(Mutex::new(0));
|
||||
let sk_count = Arc::new(Mutex::new(0));
|
||||
|
||||
@ -27,56 +50,61 @@ pub async fn concurrent_dl(
|
||||
let client = client.clone();
|
||||
|
||||
let (url, path, _expct_md5) = data;
|
||||
let send_fut = client.get(url).header(USER_AGENT, UA).send();
|
||||
|
||||
match send_fut.await {
|
||||
Ok(res) => match res.bytes().await {
|
||||
Ok(bytes) => {
|
||||
let byte_count = bytes.len();
|
||||
let download_result = timeout(DOWNLOAD_TIMEOUT, async {
|
||||
let res = client.get(url).header(USER_AGENT, UA).send().await?;
|
||||
let bytes = res.bytes().await?;
|
||||
|
||||
let mut hasher = Md5::new();
|
||||
hasher.update(&bytes);
|
||||
let result = hasher.finalize();
|
||||
let b64_md5 = general_purpose::STANDARD.encode(result);
|
||||
Ok::<_, reqwest::Error>(bytes)
|
||||
})
|
||||
.await;
|
||||
|
||||
// 4chan file attachment hash is always 24 character packed base64 encoded MD5. Truly a fucking state of art CRC.
|
||||
match download_result {
|
||||
Ok(Ok(bytes)) => {
|
||||
let mut hasher = Md5::new();
|
||||
hasher.update(&bytes);
|
||||
let result = hasher.finalize();
|
||||
let _b64_md5 = general_purpose::STANDARD.encode(result);
|
||||
|
||||
// TODO: Figure out how the MD5 should be converted before uncommenting the following filtering condition
|
||||
// TODO: Figure out how the MD5 should be converted before uncommenting the following filtering condition
|
||||
// if b64_md5 != *expct_md5 {
|
||||
// error_bar.set_message(format!("File skipped due to mismatched MD5 (expected {expct_md5}, got {b64_md5})").red().bold());
|
||||
// let mut sk_count = sk_count.lock().await;
|
||||
// *sk_count += 1;
|
||||
|
||||
// if b64_md5 != *expct_md5 {
|
||||
// eprintln!("{}", format!("File skipped due to mismatched MD5 (expected {expct_md5}, got {b64_md5})").red().bold());
|
||||
// let mut sk_count = sk_count.lock().await;
|
||||
// *sk_count += 1;
|
||||
// return;
|
||||
// }
|
||||
|
||||
// return;
|
||||
// }
|
||||
|
||||
let mut file = File::create(path).await.unwrap();
|
||||
file.write_all(&bytes).await.unwrap();
|
||||
|
||||
let mut dl_count = dl_count.lock().await;
|
||||
*dl_count += 1;
|
||||
|
||||
println!(
|
||||
"{}",
|
||||
format!("{b64_md5}: {byte_count} bytes").truecolor(0, 209, 27)
|
||||
);
|
||||
}
|
||||
Err(_) => eprintln!(
|
||||
let _n = write_file(path, &bytes).await.unwrap();
|
||||
let mut dl_count = dl_count.lock().await;
|
||||
*dl_count += 1;
|
||||
}
|
||||
Err(_) => {
|
||||
error_bar.set_message(format!(
|
||||
"{}",
|
||||
format!("Failed to convert request from {} to bytes", url)
|
||||
.red()
|
||||
.bold()
|
||||
),
|
||||
},
|
||||
Err(_) => eprintln!("{}", format!("Failed to request {}", url).red().bold()),
|
||||
));
|
||||
}
|
||||
Ok(Err(_)) => {
|
||||
error_bar.set_message(format!(
|
||||
"{}",
|
||||
format!("Failed to request {}", url).red().bold()
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
bar.inc(1);
|
||||
}))
|
||||
.buffer_unordered(100)
|
||||
.collect::<Vec<()>>();
|
||||
|
||||
futures.await;
|
||||
|
||||
bar.finish();
|
||||
error_bar.finish();
|
||||
|
||||
let dl = *dl_count.lock().await;
|
||||
let sk = *sk_count.lock().await;
|
||||
|
||||
|
@ -192,7 +192,7 @@ async fn main() {
|
||||
|
||||
println!(
|
||||
"{}",
|
||||
format!("A total of {dl} files downloaded and {fl} filtered")
|
||||
format!("\nA total of {dl} files downloaded and {fl} filtered")
|
||||
.truecolor(252, 156, 12)
|
||||
.bold()
|
||||
);
|
||||
|
@ -37,10 +37,8 @@ pub async fn dl(
|
||||
None => parse_url(cfg.target.clone()),
|
||||
};
|
||||
|
||||
println!("{}", format!("Thread JSON URL: {json_url}").blue().bold());
|
||||
|
||||
let (images, fl) = parse_json(cfg.clone(), &client, json_url, board).await?;
|
||||
let (dl, sk) = http::concurrent_dl(images).await?;
|
||||
let (images, fl) = parse_json(cfg.clone(), &client, json_url.clone(), board).await?;
|
||||
let (dl, sk) = http::concurrent_dl(images, json_url).await?;
|
||||
|
||||
Ok((dl, fl + sk))
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user