Skip to content

Commit 5243efe

Browse files
cdxker authored and skeptrunedev committed
bugfix: fix pdf2md OOM errors by only loading on demand
1 parent 5a1f29e commit 5243efe

File tree

1 file changed

+4
-5
lines changed

1 file changed

+4
-5
lines changed

pdf2md/server/src/workers/supervisor-worker.rs

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -129,20 +129,19 @@ pub async fn chunk_pdf(
129129
.render(pdf2image::Pages::All, None)
130130
.map_err(|err| ServiceError::BadRequest(format!("Failed to render PDF file {:?}", err)))?
131131
.into_iter()
132-
.skip(1)
133-
.collect::<Vec<_>>();
132+
.skip(1);
134133

135-
let num_pages = pages.len();
134+
let num_pages = pdf.page_count();
136135

137136
update_task_status(
138137
task.id,
139-
FileTaskStatus::ProcessingFile(num_pages as u32),
138+
FileTaskStatus::ProcessingFile(num_pages),
140139
&clickhouse_client,
141140
)
142141
.await?;
143142

144143
// Process each chunk
145-
for (i, page) in pages.into_iter().enumerate() {
144+
for (i, page) in pages.enumerate() {
146145
let file_name = format!("{}page{}.jpeg", task.id, i + 1);
147146
let mut buffer = Vec::new();
148147
page.write_to(&mut Cursor::new(&mut buffer), image::ImageFormat::Jpeg)

0 commit comments

Comments
 (0)