mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-12-23 21:18:00 +01:00
Substitute superbooga Beautiful Soup parser (#2996)
* add lxml to requirements add lxml to requirements * Change the Beautiful Soup parser to the "lxml" parser, which might be more tolerant of certain kinds of parsing errors than "html.parser" and quicker at the same time.
This commit is contained in:
parent
ab044a5a44
commit
1fc0b5041e
@ -2,3 +2,4 @@ beautifulsoup4==4.12.2
|
||||
chromadb==0.3.18
|
||||
posthog==2.4.2
|
||||
sentence_transformers==2.2.2
|
||||
lxml
|
||||
|
@ -69,7 +69,7 @@ def feed_url_into_collector(urls, chunk_len, chunk_sep, strong_cleanup, threads)
|
||||
cumulative += 'Processing the HTML sources...'
|
||||
yield cumulative
|
||||
for content in contents:
|
||||
soup = BeautifulSoup(content, features="html.parser")
|
||||
soup = BeautifulSoup(content, features="lxml")
|
||||
for script in soup(["script", "style"]):
|
||||
script.extract()
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user