Mirror of https://github.com/oobabooga/text-generation-webui.git (synced 2024-11-22 08:07:56 +01:00)
Add support for loading multiple URLs into superbig
This commit is contained in:
Parent: 04eca9b65b
Commit: befa307c42
@@ -92,18 +92,25 @@ def feed_file_into_collector(file, chunk_len, chunk_count):
|
|||||||
yield i
|
yield i
|
||||||
|
|
||||||
|
|
||||||
def feed_url_into_collector(url, chunk_len, chunk_count):
|
def feed_url_into_collector(urls, chunk_len, chunk_count):
|
||||||
yield 'Loading the URL...'
|
urls = urls.strip().split('\n')
|
||||||
html = urlopen(url).read()
|
all_text = ''
|
||||||
soup = BeautifulSoup(html, features="html.parser")
|
cumulative = ''
|
||||||
for script in soup(["script", "style"]):
|
for url in urls:
|
||||||
script.extract()
|
cumulative += f'Loading {url}...\n\n'
|
||||||
|
yield cumulative
|
||||||
|
html = urlopen(url).read()
|
||||||
|
soup = BeautifulSoup(html, features="html.parser")
|
||||||
|
for script in soup(["script", "style"]):
|
||||||
|
script.extract()
|
||||||
|
|
||||||
text = soup.get_text()
|
text = soup.get_text()
|
||||||
lines = (line.strip() for line in text.splitlines())
|
lines = (line.strip() for line in text.splitlines())
|
||||||
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
|
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
|
||||||
text = '\n\n'.join(chunk for chunk in chunks if chunk)
|
text = '\n\n'.join(chunk for chunk in chunks if chunk)
|
||||||
for i in feed_data_into_collector(text, chunk_len, chunk_count):
|
all_text += text
|
||||||
|
|
||||||
|
for i in feed_data_into_collector(all_text, chunk_len, chunk_count):
|
||||||
yield i
|
yield i
|
||||||
|
|
||||||
|
|
||||||
@@ -173,7 +180,7 @@ def ui():
|
|||||||
update_data = gr.Button('Apply')
|
update_data = gr.Button('Apply')
|
||||||
|
|
||||||
with gr.Tab("URL input"):
|
with gr.Tab("URL input"):
|
||||||
url_input = gr.Textbox(lines=1, label='Input URL')
|
url_input = gr.Textbox(lines=10, label='Input URL', info='Enter one or more URLs separated by newline characters')
|
||||||
update_url = gr.Button('Apply')
|
update_url = gr.Button('Apply')
|
||||||
|
|
||||||
with gr.Tab("File input"):
|
with gr.Tab("File input"):
|
||||||
@@ -182,7 +189,7 @@ def ui():
|
|||||||
|
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
chunk_len = gr.Number(value=700, label='Chunk length', info='In characters, not tokens')
|
chunk_len = gr.Number(value=700, label='Chunk length', info='In characters, not tokens')
|
||||||
chunk_count = gr.Number(value=5, label='Chunk count', info='The number of closest-matching chunks to include in the prompt')
|
chunk_count = gr.Number(value=5, label='Chunk count', info='The number of closest-matching chunks to include in the prompt')
|
||||||
|
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
last_updated = gr.Markdown()
|
last_updated = gr.Markdown()
|
||||||
|
Loading…
Reference in New Issue
Block a user