Add support for loading multiple URLs into superbig

oobabooga 2023-05-07 11:07:16 -03:00
parent 04eca9b65b
commit befa307c42


@@ -92,8 +92,13 @@ def feed_file_into_collector(file, chunk_len, chunk_count):
         yield i
 
 
-def feed_url_into_collector(url, chunk_len, chunk_count):
-    yield 'Loading the URL...'
-    html = urlopen(url).read()
-    soup = BeautifulSoup(html, features="html.parser")
-    for script in soup(["script", "style"]):
+def feed_url_into_collector(urls, chunk_len, chunk_count):
+    urls = urls.strip().split('\n')
+    all_text = ''
+    cumulative = ''
+    for url in urls:
+        cumulative += f'Loading {url}...\n\n'
+        yield cumulative
+        html = urlopen(url).read()
+        soup = BeautifulSoup(html, features="html.parser")
+        for script in soup(["script", "style"]):
@@ -103,7 +108,9 @@ def feed_url_into_collector(url, chunk_len, chunk_count):
-    lines = (line.strip() for line in text.splitlines())
-    chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
-    text = '\n\n'.join(chunk for chunk in chunks if chunk)
-    for i in feed_data_into_collector(text, chunk_len, chunk_count):
+        lines = (line.strip() for line in text.splitlines())
+        chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
+        text = '\n\n'.join(chunk for chunk in chunks if chunk)
+        all_text += text
+
+    for i in feed_data_into_collector(all_text, chunk_len, chunk_count):
         yield i
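Pieced together, the two hunks above turn the single-URL generator into one that walks a newline-separated list of URLs, streams a cumulative "Loading ..." message for each page, and only chunks the combined text once every page has been scraped. A rough sketch of the resulting function is below; the soup.get_text() step falls between the two hunks and the exact indentation is inferred from context rather than taken from the diff, and feed_data_into_collector is the existing helper from the same file.

from urllib.request import urlopen

from bs4 import BeautifulSoup


def feed_url_into_collector(urls, chunk_len, chunk_count):
    # One URL per line; surrounding whitespace is stripped first.
    urls = urls.strip().split('\n')
    all_text = ''
    cumulative = ''
    for url in urls:
        # Yield a growing status string so the UI can show per-URL progress.
        cumulative += f'Loading {url}...\n\n'
        yield cumulative
        html = urlopen(url).read()
        soup = BeautifulSoup(html, features="html.parser")
        for script in soup(["script", "style"]):
            script.extract()

        text = soup.get_text()  # assumed: this step sits between the two hunks above
        lines = (line.strip() for line in text.splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
        text = '\n\n'.join(chunk for chunk in chunks if chunk)
        all_text += text

    # Only the concatenated text of all pages is fed into the collector.
    for i in feed_data_into_collector(all_text, chunk_len, chunk_count):
        yield i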
@@ -173,7 +180,7 @@ def ui():
             update_data = gr.Button('Apply')
 
         with gr.Tab("URL input"):
-            url_input = gr.Textbox(lines=1, label='Input URL')
+            url_input = gr.Textbox(lines=10, label='Input URL', info='Enter one or more URLs separated by newline characters')
             update_url = gr.Button('Apply')
 
         with gr.Tab("File input"):