mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-11-22 08:07:56 +01:00
Add support for input file/url in superbig
This commit is contained in:
parent
6afba065a1
commit
58f9a30be1
@ -1,11 +1,12 @@
|
|||||||
import datetime
|
|
||||||
import re
|
import re
|
||||||
import textwrap
|
import textwrap
|
||||||
|
from urllib.request import urlopen
|
||||||
|
|
||||||
import chromadb
|
import chromadb
|
||||||
import gradio as gr
|
import gradio as gr
|
||||||
import posthog
|
import posthog
|
||||||
import torch
|
import torch
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
from chromadb.config import Settings
|
from chromadb.config import Settings
|
||||||
from modules import shared
|
from modules import shared
|
||||||
from sentence_transformers import SentenceTransformer
|
from sentence_transformers import SentenceTransformer
|
||||||
@ -68,10 +69,40 @@ collector = ChromaCollector(embedder)
|
|||||||
|
|
||||||
def feed_data_into_collector(corpus, chunk_len=700):
    """Split *corpus* into fixed-size chunks and load them into the collector.

    This is a generator. Every yielded value is the complete status log so
    far (not just the newest line), so a consumer that only displays the most
    recent value still shows the whole progress history.

    Args:
        corpus: The text to index.
        chunk_len: Maximum number of characters per chunk. Defaults to 700,
            the value that was previously hard-coded.

    Yields:
        The cumulative status message after each processing stage.

    Raises:
        ValueError: If ``chunk_len`` is not a positive integer.
    """
    if chunk_len <= 0:
        # A zero step makes range() raise an unhelpful error and a negative
        # step would silently produce no chunks at all.
        raise ValueError("chunk_len must be a positive integer")

    # The module-level `collector` is only read here, never rebound, so no
    # `global` declaration is required.
    cumulative = "Breaking the input dataset...\n\n"
    yield cumulative

    data_chunks = [corpus[i:i + chunk_len] for i in range(0, len(corpus), chunk_len)]
    cumulative += f"{len(data_chunks)} chunks have been found.\n\nAdding the chunks to the database...\n\n"
    yield cumulative

    # Clear first so the new chunks replace the previous contents instead of
    # being appended to them.
    collector.clear()
    collector.add(data_chunks)
    cumulative += "Done."
    yield cumulative
|
||||||
|
|
||||||
|
|
||||||
|
def feed_file_into_collector(file):
    """Decode an uploaded file as UTF-8 text and index it.

    This is a generator that yields status messages: first a notice that the
    file is being read, then every message produced by
    ``feed_data_into_collector``.

    Args:
        file: The raw file contents as ``bytes``, or ``None`` when nothing
            has been uploaded.

    Yields:
        Status messages describing the progress.

    Raises:
        UnicodeDecodeError: If the contents are not valid UTF-8.
    """
    if file is None:
        # Without this guard, pressing "Apply" before uploading anything
        # fails with an AttributeError on `None.decode`.
        yield 'No input file was provided.'
        return

    yield 'Reading the input dataset...\n\n'
    text = file.decode('utf-8')
    yield from feed_data_into_collector(text)
|
||||||
|
|
||||||
|
|
||||||
|
def feed_url_into_collector(url):
    """Download *url*, reduce the page to its visible text, and index it.

    This is a generator that yields status messages: first a loading notice,
    then every message produced by ``feed_data_into_collector``.

    Args:
        url: Address of the page to fetch.

    Yields:
        Status messages describing the progress.

    Raises:
        urllib.error.URLError: If the page cannot be retrieved.
        ValueError: If ``url`` is not something ``urlopen`` recognises.
    """
    if not url or not url.strip():
        # urlopen('') fails with an opaque "unknown url type" error; report
        # the empty input in a readable way instead.
        yield 'No URL was provided.'
        return

    yield 'Loading the URL...'

    # NOTE(review): urlopen also accepts non-HTTP schemes (e.g. file://).
    # Consider restricting the scheme if this UI can be reached by untrusted
    # users -- confirm against the deployment model.
    # The context manager makes sure the connection is closed even when
    # reading fails.
    with urlopen(url.strip()) as response:
        html = response.read()

    soup = BeautifulSoup(html, features="html.parser")

    # Script and style elements contain no human-readable text.
    for tag in soup(["script", "style"]):
        tag.extract()

    text = soup.get_text()
    lines = (line.strip() for line in text.splitlines())
    # Split on runs of two spaces so that several headlines sharing one line
    # are separated. Splitting on a single space would turn every word into
    # its own paragraph and destroy the sentences.
    phrases = (phrase.strip() for line in lines for phrase in line.split("  "))
    text = '\n\n'.join(phrase for phrase in phrases if phrase)

    yield from feed_data_into_collector(text)
|
||||||
|
|
||||||
|
|
||||||
def input_modifier(string):
|
def input_modifier(string):
|
||||||
@ -133,11 +164,23 @@ def ui():
|
|||||||
# Chat mode has to be handled differently, probably using a custom_generate_chat_prompt
|
# Chat mode has to be handled differently, probably using a custom_generate_chat_prompt
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
data_input = gr.Textbox(lines=20, label='Input data', info='Paste your input data below and then click on Apply before generating.')
|
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
update = gr.Button('Apply')
|
with gr.Column():
|
||||||
last_updated = gr.Markdown()
|
with gr.Tab("Text input"):
|
||||||
|
data_input = gr.Textbox(lines=20, label='Input data')
|
||||||
|
update_data = gr.Button('Apply')
|
||||||
|
|
||||||
update.click(
|
with gr.Tab("URL input"):
|
||||||
feed_data_into_collector, data_input, None).then(
|
url_input = gr.Textbox(lines=1, label='Input URL')
|
||||||
lambda: "Last updated on " + str(datetime.datetime.now()), None, last_updated, show_progress=False)
|
update_url = gr.Button('Apply')
|
||||||
|
|
||||||
|
with gr.Tab("File input"):
|
||||||
|
file_input = gr.File(label='Input file', type='binary')
|
||||||
|
update_file = gr.Button('Apply')
|
||||||
|
|
||||||
|
with gr.Column():
|
||||||
|
last_updated = gr.Markdown()
|
||||||
|
|
||||||
|
update_data.click(feed_data_into_collector, data_input, last_updated, show_progress=False)
|
||||||
|
update_url.click(feed_url_into_collector, url_input, last_updated, show_progress=False)
|
||||||
|
update_file.click(feed_file_into_collector, file_input, last_updated, show_progress=False)
|
||||||
|
Loading…
Reference in New Issue
Block a user