mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-12-24 13:28:59 +01:00
Add support for input file/url in superbig
This commit is contained in:
parent
6afba065a1
commit
58f9a30be1
@ -1,11 +1,12 @@
|
||||
import datetime
|
||||
import re
|
||||
import textwrap
|
||||
from urllib.request import urlopen
|
||||
|
||||
import chromadb
|
||||
import gradio as gr
|
||||
import posthog
|
||||
import torch
|
||||
from bs4 import BeautifulSoup
|
||||
from chromadb.config import Settings
|
||||
from modules import shared
|
||||
from sentence_transformers import SentenceTransformer
|
||||
@ -68,10 +69,40 @@ collector = ChromaCollector(embedder)
|
||||
|
||||
def feed_data_into_collector(corpus):
|
||||
global collector
|
||||
|
||||
cumulative = ''
|
||||
chunk_len = 700
|
||||
cumulative += "Breaking the input dataset...\n\n"
|
||||
yield cumulative
|
||||
data_chunks = [corpus[i:i + chunk_len] for i in range(0, len(corpus), chunk_len)]
|
||||
cumulative += f"{len(data_chunks)} chunks have been found.\n\nAdding the chunks to the database...\n\n"
|
||||
yield cumulative
|
||||
collector.clear()
|
||||
collector.add(data_chunks)
|
||||
cumulative += "Done."
|
||||
yield cumulative
|
||||
|
||||
|
||||
def feed_file_into_collector(file):
|
||||
yield 'Reading the input dataset...\n\n'
|
||||
text = file.decode('utf-8')
|
||||
for i in feed_data_into_collector(text):
|
||||
yield i
|
||||
|
||||
|
||||
def feed_url_into_collector(url):
|
||||
yield 'Loading the URL...'
|
||||
html = urlopen(url).read()
|
||||
soup = BeautifulSoup(html, features="html.parser")
|
||||
for script in soup(["script", "style"]):
|
||||
script.extract()
|
||||
|
||||
text = soup.get_text()
|
||||
lines = (line.strip() for line in text.splitlines())
|
||||
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
|
||||
text = '\n\n'.join(chunk for chunk in chunks if chunk)
|
||||
for i in feed_data_into_collector(text):
|
||||
yield i
|
||||
|
||||
|
||||
def input_modifier(string):
|
||||
@ -133,11 +164,23 @@ def ui():
|
||||
# Chat mode has to be handled differently, probably using a custom_generate_chat_prompt
|
||||
pass
|
||||
else:
|
||||
data_input = gr.Textbox(lines=20, label='Input data', info='Paste your input data below and then click on Apply before generating.')
|
||||
with gr.Row():
|
||||
update = gr.Button('Apply')
|
||||
last_updated = gr.Markdown()
|
||||
with gr.Column():
|
||||
with gr.Tab("Text input"):
|
||||
data_input = gr.Textbox(lines=20, label='Input data')
|
||||
update_data = gr.Button('Apply')
|
||||
|
||||
update.click(
|
||||
feed_data_into_collector, data_input, None).then(
|
||||
lambda: "Last updated on " + str(datetime.datetime.now()), None, last_updated, show_progress=False)
|
||||
with gr.Tab("URL input"):
|
||||
url_input = gr.Textbox(lines=1, label='Input URL')
|
||||
update_url = gr.Button('Apply')
|
||||
|
||||
with gr.Tab("File input"):
|
||||
file_input = gr.File(label='Input file', type='binary')
|
||||
update_file = gr.Button('Apply')
|
||||
|
||||
with gr.Column():
|
||||
last_updated = gr.Markdown()
|
||||
|
||||
update_data.click(feed_data_into_collector, data_input, last_updated, show_progress=False)
|
||||
update_url.click(feed_url_into_collector, url_input, last_updated, show_progress=False)
|
||||
update_file.click(feed_file_into_collector, file_input, last_updated, show_progress=False)
|
||||
|
Loading…
Reference in New Issue
Block a user