# Controls for loading data from URLs: the URL list itself, a clean-up
# toggle, and the number of download threads. Initial values for the
# toggle and the thread count are read from `parameters`.
url_input = gr.Textbox(
    lines=10,
    label='Input URLs',
    info='Enter one or more URLs separated by newline characters.',
)
strong_cleanup = gr.Checkbox(
    value=parameters.get_is_strong_cleanup(),
    label='Strong cleanup',
    info='Only keeps html elements that look like long-form text.',
)
# precision=0 asks the Number component for a whole-number value.
threads = gr.Number(
    value=parameters.get_num_threads(),
    label='Threads',
    info='The number of threads to use while downloading the URLs.',
    precision=0,
)
# Chunking controls. Each textbox is pre-filled from the current value
# held by `parameters`.
chunk_len = gr.Textbox(
    value=parameters.get_chunk_len(),
    label='Chunk length',
    info='In characters, not tokens. This value is used when you click on "Load data".',
)
chunk_regex = gr.Textbox(
    value=parameters.get_chunk_regex(),
    label='Chunk regex',
    info='Will specifically add the captured text to the embeddings.',
)
context_len = gr.Textbox(
    value=parameters.get_context_len(),
    label='Context length',
    info='In characters, not tokens. How much context to load around each chunk.',
)

# Show the separator in escaped form so that control characters (for
# example a newline) appear as visible escape sequences in the textbox.
escaped_chunk_separator = codecs.encode(
    parameters.get_chunk_separator(), 'unicode_escape'
).decode()
chunk_sep = gr.Textbox(
    value=escaped_chunk_separator,
    label='Chunk separator',
    info='Used to manually split chunks. Manually split chunks longer than chunk length are split again. This value is used when you click on "Load data".',
)
# Controls for how retrieved chunks are injected into the prompt: how
# many chunks, the token budget, and the text placed around/between them.
chunk_count = gr.Number(
    value=parameters.get_chunk_count(),
    label='Chunk count',
    info='The number of closest-matching chunks to include in the prompt.',
)
max_token_count = gr.Number(
    value=parameters.get_max_token_count(),
    label='Max Context Tokens',
    info='The context length in tokens will not exceed this value.',
)

# The three text snippets below are shown in escaped form so that control
# characters are visible (and editable) as escape sequences.
escaped_prefix = codecs.encode(parameters.get_prefix(), 'unicode_escape').decode()
prefix = gr.Textbox(
    value=escaped_prefix,
    label='Prefix',
    info='What to put before the injection point.',
)

escaped_data_separator = codecs.encode(
    parameters.get_data_separator(), 'unicode_escape'
).decode()
data_separator = gr.Textbox(
    value=escaped_data_separator,
    label='Data separator',
    info='When multiple pieces of distant data are added, they might be unrelated. It\'s important to separate them.',
)

escaped_postfix = codecs.encode(parameters.get_postfix(), 'unicode_escape').decode()
postfix = gr.Textbox(
    value=escaped_postfix,
    label='Postfix',
    info='What to put after the injection point.',
)
# Two chat-related checkboxes placed together in one row. Both are only
# visible when shared.is_chat() is true.
# NOTE(review): the original text read `withgr.Row():` with an unindented
# body, which is not valid Python. The row is assumed to hold exactly
# these two checkboxes — confirm against the intended layout.
with gr.Row():
    manual = gr.Checkbox(
        value=parameters.get_is_manual(),
        label="Is Manual",
        info="Manually specify when to use ChromaDB. Insert `!c` at the start or end of the message to trigger a query.",
        visible=shared.is_chat(),
    )
    add_chat_to_data = gr.Checkbox(
        value=parameters.get_add_chat_to_data(),
        label="Add Chat to Data",
        info="Automatically feed the chat history as you chat.",
        visible=shared.is_chat(),
    )
# Radio selector for where retrieved text is injected in chat/instruct
# mode. The choices are the strategy constants exposed by `parameters`;
# the control is only visible when shared.is_chat() is true.
injection_strategy_choices = [
    parameters.PREPEND_TO_LAST,
    parameters.APPEND_TO_LAST,
    parameters.HIJACK_LAST_IN_CONTEXT,
]
injection_strategy = gr.Radio(
    choices=injection_strategy_choices,
    value=parameters.get_injection_strategy(),
    label='Injection Strategy',
    info='Where to inject the messages in chat or instruct mode.',
    visible=shared.is_chat(),
)
# API service switch and port, placed together in one row.
# NOTE(review): the original text read `withgr.Row():` with an unindented
# body, which is not valid Python. The row is assumed to hold exactly
# these two controls — confirm against the intended layout.
with gr.Row():
    api_on = gr.Checkbox(
        value=parameters.get_api_on(),
        label="Turn on API",
        info="Check this to turn on the API service.",
    )
    api_port = gr.Number(
        value=parameters.get_api_port(),
        label="API Port",
        info="The port on which the API service will run.",
    )
],value=preprocess_set_choices,interactive=True,info='How to preprocess the text before it is turned into an embedding.')
# Number-preprocessing options, placed together in one row: which
# conversion method to use and the digit-count threshold for applying it.
# NOTE(review): the original text read `withgr.Row():` with an unindented
# body, which is not valid Python. The row is assumed to hold exactly
# these two controls — confirm against the intended layout.
with gr.Row():
    num_conversion = gr.Dropdown(
        choices=[
            parameters.NUM_TO_WORD_METHOD,
            parameters.NUM_TO_CHAR_METHOD,
            parameters.NUM_TO_CHAR_LONG_METHOD,
            'None',
        ],
        value=parameters.get_num_conversion_strategy(),
        label="Number Conversion Method",
        info='How to preprocess numbers before creating the embeddings.',
        interactive=True,
    )
    min_number_length = gr.Number(
        value=parameters.get_min_num_length(),
        label='Number Length Threshold',
        info='In digits. Only numbers that have at least that many digits will be converted.',
        interactive=True,
    )
# Controls for handling duplicate / merged embeddings: the start-index
# delta used to discard duplicates and the strategy used to compute the
# distance of a merged piece.
delta_start = gr.Number(
    value=parameters.get_delta_start(),
    label='Delta Start Index',
    info='If the system encounters two identical embeddings, and they both start within the same delta, then only the first will be considered.',
    interactive=True,
)

distance_strategy_choices = [
    parameters.DIST_MIN_STRATEGY,
    parameters.DIST_HARMONIC_STRATEGY,
    parameters.DIST_GEOMETRIC_STRATEGY,
    parameters.DIST_ARITHMETIC_STRATEGY,
]
new_dist_strat = gr.Dropdown(
    choices=distance_strategy_choices,
    value=parameters.get_new_dist_strategy(),
    label="Distance Strategy",
    info='When two embedding texts are merged, the distance of the new piece will be decided using one of these strategies.',
    interactive=True,
)
# Summarization threshold (in sentences) and the cut-off slider for what
# counts as a "significant" distance. The slider ranges from 0.8 to 2.
# The help text for the threshold previously misspelled "minimum".
min_sentences = gr.Number(
    value=parameters.get_min_num_sentences(),
    label='Summary Threshold',
    info='In sentences. The minimum number of sentences to trigger text-rank summarization.',
    interactive=True,
)
significant_level = gr.Slider(
    0.8,
    2,
    value=parameters.get_significant_level(),
    label='Significant Level',
    info='Defines the cut-off for what is considered a "significant" distance relative to the median distance among the returned samples.',
    interactive=True,
)
# Time-weighing sliders: steepness ranges over [0.01, 1.0] and power over
# [0.0, 1.0]. The help text for the power slider previously misspelled
# "influential".
time_steepness = gr.Slider(
    0.01,
    1.0,
    value=parameters.get_time_steepness(),
    label='Time Weighing Steepness',
    info='How differently two close excerpts are going to be weighed.',
)
time_power = gr.Slider(
    0.0,
    1.0,
    value=parameters.get_time_power(),
    label='Time Weighing Power',
    info='How influential is the weighing. At 1.0, old entries won\'t be considered',
)
# "Benchmark" tab: buttons to run the benchmark and the optimizer, plus
# the number of optimization steps.
# NOTE(review): the original text read `withgr.Tab("Benchmark"):` with an
# unindented body, which is not valid Python. The tab is assumed to hold
# the three controls below; further controls may follow outside this
# view — confirm against the intended layout.
with gr.Tab("Benchmark"):
    benchmark_button = gr.Button('Benchmark')
    optimize_button = gr.Button('Optimize')
    optimization_steps = gr.Number(
        value=parameters.get_optimization_steps(),
        label='Optimization Steps',
        info='For how many steps to optimize.',
        interactive=True,
    )