import os
import json
# --- Search configuration ---
search_text = "What is the capital of France?"
model = "text-embedding-3-large"
schema = "your_schema" # Change to your source schema name
table = "your_table_name" # Change to your source table name (without 'rag_' prefix)

# Supported embedding models mapped to their vector dimensions.
EMBED_MODELS = {"text-embedding-3-large": 3072, "text-embedding-3-small": 1536}

# Connections to the Peliqan data warehouse and the OpenAI API (platform globals).
dbconn = pq.dbconnect(pq.DW_NAME)
openai_api = pq.connect('OpenAI')

# Vector dimension for the chosen model (raises KeyError for unknown models).
dimension = EMBED_MODELS[model]
def create_embedding(text, model, dimension):
    """Return an embedding vector for *text* from the OpenAI embeddings API.

    *model* is the embedding model name and *dimension* the requested
    vector size.  Returns the embedding (list of floats) or None when
    the response carries no embedding data.
    """
    payload = {
        "input": text,
        "model": model,
        "dimensions": dimension,
    }
    response = openai_api.get('embeddings', payload)
    # Defensive extraction: missing/empty "data" yields None instead of raising.
    return response.get("data", [{}])[0].get("embedding")
def search_rag(search_text, schema, table, model, dimension, top_k=5):
    """Find the *top_k* rows most similar to *search_text* in "<schema>"."rag_<table>".

    Embeds the search text with the given model/dimension, runs a pgvector
    similarity query against the warehouse, and returns a list of dicts
    (one per row, keyed by column name).  Returns [] when no embedding
    could be created or the query yields no rows.

    Security note: *schema*, *table* and *top_k* are interpolated directly
    into the SQL string — they must come from trusted configuration, never
    from end-user input.
    """
    search_embedding = create_embedding(search_text, model, dimension)
    if not search_embedding:
        return []

    # pgvector literal syntax: '[v1,v2,...]'
    embedding_str = "[" + ",".join(map(str, search_embedding)) + "]"
    # NOTE(review): <#> is pgvector's *negative inner product* operator, so
    # "1 - (embedding <#> q)" is 1 + dot(embedding, q), not cosine similarity.
    # Ranking DESC still orders by inner product, but if cosine similarity is
    # intended the <=> (cosine distance) operator should be used — confirm
    # against the table's vector index / operator class.
    search_query = f"""
    SELECT id, text, metadata,
    1 - (embedding <#> '{embedding_str}'::vector) AS similarity
    FROM "{schema}"."rag_{table}"
    ORDER BY similarity DESC
    LIMIT {top_k}
    """
    search_result = dbconn.execute(pq.DW_NAME, query=search_query)
    records = search_result["detail"]

    # Result comes back as a list of lists: first row is the column names,
    # remaining rows are values.  Convert to a list of dicts.
    if not records:
        return []
    headers = records[0]
    return [dict(zip(headers, row)) for row in records[1:]]
# Announce the search, execute it, and render the matching records.
status_message = f"Searching for: '{search_text}' using model {model}, embeddings from source table {schema}.{table} (using rag_{table})"
st.write(status_message)
results = search_rag(search_text, schema, table, model, dimension, top_k=5)
st.write(results)
# Example RAG Search via API calls.
#
# Add this script as an API handler in Peliqan.
# Add an API endpoint of type GET with path "/rag" and link it to this script.
# Example usage: GET https://api.eu.peliqan.io/1234/rag?search=What is the capital of France (1234 = your Peliqan account id)
import json
from urllib.parse import parse_qs
# --- API handler configuration ---
model = "text-embedding-3-large"
schema = "schema_name" # Change to your source schema name
table = "table_name" # Change to your source table name (without 'rag_' prefix)

# Supported embedding models mapped to their vector dimensions.
EMBED_MODELS = {"text-embedding-3-large": 3072, "text-embedding-3-small": 1536}

# Connections to the Peliqan data warehouse and the OpenAI API (platform globals).
dbconn = pq.dbconnect(pq.DW_NAME)
openai_api = pq.connect('OpenAI')

# Vector dimension for the chosen model (raises KeyError for unknown models).
dimension = EMBED_MODELS[model]
def create_embedding(text, model, dimension):
    """Return an embedding vector for *text* from the OpenAI embeddings API.

    *model* is the embedding model name and *dimension* the requested
    vector size.  Returns the embedding (list of floats) or None when
    the response carries no embedding data.
    """
    payload = {
        "input": text,
        "model": model,
        "dimensions": dimension,
    }
    response = openai_api.get('embeddings', payload)
    # Defensive extraction: missing/empty "data" yields None instead of raising.
    return response.get("data", [{}])[0].get("embedding")
def search_rag(search_text, schema, table, model, dimension, top_k=5):
    """Find the *top_k* rows most similar to *search_text* in "<schema>"."rag_<table>".

    Embeds the search text with the given model/dimension, runs a pgvector
    similarity query against the warehouse, and returns a list of dicts
    (one per row, keyed by column name).  Returns [] when no embedding
    could be created or the query yields no rows.

    Security note: *schema*, *table* and *top_k* are interpolated directly
    into the SQL string — they must come from trusted configuration, never
    from end-user input.
    """
    search_embedding = create_embedding(search_text, model, dimension)
    if not search_embedding:
        return []

    # pgvector literal syntax: '[v1,v2,...]'
    embedding_str = "[" + ",".join(map(str, search_embedding)) + "]"
    # NOTE(review): <#> is pgvector's *negative inner product* operator, so
    # "1 - (embedding <#> q)" is 1 + dot(embedding, q), not cosine similarity.
    # Ranking DESC still orders by inner product, but if cosine similarity is
    # intended the <=> (cosine distance) operator should be used — confirm
    # against the table's vector index / operator class.
    search_query = f"""
    SELECT id, text, metadata,
    1 - (embedding <#> '{embedding_str}'::vector) AS similarity
    FROM "{schema}"."rag_{table}"
    ORDER BY similarity DESC
    LIMIT {top_k}
    """
    search_result = dbconn.execute(pq.DW_NAME, query=search_query)
    records = search_result["detail"]

    # Result comes back as a list of lists: first row is the column names,
    # remaining rows are values.  Convert to a list of dicts.
    if not records:
        return []
    headers = records[0]
    return [dict(zip(headers, row)) for row in records[1:]]
def handler(request):
    """Peliqan API handler for GET /rag?search=<text>.

    Reads the "search" query parameter from the request, runs the RAG
    similarity search, and returns the matching records (serialized by
    the platform).  Returns an error payload when the parameter is
    missing or empty, instead of passing None into the embedding call.
    """
    # NOTE(review): assumes request['query_string'] is a str; parse_qs also
    # accepts bytes but would then produce bytes keys — confirm with the
    # platform's request contract.
    query_string = request['query_string']
    params = parse_qs(query_string)
    search_text = params.get("search", [None])[0]

    # Guard: the original flow would forward None to the embeddings API.
    if not search_text:
        return {"error": "Missing required query parameter 'search'"}

    results = search_rag(search_text, schema, table, model, dimension, top_k=5)

    # Logging
    print(f"Search: '{search_text}' using model {model}, embeddings from source table {schema}.{table} (using rag_{table})")
    print(results)
    return results