# Build a small synthetic copy of the "contacts" data with SDV, show the
# metadata / real / synthetic tables in the Streamlit UI, and store the
# synthetic rows. `pq` and `st` are expected to be in scope already
# (NOTE(review): presumably injected by the hosting runtime or imported
# earlier in the file -- confirm).
from sdv.metadata import SingleTableMetadata
from sdv.single_table import GaussianCopulaSynthesizer

# --- Load the real data -----------------------------------------------------
dbconn = pq.dbconnect(pq.DW_NAME)
# The closing quote was missing in the original, which made the whole file
# fail to parse.
my_query = 'select first_name, email, age from contacts where age<30 limit 100'
# df=True requests the result as a DataFrame, which is what the SDV calls
# below consume.
real_data = dbconn.fetch(pq.DW_NAME, query=my_query, df=True)

# --- Describe the table -----------------------------------------------------
metadata = SingleTableMetadata()
metadata.detect_from_dataframe(data=real_data)
# Override the auto-detected types for the two identifying columns and flag
# them as PII so the synthesizer treats them as sensitive values.
metadata.update_column(column_name='first_name', sdtype='name', pii=True)
metadata.update_column(column_name='email', sdtype='email', pii=True)

# --- Fit the model and sample synthetic rows --------------------------------
synthesizer = GaussianCopulaSynthesizer(metadata)
synthesizer.fit(real_data)
synthetic_data = synthesizer.sample(num_rows=10)

# --- Render the results -----------------------------------------------------
st.title("Synthetic data from real data")
st.header("Metadata")
st.text(metadata)
st.header("Real data")
st.dataframe(real_data)
st.header("Synthetic data")
st.dataframe(synthetic_data)

# --- Persist the synthetic rows ---------------------------------------------
# The original call placed a positional argument after a keyword argument,
# which is a SyntaxError. Both arguments are now positional, in the order the
# author wrote them (table name, then records).
# NOTE(review): confirm this order against the signature of pq.write_records.
pq.write_records('synthetic_contacts', synthetic_data)