Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 17 additions & 9 deletions infra/scripts/index_scripts/03_cu_process_data_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,10 @@ def create_tables():

create_tables()

def get_field_value(fields, field_name, default=""):
    """Return the 'valueString' of an analyzer result field, or *default*.

    Args:
        fields: Mapping of field name to field entry (each entry is expected
            to be a mapping that may carry a 'valueString' key). ``None`` is
            tolerated and treated as having no fields.
        field_name: Name of the field to look up.
        default: Value returned when the field cannot be resolved.

    Returns:
        The field's 'valueString' when it is present and not ``None``;
        otherwise *default*. An empty string value is returned as-is.
    """
    # A key can be present with a null entry; `.get(name, {})` would hand back
    # None in that case and the chained `.get` would raise AttributeError.
    field = fields.get(field_name) if fields is not None else None
    if field is None:
        return default

    # Likewise, an explicit null 'valueString' should fall back to the default
    # rather than leaking None to callers (e.g. into int() or a DB insert).
    value = field.get('valueString')
    return default if value is None else value

# Process files and insert into DB and Search
conversationIds, docs, counter = [], [], 0
for path in paths:
Expand All @@ -325,17 +329,21 @@ def create_tables():
start_timestamp = datetime.strptime(start_time, timestamp_format)
conversation_id = file_name.split('convo_', 1)[1].split('_')[0]
conversationIds.append(conversation_id)
duration = int(result['result']['contents'][0]['fields']['Duration']['valueString'])
fields = result['result']['contents'][0]['fields']
duration_str = get_field_value(fields, 'Duration', '0')
try:
duration = int(duration_str)
except (ValueError, TypeError):
duration = 0
end_timestamp = str(start_timestamp + timedelta(seconds=duration)).split(".")[0]
start_timestamp = str(start_timestamp).split(".")[0]
fields = result['result']['contents'][0]['fields']
summary = fields['summary']['valueString']
satisfied = fields['satisfied']['valueString']
sentiment = fields['sentiment']['valueString']
topic = fields['topic']['valueString']
key_phrases = fields['keyPhrases']['valueString']
complaint = fields['complaint']['valueString']
content = fields['content']['valueString']
summary = get_field_value(fields, 'summary')
satisfied = get_field_value(fields, 'satisfied')
sentiment = get_field_value(fields, 'sentiment')
topic = get_field_value(fields, 'topic')
key_phrases = get_field_value(fields, 'keyPhrases')
complaint = get_field_value(fields, 'complaint')
content = get_field_value(fields, 'content')
cursor.execute(
"INSERT INTO processed_data (ConversationId, EndTime, StartTime, Content, summary, satisfied, sentiment, topic, key_phrases, complaint) VALUES (?,?,?,?,?,?,?,?,?,?)",
(conversation_id, end_timestamp, start_timestamp, content, summary, satisfied, sentiment, topic, key_phrases, complaint)
Expand Down
53 changes: 34 additions & 19 deletions infra/scripts/index_scripts/04_cu_process_custom_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,10 @@ def create_tables():

create_tables()

def get_field_value(fields, field_name, default=""):
    """Return the 'valueString' of an analyzer result field, or *default*.

    Args:
        fields: Mapping of field name to field entry (each entry is expected
            to be a mapping that may carry a 'valueString' key). ``None`` is
            tolerated and treated as having no fields.
        field_name: Name of the field to look up.
        default: Value returned when the field cannot be resolved.

    Returns:
        The field's 'valueString' when it is present and not ``None``;
        otherwise *default*. An empty string value is returned as-is.
    """
    # A key can be present with a null entry; `.get(name, {})` would hand back
    # None in that case and the chained `.get` would raise AttributeError.
    field = fields.get(field_name) if fields is not None else None
    if field is None:
        return default

    # Likewise, an explicit null 'valueString' should fall back to the default
    # rather than leaking None to callers (e.g. into int() or a DB insert).
    value = field.get('valueString')
    return default if value is None else value

ANALYZER_ID = "ckm-json"
# Process files and insert into DB and Search - transcripts
conversationIds, docs, counter = [], [], 0
Expand All @@ -358,17 +362,23 @@ def create_tables():
start_timestamp = datetime.strptime(start_time, timestamp_format)
conversation_id = file_name.split('convo_', 1)[1].split('_')[0]
conversationIds.append(conversation_id)
duration = int(result['result']['contents'][0]['fields']['Duration']['valueString'])

fields = result['result']['contents'][0]['fields']
duration_str = get_field_value(fields, 'Duration', '0')
try:
duration = int(duration_str)
except (ValueError, TypeError):
duration = 0

end_timestamp = str(start_timestamp + timedelta(seconds=duration)).split(".")[0]
start_timestamp = str(start_timestamp).split(".")[0]
fields = result['result']['contents'][0]['fields']
summary = fields['summary']['valueString']
satisfied = fields['satisfied']['valueString']
sentiment = fields['sentiment']['valueString']
topic = fields['topic']['valueString']
key_phrases = fields['keyPhrases']['valueString']
complaint = fields['complaint']['valueString']
content = fields['content']['valueString']
summary = get_field_value(fields, 'summary')
satisfied = get_field_value(fields, 'satisfied')
sentiment = get_field_value(fields, 'sentiment')
topic = get_field_value(fields, 'topic')
key_phrases = get_field_value(fields, 'keyPhrases')
complaint = get_field_value(fields, 'complaint')
content = get_field_value(fields, 'content')
cursor.execute(
"INSERT INTO processed_data (ConversationId, EndTime, StartTime, Content, summary, satisfied, sentiment, topic, key_phrases, complaint) VALUES (?,?,?,?,?,?,?,?,?,?)",
(conversation_id, end_timestamp, start_timestamp, content, summary, satisfied, sentiment, topic, key_phrases, complaint)
Expand Down Expand Up @@ -409,22 +419,27 @@ def create_tables():
timestamp_format = "%Y-%m-%d %H_%M_%S" # Adjust format if necessary
start_timestamp = datetime.strptime(start_time, timestamp_format)

conversation_id = file_name.split('convo_', 1)[1].split('_')[0]
conversationIds.append(conversation_id)

duration = int(result['result']['contents'][0]['fields']['Duration']['valueString'])
fields = result['result']['contents'][0]['fields']
duration_str = get_field_value(fields, 'Duration', '0')
try:
duration = int(duration_str)
except (ValueError, TypeError):
duration = 0

end_timestamp = str(start_timestamp + timedelta(seconds=duration))
end_timestamp = end_timestamp.split(".")[0]
start_timestamp = str(start_timestamp).split(".")[0]

summary = result['result']['contents'][0]['fields']['summary']['valueString']
satisfied = result['result']['contents'][0]['fields']['satisfied']['valueString']
sentiment = result['result']['contents'][0]['fields']['sentiment']['valueString']
topic = result['result']['contents'][0]['fields']['topic']['valueString']
key_phrases = result['result']['contents'][0]['fields']['keyPhrases']['valueString']
complaint = result['result']['contents'][0]['fields']['complaint']['valueString']
content = result['result']['contents'][0]['fields']['content']['valueString']
# print(topic)
summary = get_field_value(fields, 'summary')
satisfied = get_field_value(fields, 'satisfied')
sentiment = get_field_value(fields, 'sentiment')
topic = get_field_value(fields, 'topic')
key_phrases = get_field_value(fields, 'keyPhrases')
complaint = get_field_value(fields, 'complaint')
content = get_field_value(fields, 'content')

cursor.execute(f"INSERT INTO processed_data (ConversationId, EndTime, StartTime, Content, summary, satisfied, sentiment, topic, key_phrases, complaint) VALUES (?,?,?,?,?,?,?,?,?,?)", (conversation_id, end_timestamp, start_timestamp, content, summary, satisfied, sentiment, topic, key_phrases, complaint))
conn.commit()

Expand Down
Loading