Spaces:
Sleeping
Sleeping
"""Example pipeline: fetch tweets, classify them with a zero-shot model,
and index the results into a local Elasticsearch instance.

Flow: TwitterSource.lookup -> ZeroShotClassificationAnalyzer.analyze_input
      -> ElasticSearchSink.send_data
"""
import logging
import sys
from pathlib import Path

from obsei.analyzer.classification_analyzer import (
    ClassificationAnalyzerConfig,
    ZeroShotClassificationAnalyzer,
)
from obsei.sink.elasticsearch_sink import ElasticSearchSink, ElasticSearchSinkConfig
from obsei.source.twitter_source import TwitterSource, TwitterSourceConfig

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# NOTE(review): dir_path is never used below — kept because it is a
# module-level name; confirm no external code relies on it before removing.
dir_path = Path(__file__).resolve().parent.parent

# Search configuration: tweets mentioning @Handle from the last hour,
# capped at 10, with the author expanded so user fields are populated.
source_config = TwitterSourceConfig(
    keywords="@Handle",
    lookup_period="1h",  # 1 Hour
    tweet_fields=[
        "author_id",
        "conversation_id",
        "created_at",
        "id",
        "public_metrics",
        "text",
    ],
    user_fields=["id", "name", "public_metrics", "username", "verified"],
    expansions=["author_id"],
    place_fields=None,
    max_tweets=10,
)
source = TwitterSource()

# Zero-shot classifier: no task-specific training needed; candidate labels
# are supplied at analysis time via ClassificationAnalyzerConfig below.
text_analyzer = ZeroShotClassificationAnalyzer(
    model_name_or_path="joeddav/bart-large-mnli-yahoo-answers",
)

# Start Elasticsearch server locally
# `docker run -d --name elasticsearch -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.9.2`
sink_config = ElasticSearchSinkConfig(
    host="localhost",
    port=9200,
    index_name="test",
)

# 1) Fetch tweets matching the source configuration.
source_response_list = source.lookup(source_config)
for idx, source_response in enumerate(source_response_list):
    # Lazy %-style args: formatting is skipped when INFO is disabled.
    logger.info("source_response#'%s'='%s'", idx, source_response.__dict__)

# 2) Classify each tweet against the candidate support-ticket labels.
analyzer_response_list = text_analyzer.analyze_input(
    source_response_list=source_response_list,
    analyzer_config=ClassificationAnalyzerConfig(
        labels=[
            "service",
            "delay",
            "tracking",
            "no response",
            "missing items",
            "delivery",
            "mask",
        ],
    ),
)
for idx, an_response in enumerate(analyzer_response_list):
    logger.info("analyzer_response#'%s'='%s'", idx, an_response.__dict__)

# 3) Index the analyzed responses into Elasticsearch.
sink = ElasticSearchSink()
sink_response = sink.send_data(analyzer_response_list, sink_config)
logger.info("sink_response='%s'", sink_response)