Spaces:
Running
Running
Update ingest.py
Browse files
ingest.py
CHANGED
|
@@ -67,9 +67,9 @@ def ingest_data():
|
|
| 67 |
# 3. BM25 (Lexical) - Tiktoken 사용
|
| 68 |
print("\n>>> [Step 3] Creating BM25 Index (with Tiktoken)...")
|
| 69 |
|
| 70 |
-
# GPT-
|
| 71 |
try:
|
| 72 |
-
tokenizer = tiktoken.encoding_for_model("gpt-
|
| 73 |
except KeyError:
|
| 74 |
tokenizer = tiktoken.get_encoding("cl100k_base")
|
| 75 |
|
|
@@ -90,7 +90,7 @@ def ingest_data():
|
|
| 90 |
pickle.dump(bm25_data, f)
|
| 91 |
|
| 92 |
print("✅ BM25 Index saved.")
|
| 93 |
-
print("\n
|
| 94 |
|
| 95 |
if __name__ == "__main__":
|
| 96 |
ingest_data()
|
|
|
|
| 67 |
# 3. BM25 (Lexical) - Tiktoken 사용
|
| 68 |
print("\n>>> [Step 3] Creating BM25 Index (with Tiktoken)...")
|
| 69 |
|
| 70 |
+
# GPT-5-mini 모델이 사용하는 토크나이저 로드
|
| 71 |
try:
|
| 72 |
+
tokenizer = tiktoken.encoding_for_model("gpt-5-mini")
|
| 73 |
except KeyError:
|
| 74 |
tokenizer = tiktoken.get_encoding("cl100k_base")
|
| 75 |
|
|
|
|
| 90 |
pickle.dump(bm25_data, f)
|
| 91 |
|
| 92 |
print("✅ BM25 Index saved.")
|
| 93 |
+
print("\n Ingestion Complete!")
|
| 94 |
|
| 95 |
if __name__ == "__main__":
|
| 96 |
ingest_data()
|