From c239d86bd8babf69abe94fc5f5647656cbb7263f Mon Sep 17 00:00:00 2001 From: nearlynithin Date: Fri, 7 Nov 2025 15:11:04 +0530 Subject: [PATCH] ref: moved discovery service to api --- api/main.py | 3 +- discovery/main.py => api/routers/discovery.py | 28 ++----- requirements.txt | 83 ++++++++++++++++++- 3 files changed, 93 insertions(+), 21 deletions(-) rename discovery/main.py => api/routers/discovery.py (96%) diff --git a/api/main.py b/api/main.py index 7904c89..5c1311b 100644 --- a/api/main.py +++ b/api/main.py @@ -8,7 +8,7 @@ from fastapi.middleware.cors import CORSMiddleware from fastapi.staticfiles import StaticFiles import os -from api.routers import analyze, clean +from api.routers import analyze, clean, discovery # Create FastAPI app app = FastAPI( @@ -37,6 +37,7 @@ app.mount("/reports", StaticFiles(directory=reports_dir), name="reports") # Include routers app.include_router(analyze.router, prefix="/api", tags=["AI Governance"]) app.include_router(clean.router, prefix="/api", tags=["Data Cleaning"]) +app.include_router(discovery.router, prefix="/api", tags=["Discover sources"]) @app.get("/") async def root(): diff --git a/discovery/main.py b/api/routers/discovery.py similarity index 96% rename from discovery/main.py rename to api/routers/discovery.py index 789c997..6f0a415 100644 --- a/discovery/main.py +++ b/api/routers/discovery.py @@ -1,19 +1,13 @@ +from spacy.matcher import PhraseMatcher, Matcher +from fastapi import APIRouter, File, UploadFile +from fastapi.responses import FileResponse +from pathlib import Path +from collections import defaultdict import csv, tempfile import re -from pathlib import Path -from collections import Counter -from datetime import datetime -from fastapi import FastAPI, File, UploadFile -from fastapi.responses import FileResponse -import uvicorn -import pprint - -app = FastAPI() - import spacy -from spacy.matcher import PhraseMatcher, Matcher -from collections import defaultdict -import re + +router = APIRouter() try: nlp = spacy.load("en_core_web_trf") @@ -179,7 +173,7 @@ fieldnames = [ # this route will accept every txt/log file that is not csv -@app.post("/api/files") +@router.post("/files") async def postFile(file: UploadFile): if file.filename.endswith("csv"): return {"error" : "Cannot accept csv files"} @@ -226,11 +220,7 @@ async def postFile(file: UploadFile): writer.writerow(row) - print(pprint.pprint(row)) return FileResponse( temp_path, media_type="text/csv", filename="dataset.csv" - ) - -if __name__ == '__main__': - uvicorn.run(app) \ No newline at end of file + ) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index be42f1d..b4e9a2c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,4 +20,85 @@ python-multipart>=0.0.6 # torch>=2.0.0 --index-url https://download.pytorch.org/whl/cu121 # Chatbot (WIP - not exposed in API yet) -gpt4all>=2.0.0 \ No newline at end of file +gpt4all>=2.0.0annotated-doc==0.0.3 +annotated-types==0.7.0 +anyio==4.11.0 +blis==1.3.0 +catalogue==2.0.10 +certifi==2025.10.5 +cffi==2.0.0 +charset-normalizer==3.4.4 +click==8.3.0 +cloudpathlib==0.23.0 +confection==0.1.5 +cryptography==44.0.3 +cymem==2.0.11 +download==0.3.5 +en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl +exceptiongroup==1.3.0 +fastapi==0.121.0 +filelock==3.20.0 +gpt4all==2.8.2 +h11==0.16.0 +httpcore==1.0.9 +httptools==0.7.1 +httpx==0.28.1 +idna==3.11 +Jinja2==3.1.6 +joblib==1.5.2 +langcodes==3.5.0 +language_data==1.3.0 +marisa-trie==1.3.1 +markdown-it-py==4.0.0 +MarkupSafe==3.0.3 +mdurl==0.1.2 +murmurhash==1.0.13 +numpy==2.2.6 +ollama==0.6.0 +packaging==25.0 +pandas==2.3.3 +phonenumbers==9.0.18 +preshed==3.0.10 +presidio_analyzer==2.2.360 +presidio_anonymizer==2.2.360 +pycparser==2.23 +pydantic==2.12.4 +pydantic_core==2.41.5 +Pygments==2.19.2 +python-dateutil==2.9.0.post0 +python-dotenv==1.2.1 +python-multipart==0.0.20 +pytz==2025.2 +PyYAML==6.0.3 +regex==2025.11.3 +requests==2.32.5 +requests-file==3.0.1 +rich==14.2.0 +scikit-learn==1.7.2 +scipy==1.15.3 +shellingham==1.5.4 +six==1.17.0 +smart_open==7.4.4 +sniffio==1.3.1 +spacy==3.8.7 +spacy-legacy==3.0.12 +spacy-loggers==1.0.5 +srsly==2.5.1 +starlette==0.49.3 +thinc==8.3.8 +threadpoolctl==3.6.0 +tldextract==5.3.0 +tqdm==4.67.1 +typer==0.20.0 +typer-slim==0.20.0 +typing-inspection==0.4.2 +typing_extensions==4.15.0 +tzdata==2025.2 +urllib3==2.5.0 +uvicorn==0.38.0 +uvloop==0.22.1 +wasabi==1.1.3 +watchfiles==1.1.1 +weasel==0.4.2 +websockets==15.0.1 +wrapt==2.0.1