ref: moved discovery service to api

This commit is contained in:
nearlynithin
2025-11-07 15:11:04 +05:30
parent 666461346b
commit c239d86bd8
3 changed files with 93 additions and 21 deletions

View File

@@ -8,7 +8,7 @@ from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
import os
from api.routers import analyze, clean
from api.routers import analyze, clean, discovery
# Create FastAPI app
app = FastAPI(
@@ -37,6 +37,7 @@ app.mount("/reports", StaticFiles(directory=reports_dir), name="reports")
# Include routers
app.include_router(analyze.router, prefix="/api", tags=["AI Governance"])
app.include_router(clean.router, prefix="/api", tags=["Data Cleaning"])
app.include_router(discovery.router, prefix="/api", tags=["Discover sources"])
@app.get("/")
async def root():

View File

@@ -1,19 +1,13 @@
from spacy.matcher import PhraseMatcher, Matcher
from fastapi import APIRouter, File, UploadFile
from fastapi.responses import FileResponse
from pathlib import Path
from collections import defaultdict
import csv, tempfile
import re
from pathlib import Path
from collections import Counter
from datetime import datetime
from fastapi import FastAPI, File, UploadFile
from fastapi.responses import FileResponse
import uvicorn
import pprint
app = FastAPI()
import spacy
from spacy.matcher import PhraseMatcher, Matcher
from collections import defaultdict
import re
router = APIRouter()
try:
nlp = spacy.load("en_core_web_trf")
@@ -179,7 +173,7 @@ fieldnames = [
# this route will accept every txt/log file that is not csv
@app.post("/api/files")
@router.post("/files")
async def postFile(file: UploadFile):
if file.filename.endswith("csv"):
return {"error" : "Cannot accept csv files"}
@@ -226,11 +220,7 @@ async def postFile(file: UploadFile):
writer.writerow(row)
print(pprint.pprint(row))
return FileResponse(
temp_path, media_type="text/csv", filename="dataset.csv"
)
if __name__ == '__main__':
uvicorn.run(app)
)

View File

@@ -20,4 +20,85 @@ python-multipart>=0.0.6
# torch>=2.0.0 --index-url https://download.pytorch.org/whl/cu121
# Chatbot (WIP - not exposed in API yet)
gpt4all>=2.0.0
gpt4all>=2.0.0
annotated-doc==0.0.3
annotated-types==0.7.0
anyio==4.11.0
blis==1.3.0
catalogue==2.0.10
certifi==2025.10.5
cffi==2.0.0
charset-normalizer==3.4.4
click==8.3.0
cloudpathlib==0.23.0
confection==0.1.5
cryptography==44.0.3
cymem==2.0.11
download==0.3.5
en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
exceptiongroup==1.3.0
fastapi==0.121.0
filelock==3.20.0
gpt4all==2.8.2
h11==0.16.0
httpcore==1.0.9
httptools==0.7.1
httpx==0.28.1
idna==3.11
Jinja2==3.1.6
joblib==1.5.2
langcodes==3.5.0
language_data==1.3.0
marisa-trie==1.3.1
markdown-it-py==4.0.0
MarkupSafe==3.0.3
mdurl==0.1.2
murmurhash==1.0.13
numpy==2.2.6
ollama==0.6.0
packaging==25.0
pandas==2.3.3
phonenumbers==9.0.18
preshed==3.0.10
presidio_analyzer==2.2.360
presidio_anonymizer==2.2.360
pycparser==2.23
pydantic==2.12.4
pydantic_core==2.41.5
Pygments==2.19.2
python-dateutil==2.9.0.post0
python-dotenv==1.2.1
python-multipart==0.0.20
pytz==2025.2
PyYAML==6.0.3
regex==2025.11.3
requests==2.32.5
requests-file==3.0.1
rich==14.2.0
scikit-learn==1.7.2
scipy==1.15.3
shellingham==1.5.4
six==1.17.0
smart_open==7.4.4
sniffio==1.3.1
spacy==3.8.7
spacy-legacy==3.0.12
spacy-loggers==1.0.5
srsly==2.5.1
starlette==0.49.3
thinc==8.3.8
threadpoolctl==3.6.0
tldextract==5.3.0
tqdm==4.67.1
typer==0.20.0
typer-slim==0.20.0
typing-inspection==0.4.2
typing_extensions==4.15.0
tzdata==2025.2
urllib3==2.5.0
uvicorn==0.38.0
uvloop==0.22.1
wasabi==1.1.3
watchfiles==1.1.1
weasel==0.4.2
websockets==15.0.1
wrapt==2.0.1