api/routes/modules/assignments/plagiarism/
get.rs

1use axum::{extract::{State, Path, Query}, http::StatusCode, Json, response::IntoResponse};
2use db::models::{
3    assignment_submission::{self, Entity as SubmissionEntity},
4    plagiarism_case::{self, Entity as PlagiarismEntity, Status},
5    user::{self, Entity as UserEntity},
6};
7use sea_orm::{ColumnTrait, EntityTrait, QueryFilter, Condition, QuerySelect, QueryTrait, QueryOrder, PaginatorTrait};
8use serde::{Deserialize, Serialize};
9use std::str::FromStr;
10use std::collections::HashMap;
11use util::state::AppState;
12use crate::response::ApiResponse;
13use std::fs;
14
15#[derive(Serialize)]
16pub struct MossReportResponse {
17    pub report_url: String,
18    pub generated_at: String,
19}
20
21
22/// GET /api/modules/{module_id}/assignments/{assignment_id}/plagiarism/moss
23///
24/// Retrieves metadata for the **most recent MOSS report** generated for the given assignment.
25/// Accessible only to lecturers and assistant lecturers assigned to the module.
26///
27/// This endpoint does **not** trigger a new MOSS run—it only returns the last stored report URL
28/// and its generation timestamp. To generate a new report, use the POST endpoint:
29/// `/api/modules/{module_id}/assignments/{assignment_id}/plagiarism/moss`.
30///
31/// # Path Parameters
32///
33/// - `module_id`: The ID of the parent module
34/// - `assignment_id`: The ID of the assignment whose latest MOSS report should be fetched
35///
36/// # Request Body
37///
38/// None.
39///
40/// # Returns
41///
42/// - `200 OK` on success with the latest report metadata:
43///   - `report_url` — The external URL to the MOSS results page
44///   - `generated_at` — RFC 3339 timestamp for when the report file was written
45/// - `404 NOT FOUND` if no report has been generated yet
46/// - `500 INTERNAL SERVER ERROR` if the report file cannot be read or parsed
47///
48/// # Example Response (200 OK)
49///
50/// ```json
51/// {
52///   "success": true,
53///   "message": "MOSS report retrieved successfully",
54///   "data": {
55///     "report_url": "http://moss.stanford.edu/results/123456789",
56///     "generated_at": "2025-05-30T12:34:56Z"
57///   }
58/// }
59/// ```
60///
61/// # Example Response (404 Not Found)
62///
63/// ```json
64/// {
65///   "success": false,
66///   "message": "MOSS report not found"
67/// }
68/// ```
69///
70/// # Example Response (500 Internal Server Error)
71///
72/// ```json
73/// {
74///   "success": false,
75///   "message": "Failed to read MOSS report: <reason>"
76/// }
77/// ```
78///
79/// # Notes
80/// - Internally, the metadata is read from `reports.txt` under the assignment’s storage directory:
81///   `.../module_{module_id}/assignment_{assignment_id}/reports.txt`.
82/// - The `report_url` is hosted by the MOSS service and may expire per MOSS retention policy.
83/// - To refresh the report, run the POST `/plagiarism/moss` endpoint and then call this GET again.
84pub async fn get_moss_report(
85    Path((module_id, assignment_id)): Path<(i64, i64)>,
86) -> impl IntoResponse {
87    let report_path = assignment_submission::Model::storage_root()
88        .join(format!("module_{}", module_id))
89        .join(format!("assignment_{}", assignment_id))
90        .join("reports.txt");
91
92    if !report_path.exists() {
93        return (
94            StatusCode::NOT_FOUND,
95            Json(ApiResponse::<()>::error("MOSS report not found".to_string())),
96        )
97            .into_response();
98    }
99
100    let content = match fs::read_to_string(&report_path) {
101        Ok(content) => content,
102        Err(e) => {
103            return (
104                StatusCode::INTERNAL_SERVER_ERROR,
105                Json(ApiResponse::<()>::error(format!("Failed to read MOSS report: {}", e))),
106            )
107                .into_response();
108        }
109    };
110
111    let mut report_url = "".to_string();
112    let mut generated_at = "".to_string();
113
114    for line in content.lines() {
115        if let Some(url) = line.strip_prefix("Report URL: ") {
116            report_url = url.to_string();
117        } else if let Some(date) = line.strip_prefix("Date: ") {
118            generated_at = date.to_string();
119        }
120    }
121
122    if report_url.is_empty() {
123        return (
124            StatusCode::INTERNAL_SERVER_ERROR,
125            Json(ApiResponse::<()>::error("Failed to parse MOSS report".to_string())),
126        )
127            .into_response();
128    }
129
130    (
131        StatusCode::OK,
132        Json(ApiResponse::success(
133            MossReportResponse {
134                report_url,
135                generated_at,
136            },
137            "MOSS report retrieved successfully",
138        )),
139    )
140        .into_response()
141}
142
143
144#[derive(Debug, Deserialize)]
145pub struct ListPlagiarismCaseQueryParams {
146    page: Option<u64>,
147    per_page: Option<u64>,
148    status: Option<String>,
149    query: Option<String>,
150    sort: Option<String>,
151}
152
153#[derive(Debug, Serialize)]
154pub struct UserResponse {
155    id: i64,
156    username: String,
157    email: String,
158    profile_picture_path: Option<String>,
159}
160
161#[derive(Debug, Serialize)]
162pub struct SubmissionResponse {
163    id: i64,
164    filename: String,
165    created_at: chrono::DateTime<chrono::Utc>,
166    user: UserResponse,
167}
168
169#[derive(Debug, Serialize)]
170pub struct PlagiarismCaseResponse {
171    id: i64,
172    status: String,
173    description: String,
174    similarity: f32, // <-- NEW
175    created_at: chrono::DateTime<chrono::Utc>,
176    updated_at: chrono::DateTime<chrono::Utc>,
177    submission_1: SubmissionResponse,
178    submission_2: SubmissionResponse,
179}
180
181#[derive(Debug, Serialize)]
182pub struct PlagiarismCaseListResponse {
183    cases: Vec<PlagiarismCaseResponse>,
184    page: u64,
185    per_page: u64,
186    total: u64,
187}
188
189/// GET /api/modules/{module_id}/assignments/{assignment_id}/plagiarism
190///
191/// Retrieves paginated plagiarism cases for a specific assignment with filtering and sorting.
192/// Only accessible to lecturers and assistant lecturers assigned to the module.
193///
194/// # Path Parameters
195/// - `module_id`: The ID of the parent module
196/// - `assignment_id`: The ID of the assignment to retrieve plagiarism cases for
197///
198/// # Query Parameters
199/// - `page`: (Optional) Page number (default: 1, min: 1)
200/// - `per_page`: (Optional) Items per page (default: 20, max: 100)
201/// - `status`: (Optional) Filter by status: `"review"`, `"flagged"`, or `"reviewed"`
202/// - `query`: (Optional) Case-insensitive fuzzy search on usernames of either submission’s user
203/// - `sort`: (Optional) Comma-separated sorting criteria; prefix with `-` for descending.
204///   **Valid fields:** `"created_at"`, `"status"`, `"similarity"`
205///
206/// # Returns
207/// - `200 OK` with paginated cases on success
208/// - `400 BAD REQUEST` for invalid params (status/sort/pagination)
209/// - `403 FORBIDDEN` if user lacks permissions
210/// - `500 INTERNAL SERVER ERROR` for database failures
211///
212/// # Example Response (200 OK)
213/// ```json
214/// {
215///   "success": true,
216///   "message": "Plagiarism cases retrieved successfully",
217///   "data": {
218///     "cases": [
219///       {
220///         "id": 12,
221///         "status": "flagged",
222///         "description": "Very similar submissions",
223///         "similarity": 84.3,
224///         "created_at": "2024-05-15T08:30:00Z",
225///         "updated_at": "2024-05-16T10:15:00Z",
226///         "submission_1": {
227///           "id": 42,
228///           "filename": "main.cpp",
229///           "created_at": "2024-05-14T09:00:00Z",
230///           "user": { "id": 5, "username": "u12345678", "email": "[email protected]", "profile_picture_path": null }
231///         },
232///         "submission_2": {
233///           "id": 43,
234///           "filename": "main.cpp",
235///           "created_at": "2024-05-14T10:30:00Z",
236///           "user": { "id": 6, "username": "u98765432", "email": "[email protected]", "profile_picture_path": null }
237///         }
238///       }
239///     ],
240///     "page": 1,
241///     "per_page": 20,
242///     "total": 1
243///   }
244/// }
245/// ```
246///
247/// # Example Errors
248/// - `400 Bad Request` — `{ "success": false, "message": "Invalid status parameter" }`
249/// - `403 Forbidden` — `{ "success": false, "message": "Forbidden: Insufficient permissions" }`
250/// - `500 Internal Server Error` — `{ "success": false, "message": "Failed to retrieve plagiarism cases" }`
251pub async fn list_plagiarism_cases(
252    State(app_state): State<AppState>,
253    Path((_, assignment_id)): Path<(i64, i64)>,
254    Query(params): Query<ListPlagiarismCaseQueryParams>,
255) -> impl IntoResponse {
256    let page = params.page.unwrap_or(1).max(1);
257    let per_page = params.per_page.unwrap_or(20).min(100);
258
259    // Limit cases to this assignment’s submissions
260    let submission_models = SubmissionEntity::find()
261        .filter(assignment_submission::Column::AssignmentId.eq(assignment_id))
262        .all(app_state.db())
263        .await
264        .unwrap_or_default();
265
266    let submission_ids: Vec<i64> = submission_models.iter().map(|s| s.id).collect();
267
268    let mut query = PlagiarismEntity::find().filter(
269        Condition::any()
270            .add(plagiarism_case::Column::SubmissionId1.is_in(submission_ids.clone()))
271            .add(plagiarism_case::Column::SubmissionId2.is_in(submission_ids)),
272    );
273
274    // Filter: status
275    if let Some(status_str) = params.status {
276        if let Ok(status) = Status::from_str(&status_str) {
277            query = query.filter(plagiarism_case::Column::Status.eq(status));
278        } else {
279            return (
280                StatusCode::BAD_REQUEST,
281                Json(ApiResponse::<PlagiarismCaseListResponse>::error("Invalid status parameter")),
282            );
283        }
284    }
285
286    // Search: username (fuzzy)
287    if let Some(search_query) = params.query {
288        let user_ids_subquery = UserEntity::find()
289            .select_only()
290            .column(user::Column::Id)
291            .filter(user::Column::Username.like(format!("%{}%", search_query.to_lowercase())))
292            .into_query();
293
294        let submission_ids_subquery = SubmissionEntity::find()
295            .select_only()
296            .column(assignment_submission::Column::Id)
297            .filter(assignment_submission::Column::UserId.in_subquery(user_ids_subquery))
298            .into_query();
299
300        query = query.filter(
301            Condition::any()
302                .add(plagiarism_case::Column::SubmissionId1.in_subquery(submission_ids_subquery.clone()))
303                .add(plagiarism_case::Column::SubmissionId2.in_subquery(submission_ids_subquery)),
304        );
305    }
306
307    // Sort: created_at, status, similarity
308    if let Some(sort) = params.sort {
309        for s in sort.split(',') {
310            let (order, column) = if s.starts_with('-') {
311                (sea_orm::Order::Desc, &s[1..])
312            } else {
313                (sea_orm::Order::Asc, s)
314            };
315            match column {
316                "created_at" => query = query.order_by(plagiarism_case::Column::CreatedAt, order),
317                "status" => query = query.order_by(plagiarism_case::Column::Status, order),
318                "similarity" => query = query.order_by(plagiarism_case::Column::Similarity, order),
319                _ => {} // silently ignore unknown sort fields
320            }
321        }
322    }
323
324    let paginator = query.paginate(app_state.db(), per_page);
325    let total_items = paginator.num_items().await.unwrap_or(0);
326    let cases = paginator.fetch_page(page - 1).await.unwrap_or_default();
327
328    // Pull submissions & users for the cases we fetched
329    let submission_ids: Vec<i64> = cases
330        .iter()
331        .flat_map(|c| [c.submission_id_1, c.submission_id_2])
332        .collect();
333
334    let submissions = SubmissionEntity::find()
335        .filter(assignment_submission::Column::Id.is_in(submission_ids))
336        .all(app_state.db())
337        .await
338        .unwrap_or_default();
339
340    let user_ids: Vec<i64> = submissions.iter().map(|s| s.user_id).collect();
341    let users = UserEntity::find()
342        .filter(user::Column::Id.is_in(user_ids))
343        .all(app_state.db())
344        .await
345        .unwrap_or_default();
346
347    let user_map: HashMap<i64, user::Model> = users.into_iter().map(|u| (u.id, u)).collect();
348    let submission_map: HashMap<i64, (assignment_submission::Model, user::Model)> = submissions
349        .into_iter()
350        .filter_map(|s| user_map.get(&s.user_id).cloned().map(|u| (s.id, (s, u))))
351        .collect();
352
353    let response_cases: Vec<PlagiarismCaseResponse> = cases
354        .into_iter()
355        .filter_map(|case| {
356            let (s1, u1) = submission_map.get(&case.submission_id_1)?.clone();
357            let (s2, u2) = submission_map.get(&case.submission_id_2)?.clone();
358
359            Some(PlagiarismCaseResponse {
360                id: case.id,
361                status: case.status.to_string(),
362                description: case.description,
363                similarity: case.similarity,
364                created_at: case.created_at,
365                updated_at: case.updated_at,
366                submission_1: SubmissionResponse {
367                    id: s1.id,
368                    filename: s1.filename,
369                    created_at: s1.created_at,
370                    user: UserResponse {
371                        id: u1.id,
372                        username: u1.username,
373                        email: u1.email,
374                        profile_picture_path: u1.profile_picture_path,
375                    },
376                },
377                submission_2: SubmissionResponse {
378                    id: s2.id,
379                    filename: s2.filename,
380                    created_at: s2.created_at,
381                    user: UserResponse {
382                        id: u2.id,
383                        username: u2.username,
384                        email: u2.email,
385                        profile_picture_path: u2.profile_picture_path,
386                    },
387                },
388            })
389        })
390        .collect();
391
392    let response = PlagiarismCaseListResponse {
393        cases: response_cases,
394        page,
395        per_page,
396        total: total_items,
397    };
398
399    (
400        StatusCode::OK,
401        Json(ApiResponse::success(
402            response,
403            "Plagiarism cases retrieved successfully",
404        )),
405    )
406}
407
408#[derive(Debug, Deserialize)]
409pub struct PlagiarismQuery {
410    pub status: Option<String>,
411}
412
413#[derive(Debug, Serialize)]
414pub struct Link {
415    pub source: String,
416    pub target: String,
417}
418
419#[derive(Debug, Serialize)]
420pub struct LinksResponse {
421    pub links: Vec<Link>,
422}
423
424/// GET /api/modules/{module_id}/assignments/{assignment_id}/plagiarism/graph
425///
426/// Builds a **user-to-user plagiarism graph** for the given assignment. Each edge indicates
427/// that there is at least one plagiarism case linking submissions from the two users.
428///
429/// Accessible only to lecturers and assistant lecturers assigned to the module.
430///
431/// # Path Parameters
432///
433/// - `module_id`: The ID of the parent module
434/// - `assignment_id`: The ID of the assignment whose plagiarism graph should be built
435///
436/// # Query Parameters
437///
438/// - `status` (optional): Filter edges by case status. One of:
439///   - `"review"`
440///   - `"flagged"`
441///   - `"reviewed"`
442///
443/// # Semantics
444///
445/// - Nodes are **usernames** derived from the submissions involved in cases.
446/// - Each returned `Link { source, target }` represents a directed edge from `source` user
447///   to `target` user for at least one case. (If multiple cases exist between the same pair,
448///   multiple identical edges **may** appear; if you prefer deduplication, apply it in your client
449///   or adjust the endpoint to de-duplicate.)
450/// - Only cases where **both** submissions belong to the specified assignment are considered.
451///
452/// # Returns
453///
454/// - `200 OK` with a `links` array (possibly empty) on success
455/// - `400 BAD REQUEST` if `status` is provided but invalid
456/// - `500 INTERNAL SERVER ERROR` if submissions, users, or cases could not be fetched
457///
458/// # Example Request
459///
460/// ```http
461/// GET /api/modules/12/assignments/34/plagiarism/graph?status=flagged
462/// ```
463///
464/// # Example Response (200 OK)
465///
466/// ```json
467/// {
468///   "success": true,
469///   "message": "Plagiarism graph retrieved successfully",
470///   "data": {
471///     "links": [
472///       { "source": "u12345678", "target": "u87654321" },
473///       { "source": "u13579246", "target": "u24681357" }
474///     ]
475///   }
476/// }
477/// ```
478///
479/// # Example Response (Empty Graph)
480///
481/// ```json
482/// {
483///   "success": true,
484///   "message": "Plagiarism graph retrieved successfully",
485///   "data": { "links": [] }
486/// }
487/// ```
488///
489/// # Example Response (400 Bad Request)
490///
491/// ```json
492/// {
493///   "success": false,
494///   "message": "Invalid status parameter"
495/// }
496/// ```
497///
498/// # Notes
499/// - This endpoint is optimized for **visualization**. If you need case details, use the list
500///   endpoint (`GET /plagiarism`) instead.
501/// - Edges are derived from the **current** cases in the database after any filtering.
502/// - Usernames are taken from the submissions’ authors at query time.
503// TODO: Testing @Aidan
504pub async fn get_graph(
505    State(app_state): State<AppState>,
506    Path((_module_id, assignment_id)): Path<(i64, i64)>,
507    Query(query): Query<PlagiarismQuery>,
508) -> impl IntoResponse {
509    // 1) Gather all submission IDs for this assignment
510    let submission_models = match SubmissionEntity::find()
511        .filter(assignment_submission::Column::AssignmentId.eq(assignment_id))
512        .all(app_state.db())
513        .await
514    {
515        Ok(list) => list,
516        Err(_) => {
517            return (
518                StatusCode::INTERNAL_SERVER_ERROR,
519                Json(ApiResponse::<LinksResponse>::error("Failed to fetch submissions")),
520            );
521        }
522    };
523
524    let assignment_submission_ids: Vec<i64> = submission_models.iter().map(|s| s.id).collect();
525
526    // 2) Base query: plagiarism cases where either side belongs to this assignment
527    let mut q = PlagiarismEntity::find().filter(
528        Condition::any()
529            .add(plagiarism_case::Column::SubmissionId1.is_in(assignment_submission_ids.clone()))
530            .add(plagiarism_case::Column::SubmissionId2.is_in(assignment_submission_ids.clone())),
531    );
532
533    // 3) Optional status filter
534    if let Some(status_str) = query.status {
535        match Status::try_from(status_str.as_str()) {
536            Ok(status) => {
537                q = q.filter(plagiarism_case::Column::Status.eq(status));
538            }
539            Err(_) => {
540                return (
541                    StatusCode::BAD_REQUEST,
542                    Json(ApiResponse::<LinksResponse>::error("Invalid status parameter")),
543                );
544            }
545        }
546    }
547
548    // 4) Fetch cases
549    let cases = match q.all(app_state.db()).await {
550        Ok(cs) => cs,
551        Err(_) => {
552            return (
553                StatusCode::INTERNAL_SERVER_ERROR,
554                Json(ApiResponse::<LinksResponse>::error("Failed to fetch plagiarism cases")),
555            );
556        }
557    };
558
559    if cases.is_empty() {
560        return (
561            StatusCode::OK,
562            Json(ApiResponse::success(
563                LinksResponse { links: vec![] },
564                "Plagiarism graph retrieved successfully",
565            )),
566        );
567    }
568
569    // 5) Fetch the submissions & users referenced by these cases
570    let all_sub_ids: Vec<i64> = cases
571        .iter()
572        .flat_map(|c| [c.submission_id_1, c.submission_id_2])
573        .collect();
574
575    let submissions = match SubmissionEntity::find()
576        .filter(assignment_submission::Column::Id.is_in(all_sub_ids))
577        .all(app_state.db())
578        .await
579    {
580        Ok(ss) => ss,
581        Err(_) => {
582            return (
583                StatusCode::INTERNAL_SERVER_ERROR,
584                Json(ApiResponse::<LinksResponse>::error("Failed to fetch submissions for cases")),
585            );
586        }
587    };
588
589    let user_ids: Vec<i64> = submissions.iter().map(|s| s.user_id).collect();
590    let users = match UserEntity::find()
591        .filter(user::Column::Id.is_in(user_ids))
592        .all(app_state.db())
593        .await
594    {
595        Ok(us) => us,
596        Err(_) => {
597            return (
598                StatusCode::INTERNAL_SERVER_ERROR,
599                Json(ApiResponse::<LinksResponse>::error("Failed to fetch users")),
600            );
601        }
602    };
603
604    let sub_by_id: HashMap<i64, _> = submissions.into_iter().map(|s| (s.id, s)).collect();
605    let user_by_id: HashMap<i64, _> = users.into_iter().map(|u| (u.id, u)).collect();
606
607    // 6) Build username links
608    let mut links = Vec::with_capacity(cases.len());
609    for case in cases {
610        if let (Some(sub1), Some(sub2)) = (
611            sub_by_id.get(&case.submission_id_1),
612            sub_by_id.get(&case.submission_id_2),
613        ) {
614            if let (Some(u1), Some(u2)) = (user_by_id.get(&sub1.user_id), user_by_id.get(&sub2.user_id)) {
615                links.push(Link {
616                    source: u1.username.clone(),
617                    target: u2.username.clone(),
618                });
619            }
620        }
621    }
622
623    (
624        StatusCode::OK,
625        Json(ApiResponse::success(
626            LinksResponse { links },
627            "Plagiarism graph retrieved successfully",
628        )),
629    )
630}