Coverage for src/ragindexer/index_database.py: 79%

52 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2025-06-20 15:57 +0000

1import os 

2import sqlite3 

3from pathlib import Path 

4from typing import Optional 

5 

6from . import logger 

7from .config import config 

8 

9 

def initialize_state_db() -> None:
    """
    Initialize the sqlite database

    Creates the parent directory of ``config.STATE_DB_PATH`` when the path has
    one, then creates the ``files`` table (``path`` TEXT primary key,
    ``last_modified`` REAL) if it does not exist yet.

    """
    # os.path.dirname() returns "" for a bare filename, and os.makedirs("")
    # raises FileNotFoundError, so only create the directory when there is one.
    db_dir = os.path.dirname(config.STATE_DB_PATH)
    if db_dir:
        os.makedirs(db_dir, exist_ok=True)

    logger.info(f"Using sqlite database '{config.STATE_DB_PATH}'")

    conn = sqlite3.connect(config.STATE_DB_PATH)
    try:
        conn.execute(
            """
            CREATE TABLE IF NOT EXISTS files (
                path TEXT PRIMARY KEY,
                last_modified REAL
            )
            """
        )
        conn.commit()
    finally:
        # Close the connection even when the statement or the commit fails.
        conn.close()

31 

32 

def get_stored_timestamp(relpath: Path) -> Optional[float]:
    """
    Get the stored timestamp for the given path

    Args:
        relpath: Path to look up; it is converted with ``str()`` and compared
            to the ``path`` column of the ``files`` table

    Returns:
        The ``last_modified`` value stored for the path if a row is found.
        None otherwise

    """
    conn = sqlite3.connect(config.STATE_DB_PATH)
    try:
        row = conn.execute(
            "SELECT last_modified FROM files WHERE path = ?", (str(relpath),)
        ).fetchone()
    finally:
        # Close the connection even when the query fails.
        conn.close()

    return row[0] if row else None

50 

51 

def set_stored_timestamp(relpath: Path, ts: float) -> None:
    """
    Stores the processing timestamp for the given path

    Uses ``REPLACE INTO``, so a row already stored for the same path is
    overwritten rather than duplicated.

    Args:
        relpath: Path to record; it is stored as ``str(relpath)``
        ts: The timestamp to store in the ``last_modified`` column

    """
    conn = sqlite3.connect(config.STATE_DB_PATH)
    try:
        conn.execute(
            "REPLACE INTO files (path, last_modified) VALUES (?, ?)",
            (str(relpath), ts),
        )
        conn.commit()
    finally:
        # Close the connection even when the statement or the commit fails.
        conn.close()

66 

67 

def delete_stored_file(relpath: Path) -> None:
    """
    Delete the given path

    Removes the row of the ``files`` table whose ``path`` equals
    ``str(relpath)``. The statement matches no row when the path is not
    stored.

    Args:
        relpath: Path whose row must be removed

    """
    conn = sqlite3.connect(config.STATE_DB_PATH)
    try:
        conn.execute("DELETE FROM files WHERE path = ?", (str(relpath),))
        conn.commit()
    finally:
        # Close the connection even when the statement or the commit fails.
        conn.close()

81 

82 

def delete_all_files() -> None:
    """
    Delete all files

    Removes every row from the ``files`` table; the table itself is kept.

    """
    conn = sqlite3.connect(config.STATE_DB_PATH)
    try:
        conn.execute("DELETE FROM files")
        conn.commit()
    finally:
        # Close the connection even when the statement or the commit fails.
        conn.close()

93 

94 

def list_stored_files(absolute: bool = False) -> list[Path]:
    """
    List all paths stored in the database

    Args:
        absolute: True to return each stored path joined onto
            ``config.DOCS_PATH``; False to return the stored paths as they are

    Returns:
        The list of all paths stored in the database

    """
    conn = sqlite3.connect(config.STATE_DB_PATH)
    try:
        rows = conn.execute("SELECT path FROM files").fetchall()
    finally:
        # Close the connection even when the query fails.
        conn.close()

    # Each row is a 1-tuple holding the stored path string.
    if absolute:
        return [config.DOCS_PATH / Path(stored_path) for (stored_path,) in rows]
    return [Path(stored_path) for (stored_path,) in rows]