Torah_Codes / lib /files.py
cryptocalypse's picture
Me class, memory bugs, localfiles search and indexing, internet archive think questions for future auto dataset preparing
7347eec
raw
history blame
1.31 kB
import os
class TextFinder:
    """Find the lines that contain a given text in the files under a folder."""

    def __init__(self, folder):
        """Store the root folder to search.

        Args:
            folder: Path of the directory that ``find_matches`` walks
                recursively with ``os.walk``.
        """
        self.folder = folder

    def find_matches(self, text):
        """Return the lines containing ``text`` in every file under the folder.

        Each file is read as UTF-8 text. Every occurrence of ``text``
        contributes one entry: the line it starts on, with surrounding
        whitespace stripped. A line that contains several occurrences is
        therefore listed once per occurrence.

        Files that cannot be opened or are not valid UTF-8 (for example
        binary files) are skipped so that one bad file does not abort the
        whole search.

        Args:
            text: Substring to look for. An empty string yields no matches.

        Returns:
            A list of stripped lines, in the order the files are walked and
            the occurrences appear inside each file.
        """
        matches = []
        if not text:
            # An empty needle "matches" at every character offset; treat it
            # as nothing to search for instead of flooding the result.
            return matches

        for root, _, files in os.walk(self.folder):
            for file_name in files:
                file_path = os.path.join(root, file_name)
                # os.walk also lists entries such as broken symlinks.
                if not os.path.isfile(file_path):
                    continue
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        content = f.read()
                except (OSError, UnicodeDecodeError):
                    # Unreadable or non-UTF-8 file: skip it, keep searching.
                    continue
                matches.extend(self._lines_containing(content, text))
        return matches

    @staticmethod
    def _lines_containing(content, text):
        """Return the stripped line around each occurrence of ``text``."""
        found = []
        index = content.find(text)
        while index != -1:
            # rfind returns -1 when the match is on the first line, so the
            # "+ 1" makes the line start at offset 0 in that case.
            start = content.rfind('\n', 0, index) + 1
            end = content.find('\n', index)
            if end == -1:
                # Last line without a trailing newline runs to end of file.
                end = len(content)
            found.append(content[start:end].strip())
            index = content.find(text, index + 1)
        return found
# Demo: search 'example_folder' for 'text_to_find' and print the matching
# lines when this module is executed directly as a script.
if __name__ == "__main__":
    print(TextFinder('example_folder').find_matches('text_to_find'))