Spaces:
No application file
No application file
import json | |
import os | |
import sys | |
import re | |
from datetime import datetime, timezone, timedelta | |
from collections import defaultdict | |
def convert_timestamp_to_jst(timestamp):
    """Format a millisecond Unix timestamp as a JST (UTC+9) date-time string.

    Args:
        timestamp: Unix epoch time in milliseconds.

    Returns:
        The moment rendered as ``YYYY-MM-DD HH:MM:SS JST``.
    """
    # datetime.fromtimestamp expects seconds, the input is in milliseconds.
    seconds = timestamp / 1000
    tokyo = timezone(timedelta(hours=9))
    moment = datetime.fromtimestamp(seconds, tz=tokyo)
    return moment.strftime('%Y-%m-%d %H:%M:%S JST')
def get_year_month_day(timestamp):
    """Split a millisecond Unix timestamp into a JST month label and a day.

    Args:
        timestamp: Unix epoch time in milliseconds.

    Returns:
        A ``(year_month, day)`` tuple where ``year_month`` is a ``YYYY-MM``
        string and ``day`` is the integer day of the month, both evaluated
        in JST (UTC+9).
    """
    tokyo = timezone(timedelta(hours=9))
    # The input is in milliseconds; fromtimestamp works in seconds.
    moment = datetime.fromtimestamp(timestamp / 1000, tz=tokyo)
    month_label = moment.strftime('%Y-%m')
    return month_label, moment.day
def process_directory(directory):
    """Regroup per-thread JSON logs into half-month JSON files.

    Every ``<digits>.json`` file in ``directory`` is read (files ending in
    ``_s.json`` or starting with ``log_short`` are ignored, which also keeps
    this function's own output files from being re-read).  Each entry of the
    file's ``"thread_array"`` list that carries a numeric, non-zero
    ``"timestamp"`` is bucketed by its JST year-month and by whether its day
    of the month is 1-15 or 16 and later.  Non-empty buckets are written back
    into ``directory`` as ``log_short_hm_<YYYY-MM>_1.json`` (first half) and
    ``log_short_hm_<YYYY-MM>_16.json`` (second half), each holding
    ``{"posts": [...]}``.

    Files are processed in ascending thread-number order so the order of
    posts in the output does not depend on the directory listing order.

    Args:
        directory: Path of the directory holding the thread JSON files; the
            output files are written to the same directory.

    Returns:
        True when all files were processed and the outputs written, False
        when an error occurred (the error is printed, and no further work is
        done).
    """
    try:
        # Posts grouped by year-month, then by first/second half of the month.
        monthly_posts = defaultdict(lambda: {"first_half": [], "second_half": []})

        # Pick out the thread files first so they can be ordered afterwards.
        thread_files = []
        for filename in os.listdir(directory):
            if (not filename.endswith('.json')
                    or filename.endswith('_s.json')
                    or filename.startswith('log_short')):
                continue

            # Only files whose base name is purely numeric are thread logs.
            base_name = os.path.splitext(filename)[0]
            if not re.fullmatch(r'\d+', base_name):
                print(f"スキップ: {filename} はファイル名が数字のみではありません")
                continue

            thread_files.append((int(base_name), filename))

        # os.listdir returns entries in arbitrary order; sort by thread
        # number (file name breaks ties) for reproducible output.
        thread_files.sort()

        for thread_no, filename in thread_files:
            input_file = os.path.join(directory, filename)

            try:
                with open(input_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
            except json.JSONDecodeError as e:
                # Name the offending file so it can actually be found.
                print(f"JSONパースエラー: {input_file}: {e}")
                return False

            # Tolerate files whose top level is not an object or whose
            # "thread_array" is not a list: there is nothing to collect.
            posts = data.get("thread_array") if isinstance(data, dict) else None
            if not isinstance(posts, list):
                continue

            for post in posts:
                if not isinstance(post, dict):
                    continue

                # Entries without a usable (numeric, non-zero) timestamp
                # cannot be placed in a bucket and are skipped.
                timestamp = post.get("timestamp")
                if not isinstance(timestamp, (int, float)) or not timestamp:
                    continue

                year_month, day = get_year_month_day(timestamp)
                new_post = {
                    "thread_no": thread_no,
                    "num": post.get("num"),
                    "timestamp": timestamp,
                    "datetime": convert_timestamp_to_jst(timestamp),
                    "body": post.get("body"),
                }

                # Days 1-15 go to the first half, the rest to the second.
                half_key = "first_half" if day <= 15 else "second_half"
                monthly_posts[year_month][half_key].append(new_post)

        # Write one file per non-empty (year-month, half) bucket; the file
        # suffix is the first day covered by that half.
        for year_month, half_posts in monthly_posts.items():
            for half_key, first_day in (("first_half", "1"), ("second_half", "16")):
                bucket = half_posts[half_key]
                if not bucket:
                    continue

                output_file = os.path.join(
                    directory, f'log_short_hm_{year_month}_{first_day}.json'
                )
                with open(output_file, 'w', encoding='utf-8') as f:
                    json.dump({"posts": bucket}, f, ensure_ascii=False, indent=2)
                print(f"変換完了: {output_file}")

        return True

    except Exception as e:
        # Top-level boundary: report and signal failure instead of raising.
        print(f"エラー: {str(e)}")
        return False
def main():
    """Run the conversion on the directory named on the command line.

    The first command-line argument is used as the target directory; when
    no argument is given the current directory is used.

    Returns:
        Whatever process_directory returns for that directory (True on
        success, False on failure).
    """
    directory = sys.argv[1] if len(sys.argv) > 1 else "."
    return process_directory(directory)


if __name__ == "__main__":
    # Report failure through the exit status so callers can detect it.
    sys.exit(0 if main() else 1)