
    i"%                    &   U d dl mZ d dlZd dlmZ ddlmZmZmZm	Z	  ej                  d      Z ej                  d      Z ej                  d      Z ej                  d	      Z ej                  d
      fZded<   ddZddZddZddZddZy)    )annotationsN)Optional   )
ChatHeaderChatMessageMessageTypeUserMetau<   ^-{15} (\d{4})년 (\d{1,2})월 (\d{1,2})일 \S+요일 -{15}$u6   ^\[(.+?)\] \[(오전|오후) (\d{1,2}):(\d{2})\] (.+)$u2   ^(.+)님이 (들어왔습니다|나갔습니다)\.u    ^(.+)님을 내보냈습니다\.u/   ^불법촬영물 식별 및 게재제한 안내ztuple[re.Pattern[str], ...]_SYSTEM_NOISE_PATTERNSc                J    | dk(  r
|dk(  rdn|}n|dk(  r|n|dz   }|dd|dS )u2   오전/오후 + 시/분 → "HH:MM" (24시간제).u   오전   r   02d: )ampmhourminutehs       C/home/jay/projects/insuwiki/scripts/kakao_knowledge/kakao_parser.py_to_24hr   %   s=    xABJDD2IWAfS\""    c                   | j                  d      D cg c]  }|j                          }}t        |      dk\  rt        |d   |d   |d         S t        |      dk(  rt        |d   |d         S t        |d         S c c}w )	u%   닉네임 raw 문자열 → UserMeta./   r   r      )namecompanyregion)r   r   )r   )splitstriplenr	   )rawppartss      r   _parse_user_metar$   .   s}     #		#/1QWWY/E/
5zQU1XuQxaII
5zQU1XeAh77q"" 0s   Bc                    | j                         }t        j                  t        j                  t        j                  d}||v r||   S |j                         D ]  \  }}|j                  |dz         s|c S  y)u   content가 미디어 키워드(단독 또는 수량 접미사)이면 해당 타입 반환, 아니면 None.

    예) "사진", "사진 3장", "동영상" → 해당 타입
    )u   사진u   이모티콘u	   동영상 N)r   r   PHOTOEMOTICONVIDEOitems
startswith)contentstrippedexactkeywordmsg_types        r   _media_typer1   8   s}    
 }}H ###,, &&%E
 5X"[[] w}-O r   c                4     t         fdt        D              S )Nc              3  @   K   | ]  }|j                          y w)N)search).0r"   lines     r   	<genexpr>z#_is_system_noise.<locals>.<genexpr>N   s     >!qxx~>s   )anyr
   )r6   s   `r   _is_system_noiser9   M   s    >'=>>>r   c           
       %&' g &g %d}d'd}d%&'fd}t        | d      5 }|j                         j                  d      }|j                         j                  d      }|dz  }|j                         }|j                         }|j	                  d	d      j                         }	t        ||	
      }
|D ]  }|dz  }|j                  d      }|j                         }|s-t        |      r9t        j                  |      }|rY |        |j                  d      |j                  d      |j                  d      }}}| dt        |      ddt        |      d}t        j                  |      }t        j                  |      }|s|r |        |r"|j                  d      }t        j                  }n\|rH|j                  d      }|j                  d      }|dk(  rt        j                  nt        j                   }n|}t        j"                  }t%        |d|t'        |      ||      ' |        t(        j                  |      }|r |        |j                  d      }|j                  d      }t        |j                  d            }t        |j                  d            }|j                  d      }t+        |||      }t-        |      }||}n&|dk(  rt        j.                  }nt        j0                  }t%        |||t'        |      ||      'e'i'j2                  dz   |z   '_         	 ddd        |        t5        d &D              }t5        d &D              } &D !ch c]X  }!|!j6                  t        j                  t        j                   t        j                  t        j"                  fvr|!j8                  Z }"}!&D !cg c]  }!|!j:                  s|!j:                   }#}!|#rt=        |#      nd|#rt?        |#      ndd}$
jA                         &|tC        &      tC        %      || tC        |"      |$ddS # 1 sw Y   xY wc c}!w c c}!w )u  
    카카오톡 오픈채팅 txt 파일을 파싱한다.

    Returns
    -------
    {
        "header": {"chat_name": str, "save_date": str},
        "messages": [ChatMessage, ...],   # bot 메시지 제외
        "stats": {
            "total_lines": int,
            "total_messages": int,
            "filtered_bot_messages": int,
            "join_leave_count": int,
            "media_count": int,
            "unique_users": int,
            "date_range": {"start": str, "end": str},
        },
    }
     Nr   c                     yj                   t        j                  k(  r j                         dyj                         dy)u3   pending 메시지를 적절한 버킷으로 이동.N)typer   BOTappend)bot_messagesmessagespendings   r   _flush_pendingz(parse_kakao_chat.<locals>._flush_pendingr   sE     ?<<;??*(  OOG$r   zutf-8)encoding
r   u   저장한 날짜 : )	chat_name	save_dater   r   -r   u   들어왔습니다)datetimeuser	user_metar,   r=         u   오픈채팅봇c              3     K   | ]A  }|j                   t        j                  t        j                  t        j                  fv rd  C ywr   N)r=   r   JOINLEAVEKICKr5   msgs     r   r7   z#parse_kakao_chat.<locals>.<genexpr>   s=      88((+*;*;[=M=MNN 	
   AA	c              3     K   | ]A  }|j                   t        j                  t        j                  t        j                  fv rd  C ywrP   )r=   r   r'   r(   r)   rT   s     r   r7   z#parse_kakao_chat.<locals>.<genexpr>   s=      88));+?+?ARARSS 	
rV   )startend)total_linestotal_messagesfiltered_bot_messagesjoin_leave_countmedia_countunique_users
date_range)headerrA   stats)returnNone)"openreadlinerstripr   replacer   r9   _RE_DATE_SEPmatchgroupint_RE_JOIN_LEAVE_RE_KICKr   rS   rQ   rR   SYSTEMr   r$   _RE_MSGr   r1   r>   MESSAGEr,   sumr=   rK   rI   minmax
model_dumpr    )(filepathcurrent_daterZ   rC   fhline1line2rF   save_date_rawrG   ra   raw_liner6   r-   m_dateymodm_jlm_kickactorr0   actionm_msgraw_userr   r   r   r,   time_strmediar]   r^   rU   r_   datesr`   r@   rA   rB   s(                                        @@@r   parse_kakao_chatr   V   s?   ( #%H&(LL%)GK	 
h	) e@R$$T*$$T*q KKM	!))*?DJJL	i9E  V	@H1K??4(Dzz|H   ) "''1F !<<?FLLOV\\!_q2"#Ac"gc]!CF3<@ "''1D^^H-Fv "LLOE*//H JJqME!ZZ]F "%99 $(((..  %E*11H%%.u5$!   MM$'E  ;;q>{{1~5;;q>*U[[^,++a."4v6 $G,$$H!22*H*22H%%!!.x8#!  "")//D"84"?mV	@e@R    
   K 88  +"3"3[5E5E{GYGYZ[ 	L  "*6#SXXSXX6E6$U""s5zJ ##%&!(m%(%6 0&-$
 Ge@ e@l 7s%   K:QQ1AQQ&QQ)r   strr   rl   r   rl   rc   r   )r!   r   rc   r	   )r,   r   rc   zOptional[MessageType])r6   r   rc   bool)rv   r   rc   dict)
__future__r   retypingr   modelsr   r   r   r	   compileri   rp   rm   rn   r
   __annotations__r   r$   r1   r9   r   r   r   r   <module>r      s    " 	  B B rzzC
 "**=
 QR2::9: BJJAB7 3 ##*?vr   