[김진아] 11주차 미션 제출 (#119)

mikaniz · web-flow · commit 9b166d2ff7ec · 2024-07-06T22:16:18.000+09:00
diff --git a/8th_members/김진아/11주차.md b/8th_members/김진아/11주차.md
@@ -0,0 +1,391 @@
+# 11주차
+
+## 원하는 type 정해서 구조체 및 type 코드 분석해보기
+
+### String type
+
+```c
+// Include/unicodeobject.h
+
+PyAPI_DATA(PyTypeObject) PyUnicode_Type;
+PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
+```
+- `PyUnicode_Type` : 기본 문자열 타입, Python 문자열 객체의 모든 메타데이터와 메서드를 포함
+- `PyUnicodeIter_Type` : 문자열 이터레이터 타입, 문자열을 순회할 때 사용
+
+### `PyUnicode_Type`
+
+```c
+// Objects/unicodeobject.c
+
+PyTypeObject PyUnicode_Type = {
+    PyVarObject_HEAD_INIT(&PyType_Type, 0)
+    "str",                        /* tp_name */
+    sizeof(PyUnicodeObject),      /* tp_basicsize */
+    0,                            /* tp_itemsize */
+    /* Slots */
+    (destructor)unicode_dealloc,  /* tp_dealloc */
+    0,                            /* tp_vectorcall_offset */
+    0,                            /* tp_getattr */
+    0,                            /* tp_setattr */
+    0,                            /* tp_as_async */
+    unicode_repr,                 /* tp_repr */
+    &unicode_as_number,           /* tp_as_number */
+    &unicode_as_sequence,         /* tp_as_sequence */
+    &unicode_as_mapping,          /* tp_as_mapping */
+    (hashfunc) unicode_hash,      /* tp_hash*/
+    0,                            /* tp_call*/
+    (reprfunc) unicode_str,       /* tp_str */
+    PyObject_GenericGetAttr,      /* tp_getattro */
+    0,                            /* tp_setattro */
+    0,                            /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
+    Py_TPFLAGS_UNICODE_SUBCLASS,   /* tp_flags */
+    unicode_doc,                  /* tp_doc */
+    0,                            /* tp_traverse */
+    0,                            /* tp_clear */
+    PyUnicode_RichCompare,        /* tp_richcompare */
+    0,                            /* tp_weaklistoffset */
+    unicode_iter,                 /* tp_iter */
+    0,                            /* tp_iternext */
+    unicode_methods,              /* tp_methods */
+    0,                            /* tp_members */
+    0,                            /* tp_getset */
+    &PyBaseObject_Type,           /* tp_base */
+    0,                            /* tp_dict */
+    0,                            /* tp_descr_get */
+    0,                            /* tp_descr_set */
+    0,                            /* tp_dictoffset */
+    0,                            /* tp_init */
+    0,                            /* tp_alloc */
+    unicode_new,                  /* tp_new */
+    PyObject_Del,                 /* tp_free */
+};
+```
+
+### `PyUnicodeIter_Type`
+
+```c
+// Objects/unicodeobject.c
+
+PyTypeObject PyUnicodeIter_Type = {
+    PyVarObject_HEAD_INIT(&PyType_Type, 0)
+    "str_iterator",         /* tp_name */
+    sizeof(unicodeiterobject),      /* tp_basicsize */
+    0,                  /* tp_itemsize */
+    /* methods */
+    (destructor)unicodeiter_dealloc,    /* tp_dealloc */
+    0,                  /* tp_vectorcall_offset */
+    0,                  /* tp_getattr */
+    0,                  /* tp_setattr */
+    0,                  /* tp_as_async */
+    0,                  /* tp_repr */
+    0,                  /* tp_as_number */
+    0,                  /* tp_as_sequence */
+    0,                  /* tp_as_mapping */
+    0,                  /* tp_hash */
+    0,                  /* tp_call */
+    0,                  /* tp_str */
+    PyObject_GenericGetAttr,        /* tp_getattro */
+    0,                  /* tp_setattro */
+    0,                  /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
+    0,                  /* tp_doc */
+    (traverseproc)unicodeiter_traverse, /* tp_traverse */
+    0,                  /* tp_clear */
+    0,                  /* tp_richcompare */
+    0,                  /* tp_weaklistoffset */
+    PyObject_SelfIter,          /* tp_iter */
+    (iternextfunc)unicodeiter_next,     /* tp_iternext */
+    unicodeiter_methods,            /* tp_methods */
+    0,
+};
+```
+
+### `unicode_new`
+
+```c
+// Objects/unicodeobject.c
+
+static PyObject *
+unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+    PyObject *x = NULL;
+    static char *kwlist[] = {"object", "encoding", "errors", 0};
+    char *encoding = NULL;
+    char *errors = NULL;
+
+    if (type != &PyUnicode_Type)
+        return unicode_subtype_new(type, args, kwds);
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:str",
+                                     kwlist, &x, &encoding, &errors))
+        return NULL;
+    if (x == NULL)
+        _Py_RETURN_UNICODE_EMPTY();
+    if (encoding == NULL && errors == NULL)
+        return PyObject_Str(x);
+    else
+        return PyUnicode_FromEncodedObject(x, encoding, errors);
+}
+```
+
+1. 타입 확인
+2. `PyArg_ParseTupleAndKeywords`를 통해 인자 파싱
+3. 객체 생성
+
+### `PyUnicode_New`
+
+```c
+// Objects/unicodeobject.c
+
+PyObject *
+PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
+{
+    PyObject *obj;
+    PyCompactUnicodeObject *unicode;
+    void *data;
+    enum PyUnicode_Kind kind;
+    int is_sharing, is_ascii;
+    Py_ssize_t char_size;
+    Py_ssize_t struct_size;
+
+    /* Optimization for empty strings */
+    if (size == 0 && unicode_empty != NULL) {
+        Py_INCREF(unicode_empty);
+        return unicode_empty;
+    }
+
+    is_ascii = 0;
+    is_sharing = 0;
+    struct_size = sizeof(PyCompactUnicodeObject);
+    if (maxchar < 128) {
+        kind = PyUnicode_1BYTE_KIND;
+        char_size = 1;
+        is_ascii = 1;
+        struct_size = sizeof(PyASCIIObject);
+    }
+    else if (maxchar < 256) {
+        kind = PyUnicode_1BYTE_KIND;
+        char_size = 1;
+    }
+    else if (maxchar < 65536) {
+        kind = PyUnicode_2BYTE_KIND;
+        char_size = 2;
+        if (sizeof(wchar_t) == 2)
+            is_sharing = 1;
+    }
+    else {
+        if (maxchar > MAX_UNICODE) {
+            PyErr_SetString(PyExc_SystemError,
+                            "invalid maximum character passed to PyUnicode_New");
+            return NULL;
+        }
+        kind = PyUnicode_4BYTE_KIND;
+        char_size = 4;
+        if (sizeof(wchar_t) == 4)
+            is_sharing = 1;
+    }
+
+    /* Ensure we won't overflow the size. */
+    if (size < 0) {
+        PyErr_SetString(PyExc_SystemError,
+                        "Negative size passed to PyUnicode_New");
+        return NULL;
+    }
+    if (size > ((PY_SSIZE_T_MAX - struct_size) / char_size - 1))
+        return PyErr_NoMemory();
+
+    /* Duplicated allocation code from _PyObject_New() instead of a call to
+     * PyObject_New() so we are able to allocate space for the object and
+     * it's data buffer.
+     */
+    obj = (PyObject *) PyObject_MALLOC(struct_size + (size + 1) * char_size);
+    if (obj == NULL)
+        return PyErr_NoMemory();
+    obj = PyObject_INIT(obj, &PyUnicode_Type);
+    if (obj == NULL)
+        return NULL;
+
+    unicode = (PyCompactUnicodeObject *)obj;
+    if (is_ascii)
+        data = ((PyASCIIObject*)obj) + 1;
+    else
+        data = unicode + 1;
+    _PyUnicode_LENGTH(unicode) = size;
+    _PyUnicode_HASH(unicode) = -1;
+    _PyUnicode_STATE(unicode).interned = 0;
+    _PyUnicode_STATE(unicode).kind = kind;
+    _PyUnicode_STATE(unicode).compact = 1;
+    _PyUnicode_STATE(unicode).ready = 1;
+    _PyUnicode_STATE(unicode).ascii = is_ascii;
+    if (is_ascii) {
+        ((char*)data)[size] = 0;
+        _PyUnicode_WSTR(unicode) = NULL;
+    }
+    else if (kind == PyUnicode_1BYTE_KIND) {
+        ((char*)data)[size] = 0;
+        _PyUnicode_WSTR(unicode) = NULL;
+        _PyUnicode_WSTR_LENGTH(unicode) = 0;
+        unicode->utf8 = NULL;
+        unicode->utf8_length = 0;
+    }
+    else {
+        unicode->utf8 = NULL;
+        unicode->utf8_length = 0;
+        if (kind == PyUnicode_2BYTE_KIND)
+            ((Py_UCS2*)data)[size] = 0;
+        else /* kind == PyUnicode_4BYTE_KIND */
+            ((Py_UCS4*)data)[size] = 0;
+        if (is_sharing) {
+            _PyUnicode_WSTR_LENGTH(unicode) = size;
+            _PyUnicode_WSTR(unicode) = (wchar_t *)data;
+        }
+        else {
+            _PyUnicode_WSTR_LENGTH(unicode) = 0;
+            _PyUnicode_WSTR(unicode) = NULL;
+        }
+    }
+#ifdef Py_DEBUG
+    unicode_fill_invalid((PyObject*)unicode, 0);
+#endif
+    assert(_PyUnicode_CheckConsistency((PyObject*)unicode, 0));
+    return obj;
+}
+```
+
+1. empty string 처리
+2. 유니코드 종류 결정
+3. 메모리 할당
+4. `PyObject_INIT`을 통해 객체 초기화
+5. 객체 상태 초기화
+6. 데이터 초기화
+7. 디버깅 모드일 경우 `unicode_fill_invalid`를 통해 객체 검사
+
+### `_PyUnicode_New`
+
+```c
+// Objects/unicodeobject.c
+
+static PyUnicodeObject *
+_PyUnicode_New(Py_ssize_t length)
+{
+    PyUnicodeObject *unicode;
+    size_t new_size;
+
+    /* Optimization for empty strings */
+    if (length == 0 && unicode_empty != NULL) {
+        Py_INCREF(unicode_empty);
+        return (PyUnicodeObject*)unicode_empty;
+    }
+
+    /* Ensure we won't overflow the size. */
+    if (length > ((PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(Py_UNICODE)) - 1)) {
+        return (PyUnicodeObject *)PyErr_NoMemory();
+    }
+    if (length < 0) {
+        PyErr_SetString(PyExc_SystemError,
+                        "Negative size passed to _PyUnicode_New");
+        return NULL;
+    }
+
+    unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type);
+    if (unicode == NULL)
+        return NULL;
+    new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);
+
+    _PyUnicode_WSTR_LENGTH(unicode) = length;
+    _PyUnicode_HASH(unicode) = -1;
+    _PyUnicode_STATE(unicode).interned = 0;
+    _PyUnicode_STATE(unicode).kind = 0;
+    _PyUnicode_STATE(unicode).compact = 0;
+    _PyUnicode_STATE(unicode).ready = 0;
+    _PyUnicode_STATE(unicode).ascii = 0;
+    _PyUnicode_DATA_ANY(unicode) = NULL;
+    _PyUnicode_LENGTH(unicode) = 0;
+    _PyUnicode_UTF8(unicode) = NULL;
+    _PyUnicode_UTF8_LENGTH(unicode) = 0;
+
+    _PyUnicode_WSTR(unicode) = (Py_UNICODE*) PyObject_MALLOC(new_size);
+    if (!_PyUnicode_WSTR(unicode)) {
+        Py_DECREF(unicode);
+        PyErr_NoMemory();
+        return NULL;
+    }
+
+    /* Initialize the first element to guard against cases where
+     * the caller fails before initializing str -- unicode_resize()
+     * reads str[0], and the Keep-Alive optimization can keep memory
+     * allocated for str alive across a call to unicode_dealloc(unicode).
+     * We don't want unicode_resize to read uninitialized memory in
+     * that case.
+     */
+    _PyUnicode_WSTR(unicode)[0] = 0;
+    _PyUnicode_WSTR(unicode)[length] = 0;
+
+    assert(_PyUnicode_CheckConsistency((PyObject *)unicode, 0));
+    return unicode;
+}
+```
+
+1. empty string 처리
+2. size 유효성 체크
+3. `PyObject_New`를 통해 객체 생성
+4. 메모리 할당 및 초기화
+5. 아래 코드를 통해 메모리 초기화
+    ```c
+    _PyUnicode_WSTR(unicode)[0] = 0;
+    _PyUnicode_WSTR(unicode)[length] = 0;
+    ```
+6. `_PyUnicode_CheckConsistency`를 통해 객체 검증
+
+### `unicode_fill`
+
+문자열 데이터를 특정 값으로 채우는 기능 수행, 메모리 초기화 및 데이터 채우기 작업에 사용
+
+```c
+// Objects/unicodeobject.c
+
+static inline void
+unicode_fill(enum PyUnicode_Kind kind, void *data, Py_UCS4 value,
+             Py_ssize_t start, Py_ssize_t length)
+{
+    assert(0 <= start);
+    assert(kind != PyUnicode_WCHAR_KIND);
+    switch (kind) {
+    case PyUnicode_1BYTE_KIND: {
+        assert(value <= 0xff);
+        Py_UCS1 ch = (unsigned char)value;
+        Py_UCS1 *to = (Py_UCS1 *)data + start;
+        memset(to, ch, length);
+        break;
+    }
+    case PyUnicode_2BYTE_KIND: {
+        assert(value <= 0xffff);
+        Py_UCS2 ch = (Py_UCS2)value;
+        Py_UCS2 *to = (Py_UCS2 *)data + start;
+        const Py_UCS2 *end = to + length;
+        for (; to < end; ++to) *to = ch;
+        break;
+    }
+    case PyUnicode_4BYTE_KIND: {
+        assert(value <= MAX_UNICODE);
+        Py_UCS4 ch = value;
+        Py_UCS4 * to = (Py_UCS4 *)data + start;
+        const Py_UCS4 *end = to + length;
+        for (; to < end; ++to) *to = ch;
+        break;
+    }
+    default: Py_UNREACHABLE();
+    }
+}
+```
+
+- Parameters
+    - `enum PyUnicode_Kind kind` : 문자열 유니코드 인코딩 종류
+    - `void *data` : 문자열 데이터
+    - `Py_UCS4 value` : 채울 유니코드 값
+    - `Py_ssize_t start` : 문자열 데이터를 채우기 시작할 위치
+    - `Py_ssize_t length` : 채울 길이
+
+1. 시작 위치 유효성 체크
+2. 유니코드 종류에 따른 방식으로 데이터 채우기