Skip to content

Commit 9b166d2

Browse files
authored
[김진아] 11주차 미션 제출 (#119)
1 parent 17635d2 commit 9b166d2

File tree

1 file changed

+391
-0
lines changed

1 file changed

+391
-0
lines changed

8th_members/김진아/11주차.md

Lines changed: 391 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,391 @@
1+
# 11주차
2+
3+
## 원하는 type 정해서 구조체 및 type 코드 분석해보기
4+
5+
### String type
6+
7+
```c
8+
// Include/unicodeobject.h
9+
10+
/* Type object of the built-in string type (its tp_name is "str"). */
PyAPI_DATA(PyTypeObject) PyUnicode_Type;
11+
/* Type object of the string iterator (its tp_name is "str_iterator"). */
PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
12+
```
13+
- `PyUnicode_Type` : 기본 문자열 타입, Python 문자열 객체의 모든 메타데이터와 메서드를 포함
14+
- `PyUnicodeIter_Type` : 문자열 이터레이터 타입, 문자열을 순회할 때 사용
15+
16+
### `PyUnicode_Type`
17+
18+
```c
19+
// Objects/unicodeobject.c
20+
21+
/* Type object for the built-in "str" type.
 * The initializer is positional: each entry fills the slot named in its
 * trailing comment. An entry of 0 means this initializer supplies no value
 * for that slot. */
PyTypeObject PyUnicode_Type = {
22+
PyVarObject_HEAD_INIT(&PyType_Type, 0)
23+
"str", /* tp_name */
24+
sizeof(PyUnicodeObject), /* tp_basicsize */
25+
0, /* tp_itemsize */
26+
/* Slots */
27+
(destructor)unicode_dealloc, /* tp_dealloc */
28+
0, /* tp_vectorcall_offset */
29+
0, /* tp_getattr */
30+
0, /* tp_setattr */
31+
0, /* tp_as_async */
32+
unicode_repr, /* tp_repr */
33+
&unicode_as_number, /* tp_as_number */
34+
&unicode_as_sequence, /* tp_as_sequence */
35+
&unicode_as_mapping, /* tp_as_mapping */
36+
(hashfunc) unicode_hash, /* tp_hash */
37+
0, /* tp_call */
38+
(reprfunc) unicode_str, /* tp_str */
39+
PyObject_GenericGetAttr, /* tp_getattro */
40+
0, /* tp_setattro */
41+
0, /* tp_as_buffer */
42+
/* The flag expression spans two lines; together they form tp_flags. */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
43+
Py_TPFLAGS_UNICODE_SUBCLASS, /* tp_flags */
44+
unicode_doc, /* tp_doc */
45+
0, /* tp_traverse */
46+
0, /* tp_clear */
47+
PyUnicode_RichCompare, /* tp_richcompare */
48+
0, /* tp_weaklistoffset */
49+
unicode_iter, /* tp_iter */
50+
0, /* tp_iternext */
51+
unicode_methods, /* tp_methods */
52+
0, /* tp_members */
53+
0, /* tp_getset */
54+
&PyBaseObject_Type, /* tp_base */
55+
0, /* tp_dict */
56+
0, /* tp_descr_get */
57+
0, /* tp_descr_set */
58+
0, /* tp_dictoffset */
59+
0, /* tp_init */
60+
0, /* tp_alloc */
61+
unicode_new, /* tp_new: constructor entry point for str */
62+
PyObject_Del, /* tp_free */
63+
};
64+
```
65+
66+
### `PyUnicodeIter_Type`
67+
68+
```c
69+
// Objects/unicodeobject.c
70+
71+
/* Type object for "str_iterator".
 * Only deallocation, attribute lookup, GC traversal, the iterator slots and
 * the method table are filled in; the other listed slots are 0. */
PyTypeObject PyUnicodeIter_Type = {
72+
PyVarObject_HEAD_INIT(&PyType_Type, 0)
73+
"str_iterator", /* tp_name */
74+
sizeof(unicodeiterobject), /* tp_basicsize */
75+
0, /* tp_itemsize */
76+
/* methods */
77+
(destructor)unicodeiter_dealloc, /* tp_dealloc */
78+
0, /* tp_vectorcall_offset */
79+
0, /* tp_getattr */
80+
0, /* tp_setattr */
81+
0, /* tp_as_async */
82+
0, /* tp_repr */
83+
0, /* tp_as_number */
84+
0, /* tp_as_sequence */
85+
0, /* tp_as_mapping */
86+
0, /* tp_hash */
87+
0, /* tp_call */
88+
0, /* tp_str */
89+
PyObject_GenericGetAttr, /* tp_getattro */
90+
0, /* tp_setattro */
91+
0, /* tp_as_buffer */
92+
/* Py_TPFLAGS_HAVE_GC is set here and a tp_traverse function is supplied below. */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
93+
0, /* tp_doc */
94+
(traverseproc)unicodeiter_traverse, /* tp_traverse */
95+
0, /* tp_clear */
96+
0, /* tp_richcompare */
97+
0, /* tp_weaklistoffset */
98+
PyObject_SelfIter, /* tp_iter */
99+
(iternextfunc)unicodeiter_next, /* tp_iternext */
100+
unicodeiter_methods, /* tp_methods */
101+
/* By position this is the slot after tp_methods (tp_members in the
 * PyUnicode_Type table); slots after it are not listed in this initializer. */
0,
102+
};
103+
```
104+
105+
### `unicode_new`
106+
107+
```c
108+
// Objects/unicodeobject.c
109+
110+
/* tp_new implementation installed in PyUnicode_Type.
 *
 * type: the type being instantiated; anything other than PyUnicode_Type is
 *       handed to unicode_subtype_new().
 * args, kwds: call arguments; the accepted keywords are "object",
 *       "encoding" and "errors", all optional.
 *
 * Returns the result of the chosen conversion, or NULL when argument
 * parsing fails.
 */
static PyObject *
111+
unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
112+
{
113+
/* Stays NULL when no "object" argument is passed. */
PyObject *x = NULL;
114+
static char *kwlist[] = {"object", "encoding", "errors", 0};
115+
char *encoding = NULL;
116+
char *errors = NULL;
117+
118+
/* Not exactly str: delegate to the subtype constructor. */
if (type != &PyUnicode_Type)
119+
return unicode_subtype_new(type, args, kwds);
120+
/* "|Oss": every argument is optional; one object followed by two C strings. */
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:str",
121+
kwlist, &x, &encoding, &errors))
122+
return NULL;
123+
/* No object given: return the empty string. */
if (x == NULL)
124+
_Py_RETURN_UNICODE_EMPTY();
125+
/* Neither encoding nor errors given: plain str(x) conversion. */
if (encoding == NULL && errors == NULL)
126+
return PyObject_Str(x);
127+
else
128+
/* At least one of encoding/errors given: decode x. */
return PyUnicode_FromEncodedObject(x, encoding, errors);
129+
}
130+
```
131+
132+
1. 타입 확인: `type`이 `PyUnicode_Type`이 아니면(서브타입) `unicode_subtype_new`에 위임
133+
2. `PyArg_ParseTupleAndKeywords`를 통해 인자 파싱
134+
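3. 객체 생성: `x`가 없으면 빈 문자열 반환, `encoding`과 `errors`가 모두 없으면 `PyObject_Str(x)`, 그 외에는 `PyUnicode_FromEncodedObject(x, encoding, errors)` 호출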
135+
136+
### `PyUnicode_New`
137+
138+
```c
139+
// Objects/unicodeobject.c
140+
141+
/* Allocate a new string object with room for `size` characters.
 *
 * size:    number of characters; must be >= 0.
 * maxchar: upper bound on the code points to be stored; it selects the
 *          per-character storage width (1, 2 or 4 bytes).
 *
 * The object header and the character buffer come from a single
 * PyObject_MALLOC call, with the buffer placed directly after the header.
 * Returns the new object, or NULL after setting SystemError (negative size,
 * maxchar above MAX_UNICODE) or a memory error.
 */
PyObject *
142+
PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
143+
{
144+
PyObject *obj;
145+
PyCompactUnicodeObject *unicode;
146+
void *data;
147+
enum PyUnicode_Kind kind;
148+
int is_sharing, is_ascii;
149+
Py_ssize_t char_size;
150+
Py_ssize_t struct_size;
151+
152+
/* Optimization for empty strings */
153+
if (size == 0 && unicode_empty != NULL) {
154+
Py_INCREF(unicode_empty);
155+
return unicode_empty;
156+
}
157+
158+
is_ascii = 0;
159+
is_sharing = 0;
160+
/* Default header; replaced by PyASCIIObject in the maxchar < 128 branch. */
struct_size = sizeof(PyCompactUnicodeObject);
161+
/* Choose kind, bytes per character and header struct from maxchar. */
if (maxchar < 128) {
162+
kind = PyUnicode_1BYTE_KIND;
163+
char_size = 1;
164+
is_ascii = 1;
165+
struct_size = sizeof(PyASCIIObject);
166+
}
167+
else if (maxchar < 256) {
168+
kind = PyUnicode_1BYTE_KIND;
169+
char_size = 1;
170+
}
171+
else if (maxchar < 65536) {
172+
kind = PyUnicode_2BYTE_KIND;
173+
char_size = 2;
174+
/* wchar_t has the same width as the characters, so wstr can alias
 * the data buffer (see the is_sharing branch further down). */
if (sizeof(wchar_t) == 2)
175+
is_sharing = 1;
176+
}
177+
else {
178+
if (maxchar > MAX_UNICODE) {
179+
PyErr_SetString(PyExc_SystemError,
180+
"invalid maximum character passed to PyUnicode_New");
181+
return NULL;
182+
}
183+
kind = PyUnicode_4BYTE_KIND;
184+
char_size = 4;
185+
if (sizeof(wchar_t) == 4)
186+
is_sharing = 1;
187+
}
188+
189+
/* Ensure we won't overflow the size. */
190+
if (size < 0) {
191+
PyErr_SetString(PyExc_SystemError,
192+
"Negative size passed to PyUnicode_New");
193+
return NULL;
194+
}
195+
/* struct_size + (size + 1) * char_size must fit in PY_SSIZE_T_MAX. */
if (size > ((PY_SSIZE_T_MAX - struct_size) / char_size - 1))
196+
return PyErr_NoMemory();
197+
198+
/* Duplicated allocation code from _PyObject_New() instead of a call to
199+
* PyObject_New() so we are able to allocate space for the object and
200+
* its data buffer.
201+
*/
202+
/* size + 1: one extra element for the terminating 0 written below. */
obj = (PyObject *) PyObject_MALLOC(struct_size + (size + 1) * char_size);
203+
if (obj == NULL)
204+
return PyErr_NoMemory();
205+
obj = PyObject_INIT(obj, &PyUnicode_Type);
206+
if (obj == NULL)
207+
return NULL;
208+
209+
unicode = (PyCompactUnicodeObject *)obj;
210+
/* The character data starts right after whichever header struct is used. */
if (is_ascii)
211+
data = ((PyASCIIObject*)obj) + 1;
212+
else
213+
data = unicode + 1;
214+
_PyUnicode_LENGTH(unicode) = size;
215+
/* NOTE(review): -1 presumably marks the hash as not yet computed - confirm. */
_PyUnicode_HASH(unicode) = -1;
216+
_PyUnicode_STATE(unicode).interned = 0;
217+
_PyUnicode_STATE(unicode).kind = kind;
218+
_PyUnicode_STATE(unicode).compact = 1;
219+
_PyUnicode_STATE(unicode).ready = 1;
220+
_PyUnicode_STATE(unicode).ascii = is_ascii;
221+
/* Terminate the buffer and reset the cached representations. The ASCII
 * branch does not touch utf8/utf8_length/wstr_length; presumably
 * PyASCIIObject has no such fields - confirm against the struct layout. */
if (is_ascii) {
222+
((char*)data)[size] = 0;
223+
_PyUnicode_WSTR(unicode) = NULL;
224+
}
225+
else if (kind == PyUnicode_1BYTE_KIND) {
226+
((char*)data)[size] = 0;
227+
_PyUnicode_WSTR(unicode) = NULL;
228+
_PyUnicode_WSTR_LENGTH(unicode) = 0;
229+
unicode->utf8 = NULL;
230+
unicode->utf8_length = 0;
231+
}
232+
else {
233+
unicode->utf8 = NULL;
234+
unicode->utf8_length = 0;
235+
if (kind == PyUnicode_2BYTE_KIND)
236+
((Py_UCS2*)data)[size] = 0;
237+
else /* kind == PyUnicode_4BYTE_KIND */
238+
((Py_UCS4*)data)[size] = 0;
239+
/* When wchar_t matches the character width, wstr points at the
 * same buffer as the character data. */
if (is_sharing) {
240+
_PyUnicode_WSTR_LENGTH(unicode) = size;
241+
_PyUnicode_WSTR(unicode) = (wchar_t *)data;
242+
}
243+
else {
244+
_PyUnicode_WSTR_LENGTH(unicode) = 0;
245+
_PyUnicode_WSTR(unicode) = NULL;
246+
}
247+
}
248+
/* Debug builds only. NOTE(review): judging by its name this fills the
 * fresh buffer with invalid data; its definition is not shown here. */
#ifdef Py_DEBUG
249+
unicode_fill_invalid((PyObject*)unicode, 0);
250+
#endif
251+
assert(_PyUnicode_CheckConsistency((PyObject*)unicode, 0));
252+
return obj;
253+
}
254+
```
255+
256+
1. empty string 처리
257+
2. `maxchar` 범위에 따라 유니코드 종류(1/2/4바이트 kind)와 ASCII 여부 결정
258+
3. `size` 검증(음수, 오버플로) 후 객체 구조체와 데이터 버퍼를 한 번의 `PyObject_MALLOC`으로 할당
259+
4. `PyObject_INIT`을 통해 객체 초기화
260+
5. 객체 상태 초기화
261+
6. 데이터 초기화
262+
7. 디버그 빌드(`Py_DEBUG`)일 경우 `unicode_fill_invalid`로 데이터 버퍼를 무효 값으로 채우고, `assert(_PyUnicode_CheckConsistency(...))`로 객체 일관성 검증
263+
264+
### `_PyUnicode_New`
265+
266+
```c
267+
// Objects/unicodeobject.c
268+
269+
/* Create a string object whose character storage is a separately allocated
 * wstr buffer of `length` + 1 Py_UNICODE elements.
 *
 * The object is left with kind 0, compact 0 and ready 0, and with its data
 * pointer and UTF-8 cache set to NULL.
 * Returns the new object, or NULL with an exception set (memory error, or
 * SystemError for a negative length).
 */
static PyUnicodeObject *
270+
_PyUnicode_New(Py_ssize_t length)
271+
{
272+
PyUnicodeObject *unicode;
273+
size_t new_size;
274+
275+
/* Optimization for empty strings */
276+
if (length == 0 && unicode_empty != NULL) {
277+
Py_INCREF(unicode_empty);
278+
return (PyUnicodeObject*)unicode_empty;
279+
}
280+
281+
/* Ensure we won't overflow the size. */
282+
if (length > ((PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(Py_UNICODE)) - 1)) {
283+
return (PyUnicodeObject *)PyErr_NoMemory();
284+
}
285+
/* A negative length passes the check above and is rejected here. */
if (length < 0) {
286+
PyErr_SetString(PyExc_SystemError,
287+
"Negative size passed to _PyUnicode_New");
288+
return NULL;
289+
}
290+
291+
unicode = PyObject_New(PyUnicodeObject, &PyUnicode_Type);
292+
if (unicode == NULL)
293+
return NULL;
294+
/* Bytes needed for length + 1 elements (one extra for the terminator). */
new_size = sizeof(Py_UNICODE) * ((size_t)length + 1);
295+
296+
/* Initialize every field before the buffer allocation, so the object is
 * fully set up if the failure path below has to Py_DECREF it. */
_PyUnicode_WSTR_LENGTH(unicode) = length;
297+
_PyUnicode_HASH(unicode) = -1;
298+
_PyUnicode_STATE(unicode).interned = 0;
299+
_PyUnicode_STATE(unicode).kind = 0;
300+
_PyUnicode_STATE(unicode).compact = 0;
301+
_PyUnicode_STATE(unicode).ready = 0;
302+
_PyUnicode_STATE(unicode).ascii = 0;
303+
_PyUnicode_DATA_ANY(unicode) = NULL;
304+
_PyUnicode_LENGTH(unicode) = 0;
305+
_PyUnicode_UTF8(unicode) = NULL;
306+
_PyUnicode_UTF8_LENGTH(unicode) = 0;
307+
308+
_PyUnicode_WSTR(unicode) = (Py_UNICODE*) PyObject_MALLOC(new_size);
309+
/* Buffer allocation failed: drop the object and report the memory error. */
if (!_PyUnicode_WSTR(unicode)) {
310+
Py_DECREF(unicode);
311+
PyErr_NoMemory();
312+
return NULL;
313+
}
314+
315+
/* Initialize the first element to guard against cases where
316+
* the caller fails before initializing str -- unicode_resize()
317+
* reads str[0], and the Keep-Alive optimization can keep memory
318+
* allocated for str alive across a call to unicode_dealloc(unicode).
319+
* We don't want unicode_resize to read uninitialized memory in
320+
* that case.
321+
*/
322+
_PyUnicode_WSTR(unicode)[0] = 0;
323+
/* Terminating element at index `length`. */
_PyUnicode_WSTR(unicode)[length] = 0;
324+
325+
assert(_PyUnicode_CheckConsistency((PyObject *)unicode, 0));
326+
return unicode;
327+
}
328+
```
329+
330+
1. empty string 처리
331+
2. `length` 유효성 체크 (오버플로, 음수)
332+
3. `PyObject_New`를 통해 객체 생성
333+
4. 객체 상태 필드 초기화 후 `PyObject_MALLOC`으로 wstr 버퍼 메모리 할당
334+
5. 아래 코드를 통해 버퍼의 첫 번째 원소와 마지막(`length` 위치) 원소를 0으로 초기화
335+
```c
336+
_PyUnicode_WSTR(unicode)[0] = 0;
337+
_PyUnicode_WSTR(unicode)[length] = 0;
338+
```
339+
6. `_PyUnicode_CheckConsistency`를 통해 객체 검증
340+
341+
### `unicode_fill`
342+
343+
문자열 데이터를 특정 값으로 채우는 기능 수행, 메모리 초기화 및 데이터 채우기 작업에 사용
344+
345+
```c
346+
// Objects/unicodeobject.c
347+
348+
/* Write `value` into `length` consecutive elements of `data`, starting at
 * element index `start`.
 *
 * kind:   selects the element type (Py_UCS1, Py_UCS2 or Py_UCS4); must not
 *         be PyUnicode_WCHAR_KIND.
 * data:   start of the character buffer.
 * value:  code point to store; asserted to fit the element type.
 * start:  first element index to write; asserted to be >= 0.
 * length: number of elements to write.
 */
static inline void
349+
unicode_fill(enum PyUnicode_Kind kind, void *data, Py_UCS4 value,
350+
Py_ssize_t start, Py_ssize_t length)
351+
{
352+
assert(0 <= start);
353+
assert(kind != PyUnicode_WCHAR_KIND);
354+
switch (kind) {
355+
case PyUnicode_1BYTE_KIND: {
356+
assert(value <= 0xff);
357+
Py_UCS1 ch = (unsigned char)value;
358+
Py_UCS1 *to = (Py_UCS1 *)data + start;
359+
/* memset is used only in this 1-byte case. */
memset(to, ch, length);
360+
break;
361+
}
362+
case PyUnicode_2BYTE_KIND: {
363+
assert(value <= 0xffff);
364+
Py_UCS2 ch = (Py_UCS2)value;
365+
Py_UCS2 *to = (Py_UCS2 *)data + start;
366+
const Py_UCS2 *end = to + length;
367+
/* The 2- and 4-byte cases store element by element. */
for (; to < end; ++to) *to = ch;
368+
break;
369+
}
370+
case PyUnicode_4BYTE_KIND: {
371+
assert(value <= MAX_UNICODE);
372+
Py_UCS4 ch = value;
373+
Py_UCS4 * to = (Py_UCS4 *)data + start;
374+
const Py_UCS4 *end = to + length;
375+
for (; to < end; ++to) *to = ch;
376+
break;
377+
}
378+
/* Any other kind is treated as unreachable. */
default: Py_UNREACHABLE();
379+
}
380+
}
381+
```
382+
383+
- Parameters
384+
- `enum PyUnicode_Kind kind` : 문자 하나의 저장 폭을 나타내는 종류 (1/2/4바이트 kind)
385+
- `void *data` : 문자열 데이터
386+
- `Py_UCS4 value` : 채울 유니코드 값
387+
- `Py_ssize_t start` : 문자열 데이터를 채우기 시작할 위치
388+
- `Py_ssize_t length` : 채울 길이
389+
390+
1. `assert`로 시작 위치(`0 <= start`)와 kind(`PyUnicode_WCHAR_KIND`가 아님) 유효성 체크
391+
2. 유니코드 종류에 따른 방식으로 데이터 채우기

0 commit comments

Comments
 (0)