跳转至

STL源码剖析之vector

0.导语

vector的数据安排以及操作方式,与array非常相似。两者的唯一差别在于空间的运用的灵活性,array是静态的,一旦配置了就不能改变,而 vector是动态空间,随着元素的加入,它的内部机制会自行扩充空间以容纳新元素。下面一起来看一下vector的"内部机制",怎么来实现空间配置策略的。

1.vector

_Vector_base中开头有两行比较难理解,下面一个一个分析:

1.1 _Tp_alloc_type

开头处定义:

 typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template rebind<_Tp>::other _Tp_alloc_type;

__gnu_cxx::__alloc_traits中:对应文件为:ext/alloc_traits.h

 template<typename _Tp>
      struct rebind
      { typedef typename _Base_type::template rebind_alloc<_Tp> other; };

等价于

typename __gnu_cxx::__alloc_traits<_Alloc>::template rebind<_Tp>::other 

等价于:

typename _Base_type::template rebind_alloc<_Tp>

_Base_type是:

typedef std::allocator_traits<_Alloc>           _Base_type;

所以上述等价于:

typename std::allocator_traits<_Alloc>::template rebind_alloc<_Tp>

继续到allocator_traits中寻找

找到了:

  template<typename _Up>
    using rebind_alloc = allocator<_Up>;

于是:

std::allocator_traits<_Alloc>::template rebind_alloc<_Tp>

等价于:

allocator<_Tp>

小结

 typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template rebind<_Tp>::other _Tp_alloc_type;

等价于:

typedef allocator<_Tp> _Tp_alloc_type

1.2 pointer

pointer

 typedef typename __gnu_cxx::__alloc_traits<_Tp_alloc_type>::pointer
        pointer;

等价于:

 typedef typename __gnu_cxx::__alloc_traits<allocator<_Tp>>::pointer
        pointer;

根据下面两行:

typedef std::allocator_traits<_Alloc>           _Base_type;
typedef typename _Base_type::pointer            pointer;

又等价于:

 typedef std::allocator_traits<_Alloc>::pointer
        pointer;

allocator_traits中找到下面:

  /**
   * @brief   The allocator's pointer type.
   *
   * @c Alloc::pointer if that type exists, otherwise @c value_type*
  */
  typedef __pointer pointer;

注释中说了如果存在就是Alloc::pointer,否则为value_type *

小结

 typedef typename __gnu_cxx::__alloc_traits<_Tp_alloc_type>::pointer
        pointer;
 // 或者
 typedef typename __gnu_cxx::__alloc_traits<allocator<_Tp>>::pointer
        pointer;

等价于

/**
* @brief   The allocator's pointer type.
*
* @c Alloc::pointer if that type exists, otherwise @c value_type*
*/
typedef __pointer pointer;

如果存在_Tp_alloc_type::pointer也就是allocator<_Tp>存在就是allocator<_Tp>::pointer

这个看allocator.h源码:

typedef _Tp*       pointer;

否则为value_type*。而value_type为:

typedef typename _Alloc::value_type value_type;

所以value_type*推导出为:

_Tp::value_type*

1.3 vector的内存管理

_Vector_base中有一个内存管理器_Vector_impl类,该结构体继承allocator(根据上述1.1等价条件得出)。

template<typename _Tp, typename _Alloc>
struct _Vector_base {
    //__alloc_traits -> allocator_traits
    // typedef allocator<_Tp> _Tp_alloc_type
    typedef typename __gnu_cxx::__alloc_traits<_Alloc>::template
    rebind<_Tp>::other _Tp_alloc_type;
    //  _Tp::value_type* or _Tp*
    typedef typename __gnu_cxx::__alloc_traits<_Tp_alloc_type>::pointer
            pointer;

    struct _Vector_impl
            : public _Tp_alloc_type {
        pointer _M_start;       // 使用空间起始位置    
        pointer _M_finish;      // 使用空间结束位置
        pointer _M_end_of_storage;   // 可使用空间结束位置 

        _Vector_impl()
                : _Tp_alloc_type(), _M_start(0), _M_finish(0), _M_end_of_storage(0) {}

        _Vector_impl(_Tp_alloc_type const &__a)

        // vector数据交换,只交换内存地址,不交换数据
        void _M_swap_data(_Vector_impl &__x)

        _GLIBCXX_NOEXCEPT
        {
            std::swap(_M_start, __x._M_start);
            std::swap(_M_finish, __x._M_finish);
            std::swap(_M_end_of_storage, __x._M_end_of_storage);
        }
    };

public:
    typedef _Alloc allocator_type;
    // 前面我们知道_Vector_impl继承自allocator分配器,而这个又是_Tp_alloc_type,所以这里返回的就是_M_impl的基类。
    _Tp_alloc_type &
    _M_get_Tp_allocator()

    _GLIBCXX_NOEXCEPT
    { return *static_cast<_Tp_alloc_type *>(&this->_M_impl); }

    const _Tp_alloc_type &
    _M_get_Tp_allocator() const

    _GLIBCXX_NOEXCEPT
    { return *static_cast<const _Tp_alloc_type *>(&this->_M_impl); }

    allocator_type    // 获取传递进来的分配器      
    get_allocator() const

    _GLIBCXX_NOEXCEPT
    { return allocator_type(_M_get_Tp_allocator()); }

    _Vector_base()
            : _M_impl() {}

    _Vector_base(const allocator_type &__a)

    _GLIBCXX_NOEXCEPT
            : _M_impl(__a) {}

    _Vector_base(size_t __n)
            : _M_impl() { _M_create_storage(__n); }

    _Vector_base(size_t __n, const allocator_type &__a)
            : _M_impl(__a) { _M_create_storage(__n); }

#if __cplusplus >= 201103L
    _Vector_base(_Tp_alloc_type&& __a) noexcept
    : _M_impl(std::move(__a)) { }

    // 移动构造函数,只交换3个指针,不copy数据
    _Vector_base(_Vector_base&& __x) noexcept
    : _M_impl(std::move(__x._M_get_Tp_allocator()))
    { this->_M_impl._M_swap_data(__x._M_impl); }

    _Vector_base(_Vector_base&& __x, const allocator_type& __a)
    : _M_impl(__a)
    {
  if (__x.get_allocator() == __a)
    this->_M_impl._M_swap_data(__x._M_impl);
  else
    {
      size_t __n = __x._M_impl._M_finish - __x._M_impl._M_start;
      _M_create_storage(__n);
    }
    }
#endif

    ~_Vector_base()

    _GLIBCXX_NOEXCEPT
    {
        _M_deallocate(this->_M_impl._M_start, this->_M_impl._M_end_of_storage
                                              - this->_M_impl._M_start);
    }

public:
    _Vector_impl _M_impl;

    pointer _M_allocate(size_t __n) {
        typedef __gnu_cxx::__alloc_traits <_Tp_alloc_type> _Tr;
        return __n != 0 ? _Tr::allocate(_M_impl, __n) : 0;  // 同_M_deallocate,一直往后调用的就是malloc函数
    }

    void _M_deallocate(pointer __p, size_t __n) {
        typedef __gnu_cxx::__alloc_traits <_Tp_alloc_type> _Tr; 
        if (__p)
            _Tr::deallocate(_M_impl, __p, __n); // 最后调用allocator_traits的deallocate,而该函数又是根据传递进来的_M_impl进行deallocate,一直往后,最后调用的就是free函数
    }

private:
    void _M_create_storage(size_t __n) {
        this->_M_impl._M_start = this->_M_allocate(__n);
        this->_M_impl._M_finish = this->_M_impl._M_start;
        this->_M_impl._M_end_of_storage = this->_M_impl._M_start + __n;
    }
};

小结:_Vector_base专门负责vector的内存管理,内部类_M_impl通过继承_Tp_alloc_type(也就是allocator)得到内存分配释放的功能,_M_allocate和_M_deallocate分别分配和释放vector所用内存,vector只需要负责元素构造和析构。

在vector中,默认内存分配器为std::allocator<_Tp>

template<typename _Tp, typename _Alloc = std::allocator<_Tp>>
class vector : protected _Vector_base<_Tp, _Alloc> {
}

vector代码中使用基类的内存函数及typedef等:

template<typename _Tp, typename _Alloc = std::allocator<_Tp>>
class vector : protected _Vector_base<_Tp, _Alloc> {
    typedef _Vector_base<_Tp, _Alloc> _Base;
    typedef typename _Base::_Tp_alloc_type _Tp_alloc_type;
public:
    typedef typename _Base::pointer pointer;
protected:
    using _Base::_M_allocate;
    using _Base::_M_deallocate;
    using _Base::_M_impl;
    using _Base::_M_get_Tp_allocator;
}

2.vector迭代器

在vector中使用了两种迭代器,分别是正向__normal_iterator与反向迭代器reverse_iterator:

正向:

typedef __gnu_cxx::__normal_iterator <pointer, vector> iterator;
typedef __gnu_cxx::__normal_iterator <const_pointer, vector> const_iterator;

反向:

typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
typedef std::reverse_iterator<iterator> reverse_iterator;

__normal_iteratorreverse_iterator都定义于stl_iterator.h,封装了vector元素的指针。

2.1 正向

template<typename _Iterator, typename _Container>
class __normal_iterator
{
protected:
    _Iterator _M_current;

    typedef iterator_traits<_Iterator>      __traits_type;

public:
    typedef _Iterator                   iterator_type;

    // iterator必须包含的五种typedef
    typedef typename __traits_type::iterator_category iterator_category;
    typedef typename __traits_type::value_type      value_type;
    typedef typename __traits_type::difference_type     difference_type;
    typedef typename __traits_type::reference   reference;
    typedef typename __traits_type::pointer     pointer;

    _GLIBCXX_CONSTEXPR __normal_iterator() _GLIBCXX_NOEXCEPT
    : _M_current(_Iterator()) { }

    explicit
    __normal_iterator(const _Iterator& __i) _GLIBCXX_NOEXCEPT
    : _M_current(__i) { }

    // Allow iterator to const_iterator conversion
    template<typename _Iter>
    __normal_iterator(const __normal_iterator<_Iter,
            typename __enable_if<
            (std::__are_same<_Iter, typename _Container::pointer>::__value),
            _Container>::__type>& __i) _GLIBCXX_NOEXCEPT
    : _M_current(__i.base()) { }

    // Forward iterator requirements
    reference
    operator*() const _GLIBCXX_NOEXCEPT
    { return *_M_current; }

    pointer
    operator->() const _GLIBCXX_NOEXCEPT
    { return _M_current; }

    // 前置++
    __normal_iterator&
    operator++() _GLIBCXX_NOEXCEPT
    {
        ++_M_current;
        return *this;
    }

    // 后置++
    __normal_iterator
    operator++(int) _GLIBCXX_NOEXCEPT
    { return __normal_iterator(_M_current++); }

    // 前置--
    // Bidirectional iterator requirements
    __normal_iterator&
    operator--() _GLIBCXX_NOEXCEPT
    {
        --_M_current;
        return *this;
    }

    // 后置--
    __normal_iterator
    operator--(int) _GLIBCXX_NOEXCEPT
    { return __normal_iterator(_M_current--); }

    // 随机访问迭代器都要重载[]操作符
    // Random access iterator requirements
    reference
    operator[](difference_type __n) const _GLIBCXX_NOEXCEPT
    { return _M_current[__n]; }

    // +=操作符 跳跃n个difference_type
    __normal_iterator&
    operator+=(difference_type __n) _GLIBCXX_NOEXCEPT
    { _M_current += __n; return *this; }

    // +操作符 跳跃n个difference_type
    __normal_iterator
    operator+(difference_type __n) const _GLIBCXX_NOEXCEPT
    { return __normal_iterator(_M_current + __n); }

    // -=操作符 后退n个difference_type
    __normal_iterator&
    operator-=(difference_type __n) _GLIBCXX_NOEXCEPT
    { _M_current -= __n; return *this; }

    // -操作符 后退n个difference_type
    __normal_iterator
    operator-(difference_type __n) const _GLIBCXX_NOEXCEPT
    { return __normal_iterator(_M_current - __n); }

    const _Iterator&
    base() const _GLIBCXX_NOEXCEPT
    { return _M_current; }
};

_M_current是指向迭代器位置的指针,这是一个随机访问型指针,operator+和operator-等移动操作可以直接移动到目的地,非随机访问型指针只能一步步移动。

2.2 反向

vector还会使用reverse_iterator,即逆序迭代器,顾名思义,其移动方向与普通迭代器相反

template<typename _Iterator>
class reverse_iterator
: public iterator<typename iterator_traits<_Iterator>::iterator_category,
            typename iterator_traits<_Iterator>::value_type,
            typename iterator_traits<_Iterator>::difference_type,
            typename iterator_traits<_Iterator>::pointer,
                    typename iterator_traits<_Iterator>::reference>
{
protected:
    _Iterator current;

    typedef iterator_traits<_Iterator>      __traits_type;

public:
    typedef _Iterator                   iterator_type;
    typedef typename __traits_type::difference_type difference_type;
    typedef typename __traits_type::pointer     pointer;
    typedef typename __traits_type::reference       reference;

    // 省略不重要的代码


    // 该迭代器是从后面end()开始,需要往前一步,才可以获取到有效的迭代器位置
    reference
    operator*() const
    {
        _Iterator __tmp = current;
        return *--__tmp;
    }

    // 通过调用上述*操作符直接实现
    pointer
    operator->() const
    { return &(operator*()); }


    // 前置++操作符完成后退任务
    reverse_iterator&
    operator++()
    {
        --current;
        return *this;
    }

    // 后置++
    reverse_iterator
    operator++(int)
    {
        reverse_iterator __tmp = *this;
        --current;
        return __tmp;
    }

    // 前置--操作符完成前进任务
    reverse_iterator&
    operator--()
    {
        ++current;
        return *this;
    }

    // 后置--
    reverse_iterator
    operator--(int)
    {
        reverse_iterator __tmp = *this;
        ++current;
        return __tmp;
    }

    // +操作符
    reverse_iterator
    operator+(difference_type __n) const
    { return reverse_iterator(current - __n); }

    // +=操作符
    reverse_iterator&
    operator+=(difference_type __n)
    {
        current -= __n;
        return *this;
    }

    // -操作符
    reverse_iterator
    operator-(difference_type __n) const
    { return reverse_iterator(current + __n); }

    // -=操作符
    reverse_iterator&
    operator-=(difference_type __n)
    {
        current += __n;
        return *this;
    }

    // []操作符
    reference
    operator[](difference_type __n) const
    { return *(*this + __n); }
};

3.vector的数据结构

vector内存由_M_impl中的M_start,_M_finish,_M_end_of_storage三个指针管理,所有关于地址,容量大小等操作都需要用到这三个指针:

iterator begin() _GLIBCXX_NOEXCEPT
    { return iterator(this->_M_impl._M_start); }
iterator end() _GLIBCXX_NOEXCEPT
    { return iterator(this->_M_impl._M_finish); }
reverse_iterator  rbegin() noexcept
    { return reverse_iterator(end()); }
reverse_iterator rend() noexcept
    { return reverse_iterator(begin()); }
size_type size() const _GLIBCXX_NOEXCEPT
    { return size_type(this->_M_impl._M_finish - this->_M_impl._M_start); }
size_type capacity() const _GLIBCXX_NOEXCEPT
    { return size_type(this->_M_impl._M_end_of_storage - this->_M_impl._M_start); }
bool empty() const _GLIBCXX_NOEXCEPT
    { return begin() == end(); }

vector_1.png

_M_finish和_M_end_of_storage之间的空间没有数据,有时候这是一种浪费,c++11提供了一个很有用的函数shrink_to_fit(),将这段未使用空间释放,主要调用了下面代码,

template<typename _Alloc>
bool vector<bool, _Alloc>::
_M_shrink_to_fit()
{
    if (capacity() - size() < int(_S_word_bit)) // 64位系统为64bytes
        return false;
    __try
    {
        _M_reallocate(size());
        return true;
    }
    __catch(...)
    { 
        return false; 
    }
}
 template<typename _Alloc>
void vector<bool, _Alloc>::
_M_reallocate(size_type __n)
{
    _Bit_type* __q = this->_M_allocate(__n);
    this->_M_impl._M_finish = _M_copy_aligned(begin(), end(),
                    iterator(__q, 0));
    this->_M_deallocate();
    this->_M_impl._M_start = iterator(__q, 0);
    this->_M_impl._M_end_of_storage = __q + _S_nword(__n);
}

_M_copy_aligned通过两个std::copy实现:

第一次swap把__first的指针与__last的指针之间的数据拷贝到__result指针所指向的起始位置。 第二次swap获得__last的指针对应的迭代器。

iterator
_M_copy_aligned(const_iterator __first, const_iterator __last,
        iterator __result)
{
    // _Bit_type * _M_p; _Bit_type为unsigned long类型
    _Bit_type* __q = std::copy(__first._M_p, __last._M_p, __result._M_p);
    return std::copy(const_iterator(__last._M_p, 0), __last,
            iterator(__q, 0));
}

先分配size()大小的内存空间,用于存储begin()end()之间的数据,释放原来的vector空间,新的vector只包含size()数量的数据,并修改_M_start_M_end_of_storage指向。

4.vector构造与析构

//使用默认内存分配器
vector() : _Base() { } 
//指定内存分配器
explicit vector(const allocator_type& __a) : _Base(__a) { }
//初始化为n个__value值,如果没指定就使用该类型默认值
explicit vector(size_type __n, const value_type& __value = value_type(),
             const allocator_type& __a = allocator_type()): _Base(__n, __a)
{ _M_fill_initialize(__n, __value); }
//拷贝构造函数
vector(const vector& __x)
    : _Base(__x.size(),
    _Alloc_traits::_S_select_on_copy(__x._M_get_Tp_allocator()))
{ this->_M_impl._M_finish =
    std::__uninitialized_copy_a(__x.begin(), __x.end(),
                                this->_M_impl._M_start,
                                _M_get_Tp_allocator());
}
//c++11的移动构造函数,获取__x的M_start,_M_finish,_M_end_of_storage,并不需要数据拷贝
vector(vector&& ) noexcept
    : _Base(std::move(__x)) { }
//从list中拷贝数据,也是c++11才有的
 vector(initializer_list<value_type> __l,
        const allocator_type& __a = allocator_type())
: _Base(__a)
{
    _M_range_initialize(__l.begin(), __l.end(), random_access_iterator_tag());
}
//支持vector使用两个迭代器范围内的值初始化,除了stl的迭代器,也可以是数组地址
template<typename _InputIterator,
        typename = std::_RequireInputIter<_InputIterator>>
vector(_InputIterator __first, _InputIterator __last,
        const allocator_type& __a = allocator_type())
: _Base(__a)
{ _M_initialize_dispatch(__first, __last, __false_type()); }
//只析构所有元素,释放内存由vector_base完成
~vector() _GLIBCXX_NOEXCEPT
{ std::_Destroy(this->_M_impl._M_start, this->_M_impl._M_finish,_M_get_Tp_allocator()); }

5.vector

插入涉及到内存分配,动态调整,与一开始提到的vector与array区别,就在下面体现出:

typename vector<_Tp, _Alloc>::iterator
vector<_Tp, _Alloc>::insert(iterator __position, const value_type& __x)
{
    const size_type __n = __position  begin();
    //插入到最后一个位置,相当于push_back
    if (this->_M_impl._M_finish != this->_M_impl._M_end_of_storage
        && __position == end())
    {
        _Alloc_traits::construct(this->_M_impl, this->_M_impl._M_finish, __x);
        ++this->_M_impl._M_finish;
    }
    else
    {
        _M_insert_aux(__position, __x);
    }
    return iterator(this->_M_impl._M_start + __n);
}

其中_M_insert_aux实现:

template<typename _Tp, typename _Alloc>
void vector<_Tp, _Alloc>::_M_insert_aux(iterator __position, const _Tp& __x)
{   
    //内存空间足够
    if (this->_M_impl._M_finish != this->_M_impl._M_end_of_storage)
    { 
        _Alloc_traits::construct(this->_M_impl, this->_M_impl._M_finish,
                                _GLIBCXX_MOVE(*(this->_M_impl._M_finish
                                                - 1)));
        ++this->_M_impl._M_finish;
        //__position后的元素依次向后移动一个位置
        _GLIBCXX_MOVE_BACKWARD3(__position.base(),
                                this->_M_impl._M_finish - 2,
                                this->_M_impl._M_finish  1);
        //目标地址赋值
        *__position = _Tp(std::forward<_Args>(__args)...);
    }
    else
    {
        //内存加倍
        const size_type __len =
        _M_check_len(size_type(1), "vector::_M_insert_aux");
        const size_type __elems_before = __position - begin();
        pointer __new_start(this->_M_allocate(__len));
        pointer __new_finish(__new_start);
        __try
        {
            //给position位置赋值
            _Alloc_traits::construct(this->_M_impl,
                                    __new_start + __elems_before,
                                    std::forward<_Args>(__args)...);
                                    __x);
            __new_finish = 0;
            //拷贝position位置前元素
            __new_finish = std::__uninitialized_move_if_noexcept_a
            (this->_M_impl._M_start, __position.base(),
                __new_start, _M_get_Tp_allocator());

            ++__new_finish;
            //拷贝position位置后元素
            __new_finish
            = std::__uninitialized_move_if_noexcept_a
            (__position.base(), this->_M_impl._M_finish,
                __new_finish, _M_get_Tp_allocator());
            }
        __catch(...)
        {
            if (!__new_finish)
            _Alloc_traits::destroy(this->_M_impl,
                                    __new_start + __elems_before);
            else
            std::_Destroy(__new_start, __new_finish, _M_get_Tp_allocator());
            _M_deallocate(__new_start, __len);
            __throw_exception_again;
        }

        //析构原vector所有元素
        std::_Destroy(this->_M_impl._M_start, this->_M_impl._M_finish,
                    _M_get_Tp_allocator());
        //释放原vector内存空间
        _M_deallocate(this->_M_impl._M_start,
                    this->_M_impl._M_end_of_storagethis->_M_impl._M_start);
        //vector内存地址指向新空间
        this->_M_impl._M_start = __new_start;
        this->_M_impl._M_finish = __new_finish;
        this->_M_impl._M_end_of_storage = __new_start + __len;
    }
}

其中_M_check_len

size_type
_M_check_len(size_type __n, const char* __s) const
{
    if (max_size() - size() < __n)
        __throw_length_error(__N(__s));

    const size_type __len = size() + std::max(size(), __n); //如果n小于当前size,内存加倍,否则内存增长n。
    return (__len < size() || __len > max_size()) ? max_size() : __len;
}

内存分配策略并不是简单的加倍,如果n小于当前size,内存加倍,否则内存增长n。

学习资料:

侯捷《STL源码剖析》

https://www.cnblogs.com/coderkian/p/3888429.html

评论